delimit-cli 4.0.0 → 4.0.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +0 -1
- package/gateway/ai/cross_model_audit.py +600 -0
- package/gateway/ai/github_scanner.py +622 -0
- package/gateway/ai/handoff_receipts.py +409 -0
- package/gateway/ai/license_core.py +1 -2
- package/gateway/ai/notify.py +8 -8
- package/gateway/ai/reddit_scanner.py +562 -0
- package/gateway/ai/server.py +15 -7
- package/gateway/ai/session_phoenix.py +371 -0
- package/gateway/ai/swarm.py +2 -2
- package/gateway/ai/toolcard_cache.py +327 -0
- package/gateway/core/contract_ledger.py +1 -1
- package/gateway/core/dependency_graph.py +1 -1
- package/gateway/core/dependency_manifest.py +1 -1
- package/gateway/core/event_backbone.py +2 -2
- package/gateway/core/event_schema.py +1 -1
- package/gateway/core/impact_analyzer.py +1 -1
- package/package.json +1 -7
- package/scripts/security-check.sh +6 -50
|
@@ -0,0 +1,327 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Toolcard Delta Cache — LED-219
|
|
3
|
+
|
|
4
|
+
MCP servers dump full tool definitions every session. GitHub's MCP server
|
|
5
|
+
alone sends 40K+ tokens of tool schemas. This module stores hashed tool
|
|
6
|
+
schemas and only surfaces diffs when schemas change, cutting token waste
|
|
7
|
+
on tool definitions dramatically.
|
|
8
|
+
|
|
9
|
+
This is a MEASUREMENT tool first — it shows the savings potential. The
|
|
10
|
+
actual MCP protocol optimization to send compressed schemas is a separate
|
|
11
|
+
step.
|
|
12
|
+
|
|
13
|
+
Architecture:
|
|
14
|
+
- SHA256 hash of each tool's canonical schema (name + description + parameters)
|
|
15
|
+
- Persistent JSON cache at ~/.delimit/toolcard_cache.json
|
|
16
|
+
- Per-session JSONL logs at ~/.delimit/toolcard_sessions/{date}.jsonl
|
|
17
|
+
- Crash-safe persistence via atomic writes (write to tmp, rename); in-memory state is not locked
|
|
18
|
+
- No external dependencies — stdlib only
|
|
19
|
+
"""
|
|
20
|
+
|
|
21
|
+
import hashlib
|
|
22
|
+
import json
|
|
23
|
+
import logging
|
|
24
|
+
import os
|
|
25
|
+
import tempfile
|
|
26
|
+
import time
|
|
27
|
+
from datetime import datetime, timezone
|
|
28
|
+
from pathlib import Path
|
|
29
|
+
from typing import Any, Dict, List, Optional
|
|
30
|
+
|
|
31
|
+
# Module logger.
logger = logging.getLogger("delimit.toolcard_cache")

# Default on-disk locations, both under the per-user ~/.delimit directory.
_DELIMIT_HOME = Path.home().joinpath(".delimit")
CACHE_FILE = _DELIMIT_HOME.joinpath("toolcard_cache.json")
SESSION_DIR = _DELIMIT_HOME.joinpath("toolcard_sessions")
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
def _canonical_json(obj: Any) -> str:
    """Serialize *obj* to a compact, key-sorted, ASCII-only JSON string.

    Sorting keys and fixing the separators makes the text independent of
    dict insertion order, so equal structures always produce the same
    string (and therefore the same hash).
    """
    compact_separators = (",", ":")
    return json.dumps(
        obj,
        ensure_ascii=True,
        separators=compact_separators,
        sort_keys=True,
    )
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
def _hash_schema(tool: Dict[str, Any]) -> str:
    """Return the SHA-256 hex digest of a tool's canonical schema.

    The digest covers the tool's name, description and parameter schema.
    The parameter schema is read from ``parameters``; when that key is
    absent, the ``inputSchema`` (MCP tool definition field) or
    ``input_schema`` key is used instead, so schema changes are still
    detected for tools described in those shapes. Tools that carry a
    ``parameters`` key hash exactly as they did before this fallback existed.

    Args:
        tool: Tool definition dict. Missing fields default to ``""`` (name,
            description) or ``{}`` (parameter schema).

    Returns:
        The hexadecimal SHA-256 digest of the canonical JSON form.
    """
    parameters: Any = {}
    for key in ("parameters", "inputSchema", "input_schema"):
        if key in tool:
            parameters = tool[key]
            break

    canonical = {
        "name": tool.get("name", ""),
        "description": tool.get("description", ""),
        # Always hashed under the "parameters" key so previously cached
        # hashes remain valid regardless of which source key was used.
        "parameters": parameters,
    }
    return hashlib.sha256(_canonical_json(canonical).encode("utf-8")).hexdigest()
|
|
50
|
+
|
|
51
|
+
|
|
52
|
+
def _estimate_tokens(obj: Any) -> int:
    """Approximate the token cost of *obj*.

    Applies the rule of thumb of four characters of canonical JSON per
    token and never reports fewer than one token.
    """
    char_count = len(_canonical_json(obj))
    approx = char_count // 4
    return approx if approx > 1 else 1
|
|
55
|
+
|
|
56
|
+
|
|
57
|
+
def _atomic_write_json(path: Path, data: Any) -> None:
    """Write *data* as JSON to *path* via a temp file and an atomic rename.

    The JSON is first written to a temporary file in the same directory as
    *path* and then moved over *path* with ``os.replace``, so readers never
    observe a half-written file. The parent directory is created if needed.

    Args:
        path: Destination file.
        data: Object to serialize. Values the JSON encoder does not handle
            natively are converted with ``str`` (``default=str``).

    Raises:
        OSError: If the directory, temp file, or rename fails.
        ValueError: If *data* cannot be serialized (e.g. circular reference).
    """
    path.parent.mkdir(parents=True, exist_ok=True)
    fd, tmp_path = tempfile.mkstemp(
        dir=str(path.parent), suffix=".tmp", prefix=".toolcard_"
    )
    try:
        with os.fdopen(fd, "w", encoding="utf-8") as f:
            json.dump(data, f, indent=2, default=str)
        os.replace(tmp_path, str(path))
    except BaseException:
        # BaseException (not just Exception) so that KeyboardInterrupt or
        # SystemExit raised mid-write does not leave a stray temp file.
        try:
            os.unlink(tmp_path)
        except OSError:
            pass
        raise
|
|
74
|
+
|
|
75
|
+
|
|
76
|
+
class ToolcardCache:
    """Hashed tool schema registry. Sends full schemas on first session, diffs after.

    Each cache entry is keyed by tool name and stores the schema hash, the
    schema itself, and ``first_seen`` / ``last_changed`` timestamps. The
    instance also keeps per-session counters (hits, misses, tool calls) that
    are reported by ``get_stats`` and written out by ``flush_session``.
    """

    def __init__(self, cache_file: Optional[Path] = None, session_dir: Optional[Path] = None):
        """Load the persisted cache and reset the per-session counters.

        Args:
            cache_file: Cache JSON path; defaults to the module ``CACHE_FILE``.
            session_dir: Directory for session JSONL logs; defaults to the
                module ``SESSION_DIR``.
        """
        self._cache_file = cache_file or CACHE_FILE
        self._session_dir = session_dir or SESSION_DIR
        self.cache: Dict[str, Dict[str, Any]] = self._load()
        # Per-session tracking
        self._session_start = datetime.now(timezone.utc).isoformat()
        self._session_calls: Dict[str, int] = {}  # tool_name -> call count
        self._session_registered = 0
        self._session_hits = 0
        self._session_misses = 0

    def _load(self) -> Dict[str, Dict[str, Any]]:
        """Load cache from disk. Returns empty dict if missing or corrupt.

        Entries whose value is not a dict are dropped so that later lookups
        can rely on every cached entry supporting ``.get``.
        """
        try:
            if self._cache_file.exists():
                with open(self._cache_file, "r", encoding="utf-8") as f:
                    data = json.load(f)
                if isinstance(data, dict):
                    return {
                        name: entry
                        for name, entry in data.items()
                        if isinstance(entry, dict)
                    }
        except (ValueError, OSError) as e:
            # ValueError covers both json.JSONDecodeError and UnicodeDecodeError.
            logger.warning("Toolcard cache load failed: %s", e)
        return {}

    def _save(self) -> None:
        """Persist cache to disk atomically."""
        _atomic_write_json(self._cache_file, self.cache)

    def _cached_hash(self, name: str) -> Optional[str]:
        """Return the stored hash for *name*, or None if absent or malformed."""
        entry = self.cache.get(name)
        if isinstance(entry, dict):
            return entry.get("hash")
        return None

    def register_tools(self, tools: List[Dict[str, Any]]) -> Dict[str, Any]:
        """Register tool schemas. Returns only NEW or CHANGED tools.

        Tools without a name are skipped and are excluded from every token
        figure, so they are not reported as savings. The cache file is
        rewritten only when at least one tool was new or changed.

        Args:
            tools: List of tool schema dicts, each with 'name', 'description', 'parameters'.

        Returns:
            Dict with:
                new_tools: list of tool schemas not previously cached
                changed_tools: list of tool schemas whose hash differs
                unchanged_tools: list of tool names (no schema, just names)
                full_tokens: estimated tokens if all named schemas were sent
                delta_tokens: actual tokens for just new/changed (plus names
                    of unchanged tools)
                savings_pct: percentage reduction
                saved_tokens: absolute token count saved
                total_registered: number of tool dicts passed in
                cache_size: number of entries in the cache afterwards
        """
        new_tools: List[Dict[str, Any]] = []
        changed_tools: List[Dict[str, Any]] = []
        unchanged_names: List[str] = []
        full_tokens = 0
        now = datetime.now(timezone.utc).isoformat()

        for tool in tools:
            name = tool.get("name", "")
            if not name:
                continue
            full_tokens += _estimate_tokens(tool)
            h = _hash_schema(tool)

            if name not in self.cache:
                # New tool
                new_tools.append(tool)
                self.cache[name] = {
                    "hash": h,
                    "schema": tool,
                    "first_seen": now,
                    "last_changed": now,
                }
                self._session_misses += 1
            elif self._cached_hash(name) != h:
                # Changed tool (or an entry with no usable hash)
                previous = self.cache[name]
                first_seen = (
                    previous.get("first_seen", now)
                    if isinstance(previous, dict)
                    else now
                )
                changed_tools.append(tool)
                self.cache[name] = {
                    "hash": h,
                    "schema": tool,
                    "first_seen": first_seen,
                    "last_changed": now,
                }
                self._session_misses += 1
            else:
                # Unchanged — cache hit
                unchanged_names.append(name)
                self._session_hits += 1

        self._session_registered = len(tools)
        if new_tools or changed_tools:
            # Nothing to persist when every tool was a cache hit.
            self._save()

        # Token calculations
        delta_tokens = sum(_estimate_tokens(t) for t in new_tools + changed_tools)
        # Unchanged tools still need their names sent (compact summary)
        delta_tokens += sum(len(n) // 4 + 1 for n in unchanged_names)

        saved_tokens = max(0, full_tokens - delta_tokens)
        savings_pct = round((saved_tokens / full_tokens * 100), 1) if full_tokens > 0 else 0.0

        return {
            "new_tools": new_tools,
            "changed_tools": changed_tools,
            "unchanged_tools": unchanged_names,
            "full_tokens": full_tokens,
            "delta_tokens": delta_tokens,
            "savings_pct": savings_pct,
            "saved_tokens": saved_tokens,
            "total_registered": len(tools),
            "cache_size": len(self.cache),
        }

    def get_delta(self, tool_names: List[str]) -> Dict[str, Any]:
        """Split tool names into those present in the cache and those missing.

        Only name presence is checked; schema hashes are not compared here.
        Each cached name counts as a session hit and each missing name as a
        session miss.

        Args:
            tool_names: List of tool names to check against the cache.

        Returns:
            Dict with ``cached`` and ``missing`` name lists, their counts,
            and ``hit_rate`` as a percentage (0.0 for an empty input).
        """
        cached: List[str] = []
        missing: List[str] = []

        for name in tool_names:
            if name in self.cache:
                cached.append(name)
                self._session_hits += 1
            else:
                missing.append(name)
                self._session_misses += 1

        return {
            "cached": cached,
            "missing": missing,
            "cached_count": len(cached),
            "missing_count": len(missing),
            "hit_rate": round(len(cached) / len(tool_names) * 100, 1) if tool_names else 0.0,
        }

    def record_call(self, tool_name: str) -> None:
        """Record that a tool was called in the current session."""
        self._session_calls[tool_name] = self._session_calls.get(tool_name, 0) + 1

    def get_stats(self) -> Dict[str, Any]:
        """Return cache stats: total tools, cached, cache hit rate, token savings."""
        total_checks = self._session_hits + self._session_misses
        hit_rate = round(
            (self._session_hits / total_checks * 100), 1
        ) if total_checks > 0 else 0.0

        # Estimate total cached schema tokens
        cached_tokens = sum(
            _estimate_tokens(entry.get("schema", {}) if isinstance(entry, dict) else {})
            for entry in self.cache.values()
        )

        # Most called tools this session (top 10 by call count)
        top_tools = sorted(
            self._session_calls.items(), key=lambda x: x[1], reverse=True
        )[:10]

        return {
            "total_cached_tools": len(self.cache),
            "session_registered": self._session_registered,
            "session_hits": self._session_hits,
            "session_misses": self._session_misses,
            "session_hit_rate": hit_rate,
            "cached_schema_tokens": cached_tokens,
            "session_tools_called": dict(top_tools),
            "session_start": self._session_start,
            "cache_file": str(self._cache_file),
        }

    def estimate_savings(self, tools: List[Dict[str, Any]]) -> Dict[str, Any]:
        """Estimate token savings without modifying the cache.

        Dry-run version of register_tools — shows what WOULD be saved. As in
        register_tools, tools without a name are skipped and excluded from
        the token figures. Neither the cache nor the session counters change.

        Args:
            tools: List of tool schema dicts.

        Returns:
            Dict with total_tools, would_be_cached, would_need_sending,
            full_tokens, delta_tokens, savings_pct and saved_tokens.
        """
        hits = 0
        misses = 0
        full_tokens = 0
        delta_tokens = 0

        for tool in tools:
            name = tool.get("name", "")
            if not name:
                continue
            tool_tokens = _estimate_tokens(tool)
            full_tokens += tool_tokens
            if name in self.cache and self._cached_hash(name) == _hash_schema(tool):
                hits += 1
                # A cached tool only needs its name sent.
                delta_tokens += len(name) // 4 + 1
            else:
                misses += 1
                delta_tokens += tool_tokens

        saved_tokens = max(0, full_tokens - delta_tokens)
        savings_pct = round((saved_tokens / full_tokens * 100), 1) if full_tokens > 0 else 0.0

        return {
            "total_tools": len(tools),
            "would_be_cached": hits,
            "would_need_sending": misses,
            "full_tokens": full_tokens,
            "delta_tokens": delta_tokens,
            "savings_pct": savings_pct,
            "saved_tokens": saved_tokens,
        }

    def clear(self) -> Dict[str, Any]:
        """Clear the cache. Forces full schema send next session."""
        count = len(self.cache)
        self.cache = {}
        self._save()
        return {
            "cleared": count,
            "message": f"Cleared {count} cached tool schemas. Next session will send full schemas.",
        }

    def flush_session(self) -> Dict[str, Any]:
        """Append the current session stats to today's (UTC) JSONL log.

        Returns:
            Dict with ``written_to`` (log file path) and ``record`` (the
            object that was appended).
        """
        self._session_dir.mkdir(parents=True, exist_ok=True)
        date_str = datetime.now(timezone.utc).strftime("%Y-%m-%d")
        session_file = self._session_dir / f"{date_str}.jsonl"

        record = {
            "session_start": self._session_start,
            "flushed_at": datetime.now(timezone.utc).isoformat(),
            "stats": self.get_stats(),
        }

        with open(session_file, "a", encoding="utf-8") as f:
            f.write(json.dumps(record, default=str) + "\n")

        return {"written_to": str(session_file), "record": record}
|
|
310
|
+
|
|
311
|
+
|
|
312
|
+
# Shared ToolcardCache instance for use by server.py; built on first access.
_cache_instance: Optional[ToolcardCache] = None


def get_cache() -> ToolcardCache:
    """Return the module-level ToolcardCache, constructing it on the first call."""
    global _cache_instance
    existing = _cache_instance
    if existing is not None:
        return existing
    _cache_instance = ToolcardCache()
    return _cache_instance
|
|
322
|
+
|
|
323
|
+
|
|
324
|
+
def reset_cache() -> None:
    """Discard the module-level cache instance.

    Intended for tests: sets the singleton back to None.
    """
    global _cache_instance
    _cache_instance = None
|
|
@@ -3,7 +3,7 @@ Delimit Contract Ledger
|
|
|
3
3
|
Reads, validates, and queries the append-only JSONL event ledger.
|
|
4
4
|
Optional SQLite index for fast lookups (never required for CI).
|
|
5
5
|
|
|
6
|
-
Per
|
|
6
|
+
Per Jamsons Doctrine:
|
|
7
7
|
- Deterministic outputs
|
|
8
8
|
- Append-only artifacts
|
|
9
9
|
- SQLite index is optional, not required for CI
|
|
@@ -5,7 +5,7 @@ Constructs a deterministic service dependency graph from manifests.
|
|
|
5
5
|
The graph maps each API/service to its downstream consumers,
|
|
6
6
|
enabling impact analysis when an API contract changes.
|
|
7
7
|
|
|
8
|
-
Per
|
|
8
|
+
Per Jamsons Doctrine:
|
|
9
9
|
- Deterministic outputs (sorted, reproducible)
|
|
10
10
|
- No telemetry
|
|
11
11
|
- Graceful degradation when manifests are missing
|
|
@@ -3,7 +3,7 @@ Delimit Event Backbone
|
|
|
3
3
|
Constructs ledger events, generates SHA-256 hashes, links hash chains,
|
|
4
4
|
and appends to the append-only JSONL ledger.
|
|
5
5
|
|
|
6
|
-
Per
|
|
6
|
+
Per Jamsons Doctrine:
|
|
7
7
|
- Deterministic outputs
|
|
8
8
|
- Append-only artifacts
|
|
9
9
|
- Fail-closed CI behavior (ledger failures never affect CI)
|
|
@@ -199,7 +199,7 @@ class EventBackbone:
|
|
|
199
199
|
This is the primary API for event generation. It is best-effort:
|
|
200
200
|
if the ledger write fails, the event is still returned but not persisted.
|
|
201
201
|
|
|
202
|
-
CRITICAL: This method NEVER raises exceptions. Per
|
|
202
|
+
CRITICAL: This method NEVER raises exceptions. Per Jamsons Doctrine,
|
|
203
203
|
ledger failures must not affect CI pass/fail outcome.
|
|
204
204
|
|
|
205
205
|
Returns:
|
|
@@ -3,7 +3,7 @@ Delimit Impact Analyzer
|
|
|
3
3
|
Determines downstream consumers affected by an API change
|
|
4
4
|
and produces informational impact summaries for CI output.
|
|
5
5
|
|
|
6
|
-
Per
|
|
6
|
+
Per Jamsons Doctrine:
|
|
7
7
|
- Impact analysis is INFORMATIONAL ONLY
|
|
8
8
|
- NEVER affects CI pass/fail outcome
|
|
9
9
|
- Deterministic outputs
|
package/package.json
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "delimit-cli",
|
|
3
3
|
"mcpName": "io.github.delimit-ai/delimit-mcp-server",
|
|
4
|
-
"version": "4.0.
|
|
4
|
+
"version": "4.0.2",
|
|
5
5
|
"description": "Unify Claude Code, Codex, Cursor, and Gemini CLI with persistent context, governance, and multi-model debate.",
|
|
6
6
|
"main": "index.js",
|
|
7
7
|
"files": [
|
|
@@ -14,12 +14,6 @@
|
|
|
14
14
|
"!gateway/ai/founding_users.py",
|
|
15
15
|
"!gateway/ai/inbox_daemon.py",
|
|
16
16
|
"!gateway/ai/deliberation.py",
|
|
17
|
-
"!gateway/ai/reddit_scanner.py",
|
|
18
|
-
"!gateway/ai/github_scanner.py",
|
|
19
|
-
"!gateway/ai/cross_model_audit.py",
|
|
20
|
-
"!gateway/ai/session_phoenix.py",
|
|
21
|
-
"!gateway/ai/handoff_receipts.py",
|
|
22
|
-
"!gateway/ai/toolcard_cache.py",
|
|
23
17
|
"scripts/",
|
|
24
18
|
"server.json",
|
|
25
19
|
"README.md",
|
|
@@ -25,7 +25,7 @@ fi
|
|
|
25
25
|
|
|
26
26
|
# 2. Blocklist terms
|
|
27
27
|
echo -n " Blocklist... "
|
|
28
|
-
BLOCKLIST="jamsonsholdings|Bladabah|Domainvested26|Delimit26|home/jamsons|infracore|crypttrx|\.wr_env
|
|
28
|
+
BLOCKLIST="jamsonsholdings|Bladabah|Domainvested26|Delimit26|home/jamsons|infracore|crypttrx|\.wr_env"
|
|
29
29
|
if grep -rEi "$BLOCKLIST" "$TMPDIR/package/" --include="*.py" --include="*.js" --include="*.json" 2>/dev/null; then
|
|
30
30
|
echo "❌ BLOCKED TERMS FOUND"
|
|
31
31
|
FAIL=1
|
|
@@ -42,17 +42,9 @@ else
|
|
|
42
42
|
echo "✅ clean"
|
|
43
43
|
fi
|
|
44
44
|
|
|
45
|
-
# 4.
|
|
46
|
-
echo -n " Internal ticket IDs... "
|
|
47
|
-
if grep -rE "LED-[0-9]{3}|STR-[0-9]{3}" "$TMPDIR/package/" --include="*.py" --include="*.js" 2>/dev/null | grep -v "node_modules" | head -1; then
|
|
48
|
-
echo " WARNING: Internal ticket IDs found (cosmetic, not blocking)"
|
|
49
|
-
else
|
|
50
|
-
echo "clean"
|
|
51
|
-
fi
|
|
52
|
-
|
|
53
|
-
# 5. Proprietary files that shouldn't ship
|
|
45
|
+
# 4. Proprietary files that shouldn't ship
|
|
54
46
|
echo -n " Proprietary files... "
|
|
55
|
-
PROPRIETARY="social_target\.py|social\.py|founding_users\.py|inbox_daemon\.py|deliberation\.py
|
|
47
|
+
PROPRIETARY="social_target\.py|social\.py|founding_users\.py|inbox_daemon\.py|deliberation\.py"
|
|
56
48
|
if find "$TMPDIR/package/" -name "*.py" | grep -Ei "$PROPRIETARY" 2>/dev/null; then
|
|
57
49
|
echo "❌ PROPRIETARY FILES IN PACKAGE"
|
|
58
50
|
FAIL=1
|
|
@@ -60,51 +52,15 @@ else
|
|
|
60
52
|
echo "✅ clean"
|
|
61
53
|
fi
|
|
62
54
|
|
|
63
|
-
# Cleanup
|
|
55
|
+
# Cleanup
|
|
64
56
|
rm -rf "$TMPDIR"
|
|
65
57
|
|
|
66
|
-
# ── PyPI dist scan (if dist/ exists) ─────────────────────────────────
|
|
67
|
-
PYPI_DIST="/home/delimit/delimit-gateway/dist"
|
|
68
|
-
if [ -d "$PYPI_DIST" ] && ls "$PYPI_DIST"/*.tar.gz 1>/dev/null 2>&1; then
|
|
69
|
-
echo ""
|
|
70
|
-
echo "PyPI dist scan..."
|
|
71
|
-
PYPI_TMPDIR=$(mktemp -d)
|
|
72
|
-
PYPI_TARBALL=$(ls -t "$PYPI_DIST"/*.tar.gz | head -1)
|
|
73
|
-
tar -xzf "$PYPI_TARBALL" -C "$PYPI_TMPDIR" 2>/dev/null
|
|
74
|
-
|
|
75
|
-
echo -n " Credentials... "
|
|
76
|
-
if grep -rEi '(password|passwd|secret|api_key|apikey)\s*[:=]\s*["\x27][^"\x27]{4,}' "$PYPI_TMPDIR/" --include="*.py" 2>/dev/null | grep -v 'environ\|getenv\|os\.environ\|<configured\|example\|placeholder\|REDACTED'; then
|
|
77
|
-
echo "FOUND CREDENTIALS IN PYPI DIST"
|
|
78
|
-
FAIL=1
|
|
79
|
-
else
|
|
80
|
-
echo "clean"
|
|
81
|
-
fi
|
|
82
|
-
|
|
83
|
-
echo -n " Blocklist... "
|
|
84
|
-
if grep -rEi "$BLOCKLIST" "$PYPI_TMPDIR/" --include="*.py" 2>/dev/null; then
|
|
85
|
-
echo "BLOCKED TERMS IN PYPI DIST"
|
|
86
|
-
FAIL=1
|
|
87
|
-
else
|
|
88
|
-
echo "clean"
|
|
89
|
-
fi
|
|
90
|
-
|
|
91
|
-
echo -n " PII... "
|
|
92
|
-
if grep -rEi '[a-z0-9._%+-]+@(gmail|yahoo|hotmail|outlook|proton|jamsons|wire\.report|domainvested)' "$PYPI_TMPDIR/" --include="*.py" 2>/dev/null | grep -v "example\|placeholder\|<configured\|noreply\|e\.g\.\|docstring"; then
|
|
93
|
-
echo "PII IN PYPI DIST"
|
|
94
|
-
FAIL=1
|
|
95
|
-
else
|
|
96
|
-
echo "clean"
|
|
97
|
-
fi
|
|
98
|
-
|
|
99
|
-
rm -rf "$PYPI_TMPDIR"
|
|
100
|
-
fi
|
|
101
|
-
|
|
102
58
|
if [ $FAIL -ne 0 ]; then
|
|
103
59
|
echo ""
|
|
104
|
-
echo "SECURITY CHECK FAILED
|
|
60
|
+
echo "❌ SECURITY CHECK FAILED — do not publish"
|
|
105
61
|
exit 1
|
|
106
62
|
fi
|
|
107
63
|
|
|
108
64
|
echo ""
|
|
109
|
-
echo "All security checks passed"
|
|
65
|
+
echo "✅ All security checks passed"
|
|
110
66
|
exit 0
|