delimit-cli 4.1.44 → 4.1.48

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
-- Schema for the Twttr241 tweet corpus, response cache, and request budget.
-- Design record: DECISION_TWTTR241_CORPUS.md
--
-- Invariants:
--   * tweets — append-only moat; rows are never purged.
--   * cache  — disposable; entries are gated by TTL (expires_at).
--   * budget — the single gate every Twttr241 HTTP call passes through.

PRAGMA journal_mode=WAL;
PRAGMA synchronous=NORMAL;

-- Corpus table (the moat; never purged).
CREATE TABLE IF NOT EXISTS tweets (
    tweet_id       TEXT PRIMARY KEY,
    author_handle  TEXT NOT NULL,
    author_id      TEXT,
    text           TEXT NOT NULL,
    created_at     INTEGER NOT NULL,
    fetched_at     INTEGER NOT NULL,
    lang           TEXT,
    reply_to_id    TEXT,
    quote_of_id    TEXT,
    like_count     INTEGER,
    retweet_count  INTEGER,
    reply_count    INTEGER,
    view_count     INTEGER,
    has_media      INTEGER,
    urls_json      TEXT,
    hashtags_json  TEXT,
    mentions_json  TEXT,
    venture_tags   TEXT, -- comma-joined, e.g. 'delimit,wirereport'
    raw_json       TEXT
);
CREATE INDEX IF NOT EXISTS idx_tweets_author_time ON tweets(author_handle, created_at DESC);
CREATE INDEX IF NOT EXISTS idx_tweets_created ON tweets(created_at DESC);
CREATE INDEX IF NOT EXISTS idx_tweets_venture ON tweets(venture_tags);

-- Full-text search over the corpus. This is an FTS5 *external-content*
-- table (content='tweets'), not a contentless one: it stores only the
-- index and reads row text back from tweets by rowid.
-- NOTE(review): an external-content FTS table stays in sync only if every
-- write to tweets is mirrored into tweets_fts (triggers or app-level
-- inserts) — confirm the writer does this.
CREATE VIRTUAL TABLE IF NOT EXISTS tweets_fts USING fts5(
    text, author_handle,
    content='tweets', content_rowid='rowid'
);

-- Users, captured opportunistically as they appear in fetched tweets.
CREATE TABLE IF NOT EXISTS users (
    user_id            TEXT PRIMARY KEY,
    handle             TEXT NOT NULL,
    display_name       TEXT,
    bio                TEXT,
    followers_count    INTEGER,
    following_count    INTEGER,
    first_seen_at      INTEGER NOT NULL,
    last_refreshed_at  INTEGER NOT NULL,
    raw_json           TEXT
);
CREATE UNIQUE INDEX IF NOT EXISTS idx_users_handle ON users(handle);

-- HTTP response cache (disposable; rows are dead once expires_at passes).
CREATE TABLE IF NOT EXISTS cache (
    cache_key      TEXT PRIMARY KEY,
    endpoint       TEXT NOT NULL,
    response_json  TEXT NOT NULL,
    fetched_at     INTEGER NOT NULL,
    expires_at     INTEGER NOT NULL
);
CREATE INDEX IF NOT EXISTS idx_cache_expires ON cache(expires_at);

-- Budget tracker: one row per hour bucket; day/month columns allow
-- rollups without date math at query time.
CREATE TABLE IF NOT EXISTS budget (
    hour_bucket   INTEGER PRIMARY KEY,
    day_bucket    INTEGER NOT NULL,
    month_bucket  TEXT NOT NULL,
    requests      INTEGER NOT NULL DEFAULT 0,
    hit_429       INTEGER NOT NULL DEFAULT 0
);
CREATE INDEX IF NOT EXISTS idx_budget_day ON budget(day_bucket);
CREATE INDEX IF NOT EXISTS idx_budget_month ON budget(month_bucket);
@@ -127,10 +127,14 @@ class OpenAPIDiffEngine:
127
127
  for path in old_set & new_set:
128
128
  self._compare_methods(path, old_paths[path], new_paths[path])
129
129
 
130
+ # LED-290: include "trace" (OpenAPI 3.0+) and "query" (OpenAPI 3.2.0
131
+ # adds the QUERY HTTP method for safe, idempotent requests with bodies).
132
+ HTTP_METHODS = ("get", "post", "put", "delete", "patch", "head", "options", "trace", "query")
133
+
130
134
  def _compare_methods(self, path: str, old_methods: Dict, new_methods: Dict):
131
135
  """Compare HTTP methods for an endpoint."""
132
- old_set = set(m for m in old_methods.keys() if m in ["get", "post", "put", "delete", "patch", "head", "options"])
133
- new_set = set(m for m in new_methods.keys() if m in ["get", "post", "put", "delete", "patch", "head", "options"])
136
+ old_set = set(m for m in old_methods.keys() if m in self.HTTP_METHODS)
137
+ new_set = set(m for m in new_methods.keys() if m in self.HTTP_METHODS)
134
138
 
135
139
  # Check removed methods
136
140
  for method in old_set - new_set:
@@ -0,0 +1,242 @@
1
+ """Generator drift detection (LED-713).
2
+
3
+ Detects when a committed generated artifact (e.g. agentspec's
4
+ schemas/v1/agent.schema.json regenerated from a Zod source) has drifted
5
+ from what its generator script would produce today.
6
+
7
+ Use case: a maintainer changes the source of truth (Zod schema, OpenAPI
8
+ generator, protobuf, etc.) but forgets to regenerate and commit the
9
+ artifact. CI catches the drift before the stale generated file ships.
10
+
11
+ Generic over generators — caller supplies the regen command and the
12
+ artifact path. Returns a structured drift report that can be merged into
13
+ the standard delimit-action PR comment.
14
+ """
15
+
16
+ from __future__ import annotations
17
+
18
+ import json
19
+ import os
20
+ import shlex
21
+ import shutil
22
+ import subprocess
23
+ import tempfile
24
+ from dataclasses import dataclass, field
25
+ from pathlib import Path
26
+ from typing import Any, Dict, List, Optional
27
+
28
+
29
@dataclass
class DriftResult:
    """Outcome of a generator drift check for one committed artifact."""

    drifted: bool
    artifact_path: str
    regen_command: str
    # JSONSchemaChange objects when drift was detected; empty otherwise.
    changes: List[Any] = field(default_factory=list)
    error: Optional[str] = None
    runtime_seconds: float = 0.0

    def to_dict(self) -> Dict[str, Any]:
        """Serialize to a plain JSON-compatible dict for reports and logs."""
        return {
            "drifted": self.drifted,
            "artifact_path": self.artifact_path,
            "regen_command": self.regen_command,
            "change_count": len(self.changes),
            "changes": [
                {
                    "type": c.type.value,
                    "path": c.path,
                    "message": c.message,
                    "is_breaking": c.is_breaking,
                }
                for c in self.changes
            ],
            "error": self.error,
            "runtime_seconds": round(self.runtime_seconds, 3),
        }


def detect_drift(
    repo_root: str,
    artifact_path: str,
    regen_command: str,
    timeout_seconds: int = 60,
) -> DriftResult:
    """Check whether the committed artifact matches its generator output.

    Runs ``regen_command`` inside ``repo_root``, compares the regenerated
    artifact against the committed one, and restores the committed bytes
    afterwards so the workspace is left clean — including when the
    generator times out or exits non-zero after partially writing the
    file (previously those paths left a clobbered artifact behind).

    Args:
        repo_root: Absolute path to the repo checkout.
        artifact_path: Path to the generated artifact, relative to repo_root.
        regen_command: Shell command that regenerates the artifact in place.
            Example: "pnpm -r run build" or "node packages/sdk/dist/scripts/export-schema.js"
        timeout_seconds: Hard timeout for the generator (default 60).

    Returns:
        DriftResult with drift status, classified changes, and runtime.
    """
    import time

    repo_root_p = Path(repo_root).resolve()
    artifact_p = (repo_root_p / artifact_path).resolve()

    def _fail(message: str, runtime: float = 0.0) -> DriftResult:
        # Non-drift outcome carrying a diagnostic. drifted stays False so a
        # broken generator setup never masquerades as real schema drift.
        return DriftResult(
            drifted=False,
            artifact_path=artifact_path,
            regen_command=regen_command,
            error=message,
            runtime_seconds=runtime,
        )

    if not artifact_p.exists():
        return _fail(f"Artifact not found: {artifact_path}")

    # Snapshot the committed artifact before regen so it can be restored.
    try:
        committed_text = artifact_p.read_text()
        committed_doc = json.loads(committed_text)
    except (OSError, json.JSONDecodeError) as e:
        return _fail(f"Failed to read committed artifact: {e}")

    # Parse the command safely — shell=False to avoid command injection.
    # Users needing shell features (&&, |, env vars, etc.) should point
    # generator_command at a script file instead of an inline chain.
    try:
        argv = shlex.split(regen_command)
    except ValueError as e:
        return _fail(f"Could not parse generator_command: {e}")
    if not argv:
        return _fail("generator_command is empty")
    # Reject obvious shell metacharacters — force users to use a script
    # file if they need chaining or redirection.
    SHELL_META = set("&|;><`$")
    if any(ch in token for token in argv for ch in SHELL_META):
        return _fail(
            "generator_command contains shell metacharacters (&|;><`$). Point it at a script file instead of chaining inline."
        )

    # Run the regenerator.
    start = time.time()
    try:
        result = subprocess.run(
            argv,
            shell=False,
            cwd=str(repo_root_p),
            capture_output=True,
            text=True,
            timeout=timeout_seconds,
        )
    except subprocess.TimeoutExpired:
        # The generator may have partially rewritten the artifact before
        # being killed — restore the committed bytes (bug fix: this path
        # used to leave the workspace dirty).
        artifact_p.write_text(committed_text)
        return _fail(
            f"Generator timed out after {timeout_seconds}s",
            runtime=time.time() - start,
        )
    except FileNotFoundError as e:
        # The executable never ran, so the artifact is untouched.
        return _fail(
            f"Generator executable not found: {e}",
            runtime=time.time() - start,
        )

    runtime = time.time() - start

    if result.returncode != 0:
        # A failing generator can still have clobbered the artifact —
        # restore the committed bytes before bailing out (bug fix: this
        # path also used to leave the workspace dirty).
        artifact_p.write_text(committed_text)
        return _fail(
            f"Generator exited {result.returncode}: {result.stderr.strip()[:500]}",
            runtime=runtime,
        )

    # Read the regenerated artifact.
    try:
        regen_text = artifact_p.read_text()
        regen_doc = json.loads(regen_text)
    except (OSError, json.JSONDecodeError) as e:
        # Restore committed version so we don't leave the workspace dirty.
        artifact_p.write_text(committed_text)
        return _fail(f"Failed to read regenerated artifact: {e}", runtime=runtime)

    # Restore the committed file before diffing — leave the workspace clean.
    artifact_p.write_text(committed_text)

    # Quick equality check first: identical parsed documents mean no drift,
    # regardless of formatting differences in the serialized text.
    if committed_doc == regen_doc:
        return DriftResult(
            drifted=False,
            artifact_path=artifact_path,
            regen_command=regen_command,
            runtime_seconds=runtime,
        )

    # Drift detected — classify the changes via the JSON Schema diff engine.
    # Imported lazily so the no-drift fast path never pays for it.
    from .json_schema_diff import JSONSchemaDiffEngine

    engine = JSONSchemaDiffEngine()
    changes = engine.compare(committed_doc, regen_doc)
    return DriftResult(
        drifted=True,
        artifact_path=artifact_path,
        regen_command=regen_command,
        changes=changes,
        runtime_seconds=runtime,
    )
209
+
210
+
211
def format_drift_report(result: DriftResult) -> str:
    """Render a drift report as a markdown block for PR comments.

    Produces one of three shapes: an error notice, a clean confirmation,
    or a drift listing with one bullet per classified change. Trailing
    spaces before newlines are intentional markdown hard line breaks.
    """
    header = "### Generator drift check"
    artifact_line = f"Artifact: `{result.artifact_path}` "

    if result.error:
        rows = [
            header,
            "",
            artifact_line,
            "Status: error ",
            f"Detail: {result.error}",
        ]
        return "\n".join(rows) + "\n"

    if not result.drifted:
        rows = [
            header,
            "",
            artifact_line,
            "Status: clean (committed artifact matches generator output) ",
            f"Generator runtime: {result.runtime_seconds:.2f}s",
        ]
        return "\n".join(rows) + "\n"

    breaking_total = sum(1 for change in result.changes if change.is_breaking)
    safe_total = len(result.changes) - breaking_total
    rows = [
        header,
        "",
        artifact_line,
        f"Status: drifted ({len(result.changes)} change(s) — {breaking_total} breaking, {safe_total} non-breaking) ",
        f"Generator runtime: {result.runtime_seconds:.2f}s ",
        "",
        "The committed artifact does not match what the generator produces today. Re-run the generator and commit the result, or revert the source change.",
        "",
    ]
    rows.extend(
        f"- [{'breaking' if change.is_breaking else 'ok'}] {change.type.value} at `{change.path}` — {change.message}"
        for change in result.changes
    )
    return "\n".join(rows) + "\n"