continuum-code 0.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
continuum/__init__.py ADDED
@@ -0,0 +1,8 @@
1
+ """Continuum: rule engine so coding agents obey repo rules."""
2
+
3
+ from importlib.metadata import version as _version
4
+
5
+ try:
6
+ __version__ = _version("continuum-code")
7
+ except Exception:
8
+ __version__ = "0.2.0"
@@ -0,0 +1,416 @@
1
+ """Build ChangeSummary from git diff or from an agent plan."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import fnmatch
6
+ import json
7
+ import re
8
+ import subprocess
9
+ from pathlib import Path
10
+ from typing import Any
11
+
12
+ import yaml
13
+ from pydantic import BaseModel, Field
14
+
15
# Path globs whose diff content is scanned for command-like strings:
# orchestration DAGs, shell scripts, Makefiles, CI workflows, Dockerfiles.
COMMAND_SENSITIVE_GLOBS = [
    "dags/**/*.py",  # Airflow-style DAG definitions
    "**/*.sh",  # shell scripts anywhere in the tree
    "Makefile",
    "makefile",
    ".github/workflows/**/*.yml",
    ".github/workflows/**/*.yaml",
    "Dockerfile",
    "**/Dockerfile*",
]
26
+
27
+
28
class DiffStats(BaseModel):
    """Optional diff statistics, parsed from ``git diff --shortstat`` output."""

    # All counters default to 0 when the corresponding figure is absent
    # from the shortstat line (see _git_diff_stats).
    files: int = 0
    insertions: int = 0
    deletions: int = 0
34
+
35
+
36
class ChangeSummary(BaseModel):
    """Minimal v0.1 input for the evaluator: what changed."""

    # Repo-relative paths touched by the change.
    paths_changed: list[str] = Field(default_factory=list)
    # Dependency names added/removed between base and head (see _deps_diff).
    deps_added: list[str] = Field(default_factory=list)
    deps_removed: list[str] = Field(default_factory=list)
    # Command-like strings requested by a plan or mined from the diff;
    # None when none were found.
    commands_requested: list[str] | None = None
    diff_stats: DiffStats | None = None
    diff_content: str | None = None  # Unified diff for pattern bans (from_git sets when paths_changed)
45
+
46
+
47
def from_plan(plan: dict[str, Any]) -> ChangeSummary:
    """Build ChangeSummary from an agent plan (paths, deps, commands). No git."""

    def coerce_list(value: Any) -> list:
        # Wrap a scalar into a one-element list; falsy scalars become [].
        if isinstance(value, list):
            return value
        return [value] if value else []

    paths = coerce_list(plan.get("paths_changed") or plan.get("paths") or [])
    deps_added = coerce_list(plan.get("deps_added") or [])
    deps_removed = coerce_list(plan.get("deps_removed") or [])
    # commands stays None (as opposed to []) when the plan names none at all.
    commands = plan.get("commands_requested") or plan.get("commands")
    if commands is not None:
        commands = coerce_list(commands)
    return ChangeSummary(
        paths_changed=[str(p) for p in paths],
        deps_added=[str(d) for d in deps_added],
        deps_removed=[str(d) for d in deps_removed],
        commands_requested=commands,
    )
67
+
68
+
69
def from_git(
    repo_root: str | Path,
    base_ref: str | None = None,
    head_ref: str = "HEAD",
    staged_only: bool = False,
) -> ChangeSummary:
    """Build ChangeSummary from git diff and dependency file changes."""
    root = Path(repo_root)
    changed = _git_changed_paths(root, base_ref, head_ref, staged_only)
    added, removed = _deps_diff(root, base_ref, head_ref)
    stats = _git_diff_stats(root, base_ref, head_ref, staged_only)
    # Only fetch the unified diff when something actually changed.
    diff_text = ""
    if changed:
        diff_text = _git_diff_unified(root, base_ref, head_ref, staged_only, changed)
    commands = _commands_from_diff(
        root, base_ref, head_ref, staged_only, changed, diff_text=diff_text or None
    )
    return ChangeSummary(
        paths_changed=changed,
        deps_added=added,
        deps_removed=removed,
        commands_requested=commands or None,
        diff_stats=stats,
        diff_content=diff_text or None,
    )
96
+
97
+
98
+ def _run_git(repo_root: Path, *args: str) -> str:
99
+ cmd = ["git", "-C", str(repo_root), *args]
100
+ try:
101
+ return subprocess.run(cmd, capture_output=True, text=True, timeout=30).stdout.strip()
102
+ except (subprocess.SubprocessError, FileNotFoundError):
103
+ return ""
104
+
105
+
106
def _git_changed_paths(
    repo_root: Path,
    base_ref: str | None,
    head_ref: str,
    staged_only: bool,
) -> list[str]:
    """List changed paths; staged_only takes precedence over base_ref."""
    if staged_only:
        args = ["diff", "--name-only", "--cached"]
    elif base_ref:
        args = ["diff", "--name-only", base_ref, head_ref]
    else:
        args = ["diff", "--name-only", head_ref]
    output = _run_git(repo_root, *args)
    return [line.strip() for line in output.splitlines() if line.strip()]
121
+
122
+
123
+ def _path_matches_glob(path: str, pattern: str) -> bool:
124
+ """True if path matches glob pattern (supports **)."""
125
+ path = path.replace("\\", "/")
126
+ pattern = pattern.replace("\\", "/")
127
+ if "**" not in pattern:
128
+ return fnmatch.fnmatch(path, pattern)
129
+ parts = pattern.split("**")
130
+ if len(parts) == 1:
131
+ return fnmatch.fnmatch(path, pattern)
132
+ start = parts[0].rstrip("/")
133
+ end = parts[-1].lstrip("/")
134
+ if start and not path.startswith(start if start.endswith("/") else start + "/"):
135
+ if not path.startswith(start):
136
+ return False
137
+ if end:
138
+ # end may be "*.py" or "/*.py" - require path to match the suffix as fnmatch
139
+ if not fnmatch.fnmatch(path, "*" + end if end.startswith("/") else "*" + end):
140
+ return False
141
+ return True
142
+
143
+
144
def _is_command_sensitive_path(path: str) -> bool:
    """True if path matches any glob we scan for command-like strings."""
    for pattern in COMMAND_SENSITIVE_GLOBS:
        if _path_matches_glob(path, pattern):
            return True
    return False
146
+
147
+
148
def _git_diff_unified(
    repo_root: Path,
    base_ref: str | None,
    head_ref: str,
    staged_only: bool,
    paths: list[str] | None = None,
) -> str:
    """Return a unified (-U0) diff for the given refs, optionally limited to paths.

    Ref selection mirrors _git_changed_paths: staged_only takes precedence
    (index vs HEAD); otherwise base_ref..head_ref, or working tree vs head_ref.
    """
    args = ["diff", "-U0"]
    if staged_only:
        # Previously refs were still appended alongside --cached, producing an
        # invalid `git diff --cached <base> <head>` invocation when base_ref
        # was set (git rejects --cached with two commits).
        args.append("--cached")
    elif base_ref:
        args.extend([base_ref, head_ref])
    else:
        args.append(head_ref)
    if paths:
        args.append("--")
        args.extend(paths)
    return _run_git(repo_root, *args) or ""
167
+
168
+
169
+ def _extract_commands_from_line(line: str) -> list[str]:
170
+ """Heuristic: extract command-like strings (dbt, airflow, aws, gcloud, or quoted commands)."""
171
+ found: list[str] = []
172
+ # Strip leading + and spaces from diff line
173
+ line = line.strip()
174
+ if line.startswith("+"):
175
+ line = line[1:].strip()
176
+ # Known prefixes: capture token after prefix (e.g. dbt run --full-refresh)
177
+ for prefix in ("dbt ", "airflow ", "aws ", "gcloud "):
178
+ for m in re.finditer(re.escape(prefix) + r"([^\s\"']+(?:\s+[^\s\"']+)*)", line):
179
+ cmd = (prefix + m.group(1)).strip()
180
+ if len(cmd) < 200: # bound length
181
+ found.append(cmd)
182
+ # Quoted strings that look like commands (e.g. "dbt run --full-refresh", 'airflow dags backfill')
183
+ for quote in ('"', "'"):
184
+ pattern = quote + r"([^" + quote + r"]*?(?:dbt|airflow|aws|gcloud)[^" + quote + r"]*?)" + quote
185
+ for m in re.finditer(pattern, line):
186
+ cmd = m.group(1).strip()
187
+ if 0 < len(cmd) < 200:
188
+ found.append(cmd)
189
+ return found
190
+
191
+
192
def _commands_from_diff(
    repo_root: Path,
    base_ref: str | None,
    head_ref: str,
    staged_only: bool,
    paths_changed: list[str],
    diff_text: str | None = None,
) -> list[str]:
    """Extract command-like strings from unified diff of command-sensitive paths.

    Only added (+) lines belonging to command-sensitive files are scanned;
    the result is de-duplicated while preserving first-seen order.
    """
    sensitive = [p for p in paths_changed if _is_command_sensitive_path(p)]
    if not sensitive:
        return []
    diff = diff_text or _git_diff_unified(repo_root, base_ref, head_ref, staged_only, sensitive)
    if not diff or not diff.strip():
        return []
    commands: list[str] = []
    current_file: str | None = None
    for line in diff.splitlines():
        if line.startswith("--- ") or line.startswith("+++ "):
            if line.startswith("+++ "):
                # "+++ b/dags/foo.py" -> "dags/foo.py".
                # BUG FIX: str.lstrip("a/b/") strips the *character set*
                # {a, b, /} and corrupted paths like "b/app/x.sh" -> "pp/x.sh";
                # strip the two-character prefix instead.
                target = line[4:].strip()
                if target.startswith(("a/", "b/")):
                    target = target[2:]
                current_file = None if target == "/dev/null" else target
            continue
        if line.startswith("+") and current_file and _is_command_sensitive_path(current_file):
            commands.extend(_extract_commands_from_line(line))
    return list(dict.fromkeys(commands))
218
+
219
+
220
def _git_diff_stats(
    repo_root: Path,
    base_ref: str | None,
    head_ref: str,
    staged_only: bool,
) -> DiffStats | None:
    """Parse `git diff --shortstat` into DiffStats; None when there is no output."""
    if staged_only:
        args = ["diff", "--cached", "--shortstat"]
    elif base_ref:
        args = ["diff", "--shortstat", base_ref, head_ref]
    else:
        args = ["diff", "--shortstat", head_ref]
    out = _run_git(repo_root, *args)
    if not out:
        return None

    def count(pattern: str) -> int:
        # Pull one integer out of a line like
        # " 2 files changed, 3 insertions(+), 1 deletion(-)".
        match = re.search(pattern, out)
        return int(match.group(1)) if match else 0

    return DiffStats(
        files=count(r"(\d+)\s+files?\s+changed"),
        insertions=count(r"(\d+)\s+insertion"),
        deletions=count(r"(\d+)\s+deletion"),
    )
242
+
243
+
244
def _deps_diff(repo_root: Path, base_ref: str | None, head_ref: str) -> tuple[list[str], list[str]]:
    """Detect added/removed deps by comparing lock/requirements between refs.

    Head side: the working tree when head_ref is "HEAD", otherwise the file as
    committed at head_ref. Base side: base_ref when given, else HEAD — so by
    default uncommitted edits are compared against the last commit.
    Returns (added, removed) with duplicates removed, first-seen order kept.
    """
    base = base_ref or "HEAD"

    def read_head(path: str) -> str:
        if head_ref == "HEAD" or not head_ref:
            p = repo_root / path
            return p.read_text(encoding="utf-8", errors="ignore") if p.exists() else ""
        return _run_git(repo_root, "show", f"{head_ref}:{path}") or ""

    def read_base(path: str) -> str:
        # BUG FIX: previously the base side was only read when base_ref was
        # given (the computed default base went unused), so every declared
        # dependency was reported as "added" on every default run.
        return _run_git(repo_root, "show", f"{base}:{path}") or ""

    added: list[str] = []
    removed: list[str] = []

    def diff_names(head_deps: list[str], base_deps: list[str]) -> None:
        added.extend(n for n in head_deps if n not in base_deps)
        removed.extend(n for n in base_deps if n not in head_deps)

    # Python: declared deps (requirements.txt, pyproject.toml).
    for path in ("requirements.txt", "requirements-base.txt", "pyproject.toml"):
        head_content = read_head(path)
        base_content = read_base(path)
        if not head_content and not base_content:
            continue
        diff_names(_parse_python_deps(head_content, path), _parse_python_deps(base_content, path))

    # Python: lockfiles (resolved deps).
    for path in ("poetry.lock", "uv.lock"):
        head_content = read_head(path)
        base_content = read_base(path)
        if not head_content and not base_content:
            continue
        diff_names(_parse_lock_toml_deps(head_content), _parse_lock_toml_deps(base_content))

    # Node: package.json (declared deps).
    head_pkg = read_head("package.json")
    base_pkg = read_base("package.json")
    if head_pkg or base_pkg:
        diff_names(_parse_package_json_deps(head_pkg), _parse_package_json_deps(base_pkg))

    # Node: lockfiles (resolved deps).
    lock_parsers = {
        "package-lock.json": _parse_package_lock_json,
        "pnpm-lock.yaml": _parse_pnpm_lock,
        "yarn.lock": _parse_yarn_lock,
    }
    for path, parser in lock_parsers.items():
        head_content = read_head(path)
        base_content = read_base(path)
        if not head_content and not base_content:
            continue
        diff_names(parser(head_content), parser(base_content))

    return (list(dict.fromkeys(added)), list(dict.fromkeys(removed)))
306
+
307
+
308
def _parse_python_deps(content: str, path: str) -> list[str]:
    """Extract package names from requirements.txt or pyproject.toml content.

    Requirement specs are reduced to the bare, normalized project name so that
    version bumps ("foo>=1" -> "foo>=2") do not register as add+remove pairs.
    """
    names: list[str] = []

    def add_requirement(spec: str) -> None:
        # "foo[ext]>=1.0; extra" -> "foo" (normalized).
        name = re.split(r"[\s\[\]=<>~!;,]", spec)[0]
        if name and not name.startswith("{"):
            names.append(_normalize_pypi_name(name))

    if "pyproject.toml" in path:
        # [project] dependencies = ["foo>=1", "bar"], inline or multi-line.
        in_deps = False
        for line in content.splitlines():
            stripped = line.strip()
            if not in_deps and stripped.startswith("dependencies") and "=" in line:
                rest = line.split("=", 1)[1].strip()
                if rest.startswith("["):
                    for part in re.split(r"[\"'\s,]+", rest.strip("[]")):
                        if part:
                            add_requirement(part)
                    # BUG FIX: an inline "[...]" previously left the parser
                    # "inside" the list, swallowing unrelated following lines.
                    in_deps = "]" not in rest
                continue
            if in_deps:
                # Parse entries up to (and including) the closing bracket line;
                # the old code appended raw lines with quotes/commas attached.
                body, closed, _ = stripped.partition("]")
                for part in re.split(r"[\"'\s,]+", body):
                    if part:
                        add_requirement(part)
                if closed:
                    break
        return names

    # requirements.txt: "foo==1.0", "foo[ext]", comments, "-r other.txt" includes.
    for line in content.splitlines():
        line = line.split("#")[0].strip()
        if not line or line.startswith("-") or line.startswith("["):
            continue
        add_requirement(line)
    return names
337
+
338
+
339
+ def _normalize_pypi_name(name: str) -> str:
340
+ return name.lower().replace("_", "-")
341
+
342
+
343
def _parse_lock_toml_deps(content: str) -> list[str]:
    """Extract package names from poetry.lock or uv.lock (TOML with [[package]] or [package])."""
    # Both Poetry and uv write name = "pkg" (or 'pkg') inside package sections.
    matches = re.findall(r'name\s*=\s*["\']([^"\']+)["\']', content)
    return [_normalize_pypi_name(match) for match in matches]
350
+
351
+
352
+ def _parse_package_lock_json(content: str) -> list[str]:
353
+ """Extract dependency names from package-lock.json (v1: dependencies keys; v2+: packages keys)."""
354
+ if not content.strip():
355
+ return []
356
+ try:
357
+ data = json.loads(content)
358
+ names: list[str] = []
359
+ deps = data.get("dependencies") or {}
360
+ names.extend(str(k) for k in deps)
361
+ # v2+ lockfile has "packages" with keys like "node_modules/foo" or "node_modules/bar@1.0"
362
+ packages = data.get("packages") or {}
363
+ for key in packages:
364
+ if key == "":
365
+ continue
366
+ # "node_modules/name" or "node_modules/name@version"
367
+ part = key.replace("node_modules/", "").split("@")[0]
368
+ if part:
369
+ names.append(part)
370
+ return list(dict.fromkeys(names))
371
+ except Exception:
372
+ return []
373
+
374
+
375
def _parse_pnpm_lock(content: str) -> list[str]:
    """Extract package names from pnpm-lock.yaml ("packages:" section keys).

    Keys look like "/lodash@4.17.21", "registry.npmjs.org/lodash@4.17.21",
    "lodash@4.17.21", or scoped "/@babel/core@7.0.0". Malformed input
    yields [].
    """
    if not content.strip():
        return []
    try:
        data = yaml.safe_load(content)
        if not data or not isinstance(data, dict):
            return []
        names: list[str] = []
        for key in data.get("packages") or {}:
            segments = str(key).split("/")
            name = segments[-1].split("@")[0]
            if len(segments) >= 2 and segments[-2].startswith("@"):
                # BUG FIX: scoped keys like "/@babel/core@7.0.0" previously
                # lost their scope ("core"); re-attach it ("@babel/core").
                name = segments[-2] + "/" + name
            if name and not name.startswith("."):
                names.append(name)
        return names
    except Exception:
        return []
393
+
394
+
395
+ def _parse_yarn_lock(content: str) -> list[str]:
396
+ """Extract package names from yarn.lock (optional "name@version": or name@version:)."""
397
+ names: list[str] = []
398
+ # Lines like: "lodash@4.17.21": or lodash@^4.0.0:
399
+ for m in re.finditer(r'^["]?([^@"\s]+)@', content, re.MULTILINE):
400
+ name = m.group(1)
401
+ if name and not name.startswith("http"):
402
+ names.append(name)
403
+ return list(dict.fromkeys(names))
404
+
405
+
406
+ def _parse_package_json_deps(content: str) -> list[str]:
407
+ """Extract dependency names from package.json (dependencies + devDependencies)."""
408
+ if not content.strip():
409
+ return []
410
+ try:
411
+ data = json.loads(content)
412
+ deps = list(data.get("dependencies") or {})
413
+ deps += list(data.get("devDependencies") or {})
414
+ return deps
415
+ except Exception:
416
+ return []