code-workflow-probe 0.1.4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
code_workflow_probe.py
ADDED
|
@@ -0,0 +1,3221 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
"""code-workflow-probe: deterministic repo workflow profile syncer.
|
|
3
|
+
|
|
4
|
+
API:
|
|
5
|
+
sync(root=".", cache_path=None, changed_files=None, write=True, format="text", verbose=False, incremental=True, paths_only=False, progress=None)
|
|
6
|
+
sync_async(root=".", cache_path=None, changed_files=None, write=True, format="text", verbose=False, incremental=True, paths_only=False, progress=None, executor=None)
|
|
7
|
+
status(root=".", cache_path=None, format="text", verbose=False, detail="compact", limit=8, depth=2)
|
|
8
|
+
edit(root=".", changed_files=None, cache_path=None, format="text", verbose=False)
|
|
9
|
+
affected(root=".", changed_files=None, cache_path=None, format="text", verbose=False)
|
|
10
|
+
install_skill(tool="codex", skills_dir=None, dry_run=False, overwrite=True, format="text", verbose=False)
|
|
11
|
+
|
|
12
|
+
CLI:
|
|
13
|
+
python code_workflow_probe.py sync --root .
|
|
14
|
+
python code_workflow_probe.py status --root .
|
|
15
|
+
python code_workflow_probe.py edit --changed path/to/file
|
|
16
|
+
python code_workflow_probe.py affected --changed path/to/file
|
|
17
|
+
python code_workflow_probe.py install-skill
|
|
18
|
+
"""
|
|
19
|
+
|
|
20
|
+
from __future__ import annotations
|
|
21
|
+
|
|
22
|
+
import argparse
|
|
23
|
+
import copy
|
|
24
|
+
import fnmatch
|
|
25
|
+
import hashlib
|
|
26
|
+
import json
|
|
27
|
+
import os
|
|
28
|
+
import re
|
|
29
|
+
import subprocess
|
|
30
|
+
import sys
|
|
31
|
+
from concurrent.futures import Executor, Future, ThreadPoolExecutor
|
|
32
|
+
from collections import defaultdict
|
|
33
|
+
from datetime import datetime, timezone
|
|
34
|
+
from pathlib import Path
|
|
35
|
+
from typing import Any, Callable, Dict, Iterable, List, Optional, Sequence, Set, Tuple
|
|
36
|
+
|
|
37
|
+
try:
|
|
38
|
+
import tomllib
|
|
39
|
+
except ModuleNotFoundError: # pragma: no cover - Python < 3.11 fallback.
|
|
40
|
+
tomllib = None # type: ignore[assignment]
|
|
41
|
+
|
|
42
|
+
|
|
43
|
+
# Package version; matches the 0.1.4 wheel this file ships in and is stamped
# into every profile produced by _ProfileBuilder.build ("version" key).
VERSION = "0.1.4"
# Schema revision written into every result/profile dict ("schema_version").
SCHEMA_VERSION = 1
# File name of the profile cache.
# NOTE(review): presumably joined onto the repo root by _resolve_cache_path
# when no explicit cache path is given -- that helper is not visible here.
DEFAULT_CACHE_NAME = ".code-workflow-probe.json"
# Directory name created under the skills dir by install_skill().
SKILL_NAME = "code-workflow-probe"
# Defaults for the `limit` and `depth` parameters of status().
DEFAULT_STATUS_LIMIT = 8
DEFAULT_STATUS_DEPTH = 2
# Known values for the `detail` parameter of status() (default "compact").
STATUS_DETAILS = {"compact", "standard", "full"}

# Workflow categories; _javascript_component walks these in order to pick
# matching entries out of package.json "scripts".
WORKFLOW_KINDS = ("install", "test", "lint", "format", "build", "dev")
# Same categories in a different order.
# NOTE(review): presumably the display order for status output -- confirm.
STATUS_WORKFLOW_KIND_ORDER = ("test", "lint", "format", "build", "install", "dev")
|
|
53
|
+
|
|
54
|
+
# Directory names covering VCS metadata, editor settings, tool caches,
# virtualenvs, dependency trees and build output.
# NOTE(review): presumably pruned while walking the repository -- the code
# that consumes this set is not visible in this part of the file.
IGNORED_DIRS = {
    ".git",
    ".hg",
    ".svn",
    ".idea",
    ".vscode",
    "__pycache__",
    ".pytest_cache",
    ".mypy_cache",
    ".ruff_cache",
    ".tox",
    ".nox",
    ".venv",
    "venv",
    "env",
    "node_modules",
    "dist",
    "build",
    "target",
    "vendor",
    ".gradle",
    ".next",
    ".turbo",
}
|
|
78
|
+
|
|
79
|
+
# Manifest file names, one or more per supported ecosystem.
# _ProfileBuilder._build_component looks these up inside a component directory
# and records the ones it finds as "component_manifest" evidence; the set is
# also folded into PROFILE_FILE_NAMES below.
COMPONENT_MANIFESTS = {
    "package.json",
    "pyproject.toml",
    "requirements.txt",
    "requirements-dev.txt",
    "setup.py",
    "setup.cfg",
    "Pipfile",
    "go.mod",
    "Cargo.toml",
    "pom.xml",
    "build.gradle",
    "build.gradle.kts",
    "Gemfile",
    "composer.json",
    "deno.json",
    "deno.jsonc",
    "Package.swift",
}
|
|
98
|
+
|
|
99
|
+
# File suffixes of .NET project and solution files. These are matched by
# extension rather than by exact name, unlike COMPONENT_MANIFESTS.
# NOTE(review): presumably consumed by _dotnet_manifest_files -- not visible here.
DOTNET_PROJECT_EXTENSIONS = {".csproj", ".fsproj", ".vbproj"}
DOTNET_SOLUTION_EXTENSIONS = {".sln", ".slnx"}
# Lockfiles and per-ecosystem tool configuration that typically sit next to a
# manifest. The whole set is unpacked into PROFILE_FILE_NAMES.
ADJACENT_PROFILE_FILE_NAMES = {
    # JavaScript lockfiles
    "package-lock.json",
    "npm-shrinkwrap.json",
    "yarn.lock",
    "pnpm-lock.yaml",
    "bun.lock",
    "bun.lockb",
    # Python lockfiles
    "uv.lock",
    "poetry.lock",
    "pdm.lock",
    "Pipfile.lock",
    # Go / Rust
    "go.sum",
    "Cargo.lock",
    # Ruby
    "Gemfile.lock",
    ".ruby-version",
    "Rakefile",
    ".rubocop.yml",
    ".rubocop_todo.yml",
    # PHP
    "composer.lock",
    "phpunit.xml",
    "phpunit.xml.dist",
    "phpstan.neon",
    "phpstan.neon.dist",
    "phpcs.xml",
    "phpcs.xml.dist",
    ".php-cs-fixer.php",
    ".php-cs-fixer.dist.php",
    "pint.json",
    # Swift
    "Package.resolved",
    ".swiftformat",
    ".swiftlint.yml",
    # .NET
    "global.json",
    "NuGet.config",
    "Directory.Build.props",
    "Directory.Build.targets",
}
|
|
137
|
+
|
|
138
|
+
# Union of the component manifests, the adjacent lock/config files, and the
# task-runner, linter, formatter, bundler and CI file names listed below.
# NOTE(review): presumably the set of file names whose presence or content
# feeds the profile (see _discover_profile_files) -- confirm against that helper.
PROFILE_FILE_NAMES = COMPONENT_MANIFESTS | {
    ".gitignore",
    *ADJACENT_PROFILE_FILE_NAMES,
    # Task runners / wrappers
    "gradlew",
    "gradlew.bat",
    "Makefile",
    "makefile",
    "justfile",
    "Justfile",
    "Taskfile.yml",
    "Taskfile.yaml",
    # TypeScript / JavaScript project configuration
    "tsconfig.json",
    "tsconfig.build.json",
    "jsconfig.json",
    "angular.json",
    # ESLint
    "eslint.config.js",
    "eslint.config.mjs",
    "eslint.config.cjs",
    "eslint.config.ts",
    ".eslintrc",
    ".eslintrc.js",
    ".eslintrc.cjs",
    ".eslintrc.json",
    ".eslintrc.yml",
    ".eslintrc.yaml",
    # Prettier
    "prettier.config.js",
    "prettier.config.mjs",
    "prettier.config.cjs",
    "prettier.config.ts",
    ".prettierrc",
    ".prettierrc.json",
    ".prettierrc.yml",
    ".prettierrc.yaml",
    ".prettierrc.js",
    # Front-end frameworks / bundlers
    "vite.config.js",
    "vite.config.mjs",
    "vite.config.ts",
    "next.config.js",
    "next.config.mjs",
    "next.config.ts",
    "svelte.config.js",
    "nuxt.config.js",
    "nuxt.config.ts",
    # Python tooling
    "tox.ini",
    "noxfile.py",
    "pytest.ini",
    "ruff.toml",
    ".ruff.toml",
    ".flake8",
    ".pylintrc",
    "mypy.ini",
    ".pre-commit-config.yaml",
    ".pre-commit-config.yml",
    # Go / Rust tooling
    ".golangci.yml",
    ".golangci.yaml",
    "rustfmt.toml",
    ".rustfmt.toml",
    # CI definitions
    ".gitlab-ci.yml",
    ".gitlab-ci.yaml",
    "Jenkinsfile",
}
|
|
199
|
+
|
|
200
|
+
# Maps a source-file suffix (leading dot, lower case) to the language label
# reported for it. Several suffixes may share one label.
# NOTE(review): ".h" is labelled "c" even though C++ projects use it too.
SOURCE_EXTENSIONS = {
    ".py": "python",
    ".pyi": "python",
    ".js": "javascript",
    ".jsx": "javascript",
    ".mjs": "javascript",
    ".cjs": "javascript",
    ".ts": "typescript",
    ".tsx": "typescript",
    ".go": "go",
    ".rs": "rust",
    ".java": "java",
    ".kt": "kotlin",
    ".kts": "kotlin",
    ".rb": "ruby",
    ".php": "php",
    ".cs": "csharp",
    ".fs": "fsharp",
    ".vb": "visualbasic",
    ".swift": "swift",
    ".scala": "scala",
    ".clj": "clojure",
    ".ex": "elixir",
    ".exs": "elixir",
    ".erl": "erlang",
    ".hrl": "erlang",
    ".c": "c",
    ".h": "c",
    ".cc": "cpp",
    ".cpp": "cpp",
    ".cxx": "cpp",
    ".hpp": "cpp",
    ".hxx": "cpp",
}
|
|
234
|
+
|
|
235
|
+
# Single words and multi-word command fragments associated with destructive or
# externally visible actions (deleting, deploying, publishing, ...).
# NOTE(review): presumably matched against script/command text to flag risky
# workflows -- the matching code is not visible here, so whether matching is
# by substring or by token should be confirmed there.
DANGEROUS_WORDS = {
    "clean",
    "deploy",
    "destroy",
    "drop",
    "migrate",
    "publish",
    "release",
    "reset",
    "rollback",
    "terraform apply",
    "kubectl delete",
    "docker push",
    "npm publish",
    "rm -rf",
}
|
|
251
|
+
|
|
252
|
+
|
|
253
|
+
def sync(
    root: str | os.PathLike[str] = ".",
    cache_path: str | os.PathLike[str] | None = None,
    changed_files: Optional[Sequence[str]] = None,
    write: bool = True,
    format: str = "text",
    verbose: bool = False,
    incremental: bool = True,
    paths_only: bool = False,
    progress: Optional[Callable[[str], None]] = None,
) -> Dict[str, Any] | str:
    """Produce a workflow profile for ``root`` and optionally persist it.

    Strategies are tried in this order, and the first one that yields a
    profile ends the call:

    1. ``incremental``: reuse the cached profile if ``_try_incremental_sync``
       accepts it.
    2. ``paths_only``: delegate to ``_sync_paths_only``; the result is written
       only when it carries a non-``None`` ``"project"`` entry.
    3. Full scan through ``_ProfileBuilder``.

    Args:
        root: Repository root.
        cache_path: Cache file location; resolved by ``_resolve_cache_path``.
        changed_files: Paths reported as changed; ``None`` means none.
        write: Persist the resulting profile to the cache file.
        format: Output format forwarded to ``_format_result``.
        verbose: Forwarded to ``_format_result``.
        incremental: Try to reuse the cached profile first.
        paths_only: Use the paths-only strategy instead of a full scan.
        progress: Optional callback that receives short progress messages.

    Returns:
        Whatever ``_format_result`` produces for the chosen ``format``.
    """

    repo_root = _resolve_root(root)
    cache_file = _resolve_cache_path(repo_root, cache_path)
    changed = _normalize_changed_files(repo_root, changed_files or [])

    def report(message: str) -> None:
        _emit_progress(progress, message)

    def finish(result: Dict[str, Any], persist: bool) -> Dict[str, Any] | str:
        # Shared tail of all three strategies: optional write, then format.
        if persist:
            _write_json(cache_file, result)
            report("sync: wrote cache")
        report("sync: done")
        return _format_result(result, format, verbose=verbose)

    report("sync: start")

    if incremental:
        report("sync: check cache")
        previous = _load_json(cache_file)
        reusable = _try_incremental_sync(repo_root, cache_file, previous, changed)
        if reusable is not None:
            report("sync: reused cached profile")
            return finish(reusable, write)

    if paths_only:
        report("sync: paths-only")
        # The cache is re-read here on purpose: the incremental attempt above
        # received its own copy.
        partial = _sync_paths_only(repo_root, cache_file, _load_json(cache_file), changed)
        return finish(partial, write and partial.get("project") is not None)

    report("sync: scan repo")
    scanned = _ProfileBuilder(repo_root, cache_file).build(changed_files=changed)
    return finish(scanned, write)
|
|
300
|
+
|
|
301
|
+
|
|
302
|
+
def sync_async(
    root: str | os.PathLike[str] = ".",
    cache_path: str | os.PathLike[str] | None = None,
    changed_files: Optional[Sequence[str]] = None,
    write: bool = True,
    format: str = "text",
    verbose: bool = False,
    incremental: bool = True,
    paths_only: bool = False,
    progress: Optional[Callable[[str], None]] = None,
    executor: Optional[Executor] = None,
) -> Future:
    """Submit :func:`sync` to an executor and hand back its ``Future``.

    All arguments except ``executor`` are forwarded to :func:`sync` unchanged.

    Args:
        executor: Executor to submit to. When omitted, a private
            single-worker ``ThreadPoolExecutor`` is created and shut down
            (without waiting) as soon as the future completes.

    Returns:
        The ``Future`` whose result is the return value of :func:`sync`.
    """

    call_args = dict(
        root=root,
        cache_path=cache_path,
        changed_files=changed_files,
        write=write,
        format=format,
        verbose=verbose,
        incremental=incremental,
        paths_only=paths_only,
        progress=progress,
    )

    if executor is None:
        owned = ThreadPoolExecutor(max_workers=1, thread_name_prefix="code-workflow-probe-sync")
        pending = owned.submit(sync, **call_args)

        def _release(_finished: Future) -> None:
            # The pool exists only for this one job; release it when done.
            owned.shutdown(wait=False)

        pending.add_done_callback(_release)
        return pending

    # A caller-supplied executor stays under the caller's control.
    return executor.submit(sync, **call_args)
|
|
334
|
+
|
|
335
|
+
|
|
336
|
+
def status(
    root: str | os.PathLike[str] = ".",
    cache_path: str | os.PathLike[str] | None = None,
    format: str = "text",
    verbose: bool = False,
    detail: str = "compact",
    limit: int = DEFAULT_STATUS_LIMIT,
    depth: int = DEFAULT_STATUS_DEPTH,
) -> Dict[str, Any] | str:
    """Report whether the cached profile still matches the repository.

    The cached profile is loaded and its ``"watch"`` section is compared with
    the files on disk via ``_compare_watch_state``. The profile counts as
    aligned only when there are no stale, new or removed profile files and the
    source summary is unchanged.

    Args:
        root: Repository root.
        cache_path: Cache file location; resolved by ``_resolve_cache_path``.
        format: Output format forwarded to ``_format_result``.
        verbose: Forwarded to ``_format_result`` and to
            ``_normalize_status_detail``.
        detail: Requested detail level (see ``STATUS_DETAILS``).
        limit: Passed through ``_normalize_limit``.
        depth: Passed through ``_normalize_depth``.

    Returns:
        The formatted status result. Its ``"profile"`` entry is the cached
        profile when aligned and ``None`` otherwise. Its ``"alignment"`` entry
        always carries the same seven keys, including
        ``"source_summary_changed"``.
    """

    root_path = _resolve_root(root)
    cache = _resolve_cache_path(root_path, cache_path)
    cached = _load_json(cache)
    checked_at = _utc_now()
    status_detail = _normalize_status_detail(detail, verbose)
    status_limit = _normalize_limit(limit)
    status_depth = _normalize_depth(depth)

    if cached is None:
        # No cache yet: report "not aligned" with the same key set that the
        # stale/aligned branch and _ProfileBuilder.build emit, so consumers
        # can read alignment["source_summary_changed"] unconditionally.
        alignment: Dict[str, Any] = {
            "aligned": False,
            "reason": "cache_missing",
            "checked_at": checked_at,
            "stale_files": [],
            "new_profile_files": [],
            "removed_profile_files": [],
            "source_summary_changed": False,
        }
        profile = None
        warnings = ["Run sync before using workflow conclusions."]
    else:
        stale = _compare_watch_state(root_path, cache, cached.get("watch", {}))
        aligned = not (
            stale["stale_files"]
            or stale["new_profile_files"]
            or stale["removed_profile_files"]
            or stale["source_summary_changed"]
        )
        alignment = {
            "aligned": aligned,
            "reason": "aligned" if aligned else "cache_stale",
            "checked_at": checked_at,
            "stale_files": stale["stale_files"],
            "new_profile_files": stale["new_profile_files"],
            "removed_profile_files": stale["removed_profile_files"],
            "source_summary_changed": stale["source_summary_changed"],
        }
        # The returned profile carries the freshly computed alignment too.
        cached["alignment"] = alignment
        profile = cached if aligned else None
        warnings = [] if aligned else ["Cached profile is not aligned; run sync before using workflow conclusions."]

    return _format_result({
        "operation": "status",
        "tool": "code-workflow-probe",
        "schema_version": SCHEMA_VERSION,
        "root": str(root_path),
        "cache_path": str(cache),
        "alignment": alignment,
        "profile": profile,
        "warnings": warnings,
    }, format, verbose=verbose, status_detail=status_detail, limit=status_limit, depth=status_depth)
|
|
399
|
+
|
|
400
|
+
|
|
401
|
+
def edit(
    root: str | os.PathLike[str] = ".",
    changed_files: Optional[Sequence[str]] = None,
    cache_path: str | os.PathLike[str] | None = None,
    format: str = "text",
    verbose: bool = False,
) -> Dict[str, Any] | str:
    """Edit hook: refresh the profile if the changed files invalidate it.

    The cached profile is reused when ``_try_incremental_sync`` accepts it.
    Otherwise :func:`status` is consulted and :func:`sync` is run (writing the
    cache) when the cache is not aligned or when any changed file affects the
    profile.

    Args:
        root: Repository root.
        changed_files: Paths that were edited; ``None`` means none.
        cache_path: Cache file location; resolved by ``_resolve_cache_path``.
        format: Output format forwarded to ``_format_result``.
        verbose: Forwarded to ``_format_result``.

    Returns:
        The formatted result, including the profile in use, whether it was
        regenerated (``"profile_updated"``), and the affected components and
        suggested workflows for the changed files.
    """

    repo_root = _resolve_root(root)
    cache_file = _resolve_cache_path(repo_root, cache_path)
    changed = _normalize_changed_files(repo_root, changed_files or [])

    profile = _try_incremental_sync(repo_root, cache_file, _load_json(cache_file), changed)
    refreshed = False

    if profile is None:
        snapshot = status(repo_root, cache_file, format="json")
        # A stale cache always triggers a sync. An aligned cache triggers one
        # only when a changed file is relevant to the profile.
        needs_sync = not snapshot["alignment"]["aligned"] or any(
            _changed_file_affects_profile(path, snapshot["profile"]) for path in changed
        )
        if needs_sync:
            profile = sync(repo_root, cache_file, changed_files=changed, write=True, format="json")
            refreshed = True
        else:
            profile = snapshot["profile"]

    impact = _affected_from_profile(repo_root, profile, changed)
    payload = {
        "operation": "edit",
        "tool": "code-workflow-probe",
        "schema_version": SCHEMA_VERSION,
        "root": str(repo_root),
        "cache_path": str(cache_file),
        "changed_files": changed,
        "profile_updated": refreshed,
        "alignment": profile["alignment"],
        "affected": impact["affected"],
        "suggested_workflows": impact["suggested_workflows"],
        "profile": profile,
        "warnings": impact["warnings"],
    }
    return _format_result(payload, format, verbose=verbose)
|
|
459
|
+
|
|
460
|
+
|
|
461
|
+
def affected(
    root: str | os.PathLike[str] = ".",
    changed_files: Optional[Sequence[str]] = None,
    cache_path: str | os.PathLike[str] | None = None,
    format: str = "text",
    verbose: bool = False,
) -> Dict[str, Any] | str:
    """Resolve changed files to the components and workflows they touch.

    The profile is taken from, in order of preference: the cached profile if
    ``_try_incremental_sync`` accepts it; the cached profile if :func:`status`
    reports it aligned; or a fresh :func:`sync`, which writes the cache.

    Args:
        root: Repository root.
        changed_files: Paths to map; ``None`` means none.
        cache_path: Cache file location; resolved by ``_resolve_cache_path``.
        format: Output format forwarded to ``_format_result``.
        verbose: Forwarded to ``_format_result``.

    Returns:
        The formatted result with ``"affected"``, ``"suggested_workflows"``
        and ``"warnings"`` taken from ``_affected_from_profile``.
    """

    repo_root = _resolve_root(root)
    cache_file = _resolve_cache_path(repo_root, cache_path)
    changed = _normalize_changed_files(repo_root, changed_files or [])

    profile = _try_incremental_sync(repo_root, cache_file, _load_json(cache_file), changed)
    if profile is None:
        snapshot = status(repo_root, cache_file, format="json")
        if snapshot["alignment"]["aligned"]:
            profile = snapshot["profile"]
        else:
            # Stale or missing cache: rebuild so the mapping is trustworthy.
            profile = sync(repo_root, cache_file, changed_files=changed, write=True, format="json")

    impact = _affected_from_profile(repo_root, profile, changed)
    payload = {
        "operation": "affected",
        "tool": "code-workflow-probe",
        "schema_version": SCHEMA_VERSION,
        "root": str(repo_root),
        "cache_path": str(cache_file),
        "changed_files": changed,
        "alignment": profile["alignment"],
        "affected": impact["affected"],
        "suggested_workflows": impact["suggested_workflows"],
        "warnings": impact["warnings"],
    }
    return _format_result(payload, format, verbose=verbose)
|
|
510
|
+
|
|
511
|
+
|
|
512
|
+
def install_skill(
    tool: str = "codex",
    skills_dir: str | os.PathLike[str] | None = None,
    dry_run: bool = False,
    overwrite: bool = True,
    format: str = "text",
    verbose: bool = False,
) -> Dict[str, Any] | str:
    """Write the code-workflow-probe ``SKILL.md`` into a Codex skills directory.

    Args:
        tool: Target agent tool; only ``"codex"`` is accepted.
        skills_dir: Base skills directory; resolved by
            ``_resolve_codex_skills_dir``.
        dry_run: Do not touch the filesystem; return the skill content in the
            result instead.
        overwrite: Replace an existing ``SKILL.md``. When ``False`` and the
            file exists, nothing is written and a warning is reported.
        format: Output format forwarded to ``_format_result``.
        verbose: Forwarded to ``_format_result``.

    Returns:
        The formatted result describing the target path and whether the skill
        was installed or overwritten.

    Raises:
        ValueError: If ``tool`` is anything other than ``"codex"``.
    """

    if tool != "codex":
        raise ValueError("install_skill currently supports only tool='codex'")

    skills_root = _resolve_codex_skills_dir(skills_dir)
    target_dir = skills_root / SKILL_NAME
    target_file = target_dir / "SKILL.md"
    markdown = _codex_skill_markdown()
    already_present = target_file.exists()

    notes: List[str] = []
    wrote_file = False
    blocked = already_present and not overwrite
    if blocked:
        notes.append("Skill already exists and overwrite is disabled.")
    elif not dry_run:
        target_dir.mkdir(parents=True, exist_ok=True)
        target_file.write_text(markdown, encoding="utf-8")
        wrote_file = True

    summary = {
        "operation": "install-skill",
        "tool": "code-workflow-probe",
        "schema_version": SCHEMA_VERSION,
        "target": "codex",
        "skill_name": SKILL_NAME,
        "skills_dir": str(skills_root),
        "skill_path": str(target_file),
        "installed": wrote_file,
        "dry_run": dry_run,
        "overwritten": wrote_file and already_present,
        # The content is echoed back only for dry runs.
        "content": markdown if dry_run else None,
        "warnings": notes,
    }
    return _format_result(summary, format, verbose=verbose)
|
|
555
|
+
|
|
556
|
+
|
|
557
|
+
class _EvidenceStore:
    """Registry of files cited as evidence, keyed by cleaned relative path.

    Each entry is the dict returned by ``_fingerprint`` for the file, extended
    with ``"path"`` (the cleaned relative path) and ``"roles"`` (a sorted list
    of the distinct roles under which the file was registered).
    """

    def __init__(self, root: Path) -> None:
        self.root = root
        self._items: Dict[str, Dict[str, Any]] = {}

    def add(self, rel_path: str, role: str) -> str:
        """Register ``rel_path`` under ``role`` and return its cleaned path.

        A path that cleans to an empty string is not stored; ``"."`` is
        returned for it instead.
        """
        key = _clean_rel(rel_path)
        if not key:
            return "."
        location = self.root / key
        record = self._items.get(key)
        if record is None:
            # First sighting: fingerprint once, then reuse for later roles.
            record = _fingerprint(location)
            record["path"] = key
            record["roles"] = []
            self._items[key] = record
        roles = record["roles"]
        if role not in roles:
            roles.append(role)
            roles.sort()
        return key

    def add_many(self, rel_paths: Iterable[str], role: str) -> List[str]:
        """Register every path under ``role``; return the cleaned paths in order."""
        cleaned: List[str] = []
        for rel_path in rel_paths:
            cleaned.append(self.add(rel_path, role))
        return cleaned

    def as_dict(self) -> Dict[str, Dict[str, Any]]:
        """Return a path-sorted mapping holding a shallow copy of every entry."""
        snapshot: Dict[str, Dict[str, Any]] = {}
        for key in sorted(self._items):
            snapshot[key] = dict(self._items[key])
        return snapshot
|
|
583
|
+
|
|
584
|
+
|
|
585
|
+
class _ProfileBuilder:
|
|
586
|
+
def __init__(
    self,
    root: Path,
    cache_path: Path,
    profile_files: Optional[Sequence[str]] = None,
    allow_source_scan: bool = True,
) -> None:
    """Prepare a builder for the repository at ``root``.

    Args:
        root: Repository root directory.
        cache_path: Location of the profile cache file.
        profile_files: Pre-computed profile file list; ``None`` lets
            ``build`` discover the files itself.
        allow_source_scan: Permit falling back to scanning source files when
            manifests give no language information.
    """
    self.root = root
    self.cache_path = cache_path
    if profile_files is None:
        self.profile_files = None
        self.profile_file_set: Set[str] = set()
    else:
        self.profile_files = list(profile_files)
        self.profile_file_set = set(self.profile_files)
    self.allow_source_scan = allow_source_scan
    self.ignore = _GitIgnore(root)
    self.evidence = _EvidenceStore(root)
    self.warnings: List[str] = []
|
|
601
|
+
|
|
602
|
+
def build(self, changed_files: Optional[Sequence[str]] = None) -> Dict[str, Any]:
    """Scan the repository and return a complete, aligned profile dict.

    Args:
        changed_files: Paths reported as changed; they are normalized and
            stored under ``"changed_files"`` in the profile.

    Returns:
        The profile: tool/schema metadata, an ``"alignment"`` block marked as
        synced, the ``"project"`` description (components, technologies,
        package managers, repo and CI workflows), the evidence files, and the
        ``"watch"`` state (file fingerprints plus source summary).
    """
    if self.profile_files is None:
        tracked = _discover_profile_files(self.root, self.cache_path)
    else:
        tracked = self.profile_files
    self.profile_files = list(tracked)
    self.profile_file_set = set(tracked)
    self.evidence.add_many(tracked, "profile_watch")

    summary = _empty_source_summary()
    roots = self._component_roots(tracked, summary)
    if not roots and self.allow_source_scan:
        # No manifest-based component was found: use a source-file scan to
        # decide whether the repository root itself is a component.
        summary = _source_summary(self.root)
        roots = self._component_roots(tracked, summary)

    components = [self._build_component(component_path, roots) for component_path in roots]
    if not summary["languages"]:
        summary = _component_language_summary(components)

    repo_workflows = self._repo_workflows(components)
    ci_workflows = self._ci_workflows(tracked)
    technologies = _merge_facts(
        entry.get("languages", []) + entry.get("frameworks", []) for entry in components
    )
    package_managers = _merge_facts(
        [entry["package_manager"]] for entry in components if entry.get("package_manager")
    )
    project_type = _project_type(components)

    # Fingerprint every profile/evidence file except the cache file itself.
    watched: Dict[str, Any] = {}
    for rel in sorted(set(tracked) | set(self.evidence.as_dict().keys())):
        if rel != _rel_to_root(self.root, self.cache_path):
            watched[rel] = _fingerprint_with_rel(self.root, rel)

    generated_at = _utc_now()
    alignment = {
        "aligned": True,
        "reason": "synced",
        "checked_at": _utc_now(),
        "stale_files": [],
        "new_profile_files": [],
        "removed_profile_files": [],
        "source_summary_changed": False,
    }
    project = {
        "type": project_type,
        "components": components,
        "technologies": technologies,
        "package_managers": package_managers,
        "repo_workflows": repo_workflows,
        "ci_workflows": ci_workflows,
    }
    return {
        "schema_version": SCHEMA_VERSION,
        "tool": "code-workflow-probe",
        "version": VERSION,
        "root": str(self.root),
        "cache_path": str(self.cache_path),
        "generated_at": generated_at,
        "alignment": alignment,
        "project": project,
        "evidence_files": self.evidence.as_dict(),
        "watch": {
            "files": watched,
            "source_summary": summary,
        },
        "changed_files": _normalize_changed_files(self.root, changed_files or []),
        "warnings": self.warnings,
    }
|
|
661
|
+
|
|
662
|
+
def _component_roots(self, profile_files: Sequence[str], source_summary: Dict[str, Any]) -> List[str]:
    """Return the component root directories, shallowest first.

    Every directory holding a component manifest is a root. When there is
    none, the repository root (``"."``) is used if the source summary lists
    any language (its samples are then recorded as evidence) or, failing
    that, if a Makefile/justfile is among the profile files.

    Args:
        profile_files: Relative paths of the profile files.
        source_summary: Summary dict with a ``"languages"`` entry and an
            optional ``"samples"`` list.

    Returns:
        Roots sorted by path depth, then alphabetically.
    """
    found: Set[str] = {
        _dirname_rel(rel) for rel in profile_files if _is_component_manifest(rel)
    }

    if not found:
        if source_summary["languages"]:
            found.add(".")
            for sample in source_summary.get("samples", []):
                self.evidence.add(sample, "source_language_sample")
        elif any(
            Path(candidate).name in {"Makefile", "makefile", "justfile", "Justfile"}
            for candidate in profile_files
        ):
            found.add(".")

    def depth_then_name(item: str) -> Tuple[int, str]:
        return (item.count("/"), item)

    return sorted(found, key=depth_then_name)
|
|
677
|
+
|
|
678
|
+
def _build_component(self, path: str, all_roots: Sequence[str]) -> Dict[str, Any]:
    """Assemble the profile entry for the component rooted at ``path``.

    Each supported ecosystem is probed by manifest presence. Every ecosystem
    that matches contributes languages, workflows and, where the detector
    reports them, frameworks. The package manager comes from the first
    matching ecosystem that reports one. If no ecosystem yields a language and
    source scanning is allowed, a source-based fallback supplies languages and
    evidence. Task-runner workflows are always appended.

    Args:
        path: Component directory relative to the repository root (``"."``
            for the root itself).
        all_roots: All component roots of the repository.

    Returns:
        The component dict with ``id``, ``path``, ``type``, ``languages``,
        ``frameworks``, ``package_manager``, ``workflows``, ``evidence``
        (sorted, de-duplicated) and an empty ``warnings`` list.
    """
    languages: List[Dict[str, Any]] = []
    frameworks: List[Dict[str, Any]] = []
    workflows: List[Dict[str, Any]] = []
    package_manager: Optional[Dict[str, Any]] = None

    def absorb(part: Dict[str, Any], *, with_frameworks: bool) -> None:
        """Merge one ecosystem detector result into the component facts."""
        nonlocal package_manager
        languages.extend(part["languages"])
        if with_frameworks:
            frameworks.extend(part["frameworks"])
        # The first ecosystem that reports a package manager wins.
        package_manager = package_manager or part["package_manager"]
        workflows.extend(part["workflows"])

    manifests = self._existing_names(path, COMPONENT_MANIFESTS)
    evidence: List[str] = [
        self.evidence.add(_join_rel(path, name), "component_manifest") for name in manifests
    ]

    scope = _component_scope(path, all_roots)

    if self._has_file(path, "package.json"):
        absorb(self._javascript_component(path, scope, all_roots), with_frameworks=True)

    if self._has_any(path, {"pyproject.toml", "requirements.txt", "requirements-dev.txt", "setup.py", "setup.cfg", "Pipfile"}):
        absorb(self._python_component(path, scope), with_frameworks=True)

    if self._has_file(path, "go.mod"):
        absorb(self._go_component(path, scope), with_frameworks=False)

    if self._has_file(path, "Cargo.toml"):
        absorb(self._rust_component(path, scope), with_frameworks=False)

    if self._has_any(path, {"pom.xml", "build.gradle", "build.gradle.kts"}):
        absorb(self._java_component(path, scope), with_frameworks=False)

    if self._has_file(path, "Gemfile"):
        absorb(self._ruby_component(path, scope), with_frameworks=True)

    if self._has_file(path, "composer.json"):
        absorb(self._php_component(path, scope), with_frameworks=True)

    if self._has_any(path, {"deno.json", "deno.jsonc"}):
        absorb(self._deno_component(path, scope, all_roots), with_frameworks=True)

    if self._has_file(path, "Package.swift"):
        absorb(self._swift_component(path, scope), with_frameworks=False)

    dotnet_manifests = self._dotnet_manifest_files(path)
    if dotnet_manifests:
        absorb(self._dotnet_component(path, scope, dotnet_manifests), with_frameworks=True)

    if not languages and self.allow_source_scan:
        # No manifest identified a language: fall back to the source files.
        fallback = self._source_fallback_component(path, all_roots)
        languages.extend(fallback["languages"])
        evidence.extend(fallback["evidence"])

    workflows.extend(self._task_runner_workflows(path, scope))
    workflows = _dedupe_workflows(workflows)
    component_type = _component_type(languages)

    # The component's evidence is the union of its own manifest evidence and
    # the evidence attached to each fact and workflow.
    component_evidence = set(evidence)
    for fact in languages + frameworks:
        component_evidence.update(fact.get("evidence", []))
    if package_manager:
        component_evidence.update(package_manager.get("evidence", []))
    for workflow in workflows:
        component_evidence.update(workflow.get("evidence", []))

    return {
        "id": "root" if path == "." else path,
        "path": path,
        "type": component_type,
        "languages": _dedupe_facts(languages),
        "frameworks": _dedupe_facts(frameworks),
        "package_manager": package_manager,
        "workflows": workflows,
        "evidence": sorted(component_evidence),
        "warnings": [],
    }
|
|
786
|
+
|
|
787
|
+
def _has_file(self, component_path: str, name: str) -> bool:
    """Tell whether ``name`` is present relative to ``component_path``.

    The joined relative path is accepted immediately when it is listed in
    ``self.profile_file_set``; otherwise the answer is whatever
    ``_visible_file`` reports for the probe's root and ignore rules.
    """
    candidate = _join_rel(component_path, name)
    if candidate in self.profile_file_set:
        return True
    return _visible_file(self.root, self.ignore, candidate)
|
|
790
|
+
|
|
791
|
+
def _existing_names(self, component_path: str, names: Iterable[str]) -> List[str]:
    """Return, in sorted order, the entries of ``names`` that ``_has_file`` finds."""
    present = [candidate for candidate in names if self._has_file(component_path, candidate)]
    present.sort()
    return present
|
|
793
|
+
|
|
794
|
+
def _has_any(self, component_path: str, names: Iterable[str]) -> bool:
    """Return True as soon as one entry of ``names`` is found by ``_has_file``."""
    for candidate in names:
        if self._has_file(component_path, candidate):
            return True
    return False
|
|
796
|
+
|
|
797
|
+
def _javascript_component(self, path: str, scope: str, all_roots: Sequence[str]) -> Dict[str, Any]:
    """Describe the JavaScript/TypeScript component rooted at ``path``.

    ``package.json`` is loaded to derive: the language (TypeScript when
    ``_typescript_evidence`` returns anything, JavaScript otherwise), one
    framework fact per name reported by ``_js_frameworks``, the package
    manager, an install workflow, and one workflow for every entry of
    ``WORKFLOW_KINDS`` that is also a key of the manifest's ``scripts`` table.

    Returns a dict with ``languages``, ``frameworks``, ``package_manager``
    and ``workflows`` keys.
    """
    manifest_rel = _join_rel(path, "package.json")
    # A falsy load result falls back to an empty manifest.
    manifest = _load_json(self.root / manifest_rel) or {}
    manifest_evidence = [self.evidence.add(manifest_rel, "javascript_manifest")]
    deps = _package_dependencies(manifest)

    ts_evidence = self._typescript_evidence(path, all_roots, deps, manifest_rel)
    if ts_evidence:
        language_name = "typescript"
        language_reason = "package.json and TypeScript evidence"
    else:
        language_name = "javascript"
        language_reason = "package.json"
    languages = [_fact(language_name, 0.9, manifest_evidence + ts_evidence, language_reason)]

    frameworks = []
    for framework_name in _js_frameworks(deps):
        frameworks.append(_fact(framework_name, 0.8, manifest_evidence, "package dependency"))

    manager = self._js_package_manager(path, manifest)
    workflows = [
        self._workflow(
            "install",
            _js_install_command(manager),
            path,
            scope,
            manifest_evidence + manager["evidence"],
            manager["confidence"],
            "local",
            True,
        )
    ]

    # Only a real mapping counts as a scripts table; anything else is ignored.
    declared_scripts = manifest.get("scripts", {})
    if not isinstance(declared_scripts, dict):
        declared_scripts = {}

    for kind in WORKFLOW_KINDS:
        if kind in declared_scripts:
            runner_command = _js_script_command(manager["name"], kind)
            script_body = str(declared_scripts.get(kind, ""))
            script_risk = _risk_for_command(kind, script_body)
            workflows.append(
                self._workflow(
                    kind,
                    runner_command,
                    path,
                    scope,
                    manifest_evidence,
                    "high",
                    "local",
                    recommended=True,
                    risk=script_risk,
                    reason=f"package.json script '{kind}'",
                    command_preview=script_body,
                )
            )

    return {
        "languages": languages,
        "frameworks": frameworks,
        "package_manager": manager,
        "workflows": workflows,
    }
|
|
840
|
+
|
|
841
|
+
def _js_package_manager(self, path: str, package: Dict[str, Any]) -> Dict[str, Any]:
    """Pick the JavaScript package manager for the component at ``path``.

    Resolution order:

    1. The first lockfile found (pnpm, yarn, bun, npm, in that order) decides
       the manager with confidence 0.95.
    2. Otherwise a ``packageManager`` field of the form ``name@version`` in
       ``package`` decides it with confidence 0.85, provided the name is one
       of npm, pnpm, yarn or bun.
    3. Otherwise npm is returned as a 0.6-confidence candidate with a warning.

    Args:
        path: Component path relative to the probe root.
        package: Parsed ``package.json`` contents.

    Returns:
        The record built by ``_package_manager``.
    """
    manifest_rel = _join_rel(path, "package.json")
    lockfiles = (
        ("pnpm-lock.yaml", "pnpm"),
        ("yarn.lock", "yarn"),
        ("bun.lock", "bun"),
        ("bun.lockb", "bun"),
        ("package-lock.json", "npm"),
        ("npm-shrinkwrap.json", "npm"),
    )
    for filename, name in lockfiles:
        if self._has_file(path, filename):
            evidence = [
                self.evidence.add(manifest_rel, "package_manager"),
                self.evidence.add(_join_rel(path, filename), "package_manager_lockfile"),
            ]
            return _package_manager(name, _pm_executable(name), 0.95, evidence)

    # Same defensive dict check the Python package-manager lookup applies to
    # its manifest, so a non-mapping manifest cannot raise here.
    declared = package.get("packageManager") if isinstance(package, dict) else None
    if isinstance(declared, str) and "@" in declared:
        name = declared.split("@", 1)[0]
        if name in {"npm", "pnpm", "yarn", "bun"}:
            evidence = [self.evidence.add(manifest_rel, "package_manager")]
            return _package_manager(name, _pm_executable(name), 0.85, evidence)

    evidence = [self.evidence.add(manifest_rel, "package_manager")]
    return _package_manager(
        "npm",
        "npm",
        0.6,
        evidence,
        warnings=["No JS lockfile or packageManager field; npm is only a candidate."],
    )
|
|
866
|
+
|
|
867
|
+
def _python_component(self, path: str, scope: str) -> Dict[str, Any]:
    """Describe the Python component rooted at ``path``.

    Collects manifest evidence, framework facts, the package manager and the
    workflows that can be justified from files in the component: install,
    pytest (configured, or inferred from test samples), tox, nox, ruff
    lint/format, black, flake8 and ``python -m build``.

    Returns a dict with ``languages``, ``frameworks``, ``package_manager``
    and ``workflows`` keys.
    """
    base_dir = self.root / path
    pyproject = _load_toml(base_dir / "pyproject.toml")

    manifest_evidence = []
    for manifest_name in (
        "pyproject.toml",
        "requirements.txt",
        "requirements-dev.txt",
        "setup.py",
        "setup.cfg",
        "Pipfile",
    ):
        if self._has_file(path, manifest_name):
            manifest_evidence.append(self.evidence.add(_join_rel(path, manifest_name), "python_manifest"))
    languages = [_fact("python", 0.9, manifest_evidence, "python manifest")]

    requirement_files = []
    for requirement_name in ("requirements.txt", "requirements-dev.txt"):
        if self._has_file(path, requirement_name):
            requirement_files.append(requirement_name)

    frameworks = []
    for framework_name in _python_frameworks(base_dir, pyproject, requirement_files):
        frameworks.append(_fact(framework_name, 0.8, manifest_evidence, "python dependency"))

    manager = self._python_package_manager(path, pyproject)
    workflows: List[Dict[str, Any]] = []
    emit = workflows.append

    install_command = _python_install_command(manager)
    if install_command:
        emit(
            self._workflow(
                "install", install_command, path, scope, manager["evidence"], manager["confidence"], "local", True
            )
        )

    # Explicit pytest configuration is recommended; bare test files only
    # yield a non-recommended, medium-confidence suggestion.
    configured_pytest = self._pytest_evidence(path, pyproject)
    if configured_pytest:
        emit(
            self._workflow(
                "test",
                "python -m pytest",
                path,
                scope,
                configured_pytest,
                "high",
                "local",
                recommended=True,
            )
        )
    elif _has_test_sample(self.root, path, self.ignore):
        sample_evidence = self.evidence.add_many(_test_samples(self.root, path, self.ignore), "python_test_sample")
        emit(
            self._workflow(
                "test",
                "python -m pytest",
                path,
                scope,
                sample_evidence,
                "medium",
                "local",
                recommended=False,
                reason="test files exist but no pytest configuration was found",
            )
        )

    for runner_file, runner_command in (("tox.ini", "tox"), ("noxfile.py", "nox")):
        if self._has_file(path, runner_file):
            runner_evidence = [self.evidence.add(_join_rel(path, runner_file), "test_runner")]
            emit(self._workflow("test", runner_command, path, scope, runner_evidence, "high", "local", True))

    ruff_evidence = self._ruff_evidence(path, pyproject)
    if ruff_evidence:
        emit(self._workflow("lint", "ruff check .", path, scope, ruff_evidence, "high", "local", True))
        emit(
            self._workflow(
                "format",
                "ruff format .",
                path,
                scope,
                ruff_evidence,
                "medium",
                "local",
                recommended=False,
                reason="ruff is configured; formatter availability may depend on ruff version",
            )
        )

    black_evidence = self._black_evidence(path, pyproject)
    if black_evidence:
        emit(self._workflow("format", "black .", path, scope, black_evidence, "high", "local", True))

    flake8_configured = self._has_file(path, ".flake8") or (
        self._has_file(path, "setup.cfg") and _setup_cfg_has_section(base_dir / "setup.cfg", "flake8")
    )
    if flake8_configured:
        flake8_evidence = []
        for config_name in (".flake8", "setup.cfg"):
            if self._has_file(path, config_name):
                flake8_evidence.append(self.evidence.add(_join_rel(path, config_name), "lint_config"))
        emit(self._workflow("lint", "flake8 .", path, scope, flake8_evidence, "high", "local", True))

    if manifest_evidence and self._has_file(path, "pyproject.toml"):
        build_evidence = [self.evidence.add(_join_rel(path, "pyproject.toml"), "build_config")]
        emit(
            self._workflow(
                "build",
                "python -m build",
                path,
                scope,
                build_evidence,
                "medium",
                "local",
                recommended=False,
            )
        )

    return {
        "languages": languages,
        "frameworks": frameworks,
        "package_manager": manager,
        "workflows": workflows,
    }
|
|
972
|
+
|
|
973
|
+
def _typescript_evidence(
    self,
    path: str,
    all_roots: Sequence[str],
    dependencies: Set[str],
    package_json: str,
) -> List[str]:
    """Collect evidence that the component at ``path`` is written in TypeScript.

    Sources are tried in order and the first one that yields anything wins:

    1. a ``typescript`` entry in ``dependencies`` (evidence: ``package_json``);
    2. ``tsconfig.json`` / ``tsconfig.build.json`` in the component;
    3. when ``self.allow_source_scan`` is set, up to three ``.ts``/``.tsx``
       files found under the component, skipping other roots from
       ``all_roots`` that are nested inside it.

    Returns an empty list when nothing is found.
    """
    if "typescript" in dependencies:
        return [self.evidence.add(package_json, "typescript_dependency")]

    config_hits = []
    for config_name in ("tsconfig.json", "tsconfig.build.json"):
        if self._has_file(path, config_name):
            config_hits.append(self.evidence.add(_join_rel(path, config_name), "typescript_config"))
    # Either the configs settled it, or scanning is disabled and the (empty)
    # list is the final answer.
    if config_hits or not self.allow_source_scan:
        return config_hits

    scan_root = self.root if path == "." else self.root / path
    nested_roots = [
        other_root
        for other_root in all_roots
        if other_root not in (".", path) and _is_under(other_root, path)
    ]

    samples = []
    for source_file in _walk_files(scan_root, self.root, self.ignore):
        rel = _rel_to_root(self.root, source_file)
        if any(_is_under(rel, nested) for nested in nested_roots):
            continue
        if source_file.suffix not in {".ts", ".tsx"}:
            continue
        samples.append(self.evidence.add(rel, "source_language_sample"))
        if len(samples) >= 3:
            break
    return samples
|
|
1008
|
+
|
|
1009
|
+
def _python_package_manager(self, path: str, pyproject: Dict[str, Any]) -> Dict[str, Any]:
    """Pick the Python package manager for the component at ``path``.

    Resolution order:

    1. The first lock/manifest file found among ``uv.lock``, ``poetry.lock``,
       ``pdm.lock``, ``Pipfile.lock`` and ``Pipfile`` (confidence 0.95).
    2. A ``[tool.poetry]`` or ``[tool.pdm]`` table in ``pyproject``
       (confidence 0.9).
    3. ``requirements.txt`` / ``requirements-dev.txt`` → pip (confidence 0.85).
    4. Otherwise pip as a 0.6-confidence candidate with a warning.

    Args:
        path: Component path relative to the probe root.
        pyproject: Parsed ``pyproject.toml``; non-dict values are treated as
            having no ``tool`` table.

    Returns:
        The record built by ``_package_manager``.
    """
    pyproject_rel = _join_rel(path, "pyproject.toml")
    checks = [
        ("uv.lock", "uv", "uv"),
        ("poetry.lock", "poetry", "poetry"),
        ("pdm.lock", "pdm", "pdm"),
        ("Pipfile.lock", "pipenv", "pipenv"),
        ("Pipfile", "pipenv", "pipenv"),
    ]
    for filename, name, command in checks:
        if self._has_file(path, filename):
            evidence = [self.evidence.add(_join_rel(path, filename), "package_manager_lockfile")]
            if self._has_file(path, "pyproject.toml"):
                evidence.append(self.evidence.add(pyproject_rel, "package_manager"))
            return _package_manager(name, command, 0.95, evidence)

    tool = pyproject.get("tool", {}) if isinstance(pyproject, dict) else {}
    if isinstance(tool, dict):
        # For both tools the manager name doubles as its executable.
        for tool_name in ("poetry", "pdm"):
            if tool_name in tool:
                evidence = [self.evidence.add(pyproject_rel, "package_manager")]
                return _package_manager(tool_name, tool_name, 0.9, evidence)

    # Probe each requirements file once and reuse the result for the evidence.
    requirement_files = [
        name for name in ("requirements.txt", "requirements-dev.txt") if self._has_file(path, name)
    ]
    if requirement_files:
        evidence = [self.evidence.add(_join_rel(path, name), "package_manager") for name in requirement_files]
        return _package_manager("pip", "python -m pip", 0.85, evidence)

    evidence = [
        self.evidence.add(_join_rel(path, name), "package_manager")
        for name in ("pyproject.toml", "setup.py", "setup.cfg")
        if self._has_file(path, name)
    ]
    return _package_manager(
        "pip",
        "python -m pip",
        0.6,
        evidence,
        warnings=["No Python lockfile; pip workflow is a candidate."],
    )
|
|
1048
|
+
|
|
1049
|
+
def _go_component(self, path: str, scope: str) -> Dict[str, Any]:
    """Describe the Go module rooted at ``path``.

    ``go.mod`` (plus ``go.sum`` when present) is the evidence for the
    language, the package manager and the install/test/build workflows. A
    ``golangci-lint run`` lint workflow is added when any golangci-lint
    configuration file is present.

    Returns a dict with ``languages``, ``package_manager`` and ``workflows``
    keys.
    """
    evidence = [self.evidence.add(_join_rel(path, "go.mod"), "go_manifest")]
    if self._has_file(path, "go.sum"):
        evidence.append(self.evidence.add(_join_rel(path, "go.sum"), "go_lockfile"))
    workflows = [
        self._workflow("install", "go mod download", path, scope, evidence, "high", "local", True),
        self._workflow("test", "go test ./...", path, scope, evidence, "high", "local", True),
        self._workflow("build", "go build ./...", path, scope, evidence, "medium", "local", False),
    ]

    # golangci-lint reads any of these names; go through _has_file like the
    # other component detectors do.
    lint_evidence = [
        self.evidence.add(_join_rel(path, config_name), "lint_config")
        for config_name in (".golangci.yml", ".golangci.yaml", ".golangci.toml", ".golangci.json")
        if self._has_file(path, config_name)
    ]
    if lint_evidence:
        workflows.append(
            self._workflow("lint", "golangci-lint run", path, scope, lint_evidence, "high", "local", True)
        )

    return {
        "languages": [_fact("go", 0.95, evidence, "go.mod")],
        "package_manager": _package_manager("go modules", "go", 0.95, evidence),
        "workflows": workflows,
    }
|
|
1066
|
+
|
|
1067
|
+
def _rust_component(self, path: str, scope: str) -> Dict[str, Any]:
    """Describe the Cargo package rooted at ``path``.

    ``Cargo.toml`` (plus ``Cargo.lock`` when present) is the evidence for the
    language, the package manager and the fetch/test/build/fmt workflows.

    Returns a dict with ``languages``, ``package_manager`` and ``workflows``
    keys.
    """
    manifest_evidence = [self.evidence.add(_join_rel(path, "Cargo.toml"), "rust_manifest")]
    if self._has_file(path, "Cargo.lock"):
        manifest_evidence.append(self.evidence.add(_join_rel(path, "Cargo.lock"), "rust_lockfile"))

    languages = [_fact("rust", 0.95, manifest_evidence, "Cargo.toml")]
    manager = _package_manager("cargo", "cargo", 0.95, manifest_evidence)

    # (kind, command, confidence, recommended)
    cargo_steps = (
        ("install", "cargo fetch", "high", True),
        ("test", "cargo test", "high", True),
        ("build", "cargo build", "medium", False),
        ("format", "cargo fmt", "medium", False),
    )
    workflows = []
    for kind, command, confidence, recommended in cargo_steps:
        workflows.append(
            self._workflow(kind, command, path, scope, manifest_evidence, confidence, "local", recommended)
        )

    return {
        "languages": languages,
        "package_manager": manager,
        "workflows": workflows,
    }
|
|
1081
|
+
|
|
1082
|
+
def _java_component(self, path: str, scope: str) -> Dict[str, Any]:
    """Describe the Maven or Gradle project rooted at ``path``.

    ``pom.xml`` selects Maven. Otherwise Gradle is assumed, using
    ``build.gradle`` when present and ``build.gradle.kts`` if not, and
    preferring the ``./gradlew`` wrapper over a global ``gradle`` when the
    wrapper script exists.

    Returns a dict with ``languages``, ``package_manager`` and ``workflows``
    keys.
    """
    if self._has_file(path, "pom.xml"):
        evidence = [self.evidence.add(_join_rel(path, "pom.xml"), "java_manifest")]
        pm = _package_manager("maven", "mvn", 0.95, evidence)
        workflows = [
            self._workflow("test", "mvn test", path, scope, evidence, "high", "local", True),
            self._workflow("build", "mvn package", path, scope, evidence, "medium", "local", False),
        ]
    else:
        build_file = "build.gradle" if self._has_file(path, "build.gradle") else "build.gradle.kts"
        evidence = [self.evidence.add(_join_rel(path, build_file), "java_manifest")]
        # Probe for the wrapper once; it decides both the command and the evidence.
        has_wrapper = self._has_file(path, "gradlew")
        gradle = "./gradlew" if has_wrapper else "gradle"
        if has_wrapper:
            evidence.append(self.evidence.add(_join_rel(path, "gradlew"), "task_runner"))
        pm = _package_manager("gradle", gradle, 0.9, evidence)
        workflows = [
            self._workflow("test", f"{gradle} test", path, scope, evidence, "high", "local", True),
            self._workflow("build", f"{gradle} build", path, scope, evidence, "medium", "local", False),
        ]
    return {
        "languages": [_fact("java", 0.85, evidence, "java build manifest")],
        "package_manager": pm,
        "workflows": workflows,
    }
|
|
1107
|
+
|
|
1108
|
+
def _ruby_component(self, path: str, scope: str) -> Dict[str, Any]:
    """Describe the Bundler project rooted at ``path``.

    The ``Gemfile`` (plus ``Gemfile.lock`` when present) backs the language,
    framework and package-manager facts and the ``bundle install`` workflow.
    An rspec test workflow is added when the Gemfile names ``rspec`` or
    ``rspec-rails``; rubocop lint/format workflows are added when the gem is
    listed or a rubocop config file exists.

    Returns a dict with ``languages``, ``frameworks``, ``package_manager``
    and ``workflows`` keys.
    """
    gemfile_evidence = [self.evidence.add(_join_rel(path, "Gemfile"), "ruby_manifest")]
    if self._has_file(path, "Gemfile.lock"):
        gemfile_evidence.append(self.evidence.add(_join_rel(path, "Gemfile.lock"), "ruby_lockfile"))

    gems = _ruby_gem_names(self.root / path / "Gemfile")
    frameworks = []
    for framework_name in _ruby_frameworks(gems):
        frameworks.append(_fact(framework_name, 0.8, gemfile_evidence, "Gemfile dependency"))

    # A lockfile raises confidence in the bundler detection.
    if self._has_file(path, "Gemfile.lock"):
        bundler_confidence = 0.95
    else:
        bundler_confidence = 0.9
    manager = _package_manager("bundler", "bundle", bundler_confidence, gemfile_evidence)

    workflows = [
        self._workflow(
            "install", "bundle install", path, scope, gemfile_evidence, manager["confidence"], "local", True
        )
    ]

    if {"rspec", "rspec-rails"} & gems:
        workflows.append(
            self._workflow("test", "bundle exec rspec", path, scope, gemfile_evidence, "high", "local", True)
        )

    # Start from a copy so appended config files never leak into the Gemfile evidence.
    if "rubocop" in gems:
        rubocop_evidence = list(gemfile_evidence)
    else:
        rubocop_evidence = []
    for config_name in (".rubocop.yml", ".rubocop_todo.yml"):
        if self._has_file(path, config_name):
            rubocop_evidence.append(self.evidence.add(_join_rel(path, config_name), "lint_config"))

    if rubocop_evidence:
        workflows.append(
            self._workflow("lint", "bundle exec rubocop", path, scope, rubocop_evidence, "high", "local", True)
        )
        workflows.append(
            self._workflow(
                "format",
                "bundle exec rubocop -A",
                path,
                scope,
                rubocop_evidence,
                "medium",
                "local",
                recommended=False,
                reason="rubocop autocorrect changes files and should be reviewed before running",
            )
        )

    return {
        "languages": [_fact("ruby", 0.95, gemfile_evidence, "Gemfile")],
        "frameworks": frameworks,
        "package_manager": manager,
        "workflows": workflows,
    }
|
|
1146
|
+
|
|
1147
|
+
def _php_component(self, path: str, scope: str) -> Dict[str, Any]:
    """Describe the Composer project rooted at ``path``.

    ``composer.json`` (plus ``composer.lock`` when present) backs the
    language, framework and package-manager facts and the install workflow.
    Further workflows come from composer scripts whose name is one of
    ``WORKFLOW_KINDS`` and from tool config files: phpunit, phpstan, phpcs,
    pint and php-cs-fixer.

    Returns a dict with ``languages``, ``frameworks``, ``package_manager``
    and ``workflows`` keys.
    """
    manifest_rel = _join_rel(path, "composer.json")
    # A falsy load result falls back to an empty manifest.
    manifest = _load_json(self.root / manifest_rel) or {}
    manifest_evidence = [self.evidence.add(manifest_rel, "php_manifest")]
    if self._has_file(path, "composer.lock"):
        manifest_evidence.append(self.evidence.add(_join_rel(path, "composer.lock"), "php_lockfile"))

    deps = _composer_dependencies(manifest)
    frameworks = []
    for framework_name in _php_frameworks(deps):
        frameworks.append(_fact(framework_name, 0.8, manifest_evidence, "composer dependency"))

    if self._has_file(path, "composer.lock"):
        composer_confidence = 0.95
    else:
        composer_confidence = 0.9
    manager = _package_manager("composer", "composer", composer_confidence, manifest_evidence)
    workflows = [
        self._workflow(
            "install", "composer install", path, scope, manifest_evidence, manager["confidence"], "local", True
        )
    ]

    # Only a real mapping counts as a scripts table.
    declared_scripts = manifest.get("scripts", {})
    if not isinstance(declared_scripts, dict):
        declared_scripts = {}
    for kind in WORKFLOW_KINDS:
        if kind in declared_scripts:
            preview = _script_preview(declared_scripts.get(kind))
            workflows.append(
                self._workflow(
                    kind,
                    f"composer {kind}",
                    path,
                    scope,
                    manifest_evidence,
                    "high",
                    "local",
                    recommended=True,
                    risk=_risk_for_command(kind, preview),
                    reason=f"composer.json script '{kind}'",
                    command_preview=preview,
                )
            )

    # Tools detected purely from their config files:
    # (config names, evidence kind, workflow kind, command)
    config_driven_tools = (
        (("phpunit.xml", "phpunit.xml.dist"), "test_config", "test", "vendor/bin/phpunit"),
        (("phpstan.neon", "phpstan.neon.dist"), "lint_config", "lint", "vendor/bin/phpstan analyse"),
        (("phpcs.xml", "phpcs.xml.dist"), "lint_config", "lint", "vendor/bin/phpcs"),
    )
    for config_names, evidence_kind, workflow_kind, command in config_driven_tools:
        tool_evidence = []
        for config_name in config_names:
            if self._has_file(path, config_name):
                tool_evidence.append(self.evidence.add(_join_rel(path, config_name), evidence_kind))
        if tool_evidence:
            workflows.append(
                self._workflow(workflow_kind, command, path, scope, tool_evidence, "high", "local", True)
            )

    # Pint counts as configured via either the dependency or its config file.
    pint_evidence = []
    if "laravel/pint" in deps:
        pint_evidence.extend(manifest_evidence)
    if self._has_file(path, "pint.json"):
        pint_evidence.append(self.evidence.add(_join_rel(path, "pint.json"), "format_config"))
    if pint_evidence:
        workflows.append(
            self._workflow("format", "vendor/bin/pint", path, scope, pint_evidence, "medium", "local", False)
        )

    fixer_evidence = []
    for config_name in (".php-cs-fixer.php", ".php-cs-fixer.dist.php"):
        if self._has_file(path, config_name):
            fixer_evidence.append(self.evidence.add(_join_rel(path, config_name), "format_config"))
    if fixer_evidence:
        workflows.append(
            self._workflow(
                "format", "vendor/bin/php-cs-fixer fix", path, scope, fixer_evidence, "medium", "local", False
            )
        )

    return {
        "languages": [_fact("php", 0.95, manifest_evidence, "composer.json")],
        "frameworks": frameworks,
        "package_manager": manager,
        "workflows": workflows,
    }
|
|
1226
|
+
|
|
1227
|
+
def _deno_component(self, path: str, scope: str, all_roots: Sequence[str]) -> Dict[str, Any]:
    """Describe the Deno project rooted at ``path``.

    The manifest is ``deno.json`` when present, ``deno.jsonc`` otherwise.
    Languages come from sampling ``.ts``/``.tsx``/``.js``/``.jsx`` sources;
    workflows come from manifest tasks named after a ``WORKFLOW_KINDS`` entry
    (for ``format`` the task may also be called ``fmt``).

    Returns a dict with ``languages``, ``frameworks``, ``package_manager``
    and ``workflows`` keys.
    """
    if self._has_file(path, "deno.json"):
        manifest_name = "deno.json"
    else:
        manifest_name = "deno.jsonc"
    manifest_rel = _join_rel(path, manifest_name)
    # A falsy load result falls back to an empty manifest.
    manifest = _load_json_or_jsonc(self.root / manifest_rel) or {}
    manifest_evidence = [self.evidence.add(manifest_rel, "deno_manifest")]

    languages = self._source_language_facts(path, all_roots, {".ts", ".tsx", ".js", ".jsx"})
    frameworks = [_fact("deno", 0.95, manifest_evidence, "deno manifest")]
    manager = _package_manager("deno", "deno", 0.95, manifest_evidence)

    # Only a real mapping counts as a task table.
    declared_tasks = manifest.get("tasks", {})
    if not isinstance(declared_tasks, dict):
        declared_tasks = {}

    # Workflow kinds whose task may go by more than one name.
    aliases = {"format": ("format", "fmt")}
    workflows: List[Dict[str, Any]] = []
    for kind in WORKFLOW_KINDS:
        task_name = None
        for alias in aliases.get(kind, (kind,)):
            if alias in declared_tasks:
                task_name = alias
                break
        if not task_name:
            continue
        preview = _script_preview(declared_tasks.get(task_name))
        workflows.append(
            self._workflow(
                kind,
                f"deno task {task_name}",
                path,
                scope,
                manifest_evidence,
                "high",
                "local",
                recommended=True,
                risk=_risk_for_command(kind, preview),
                reason=f"{manifest_name} task '{task_name}'",
                command_preview=preview,
            )
        )

    return {
        "languages": languages,
        "frameworks": frameworks,
        "package_manager": manager,
        "workflows": workflows,
    }
|
|
1265
|
+
|
|
1266
|
+
def _swift_component(self, path: str, scope: str) -> Dict[str, Any]:
    """Describe the Swift package rooted at ``path``.

    ``Package.swift`` (plus ``Package.resolved`` when present) backs the
    language and package-manager facts and the resolve/test/build workflows.
    SwiftLint and SwiftFormat workflows are added only when their config
    files exist, and are never marked recommended.

    Returns a dict with ``languages``, ``package_manager`` and ``workflows``
    keys.
    """
    manifest_evidence = [self.evidence.add(_join_rel(path, "Package.swift"), "swift_manifest")]
    if self._has_file(path, "Package.resolved"):
        manifest_evidence.append(self.evidence.add(_join_rel(path, "Package.resolved"), "swift_lockfile"))

    workflows = []
    # (kind, command, confidence, recommended)
    for kind, command, confidence, recommended in (
        ("install", "swift package resolve", "high", True),
        ("test", "swift test", "high", True),
        ("build", "swift build", "medium", False),
    ):
        workflows.append(
            self._workflow(kind, command, path, scope, manifest_evidence, confidence, "local", recommended)
        )

    # (config file, evidence kind, workflow kind, command)
    for config_name, evidence_kind, kind, command in (
        (".swiftlint.yml", "lint_config", "lint", "swiftlint"),
        (".swiftformat", "format_config", "format", "swiftformat ."),
    ):
        if self._has_file(path, config_name):
            tool_evidence = [self.evidence.add(_join_rel(path, config_name), evidence_kind)]
            workflows.append(self._workflow(kind, command, path, scope, tool_evidence, "medium", "local", False))

    return {
        "languages": [_fact("swift", 0.95, manifest_evidence, "Package.swift")],
        "package_manager": _package_manager("swift package manager", "swift", 0.95, manifest_evidence),
        "workflows": workflows,
    }
|
|
1284
|
+
|
|
1285
|
+
def _dotnet_component(self, path: str, scope: str, manifests: Sequence[str]) -> Dict[str, Any]:
    """Describe the .NET component rooted at ``path``.

    Every entry of ``manifests`` is registered as evidence; languages are
    derived from that evidence by ``_dotnet_languages_from_manifests`` and
    the restore/test/build/format workflows all cite it.

    Returns a dict with ``languages``, ``frameworks``, ``package_manager``
    and ``workflows`` keys.
    """
    manifest_evidence = []
    for manifest_rel in manifests:
        manifest_evidence.append(self.evidence.add(manifest_rel, "dotnet_manifest"))

    languages = _dotnet_languages_from_manifests(manifest_evidence)
    frameworks = [_fact("dotnet", 0.95, manifest_evidence, ".NET project or solution manifest")]
    manager = _package_manager("dotnet", "dotnet", 0.95, manifest_evidence)

    # (kind, command, confidence, recommended)
    dotnet_steps = (
        ("install", "dotnet restore", "high", True),
        ("test", "dotnet test", "high", True),
        ("build", "dotnet build", "medium", False),
        ("format", "dotnet format", "medium", False),
    )
    workflows = []
    for kind, command, confidence, recommended in dotnet_steps:
        workflows.append(
            self._workflow(kind, command, path, scope, manifest_evidence, confidence, "local", recommended)
        )

    return {
        "languages": languages,
        "frameworks": frameworks,
        "package_manager": manager,
        "workflows": workflows,
    }
|
|
1302
|
+
|
|
1303
|
+
def _dotnet_manifest_files(self, path: str) -> List[str]:
    """List the .NET manifest files located directly in the component directory.

    Only immediate children are inspected (no recursion). A child is kept
    when its root-relative path satisfies ``_is_dotnet_manifest`` and
    ``_visible_file``.

    Args:
        path: Component path relative to the probe root (``"."`` for the root).

    Returns:
        Sorted root-relative paths; an empty list when the directory does not
        exist or cannot be listed.
    """
    component_dir = self.root if path == "." else self.root / path
    if not component_dir.is_dir():
        return []
    try:
        # Materialise the listing inside the guard: depending on the Python
        # version the OSError surfaces at the call or on first iteration.
        children = list(component_dir.iterdir())
    except OSError:
        # An unreadable directory means "no manifests detected", not a
        # failure of the whole probe.
        return []

    manifests = []
    for child in children:
        if not child.is_file():
            continue
        rel = _rel_to_root(self.root, child)
        if _is_dotnet_manifest(rel) and _visible_file(self.root, self.ignore, rel):
            manifests.append(rel)
    return sorted(manifests)
|
|
1315
|
+
|
|
1316
|
+
def _source_language_facts(
    self,
    path: str,
    all_roots: Sequence[str],
    extensions: Set[str],
) -> List[Dict[str, Any]]:
    """Build language facts from source files whose suffix is in ``extensions``.

    Returns an empty list when ``self.allow_source_scan`` is off. Otherwise
    the component directory is walked, files under other roots nested in
    this component are skipped, and up to three sample paths are kept for
    each language that ``SOURCE_EXTENSIONS`` maps a suffix to. One fact with
    confidence 0.85 is produced per language, ordered by language name.
    """
    if not self.allow_source_scan:
        return []

    scan_root = self.root if path == "." else self.root / path
    nested_roots = [
        other_root
        for other_root in all_roots
        if other_root not in (".", path) and _is_under(other_root, path)
    ]

    samples_by_language: Dict[str, List[str]] = {}
    for source_file in _walk_files(scan_root, self.root, self.ignore):
        rel = _rel_to_root(self.root, source_file)
        if any(_is_under(rel, nested) for nested in nested_roots):
            continue
        suffix = source_file.suffix
        if suffix not in extensions:
            continue
        language = SOURCE_EXTENSIONS.get(suffix)
        if not language:
            continue
        bucket = samples_by_language.setdefault(language, [])
        if len(bucket) < 3:
            bucket.append(rel)

    return [
        _fact(
            language,
            0.85,
            self.evidence.add_many(sample_paths, "source_language_sample"),
            "source file extension sample",
        )
        for language, sample_paths in sorted(samples_by_language.items())
    ]
|
|
1340
|
+
|
|
1341
|
+
def _source_fallback_component(self, path: str, all_roots: Sequence[str]) -> Dict[str, Any]:
    """Guess the component's languages purely from source file suffixes.

    The component directory is walked, files under other roots nested in
    this component are skipped, and up to three sample paths are kept for
    each language that ``SOURCE_EXTENSIONS`` maps a suffix to. Each language
    becomes a 0.55-confidence fact, ordered by language name.

    Returns a dict with ``languages`` (the facts) and ``evidence`` (all
    registered sample evidence, in the same order).
    """
    scan_root = self.root if path == "." else self.root / path
    nested_roots = [
        other_root
        for other_root in all_roots
        if other_root not in (".", path) and _is_under(other_root, path)
    ]

    samples_by_language: Dict[str, List[str]] = {}
    for source_file in _walk_files(scan_root, self.root, self.ignore):
        rel = _rel_to_root(self.root, source_file)
        if any(_is_under(rel, nested) for nested in nested_roots):
            continue
        language = SOURCE_EXTENSIONS.get(source_file.suffix)
        if not language:
            continue
        bucket = samples_by_language.setdefault(language, [])
        if len(bucket) < 3:
            bucket.append(rel)

    all_evidence: List[str] = []
    language_facts: List[Dict[str, Any]] = []
    for language, sample_paths in sorted(samples_by_language.items()):
        registered = self.evidence.add_many(sample_paths, "source_language_sample")
        all_evidence.extend(registered)
        language_facts.append(_fact(language, 0.55, registered, "source file extension sample"))
    return {"languages": language_facts, "evidence": all_evidence}
|
|
1360
|
+
|
|
1361
|
+
def _task_runner_workflows(self, path: str, scope: str) -> List[Dict[str, Any]]:
    """Derive workflows from Makefile / justfile targets in the component.

    For each task file found, every target whose name is one of
    ``WORKFLOW_KINDS`` becomes a recommended ``<runner> <kind>`` workflow
    whose preview is the parsed recipe.

    Args:
        path: Component path relative to the probe root (``"."`` for the root).
        scope: Scope value passed through to ``_workflow``.

    Returns:
        The workflows in task-file order, then ``WORKFLOW_KINDS`` order.
    """

    def _same_file(first: Any, second: Any) -> bool:
        # samefile() raises OSError when either path cannot be stat'ed;
        # treat that as "different files" so nothing is skipped by mistake.
        try:
            return first.samefile(second)
        except OSError:
            return False

    component_dir = self.root if path == "." else self.root / path
    workflows: List[Dict[str, Any]] = []
    task_files = [
        ("Makefile", "make"),
        ("makefile", "make"),
        ("justfile", "just"),
        ("Justfile", "just"),
    ]
    parsed_files: List[Any] = []
    for filename, runner in task_files:
        if not self._has_file(path, filename):
            continue
        task_file = component_dir / filename
        # On case-insensitive filesystems both spellings can resolve to one
        # physical file; parse it once so targets are not duplicated under a
        # spelling that does not exist on disk.
        if any(_same_file(task_file, earlier) for earlier in parsed_files):
            continue
        parsed_files.append(task_file)

        rel = self.evidence.add(_join_rel(path, filename), "task_runner")
        targets = _parse_task_targets(task_file, runner)
        for kind in WORKFLOW_KINDS:
            if kind not in targets:
                continue
            command = f"{runner} {kind}"
            recipe = targets[kind]
            workflows.append(
                self._workflow(
                    kind,
                    command,
                    path,
                    scope,
                    [rel],
                    "high",
                    "local",
                    recommended=True,
                    risk=_risk_for_command(kind, recipe),
                    reason=f"{filename} target '{kind}'",
                    command_preview=recipe,
                )
            )
    return workflows
|
|
1397
|
+
|
|
1398
|
+
def _repo_workflows(self, components: Sequence[Dict[str, Any]]) -> List[Dict[str, Any]]:
    """Promote the root component's workflows to repo-level workflows.

    With zero or one component nothing is promoted. Otherwise every workflow
    of each component whose ``path`` is ``"."`` is shallow-copied with
    ``scope`` set to ``"repo"`` and ``component_id`` set to ``None``; the
    component's own workflow dicts are left untouched.
    """
    if len(components) <= 1:
        return []
    return [
        {**entry, "scope": "repo", "component_id": None}
        for component in components
        if component["path"] == "."
        for entry in component.get("workflows", [])
    ]
|
|
1411
|
+
|
|
1412
|
+
def _ci_workflows(self, profile_files: Sequence[str]) -> List[Dict[str, Any]]:
    """Turn CI configuration files into repo-scoped, CI-only workflows.

    Files are visited in sorted order and only those accepted by
    ``_is_ci_file`` are used. Each command returned by
    ``_extract_ci_commands`` becomes one non-recommended workflow; a file
    with no extracted commands yields a single high-risk placeholder
    workflow with no command.
    """
    collected: List[Dict[str, Any]] = []
    for ci_rel in sorted(profile_files):
        if _is_ci_file(ci_rel):
            ci_evidence = [self.evidence.add(ci_rel, "ci_workflow")]
            extracted = _extract_ci_commands(self.root / ci_rel)
            if not extracted:
                # Record that the CI file exists even though nothing could be read from it.
                collected.append(
                    self._workflow(
                        "ci",
                        None,
                        ".",
                        "repo",
                        ci_evidence,
                        "medium",
                        "ci",
                        recommended=False,
                        risk="high",
                        ci_only=True,
                        reason="CI workflow file found; commands were not statically extracted",
                    )
                )
            for ci_command in extracted:
                collected.append(
                    self._workflow(
                        _classify_workflow_kind(ci_command),
                        ci_command,
                        ".",
                        "repo",
                        ci_evidence,
                        "medium",
                        "ci",
                        recommended=False,
                        risk=_risk_for_command("ci", ci_command),
                        ci_only=True,
                        reason="command extracted from CI workflow; not a local workflow recommendation",
                        command_preview=ci_command,
                    )
                )
    return collected
|
|
1453
|
+
|
|
1454
|
+
def _pytest_evidence(self, path: str, pyproject: Dict[str, Any]) -> List[str]:
|
|
1455
|
+
component_dir = self.root / path
|
|
1456
|
+
evidence = []
|
|
1457
|
+
if self._has_file(path, "pytest.ini"):
|
|
1458
|
+
evidence.append(self.evidence.add(_join_rel(path, "pytest.ini"), "test_config"))
|
|
1459
|
+
if self._has_file(path, "setup.cfg") and _setup_cfg_has_section(component_dir / "setup.cfg", "tool:pytest"):
|
|
1460
|
+
evidence.append(self.evidence.add(_join_rel(path, "setup.cfg"), "test_config"))
|
|
1461
|
+
tool = pyproject.get("tool", {}) if isinstance(pyproject, dict) else {}
|
|
1462
|
+
if isinstance(tool, dict) and "pytest" in tool and self._has_file(path, "pyproject.toml"):
|
|
1463
|
+
evidence.append(self.evidence.add(_join_rel(path, "pyproject.toml"), "test_config"))
|
|
1464
|
+
return evidence
|
|
1465
|
+
|
|
1466
|
+
def _ruff_evidence(self, path: str, pyproject: Dict[str, Any]) -> List[str]:
|
|
1467
|
+
component_dir = self.root / path
|
|
1468
|
+
evidence = []
|
|
1469
|
+
for name in ("ruff.toml", ".ruff.toml"):
|
|
1470
|
+
if self._has_file(path, name):
|
|
1471
|
+
evidence.append(self.evidence.add(_join_rel(path, name), "lint_config"))
|
|
1472
|
+
tool = pyproject.get("tool", {}) if isinstance(pyproject, dict) else {}
|
|
1473
|
+
if isinstance(tool, dict) and "ruff" in tool and self._has_file(path, "pyproject.toml"):
|
|
1474
|
+
evidence.append(self.evidence.add(_join_rel(path, "pyproject.toml"), "lint_config"))
|
|
1475
|
+
return evidence
|
|
1476
|
+
|
|
1477
|
+
def _black_evidence(self, path: str, pyproject: Dict[str, Any]) -> List[str]:
|
|
1478
|
+
component_dir = self.root / path
|
|
1479
|
+
tool = pyproject.get("tool", {}) if isinstance(pyproject, dict) else {}
|
|
1480
|
+
if isinstance(tool, dict) and "black" in tool and self._has_file(path, "pyproject.toml"):
|
|
1481
|
+
return [self.evidence.add(_join_rel(path, "pyproject.toml"), "format_config")]
|
|
1482
|
+
return []
|
|
1483
|
+
|
|
1484
|
+
def _workflow(
    self,
    kind: str,
    command: Optional[str],
    cwd: str,
    scope: str,
    evidence: Sequence[str],
    confidence: str,
    source: str,
    recommended: bool,
    risk: Optional[str] = None,
    ci_only: bool = False,
    reason: str = "",
    command_preview: Optional[str] = None,
) -> Dict[str, Any]:
    """Assemble one workflow record with derived safety flags and warnings.

    ``risk`` falls back to ``_risk_for_command(kind, command)`` when it is
    falsy. ``safe_auto`` holds only for local, non-CI, recommended, low-risk,
    high-confidence ``test``/``lint`` workflows with a known cwd;
    ``needs_confirmation`` is its negation. ``evidence`` is de-duplicated and
    sorted in the returned dictionary.
    """
    risk_level = risk if risk else _risk_for_command(kind, command or "")
    has_cwd = bool(cwd)
    is_candidate = not recommended
    safe_auto = (
        source == "local"
        and not ci_only
        and has_cwd
        and confidence == "high"
        and risk_level == "low"
        and recommended
        and kind in {"test", "lint"}
    )

    # Each pair is (condition, warning emitted when the condition holds).
    checks = (
        (ci_only, "CI-only workflow; do not execute as a local command without review."),
        (not has_cwd, "cwd is not known; command is not recommended for execution."),
        (risk_level != "low", "Workflow is not low risk; confirm before execution."),
        (confidence != "high", "Workflow confidence is not high."),
        (is_candidate, "Workflow is a candidate, not a recommendation."),
    )
    warnings = [message for flagged, message in checks if flagged]

    record: Dict[str, Any] = {
        "kind": kind,
        "command": command,
        "cwd": cwd,
        "scope": scope,
        "source": source,
        "evidence": sorted(set(evidence)),
        "confidence": confidence,
        "confidence_score": _confidence_score(confidence),
        "risk": risk_level,
        "safe_auto": safe_auto,
        "candidate": is_candidate,
        "recommended": recommended,
        "needs_confirmation": not safe_auto,
        "ci_only": ci_only,
        "reason": reason,
        "command_preview": command_preview,
        "warnings": warnings,
    }
    return record
|
|
1542
|
+
|
|
1543
|
+
|
|
1544
|
+
def _affected_from_profile(root: Path, profile: Dict[str, Any], changed_files: Sequence[str]) -> Dict[str, Any]:
|
|
1545
|
+
components = profile.get("project", {}).get("components", [])
|
|
1546
|
+
affected_items = []
|
|
1547
|
+
component_ids: Set[str] = set()
|
|
1548
|
+
suggested: Dict[str, Dict[str, Any]] = {}
|
|
1549
|
+
|
|
1550
|
+
for changed in changed_files:
|
|
1551
|
+
component = _match_component(components, changed)
|
|
1552
|
+
profile_affecting = _is_profile_file(changed) or changed in profile.get("watch", {}).get("files", {})
|
|
1553
|
+
item = {
|
|
1554
|
+
"file": changed,
|
|
1555
|
+
"component_id": component.get("id") if component else None,
|
|
1556
|
+
"component_path": component.get("path") if component else None,
|
|
1557
|
+
"profile_affecting": profile_affecting,
|
|
1558
|
+
"reason": "profile evidence/config file" if profile_affecting else "matched by component path",
|
|
1559
|
+
}
|
|
1560
|
+
affected_items.append(item)
|
|
1561
|
+
if component:
|
|
1562
|
+
component_ids.add(component["id"])
|
|
1563
|
+
for workflow in component.get("workflows", []):
|
|
1564
|
+
if workflow.get("source") != "local":
|
|
1565
|
+
continue
|
|
1566
|
+
if workflow.get("kind") not in {"test", "lint", "build"}:
|
|
1567
|
+
continue
|
|
1568
|
+
key = f"{component['id']}:{workflow['kind']}:{workflow.get('command')}"
|
|
1569
|
+
suggested[key] = dict(workflow, component_id=component["id"])
|
|
1570
|
+
|
|
1571
|
+
warnings = []
|
|
1572
|
+
if not profile.get("alignment", {}).get("aligned"):
|
|
1573
|
+
warnings.append("Profile is not aligned; suggested workflows must not be executed.")
|
|
1574
|
+
|
|
1575
|
+
return {
|
|
1576
|
+
"affected": {
|
|
1577
|
+
"components": sorted(component_ids),
|
|
1578
|
+
"files": affected_items,
|
|
1579
|
+
},
|
|
1580
|
+
"suggested_workflows": list(suggested.values()),
|
|
1581
|
+
"warnings": warnings,
|
|
1582
|
+
}
|
|
1583
|
+
|
|
1584
|
+
|
|
1585
|
+
def _try_incremental_sync(
|
|
1586
|
+
root: Path,
|
|
1587
|
+
cache_path: Path,
|
|
1588
|
+
cached: Optional[Dict[str, Any]],
|
|
1589
|
+
changed_files: Sequence[str],
|
|
1590
|
+
) -> Optional[Dict[str, Any]]:
|
|
1591
|
+
if not cached or not changed_files:
|
|
1592
|
+
return None
|
|
1593
|
+
if cached.get("schema_version") != SCHEMA_VERSION or cached.get("root") != str(root):
|
|
1594
|
+
return None
|
|
1595
|
+
if not cached.get("alignment", {}).get("aligned"):
|
|
1596
|
+
return None
|
|
1597
|
+
|
|
1598
|
+
ignore = _GitIgnore(root)
|
|
1599
|
+
languages = set(cached.get("watch", {}).get("source_summary", {}).get("languages", []))
|
|
1600
|
+
watched = set(cached.get("watch", {}).get("files", {}))
|
|
1601
|
+
watched_profile_files = {rel for rel in watched if _is_profile_file(rel)}
|
|
1602
|
+
for changed in changed_files:
|
|
1603
|
+
if ignore.ignored(changed, is_dir=False):
|
|
1604
|
+
if _is_profile_file(changed) and (
|
|
1605
|
+
changed in watched
|
|
1606
|
+
or (_profile_file_exists(root, changed) and _is_adjacent_profile_evidence_file(changed, watched_profile_files))
|
|
1607
|
+
):
|
|
1608
|
+
return None
|
|
1609
|
+
continue
|
|
1610
|
+
if changed in watched or _is_profile_file(changed):
|
|
1611
|
+
return None
|
|
1612
|
+
language = SOURCE_EXTENSIONS.get(Path(changed).suffix)
|
|
1613
|
+
if language and language not in languages:
|
|
1614
|
+
return None
|
|
1615
|
+
|
|
1616
|
+
profile = dict(cached)
|
|
1617
|
+
profile["generated_at"] = _utc_now()
|
|
1618
|
+
profile["cache_path"] = str(cache_path)
|
|
1619
|
+
profile["changed_files"] = list(changed_files)
|
|
1620
|
+
profile["alignment"] = {
|
|
1621
|
+
"aligned": True,
|
|
1622
|
+
"reason": "incremental_reuse",
|
|
1623
|
+
"checked_at": _utc_now(),
|
|
1624
|
+
"stale_files": [],
|
|
1625
|
+
"new_profile_files": [],
|
|
1626
|
+
"removed_profile_files": [],
|
|
1627
|
+
"source_summary_changed": False,
|
|
1628
|
+
}
|
|
1629
|
+
return profile
|
|
1630
|
+
|
|
1631
|
+
|
|
1632
|
+
def _sync_paths_only(
    root: Path,
    cache_path: Path,
    cached: Optional[Dict[str, Any]],
    changed_files: Sequence[str],
) -> Dict[str, Any]:
    """Synchronise the profile using only the cache and the changed paths.

    The function returns a "paths-only unavailable" result when the cache is
    missing, incompatible or unaligned, when ``changed_files`` is empty, when
    ``.gitignore`` changed, or when a changed file introduces a source
    language absent from the cache. Otherwise it tries, in order:

    1. plain reuse of the cache via ``_try_incremental_sync``;
    2. refreshing fingerprints in a deep copy of the cache when only watched
       non-profile files changed;
    3. rebuilding the profile from the updated set of profile files with
       source scanning disabled.
    """
    if not cached:
        return _paths_only_unavailable(root, cache_path, changed_files, "cache_missing_paths_only", "Path-only sync requires an existing cache.")
    if not changed_files:
        return _paths_only_unavailable(root, cache_path, changed_files, "changed_files_required", "Path-only sync requires changed_files.")
    if cached.get("schema_version") != SCHEMA_VERSION or cached.get("root") != str(root):
        return _paths_only_unavailable(root, cache_path, changed_files, "cache_incompatible", "Cached profile is not compatible with this repo.")
    if not cached.get("alignment", {}).get("aligned"):
        return _paths_only_unavailable(root, cache_path, changed_files, "cache_not_aligned", "Path-only sync requires an aligned cache.")

    reused = _try_incremental_sync(root, cache_path, cached, changed_files)
    if reused is not None:
        return reused

    if ".gitignore" in changed_files:
        return _paths_only_unavailable(root, cache_path, changed_files, "gitignore_changed_paths_only", "Path-only sync cannot safely apply .gitignore changes; run full sync.")

    ignore = _GitIgnore(root)
    # Loop-invariant views of the cached watch state, resolved once.
    watch = cached.get("watch", {})
    watched_files = watch.get("files", {})
    known_languages = set(watch.get("source_summary", {}).get("languages", []))
    cached_profile_files = {rel for rel in watched_files if _is_profile_file(rel)}
    profile_files = {
        rel
        for rel in cached_profile_files
        if _visible_file(root, ignore, rel)
        or (_profile_file_exists(root, rel) and _is_adjacent_profile_evidence_file(rel, cached_profile_files))
    }
    profile_changed = False
    watched_non_profile_changed = False
    for changed in changed_files:
        language = SOURCE_EXTENSIONS.get(Path(changed).suffix)
        if language and language not in known_languages:
            return _paths_only_unavailable(root, cache_path, changed_files, "new_source_language_paths_only", "Changed files introduce a source language not present in cache; run full sync.")
        if _is_profile_file(changed):
            if changed in cached_profile_files and not _profile_file_exists(root, changed):
                # A tracked profile file was deleted.
                profile_changed = True
                profile_files.discard(changed)
                continue
            visible = _visible_file(root, ignore, changed)
            adjacent_evidence = _profile_file_exists(root, changed) and _is_adjacent_profile_evidence_file(changed, profile_files | cached_profile_files)
            if not visible and not adjacent_evidence:
                # Neither visible nor adjacent evidence: it cannot affect the profile.
                continue
            profile_changed = True
            profile_files.add(changed)
        elif changed in watched_files:
            watched_non_profile_changed = True

    if not profile_changed and watched_non_profile_changed:
        profile = copy.deepcopy(cached)
        _refresh_changed_fingerprints(root, profile, changed_files)
        _mark_synced(profile, cache_path, changed_files, "paths_only_synced")
        return profile

    builder = _ProfileBuilder(root, cache_path, profile_files=sorted(profile_files), allow_source_scan=False)
    profile = builder.build(changed_files=changed_files)
    profile["alignment"]["reason"] = "paths_only_synced"
    return profile
|
|
1695
|
+
|
|
1696
|
+
|
|
1697
|
+
def _refresh_changed_fingerprints(root: Path, profile: Dict[str, Any], changed_files: Sequence[str]) -> None:
|
|
1698
|
+
for rel in changed_files:
|
|
1699
|
+
if rel not in profile.get("watch", {}).get("files", {}) and rel not in profile.get("evidence_files", {}):
|
|
1700
|
+
continue
|
|
1701
|
+
if not _profile_file_exists(root, rel):
|
|
1702
|
+
profile.get("watch", {}).get("files", {}).pop(rel, None)
|
|
1703
|
+
profile.get("evidence_files", {}).pop(rel, None)
|
|
1704
|
+
continue
|
|
1705
|
+
fingerprint = _fingerprint_with_rel(root, rel)
|
|
1706
|
+
if rel in profile.get("watch", {}).get("files", {}):
|
|
1707
|
+
profile["watch"]["files"][rel] = fingerprint
|
|
1708
|
+
if rel in profile.get("evidence_files", {}):
|
|
1709
|
+
roles = profile["evidence_files"][rel].get("roles", [])
|
|
1710
|
+
profile["evidence_files"][rel] = dict(fingerprint, roles=roles)
|
|
1711
|
+
|
|
1712
|
+
|
|
1713
|
+
def _mark_synced(profile: Dict[str, Any], cache_path: Path, changed_files: Sequence[str], reason: str) -> None:
    """Stamp ``profile`` in place as freshly synced and aligned.

    Overwrites ``generated_at``, ``cache_path``, ``changed_files`` and the
    whole ``alignment`` mapping, which records ``reason`` and empty
    stale/new/removed file lists.
    """
    profile.update(
        generated_at=_utc_now(),
        cache_path=str(cache_path),
        changed_files=list(changed_files),
        alignment={
            "aligned": True,
            "reason": reason,
            "checked_at": _utc_now(),
            "stale_files": [],
            "new_profile_files": [],
            "removed_profile_files": [],
            "source_summary_changed": False,
        },
    )
|
|
1726
|
+
|
|
1727
|
+
|
|
1728
|
+
def _paths_only_unavailable(
    root: Path,
    cache_path: Path,
    changed_files: Sequence[str],
    reason: str,
    warning: str,
) -> Dict[str, Any]:
    """Build the sync result returned when a path-only sync cannot be done.

    The result has no profile (``"profile": None``), an unaligned
    ``alignment`` carrying ``reason``, and a single-item ``warnings`` list.
    """
    result: Dict[str, Any] = {
        "operation": "sync",
        "tool": "code-workflow-probe",
        "schema_version": SCHEMA_VERSION,
        "root": str(root),
        "cache_path": str(cache_path),
        "changed_files": list(changed_files),
    }
    result["alignment"] = {
        "aligned": False,
        "reason": reason,
        "checked_at": _utc_now(),
        "stale_files": [],
        "new_profile_files": [],
        "removed_profile_files": [],
        "source_summary_changed": False,
    }
    result["profile"] = None
    result["warnings"] = [warning]
    return result
|
|
1754
|
+
|
|
1755
|
+
|
|
1756
|
+
def _compare_watch_state(root: Path, cache_path: Path, watch: Dict[str, Any]) -> Dict[str, Any]:
    """Compare the cached watch fingerprints with the files currently on disk.

    Returns a mapping with:
      * ``stale_files``: cached files still present whose ``sha256`` or
        ``size`` differs from the current fingerprint;
      * ``new_profile_files``: discovered profile files absent from the cache;
      * ``removed_profile_files``: cached files no longer considered present;
      * ``source_summary_changed``: always ``False`` here.

    The cache file itself is excluded from the current file set.
    """
    cached_files = watch.get("files", {}) if isinstance(watch, dict) else {}
    cached_file_set = set(cached_files)
    current_profile_files = _discover_profile_files(root, cache_path)
    current_profile_file_set = set(current_profile_files)
    # Resolved once instead of once per candidate file.
    cache_rel = _rel_to_root(root, cache_path)
    ignore = _GitIgnore(root)
    current_files = {
        rel: _fingerprint_with_rel(root, rel)
        for rel in sorted(current_profile_file_set | cached_file_set)
        if rel != cache_rel
        and (
            (rel in current_profile_file_set and _profile_file_exists(root, rel))
            or _visible_file(root, ignore, rel)
        )
    }
    current_existing_set = set(current_files)
    stale_files = []
    for rel in sorted(cached_file_set & current_existing_set):
        cached_fp = cached_files.get(rel, {})
        current_fp = current_files.get(rel, {})
        if cached_fp.get("sha256") != current_fp.get("sha256") or cached_fp.get("size") != current_fp.get("size"):
            stale_files.append(rel)
    return {
        "stale_files": stale_files,
        "new_profile_files": sorted(current_profile_file_set - cached_file_set),
        "removed_profile_files": sorted(cached_file_set - current_existing_set),
        "source_summary_changed": False,
    }
|
|
1785
|
+
|
|
1786
|
+
|
|
1787
|
+
def _changed_file_affects_profile(path: str, profile: Dict[str, Any]) -> bool:
    """Return True when ``path`` is a profile file or is listed in the profile's watched files."""
    if _is_profile_file(path):
        return True
    watched = profile.get("watch", {}).get("files", {})
    return path in watched
|
|
1789
|
+
|
|
1790
|
+
|
|
1791
|
+
def _discover_profile_files(root: Path, cache_path: Path) -> List[str]:
    """List, sorted, the root-relative profile files found under ``root``.

    The cache file is excluded. Files reported by
    ``_discover_adjacent_profile_files`` for the discovered set are added
    before sorting.
    """
    cache_rel = _rel_to_root(root, cache_path)
    candidates = (_rel_to_root(root, file_path) for file_path in _walk_files(root))
    files: Set[str] = {rel for rel in candidates if rel != cache_rel and _is_profile_file(rel)}
    files.update(_discover_adjacent_profile_files(root, cache_rel, files))
    return sorted(files)
|
|
1802
|
+
|
|
1803
|
+
|
|
1804
|
+
def _discover_adjacent_profile_files(root: Path, cache_rel: str, profile_files: Set[str]) -> Set[str]:
|
|
1805
|
+
files: Set[str] = set()
|
|
1806
|
+
component_dirs = {_dirname_rel(rel) for rel in profile_files if _is_component_manifest(rel)}
|
|
1807
|
+
for component_dir in component_dirs:
|
|
1808
|
+
base = root if component_dir == "." else root / component_dir
|
|
1809
|
+
if not base.is_dir() or _is_ignored_generated_dir(base, root):
|
|
1810
|
+
continue
|
|
1811
|
+
for name in ADJACENT_PROFILE_FILE_NAMES:
|
|
1812
|
+
rel = _join_rel(component_dir, name)
|
|
1813
|
+
if rel == cache_rel:
|
|
1814
|
+
continue
|
|
1815
|
+
path = root / rel
|
|
1816
|
+
if path.is_file():
|
|
1817
|
+
files.add(rel)
|
|
1818
|
+
for child in base.iterdir():
|
|
1819
|
+
if child.is_file():
|
|
1820
|
+
rel = _rel_to_root(root, child)
|
|
1821
|
+
if rel != cache_rel and _is_dotnet_manifest(rel):
|
|
1822
|
+
files.add(rel)
|
|
1823
|
+
return files
|
|
1824
|
+
|
|
1825
|
+
|
|
1826
|
+
def _is_adjacent_profile_evidence_file(rel_path: str, profile_files: Set[str]) -> bool:
    """Tell whether ``rel_path`` is an adjacent evidence file of a known component.

    True only when the file name is in ``ADJACENT_PROFILE_FILE_NAMES`` and some
    entry of ``profile_files`` is a component manifest in the same directory.
    """
    rel = _clean_rel(rel_path)
    if Path(rel).name not in ADJACENT_PROFILE_FILE_NAMES:
        return False
    component_dir = _dirname_rel(rel)
    for candidate in profile_files:
        if _dirname_rel(candidate) == component_dir and _is_component_manifest(candidate):
            return True
    return False
|
|
1832
|
+
|
|
1833
|
+
|
|
1834
|
+
def _is_ignored_generated_dir(path: Path, root: Path) -> bool:
    """Return True when any segment of ``path`` relative to ``root`` is in ``IGNORED_DIRS``."""
    rel = _rel_to_root(root, path)
    segments = [segment for segment in rel.split("/") if segment]
    return any(segment in IGNORED_DIRS for segment in segments)
|
|
1837
|
+
|
|
1838
|
+
|
|
1839
|
+
def _source_summary(root: Path) -> Dict[str, Any]:
    """Summarise the source languages of the files walked under ``root``.

    Languages come from ``SOURCE_EXTENSIONS`` keyed by file suffix. The result
    holds the sorted language names, a per-language file count, and up to
    three sample paths per language, flattened in language order.
    """
    counts: Dict[str, int] = defaultdict(int)
    samples: Dict[str, List[str]] = defaultdict(list)
    for file_path in _walk_files(root):
        language = SOURCE_EXTENSIONS.get(file_path.suffix)
        if language:
            rel = _rel_to_root(root, file_path)
            counts[language] += 1
            bucket = samples[language]
            if len(bucket) < 3:
                bucket.append(rel)

    flattened: List[str] = []
    for _, values in sorted(samples.items()):
        flattened.extend(values)
    return {
        "languages": sorted(counts.keys()),
        "language_counts": dict(sorted(counts.items())),
        "samples": flattened,
    }
|
|
1855
|
+
|
|
1856
|
+
|
|
1857
|
+
def _empty_source_summary() -> Dict[str, Any]:
|
|
1858
|
+
return {"languages": [], "language_counts": {}, "samples": []}
|
|
1859
|
+
|
|
1860
|
+
|
|
1861
|
+
def _component_language_summary(components: Sequence[Dict[str, Any]]) -> Dict[str, Any]:
|
|
1862
|
+
languages = sorted(
|
|
1863
|
+
{
|
|
1864
|
+
language.get("name")
|
|
1865
|
+
for component in components
|
|
1866
|
+
for language in component.get("languages", [])
|
|
1867
|
+
if language.get("name")
|
|
1868
|
+
}
|
|
1869
|
+
)
|
|
1870
|
+
return {"languages": languages, "language_counts": {name: 0 for name in languages}, "samples": []}
|
|
1871
|
+
|
|
1872
|
+
|
|
1873
|
+
class _GitIgnore:
    """Matcher built from the rules of the ``.gitignore`` file located in ``root``."""

    def __init__(self, root: Path) -> None:
        """Load the rules from ``root/.gitignore`` and note whether any rule is negated."""
        self.root = root
        self.rules = _load_gitignore_rules(root / ".gitignore")
        self.has_negation = bool([rule for rule in self.rules if rule["negated"]])

    def ignored(self, rel_path: str, is_dir: bool) -> bool:
        """Return whether ``rel_path`` is ignored; the last matching rule decides.

        ``.gitignore`` itself is never reported as ignored.
        """
        rel = _clean_rel(rel_path)
        if rel == ".gitignore":
            return False
        # Scanning backwards lets the last matching rule win on first hit.
        for rule in reversed(self.rules):
            if _gitignore_rule_matches(rule, rel, is_dir):
                return not rule["negated"]
        return False
|
|
1888
|
+
|
|
1889
|
+
|
|
1890
|
+
def _load_gitignore_rules(path: Path) -> List[Dict[str, Any]]:
    """Parse the gitignore file at ``path`` into a list of rule dictionaries.

    Blank lines and ``#`` comments are skipped. Each rule records:
      * ``pattern``: the pattern without leading/trailing slashes;
      * ``negated``: the line started with ``!``;
      * ``anchored``: the pattern started with ``/``;
      * ``directory_only``: the pattern ended with ``/``;
      * ``has_slash``: the remaining pattern still contains a ``/``.

    A leading ``\\#`` or ``\\!`` is treated as an escaped literal ``#`` / ``!``
    (as the gitignore format specifies) instead of keeping the backslash in
    the pattern.
    """
    text = _read_text(path)
    rules = []
    for raw_line in text.splitlines():
        line = raw_line.rstrip()
        if not line or line.startswith("#"):
            continue
        negated = line.startswith("!")
        if negated:
            line = line[1:]
        elif line.startswith(("\\#", "\\!")):
            # Escaped first character: drop the backslash, keep the literal.
            line = line[1:]
        line = line.strip()
        if not line:
            continue
        anchored = line.startswith("/")
        if anchored:
            line = line.lstrip("/")
        directory_only = line.endswith("/")
        line = line.rstrip("/")
        if not line:
            continue
        rules.append(
            {
                "pattern": line,
                "negated": negated,
                "anchored": anchored,
                "directory_only": directory_only,
                "has_slash": "/" in line,
            }
        )
    return rules
|
|
1920
|
+
|
|
1921
|
+
|
|
1922
|
+
def _gitignore_rule_matches(rule: Dict[str, Any], rel_path: str, is_dir: bool) -> bool:
|
|
1923
|
+
pattern = rule["pattern"]
|
|
1924
|
+
if rule["directory_only"] and not is_dir:
|
|
1925
|
+
parent_parts = rel_path.split("/")[:-1]
|
|
1926
|
+
parents = ["/".join(parent_parts[:index]) for index in range(1, len(parent_parts) + 1)]
|
|
1927
|
+
return any(_gitignore_rule_matches(rule, parent, is_dir=True) for parent in parents)
|
|
1928
|
+
|
|
1929
|
+
if rule["anchored"] or rule["has_slash"]:
|
|
1930
|
+
return rel_path == pattern or fnmatch.fnmatchcase(rel_path, pattern)
|
|
1931
|
+
|
|
1932
|
+
parts = rel_path.split("/")
|
|
1933
|
+
if is_dir:
|
|
1934
|
+
return any(part == pattern or fnmatch.fnmatchcase(part, pattern) for part in parts)
|
|
1935
|
+
return fnmatch.fnmatchcase(parts[-1], pattern)
|
|
1936
|
+
|
|
1937
|
+
|
|
1938
|
+
def _walk_files(root: Path, repo_root: Optional[Path] = None, ignore: Optional[_GitIgnore] = None) -> Iterable[Path]:
    """Yield the files under ``root`` that are not ignored.

    When ``_git_visible_files`` returns a listing, that listing is yielded
    as-is. Otherwise the tree is walked with ``os.walk``: directories named in
    ``IGNORED_DIRS`` are pruned, directories matched by the gitignore rules
    are pruned only when no rule is negated, and files matched by the rules
    are skipped. ``repo_root`` defaults to ``root``; ``ignore`` defaults to a
    matcher built for the repository root.
    """
    if not root.exists():
        # Generator function: this value only ends iteration.
        return []
    repo = (repo_root or root).resolve()
    listed = _git_visible_files(repo, root.resolve())
    if listed is not None:
        yield from listed
        return

    matcher = ignore or _GitIgnore(repo)
    for current, dirs, files in os.walk(root):
        here = Path(current)
        dir_rels = [(name, _rel_to_root(repo, here / name)) for name in dirs]
        # In-place assignment so os.walk does not descend into pruned directories.
        dirs[:] = [
            name
            for name, rel in dir_rels
            if name not in IGNORED_DIRS
            and (not matcher.ignored(rel, is_dir=True) or matcher.has_negation)
        ]
        for filename in files:
            candidate = here / filename
            if not matcher.ignored(_rel_to_root(repo, candidate), is_dir=False):
                yield candidate
|
|
1965
|
+
|
|
1966
|
+
|
|
1967
|
+
def _git_visible_files(repo_root: Path, root: Path) -> Optional[List[Path]]:
    """List files under ``root`` as reported by ``git ls-files``, or ``None``.

    ``None`` is returned when git cannot be run, when ``repo_root`` is not
    inside a work tree, or when the listing command fails. The listing covers
    cached and untracked files with standard excludes applied; entries that
    are not existing files are dropped.
    """
    git = ["git", "-C", str(repo_root)]
    try:
        subprocess.run(
            git + ["rev-parse", "--is-inside-work-tree"],
            check=True,
            capture_output=True,
            text=True,
        )
    except (OSError, subprocess.CalledProcessError):
        return None

    listing = git + ["ls-files", "--cached", "--others", "--exclude-standard", "-z", "--"]
    scope = _rel_to_root(repo_root, root)
    if scope != ".":
        listing.append(scope)
    try:
        result = subprocess.run(listing, check=True, capture_output=True)
    except (OSError, subprocess.CalledProcessError):
        return None

    # Output is NUL-separated because of "-z"; empty chunks are skipped.
    decoded = (raw.decode("utf-8", errors="replace") for raw in result.stdout.split(b"\0") if raw)
    candidates = (repo_root / rel_path for rel_path in decoded)
    return [path for path in candidates if path.is_file()]
|
|
1996
|
+
|
|
1997
|
+
|
|
1998
|
+
def _is_profile_file(rel_path: str) -> bool:
    """Return True when ``rel_path`` is a file that contributes to the profile.

    That is: a name listed in ``PROFILE_FILE_NAMES``, a .NET manifest, a
    ``.yml``/``.yaml`` file under ``.github/workflows/``, or
    ``.circleci/config.yml``.
    """
    rel = _clean_rel(rel_path)
    as_path = Path(rel)
    if as_path.name in PROFILE_FILE_NAMES or _is_dotnet_manifest(rel):
        return True
    is_github_workflow = rel.startswith(".github/workflows/") and as_path.suffix in {".yml", ".yaml"}
    return is_github_workflow or rel == ".circleci/config.yml"
|
|
2008
|
+
|
|
2009
|
+
|
|
2010
|
+
def _is_component_manifest(rel_path: str) -> bool:
    """Return True when ``rel_path`` is named in ``COMPONENT_MANIFESTS`` or is a .NET manifest."""
    rel = _clean_rel(rel_path)
    if Path(rel).name in COMPONENT_MANIFESTS:
        return True
    return _is_dotnet_manifest(rel)
|
|
2013
|
+
|
|
2014
|
+
|
|
2015
|
+
def _is_dotnet_manifest(rel_path: str) -> bool:
    """Return True when the file suffix is a known .NET project or solution extension."""
    suffix = Path(_clean_rel(rel_path)).suffix
    if suffix in DOTNET_PROJECT_EXTENSIONS:
        return True
    return suffix in DOTNET_SOLUTION_EXTENSIONS
|
|
2018
|
+
|
|
2019
|
+
|
|
2020
|
+
def _is_ci_file(rel_path: str) -> bool:
    """Return True for files under ``.github/workflows/`` and a fixed set of CI config paths."""
    rel = _clean_rel(rel_path)
    if rel.startswith(".github/workflows/"):
        return True
    return rel in {".gitlab-ci.yml", ".gitlab-ci.yaml", ".circleci/config.yml", "Jenkinsfile"}
|
|
2026
|
+
|
|
2027
|
+
|
|
2028
|
+
def _extract_ci_commands(path: Path) -> List[str]:
    """Extract single-line commands from a CI configuration file.

    Only lines of the form ``run: <cmd>``, ``- run: <cmd>`` or
    ``script: <cmd>`` are considered; surrounding quotes are stripped. A value
    that is just a YAML block-scalar header (``|``, ``>``, optionally with
    chomping/indentation indicators such as ``|-`` or ``>+2`` and a trailing
    comment) is skipped, since the actual commands live on the following
    lines, which are not parsed. At most 50 commands are returned.
    """
    text = _read_text(path)
    if not text:
        return []
    commands = []
    for raw_line in text.splitlines():
        line = raw_line.strip()
        if not (line.startswith(("run:", "- run:")) or re.match(r"^script:\s*.+", line)):
            continue
        command = line.split(":", 1)[1].strip().strip("'\"")
        if not command:
            continue
        if re.fullmatch(r"[|>][0-9+-]{0,2}(\s+#.*)?", command):
            # Block-scalar header, not a command.
            continue
        commands.append(command)
    return commands[:50]
|
|
2048
|
+
|
|
2049
|
+
|
|
2050
|
+
def _parse_task_targets(path: Path, runner: str) -> Dict[str, str]:
    """Collect the recipe lines of workflow targets from a task file.

    A target line (as recognised by ``_target_from_line``) opens a section;
    only targets named in ``WORKFLOW_KINDS`` are kept. Subsequent lines that
    start with a tab or a space are stripped and appended to the open
    section. Blank lines and ``#`` comment lines are ignored. Returns each
    kept target mapped to its recipe lines joined by newlines.
    """
    text = _read_text(path)
    if not text:
        return {}

    recipes: Dict[str, List[str]] = {}
    active: Optional[str] = None
    for raw_line in text.splitlines():
        line = raw_line.rstrip()
        stripped = line.strip()
        if not stripped or stripped.startswith("#"):
            continue
        target = _target_from_line(line, runner)
        if target:
            active = target if target in WORKFLOW_KINDS else None
            if active:
                recipes.setdefault(active, [])
            continue
        if active and raw_line.startswith(("\t", " ")):
            recipes[active].append(stripped)

    return {name: "\n".join(lines) for name, lines in recipes.items()}
|
|
2071
|
+
|
|
2072
|
+
|
|
2073
|
+
def _target_from_line(line: str, runner: str) -> Optional[str]:
|
|
2074
|
+
if runner == "make":
|
|
2075
|
+
if line.startswith("."):
|
|
2076
|
+
return None
|
|
2077
|
+
match = re.match(r"^([A-Za-z0-9_.-]+)\s*:(?:\s|$)", line)
|
|
2078
|
+
return match.group(1) if match else None
|
|
2079
|
+
match = re.match(r"^([A-Za-z0-9_.-]+)\s*:", line)
|
|
2080
|
+
return match.group(1) if match else None
|
|
2081
|
+
|
|
2082
|
+
|
|
2083
|
+
def _risk_for_command(kind: str, command: str) -> str:
|
|
2084
|
+
lowered = (command or "").lower()
|
|
2085
|
+
if kind in {"deploy", "release", "publish"}:
|
|
2086
|
+
return "high"
|
|
2087
|
+
if any(word in lowered for word in DANGEROUS_WORDS):
|
|
2088
|
+
return "high"
|
|
2089
|
+
if kind in {"install", "format", "build", "dev", "ci"}:
|
|
2090
|
+
return "medium"
|
|
2091
|
+
return "low"
|
|
2092
|
+
|
|
2093
|
+
|
|
2094
|
+
def _classify_workflow_kind(command: str) -> str:
|
|
2095
|
+
lowered = command.lower()
|
|
2096
|
+
for kind in ("test", "lint", "format", "build", "install", "dev"):
|
|
2097
|
+
if re.search(rf"\b{kind}\b", lowered):
|
|
2098
|
+
return kind
|
|
2099
|
+
return "ci"
|
|
2100
|
+
|
|
2101
|
+
|
|
2102
|
+
def _fact(name: str, confidence: float, evidence: Sequence[str], reason: str) -> Dict[str, Any]:
    """Build a fact record with a confidence label, a rounded score and sorted unique evidence."""
    return dict(
        name=name,
        confidence=_confidence_label(confidence),
        confidence_score=round(confidence, 2),
        evidence=sorted(set(evidence)),
        reason=reason,
    )
|
|
2110
|
+
|
|
2111
|
+
|
|
2112
|
+
def _package_manager(
    name: str,
    command: str,
    confidence: float,
    evidence: Sequence[str],
    warnings: Optional[Sequence[str]] = None,
) -> Dict[str, Any]:
    """Build a package-manager record.

    The confidence is stored both as a label and as a score rounded to two
    decimals; evidence is de-duplicated and sorted; ``warnings`` becomes a
    new list (empty when omitted).
    """
    return dict(
        name=name,
        command=command,
        confidence=_confidence_label(confidence),
        confidence_score=round(confidence, 2),
        evidence=sorted(set(evidence)),
        warnings=list(warnings or []),
    )
|
|
2127
|
+
|
|
2128
|
+
|
|
2129
|
+
def _confidence_label(value: float) -> str:
|
|
2130
|
+
if value >= 0.85:
|
|
2131
|
+
return "high"
|
|
2132
|
+
if value >= 0.6:
|
|
2133
|
+
return "medium"
|
|
2134
|
+
return "low"
|
|
2135
|
+
|
|
2136
|
+
|
|
2137
|
+
def _confidence_score(label: str) -> float:
|
|
2138
|
+
return {"high": 0.95, "medium": 0.65, "low": 0.35}.get(label, 0.0)
|
|
2139
|
+
|
|
2140
|
+
|
|
2141
|
+
def _merge_facts(groups: Iterable[Iterable[Dict[str, Any]]]) -> List[Dict[str, Any]]:
|
|
2142
|
+
merged: Dict[str, Dict[str, Any]] = {}
|
|
2143
|
+
for group in groups:
|
|
2144
|
+
for fact in group:
|
|
2145
|
+
if not fact:
|
|
2146
|
+
continue
|
|
2147
|
+
name = fact.get("name")
|
|
2148
|
+
if not name:
|
|
2149
|
+
continue
|
|
2150
|
+
current = merged.get(name)
|
|
2151
|
+
if current is None or fact.get("confidence_score", 0) > current.get("confidence_score", 0):
|
|
2152
|
+
merged[name] = dict(fact)
|
|
2153
|
+
else:
|
|
2154
|
+
current["evidence"] = sorted(set(current.get("evidence", [])) | set(fact.get("evidence", [])))
|
|
2155
|
+
return [merged[name] for name in sorted(merged)]
|
|
2156
|
+
|
|
2157
|
+
|
|
2158
|
+
def _dedupe_facts(facts: Sequence[Dict[str, Any]]) -> List[Dict[str, Any]]:
    """De-duplicate ``facts`` by name by merging them as a single group."""
    single_group = [facts]
    return _merge_facts(single_group)
|
|
2160
|
+
|
|
2161
|
+
|
|
2162
|
+
def _dedupe_workflows(workflows: Sequence[Dict[str, Any]]) -> List[Dict[str, Any]]:
|
|
2163
|
+
seen: Dict[Tuple[str, Optional[str], str], Dict[str, Any]] = {}
|
|
2164
|
+
for workflow in workflows:
|
|
2165
|
+
key = (workflow.get("kind", ""), workflow.get("command"), workflow.get("cwd", ""))
|
|
2166
|
+
current = seen.get(key)
|
|
2167
|
+
if current is None:
|
|
2168
|
+
seen[key] = dict(workflow)
|
|
2169
|
+
continue
|
|
2170
|
+
current["evidence"] = sorted(set(current.get("evidence", [])) | set(workflow.get("evidence", [])))
|
|
2171
|
+
if workflow.get("recommended") and not current.get("recommended"):
|
|
2172
|
+
seen[key] = dict(workflow)
|
|
2173
|
+
return list(seen.values())
|
|
2174
|
+
|
|
2175
|
+
|
|
2176
|
+
def _project_type(components: Sequence[Dict[str, Any]]) -> str:
|
|
2177
|
+
if not components:
|
|
2178
|
+
return "unknown"
|
|
2179
|
+
if len(components) == 1:
|
|
2180
|
+
return "single-component"
|
|
2181
|
+
return "multi-component"
|
|
2182
|
+
|
|
2183
|
+
|
|
2184
|
+
def _component_type(languages: Sequence[Dict[str, Any]]) -> str:
|
|
2185
|
+
names = {item.get("name") for item in languages if item.get("name")}
|
|
2186
|
+
if not names:
|
|
2187
|
+
return "unknown"
|
|
2188
|
+
if len(names) == 1:
|
|
2189
|
+
return next(iter(names))
|
|
2190
|
+
return "mixed"
|
|
2191
|
+
|
|
2192
|
+
|
|
2193
|
+
def _component_scope(path: str, all_roots: Sequence[str]) -> str:
|
|
2194
|
+
if path == "." and len(all_roots) > 1:
|
|
2195
|
+
return "repo"
|
|
2196
|
+
return "component"
|
|
2197
|
+
|
|
2198
|
+
|
|
2199
|
+
def _match_component(components: Sequence[Dict[str, Any]], rel_path: str) -> Optional[Dict[str, Any]]:
|
|
2200
|
+
matches = []
|
|
2201
|
+
for component in components:
|
|
2202
|
+
path = component.get("path", ".")
|
|
2203
|
+
if path == "." or rel_path == path or rel_path.startswith(path + "/"):
|
|
2204
|
+
matches.append(component)
|
|
2205
|
+
if not matches:
|
|
2206
|
+
return None
|
|
2207
|
+
return sorted(matches, key=lambda item: len(item.get("path", "")), reverse=True)[0]
|
|
2208
|
+
|
|
2209
|
+
|
|
2210
|
+
def _package_dependencies(package: Dict[str, Any]) -> Set[str]:
|
|
2211
|
+
deps: Set[str] = set()
|
|
2212
|
+
for key in ("dependencies", "devDependencies", "peerDependencies", "optionalDependencies"):
|
|
2213
|
+
value = package.get(key)
|
|
2214
|
+
if isinstance(value, dict):
|
|
2215
|
+
deps.update(str(name).lower() for name in value)
|
|
2216
|
+
return deps
|
|
2217
|
+
|
|
2218
|
+
|
|
2219
|
+
def _js_frameworks(dependencies: Set[str]) -> List[str]:
|
|
2220
|
+
known = {
|
|
2221
|
+
"next": "nextjs",
|
|
2222
|
+
"react": "react",
|
|
2223
|
+
"vue": "vue",
|
|
2224
|
+
"svelte": "svelte",
|
|
2225
|
+
"@angular/core": "angular",
|
|
2226
|
+
"vite": "vite",
|
|
2227
|
+
"nuxt": "nuxt",
|
|
2228
|
+
"express": "express",
|
|
2229
|
+
"nestjs": "nestjs",
|
|
2230
|
+
}
|
|
2231
|
+
return sorted({label for dep, label in known.items() if dep in dependencies})
|
|
2232
|
+
|
|
2233
|
+
|
|
2234
|
+
def _python_frameworks(component_dir: Path, pyproject: Dict[str, Any], requirement_files: Sequence[str]) -> List[str]:
    """Return the sorted known Python frameworks/tools a component declares.

    Dependency names are gathered from the parsed pyproject data and from
    each requirements file (resolved relative to *component_dir*).
    """
    declared = set()
    declared.update(_pyproject_dependency_names(pyproject))
    for filename in requirement_files:
        declared.update(_requirements_dependency_names(component_dir / filename))
    # Every recognised dependency name doubles as its own label.
    recognized = ("django", "flask", "fastapi", "pytest", "ruff", "black")
    return sorted(name for name in recognized if name in declared)
|
|
2248
|
+
|
|
2249
|
+
|
|
2250
|
+
def _pyproject_dependency_names(pyproject: Dict[str, Any]) -> Set[str]:
    """Extract dependency names from parsed pyproject data.

    Sources read: ``project.dependencies``, every list under
    ``project.optional-dependencies``, and the keys of
    ``tool.poetry.dependencies`` / ``tool.poetry.dev-dependencies``.
    Values of an unexpected type are ignored; empty names are dropped.
    """
    is_mapping = isinstance(pyproject, dict)
    collected: Set[str] = set()

    project = pyproject.get("project", {}) if is_mapping else {}
    if isinstance(project, dict):
        for spec in project.get("dependencies", []):
            if isinstance(spec, str):
                collected.add(_dependency_name(spec))
        extras = project.get("optional-dependencies", {})
        if isinstance(extras, dict):
            for specs in extras.values():
                if not isinstance(specs, list):
                    continue
                for spec in specs:
                    if isinstance(spec, str):
                        collected.add(_dependency_name(spec))

    tool = pyproject.get("tool", {}) if is_mapping else {}
    poetry = tool.get("poetry", {}) if isinstance(tool, dict) else {}
    if isinstance(poetry, dict):
        for section in ("dependencies", "dev-dependencies"):
            table = poetry.get(section)
            if isinstance(table, dict):
                # Poetry tables are keyed by package name; only lower-cased here.
                collected.update(str(key).lower() for key in table)

    collected.discard("")
    return collected
|
|
2268
|
+
|
|
2269
|
+
|
|
2270
|
+
def _requirements_dependency_names(path: Path) -> Set[str]:
    """Extract dependency names from a requirements-style text file.

    Blank lines, ``#`` comment lines and lines starting with ``-`` are
    skipped; empty names are dropped.
    """
    found: Set[str] = set()
    for raw in _read_text(path).splitlines():
        entry = raw.strip()
        if entry and not entry.startswith(("#", "-")):
            found.add(_dependency_name(entry))
    found.discard("")
    return found
|
|
2279
|
+
|
|
2280
|
+
|
|
2281
|
+
def _dependency_name(value: str) -> str:
|
|
2282
|
+
match = re.match(r"^\s*([A-Za-z0-9_.-]+)", value)
|
|
2283
|
+
return match.group(1).lower().replace("_", "-") if match else ""
|
|
2284
|
+
|
|
2285
|
+
|
|
2286
|
+
def _python_install_command(pm: Dict[str, Any]) -> Optional[str]:
|
|
2287
|
+
name = pm["name"]
|
|
2288
|
+
if name == "uv":
|
|
2289
|
+
return "uv sync"
|
|
2290
|
+
if name == "poetry":
|
|
2291
|
+
return "poetry install"
|
|
2292
|
+
if name == "pdm":
|
|
2293
|
+
return "pdm install"
|
|
2294
|
+
if name == "pipenv":
|
|
2295
|
+
return "pipenv install --dev"
|
|
2296
|
+
if name == "pip":
|
|
2297
|
+
if any(path.endswith("requirements.txt") for path in pm.get("evidence", [])):
|
|
2298
|
+
return "python -m pip install -r requirements.txt"
|
|
2299
|
+
return "python -m pip install -e ."
|
|
2300
|
+
return None
|
|
2301
|
+
|
|
2302
|
+
|
|
2303
|
+
def _ruby_gem_names(path: Path) -> Set[str]:
    """Return the lower-cased gem names declared by ``gem "<name>"`` lines in a file."""
    declaration = re.compile(r"^\s*gem\s+['\"]([^'\"]+)['\"]", flags=re.MULTILINE)
    return {gem.lower() for gem in declaration.findall(_read_text(path))}
|
|
2309
|
+
|
|
2310
|
+
|
|
2311
|
+
def _ruby_frameworks(gems: Set[str]) -> List[str]:
|
|
2312
|
+
known = {
|
|
2313
|
+
"rails": "rails",
|
|
2314
|
+
"sinatra": "sinatra",
|
|
2315
|
+
"rspec": "rspec",
|
|
2316
|
+
"rspec-rails": "rspec",
|
|
2317
|
+
"rubocop": "rubocop",
|
|
2318
|
+
}
|
|
2319
|
+
return sorted({label for gem, label in known.items() if gem in gems})
|
|
2320
|
+
|
|
2321
|
+
|
|
2322
|
+
def _composer_dependencies(composer: Dict[str, Any]) -> Set[str]:
|
|
2323
|
+
dependencies: Set[str] = set()
|
|
2324
|
+
for key in ("require", "require-dev"):
|
|
2325
|
+
value = composer.get(key)
|
|
2326
|
+
if isinstance(value, dict):
|
|
2327
|
+
dependencies.update(str(name).lower() for name in value)
|
|
2328
|
+
return dependencies
|
|
2329
|
+
|
|
2330
|
+
|
|
2331
|
+
def _php_frameworks(dependencies: Set[str]) -> List[str]:
|
|
2332
|
+
known = {
|
|
2333
|
+
"laravel/framework": "laravel",
|
|
2334
|
+
"symfony/framework-bundle": "symfony",
|
|
2335
|
+
"phpunit/phpunit": "phpunit",
|
|
2336
|
+
"phpstan/phpstan": "phpstan",
|
|
2337
|
+
"squizlabs/php_codesniffer": "phpcs",
|
|
2338
|
+
"friendsofphp/php-cs-fixer": "php-cs-fixer",
|
|
2339
|
+
"laravel/pint": "pint",
|
|
2340
|
+
}
|
|
2341
|
+
return sorted({label for dep, label in known.items() if dep in dependencies})
|
|
2342
|
+
|
|
2343
|
+
|
|
2344
|
+
def _dotnet_languages_from_manifests(manifests: Sequence[str]) -> List[Dict[str, Any]]:
    """Build one language fact per .NET project-file suffix found in *manifests*.

    ``.csproj``, ``.fsproj`` and ``.vbproj`` files map to csharp, fsharp and
    visualbasic respectively; the matching manifest paths are the evidence.
    """
    suffix_languages = ((".csproj", "csharp"), (".fsproj", "fsharp"), (".vbproj", "visualbasic"))
    detected = []
    for suffix, language in suffix_languages:
        matching = [manifest for manifest in manifests if manifest.endswith(suffix)]
        if not matching:
            continue
        detected.append(_fact(language, 0.95, matching, f".NET {suffix} project file"))
    return detected
|
|
2351
|
+
|
|
2352
|
+
|
|
2353
|
+
def _script_preview(value: Any) -> str:
|
|
2354
|
+
if isinstance(value, str):
|
|
2355
|
+
return value
|
|
2356
|
+
if isinstance(value, list):
|
|
2357
|
+
return " && ".join(str(item) for item in value)
|
|
2358
|
+
return str(value or "")
|
|
2359
|
+
|
|
2360
|
+
|
|
2361
|
+
def _js_install_command(pm: Dict[str, Any]) -> str:
|
|
2362
|
+
name = pm["name"]
|
|
2363
|
+
evidence = set(pm.get("evidence", []))
|
|
2364
|
+
if name == "npm":
|
|
2365
|
+
return "npm ci" if any(path.endswith(("package-lock.json", "npm-shrinkwrap.json")) for path in evidence) else "npm install"
|
|
2366
|
+
if name == "pnpm":
|
|
2367
|
+
return "pnpm install --frozen-lockfile" if any(path.endswith("pnpm-lock.yaml") for path in evidence) else "pnpm install"
|
|
2368
|
+
if name == "yarn":
|
|
2369
|
+
return "yarn install --immutable" if any(path.endswith("yarn.lock") for path in evidence) else "yarn install"
|
|
2370
|
+
if name == "bun":
|
|
2371
|
+
return "bun install --frozen-lockfile" if any("bun.lock" in path for path in evidence) else "bun install"
|
|
2372
|
+
return f"{name} install"
|
|
2373
|
+
|
|
2374
|
+
|
|
2375
|
+
def _js_script_command(pm_name: str, script: str) -> str:
|
|
2376
|
+
if pm_name == "yarn":
|
|
2377
|
+
return f"yarn {script}"
|
|
2378
|
+
if pm_name == "bun":
|
|
2379
|
+
return f"bun run {script}"
|
|
2380
|
+
if pm_name == "pnpm":
|
|
2381
|
+
return f"pnpm run {script}"
|
|
2382
|
+
return f"npm run {script}"
|
|
2383
|
+
|
|
2384
|
+
|
|
2385
|
+
def _pm_executable(name: str) -> str:
|
|
2386
|
+
return {"npm": "npm", "pnpm": "pnpm", "yarn": "yarn", "bun": "bun"}.get(name, name)
|
|
2387
|
+
|
|
2388
|
+
|
|
2389
|
+
def _has_test_sample(root: Path, component_path: str, ignore: _GitIgnore) -> bool:
    """Return True when at least one visible test file sample exists for the component."""
    found = _test_samples(root, component_path, ignore)
    return len(found) > 0
|
|
2391
|
+
|
|
2392
|
+
|
|
2393
|
+
def _test_samples(root: Path, component_path: str, ignore: _GitIgnore) -> List[str]:
    """Return up to three root-relative paths of visible Python test files.

    Looks directly inside the component's ``tests`` directory first and then
    the component directory itself (no recursion), matching ``test_*.py``
    and ``*_test.py``. Collection stops as soon as three samples are found.
    """
    base = root if component_path == "." else root / component_path
    found: List[str] = []
    for directory in (base / "tests", base):
        if not directory.exists():
            continue
        for pattern in ("test_*.py", "*_test.py"):
            for candidate in directory.glob(pattern):
                rel = _rel_to_root(root, candidate)
                if not _visible_file(root, ignore, rel):
                    continue
                found.append(rel)
                if len(found) >= 3:
                    return found
    return found
|
|
2408
|
+
|
|
2409
|
+
|
|
2410
|
+
def _setup_cfg_has_section(path: Path, section: str) -> bool:
    """Return True when the file text contains the literal header ``[section]``."""
    contents = _read_text(path)
    header = f"[{section}]"
    return contents.find(header) != -1
|
|
2413
|
+
|
|
2414
|
+
|
|
2415
|
+
def _load_json(path: Path) -> Optional[Dict[str, Any]]:
|
|
2416
|
+
try:
|
|
2417
|
+
with path.open("r", encoding="utf-8") as handle:
|
|
2418
|
+
value = json.load(handle)
|
|
2419
|
+
return value if isinstance(value, dict) else None
|
|
2420
|
+
except (OSError, json.JSONDecodeError):
|
|
2421
|
+
return None
|
|
2422
|
+
|
|
2423
|
+
|
|
2424
|
+
def _load_json_or_jsonc(path: Path) -> Optional[Dict[str, Any]]:
    """Load a JSON object from a ``.json`` or ``.jsonc`` file.

    Files whose suffix is not ``.jsonc`` go through the plain JSON loader.
    ``.jsonc`` files have comments and trailing commas stripped before
    parsing. Returns None for empty/unreadable text, invalid JSON, or a
    non-object top level.
    """
    if path.suffix == ".jsonc":
        raw = _read_text(path)
        if not raw:
            return None
        try:
            parsed = json.loads(_strip_trailing_commas(_strip_json_comments(raw)))
        except json.JSONDecodeError:
            return None
        return parsed if isinstance(parsed, dict) else None
    return _load_json(path)
|
|
2435
|
+
|
|
2436
|
+
|
|
2437
|
+
def _strip_json_comments(text: str) -> str:
|
|
2438
|
+
result = []
|
|
2439
|
+
index = 0
|
|
2440
|
+
in_string = False
|
|
2441
|
+
escape = False
|
|
2442
|
+
while index < len(text):
|
|
2443
|
+
char = text[index]
|
|
2444
|
+
nxt = text[index + 1] if index + 1 < len(text) else ""
|
|
2445
|
+
if in_string:
|
|
2446
|
+
result.append(char)
|
|
2447
|
+
if escape:
|
|
2448
|
+
escape = False
|
|
2449
|
+
elif char == "\\":
|
|
2450
|
+
escape = True
|
|
2451
|
+
elif char == '"':
|
|
2452
|
+
in_string = False
|
|
2453
|
+
index += 1
|
|
2454
|
+
continue
|
|
2455
|
+
if char == '"':
|
|
2456
|
+
in_string = True
|
|
2457
|
+
result.append(char)
|
|
2458
|
+
index += 1
|
|
2459
|
+
continue
|
|
2460
|
+
if char == "/" and nxt == "/":
|
|
2461
|
+
index += 2
|
|
2462
|
+
while index < len(text) and text[index] not in "\r\n":
|
|
2463
|
+
index += 1
|
|
2464
|
+
continue
|
|
2465
|
+
if char == "/" and nxt == "*":
|
|
2466
|
+
index += 2
|
|
2467
|
+
while index + 1 < len(text) and not (text[index] == "*" and text[index + 1] == "/"):
|
|
2468
|
+
index += 1
|
|
2469
|
+
index += 2
|
|
2470
|
+
continue
|
|
2471
|
+
result.append(char)
|
|
2472
|
+
index += 1
|
|
2473
|
+
return "".join(result)
|
|
2474
|
+
|
|
2475
|
+
|
|
2476
|
+
def _strip_trailing_commas(text: str) -> str:
|
|
2477
|
+
return re.sub(r",(\s*[}\]])", r"\1", text)
|
|
2478
|
+
|
|
2479
|
+
|
|
2480
|
+
def _load_toml(path: Path) -> Dict[str, Any]:
|
|
2481
|
+
if tomllib is None or not path.is_file():
|
|
2482
|
+
return {}
|
|
2483
|
+
try:
|
|
2484
|
+
with path.open("rb") as handle:
|
|
2485
|
+
value = tomllib.load(handle)
|
|
2486
|
+
return value if isinstance(value, dict) else {}
|
|
2487
|
+
except (OSError, tomllib.TOMLDecodeError):
|
|
2488
|
+
return {}
|
|
2489
|
+
|
|
2490
|
+
|
|
2491
|
+
def _write_json(path: Path, data: Dict[str, Any]) -> None:
|
|
2492
|
+
path.parent.mkdir(parents=True, exist_ok=True)
|
|
2493
|
+
tmp = path.with_suffix(path.suffix + ".tmp")
|
|
2494
|
+
with tmp.open("w", encoding="utf-8") as handle:
|
|
2495
|
+
json.dump(data, handle, indent=2, sort_keys=True)
|
|
2496
|
+
handle.write("\n")
|
|
2497
|
+
tmp.replace(path)
|
|
2498
|
+
|
|
2499
|
+
|
|
2500
|
+
def _read_text(path: Path, max_bytes: int = 1_000_000) -> str:
|
|
2501
|
+
try:
|
|
2502
|
+
if not path.is_file() or path.stat().st_size > max_bytes:
|
|
2503
|
+
return ""
|
|
2504
|
+
return path.read_text(encoding="utf-8", errors="replace")
|
|
2505
|
+
except OSError:
|
|
2506
|
+
return ""
|
|
2507
|
+
|
|
2508
|
+
|
|
2509
|
+
def _fingerprint(path: Path) -> Dict[str, Any]:
|
|
2510
|
+
try:
|
|
2511
|
+
stat = path.stat()
|
|
2512
|
+
digest = hashlib.sha256()
|
|
2513
|
+
with path.open("rb") as handle:
|
|
2514
|
+
for chunk in iter(lambda: handle.read(1024 * 1024), b""):
|
|
2515
|
+
digest.update(chunk)
|
|
2516
|
+
return {
|
|
2517
|
+
"path": _clean_rel(str(path.name)) if not path.is_absolute() else str(path),
|
|
2518
|
+
"sha256": digest.hexdigest(),
|
|
2519
|
+
"size": stat.st_size,
|
|
2520
|
+
"mtime_ns": stat.st_mtime_ns,
|
|
2521
|
+
}
|
|
2522
|
+
except OSError:
|
|
2523
|
+
return {
|
|
2524
|
+
"path": str(path),
|
|
2525
|
+
"sha256": None,
|
|
2526
|
+
"size": None,
|
|
2527
|
+
"mtime_ns": None,
|
|
2528
|
+
"missing": True,
|
|
2529
|
+
}
|
|
2530
|
+
|
|
2531
|
+
|
|
2532
|
+
def _fingerprint_with_rel(root: Path, rel_path: str) -> Dict[str, Any]:
    """Fingerprint ``root / rel_path`` and record *rel_path* as the entry's path."""
    record = _fingerprint(root / rel_path)
    record.update(path=rel_path)
    return record
|
|
2536
|
+
|
|
2537
|
+
|
|
2538
|
+
def _visible_file(root: Path, ignore: _GitIgnore, rel_path: str) -> bool:
    """Return True when *rel_path* is an existing file under *root* that is not ignored."""
    rel = _clean_rel(rel_path)
    if not (root / rel).is_file():
        return False
    return not ignore.ignored(rel, is_dir=False)
|
|
2541
|
+
|
|
2542
|
+
|
|
2543
|
+
def _profile_file_exists(root: Path, rel_path: str) -> bool:
    """Return True when the cleaned *rel_path* names a regular file under *root*."""
    candidate = root / _clean_rel(rel_path)
    return candidate.is_file()
|
|
2546
|
+
|
|
2547
|
+
|
|
2548
|
+
def _resolve_root(root: str | os.PathLike[str]) -> Path:
|
|
2549
|
+
return Path(root).expanduser().resolve()
|
|
2550
|
+
|
|
2551
|
+
|
|
2552
|
+
def _resolve_cache_path(root: Path, cache_path: str | os.PathLike[str] | None) -> Path:
|
|
2553
|
+
if cache_path is None:
|
|
2554
|
+
return root / DEFAULT_CACHE_NAME
|
|
2555
|
+
path = Path(cache_path).expanduser()
|
|
2556
|
+
return path.resolve() if path.is_absolute() else (root / path).resolve()
|
|
2557
|
+
|
|
2558
|
+
|
|
2559
|
+
def _normalize_changed_files(root: Path, changed_files: Sequence[str]) -> List[str]:
|
|
2560
|
+
normalized = []
|
|
2561
|
+
for item in changed_files:
|
|
2562
|
+
if not item:
|
|
2563
|
+
continue
|
|
2564
|
+
path = Path(item)
|
|
2565
|
+
if path.is_absolute():
|
|
2566
|
+
try:
|
|
2567
|
+
rel = path.resolve().relative_to(root)
|
|
2568
|
+
except ValueError:
|
|
2569
|
+
continue
|
|
2570
|
+
normalized.append(rel.as_posix())
|
|
2571
|
+
else:
|
|
2572
|
+
normalized.append(_clean_rel(item))
|
|
2573
|
+
return sorted(set(path for path in normalized if path and path != "."))
|
|
2574
|
+
|
|
2575
|
+
|
|
2576
|
+
def _rel_to_root(root: Path, path: Path) -> str:
|
|
2577
|
+
try:
|
|
2578
|
+
return path.resolve().relative_to(root).as_posix()
|
|
2579
|
+
except ValueError:
|
|
2580
|
+
return path.as_posix()
|
|
2581
|
+
|
|
2582
|
+
|
|
2583
|
+
def _clean_rel(path: str) -> str:
|
|
2584
|
+
rel = Path(path).as_posix()
|
|
2585
|
+
if rel == ".":
|
|
2586
|
+
return "."
|
|
2587
|
+
while rel.startswith("./"):
|
|
2588
|
+
rel = rel[2:]
|
|
2589
|
+
return rel.strip("/")
|
|
2590
|
+
|
|
2591
|
+
|
|
2592
|
+
def _join_rel(base: str, name: str) -> str:
|
|
2593
|
+
return name if base == "." else f"{base}/{name}"
|
|
2594
|
+
|
|
2595
|
+
|
|
2596
|
+
def _dirname_rel(path: str) -> str:
|
|
2597
|
+
parent = Path(path).parent.as_posix()
|
|
2598
|
+
return "." if parent == "." else parent
|
|
2599
|
+
|
|
2600
|
+
|
|
2601
|
+
def _is_under(path: str, parent: str) -> bool:
|
|
2602
|
+
if parent == ".":
|
|
2603
|
+
return True
|
|
2604
|
+
return path == parent or path.startswith(parent + "/")
|
|
2605
|
+
|
|
2606
|
+
|
|
2607
|
+
def _utc_now() -> str:
|
|
2608
|
+
return datetime.now(timezone.utc).replace(microsecond=0).isoformat().replace("+00:00", "Z")
|
|
2609
|
+
|
|
2610
|
+
|
|
2611
|
+
def _emit_progress(progress: Optional[Callable[[str], None]], message: str) -> None:
|
|
2612
|
+
if progress is not None:
|
|
2613
|
+
progress(message)
|
|
2614
|
+
|
|
2615
|
+
|
|
2616
|
+
def _stderr_progress(message: str) -> None:
|
|
2617
|
+
labels = {
|
|
2618
|
+
"sync: start": (0, "start"),
|
|
2619
|
+
"sync: check cache": (20, "cache"),
|
|
2620
|
+
"sync: reused cached profile": (80, "reuse"),
|
|
2621
|
+
"sync: paths-only": (55, "paths"),
|
|
2622
|
+
"sync: scan repo": (45, "scan"),
|
|
2623
|
+
"sync: wrote cache": (90, "write"),
|
|
2624
|
+
"sync: done": (100, "done"),
|
|
2625
|
+
}
|
|
2626
|
+
percent, label = labels.get(message, (50, message.replace("sync: ", "")))
|
|
2627
|
+
width = 20
|
|
2628
|
+
filled = int(width * percent / 100)
|
|
2629
|
+
bar = "#" * filled + "-" * (width - filled)
|
|
2630
|
+
end = "\n" if percent >= 100 else "\r"
|
|
2631
|
+
print(f"cwp [{bar}] {percent:3d}% {label}", file=sys.stderr, end=end, flush=True)
|
|
2632
|
+
|
|
2633
|
+
|
|
2634
|
+
def _format_result(
    data: Dict[str, Any],
    output_format: str,
    verbose: bool = False,
    status_detail: Optional[str] = None,
    limit: int = DEFAULT_STATUS_LIMIT,
    depth: int = DEFAULT_STATUS_DEPTH,
) -> Dict[str, Any] | str:
    """Return *data* unchanged for the "json" format, or its text rendering otherwise.

    Raises:
        ValueError: if *output_format* is not a supported format.
    """
    if _normalize_output_format(output_format) == "json":
        return data
    return _render_text(
        data,
        verbose=verbose,
        status_detail=status_detail,
        limit=limit,
        depth=depth,
    )
|
|
2646
|
+
|
|
2647
|
+
|
|
2648
|
+
def _normalize_output_format(output_format: str) -> str:
|
|
2649
|
+
fmt = (output_format or "text").lower()
|
|
2650
|
+
if fmt not in {"text", "json"}:
|
|
2651
|
+
raise ValueError("format must be 'text' or 'json'")
|
|
2652
|
+
return fmt
|
|
2653
|
+
|
|
2654
|
+
|
|
2655
|
+
def _render_text(
    data: Dict[str, Any],
    verbose: bool = False,
    status_detail: Optional[str] = None,
    limit: int = DEFAULT_STATUS_LIMIT,
    depth: int = DEFAULT_STATUS_DEPTH,
) -> str:
    """Render an operation result (or a bare profile) as human-readable text.

    "install-skill" results and non-"full" status results are delegated to
    their dedicated renderers. Everything else gets a header with the
    alignment state, optional stale/new/removed file lists, the affected
    summary, the profile, and any warnings.

    Args:
        data: Operation result. The profile is taken from ``data["profile"]``
            when that is a dict; otherwise *data* itself is treated as the
            profile if it has a dict under "project".
        verbose: Include the extended profile details.
        status_detail: Detail level for "status" results.
        limit: Preview size passed to the status renderer.
        depth: Component depth passed to the status renderer.

    Raises:
        ValueError: if *status_detail* is not a recognised detail level.
    """
    operation = data.get("operation", "sync")
    if operation == "install-skill":
        return _render_install_skill_text(data, verbose=verbose)
    if operation == "status":
        detail = _normalize_status_detail(status_detail or "compact", verbose)
        if detail != "full":
            return _render_status_text(data, detail=detail, limit=limit, depth=depth)
        # "full" status is the regular rendering with every detail shown.
        verbose = True

    if isinstance(data.get("profile"), dict):
        profile = data.get("profile")
    elif isinstance(data.get("project"), dict):
        profile = data
    else:
        profile = None
    alignment = data.get("alignment") or (profile or {}).get("alignment", {})
    lines = [
        "code-workflow-probe",
        f"{operation}: aligned={_bool_text(alignment.get('aligned'))} reason={alignment.get('reason', 'unknown')}",
    ]

    for key in ("stale_files", "new_profile_files", "removed_profile_files"):
        names = alignment.get(key, [])
        if names:
            lines.append(f"{key}: {', '.join(names)}")
    if "profile_updated" in data:
        lines.append(f"profile_updated: {_bool_text(data.get('profile_updated'))}")
    if data.get("changed_files"):
        lines.append(f"changed_files: {', '.join(data['changed_files'])}")

    if data.get("affected"):
        _append_affected_text(lines, data)

    if profile:
        _append_profile_text(lines, profile, verbose=verbose)
    elif operation not in {"affected", "edit"}:
        lines.append("profile: unavailable")

    warnings = list(data.get("warnings", []))
    # When data itself is the profile, its warnings are already in the list;
    # extending again would print every warning twice.
    if profile and profile is not data:
        warnings.extend(profile.get("warnings", []))
    if warnings:
        _append_list(lines, "warnings", warnings)
    return "\n".join(lines)
|
|
2708
|
+
|
|
2709
|
+
|
|
2710
|
+
def _render_install_skill_text(data: Dict[str, Any], verbose: bool = False) -> str:
    """Render the result of an install-skill operation as text.

    Shows the target, install/dry-run flags and skill path; the skill
    content is appended only in verbose mode, and warnings when present.
    """
    target = data.get("target")
    installed = _bool_text(data.get("installed"))
    dry_run = _bool_text(data.get("dry_run"))
    lines = [
        "code-workflow-probe",
        f"install-skill: target={target} installed={installed} dry_run={dry_run}",
        f"path: {data.get('skill_path')}",
        "note: installed skill tells Codex to sync after editing project/workflow management files.",
    ]
    content = data.get("content")
    if verbose and content:
        lines.extend(["content:", str(content).rstrip()])
    warnings = data.get("warnings")
    if warnings:
        _append_list(lines, "warnings", warnings)
    return "\n".join(lines)
|
|
2723
|
+
|
|
2724
|
+
|
|
2725
|
+
def _render_status_text(
    data: Dict[str, Any],
    detail: str = "compact",
    limit: int = DEFAULT_STATUS_LIMIT,
    depth: int = DEFAULT_STATUS_DEPTH,
) -> str:
    """Render a status result in the "compact" or "standard" text layout.

    Both layouts show the alignment header, previews of stale/new/removed
    files and a project summary. "compact" then lists local workflows;
    "standard" lists components and an evidence-file preview.
    """
    # NOTE(review): the evidence preview is placed inside the "standard"
    # branch; confirm against the original indentation.
    raw_profile = data.get("profile")
    profile = raw_profile if isinstance(raw_profile, dict) else None
    alignment = data.get("alignment", {})
    preview_limit = _normalize_limit(limit)
    preview_depth = _normalize_depth(depth)
    lines = [
        "code-workflow-probe",
        f"status: aligned={_bool_text(alignment.get('aligned'))} reason={alignment.get('reason', 'unknown')}",
    ]
    previews = (
        ("stale", "stale_files"),
        ("new_profile", "new_profile_files"),
        ("removed", "removed_profile_files"),
    )
    for label, key in previews:
        names = alignment.get(key, [])
        if names:
            lines.append(f"{label}({len(names)}): {_preview_names(names, preview_limit)}")

    if not profile:
        lines.append("profile: unavailable")
    else:
        project = profile.get("project", {})
        components = project.get("components", [])
        workflows = []
        for component in components:
            workflows.extend(component.get("workflows", []))
        safe = len([workflow for workflow in workflows if workflow.get("safe_auto")])
        review = len(workflows) - safe
        lines.append("summary:")
        lines.append(f"- project: {project.get('type', 'unknown')}")
        lines.append(f"- components: {len(components)}")
        lines.append(f"- tech: {_format_fact_names(project.get('technologies', []))}")
        lines.append(f"- package_managers: {_format_package_managers(project.get('package_managers', []))}")
        lines.append(f"- workflows: safe_auto={safe} needs_review={review} ci={len(project.get('ci_workflows', []))}")
        if detail == "compact":
            _append_status_workflows(lines, components, preview_limit, include_component=True)
        elif detail == "standard":
            _append_status_components(lines, components, preview_limit, preview_depth)
            evidence_files = sorted(profile.get("evidence_files", {}))
            lines.append(f"evidence({len(evidence_files)}): {_preview_names(evidence_files, preview_limit)}")

    if data.get("warnings"):
        _append_list(lines, "warnings", data.get("warnings", []))
    return "\n".join(lines)
|
|
2771
|
+
|
|
2772
|
+
|
|
2773
|
+
def _normalize_status_detail(detail: Optional[str], verbose: bool = False) -> str:
|
|
2774
|
+
if verbose:
|
|
2775
|
+
return "full"
|
|
2776
|
+
value = (detail or "compact").lower()
|
|
2777
|
+
if value not in STATUS_DETAILS:
|
|
2778
|
+
raise ValueError("detail must be 'compact', 'standard', or 'full'")
|
|
2779
|
+
return value
|
|
2780
|
+
|
|
2781
|
+
|
|
2782
|
+
def _normalize_limit(limit: int) -> int:
|
|
2783
|
+
try:
|
|
2784
|
+
value = int(limit)
|
|
2785
|
+
except (TypeError, ValueError):
|
|
2786
|
+
return DEFAULT_STATUS_LIMIT
|
|
2787
|
+
return max(1, value)
|
|
2788
|
+
|
|
2789
|
+
|
|
2790
|
+
def _normalize_depth(depth: int) -> int:
|
|
2791
|
+
try:
|
|
2792
|
+
value = int(depth)
|
|
2793
|
+
except (TypeError, ValueError):
|
|
2794
|
+
return DEFAULT_STATUS_DEPTH
|
|
2795
|
+
return max(0, value)
|
|
2796
|
+
|
|
2797
|
+
|
|
2798
|
+
def _append_status_components(lines: List[str], components: Sequence[Dict[str, Any]], limit: int, depth: int) -> None:
    """Append the per-component section of the standard status view to *lines*.

    Components are sorted by path, filtered to those within *depth*, and
    capped at *limit*. Each shown component gets a summary line followed by
    its local workflows; a final line reports how many were hidden.
    Nothing is appended when there are no components.
    """
    if not components:
        return
    ordered = sorted(components, key=lambda entry: str(entry.get("path") or ""))
    visible = [entry for entry in ordered if _component_within_depth(entry, depth)]
    selected = visible[:limit]
    lines.append(f"components(depth={depth}, shown={len(selected)}/{len(components)}):")
    for component in selected:
        automatic = []
        manual = []
        for workflow in component.get("workflows", []):
            (automatic if workflow.get("safe_auto") else manual).append(workflow)
        safe = _workflow_kind_preview(automatic)
        review = _workflow_kind_preview(manual)
        fields = (
            f"id={component.get('id')} "
            f"path={component.get('path')} "
            f"lang={_format_fact_names(component.get('languages', []))} "
            f"pm={_format_package_manager(component.get('package_manager'))} "
            f"safe={safe} review={review}"
        )
        lines.append("- " + fields)
        _append_status_workflows(lines, [component], limit, indent=" ", include_component=False)
    hidden_by_depth = len(ordered) - len(visible)
    hidden_by_limit = len(visible) - len(selected)
    if hidden_by_depth or hidden_by_limit:
        lines.append(f"- hidden: depth={hidden_by_depth} limit={hidden_by_limit}")
|
|
2822
|
+
|
|
2823
|
+
|
|
2824
|
+
def _component_within_depth(component: Dict[str, Any], depth: int) -> bool:
|
|
2825
|
+
path = str(component.get("path") or ".")
|
|
2826
|
+
if path == ".":
|
|
2827
|
+
return True
|
|
2828
|
+
return len([part for part in path.split("/") if part]) <= depth
|
|
2829
|
+
|
|
2830
|
+
|
|
2831
|
+
def _workflow_kind_preview(workflows: Sequence[Dict[str, Any]], limit: int = 4) -> str:
    """Return a preview string of the distinct, sorted workflow kinds."""
    kinds = set()
    for workflow in workflows:
        kind = workflow.get("kind")
        if kind:
            kinds.add(str(kind))
    return _preview_names(sorted(kinds), limit)
|
|
2834
|
+
|
|
2835
|
+
|
|
2836
|
+
def _append_status_workflows(
    lines: List[str],
    components: Sequence[Dict[str, Any]],
    limit: int,
    indent: str = "",
    include_component: bool = True,
) -> None:
    """Append a capped list of local workflows for *components* to *lines*.

    A header reports shown/total counts. With no workflows a "- none" entry
    is written; otherwise each selected workflow is listed (optionally
    prefixed with its component id), followed by a "+N more" line when some
    were left out.
    """
    items = _status_workflow_items(components)
    selected = _select_status_workflow_items(items, limit)
    lines.append(f"{indent}workflows(local, shown={len(selected)}/{len(items)}):")
    if not items:
        lines.append(f"{indent}- none")
        return
    for item in selected:
        if include_component:
            prefix = f"component={item['component_id']} "
        else:
            prefix = ""
        lines.append(f"{indent}- {prefix}{_format_workflow(item['workflow'])}")
    if len(items) > len(selected):
        lines.append(f"{indent}- +{len(items) - len(selected)} more")
|
|
2854
|
+
|
|
2855
|
+
|
|
2856
|
+
def _status_workflow_items(components: Sequence[Dict[str, Any]]) -> List[Dict[str, Any]]:
    """Collect the displayable local workflows of *components*, sorted for status output.

    A workflow is kept only when its source is "local", it is not CI-only,
    and it has a command. Each item records the owning component's id and
    path (defaulting to ".") next to the workflow itself.
    """
    items: List[Dict[str, Any]] = []
    for component in components:
        component_id = component.get("id")
        component_path = component.get("path") or "."
        for workflow in component.get("workflows", []):
            runnable_locally = (
                workflow.get("source") == "local"
                and not workflow.get("ci_only")
                and workflow.get("command")
            )
            if runnable_locally:
                items.append({
                    "component_id": component_id,
                    "component_path": component_path,
                    "workflow": workflow,
                })
    items.sort(key=_status_workflow_sort_key)
    return items
|
|
2868
|
+
|
|
2869
|
+
|
|
2870
|
+
def _select_status_workflow_items(items: Sequence[Dict[str, Any]], limit: int) -> List[Dict[str, Any]]:
    """Pick at most *limit* workflow items, favouring one per preferred kind.

    The first pass takes the first unused item of each kind in
    ``STATUS_WORKFLOW_KIND_ORDER``; the second pass fills the remaining
    slots in input order. The selection is returned in status sort order.
    """
    max_items = _normalize_limit(limit)
    chosen: List[Dict[str, Any]] = []
    taken: Set[int] = set()

    # Pass 1: one representative per preferred kind.
    for kind in STATUS_WORKFLOW_KIND_ORDER:
        match = next(
            (
                position
                for position, item in enumerate(items)
                if position not in taken and item["workflow"].get("kind") == kind
            ),
            None,
        )
        if match is not None:
            chosen.append(items[match])
            taken.add(match)
        if len(chosen) >= max_items:
            return sorted(chosen, key=_status_workflow_sort_key)

    # Pass 2: fill the remaining slots with whatever is left, in order.
    for position, item in enumerate(items):
        if position in taken:
            continue
        chosen.append(item)
        taken.add(position)
        if len(chosen) >= max_items:
            break

    return sorted(chosen, key=_status_workflow_sort_key)
|
|
2894
|
+
|
|
2895
|
+
|
|
2896
|
+
def _status_workflow_sort_key(item: Dict[str, Any]) -> Tuple[int, str, int, int, str]:
    """Return the sort key used to order workflow items in status output.

    Orders by position of the kind in ``STATUS_WORKFLOW_KIND_ORDER`` (kinds
    not listed sort last), then component path, safe-auto before
    review-needed, recommended before not, and finally the command text.
    """
    workflow = item["workflow"]
    kind = str(workflow.get("kind") or "")
    kind_positions = dict(zip(STATUS_WORKFLOW_KIND_ORDER, range(len(STATUS_WORKFLOW_KIND_ORDER))))
    kind_rank = kind_positions.get(kind, len(STATUS_WORKFLOW_KIND_ORDER))
    component_path = str(item.get("component_path") or "")
    safe_rank = int(not workflow.get("safe_auto"))
    recommended_rank = int(not workflow.get("recommended"))
    command = str(workflow.get("command") or "")
    return (kind_rank, component_path, safe_rank, recommended_rank, command)
|
|
2909
|
+
|
|
2910
|
+
|
|
2911
|
+
def _append_profile_text(lines: List[str], profile: Dict[str, Any], verbose: bool = False) -> None:
    """Append a text rendering of ``profile`` to ``lines``.

    Emits a summary section, one line per component followed by its workflow
    groups, a CI note when CI workflows are present, and the evidence files.
    With ``verbose`` the evidence files are listed one per line with their
    fingerprint fields; otherwise a short preview of at most five is written.
    """
    project = profile.get("project", {})

    tech = _format_fact_names(project.get("technologies", []), verbose=verbose)
    managers = _format_package_managers(project.get("package_managers", []), verbose=verbose)
    lines.append("summary:")
    lines.append(f"- project: {project.get('type', 'unknown')}")
    lines.append(f"- tech: {tech}")
    lines.append(f"- package_managers: {managers}")

    lines.append("components:")
    component_list = project.get("components", [])
    if not component_list:
        lines.append("- none")
    for entry in component_list:
        languages = _format_fact_names(entry.get("languages", []), verbose=verbose)
        manager = _format_package_manager(entry.get("package_manager"), verbose=verbose)
        lines.append(f"- id={entry.get('id')} path={entry.get('path')} lang={languages} pm={manager}")
        _append_workflow_groups(lines, entry.get("workflows", []), indent=" ", verbose=verbose)

    ci_candidates = project.get("ci_workflows", [])
    if ci_candidates:
        lines.append(f"ci: {len(ci_candidates)} candidate(s), not local")

    evidence_paths = sorted(profile.get("evidence_files", {}))
    if not verbose:
        # Compact mode: a five-name preview with a "+N more" tail.
        lines.append(f"evidence({len(evidence_paths)}): {_preview_names(evidence_paths)}")
        return

    lines.append("evidence_files:")
    if not evidence_paths:
        lines.append("- none")
    for evidence_path in evidence_paths:
        record = profile["evidence_files"][evidence_path]
        digest = record.get("sha256") or "missing"
        role_text = ",".join(record.get("roles", [])) or "unknown"
        lines.append(f"- {evidence_path}: sha256={digest} size={record.get('size')} roles={role_text}")
|
|
2949
|
+
|
|
2950
|
+
|
|
2951
|
+
def _append_affected_text(lines: List[str], data: Dict[str, Any]) -> None:
|
|
2952
|
+
affected_data = data.get("affected", {})
|
|
2953
|
+
components = affected_data.get("components", [])
|
|
2954
|
+
lines.append(f"affected: components={', '.join(components) if components else 'none'}")
|
|
2955
|
+
files = affected_data.get("files", [])
|
|
2956
|
+
for item in files[:8]:
|
|
2957
|
+
lines.append(
|
|
2958
|
+
"- "
|
|
2959
|
+
f"{item.get('file')} -> component={item.get('component_id') or 'none'} "
|
|
2960
|
+
f"profile_affecting={_bool_text(item.get('profile_affecting'))}"
|
|
2961
|
+
)
|
|
2962
|
+
|
|
2963
|
+
workflows = data.get("suggested_workflows", [])
|
|
2964
|
+
lines.append("suggested_workflows:")
|
|
2965
|
+
if not workflows:
|
|
2966
|
+
lines.append("- none")
|
|
2967
|
+
for workflow in workflows[:12]:
|
|
2968
|
+
component_id = workflow.get("component_id")
|
|
2969
|
+
prefix = f"component={component_id} " if component_id else ""
|
|
2970
|
+
lines.append(f"- {prefix}{_format_workflow(workflow)}")
|
|
2971
|
+
if len(workflows) > 12:
|
|
2972
|
+
lines.append(f"- +{len(workflows) - 12} more")
|
|
2973
|
+
|
|
2974
|
+
|
|
2975
|
+
def _append_workflow_groups(lines: List[str], workflows: Sequence[Dict[str, Any]], indent: str = "", verbose: bool = False) -> None:
|
|
2976
|
+
if not workflows:
|
|
2977
|
+
return
|
|
2978
|
+
safe = [workflow for workflow in workflows if workflow.get("safe_auto")]
|
|
2979
|
+
review = [workflow for workflow in workflows if not workflow.get("safe_auto")]
|
|
2980
|
+
if safe:
|
|
2981
|
+
lines.append(f"{indent}workflows.safe_auto:")
|
|
2982
|
+
for workflow in safe:
|
|
2983
|
+
lines.append(f"{indent}- {_format_workflow(workflow, verbose=verbose)}")
|
|
2984
|
+
if review:
|
|
2985
|
+
lines.append(f"{indent}workflows.needs_review:")
|
|
2986
|
+
for workflow in review[:8]:
|
|
2987
|
+
lines.append(f"{indent}- {_format_workflow(workflow, verbose=verbose)}")
|
|
2988
|
+
if len(review) > 8:
|
|
2989
|
+
lines.append(f"{indent}- +{len(review) - 8} more")
|
|
2990
|
+
|
|
2991
|
+
|
|
2992
|
+
def _format_workflow(workflow: Dict[str, Any], verbose: bool = False) -> str:
|
|
2993
|
+
if not verbose:
|
|
2994
|
+
notes = []
|
|
2995
|
+
if workflow.get("candidate"):
|
|
2996
|
+
notes.append("candidate")
|
|
2997
|
+
if workflow.get("risk") and workflow.get("risk") != "low":
|
|
2998
|
+
notes.append(f"risk={workflow.get('risk')}")
|
|
2999
|
+
if workflow.get("confidence") and workflow.get("confidence") != "high":
|
|
3000
|
+
notes.append(f"conf={workflow.get('confidence')}")
|
|
3001
|
+
if workflow.get("ci_only"):
|
|
3002
|
+
notes.append("ci-only")
|
|
3003
|
+
suffix = f" [{' '.join(notes)}]" if notes else ""
|
|
3004
|
+
return f"{workflow.get('kind')}: cwd={workflow.get('cwd') or '?'} command={workflow.get('command') or 'none'}{suffix}"
|
|
3005
|
+
return (
|
|
3006
|
+
f"kind={workflow.get('kind')} "
|
|
3007
|
+
f"command={workflow.get('command') or 'none'} "
|
|
3008
|
+
f"cwd={workflow.get('cwd') or 'unknown'} "
|
|
3009
|
+
f"scope={workflow.get('scope')} "
|
|
3010
|
+
f"source={workflow.get('source')} "
|
|
3011
|
+
f"confidence={workflow.get('confidence')} "
|
|
3012
|
+
f"risk={workflow.get('risk')} "
|
|
3013
|
+
f"safe_auto={_bool_text(workflow.get('safe_auto'))} "
|
|
3014
|
+
f"candidate={_bool_text(workflow.get('candidate'))} "
|
|
3015
|
+
f"ci_only={_bool_text(workflow.get('ci_only'))} "
|
|
3016
|
+
f"evidence={_format_names(workflow.get('evidence', []))}"
|
|
3017
|
+
)
|
|
3018
|
+
|
|
3019
|
+
|
|
3020
|
+
def _format_fact_names(facts: Sequence[Dict[str, Any]], verbose: bool = False) -> str:
|
|
3021
|
+
if not facts:
|
|
3022
|
+
return "none"
|
|
3023
|
+
if verbose:
|
|
3024
|
+
return ", ".join(f"{fact.get('name')}({fact.get('confidence')})" for fact in facts)
|
|
3025
|
+
return ",".join(str(fact.get("name")) for fact in facts)
|
|
3026
|
+
|
|
3027
|
+
|
|
3028
|
+
def _format_package_managers(package_managers: Sequence[Dict[str, Any]], verbose: bool = False) -> str:
|
|
3029
|
+
if not package_managers:
|
|
3030
|
+
return "none"
|
|
3031
|
+
return ", ".join(_format_package_manager(item, verbose=verbose) for item in package_managers)
|
|
3032
|
+
|
|
3033
|
+
|
|
3034
|
+
def _format_package_manager(package_manager: Optional[Dict[str, Any]], verbose: bool = False) -> str:
|
|
3035
|
+
if not package_manager:
|
|
3036
|
+
return "none"
|
|
3037
|
+
if not verbose:
|
|
3038
|
+
return str(package_manager.get("name"))
|
|
3039
|
+
return f"{package_manager.get('name')}({package_manager.get('confidence')}; command={package_manager.get('command')})"
|
|
3040
|
+
|
|
3041
|
+
|
|
3042
|
+
def _format_names(values: Sequence[str]) -> str:
|
|
3043
|
+
return ",".join(values) if values else "none"
|
|
3044
|
+
|
|
3045
|
+
|
|
3046
|
+
def _preview_names(values: Sequence[str], limit: int = 5) -> str:
|
|
3047
|
+
if not values:
|
|
3048
|
+
return "none"
|
|
3049
|
+
preview = ", ".join(values[:limit])
|
|
3050
|
+
if len(values) > limit:
|
|
3051
|
+
preview += f", +{len(values) - limit} more"
|
|
3052
|
+
return preview
|
|
3053
|
+
|
|
3054
|
+
|
|
3055
|
+
def _append_list(lines: List[str], label: str, values: Sequence[str]) -> None:
|
|
3056
|
+
lines.append(f"{label}:")
|
|
3057
|
+
if not values:
|
|
3058
|
+
lines.append("- none")
|
|
3059
|
+
return
|
|
3060
|
+
for value in values:
|
|
3061
|
+
lines.append(f"- {value}")
|
|
3062
|
+
|
|
3063
|
+
|
|
3064
|
+
def _bool_text(value: Any) -> str:
|
|
3065
|
+
return "true" if value is True else "false" if value is False else "unknown"
|
|
3066
|
+
|
|
3067
|
+
|
|
3068
|
+
def _resolve_codex_skills_dir(skills_dir: str | os.PathLike[str] | None) -> Path:
|
|
3069
|
+
if skills_dir is not None:
|
|
3070
|
+
return Path(skills_dir).expanduser().resolve()
|
|
3071
|
+
codex_home = os.environ.get("CODEX_HOME")
|
|
3072
|
+
if codex_home:
|
|
3073
|
+
return (Path(codex_home).expanduser() / "skills").resolve()
|
|
3074
|
+
return (Path.home() / ".codex" / "skills").resolve()
|
|
3075
|
+
|
|
3076
|
+
|
|
3077
|
+
def _codex_skill_markdown() -> str:
|
|
3078
|
+
return """---
|
|
3079
|
+
name: code-workflow-probe
|
|
3080
|
+
description: Use code-workflow-probe to keep repo workflow facts aligned before exploring, after relevant edits, and before validation.
|
|
3081
|
+
---
|
|
3082
|
+
|
|
3083
|
+
# Code Workflow Probe
|
|
3084
|
+
|
|
3085
|
+
Use `code-workflow-probe` when working in a repository and you need current, evidence-backed workflow facts for install, test, lint, format, build, dev, components, package managers, CI, and affected files.
|
|
3086
|
+
|
|
3087
|
+
## Workflow
|
|
3088
|
+
|
|
3089
|
+
1. At task start, run:
|
|
3090
|
+
`code-workflow-probe sync --root <repo>`
|
|
3091
|
+
2. Prefer the default text output for quick agent context.
|
|
3092
|
+
3. Use JSON when you need structured data:
|
|
3093
|
+
`code-workflow-probe sync --root <repo> --format json`
|
|
3094
|
+
4. After editing files, notify the probe:
|
|
3095
|
+
`code-workflow-probe edit --root <repo> --changed <path> [<path>...]`
|
|
3096
|
+
5. To update via incremental sync after known edits, pass the changed files:
|
|
3097
|
+
`code-workflow-probe sync --root <repo> --changed <path> [<path>...]`
|
|
3098
|
+
6. For very large repos, if you know the changed file list is complete and a cache already exists, use path-only sync:
|
|
3099
|
+
`code-workflow-probe sync --root <repo> --changed <path> [<path>...] --paths-only`
|
|
3100
|
+
7. Use progress for long syncs:
|
|
3101
|
+
`code-workflow-probe sync --root <repo> --changed <path> [<path>...] --progress`
|
|
3102
|
+
8. If changed files are unknown or incomplete, force a complete scan:
|
|
3103
|
+
`code-workflow-probe sync --root <repo> --full`
|
|
3104
|
+
9. Before validation, map changes to components and workflows:
|
|
3105
|
+
`code-workflow-probe affected --root <repo> --changed <path> [<path>...]`
|
|
3106
|
+
10. Use status when you need a bounded AI context summary of tech stack, package managers, and workflow commands. If compact status is too sparse, use `--detail standard --depth <n> --limit <n>`:
|
|
3107
|
+
`code-workflow-probe status --root <repo>`
|
|
3108
|
+
|
|
3109
|
+
## Important Sync Rule
|
|
3110
|
+
|
|
3111
|
+
Strongly prefer running `code-workflow-probe sync --root <repo> --changed <path> [<path>...]` after editing project or workflow management files, including manifests, lockfiles, package-manager files, task-runner files, CI files, test/lint/format/build config, and monorepo/component boundary files.
|
|
3112
|
+
|
|
3113
|
+
Examples include `package.json`, lockfiles, `pyproject.toml`, `requirements*.txt`, `go.mod`, `Cargo.toml`, `pom.xml`, Gradle files, `Makefile`, `justfile`, `.github/workflows/*`, `.gitlab-ci.yml`, `pytest.ini`, `ruff.toml`, ESLint config, Prettier config, and similar workflow evidence files.
|
|
3114
|
+
|
|
3115
|
+
Use `--paths-only` only when the changed path list is complete. If you are unsure whether files were added, removed, renamed, generated, or edited outside your view, do not use `--paths-only`; run normal sync or `--full`.
|
|
3116
|
+
|
|
3117
|
+
## Safety Rules
|
|
3118
|
+
|
|
3119
|
+
- Do not use stale profile data. If `aligned` is false or unknown, sync first.
|
|
3120
|
+
- Only auto-run workflows that are `safe_auto=true`, local, high confidence, low risk, and have a known cwd.
|
|
3121
|
+
- Treat CI-only, candidate, inferred, medium/high risk, and low-confidence workflows as requiring review.
|
|
3122
|
+
- Do not turn CI commands into local commands without checking cwd and local evidence.
|
|
3123
|
+
- Do not invent missing workflows.
|
|
3124
|
+
"""
|
|
3125
|
+
|
|
3126
|
+
|
|
3127
|
+
def _build_parser() -> argparse.ArgumentParser:
    """Build the command-line parser for code-workflow-probe.

    The shared options (root, cache, format, compact, verbose, progress) are
    registered on the top-level parser with their real defaults and again on
    every subcommand with ``argparse.SUPPRESS`` as the default, so they are
    accepted on either side of the subcommand name.
    """
    parser = argparse.ArgumentParser(prog="code-workflow-probe", description="Deterministic repo workflow profile syncer.")

    # One table drives both registrations so the two copies cannot drift.
    shared_options = (
        ("--root", {"default": ".", "help": "Repository root. Defaults to current directory."}),
        ("--cache", {"default": None, "help": f"Cache path. Defaults to {DEFAULT_CACHE_NAME} under root."}),
        ("--format", {"choices": ("text", "json"), "default": "text", "help": "Output format. Defaults to text."}),
        ("--compact", {"action": "store_true", "help": "Emit compact JSON when --format json is used."}),
        ("--verbose", {"action": "store_true", "help": "Expand text output with full evidence details."}),
        ("--progress", {"action": "store_true", "help": "Print progress messages to stderr."}),
    )
    for flag, options in shared_options:
        parser.add_argument(flag, **options)
    subparsers = parser.add_subparsers(dest="command", required=True)

    def add_common(subparser: argparse.ArgumentParser) -> None:
        # SUPPRESS keeps an absent subcommand-level flag from writing a default
        # into the namespace at all.
        for flag, options in shared_options:
            subparser.add_argument(flag, **{**options, "default": argparse.SUPPRESS})

    sync_parser = subparsers.add_parser("sync", help="Build and cache an aligned profile.")
    add_common(sync_parser)
    sync_parser.add_argument("--changed", nargs="*", default=[], help="Changed files to include in output context.")
    sync_parser.add_argument("--no-write", action="store_true", help="Do not write cache.")
    sync_parser.add_argument("--full", action="store_true", help="Force a full repo scan instead of incremental cache reuse.")
    sync_parser.add_argument(
        "--paths-only",
        action="store_true",
        help="Sync only from explicit changed paths plus existing cache; never discover the whole repo.",
    )

    status_parser = subparsers.add_parser("status", help="Check whether cached profile is aligned.")
    add_common(status_parser)
    status_parser.add_argument(
        "--detail",
        choices=("compact", "standard", "full"),
        default="compact",
        help="Text detail level for status output.",
    )
    status_parser.add_argument(
        "--limit",
        type=int,
        default=DEFAULT_STATUS_LIMIT,
        help="Preview limit for compact and standard status output.",
    )
    status_parser.add_argument(
        "--depth",
        type=int,
        default=DEFAULT_STATUS_DEPTH,
        help="Directory depth for standard status component previews.",
    )

    # edit and affected take the same single required option.
    edit_parser = subparsers.add_parser("edit", help="Notify changed files and update profile if needed.")
    add_common(edit_parser)
    edit_parser.add_argument("--changed", nargs="+", required=True, help="Changed files.")

    affected_parser = subparsers.add_parser("affected", help="Map changed files to components and workflows.")
    add_common(affected_parser)
    affected_parser.add_argument("--changed", nargs="+", required=True, help="Changed files.")

    skill_parser = subparsers.add_parser("install-skill", help="Install a Codex skill for code-workflow-probe.")
    add_common(skill_parser)
    skill_parser.add_argument("--tool", choices=("codex",), default="codex", help="Target AI coding tool. Only codex is supported.")
    skill_parser.add_argument(
        "--skills-dir",
        default=None,
        help="Codex skills directory. Defaults to $CODEX_HOME/skills or ~/.codex/skills.",
    )
    skill_parser.add_argument(
        "--dry-run",
        action="store_true",
        help="Preview the target path and skill content without writing files.",
    )
    skill_parser.add_argument("--no-overwrite", action="store_true", help="Do not overwrite an existing skill file.")

    return parser
|
|
3174
|
+
|
|
3175
|
+
|
|
3176
|
+
def main(argv: Optional[Sequence[str]] = None) -> int:
    """Run the command-line interface and return the process exit code.

    Parses ``argv`` (argparse falls back to ``sys.argv`` when it is ``None``),
    dispatches to the API function matching the chosen subcommand, and writes
    the result to stdout: JSON when ``--format json`` is selected, otherwise
    the result's string form. Always returns 0 on completion.
    """
    parser = _build_parser()
    args = parser.parse_args(argv)
    progress = _stderr_progress if args.progress else None

    # Each runner reads only the attributes its own subcommand defines, so the
    # lookups are deferred until the matching runner is actually called.
    def run_sync() -> Any:
        return sync(
            args.root,
            args.cache,
            changed_files=args.changed,
            write=not args.no_write,
            format=args.format,
            verbose=args.verbose,
            incremental=not args.full,
            paths_only=args.paths_only,
            progress=progress,
        )

    def run_status() -> Any:
        return status(
            args.root,
            args.cache,
            format=args.format,
            verbose=args.verbose,
            detail=args.detail,
            limit=args.limit,
            depth=args.depth,
        )

    def run_edit() -> Any:
        return edit(args.root, args.changed, args.cache, format=args.format, verbose=args.verbose)

    def run_affected() -> Any:
        return affected(args.root, args.changed, args.cache, format=args.format, verbose=args.verbose)

    def run_install_skill() -> Any:
        return install_skill(
            tool=args.tool,
            skills_dir=args.skills_dir,
            dry_run=args.dry_run,
            overwrite=not args.no_overwrite,
            format=args.format,
            verbose=args.verbose,
        )

    runners = {
        "sync": run_sync,
        "status": run_status,
        "edit": run_edit,
        "affected": run_affected,
        "install-skill": run_install_skill,
    }
    runner = runners.get(args.command)
    if runner is None:  # pragma: no cover - argparse prevents this.
        parser.error(f"unknown command: {args.command}")
    output = runner()

    if args.format == "json":
        if args.compact:
            json_layout = {"separators": (",", ":"), "indent": None}
        else:
            json_layout = {"separators": None, "indent": 2}
        json.dump(output, sys.stdout, sort_keys=True, **json_layout)
    else:
        sys.stdout.write(str(output))
    sys.stdout.write("\n")
    return 0
|
|
3218
|
+
|
|
3219
|
+
|
|
3220
|
+
# Script entry point: run the CLI and exit with the code main() returns.
if __name__ == "__main__":
    sys.exit(main())
|