code-workflow-probe 0.1.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
code_workflow_probe.py ADDED
@@ -0,0 +1,3221 @@
1
+ #!/usr/bin/env python3
2
+ """code-workflow-probe: deterministic repo workflow profile syncer.
3
+
4
+ API:
5
+ sync(root=".", cache_path=None, changed_files=None, write=True, format="text", verbose=False, incremental=True, paths_only=False, progress=None)
6
+ sync_async(root=".", cache_path=None, changed_files=None, write=True, format="text", verbose=False, incremental=True, paths_only=False, progress=None, executor=None)
7
+ status(root=".", cache_path=None, format="text", verbose=False, detail="compact", limit=8, depth=2)
8
+ edit(root=".", changed_files=None, cache_path=None, format="text", verbose=False)
9
+ affected(root=".", changed_files=None, cache_path=None, format="text", verbose=False)
10
+ install_skill(tool="codex", skills_dir=None, dry_run=False, overwrite=True, format="text", verbose=False)
11
+
12
+ CLI:
13
+ python code_workflow_probe.py sync --root .
14
+ python code_workflow_probe.py status --root .
15
+ python code_workflow_probe.py edit --changed path/to/file
16
+ python code_workflow_probe.py affected --changed path/to/file
17
+ python code_workflow_probe.py install-skill
18
+ """
19
+
20
+ from __future__ import annotations
21
+
22
+ import argparse
23
+ import copy
24
+ import fnmatch
25
+ import hashlib
26
+ import json
27
+ import os
28
+ import re
29
+ import subprocess
30
+ import sys
31
+ from concurrent.futures import Executor, Future, ThreadPoolExecutor
32
+ from collections import defaultdict
33
+ from datetime import datetime, timezone
34
+ from pathlib import Path
35
+ from typing import Any, Callable, Dict, Iterable, List, Optional, Sequence, Set, Tuple
36
+
37
+ try:
38
+ import tomllib
39
+ except ModuleNotFoundError: # pragma: no cover - Python < 3.11 fallback.
40
+ tomllib = None # type: ignore[assignment]
41
+
42
+
43
# Release identifier and on-disk cache schema/name.
VERSION = "0.1.4"
SCHEMA_VERSION = 1
DEFAULT_CACHE_NAME = ".code-workflow-probe.json"
SKILL_NAME = "code-workflow-probe"

# Defaults for the `status` report; STATUS_DETAILS presumably lists the
# accepted `detail` values (validated outside this section).
DEFAULT_STATUS_LIMIT = 8
DEFAULT_STATUS_DEPTH = 2
STATUS_DETAILS = {"compact", "standard", "full"}

# Workflow kinds in detection order, and the order used when reporting status.
WORKFLOW_KINDS = ("install", "test", "lint", "format", "build", "dev")
STATUS_WORKFLOW_KIND_ORDER = ("test", "lint", "format", "build", "install", "dev")

# Directory names for VCS metadata, editor state, caches, virtualenvs and
# build output. NOTE(review): presumably skipped by the repo walkers; the
# consumers are not visible in this section.
IGNORED_DIRS = {
    # version control / editors
    ".git", ".hg", ".svn", ".idea", ".vscode",
    # Python caches and environments
    "__pycache__", ".pytest_cache", ".mypy_cache", ".ruff_cache",
    ".tox", ".nox", ".venv", "venv", "env",
    # dependency and build output
    "node_modules", "dist", "build", "target", "vendor",
    ".gradle", ".next", ".turbo",
}

# File names whose presence marks a directory as a component root.
COMPONENT_MANIFESTS = {
    # JavaScript / Deno
    "package.json", "deno.json", "deno.jsonc",
    # Python
    "pyproject.toml", "requirements.txt", "requirements-dev.txt",
    "setup.py", "setup.cfg", "Pipfile",
    # Go / Rust
    "go.mod", "Cargo.toml",
    # JVM
    "pom.xml", "build.gradle", "build.gradle.kts",
    # Ruby / PHP / Swift
    "Gemfile", "composer.json", "Package.swift",
}

DOTNET_PROJECT_EXTENSIONS = {".csproj", ".fsproj", ".vbproj"}
DOTNET_SOLUTION_EXTENSIONS = {".sln", ".slnx"}

# Lockfiles and tool configuration that sit next to a manifest.
ADJACENT_PROFILE_FILE_NAMES = {
    # JavaScript lockfiles
    "package-lock.json", "npm-shrinkwrap.json", "yarn.lock",
    "pnpm-lock.yaml", "bun.lock", "bun.lockb",
    # Python lockfiles
    "uv.lock", "poetry.lock", "pdm.lock", "Pipfile.lock",
    # Go / Rust
    "go.sum", "Cargo.lock",
    # Ruby
    "Gemfile.lock", ".ruby-version", "Rakefile",
    ".rubocop.yml", ".rubocop_todo.yml",
    # PHP
    "composer.lock", "phpunit.xml", "phpunit.xml.dist",
    "phpstan.neon", "phpstan.neon.dist", "phpcs.xml", "phpcs.xml.dist",
    ".php-cs-fixer.php", ".php-cs-fixer.dist.php", "pint.json",
    # Swift
    "Package.resolved", ".swiftformat", ".swiftlint.yml",
    # .NET
    "global.json", "NuGet.config",
    "Directory.Build.props", "Directory.Build.targets",
}

# Every file name that takes part in the profile: manifests, adjacent files,
# plus task runners, tool configuration and CI definitions.
PROFILE_FILE_NAMES = COMPONENT_MANIFESTS | ADJACENT_PROFILE_FILE_NAMES | {
    ".gitignore",
    # Gradle wrappers and task runners
    "gradlew", "gradlew.bat",
    "Makefile", "makefile", "justfile", "Justfile",
    "Taskfile.yml", "Taskfile.yaml",
    # TypeScript / JavaScript project configuration
    "tsconfig.json", "tsconfig.build.json", "jsconfig.json", "angular.json",
    # ESLint
    "eslint.config.js", "eslint.config.mjs", "eslint.config.cjs", "eslint.config.ts",
    ".eslintrc", ".eslintrc.js", ".eslintrc.cjs", ".eslintrc.json",
    ".eslintrc.yml", ".eslintrc.yaml",
    # Prettier
    "prettier.config.js", "prettier.config.mjs", "prettier.config.cjs", "prettier.config.ts",
    ".prettierrc", ".prettierrc.json", ".prettierrc.yml", ".prettierrc.yaml", ".prettierrc.js",
    # Front-end frameworks / bundlers
    "vite.config.js", "vite.config.mjs", "vite.config.ts",
    "next.config.js", "next.config.mjs", "next.config.ts",
    "svelte.config.js", "nuxt.config.js", "nuxt.config.ts",
    # Python tooling
    "tox.ini", "noxfile.py", "pytest.ini", "ruff.toml", ".ruff.toml",
    ".flake8", ".pylintrc", "mypy.ini",
    ".pre-commit-config.yaml", ".pre-commit-config.yml",
    # Go / Rust tooling
    ".golangci.yml", ".golangci.yaml", "rustfmt.toml", ".rustfmt.toml",
    # CI
    ".gitlab-ci.yml", ".gitlab-ci.yaml", "Jenkinsfile",
}

# Source file suffix -> language name (insertion order is preserved).
SOURCE_EXTENSIONS = {
    ".py": "python",
    ".pyi": "python",
    ".js": "javascript",
    ".jsx": "javascript",
    ".mjs": "javascript",
    ".cjs": "javascript",
    ".ts": "typescript",
    ".tsx": "typescript",
    ".go": "go",
    ".rs": "rust",
    ".java": "java",
    ".kt": "kotlin",
    ".kts": "kotlin",
    ".rb": "ruby",
    ".php": "php",
    ".cs": "csharp",
    ".fs": "fsharp",
    ".vb": "visualbasic",
    ".swift": "swift",
    ".scala": "scala",
    ".clj": "clojure",
    ".ex": "elixir",
    ".exs": "elixir",
    ".erl": "erlang",
    ".hrl": "erlang",
    ".c": "c",
    ".h": "c",
    ".cc": "cpp",
    ".cpp": "cpp",
    ".cxx": "cpp",
    ".hpp": "cpp",
    ".hxx": "cpp",
}

# Words and phrases that mark a command as risky.
# NOTE(review): presumably consulted when rating workflow commands; the
# consumer is not visible in this section.
DANGEROUS_WORDS = {
    # single words
    "clean", "deploy", "destroy", "drop", "migrate",
    "publish", "release", "reset", "rollback",
    # multi-word commands
    "terraform apply", "kubectl delete", "docker push", "npm publish", "rm -rf",
}
251
+
252
+
253
def sync(
    root: str | os.PathLike[str] = ".",
    cache_path: str | os.PathLike[str] | None = None,
    changed_files: Optional[Sequence[str]] = None,
    write: bool = True,
    format: str = "text",
    verbose: bool = False,
    incremental: bool = True,
    paths_only: bool = False,
    progress: Optional[Callable[[str], None]] = None,
) -> Dict[str, Any] | str:
    """Produce a workflow profile for ``root`` and optionally persist it.

    Strategies are tried in this order:

    1. ``incremental``: ask ``_try_incremental_sync`` whether the cached
       profile can be reused for the given changed files.
    2. ``paths_only``: delegate to ``_sync_paths_only``; the result is only
       written when it carries a non-``None`` ``"project"`` entry.
    3. Otherwise run a full ``_ProfileBuilder`` scan.

    ``progress`` is handed to ``_emit_progress`` together with a short stage
    message at each step. The return value is whatever ``_format_result``
    yields for ``format``.
    """

    repo_root = _resolve_root(root)
    cache_file = _resolve_cache_path(repo_root, cache_path)
    changed = _normalize_changed_files(repo_root, changed_files or [])
    _emit_progress(progress, "sync: start")

    result = None
    persist = write

    if incremental:
        _emit_progress(progress, "sync: check cache")
        result = _try_incremental_sync(repo_root, cache_file, _load_json(cache_file), changed)
        if result is not None:
            _emit_progress(progress, "sync: reused cached profile")

    if result is None:
        if paths_only:
            _emit_progress(progress, "sync: paths-only")
            result = _sync_paths_only(repo_root, cache_file, _load_json(cache_file), changed)
            # A paths-only result without project data must not replace the cache.
            persist = write and result.get("project") is not None
        else:
            _emit_progress(progress, "sync: scan repo")
            result = _ProfileBuilder(repo_root, cache_file).build(changed_files=changed)

    if persist:
        _write_json(cache_file, result)
        _emit_progress(progress, "sync: wrote cache")
    _emit_progress(progress, "sync: done")
    return _format_result(result, format, verbose=verbose)
300
+
301
+
302
def sync_async(
    root: str | os.PathLike[str] = ".",
    cache_path: str | os.PathLike[str] | None = None,
    changed_files: Optional[Sequence[str]] = None,
    write: bool = True,
    format: str = "text",
    verbose: bool = False,
    incremental: bool = True,
    paths_only: bool = False,
    progress: Optional[Callable[[str], None]] = None,
    executor: Optional[Executor] = None,
) -> Future:
    """Submit ``sync`` to an executor and return the resulting ``Future``.

    When ``executor`` is given the call is submitted to it and its lifecycle
    stays with the caller. Otherwise a single-worker ``ThreadPoolExecutor``
    is created for this call and shut down (without waiting) from a
    done-callback on the returned future.
    """

    sync_kwargs = dict(
        root=root,
        cache_path=cache_path,
        changed_files=changed_files,
        write=write,
        format=format,
        verbose=verbose,
        incremental=incremental,
        paths_only=paths_only,
        progress=progress,
    )

    if executor is None:
        own_pool = ThreadPoolExecutor(max_workers=1, thread_name_prefix="code-workflow-probe-sync")
        pending = own_pool.submit(sync, **sync_kwargs)

        def _release_pool(_finished: Future) -> None:
            # The pool exists only for this one job; free it once the job ends.
            own_pool.shutdown(wait=False)

        pending.add_done_callback(_release_pool)
        return pending

    return executor.submit(sync, **sync_kwargs)
334
+
335
+
336
def status(
    root: str | os.PathLike[str] = ".",
    cache_path: str | os.PathLike[str] | None = None,
    format: str = "text",
    verbose: bool = False,
    detail: str = "compact",
    limit: int = DEFAULT_STATUS_LIMIT,
    depth: int = DEFAULT_STATUS_DEPTH,
) -> Dict[str, Any] | str:
    """Report whether the cached profile still matches the repository.

    With no cache the alignment reason is ``"cache_missing"``. Otherwise the
    cached ``"watch"`` section is compared against the working tree via
    ``_compare_watch_state``; the profile is aligned only when no stale, new
    or removed profile files are reported and the source summary is
    unchanged. The cached profile is included in the result only when
    aligned, and its ``"alignment"`` entry is refreshed in memory.
    """

    repo_root = _resolve_root(root)
    cache_file = _resolve_cache_path(repo_root, cache_path)
    cached = _load_json(cache_file)
    checked_at = _utc_now()
    status_detail = _normalize_status_detail(detail, verbose)
    status_limit = _normalize_limit(limit)
    status_depth = _normalize_depth(depth)

    if cached is None:
        alignment = {
            "aligned": False,
            "reason": "cache_missing",
            "checked_at": checked_at,
            "stale_files": [],
            "new_profile_files": [],
            "removed_profile_files": [],
        }
        profile = None
        warnings = ["Run sync before using workflow conclusions."]
    else:
        drift = _compare_watch_state(repo_root, cache_file, cached.get("watch", {}))
        aligned = not (
            drift["stale_files"]
            or drift["new_profile_files"]
            or drift["removed_profile_files"]
            or drift["source_summary_changed"]
        )
        alignment = {
            "aligned": aligned,
            "reason": "aligned" if aligned else "cache_stale",
            "checked_at": checked_at,
            "stale_files": drift["stale_files"],
            "new_profile_files": drift["new_profile_files"],
            "removed_profile_files": drift["removed_profile_files"],
            "source_summary_changed": drift["source_summary_changed"],
        }
        # Keep the in-memory profile's alignment in step with this check.
        cached["alignment"] = alignment
        if aligned:
            profile = cached
            warnings = []
        else:
            profile = None
            warnings = ["Cached profile is not aligned; run sync before using workflow conclusions."]

    payload = {
        "operation": "status",
        "tool": "code-workflow-probe",
        "schema_version": SCHEMA_VERSION,
        "root": str(repo_root),
        "cache_path": str(cache_file),
        "alignment": alignment,
        "profile": profile,
        "warnings": warnings,
    }
    return _format_result(
        payload,
        format,
        verbose=verbose,
        status_detail=status_detail,
        limit=status_limit,
        depth=status_depth,
    )
399
+
400
+
401
def edit(
    root: str | os.PathLike[str] = ".",
    changed_files: Optional[Sequence[str]] = None,
    cache_path: str | os.PathLike[str] | None = None,
    format: str = "text",
    verbose: bool = False,
) -> Dict[str, Any] | str:
    """Edit hook: refresh the profile when the changed files invalidate it.

    The cached profile is reused when ``_try_incremental_sync`` accepts it.
    Otherwise ``status`` is consulted and a writing ``sync`` is run if the
    cache is not aligned or any changed file affects the profile. The result
    includes the affected components and suggested workflows computed by
    ``_affected_from_profile``, plus a ``profile_updated`` flag.
    """

    repo_root = _resolve_root(root)
    cache_file = _resolve_cache_path(repo_root, cache_path)
    changed = _normalize_changed_files(repo_root, changed_files or [])

    def _respond(profile: Dict[str, Any], profile_updated: bool) -> Dict[str, Any] | str:
        # Shared response shape for every exit path of the hook.
        impact = _affected_from_profile(repo_root, profile, changed)
        return _format_result({
            "operation": "edit",
            "tool": "code-workflow-probe",
            "schema_version": SCHEMA_VERSION,
            "root": str(repo_root),
            "cache_path": str(cache_file),
            "changed_files": changed,
            "profile_updated": profile_updated,
            "alignment": profile["alignment"],
            "affected": impact["affected"],
            "suggested_workflows": impact["suggested_workflows"],
            "profile": profile,
            "warnings": impact["warnings"],
        }, format, verbose=verbose)

    reused = _try_incremental_sync(repo_root, cache_file, _load_json(cache_file), changed)
    if reused is not None:
        return _respond(reused, False)

    current = status(repo_root, cache_file, format="json")
    # The profile is only inspected when the cache is aligned (it is None otherwise).
    needs_sync = not current["alignment"]["aligned"] or any(
        _changed_file_affects_profile(path, current["profile"]) for path in changed
    )
    if needs_sync:
        refreshed = sync(repo_root, cache_file, changed_files=changed, write=True, format="json")
        return _respond(refreshed, True)
    return _respond(current["profile"], False)
459
+
460
+
461
def affected(
    root: str | os.PathLike[str] = ".",
    changed_files: Optional[Sequence[str]] = None,
    cache_path: str | os.PathLike[str] | None = None,
    format: str = "text",
    verbose: bool = False,
) -> Dict[str, Any] | str:
    """Map changed files to components and the local workflows worth running.

    The profile comes from, in order: an incremental reuse of the cache, the
    cached profile when ``status`` reports it aligned, or a fresh writing
    ``sync``. The mapping itself is done by ``_affected_from_profile``.
    """

    repo_root = _resolve_root(root)
    cache_file = _resolve_cache_path(repo_root, cache_path)
    changed = _normalize_changed_files(repo_root, changed_files or [])

    profile = _try_incremental_sync(repo_root, cache_file, _load_json(cache_file), changed)
    if profile is None:
        current = status(repo_root, cache_file, format="json")
        if current["alignment"]["aligned"]:
            profile = current["profile"]
        else:
            profile = sync(repo_root, cache_file, changed_files=changed, write=True, format="json")

    impact = _affected_from_profile(repo_root, profile, changed)
    payload = {
        "operation": "affected",
        "tool": "code-workflow-probe",
        "schema_version": SCHEMA_VERSION,
        "root": str(repo_root),
        "cache_path": str(cache_file),
        "changed_files": changed,
        "alignment": profile["alignment"],
        "affected": impact["affected"],
        "suggested_workflows": impact["suggested_workflows"],
        "warnings": impact["warnings"],
    }
    return _format_result(payload, format, verbose=verbose)
510
+
511
+
512
def install_skill(
    tool: str = "codex",
    skills_dir: str | os.PathLike[str] | None = None,
    dry_run: bool = False,
    overwrite: bool = True,
    format: str = "text",
    verbose: bool = False,
) -> Dict[str, Any] | str:
    """Write the code-workflow-probe ``SKILL.md`` into a Codex skills directory.

    Nothing is written when ``dry_run`` is set, or when the skill file already
    exists and ``overwrite`` is false (a warning is reported in that case).
    The generated markdown is echoed in the result only for dry runs.

    Raises:
        ValueError: if ``tool`` is anything other than ``"codex"``.
    """

    if tool != "codex":
        raise ValueError("install_skill currently supports only tool='codex'")

    skills_root = _resolve_codex_skills_dir(skills_dir)
    target_dir = skills_root / SKILL_NAME
    target_file = target_dir / "SKILL.md"
    markdown = _codex_skill_markdown()
    already_present = target_file.exists()

    notes = []
    blocked = already_present and not overwrite
    if blocked:
        notes.append("Skill already exists and overwrite is disabled.")

    did_install = not blocked and not dry_run
    if did_install:
        target_dir.mkdir(parents=True, exist_ok=True)
        target_file.write_text(markdown, encoding="utf-8")

    return _format_result(
        {
            "operation": "install-skill",
            "tool": "code-workflow-probe",
            "schema_version": SCHEMA_VERSION,
            "target": "codex",
            "skill_name": SKILL_NAME,
            "skills_dir": str(skills_root),
            "skill_path": str(target_file),
            "installed": did_install,
            "dry_run": dry_run,
            "overwritten": did_install and already_present,
            "content": markdown if dry_run else None,
            "warnings": notes,
        },
        format,
        verbose=verbose,
    )
555
+
556
+
557
+ class _EvidenceStore:
558
+ def __init__(self, root: Path) -> None:
559
+ self.root = root
560
+ self._items: Dict[str, Dict[str, Any]] = {}
561
+
562
+ def add(self, rel_path: str, role: str) -> str:
563
+ rel = _clean_rel(rel_path)
564
+ if not rel:
565
+ return "."
566
+ path = self.root / rel
567
+ item = self._items.get(rel)
568
+ if item is None:
569
+ item = _fingerprint(path)
570
+ item["path"] = rel
571
+ item["roles"] = []
572
+ self._items[rel] = item
573
+ if role not in item["roles"]:
574
+ item["roles"].append(role)
575
+ item["roles"].sort()
576
+ return rel
577
+
578
+ def add_many(self, rel_paths: Iterable[str], role: str) -> List[str]:
579
+ return [self.add(path, role) for path in rel_paths]
580
+
581
+ def as_dict(self) -> Dict[str, Dict[str, Any]]:
582
+ return {path: dict(value) for path, value in sorted(self._items.items())}
583
+
584
+
585
+ class _ProfileBuilder:
586
+ def __init__(
587
+ self,
588
+ root: Path,
589
+ cache_path: Path,
590
+ profile_files: Optional[Sequence[str]] = None,
591
+ allow_source_scan: bool = True,
592
+ ) -> None:
593
+ self.root = root
594
+ self.cache_path = cache_path
595
+ self.profile_files = list(profile_files) if profile_files is not None else None
596
+ self.profile_file_set: Set[str] = set(self.profile_files or [])
597
+ self.allow_source_scan = allow_source_scan
598
+ self.ignore = _GitIgnore(root)
599
+ self.evidence = _EvidenceStore(root)
600
+ self.warnings: List[str] = []
601
+
602
+ def build(self, changed_files: Optional[Sequence[str]] = None) -> Dict[str, Any]:
603
+ profile_files = self.profile_files if self.profile_files is not None else _discover_profile_files(self.root, self.cache_path)
604
+ self.profile_files = list(profile_files)
605
+ self.profile_file_set = set(profile_files)
606
+ self.evidence.add_many(profile_files, "profile_watch")
607
+ source_summary = _empty_source_summary()
608
+ component_roots = self._component_roots(profile_files, source_summary)
609
+ if not component_roots and self.allow_source_scan:
610
+ source_summary = _source_summary(self.root)
611
+ component_roots = self._component_roots(profile_files, source_summary)
612
+ components = [self._build_component(path, component_roots) for path in component_roots]
613
+ if not source_summary["languages"]:
614
+ source_summary = _component_language_summary(components)
615
+ repo_workflows = self._repo_workflows(components)
616
+ ci_workflows = self._ci_workflows(profile_files)
617
+ technologies = _merge_facts(component.get("languages", []) + component.get("frameworks", []) for component in components)
618
+ package_managers = _merge_facts(
619
+ [component["package_manager"]] for component in components if component.get("package_manager")
620
+ )
621
+ project_type = _project_type(components)
622
+ watch_files = {
623
+ path: _fingerprint_with_rel(self.root, path)
624
+ for path in sorted(set(profile_files) | set(self.evidence.as_dict().keys()))
625
+ if path != _rel_to_root(self.root, self.cache_path)
626
+ }
627
+
628
+ profile = {
629
+ "schema_version": SCHEMA_VERSION,
630
+ "tool": "code-workflow-probe",
631
+ "version": VERSION,
632
+ "root": str(self.root),
633
+ "cache_path": str(self.cache_path),
634
+ "generated_at": _utc_now(),
635
+ "alignment": {
636
+ "aligned": True,
637
+ "reason": "synced",
638
+ "checked_at": _utc_now(),
639
+ "stale_files": [],
640
+ "new_profile_files": [],
641
+ "removed_profile_files": [],
642
+ "source_summary_changed": False,
643
+ },
644
+ "project": {
645
+ "type": project_type,
646
+ "components": components,
647
+ "technologies": technologies,
648
+ "package_managers": package_managers,
649
+ "repo_workflows": repo_workflows,
650
+ "ci_workflows": ci_workflows,
651
+ },
652
+ "evidence_files": self.evidence.as_dict(),
653
+ "watch": {
654
+ "files": watch_files,
655
+ "source_summary": source_summary,
656
+ },
657
+ "changed_files": _normalize_changed_files(self.root, changed_files or []),
658
+ "warnings": self.warnings,
659
+ }
660
+ return profile
661
+
662
+ def _component_roots(self, profile_files: Sequence[str], source_summary: Dict[str, Any]) -> List[str]:
663
+ roots: Set[str] = set()
664
+ for rel in profile_files:
665
+ if _is_component_manifest(rel):
666
+ roots.add(_dirname_rel(rel))
667
+
668
+ if not roots and source_summary["languages"]:
669
+ roots.add(".")
670
+ for sample in source_summary.get("samples", []):
671
+ self.evidence.add(sample, "source_language_sample")
672
+
673
+ if not roots and any(Path(path).name in {"Makefile", "makefile", "justfile", "Justfile"} for path in profile_files):
674
+ roots.add(".")
675
+
676
+ return sorted(roots, key=lambda item: (item.count("/"), item))
677
+
678
+ def _build_component(self, path: str, all_roots: Sequence[str]) -> Dict[str, Any]:
679
+ component_dir = self.root if path == "." else self.root / path
680
+ evidence: List[str] = []
681
+ languages: List[Dict[str, Any]] = []
682
+ frameworks: List[Dict[str, Any]] = []
683
+ workflows: List[Dict[str, Any]] = []
684
+ package_manager: Optional[Dict[str, Any]] = None
685
+
686
+ manifests = self._existing_names(path, COMPONENT_MANIFESTS)
687
+ for name in manifests:
688
+ evidence.append(self.evidence.add(_join_rel(path, name), "component_manifest"))
689
+
690
+ scope = _component_scope(path, all_roots)
691
+
692
+ if self._has_file(path, "package.json"):
693
+ js = self._javascript_component(path, scope, all_roots)
694
+ languages.extend(js["languages"])
695
+ frameworks.extend(js["frameworks"])
696
+ package_manager = js["package_manager"]
697
+ workflows.extend(js["workflows"])
698
+
699
+ if self._has_any(path, {"pyproject.toml", "requirements.txt", "requirements-dev.txt", "setup.py", "setup.cfg", "Pipfile"}):
700
+ py = self._python_component(path, scope)
701
+ languages.extend(py["languages"])
702
+ frameworks.extend(py["frameworks"])
703
+ package_manager = package_manager or py["package_manager"]
704
+ workflows.extend(py["workflows"])
705
+
706
+ if self._has_file(path, "go.mod"):
707
+ go = self._go_component(path, scope)
708
+ languages.extend(go["languages"])
709
+ package_manager = package_manager or go["package_manager"]
710
+ workflows.extend(go["workflows"])
711
+
712
+ if self._has_file(path, "Cargo.toml"):
713
+ rust = self._rust_component(path, scope)
714
+ languages.extend(rust["languages"])
715
+ package_manager = package_manager or rust["package_manager"]
716
+ workflows.extend(rust["workflows"])
717
+
718
+ if self._has_any(path, {"pom.xml", "build.gradle", "build.gradle.kts"}):
719
+ java = self._java_component(path, scope)
720
+ languages.extend(java["languages"])
721
+ package_manager = package_manager or java["package_manager"]
722
+ workflows.extend(java["workflows"])
723
+
724
+ if self._has_file(path, "Gemfile"):
725
+ ruby = self._ruby_component(path, scope)
726
+ languages.extend(ruby["languages"])
727
+ frameworks.extend(ruby["frameworks"])
728
+ package_manager = package_manager or ruby["package_manager"]
729
+ workflows.extend(ruby["workflows"])
730
+
731
+ if self._has_file(path, "composer.json"):
732
+ php = self._php_component(path, scope)
733
+ languages.extend(php["languages"])
734
+ frameworks.extend(php["frameworks"])
735
+ package_manager = package_manager or php["package_manager"]
736
+ workflows.extend(php["workflows"])
737
+
738
+ if self._has_any(path, {"deno.json", "deno.jsonc"}):
739
+ deno = self._deno_component(path, scope, all_roots)
740
+ languages.extend(deno["languages"])
741
+ frameworks.extend(deno["frameworks"])
742
+ package_manager = package_manager or deno["package_manager"]
743
+ workflows.extend(deno["workflows"])
744
+
745
+ if self._has_file(path, "Package.swift"):
746
+ swift = self._swift_component(path, scope)
747
+ languages.extend(swift["languages"])
748
+ package_manager = package_manager or swift["package_manager"]
749
+ workflows.extend(swift["workflows"])
750
+
751
+ dotnet_manifests = self._dotnet_manifest_files(path)
752
+ if dotnet_manifests:
753
+ dotnet = self._dotnet_component(path, scope, dotnet_manifests)
754
+ languages.extend(dotnet["languages"])
755
+ frameworks.extend(dotnet["frameworks"])
756
+ package_manager = package_manager or dotnet["package_manager"]
757
+ workflows.extend(dotnet["workflows"])
758
+
759
+ if not languages and self.allow_source_scan:
760
+ fallback = self._source_fallback_component(path, all_roots)
761
+ languages.extend(fallback["languages"])
762
+ evidence.extend(fallback["evidence"])
763
+
764
+ workflows.extend(self._task_runner_workflows(path, scope))
765
+ workflows = _dedupe_workflows(workflows)
766
+ component_type = _component_type(languages)
767
+ component_evidence = set(evidence)
768
+ for fact in languages + frameworks:
769
+ component_evidence.update(fact.get("evidence", []))
770
+ if package_manager:
771
+ component_evidence.update(package_manager.get("evidence", []))
772
+ for workflow in workflows:
773
+ component_evidence.update(workflow.get("evidence", []))
774
+
775
+ return {
776
+ "id": "root" if path == "." else path,
777
+ "path": path,
778
+ "type": component_type,
779
+ "languages": _dedupe_facts(languages),
780
+ "frameworks": _dedupe_facts(frameworks),
781
+ "package_manager": package_manager,
782
+ "workflows": workflows,
783
+ "evidence": sorted(component_evidence),
784
+ "warnings": [],
785
+ }
786
+
787
+ def _has_file(self, component_path: str, name: str) -> bool:
788
+ rel = _join_rel(component_path, name)
789
+ return rel in self.profile_file_set or _visible_file(self.root, self.ignore, rel)
790
+
791
+ def _existing_names(self, component_path: str, names: Iterable[str]) -> List[str]:
792
+ return sorted(name for name in names if self._has_file(component_path, name))
793
+
794
+ def _has_any(self, component_path: str, names: Iterable[str]) -> bool:
795
+ return any(self._has_file(component_path, name) for name in names)
796
+
797
+ def _javascript_component(self, path: str, scope: str, all_roots: Sequence[str]) -> Dict[str, Any]:
798
+ rel_package = _join_rel(path, "package.json")
799
+ package_path = self.root / rel_package
800
+ package = _load_json(package_path) or {}
801
+ evidence = [self.evidence.add(rel_package, "javascript_manifest")]
802
+ dependencies = _package_dependencies(package)
803
+ ts_evidence = self._typescript_evidence(path, all_roots, dependencies, rel_package)
804
+ language = "typescript" if ts_evidence else "javascript"
805
+ language_evidence = evidence + ts_evidence
806
+ languages = [_fact(language, 0.9, language_evidence, "package.json and TypeScript evidence" if ts_evidence else "package.json")]
807
+ frameworks = [_fact(name, 0.8, evidence, "package dependency") for name in _js_frameworks(dependencies)]
808
+ pm = self._js_package_manager(path, package)
809
+ workflows = [self._workflow("install", _js_install_command(pm), path, scope, evidence + pm["evidence"], pm["confidence"], "local", True)]
810
+
811
+ scripts = package.get("scripts", {}) if isinstance(package.get("scripts"), dict) else {}
812
+ for kind in WORKFLOW_KINDS:
813
+ if kind not in scripts:
814
+ continue
815
+ command = _js_script_command(pm["name"], kind)
816
+ script_text = str(scripts.get(kind, ""))
817
+ risk = _risk_for_command(kind, script_text)
818
+ workflows.append(
819
+ self._workflow(
820
+ kind,
821
+ command,
822
+ path,
823
+ scope,
824
+ evidence,
825
+ "high",
826
+ "local",
827
+ recommended=True,
828
+ risk=risk,
829
+ reason=f"package.json script '{kind}'",
830
+ command_preview=script_text,
831
+ )
832
+ )
833
+
834
+ return {
835
+ "languages": languages,
836
+ "frameworks": frameworks,
837
+ "package_manager": pm,
838
+ "workflows": workflows,
839
+ }
840
+
841
+ def _js_package_manager(self, path: str, package: Dict[str, Any]) -> Dict[str, Any]:
842
+ component_dir = self.root / path
843
+ candidates = [
844
+ ("pnpm-lock.yaml", "pnpm"),
845
+ ("yarn.lock", "yarn"),
846
+ ("bun.lock", "bun"),
847
+ ("bun.lockb", "bun"),
848
+ ("package-lock.json", "npm"),
849
+ ("npm-shrinkwrap.json", "npm"),
850
+ ]
851
+ for filename, name in candidates:
852
+ if self._has_file(path, filename):
853
+ evidence = [self.evidence.add(_join_rel(path, "package.json"), "package_manager")]
854
+ evidence.append(self.evidence.add(_join_rel(path, filename), "package_manager_lockfile"))
855
+ return _package_manager(name, _pm_executable(name), 0.95, evidence)
856
+
857
+ package_manager = package.get("packageManager")
858
+ if isinstance(package_manager, str) and "@" in package_manager:
859
+ name = package_manager.split("@", 1)[0]
860
+ if name in {"npm", "pnpm", "yarn", "bun"}:
861
+ evidence = [self.evidence.add(_join_rel(path, "package.json"), "package_manager")]
862
+ return _package_manager(name, _pm_executable(name), 0.85, evidence)
863
+
864
+ evidence = [self.evidence.add(_join_rel(path, "package.json"), "package_manager")]
865
+ return _package_manager("npm", "npm", 0.6, evidence, warnings=["No JS lockfile or packageManager field; npm is only a candidate."])
866
+
867
def _python_component(self, path: str, scope: str) -> Dict[str, Any]:
    """Describe the Python component rooted at ``path``.

    Records the visible Python manifests as evidence, derives frameworks
    from ``pyproject.toml`` and the visible requirements files, resolves the
    package manager, and proposes workflows:

    * ``install`` when ``_python_install_command`` yields a command;
    * ``test`` via ``python -m pytest`` (recommended when pytest is
      configured, a candidate when only test files were found), plus
      ``tox`` / ``nox`` when their config files are visible;
    * ``lint`` / ``format`` for ruff, black and flake8 when configured;
    * ``build`` via ``python -m build`` as a candidate when
      ``pyproject.toml`` is visible.

    Returns a dict with ``languages``, ``frameworks``, ``package_manager``
    and ``workflows``.
    """
    component_dir = self.root / path
    pyproject = _load_toml(component_dir / "pyproject.toml")
    evidence = [
        self.evidence.add(_join_rel(path, name), "python_manifest")
        for name in ("pyproject.toml", "requirements.txt", "requirements-dev.txt", "setup.py", "setup.cfg", "Pipfile")
        if self._has_file(path, name)
    ]
    languages = [_fact("python", 0.9, evidence, "python manifest")]
    visible_requirement_files = [
        name for name in ("requirements.txt", "requirements-dev.txt") if self._has_file(path, name)
    ]
    frameworks = [
        _fact(name, 0.8, evidence, "python dependency")
        for name in _python_frameworks(component_dir, pyproject, visible_requirement_files)
    ]
    pm = self._python_package_manager(path, pyproject)
    workflows: List[Dict[str, Any]] = []

    install = _python_install_command(pm)
    if install:
        workflows.append(self._workflow("install", install, path, scope, pm["evidence"], pm["confidence"], "local", True))

    pytest_evidence = self._pytest_evidence(path, pyproject)
    if pytest_evidence:
        workflows.append(
            self._workflow(
                "test",
                "python -m pytest",
                path,
                scope,
                pytest_evidence,
                "high",
                "local",
                recommended=True,
            )
        )
    elif _has_test_sample(self.root, path, self.ignore):
        # No pytest configuration: offer pytest only as a candidate, backed
        # by the sampled test files.
        samples = self.evidence.add_many(_test_samples(self.root, path, self.ignore), "python_test_sample")
        workflows.append(
            self._workflow(
                "test",
                "python -m pytest",
                path,
                scope,
                samples,
                "medium",
                "local",
                recommended=False,
                reason="test files exist but no pytest configuration was found",
            )
        )

    if self._has_file(path, "tox.ini"):
        workflows.append(self._workflow("test", "tox", path, scope, [self.evidence.add(_join_rel(path, "tox.ini"), "test_runner")], "high", "local", True))
    if self._has_file(path, "noxfile.py"):
        workflows.append(self._workflow("test", "nox", path, scope, [self.evidence.add(_join_rel(path, "noxfile.py"), "test_runner")], "high", "local", True))

    ruff = self._ruff_evidence(path, pyproject)
    if ruff:
        workflows.append(self._workflow("lint", "ruff check .", path, scope, ruff, "high", "local", True))
        workflows.append(
            self._workflow(
                "format",
                "ruff format .",
                path,
                scope,
                ruff,
                "medium",
                "local",
                recommended=False,
                reason="ruff is configured; formatter availability may depend on ruff version",
            )
        )

    black = self._black_evidence(path, pyproject)
    if black:
        workflows.append(self._workflow("format", "black .", path, scope, black, "high", "local", True))

    # Only cite files that actually configure flake8: setup.cfg counts solely
    # when it has a [flake8] section, so an unrelated setup.cfg sitting next
    # to .flake8 is no longer recorded as lint configuration.
    flake8_evidence: List[str] = []
    if self._has_file(path, ".flake8"):
        flake8_evidence.append(self.evidence.add(_join_rel(path, ".flake8"), "lint_config"))
    if self._has_file(path, "setup.cfg") and _setup_cfg_has_section(component_dir / "setup.cfg", "flake8"):
        flake8_evidence.append(self.evidence.add(_join_rel(path, "setup.cfg"), "lint_config"))
    if flake8_evidence:
        workflows.append(self._workflow("lint", "flake8 .", path, scope, flake8_evidence, "high", "local", True))

    if evidence and self._has_file(path, "pyproject.toml"):
        workflows.append(
            self._workflow(
                "build",
                "python -m build",
                path,
                scope,
                [self.evidence.add(_join_rel(path, "pyproject.toml"), "build_config")],
                "medium",
                "local",
                recommended=False,
            )
        )

    return {
        "languages": languages,
        "frameworks": frameworks,
        "package_manager": pm,
        "workflows": workflows,
    }
972
+
973
+ def _typescript_evidence(
974
+ self,
975
+ path: str,
976
+ all_roots: Sequence[str],
977
+ dependencies: Set[str],
978
+ package_json: str,
979
+ ) -> List[str]:
980
+ component_dir = self.root if path == "." else self.root / path
981
+ evidence = []
982
+ if "typescript" in dependencies:
983
+ evidence.append(self.evidence.add(package_json, "typescript_dependency"))
984
+ return evidence
985
+
986
+ for name in ("tsconfig.json", "tsconfig.build.json"):
987
+ if self._has_file(path, name):
988
+ evidence.append(self.evidence.add(_join_rel(path, name), "typescript_config"))
989
+ if evidence:
990
+ return evidence
991
+ if not self.allow_source_scan:
992
+ return []
993
+
994
+ ignored_roots = [
995
+ candidate
996
+ for candidate in all_roots
997
+ if candidate != "." and candidate != path and _is_under(candidate, path)
998
+ ]
999
+ for file_path in _walk_files(component_dir, self.root, self.ignore):
1000
+ rel = _rel_to_root(self.root, file_path)
1001
+ if any(_is_under(rel, ignored) for ignored in ignored_roots):
1002
+ continue
1003
+ if file_path.suffix in {".ts", ".tsx"}:
1004
+ evidence.append(self.evidence.add(rel, "source_language_sample"))
1005
+ if len(evidence) >= 3:
1006
+ break
1007
+ return evidence
1008
+
1009
def _python_package_manager(self, path: str, pyproject: Dict[str, Any]) -> Dict[str, Any]:
    """Choose the Python package manager for the component at ``path``.

    Checked in order, first match wins:

    1. a visible ``uv.lock``, ``poetry.lock``, ``pdm.lock``, ``Pipfile.lock``
       or ``Pipfile`` (0.95); ``pyproject.toml`` is cited too when visible;
    2. a ``poetry`` or ``pdm`` key under ``tool`` in ``pyproject`` (0.9);
    3. ``requirements.txt`` / ``requirements-dev.txt``, which selects pip
       (0.85);
    4. otherwise pip as a candidate (0.6) with a warning, citing whichever of
       ``pyproject.toml``, ``setup.py`` and ``setup.cfg`` are visible.
    """
    def record(name, kind="package_manager"):
        return self.evidence.add(_join_rel(path, name), kind)

    marker_files = (
        ("uv.lock", "uv", "uv"),
        ("poetry.lock", "poetry", "poetry"),
        ("pdm.lock", "pdm", "pdm"),
        ("Pipfile.lock", "pipenv", "pipenv"),
        ("Pipfile", "pipenv", "pipenv"),
    )
    for marker, manager, executable in marker_files:
        if not self._has_file(path, marker):
            continue
        refs = [record(marker, "package_manager_lockfile")]
        if self._has_file(path, "pyproject.toml"):
            refs.append(record("pyproject.toml"))
        return _package_manager(manager, executable, 0.95, refs)

    tool_tables = pyproject.get("tool", {}) if isinstance(pyproject, dict) else {}
    if isinstance(tool_tables, dict):
        for manager in ("poetry", "pdm"):
            if manager in tool_tables:
                refs = [record("pyproject.toml")]
                return _package_manager(manager, manager, 0.9, refs)

    requirement_files = [
        name for name in ("requirements.txt", "requirements-dev.txt") if self._has_file(path, name)
    ]
    if requirement_files:
        refs = [record(name) for name in requirement_files]
        return _package_manager("pip", "python -m pip", 0.85, refs)

    refs = [
        record(name)
        for name in ("pyproject.toml", "setup.py", "setup.cfg")
        if self._has_file(path, name)
    ]
    return _package_manager(
        "pip",
        "python -m pip",
        0.6,
        refs,
        warnings=["No Python lockfile; pip workflow is a candidate."],
    )
1048
+
1049
def _go_component(self, path: str, scope: str) -> Dict[str, Any]:
    """Describe the Go module rooted at ``path``.

    ``go.mod`` is always recorded as evidence and ``go.sum`` is added when
    visible. The component gets ``install`` (``go mod download``), ``test``
    (``go test ./...``) and a candidate ``build`` (``go build ./...``)
    workflow, plus ``golangci-lint run`` when a golangci-lint config file is
    visible.

    Returns a dict with ``languages``, ``package_manager`` and ``workflows``.
    """
    evidence = [self.evidence.add(_join_rel(path, "go.mod"), "go_manifest")]
    if self._has_file(path, "go.sum"):
        evidence.append(self.evidence.add(_join_rel(path, "go.sum"), "go_lockfile"))
    workflows = [
        self._workflow("install", "go mod download", path, scope, evidence, "high", "local", True),
        self._workflow("test", "go test ./...", path, scope, evidence, "high", "local", True),
        self._workflow("build", "go build ./...", path, scope, evidence, "medium", "local", False),
    ]
    # golangci-lint accepts several config file names; cite the first visible
    # one instead of recognising only the ".yml" spelling.
    lint_candidates = (
        _join_rel(path, name)
        for name in (".golangci.yml", ".golangci.yaml", ".golangci.toml", ".golangci.json")
    )
    lint_config = next(
        (rel for rel in lint_candidates if _visible_file(self.root, self.ignore, rel)),
        None,
    )
    if lint_config is not None:
        workflows.append(self._workflow("lint", "golangci-lint run", path, scope, [self.evidence.add(lint_config, "lint_config")], "high", "local", True))
    return {
        "languages": [_fact("go", 0.95, evidence, "go.mod")],
        "package_manager": _package_manager("go modules", "go", 0.95, evidence),
        "workflows": workflows,
    }
1066
+
1067
def _rust_component(self, path: str, scope: str) -> Dict[str, Any]:
    """Describe the Cargo package rooted at ``path``.

    ``Cargo.toml`` is always recorded as evidence and ``Cargo.lock`` is added
    when visible. The same evidence backs four fixed cargo workflows:
    ``fetch`` and ``test`` (recommended), ``build`` and ``fmt`` (candidates).
    """
    refs = [self.evidence.add(_join_rel(path, "Cargo.toml"), "rust_manifest")]
    if self._has_file(path, "Cargo.lock"):
        refs.append(self.evidence.add(_join_rel(path, "Cargo.lock"), "rust_lockfile"))

    languages = [_fact("rust", 0.95, refs, "Cargo.toml")]
    package_manager = _package_manager("cargo", "cargo", 0.95, refs)

    # (kind, command, confidence, recommended)
    cargo_steps = (
        ("install", "cargo fetch", "high", True),
        ("test", "cargo test", "high", True),
        ("build", "cargo build", "medium", False),
        ("format", "cargo fmt", "medium", False),
    )
    workflows = [
        self._workflow(kind, command, path, scope, refs, confidence, "local", recommended)
        for kind, command, confidence, recommended in cargo_steps
    ]
    return {
        "languages": languages,
        "package_manager": package_manager,
        "workflows": workflows,
    }
1081
+
1082
def _java_component(self, path: str, scope: str) -> Dict[str, Any]:
    """Describe the Maven or Gradle project rooted at ``path``.

    A visible ``pom.xml`` selects Maven (``mvn test`` / ``mvn package``).
    Otherwise the component is treated as Gradle: ``build.gradle`` is cited
    when visible, else ``build.gradle.kts``, and the ``./gradlew`` wrapper is
    used (and cited) when present, falling back to ``gradle``.

    Returns a dict with ``languages``, ``package_manager`` and ``workflows``.
    """
    if self._has_file(path, "pom.xml"):
        refs = [self.evidence.add(_join_rel(path, "pom.xml"), "java_manifest")]
        manager_name, tool, score, build_goal = "maven", "mvn", 0.95, "package"
    else:
        manifest = "build.gradle" if self._has_file(path, "build.gradle") else "build.gradle.kts"
        refs = [self.evidence.add(_join_rel(path, manifest), "java_manifest")]
        has_wrapper = self._has_file(path, "gradlew")
        tool = "./gradlew" if has_wrapper else "gradle"
        if has_wrapper:
            refs.append(self.evidence.add(_join_rel(path, "gradlew"), "task_runner"))
        manager_name, score, build_goal = "gradle", 0.9, "build"

    pm = _package_manager(manager_name, tool, score, refs)
    workflows = [
        self._workflow("test", f"{tool} test", path, scope, refs, "high", "local", True),
        self._workflow("build", f"{tool} {build_goal}", path, scope, refs, "medium", "local", False),
    ]
    return {
        "languages": [_fact("java", 0.85, refs, "java build manifest")],
        "package_manager": pm,
        "workflows": workflows,
    }
1107
+
1108
def _ruby_component(self, path: str, scope: str) -> Dict[str, Any]:
    """Describe the Bundler project rooted at ``path``.

    ``Gemfile`` is always recorded as evidence and ``Gemfile.lock`` is added
    when visible (which also raises the bundler value from 0.9 to 0.95).
    Workflows:

    * ``bundle install`` always;
    * ``bundle exec rspec`` when the Gemfile names ``rspec`` or
      ``rspec-rails``;
    * ``bundle exec rubocop`` plus a candidate ``bundle exec rubocop -A``
      when the Gemfile names ``rubocop`` or a rubocop config file is visible.
    """
    refs = [self.evidence.add(_join_rel(path, "Gemfile"), "ruby_manifest")]
    has_lock = self._has_file(path, "Gemfile.lock")
    if has_lock:
        refs.append(self.evidence.add(_join_rel(path, "Gemfile.lock"), "ruby_lockfile"))

    gems = _ruby_gem_names(self.root / path / "Gemfile")
    frameworks = [
        _fact(framework, 0.8, refs, "Gemfile dependency")
        for framework in _ruby_frameworks(gems)
    ]
    pm = _package_manager("bundler", "bundle", 0.95 if has_lock else 0.9, refs)
    workflows = [
        self._workflow("install", "bundle install", path, scope, refs, pm["confidence"], "local", True)
    ]

    if {"rspec", "rspec-rails"} & gems:
        workflows.append(self._workflow("test", "bundle exec rspec", path, scope, refs, "high", "local", True))

    # Manifest evidence only counts toward rubocop when the gem is declared.
    rubocop_refs = list(refs) if "rubocop" in gems else []
    rubocop_refs.extend(
        self.evidence.add(_join_rel(path, config_name), "lint_config")
        for config_name in (".rubocop.yml", ".rubocop_todo.yml")
        if self._has_file(path, config_name)
    )
    if rubocop_refs:
        workflows.append(self._workflow("lint", "bundle exec rubocop", path, scope, rubocop_refs, "high", "local", True))
        workflows.append(
            self._workflow(
                "format",
                "bundle exec rubocop -A",
                path,
                scope,
                rubocop_refs,
                "medium",
                "local",
                recommended=False,
                reason="rubocop autocorrect changes files and should be reviewed before running",
            )
        )

    return {
        "languages": [_fact("ruby", 0.95, refs, "Gemfile")],
        "frameworks": frameworks,
        "package_manager": pm,
        "workflows": workflows,
    }
1146
+
1147
def _php_component(self, path: str, scope: str) -> Dict[str, Any]:
    """Describe the Composer project rooted at ``path``.

    ``composer.json`` is always recorded as evidence and ``composer.lock`` is
    added when visible (raising the composer value from 0.9 to 0.95).
    Workflows, in the order they are emitted:

    * ``composer install``;
    * ``composer <kind>`` for every entry of ``WORKFLOW_KINDS`` that is also
      a key of the manifest's ``scripts`` object;
    * ``vendor/bin/phpunit``, ``vendor/bin/phpstan analyse`` and
      ``vendor/bin/phpcs`` when their config files are visible;
    * candidate formatters ``vendor/bin/pint`` (``laravel/pint`` dependency
      or ``pint.json``) and ``vendor/bin/php-cs-fixer fix`` (fixer config).
    """
    def visible_refs(config_names, evidence_kind):
        # Record every listed config file that is visible in this component.
        return [
            self.evidence.add(_join_rel(path, config_name), evidence_kind)
            for config_name in config_names
            if self._has_file(path, config_name)
        ]

    manifest_rel = _join_rel(path, "composer.json")
    composer = _load_json(self.root / manifest_rel) or {}
    refs = [self.evidence.add(manifest_rel, "php_manifest")]
    if self._has_file(path, "composer.lock"):
        refs.append(self.evidence.add(_join_rel(path, "composer.lock"), "php_lockfile"))

    dependencies = _composer_dependencies(composer)
    frameworks = [
        _fact(framework, 0.8, refs, "composer dependency")
        for framework in _php_frameworks(dependencies)
    ]
    pm = _package_manager("composer", "composer", 0.95 if self._has_file(path, "composer.lock") else 0.9, refs)
    workflows = [
        self._workflow("install", "composer install", path, scope, refs, pm["confidence"], "local", True)
    ]

    scripts = composer.get("scripts")
    if not isinstance(scripts, dict):
        scripts = {}
    for kind in WORKFLOW_KINDS:
        if kind not in scripts:
            continue
        preview = _script_preview(scripts.get(kind))
        workflows.append(
            self._workflow(
                kind,
                f"composer {kind}",
                path,
                scope,
                refs,
                "high",
                "local",
                recommended=True,
                risk=_risk_for_command(kind, preview),
                reason=f"composer.json script '{kind}'",
                command_preview=preview,
            )
        )

    # (kind, command, config file names, evidence kind)
    configured_tools = (
        ("test", "vendor/bin/phpunit", ("phpunit.xml", "phpunit.xml.dist"), "test_config"),
        ("lint", "vendor/bin/phpstan analyse", ("phpstan.neon", "phpstan.neon.dist"), "lint_config"),
        ("lint", "vendor/bin/phpcs", ("phpcs.xml", "phpcs.xml.dist"), "lint_config"),
    )
    for kind, command, config_names, evidence_kind in configured_tools:
        tool_refs = visible_refs(config_names, evidence_kind)
        if tool_refs:
            workflows.append(self._workflow(kind, command, path, scope, tool_refs, "high", "local", True))

    pint_refs = list(refs) if "laravel/pint" in dependencies else []
    pint_refs.extend(visible_refs(("pint.json",), "format_config"))
    if pint_refs:
        workflows.append(self._workflow("format", "vendor/bin/pint", path, scope, pint_refs, "medium", "local", False))

    fixer_refs = visible_refs((".php-cs-fixer.php", ".php-cs-fixer.dist.php"), "format_config")
    if fixer_refs:
        workflows.append(self._workflow("format", "vendor/bin/php-cs-fixer fix", path, scope, fixer_refs, "medium", "local", False))

    return {
        "languages": [_fact("php", 0.95, refs, "composer.json")],
        "frameworks": frameworks,
        "package_manager": pm,
        "workflows": workflows,
    }
1226
+
1227
def _deno_component(self, path: str, scope: str, all_roots: Sequence[str]) -> Dict[str, Any]:
    """Describe the Deno project rooted at ``path``.

    The manifest is ``deno.json`` when visible, otherwise ``deno.jsonc``.
    Languages come from sampling ``.ts`` / ``.tsx`` / ``.js`` / ``.jsx``
    sources. A ``deno task <name>`` workflow is emitted for each entry of
    ``WORKFLOW_KINDS`` that has a matching key in the manifest's ``tasks``
    object; the ``format`` kind also accepts a task named ``fmt``.
    """
    manifest_name = "deno.json" if self._has_file(path, "deno.json") else "deno.jsonc"
    manifest_rel = _join_rel(path, manifest_name)
    manifest = _load_json_or_jsonc(self.root / manifest_rel) or {}
    refs = [self.evidence.add(manifest_rel, "deno_manifest")]
    languages = self._source_language_facts(path, all_roots, {".ts", ".tsx", ".js", ".jsx"})
    frameworks = [_fact("deno", 0.95, refs, "deno manifest")]
    pm = _package_manager("deno", "deno", 0.95, refs)

    tasks = manifest.get("tasks")
    if not isinstance(tasks, dict):
        tasks = {}
    # Task names accepted per workflow kind; kinds not listed use their own name.
    aliases = {"format": ("format", "fmt")}

    workflows: List[Dict[str, Any]] = []
    for kind in WORKFLOW_KINDS:
        task_name = None
        for candidate in aliases.get(kind, (kind,)):
            if candidate in tasks:
                task_name = candidate
                break
        if not task_name:
            continue
        preview = _script_preview(tasks.get(task_name))
        workflows.append(
            self._workflow(
                kind,
                f"deno task {task_name}",
                path,
                scope,
                refs,
                "high",
                "local",
                recommended=True,
                risk=_risk_for_command(kind, preview),
                reason=f"{manifest_name} task '{task_name}'",
                command_preview=preview,
            )
        )
    return {
        "languages": languages,
        "frameworks": frameworks,
        "package_manager": pm,
        "workflows": workflows,
    }
1265
+
1266
def _swift_component(self, path: str, scope: str) -> Dict[str, Any]:
    """Describe the Swift package rooted at ``path``.

    ``Package.swift`` is always recorded as evidence and ``Package.resolved``
    is added when visible. Emits ``swift package resolve`` and ``swift test``
    (recommended), ``swift build`` (candidate), and candidate ``swiftlint`` /
    ``swiftformat .`` workflows when ``.swiftlint.yml`` / ``.swiftformat``
    are visible.
    """
    refs = [self.evidence.add(_join_rel(path, "Package.swift"), "swift_manifest")]
    if self._has_file(path, "Package.resolved"):
        refs.append(self.evidence.add(_join_rel(path, "Package.resolved"), "swift_lockfile"))

    # (kind, command, confidence, recommended)
    core_steps = (
        ("install", "swift package resolve", "high", True),
        ("test", "swift test", "high", True),
        ("build", "swift build", "medium", False),
    )
    workflows = [
        self._workflow(kind, command, path, scope, refs, confidence, "local", recommended)
        for kind, command, confidence, recommended in core_steps
    ]

    # (kind, command, config file, evidence kind)
    optional_tools = (
        ("lint", "swiftlint", ".swiftlint.yml", "lint_config"),
        ("format", "swiftformat .", ".swiftformat", "format_config"),
    )
    for kind, command, config_name, evidence_kind in optional_tools:
        if not self._has_file(path, config_name):
            continue
        tool_refs = [self.evidence.add(_join_rel(path, config_name), evidence_kind)]
        workflows.append(self._workflow(kind, command, path, scope, tool_refs, "medium", "local", False))

    return {
        "languages": [_fact("swift", 0.95, refs, "Package.swift")],
        "package_manager": _package_manager("swift package manager", "swift", 0.95, refs),
        "workflows": workflows,
    }
1284
+
1285
def _dotnet_component(self, path: str, scope: str, manifests: Sequence[str]) -> Dict[str, Any]:
    """Describe the .NET component rooted at ``path``.

    Every entry of ``manifests`` is recorded as ``dotnet_manifest`` evidence;
    languages are derived from that evidence by
    ``_dotnet_languages_from_manifests``. Four fixed ``dotnet`` workflows are
    emitted: ``restore`` and ``test`` (recommended), ``build`` and ``format``
    (candidates).
    """
    refs = [self.evidence.add(manifest, "dotnet_manifest") for manifest in manifests]
    languages = _dotnet_languages_from_manifests(refs)
    frameworks = [_fact("dotnet", 0.95, refs, ".NET project or solution manifest")]
    pm = _package_manager("dotnet", "dotnet", 0.95, refs)

    # (kind, command, confidence, recommended)
    dotnet_steps = (
        ("install", "dotnet restore", "high", True),
        ("test", "dotnet test", "high", True),
        ("build", "dotnet build", "medium", False),
        ("format", "dotnet format", "medium", False),
    )
    workflows = [
        self._workflow(kind, command, path, scope, refs, confidence, "local", recommended)
        for kind, command, confidence, recommended in dotnet_steps
    ]
    return {
        "languages": languages,
        "frameworks": frameworks,
        "package_manager": pm,
        "workflows": workflows,
    }
1302
+
1303
+ def _dotnet_manifest_files(self, path: str) -> List[str]:
1304
+ component_dir = self.root if path == "." else self.root / path
1305
+ if not component_dir.is_dir():
1306
+ return []
1307
+ manifests = []
1308
+ for child in component_dir.iterdir():
1309
+ if not child.is_file():
1310
+ continue
1311
+ rel = _rel_to_root(self.root, child)
1312
+ if _is_dotnet_manifest(rel) and _visible_file(self.root, self.ignore, rel):
1313
+ manifests.append(rel)
1314
+ return sorted(manifests)
1315
+
1316
+ def _source_language_facts(
1317
+ self,
1318
+ path: str,
1319
+ all_roots: Sequence[str],
1320
+ extensions: Set[str],
1321
+ ) -> List[Dict[str, Any]]:
1322
+ if not self.allow_source_scan:
1323
+ return []
1324
+ component_dir = self.root if path == "." else self.root / path
1325
+ ignored_roots = [root for root in all_roots if root != "." and root != path and _is_under(root, path)]
1326
+ samples: Dict[str, List[str]] = defaultdict(list)
1327
+ for file_path in _walk_files(component_dir, self.root, self.ignore):
1328
+ rel = _rel_to_root(self.root, file_path)
1329
+ if any(_is_under(rel, ignored) for ignored in ignored_roots):
1330
+ continue
1331
+ if file_path.suffix not in extensions:
1332
+ continue
1333
+ language = SOURCE_EXTENSIONS.get(file_path.suffix)
1334
+ if language and len(samples[language]) < 3:
1335
+ samples[language].append(rel)
1336
+ facts = []
1337
+ for language, paths in sorted(samples.items()):
1338
+ facts.append(_fact(language, 0.85, self.evidence.add_many(paths, "source_language_sample"), "source file extension sample"))
1339
+ return facts
1340
+
1341
def _source_fallback_component(self, path: str, all_roots: Sequence[str]) -> Dict[str, Any]:
    """Infer languages for ``path`` purely from source file extensions.

    Walks the component, skipping files under another root nested below
    ``path``, and keeps up to three sample files per language known to
    ``SOURCE_EXTENSIONS``. Each language becomes a fact with value 0.55.

    Returns ``{"languages": facts, "evidence": refs}`` where ``refs`` is the
    concatenation of every language's recorded samples, in sorted language
    order.
    """
    scan_root = self.root if path == "." else self.root / path
    nested_roots = [
        other
        for other in all_roots
        if other not in (".", path) and _is_under(other, path)
    ]

    by_language: Dict[str, List[str]] = defaultdict(list)
    for source_file in _walk_files(scan_root, self.root, self.ignore):
        rel = _rel_to_root(self.root, source_file)
        if any(_is_under(rel, nested) for nested in nested_roots):
            continue
        language = SOURCE_EXTENSIONS.get(source_file.suffix)
        if not language:
            continue
        bucket = by_language[language]
        if len(bucket) < 3:
            bucket.append(rel)

    all_refs: List[str] = []
    facts: List[Dict[str, Any]] = []
    for language, sample_paths in sorted(by_language.items()):
        recorded = self.evidence.add_many(sample_paths, "source_language_sample")
        all_refs += recorded
        facts.append(_fact(language, 0.55, recorded, "source file extension sample"))
    return {"languages": facts, "evidence": all_refs}
1360
+
1361
+ def _task_runner_workflows(self, path: str, scope: str) -> List[Dict[str, Any]]:
1362
+ component_dir = self.root if path == "." else self.root / path
1363
+ workflows: List[Dict[str, Any]] = []
1364
+ task_files = [
1365
+ ("Makefile", "make"),
1366
+ ("makefile", "make"),
1367
+ ("justfile", "just"),
1368
+ ("Justfile", "just"),
1369
+ ]
1370
+ for filename, runner in task_files:
1371
+ task_file = component_dir / filename
1372
+ if not self._has_file(path, filename):
1373
+ continue
1374
+ rel = self.evidence.add(_join_rel(path, filename), "task_runner")
1375
+ targets = _parse_task_targets(task_file, runner)
1376
+ for kind in WORKFLOW_KINDS:
1377
+ if kind not in targets:
1378
+ continue
1379
+ command = f"{runner} {kind}"
1380
+ recipe = targets[kind]
1381
+ workflows.append(
1382
+ self._workflow(
1383
+ kind,
1384
+ command,
1385
+ path,
1386
+ scope,
1387
+ [rel],
1388
+ "high",
1389
+ "local",
1390
+ recommended=True,
1391
+ risk=_risk_for_command(kind, recipe),
1392
+ reason=f"{filename} target '{kind}'",
1393
+ command_preview=recipe,
1394
+ )
1395
+ )
1396
+ return workflows
1397
+
1398
+ def _repo_workflows(self, components: Sequence[Dict[str, Any]]) -> List[Dict[str, Any]]:
1399
+ if len(components) <= 1:
1400
+ return []
1401
+ repo_workflows: List[Dict[str, Any]] = []
1402
+ for component in components:
1403
+ if component["path"] != ".":
1404
+ continue
1405
+ for workflow in component.get("workflows", []):
1406
+ workflow = dict(workflow)
1407
+ workflow["scope"] = "repo"
1408
+ workflow["component_id"] = None
1409
+ repo_workflows.append(workflow)
1410
+ return repo_workflows
1411
+
1412
+ def _ci_workflows(self, profile_files: Sequence[str]) -> List[Dict[str, Any]]:
1413
+ workflows: List[Dict[str, Any]] = []
1414
+ for rel in sorted(profile_files):
1415
+ if not _is_ci_file(rel):
1416
+ continue
1417
+ evidence = [self.evidence.add(rel, "ci_workflow")]
1418
+ commands = _extract_ci_commands(self.root / rel)
1419
+ if not commands:
1420
+ workflows.append(
1421
+ self._workflow(
1422
+ "ci",
1423
+ None,
1424
+ ".",
1425
+ "repo",
1426
+ evidence,
1427
+ "medium",
1428
+ "ci",
1429
+ recommended=False,
1430
+ risk="high",
1431
+ ci_only=True,
1432
+ reason="CI workflow file found; commands were not statically extracted",
1433
+ )
1434
+ )
1435
+ for command in commands:
1436
+ workflows.append(
1437
+ self._workflow(
1438
+ _classify_workflow_kind(command),
1439
+ command,
1440
+ ".",
1441
+ "repo",
1442
+ evidence,
1443
+ "medium",
1444
+ "ci",
1445
+ recommended=False,
1446
+ risk=_risk_for_command("ci", command),
1447
+ ci_only=True,
1448
+ reason="command extracted from CI workflow; not a local workflow recommendation",
1449
+ command_preview=command,
1450
+ )
1451
+ )
1452
+ return workflows
1453
+
1454
+ def _pytest_evidence(self, path: str, pyproject: Dict[str, Any]) -> List[str]:
1455
+ component_dir = self.root / path
1456
+ evidence = []
1457
+ if self._has_file(path, "pytest.ini"):
1458
+ evidence.append(self.evidence.add(_join_rel(path, "pytest.ini"), "test_config"))
1459
+ if self._has_file(path, "setup.cfg") and _setup_cfg_has_section(component_dir / "setup.cfg", "tool:pytest"):
1460
+ evidence.append(self.evidence.add(_join_rel(path, "setup.cfg"), "test_config"))
1461
+ tool = pyproject.get("tool", {}) if isinstance(pyproject, dict) else {}
1462
+ if isinstance(tool, dict) and "pytest" in tool and self._has_file(path, "pyproject.toml"):
1463
+ evidence.append(self.evidence.add(_join_rel(path, "pyproject.toml"), "test_config"))
1464
+ return evidence
1465
+
1466
+ def _ruff_evidence(self, path: str, pyproject: Dict[str, Any]) -> List[str]:
1467
+ component_dir = self.root / path
1468
+ evidence = []
1469
+ for name in ("ruff.toml", ".ruff.toml"):
1470
+ if self._has_file(path, name):
1471
+ evidence.append(self.evidence.add(_join_rel(path, name), "lint_config"))
1472
+ tool = pyproject.get("tool", {}) if isinstance(pyproject, dict) else {}
1473
+ if isinstance(tool, dict) and "ruff" in tool and self._has_file(path, "pyproject.toml"):
1474
+ evidence.append(self.evidence.add(_join_rel(path, "pyproject.toml"), "lint_config"))
1475
+ return evidence
1476
+
1477
+ def _black_evidence(self, path: str, pyproject: Dict[str, Any]) -> List[str]:
1478
+ component_dir = self.root / path
1479
+ tool = pyproject.get("tool", {}) if isinstance(pyproject, dict) else {}
1480
+ if isinstance(tool, dict) and "black" in tool and self._has_file(path, "pyproject.toml"):
1481
+ return [self.evidence.add(_join_rel(path, "pyproject.toml"), "format_config")]
1482
+ return []
1483
+
1484
def _workflow(
    self,
    kind: str,
    command: Optional[str],
    cwd: str,
    scope: str,
    evidence: Sequence[str],
    confidence: str,
    source: str,
    recommended: bool,
    risk: Optional[str] = None,
    ci_only: bool = False,
    reason: str = "",
    command_preview: Optional[str] = None,
) -> Dict[str, Any]:
    """Assemble one workflow record together with its safety flags.

    ``risk`` defaults to ``_risk_for_command(kind, command or "")`` when not
    given. A workflow is ``safe_auto`` only when it comes from a local,
    non-CI source with a known ``cwd``, ``high`` confidence, ``low`` risk,
    is recommended, and is a ``test`` or ``lint`` workflow; anything else
    has ``needs_confirmation`` set. A warning is attached for each condition
    that makes the workflow less trustworthy.

    ``evidence`` is de-duplicated and sorted in the returned record.
    """
    risk_level = risk or _risk_for_command(kind, command or "")
    cwd_known = bool(cwd)
    is_candidate = not recommended

    runs_locally = source == "local" and not ci_only
    trusted = confidence == "high" and risk_level == "low"
    safe_auto = runs_locally and cwd_known and trusted and recommended and kind in {"test", "lint"}

    # (condition, message) pairs, kept in the order the warnings are emitted.
    cautions = (
        (ci_only, "CI-only workflow; do not execute as a local command without review."),
        (not cwd_known, "cwd is not known; command is not recommended for execution."),
        (risk_level != "low", "Workflow is not low risk; confirm before execution."),
        (confidence != "high", "Workflow confidence is not high."),
        (is_candidate, "Workflow is a candidate, not a recommendation."),
    )
    warnings = [message for applies, message in cautions if applies]

    return {
        "kind": kind,
        "command": command,
        "cwd": cwd,
        "scope": scope,
        "source": source,
        "evidence": sorted(set(evidence)),
        "confidence": confidence,
        "confidence_score": _confidence_score(confidence),
        "risk": risk_level,
        "safe_auto": safe_auto,
        "candidate": is_candidate,
        "recommended": recommended,
        "needs_confirmation": not safe_auto,
        "ci_only": ci_only,
        "reason": reason,
        "command_preview": command_preview,
        "warnings": warnings,
    }
1542
+
1543
+
1544
+ def _affected_from_profile(root: Path, profile: Dict[str, Any], changed_files: Sequence[str]) -> Dict[str, Any]:
1545
+ components = profile.get("project", {}).get("components", [])
1546
+ affected_items = []
1547
+ component_ids: Set[str] = set()
1548
+ suggested: Dict[str, Dict[str, Any]] = {}
1549
+
1550
+ for changed in changed_files:
1551
+ component = _match_component(components, changed)
1552
+ profile_affecting = _is_profile_file(changed) or changed in profile.get("watch", {}).get("files", {})
1553
+ item = {
1554
+ "file": changed,
1555
+ "component_id": component.get("id") if component else None,
1556
+ "component_path": component.get("path") if component else None,
1557
+ "profile_affecting": profile_affecting,
1558
+ "reason": "profile evidence/config file" if profile_affecting else "matched by component path",
1559
+ }
1560
+ affected_items.append(item)
1561
+ if component:
1562
+ component_ids.add(component["id"])
1563
+ for workflow in component.get("workflows", []):
1564
+ if workflow.get("source") != "local":
1565
+ continue
1566
+ if workflow.get("kind") not in {"test", "lint", "build"}:
1567
+ continue
1568
+ key = f"{component['id']}:{workflow['kind']}:{workflow.get('command')}"
1569
+ suggested[key] = dict(workflow, component_id=component["id"])
1570
+
1571
+ warnings = []
1572
+ if not profile.get("alignment", {}).get("aligned"):
1573
+ warnings.append("Profile is not aligned; suggested workflows must not be executed.")
1574
+
1575
+ return {
1576
+ "affected": {
1577
+ "components": sorted(component_ids),
1578
+ "files": affected_items,
1579
+ },
1580
+ "suggested_workflows": list(suggested.values()),
1581
+ "warnings": warnings,
1582
+ }
1583
+
1584
+
1585
+ def _try_incremental_sync(
1586
+ root: Path,
1587
+ cache_path: Path,
1588
+ cached: Optional[Dict[str, Any]],
1589
+ changed_files: Sequence[str],
1590
+ ) -> Optional[Dict[str, Any]]:
1591
+ if not cached or not changed_files:
1592
+ return None
1593
+ if cached.get("schema_version") != SCHEMA_VERSION or cached.get("root") != str(root):
1594
+ return None
1595
+ if not cached.get("alignment", {}).get("aligned"):
1596
+ return None
1597
+
1598
+ ignore = _GitIgnore(root)
1599
+ languages = set(cached.get("watch", {}).get("source_summary", {}).get("languages", []))
1600
+ watched = set(cached.get("watch", {}).get("files", {}))
1601
+ watched_profile_files = {rel for rel in watched if _is_profile_file(rel)}
1602
+ for changed in changed_files:
1603
+ if ignore.ignored(changed, is_dir=False):
1604
+ if _is_profile_file(changed) and (
1605
+ changed in watched
1606
+ or (_profile_file_exists(root, changed) and _is_adjacent_profile_evidence_file(changed, watched_profile_files))
1607
+ ):
1608
+ return None
1609
+ continue
1610
+ if changed in watched or _is_profile_file(changed):
1611
+ return None
1612
+ language = SOURCE_EXTENSIONS.get(Path(changed).suffix)
1613
+ if language and language not in languages:
1614
+ return None
1615
+
1616
+ profile = dict(cached)
1617
+ profile["generated_at"] = _utc_now()
1618
+ profile["cache_path"] = str(cache_path)
1619
+ profile["changed_files"] = list(changed_files)
1620
+ profile["alignment"] = {
1621
+ "aligned": True,
1622
+ "reason": "incremental_reuse",
1623
+ "checked_at": _utc_now(),
1624
+ "stale_files": [],
1625
+ "new_profile_files": [],
1626
+ "removed_profile_files": [],
1627
+ "source_summary_changed": False,
1628
+ }
1629
+ return profile
1630
+
1631
+
1632
def _sync_paths_only(
    root: Path,
    cache_path: Path,
    cached: Optional[Dict[str, Any]],
    changed_files: Sequence[str],
) -> Dict[str, Any]:
    """Refresh the cached profile using only the explicitly changed paths.

    An unaligned payload from ``_paths_only_unavailable`` is returned when the
    cache is missing, incompatible or not aligned, when no changed files are
    given, when ``.gitignore`` changed, or when a changed file uses a source
    language the cache does not list.  Otherwise the result is, in order of
    preference: whatever ``_try_incremental_sync`` returns (if not ``None``),
    a deep copy of the cache with refreshed fingerprints (only non-profile
    watched files changed), or a profile rebuilt from the known profile files
    without a source scan.
    """
    if not cached:
        return _paths_only_unavailable(root, cache_path, changed_files, "cache_missing_paths_only", "Path-only sync requires an existing cache.")
    if not changed_files:
        return _paths_only_unavailable(root, cache_path, changed_files, "changed_files_required", "Path-only sync requires changed_files.")
    if cached.get("schema_version") != SCHEMA_VERSION or cached.get("root") != str(root):
        return _paths_only_unavailable(root, cache_path, changed_files, "cache_incompatible", "Cached profile is not compatible with this repo.")
    if not cached.get("alignment", {}).get("aligned"):
        return _paths_only_unavailable(root, cache_path, changed_files, "cache_not_aligned", "Path-only sync requires an aligned cache.")

    reused = _try_incremental_sync(root, cache_path, cached, changed_files)
    if reused is not None:
        return reused

    if ".gitignore" in changed_files:
        return _paths_only_unavailable(root, cache_path, changed_files, "gitignore_changed_paths_only", "Path-only sync cannot safely apply .gitignore changes; run full sync.")

    ignore = _GitIgnore(root)
    # The cache is not mutated below, so these lookups are computed once.
    watch = cached.get("watch", {})
    watched_files = watch.get("files", {})
    cached_languages = set(watch.get("source_summary", {}).get("languages", []))
    cached_profile_files = {rel for rel in watched_files if _is_profile_file(rel)}
    profile_files = {
        rel
        for rel in cached_profile_files
        if _visible_file(root, ignore, rel)
        or (_profile_file_exists(root, rel) and _is_adjacent_profile_evidence_file(rel, cached_profile_files))
    }
    profile_changed = False
    watched_non_profile_changed = False
    for changed in changed_files:
        language = SOURCE_EXTENSIONS.get(Path(changed).suffix)
        if language and language not in cached_languages:
            return _paths_only_unavailable(root, cache_path, changed_files, "new_source_language_paths_only", "Changed files introduce a source language not present in cache; run full sync.")
        if _is_profile_file(changed):
            if changed in cached_profile_files and not _profile_file_exists(root, changed):
                # A previously tracked profile file was deleted.
                profile_changed = True
                profile_files.discard(changed)
                continue
            visible = _visible_file(root, ignore, changed)
            adjacent_evidence = _profile_file_exists(root, changed) and _is_adjacent_profile_evidence_file(changed, profile_files | cached_profile_files)
            if not visible and not adjacent_evidence:
                continue
            # Past the guard the file is visible or adjacent evidence, so it
            # is always tracked; no discard branch is reachable here.
            profile_changed = True
            profile_files.add(changed)
        elif changed in watched_files:
            watched_non_profile_changed = True

    if not profile_changed and watched_non_profile_changed:
        profile = copy.deepcopy(cached)
        _refresh_changed_fingerprints(root, profile, changed_files)
        _mark_synced(profile, cache_path, changed_files, "paths_only_synced")
        return profile

    builder = _ProfileBuilder(root, cache_path, profile_files=sorted(profile_files), allow_source_scan=False)
    profile = builder.build(changed_files=changed_files)
    profile["alignment"]["reason"] = "paths_only_synced"
    return profile
1695
+
1696
+
1697
def _refresh_changed_fingerprints(root: Path, profile: Dict[str, Any], changed_files: Sequence[str]) -> None:
    """Re-fingerprint, in place, the changed paths that *profile* already tracks.

    Paths listed in neither ``profile["watch"]["files"]`` nor
    ``profile["evidence_files"]`` are left alone.  Tracked paths that no
    longer exist as files are dropped from both mappings; the rest get a new
    fingerprint, and evidence entries keep their existing ``roles``.
    """
    tracked = profile.get("watch", {}).get("files", {})
    evidence = profile.get("evidence_files", {})
    for rel in changed_files:
        in_watch = rel in tracked
        in_evidence = rel in evidence
        if not (in_watch or in_evidence):
            continue
        if not _profile_file_exists(root, rel):
            tracked.pop(rel, None)
            evidence.pop(rel, None)
            continue
        fresh = _fingerprint_with_rel(root, rel)
        if in_watch:
            tracked[rel] = fresh
        if in_evidence:
            evidence[rel] = dict(fresh, roles=evidence[rel].get("roles", []))
1711
+
1712
+
1713
def _mark_synced(profile: Dict[str, Any], cache_path: Path, changed_files: Sequence[str], reason: str) -> None:
    """Stamp *profile* in place as aligned, recording *reason* and the change set."""
    profile.update(
        generated_at=_utc_now(),
        cache_path=str(cache_path),
        changed_files=list(changed_files),
    )
    profile["alignment"] = dict(
        aligned=True,
        reason=reason,
        checked_at=_utc_now(),
        stale_files=[],
        new_profile_files=[],
        removed_profile_files=[],
        source_summary_changed=False,
    )
1726
+
1727
+
1728
def _paths_only_unavailable(
    root: Path,
    cache_path: Path,
    changed_files: Sequence[str],
    reason: str,
    warning: str,
) -> Dict[str, Any]:
    """Build the unaligned sync result returned when path-only sync cannot run.

    The payload carries no profile (``"profile": None``), the given *reason*
    in its alignment record, and *warning* as its only warning.
    """
    alignment: Dict[str, Any] = {
        "aligned": False,
        "reason": reason,
        "checked_at": _utc_now(),
        "stale_files": [],
        "new_profile_files": [],
        "removed_profile_files": [],
        "source_summary_changed": False,
    }
    result: Dict[str, Any] = {
        "operation": "sync",
        "tool": "code-workflow-probe",
        "schema_version": SCHEMA_VERSION,
        "root": str(root),
        "cache_path": str(cache_path),
        "changed_files": list(changed_files),
        "alignment": alignment,
        "profile": None,
        "warnings": [warning],
    }
    return result
1754
+
1755
+
1756
def _compare_watch_state(root: Path, cache_path: Path, watch: Dict[str, Any]) -> Dict[str, Any]:
    """Compare cached watch fingerprints with the files currently on disk.

    Returns the alignment lists: ``stale_files`` (tracked files whose sha256
    or size differs), ``new_profile_files`` (discovered profile files missing
    from the cache) and ``removed_profile_files`` (cached entries that are no
    longer present or visible).  ``source_summary_changed`` is always False
    here.
    """
    cached_files = watch.get("files", {}) if isinstance(watch, dict) else {}
    discovered = _discover_profile_files(root, cache_path)
    discovered_set = set(discovered)
    cached_names = set(cached_files.keys())
    ignore = _GitIgnore(root)

    current_files: Dict[str, Dict[str, Any]] = {}
    for rel in sorted(discovered_set | cached_names):
        # The cache file itself is never part of the watch state.
        if rel == _rel_to_root(root, cache_path):
            continue
        is_live_profile_file = rel in discovered_set and _profile_file_exists(root, rel)
        if is_live_profile_file or _visible_file(root, ignore, rel):
            current_files[rel] = _fingerprint_with_rel(root, rel)

    present = set(current_files.keys())
    stale: List[str] = []
    for rel in sorted(cached_names & present):
        before = cached_files.get(rel, {})
        after = current_files.get(rel, {})
        unchanged = before.get("sha256") == after.get("sha256") and before.get("size") == after.get("size")
        if not unchanged:
            stale.append(rel)

    return {
        "stale_files": stale,
        "new_profile_files": sorted(discovered_set - cached_names),
        "removed_profile_files": sorted(cached_names - present),
        "source_summary_changed": False,
    }
1785
+
1786
+
1787
def _changed_file_affects_profile(path: str, profile: Dict[str, Any]) -> bool:
    """Return True when *path* is a profile file or is listed in the profile's watch files."""
    if _is_profile_file(path):
        return True
    watched = profile.get("watch", {}).get("files", {})
    return path in watched
1789
+
1790
+
1791
def _discover_profile_files(root: Path, cache_path: Path) -> List[str]:
    """Return the sorted root-relative profile files found under *root*.

    The cache file is excluded.  Files reported by
    ``_discover_adjacent_profile_files`` for the discovered set are added.
    """
    cache_rel = _rel_to_root(root, cache_path)
    walked = (_rel_to_root(root, file_path) for file_path in _walk_files(root))
    found: Set[str] = {rel for rel in walked if rel != cache_rel and _is_profile_file(rel)}
    found |= _discover_adjacent_profile_files(root, cache_rel, found)
    return sorted(found)
1802
+
1803
+
1804
def _discover_adjacent_profile_files(root: Path, cache_rel: str, profile_files: Set[str]) -> Set[str]:
    """Collect extra profile files that sit beside a component manifest.

    For each directory holding a component manifest (skipping directories
    that are missing or flagged by ``_is_ignored_generated_dir``), this adds
    every existing ``ADJACENT_PROFILE_FILE_NAMES`` entry and every direct
    child file recognised by ``_is_dotnet_manifest``.  The cache file is
    never included.
    """
    found: Set[str] = set()
    manifest_dirs = {_dirname_rel(rel) for rel in profile_files if _is_component_manifest(rel)}
    for rel_dir in manifest_dirs:
        directory = root / rel_dir if rel_dir != "." else root
        if not directory.is_dir():
            continue
        if _is_ignored_generated_dir(directory, root):
            continue
        for filename in ADJACENT_PROFILE_FILE_NAMES:
            candidate = _join_rel(rel_dir, filename)
            if candidate != cache_rel and (root / candidate).is_file():
                found.add(candidate)
        for entry in directory.iterdir():
            if not entry.is_file():
                continue
            entry_rel = _rel_to_root(root, entry)
            if entry_rel != cache_rel and _is_dotnet_manifest(entry_rel):
                found.add(entry_rel)
    return found
1824
+
1825
+
1826
def _is_adjacent_profile_evidence_file(rel_path: str, profile_files: Set[str]) -> bool:
    """Return True when *rel_path* is an adjacent-evidence file next to a component manifest.

    The file name must be in ``ADJACENT_PROFILE_FILE_NAMES`` and some entry
    of *profile_files* in the same directory must be a component manifest.
    """
    rel = _clean_rel(rel_path)
    if Path(rel).name not in ADJACENT_PROFILE_FILE_NAMES:
        return False
    target_dir = _dirname_rel(rel)
    for candidate in profile_files:
        if _dirname_rel(candidate) == target_dir and _is_component_manifest(candidate):
            return True
    return False
1832
+
1833
+
1834
def _is_ignored_generated_dir(path: Path, root: Path) -> bool:
    """Return True when any component of *path* (relative to *root*) is in ``IGNORED_DIRS``."""
    for part in _rel_to_root(root, path).split("/"):
        if part and part in IGNORED_DIRS:
            return True
    return False
1837
+
1838
+
1839
def _source_summary(root: Path) -> Dict[str, Any]:
    """Summarise the source languages found under *root*.

    Languages come from ``SOURCE_EXTENSIONS`` keyed by file suffix.  The
    result lists the sorted language names, a per-language file count, and
    up to three sample paths per language (flattened in language order).
    """
    totals: Dict[str, int] = {}
    examples: Dict[str, List[str]] = {}
    for file_path in _walk_files(root):
        language = SOURCE_EXTENSIONS.get(file_path.suffix)
        if not language:
            continue
        rel = _rel_to_root(root, file_path)
        totals[language] = totals.get(language, 0) + 1
        bucket = examples.setdefault(language, [])
        if len(bucket) < 3:
            bucket.append(rel)
    ordered = sorted(totals)
    return {
        "languages": ordered,
        "language_counts": {language: totals[language] for language in ordered},
        "samples": [sample for language in ordered for sample in examples[language]],
    }
1855
+
1856
+
1857
def _empty_source_summary() -> Dict[str, Any]:
    """Return a fresh source summary with no languages, counts or samples."""
    return dict(languages=[], language_counts={}, samples=[])
1859
+
1860
+
1861
def _component_language_summary(components: Sequence[Dict[str, Any]]) -> Dict[str, Any]:
    """Build a source summary from the language facts attached to *components*.

    Only language names are known here, so every count is 0 and there are no
    samples.
    """
    names = set()
    for component in components:
        for language in component.get("languages", []):
            name = language.get("name")
            if name:
                names.add(name)
    ordered = sorted(names)
    return {"languages": ordered, "language_counts": dict.fromkeys(ordered, 0), "samples": []}
1871
+
1872
+
1873
class _GitIgnore:
    """Matcher built from the ``.gitignore`` file directly under a root directory."""

    def __init__(self, root: Path) -> None:
        """Load the rules from ``root / ".gitignore"`` and note whether any is negated."""
        self.root = root
        self.rules = _load_gitignore_rules(root / ".gitignore")
        self.has_negation = any(rule["negated"] for rule in self.rules)

    def ignored(self, rel_path: str, is_dir: bool) -> bool:
        """Return True when *rel_path* is ignored.

        The last matching rule decides, so the rules are scanned from the end
        and the first hit wins.  ``.gitignore`` itself is never ignored.
        """
        rel = _clean_rel(rel_path)
        if rel == ".gitignore":
            return False
        for rule in reversed(self.rules):
            if _gitignore_rule_matches(rule, rel, is_dir):
                return not rule["negated"]
        return False
1888
+
1889
+
1890
def _load_gitignore_rules(path: Path) -> List[Dict[str, Any]]:
    """Parse the gitignore file at *path* into a list of rule dictionaries.

    Blank lines and lines starting with ``#`` are skipped.  Each rule records
    the bare pattern plus flags for a leading ``!`` (negated), a leading
    ``/`` (anchored), a trailing ``/`` (directory_only) and whether the
    remaining pattern still contains a slash.
    """
    rules: List[Dict[str, Any]] = []
    for raw_line in _read_text(path).splitlines():
        entry = raw_line.rstrip()
        if not entry or entry.startswith("#"):
            continue
        negated = entry.startswith("!")
        if negated:
            entry = entry[1:]
        entry = entry.strip()
        if not entry:
            continue
        anchored = entry.startswith("/")
        if anchored:
            entry = entry.lstrip("/")
        directory_only = entry.endswith("/")
        pattern = entry.rstrip("/")
        if not pattern:
            continue
        rules.append(
            dict(
                pattern=pattern,
                negated=negated,
                anchored=anchored,
                directory_only=directory_only,
                has_slash="/" in pattern,
            )
        )
    return rules
1920
+
1921
+
1922
def _gitignore_rule_matches(rule: Dict[str, Any], rel_path: str, is_dir: bool) -> bool:
    """Return True when *rule* applies to *rel_path*.

    *rule* is a dictionary as produced by ``_load_gitignore_rules``;
    *rel_path* is a ``/``-separated path and *is_dir* says whether it names a
    directory.

    A file is matched when any of its parent directories matches the rule,
    whether or not the pattern ends with a slash: in gitignore, a pattern
    without a trailing slash matches directories too, and everything below a
    matched directory is covered.  Directory-only rules never match the file
    name itself.
    """
    pattern = rule["pattern"]
    if not is_dir:
        parent_parts = rel_path.split("/")[:-1]
        parents = ["/".join(parent_parts[:index]) for index in range(1, len(parent_parts) + 1)]
        if any(_gitignore_rule_matches(rule, parent, is_dir=True) for parent in parents):
            return True
        if rule["directory_only"]:
            return False

    if rule["anchored"] or rule["has_slash"]:
        # Anchored and slash-containing patterns are compared to the whole path.
        return rel_path == pattern or fnmatch.fnmatchcase(rel_path, pattern)

    parts = rel_path.split("/")
    if is_dir:
        # A bare name may match the directory at any depth of the path.
        return any(part == pattern or fnmatch.fnmatchcase(part, pattern) for part in parts)
    return fnmatch.fnmatchcase(parts[-1], pattern)
1936
+
1937
+
1938
def _walk_files(root: Path, repo_root: Optional[Path] = None, ignore: Optional[_GitIgnore] = None) -> Iterable[Path]:
    """Yield the non-ignored files under *root*.

    When ``_git_visible_files`` returns a listing, exactly those paths are
    yielded.  Otherwise the tree is walked with ``os.walk``, skipping
    ``IGNORED_DIRS`` and anything the gitignore matcher rejects.  Nothing is
    yielded when *root* does not exist.

    *repo_root* defaults to *root*; *ignore* defaults to a matcher built for
    the resolved repository root.
    """
    if not root.exists():
        # This is a generator: a return value would only ride on StopIteration.
        return
    repo = (repo_root or root).resolve()
    git_files = _git_visible_files(repo, root.resolve())
    if git_files is not None:
        yield from git_files
        return

    matcher = ignore or _GitIgnore(repo)
    # With negation rules an ignored directory may still hold re-included
    # files, so ignored directories are pruned only when there are none.
    prune_ignored_dirs = not matcher.has_negation
    for current, dirs, files in os.walk(root):
        current_path = Path(current)
        # In-place assignment is what makes os.walk skip the dropped directories.
        dirs[:] = [
            name
            for name in dirs
            if name not in IGNORED_DIRS
            and not (prune_ignored_dirs and matcher.ignored(_rel_to_root(repo, current_path / name), is_dir=True))
        ]
        for filename in files:
            path = current_path / filename
            if not matcher.ignored(_rel_to_root(repo, path), is_dir=False):
                yield path
1965
+
1966
+
1967
def _git_visible_files(repo_root: Path, root: Path) -> Optional[List[Path]]:
    """List the files git considers visible under *root*, or None if git cannot answer.

    Runs ``git ls-files --cached --others --exclude-standard`` inside
    *repo_root*, limited to *root* when it is not the repository root.  Only
    entries that currently exist as files are returned.  None is returned
    when either git command cannot be started or exits non-zero.
    """
    failures = (OSError, subprocess.CalledProcessError)
    try:
        subprocess.run(
            ["git", "-C", str(repo_root), "rev-parse", "--is-inside-work-tree"],
            check=True,
            capture_output=True,
            text=True,
        )
    except failures:
        return None

    command = ["git", "-C", str(repo_root), "ls-files", "--cached", "--others", "--exclude-standard", "-z", "--"]
    scope = _rel_to_root(repo_root, root)
    if scope != ".":
        command.append(scope)
    try:
        listing = subprocess.run(command, check=True, capture_output=True)
    except failures:
        return None

    # -z output is NUL-separated; empty chunks come from the trailing NUL.
    entries = (raw.decode("utf-8", errors="replace") for raw in listing.stdout.split(b"\0") if raw)
    candidates = (repo_root / rel_path for rel_path in entries)
    return [path for path in candidates if path.is_file()]
1996
+
1997
+
1998
def _is_profile_file(rel_path: str) -> bool:
    """Return True when *rel_path* is a file the profile is derived from.

    That covers names in ``PROFILE_FILE_NAMES``, .NET manifests, YAML files
    under ``.github/workflows/`` and ``.circleci/config.yml``.
    """
    rel = _clean_rel(rel_path)
    if Path(rel).name in PROFILE_FILE_NAMES or _is_dotnet_manifest(rel):
        return True
    is_github_workflow = rel.startswith(".github/workflows/") and Path(rel).suffix in {".yml", ".yaml"}
    return is_github_workflow or rel == ".circleci/config.yml"
2008
+
2009
+
2010
def _is_component_manifest(rel_path: str) -> bool:
    """Return True when *rel_path* is named in ``COMPONENT_MANIFESTS`` or is a .NET manifest."""
    rel = _clean_rel(rel_path)
    if Path(rel).name in COMPONENT_MANIFESTS:
        return True
    return _is_dotnet_manifest(rel)
2013
+
2014
+
2015
def _is_dotnet_manifest(rel_path: str) -> bool:
    """Return True when the suffix of *rel_path* is a known .NET project or solution extension."""
    suffix = Path(_clean_rel(rel_path)).suffix
    if suffix in DOTNET_PROJECT_EXTENSIONS:
        return True
    return suffix in DOTNET_SOLUTION_EXTENSIONS
2018
+
2019
+
2020
def _is_ci_file(rel_path: str) -> bool:
    """Return True for files under ``.github/workflows/`` and the known GitLab, CircleCI and Jenkins config paths."""
    rel = _clean_rel(rel_path)
    if rel.startswith(".github/workflows/"):
        return True
    return rel in {".gitlab-ci.yml", ".gitlab-ci.yaml", ".circleci/config.yml", "Jenkinsfile"}
2026
+
2027
+
2028
def _extract_ci_commands(path: Path) -> List[str]:
    """Extract single-line commands from the CI configuration at *path*.

    A line counts when, after stripping, it starts with ``run:`` or
    ``- run:``, or is ``script:`` followed by a value.  Surrounding quotes
    are removed; empty values and the block-scalar markers ``|`` and ``>``
    are skipped.  At most 50 commands are returned.
    """
    text = _read_text(path)
    if not text:
        return []
    found: List[str] = []
    for raw_line in text.splitlines():
        line = raw_line.strip()
        is_command_line = line.startswith(("run:", "- run:")) or re.match(r"^script:\s*.+", line)
        if not is_command_line:
            continue
        _, _, value = line.partition(":")
        command = value.strip().strip("'\"")
        if command and command not in {"|", ">"}:
            found.append(command)
    return found[:50]
2048
+
2049
+
2050
def _parse_task_targets(path: Path, runner: str) -> Dict[str, str]:
    """Map workflow-kind targets in a task file to their indented command bodies.

    Only targets whose name is in ``WORKFLOW_KINDS`` are kept.  Indented
    lines that follow such a target are collected (stripped) and joined with
    newlines.  Blank lines and ``#`` comments are ignored.
    """
    text = _read_text(path)
    if not text:
        return {}

    bodies: Dict[str, List[str]] = {}
    active: Optional[str] = None
    for raw_line in text.splitlines():
        stripped = raw_line.strip()
        if not stripped or stripped.startswith("#"):
            continue
        name = _target_from_line(raw_line.rstrip(), runner)
        if name:
            # A non-workflow target ends collection for the previous one.
            active = name if name in WORKFLOW_KINDS else None
            if active:
                bodies.setdefault(active, [])
            continue
        if active and raw_line[:1] in ("\t", " "):
            bodies[active].append(stripped)

    return {name: "\n".join(lines) for name, lines in bodies.items()}
2071
+
2072
+
2073
def _target_from_line(line: str, runner: str) -> Optional[str]:
    """Return the target name declared at the start of *line*, or None.

    For ``make``, lines starting with ``.`` are rejected and the colon must
    be followed by whitespace or end of line, which excludes ``:=``
    assignments.  Other runners accept any ``name:`` prefix.
    """
    if runner == "make":
        if line.startswith("."):
            return None
        pattern = r"^([A-Za-z0-9_.-]+)\s*:(?:\s|$)"
    else:
        pattern = r"^([A-Za-z0-9_.-]+)\s*:"
    found = re.match(pattern, line)
    if found is None:
        return None
    return found.group(1)
2081
+
2082
+
2083
def _risk_for_command(kind: str, command: str) -> str:
    """Rate a workflow command as ``"high"``, ``"medium"`` or ``"low"`` risk.

    Deploy, release and publish kinds, and any command containing one of
    ``DANGEROUS_WORDS`` (case-insensitive), are high.  Install, format,
    build, dev and ci kinds are medium.  Everything else is low.
    """
    text = (command or "").lower()
    if kind in {"deploy", "release", "publish"} or any(word in text for word in DANGEROUS_WORDS):
        return "high"
    return "medium" if kind in {"install", "format", "build", "dev", "ci"} else "low"
2092
+
2093
+
2094
def _classify_workflow_kind(command: str) -> str:
    """Classify *command* by the first workflow kind that appears in it as a whole word.

    Kinds are tried in a fixed priority order; ``"ci"`` is the fallback.
    """
    text = command.lower()
    priority = ("test", "lint", "format", "build", "install", "dev")
    return next((kind for kind in priority if re.search(rf"\b{kind}\b", text)), "ci")
2100
+
2101
+
2102
def _fact(name: str, confidence: float, evidence: Sequence[str], reason: str) -> Dict[str, Any]:
    """Build a fact record with a confidence label, rounded score and de-duplicated sorted evidence."""
    return dict(
        name=name,
        confidence=_confidence_label(confidence),
        confidence_score=round(confidence, 2),
        evidence=sorted(set(evidence)),
        reason=reason,
    )
2110
+
2111
+
2112
def _package_manager(
    name: str,
    command: str,
    confidence: float,
    evidence: Sequence[str],
    warnings: Optional[Sequence[str]] = None,
) -> Dict[str, Any]:
    """Build a package-manager record; a missing *warnings* becomes an empty list."""
    return dict(
        name=name,
        command=command,
        confidence=_confidence_label(confidence),
        confidence_score=round(confidence, 2),
        evidence=sorted(set(evidence)),
        warnings=list(warnings or []),
    )
2127
+
2128
+
2129
def _confidence_label(value: float) -> str:
    """Map a numeric confidence to ``"high"`` (>= 0.85), ``"medium"`` (>= 0.6) or ``"low"``."""
    for floor, label in ((0.85, "high"), (0.6, "medium")):
        if value >= floor:
            return label
    return "low"
2135
+
2136
+
2137
def _confidence_score(label: str) -> float:
    """Return the representative score for a confidence label, or 0.0 for an unknown label."""
    scores = {"high": 0.95, "medium": 0.65, "low": 0.35}
    try:
        return scores[label]
    except KeyError:
        return 0.0
2139
+
2140
+
2141
def _merge_facts(groups: Iterable[Iterable[Dict[str, Any]]]) -> List[Dict[str, Any]]:
    """Merge fact records by name, keeping the highest-confidence record per name.

    Evidence from every duplicate is unioned into the surviving record, also
    when a later, higher-confidence fact replaces an earlier one, so the
    result does not depend on input order.  Falsy facts and facts without a
    name are skipped.  Input dictionaries are not mutated.  The result is
    sorted by name.
    """
    merged: Dict[str, Dict[str, Any]] = {}
    for group in groups:
        for fact in group:
            if not fact:
                continue
            name = fact.get("name")
            if not name:
                continue
            current = merged.get(name)
            if current is None:
                merged[name] = dict(fact)
                continue
            combined = sorted(set(current.get("evidence", [])) | set(fact.get("evidence", [])))
            if fact.get("confidence_score", 0) > current.get("confidence_score", 0):
                # The stronger fact wins but inherits the evidence seen so far.
                current = merged[name] = dict(fact)
            current["evidence"] = combined
    return [merged[name] for name in sorted(merged)]
2156
+
2157
+
2158
def _dedupe_facts(facts: Sequence[Dict[str, Any]]) -> List[Dict[str, Any]]:
    """Collapse duplicate fact names in a single sequence by treating it as one merge group."""
    single_group = (facts,)
    return _merge_facts(single_group)
2160
+
2161
+
2162
def _dedupe_workflows(workflows: Sequence[Dict[str, Any]]) -> List[Dict[str, Any]]:
    """Collapse workflows that share the same ``(kind, command, cwd)`` key.

    The first occurrence is kept unless a later duplicate is recommended and
    the kept one is not, in which case the recommended one takes its place.
    The surviving record always carries the union of evidence from all
    duplicates.  First-seen order is preserved and inputs are not mutated.
    """
    seen: Dict[Tuple[str, Optional[str], str], Dict[str, Any]] = {}
    for workflow in workflows:
        key = (workflow.get("kind", ""), workflow.get("command"), workflow.get("cwd", ""))
        current = seen.get(key)
        if current is None:
            seen[key] = dict(workflow)
            continue
        combined = sorted(set(current.get("evidence", [])) | set(workflow.get("evidence", [])))
        if workflow.get("recommended") and not current.get("recommended"):
            # Replacing the record must not lose the evidence merged so far.
            current = seen[key] = dict(workflow)
        current["evidence"] = combined
    return list(seen.values())
2174
+
2175
+
2176
def _project_type(components: Sequence[Dict[str, Any]]) -> str:
    """Describe the repository by how many components it has."""
    if not components:
        return "unknown"
    return "single-component" if len(components) == 1 else "multi-component"
2182
+
2183
+
2184
def _component_type(languages: Sequence[Dict[str, Any]]) -> str:
    """Return the single language name, ``"mixed"`` for several, or ``"unknown"`` for none."""
    names = {item.get("name") for item in languages if item.get("name")}
    if len(names) > 1:
        return "mixed"
    for only_name in names:
        return only_name
    return "unknown"
2191
+
2192
+
2193
def _component_scope(path: str, all_roots: Sequence[str]) -> str:
    """Return ``"repo"`` for the root component of a multi-root repository, else ``"component"``."""
    is_repo_root = path == "." and len(all_roots) > 1
    return "repo" if is_repo_root else "component"
2197
+
2198
+
2199
def _match_component(components: Sequence[Dict[str, Any]], rel_path: str) -> Optional[Dict[str, Any]]:
    """Return the most specific component that contains *rel_path*, or None.

    A component matches when its ``path`` is ``"."`` (the default when the
    key is missing), equals *rel_path*, or is a directory prefix of it.  The
    longest matching path wins.  The root ``"."`` counts as length 0 so that
    it never ties with a one-character directory such as ``"a"``.  Among
    equally specific matches the first one in *components* is returned.
    """
    best: Optional[Dict[str, Any]] = None
    best_specificity = -1
    for component in components:
        path = component.get("path", ".")
        if path == ".":
            specificity = 0
        elif rel_path == path or rel_path.startswith(path + "/"):
            specificity = len(path)
        else:
            continue
        if specificity > best_specificity:
            best, best_specificity = component, specificity
    return best
2208
+
2209
+
2210
def _package_dependencies(package: Dict[str, Any]) -> Set[str]:
    """Return the lower-cased dependency names from all four package.json dependency sections."""
    section_names = ("dependencies", "devDependencies", "peerDependencies", "optionalDependencies")
    sections = (package.get(key) for key in section_names)
    return {str(name).lower() for section in sections if isinstance(section, dict) for name in section}
2217
+
2218
+
2219
def _js_frameworks(dependencies: Set[str]) -> List[str]:
    """Return the sorted framework labels implied by *dependencies*.

    *dependencies* is expected to hold lower-cased package names.  NestJS is
    recognised through its scoped ``@nestjs/core`` package as well as the
    bare ``nestjs`` name.
    """
    known = {
        "next": "nextjs",
        "react": "react",
        "vue": "vue",
        "svelte": "svelte",
        "@angular/core": "angular",
        "vite": "vite",
        "nuxt": "nuxt",
        "express": "express",
        "nestjs": "nestjs",
        "@nestjs/core": "nestjs",
    }
    return sorted({label for dep, label in known.items() if dep in dependencies})
2232
+
2233
+
2234
def _python_frameworks(component_dir: Path, pyproject: Dict[str, Any], requirement_files: Sequence[str]) -> List[str]:
    """Return the sorted known frameworks and tools declared by a Python component.

    Dependency names are gathered from *pyproject* and from each requirements
    file (resolved against *component_dir*).
    """
    declared = set(_pyproject_dependency_names(pyproject))
    for filename in requirement_files:
        declared |= _requirements_dependency_names(component_dir / filename)
    recognised = ("django", "flask", "fastapi", "pytest", "ruff", "black")
    return sorted(label for label in recognised if label in declared)
2248
+
2249
+
2250
def _pyproject_dependency_names(pyproject: Dict[str, Any]) -> Set[str]:
    """Collect dependency names declared in a parsed ``pyproject.toml``.

    Sources read:
    - ``project.dependencies`` and ``project.optional-dependencies``
      (requirement strings, reduced with ``_dependency_name``);
    - ``tool.poetry.dependencies`` and ``tool.poetry.dev-dependencies``;
    - ``tool.poetry.group.<name>.dependencies`` (Poetry dependency groups).

    Poetry table keys are lower-cased as written.  Values of an unexpected
    type are ignored, and empty names are dropped.
    """
    names: Set[str] = set()
    project = pyproject.get("project", {}) if isinstance(pyproject, dict) else {}
    if isinstance(project, dict):
        names.update(_dependency_name(item) for item in project.get("dependencies", []) if isinstance(item, str))
        optional = project.get("optional-dependencies", {})
        if isinstance(optional, dict):
            for values in optional.values():
                if isinstance(values, list):
                    names.update(_dependency_name(item) for item in values if isinstance(item, str))
    tool = pyproject.get("tool", {}) if isinstance(pyproject, dict) else {}
    poetry = tool.get("poetry", {}) if isinstance(tool, dict) else {}
    if isinstance(poetry, dict):
        tables = [poetry.get(key) for key in ("dependencies", "dev-dependencies")]
        groups = poetry.get("group", {})
        if isinstance(groups, dict):
            # Each group is its own table with a nested "dependencies" table.
            tables.extend(group.get("dependencies") for group in groups.values() if isinstance(group, dict))
        for table in tables:
            if isinstance(table, dict):
                names.update(str(name).lower() for name in table)
    return {name for name in names if name}
2268
+
2269
+
2270
def _requirements_dependency_names(path: Path) -> Set[str]:
    """Return the dependency names listed in a requirements file.

    Blank lines, ``#`` comments and lines starting with ``-`` (options such
    as ``-r`` or ``-e``) are skipped.
    """
    found: Set[str] = set()
    for raw_line in _read_text(path).splitlines():
        entry = raw_line.strip()
        if not entry or entry.startswith(("#", "-")):
            continue
        parsed = _dependency_name(entry)
        if parsed:
            found.add(parsed)
    return found
2279
+
2280
+
2281
def _dependency_name(value: str) -> str:
    """Extract the leading package name from a requirement string.

    The name is lower-cased with underscores turned into hyphens.  An empty
    string is returned when *value* does not start with a name.
    """
    found = re.match(r"^\s*([A-Za-z0-9_.-]+)", value)
    if not found:
        return ""
    return found.group(1).lower().replace("_", "-")
2284
+
2285
+
2286
def _python_install_command(pm: Dict[str, Any]) -> Optional[str]:
    """Return the install command for a Python package-manager record, or None if unknown.

    For ``pip`` the requirements-file form is used when any evidence path
    ends with ``requirements.txt``; otherwise an editable install is used.
    """
    name = pm["name"]
    fixed_commands = {
        "uv": "uv sync",
        "poetry": "poetry install",
        "pdm": "pdm install",
        "pipenv": "pipenv install --dev",
    }
    if name in fixed_commands:
        return fixed_commands[name]
    if name != "pip":
        return None
    for path in pm.get("evidence", []):
        if path.endswith("requirements.txt"):
            return "python -m pip install -r requirements.txt"
    return "python -m pip install -e ."
2301
+
2302
+
2303
def _ruby_gem_names(path: Path) -> Set[str]:
    """Return the lower-cased gem names from ``gem '<name>'`` lines in the file at *path*."""
    declarations = re.finditer(r"^\s*gem\s+['\"]([^'\"]+)['\"]", _read_text(path), flags=re.MULTILINE)
    return {declaration.group(1).lower() for declaration in declarations}
2309
+
2310
+
2311
def _ruby_frameworks(gems: Set[str]) -> List[str]:
    """Return the sorted framework and tool labels implied by *gems* (``rspec-rails`` counts as ``rspec``)."""
    known = {
        "rails": "rails",
        "sinatra": "sinatra",
        "rspec": "rspec",
        "rspec-rails": "rspec",
        "rubocop": "rubocop",
    }
    labels = set()
    for gem, label in known.items():
        if gem in gems:
            labels.add(label)
    return sorted(labels)
2320
+
2321
+
2322
def _composer_dependencies(composer: Dict[str, Any]) -> Set[str]:
    """Return the lower-cased package names from ``require`` and ``require-dev`` in a composer.json mapping."""
    sections = (composer.get(key) for key in ("require", "require-dev"))
    return {str(name).lower() for section in sections if isinstance(section, dict) for name in section}
2329
+
2330
+
2331
def _php_frameworks(dependencies: Set[str]) -> List[str]:
    """Return the sorted framework and tool labels implied by composer *dependencies*."""
    known = {
        "laravel/framework": "laravel",
        "symfony/framework-bundle": "symfony",
        "phpunit/phpunit": "phpunit",
        "phpstan/phpstan": "phpstan",
        "squizlabs/php_codesniffer": "phpcs",
        "friendsofphp/php-cs-fixer": "php-cs-fixer",
        "laravel/pint": "pint",
    }
    labels = set()
    for package, label in known.items():
        if package in dependencies:
            labels.add(label)
    return sorted(labels)
2342
+
2343
+
2344
def _dotnet_languages_from_manifests(manifests: Sequence[str]) -> List[Dict[str, Any]]:
    """Return one language fact per .NET project type present in *manifests*.

    ``.csproj``, ``.fsproj`` and ``.vbproj`` files map to csharp, fsharp and
    visualbasic, each with the matching manifests as evidence.
    """
    suffix_languages = {".csproj": "csharp", ".fsproj": "fsharp", ".vbproj": "visualbasic"}
    detected: List[Dict[str, Any]] = []
    for suffix, language in suffix_languages.items():
        matching = [path for path in manifests if path.endswith(suffix)]
        if not matching:
            continue
        detected.append(_fact(language, 0.95, matching, f".NET {suffix} project file"))
    return detected
2351
+
2352
+
2353
def _script_preview(value: Any) -> str:
    """Render a script value as one string.

    Strings pass through, lists are joined with ``" && "``, other truthy
    values are stringified, and falsy values become an empty string.
    """
    if isinstance(value, list):
        return " && ".join(map(str, value))
    if isinstance(value, str):
        return value
    return str(value) if value else ""
2359
+
2360
+
2361
def _js_install_command(pm: Dict[str, Any]) -> str:
    """Return the install command for a JavaScript package-manager record.

    The lockfile-respecting variant is chosen when the record's evidence
    includes that manager's lockfile.  Unknown managers fall back to
    ``"<name> install"``.
    """
    name = pm["name"]
    evidence = set(pm.get("evidence", []))
    # name -> (lockfile test, command with lockfile, command without)
    rules = {
        "npm": (lambda path: path.endswith(("package-lock.json", "npm-shrinkwrap.json")), "npm ci", "npm install"),
        "pnpm": (lambda path: path.endswith("pnpm-lock.yaml"), "pnpm install --frozen-lockfile", "pnpm install"),
        "yarn": (lambda path: path.endswith("yarn.lock"), "yarn install --immutable", "yarn install"),
        "bun": (lambda path: "bun.lock" in path, "bun install --frozen-lockfile", "bun install"),
    }
    rule = rules.get(name)
    if rule is None:
        return f"{name} install"
    has_lockfile, locked_command, plain_command = rule
    return locked_command if any(has_lockfile(path) for path in evidence) else plain_command
2373
+
2374
+
2375
def _js_script_command(pm_name: str, script: str) -> str:
    """Return the command that runs package script *script* with *pm_name* (npm is the fallback)."""
    runners = {"yarn": "yarn", "bun": "bun run", "pnpm": "pnpm run"}
    runner = runners.get(pm_name, "npm run")
    return f"{runner} {script}"
2383
+
2384
+
2385
def _pm_executable(name: str) -> str:
    """Return the executable for package manager *name*; unknown names map to themselves."""
    executables = {tool: tool for tool in ("npm", "pnpm", "yarn", "bun")}
    return executables.get(name, name)
2387
+
2388
+
2389
def _has_test_sample(root: Path, component_path: str, ignore: _GitIgnore) -> bool:
    """Return True when ``_test_samples`` finds at least one test file for the component."""
    samples = _test_samples(root, component_path, ignore)
    return len(samples) > 0
2391
+
2392
+
2393
def _test_samples(root: Path, component_path: str, ignore: _GitIgnore) -> List[str]:
    """Return up to three visible Python test files for a component.

    Looks for ``test_*.py`` and ``*_test.py`` directly inside the component's
    ``tests`` directory and then the component directory itself (no
    recursion).  Paths are root-relative.
    """
    component_dir = root / component_path if component_path != "." else root
    found: List[str] = []
    for test_root in (component_dir / "tests", component_dir):
        if not test_root.exists():
            continue
        for pattern in ("test_*.py", "*_test.py"):
            for candidate in test_root.glob(pattern):
                rel = _rel_to_root(root, candidate)
                if not _visible_file(root, ignore, rel):
                    continue
                found.append(rel)
                if len(found) >= 3:
                    return found
    return found
2408
+
2409
+
2410
def _setup_cfg_has_section(path: Path, section: str) -> bool:
    """Return True when the text of *path* contains the literal header ``[section]``."""
    header = f"[{section}]"
    return header in _read_text(path)
2413
+
2414
+
2415
def _load_json(path: Path) -> Optional[Dict[str, Any]]:
    """Load a JSON object from *path*, or return None when that is not possible.

    None is returned when the file cannot be opened, is not valid UTF-8, is
    not valid JSON, or its top-level value is not an object.  A leading UTF-8
    byte-order mark is accepted.
    """
    try:
        # utf-8-sig reads plain UTF-8 unchanged and strips a BOM if present.
        with path.open("r", encoding="utf-8-sig") as handle:
            value = json.load(handle)
    except (OSError, ValueError):
        # ValueError covers both json.JSONDecodeError and UnicodeDecodeError.
        return None
    return value if isinstance(value, dict) else None
2422
+
2423
+
2424
def _load_json_or_jsonc(path: Path) -> Optional[Dict[str, Any]]:
    """Load a JSON object from *path*, tolerating comments and trailing commas for ``.jsonc`` files.

    Files with any other suffix go through ``_load_json``.  None is returned
    for empty, unparsable or non-object content.
    """
    if path.suffix != ".jsonc":
        return _load_json(path)
    text = _read_text(path)
    if not text:
        return None
    cleaned = _strip_trailing_commas(_strip_json_comments(text))
    try:
        value = json.loads(cleaned)
    except json.JSONDecodeError:
        return None
    if isinstance(value, dict):
        return value
    return None
2435
+
2436
+
2437
def _strip_json_comments(text: str) -> str:
    """Remove ``//`` line comments and ``/* */`` block comments from JSON-like text.

    String literals are copied through untouched, including escaped quotes,
    so comment markers inside strings survive.  A line comment stops before
    the line break, which is kept.  An unterminated block comment drops the
    rest of the text.
    """
    pieces: List[str] = []
    size = len(text)
    pos = 0
    while pos < size:
        char = text[pos]
        if char == '"':
            # Copy the whole string literal in one slice.
            end = pos + 1
            while end < size:
                inner = text[end]
                # A backslash consumes the character it escapes.
                end += 2 if inner == "\\" else 1
                if inner == '"':
                    break
            pieces.append(text[pos:end])
            pos = end
        elif text.startswith("//", pos):
            pos += 2
            while pos < size and text[pos] not in "\r\n":
                pos += 1
        elif text.startswith("/*", pos):
            close = text.find("*/", pos + 2)
            pos = size if close == -1 else close + 2
        else:
            pieces.append(char)
            pos += 1
    return "".join(pieces)
2474
+
2475
+
2476
def _strip_trailing_commas(text: str) -> str:
    """Remove commas that directly precede a closing ``}`` or ``]``.

    Whitespace between the comma and the bracket is kept.  String literals
    are skipped, so text such as ``",}"`` inside a string is left intact.
    """

    def _keep_or_drop(match: "re.Match[str]") -> str:
        tail = match.group(1)
        # Group 1 is set only for a trailing comma; otherwise this is a string.
        return match.group(0) if tail is None else tail

    # First alternative consumes a whole string literal (with escapes) so the
    # comma alternative can never match inside one.
    pattern = r'"(?:[^"\\]|\\.)*"|,(\s*[}\]])'
    return re.sub(pattern, _keep_or_drop, text, flags=re.DOTALL)
2478
+
2479
+
2480
+ def _load_toml(path: Path) -> Dict[str, Any]:
2481
+ if tomllib is None or not path.is_file():
2482
+ return {}
2483
+ try:
2484
+ with path.open("rb") as handle:
2485
+ value = tomllib.load(handle)
2486
+ return value if isinstance(value, dict) else {}
2487
+ except (OSError, tomllib.TOMLDecodeError):
2488
+ return {}
2489
+
2490
+
2491
def _write_json(path: Path, data: Dict[str, Any]) -> None:
    """Write *data* to *path* as indented, key-sorted JSON with a trailing newline.

    Parent directories are created as needed.  The content goes to a sibling
    ``<name>.tmp`` file first, which then replaces *path*.
    """
    target_dir = path.parent
    target_dir.mkdir(parents=True, exist_ok=True)
    staging = path.with_suffix(f"{path.suffix}.tmp")
    with staging.open("w", encoding="utf-8") as stream:
        json.dump(data, stream, indent=2, sort_keys=True)
        stream.write("\n")
    staging.replace(path)
2498
+
2499
+
2500
def _read_text(path: Path, max_bytes: int = 1_000_000) -> str:
    """Read *path* as UTF-8 text, replacing undecodable bytes.

    An empty string is returned when *path* is not a file, is larger than
    *max_bytes*, or cannot be read.
    """
    try:
        readable = path.is_file() and path.stat().st_size <= max_bytes
        return path.read_text(encoding="utf-8", errors="replace") if readable else ""
    except OSError:
        return ""
2507
+
2508
+
2509
def _fingerprint(path: Path) -> Dict[str, Any]:
    """Return the sha256, size and mtime of the file at *path*.

    The ``path`` field is the full path for absolute inputs and the cleaned
    file name otherwise.  When the file cannot be read, the hash, size and
    mtime are None and ``missing`` is True.
    """
    try:
        info = path.stat()
        hasher = hashlib.sha256()
        with path.open("rb") as stream:
            # Hash in 1 MiB chunks.
            while True:
                block = stream.read(1024 * 1024)
                if not block:
                    break
                hasher.update(block)
        label = str(path) if path.is_absolute() else _clean_rel(str(path.name))
        return {
            "path": label,
            "sha256": hasher.hexdigest(),
            "size": info.st_size,
            "mtime_ns": info.st_mtime_ns,
        }
    except OSError:
        return {
            "path": str(path),
            "sha256": None,
            "size": None,
            "mtime_ns": None,
            "missing": True,
        }
2530
+
2531
+
2532
def _fingerprint_with_rel(root: Path, rel_path: str) -> Dict[str, Any]:
    """Fingerprint ``root / rel_path`` and label the result with *rel_path*."""
    record = _fingerprint(root / rel_path)
    # Report the caller's relative path, not whatever label _fingerprint chose.
    record.update({"path": rel_path})
    return record
2536
+
2537
+
2538
def _visible_file(root: Path, ignore: _GitIgnore, rel_path: str) -> bool:
    """Return True when *rel_path* is a regular file under *root* that *ignore* does not exclude."""
    rel = _clean_rel(rel_path)
    if not (root / rel).is_file():
        return False
    return not ignore.ignored(rel, is_dir=False)
2541
+
2542
+
2543
def _profile_file_exists(root: Path, rel_path: str) -> bool:
    """Return True when the cleaned *rel_path* names a regular file under *root*."""
    target = root / _clean_rel(rel_path)
    return target.is_file()
2546
+
2547
+
2548
def _resolve_root(root: str | os.PathLike[str]) -> Path:
    """Expand ``~`` in *root* and return it as a resolved path."""
    expanded = Path(root).expanduser()
    return expanded.resolve()
2550
+
2551
+
2552
def _resolve_cache_path(root: Path, cache_path: str | os.PathLike[str] | None) -> Path:
    """Pick the cache file location.

    With no *cache_path* the default cache name under *root* is returned
    as-is. Otherwise ``~`` is expanded, a relative path is anchored at
    *root*, and the result is resolved.
    """
    if cache_path is None:
        return root / DEFAULT_CACHE_NAME
    candidate = Path(cache_path).expanduser()
    if not candidate.is_absolute():
        candidate = root / candidate
    return candidate.resolve()
2557
+
2558
+
2559
def _normalize_changed_files(root: Path, changed_files: Sequence[str]) -> List[str]:
    """Return the changed paths as sorted, de-duplicated root-relative strings.

    Empty entries are skipped. Absolute paths are resolved and made relative
    to *root*; those outside *root* are dropped. Relative paths go through
    ``_clean_rel``. Results equal to ``""`` or ``"."`` are discarded.
    """
    collected: Set[str] = set()
    for raw in changed_files:
        if not raw:
            continue
        candidate = Path(raw)
        if not candidate.is_absolute():
            collected.add(_clean_rel(raw))
            continue
        try:
            inside = candidate.resolve().relative_to(root)
        except ValueError:
            # The absolute path does not live under root.
            continue
        collected.add(inside.as_posix())
    collected.discard("")
    collected.discard(".")
    return sorted(collected)
2574
+
2575
+
2576
def _rel_to_root(root: Path, path: Path) -> str:
    """Return *path* relative to *root* in POSIX form.

    Falls back to the unresolved *path* in POSIX form when resolving it or
    relating it to *root* raises ``ValueError``.
    """
    try:
        relative = path.resolve().relative_to(root)
    except ValueError:
        return path.as_posix()
    return relative.as_posix()
2581
+
2582
+
2583
def _clean_rel(path: str) -> str:
    """Normalise *path* to a POSIX-style string for use as a relative path.

    ``"."`` is returned unchanged. Otherwise every leading ``./`` is removed
    and slashes are stripped from both ends.
    """
    normalized = Path(path).as_posix()
    if normalized == ".":
        return normalized
    while normalized[:2] == "./":
        normalized = normalized[2:]
    return normalized.strip("/")
2590
+
2591
+
2592
def _join_rel(base: str, name: str) -> str:
    """Join *name* onto *base* with ``/``; a *base* of ``"."`` yields *name* alone."""
    if base == ".":
        return name
    return f"{base}/{name}"
2594
+
2595
+
2596
def _dirname_rel(path: str) -> str:
    """Return the parent directory of *path* in POSIX form (``"."`` for a bare name)."""
    # Path.parent of a single-component path is already ".".
    return Path(path).parent.as_posix()
2599
+
2600
+
2601
def _is_under(path: str, parent: str) -> bool:
    """Return True when *path* equals *parent* or lies beneath it.

    A *parent* of ``"."`` contains every path. The comparison is textual, on
    ``/``-separated strings.
    """
    return parent == "." or path == parent or path.startswith(parent + "/")
2605
+
2606
+
2607
def _utc_now() -> str:
    """Return the current UTC time as ISO-8601 with second precision and a ``Z`` suffix."""
    stamp = datetime.now(timezone.utc).replace(microsecond=0)
    iso_text = stamp.isoformat()
    return iso_text.replace("+00:00", "Z")
2609
+
2610
+
2611
def _emit_progress(progress: Optional[Callable[[str], None]], message: str) -> None:
    """Pass *message* to the *progress* callback; do nothing when it is ``None``."""
    if progress is None:
        return
    progress(message)
2614
+
2615
+
2616
def _stderr_progress(message: str) -> None:
    """Draw a one-line progress bar for *message* on stderr.

    Known sync messages map to a fixed percentage and short label. Any other
    message is shown at 50% with its ``"sync: "`` text removed. The line ends
    with a carriage return so the next update overwrites it, except at 100%
    where a newline is written.
    """
    known_steps = {
        "sync: start": (0, "start"),
        "sync: check cache": (20, "cache"),
        "sync: reused cached profile": (80, "reuse"),
        "sync: paths-only": (55, "paths"),
        "sync: scan repo": (45, "scan"),
        "sync: wrote cache": (90, "write"),
        "sync: done": (100, "done"),
    }
    if message in known_steps:
        percent, label = known_steps[message]
    else:
        percent, label = 50, message.replace("sync: ", "")
    width = 20
    filled = int(width * percent / 100)
    bar = ("#" * filled).ljust(width, "-")
    if percent >= 100:
        terminator = "\n"
    else:
        terminator = "\r"
    print(f"cwp [{bar}] {percent:3d}% {label}", file=sys.stderr, end=terminator, flush=True)
2632
+
2633
+
2634
def _format_result(
    data: Dict[str, Any],
    output_format: str,
    verbose: bool = False,
    status_detail: Optional[str] = None,
    limit: int = DEFAULT_STATUS_LIMIT,
    depth: int = DEFAULT_STATUS_DEPTH,
) -> Dict[str, Any] | str:
    """Return *data* untouched for JSON output, otherwise its text rendering.

    Raises:
        ValueError: If *output_format* is neither ``text`` nor ``json``.
    """
    if _normalize_output_format(output_format) == "json":
        return data
    return _render_text(
        data,
        verbose=verbose,
        status_detail=status_detail,
        limit=limit,
        depth=depth,
    )
2646
+
2647
+
2648
def _normalize_output_format(output_format: str) -> str:
    """Lower-case *output_format*, treating a falsy value as ``"text"``.

    Raises:
        ValueError: If the result is neither ``"text"`` nor ``"json"``.
    """
    chosen = (output_format or "text").lower()
    if chosen in ("text", "json"):
        return chosen
    raise ValueError("format must be 'text' or 'json'")
2653
+
2654
+
2655
def _render_text(
    data: Dict[str, Any],
    verbose: bool = False,
    status_detail: Optional[str] = None,
    limit: int = DEFAULT_STATUS_LIMIT,
    depth: int = DEFAULT_STATUS_DEPTH,
) -> str:
    """Render a result dict as newline-joined text.

    ``install-skill`` results and non-full ``status`` results are handed to
    their own renderers. Everything else (including ``status`` at full
    detail, which forces verbose output) produces an alignment header,
    optional change lists, affected-file details, the profile and warnings.

    Args:
        data: Result dict; ``operation`` defaults to ``"sync"``.
        verbose: Expand profile details.
        status_detail: Detail level, used for ``status`` results only.
        limit: Preview limit forwarded to the status renderer.
        depth: Component depth forwarded to the status renderer.

    Returns:
        The rendered text without a trailing newline.
    """
    operation = data.get("operation", "sync")
    if operation == "install-skill":
        return _render_install_skill_text(data, verbose=verbose)
    if operation == "status":
        detail = _normalize_status_detail(status_detail or "compact", verbose)
        if detail != "full":
            return _render_status_text(data, detail=detail, limit=limit, depth=depth)
        verbose = True

    # The profile is either nested under "profile" or is the payload itself
    # (recognised by a dict-valued "project" key).
    if isinstance(data.get("profile"), dict):
        profile = data["profile"]
    elif isinstance(data.get("project"), dict):
        profile = data
    else:
        profile = None
    alignment = data.get("alignment") or (profile or {}).get("alignment", {})
    lines = [
        "code-workflow-probe",
        f"{operation}: aligned={_bool_text(alignment.get('aligned'))} reason={alignment.get('reason', 'unknown')}",
    ]

    stale = alignment.get("stale_files", [])
    new_files = alignment.get("new_profile_files", [])
    removed = alignment.get("removed_profile_files", [])
    if stale:
        lines.append(f"stale_files: {', '.join(stale)}")
    if new_files:
        lines.append(f"new_profile_files: {', '.join(new_files)}")
    if removed:
        lines.append(f"removed_profile_files: {', '.join(removed)}")
    if "profile_updated" in data:
        lines.append(f"profile_updated: {_bool_text(data.get('profile_updated'))}")
    if data.get("changed_files"):
        lines.append(f"changed_files: {', '.join(data['changed_files'])}")

    if data.get("affected"):
        _append_affected_text(lines, data)

    if profile:
        _append_profile_text(lines, profile, verbose=verbose)
    elif operation in {"affected", "edit"}:
        # These operations may legitimately come without a profile.
        pass
    else:
        lines.append("profile: unavailable")

    warnings = list(data.get("warnings", []))
    # When the payload itself is the profile, its warnings were already
    # collected above; adding them again would print every warning twice.
    if profile and profile is not data:
        warnings.extend(profile.get("warnings", []))
    if warnings:
        _append_list(lines, "warnings", warnings)
    return "\n".join(lines)
2708
+
2709
+
2710
def _render_install_skill_text(data: Dict[str, Any], verbose: bool = False) -> str:
    """Render an ``install-skill`` result as text.

    Shows the target, the installed and dry-run flags and the skill path.
    The skill content is included only when *verbose* is set and content is
    present; warnings are appended when there are any.
    """
    target = data.get("target")
    installed = _bool_text(data.get("installed"))
    dry_run = _bool_text(data.get("dry_run"))
    lines = ["code-workflow-probe"]
    lines.append(f"install-skill: target={target} installed={installed} dry_run={dry_run}")
    lines.append(f"path: {data.get('skill_path')}")
    lines.append("note: installed skill tells Codex to sync after editing project/workflow management files.")
    if verbose and data.get("content"):
        lines.extend(["content:", str(data["content"]).rstrip()])
    if data.get("warnings"):
        _append_list(lines, "warnings", data.get("warnings", []))
    return "\n".join(lines)
2723
+
2724
+
2725
def _render_status_text(
    data: Dict[str, Any],
    detail: str = "compact",
    limit: int = DEFAULT_STATUS_LIMIT,
    depth: int = DEFAULT_STATUS_DEPTH,
) -> str:
    """Render a bounded text summary of a ``status`` result.

    Prints the alignment header, previews of stale/new/removed files, a
    project summary and, depending on *detail*, either a workflow list
    (``compact``) or a component list (``standard``).
    """
    raw_profile = data.get("profile")
    profile = raw_profile if isinstance(raw_profile, dict) else None
    alignment = data.get("alignment", {})
    preview_limit = _normalize_limit(limit)
    preview_depth = _normalize_depth(depth)
    lines = [
        "code-workflow-probe",
        f"status: aligned={_bool_text(alignment.get('aligned'))} reason={alignment.get('reason', 'unknown')}",
    ]
    change_lists = (
        ("stale", "stale_files"),
        ("new_profile", "new_profile_files"),
        ("removed", "removed_profile_files"),
    )
    for label, key in change_lists:
        names = alignment.get(key, [])
        if names:
            lines.append(f"{label}({len(names)}): {_preview_names(names, preview_limit)}")
    if not profile:
        lines.append("profile: unavailable")
    else:
        project = profile.get("project", {})
        components = project.get("components", [])
        workflows = []
        for component in components:
            workflows.extend(component.get("workflows", []))
        safe = len([workflow for workflow in workflows if workflow.get("safe_auto")])
        review = len(workflows) - safe
        lines.extend(
            [
                "summary:",
                f"- project: {project.get('type', 'unknown')}",
                f"- components: {len(components)}",
                f"- tech: {_format_fact_names(project.get('technologies', []))}",
                f"- package_managers: {_format_package_managers(project.get('package_managers', []))}",
                f"- workflows: safe_auto={safe} needs_review={review} ci={len(project.get('ci_workflows', []))}",
            ]
        )
        if detail == "compact":
            _append_status_workflows(lines, components, preview_limit, include_component=True)
        elif detail == "standard":
            _append_status_components(lines, components, preview_limit, preview_depth)
        # NOTE(review): nesting was reconstructed from a view without
        # indentation; the evidence line is assumed to be emitted for every
        # detail level, not only "standard" — confirm against the original.
        evidence_files = sorted(profile.get("evidence_files", {}))
        lines.append(f"evidence({len(evidence_files)}): {_preview_names(evidence_files, preview_limit)}")
    if data.get("warnings"):
        _append_list(lines, "warnings", data.get("warnings", []))
    return "\n".join(lines)
2771
+
2772
+
2773
def _normalize_status_detail(detail: Optional[str], verbose: bool = False) -> str:
    """Return the effective status detail level.

    *verbose* always wins and yields ``"full"``. Otherwise *detail* is
    lower-cased (a falsy value means ``"compact"``) and checked against
    ``STATUS_DETAILS``.

    Raises:
        ValueError: If the level is not in ``STATUS_DETAILS``.
    """
    if verbose:
        return "full"
    level = (detail or "compact").lower()
    if level in STATUS_DETAILS:
        return level
    raise ValueError("detail must be 'compact', 'standard', or 'full'")
2780
+
2781
+
2782
def _normalize_limit(limit: int) -> int:
    """Coerce *limit* to an int of at least 1.

    Values ``int()`` rejects with ``TypeError`` or ``ValueError`` fall back
    to ``DEFAULT_STATUS_LIMIT``.
    """
    try:
        return max(1, int(limit))
    except (TypeError, ValueError):
        return DEFAULT_STATUS_LIMIT
2788
+
2789
+
2790
def _normalize_depth(depth: int) -> int:
    """Coerce *depth* to a non-negative int.

    Values ``int()`` rejects with ``TypeError`` or ``ValueError`` fall back
    to ``DEFAULT_STATUS_DEPTH``.
    """
    try:
        return max(0, int(depth))
    except (TypeError, ValueError):
        return DEFAULT_STATUS_DEPTH
2796
+
2797
+
2798
def _append_status_components(lines: List[str], components: Sequence[Dict[str, Any]], limit: int, depth: int) -> None:
    """Append a per-component overview to *lines*.

    Components are ordered by path, filtered to those within *depth*, and
    capped at *limit*. Each shown component gets a summary line followed by
    its own workflow list; a final line reports how many were hidden.
    Nothing is appended when *components* is empty.
    """
    if not components:
        return
    ordered = sorted(components, key=lambda entry: str(entry.get("path") or ""))
    visible = [entry for entry in ordered if _component_within_depth(entry, depth)]
    selected = visible[:limit]
    lines.append(f"components(depth={depth}, shown={len(selected)}/{len(components)}):")
    for entry in selected:
        safe_flows: List[Dict[str, Any]] = []
        review_flows: List[Dict[str, Any]] = []
        for workflow in entry.get("workflows", []):
            bucket = safe_flows if workflow.get("safe_auto") else review_flows
            bucket.append(workflow)
        safe = _workflow_kind_preview(safe_flows)
        review = _workflow_kind_preview(review_flows)
        fields = [
            "-",
            f"id={entry.get('id')}",
            f"path={entry.get('path')}",
            f"lang={_format_fact_names(entry.get('languages', []))}",
            f"pm={_format_package_manager(entry.get('package_manager'))}",
            f"safe={safe} review={review}",
        ]
        lines.append(" ".join(fields))
        _append_status_workflows(lines, [entry], limit, indent=" ", include_component=False)
    hidden_by_depth = len(ordered) - len(visible)
    hidden_by_limit = len(visible) - len(selected)
    if hidden_by_depth or hidden_by_limit:
        lines.append(f"- hidden: depth={hidden_by_depth} limit={hidden_by_limit}")
2822
+
2823
+
2824
def _component_within_depth(component: Dict[str, Any], depth: int) -> bool:
    """Return True when the component's path has at most *depth* segments.

    A missing or empty path counts as ``"."``, which is always within depth.
    """
    location = str(component.get("path") or ".")
    if location == ".":
        return True
    segments = [piece for piece in location.split("/") if piece]
    return len(segments) <= depth
2829
+
2830
+
2831
def _workflow_kind_preview(workflows: Sequence[Dict[str, Any]], limit: int = 4) -> str:
    """Return a preview of the distinct, sorted workflow kinds, capped at *limit*."""
    distinct: Set[str] = set()
    for workflow in workflows:
        if workflow.get("kind"):
            distinct.add(str(workflow.get("kind")))
    return _preview_names(sorted(distinct), limit)
2834
+
2835
+
2836
def _append_status_workflows(
    lines: List[str],
    components: Sequence[Dict[str, Any]],
    limit: int,
    indent: str = "",
    include_component: bool = True,
) -> None:
    """Append a bounded list of local workflows for *components* to *lines*.

    Every appended line starts with *indent*. With *include_component* each
    entry is prefixed with its component id. A trailing "+N more" line
    reports entries that were not selected.
    """
    items = _status_workflow_items(components)
    selected = _select_status_workflow_items(items, limit)
    lines.append(f"{indent}workflows(local, shown={len(selected)}/{len(items)}):")
    if not items:
        lines.append(f"{indent}- none")
        return
    for entry in selected:
        if include_component:
            prefix = f"component={entry['component_id']} "
        else:
            prefix = ""
        lines.append(f"{indent}- {prefix}{_format_workflow(entry['workflow'])}")
    if len(items) > len(selected):
        lines.append(f"{indent}- +{len(items) - len(selected)} more")
2854
+
2855
+
2856
def _status_workflow_items(components: Sequence[Dict[str, Any]]) -> List[Dict[str, Any]]:
    """Collect the local, runnable workflows of *components* in sort-key order.

    A workflow is kept when its ``source`` is ``"local"``, it is not
    ``ci_only`` and it has a ``command``. Each item records the owning
    component's id and path (``"."`` when absent) alongside the workflow.
    """
    collected = [
        {
            "component_id": component.get("id"),
            "component_path": component.get("path") or ".",
            "workflow": workflow,
        }
        for component in components
        for workflow in component.get("workflows", [])
        if workflow.get("source") == "local" and not workflow.get("ci_only") and workflow.get("command")
    ]
    collected.sort(key=_status_workflow_sort_key)
    return collected
2868
+
2869
+
2870
def _select_status_workflow_items(items: Sequence[Dict[str, Any]], limit: int) -> List[Dict[str, Any]]:
    """Pick at most *limit* items, one per preferred kind first.

    The first pass walks ``STATUS_WORKFLOW_KIND_ORDER`` and takes the first
    unpicked item of each kind, stopping once the limit is reached. The
    second pass fills remaining slots with unpicked items in their given
    order. The selection is returned sorted by the status sort key.
    """
    max_items = _normalize_limit(limit)
    picked: List[int] = []  # indexes in selection order, keeps the final sort stable
    taken: Set[int] = set()

    for kind in STATUS_WORKFLOW_KIND_ORDER:
        match = next(
            (
                position
                for position, entry in enumerate(items)
                if position not in taken and entry["workflow"].get("kind") == kind
            ),
            None,
        )
        if match is not None:
            picked.append(match)
            taken.add(match)
        if len(picked) >= max_items:
            return sorted((items[position] for position in picked), key=_status_workflow_sort_key)

    for position in range(len(items)):
        if position in taken:
            continue
        picked.append(position)
        taken.add(position)
        if len(picked) >= max_items:
            break

    return sorted((items[position] for position in picked), key=_status_workflow_sort_key)
2894
+
2895
+
2896
def _status_workflow_sort_key(item: Dict[str, Any]) -> Tuple[int, str, int, int, str]:
    """Build the ordering key for a status workflow item.

    Items sort by position of their kind in ``STATUS_WORKFLOW_KIND_ORDER``
    (unknown kinds last), then component path, then ``safe_auto`` before
    unsafe, then ``recommended`` before the rest, then command text.
    """
    workflow = item["workflow"]
    ranks = {name: position for position, name in enumerate(STATUS_WORKFLOW_KIND_ORDER)}
    kind_rank = ranks.get(str(workflow.get("kind") or ""), len(STATUS_WORKFLOW_KIND_ORDER))
    component_path = str(item.get("component_path") or "")
    safe_rank = int(not workflow.get("safe_auto"))
    recommended_rank = int(not workflow.get("recommended"))
    command = str(workflow.get("command") or "")
    return (kind_rank, component_path, safe_rank, recommended_rank, command)
2909
+
2910
+
2911
def _append_profile_text(lines: List[str], profile: Dict[str, Any], verbose: bool = False) -> None:
    """Append the project summary, components, CI note and evidence files to *lines*.

    With *verbose* every evidence file is listed with digest, size and
    roles. Otherwise only the first five evidence paths are previewed.
    """
    project = profile.get("project", {})
    components = project.get("components", [])
    lines.extend(
        [
            "summary:",
            f"- project: {project.get('type', 'unknown')}",
            f"- tech: {_format_fact_names(project.get('technologies', []), verbose=verbose)}",
            f"- package_managers: {_format_package_managers(project.get('package_managers', []), verbose=verbose)}",
            "components:",
        ]
    )
    if not components:
        lines.append("- none")
    for component in components:
        header = " ".join(
            [
                "-",
                f"id={component.get('id')}",
                f"path={component.get('path')}",
                f"lang={_format_fact_names(component.get('languages', []), verbose=verbose)}",
                f"pm={_format_package_manager(component.get('package_manager'), verbose=verbose)}",
            ]
        )
        lines.append(header)
        _append_workflow_groups(lines, component.get("workflows", []), indent=" ", verbose=verbose)

    ci_workflows = project.get("ci_workflows", [])
    if ci_workflows:
        lines.append(f"ci: {len(ci_workflows)} candidate(s), not local")

    evidence_files = sorted(profile.get("evidence_files", {}))
    if not verbose:
        shown = ", ".join(evidence_files[:5]) if evidence_files else "none"
        extra = len(evidence_files) - 5
        tail = f", +{extra} more" if extra > 0 else ""
        lines.append(f"evidence({len(evidence_files)}): {shown}{tail}")
        return

    lines.append("evidence_files:")
    if not evidence_files:
        lines.append("- none")
    for path in evidence_files:
        fingerprint = profile["evidence_files"][path]
        sha = fingerprint.get("sha256") or "missing"
        roles = ",".join(fingerprint.get("roles", [])) or "unknown"
        lines.append(f"- {path}: sha256={sha} size={fingerprint.get('size')} roles={roles}")
2949
+
2950
+
2951
def _append_affected_text(lines: List[str], data: Dict[str, Any]) -> None:
    """Append affected components, files and suggested workflows to *lines*.

    At most 8 files and 12 workflows are listed. Either list ends with a
    "+N more" line when entries were left out, so truncation is never
    silent.
    """
    file_limit = 8
    workflow_limit = 12

    affected_data = data.get("affected", {})
    components = affected_data.get("components", [])
    lines.append(f"affected: components={', '.join(components) if components else 'none'}")
    files = affected_data.get("files", [])
    for item in files[:file_limit]:
        lines.append(
            "- "
            f"{item.get('file')} -> component={item.get('component_id') or 'none'} "
            f"profile_affecting={_bool_text(item.get('profile_affecting'))}"
        )
    if len(files) > file_limit:
        # Mirror the workflow list below so a cut-off file list is visible.
        lines.append(f"- +{len(files) - file_limit} more")

    workflows = data.get("suggested_workflows", [])
    lines.append("suggested_workflows:")
    if not workflows:
        lines.append("- none")
    for workflow in workflows[:workflow_limit]:
        component_id = workflow.get("component_id")
        prefix = f"component={component_id} " if component_id else ""
        lines.append(f"- {prefix}{_format_workflow(workflow)}")
    if len(workflows) > workflow_limit:
        lines.append(f"- +{len(workflows) - workflow_limit} more")
2973
+
2974
+
2975
def _append_workflow_groups(lines: List[str], workflows: Sequence[Dict[str, Any]], indent: str = "", verbose: bool = False) -> None:
    """Append *workflows* to *lines*, split into safe-auto and needs-review groups.

    All safe-auto workflows are listed. The needs-review group shows at most
    eight entries followed by a "+N more" line. Nothing is appended when
    *workflows* is empty.
    """
    if not workflows:
        return
    safe_group: List[Dict[str, Any]] = []
    review_group: List[Dict[str, Any]] = []
    for workflow in workflows:
        target = safe_group if workflow.get("safe_auto") else review_group
        target.append(workflow)
    if safe_group:
        lines.append(f"{indent}workflows.safe_auto:")
        lines.extend(f"{indent}- {_format_workflow(workflow, verbose=verbose)}" for workflow in safe_group)
    if review_group:
        lines.append(f"{indent}workflows.needs_review:")
        lines.extend(f"{indent}- {_format_workflow(workflow, verbose=verbose)}" for workflow in review_group[:8])
        overflow = len(review_group) - 8
        if overflow > 0:
            lines.append(f"{indent}- +{overflow} more")
2990
+
2991
+
2992
def _format_workflow(workflow: Dict[str, Any], verbose: bool = False) -> str:
    """Format one workflow as a single line.

    The compact form shows kind, cwd and command plus bracketed notes for
    anything unusual (candidate, risk other than low, confidence other than
    high, CI-only). The verbose form lists every field as ``key=value``.
    """
    if verbose:
        fields = [
            f"kind={workflow.get('kind')}",
            f"command={workflow.get('command') or 'none'}",
            f"cwd={workflow.get('cwd') or 'unknown'}",
            f"scope={workflow.get('scope')}",
            f"source={workflow.get('source')}",
            f"confidence={workflow.get('confidence')}",
            f"risk={workflow.get('risk')}",
            f"safe_auto={_bool_text(workflow.get('safe_auto'))}",
            f"candidate={_bool_text(workflow.get('candidate'))}",
            f"ci_only={_bool_text(workflow.get('ci_only'))}",
            f"evidence={_format_names(workflow.get('evidence', []))}",
        ]
        return " ".join(fields)

    tags = []
    if workflow.get("candidate"):
        tags.append("candidate")
    risk = workflow.get("risk")
    if risk and risk != "low":
        tags.append(f"risk={risk}")
    confidence = workflow.get("confidence")
    if confidence and confidence != "high":
        tags.append(f"conf={confidence}")
    if workflow.get("ci_only"):
        tags.append("ci-only")
    suffix = f" [{' '.join(tags)}]" if tags else ""
    return f"{workflow.get('kind')}: cwd={workflow.get('cwd') or '?'} command={workflow.get('command') or 'none'}{suffix}"
3018
+
3019
+
3020
def _format_fact_names(facts: Sequence[Dict[str, Any]], verbose: bool = False) -> str:
    """Join fact names into one string, or return ``"none"`` when there are no facts.

    The compact form is ``name,name``. The verbose form is
    ``name(confidence), name(confidence)``.
    """
    if not facts:
        return "none"
    if not verbose:
        return ",".join([str(fact.get("name")) for fact in facts])
    described = [f"{fact.get('name')}({fact.get('confidence')})" for fact in facts]
    return ", ".join(described)
3026
+
3027
+
3028
def _format_package_managers(package_managers: Sequence[Dict[str, Any]], verbose: bool = False) -> str:
    """Format each package manager and join them with ``", "``; ``"none"`` when empty."""
    if not package_managers:
        return "none"
    rendered = [_format_package_manager(entry, verbose=verbose) for entry in package_managers]
    return ", ".join(rendered)
3032
+
3033
+
3034
def _format_package_manager(package_manager: Optional[Dict[str, Any]], verbose: bool = False) -> str:
    """Format one package manager entry.

    Returns ``"none"`` for a missing or empty entry, just the name in
    compact mode, and ``name(confidence; command=...)`` in verbose mode.
    """
    if not package_manager:
        return "none"
    name = package_manager.get("name")
    if verbose:
        return f"{name}({package_manager.get('confidence')}; command={package_manager.get('command')})"
    return str(name)
3040
+
3041
+
3042
def _format_names(values: Sequence[str]) -> str:
    """Join *values* with commas, or return ``"none"`` when there are none."""
    if not values:
        return "none"
    return ",".join(values)
3044
+
3045
+
3046
def _preview_names(values: Sequence[str], limit: int = 5) -> str:
    """Return the first *limit* names joined by ``", "`` with a "+N more" suffix.

    Args:
        values: Names to preview.
        limit: Maximum number of names to spell out.

    Returns:
        ``"none"`` for an empty sequence. With a non-positive *limit* no name
        is shown and the result is just ``"+N more"`` with the full count,
        instead of a dangling leading comma or a miscounted remainder.
    """
    if not values:
        return "none"
    total = len(values)
    if limit <= 0:
        return f"+{total} more"
    preview = ", ".join(values[:limit])
    if total > limit:
        preview += f", +{total - limit} more"
    return preview
3053
+
3054
+
3055
def _append_list(lines: List[str], label: str, values: Sequence[str]) -> None:
    """Append ``label:`` and one ``- value`` bullet per entry; ``- none`` when empty."""
    lines.append(f"{label}:")
    if values:
        lines.extend(f"- {value}" for value in values)
    else:
        lines.append("- none")
3062
+
3063
+
3064
def _bool_text(value: Any) -> str:
    """Map exactly ``True``/``False`` to ``"true"``/``"false"``; anything else is ``"unknown"``."""
    # Identity checks on purpose: truthy non-bools such as 1 stay "unknown".
    if value is True:
        return "true"
    if value is False:
        return "false"
    return "unknown"
3066
+
3067
+
3068
def _resolve_codex_skills_dir(skills_dir: str | os.PathLike[str] | None) -> Path:
    """Return the resolved Codex skills directory.

    Order of precedence: the explicit *skills_dir*, then ``$CODEX_HOME/skills``
    when the variable is set and non-empty, then ``~/.codex/skills``.
    """
    if skills_dir is not None:
        chosen = Path(skills_dir).expanduser()
    else:
        codex_home = os.environ.get("CODEX_HOME")
        if codex_home:
            chosen = Path(codex_home).expanduser() / "skills"
        else:
            chosen = Path.home() / ".codex" / "skills"
    return chosen.resolve()
3075
+
3076
+
3077
def _codex_skill_markdown() -> str:
    """Return the Markdown text of the code-workflow-probe Codex skill.

    The text starts with a front-matter block (``name`` and ``description``)
    followed by usage steps, the sync rule and safety rules.
    """
    # NOTE(review): the literal below is reproduced as it appears in this
    # view, which does not preserve leading whitespace; confirm any indented
    # list-continuation lines against the original file before applying.
    return """---
name: code-workflow-probe
description: Use code-workflow-probe to keep repo workflow facts aligned before exploring, after relevant edits, and before validation.
---

# Code Workflow Probe

Use `code-workflow-probe` when working in a repository and you need current, evidence-backed workflow facts for install, test, lint, format, build, dev, components, package managers, CI, and affected files.

## Workflow

1. At task start, run:
`code-workflow-probe sync --root <repo>`
2. Prefer the default text output for quick agent context.
3. Use JSON when you need structured data:
`code-workflow-probe sync --root <repo> --format json`
4. After editing files, notify the probe:
`code-workflow-probe edit --root <repo> --changed <path> [<path>...]`
5. To update via incremental sync after known edits, pass the changed files:
`code-workflow-probe sync --root <repo> --changed <path> [<path>...]`
6. For very large repos, if you know the changed file list is complete and a cache already exists, use path-only sync:
`code-workflow-probe sync --root <repo> --changed <path> [<path>...] --paths-only`
7. Use progress for long syncs:
`code-workflow-probe sync --root <repo> --changed <path> [<path>...] --progress`
8. If changed files are unknown or incomplete, force a complete scan:
`code-workflow-probe sync --root <repo> --full`
9. Before validation, map changes to components and workflows:
`code-workflow-probe affected --root <repo> --changed <path> [<path>...]`
10. Use status when you need a bounded AI context summary of tech stack, package managers, and workflow commands. If compact status is too sparse, use `--detail standard --depth <n> --limit <n>`:
`code-workflow-probe status --root <repo>`

## Important Sync Rule

Strongly prefer running `code-workflow-probe sync --root <repo> --changed <path> [<path>...]` after editing project or workflow management files, including manifests, lockfiles, package-manager files, task-runner files, CI files, test/lint/format/build config, and monorepo/component boundary files.

Examples include `package.json`, lockfiles, `pyproject.toml`, `requirements*.txt`, `go.mod`, `Cargo.toml`, `pom.xml`, Gradle files, `Makefile`, `justfile`, `.github/workflows/*`, `.gitlab-ci.yml`, `pytest.ini`, `ruff.toml`, ESLint config, Prettier config, and similar workflow evidence files.

Use `--paths-only` only when the changed path list is complete. If you are unsure whether files were added, removed, renamed, generated, or edited outside your view, do not use `--paths-only`; run normal sync or `--full`.

## Safety Rules

- Do not use stale profile data. If `aligned` is false or unknown, sync first.
- Only auto-run workflows that are `safe_auto=true`, local, high confidence, low risk, and have a known cwd.
- Treat CI-only, candidate, inferred, medium/high risk, and low-confidence workflows as requiring review.
- Do not turn CI commands into local commands without checking cwd and local evidence.
- Do not invent missing workflows.
"""
3125
+
3126
+
3127
def _build_parser() -> argparse.ArgumentParser:
    """Build the command-line parser with its five sub-commands.

    The shared options (root, cache, format, compact, verbose, progress) are
    accepted both before and after the sub-command. The sub-command copies
    use ``argparse.SUPPRESS`` as default, so an option given only at the top
    level is not overwritten by a sub-command default.
    """
    parser = argparse.ArgumentParser(prog="code-workflow-probe", description="Deterministic repo workflow profile syncer.")

    # (flag, top-level default, remaining add_argument keywords)
    shared_options = (
        ("--root", ".", {"help": "Repository root. Defaults to current directory."}),
        ("--cache", None, {"help": f"Cache path. Defaults to {DEFAULT_CACHE_NAME} under root."}),
        ("--format", "text", {"choices": ("text", "json"), "help": "Output format. Defaults to text."}),
        ("--compact", False, {"action": "store_true", "help": "Emit compact JSON when --format json is used."}),
        ("--verbose", False, {"action": "store_true", "help": "Expand text output with full evidence details."}),
        ("--progress", False, {"action": "store_true", "help": "Print progress messages to stderr."}),
    )

    for flag, default, options in shared_options:
        parser.add_argument(flag, default=default, **options)
    subparsers = parser.add_subparsers(dest="command", required=True)

    def add_common(subparser: argparse.ArgumentParser) -> None:
        for flag, _unused_default, options in shared_options:
            subparser.add_argument(flag, default=argparse.SUPPRESS, **options)

    sync_parser = subparsers.add_parser("sync", help="Build and cache an aligned profile.")
    add_common(sync_parser)
    sync_parser.add_argument("--changed", nargs="*", default=[], help="Changed files to include in output context.")
    sync_parser.add_argument("--no-write", action="store_true", help="Do not write cache.")
    sync_parser.add_argument("--full", action="store_true", help="Force a full repo scan instead of incremental cache reuse.")
    sync_parser.add_argument("--paths-only", action="store_true", help="Sync only from explicit changed paths plus existing cache; never discover the whole repo.")

    status_parser = subparsers.add_parser("status", help="Check whether cached profile is aligned.")
    add_common(status_parser)
    status_parser.add_argument("--detail", choices=("compact", "standard", "full"), default="compact", help="Text detail level for status output.")
    status_parser.add_argument("--limit", type=int, default=DEFAULT_STATUS_LIMIT, help="Preview limit for compact and standard status output.")
    status_parser.add_argument("--depth", type=int, default=DEFAULT_STATUS_DEPTH, help="Directory depth for standard status component previews.")

    # "edit" and "affected" take the same required list of changed files.
    changed_commands = (
        ("edit", "Notify changed files and update profile if needed."),
        ("affected", "Map changed files to components and workflows."),
    )
    for command_name, command_help in changed_commands:
        changed_parser = subparsers.add_parser(command_name, help=command_help)
        add_common(changed_parser)
        changed_parser.add_argument("--changed", nargs="+", required=True, help="Changed files.")

    skill_parser = subparsers.add_parser("install-skill", help="Install a Codex skill for code-workflow-probe.")
    add_common(skill_parser)
    skill_parser.add_argument("--tool", choices=("codex",), default="codex", help="Target AI coding tool. Only codex is supported.")
    skill_parser.add_argument("--skills-dir", default=None, help="Codex skills directory. Defaults to $CODEX_HOME/skills or ~/.codex/skills.")
    skill_parser.add_argument("--dry-run", action="store_true", help="Preview the target path and skill content without writing files.")
    skill_parser.add_argument("--no-overwrite", action="store_true", help="Do not overwrite an existing skill file.")

    return parser
3174
+
3175
+
3176
def main(argv: Optional[Sequence[str]] = None) -> int:
    """Run the command-line interface and return the process exit code.

    Parses *argv* (``sys.argv`` when ``None``), dispatches to the matching
    API function and writes the result to stdout, as JSON or as text, always
    followed by a newline.
    """
    parser = _build_parser()
    args = parser.parse_args(argv)
    progress = _stderr_progress if args.progress else None
    command = args.command

    if command == "sync":
        output = sync(
            args.root,
            args.cache,
            changed_files=args.changed,
            write=not args.no_write,
            format=args.format,
            verbose=args.verbose,
            incremental=not args.full,
            paths_only=args.paths_only,
            progress=progress,
        )
    elif command == "status":
        output = status(
            args.root,
            args.cache,
            format=args.format,
            verbose=args.verbose,
            detail=args.detail,
            limit=args.limit,
            depth=args.depth,
        )
    elif command == "edit":
        output = edit(args.root, args.changed, args.cache, format=args.format, verbose=args.verbose)
    elif command == "affected":
        output = affected(args.root, args.changed, args.cache, format=args.format, verbose=args.verbose)
    elif command == "install-skill":
        output = install_skill(
            tool=args.tool,
            skills_dir=args.skills_dir,
            dry_run=args.dry_run,
            overwrite=not args.no_overwrite,
            format=args.format,
            verbose=args.verbose,
        )
    else:  # pragma: no cover - argparse prevents this.
        parser.error(f"unknown command: {command}")

    if args.format == "json":
        if args.compact:
            layout = {"separators": (",", ":"), "indent": None}
        else:
            layout = {"separators": None, "indent": 2}
        json.dump(output, sys.stdout, sort_keys=True, **layout)
    else:
        sys.stdout.write(str(output))
    sys.stdout.write("\n")
    return 0
3218
+
3219
+
3220
if __name__ == "__main__":
    # Run the CLI only when executed as a script; main()'s return value
    # becomes the process exit status.
    sys.exit(main())