auditor-lambda 0.3.12 → 0.3.14

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (61) hide show
  1. package/README.md +20 -24
  2. package/audit-code-wrapper-lib.mjs +52 -53
  3. package/dist/cli.js +43 -6
  4. package/dist/coverage.js +3 -1
  5. package/dist/extractors/disposition.js +8 -1
  6. package/dist/extractors/graph.d.ts +3 -1
  7. package/dist/extractors/graph.js +1147 -67
  8. package/dist/extractors/graphManifestEdges.d.ts +14 -0
  9. package/dist/extractors/graphManifestEdges.js +1158 -0
  10. package/dist/extractors/graphPathUtils.d.ts +5 -0
  11. package/dist/extractors/graphPathUtils.js +75 -0
  12. package/dist/extractors/pathPatterns.d.ts +1 -0
  13. package/dist/extractors/pathPatterns.js +3 -0
  14. package/dist/io/artifacts.d.ts +10 -1
  15. package/dist/io/artifacts.js +23 -3
  16. package/dist/orchestrator/internalExecutors.d.ts +4 -0
  17. package/dist/orchestrator/internalExecutors.js +35 -6
  18. package/dist/orchestrator/reviewPackets.js +1003 -31
  19. package/dist/orchestrator/syntaxResolutionExecutor.js +34 -0
  20. package/dist/types/externalAnalyzer.d.ts +9 -0
  21. package/dist/types/graph.d.ts +3 -0
  22. package/dist/types/reviewPlanning.d.ts +39 -0
  23. package/docs/contracts.md +215 -0
  24. package/docs/development.md +210 -0
  25. package/docs/handoff.md +204 -0
  26. package/docs/history.md +40 -0
  27. package/docs/operator-guide.md +189 -0
  28. package/docs/product.md +185 -0
  29. package/docs/release.md +131 -0
  30. package/package.json +1 -1
  31. package/schemas/audit_plan_metrics.schema.json +347 -0
  32. package/schemas/external_analyzer_results.schema.json +35 -0
  33. package/schemas/graph_bundle.schema.json +47 -2
  34. package/schemas/review_packets.schema.json +160 -0
  35. package/skills/audit-code/SKILL.md +7 -3
  36. package/skills/audit-code/audit-code.prompt.md +4 -1
  37. package/docs/agent-integrations.md +0 -317
  38. package/docs/agent-roles.md +0 -69
  39. package/docs/architecture.md +0 -90
  40. package/docs/artifacts.md +0 -36
  41. package/docs/bootstrap-install.md +0 -139
  42. package/docs/contract.md +0 -54
  43. package/docs/dispatch-implementation-plan.md +0 -302
  44. package/docs/field-trial-bug-report.md +0 -237
  45. package/docs/github-copilot.md +0 -66
  46. package/docs/model-selection.md +0 -97
  47. package/docs/next-steps.md +0 -202
  48. package/docs/packaging.md +0 -120
  49. package/docs/pipeline.md +0 -152
  50. package/docs/product-direction.md +0 -154
  51. package/docs/production-launch-bar.md +0 -92
  52. package/docs/production-readiness.md +0 -58
  53. package/docs/releasing.md +0 -145
  54. package/docs/remediation-baseline.md +0 -75
  55. package/docs/repo-layout.md +0 -30
  56. package/docs/run-flow.md +0 -56
  57. package/docs/session-config.md +0 -319
  58. package/docs/supervisor.md +0 -100
  59. package/docs/usage.md +0 -215
  60. package/docs/windows-setup.md +0 -146
  61. package/docs/workflow-refactor-brief.md +0 -124
@@ -1,5 +1,36 @@
1
+ import { existsSync, readFileSync } from "node:fs";
1
2
  import { join } from "node:path";
2
3
  import { resolveNodeTool, runFirstAvailableCommand } from "./localCommands.js";
4
+ const ESLINT_CONFIG_FILES = [
5
+ "eslint.config.js",
6
+ "eslint.config.mjs",
7
+ "eslint.config.cjs",
8
+ "eslint.config.ts",
9
+ "eslint.config.mts",
10
+ "eslint.config.cts",
11
+ ".eslintrc",
12
+ ".eslintrc.js",
13
+ ".eslintrc.cjs",
14
+ ".eslintrc.json",
15
+ ".eslintrc.yml",
16
+ ".eslintrc.yaml",
17
+ ];
18
+ function hasEslintConfig(root) {
19
+ if (ESLINT_CONFIG_FILES.some((file) => existsSync(join(root, file)))) {
20
+ return true;
21
+ }
22
+ const packageJsonPath = join(root, "package.json");
23
+ if (!existsSync(packageJsonPath)) {
24
+ return false;
25
+ }
26
+ try {
27
+ const packageJson = JSON.parse(readFileSync(packageJsonPath, "utf8"));
28
+ return packageJson.eslintConfig !== undefined;
29
+ }
30
+ catch {
31
+ return false;
32
+ }
33
+ }
3
34
  function runTsc(root) {
4
35
  const results = [];
5
36
  const command = runFirstAvailableCommand(root, [
@@ -35,6 +66,9 @@ function runTsc(root) {
35
66
  }
36
67
  function runEslint(root) {
37
68
  const results = [];
69
+ if (!hasEslintConfig(root)) {
70
+ return results;
71
+ }
38
72
  const command = runFirstAvailableCommand(root, [
39
73
  ...resolveNodeTool(root, join("node_modules", "eslint", "bin", "eslint.js"), [".", "--ext", ".ts,.js,.tsx,.jsx", "--format", "json"], "eslint . --ext .ts,.js,.tsx,.jsx --format json"),
40
74
  {
@@ -11,9 +11,18 @@ export interface ExternalAnalyzerResultItem {
11
11
  /** Preserves the analyzer-native payload when consumers need original detail. */
12
12
  raw?: unknown;
13
13
  }
14
+ /** A normalized analyzer hint that a bounded set of files belongs to a root. */
15
+ export interface ExternalAnalyzerOwnershipRoot {
16
+ root: string;
17
+ paths: string[];
18
+ kind?: string;
19
+ confidence?: number;
20
+ reason?: string;
21
+ }
14
22
  /** Imported analyzer output captured at a single generation time. */
15
23
  export interface ExternalAnalyzerResults {
16
24
  tool: string;
17
25
  generated_at?: string;
26
+ ownership_roots?: ExternalAnalyzerOwnershipRoot[];
18
27
  results: ExternalAnalyzerResultItem[];
19
28
  }
@@ -2,6 +2,9 @@ export interface GraphEdge {
2
2
  from: string;
3
3
  to: string;
4
4
  kind?: string;
5
+ direction?: "directed" | "undirected";
6
+ confidence?: number;
7
+ reason?: string;
5
8
  }
6
9
  export interface RouteEdge {
7
10
  path: string;
@@ -1,4 +1,23 @@
1
1
  import type { AuditTask, Lens } from "../types.js";
2
+ import type { GraphEdge } from "./graph.js";
3
+ export interface ReviewPacketGraphEdge extends Pick<GraphEdge, "from" | "to" | "kind" | "confidence" | "reason"> {
4
+ }
5
+ export interface ReviewPacketQuality {
6
+ cohesion_score: number;
7
+ internal_edge_count: number;
8
+ boundary_edge_count: number;
9
+ unexplained_file_count: number;
10
+ }
11
+ export interface WeaklyExplainedPacketSample {
12
+ packet_id: string;
13
+ primary_gap: "missing_internal_edges" | "unexplained_files" | "partial_cohesion";
14
+ file_count: number;
15
+ sample_file_paths: string[];
16
+ cohesion_score: number;
17
+ internal_edge_count: number;
18
+ boundary_edge_count: number;
19
+ unexplained_file_count: number;
20
+ }
2
21
  export interface ReviewPacket {
3
22
  packet_id: string;
4
23
  task_ids: string[];
@@ -10,6 +29,10 @@ export interface ReviewPacket {
10
29
  total_lines: number;
11
30
  priority: NonNullable<AuditTask["priority"]>;
12
31
  tags?: string[];
32
+ entrypoints?: string[];
33
+ key_edges?: ReviewPacketGraphEdge[];
34
+ boundary_files?: string[];
35
+ quality: ReviewPacketQuality;
13
36
  rationale: string;
14
37
  estimated_tokens: number;
15
38
  }
@@ -32,6 +55,22 @@ export interface AuditPlanMetrics {
32
55
  largest_packet_id?: string;
33
56
  lens_task_counts: Partial<Record<Lens, number>>;
34
57
  priority_task_counts: Record<NonNullable<AuditTask["priority"]>, number>;
58
+ packet_quality: {
59
+ average_cohesion_score: number;
60
+ boundary_crossing_count: number;
61
+ merge_edge_kind_counts: Record<string, number>;
62
+ boundary_edge_kind_counts: Record<string, number>;
63
+ orphan_task_count: number;
64
+ high_fan_in_file_count: number;
65
+ high_fan_out_file_count: number;
66
+ weakly_explained_gap_counts: Record<WeaklyExplainedPacketSample["primary_gap"], number>;
67
+ weakly_explained_file_extension_counts: Record<string, number>;
68
+ weakly_explained_packet_count: number;
69
+ weakly_explained_packet_ids: string[];
70
+ weakly_explained_packet_samples: WeaklyExplainedPacketSample[];
71
+ largest_unexplained_packet_id?: string;
72
+ largest_unexplained_packet_files: number;
73
+ };
35
74
  packet_size: {
36
75
  single_task_packets: number;
37
76
  multi_task_packets: number;
@@ -0,0 +1,215 @@
1
+ # Contracts
2
+
3
+ ## Versioned surfaces
4
+
5
+ The public contract is schema-first. Hosts, workers, prompts, and examples
6
+ should point at schemas and validated examples instead of duplicating fragile
7
+ field descriptions.
8
+
9
+ Important schemas live under `schemas/`, including:
10
+
11
+ - `audit-code-v1alpha1.schema.json`
12
+ - `audit_result.schema.json`
13
+ - `audit_task.schema.json`
14
+ - `audit_plan_metrics.schema.json`
15
+ - `graph_bundle.schema.json`
16
+ - `review_packets.schema.json`
17
+ - `runtime_validation_update.example.json` in `examples/`
18
+
19
+ ## Wrapper envelope
20
+
21
+ Until completion, `audit-code` returns a JSON envelope with:
22
+
23
+ - `contract_version`
24
+ - `audit_state`
25
+ - `selected_obligation`
26
+ - `selected_executor`
27
+ - `progress_made`
28
+ - `artifacts_written`
29
+ - `progress_summary`
30
+ - `next_likely_step`
31
+ - `handoff`
32
+
33
+ On completion, the canonical output is repo-root `audit-report.md`. Intermediate
34
+ `.audit-artifacts/` state is cleaned up when the completed report is promoted.
35
+
36
+ ## AuditResult
37
+
38
+ Workers submit `AuditResult[]` shaped by
39
+ `schemas/audit_result.schema.json`.
40
+
41
+ Important rules:
42
+
43
+ - `task_id`, `unit_id`, `pass_id`, and `lens` must match the assigned task
44
+ - every finding lens must match the assigned task lens
45
+ - `file_coverage` is required and must include assigned files only
46
+ - `file_coverage[].total_lines` must match the current file line count
47
+ - finding `affected_files` entries must be objects, not strings
48
+ - finding `evidence` must be an array of plain strings
49
+ - lens steward tasks emit `findings: []` plus `verification` metadata
50
+
51
+ Validate before ingestion:
52
+
53
+ ```bash
54
+ audit-code validate-results --results /path/to/results.json
55
+ ```
56
+
57
+ ## Artifact bundle
58
+
59
+ The backend stores resumable artifacts under `.audit-artifacts/`, including:
60
+
61
+ - `repo_manifest.json`
62
+ - `file_disposition.json`
63
+ - `surface_manifest.json`
64
+ - `critical_flows.json`
65
+ - `graph_bundle.json`
66
+ - `unit_manifest.json`
67
+ - `coverage_matrix.json`
68
+ - `risk_register.json`
69
+ - `audit_tasks.json`
70
+ - `review_packets.json`
71
+ - `audit_plan_metrics.json`
72
+ - `audit_results.jsonl`
73
+ - `runtime_validation_tasks.json`
74
+ - `runtime_validation_report.json`
75
+ - `synthesis_report.json`
76
+
77
+ Consumers should treat these as versioned JSON artifacts and validate them with
78
+ `audit-code validate` rather than inferring state from filenames alone.
79
+
80
+ ## Dispatch packets
81
+
82
+ Packet dispatch preserves the existing `AuditTask` and `AuditResult`
83
+ contracts. It changes the worker-facing unit of work.
84
+
85
+ Planning artifacts are shaped by:
86
+
87
+ - `schemas/review_packets.schema.json`
88
+ - `schemas/audit_plan_metrics.schema.json`
89
+ - `examples/review_packets.example.json`
90
+ - `examples/audit_plan_metrics.example.json`
91
+
92
+ Normal packet flow:
93
+
94
+ ```text
95
+ audit-code prepare-dispatch --run-id <run_id> --artifacts-dir <artifacts_dir>
96
+ conversation launches one worker per dispatch-plan entry
97
+ worker reads entry.prompt_path
98
+ worker submits AuditResult[] through submit-packet
99
+ audit-code merge-and-ingest --run-id <run_id> --artifacts-dir <artifacts_dir>
100
+ ```
101
+
102
+ Packet artifacts:
103
+
104
+ - `<artifacts_dir>/runs/<run_id>/dispatch-plan.json`
105
+ - `<artifacts_dir>/runs/<run_id>/dispatch-result-map.json`
106
+ - `<artifacts_dir>/runs/<run_id>/task-results/*.prompt.md`
107
+ - `<artifacts_dir>/runs/<run_id>/task-results/*.anchors.json` for isolated
108
+ large-file packets
109
+ - `<artifacts_dir>/runs/<run_id>/task-results/*.json`
110
+ - `<artifacts_dir>/runs/<run_id>/dispatch-warnings.json` when needed
111
+
112
+ Workers should reply exactly:
113
+
114
+ ```text
115
+ valid: <packet_id>, findings=<total finding count>
116
+ ```
117
+
118
+ ## Graph contract
119
+
120
+ `graph_bundle.json` is language-neutral. Language-specific extractors may add
121
+ metadata, but consumers should rely on shared edge concepts:
122
+
123
+ - `from`
124
+ - `to`
125
+ - `kind`
126
+ - optional `direction`
127
+ - optional `confidence` from 0 to 1
128
+ - optional `reason`
129
+
130
+ Packet planning should use graph edges to explain why files belong together,
131
+ not merely to merge every connected group. Weak or high-fan-in edges should
132
+ become context hints rather than unlimited packet expansion.
133
+ Current deterministic import edges include JS/TS import kinds and Python
134
+ `python-import` / `python-from-import` edges when local modules resolve.
135
+ Current deterministic reference edges also include package entrypoints, package
136
+ script links, workspace/project module links, JSON Schema `$ref` links, schema
137
+ contract test links, bounded JSON Schema suite links, bounded GitHub Actions
138
+ workflow suite links, bounded package script suite links, bounded TypeScript
139
+ type contract suite links, and deterministic test/source naming links.
140
+
141
+ Consumers should treat graph evidence by authority:
142
+
143
+ - deterministic directed edges may drive packet expansion when confidence,
144
+ budget, and fan-in/fan-out guards allow it
145
+ - ownership edges may cluster small bounded groups, but should remain explainable
146
+ through `key_edges` and packet `quality`
147
+ - semantic-affinity or NLP-style relationships, if added, should default to
148
+ low-authority context and candidate `boundary_files` unless corroborated by a
149
+ deterministic edge
150
+
151
+ Bounded suite links are intentionally narrow: they connect small, same-directory
152
+ contract suites such as `*.schema.json` files, `.github/workflows/*.yml`
153
+ files, package-script-seeded `scripts/` files, or TypeScript files under
154
+ `types/` directories without turning broad directory proximity into packet
155
+ evidence.
156
+
157
+ Analyzer-supplied ownership roots should use this same graph contract instead
158
+ of requiring packet planners to understand a new language-specific artifact.
159
+ Normalized `external_analyzer_results.json` may include `ownership_roots`;
160
+ structure planning translates each bounded root/path membership into
161
+ `analyzer-ownership-root-link` reference edges. Packet planning then consumes
162
+ those edges through the same bounded `module-ownership-link` clustering path as
163
+ project-file evidence.
164
+ Planner metrics should make it possible to see which edge kinds changed packet
165
+ grouping and which stayed context-only.
166
+
167
+ `audit_plan_metrics.packet_quality` records that plan-level evidence through
168
+ `merge_edge_kind_counts`, `boundary_edge_kind_counts`, and weak-packet
169
+ diagnostics. Merge counts only include graph edges that joined distinct task
170
+ groups in a final packet; boundary counts include concrete graph edges that
171
+ remained adjacent context instead of internal packet evidence.
172
+ `weakly_explained_gap_counts` summarizes the primary gap type across all weak
173
+ packets, and `weakly_explained_file_extension_counts` summarizes the unique file
174
+ extensions represented by those packets. `weakly_explained_packet_samples` adds
175
+ a bounded snapshot of the weakest packet quality records, including sample file
176
+ paths and the primary gap, so extractor work can be prioritized without scanning
177
+ every packet first.
178
+
179
+ Review packets may expose graph-derived context for workers:
180
+
181
+ - `entrypoints` for route or handler context inside the packet
182
+ - `key_edges` for the strongest internal file relationships
183
+ - `boundary_files` for adjacent files that should only be checked when evidence
184
+ genuinely crosses the packet
185
+ - `quality` metrics for cohesion, internal edges, boundary edges, and
186
+ unexplained files
187
+
188
+ ## MCP contract
189
+
190
+ The local MCP server exposes:
191
+
192
+ - `start_audit`
193
+ - `get_status`
194
+ - `continue_audit`
195
+ - `explain_task`
196
+ - `validate_artifacts`
197
+ - `import_results`
198
+ - `import_runtime_updates`
199
+
200
+ It also exposes resources for current artifacts, operator handoff, install
201
+ guidance, and the current report. MCP consumers should prefer the tool and
202
+ resource contracts over reading internal files directly.
203
+
204
+ ## Guided recovery
205
+
206
+ Failure responses should distinguish:
207
+
208
+ - rerun the same command
209
+ - import these result/update files
210
+ - fix session config
211
+ - retry a worker submission after schema validation errors
212
+ - perform manual semantic review
213
+
214
+ Malformed results, invalid config, stale artifacts, and provider failures
215
+ should include field-level or action-level remediation whenever possible.
@@ -0,0 +1,210 @@
1
+ # Development
2
+
3
+ ## Repository layout
4
+
5
+ - `src/`: implementation code
6
+ - `schemas/`: JSON schemas for public and internal artifacts
7
+ - `examples/`: validated artifact examples
8
+ - `skills/audit-code/`: canonical prompt and skill-facing instructions
9
+ - `dispatch/`: packet-dispatch support data
10
+ - `tests/`: Node test suite and regression coverage
11
+ - `dist/`: checked-in compiled runtime used by packaged installs
12
+
13
+ ## Agent handoff
14
+
15
+ Use `docs/handoff.md` as the current pickup note for the next implementation
16
+ agent. It should name the latest completed slice, verification status, files
17
+ touched, and the most practical next steps. Keep long-term product direction in
18
+ `docs/product.md`; keep transient implementation pickup notes in the handoff.
19
+
20
+ ## Build and test
21
+
22
+ ```bash
23
+ npm install
24
+ npm run check
25
+ npm test
26
+ npm run verify:release
27
+ ```
28
+
29
+ The test suite is intentionally contract-heavy. Update tests when changing
30
+ schema shape, prompt contracts, dispatch behavior, installer output, or release
31
+ workflow semantics.
32
+
33
+ ## Production-readiness workflow
34
+
35
+ Use field trials to decide what to fix next. For each representative repository,
36
+ run to the local review handoff, validate the artifact bundle, and compare
37
+ `audit_plan_metrics.json` across runs. Track at least packet count, weak packet
38
+ count, average cohesion, `merge_edge_kind_counts`,
39
+ `boundary_edge_kind_counts`, and `weakly_explained_packet_samples`.
40
+
41
+ Only promote an extractor or planner change when those metrics expose a
42
+ deterministic gap. Prefer improving shared graph resolution or importing
43
+ generic analyzer ownership roots before adding another ecosystem-specific
44
+ manifest parser.
45
+
46
+ The latest remediator field trial closed the remaining mixed code/schema/test
47
+ weak packet by adding package script links, schema contract test links, bounded
48
+ TypeScript type contract suites, package-script-seeded script suite links, and
49
+ generated test artifact disposition. Keep future suite links similarly bounded
50
+ and evidence-led.
51
+
52
+ The Polar field trial added `conftest-link` (conftest.py → Python files in
53
+ scope) and `pyproject-testpaths-link` (pyproject.toml → conftest.py via
54
+ `[tool.pytest.ini_options] testpaths`). `conftest-link` fires only when the
55
+ conftest is inside an `isTestPath` directory to avoid O(n) fan-out from
56
+ root-level conftests. `pyproject.toml` was also added to `shouldReadForGraph`
57
+ so its content is available during the filesystem-backed build path. Together
58
+ these raised Polar's average cohesion from 0.625 to 0.857 and reduced weak
59
+ packets from 5 to 3.
60
+
61
+ A second Polar field trial added `yaml-path-reference-link` (YAML/YML files
62
+ → other config files referenced by explicit relative path). Resolution tries
63
+ repo-root-relative first, then file-directory-relative. The extractor only
64
+ fires for string values ending in `.yaml`, `.yml`, `.json`, or `.toml` that
65
+ resolve to an existing repo file. In Polar, this produced 4 edges from
66
+ `configs/benchmark.yaml` to its template files and raised `internal_edge_count`
67
+ in the `experiments-domains` packet from 90 to 94.
68
+
69
+ A third Polar field trial added `python-test-util-suite-link`, which chains
70
+ `.py` files co-located in `utils/`, `helpers/`, or `support/` subdirectories
71
+ within `isTestPath` directories (same bounded-suite pattern as the TypeScript
72
+ type, JSON schema, and package-script suite links). `conftest.py` is excluded
73
+ from the predicate. In Polar, this produced 2 intra-unit edges within the
74
+ `tests-utils` packet, raising its `internal_edge_count` from 0 to 2 and
75
+ eliminating it as a weak packet. Polar metrics improved from 0.857 to 1.000
76
+ cohesion and 3 to 2 weak packets. The 2 remaining weak packets share genuinely
77
+ isolated files (`.auditorignore`, `experiments/domains/__init__.py`,
78
+ `experiments/summarize_results.py`) that cannot be linked without false
79
+ positives; treat this as the current floor. Note that intra-unit suite edges do not
80
+ appear in `merge_edge_kind_counts` — their effect is visible in the packet's
81
+ `internal_edge_count` and `unexplained_file_count` fields instead.
82
+
83
+ Before treating a build as production-ready, verify the complete review loop in
84
+ one real host:
85
+
86
+ ```text
87
+ audit-code prepare-dispatch --run-id <run_id> --artifacts-dir <artifacts_dir>
88
+ worker reviews each packet prompt
89
+ audit-code submit-packet ...
90
+ audit-code merge-and-ingest --run-id <run_id> --artifacts-dir <artifacts_dir>
91
+ audit-code validate
92
+ ```
93
+
94
+ On Windows, runtime validation runs package-manager shim commands such as
95
+ `npm`, `npx`, `pnpm`, and `yarn` through the command shell so `.cmd` wrappers
96
+ execute reliably. Keep that behavior covered when changing runtime command
97
+ execution.
98
+
99
+ If the final `audit-report.md` cannot be copied into the target repository
100
+ because of local permissions, completion should remain successful and the
101
+ artifact copy remains authoritative. Run `audit-code validate` against the
102
+ artifact bundle before treating the run as complete.
103
+
104
+ Then run `npm run verify:release` from a clean checkout.
105
+
106
+ ## Architecture
107
+
108
+ The system separates deterministic extraction from bounded LLM judgment:
109
+
110
+ 1. intake and file disposition
111
+ 2. surface, flow, graph, unit, risk, and coverage artifacts
112
+ 3. audit task planning
113
+ 4. review packet construction
114
+ 5. semantic review through the active conversation or fallback provider bridge
115
+ 6. result ingestion, selective deepening, runtime validation, and synthesis
116
+ 7. final `audit-report.md`
117
+
118
+ Portability rules:
119
+
120
+ - tool-specific collectors write tool-agnostic JSON
121
+ - prompts consume artifacts and bounded source context
122
+ - review work is attributable to files, lenses, passes, and tasks
123
+ - coverage gaps are machine-detectable
124
+
125
+ ## Adding language analyzers
126
+
127
+ Language support should be adapter-based. A new analyzer should enrich shared
128
+ artifacts instead of inventing a language-specific planning path.
129
+
130
+ Avoid adding another bespoke manifest or project-file parser by default. First
131
+ ask whether the signal is common in expected repositories, whether it provides
132
+ direction or ownership that path heuristics cannot, and whether the same value
133
+ could come from a generic analyzer-supplied ownership hint.
134
+
135
+ Preferred outputs:
136
+
137
+ - graph edges with kind, direction, confidence, and reason
138
+ - entrypoints and surfaces
139
+ - test-to-source links
140
+ - package/module ownership hints, including analyzer-supplied
141
+ `ownership_roots` that become `analyzer-ownership-root-link` graph references
142
+ - contract-suite links for small JSON Schema, workflow, package script, or
143
+ TypeScript type suites when planner metrics show otherwise weak packets
144
+ - external boundary hints
145
+ - line counts and anchor summaries for large files
146
+
147
+ Current analyzer priorities:
148
+
149
+ - planner observability before additional ecosystem breadth
150
+ - exercising the generic ownership-root input from analyzers or imported
151
+ evidence
152
+ - continued behavior-preserving extraction of high-concentration graph helpers
153
+ - JS/TS compiler-backed resolution only after the current regex edges stay
154
+ stable
155
+ - Python deterministic support beyond the current local import, package/module,
156
+ and pytest/unittest adjacency edges only where planner metrics show gaps
157
+ - generic fallback from path patterns, ctags/tree-sitter, LSP output, or
158
+ external analyzer results when available
159
+
160
+ Keep deep analyzers optional. Repositories should still produce useful packets
161
+ from manifests, paths, tests, and external analyzer results when a language has
162
+ only fallback support.
163
+
164
+ Command-backed analyzers should prove project intent before running. Prefer
165
+ repo-local config checks, such as `eslint.config.*`, `.eslintrc*`, or
166
+ `package.json` `eslintConfig`, over executing a globally installed tool and
167
+ parsing its no-config failure.
168
+
169
+ Language-agnostic semantic affinity can be useful for ranking adjacent context,
170
+ but it should be low authority. Do not let shared token frequency alone force
171
+ packet merges; use it for `boundary_files` or candidate explanations unless a
172
+ deterministic edge corroborates the relationship.
173
+
174
+ ## Packetization work
175
+
176
+ The current packetizer groups tasks across lenses and merges graph-connected
177
+ task groups within line budgets. Plan metrics now record which graph edge kinds
178
+ caused packet merges, which candidate edge kinds stayed as boundary context,
179
+ and which packets remain weakly explained. Weak-packet diagnostics aggregate
180
+ primary gap counts and unique file-extension counts, while bounded samples
181
+ include representative file paths. Together those metrics let real or fixture
182
+ runs point at the next deterministic extractor or analyzer-ownership
183
+ improvement. The next phase is consolidation and carefully chosen deterministic
184
+ depth:
185
+
186
+ - use packet-quality observations to prioritize extractor gaps
187
+ - keep manifest/project-file edge extraction isolated from packet planning code
188
+ - use the generic ownership-root contract before adding more ecosystem-specific
189
+ module formats
190
+ - keep bounded suite edges as contract evidence, not as a generic
191
+ same-directory merge rule
192
+ - exercise the Python import, package layout, and test/source edges against
193
+ fixture and real repositories before adding deeper Python framework handling
194
+
195
+ Keep `AuditTask` as the coverage identity and `AuditResult[]` as the ingestion
196
+ contract.
197
+
198
+ ## File-splitting priorities
199
+
200
+ The largest implementation files should be split conservatively, without
200
+ changing behavior:
202
+
203
+ - move CLI command families out of `src/cli.ts`
204
+ - move language metadata tables out of file inventory logic
205
+ - move graph manifest/project-file parsers out of `src/extractors/graph.ts`
206
+ - split selective-deepening task builders by trigger type
207
+ - keep packetization, recovery, and schema changes easier to review
208
+
209
+ Run the focused tests for each area before and after a split, then run
210
+ `npm test`.