@nomos-arc/arc 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (160) hide show
  1. package/.claude/settings.local.json +10 -0
  2. package/.nomos-config.json +5 -0
  3. package/CLAUDE.md +108 -0
  4. package/LICENSE +190 -0
  5. package/README.md +569 -0
  6. package/dist/cli.js +21120 -0
  7. package/docs/auth/googel_plan.yaml +1093 -0
  8. package/docs/auth/google_task.md +235 -0
  9. package/docs/auth/hardened_blueprint.yaml +1658 -0
  10. package/docs/auth/red_team_report.yaml +336 -0
  11. package/docs/auth/session_state.yaml +162 -0
  12. package/docs/certificate/cer_enhance_plan.md +605 -0
  13. package/docs/certificate/certificate_report.md +338 -0
  14. package/docs/dev_overview.md +419 -0
  15. package/docs/feature_assessment.md +156 -0
  16. package/docs/how_it_works.md +78 -0
  17. package/docs/infrastructure/map.md +867 -0
  18. package/docs/init/master_plan.md +3581 -0
  19. package/docs/init/red_team_report.md +215 -0
  20. package/docs/init/report_phase_1a.md +304 -0
  21. package/docs/integrity-gate/enhance_drift.md +703 -0
  22. package/docs/integrity-gate/overview.md +108 -0
  23. package/docs/management/manger-task.md +99 -0
  24. package/docs/management/scafffold.md +76 -0
  25. package/docs/map/ATOMIC_BLUEPRINT.md +1349 -0
  26. package/docs/map/RED_TEAM_REPORT.md +159 -0
  27. package/docs/map/map_task.md +147 -0
  28. package/docs/map/semantic_graph_task.md +792 -0
  29. package/docs/map/semantic_master_plan.md +705 -0
  30. package/docs/phase7/TEAM_RED.md +249 -0
  31. package/docs/phase7/plan.md +1682 -0
  32. package/docs/phase7/task.md +275 -0
  33. package/docs/prompts/USAGE.md +312 -0
  34. package/docs/prompts/architect.md +165 -0
  35. package/docs/prompts/executer.md +190 -0
  36. package/docs/prompts/hardener.md +190 -0
  37. package/docs/prompts/red_team.md +146 -0
  38. package/docs/verification/goveranance-overview.md +396 -0
  39. package/docs/verification/governance-overview.md +245 -0
  40. package/docs/verification/verification-arc-ar.md +560 -0
  41. package/docs/verification/verification-architecture.md +560 -0
  42. package/docs/very_next.md +52 -0
  43. package/docs/whitepaper.md +89 -0
  44. package/overview.md +1469 -0
  45. package/package.json +63 -0
  46. package/src/adapters/__tests__/git.test.ts +296 -0
  47. package/src/adapters/__tests__/stdio.test.ts +70 -0
  48. package/src/adapters/git.ts +226 -0
  49. package/src/adapters/pty.ts +159 -0
  50. package/src/adapters/stdio.ts +113 -0
  51. package/src/cli.ts +83 -0
  52. package/src/commands/apply.ts +47 -0
  53. package/src/commands/auth.ts +301 -0
  54. package/src/commands/certificate.ts +89 -0
  55. package/src/commands/discard.ts +24 -0
  56. package/src/commands/drift.ts +116 -0
  57. package/src/commands/index.ts +78 -0
  58. package/src/commands/init.ts +121 -0
  59. package/src/commands/list.ts +75 -0
  60. package/src/commands/map.ts +55 -0
  61. package/src/commands/plan.ts +30 -0
  62. package/src/commands/review.ts +58 -0
  63. package/src/commands/run.ts +63 -0
  64. package/src/commands/search.ts +147 -0
  65. package/src/commands/show.ts +63 -0
  66. package/src/commands/status.ts +59 -0
  67. package/src/core/__tests__/budget.test.ts +213 -0
  68. package/src/core/__tests__/certificate.test.ts +385 -0
  69. package/src/core/__tests__/config.test.ts +191 -0
  70. package/src/core/__tests__/preflight.test.ts +24 -0
  71. package/src/core/__tests__/prompt.test.ts +358 -0
  72. package/src/core/__tests__/review.test.ts +161 -0
  73. package/src/core/__tests__/state.test.ts +362 -0
  74. package/src/core/auth/__tests__/manager.test.ts +166 -0
  75. package/src/core/auth/__tests__/server.test.ts +220 -0
  76. package/src/core/auth/gcp-projects.ts +160 -0
  77. package/src/core/auth/manager.ts +114 -0
  78. package/src/core/auth/server.ts +141 -0
  79. package/src/core/budget.ts +119 -0
  80. package/src/core/certificate.ts +502 -0
  81. package/src/core/config.ts +212 -0
  82. package/src/core/errors.ts +54 -0
  83. package/src/core/factory.ts +49 -0
  84. package/src/core/graph/__tests__/builder.test.ts +272 -0
  85. package/src/core/graph/__tests__/contract-writer.test.ts +175 -0
  86. package/src/core/graph/__tests__/enricher.test.ts +299 -0
  87. package/src/core/graph/__tests__/parser.test.ts +200 -0
  88. package/src/core/graph/__tests__/pipeline.test.ts +202 -0
  89. package/src/core/graph/__tests__/renderer.test.ts +128 -0
  90. package/src/core/graph/__tests__/resolver.test.ts +185 -0
  91. package/src/core/graph/__tests__/scanner.test.ts +231 -0
  92. package/src/core/graph/__tests__/show.test.ts +134 -0
  93. package/src/core/graph/builder.ts +303 -0
  94. package/src/core/graph/constraints.ts +94 -0
  95. package/src/core/graph/contract-writer.ts +93 -0
  96. package/src/core/graph/drift/__tests__/classifier.test.ts +215 -0
  97. package/src/core/graph/drift/__tests__/comparator.test.ts +335 -0
  98. package/src/core/graph/drift/__tests__/drift.test.ts +453 -0
  99. package/src/core/graph/drift/__tests__/reporter.test.ts +203 -0
  100. package/src/core/graph/drift/classifier.ts +165 -0
  101. package/src/core/graph/drift/comparator.ts +205 -0
  102. package/src/core/graph/drift/reporter.ts +77 -0
  103. package/src/core/graph/enricher.ts +251 -0
  104. package/src/core/graph/grammar-paths.ts +30 -0
  105. package/src/core/graph/html-template.ts +493 -0
  106. package/src/core/graph/map-schema.ts +137 -0
  107. package/src/core/graph/parser.ts +336 -0
  108. package/src/core/graph/pipeline.ts +209 -0
  109. package/src/core/graph/renderer.ts +92 -0
  110. package/src/core/graph/resolver.ts +195 -0
  111. package/src/core/graph/scanner.ts +145 -0
  112. package/src/core/logger.ts +46 -0
  113. package/src/core/orchestrator.ts +792 -0
  114. package/src/core/plan-file-manager.ts +66 -0
  115. package/src/core/preflight.ts +64 -0
  116. package/src/core/prompt.ts +173 -0
  117. package/src/core/review.ts +95 -0
  118. package/src/core/state.ts +294 -0
  119. package/src/core/worktree-coordinator.ts +77 -0
  120. package/src/search/__tests__/chunk-extractor.test.ts +339 -0
  121. package/src/search/__tests__/embedder-auth.test.ts +124 -0
  122. package/src/search/__tests__/embedder.test.ts +267 -0
  123. package/src/search/__tests__/graph-enricher.test.ts +178 -0
  124. package/src/search/__tests__/indexer.test.ts +518 -0
  125. package/src/search/__tests__/integration.test.ts +649 -0
  126. package/src/search/__tests__/query-engine.test.ts +334 -0
  127. package/src/search/__tests__/similarity.test.ts +78 -0
  128. package/src/search/__tests__/vector-store.test.ts +281 -0
  129. package/src/search/chunk-extractor.ts +167 -0
  130. package/src/search/embedder.ts +209 -0
  131. package/src/search/graph-enricher.ts +95 -0
  132. package/src/search/indexer.ts +483 -0
  133. package/src/search/lexical-searcher.ts +190 -0
  134. package/src/search/query-engine.ts +225 -0
  135. package/src/search/vector-store.ts +311 -0
  136. package/src/types/index.ts +572 -0
  137. package/src/utils/__tests__/ansi.test.ts +54 -0
  138. package/src/utils/__tests__/frontmatter.test.ts +79 -0
  139. package/src/utils/__tests__/sanitize.test.ts +229 -0
  140. package/src/utils/ansi.ts +19 -0
  141. package/src/utils/context.ts +44 -0
  142. package/src/utils/frontmatter.ts +27 -0
  143. package/src/utils/sanitize.ts +78 -0
  144. package/test/e2e/lifecycle.test.ts +330 -0
  145. package/test/fixtures/mock-planner-hang.ts +5 -0
  146. package/test/fixtures/mock-planner.ts +26 -0
  147. package/test/fixtures/mock-reviewer-bad.ts +8 -0
  148. package/test/fixtures/mock-reviewer-retry.ts +34 -0
  149. package/test/fixtures/mock-reviewer.ts +18 -0
  150. package/test/fixtures/sample-project/src/circular-a.ts +6 -0
  151. package/test/fixtures/sample-project/src/circular-b.ts +6 -0
  152. package/test/fixtures/sample-project/src/config.ts +15 -0
  153. package/test/fixtures/sample-project/src/main.ts +19 -0
  154. package/test/fixtures/sample-project/src/services/product-service.ts +20 -0
  155. package/test/fixtures/sample-project/src/services/user-service.ts +18 -0
  156. package/test/fixtures/sample-project/src/types.ts +14 -0
  157. package/test/fixtures/sample-project/src/utils/index.ts +14 -0
  158. package/test/fixtures/sample-project/src/utils/validate.ts +12 -0
  159. package/tsconfig.json +20 -0
  160. package/vitest.config.ts +12 -0
@@ -0,0 +1,867 @@
1
+ # `arc map` — Semantic Dependency Mapping
2
+
3
+ > **Command**: `arc map [patterns...] [--no-ai] [--force]`
4
+ > **Purpose**: Scan project files, parse their AST, resolve imports, build a dependency graph, and optionally enrich each file with AI-generated semantic descriptions.
5
+
6
+ ---
7
+
8
+ ## Table of Contents
9
+
10
+ - [Overview](#overview)
11
+ - [Command Interface](#command-interface)
12
+ - [Pipeline Architecture](#pipeline-architecture)
13
+ - [Stage-by-Stage Breakdown](#stage-by-stage-breakdown)
14
+   - [Stage A — Read Existing Map](#stage-a--read-existing-map)
15
+   - [Stage B — File Scanning](#stage-b--file-scanning)
16
+   - [Stage C — Empty Check](#stage-c--empty-check)
17
+   - [Stage D — AST Parser Initialization](#stage-d--ast-parser-initialization)
18
+   - [Stage E — Parse Files](#stage-e--parse-files)
19
+   - [Stage F — Merge Parsed + Carried](#stage-f--merge-parsed--carried)
20
+   - [Stage G — Import Resolution](#stage-g--import-resolution)
21
+   - [Stage H — Graph Building](#stage-h--graph-building)
22
+   - [Stage I — Intermediate Map Write](#stage-i--intermediate-map-write)
23
+   - [Stage J — AI Semantic Enrichment](#stage-j--ai-semantic-enrichment)
24
+   - [Stage K — Contract Writing](#stage-k--contract-writing)
25
+   - [Stage L — Final Map Write](#stage-l--final-map-write)
26
+ - [Data Structures](#data-structures)
27
+ - [Graph Algorithms](#graph-algorithms)
28
+ - [AI Enrichment Deep Dive](#ai-enrichment-deep-dive)
29
+ - [Incremental Processing](#incremental-processing)
30
+ - [Concurrency & Safety](#concurrency--safety)
31
+ - [Configuration Reference](#configuration-reference)
32
+ - [Output Files](#output-files)
33
+ - [Exit Codes](#exit-codes)
34
+ - [File Map (Source Code)](#file-map-source-code)
35
+ - [Design Decisions & Fixes](#design-decisions--fixes)
36
+
37
+ ---
38
+
39
+ ## Overview
40
+
41
+ The `arc map` command is the codebase intelligence engine of nomos-arc.ai. It produces a **`project_map.json`** file that serves as a structured, machine-readable representation of the entire project: every file, symbol, import, dependency edge, and (optionally) AI-generated semantic summary.
42
+
43
+ Other commands (`arc plan`, `arc review`) consume this map to understand the codebase without re-scanning it.
44
+
45
+ ```
46
+                 +-----------+
47
+                 |  arc map  |
48
+                 +-----+-----+
49
+                       |
50
+       +---------------+---------------+
51
+       |               |               |
52
+ +-----------+   +-----------+   +-----------+
53
+ |   Scan    |   |   Parse   |   |  Enrich   |
54
+ | (fs+glob) |   | (AST/WASM)|   | (Gemini)  |
55
+ +-----------+   +-----------+   +-----------+
56
+       |               |               |
57
+       +---------------+---------------+
58
+                       |
59
+               +-------+-------+
60
+               |  project_map  |
61
+               |     .json     |
62
+               +---------------+
63
+ ```
64
+
65
+ ---
66
+
67
+ ## Command Interface
68
+
69
+ ```bash
70
+ arc map [patterns...] [--no-ai] [--force]
71
+ ```
72
+
73
+ | Argument / Flag | Description |
74
+ |-------------------|--------------------------------------------------------------------|
75
+ | `[patterns...]` | Glob patterns to scope the scan (default: `**/*`) |
76
+ | `--no-ai` | Skip AI enrichment; produce structural map only |
77
+ | `--force` | Re-parse all files even if their hash hasn't changed |
78
+
79
+ **Examples**:
80
+
81
+ ```bash
82
+ arc map # Full scan + AI enrichment
83
+ arc map "src/**/*.ts" # Only map TypeScript files in src/
84
+ arc map --no-ai # Structural map, no Gemini calls
85
+ arc map --force # Re-parse everything from scratch
86
+ arc map "src/**" --no-ai --force # Combined flags
87
+ ```
88
+
89
+ ---
90
+
91
+ ## Pipeline Architecture
92
+
93
+ The map command delegates all work to `MapPipeline`, which executes a strict sequence of stages. Each stage has a single responsibility and passes data forward to the next.
94
+
95
+ ```mermaid
96
+ flowchart TD
97
+ CMD["arc map [patterns] [flags]"]
98
+ CMD --> CFG["Load .nomos-config.json"]
99
+ CFG --> LOG["Create Logger"]
100
+ LOG --> AUTH["Initialize AuthManager"]
101
+ AUTH --> PIPE["MapPipeline.run()"]
102
+
103
+ PIPE --> A["A: Read existing map"]
104
+ A --> B["B: Scan filesystem"]
105
+ B --> C{"C: Any files found?"}
106
+ C -- No --> EMPTY["Return empty map"]
107
+ C -- Yes --> D["D: Init ASTParser (WASM)"]
108
+ D --> E["E: Parse new/changed files"]
109
+ E --> F["F: Merge parsed + carried"]
110
+ F --> G["G: Resolve imports (8 workers)"]
111
+ G --> H["H: Build dependency graph"]
112
+ H --> I["I: Write intermediate map"]
113
+ I --> AI{"AI enrichment enabled?"}
114
+ AI -- No --> RET["Return map"]
115
+ AI -- Yes --> J["J: Enrich via Gemini"]
116
+ J --> K["K: Write .semantic.md contracts"]
117
+ K --> L["L: Write final map"]
118
+ L --> RET
119
+
120
+ style CMD fill:#2d3748,stroke:#4a5568,color:#e2e8f0
121
+ style PIPE fill:#2b6cb0,stroke:#2c5282,color:#fff
122
+ style A fill:#2f855a,stroke:#276749,color:#fff
123
+ style B fill:#2f855a,stroke:#276749,color:#fff
124
+ style C fill:#d69e2e,stroke:#b7791f,color:#000
125
+ style D fill:#2f855a,stroke:#276749,color:#fff
126
+ style E fill:#2f855a,stroke:#276749,color:#fff
127
+ style F fill:#2f855a,stroke:#276749,color:#fff
128
+ style G fill:#2f855a,stroke:#276749,color:#fff
129
+ style H fill:#805ad5,stroke:#6b46c1,color:#fff
130
+ style I fill:#dd6b20,stroke:#c05621,color:#fff
131
+ style J fill:#e53e3e,stroke:#c53030,color:#fff
132
+ style K fill:#dd6b20,stroke:#c05621,color:#fff
133
+ style L fill:#dd6b20,stroke:#c05621,color:#fff
134
+ style EMPTY fill:#718096,stroke:#4a5568,color:#fff
135
+ style RET fill:#718096,stroke:#4a5568,color:#fff
136
+ style AI fill:#d69e2e,stroke:#b7791f,color:#000
137
+ ```
138
+
139
+ ---
140
+
141
+ ## Stage-by-Stage Breakdown
142
+
143
+ ### Stage A — Read Existing Map
144
+
145
+ **File**: `src/core/graph/map-schema.ts`
146
+
147
+ Reads `tasks-management/graph/project_map.json` from disk. Returns `null` if the file doesn't exist (first run). Validates against a Zod schema with a forward-compatibility ceiling check — if the file's `schema_version` is newer than what this CLI version supports, it throws an error prompting the user to upgrade.
148
+
149
+ ```mermaid
150
+ flowchart LR
151
+ READ["Read project_map.json"] --> EXISTS{"File exists?"}
152
+ EXISTS -- Yes --> VALIDATE["Zod schema validation"]
153
+ EXISTS -- No --> NULL["Return null"]
154
+ VALIDATE --> VERSION{"schema_version <= current?"}
155
+ VERSION -- Yes --> RETURN["Return ProjectMap"]
156
+ VERSION -- No --> ERROR["Error: upgrade CLI"]
157
+
158
+ style READ fill:#2f855a,stroke:#276749,color:#fff
159
+ style ERROR fill:#e53e3e,stroke:#c53030,color:#fff
160
+ ```
161
+
162
+ ### Stage B — File Scanning
163
+
164
+ **File**: `src/core/graph/scanner.ts` | **Class**: `FileScanner`
165
+
166
+ Uses `fast-glob` to discover files matching the given patterns. Applies a multi-layer filter:
167
+
168
+ ```mermaid
169
+ flowchart TD
170
+ GLOB["fast-glob(patterns)"] --> IGNORE["Apply ignore patterns"]
171
+ IGNORE --> LANG{"Supported language?"}
172
+ LANG -- No --> SKIP1["Skip file"]
173
+ LANG -- "TS/TSX/JS/JSX/PY/GO/RS" --> SIZE{"Size <= 500KB?"}
174
+ SIZE -- No --> SKIP2["Skip file"]
175
+ SIZE -- Yes --> HASH["Compute SHA256 hash"]
176
+ HASH --> CHANGED{"Hash changed from existing map?"}
177
+ CHANGED -- "Yes (or --force)" --> NEW["Add to 'files' (new)"]
178
+ CHANGED -- No --> CARRY["Add to 'carried' (reuse)"]
179
+
180
+ style GLOB fill:#2b6cb0,stroke:#2c5282,color:#fff
181
+ style NEW fill:#2f855a,stroke:#276749,color:#fff
182
+ style CARRY fill:#805ad5,stroke:#6b46c1,color:#fff
183
+ style SKIP1 fill:#718096,stroke:#4a5568,color:#fff
184
+ style SKIP2 fill:#718096,stroke:#4a5568,color:#fff
185
+ ```
186
+
187
+ **Ignore sources**:
188
+ - Config `graph.exclude_patterns` (defaults: `node_modules`, `dist`, `*.test.*`, `*.spec.*`, `*.semantic.md`)
189
+ - `.gitignore` in project root
190
+
191
+ **Supported languages**: TypeScript, TSX, JavaScript, JSX, Python, Go, Rust
192
+
193
+ **Output**: `ScanResult { files: Map, carried: Map }`
194
+
195
+ ### Stage C — Empty Check
196
+
197
+ If both `files` and `carried` are empty (no scannable files found), the pipeline immediately returns an empty `ProjectMap` with zero stats. No further stages execute.
198
+
199
+ ### Stage D — AST Parser Initialization
200
+
201
+ **File**: `src/core/graph/parser.ts` | **Class**: `ASTParser`
202
+
203
+ Initializes `web-tree-sitter` (WASM-based parser). Grammar `.wasm` files are loaded from:
204
+
205
+ 1. `tree-sitter-wasms` npm package (primary)
206
+ 2. Local `./grammars/` directory (fallback)
207
+
208
+ Grammar path resolution is handled by `src/core/graph/grammar-paths.ts`.
209
+
210
+ > **Why WASM?** [BLK-1] Native `.node` tree-sitter bindings cause portability issues across platforms and Node versions. WASM provides uniform behavior everywhere.
211
+
212
+ ### Stage E — Parse Files
213
+
214
+ For each newly scanned file, the AST parser extracts:
215
+
216
+ ```mermaid
217
+ flowchart LR
218
+ FILE["Source file"] --> TREE["tree-sitter parse"]
219
+ TREE --> SYMBOLS["Extract symbols"]
220
+ TREE --> IMPORTS["Extract imports"]
221
+
222
+ SYMBOLS --> FN["Functions"]
223
+ SYMBOLS --> CLS["Classes + Methods"]
224
+ SYMBOLS --> IF["Interfaces"]
225
+ SYMBOLS --> TP["Types"]
226
+ SYMBOLS --> EN["Enums"]
227
+ SYMBOLS --> VAR["Variables"]
228
+
229
+ IMPORTS --> ES6["ES6 imports"]
230
+ IMPORTS --> REQ["require() calls"]
231
+
232
+ style FILE fill:#2b6cb0,stroke:#2c5282,color:#fff
233
+ style TREE fill:#805ad5,stroke:#6b46c1,color:#fff
234
+ ```
235
+
236
+ **Error threshold**: If >20% of the parsed tree consists of ERROR nodes (malformed syntax), the parser returns an empty result for that file rather than producing garbage data.
237
+
238
+ **Semantic carry-forward**: If a re-parsed file's hash still matches the existing map entry (as happens when `--force` re-parses unchanged files), the previous `semantic` data is preserved on the new node.
239
+
240
+ ### Stage F — Merge Parsed + Carried
241
+
242
+ Merges newly parsed `FileNode` entries with carried (unchanged) entries into a single `allFiles` map. Parsed entries overwrite carried ones if both exist for the same path.
243
+
244
+ ### Stage G — Import Resolution
245
+
246
+ **File**: `src/core/graph/resolver.ts` | **Class**: `ImportResolver`
247
+
248
+ Runs with **8 concurrent workers** (via `p-limit`) to resolve every import in every file.
249
+
250
+ ```mermaid
251
+ flowchart TD
252
+ IMP["Import source string"] --> REL{"Starts with . or /?"}
253
+ REL -- No --> ALIAS{"Matches tsconfig path alias?"}
254
+ ALIAS -- No --> EXT["Mark as external"]
255
+ ALIAS -- Yes --> SUBST["Substitute alias path"]
256
+ SUBST --> PROBE
257
+
258
+ REL -- Yes --> PROBE["Probe extensions"]
259
+ PROBE --> TS[".ts"]
260
+ PROBE --> TSX[".tsx"]
261
+ PROBE --> JS[".js"]
262
+ PROBE --> IDX["index.ts/tsx/js"]
263
+
264
+ TS --> KNOWN{"In knownFiles Set?"}
265
+ TSX --> KNOWN
266
+ JS --> KNOWN
267
+ IDX --> KNOWN
268
+
269
+ KNOWN -- Yes --> RESOLVED["resolved = relative path"]
270
+ KNOWN -- No --> UNRESOLVED["resolved = null"]
271
+
272
+ style IMP fill:#2b6cb0,stroke:#2c5282,color:#fff
273
+ style EXT fill:#dd6b20,stroke:#c05621,color:#fff
274
+ style RESOLVED fill:#2f855a,stroke:#276749,color:#fff
275
+ style UNRESOLVED fill:#e53e3e,stroke:#c53030,color:#fff
276
+ ```
277
+
278
+ **Critical contract [WATCH-4]**: The resolver makes **zero filesystem calls**. All existence checks use `.has()` on a `knownFiles: Set<string>` populated from both scanned and carried files. This makes resolution fast and deterministic.
279
+
280
+ **Tsconfig support**: Reads `tsconfig.json` `compilerOptions.paths` for wildcard aliases (e.g., `@/* -> src/*`).
281
+
282
+ **Path traversal guard**: Any resolved path that escapes `projectRoot` is marked as external.
283
+
284
+ ### Stage H — Graph Building
285
+
286
+ **File**: `src/core/graph/builder.ts` | **Class**: `GraphBuilder`
287
+
288
+ Builds the dependency graph and computes topological depth for every file.
289
+
290
+ ```mermaid
291
+ flowchart TD
292
+ FILES["All FileNodes"] --> RESET["Reset graph fields"]
293
+ RESET --> ADJ["Build adjacency lists"]
294
+ ADJ --> DEP_OUT["dependencies (edges OUT)"]
295
+ ADJ --> DEP_IN["dependents (edges IN)"]
296
+
297
+ DEP_IN --> INDEG["Calculate in-degree"]
298
+ INDEG --> KAHN["Kahn's BFS Topological Sort"]
299
+
300
+ KAHN --> QUEUE["Queue: nodes with in-degree 0"]
301
+ QUEUE --> PROCESS["Process queue"]
302
+ PROCESS --> DEPTH["Assign depth = max(dep_depth, current + 1)"]
303
+
304
+ PROCESS --> CYCLE{"Remaining nodes?"}
305
+ CYCLE -- Yes --> TARJAN["Tarjan's SCC"]
306
+ TARJAN --> CYCLE_DEPTH["Cycle depth = max external depth + 1"]
307
+ CYCLE -- No --> STATS["Compute stats"]
308
+
309
+ STATS --> CORE["Identify core modules (top N by depth)"]
310
+
311
+ style FILES fill:#2b6cb0,stroke:#2c5282,color:#fff
312
+ style KAHN fill:#805ad5,stroke:#6b46c1,color:#fff
313
+ style TARJAN fill:#e53e3e,stroke:#c53030,color:#fff
314
+ style CORE fill:#d69e2e,stroke:#b7791f,color:#000
315
+ ```
316
+
317
+ **Depth semantics**: Entry points (files nobody imports) have `depth = 0`. The deeper a file, the more foundational it is.
318
+
319
+ **Stats output**:
320
+ ```json
321
+ {
322
+ "total_files": 42,
323
+ "total_symbols": 234,
324
+ "total_edges": 156,
325
+ "core_modules": ["src/index.ts", "src/utils.ts", "..."]
326
+ }
327
+ ```
328
+
329
+ See [Graph Algorithms](#graph-algorithms) for detailed algorithm descriptions.
330
+
331
+ ### Stage I — Intermediate Map Write
332
+
333
+ Writes `project_map.json` **before** AI enrichment begins. This ensures that even if enrichment crashes or is cancelled via SIGINT, the structural map is already persisted.
334
+
335
+ Uses atomic write pattern with directory-level locking (see [Concurrency & Safety](#concurrency--safety)).
336
+
337
+ ### Stage J — AI Semantic Enrichment
338
+
339
+ **File**: `src/core/graph/enricher.ts` | **Class**: `SemanticEnricher`
340
+
341
+ Calls Google Gemini to generate semantic descriptions for each file. See [AI Enrichment Deep Dive](#ai-enrichment-deep-dive) for full details.
342
+
343
+ ### Stage K — Contract Writing
344
+
345
+ **File**: `src/core/graph/contract-writer.ts` | **Class**: `ContractWriter`
346
+
347
+ Writes `.semantic.md` files next to each source file. These are human-readable summaries generated from the AI enrichment data. See [Output Files](#output-files) for the template.
348
+
349
+ ### Stage L — Final Map Write
350
+
351
+ Writes the complete `project_map.json` with AI semantic data included. Same atomic write + lock pattern as Stage I.
352
+
353
+ ---
354
+
355
+ ## Data Structures
356
+
357
+ ### FileNode
358
+
359
+ The core unit of the map. One per scanned file.
360
+
361
+ ```mermaid
362
+ classDiagram
363
+ class FileNode {
364
+ +string file
365
+ +string hash
366
+ +string language
367
+ +SymbolEntry[] symbols
368
+ +ImportEntry[] imports
369
+ +string[] dependents
370
+ +string[] dependencies
371
+ +number depth
372
+ +string last_parsed_at
373
+ +SemanticInfo semantic
374
+ }
375
+
376
+ class SymbolEntry {
377
+ +string name
378
+ +string kind
379
+ +number line
380
+ +number end_line
381
+ +string signature
382
+ +boolean exported
383
+ }
384
+
385
+ class ImportEntry {
386
+ +string source
387
+ +string resolved
388
+ +string[] symbols
389
+ +boolean is_external
390
+ }
391
+
392
+ class SemanticInfo {
393
+ +string overview
394
+ +string purpose
395
+ +string[] key_logic
396
+ +string[] usage_context
397
+ +string source_hash
398
+ +string enriched_at
399
+ +string model
400
+ }
401
+
402
+ FileNode --> "0..*" SymbolEntry
403
+ FileNode --> "0..*" ImportEntry
404
+ FileNode --> "0..1" SemanticInfo
405
+ ```
406
+
407
+ ### Symbol Kinds
408
+
409
+ | Kind | Source Construct |
410
+ |-------------|-----------------------------------------------------|
411
+ | `function` | `function name()` or `const name = () =>` |
412
+ | `class` | `class Name {}` |
413
+ | `method` | Method inside a class (stored as `ClassName.method`) |
414
+ | `interface` | `interface Name {}` |
415
+ | `type` | `type Name = ...` |
416
+ | `enum` | `enum Name {}` |
417
+ | `variable` | Named exports, const declarations |
418
+ | `export` | Re-exports |
419
+
420
+ ### ProjectMap (Top-Level Output)
421
+
422
+ ```typescript
423
+ {
424
+ schema_version: 1,
425
+ generated_at: string, // ISO 8601
426
+ root: string, // Absolute project root path
427
+ files: Record<string, FileNode>,
428
+ stats: {
429
+ total_files: number,
430
+ total_symbols: number,
431
+ total_edges: number, // Total import edges
432
+ core_modules: string[] // Top N files by depth
433
+ }
434
+ }
435
+ ```
436
+
437
+ ---
438
+
439
+ ## Graph Algorithms
440
+
441
+ ### Kahn's Algorithm (Topological Sort)
442
+
443
+ Used to assign **depth** to each file in the dependency graph.
444
+
445
+ ```
446
+ 1. Compute in-degree for every node (in-degree = number of dependents)
447
+ 2. Enqueue all nodes with in-degree 0 at depth 0
448
+    (these are entry points — no one imports them)
449
+ 3. While queue is not empty:
450
+    a. Dequeue node N
451
+    b. For each dependency D of N:
452
+       - Decrement D's in-degree
453
+       - D.depth = max(D.depth, N.depth + 1)
454
+       - If D's in-degree reaches 0, enqueue D
455
+ 4. Remaining nodes (in-degree > 0) are part of cycles
456
+ ```
457
+
458
+ ```mermaid
459
+ graph TD
460
+ subgraph "Depth 0 (Entry Points)"
461
+ A["main.ts"]
462
+ B["cli.ts"]
463
+ end
464
+ subgraph "Depth 1"
465
+ C["commands/map.ts"]
466
+ D["commands/plan.ts"]
467
+ end
468
+ subgraph "Depth 2"
469
+ E["core/pipeline.ts"]
470
+ end
471
+ subgraph "Depth 3 (Core Module)"
472
+ F["core/config.ts"]
473
+ end
474
+
475
+ A --> C
476
+ B --> C
477
+ B --> D
478
+ C --> E
479
+ D --> E
480
+ E --> F
481
+
482
+ style A fill:#2f855a,stroke:#276749,color:#fff
483
+ style B fill:#2f855a,stroke:#276749,color:#fff
484
+ style C fill:#2b6cb0,stroke:#2c5282,color:#fff
485
+ style D fill:#2b6cb0,stroke:#2c5282,color:#fff
486
+ style E fill:#805ad5,stroke:#6b46c1,color:#fff
487
+ style F fill:#d69e2e,stroke:#b7791f,color:#000
488
+ ```
489
+
490
+ ### Tarjan's SCC (Strongly Connected Components)
491
+
492
+ Handles **circular dependencies** that Kahn's algorithm cannot process (they never reach in-degree 0).
493
+
494
+ ```
495
+ 1. After Kahn's completes, check for remaining nodes
496
+ 2. Run Tarjan's SCC to identify all cycles
497
+ 3. For each cycle:
498
+ a. Find the maximum depth among external nodes that import into the cycle
499
+ b. Assign all cycle members: depth = maxExternalDepth + 1
500
+ c. Log the full cycle path as a warning
501
+ ```
502
+
503
+ ```mermaid
504
+ graph LR
505
+ subgraph "Cycle (SCC)"
506
+ X["moduleA.ts"] --> Y["moduleB.ts"]
507
+ Y --> Z["moduleC.ts"]
508
+ Z --> X
509
+ end
510
+
511
+ EXT["external.ts"] --> X
512
+ EXT2["other.ts"] --> Y
513
+
514
+ style X fill:#e53e3e,stroke:#c53030,color:#fff
515
+ style Y fill:#e53e3e,stroke:#c53030,color:#fff
516
+ style Z fill:#e53e3e,stroke:#c53030,color:#fff
517
+ style EXT fill:#2f855a,stroke:#276749,color:#fff
518
+ style EXT2 fill:#2f855a,stroke:#276749,color:#fff
519
+ ```
520
+
521
+ ---
522
+
523
+ ## AI Enrichment Deep Dive
524
+
525
+ ### Model & API
526
+
527
+ - **Provider**: Google Generative AI (Gemini)
528
+ - **Default model**: `gemini-1.5-flash`
529
+ - **SDK**: `@google/generative-ai`
530
+
531
+ ### Authentication Flow
532
+
533
+ ```mermaid
534
+ flowchart TD
535
+ CHECK_ENV{"GEMINI_API_KEY set?"} -- Yes --> USE_KEY["Use API key"]
536
+ CHECK_ENV -- No --> CHECK_OAUTH{"OAuth credentials exist?"}
537
+ CHECK_OAUTH -- Yes --> TOKEN["Get/refresh access token"]
538
+ CHECK_OAUTH -- No --> ERROR["Error: graph_ai_key_missing"]
539
+
540
+ style USE_KEY fill:#2f855a,stroke:#276749,color:#fff
541
+ style TOKEN fill:#2f855a,stroke:#276749,color:#fff
542
+ style ERROR fill:#e53e3e,stroke:#c53030,color:#fff
543
+ ```
544
+
545
+ Credentials path: `~/.nomos/credentials.json`
546
+
547
+ ### Enrichment Per File
548
+
549
+ For each file in the map:
550
+
551
+ ```mermaid
552
+ flowchart TD
553
+ FILE["FileNode"] --> STALE{"source_hash changed?"}
554
+ STALE -- No --> SKIP["Skip (already enriched)"]
555
+ STALE -- Yes --> READ["Read file from disk"]
556
+ READ --> TRUNC["Truncate to max_file_chars (4000)"]
557
+ TRUNC --> PROMPT["Build prompt"]
558
+ PROMPT --> CALL["Call Gemini (structured JSON)"]
559
+ CALL --> VALIDATE["Validate with Zod"]
560
+ VALIDATE --> STORE["Store in FileNode.semantic"]
561
+
562
+ CALL -- "Fail" --> RETRY{"Attempts < 3?"}
563
+ RETRY -- Yes --> BACKOFF["Backoff: 2s, 4s, 8s"]
564
+ BACKOFF --> CALL
565
+ RETRY -- No --> FAIL["Record failure"]
566
+
567
+ style SKIP fill:#718096,stroke:#4a5568,color:#fff
568
+ style STORE fill:#2f855a,stroke:#276749,color:#fff
569
+ style FAIL fill:#e53e3e,stroke:#c53030,color:#fff
570
+ ```
571
+
572
+ ### Prompt Context
573
+
574
+ Each Gemini call receives:
575
+ - Filename and language
576
+ - List of exported symbols
577
+ - List of dependent files (who imports this)
578
+ - File content (truncated at line boundary to `max_file_chars`)
579
+
580
+ ### Structured Output Schema
581
+
582
+ ```json
583
+ {
584
+ "overview": "One-line summary of what this file does",
585
+ "purpose": "Why this file exists in the architecture",
586
+ "key_logic": ["Important algorithm or behavior #1", "..."],
587
+ "usage_context": ["How other files use this", "..."]
588
+ }
589
+ ```
590
+
591
+ ### Rate Limiting
592
+
593
+ - **Concurrency**: `ai_concurrency` parallel requests (default: 5)
594
+ - **Rate limit**: `ai_requests_per_minute` (default: 14)
595
+ - **Minimum gap**: `60000ms / ai_requests_per_minute` = ~4286ms between requests
596
+ - **Retry backoff**: 2s, 4s, 8s (3 attempts total)
597
+
598
+ ### Graceful Cancellation
599
+
600
+ A SIGINT handler sets a `cancellation.cancelled` flag. Between each file enrichment, the enricher checks this flag and stops early if set. The intermediate structural map (written in Stage I) is already saved.
601
+
602
+ ---
603
+
604
+ ## Incremental Processing
605
+
606
+ The map command is designed for fast re-runs via incremental processing:
607
+
608
+ ```mermaid
609
+ flowchart LR
610
+ subgraph "First Run"
611
+ SCAN1["Scan all files"] --> PARSE1["Parse all"]
612
+ PARSE1 --> ENRICH1["Enrich all"]
613
+ end
614
+
615
+ subgraph "Second Run (incremental)"
616
+ SCAN2["Scan all files"] --> HASH{"Hash changed?"}
617
+ HASH -- Yes --> PARSE2["Re-parse"]
618
+ HASH -- No --> CARRY["Carry forward FileNode"]
619
+ CARRY --> MERGE["Merge"]
620
+ PARSE2 --> MERGE
621
+
622
+ MERGE --> ENRICH2{"semantic.source_hash matches?"}
623
+ ENRICH2 -- Yes --> SKIP_AI["Skip AI call"]
624
+ ENRICH2 -- No --> RE_ENRICH["Re-enrich"]
625
+ end
626
+
627
+ style CARRY fill:#805ad5,stroke:#6b46c1,color:#fff
628
+ style SKIP_AI fill:#805ad5,stroke:#6b46c1,color:#fff
629
+ ```
630
+
631
+ **What gets carried forward**:
632
+ - Files whose SHA256 hash hasn't changed since the last map
633
+ - Their complete `FileNode` including symbols, imports, and semantic data
634
+ - Graph fields (dependents, dependencies, depth) are **always** recalculated
635
+
636
+ **What triggers re-processing**:
637
+ - File content change (different hash)
638
+ - `--force` flag
639
+ - File deletion (removed from map)
640
+ - New file (added to map)
641
+
642
+ ---
643
+
644
+ ## Concurrency & Safety
645
+
646
+ ### Directory-Level Locking
647
+
648
+ ```mermaid
649
+ sequenceDiagram
650
+ participant P1 as arc map (process 1)
651
+ participant FS as Filesystem
652
+ participant P2 as arc map (process 2)
653
+
654
+ P1->>FS: lock(output_dir/.project-map.lock)
655
+ FS-->>P1: Lock acquired
656
+ P1->>FS: Write project_map.json.tmp
657
+ P1->>FS: fsync(tmp)
658
+ P1->>FS: rename(tmp -> project_map.json)
659
+ P1->>FS: release lock
660
+
661
+ Note over P2,FS: P2 retries up to 10 times<br/>(200ms-2000ms backoff)
662
+
663
+ P2->>FS: lock(output_dir/.project-map.lock)
664
+ FS-->>P2: Lock acquired
665
+ P2->>FS: Write project_map.json.tmp
666
+ P2->>FS: fsync(tmp)
667
+ P2->>FS: rename(tmp -> project_map.json)
668
+ P2->>FS: release lock
669
+ ```
670
+
671
+ - **Library**: `proper-lockfile`
671
+ - **Stale timeout**: 60 seconds (accommodates slow AI enrichment writes)
672
+ - **Lock path**: `<output_dir>/.project-map.lock` (with the default `output_dir`: `tasks-management/graph/.project-map.lock`)
674
+
675
+ ### Atomic Write Pattern
676
+
677
+ Every map write follows: **write to `.tmp`** -> **`fsync`** -> **`rename`**. This ensures readers never see a partially written file.
678
+
679
+ ### Import Resolution Concurrency
680
+
681
+ Imports are resolved in parallel by 8 concurrent workers (via `p-limit`). The `knownFiles` Set is read-only during resolution, so no synchronization is needed.
682
+
683
+ ---
684
+
685
+ ## Configuration Reference
686
+
687
+ All map-related settings live under `graph` in `.nomos-config.json`:
688
+
689
+ ```json
690
+ {
691
+ "graph": {
692
+ "exclude_patterns": [
693
+ "node_modules", "dist", "*.test.*",
694
+ "*.spec.*", "*.semantic.md"
695
+ ],
696
+ "ai_enrichment": true,
697
+ "ai_model": "gemini-1.5-flash",
698
+ "ai_concurrency": 5,
699
+ "ai_requests_per_minute": 14,
700
+ "max_file_chars": 4000,
701
+ "core_modules_count": 10,
702
+ "output_dir": "tasks-management/graph"
703
+ }
704
+ }
705
+ ```
706
+
707
+ | Field | Type | Default | Description |
708
+ |--------------------------|------------|--------------------------|------------------------------------------|
709
+ | `exclude_patterns` | `string[]` | See above | Glob patterns to exclude from scanning |
710
+ | `ai_enrichment` | `boolean` | `true` | Enable/disable AI semantic enrichment |
711
+ | `ai_model` | `string` | `gemini-1.5-flash` | Gemini model to use |
712
+ | `ai_concurrency` | `number` | `5` | Max parallel AI requests |
713
+ | `ai_requests_per_minute` | `number` | `14` | Rate limit for Gemini API calls |
714
+ | `max_file_chars` | `number` | `4000` | Max characters sent to AI per file |
715
+ | `core_modules_count` | `number` | `10` | Number of core modules to identify |
716
+ | `output_dir` | `string` | `tasks-management/graph` | Directory for map output |
717
+
718
+ ---
719
+
720
+ ## Output Files
721
+
722
+ ### `project_map.json`
723
+
724
+ The primary output. Full schema:
725
+
726
+ ```json
727
+ {
728
+ "schema_version": 1,
729
+ "generated_at": "2026-04-10T12:34:56.789Z",
730
+ "root": "/absolute/project/root",
731
+ "files": {
732
+ "src/index.ts": {
733
+ "file": "src/index.ts",
734
+ "hash": "sha256:abc123...",
735
+ "language": "typescript",
736
+ "symbols": [
737
+ {
738
+ "name": "main",
739
+ "kind": "function",
740
+ "line": 10,
741
+ "end_line": 25,
742
+ "signature": "function main(): Promise<void>",
743
+ "exported": true
744
+ }
745
+ ],
746
+ "imports": [
747
+ {
748
+ "source": "./config",
749
+ "resolved": "src/config.ts",
750
+ "symbols": ["loadConfig"],
751
+ "is_external": false
752
+ }
753
+ ],
754
+ "dependents": [],
755
+ "dependencies": ["src/config.ts"],
756
+ "depth": 0,
757
+ "last_parsed_at": "2026-04-10T12:34:56.789Z",
758
+ "semantic": {
759
+ "overview": "Application entry point",
760
+ "purpose": "Bootstraps the CLI and starts execution",
761
+ "key_logic": ["Loads configuration", "Initializes services"],
762
+ "usage_context": ["Called from bin/arc"],
763
+ "source_hash": "sha256:abc123...",
764
+ "enriched_at": "2026-04-10T12:34:56.789Z",
765
+ "model": "gemini-1.5-flash"
766
+ }
767
+ }
768
+ },
769
+ "stats": {
770
+ "total_files": 42,
771
+ "total_symbols": 234,
772
+ "total_edges": 156,
773
+ "core_modules": ["src/config.ts", "src/utils.ts"]
774
+ }
775
+ }
776
+ ```
777
+
778
+ ### `.semantic.md` (Contract Files)
779
+
780
+ Written next to each source file. Example for `src/config.ts`:
781
+
782
+ ```markdown
783
+ # config.ts -- Semantic Contract
784
+ > Auto-generated by `arc map` -- do not edit manually.
785
+
786
+ ## Overview
787
+ Loads and validates the nomos-arc CLI configuration.
788
+
789
+ ## Purpose
790
+ Provides typed, validated configuration to all CLI commands.
791
+
792
+ ## Key Logic
793
+ 1. Walks up the filesystem to find .nomos-config.json
794
+ 2. Validates config with Zod schemas
795
+ 3. Applies default values for missing fields
796
+
797
+ ## Usage Context
798
+ Used by: commands/map.ts, commands/plan.ts, commands/review.ts
799
+ - Imported by every CLI command at startup
800
+ - Config object is passed down through the entire pipeline
801
+
802
+ ---
803
+ *Enriched at: 2026-04-10T12:34:56.789Z | Model: gemini-1.5-flash | Hash: sha256:abc123...*
804
+ ```
805
+
806
+ Contracts are **skipped** if the existing `.semantic.md` already contains the current `source_hash`.
807
+
808
+ ---
809
+
810
+ ## Exit Codes
811
+
812
+ | Code | Meaning |
813
+ |------|----------------------------------------|
814
+ | `0` | Success — all files mapped and enriched |
815
+ | `1` | Fatal error (config, auth, parse, I/O) |
816
+ | `10` | Partial success — some AI enrichments failed |
817
+
818
+ ---
819
+
820
+ ## File Map (Source Code)
821
+
822
+ ```
823
+ src/
824
+ ├── commands/
825
+ │ └── map.ts # Command registration & CLI interface
826
+ └── core/
827
+ ├── graph/
828
+ │ ├── pipeline.ts # MapPipeline orchestrator (all stages)
829
+ │ ├── scanner.ts # FileScanner (glob + hash + incremental)
830
+ │ ├── parser.ts # ASTParser (web-tree-sitter WASM)
831
+ │ ├── grammar-paths.ts # WASM grammar file resolution
832
+ │ ├── resolver.ts # ImportResolver (zero-fs, knownFiles Set)
833
+ │ ├── builder.ts # GraphBuilder (Kahn + Tarjan)
834
+ │ ├── enricher.ts # SemanticEnricher (Gemini API)
835
+ │ ├── contract-writer.ts # ContractWriter (.semantic.md)
836
+ │ └── map-schema.ts # Zod schemas + read/write helpers
837
+ ├── config.ts # Config loading & validation
838
+ ├── logger.ts # Console + file logging
839
+ ├── errors.ts # NomosError codes
840
+ └── auth/
841
+ └── manager.ts # AuthManager (API key + OAuth)
842
+ ```
843
+
844
+ ---
845
+
846
+ ## Design Decisions & Fixes
847
+
848
+ | Tag | Location | Decision |
849
+ |--------|----------------------|---------------------------------------------------------------------------------------------|
850
+ | BLK-1 | grammar-paths.ts | WASM-based parsing avoids native `.node` binding portability issues |
851
+ | BLK-2 | pipeline.ts | Directory-level lockfile for atomic, serialized map writes |
852
+ | BLK-3 | builder.ts | Reset all graph fields before building to prevent stale incremental data |
853
+ | BLK-4 | builder.ts | Kahn's BFS for correct topological depth (entry points = depth 0) |
854
+ | WATCH-1| parser.ts | web-tree-sitter WASM init is async; must await before parsing |
855
+ | WATCH-3| pipeline.ts | Lock the entire output directory, not just the map file |
856
+ | WATCH-4| resolver.ts | knownFiles Set contract: zero filesystem calls during resolution |
857
+ | AMB-1 | builder.ts | Depth direction clarified: in-degree = dependents.length |
858
+ | AMB-2 | enricher.ts | Read file from disk (not ScanResult) for fresh content during enrichment |
859
+ | AMB-3 | parser.ts | web-tree-sitter loads .wasm uniformly (no CJS/ESM ambiguity) |
860
+ | AMB-4 | resolver.ts | Zero filesystem calls; all existence checks via `knownFiles.has()` |
861
+ | AMB-5 | contract-writer.ts | Create parent directories before writing `.semantic.md` |
862
+ | AMB-6 | map.ts | Exit code 10 for partial AI failure (not 2) |
863
+ | GAP-1 | scanner.ts | Skip files >500KB to avoid memory exhaustion |
864
+ | GAP-2 | pipeline.ts | Write intermediate map before AI enrichment to preserve structural data on crash |
865
+ | GAP-3 | enricher.ts | Cancellation flag for graceful SIGINT during enrichment |
866
+ | GAP-5 | map-schema.ts | Forward compatibility ceiling: newer map files trigger upgrade error |
867
+ | GAP-6 | builder.ts | Tarjan's SCC algorithm for circular dependency handling |