@opencodehub/cli 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (191)
  1. package/LICENSE +202 -0
  2. package/README.md +85 -0
  3. package/dist/agent-context.d.ts +54 -0
  4. package/dist/agent-context.d.ts.map +1 -0
  5. package/dist/agent-context.js +122 -0
  6. package/dist/agent-context.js.map +1 -0
  7. package/dist/cobol-proleap-setup.d.ts +77 -0
  8. package/dist/cobol-proleap-setup.d.ts.map +1 -0
  9. package/dist/cobol-proleap-setup.js +289 -0
  10. package/dist/cobol-proleap-setup.js.map +1 -0
  11. package/dist/commands/analyze.d.ts +234 -0
  12. package/dist/commands/analyze.d.ts.map +1 -0
  13. package/dist/commands/analyze.js +1096 -0
  14. package/dist/commands/analyze.js.map +1 -0
  15. package/dist/commands/augment.d.ts +48 -0
  16. package/dist/commands/augment.d.ts.map +1 -0
  17. package/dist/commands/augment.js +249 -0
  18. package/dist/commands/augment.js.map +1 -0
  19. package/dist/commands/baseline.d.ts +68 -0
  20. package/dist/commands/baseline.d.ts.map +1 -0
  21. package/dist/commands/baseline.js +110 -0
  22. package/dist/commands/baseline.js.map +1 -0
  23. package/dist/commands/bench.d.ts +54 -0
  24. package/dist/commands/bench.d.ts.map +1 -0
  25. package/dist/commands/bench.js +283 -0
  26. package/dist/commands/bench.js.map +1 -0
  27. package/dist/commands/ci-init.d.ts +37 -0
  28. package/dist/commands/ci-init.d.ts.map +1 -0
  29. package/dist/commands/ci-init.js +115 -0
  30. package/dist/commands/ci-init.js.map +1 -0
  31. package/dist/commands/clean.d.ts +13 -0
  32. package/dist/commands/clean.d.ts.map +1 -0
  33. package/dist/commands/clean.js +38 -0
  34. package/dist/commands/clean.js.map +1 -0
  35. package/dist/commands/code-pack.d.ts +105 -0
  36. package/dist/commands/code-pack.d.ts.map +1 -0
  37. package/dist/commands/code-pack.js +187 -0
  38. package/dist/commands/code-pack.js.map +1 -0
  39. package/dist/commands/context.d.ts +30 -0
  40. package/dist/commands/context.d.ts.map +1 -0
  41. package/dist/commands/context.js +237 -0
  42. package/dist/commands/context.js.map +1 -0
  43. package/dist/commands/detect-changes.d.ts +26 -0
  44. package/dist/commands/detect-changes.d.ts.map +1 -0
  45. package/dist/commands/detect-changes.js +73 -0
  46. package/dist/commands/detect-changes.js.map +1 -0
  47. package/dist/commands/doctor.d.ts +52 -0
  48. package/dist/commands/doctor.d.ts.map +1 -0
  49. package/dist/commands/doctor.js +472 -0
  50. package/dist/commands/doctor.js.map +1 -0
  51. package/dist/commands/find-enclosing-symbol.d.ts +67 -0
  52. package/dist/commands/find-enclosing-symbol.d.ts.map +1 -0
  53. package/dist/commands/find-enclosing-symbol.js +106 -0
  54. package/dist/commands/find-enclosing-symbol.js.map +1 -0
  55. package/dist/commands/group.d.ts +123 -0
  56. package/dist/commands/group.d.ts.map +1 -0
  57. package/dist/commands/group.js +448 -0
  58. package/dist/commands/group.js.map +1 -0
  59. package/dist/commands/impact.d.ts +23 -0
  60. package/dist/commands/impact.d.ts.map +1 -0
  61. package/dist/commands/impact.js +91 -0
  62. package/dist/commands/impact.js.map +1 -0
  63. package/dist/commands/index-repo.d.ts +39 -0
  64. package/dist/commands/index-repo.d.ts.map +1 -0
  65. package/dist/commands/index-repo.js +148 -0
  66. package/dist/commands/index-repo.js.map +1 -0
  67. package/dist/commands/ingest-sarif.d.ts +64 -0
  68. package/dist/commands/ingest-sarif.d.ts.map +1 -0
  69. package/dist/commands/ingest-sarif.js +381 -0
  70. package/dist/commands/ingest-sarif.js.map +1 -0
  71. package/dist/commands/init.d.ts +75 -0
  72. package/dist/commands/init.d.ts.map +1 -0
  73. package/dist/commands/init.js +315 -0
  74. package/dist/commands/init.js.map +1 -0
  75. package/dist/commands/list.d.ts +17 -0
  76. package/dist/commands/list.d.ts.map +1 -0
  77. package/dist/commands/list.js +79 -0
  78. package/dist/commands/list.js.map +1 -0
  79. package/dist/commands/mcp.d.ts +8 -0
  80. package/dist/commands/mcp.d.ts.map +1 -0
  81. package/dist/commands/mcp.js +28 -0
  82. package/dist/commands/mcp.js.map +1 -0
  83. package/dist/commands/open-store.d.ts +25 -0
  84. package/dist/commands/open-store.d.ts.map +1 -0
  85. package/dist/commands/open-store.js +47 -0
  86. package/dist/commands/open-store.js.map +1 -0
  87. package/dist/commands/pack.d.ts +35 -0
  88. package/dist/commands/pack.d.ts.map +1 -0
  89. package/dist/commands/pack.js +83 -0
  90. package/dist/commands/pack.js.map +1 -0
  91. package/dist/commands/query.d.ts +85 -0
  92. package/dist/commands/query.d.ts.map +1 -0
  93. package/dist/commands/query.js +309 -0
  94. package/dist/commands/query.js.map +1 -0
  95. package/dist/commands/scan.d.ts +81 -0
  96. package/dist/commands/scan.d.ts.map +1 -0
  97. package/dist/commands/scan.js +407 -0
  98. package/dist/commands/scan.js.map +1 -0
  99. package/dist/commands/setup.d.ts +178 -0
  100. package/dist/commands/setup.d.ts.map +1 -0
  101. package/dist/commands/setup.js +370 -0
  102. package/dist/commands/setup.js.map +1 -0
  103. package/dist/commands/sql.d.ts +19 -0
  104. package/dist/commands/sql.d.ts.map +1 -0
  105. package/dist/commands/sql.js +51 -0
  106. package/dist/commands/sql.js.map +1 -0
  107. package/dist/commands/status.d.ts +13 -0
  108. package/dist/commands/status.d.ts.map +1 -0
  109. package/dist/commands/status.js +66 -0
  110. package/dist/commands/status.js.map +1 -0
  111. package/dist/commands/verdict-render.d.ts +33 -0
  112. package/dist/commands/verdict-render.d.ts.map +1 -0
  113. package/dist/commands/verdict-render.js +123 -0
  114. package/dist/commands/verdict-render.js.map +1 -0
  115. package/dist/commands/verdict.d.ts +61 -0
  116. package/dist/commands/verdict.d.ts.map +1 -0
  117. package/dist/commands/verdict.js +146 -0
  118. package/dist/commands/verdict.js.map +1 -0
  119. package/dist/commands/wiki.d.ts +26 -0
  120. package/dist/commands/wiki.d.ts.map +1 -0
  121. package/dist/commands/wiki.js +74 -0
  122. package/dist/commands/wiki.js.map +1 -0
  123. package/dist/editors/claude-code.d.ts +23 -0
  124. package/dist/editors/claude-code.d.ts.map +1 -0
  125. package/dist/editors/claude-code.js +58 -0
  126. package/dist/editors/claude-code.js.map +1 -0
  127. package/dist/editors/codex.d.ts +22 -0
  128. package/dist/editors/codex.d.ts.map +1 -0
  129. package/dist/editors/codex.js +59 -0
  130. package/dist/editors/codex.js.map +1 -0
  131. package/dist/editors/cursor.d.ts +13 -0
  132. package/dist/editors/cursor.d.ts.map +1 -0
  133. package/dist/editors/cursor.js +21 -0
  134. package/dist/editors/cursor.js.map +1 -0
  135. package/dist/editors/index.d.ts +12 -0
  136. package/dist/editors/index.d.ts.map +1 -0
  137. package/dist/editors/index.js +11 -0
  138. package/dist/editors/index.js.map +1 -0
  139. package/dist/editors/opencode.d.ts +23 -0
  140. package/dist/editors/opencode.d.ts.map +1 -0
  141. package/dist/editors/opencode.js +61 -0
  142. package/dist/editors/opencode.js.map +1 -0
  143. package/dist/editors/types.d.ts +33 -0
  144. package/dist/editors/types.d.ts.map +1 -0
  145. package/dist/editors/types.js +19 -0
  146. package/dist/editors/types.js.map +1 -0
  147. package/dist/editors/windows-wrap.d.ts +19 -0
  148. package/dist/editors/windows-wrap.d.ts.map +1 -0
  149. package/dist/editors/windows-wrap.js +28 -0
  150. package/dist/editors/windows-wrap.js.map +1 -0
  151. package/dist/editors/windsurf.d.ts +12 -0
  152. package/dist/editors/windsurf.d.ts.map +1 -0
  153. package/dist/editors/windsurf.js +21 -0
  154. package/dist/editors/windsurf.js.map +1 -0
  155. package/dist/embedder-downloader.d.ts +87 -0
  156. package/dist/embedder-downloader.d.ts.map +1 -0
  157. package/dist/embedder-downloader.js +261 -0
  158. package/dist/embedder-downloader.js.map +1 -0
  159. package/dist/fs-atomic.d.ts +22 -0
  160. package/dist/fs-atomic.d.ts.map +1 -0
  161. package/dist/fs-atomic.js +28 -0
  162. package/dist/fs-atomic.js.map +1 -0
  163. package/dist/groups.d.ts +64 -0
  164. package/dist/groups.d.ts.map +1 -0
  165. package/dist/groups.js +172 -0
  166. package/dist/groups.js.map +1 -0
  167. package/dist/index.d.ts +11 -0
  168. package/dist/index.d.ts.map +1 -0
  169. package/dist/index.js +703 -0
  170. package/dist/index.js.map +1 -0
  171. package/dist/lib/is-indexed.d.ts +20 -0
  172. package/dist/lib/is-indexed.d.ts.map +1 -0
  173. package/dist/lib/is-indexed.js +35 -0
  174. package/dist/lib/is-indexed.js.map +1 -0
  175. package/dist/registry.d.ts +64 -0
  176. package/dist/registry.d.ts.map +1 -0
  177. package/dist/registry.js +145 -0
  178. package/dist/registry.js.map +1 -0
  179. package/dist/scip-downloader.d.ts +138 -0
  180. package/dist/scip-downloader.d.ts.map +1 -0
  181. package/dist/scip-downloader.js +372 -0
  182. package/dist/scip-downloader.js.map +1 -0
  183. package/dist/scip-pins.d.ts +99 -0
  184. package/dist/scip-pins.d.ts.map +1 -0
  185. package/dist/scip-pins.js +195 -0
  186. package/dist/scip-pins.js.map +1 -0
  187. package/dist/skills-gen.d.ts +47 -0
  188. package/dist/skills-gen.d.ts.map +1 -0
  189. package/dist/skills-gen.js +292 -0
  190. package/dist/skills-gen.js.map +1 -0
  191. package/package.json +81 -0
@@ -0,0 +1,1096 @@
1
+ /**
2
+ * `codehub analyze [path]` — index a repository.
3
+ *
4
+ * Flow:
5
+ * 1. Resolve `repoPath` (default `process.cwd()`).
6
+ * 2. Read the registry. If `!force` and the recorded `lastCommit` matches
7
+ * the pipeline's fresh commit, emit an "up to date" message and return
8
+ * without doing work.
9
+ * 3. Otherwise run `runIngestion(repoPath, {...})`, then open a writable
10
+ * `Store` (composed graph + temporal) via `openStore`, then
11
+ * `createSchema()`, `bulkLoad()`, and `setMeta()`.
12
+ * 4. Update the registry and, unless suppressed, stamp AGENTS.md + CLAUDE.md.
13
+ * 5. Print a one-line summary.
14
+ *
15
+ * The `--offline` flag is a hard promise: the ingestion pipeline never opens
16
+ * a network socket, and embeddings are a no-op for MVP. We log the promise so
17
+ * reviewers can audit call sites.
18
+ */
19
+ import { spawn } from "node:child_process";
20
+ import { mkdir } from "node:fs/promises";
21
+ import { basename, join, resolve } from "node:path";
22
+ import { NODE_KINDS, RELATION_TYPES, SCHEMA_VERSION, } from "@opencodehub/core-types";
23
+ import { pipeline } from "@opencodehub/ingestion";
24
+ import { openStore, resolveDbPath, resolveRepoMetaDir, writeStoreMeta, } from "@opencodehub/storage";
25
+ import { writeAgentContextFiles } from "../agent-context.js";
26
+ import { readRegistry, upsertRegistry } from "../registry.js";
27
+ import { generateSkills } from "../skills-gen.js";
28
+ export async function runAnalyze(path, opts = {}) {
29
+ const started = Date.now();
30
+ const repoPath = resolve(path);
31
+ const repoName = basename(repoPath);
32
+ if (opts.offline) {
33
+ log("codehub analyze: offline mode (no network calls will be made)");
34
+ }
35
+ if (opts.embeddings) {
36
+ log("codehub analyze: --embeddings enabled " +
37
+ "(requires `codehub setup --embeddings` to have installed weights)");
38
+ }
39
+ // Fast path: if the registry knows about this repo and the commit hasn't
40
+ // moved, short-circuit without re-ingesting.
41
+ if (!opts.force) {
42
+ const fastPath = await checkFastPath(repoName, repoPath, opts);
43
+ if (fastPath !== undefined) {
44
+ log(`codehub analyze: ${repoName} already up to date at ${fastPath.lastCommit ?? "unknown"} ` +
45
+ `(${fastPath.nodeCount} nodes, ${fastPath.edgeCount} edges)`);
46
+ return {
47
+ repoPath,
48
+ repoName,
49
+ nodeCount: fastPath.nodeCount,
50
+ edgeCount: fastPath.edgeCount,
51
+ graphHash: "",
52
+ durationMs: Date.now() - started,
53
+ upToDate: true,
54
+ warnings: [],
55
+ };
56
+ }
57
+ }
58
+ // Load a prior graph projection for the incremental-scope phase when the
59
+ // CLI was not invoked with --force. The projection is a thin wrapper
60
+ // around the prior DuckDB index (File nodes + IMPORTS / EXTENDS /
61
+ // IMPLEMENTS edges). `loadPreviousGraph` silently returns undefined if
62
+ // the store does not exist or cannot be opened; incremental-scope then
63
+ // reports mode="full" with reason="no-prior-graph".
64
+ const incrementalFrom = opts.force === true ? undefined : await loadPreviousGraph(repoPath);
65
+ // Resolve the effective `summaries` flag. P04 flipped the default ON, so
66
+ // `undefined` now means "on". The `CODEHUB_BEDROCK_DISABLED=1` env kill-
67
+ // switch forces off regardless of the flag; `offline` is enforced later
68
+ // inside the phase itself (the phase's own invariant).
69
+ const summariesEnabled = resolveSummariesEnabled(opts.summaries, process.env);
70
+ // Open a read-only store upfront so the `summarize` phase can probe the
71
+ // prior summary rows before work is queued AND so we can inspect the
72
+ // prior run's `storeMeta.stats` to resolve `--max-summaries auto`. We
73
+ // keep the handle open for the duration of `runIngestion` and close it
74
+ // in a finally block. `summaries` must be enabled for the adapter to
75
+ // matter; skip the cost of a read-only open when the flag is off.
76
+ const summaryCacheAdapter = summariesEnabled
77
+ ? await openSummaryCacheAdapter(repoPath)
78
+ : undefined;
79
+ // Mirror the same pattern for the embeddings phase's content-hash skip.
80
+ // Only open when `--embeddings` is on AND `--force` is off — force
81
+ // re-embeds everything, so the adapter would do no useful work. When the
82
+ // prior DB is absent the adapter returns undefined and the phase
83
+ // degrades to "every chunk is new".
84
+ const embeddingHashAdapter = opts.embeddings === true && opts.force !== true
85
+ ? await openEmbeddingHashCacheAdapter(repoPath)
86
+ : undefined;
87
+ // Resolve `--max-summaries auto` against the prior run's callable count,
88
+ // if any. `auto` bounds the cap at 10% of the SCIP-confirmed callable
89
+ // symbols (capped at 500); on a cold first run the prior meta is absent
90
+ // and we fall back to a conservative 50. `0` and positive integers pass
91
+ // through unchanged. Unknown inputs (string without the "auto" literal)
92
+ // are treated as "auto" for forward compatibility.
93
+ const resolvedMaxSummaries = await resolveMaxSummariesCap(repoPath, opts.maxSummariesPerRun, summariesEnabled);
94
+ const pipelineOptions = {
95
+ ...(opts.force !== undefined ? { force: opts.force } : {}),
96
+ ...(opts.offline !== undefined ? { offline: opts.offline } : {}),
97
+ ...(opts.verbose !== undefined ? { verbose: opts.verbose } : {}),
98
+ ...(opts.embeddings !== undefined ? { embeddings: opts.embeddings } : {}),
99
+ ...(opts.embeddingsVariant !== undefined ? { embeddingsVariant: opts.embeddingsVariant } : {}),
100
+ ...(opts.embeddingsModelDir !== undefined
101
+ ? { embeddingsModelDir: opts.embeddingsModelDir }
102
+ : {}),
103
+ ...(opts.embeddingsGranularity !== undefined
104
+ ? { embeddingsGranularity: opts.embeddingsGranularity }
105
+ : {}),
106
+ ...(opts.embeddingsWorkers !== undefined ? { embeddingsWorkers: opts.embeddingsWorkers } : {}),
107
+ ...(opts.embeddingsBatchSize !== undefined
108
+ ? { embeddingsBatchSize: opts.embeddingsBatchSize }
109
+ : {}),
110
+ ...(opts.sbom !== undefined ? { sbom: opts.sbom } : {}),
111
+ ...(opts.coverage !== undefined ? { coverage: opts.coverage } : {}),
112
+ summaries: summariesEnabled,
113
+ maxSummariesPerRun: resolvedMaxSummaries,
114
+ ...(opts.summaryModel !== undefined ? { summaryModel: opts.summaryModel } : {}),
115
+ ...(opts.strictDetectors !== undefined ? { strictDetectors: opts.strictDetectors } : {}),
116
+ ...(summaryCacheAdapter !== undefined
117
+ ? { summaryCacheAdapter: summaryCacheAdapter.adapter }
118
+ : {}),
119
+ ...(embeddingHashAdapter !== undefined
120
+ ? { embeddingHashCacheAdapter: embeddingHashAdapter.adapter }
121
+ : {}),
122
+ ...(incrementalFrom !== undefined ? { incrementalFrom } : {}),
123
+ };
124
+ let result;
125
+ try {
126
+ result = await pipeline.runIngestion(repoPath, pipelineOptions);
127
+ }
128
+ finally {
129
+ await summaryCacheAdapter?.close();
130
+ await embeddingHashAdapter?.close();
131
+ }
132
+ logWarnings(result.warnings, opts.verbose === true);
133
+ // Persist to the composed graph + temporal store. Backend resolution is
134
+ // env-driven (`CODEHUB_STORE`); the default `"duck"` writes to
135
+ // `<repo>/.codehub/graph.duckdb` exactly like the legacy path. The
136
+ // temporal-tier writes (`bulkLoadCochanges`, `bulkLoadSymbolSummaries`)
137
+ // route through `store.temporal`.
138
+ await mkdir(resolveRepoMetaDir(repoPath), { recursive: true });
139
+ const dbPath = resolveDbPath(repoPath);
140
+ const store = await openStore({ path: dbPath, backend: "auto" });
141
+ try {
142
+ await store.graph.open();
143
+ if (store.graphFile !== store.temporalFile)
144
+ await store.temporal.open();
145
+ await store.graph.createSchema();
146
+ if (store.graphFile !== store.temporalFile)
147
+ await store.temporal.createSchema();
148
+ await store.graph.bulkLoad(result.graph);
149
+ // Persist cochange rows to the dedicated `cochanges` table. `bulkLoad` in
150
+ // replace mode already truncated it, but `bulkLoadCochanges` does its own
151
+ // DELETE inside the same transaction so the call is idempotent even on
152
+ // upsert paths that keep the prior graph. Empty row sets collapse into a
153
+ // cheap DELETE.
154
+ if (result.cochange !== undefined) {
155
+ await store.temporal.bulkLoadCochanges(result.cochange.rows);
156
+ }
157
+ // Persist freshly produced summary rows. The phase returns an empty
158
+ // `rows` array in the common gated-off / dry-run case so this is a
159
+ // cheap no-op. A non-empty payload means the operator explicitly ran
160
+ // with `--summaries --max-summaries > 0` and accepted the Bedrock
161
+ // cost; we persist under the temporal-tier surface.
162
+ if (result.summarize !== undefined && result.summarize.rows.length > 0) {
163
+ await store.temporal.bulkLoadSymbolSummaries(result.summarize.rows);
164
+ log(`codehub analyze: persisted ${result.summarize.rows.length} symbol summaries ` +
165
+ `(promptVersion=${result.summarize.promptVersion})`);
166
+ }
167
+ // Surface the summarize-phase counters whenever the flag was enabled —
168
+ // even in dry-run (maxSummaries=0) mode — so operators can inspect how
169
+ // many symbols WOULD have been summarized before unlocking Bedrock.
170
+ if (summariesEnabled && result.summarize !== undefined) {
171
+ const s = result.summarize;
172
+ log(`codehub analyze: summarize — considered=${s.considered}, ` +
173
+ `skippedUnconfirmed=${s.skippedUnconfirmed}, cacheHits=${s.cacheHits}, ` +
174
+ `summarized=${s.summarized}, wouldHaveSummarized=${s.wouldHaveSummarized}, ` +
175
+ `failed=${s.failed} [promptVersion=${s.promptVersion}]`);
176
+ }
177
+ // Persist embeddings emitted by the `embeddings` phase (if any). The
178
+ // phase returns an empty `rows` array when `opts.embeddings` is false
179
+ // or when weights are missing, so this call is a cheap no-op in the
180
+ // common case. We upsert AFTER bulkLoad so the replace-mode wipe
181
+ // doesn't drop freshly-written embeddings.
182
+ if (result.embeddings !== undefined && result.embeddings.rows.length > 0) {
183
+ await store.graph.upsertEmbeddings(result.embeddings.rows);
184
+ log(`codehub analyze: upserted ${result.embeddings.rows.length} embeddings ` +
185
+ `(${result.embeddings.embeddingsModelId})`);
186
+ }
187
+ const indexedAt = new Date().toISOString();
188
+ // Numeric provenance stats, if any. embeddingsHash is a string and is
189
+ // persisted to the sidecar file instead of StoreMeta.stats (which is
190
+ // Record<string, number>).
191
+ const byKindStats = result.stats.byKind !== undefined ? { ...result.stats.byKind } : {};
192
+ if (result.embeddings?.ranEmbedder) {
193
+ byKindStats["embeddingsCount"] = result.embeddings.embeddingsInserted;
194
+ }
195
+ // Cache-health stats: the parse-cache hit ratio and on-disk size are
196
+ // surfaced to `codehub doctor` and `codehub status` via the meta
197
+ // sidecar. Missing ratio (no parse phase) → omit the field so pre-1.1
198
+ // meta.json snapshots keep round-tripping byte-identically.
199
+ const parseCache = result.stats.parseCache;
200
+ const cacheDir = join(repoPath, ".codehub", "parse-cache");
201
+ const cacheSize = await pipeline.computeCacheSize(cacheDir);
202
+ const storeMeta = {
203
+ schemaVersion: SCHEMA_VERSION,
204
+ indexedAt,
205
+ nodeCount: result.graph.nodeCount(),
206
+ edgeCount: result.graph.edgeCount(),
207
+ ...(result.stats.currentCommit !== undefined
208
+ ? { lastCommit: result.stats.currentCommit }
209
+ : {}),
210
+ stats: byKindStats,
211
+ ...(parseCache !== undefined ? { cacheHitRatio: parseCache.ratio } : {}),
212
+ cacheSizeBytes: cacheSize.bytes,
213
+ };
214
+ await store.graph.setMeta(storeMeta);
215
+ await writeStoreMeta(repoPath, storeMeta);
216
+ // Persist the scan-state sidecar so the next analyze invocation can feed
217
+ // the incremental-scope phase via loadPreviousGraph(). We write this
218
+ // alongside the DuckDB file under `<repo>/.codehub` so a clean of the
219
+ // meta dir invalidates both the index and the incremental state together.
220
+ if (result.scan !== undefined) {
221
+ await writeScanState(repoPath, result.scan.files.map((f) => ({ relPath: f.relPath, contentSha: f.sha256 })));
222
+ }
223
+ // Opt-in skill generation. Walk Community nodes just persisted above and
224
+ // emit one SKILL.md per cluster under `<repo>/.codehub/skills/`. Runs
225
+ // against the still-open DuckDB handle so there's no re-open cost, and
226
+ // any per-skill failure (read-only dir, permission denied, disk full)
227
+ // logs-and-continues — analyze never aborts because of a skill write.
228
+ if (opts.skills === true) {
229
+ try {
230
+ const emitted = await generateSkills(store.graph, repoPath, { log });
231
+ log(`codehub analyze: generated ${emitted} SKILL.md ${emitted === 1 ? "file" : "files"}`);
232
+ }
233
+ catch (err) {
234
+ log(`codehub analyze: skill generation failed: ${err.message}`);
235
+ }
236
+ }
237
+ }
238
+ finally {
239
+ await store.close();
240
+ }
241
+ const entry = {
242
+ name: repoName,
243
+ path: repoPath,
244
+ indexedAt: new Date().toISOString(),
245
+ nodeCount: result.graph.nodeCount(),
246
+ edgeCount: result.graph.edgeCount(),
247
+ ...(result.stats.currentCommit !== undefined ? { lastCommit: result.stats.currentCommit } : {}),
248
+ };
249
+ const registryOpts = opts.home !== undefined ? { home: opts.home } : {};
250
+ await upsertRegistry(entry, registryOpts);
251
+ if (!opts.skipAgentsMd) {
252
+ try {
253
+ await writeAgentContextFiles(repoPath);
254
+ }
255
+ catch (err) {
256
+ log(`codehub analyze: failed to write AGENTS.md stanza: ${err.message}`);
257
+ }
258
+ }
259
+ const durationMs = Date.now() - started;
260
+ // Surface incremental-scope + cache-hit stats on a single operational line
261
+ // so operators spot regressions without digging into meta.json.
262
+ const incrementalLine = result.incrementalScope !== undefined
263
+ ? ` [scope=${result.incrementalScope.mode}${result.incrementalScope.fullReindexBecause !== undefined
264
+ ? `:${result.incrementalScope.fullReindexBecause}`
265
+ : ""}, closure=${result.incrementalScope.closureFiles.length}/${result.incrementalScope.totalFiles}]`
266
+ : "";
267
+ const cacheLine = result.stats.parseCache !== undefined
268
+ ? ` [cache=${(result.stats.parseCache.ratio * 100).toFixed(0)}% (${result.stats.parseCache.hits}/${result.stats.parseCache.hits + result.stats.parseCache.misses})]`
269
+ : "";
270
+ log(`codehub analyze: ${repoName} — ${entry.nodeCount} nodes, ${entry.edgeCount} edges, ` +
271
+ `graph ${result.graphHash.slice(0, 8)}, ${durationMs} ms${incrementalLine}${cacheLine}`);
272
+ return {
273
+ repoPath,
274
+ repoName,
275
+ nodeCount: entry.nodeCount,
276
+ edgeCount: entry.edgeCount,
277
+ graphHash: result.graphHash,
278
+ durationMs,
279
+ upToDate: false,
280
+ warnings: result.warnings,
281
+ };
282
+ }
283
/**
 * Build the `PreviousGraph` projection the incremental-scope phase consumes,
 * sourced from the prior DuckDB index plus the `.codehub/scan-state.json`
 * sidecar written at the tail of the previous run.
 *
 * The projection carries:
 * - file paths + scan-time content hashes from the scan-state sidecar,
 * - file-granular IMPORTS and EXTENDS/IMPLEMENTS edges, recovered by mapping
 *   each edge endpoint id back to its enclosing file path,
 * - the FULL prior node and edge snapshot — shipping these two arrays is
 *   what flips `resolveIncrementalView` from passive (`active=false`) to
 *   active mode, letting the incremental consumer phases carry forward
 *   non-closure work and reproduce a byte-identical graph hash versus a
 *   full re-index.
 *
 * Returns `undefined` when the sidecar or store is missing, unreadable, or
 * empty; the phase then downgrades to a clean full reindex without error.
 *
 * @param {string} repoPath absolute repository root
 */
export async function loadPreviousGraph(repoPath) {
    // Without the sidecar there is nothing to seed the closure walk with.
    const scanState = await readScanState(repoPath);
    if (scanState === undefined) {
        return undefined;
    }
    const store = await openStore({ path: resolveDbPath(repoPath), backend: "auto" }).catch(() => undefined);
    if (store === undefined) {
        return undefined;
    }
    try {
        await store.graph.open();
    }
    catch {
        await store.close().catch(() => { });
        return undefined;
    }
    try {
        // One-shot dumps via the typed finders. A typical repo is 10K-50K
        // nodes and 20K-100K edges, which fits comfortably in memory; the
        // finders already return rehydrated GraphNode / CodeRelation objects,
        // so the legacy rowTo* adapters are not needed on this read path.
        const nodes = [...(await store.graph.listNodes())];
        const edges = [...(await store.graph.listEdges())];
        // Derive the legacy file-granular projections from the edge snapshot
        // above instead of issuing a second round-trip to the store. These
        // remain the closure-walk seed for the incremental-scope phase.
        const importEdges = [];
        const heritageEdges = [];
        for (const { type, from, to } of edges) {
            const isImport = type === "IMPORTS";
            if (!isImport && type !== "EXTENDS" && type !== "IMPLEMENTS") {
                continue;
            }
            const fromPath = fileFromNodeId(from);
            const toPath = fileFromNodeId(to);
            if (fromPath === undefined || toPath === undefined) {
                continue;
            }
            if (isImport) {
                importEdges.push({ importer: fromPath, target: toPath });
            }
            else {
                heritageEdges.push({ childFile: fromPath, parentFile: toPath });
            }
        }
        return { files: scanState.files, importEdges, heritageEdges, nodes, edges };
    }
    catch {
        return undefined;
    }
    finally {
        await store.close();
    }
}
361
/**
 * Resolve the effective `summaries` flag.
 *
 * The `CODEHUB_BEDROCK_DISABLED=1` env kill-switch always wins; otherwise
 * the P04 default-on contract applies (an absent flag means enabled, and
 * only an explicit `false` — i.e. `--no-summaries` — disables).
 *
 * Truth table (post-P04):
 * - env set, any flag            → false (kill-switch wins)
 * - env unset, flag undefined    → true  (default on)
 * - env unset, flag true         → true
 * - env unset, flag false        → false
 *
 * Exported for unit tests; production passes `process.env`.
 *
 * @param {boolean|undefined} flag CLI flag value, if given
 * @param {Record<string, string|undefined>} env environment map
 * @returns {boolean}
 */
export function resolveSummariesEnabled(flag, env) {
    const killSwitchOn = env["CODEHUB_BEDROCK_DISABLED"] === "1";
    if (killSwitchOn) {
        return false;
    }
    // Default-on: everything except an explicit `false` enables.
    return flag !== false;
}
381
/**
 * Resolve `--max-summaries` into a concrete numeric budget.
 *
 * `auto` (or any non-numeric input, for forward compatibility) bounds the
 * cap at `min(floor(priorCallableCount × 0.1), 500)`. The callable count
 * cannot be computed cheaply before the pipeline runs, so the prior run's
 * stored Function/Method/Class node count is used as a proxy; when no prior
 * index exists (first analyze) the cap falls back to a conservative 50.
 * Explicit numeric caps pass through (floored), negatives clamp to 0
 * (dry-run), and disabled summaries short-circuit to 0.
 *
 * Exported for unit tests; production uses the default `seedLookup`.
 *
 * @param {string} repoPath repository root, forwarded to `seedLookup`
 * @param {number|string|undefined} raw the CLI value
 * @param {boolean} summariesEnabled effective summaries flag
 * @param {(repoPath: string) => Promise<number|undefined>} [seedLookup]
 * @returns {Promise<number>}
 */
export async function resolveMaxSummariesCap(repoPath, raw, summariesEnabled, seedLookup = countPriorCallableSymbols) {
    if (!summariesEnabled) {
        // Force the phase's cost-cap branch when the feature is off.
        return 0;
    }
    const isExplicitNumber = typeof raw === "number" && Number.isFinite(raw);
    if (isExplicitNumber) {
        const floored = Math.floor(raw);
        return floored > 0 ? floored : 0;
    }
    // Default or explicit "auto": consult the prior run's callable count.
    const priorCallables = await seedLookup(repoPath);
    if (priorCallables === undefined) {
        // First run: a bounded foothold so the operator sees the feature
        // light up instead of sitting idle in dry-run.
        return 50;
    }
    const tenPercent = Math.floor(priorCallables * 0.1);
    return tenPercent < 500 ? tenPercent : 500;
}
419
/**
 * Count callable symbols (Function / Method / Class) recorded by the prior
 * run's index. Yields `undefined` when no prior DuckDB store exists or any
 * open/count step fails — callers treat that as "no prior run" and apply
 * the first-run fallback cap.
 *
 * @param {string} repoPath absolute repository root
 * @returns {Promise<number|undefined>}
 */
async function countPriorCallableSymbols(repoPath) {
    const store = await openStore({ path: resolveDbPath(repoPath), backend: "auto", readOnly: true }).catch(() => undefined);
    if (store === undefined) {
        return undefined;
    }
    try {
        await store.graph.open();
    }
    catch {
        await store.close().catch(() => { });
        return undefined;
    }
    try {
        // Typed per-kind counts (the equivalent of COUNT(*) GROUP BY kind),
        // summed here so the CLI stays off the raw-SQL surface.
        const perKind = await store.graph.countNodesByKind(["Function", "Method", "Class"]);
        const total = [...perKind.values()].reduce((sum, count) => sum + count, 0);
        return Number.isFinite(total) && total >= 0 ? total : undefined;
    }
    catch {
        return undefined;
    }
    finally {
        await store.close();
    }
}
454
/**
 * Open a read-only store handle scoped to the `symbol_summaries` cache
 * probe. The returned `adapter` lets the `summarize` phase short-circuit
 * candidates whose content hash already has a row on disk; `close()`
 * releases the native handle and must be invoked by the caller. Returns
 * `undefined` when the store cannot be opened — the phase then degrades
 * gracefully to "every candidate is a miss".
 *
 * @param {string} repoPath absolute repository root
 */
async function openSummaryCacheAdapter(repoPath) {
    const store = await openStore({ path: resolveDbPath(repoPath), backend: "auto", readOnly: true }).catch(() => undefined);
    if (store === undefined) {
        return undefined;
    }
    try {
        // The summary cache lives on the temporal tier. Open both views so
        // the close() symmetry holds; on the duck backend the second open is
        // a no-op against the same connection.
        await store.graph.open();
        if (store.graphFile !== store.temporalFile) {
            await store.temporal.open();
        }
    }
    catch {
        await store.close().catch(() => { });
        return undefined;
    }
    const lookup = async (nodeId, contentHash, promptVersion) =>
        store.temporal.lookupSymbolSummary(nodeId, contentHash, promptVersion);
    return {
        adapter: { lookup },
        close: async () => {
            await store.close();
        },
    };
}
488
/**
 * Open a read-only DuckDB store scoped to the `embeddings` content-hash
 * probe. The returned adapter's `list()` loads every prior
 * `(granularity, nodeId, chunkIndex) → content_hash` row in a single
 * round-trip so the embeddings phase can skip chunks whose source text is
 * unchanged across runs. Returns `undefined` when the store cannot be
 * opened (e.g. the first analyze on a fresh repo) — the phase then
 * degrades to "every chunk is new", which is correct just slower.
 *
 * @param {string} repoPath - repo root; locates the DuckDB file.
 */
async function openEmbeddingHashCacheAdapter(repoPath) {
    const handle = await openStore({ path: resolveDbPath(repoPath), backend: "auto", readOnly: true }).catch(() => undefined);
    if (handle === undefined)
        return undefined;
    let opened = false;
    try {
        await handle.graph.open();
        opened = true;
    }
    catch {
        /* fall through to cleanup below */
    }
    if (!opened) {
        await handle.close().catch(() => { });
        return undefined;
    }
    const adapter = {
        // listEmbeddingHashes is on the graph-tier interface — embeddings
        // travel with the graph view, not the temporal cochange table.
        list: async () => handle.graph.listEmbeddingHashes(),
    };
    const close = async () => {
        await handle.close();
    };
    return { adapter, close };
}
520
/**
 * Extract the repo-relative file path from a `NodeId`. All node kinds embed
 * the file path as the second colon-delimited segment (`<Kind>:<path>:<q>`).
 *
 * @param {string} id - colon-delimited node id.
 * @returns {string | undefined} the path segment, or `undefined` when the
 *   id contains no colon at all.
 */
function fileFromNodeId(id) {
    const segments = id.split(":");
    // No colon → no path segment. Otherwise the second segment is the path
    // (everything between the first and second colon, or to end-of-string).
    return segments.length < 2 ? undefined : segments[1];
}
534
// `PREV_NODE_SELECT_COLUMNS` was the explicit column whitelist used by the
// legacy SQL `SELECT * FROM nodes` round-trip in {@link loadPreviousGraph}.
// That read path now goes through `store.graph.listNodes()`, which already
// returns rehydrated `GraphNode` objects, so the constant is no longer
// load-bearing here. The `rowToGraphNode` / `rowToCodeRelation` adapters
// below remain exported for external consumers that hand-roll over the
// DuckDB wide-column shape.
//
// Membership sets used by those adapters to validate the `kind` / `type`
// discriminator columns before treating a row as a known union member.
const NODE_KIND_SET = new Set(NODE_KINDS);
const RELATION_TYPE_SET = new Set(RELATION_TYPES);
543
/**
 * Read a non-empty string column from a row; `undefined` for NULL,
 * empty string, or any non-string value.
 */
function strField(r, col) {
    const value = r[col];
    if (typeof value !== "string" || value.length === 0) {
        return undefined;
    }
    return value;
}
547
/**
 * Read a numeric column from a row. Finite JS numbers pass through;
 * BigInt (DuckDB BIGINT binder) is coerced to Number; everything else
 * (NULL, NaN, Infinity, strings) yields `undefined`.
 */
function numField(r, col) {
    const value = r[col];
    switch (typeof value) {
        case "number":
            return Number.isFinite(value) ? value : undefined;
        case "bigint":
            return Number(value);
        default:
            return undefined;
    }
}
555
/** Read a boolean column from a row; `undefined` for anything non-boolean. */
function boolField(r, col) {
    const value = r[col];
    if (typeof value === "boolean") {
        return value;
    }
    return undefined;
}
559
/**
 * Read a TEXT[] column from a row, keeping only string elements.
 *
 * Preserves `[]` distinct from absent: the DuckDB TEXT[] binder returns a
 * 0-length JS array for an empty SQL array literal and `null` for SQL NULL.
 * Mirroring the storage adapter's `setStringArrayField`, a Community /
 * Route node written as `{keywords: []}` (or `{responseKeys: []}`) must
 * survive the carry-forward load with its empty array intact — required so
 * canonical-JSON / graphHash byte-identity holds across the incremental
 * re-index.
 */
function stringArrayField(r, col) {
    const value = r[col];
    return Array.isArray(value)
        ? value.filter((item) => typeof item === "string")
        : undefined;
}
577
/**
 * Parse a JSON-encoded string-array column. Returns `undefined` for NULL /
 * empty / non-string cells, unparseable JSON, or JSON that is not an array;
 * non-string elements are dropped from a valid array.
 */
function parseJsonStringArrayField(r, col) {
    const raw = r[col];
    if (typeof raw !== "string" || raw.length === 0) {
        return undefined;
    }
    let parsed;
    try {
        parsed = JSON.parse(raw);
    }
    catch {
        return undefined;
    }
    if (!Array.isArray(parsed)) {
        return undefined;
    }
    return parsed.filter((entry) => typeof entry === "string");
}
591
/**
 * Parse a JSON-encoded object column. Returns the parsed plain object, or
 * `undefined` for NULL / empty / non-string cells, unparseable JSON, and
 * JSON that is `null`, an array, or any non-object value.
 */
function parseJsonObjectField(r, col) {
    const raw = r[col];
    if (typeof raw !== "string" || raw.length === 0) {
        return undefined;
    }
    let parsed;
    try {
        parsed = JSON.parse(raw);
    }
    catch {
        return undefined;
    }
    const isPlainObject = parsed !== null && typeof parsed === "object" && !Array.isArray(parsed);
    return isPlainObject ? parsed : undefined;
}
605
/**
 * Reverse of `nodeToRow` (`packages/storage/src/duckdb-adapter.ts:1169`):
 * translate one row of the polymorphic `nodes` table back into a
 * {@link GraphNode}. Only the `nodes`/`edges` fidelity required by the four
 * incremental consumer phases (`cross-file`, `mro`, `communities`,
 * `processes`) is load-bearing — Community / Process nodes are re-added
 * verbatim by `communities.ts:90-94` / `processes.ts:306-310`, so their
 * `name` / `filePath` / `inferredLabel` / `keywords` / `symbolCount` /
 * `cohesion` / `entryPointId` / `stepCount` must round-trip. Other kinds
 * survive the round trip best-effort; fields we can't recover stay
 * `undefined` and the caller treats the resulting node as lossy — safe
 * because the carry-forward only lives long enough to be hashed into the
 * next graph.
 *
 * NOTE(review): fields are assigned in a fixed order; property insertion
 * order plausibly feeds the canonical-JSON / graphHash byte-identity the
 * comments below rely on — do not reorder the assignments without checking.
 *
 * Returns `undefined` when the row carries a `kind` we don't recognise or
 * when required scalar slots (`id`, `name`, `file_path`) are missing.
 *
 * Exported for tests; the production call site is {@link loadPreviousGraph}.
 *
 * @param {Record<string, unknown>} row - one wide-column `nodes` row.
 * @returns {GraphNode | undefined}
 */
export function rowToGraphNode(row) {
    // Required scalar slots — reject the row outright when any is missing.
    const idRaw = row["id"];
    const nameRaw = row["name"];
    const fileRaw = row["file_path"];
    const kindRaw = row["kind"];
    if (typeof idRaw !== "string" || idRaw.length === 0)
        return undefined;
    if (typeof nameRaw !== "string")
        return undefined;
    if (typeof fileRaw !== "string")
        return undefined;
    if (typeof kindRaw !== "string" || !NODE_KIND_SET.has(kindRaw))
        return undefined;
    const kind = kindRaw;
    // Build a permissive record keyed by TS field names. The discriminated-
    // union cast at the end is safe because every `GraphNode` member only
    // requires `id`/`kind`/`name`/`filePath` plus optional fields beyond that;
    // required fields unique to a kind (e.g. `FindingNode.propertiesBag`) are
    // populated explicitly in the per-kind branches below.
    const node = {
        id: idRaw,
        kind,
        name: nameRaw,
        filePath: fileRaw,
    };
    // LocatedNode fields — set only when non-NULL because some non-LocatedNode
    // kinds (Community / Process / File / Folder) intentionally leave them
    // NULL and re-hydrating a spurious zero would change the graph hash.
    const startLine = numField(row, "start_line");
    if (startLine !== undefined)
        node["startLine"] = startLine;
    const endLine = numField(row, "end_line");
    if (endLine !== undefined)
        node["endLine"] = endLine;
    const isExported = boolField(row, "is_exported");
    if (isExported !== undefined)
        node["isExported"] = isExported;
    const signature = strField(row, "signature");
    if (signature !== undefined)
        node["signature"] = signature;
    const parameterCount = numField(row, "parameter_count");
    if (parameterCount !== undefined)
        node["parameterCount"] = parameterCount;
    const returnType = strField(row, "return_type");
    if (returnType !== undefined)
        node["returnType"] = returnType;
    const declaredType = strField(row, "declared_type");
    if (declaredType !== undefined)
        node["declaredType"] = declaredType;
    const owner = strField(row, "owner");
    if (owner !== undefined)
        node["owner"] = owner;
    const description = strField(row, "description");
    if (description !== undefined)
        node["description"] = description;
    const contentHash = strField(row, "content_hash");
    if (contentHash !== undefined)
        node["contentHash"] = contentHash;
    const content = strField(row, "content");
    if (content !== undefined)
        node["content"] = content;
    // Community / Process — the two carry-forward-critical kinds.
    const inferredLabel = strField(row, "inferred_label");
    if (inferredLabel !== undefined)
        node["inferredLabel"] = inferredLabel;
    const symbolCount = numField(row, "symbol_count");
    if (symbolCount !== undefined)
        node["symbolCount"] = symbolCount;
    const cohesion = numField(row, "cohesion");
    if (cohesion !== undefined)
        node["cohesion"] = cohesion;
    const keywords = stringArrayField(row, "keywords");
    if (keywords !== undefined)
        node["keywords"] = keywords;
    const entryPointId = strField(row, "entry_point_id");
    if (entryPointId !== undefined)
        node["entryPointId"] = entryPointId;
    const stepCount = numField(row, "step_count");
    if (stepCount !== undefined)
        node["stepCount"] = stepCount;
    // Section (markdown heading) — `level` round-trips for completeness.
    const level = numField(row, "level");
    if (level !== undefined)
        node["level"] = level;
    // Route: `url` + `responseKeys` + `method` (shared column with Tool / Operation).
    const url = strField(row, "url");
    if (url !== undefined)
        node["url"] = url;
    const responseKeys = stringArrayField(row, "response_keys");
    if (responseKeys !== undefined)
        node["responseKeys"] = responseKeys;
    if (kind === "Tool") {
        const toolName = strField(row, "tool_name");
        if (toolName !== undefined)
            node["toolName"] = toolName;
        const inputSchemaJson = strField(row, "input_schema_json");
        if (inputSchemaJson !== undefined)
            node["inputSchemaJson"] = inputSchemaJson;
    }
    else if (kind === "Route") {
        const method = strField(row, "method");
        if (method !== undefined)
            node["method"] = method;
    }
    if (kind === "Finding") {
        const ruleId = strField(row, "rule_id");
        const severity = strField(row, "severity");
        const scannerId = strField(row, "scanner_id");
        const message = strField(row, "message");
        const propertiesBag = parseJsonObjectField(row, "properties_bag");
        if (ruleId !== undefined)
            node["ruleId"] = ruleId;
        if (severity !== undefined)
            node["severity"] = severity;
        if (scannerId !== undefined)
            node["scannerId"] = scannerId;
        if (message !== undefined)
            node["message"] = message;
        // propertiesBag is REQUIRED on FindingNode; default to {} on lossy reads
        // so the resulting object still structurally satisfies the union.
        node["propertiesBag"] = propertiesBag ?? {};
        const partialFingerprint = strField(row, "partial_fingerprint");
        if (partialFingerprint !== undefined)
            node["partialFingerprint"] = partialFingerprint;
        const baselineState = strField(row, "baseline_state");
        if (baselineState !== undefined)
            node["baselineState"] = baselineState;
        const suppressedJson = strField(row, "suppressed_json");
        if (suppressedJson !== undefined)
            node["suppressedJson"] = suppressedJson;
    }
    if (kind === "Dependency") {
        const version = strField(row, "version");
        const ecosystem = strField(row, "ecosystem");
        const lockfileSource = strField(row, "lockfile_source");
        const license = strField(row, "license");
        // version / ecosystem / lockfileSource are REQUIRED on the type; default
        // to safe values when NULL so the object still passes the structural
        // union at runtime. The carry-forward path only hashes these fields.
        node["version"] = version ?? "";
        node["ecosystem"] = ecosystem ?? "npm";
        node["lockfileSource"] = lockfileSource ?? "";
        if (license !== undefined)
            node["license"] = license;
    }
    if (kind === "Operation") {
        // OpenAPI operation: `method` / `path` are required — fall back to
        // GET / "/" when the columns are NULL so the union still holds.
        const httpMethod = strField(row, "http_method");
        const httpPath = strField(row, "http_path");
        node["method"] = httpMethod ?? "GET";
        node["path"] = httpPath ?? "/";
        const summary = strField(row, "summary");
        if (summary !== undefined)
            node["summary"] = summary;
        const operationId = strField(row, "operation_id");
        if (operationId !== undefined)
            node["operationId"] = operationId;
    }
    if (kind === "Contributor") {
        // emailHash is required on ContributorNode; "" is the lossy default.
        const emailHash = strField(row, "email_hash");
        node["emailHash"] = emailHash ?? "";
        const emailPlain = strField(row, "email_plain");
        if (emailPlain !== undefined)
            node["emailPlain"] = emailPlain;
    }
    // ProjectProfile — JSON-encoded array columns plus a polymorphic
    // `frameworks_json` (flat `string[]` OR `{ flat, detected }`).
    if (kind === "ProjectProfile") {
        node["languages"] = parseJsonStringArrayField(row, "languages_json") ?? [];
        const frameworksRaw = strField(row, "frameworks_json");
        let frameworksFlat = [];
        if (frameworksRaw !== undefined) {
            try {
                const parsed = JSON.parse(frameworksRaw);
                if (Array.isArray(parsed)) {
                    // Legacy flat-array encoding.
                    frameworksFlat = parsed.filter((x) => typeof x === "string");
                }
                else if (typeof parsed === "object" && parsed !== null) {
                    // Newer `{ flat, detected }` object encoding.
                    const rec = parsed;
                    const flat = rec["flat"];
                    if (Array.isArray(flat)) {
                        frameworksFlat = flat.filter((x) => typeof x === "string");
                    }
                    const detected = rec["detected"];
                    if (Array.isArray(detected))
                        node["frameworksDetected"] = detected;
                }
            }
            catch {
                /* ignore — leave frameworks as [] */
            }
        }
        node["frameworks"] = frameworksFlat;
        node["iacTypes"] = parseJsonStringArrayField(row, "iac_types_json") ?? [];
        node["apiContracts"] = parseJsonStringArrayField(row, "api_contracts_json") ?? [];
        node["manifests"] = parseJsonStringArrayField(row, "manifests_json") ?? [];
        node["srcDirs"] = parseJsonStringArrayField(row, "src_dirs_json") ?? [];
    }
    // File ownership (H.5) + Community ownership (H.4) — shared across kinds.
    const orphanGrade = strField(row, "orphan_grade");
    if (orphanGrade !== undefined)
        node["orphanGrade"] = orphanGrade;
    const isOrphan = boolField(row, "is_orphan");
    if (isOrphan !== undefined)
        node["isOrphan"] = isOrphan;
    const truckFactor = numField(row, "truck_factor");
    if (truckFactor !== undefined)
        node["truckFactor"] = truckFactor;
    const od30 = numField(row, "ownership_drift_30d");
    if (od30 !== undefined)
        node["ownershipDrift30d"] = od30;
    const od90 = numField(row, "ownership_drift_90d");
    if (od90 !== undefined)
        node["ownershipDrift90d"] = od90;
    const od365 = numField(row, "ownership_drift_365d");
    if (od365 !== undefined)
        node["ownershipDrift365d"] = od365;
    // v1.2 extensions — dead-code grade, coverage, and complexity metrics.
    const deadness = strField(row, "deadness");
    if (deadness !== undefined)
        node["deadness"] = deadness;
    const coveragePercent = numField(row, "coverage_percent");
    if (coveragePercent !== undefined)
        node["coveragePercent"] = coveragePercent;
    const coveredLinesJson = strField(row, "covered_lines_json");
    if (coveredLinesJson !== undefined)
        node["coveredLinesJson"] = coveredLinesJson;
    const cyclomaticComplexity = numField(row, "cyclomatic_complexity");
    if (cyclomaticComplexity !== undefined)
        node["cyclomaticComplexity"] = cyclomaticComplexity;
    const nestingDepth = numField(row, "nesting_depth");
    if (nestingDepth !== undefined)
        node["nestingDepth"] = nestingDepth;
    const nloc = numField(row, "nloc");
    if (nloc !== undefined)
        node["nloc"] = nloc;
    const halsteadVolume = numField(row, "halstead_volume");
    if (halsteadVolume !== undefined)
        node["halsteadVolume"] = halsteadVolume;
    return node;
}
864
/**
 * Reverse of the relations row builder at
 * `packages/storage/src/duckdb-adapter.ts:299-340`. Relations round-trip
 * cleanly because their schema is 7 scalar columns with no polymorphism.
 * Returns `undefined` when `type` is not a known {@link RelationType} or
 * when required scalars (`id`, `from_id`, `to_id`, `confidence`) are
 * missing or malformed.
 *
 * Exported for tests; the production call site is {@link loadPreviousGraph}.
 *
 * @param {Record<string, unknown>} row - one `edges` table row.
 * @returns {CodeRelation | undefined}
 */
export function rowToCodeRelation(row) {
    const id = row["id"];
    const from = row["from_id"];
    const to = row["to_id"];
    const type = row["type"];
    const confidence = row["confidence"];
    if (typeof id !== "string" || id.length === 0)
        return undefined;
    if (typeof from !== "string" || from.length === 0)
        return undefined;
    if (typeof to !== "string" || to.length === 0)
        return undefined;
    if (typeof type !== "string" || !RELATION_TYPE_SET.has(type))
        return undefined;
    // FIX: guard NULL before coercing. `Number(null)` is 0, so the previous
    // `Number(confidence)` fallback silently rehydrated a SQL NULL confidence
    // as 0 instead of rejecting the row — contradicting the "required scalars
    // missing → undefined" contract above. Non-number, non-null values
    // (bigint / DECIMAL-as-string from the DuckDB binder) still coerce.
    if (confidence === null || confidence === undefined)
        return undefined;
    const conf = typeof confidence === "number" ? confidence : Number(confidence);
    if (!Number.isFinite(conf))
        return undefined;
    const reason = row["reason"];
    const step = row["step"];
    const base = {
        id: id,
        from: from,
        to: to,
        type: type,
        confidence: conf,
    };
    const stepNum = typeof step === "number" && Number.isFinite(step)
        ? step
        : typeof step === "bigint"
            ? Number(step)
            : undefined;
    const hasReason = typeof reason === "string" && reason.length > 0;
    // Build the final record in a single statement so we match the optional-
    // field discipline required by `exactOptionalPropertyTypes`.
    if (hasReason && stepNum !== undefined) {
        return { ...base, reason: reason, step: stepNum };
    }
    if (hasReason)
        return { ...base, reason: reason };
    if (stepNum !== undefined)
        return { ...base, step: stepNum };
    return base;
}
916
/**
 * Load the persisted scan state (`scan-state.json`) for a repo. Returns
 * the parsed payload only when it is a v1 object with a `files` array;
 * any read, parse, or shape failure yields `undefined` ("no prior state").
 *
 * @param {string} repoPath - repo root; meta dir resolved from it.
 */
async function readScanState(repoPath) {
    const stateFile = join(resolveRepoMetaDir(repoPath), "scan-state.json");
    try {
        const { readFile } = await import("node:fs/promises");
        const parsed = JSON.parse(await readFile(stateFile, "utf8"));
        const wellFormed = typeof parsed === "object" &&
            parsed !== null &&
            parsed.schemaVersion === 1 &&
            Array.isArray(parsed.files);
        return wellFormed ? parsed : undefined;
    }
    catch {
        // Missing file, bad JSON, unreadable dir — all mean "no prior state".
        return undefined;
    }
}
934
/**
 * Persist the scan state for a repo as `scan-state.json`, atomically:
 * write to a unique temp file, then rename over the target so readers
 * never observe a half-written payload.
 *
 * @param {string} repoPath - repo root; meta dir resolved from it.
 * @param {Array<{relPath: string}>} files - scan entries; sorted by
 *   `relPath` (byte order) before writing for deterministic output.
 */
async function writeScanState(repoPath, files) {
    const metaDir = resolveRepoMetaDir(repoPath);
    const target = join(metaDir, "scan-state.json");
    // FIX: single dynamic import — the original imported node:fs/promises
    // twice (once for writeFile/mkdir, again for rename) in one function.
    const { writeFile, mkdir, rename, rm } = await import("node:fs/promises");
    await mkdir(metaDir, { recursive: true });
    // Sort by relPath for deterministic output — mirrors scan phase invariant.
    const sortedFiles = [...files].sort((a, b) => a.relPath < b.relPath ? -1 : a.relPath > b.relPath ? 1 : 0);
    const payload = { schemaVersion: 1, files: sortedFiles };
    const tmp = `${target}.tmp-${process.pid}-${Date.now()}`;
    try {
        await writeFile(tmp, `${JSON.stringify(payload, null, 2)}\n`, "utf8");
        await rename(tmp, target);
    }
    catch (err) {
        // FIX: don't leak the orphaned temp file when the write or rename
        // fails; best-effort cleanup, then propagate the original error.
        await rm(tmp, { force: true }).catch(() => { });
        throw err;
    }
}
946
/**
 * Decide whether the analyze fast-path may be taken for `repoName`.
 * Returns the registry record when the recorded index is provably fresh,
 * `undefined` otherwise. Check order is significant: registry record →
 * path match → recorded commit → clean working tree → HEAD match.
 *
 * @param {string} repoName - registry key.
 * @param {string} repoPath - resolved absolute repo path.
 * @param {{home?: string}} opts - optional registry home override.
 */
export async function checkFastPath(repoName, repoPath, opts) {
    const registry = await readRegistry(opts.home !== undefined ? { home: opts.home } : {});
    const hit = registry[repoName];
    // No record, or the record points at a different checkout — no fast-path.
    if (!hit || resolve(hit.path) !== repoPath)
        return undefined;
    // Without a recorded commit we cannot know whether the index is fresh.
    if (hit.lastCommit === undefined)
        return undefined;
    // Uncommitted changes in the working tree mean the recorded `lastCommit`
    // no longer reflects what's on disk — bypass the fast-path so analyze
    // re-runs against the edited files. If git can't answer (non-git dir,
    // git unavailable) `isWorkingTreeDirty` returns false and we fall
    // through to the HEAD-based check below, matching `readGitHead`'s
    // fallback posture.
    if (await isWorkingTreeDirty(repoPath))
        return undefined;
    // Compare against the working tree's current HEAD so a `git pull`
    // invalidates the fast-path. If git isn't available (non-git dir,
    // shallow checkout without HEAD, etc.) fall back to treating the
    // registry record as authoritative — the user can always --force.
    const head = await readGitHead(repoPath);
    if (head !== undefined && head !== hit.lastCommit)
        return undefined;
    return hit;
}
975
/**
 * Resolve the repo's current HEAD commit via `git rev-parse HEAD`.
 * Returns the trimmed SHA, or `undefined` on any failure (git missing,
 * non-git directory, non-zero exit, empty output) — never throws.
 *
 * @param {string} repoPath - directory to run git in.
 * @returns {Promise<string | undefined>}
 */
async function readGitHead(repoPath) {
    return new Promise((resolveP) => {
        const chunks = [];
        let done = false;
        // Resolve exactly once whichever of `error` / `close` fires first.
        const settle = (value) => {
            if (!done) {
                done = true;
                resolveP(value);
            }
        };
        const child = spawn("git", ["rev-parse", "HEAD"], {
            cwd: repoPath,
            stdio: ["ignore", "pipe", "ignore"],
        });
        child.stdout.setEncoding("utf8");
        child.stdout.on("data", (chunk) => chunks.push(chunk));
        child.on("error", () => settle(undefined));
        child.on("close", (code) => {
            if (code !== 0) {
                settle(undefined);
                return;
            }
            const head = chunks.join("").trim();
            settle(head.length > 0 ? head : undefined);
        });
    });
}
1007
/**
 * Probe whether the working tree has uncommitted changes. Returns `true`
 * iff `git status --porcelain` exits 0 with non-empty stdout. Any spawn
 * error, non-zero exit, or git-unavailable case returns `false` so the
 * caller never blocks the fast-path on a git failure — mirroring
 * `readGitHead`'s "cannot determine" fallback.
 *
 * Exported so the CLI test suite can assert the fallback posture directly
 * without spawning a whole `runAnalyze` pipeline.
 *
 * @param {string} repoPath - directory to run git in.
 * @returns {Promise<boolean>}
 */
export async function isWorkingTreeDirty(repoPath) {
    return new Promise((resolveP) => {
        const chunks = [];
        let done = false;
        // Resolve exactly once whichever of `error` / `close` fires first.
        const settle = (value) => {
            if (!done) {
                done = true;
                resolveP(value);
            }
        };
        const child = spawn("git", ["status", "--porcelain"], {
            cwd: repoPath,
            stdio: ["ignore", "pipe", "ignore"],
        });
        child.stdout.setEncoding("utf8");
        child.stdout.on("data", (chunk) => chunks.push(chunk));
        child.on("error", () => settle(false));
        child.on("close", (code) => {
            // Porcelain output is empty exactly when the tree is clean.
            settle(code === 0 && chunks.join("").length > 0);
        });
    });
}
1048
/**
 * Emit pipeline warnings to stderr. By default, collapse high-cardinality
 * classes (e.g. dead-code ghost-community) into a single summary line so
 * a run doesn't drown the terminal with hundreds of near-identical lines.
 * Pass `verbose=true` to print every warning individually.
 *
 * @param {string[]} warnings - raw warning strings, `<phase>: message` form.
 * @param {boolean} verbose - print every warning instead of grouping.
 */
function logWarnings(warnings, verbose) {
    if (verbose) {
        for (const w of warnings)
            log(`codehub analyze: ${w}`);
        return;
    }
    // Group by `<phase>:` prefix, keeping the first warning of each group
    // as a representative sample; prefix-less warnings pass through verbatim.
    const groups = new Map();
    const ungrouped = [];
    for (const warning of warnings) {
        const colon = warning.indexOf(":");
        if (colon === -1) {
            ungrouped.push(warning);
        }
        else {
            const prefix = warning.slice(0, colon);
            const entry = groups.get(prefix);
            if (entry) {
                entry.count += 1;
            }
            else {
                groups.set(prefix, { count: 1, sample: warning });
            }
        }
    }
    for (const [prefix, { count, sample }] of groups) {
        if (count === 1) {
            log(`codehub analyze: ${sample}`);
        }
        else {
            log(`codehub analyze: ${prefix}: ${count} warnings (use --verbose to see all)`);
            log(`codehub analyze: e.g. ${sample}`);
        }
    }
    for (const warning of ungrouped)
        log(`codehub analyze: ${warning}`);
}
1091
/**
 * Emit a single diagnostic line to stderr.
 * @param {string} message - fully formatted line to print.
 */
function log(message) {
    // Using console.warn keeps stdout reserved for machine-readable output from
    // subcommands like `sql` and `query --json`.
    console.warn(message);
}
1096
+ //# sourceMappingURL=analyze.js.map