@opencodehub/cli 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +202 -0
- package/README.md +85 -0
- package/dist/agent-context.d.ts +54 -0
- package/dist/agent-context.d.ts.map +1 -0
- package/dist/agent-context.js +122 -0
- package/dist/agent-context.js.map +1 -0
- package/dist/cobol-proleap-setup.d.ts +77 -0
- package/dist/cobol-proleap-setup.d.ts.map +1 -0
- package/dist/cobol-proleap-setup.js +289 -0
- package/dist/cobol-proleap-setup.js.map +1 -0
- package/dist/commands/analyze.d.ts +234 -0
- package/dist/commands/analyze.d.ts.map +1 -0
- package/dist/commands/analyze.js +1096 -0
- package/dist/commands/analyze.js.map +1 -0
- package/dist/commands/augment.d.ts +48 -0
- package/dist/commands/augment.d.ts.map +1 -0
- package/dist/commands/augment.js +249 -0
- package/dist/commands/augment.js.map +1 -0
- package/dist/commands/baseline.d.ts +68 -0
- package/dist/commands/baseline.d.ts.map +1 -0
- package/dist/commands/baseline.js +110 -0
- package/dist/commands/baseline.js.map +1 -0
- package/dist/commands/bench.d.ts +54 -0
- package/dist/commands/bench.d.ts.map +1 -0
- package/dist/commands/bench.js +283 -0
- package/dist/commands/bench.js.map +1 -0
- package/dist/commands/ci-init.d.ts +37 -0
- package/dist/commands/ci-init.d.ts.map +1 -0
- package/dist/commands/ci-init.js +115 -0
- package/dist/commands/ci-init.js.map +1 -0
- package/dist/commands/clean.d.ts +13 -0
- package/dist/commands/clean.d.ts.map +1 -0
- package/dist/commands/clean.js +38 -0
- package/dist/commands/clean.js.map +1 -0
- package/dist/commands/code-pack.d.ts +105 -0
- package/dist/commands/code-pack.d.ts.map +1 -0
- package/dist/commands/code-pack.js +187 -0
- package/dist/commands/code-pack.js.map +1 -0
- package/dist/commands/context.d.ts +30 -0
- package/dist/commands/context.d.ts.map +1 -0
- package/dist/commands/context.js +237 -0
- package/dist/commands/context.js.map +1 -0
- package/dist/commands/detect-changes.d.ts +26 -0
- package/dist/commands/detect-changes.d.ts.map +1 -0
- package/dist/commands/detect-changes.js +73 -0
- package/dist/commands/detect-changes.js.map +1 -0
- package/dist/commands/doctor.d.ts +52 -0
- package/dist/commands/doctor.d.ts.map +1 -0
- package/dist/commands/doctor.js +472 -0
- package/dist/commands/doctor.js.map +1 -0
- package/dist/commands/find-enclosing-symbol.d.ts +67 -0
- package/dist/commands/find-enclosing-symbol.d.ts.map +1 -0
- package/dist/commands/find-enclosing-symbol.js +106 -0
- package/dist/commands/find-enclosing-symbol.js.map +1 -0
- package/dist/commands/group.d.ts +123 -0
- package/dist/commands/group.d.ts.map +1 -0
- package/dist/commands/group.js +448 -0
- package/dist/commands/group.js.map +1 -0
- package/dist/commands/impact.d.ts +23 -0
- package/dist/commands/impact.d.ts.map +1 -0
- package/dist/commands/impact.js +91 -0
- package/dist/commands/impact.js.map +1 -0
- package/dist/commands/index-repo.d.ts +39 -0
- package/dist/commands/index-repo.d.ts.map +1 -0
- package/dist/commands/index-repo.js +148 -0
- package/dist/commands/index-repo.js.map +1 -0
- package/dist/commands/ingest-sarif.d.ts +64 -0
- package/dist/commands/ingest-sarif.d.ts.map +1 -0
- package/dist/commands/ingest-sarif.js +381 -0
- package/dist/commands/ingest-sarif.js.map +1 -0
- package/dist/commands/init.d.ts +75 -0
- package/dist/commands/init.d.ts.map +1 -0
- package/dist/commands/init.js +315 -0
- package/dist/commands/init.js.map +1 -0
- package/dist/commands/list.d.ts +17 -0
- package/dist/commands/list.d.ts.map +1 -0
- package/dist/commands/list.js +79 -0
- package/dist/commands/list.js.map +1 -0
- package/dist/commands/mcp.d.ts +8 -0
- package/dist/commands/mcp.d.ts.map +1 -0
- package/dist/commands/mcp.js +28 -0
- package/dist/commands/mcp.js.map +1 -0
- package/dist/commands/open-store.d.ts +25 -0
- package/dist/commands/open-store.d.ts.map +1 -0
- package/dist/commands/open-store.js +47 -0
- package/dist/commands/open-store.js.map +1 -0
- package/dist/commands/pack.d.ts +35 -0
- package/dist/commands/pack.d.ts.map +1 -0
- package/dist/commands/pack.js +83 -0
- package/dist/commands/pack.js.map +1 -0
- package/dist/commands/query.d.ts +85 -0
- package/dist/commands/query.d.ts.map +1 -0
- package/dist/commands/query.js +309 -0
- package/dist/commands/query.js.map +1 -0
- package/dist/commands/scan.d.ts +81 -0
- package/dist/commands/scan.d.ts.map +1 -0
- package/dist/commands/scan.js +407 -0
- package/dist/commands/scan.js.map +1 -0
- package/dist/commands/setup.d.ts +178 -0
- package/dist/commands/setup.d.ts.map +1 -0
- package/dist/commands/setup.js +370 -0
- package/dist/commands/setup.js.map +1 -0
- package/dist/commands/sql.d.ts +19 -0
- package/dist/commands/sql.d.ts.map +1 -0
- package/dist/commands/sql.js +51 -0
- package/dist/commands/sql.js.map +1 -0
- package/dist/commands/status.d.ts +13 -0
- package/dist/commands/status.d.ts.map +1 -0
- package/dist/commands/status.js +66 -0
- package/dist/commands/status.js.map +1 -0
- package/dist/commands/verdict-render.d.ts +33 -0
- package/dist/commands/verdict-render.d.ts.map +1 -0
- package/dist/commands/verdict-render.js +123 -0
- package/dist/commands/verdict-render.js.map +1 -0
- package/dist/commands/verdict.d.ts +61 -0
- package/dist/commands/verdict.d.ts.map +1 -0
- package/dist/commands/verdict.js +146 -0
- package/dist/commands/verdict.js.map +1 -0
- package/dist/commands/wiki.d.ts +26 -0
- package/dist/commands/wiki.d.ts.map +1 -0
- package/dist/commands/wiki.js +74 -0
- package/dist/commands/wiki.js.map +1 -0
- package/dist/editors/claude-code.d.ts +23 -0
- package/dist/editors/claude-code.d.ts.map +1 -0
- package/dist/editors/claude-code.js +58 -0
- package/dist/editors/claude-code.js.map +1 -0
- package/dist/editors/codex.d.ts +22 -0
- package/dist/editors/codex.d.ts.map +1 -0
- package/dist/editors/codex.js +59 -0
- package/dist/editors/codex.js.map +1 -0
- package/dist/editors/cursor.d.ts +13 -0
- package/dist/editors/cursor.d.ts.map +1 -0
- package/dist/editors/cursor.js +21 -0
- package/dist/editors/cursor.js.map +1 -0
- package/dist/editors/index.d.ts +12 -0
- package/dist/editors/index.d.ts.map +1 -0
- package/dist/editors/index.js +11 -0
- package/dist/editors/index.js.map +1 -0
- package/dist/editors/opencode.d.ts +23 -0
- package/dist/editors/opencode.d.ts.map +1 -0
- package/dist/editors/opencode.js +61 -0
- package/dist/editors/opencode.js.map +1 -0
- package/dist/editors/types.d.ts +33 -0
- package/dist/editors/types.d.ts.map +1 -0
- package/dist/editors/types.js +19 -0
- package/dist/editors/types.js.map +1 -0
- package/dist/editors/windows-wrap.d.ts +19 -0
- package/dist/editors/windows-wrap.d.ts.map +1 -0
- package/dist/editors/windows-wrap.js +28 -0
- package/dist/editors/windows-wrap.js.map +1 -0
- package/dist/editors/windsurf.d.ts +12 -0
- package/dist/editors/windsurf.d.ts.map +1 -0
- package/dist/editors/windsurf.js +21 -0
- package/dist/editors/windsurf.js.map +1 -0
- package/dist/embedder-downloader.d.ts +87 -0
- package/dist/embedder-downloader.d.ts.map +1 -0
- package/dist/embedder-downloader.js +261 -0
- package/dist/embedder-downloader.js.map +1 -0
- package/dist/fs-atomic.d.ts +22 -0
- package/dist/fs-atomic.d.ts.map +1 -0
- package/dist/fs-atomic.js +28 -0
- package/dist/fs-atomic.js.map +1 -0
- package/dist/groups.d.ts +64 -0
- package/dist/groups.d.ts.map +1 -0
- package/dist/groups.js +172 -0
- package/dist/groups.js.map +1 -0
- package/dist/index.d.ts +11 -0
- package/dist/index.d.ts.map +1 -0
- package/dist/index.js +703 -0
- package/dist/index.js.map +1 -0
- package/dist/lib/is-indexed.d.ts +20 -0
- package/dist/lib/is-indexed.d.ts.map +1 -0
- package/dist/lib/is-indexed.js +35 -0
- package/dist/lib/is-indexed.js.map +1 -0
- package/dist/registry.d.ts +64 -0
- package/dist/registry.d.ts.map +1 -0
- package/dist/registry.js +145 -0
- package/dist/registry.js.map +1 -0
- package/dist/scip-downloader.d.ts +138 -0
- package/dist/scip-downloader.d.ts.map +1 -0
- package/dist/scip-downloader.js +372 -0
- package/dist/scip-downloader.js.map +1 -0
- package/dist/scip-pins.d.ts +99 -0
- package/dist/scip-pins.d.ts.map +1 -0
- package/dist/scip-pins.js +195 -0
- package/dist/scip-pins.js.map +1 -0
- package/dist/skills-gen.d.ts +47 -0
- package/dist/skills-gen.d.ts.map +1 -0
- package/dist/skills-gen.js +292 -0
- package/dist/skills-gen.js.map +1 -0
- package/package.json +81 -0
|
@@ -0,0 +1,1096 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* `codehub analyze [path]` — index a repository.
|
|
3
|
+
*
|
|
4
|
+
* Flow:
|
|
5
|
+
* 1. Resolve `repoPath` (default `process.cwd()`).
|
|
6
|
+
* 2. Read the registry. If `!force` and the recorded `lastCommit` matches
|
|
7
|
+
* the pipeline's fresh commit, emit an "up to date" message and return
|
|
8
|
+
* without doing work.
|
|
9
|
+
* 3. Otherwise run `runIngestion(repoPath, {...})`, then open a writable
|
|
10
|
+
* `Store` (composed graph + temporal) via `openStore`, then
|
|
11
|
+
* `createSchema()`, `bulkLoad()`, and `setMeta()`.
|
|
12
|
+
* 4. Update the registry and, unless suppressed, stamp AGENTS.md + CLAUDE.md.
|
|
13
|
+
* 5. Print a one-line summary.
|
|
14
|
+
*
|
|
15
|
+
* The `--offline` flag is a hard promise: the ingestion pipeline never opens
|
|
16
|
+
* a network socket, and embeddings are a no-op for MVP. We log the promise so
|
|
17
|
+
* reviewers can audit call sites.
|
|
18
|
+
*/
|
|
19
|
+
import { spawn } from "node:child_process";
|
|
20
|
+
import { mkdir } from "node:fs/promises";
|
|
21
|
+
import { basename, join, resolve } from "node:path";
|
|
22
|
+
import { NODE_KINDS, RELATION_TYPES, SCHEMA_VERSION, } from "@opencodehub/core-types";
|
|
23
|
+
import { pipeline } from "@opencodehub/ingestion";
|
|
24
|
+
import { openStore, resolveDbPath, resolveRepoMetaDir, writeStoreMeta, } from "@opencodehub/storage";
|
|
25
|
+
import { writeAgentContextFiles } from "../agent-context.js";
|
|
26
|
+
import { readRegistry, upsertRegistry } from "../registry.js";
|
|
27
|
+
import { generateSkills } from "../skills-gen.js";
|
|
28
|
+
/**
 * Run the `analyze` command: ingest the repository at `path` and persist the
 * resulting graph to the composed store, then update the registry and (unless
 * suppressed) the AGENTS.md / CLAUDE.md stanzas.
 *
 * @param {string} path - Repository path to index; resolved against cwd.
 * @param {object} [opts] - CLI flags: force, offline, verbose, embeddings,
 *   embeddingsVariant/ModelDir/Granularity/Workers/BatchSize, sbom, coverage,
 *   summaries, maxSummariesPerRun, summaryModel, strictDetectors, skills,
 *   home, skipAgentsMd.
 * @returns {Promise<object>} `{ repoPath, repoName, nodeCount, edgeCount,
 *   graphHash, durationMs, upToDate, warnings }` — `graphHash` is "" and
 *   `upToDate` is true on the fast path.
 */
export async function runAnalyze(path, opts = {}) {
    const started = Date.now();
    const repoPath = resolve(path);
    const repoName = basename(repoPath);
    if (opts.offline) {
        log("codehub analyze: offline mode (no network calls will be made)");
    }
    if (opts.embeddings) {
        log("codehub analyze: --embeddings enabled " +
            "(requires `codehub setup --embeddings` to have installed weights)");
    }
    // Fast path: if the registry knows about this repo and the commit hasn't
    // moved, short-circuit without re-ingesting.
    if (!opts.force) {
        const fastPath = await checkFastPath(repoName, repoPath, opts);
        if (fastPath !== undefined) {
            log(`codehub analyze: ${repoName} already up to date at ${fastPath.lastCommit ?? "unknown"} ` +
                `(${fastPath.nodeCount} nodes, ${fastPath.edgeCount} edges)`);
            return {
                repoPath,
                repoName,
                nodeCount: fastPath.nodeCount,
                edgeCount: fastPath.edgeCount,
                graphHash: "",
                durationMs: Date.now() - started,
                upToDate: true,
                warnings: [],
            };
        }
    }
    // Load a prior graph projection for the incremental-scope phase when the
    // CLI was not invoked with --force. The projection is a thin wrapper
    // around the prior DuckDB index (File nodes + IMPORTS / EXTENDS /
    // IMPLEMENTS edges). `loadPreviousGraph` silently returns undefined if
    // the store does not exist or cannot be opened; incremental-scope then
    // reports mode="full" with reason="no-prior-graph".
    const incrementalFrom = opts.force === true ? undefined : await loadPreviousGraph(repoPath);
    // Resolve the effective `summaries` flag. P04 flipped the default ON, so
    // `undefined` now means "on". The `CODEHUB_BEDROCK_DISABLED=1` env kill-
    // switch forces off regardless of the flag; `offline` is enforced later
    // inside the phase itself (the phase's own invariant).
    const summariesEnabled = resolveSummariesEnabled(opts.summaries, process.env);
    // Open a read-only store upfront so the `summarize` phase can probe the
    // prior summary rows before work is queued AND so we can inspect the
    // prior run's `storeMeta.stats` to resolve `--max-summaries auto`. We
    // keep the handle open for the duration of `runIngestion` and close it
    // in a finally block. `summaries` must be enabled for the adapter to
    // matter; skip the cost of a read-only open when the flag is off.
    const summaryCacheAdapter = summariesEnabled
        ? await openSummaryCacheAdapter(repoPath)
        : undefined;
    // Mirror the same pattern for the embeddings phase's content-hash skip.
    // Only open when `--embeddings` is on AND `--force` is off — force
    // re-embeds everything, so the adapter would do no useful work. When the
    // prior DB is absent the adapter returns undefined and the phase
    // degrades to "every chunk is new".
    const embeddingHashAdapter = opts.embeddings === true && opts.force !== true
        ? await openEmbeddingHashCacheAdapter(repoPath)
        : undefined;
    // Resolve `--max-summaries auto` against the prior run's callable count,
    // if any. `auto` bounds the cap at 10% of the SCIP-confirmed callable
    // symbols (capped at 500); on a cold first run the prior meta is absent
    // and we fall back to a conservative 50. `0` and positive integers pass
    // through unchanged. Unknown inputs (string without the "auto" literal)
    // are treated as "auto" for forward compatibility.
    const resolvedMaxSummaries = await resolveMaxSummariesCap(repoPath, opts.maxSummariesPerRun, summariesEnabled);
    // Build the pipeline options object. Each flag is spread in only when the
    // caller set it, so the pipeline's own defaults apply to absent flags
    // (rather than being clobbered with `undefined` values). The two
    // always-present keys — `summaries` and `maxSummariesPerRun` — carry the
    // values resolved above.
    const pipelineOptions = {
        ...(opts.force !== undefined ? { force: opts.force } : {}),
        ...(opts.offline !== undefined ? { offline: opts.offline } : {}),
        ...(opts.verbose !== undefined ? { verbose: opts.verbose } : {}),
        ...(opts.embeddings !== undefined ? { embeddings: opts.embeddings } : {}),
        ...(opts.embeddingsVariant !== undefined ? { embeddingsVariant: opts.embeddingsVariant } : {}),
        ...(opts.embeddingsModelDir !== undefined
            ? { embeddingsModelDir: opts.embeddingsModelDir }
            : {}),
        ...(opts.embeddingsGranularity !== undefined
            ? { embeddingsGranularity: opts.embeddingsGranularity }
            : {}),
        ...(opts.embeddingsWorkers !== undefined ? { embeddingsWorkers: opts.embeddingsWorkers } : {}),
        ...(opts.embeddingsBatchSize !== undefined
            ? { embeddingsBatchSize: opts.embeddingsBatchSize }
            : {}),
        ...(opts.sbom !== undefined ? { sbom: opts.sbom } : {}),
        ...(opts.coverage !== undefined ? { coverage: opts.coverage } : {}),
        summaries: summariesEnabled,
        maxSummariesPerRun: resolvedMaxSummaries,
        ...(opts.summaryModel !== undefined ? { summaryModel: opts.summaryModel } : {}),
        ...(opts.strictDetectors !== undefined ? { strictDetectors: opts.strictDetectors } : {}),
        ...(summaryCacheAdapter !== undefined
            ? { summaryCacheAdapter: summaryCacheAdapter.adapter }
            : {}),
        ...(embeddingHashAdapter !== undefined
            ? { embeddingHashCacheAdapter: embeddingHashAdapter.adapter }
            : {}),
        ...(incrementalFrom !== undefined ? { incrementalFrom } : {}),
    };
    let result;
    // Close both cache adapters in a finally block so the handles are
    // released even when ingestion throws (the error still propagates).
    try {
        result = await pipeline.runIngestion(repoPath, pipelineOptions);
    }
    finally {
        await summaryCacheAdapter?.close();
        await embeddingHashAdapter?.close();
    }
    logWarnings(result.warnings, opts.verbose === true);
    // Persist to the composed graph + temporal store. Backend resolution is
    // env-driven (`CODEHUB_STORE`); the default `"duck"` writes to
    // `<repo>/.codehub/graph.duckdb` exactly like the legacy path. The
    // temporal-tier writes (`bulkLoadCochanges`, `bulkLoadSymbolSummaries`)
    // route through `store.temporal`.
    await mkdir(resolveRepoMetaDir(repoPath), { recursive: true });
    const dbPath = resolveDbPath(repoPath);
    const store = await openStore({ path: dbPath, backend: "auto" });
    try {
        // When graph and temporal tiers share one file, a single open/schema
        // pass covers both; a split layout needs its own open + createSchema.
        await store.graph.open();
        if (store.graphFile !== store.temporalFile)
            await store.temporal.open();
        await store.graph.createSchema();
        if (store.graphFile !== store.temporalFile)
            await store.temporal.createSchema();
        await store.graph.bulkLoad(result.graph);
        // Persist cochange rows to the dedicated `cochanges` table. `bulkLoad` in
        // replace mode already truncated it, but `bulkLoadCochanges` does its own
        // DELETE inside the same transaction so the call is idempotent even on
        // upsert paths that keep the prior graph. Empty row sets collapse into a
        // cheap DELETE.
        if (result.cochange !== undefined) {
            await store.temporal.bulkLoadCochanges(result.cochange.rows);
        }
        // Persist freshly produced summary rows. The phase returns an empty
        // `rows` array in the common gated-off / dry-run case so this is a
        // cheap no-op. A non-empty payload means the operator explicitly ran
        // with `--summaries --max-summaries > 0` and accepted the Bedrock
        // cost; we persist under the temporal-tier surface.
        if (result.summarize !== undefined && result.summarize.rows.length > 0) {
            await store.temporal.bulkLoadSymbolSummaries(result.summarize.rows);
            log(`codehub analyze: persisted ${result.summarize.rows.length} symbol summaries ` +
                `(promptVersion=${result.summarize.promptVersion})`);
        }
        // Surface the summarize-phase counters whenever the flag was enabled —
        // even in dry-run (maxSummaries=0) mode — so operators can inspect how
        // many symbols WOULD have been summarized before unlocking Bedrock.
        if (summariesEnabled && result.summarize !== undefined) {
            const s = result.summarize;
            log(`codehub analyze: summarize — considered=${s.considered}, ` +
                `skippedUnconfirmed=${s.skippedUnconfirmed}, cacheHits=${s.cacheHits}, ` +
                `summarized=${s.summarized}, wouldHaveSummarized=${s.wouldHaveSummarized}, ` +
                `failed=${s.failed} [promptVersion=${s.promptVersion}]`);
        }
        // Persist embeddings emitted by the `embeddings` phase (if any). The
        // phase returns an empty `rows` array when `opts.embeddings` is false
        // or when weights are missing, so this call is a cheap no-op in the
        // common case. We upsert AFTER bulkLoad so the replace-mode wipe
        // doesn't drop freshly-written embeddings.
        if (result.embeddings !== undefined && result.embeddings.rows.length > 0) {
            await store.graph.upsertEmbeddings(result.embeddings.rows);
            log(`codehub analyze: upserted ${result.embeddings.rows.length} embeddings ` +
                `(${result.embeddings.embeddingsModelId})`);
        }
        const indexedAt = new Date().toISOString();
        // Numeric provenance stats, if any. embeddingsHash is a string and is
        // persisted to the sidecar file instead of StoreMeta.stats (which is
        // Record<string, number>).
        const byKindStats = result.stats.byKind !== undefined ? { ...result.stats.byKind } : {};
        if (result.embeddings?.ranEmbedder) {
            byKindStats["embeddingsCount"] = result.embeddings.embeddingsInserted;
        }
        // Cache-health stats: the parse-cache hit ratio and on-disk size are
        // surfaced to `codehub doctor` and `codehub status` via the meta
        // sidecar. Missing ratio (no parse phase) → omit the field so pre-1.1
        // meta.json snapshots keep round-tripping byte-identically.
        const parseCache = result.stats.parseCache;
        const cacheDir = join(repoPath, ".codehub", "parse-cache");
        const cacheSize = await pipeline.computeCacheSize(cacheDir);
        const storeMeta = {
            schemaVersion: SCHEMA_VERSION,
            indexedAt,
            nodeCount: result.graph.nodeCount(),
            edgeCount: result.graph.edgeCount(),
            ...(result.stats.currentCommit !== undefined
                ? { lastCommit: result.stats.currentCommit }
                : {}),
            stats: byKindStats,
            ...(parseCache !== undefined ? { cacheHitRatio: parseCache.ratio } : {}),
            cacheSizeBytes: cacheSize.bytes,
        };
        // Write the meta twice on purpose: once into the store itself and once
        // to the JSON sidecar, so tooling can read it without opening DuckDB.
        await store.graph.setMeta(storeMeta);
        await writeStoreMeta(repoPath, storeMeta);
        // Persist the scan-state sidecar so the next analyze invocation can feed
        // the incremental-scope phase via loadPreviousGraph(). We write this
        // alongside the DuckDB file under `<repo>/.codehub` so a clean of the
        // meta dir invalidates both the index and the incremental state together.
        if (result.scan !== undefined) {
            await writeScanState(repoPath, result.scan.files.map((f) => ({ relPath: f.relPath, contentSha: f.sha256 })));
        }
        // Opt-in skill generation. Walk Community nodes just persisted above and
        // emit one SKILL.md per cluster under `<repo>/.codehub/skills/`. Runs
        // against the still-open DuckDB handle so there's no re-open cost, and
        // any per-skill failure (read-only dir, permission denied, disk full)
        // logs-and-continues — analyze never aborts because of a skill write.
        if (opts.skills === true) {
            try {
                const emitted = await generateSkills(store.graph, repoPath, { log });
                log(`codehub analyze: generated ${emitted} SKILL.md ${emitted === 1 ? "file" : "files"}`);
            }
            catch (err) {
                log(`codehub analyze: skill generation failed: ${err.message}`);
            }
        }
    }
    finally {
        await store.close();
    }
    // Record the run in the per-user registry so `codehub list` / the fast
    // path above can find this repo on the next invocation.
    const entry = {
        name: repoName,
        path: repoPath,
        indexedAt: new Date().toISOString(),
        nodeCount: result.graph.nodeCount(),
        edgeCount: result.graph.edgeCount(),
        ...(result.stats.currentCommit !== undefined ? { lastCommit: result.stats.currentCommit } : {}),
    };
    const registryOpts = opts.home !== undefined ? { home: opts.home } : {};
    await upsertRegistry(entry, registryOpts);
    // Stamp the AGENTS.md / CLAUDE.md stanza unless suppressed. A write
    // failure (read-only checkout, permissions) is logged, never fatal.
    if (!opts.skipAgentsMd) {
        try {
            await writeAgentContextFiles(repoPath);
        }
        catch (err) {
            log(`codehub analyze: failed to write AGENTS.md stanza: ${err.message}`);
        }
    }
    const durationMs = Date.now() - started;
    // Surface incremental-scope + cache-hit stats on a single operational line
    // so operators spot regressions without digging into meta.json.
    const incrementalLine = result.incrementalScope !== undefined
        ? ` [scope=${result.incrementalScope.mode}${result.incrementalScope.fullReindexBecause !== undefined
            ? `:${result.incrementalScope.fullReindexBecause}`
            : ""}, closure=${result.incrementalScope.closureFiles.length}/${result.incrementalScope.totalFiles}]`
        : "";
    const cacheLine = result.stats.parseCache !== undefined
        ? ` [cache=${(result.stats.parseCache.ratio * 100).toFixed(0)}% (${result.stats.parseCache.hits}/${result.stats.parseCache.hits + result.stats.parseCache.misses})]`
        : "";
    log(`codehub analyze: ${repoName} — ${entry.nodeCount} nodes, ${entry.edgeCount} edges, ` +
        `graph ${result.graphHash.slice(0, 8)}, ${durationMs} ms${incrementalLine}${cacheLine}`);
    return {
        repoPath,
        repoName,
        nodeCount: entry.nodeCount,
        edgeCount: entry.edgeCount,
        graphHash: result.graphHash,
        durationMs,
        upToDate: false,
        warnings: result.warnings,
    };
}
|
|
283
|
+
/**
 * Build the prior-graph projection consumed by the incremental-scope phase
 * from the previous DuckDB index plus the `.codehub/scan-state.json` sidecar.
 *
 * The projection carries:
 * - file paths + scan-time content hashes (from the sidecar written at the
 *   tail of the prior run),
 * - file-granular IMPORTS and EXTENDS/IMPLEMENTS edges, recovered by mapping
 *   each relation endpoint id back to its enclosing file path,
 * - the FULL prior node and edge snapshot. Shipping those two arrays is what
 *   flips `resolveIncrementalView` from passive to active mode, letting the
 *   incremental consumer phases carry forward non-closure work and reproduce
 *   a byte-identical graph hash vs a full re-index.
 *
 * @param {string} repoPath - Absolute repository root.
 * @returns {Promise<object|undefined>} The projection, or `undefined` when
 *   the sidecar or store is missing/unreadable — which downgrades the phase
 *   to a clean full reindex without surfacing an error.
 */
export async function loadPreviousGraph(repoPath) {
    const scanState = await readScanState(repoPath);
    if (scanState === undefined)
        return undefined;
    const priorStore = await openStore({ path: resolveDbPath(repoPath), backend: "auto" }).catch(() => undefined);
    if (priorStore === undefined)
        return undefined;
    try {
        await priorStore.graph.open();
    }
    catch {
        // Best-effort release; an unreadable store just means "no prior graph".
        await priorStore.close().catch(() => { });
        return undefined;
    }
    try {
        // One-shot full dumps. The typed finders already rehydrate GraphNode /
        // CodeRelation objects, and a typical repo (tens of thousands of nodes
        // and edges) fits comfortably in memory.
        const nodes = Array.from(await priorStore.graph.listNodes());
        const edges = Array.from(await priorStore.graph.listEdges());
        // Derive the legacy file-granular projections from the edge dump so we
        // skip a second store round-trip. These seed the closure walk; the raw
        // node/edge arrays above are the carry-forward snapshot.
        const importEdges = [];
        const heritageEdges = [];
        const fileLevelTypes = new Set(["IMPORTS", "EXTENDS", "IMPLEMENTS"]);
        for (const rel of edges) {
            if (!fileLevelTypes.has(rel.type))
                continue;
            const sourceFile = fileFromNodeId(rel.from);
            const targetFile = fileFromNodeId(rel.to);
            if (sourceFile === undefined || targetFile === undefined)
                continue;
            if (rel.type === "IMPORTS") {
                importEdges.push({ importer: sourceFile, target: targetFile });
            }
            else {
                heritageEdges.push({ childFile: sourceFile, parentFile: targetFile });
            }
        }
        return { files: scanState.files, importEdges, heritageEdges, nodes, edges };
    }
    catch {
        return undefined;
    }
    finally {
        await priorStore.close();
    }
}
|
|
361
|
+
/**
 * Decide whether the summaries feature is effectively on.
 *
 * The `CODEHUB_BEDROCK_DISABLED=1` environment kill-switch always wins and
 * forces the feature off, even against an explicit `--summaries`. Otherwise
 * the P04 default-on contract applies: every flag value except an explicit
 * `false` (i.e. `--no-summaries`) enables the feature, including `undefined`.
 *
 * Exported for unit tests; the production call site passes `process.env`.
 *
 * @param {boolean|undefined} flag - Parsed `--summaries` flag, if given.
 * @param {object} env - Environment map to consult for the kill-switch.
 * @returns {boolean} Effective enabled state.
 */
export function resolveSummariesEnabled(flag, env) {
    const killSwitchOn = env["CODEHUB_BEDROCK_DISABLED"] === "1";
    return killSwitchOn ? false : flag !== false;
}
|
|
381
|
+
/**
 * Turn the raw `--max-summaries` input into a concrete numeric budget.
 *
 * Rules:
 * - summaries disabled → 0, so the phase's cost-cap branch is hit regardless;
 * - a finite number → `max(0, floor(raw))` (negatives clamp to dry-run 0);
 * - anything else (absent, "auto", or an unrecognized string, treated as
 *   "auto" for forward compatibility) → consult the prior run's callable
 *   count via `seedLookup`:
 *   - no prior count available → a conservative first-run cap of 50, so the
 *     operator sees the feature light up without an unbounded Bedrock spend;
 *   - otherwise → `min(floor(count * 0.1), 500)`.
 *
 * Exported for unit tests; the production call site uses the default
 * `countPriorCallableSymbols` seed lookup.
 *
 * @param {string} repoPath - Repository root, forwarded to `seedLookup`.
 * @param {number|string|undefined} raw - Raw `--max-summaries` value.
 * @param {boolean} summariesEnabled - Effective summaries flag.
 * @param {(p: string) => Promise<number|undefined>} [seedLookup] - Prior
 *   callable-count source; injectable for tests.
 * @returns {Promise<number>} The resolved cap.
 */
export async function resolveMaxSummariesCap(repoPath, raw, summariesEnabled, seedLookup = countPriorCallableSymbols) {
    if (!summariesEnabled)
        return 0;
    const isExplicitNumber = typeof raw === "number" && Number.isFinite(raw);
    if (isExplicitNumber)
        return Math.max(0, Math.floor(raw));
    // "auto" path: seed from the prior run's graph, when one exists.
    const priorCallables = await seedLookup(repoPath);
    if (priorCallables === undefined)
        return 50; // cold first run — bounded foothold
    return Math.min(Math.floor(priorCallables * 0.1), 500);
}
|
|
419
|
+
/**
 * Count callable symbols (Function / Method / Class) recorded by the
 * prior run. Yields `undefined` when no prior DuckDB index can be opened
 * or the count query fails — callers treat that as "no prior run" and
 * fall back to the first-run heuristic.
 */
async function countPriorCallableSymbols(repoPath) {
    const store = await openStore({
        path: resolveDbPath(repoPath),
        backend: "auto",
        readOnly: true,
    }).catch(() => undefined);
    if (store === undefined) {
        return undefined;
    }
    try {
        await store.graph.open();
    }
    catch {
        // Graph view failed to open — release the handle best-effort.
        await store.close().catch(() => { });
        return undefined;
    }
    try {
        // `countNodesByKind` is the typed equivalent of `SELECT COUNT(*)
        // GROUP BY kind`; the three callable kinds are summed in JS so the
        // cli stays off the raw-SQL surface.
        const byKind = await store.graph.countNodesByKind(["Function", "Method", "Class"]);
        let total = 0;
        for (const count of byKind.values()) {
            total += count;
        }
        return Number.isFinite(total) && total >= 0 ? total : undefined;
    }
    catch {
        return undefined;
    }
    finally {
        await store.close();
    }
}
|
|
454
|
+
/**
 * Open a read-only DuckDB store scoped to the `symbol_summaries` cache
 * probe. On success, returns `{ adapter, close }`: `adapter.lookup`
 * consults the on-disk summary row for a (nodeId, contentHash,
 * promptVersion) triple so the `summarize` phase can short-circuit known
 * candidates, and `close()` releases the native handle. Returns
 * `undefined` when the store cannot be opened — the phase degrades
 * gracefully to "every candidate is a miss".
 */
async function openSummaryCacheAdapter(repoPath) {
    const store = await openStore({
        path: resolveDbPath(repoPath),
        backend: "auto",
        readOnly: true,
    }).catch(() => undefined);
    if (store === undefined) {
        return undefined;
    }
    try {
        // The summary cache lives on the temporal tier. Open both views so
        // the close() symmetry holds; on the duck backend the second open
        // is a no-op against the same connection.
        await store.graph.open();
        if (store.graphFile !== store.temporalFile) {
            await store.temporal.open();
        }
    }
    catch {
        await store.close().catch(() => { });
        return undefined;
    }
    const adapter = {
        lookup: async (nodeId, contentHash, promptVersion) => store.temporal.lookupSymbolSummary(nodeId, contentHash, promptVersion),
    };
    return {
        adapter,
        close: async () => {
            await store.close();
        },
    };
}
|
|
488
|
+
/**
 * Open a read-only DuckDB store scoped to the `embeddings` content-hash
 * probe. The returned adapter's `list()` loads every prior
 * `(granularity, nodeId, chunkIndex) → content_hash` row in one
 * round-trip so the embeddings phase can skip chunks whose source text is
 * unchanged across runs. Returns `undefined` when the store cannot be
 * opened (e.g. the first analyze on a fresh repo) — the phase then
 * degrades to "every chunk is new", which is correct, just slower.
 */
async function openEmbeddingHashCacheAdapter(repoPath) {
    const store = await openStore({
        path: resolveDbPath(repoPath),
        backend: "auto",
        readOnly: true,
    }).catch(() => undefined);
    if (store === undefined) {
        return undefined;
    }
    try {
        await store.graph.open();
    }
    catch {
        await store.close().catch(() => { });
        return undefined;
    }
    // `listEmbeddingHashes` sits on the graph-tier interface — embeddings
    // travel with the graph view, not the temporal cochange table.
    const adapter = {
        list: async () => store.graph.listEmbeddingHashes(),
    };
    return {
        adapter,
        close: async () => {
            await store.close();
        },
    };
}
|
|
520
|
+
/**
 * Extract the repo-relative file path embedded in a `NodeId`. Every node
 * kind encodes the path as the second colon-delimited segment
 * (`<Kind>:<path>:<q>`). An id with no colon yields `undefined`; an id
 * with a single colon yields everything after it.
 */
function fileFromNodeId(id) {
    const segments = id.split(":");
    if (segments.length < 2) {
        return undefined;
    }
    // With exactly two segments the tail IS the path; with three or more,
    // the second segment is the path and the rest is the qualifier.
    if (segments.length === 2) {
        return segments[1];
    }
    return segments[1];
}
|
|
534
|
+
// `PREV_NODE_SELECT_COLUMNS` was the explicit column whitelist used by the
// legacy SQL `SELECT * FROM nodes` round-trip in {@link loadPreviousGraph}.
// That read path now goes through `store.graph.listNodes()`, which already
// returns rehydrated `GraphNode` objects, so the constant is no longer
// load-bearing here. The `rowToGraphNode` / `rowToCodeRelation` adapters
// below remain exported for external consumers that hand-roll over the
// DuckDB wide-column shape.
// Set views over the kind/type unions give the row adapters below O(1)
// membership probes when rejecting rows with an unknown `kind` / `type`.
const NODE_KIND_SET = new Set(NODE_KINDS);
const RELATION_TYPE_SET = new Set(RELATION_TYPES);
|
|
543
|
+
/** Read column `col` from row `r` as a non-empty string; otherwise `undefined`. */
function strField(r, col) {
    const value = r[col];
    if (typeof value !== "string" || value.length === 0) {
        return undefined;
    }
    return value;
}
|
|
547
|
+
/**
 * Read column `col` as a finite number. DuckDB integer columns surface as
 * `bigint`, which is coerced through `Number`. Non-finite numbers and all
 * other types (including SQL NULL) yield `undefined`.
 */
function numField(r, col) {
    const value = r[col];
    switch (typeof value) {
        case "number":
            return Number.isFinite(value) ? value : undefined;
        case "bigint":
            return Number(value);
        default:
            return undefined;
    }
}
|
|
555
|
+
/** Read column `col` as a boolean; any non-boolean (including NULL) yields `undefined`. */
function boolField(r, col) {
    const value = r[col];
    if (typeof value === "boolean") {
        return value;
    }
    return undefined;
}
|
|
559
|
+
/**
 * Read column `col` as an array of strings, dropping non-string entries.
 * `[]` is preserved as distinct from absent: the DuckDB TEXT[] binder
 * returns a zero-length JS array for an empty SQL array literal and
 * `null` for SQL NULL, and a Community / Route node written as
 * `{keywords: []}` (or `{responseKeys: []}`) must survive the
 * carry-forward load with its empty array intact so canonical-JSON /
 * graphHash byte-identity holds across the incremental re-index.
 */
function stringArrayField(r, col) {
    const value = r[col];
    if (!Array.isArray(value)) {
        return undefined;
    }
    return value.filter((entry) => typeof entry === "string");
}
|
|
577
|
+
/**
 * Read column `col` as a JSON-encoded string array. Non-string entries in
 * the decoded array are dropped; a missing, empty, unparseable, or
 * non-array payload yields `undefined`.
 */
function parseJsonStringArrayField(r, col) {
    const encoded = r[col];
    if (typeof encoded !== "string" || encoded === "") {
        return undefined;
    }
    let decoded;
    try {
        decoded = JSON.parse(encoded);
    }
    catch {
        return undefined;
    }
    if (!Array.isArray(decoded)) {
        return undefined;
    }
    const strings = [];
    for (const entry of decoded) {
        if (typeof entry === "string") {
            strings.push(entry);
        }
    }
    return strings;
}
|
|
591
|
+
/**
 * Read column `col` as a JSON-encoded plain object. Arrays, `null`,
 * scalars, empty strings, and unparseable payloads all yield `undefined`.
 */
function parseJsonObjectField(r, col) {
    const encoded = r[col];
    if (typeof encoded !== "string" || encoded === "") {
        return undefined;
    }
    try {
        const decoded = JSON.parse(encoded);
        const isPlainObject = decoded !== null &&
            typeof decoded === "object" &&
            !Array.isArray(decoded);
        return isPlainObject ? decoded : undefined;
    }
    catch {
        return undefined;
    }
}
|
|
605
|
+
/**
 * Reverse of `nodeToRow` (`packages/storage/src/duckdb-adapter.ts:1169`):
 * translate one row of the polymorphic `nodes` table back into a
 * {@link GraphNode}. Only the `nodes`/`edges` fidelity required by the four
 * incremental consumer phases (`cross-file`, `mro`, `communities`,
 * `processes`) is load-bearing — Community / Process nodes are re-added
 * verbatim by `communities.ts:90-94` / `processes.ts:306-310`, so their
 * `name` / `filePath` / `inferredLabel` / `keywords` / `symbolCount` /
 * `cohesion` / `entryPointId` / `stepCount` must round-trip. Other kinds
 * survive the round trip best-effort; fields we can't recover stay
 * `undefined` and the caller treats the resulting node as lossy — safe
 * because the carry-forward only lives long enough to be hashed into the
 * next graph.
 *
 * Returns `undefined` when the row carries a `kind` we don't recognise or
 * when required scalar slots (`id`, `name`, `file_path`) are missing.
 *
 * Exported for tests; the production call site is {@link loadPreviousGraph}.
 */
export function rowToGraphNode(row) {
    const idRaw = row["id"];
    const nameRaw = row["name"];
    const fileRaw = row["file_path"];
    const kindRaw = row["kind"];
    // Required scalar slots — reject the whole row when any is missing or
    // has the wrong type, rather than fabricating placeholder values.
    if (typeof idRaw !== "string" || idRaw.length === 0)
        return undefined;
    if (typeof nameRaw !== "string")
        return undefined;
    if (typeof fileRaw !== "string")
        return undefined;
    if (typeof kindRaw !== "string" || !NODE_KIND_SET.has(kindRaw))
        return undefined;
    const kind = kindRaw;
    // Build a permissive record keyed by TS field names. The discriminated-
    // union cast at the end is safe because every `GraphNode` member only
    // requires `id`/`kind`/`name`/`filePath` plus optional fields beyond that;
    // required fields unique to a kind (e.g. `FindingNode.propertiesBag`) are
    // populated explicitly in the per-kind branches below.
    const node = {
        id: idRaw,
        kind,
        name: nameRaw,
        filePath: fileRaw,
    };
    // LocatedNode fields — set only when non-NULL because some non-LocatedNode
    // kinds (Community / Process / File / Folder) intentionally leave them
    // NULL and re-hydrating a spurious zero would change the graph hash.
    const startLine = numField(row, "start_line");
    if (startLine !== undefined)
        node["startLine"] = startLine;
    const endLine = numField(row, "end_line");
    if (endLine !== undefined)
        node["endLine"] = endLine;
    const isExported = boolField(row, "is_exported");
    if (isExported !== undefined)
        node["isExported"] = isExported;
    const signature = strField(row, "signature");
    if (signature !== undefined)
        node["signature"] = signature;
    const parameterCount = numField(row, "parameter_count");
    if (parameterCount !== undefined)
        node["parameterCount"] = parameterCount;
    const returnType = strField(row, "return_type");
    if (returnType !== undefined)
        node["returnType"] = returnType;
    const declaredType = strField(row, "declared_type");
    if (declaredType !== undefined)
        node["declaredType"] = declaredType;
    const owner = strField(row, "owner");
    if (owner !== undefined)
        node["owner"] = owner;
    const description = strField(row, "description");
    if (description !== undefined)
        node["description"] = description;
    const contentHash = strField(row, "content_hash");
    if (contentHash !== undefined)
        node["contentHash"] = contentHash;
    const content = strField(row, "content");
    if (content !== undefined)
        node["content"] = content;
    // Community / Process — the two carry-forward-critical kinds.
    const inferredLabel = strField(row, "inferred_label");
    if (inferredLabel !== undefined)
        node["inferredLabel"] = inferredLabel;
    const symbolCount = numField(row, "symbol_count");
    if (symbolCount !== undefined)
        node["symbolCount"] = symbolCount;
    const cohesion = numField(row, "cohesion");
    if (cohesion !== undefined)
        node["cohesion"] = cohesion;
    const keywords = stringArrayField(row, "keywords");
    if (keywords !== undefined)
        node["keywords"] = keywords;
    const entryPointId = strField(row, "entry_point_id");
    if (entryPointId !== undefined)
        node["entryPointId"] = entryPointId;
    const stepCount = numField(row, "step_count");
    if (stepCount !== undefined)
        node["stepCount"] = stepCount;
    // Section (markdown heading) — `level` round-trips for completeness.
    const level = numField(row, "level");
    if (level !== undefined)
        node["level"] = level;
    // Route: `url` + `responseKeys` + `method` (shared column with Tool / Operation).
    const url = strField(row, "url");
    if (url !== undefined)
        node["url"] = url;
    const responseKeys = stringArrayField(row, "response_keys");
    if (responseKeys !== undefined)
        node["responseKeys"] = responseKeys;
    // Tool — tool name plus its serialized input schema, both optional.
    if (kind === "Tool") {
        const toolName = strField(row, "tool_name");
        if (toolName !== undefined)
            node["toolName"] = toolName;
        const inputSchemaJson = strField(row, "input_schema_json");
        if (inputSchemaJson !== undefined)
            node["inputSchemaJson"] = inputSchemaJson;
    }
    else if (kind === "Route") {
        const method = strField(row, "method");
        if (method !== undefined)
            node["method"] = method;
    }
    // Finding — scanner-result columns (rule, severity, fingerprint,
    // baseline). Optional fields are set only when present.
    if (kind === "Finding") {
        const ruleId = strField(row, "rule_id");
        const severity = strField(row, "severity");
        const scannerId = strField(row, "scanner_id");
        const message = strField(row, "message");
        const propertiesBag = parseJsonObjectField(row, "properties_bag");
        if (ruleId !== undefined)
            node["ruleId"] = ruleId;
        if (severity !== undefined)
            node["severity"] = severity;
        if (scannerId !== undefined)
            node["scannerId"] = scannerId;
        if (message !== undefined)
            node["message"] = message;
        // propertiesBag is REQUIRED on FindingNode; default to {} on lossy reads
        // so the resulting object still structurally satisfies the union.
        node["propertiesBag"] = propertiesBag ?? {};
        const partialFingerprint = strField(row, "partial_fingerprint");
        if (partialFingerprint !== undefined)
            node["partialFingerprint"] = partialFingerprint;
        const baselineState = strField(row, "baseline_state");
        if (baselineState !== undefined)
            node["baselineState"] = baselineState;
        const suppressedJson = strField(row, "suppressed_json");
        if (suppressedJson !== undefined)
            node["suppressedJson"] = suppressedJson;
    }
    // Dependency — package metadata keyed off the lockfile columns.
    if (kind === "Dependency") {
        const version = strField(row, "version");
        const ecosystem = strField(row, "ecosystem");
        const lockfileSource = strField(row, "lockfile_source");
        const license = strField(row, "license");
        // version / ecosystem / lockfileSource are REQUIRED on the type; default
        // to safe values when NULL so the object still passes the structural
        // union at runtime. The carry-forward path only hashes these fields.
        node["version"] = version ?? "";
        node["ecosystem"] = ecosystem ?? "npm";
        node["lockfileSource"] = lockfileSource ?? "";
        if (license !== undefined)
            node["license"] = license;
    }
    // Operation — HTTP verb/path columns; `method`/`path` are defaulted
    // because they are required on the type.
    if (kind === "Operation") {
        const httpMethod = strField(row, "http_method");
        const httpPath = strField(row, "http_path");
        node["method"] = httpMethod ?? "GET";
        node["path"] = httpPath ?? "/";
        const summary = strField(row, "summary");
        if (summary !== undefined)
            node["summary"] = summary;
        const operationId = strField(row, "operation_id");
        if (operationId !== undefined)
            node["operationId"] = operationId;
    }
    // Contributor — `emailHash` is defaulted to "" when NULL, mirroring the
    // Dependency branch's required-field posture.
    if (kind === "Contributor") {
        const emailHash = strField(row, "email_hash");
        node["emailHash"] = emailHash ?? "";
        const emailPlain = strField(row, "email_plain");
        if (emailPlain !== undefined)
            node["emailPlain"] = emailPlain;
    }
    // ProjectProfile — JSON-encoded array columns plus a polymorphic
    // `frameworks_json` (flat `string[]` OR `{ flat, detected }`).
    if (kind === "ProjectProfile") {
        node["languages"] = parseJsonStringArrayField(row, "languages_json") ?? [];
        const frameworksRaw = strField(row, "frameworks_json");
        let frameworksFlat = [];
        if (frameworksRaw !== undefined) {
            try {
                const parsed = JSON.parse(frameworksRaw);
                if (Array.isArray(parsed)) {
                    frameworksFlat = parsed.filter((x) => typeof x === "string");
                }
                else if (typeof parsed === "object" && parsed !== null) {
                    const rec = parsed;
                    const flat = rec["flat"];
                    if (Array.isArray(flat)) {
                        frameworksFlat = flat.filter((x) => typeof x === "string");
                    }
                    const detected = rec["detected"];
                    if (Array.isArray(detected))
                        node["frameworksDetected"] = detected;
                }
            }
            catch {
                /* ignore — leave frameworks as [] */
            }
        }
        node["frameworks"] = frameworksFlat;
        node["iacTypes"] = parseJsonStringArrayField(row, "iac_types_json") ?? [];
        node["apiContracts"] = parseJsonStringArrayField(row, "api_contracts_json") ?? [];
        node["manifests"] = parseJsonStringArrayField(row, "manifests_json") ?? [];
        node["srcDirs"] = parseJsonStringArrayField(row, "src_dirs_json") ?? [];
    }
    // File ownership (H.5) + Community ownership (H.4) — shared across kinds.
    const orphanGrade = strField(row, "orphan_grade");
    if (orphanGrade !== undefined)
        node["orphanGrade"] = orphanGrade;
    const isOrphan = boolField(row, "is_orphan");
    if (isOrphan !== undefined)
        node["isOrphan"] = isOrphan;
    const truckFactor = numField(row, "truck_factor");
    if (truckFactor !== undefined)
        node["truckFactor"] = truckFactor;
    const od30 = numField(row, "ownership_drift_30d");
    if (od30 !== undefined)
        node["ownershipDrift30d"] = od30;
    const od90 = numField(row, "ownership_drift_90d");
    if (od90 !== undefined)
        node["ownershipDrift90d"] = od90;
    const od365 = numField(row, "ownership_drift_365d");
    if (od365 !== undefined)
        node["ownershipDrift365d"] = od365;
    // v1.2 extensions
    const deadness = strField(row, "deadness");
    if (deadness !== undefined)
        node["deadness"] = deadness;
    const coveragePercent = numField(row, "coverage_percent");
    if (coveragePercent !== undefined)
        node["coveragePercent"] = coveragePercent;
    const coveredLinesJson = strField(row, "covered_lines_json");
    if (coveredLinesJson !== undefined)
        node["coveredLinesJson"] = coveredLinesJson;
    const cyclomaticComplexity = numField(row, "cyclomatic_complexity");
    if (cyclomaticComplexity !== undefined)
        node["cyclomaticComplexity"] = cyclomaticComplexity;
    const nestingDepth = numField(row, "nesting_depth");
    if (nestingDepth !== undefined)
        node["nestingDepth"] = nestingDepth;
    const nloc = numField(row, "nloc");
    if (nloc !== undefined)
        node["nloc"] = nloc;
    const halsteadVolume = numField(row, "halstead_volume");
    if (halsteadVolume !== undefined)
        node["halsteadVolume"] = halsteadVolume;
    return node;
}
|
|
864
|
+
/**
 * Reverse of the relations row builder at
 * `packages/storage/src/duckdb-adapter.ts:299-340`. Relations round-trip
 * cleanly because their schema is 7 scalar columns with no polymorphism.
 * Returns `undefined` when `type` is not a known {@link RelationType} or
 * when required scalars (`id`, `from_id`, `to_id`, a finite `confidence`)
 * are missing. `reason` and `step` are attached only when present so the
 * optional-field discipline of `exactOptionalPropertyTypes` holds.
 *
 * Exported for tests; the production call site is {@link loadPreviousGraph}.
 */
export function rowToCodeRelation(row) {
    const idCol = row["id"];
    const fromCol = row["from_id"];
    const toCol = row["to_id"];
    const typeCol = row["type"];
    const nonEmptyString = (v) => typeof v === "string" && v.length > 0;
    if (!nonEmptyString(idCol)) {
        return undefined;
    }
    if (!nonEmptyString(fromCol)) {
        return undefined;
    }
    if (!nonEmptyString(toCol)) {
        return undefined;
    }
    if (typeof typeCol !== "string" || !RELATION_TYPE_SET.has(typeCol)) {
        return undefined;
    }
    const confidenceCol = row["confidence"];
    const conf = typeof confidenceCol === "number" && Number.isFinite(confidenceCol)
        ? confidenceCol
        : Number(confidenceCol);
    if (!Number.isFinite(conf)) {
        return undefined;
    }
    const relation = {
        id: idCol,
        from: fromCol,
        to: toCol,
        type: typeCol,
        confidence: conf,
    };
    // `reason` attaches only when non-empty; `step` accepts a finite number
    // or a DuckDB bigint (coerced through `Number`).
    const reasonCol = row["reason"];
    if (typeof reasonCol === "string" && reasonCol.length > 0) {
        relation.reason = reasonCol;
    }
    const stepCol = row["step"];
    if (typeof stepCol === "number" && Number.isFinite(stepCol)) {
        relation.step = stepCol;
    }
    else if (typeof stepCol === "bigint") {
        relation.step = Number(stepCol);
    }
    return relation;
}
|
|
916
|
+
/**
 * Read and validate `scan-state.json` from the repo's meta dir. Returns
 * the parsed state when it matches `{ schemaVersion: 1, files: [...] }`;
 * any read, parse, or shape failure yields `undefined`.
 */
async function readScanState(repoPath) {
    const stateFile = join(resolveRepoMetaDir(repoPath), "scan-state.json");
    try {
        const { readFile } = await import("node:fs/promises");
        const parsed = JSON.parse(await readFile(stateFile, "utf8"));
        // Shape gate: only schemaVersion 1 with a files array is trusted.
        const isValid = typeof parsed === "object" &&
            parsed !== null &&
            parsed.schemaVersion === 1 &&
            Array.isArray(parsed.files);
        return isValid ? parsed : undefined;
    }
    catch {
        // Missing file, unreadable file, malformed JSON — all mean "no state".
        return undefined;
    }
}
|
|
934
|
+
/**
 * Atomically persist `scan-state.json` into the repo's meta dir using the
 * write-temp-then-rename pattern so readers never observe a torn file.
 * Files are sorted by `relPath` for deterministic output, mirroring the
 * scan phase invariant.
 *
 * Fixes over the previous version: `node:fs/promises` was dynamically
 * imported twice per call, and a failed rename leaked the temp file.
 */
async function writeScanState(repoPath, files) {
    const metaDir = resolveRepoMetaDir(repoPath);
    const target = join(metaDir, "scan-state.json");
    // Single dynamic import (previously this module was imported twice —
    // once for writeFile/mkdir and again for rename).
    const { writeFile, mkdir, rename, unlink } = await import("node:fs/promises");
    await mkdir(metaDir, { recursive: true });
    // Sort by relPath for deterministic output — mirrors scan phase invariant.
    const sortedFiles = [...files].sort((a, b) => a.relPath < b.relPath ? -1 : a.relPath > b.relPath ? 1 : 0);
    const payload = { schemaVersion: 1, files: sortedFiles };
    // pid + timestamp keeps concurrent writers from clobbering each other's
    // temp files before the atomic rename lands.
    const tmp = `${target}.tmp-${process.pid}-${Date.now()}`;
    await writeFile(tmp, `${JSON.stringify(payload, null, 2)}\n`, "utf8");
    try {
        await rename(tmp, target);
    }
    catch (err) {
        // Don't leak the temp file when the rename fails; the rename error
        // is the one callers care about, so the unlink is best-effort.
        await unlink(tmp).catch(() => { });
        throw err;
    }
}
|
|
946
|
+
/**
 * Fast-path probe: decide whether the registry's record for `repoName` is
 * still fresh enough to reuse. The record is returned only when it exists,
 * points at `repoPath`, carries a recorded commit, the working tree is
 * clean, and HEAD (when readable) still equals that commit. Any other
 * condition yields `undefined` so analyze runs the full pipeline.
 */
export async function checkFastPath(repoName, repoPath, opts) {
    const registry = await readRegistry(opts.home !== undefined ? { home: opts.home } : {});
    const record = registry[repoName];
    if (!record) {
        return undefined;
    }
    // The record must describe this exact checkout.
    if (resolve(record.path) !== repoPath) {
        return undefined;
    }
    // Without a recorded commit we cannot know whether the index is fresh.
    if (record.lastCommit === undefined) {
        return undefined;
    }
    // Uncommitted changes in the working tree mean the recorded `lastCommit`
    // no longer reflects what's on disk — bypass the fast-path so analyze
    // re-runs against the edited files. If git can't answer (non-git dir,
    // git unavailable) `isWorkingTreeDirty` reports false and we fall
    // through to the HEAD-based check below, matching `readGitHead`'s
    // fallback posture.
    if (await isWorkingTreeDirty(repoPath)) {
        return undefined;
    }
    // Compare against the working tree's current HEAD so a `git pull`
    // invalidates the fast-path. If git isn't available (non-git dir,
    // shallow checkout without HEAD, etc.) the registry record is treated
    // as authoritative — the user can always --force.
    const head = await readGitHead(repoPath);
    if (head !== undefined && head !== record.lastCommit) {
        return undefined;
    }
    return record;
}
|
|
975
|
+
/**
 * Resolve the repository's current HEAD SHA via `git rev-parse HEAD`.
 * Resolves to `undefined` on any failure: spawn error (git missing, bad
 * cwd), non-zero exit, or empty stdout. Never rejects — `new Promise` is
 * used here only to adapt the child_process event API.
 */
async function readGitHead(repoPath) {
    return new Promise((resolveP) => {
        let output = "";
        let done = false;
        // Guard against double settlement — `error` and `close` can both fire.
        const settle = (value) => {
            if (!done) {
                done = true;
                resolveP(value);
            }
        };
        const child = spawn("git", ["rev-parse", "HEAD"], {
            cwd: repoPath,
            stdio: ["ignore", "pipe", "ignore"],
        });
        child.stdout.setEncoding("utf8");
        child.stdout.on("data", (chunk) => {
            output += chunk;
        });
        child.on("error", () => settle(undefined));
        child.on("close", (code) => {
            if (code !== 0) {
                settle(undefined);
                return;
            }
            const sha = output.trim();
            settle(sha.length > 0 ? sha : undefined);
        });
    });
}
|
|
1007
|
+
/**
 * Probe whether the working tree has uncommitted changes. Resolves `true`
 * iff `git status --porcelain` exits 0 with non-empty stdout. Any spawn
 * error, non-zero exit, or git-unavailable case resolves `false` so the
 * caller never blocks the fast-path on a git failure — mirroring
 * `readGitHead`'s "cannot determine" fallback.
 *
 * Exported so the CLI test suite can assert the fallback posture directly
 * without spawning a whole `runAnalyze` pipeline.
 */
export async function isWorkingTreeDirty(repoPath) {
    return new Promise((resolveP) => {
        let output = "";
        let done = false;
        // Guard against double settlement — `error` and `close` can both fire.
        const settle = (value) => {
            if (!done) {
                done = true;
                resolveP(value);
            }
        };
        const child = spawn("git", ["status", "--porcelain"], {
            cwd: repoPath,
            stdio: ["ignore", "pipe", "ignore"],
        });
        child.stdout.setEncoding("utf8");
        child.stdout.on("data", (chunk) => {
            output += chunk;
        });
        child.on("error", () => settle(false));
        child.on("close", (code) => settle(code === 0 && output.length > 0));
    });
}
|
|
1048
|
+
/**
 * Emit pipeline warnings to stderr. By default, warnings sharing the same
 * `<phase>:` prefix are collapsed into one summary line plus one sample so
 * a run doesn't drown the terminal with hundreds of near-identical lines;
 * `verbose=true` prints every warning individually.
 */
function logWarnings(warnings, verbose) {
    if (verbose) {
        for (const warning of warnings) {
            log(`codehub analyze: ${warning}`);
        }
        return;
    }
    // Bucket by the text before the first colon; warnings with no colon at
    // all are passed through untouched at the end.
    const byPrefix = new Map();
    const unprefixed = [];
    for (const warning of warnings) {
        const colon = warning.indexOf(":");
        if (colon === -1) {
            unprefixed.push(warning);
            continue;
        }
        const prefix = warning.slice(0, colon);
        const group = byPrefix.get(prefix);
        if (group === undefined) {
            byPrefix.set(prefix, { count: 1, sample: warning });
        }
        else {
            group.count += 1;
        }
    }
    // A singleton group prints its one warning verbatim; bigger groups get
    // a count summary plus the first sample so operators still see context.
    for (const [prefix, { count, sample }] of byPrefix) {
        if (count === 1) {
            log(`codehub analyze: ${sample}`);
            continue;
        }
        log(`codehub analyze: ${prefix}: ${count} warnings (use --verbose to see all)`);
        log(`codehub analyze: e.g. ${sample}`);
    }
    for (const warning of unprefixed) {
        log(`codehub analyze: ${warning}`);
    }
}
|
|
1091
|
+
/** Write one diagnostic line to stderr. */
function log(message) {
    // Using console.warn keeps stdout reserved for machine-readable output from
    // subcommands like `sql` and `query --json`.
    console.warn(message);
}
|
|
1096
|
+
//# sourceMappingURL=analyze.js.map
|