@codragraph/cli 1.6.4 → 2.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (61) hide show
  1. package/README.md +34 -0
  2. package/dist/cli/analyze.d.ts +22 -0
  3. package/dist/cli/analyze.js +107 -4
  4. package/dist/cli/compress-stats.d.ts +29 -0
  5. package/dist/cli/compress-stats.js +97 -0
  6. package/dist/cli/graphstore.d.ts +6 -2
  7. package/dist/cli/graphstore.js +24 -2
  8. package/dist/cli/index.js +16 -2
  9. package/dist/cli/profile-heap.d.ts +35 -0
  10. package/dist/cli/profile-heap.js +126 -0
  11. package/dist/cli/setup.d.ts +13 -0
  12. package/dist/cli/setup.js +22 -11
  13. package/dist/cli/skill-gen.d.ts +14 -2
  14. package/dist/cli/skill-gen.js +52 -19
  15. package/dist/cli/tool.js +4 -0
  16. package/dist/core/embeddings/embedding-pipeline.js +24 -7
  17. package/dist/core/group/bridge-db.js +111 -24
  18. package/dist/core/lbug/content-read.d.ts +46 -0
  19. package/dist/core/lbug/content-read.js +64 -0
  20. package/dist/core/lbug/csv-generator.d.ts +2 -6
  21. package/dist/core/lbug/csv-generator.js +45 -12
  22. package/dist/core/lbug/lbug-adapter.d.ts +4 -1
  23. package/dist/core/lbug/lbug-adapter.js +153 -21
  24. package/dist/core/lbug/schema.d.ts +7 -7
  25. package/dist/core/lbug/schema.js +18 -0
  26. package/dist/core/run-analyze.d.ts +13 -0
  27. package/dist/core/run-analyze.js +91 -4
  28. package/dist/core/search/bm25-index.js +67 -15
  29. package/dist/mcp/local/local-backend.js +22 -5
  30. package/dist/server/api.js +4 -3
  31. package/dist/storage/repo-manager.d.ts +39 -0
  32. package/dist/storage/repo-manager.js +19 -0
  33. package/hooks/claude/codragraph-hook.cjs +95 -2
  34. package/package.json +4 -4
  35. package/scripts/build-tree-sitter-proto.cjs +15 -3
  36. package/scripts/patch-tree-sitter-swift.cjs +17 -4
  37. package/skills/codragraph-api-surface.md +110 -0
  38. package/skills/codragraph-config-audit.md +146 -0
  39. package/skills/codragraph-cross-repo-impact.md +135 -0
  40. package/skills/codragraph-data-lineage.md +137 -0
  41. package/skills/codragraph-dead-code.md +119 -0
  42. package/skills/codragraph-gh-actions-debug.md +162 -0
  43. package/skills/codragraph-gh-issue-workflow.md +178 -0
  44. package/skills/codragraph-gh-pr-workflow.md +176 -0
  45. package/skills/codragraph-gh-release-workflow.md +187 -0
  46. package/skills/codragraph-git-bisect.md +176 -0
  47. package/skills/codragraph-git-force-push.md +147 -0
  48. package/skills/codragraph-git-history-rewrite.md +174 -0
  49. package/skills/codragraph-git-rebase-vs-merge.md +138 -0
  50. package/skills/codragraph-git-recovery.md +181 -0
  51. package/skills/codragraph-git-worktree.md +145 -0
  52. package/skills/codragraph-migration-tracking.md +130 -0
  53. package/skills/codragraph-notebook-context.md +136 -0
  54. package/skills/codragraph-observability-coverage.md +125 -0
  55. package/skills/codragraph-onboarding.md +129 -0
  56. package/skills/codragraph-perf-hotspots.md +132 -0
  57. package/skills/codragraph-project-switcher.md +116 -0
  58. package/skills/codragraph-security-audit.md +144 -0
  59. package/skills/codragraph-sql-tracing.md +122 -0
  60. package/skills/codragraph-supply-chain-audit.md +153 -0
  61. package/skills/codragraph-test-coverage.md +97 -0
package/README.md CHANGED
@@ -155,6 +155,9 @@ codragraph analyze --embeddings # Enable embedding generation (slower, better
155
155
  codragraph analyze --skip-agents-md # Preserve custom AGENTS.md/CLAUDE.md codragraph section edits
156
156
  codragraph analyze --verbose # Log skipped files when parsers are unavailable
157
157
  codragraph analyze --max-file-size 1024 # Skip files larger than N KB (default: 512, cap: 32768)
158
+ codragraph analyze --compress brotli # Per-row body compression. Also: zstd, none.
159
+ codragraph profile-heap [path] # Run analyze with v8 heap-snapshot instrumentation
160
+ codragraph profile-heap --no-summary # Same, but skip the post-run RSS / heapUsed table
158
161
  codragraph mcp # Start MCP server (stdio) — serves all indexed repos
159
162
  codragraph serve # Start local HTTP server (multi-repo) for web UI
160
163
  codragraph index # Register an existing .codragraph/ folder into the global registry
@@ -306,6 +309,37 @@ echo "vendor/" >> .codragraphignore
306
309
  echo "dist/" >> .codragraphignore
307
310
  ```
308
311
 
312
+ If you want to know **which phase** is dragging the heap up before
313
+ deciding what to mitigate, run `codragraph profile-heap`. It writes a
314
+ v8 heap snapshot at every phase boundary plus a JSONL timeline of
315
+ `process.memoryUsage()` and prints a per-phase RSS / `heapUsed` table:
316
+
317
+ ```bash
318
+ codragraph profile-heap # writes .codragraph/heap-profiles/
319
+ # → load any .heapsnapshot in Chrome DevTools → Memory → Load
320
+ ```
321
+
322
+ Each snapshot is 100–500 MB, so the command is opt-in only. The JSONL
323
+ timeline is small enough to share for triage even when the snapshots
324
+ are too big.
325
+
326
+ ### Index size — opt-in per-row compression
327
+
328
+ For repos where `.codragraph/lbug` itself has grown large:
329
+
330
+ ```bash
331
+ codragraph analyze --compress brotli # Node ≥ 18, brotli quality 6
332
+ codragraph analyze --compress zstd # Node ≥ 22.15, zstd level 3
333
+ codragraph analyze --compress none # explicit default
334
+ ```
335
+
336
+ `--compress` routes every node-row content field through the matching
337
+ encoder before it's written to the CSV / lbug; readers decode
338
+ transparently via the per-row `contentEncoding` tag. With the flag
339
+ unset, the on-disk layout is byte-identical to pre-1.8 indexes. Pre-1.8
340
+ indexes auto-trigger a full re-analyze the first time a 1.8+ CLI runs
341
+ against them (one-time cost, surfaced in the analyze log).
342
+
309
343
  ### Large files are being skipped
310
344
 
311
345
  By default the walker skips files larger than **512 KB** (see log line `Skipped N large files (>512KB)`). Raise the threshold via either the CLI flag or the environment variable — both accept a value in **KB**:
@@ -39,5 +39,27 @@ export interface AnalyzeOptions {
39
39
  * `CODRAGRAPH_MAX_FILE_SIZE` for the rest of the pipeline.
40
40
  */
41
41
  maxFileSize?: string;
42
+ /**
43
+ * First-run auto-setup gate. Default `true` (commander injects this from the
44
+ * `--no-setup` flag — see CLI registration). When `true`, `analyze` detects a
45
+ * missing `~/.codragraph/registry.json` and runs editor setup before indexing,
46
+ * making `npx @codragraph/cli analyze` a true zero-install entry. Pass
47
+ * `--no-setup` to opt out (CI, headless servers, automated pipelines).
48
+ */
49
+ setup?: boolean;
50
+ /**
51
+ * Comma-separated list of editor targets for `--skills` output. Valid values
52
+ * are `claude`, `cursor`, `opencode`, `codex`. Default: `claude` (matches
53
+ * pre-flag behavior). Unknown values are reported and ignored; if no valid target remains, `claude` is used.
54
+ */
55
+ skillTargets?: string;
56
+ /**
57
+ * RFC 0001 Phase 2 — opt-in per-row content compression. Accepts
58
+ * `'none'` (default), `'brotli'` (Node ≥ 18), or `'zstd'` (Node ≥
59
+ * 22.15). Compressed indexes are still queryable via the standard
60
+ * read path; decode happens at every external-consumer boundary
61
+ * (MCP, HTTP API, embeddings, CLI tools).
62
+ */
63
+ compress?: 'none' | 'brotli' | 'zstd';
42
64
  }
43
65
  export declare const analyzeCommand: (inputPath?: string, options?: AnalyzeOptions) => Promise<void>;
@@ -11,6 +11,7 @@ import path from 'path';
11
11
  import { execFileSync } from 'child_process';
12
12
  import v8 from 'v8';
13
13
  import cliProgress from 'cli-progress';
14
+ import * as fsSync from 'node:fs';
14
15
  import { closeLbug } from '../core/lbug/lbug-adapter.js';
15
16
  import { getStoragePaths, getGlobalRegistryPath, RegistryNameCollisionError, } from '../storage/repo-manager.js';
16
17
  import { getGitRoot, hasGitDir } from '../storage/git.js';
@@ -52,9 +53,77 @@ export const analyzeCommand = async (inputPath, options) => {
52
53
  if (options?.verbose) {
53
54
  process.env.CODRAGRAPH_VERBOSE = '1';
54
55
  }
56
+ // RFC 0001 Phase 2 — validate --compress before doing any work. Catching
57
+ // a typo or an unsupported encoding here is much friendlier than failing
58
+ // mid-analyze with an opaque CSV-write error. Node-version gating for
59
+ // zstd lives in @codragraph/graphstore via isEncodingSupported, but we
60
+ // import the check here so the CLI can offer the brotli fallback hint.
61
+ if (options?.compress && options.compress !== 'none') {
62
+ if (options.compress !== 'brotli' && options.compress !== 'zstd') {
63
+ console.error(` --compress must be one of: none, brotli, zstd (got: ${options.compress})`);
64
+ process.exitCode = 2;
65
+ return;
66
+ }
67
+ if (options.compress === 'zstd') {
68
+ const { isEncodingSupported } = await import('@codragraph/graphstore');
69
+ if (!isEncodingSupported('zstd')) {
70
+ console.error(' --compress zstd requires Node ≥ 22.15.0 (native node:zlib zstd).\n' +
71
+ ` Detected Node ${process.version}. Use --compress brotli instead, or upgrade Node.`);
72
+ process.exitCode = 2;
73
+ return;
74
+ }
75
+ }
76
+ // RFC 0001 Phase 2.5 — BM25 / FTS now drops `content` from its
77
+ // property list when meta.compress is non-'none' (see
78
+ // `core/search/bm25-index.ts`), so search inside compressed bodies
79
+ // gracefully falls back to name-only matches instead of tokenising
80
+ // base64 garbage. Surface the trade-off so users know what they're
81
+ // opting into.
82
+ console.warn(` Note: --compress ${options.compress} reduces .codragraph/lbug size.\n` +
83
+ ` BM25 search will index symbol names only (function bodies are not tokenised\n` +
84
+ ` when compressed); embeddings, graph queries, and \`context\` / \`impact\` are\n` +
85
+ ` unaffected. Run with --compress none if you rely on full-text search inside\n` +
86
+ ` source bodies.`);
87
+ }
55
88
  if (options?.maxFileSize) {
56
89
  process.env.CODRAGRAPH_MAX_FILE_SIZE = options.maxFileSize;
57
90
  }
91
+ // ── Auto-reindex coalesce-file cleanup ─────────────────────────────
92
+ // When the Claude Code PostToolUse hook spawns us in background mode, it
93
+ // passes the coalesce file path through this env var. We delete it on process
94
+ // exit so the next commit immediately triggers a new reindex (rather
95
+ // than being blocked by a 10-min mtime TTL). The hook's TTL is just a
96
+ // crash safety net — this is the happy path. NOTE(review): the handler is registered after the --compress validation above, so those early returns leave the file for the TTL to clear.
97
+ const reindexLockPath = process.env.CODRAGRAPH_REINDEX_LOCK_PATH || '';
98
+ if (reindexLockPath) {
99
+ process.on('exit', () => {
100
+ try {
101
+ fsSync.unlinkSync(reindexLockPath);
102
+ }
103
+ catch {
104
+ /* already gone or unreadable — fine */
105
+ }
106
+ });
107
+ }
108
+ // ── First-run auto-setup ───────────────────────────────────────────
109
+ // Makes `npx @codragraph/cli analyze` a true one-command entry. We detect
110
+ // first-run by the absence of the global registry — analyze writes to it on
111
+ // every successful index, so it's a reliable "this user has never run us
112
+ // before" signal. Opt out with `--no-setup` for CI / headless contexts;
113
+ // commander maps `--no-setup` to `options.setup === false`.
114
+ if (options?.setup !== false) {
115
+ let registryExists = true;
116
+ try {
117
+ await fs.access(getGlobalRegistryPath());
118
+ }
119
+ catch {
120
+ registryExists = false;
121
+ }
122
+ if (!registryExists) {
123
+ const { runSetup } = await import('./setup.js');
124
+ await runSetup({ skipNextSteps: true, compactHeader: true });
125
+ }
126
+ }
58
127
  console.log('\n CodraGraph Analyzer\n');
59
128
  let repoPath;
60
129
  if (inputPath) {
@@ -168,6 +237,9 @@ export const analyzeCommand = async (inputPath, options) => {
168
237
  // be able to accept the duplicate name without also paying the
169
238
  // cost of a full pipeline re-index. See #829 review round 2.
170
239
  allowDuplicateName: options?.allowDuplicateName,
240
+ // RFC 0001 Phase 2 — pass through the per-row encoding choice.
241
+ // Default 'none' / undefined keeps the pre-Phase-2 wire layout.
242
+ compress: options?.compress,
171
243
  }, {
172
244
  onProgress: (_phase, percent, message) => {
173
245
  updateBar(percent, message);
@@ -190,9 +262,23 @@ export const analyzeCommand = async (inputPath, options) => {
190
262
  if (options?.skills && result.pipelineResult) {
191
263
  updateBar(99, 'Generating skill files...');
192
264
  try {
193
- const { generateSkillFiles } = await import('./skill-gen.js');
265
+ const { generateSkillFiles, SKILL_TARGETS } = await import('./skill-gen.js');
194
266
  const { generateAIContextFiles } = await import('./ai-context.js');
195
- const skillResult = await generateSkillFiles(repoPath, result.repoName, result.pipelineResult);
267
+ // Parse --skill-targets CSV; default to ['claude'] when omitted.
268
+ // Unknown tokens are reported once and dropped — we don't fail the
269
+ // whole analyze for a typo here, but we do want the user to see it.
270
+ const requestedTargets = (options?.skillTargets || 'claude')
271
+ .split(',')
272
+ .map((s) => s.trim().toLowerCase())
273
+ .filter(Boolean);
274
+ const validTargets = requestedTargets.filter((t) => SKILL_TARGETS.includes(t));
275
+ const invalidTargets = requestedTargets.filter((t) => !SKILL_TARGETS.includes(t));
276
+ if (invalidTargets.length > 0) {
277
+ barLog(` Skills: unknown target(s) ignored: ${invalidTargets.join(', ')} ` +
278
+ `(valid: ${SKILL_TARGETS.join(', ')})`);
279
+ }
280
+ const targetsToUse = validTargets.length > 0 ? validTargets : ['claude'];
281
+ const skillResult = await generateSkillFiles(repoPath, result.repoName, result.pipelineResult, targetsToUse);
196
282
  if (skillResult.skills.length > 0) {
197
283
  barLog(` Generated ${skillResult.skills.length} skill files`);
198
284
  // Re-generate AI context files now that we have skill info
@@ -235,11 +321,28 @@ export const analyzeCommand = async (inputPath, options) => {
235
321
  console.log(`\n Repository indexed successfully (${totalTime}s)\n`);
236
322
  console.log(` ${(s.nodes ?? 0).toLocaleString()} nodes | ${(s.edges ?? 0).toLocaleString()} edges | ${s.communities ?? 0} clusters | ${s.processes ?? 0} flows`);
237
323
  console.log(` ${repoPath}`);
324
+ // Surface @codragraph/compress's value prop with concrete numbers: how
325
+ // many tokens of distilled context did we generate. Best-effort — never
326
+ // fail the analyze for a stat read.
238
327
  try {
239
- await fs.access(getGlobalRegistryPath());
328
+ const { estimateTokens } = await import('./compress-stats.js');
329
+ const candidates = ['AGENTS.md', 'CLAUDE.md'];
330
+ const sizes = [];
331
+ for (const file of candidates) {
332
+ try {
333
+ const content = await fs.readFile(path.join(repoPath, file), 'utf-8');
334
+ sizes.push(`${file} ~${estimateTokens(content).toLocaleString()} tokens`);
335
+ }
336
+ catch {
337
+ /* file not generated for this run — skip */
338
+ }
339
+ }
340
+ if (sizes.length > 0) {
341
+ console.log(` @codragraph/compress: ${sizes.join(' | ')}`);
342
+ }
240
343
  }
241
344
  catch {
242
- console.log('\n Tip: Run `codragraph setup` to configure MCP for your editor.');
345
+ /* compress-stats import failed — non-fatal */
243
346
  }
244
347
  console.log('');
245
348
  }
@@ -0,0 +1,29 @@
1
+ /** chars/4 token estimate. Matches @codragraph/compress's `estimateTokens`. */
2
+ export declare function estimateTokens(text: string): number;
3
+ /**
4
+ * Walk a result object and collect every file path we can find. Looks for
5
+ * `filePath`, `file_path`, and `file` keys at any depth. Used to estimate
6
+ * the raw-grep baseline (sum of source bytes the agent would have read
7
+ * without CodraGraph).
8
+ */
9
+ export declare function collectFilePaths(obj: unknown, paths?: Set<string>): Set<string>;
10
+ /**
11
+ * Estimate raw-grep-equivalent token count by summing on-disk byte sizes of
12
+ * the referenced files (bytes / 4, floored). Returns null if any path is missing, unreadable, or not a regular file —
13
+ * in that case we silently skip the comparison rather than show a misleading
14
+ * number.
15
+ */
16
+ export declare function estimateRawGrepTokens(filePaths: Iterable<string>): number | null;
17
+ /**
18
+ * Format a one-line token-savings summary suitable for stderr display.
19
+ * If a raw baseline is provided AND it's larger than the structured response,
20
+ * the line includes the savings percentage. Otherwise it only reports
21
+ * the structured token count.
22
+ */
23
+ export declare function formatTokenLine(structuredTokens: number, rawTokens?: number | null): string;
24
+ /**
25
+ * Compute and print the token-savings line for a tool result. Best-effort:
26
+ * never throws, never blocks output. Goes to stderr so JSON consumers piping
27
+ * stdout to jq stay clean.
28
+ */
29
+ export declare function emitTokenStats(result: unknown): void;
@@ -0,0 +1,97 @@
1
+ /**
2
+ * Token-savings reporter for CLI output.
3
+ *
4
+ * Surfaces the @codragraph/compress value proposition on every `query`,
5
+ * `context`, `impact`, and `analyze` invocation: how many tokens of
6
+ * structured context did we return vs the equivalent raw-grep response.
7
+ *
8
+ * Uses the same chars/4 heuristic as @codragraph/compress's `estimateTokens`
9
+ * for cross-package consistency. Inlined rather than imported because pulling
10
+ * in @codragraph/compress as a runtime dep also pulls @codragraph/harness as a
11
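+ * transitive dependency — too heavy for what is logically a one-line approximation. When
12
+ * we add real LLM compression (`--compress` opt-in), the package import will
13
+ * follow. NOTE: `analyze --compress` currently selects per-row brotli/zstd storage encoding, not LLM compression.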
14
+ */
15
+ import * as fsSync from 'node:fs';
16
+ /** chars/4 token estimate: floor(whitespace-trimmed length / 4). Matches @codragraph/compress's `estimateTokens`. */
17
+ export function estimateTokens(text) {
18
+ return Math.max(0, Math.floor(text.trim().length / 4));
19
+ }
20
+ /**
21
+ * Walk a result object and collect every file path we can find. Looks for
22
+ * non-empty string values under `filePath`, `file_path`, and `file` keys at any depth,
23
+ * adding them to `paths` (mutated in place and returned). Used to estimate
24
+ * the raw-grep baseline (sum of source bytes the agent would have read without CodraGraph).
25
+ */
26
+ export function collectFilePaths(obj, paths = new Set()) {
27
+ if (!obj || typeof obj !== 'object') // null, undefined and primitives carry no paths
28
+ return paths;
29
+ if (Array.isArray(obj)) {
30
+ for (const item of obj)
31
+ collectFilePaths(item, paths);
32
+ return paths;
33
+ }
34
+ for (const [key, value] of Object.entries(obj)) {
35
+ if ((key === 'filePath' || key === 'file_path' || key === 'file') &&
36
+ typeof value === 'string' &&
37
+ value.length > 0) {
38
+ paths.add(value);
39
+ }
40
+ else if (typeof value === 'object') { // includes null; the guard at the top of the recursive call handles it
41
+ collectFilePaths(value, paths);
42
+ }
43
+ }
44
+ return paths;
45
+ }
46
+ /**
47
+ * Estimate raw-grep-equivalent token count as (sum of on-disk byte sizes of
48
+ * the referenced files) / 4, floored. Returns null if any path fails to stat or is not a regular file —
49
+ * in that case we silently skip the comparison rather than show a misleading
50
+ * number. Sizes are bytes, not characters, despite the `totalChars` local.
51
+ */
52
+ export function estimateRawGrepTokens(filePaths) {
53
+ let totalChars = 0;
54
+ for (const fp of filePaths) {
55
+ try {
56
+ const stat = fsSync.statSync(fp);
57
+ if (!stat.isFile()) // directories etc. invalidate the whole estimate
58
+ return null;
59
+ totalChars += stat.size; // byte count from stat, used as a stand-in for characters
60
+ }
61
+ catch {
62
+ return null;
63
+ }
64
+ }
65
+ return Math.floor(totalChars / 4);
66
+ }
67
+ /**
68
+ * Format a one-line token-savings summary suitable for stderr display.
69
+ * If a raw baseline is provided AND it's larger than the structured response,
70
+ * the line includes the savings percentage (rounded to a whole percent). Otherwise
71
+ * (baseline null, undefined, 0, or not larger) it only reports the structured token count.
72
+ */
73
+ export function formatTokenLine(structuredTokens, rawTokens) {
74
+ if (rawTokens && rawTokens > structuredTokens) {
75
+ const savings = Math.round((1 - structuredTokens / rawTokens) * 100);
76
+ return (` @codragraph/compress: ~${structuredTokens.toLocaleString()} tokens of structured context ` +
77
+ `(vs ~${rawTokens.toLocaleString()} tokens of raw source — ${savings}% smaller).`);
78
+ }
79
+ return ` @codragraph/compress: ~${structuredTokens.toLocaleString()} tokens of structured context.`;
80
+ }
81
+ /**
82
+ * Compute and print the token-savings line for a tool result (strings are measured as-is, anything else via JSON.stringify). Best-effort:
83
+ * never throws, never blocks output — any error is swallowed and the line is simply not printed. Goes to stderr so JSON consumers piping
84
+ * stdout to jq stay clean.
85
+ */
86
+ export function emitTokenStats(result) {
87
+ try {
88
+ const structured = typeof result === 'string' ? result : JSON.stringify(result);
89
+ const sTokens = estimateTokens(structured);
90
+ const files = collectFilePaths(result);
91
+ const rawTokens = files.size > 0 ? estimateRawGrepTokens(files) : null; // no referenced files means no baseline
92
+ process.stderr.write('\n' + formatTokenLine(sTokens, rawTokens) + '\n');
93
+ }
94
+ catch {
95
+ /* never let stats break the actual output */
96
+ }
97
+ }
@@ -12,7 +12,9 @@ export declare const logCommand: (opts?: {
12
12
  limit?: string;
13
13
  }) => Promise<void>;
14
14
  export declare const branchListCommand: () => Promise<void>;
15
- export declare const diffCommand: (from: string, to: string) => Promise<void>;
15
+ export declare const diffCommand: (from: string, to: string, opts?: {
16
+ json?: boolean;
17
+ }) => Promise<void>;
16
18
  export declare const commitCommand: (opts?: {
17
19
  message?: string;
18
20
  }) => Promise<void>;
@@ -36,5 +38,7 @@ export declare const mergeCommand: (target: string, opts?: {
36
38
  export declare const gcCommand: (opts?: {
37
39
  dryRun?: boolean;
38
40
  }) => Promise<void>;
39
- export declare const diffSemanticCommand: (from: string, to: string) => Promise<void>;
41
+ export declare const diffSemanticCommand: (from: string, to: string, opts?: {
42
+ json?: boolean;
43
+ }) => Promise<void>;
40
44
  export { DEFAULT_BRANCH };
@@ -84,7 +84,7 @@ export const branchListCommand = async () => {
84
84
  // ──────────────────────────────────────────────────────────────────────
85
85
  // codragraph diff <from> <to>
86
86
  // ──────────────────────────────────────────────────────────────────────
87
- export const diffCommand = async (from, to) => {
87
+ export const diffCommand = async (from, to, opts = {}) => {
88
88
  const ctx = await resolveGraphstore(process.cwd());
89
89
  const fromCommitId = await resolveCommitTarget(ctx, from);
90
90
  const toCommitId = await resolveCommitTarget(ctx, to);
@@ -95,6 +95,17 @@ export const diffCommand = async (from, to) => {
95
95
  from: fromCommit.snapshot,
96
96
  to: toCommit.snapshot,
97
97
  });
98
+ // --json: emit a machine-readable payload for downstream consumers
99
+ // (GitHub Action comment formatter, IDE plugins, etc). Keep human and
100
+ // JSON paths separate — never sneak JSON into the human path's stdout.
101
+ if (opts.json) {
102
+ process.stdout.write(JSON.stringify({
103
+ from: { commit: fromCommitId, message: fromCommit.message },
104
+ to: { commit: toCommitId, message: toCommit.message },
105
+ diff,
106
+ }, null, 2) + '\n');
107
+ return;
108
+ }
98
109
  process.stdout.write(`From: ${fromCommitId.slice(7, 7 + 12)} ${fromCommit.message}\n`);
99
110
  process.stdout.write(`To: ${toCommitId.slice(7, 7 + 12)} ${toCommit.message}\n\n`);
100
111
  let totalAdded = 0;
@@ -581,7 +592,7 @@ const formatBytes = (n) => {
581
592
  // classified modifications, added/removed APIs, and process changes. We
582
593
  // expose it as a separate module-local helper so the CLI handler can
583
594
  // dispatch on the flag.
584
- export const diffSemanticCommand = async (from, to) => {
595
+ export const diffSemanticCommand = async (from, to, opts = {}) => {
585
596
  const ctx = await resolveGraphstore(process.cwd());
586
597
  const fromCommit = await readCommit(ctx.cas, await resolveCommitTarget(ctx, from));
587
598
  const toCommit = await readCommit(ctx.cas, await resolveCommitTarget(ctx, to));
@@ -590,6 +601,17 @@ export const diffSemanticCommand = async (from, to) => {
590
601
  from: fromCommit.snapshot,
591
602
  to: toCommit.snapshot,
592
603
  });
604
+ // --json: like plain diff's payload, but endpoints carry the caller's `ref`
605
+ // (not the resolved `commit` id) and the body key is `semantic`, not `diff`. The
606
+ // PR-review GitHub Action consumes this directly to render the Markdown comment without parsing free-form text.
607
+ if (opts.json) {
608
+ process.stdout.write(JSON.stringify({
609
+ from: { ref: from, message: fromCommit.message },
610
+ to: { ref: to, message: toCommit.message },
611
+ semantic: d,
612
+ }, null, 2) + '\n');
613
+ return;
614
+ }
593
615
  process.stdout.write(`From: ${from} (${fromCommit.message})\n`);
594
616
  process.stdout.write(`To: ${to} (${toCommit.message})\n\n`);
595
617
  if (d.addedAPIs.length > 0) {
package/dist/cli/index.js CHANGED
@@ -19,6 +19,7 @@ program
19
19
  .option('-f, --force', 'Force full re-index even if up to date')
20
20
  .option('--embeddings', 'Enable embedding generation for semantic search (off by default)')
21
21
  .option('--skills', 'Generate repo-specific skill files from detected communities')
22
+ .option('--skill-targets <list>', 'CSV of editor targets for --skills (claude, cursor, opencode, codex). Default: claude.')
22
23
  .option('--skip-agents-md', 'Skip updating the codragraph section in AGENTS.md and CLAUDE.md')
23
24
  .option('--no-stats', 'Omit volatile file/symbol counts from AGENTS.md and CLAUDE.md')
24
25
  .option('--skip-git', 'Index a folder without requiring a .git directory')
@@ -28,12 +29,24 @@ program
28
29
  'Leaves `-r <name>` ambiguous for the two paths; use -r <path> to disambiguate.')
29
30
  .option('-v, --verbose', 'Enable verbose ingestion warnings (default: false)')
30
31
  .option('--max-file-size <kb>', 'Skip files larger than this (KB). Default: 512. Hard cap: 32768 (tree-sitter limit).')
32
+ .option('--no-setup', 'Skip the first-run editor setup (auto-runs once when ~/.codragraph/registry.json is missing)')
33
+ .option('--compress <encoding>', 'Compress per-row content (RFC 0001 Phase 2). One of: none (default), brotli, zstd. zstd requires Node ≥ 22.15.', 'none')
31
34
  .addHelpText('after', '\nEnvironment variables:\n' +
32
35
  ' CODRAGRAPH_NO_GITIGNORE=1 Skip .gitignore parsing (still reads .codragraphignore)\n' +
33
36
  ' CODRAGRAPH_MAX_FILE_SIZE=N Override large-file skip threshold (KB). Default 512, max 32768.\n' +
34
37
  '\nTip: `.codragraphignore` supports `.gitignore`-style negation. Add e.g.\n' +
35
38
  ' `!__tests__/` to index a directory that is auto-filtered by default (#771).')
36
39
  .action(createLazyAction(() => import('./analyze.js'), 'analyzeCommand'));
40
+ program
41
+ .command('profile-heap [path]')
42
+ .description('Run analyze with heap-profile instrumentation (RFC 0002 Phase 1). ' +
43
+ 'Writes per-phase v8 heap snapshots + a JSONL RSS timeline under ' +
44
+ '.codragraph/heap-profiles/, then prints a summary table.')
45
+ .option('-f, --force', 'Force full re-index (analyze flag, passed through)')
46
+ .option('--skip-git', 'Index a folder without requiring a .git directory')
47
+ .option('--no-setup', 'Skip first-run editor setup')
48
+ .option('--no-summary', 'Skip the post-run summary table (raw artifacts only)')
49
+ .action(createLazyAction(() => import('./profile-heap.js'), 'profileHeapCommand'));
37
50
  program
38
51
  .command('index [path...]')
39
52
  .description('Register an existing .codragraph/ folder into the global registry (no re-analysis needed)')
@@ -192,12 +205,13 @@ program
192
205
  .command('diff <from> <to>')
193
206
  .description('Structural diff between two graph commits or branches')
194
207
  .option('--semantic', 'Use the semantic differ (added APIs, classified modifications, processes)')
208
+ .option('--json', 'Emit machine-readable JSON instead of human-readable text (for CI / GitHub Action consumers)')
195
209
  .action(async (from, to, opts) => {
196
210
  const mod = await import('./graphstore.js');
197
211
  if (opts.semantic)
198
- await mod.diffSemanticCommand(from, to);
212
+ await mod.diffSemanticCommand(from, to, { json: opts.json });
199
213
  else
200
- await mod.diffCommand(from, to);
214
+ await mod.diffCommand(from, to, { json: opts.json });
201
215
  });
202
216
  program
203
217
  .command('merge <branch>')
@@ -0,0 +1,35 @@
1
+ /**
2
+ * profile-heap — RFC 0002 Phase 1 entry point.
3
+ *
4
+ * A thin wrapper around `analyze` that flips on the heap-profile
5
+ * instrumentation already living in `runFullAnalysis`, then prints a
6
+ * per-phase RSS / heapUsed summary table after the run finishes.
7
+ *
8
+ * Why a dedicated subcommand instead of just documenting the env var?
9
+ * - Discoverability: `codragraph --help` lists it next to `analyze`.
10
+ * - One-shot UX: users (and the maintainer) get a useful summary table
11
+ * without having to spelunk through Chrome DevTools to compare
12
+ * snapshots. The `.heapsnapshot` files are still written for deep
13
+ * dives; the summary just makes the cheap signal (RSS curve, heapUsed
14
+ * curve) visible at a glance.
15
+ * - Phase 1 of RFC 0002 is profile-first by design — we ship the tool
16
+ * before any mitigation. Don't add compression, eviction, or streaming
17
+ * refactors here; that's Phase 2+ once we know which phase is the
18
+ * actual bottleneck.
19
+ *
20
+ * Side effects: writes `.codragraph/heap-profiles/<ts>-<phase>.heapsnapshot`
21
+ * (one per phase boundary, ~100-500MB each) plus a small
22
+ * `profile-summary.jsonl` timeline. Disk usage adds up fast on large
23
+ * repos — clean up between runs if you don't need the raw snapshots.
24
+ */
25
+ import { type AnalyzeOptions } from './analyze.js';
26
+ export interface ProfileHeapOptions extends AnalyzeOptions {
27
+ /**
28
+ * Commander injects this from the `--no-summary` flag — see CLI
29
+ * registration. `--no-summary` ⇒ `summary === false`. The dual-name
30
+ * convention (positive flag name, negated value) is a commander
31
+ * footgun: a `noSummary?: boolean` field would silently never fire.
32
+ */
33
+ summary?: boolean;
34
+ }
35
+ export declare const profileHeapCommand: (inputPath?: string, options?: ProfileHeapOptions) => Promise<void>;
@@ -0,0 +1,126 @@
1
+ /**
2
+ * profile-heap — RFC 0002 Phase 1 entry point.
3
+ *
4
+ * A thin wrapper around `analyze` that flips on the heap-profile
5
+ * instrumentation already living in `runFullAnalysis`, then prints a
6
+ * per-phase RSS / heapUsed summary table after the run finishes.
7
+ *
8
+ * Why a dedicated subcommand instead of just documenting the env var?
9
+ * - Discoverability: `codragraph --help` lists it next to `analyze`.
10
+ * - One-shot UX: users (and the maintainer) get a useful summary table
11
+ * without having to spelunk through Chrome DevTools to compare
12
+ * snapshots. The `.heapsnapshot` files are still written for deep
13
+ * dives; the summary just makes the cheap signal (RSS curve, heapUsed
14
+ * curve) visible at a glance.
15
+ * - Phase 1 of RFC 0002 is profile-first by design — we ship the tool
16
+ * before any mitigation. Don't add compression, eviction, or streaming
17
+ * refactors here; that's Phase 2+ once we know which phase is the
18
+ * actual bottleneck.
19
+ *
20
+ * Side effects: writes `.codragraph/heap-profiles/<ts>-<phase>.heapsnapshot`
21
+ * (one per phase boundary, ~100-500MB each) plus a small
22
+ * `profile-summary.jsonl` timeline. Disk usage adds up fast on large
23
+ * repos — clean up between runs if you don't need the raw snapshots.
24
+ */
25
+ import path from 'path';
26
+ import * as fsSync from 'node:fs';
27
+ import { getGitRoot, hasGitDir } from '../storage/git.js';
28
+ import { analyzeCommand } from './analyze.js';
29
/**
 * CLI handler for `profile-heap`: runs `analyzeCommand` with heap-profile
 * instrumentation enabled, then prints the per-phase summary table read
 * from `<repo>/.codragraph/heap-profiles/profile-summary.jsonl`.
 *
 * @param {string} [inputPath] Repository path; when omitted the git root of
 *   the current working directory is used (or the cwd itself with `skipGit`).
 * @param {object} [options] Analyze options, plus `summary` — commander sets
 *   `summary === false` for `--no-summary`, which suppresses the table.
 * @returns {Promise<void>} Resolves once analyze has finished and the summary
 *   (if any) has been printed.
 */
export const profileHeapCommand = async (inputPath, options) => {
    // Flip on the instrumentation BEFORE delegating to analyze. Setting it
    // unconditionally means the caller's environment cannot disable
    // profiling for this run.
    process.env.CODRAGRAPH_HEAP_PROFILE = '1';
    // Resolve the repo path the same way `analyze` does so the summary file
    // can be located after the run.
    let repoPath;
    if (inputPath) {
        repoPath = path.resolve(inputPath);
    }
    else {
        const gitRoot = getGitRoot(process.cwd());
        if (!gitRoot && !options?.skipGit) {
            // Let analyze produce its standard error message + exit code
            // rather than duplicating the message here.
            await analyzeCommand(inputPath, options);
            return;
        }
        repoPath = gitRoot ?? path.resolve(process.cwd());
    }
    if (!hasGitDir(repoPath) && !options?.skipGit) {
        // Same as above: analyze owns the "not a git repo" reporting.
        await analyzeCommand(inputPath, options);
        return;
    }
    // Detect whether we're the outer (pre-re-exec) process. analyzeCommand
    // calls ensureHeap() which `execFileSync`s a child with
    // --max-old-space-size=8192 on first invocation; that child runs the
    // instrumented codepath and prints its own summary before exiting. If
    // we don't bail here, the outer process re-reads the just-written
    // summary file and prints it a second time.
    //
    // Capture the flag BEFORE the await so a change to NODE_OPTIONS
    // mid-flight can't confuse us.
    const isInnerProcess = (process.env.NODE_OPTIONS || '').includes('--max-old-space-size');
    await analyzeCommand(inputPath, options);
    // Outer process: the inner already printed the summary on its way out.
    if (!isInnerProcess)
        return;
    // `--no-summary` → commander sets options.summary === false.
    if (options?.summary === false)
        return;
    const summaryPath = path.join(repoPath, '.codragraph', 'heap-profiles', 'profile-summary.jsonl');
    if (!fsSync.existsSync(summaryPath)) {
        // NOTE(review): this branch is only reachable in the inner process
        // (the outer one returned above), so the "first call" wording of the
        // message below may be stale — confirm against ensureHeap() before
        // rewording the user-facing text.
        console.log(`\n Heap profile summary not found at ${summaryPath}.\n` +
            ` This is expected on the first call (analyze re-execs with --max-old-space-size).\n` +
            ` Re-run \`codragraph profile-heap\` and the summary will appear in the second pass.\n`);
        return;
    }
    const entries = [];
    for (const line of fsSync.readFileSync(summaryPath, 'utf8').split('\n')) {
        if (line.trim().length === 0)
            continue;
        try {
            const parsed = JSON.parse(line);
            // A line can be valid JSON yet not a usable record (`null`, a bare
            // number, an object missing fields). Skip those as well so the
            // table renderer never dereferences a missing `phase` or prints NaN.
            if (isHeapSummaryEntry(parsed))
                entries.push(parsed);
        }
        catch {
            /* skip malformed lines — best-effort */
        }
    }
    if (entries.length === 0) {
        console.log(`\n Heap profile summary at ${summaryPath} is empty.\n`);
        return;
    }
    printSummary(entries, summaryPath);
};
/**
 * True when `value` carries every field the summary table reads: a string
 * `phase` and finite numeric `ts`, `rss` and `heapUsed`.
 */
function isHeapSummaryEntry(value) {
    return (typeof value === 'object' &&
        value !== null &&
        typeof value.phase === 'string' &&
        Number.isFinite(value.ts) &&
        Number.isFinite(value.rss) &&
        Number.isFinite(value.heapUsed));
}
104
+ function printSummary(entries, summaryPath) {
105
+ const peakRss = entries.reduce((m, e) => (e.rss > m ? e.rss : m), 0);
106
+ const peakHeapUsed = entries.reduce((m, e) => (e.heapUsed > m ? e.heapUsed : m), 0);
107
+ const startTs = entries[0].ts;
108
+ console.log('\n Heap-profile summary');
109
+ console.log(' ────────────────────');
110
+ console.log(' Phase'.padEnd(28) +
111
+ ' Δt(s)'.padEnd(10) +
112
+ ' RSS(MB)'.padEnd(12) +
113
+ ' heapUsed(MB)'.padEnd(16) +
114
+ ' Snapshot');
115
+ for (const e of entries) {
116
+ const dt = ((e.ts - startTs) / 1000).toFixed(1);
117
+ const rssMb = (e.rss / 1024 / 1024).toFixed(0);
118
+ const heapMb = (e.heapUsed / 1024 / 1024).toFixed(0);
119
+ console.log(` ${e.phase.padEnd(26)} ${dt.padStart(6)} ${rssMb.padStart(7)} ${heapMb.padStart(11)} ${e.snapshotFile}`);
120
+ }
121
+ console.log(' ────────────────────');
122
+ console.log(` peak RSS: ${(peakRss / 1024 / 1024).toFixed(0)} MB`);
123
+ console.log(` peak heapUsed: ${(peakHeapUsed / 1024 / 1024).toFixed(0)} MB`);
124
+ console.log(` raw timeline: ${summaryPath}`);
125
+ console.log(` snapshots dir: ${path.dirname(summaryPath)} (open .heapsnapshot files in Chrome DevTools → Memory → Load)\n`);
126
+ }
@@ -5,4 +5,17 @@
5
5
  * Detects installed AI editors and writes the appropriate MCP config
6
6
  * so the CodraGraph MCP server is available in all projects.
7
7
  */
8
+ interface SetupResult {
9
+ configured: string[];
10
+ skipped: string[];
11
+ errors: string[];
12
+ }
13
+ export interface RunSetupOptions {
14
+ /** Suppress the trailing "Next steps" block (used when analyze auto-runs setup). */
15
+ skipNextSteps?: boolean;
16
+ /** Suppress the "CodraGraph Setup" header (used when analyze auto-runs setup). */
17
+ compactHeader?: boolean;
18
+ }
19
+ export declare const runSetup: (options?: RunSetupOptions) => Promise<SetupResult>;
8
20
  export declare const setupCommand: () => Promise<void>;
21
+ export {};