@codragraph/cli 1.6.3 → 2.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (89)
  1. package/README.md +50 -16
  2. package/dist/cli/ai-context.js +2 -2
  3. package/dist/cli/analyze.d.ts +22 -0
  4. package/dist/cli/analyze.js +111 -8
  5. package/dist/cli/compress-stats.d.ts +29 -0
  6. package/dist/cli/compress-stats.js +97 -0
  7. package/dist/cli/graphstore.d.ts +6 -2
  8. package/dist/cli/graphstore.js +24 -2
  9. package/dist/cli/index.js +17 -6
  10. package/dist/cli/profile-heap.d.ts +35 -0
  11. package/dist/cli/profile-heap.js +126 -0
  12. package/dist/cli/setup.d.ts +13 -0
  13. package/dist/cli/setup.js +75 -29
  14. package/dist/cli/skill-gen.d.ts +14 -2
  15. package/dist/cli/skill-gen.js +53 -20
  16. package/dist/cli/tool.js +4 -0
  17. package/dist/config/ignore-service.js +1 -1
  18. package/dist/core/embeddings/embedding-pipeline.js +24 -7
  19. package/dist/core/group/bridge-db.js +111 -24
  20. package/dist/core/group/extractors/grpc-patterns/proto.js +1 -12
  21. package/dist/core/ingestion/call-processor.js +2 -2
  22. package/dist/core/ingestion/cobol/cobol-preprocessor.js +1 -1
  23. package/dist/core/ingestion/cobol/jcl-parser.d.ts +1 -1
  24. package/dist/core/ingestion/cobol/jcl-parser.js +1 -1
  25. package/dist/core/ingestion/cobol-processor.d.ts +1 -1
  26. package/dist/core/ingestion/cobol-processor.js +1 -1
  27. package/dist/core/ingestion/heritage-extractors/generic.js +1 -1
  28. package/dist/core/ingestion/heritage-processor.js +1 -1
  29. package/dist/core/ingestion/import-processor.js +1 -1
  30. package/dist/core/ingestion/mro-processor.js +1 -1
  31. package/dist/core/ingestion/parsing-processor.js +1 -1
  32. package/dist/core/ingestion/type-extractors/c-cpp.js +1 -1
  33. package/dist/core/ingestion/type-extractors/python.js +1 -1
  34. package/dist/core/ingestion/type-extractors/shared.js +0 -3
  35. package/dist/core/lbug/content-read.d.ts +46 -0
  36. package/dist/core/lbug/content-read.js +64 -0
  37. package/dist/core/lbug/csv-generator.d.ts +2 -6
  38. package/dist/core/lbug/csv-generator.js +45 -12
  39. package/dist/core/lbug/lbug-adapter.d.ts +4 -1
  40. package/dist/core/lbug/lbug-adapter.js +157 -25
  41. package/dist/core/lbug/pool-adapter.js +51 -44
  42. package/dist/core/lbug/schema.d.ts +7 -7
  43. package/dist/core/lbug/schema.js +18 -0
  44. package/dist/core/run-analyze.d.ts +13 -0
  45. package/dist/core/run-analyze.js +91 -4
  46. package/dist/core/search/bm25-index.js +153 -12
  47. package/dist/core/wiki/generator.js +4 -4
  48. package/dist/mcp/local/local-backend.js +22 -5
  49. package/dist/mcp/resources.js +2 -3
  50. package/dist/server/api.js +4 -3
  51. package/dist/storage/repo-manager.d.ts +39 -0
  52. package/dist/storage/repo-manager.js +19 -0
  53. package/hooks/claude/codragraph-hook.cjs +108 -5
  54. package/hooks/claude/pre-tool-use.sh +6 -1
  55. package/package.json +4 -4
  56. package/scripts/build-tree-sitter-proto.cjs +15 -3
  57. package/scripts/patch-tree-sitter-swift.cjs +17 -4
  58. package/skills/codragraph-api-surface.md +110 -0
  59. package/skills/codragraph-cli.md +5 -5
  60. package/skills/codragraph-config-audit.md +146 -0
  61. package/skills/codragraph-cross-repo-impact.md +135 -0
  62. package/skills/codragraph-data-lineage.md +137 -0
  63. package/skills/codragraph-dead-code.md +119 -0
  64. package/skills/codragraph-debugging.md +1 -1
  65. package/skills/codragraph-exploring.md +1 -1
  66. package/skills/codragraph-gh-actions-debug.md +162 -0
  67. package/skills/codragraph-gh-issue-workflow.md +178 -0
  68. package/skills/codragraph-gh-pr-workflow.md +176 -0
  69. package/skills/codragraph-gh-release-workflow.md +187 -0
  70. package/skills/codragraph-git-bisect.md +176 -0
  71. package/skills/codragraph-git-force-push.md +147 -0
  72. package/skills/codragraph-git-history-rewrite.md +174 -0
  73. package/skills/codragraph-git-rebase-vs-merge.md +138 -0
  74. package/skills/codragraph-git-recovery.md +181 -0
  75. package/skills/codragraph-git-worktree.md +145 -0
  76. package/skills/codragraph-guide.md +1 -1
  77. package/skills/codragraph-impact-analysis.md +1 -1
  78. package/skills/codragraph-migration-tracking.md +130 -0
  79. package/skills/codragraph-notebook-context.md +136 -0
  80. package/skills/codragraph-observability-coverage.md +125 -0
  81. package/skills/codragraph-onboarding.md +129 -0
  82. package/skills/codragraph-perf-hotspots.md +132 -0
  83. package/skills/codragraph-pr-review.md +1 -1
  84. package/skills/codragraph-project-switcher.md +116 -0
  85. package/skills/codragraph-refactoring.md +1 -1
  86. package/skills/codragraph-security-audit.md +144 -0
  87. package/skills/codragraph-sql-tracing.md +122 -0
  88. package/skills/codragraph-supply-chain-audit.md +153 -0
  89. package/skills/codragraph-test-coverage.md +97 -0
package/README.md CHANGED
@@ -18,12 +18,12 @@ AI coding tools don't understand your codebase structure. They edit a function w
18
18
 
19
19
  ```bash
20
20
  # Index your repo (run from repo root)
21
- npx codragraph analyze
21
+ npx @codragraph/cli analyze
22
22
  ```
23
23
 
24
24
  That's it. This indexes the codebase, installs agent skills, registers Claude Code hooks, and creates `AGENTS.md` / `CLAUDE.md` context files — all in one command.
25
25
 
26
- To configure MCP for your editor, run `npx codragraph setup` once — or set it up manually below.
26
+ To configure MCP for your editor, run `npx @codragraph/cli setup` once — or set it up manually below.
27
27
 
28
28
  `codragraph setup` auto-detects your editors and writes the correct global MCP config. You only need to run it once.
29
29
 
@@ -53,16 +53,16 @@ If you prefer to configure manually instead of using `codragraph setup`:
53
53
 
54
54
  ```bash
55
55
  # macOS / Linux
56
- claude mcp add codragraph -- npx -y codragraph@latest mcp
56
+ claude mcp add codragraph -- npx -y @codragraph/cli@latest mcp
57
57
 
58
58
  # Windows
59
- claude mcp add codragraph -- cmd /c npx -y codragraph@latest mcp
59
+ claude mcp add codragraph -- cmd /c npx -y @codragraph/cli@latest mcp
60
60
  ```
61
61
 
62
62
  ### Codex (full support — MCP + skills)
63
63
 
64
64
  ```bash
65
- codex mcp add codragraph -- npx -y codragraph@latest mcp
65
+ codex mcp add codragraph -- npx -y @codragraph/cli@latest mcp
66
66
  ```
67
67
 
68
68
  ### Cursor / Windsurf
@@ -74,7 +74,7 @@ Add to `~/.cursor/mcp.json` (global — works for all projects):
74
74
  "mcpServers": {
75
75
  "codragraph": {
76
76
  "command": "npx",
77
- "args": ["-y", "codragraph@latest", "mcp"]
77
+ "args": ["-y", "@codragraph/cli@latest", "mcp"]
78
78
  }
79
79
  }
80
80
  }
@@ -89,7 +89,7 @@ Add to `~/.config/opencode/config.json`:
89
89
  "mcp": {
90
90
  "codragraph": {
91
91
  "command": "npx",
92
- "args": ["-y", "codragraph@latest", "mcp"]
92
+ "args": ["-y", "@codragraph/cli@latest", "mcp"]
93
93
  }
94
94
  }
95
95
  }
@@ -155,6 +155,9 @@ codragraph analyze --embeddings # Enable embedding generation (slower, better
155
155
  codragraph analyze --skip-agents-md # Preserve custom AGENTS.md/CLAUDE.md codragraph section edits
156
156
  codragraph analyze --verbose # Log skipped files when parsers are unavailable
157
157
  codragraph analyze --max-file-size 1024 # Skip files larger than N KB (default: 512, cap: 32768)
158
+ codragraph analyze --compress brotli # Per-row body compression. Also: zstd, none.
159
+ codragraph profile-heap [path] # Run analyze with v8 heap-snapshot instrumentation
160
+ codragraph profile-heap --no-summary # Same, but skip the post-run RSS / heapUsed table
158
161
  codragraph mcp # Start MCP server (stdio) — serves all indexed repos
159
162
  codragraph serve # Start local HTTP server (multi-repo) for web UI
160
163
  codragraph index # Register an existing .codragraph/ folder into the global registry
@@ -244,9 +247,9 @@ merges are skipped.)
244
247
 
245
248
  ```bash
246
249
  # Try the latest release candidate (pre-stable — may change at any time)
247
- npm install -g codragraph@rc
250
+ npm install -g @codragraph/cli@rc
248
251
  # — or —
249
- npx codragraph@rc analyze
252
+ npx @codragraph/cli@rc analyze
250
253
  ```
251
254
 
252
255
  Release-candidate versions follow the standard semver prerelease format
@@ -265,9 +268,9 @@ certain npm/arborist versions ([npm/cli#8126](https://github.com/npm/cli/issues/
265
268
  It is fixed in **codragraph v1.6.2+**. Upgrade to the latest version:
266
269
 
267
270
  ```bash
268
- npx codragraph@latest analyze # always uses the newest release
271
+ npx @codragraph/cli@latest analyze # always uses the newest release
269
272
  # — or —
270
- npm install -g codragraph@latest # upgrade a global install
273
+ npm install -g @codragraph/cli@latest # upgrade a global install
271
274
  ```
272
275
 
273
276
  If you still hit npm install issues after upgrading, these generic workarounds
@@ -282,7 +285,7 @@ npm cache clean --force # clear a possibly corrupt cache
282
285
 
283
286
  Some optional language grammars (Dart, Kotlin, Swift) require native compilation. If they fail, CodraGraph still works — those languages will be skipped.
284
287
 
285
- If `npm install -g codragraph` fails on native modules:
288
+ If `npm install -g @codragraph/cli` fails on native modules:
286
289
 
287
290
  ```bash
288
291
  # Ensure build tools are available (Linux/macOS)
@@ -290,7 +293,7 @@ If `npm install -g codragraph` fails on native modules:
290
293
  # macOS: xcode-select --install
291
294
 
292
295
  # Retry installation
293
- npm install -g codragraph
296
+ npm install -g @codragraph/cli
294
297
  ```
295
298
 
296
299
  ### Analysis runs out of memory
@@ -299,24 +302,55 @@ For very large repositories:
299
302
 
300
303
  ```bash
301
304
  # Increase Node.js heap size
302
- NODE_OPTIONS="--max-old-space-size=16384" npx codragraph analyze
305
+ NODE_OPTIONS="--max-old-space-size=16384" npx @codragraph/cli analyze
303
306
 
304
307
  # Exclude large directories
305
308
  echo "vendor/" >> .codragraphignore
306
309
  echo "dist/" >> .codragraphignore
307
310
  ```
308
311
 
312
+ If you want to know **which phase** is dragging the heap up before
313
+ deciding what to mitigate, run `codragraph profile-heap`. It writes a
314
+ v8 heap snapshot at every phase boundary plus a JSONL timeline of
315
+ `process.memoryUsage()` and prints a per-phase RSS / `heapUsed` table:
316
+
317
+ ```bash
318
+ codragraph profile-heap # writes .codragraph/heap-profiles/
319
+ # → load any .heapsnapshot in Chrome DevTools → Memory → Load
320
+ ```
321
+
322
+ Each snapshot is 100–500 MB, so the command is opt-in only. The JSONL
323
+ timeline is small enough to share for triage even when the snapshots
324
+ are too big.
325
+
326
+ ### Index size — opt-in per-row compression
327
+
328
+ For repos where `.codragraph/lbug` itself has grown large:
329
+
330
+ ```bash
331
+ codragraph analyze --compress brotli # Node ≥ 18, brotli quality 6
332
+ codragraph analyze --compress zstd # Node ≥ 22.15, zstd level 3
333
+ codragraph analyze --compress none # explicit default
334
+ ```
335
+
336
+ `--compress` routes every node-row content field through the matching
337
+ encoder before it's written to the CSV / lbug; readers decode
338
+ transparently via the per-row `contentEncoding` tag. With the flag
339
+ unset, the on-disk layout is byte-identical to pre-1.8 indexes. Pre-1.8
340
+ indexes auto-trigger a full re-analyze the first time a 1.8+ CLI runs
341
+ against them (one-time cost, surfaced in the analyze log).
342
+
309
343
  ### Large files are being skipped
310
344
 
311
345
  By default the walker skips files larger than **512 KB** (see log line `Skipped N large files (>512KB)`). Raise the threshold via either the CLI flag or the environment variable — both accept a value in **KB**:
312
346
 
313
347
  ```bash
314
348
  # CLI flag (takes precedence over the env var)
315
- npx codragraph analyze --max-file-size 2048 # skip only files > 2 MB
349
+ npx @codragraph/cli analyze --max-file-size 2048 # skip only files > 2 MB
316
350
 
317
351
  # Environment variable (persists across commands)
318
352
  export CODRAGRAPH_MAX_FILE_SIZE=2048
319
- npx codragraph analyze
353
+ npx @codragraph/cli analyze
320
354
  ```
321
355
 
322
356
  Values above **32768 KB (32 MB)** are clamped to the tree-sitter parser ceiling; invalid values fall back to the 512 KB default with a one-time warning. When an override is active, `analyze` prints the effective threshold in its startup banner (e.g. `CODRAGRAPH_MAX_FILE_SIZE: effective threshold 2048KB (default 512KB)`).
@@ -86,7 +86,7 @@ function generateCodraGraphContent(projectName, stats, generatedSkills, groupNam
86
86
 
87
87
  This project is indexed by CodraGraph as **${projectName}**${noStats ? '' : ` (${stats.nodes || 0} symbols, ${stats.edges || 0} relationships, ${stats.processes || 0} execution flows)`}. Use the CodraGraph MCP tools to understand code, assess impact, and navigate safely.
88
88
 
89
- > If any CodraGraph tool warns the index is stale, run \`npx codragraph analyze\` in terminal first.
89
+ > If any CodraGraph tool warns the index is stale, run \`npx @codragraph/cli analyze\` in terminal first.
90
90
 
91
91
  ## Always Do
92
92
 
@@ -115,7 +115,7 @@ This project is indexed by CodraGraph as **${projectName}**${noStats ? '' : ` ($
115
115
  ${groupNames && groupNames.length > 0
116
116
  ? `## Cross-Repo Groups
117
117
 
118
- This repository is listed under CodraGraph **group(s): ${groupNames.join(', ')}** (see \`~/.codragraph/groups/\`). For cross-repo analysis, use MCP tools \`impact\`, \`query\`, and \`context\` with \`repo\` set to \`@<groupName>\` or \`@<groupName>/<memberPath>\` (paths match keys in that group’s \`group.yaml\`). Use \`group_list\` / \`group_sync\` for membership and sync. From the terminal: \`npx codragraph group list\`, \`npx codragraph group sync <name>\`, \`npx codragraph group impact <name> --target <symbol> --repo <group-path>\`.
118
+ This repository is listed under CodraGraph **group(s): ${groupNames.join(', ')}** (see \`~/.codragraph/groups/\`). For cross-repo analysis, use MCP tools \`impact\`, \`query\`, and \`context\` with \`repo\` set to \`@<groupName>\` or \`@<groupName>/<memberPath>\` (paths match keys in that group’s \`group.yaml\`). Use \`group_list\` / \`group_sync\` for membership and sync. From the terminal: \`npx @codragraph/cli group list\`, \`npx @codragraph/cli group sync <name>\`, \`npx @codragraph/cli group impact <name> --target <symbol> --repo <group-path>\`.
119
119
 
120
120
  `
121
121
  : ''}## CLI
@@ -39,5 +39,27 @@ export interface AnalyzeOptions {
39
39
  * `CODRAGRAPH_MAX_FILE_SIZE` for the rest of the pipeline.
40
40
  */
41
41
  maxFileSize?: string;
42
+ /**
43
+ * First-run auto-setup gate. Default `true` (commander injects this from the
44
+ * `--no-setup` flag — see CLI registration). When `true`, `analyze` detects a
45
+ * missing `~/.codragraph/registry.json` and runs editor setup before indexing,
46
+ * making `npx @codragraph/cli analyze` a true zero-install entry. Pass
47
+ * `--no-setup` to opt out (CI, headless servers, automated pipelines).
48
+ */
49
+ setup?: boolean;
50
+ /**
51
+ * Comma-separated list of editor targets for `--skills` output. Valid values
52
+ * are `claude`, `cursor`, `opencode`, `codex`. Default: `claude` (matches
53
+ * pre-flag behavior). Unknown values are reported and ignored.
54
+ */
55
+ skillTargets?: string;
56
+ /**
57
+ * RFC 0001 Phase 2 — opt-in per-row content compression. Accepts
58
+ * `'none'` (default), `'brotli'` (Node ≥ 18), or `'zstd'` (Node ≥
59
+ * 22.15). Compressed indexes are still queryable via the standard
60
+ * read path; decode happens at every external-consumer boundary
61
+ * (MCP, HTTP API, embeddings, CLI tools).
62
+ */
63
+ compress?: 'none' | 'brotli' | 'zstd';
42
64
  }
43
65
  export declare const analyzeCommand: (inputPath?: string, options?: AnalyzeOptions) => Promise<void>;
@@ -11,6 +11,7 @@ import path from 'path';
11
11
  import { execFileSync } from 'child_process';
12
12
  import v8 from 'v8';
13
13
  import cliProgress from 'cli-progress';
14
+ import * as fsSync from 'node:fs';
14
15
  import { closeLbug } from '../core/lbug/lbug-adapter.js';
15
16
  import { getStoragePaths, getGlobalRegistryPath, RegistryNameCollisionError, } from '../storage/repo-manager.js';
16
17
  import { getGitRoot, hasGitDir } from '../storage/git.js';
@@ -52,9 +53,77 @@ export const analyzeCommand = async (inputPath, options) => {
52
53
  if (options?.verbose) {
53
54
  process.env.CODRAGRAPH_VERBOSE = '1';
54
55
  }
56
+ // RFC 0001 Phase 2 — validate --compress before doing any work. Catching
57
+ // a typo or an unsupported encoding here is much friendlier than failing
58
+ // mid-analyze with an opaque CSV-write error. Node-version gating for
59
+ // zstd lives in @codragraph/graphstore via isEncodingSupported, but we
60
+ // import the check here so the CLI can offer the brotli fallback hint.
61
+ if (options?.compress && options.compress !== 'none') {
62
+ if (options.compress !== 'brotli' && options.compress !== 'zstd') {
63
+ console.error(` --compress must be one of: none, brotli, zstd (got: ${options.compress})`);
64
+ process.exitCode = 2;
65
+ return;
66
+ }
67
+ if (options.compress === 'zstd') {
68
+ const { isEncodingSupported } = await import('@codragraph/graphstore');
69
+ if (!isEncodingSupported('zstd')) {
70
+ console.error(' --compress zstd requires Node ≥ 22.15.0 (native node:zlib zstd).\n' +
71
+ ` Detected Node ${process.version}. Use --compress brotli instead, or upgrade Node.`);
72
+ process.exitCode = 2;
73
+ return;
74
+ }
75
+ }
76
+ // RFC 0001 Phase 2.5 — BM25 / FTS now drops `content` from its
77
+ // property list when meta.compress is non-'none' (see
78
+ // `core/search/bm25-index.ts`), so search inside compressed bodies
79
+ // gracefully falls back to name-only matches instead of tokenising
80
+ // base64 garbage. Surface the trade-off so users know what they're
81
+ // opting into.
82
+ console.warn(` Note: --compress ${options.compress} reduces .codragraph/lbug size.\n` +
83
+ ` BM25 search will index symbol names only (function bodies are not tokenised\n` +
84
+ ` when compressed); embeddings, graph queries, and \`context\` / \`impact\` are\n` +
85
+ ` unaffected. Run with --compress none if you rely on full-text search inside\n` +
86
+ ` source bodies.`);
87
+ }
55
88
  if (options?.maxFileSize) {
56
89
  process.env.CODRAGRAPH_MAX_FILE_SIZE = options.maxFileSize;
57
90
  }
91
+ // ── Auto-reindex coalesce-file cleanup ─────────────────────────────
92
+ // When the Claude Code PostToolUse hook spawns us in background mode, it
93
+ // passes the coalesce file path through this env var. We delete it on every
94
+ // exit path so the next commit immediately triggers a new reindex (rather
95
+ // than being blocked by a 10-min mtime TTL). The hook's TTL is just a
96
+ // crash safety net — this is the happy path.
97
+ const reindexLockPath = process.env.CODRAGRAPH_REINDEX_LOCK_PATH || '';
98
+ if (reindexLockPath) {
99
+ process.on('exit', () => {
100
+ try {
101
+ fsSync.unlinkSync(reindexLockPath);
102
+ }
103
+ catch {
104
+ /* already gone or unreadable — fine */
105
+ }
106
+ });
107
+ }
108
+ // ── First-run auto-setup ───────────────────────────────────────────
109
+ // Makes `npx @codragraph/cli analyze` a true one-command entry. We detect
110
+ // first-run by the absence of the global registry — analyze writes to it on
111
+ // every successful index, so it's a reliable "this user has never run us
112
+ // before" signal. Opt out with `--no-setup` for CI / headless contexts;
113
+ // commander maps `--no-setup` to `options.setup === false`.
114
+ if (options?.setup !== false) {
115
+ let registryExists = true;
116
+ try {
117
+ await fs.access(getGlobalRegistryPath());
118
+ }
119
+ catch {
120
+ registryExists = false;
121
+ }
122
+ if (!registryExists) {
123
+ const { runSetup } = await import('./setup.js');
124
+ await runSetup({ skipNextSteps: true, compactHeader: true });
125
+ }
126
+ }
58
127
  console.log('\n CodraGraph Analyzer\n');
59
128
  let repoPath;
60
129
  if (inputPath) {
@@ -168,6 +237,9 @@ export const analyzeCommand = async (inputPath, options) => {
168
237
  // be able to accept the duplicate name without also paying the
169
238
  // cost of a full pipeline re-index. See #829 review round 2.
170
239
  allowDuplicateName: options?.allowDuplicateName,
240
+ // RFC 0001 Phase 2 — pass through the per-row encoding choice.
241
+ // Default 'none' / undefined keeps the pre-Phase-2 wire layout.
242
+ compress: options?.compress,
171
243
  }, {
172
244
  onProgress: (_phase, percent, message) => {
173
245
  updateBar(percent, message);
@@ -190,9 +262,23 @@ export const analyzeCommand = async (inputPath, options) => {
190
262
  if (options?.skills && result.pipelineResult) {
191
263
  updateBar(99, 'Generating skill files...');
192
264
  try {
193
- const { generateSkillFiles } = await import('./skill-gen.js');
265
+ const { generateSkillFiles, SKILL_TARGETS } = await import('./skill-gen.js');
194
266
  const { generateAIContextFiles } = await import('./ai-context.js');
195
- const skillResult = await generateSkillFiles(repoPath, result.repoName, result.pipelineResult);
267
+ // Parse --skill-targets CSV; default to ['claude'] when omitted.
268
+ // Unknown tokens are reported once and dropped — we don't fail the
269
+ // whole analyze for a typo here, but we do want the user to see it.
270
+ const requestedTargets = (options?.skillTargets || 'claude')
271
+ .split(',')
272
+ .map((s) => s.trim().toLowerCase())
273
+ .filter(Boolean);
274
+ const validTargets = requestedTargets.filter((t) => SKILL_TARGETS.includes(t));
275
+ const invalidTargets = requestedTargets.filter((t) => !SKILL_TARGETS.includes(t));
276
+ if (invalidTargets.length > 0) {
277
+ barLog(` Skills: unknown target(s) ignored: ${invalidTargets.join(', ')} ` +
278
+ `(valid: ${SKILL_TARGETS.join(', ')})`);
279
+ }
280
+ const targetsToUse = validTargets.length > 0 ? validTargets : ['claude'];
281
+ const skillResult = await generateSkillFiles(repoPath, result.repoName, result.pipelineResult, targetsToUse);
196
282
  if (skillResult.skills.length > 0) {
197
283
  barLog(` Generated ${skillResult.skills.length} skill files`);
198
284
  // Re-generate AI context files now that we have skill info
@@ -235,11 +321,28 @@ export const analyzeCommand = async (inputPath, options) => {
235
321
  console.log(`\n Repository indexed successfully (${totalTime}s)\n`);
236
322
  console.log(` ${(s.nodes ?? 0).toLocaleString()} nodes | ${(s.edges ?? 0).toLocaleString()} edges | ${s.communities ?? 0} clusters | ${s.processes ?? 0} flows`);
237
323
  console.log(` ${repoPath}`);
324
+ // Surface @codragraph/compress's value prop with concrete numbers: how
325
+ // many tokens of distilled context did we generate. Best-effort — never
326
+ // fail the analyze for a stat read.
238
327
  try {
239
- await fs.access(getGlobalRegistryPath());
328
+ const { estimateTokens } = await import('./compress-stats.js');
329
+ const candidates = ['AGENTS.md', 'CLAUDE.md'];
330
+ const sizes = [];
331
+ for (const file of candidates) {
332
+ try {
333
+ const content = await fs.readFile(path.join(repoPath, file), 'utf-8');
334
+ sizes.push(`${file} ~${estimateTokens(content).toLocaleString()} tokens`);
335
+ }
336
+ catch {
337
+ /* file not generated for this run — skip */
338
+ }
339
+ }
340
+ if (sizes.length > 0) {
341
+ console.log(` @codragraph/compress: ${sizes.join(' | ')}`);
342
+ }
240
343
  }
241
344
  catch {
242
- console.log('\n Tip: Run `codragraph setup` to configure MCP for your editor.');
345
+ /* compress-stats import failed — non-fatal */
243
346
  }
244
347
  console.log('');
245
348
  }
@@ -289,8 +392,8 @@ export const analyzeCommand = async (inputPath, options) => {
289
392
  console.error(' Suggestions:');
290
393
  console.error(' 1. Clear the npm cache: npm cache clean --force');
291
394
  console.error(' 2. Update npm: npm install -g npm@latest');
292
- console.error(' 3. Reinstall codragraph: npm install -g codragraph@latest');
293
- console.error(' 4. Or try npx directly: npx codragraph@latest analyze');
395
+ console.error(' 3. Reinstall codragraph: npm install -g @codragraph/cli@latest');
396
+ console.error(' 4. Or try npx directly: npx @codragraph/cli@latest analyze');
294
397
  console.error('');
295
398
  }
296
399
  else if (msg.includes('MODULE_NOT_FOUND') ||
@@ -298,8 +401,8 @@ export const analyzeCommand = async (inputPath, options) => {
298
401
  msg.includes('ERR_MODULE_NOT_FOUND')) {
299
402
  console.error(' A required module could not be loaded. The installation may be corrupt.');
300
403
  console.error(' Suggestions:');
301
- console.error(' 1. Reinstall: npm install -g codragraph@latest');
302
- console.error(' 2. Clear cache: npm cache clean --force && npx codragraph@latest analyze');
404
+ console.error(' 1. Reinstall: npm install -g @codragraph/cli@latest');
405
+ console.error(' 2. Clear cache: npm cache clean --force && npx @codragraph/cli@latest analyze');
303
406
  console.error('');
304
407
  }
305
408
  process.exitCode = 1;
@@ -0,0 +1,29 @@
1
+ /** chars/4 token estimate: trims surrounding whitespace, then floors `length / 4`. Matches @codragraph/compress's `estimateTokens`. */
2
+ export declare function estimateTokens(text: string): number;
3
+ /**
4
+ * Recursively walk a result value (objects and arrays) and collect every file path we can find. Looks for
5
+ * `filePath`, `file_path`, and `file` keys at any depth; only non-empty string values are collected, into
6
+ * the `paths` Set (the one passed in, or a fresh one), which is also returned. Used to estimate
7
+ * the raw-grep baseline (sum of source bytes the agent would have read without CodraGraph).
8
+ */
9
+ export declare function collectFilePaths(obj: unknown, paths?: Set<string>): Set<string>;
10
+ /**
11
+ * Estimate raw-grep-equivalent token count by summing on-disk byte sizes of
12
+ * the referenced files and flooring the total divided by 4. Returns null if any path cannot be
13
+ * stat'ed or is not a regular file — in that case we silently skip the comparison rather than show a misleading
14
+ * number. An empty iterable yields 0.
15
+ */
16
+ export declare function estimateRawGrepTokens(filePaths: Iterable<string>): number | null;
17
+ /**
18
+ * Format a one-line token-savings summary suitable for stderr display.
19
+ * If a raw baseline is provided AND it's larger than the structured response,
20
+ * the line includes the savings percentage (rounded to a whole percent). Otherwise (including a
21
+ * null, undefined, or 0 baseline) it only reports the structured token count.
22
+ */
23
+ export declare function formatTokenLine(structuredTokens: number, rawTokens?: number | null): string;
24
+ /**
25
+ * Compute and print the token-savings line for a tool result. Best-effort:
26
+ * never throws, never blocks output. Goes to stderr so JSON consumers piping
27
+ * stdout to jq stay clean. Strings are measured as-is; other values are measured via `JSON.stringify`.
28
+ */
29
+ export declare function emitTokenStats(result: unknown): void;
@@ -0,0 +1,97 @@
1
+ /**
2
+ * Token-savings reporter for CLI output.
3
+ *
4
+ * Surfaces the @codragraph/compress value proposition on every `query`,
5
+ * `context`, `impact`, and `analyze` invocation: how many tokens of
6
+ * structured context did we return vs the equivalent raw-grep response.
7
+ *
8
+ * Uses the same chars/4 heuristic as @codragraph/compress's `estimateTokens`
9
+ * for cross-package consistency. Inlined rather than imported because pulling
10
+ * in @codragraph/compress as a runtime dep also pulls @codragraph/harness as a
11
+ * transitive dependency — too heavy for what is logically a one-line approximation. When
12
+ * we add real LLM compression (`--compress` opt-in), the package import will
13
+ * follow.
14
+ */
15
+ import * as fsSync from 'node:fs';
16
+ /** chars/4 token estimate: trims surrounding whitespace, then floors `length / 4`. Expects a string (calls `text.trim()`). Matches @codragraph/compress's `estimateTokens`. */
17
+ export function estimateTokens(text) {
18
+ return Math.max(0, Math.floor(text.trim().length / 4));
19
+ }
20
+ /**
21
+ * Recursively walk a result value (plain objects and arrays) and collect every file path we can find.
22
+ * A value is collected when its key is `filePath`, `file_path`, or `file` and it is a non-empty string;
23
+ * any other object-typed value is descended into. Paths accumulate in the `paths` Set (the one passed in,
24
+ * or a fresh one), which is also returned. Used to estimate the raw-grep baseline (sum of source bytes
25
+ * the agent would have read without CodraGraph).
26
+ */
26
+ export function collectFilePaths(obj, paths = new Set()) {
27
+ if (!obj || typeof obj !== 'object')
28
+ return paths;
29
+ if (Array.isArray(obj)) {
30
+ for (const item of obj)
31
+ collectFilePaths(item, paths);
32
+ return paths;
33
+ }
34
+ for (const [key, value] of Object.entries(obj)) {
35
+ if ((key === 'filePath' || key === 'file_path' || key === 'file') &&
36
+ typeof value === 'string' &&
37
+ value.length > 0) {
38
+ paths.add(value);
39
+ }
40
+ else if (typeof value === 'object') {
41
+ collectFilePaths(value, paths);
42
+ }
43
+ }
44
+ return paths;
45
+ }
46
+ /**
47
+ * Estimate raw-grep-equivalent token count by summing on-disk byte sizes of
48
+ * the referenced files (via `statSync`) and flooring the total divided by 4. Returns null if any path
49
+ * cannot be stat'ed or is not a regular file — in that case we silently skip the comparison rather than show a misleading
50
+ * number. An empty iterable yields 0.
51
+ */
52
+ export function estimateRawGrepTokens(filePaths) {
53
+ let totalChars = 0;
54
+ for (const fp of filePaths) {
55
+ try {
56
+ const stat = fsSync.statSync(fp);
57
+ if (!stat.isFile())
58
+ return null;
59
+ totalChars += stat.size;
60
+ }
61
+ catch {
62
+ return null;
63
+ }
64
+ }
65
+ return Math.floor(totalChars / 4);
66
+ }
67
+ /**
68
+ * Format a one-line token-savings summary suitable for stderr display.
69
+ * If a raw baseline is provided AND it's larger than the structured response,
70
+ * the line includes the savings percentage (rounded to a whole percent). Otherwise — including a
71
+ * null, undefined, or 0 baseline — it only reports the structured token count.
72
+ */
73
+ export function formatTokenLine(structuredTokens, rawTokens) {
74
+ if (rawTokens && rawTokens > structuredTokens) {
75
+ const savings = Math.round((1 - structuredTokens / rawTokens) * 100);
76
+ return (` @codragraph/compress: ~${structuredTokens.toLocaleString()} tokens of structured context ` +
77
+ `(vs ~${rawTokens.toLocaleString()} tokens of raw source — ${savings}% smaller).`);
78
+ }
79
+ return ` @codragraph/compress: ~${structuredTokens.toLocaleString()} tokens of structured context.`;
80
+ }
81
+ /**
82
+ * Compute and print the token-savings line for a tool result. Best-effort:
83
+ * never throws, never blocks output. Goes to stderr so JSON consumers piping
84
+ * stdout to jq stay clean. Strings are measured as-is, other values via `JSON.stringify`; the raw baseline is only computed when at least one file path is found in the result.
85
+ */
86
+ export function emitTokenStats(result) {
87
+ try {
88
+ const structured = typeof result === 'string' ? result : JSON.stringify(result);
89
+ const sTokens = estimateTokens(structured);
90
+ const files = collectFilePaths(result);
91
+ const rawTokens = files.size > 0 ? estimateRawGrepTokens(files) : null;
92
+ process.stderr.write('\n' + formatTokenLine(sTokens, rawTokens) + '\n');
93
+ }
94
+ catch {
95
+ /* never let stats break the actual output */
96
+ }
97
+ }
@@ -12,7 +12,9 @@ export declare const logCommand: (opts?: {
12
12
  limit?: string;
13
13
  }) => Promise<void>;
14
14
  export declare const branchListCommand: () => Promise<void>;
15
- export declare const diffCommand: (from: string, to: string) => Promise<void>;
15
+ export declare const diffCommand: (from: string, to: string, opts?: {
16
+ json?: boolean;
17
+ }) => Promise<void>;
16
18
  export declare const commitCommand: (opts?: {
17
19
  message?: string;
18
20
  }) => Promise<void>;
@@ -36,5 +38,7 @@ export declare const mergeCommand: (target: string, opts?: {
36
38
  export declare const gcCommand: (opts?: {
37
39
  dryRun?: boolean;
38
40
  }) => Promise<void>;
39
- export declare const diffSemanticCommand: (from: string, to: string) => Promise<void>;
41
+ export declare const diffSemanticCommand: (from: string, to: string, opts?: {
42
+ json?: boolean;
43
+ }) => Promise<void>;
40
44
  export { DEFAULT_BRANCH };
@@ -84,7 +84,7 @@ export const branchListCommand = async () => {
84
84
  // ──────────────────────────────────────────────────────────────────────
85
85
  // codragraph diff <from> <to>
86
86
  // ──────────────────────────────────────────────────────────────────────
87
- export const diffCommand = async (from, to) => {
87
+ export const diffCommand = async (from, to, opts = {}) => {
88
88
  const ctx = await resolveGraphstore(process.cwd());
89
89
  const fromCommitId = await resolveCommitTarget(ctx, from);
90
90
  const toCommitId = await resolveCommitTarget(ctx, to);
@@ -95,6 +95,17 @@ export const diffCommand = async (from, to) => {
95
95
  from: fromCommit.snapshot,
96
96
  to: toCommit.snapshot,
97
97
  });
98
+ // --json: emit a machine-readable payload for downstream consumers
99
+ // (GitHub Action comment formatter, IDE plugins, etc). Keep human and
100
+ // JSON paths separate — never sneak JSON into the human path's stdout.
101
+ if (opts.json) {
102
+ process.stdout.write(JSON.stringify({
103
+ from: { commit: fromCommitId, message: fromCommit.message },
104
+ to: { commit: toCommitId, message: toCommit.message },
105
+ diff,
106
+ }, null, 2) + '\n');
107
+ return;
108
+ }
98
109
  process.stdout.write(`From: ${fromCommitId.slice(7, 7 + 12)} ${fromCommit.message}\n`);
99
110
  process.stdout.write(`To: ${toCommitId.slice(7, 7 + 12)} ${toCommit.message}\n\n`);
100
111
  let totalAdded = 0;
@@ -581,7 +592,7 @@ const formatBytes = (n) => {
581
592
  // classified modifications, added/removed APIs, and process changes. We
582
593
  // expose it as a separate module-local helper so the CLI handler can
583
594
  // dispatch on the flag.
584
- export const diffSemanticCommand = async (from, to) => {
595
+ export const diffSemanticCommand = async (from, to, opts = {}) => {
585
596
  const ctx = await resolveGraphstore(process.cwd());
586
597
  const fromCommit = await readCommit(ctx.cas, await resolveCommitTarget(ctx, from));
587
598
  const toCommit = await readCommit(ctx.cas, await resolveCommitTarget(ctx, to));
@@ -590,6 +601,17 @@ export const diffSemanticCommand = async (from, to) => {
590
601
  from: fromCommit.snapshot,
591
602
  to: toCommit.snapshot,
592
603
  });
604
+ // --json: same envelope as plain diff, but from/to carry `ref` (the argument
605
+ // as given) instead of `commit`, and the payload key is `semantic`. The
606
+ // PR-review GitHub Action renders its Markdown comment from this directly, without parsing free-form text.
607
+ if (opts.json) {
608
+ process.stdout.write(JSON.stringify({
609
+ from: { ref: from, message: fromCommit.message },
610
+ to: { ref: to, message: toCommit.message },
611
+ semantic: d,
612
+ }, null, 2) + '\n');
613
+ return;
614
+ }
593
615
  process.stdout.write(`From: ${from} (${fromCommit.message})\n`);
594
616
  process.stdout.write(`To: ${to} (${toCommit.message})\n\n`);
595
617
  if (d.addedAPIs.length > 0) {
package/dist/cli/index.js CHANGED
@@ -8,10 +8,7 @@ import { registerGroupCommands } from './group.js';
8
8
  const _require = createRequire(import.meta.url);
9
9
  const pkg = _require('../../package.json');
10
10
  const program = new Command();
11
- program
12
- .name('codragraph')
13
- .description('CodraGraph local CLI and MCP server')
14
- .version(pkg.version);
11
+ program.name('codragraph').description('CodraGraph local CLI and MCP server').version(pkg.version);
15
12
  program
16
13
  .command('setup')
17
14
  .description('One-time setup: configure MCP for Cursor, Claude Code, OpenCode, Codex')
@@ -22,6 +19,7 @@ program
22
19
  .option('-f, --force', 'Force full re-index even if up to date')
23
20
  .option('--embeddings', 'Enable embedding generation for semantic search (off by default)')
24
21
  .option('--skills', 'Generate repo-specific skill files from detected communities')
22
+ .option('--skill-targets <list>', 'CSV of editor targets for --skills (claude, cursor, opencode, codex). Default: claude.')
25
23
  .option('--skip-agents-md', 'Skip updating the codragraph section in AGENTS.md and CLAUDE.md')
26
24
  .option('--no-stats', 'Omit volatile file/symbol counts from AGENTS.md and CLAUDE.md')
27
25
  .option('--skip-git', 'Index a folder without requiring a .git directory')
@@ -31,12 +29,24 @@ program
31
29
  'Leaves `-r <name>` ambiguous for the two paths; use -r <path> to disambiguate.')
32
30
  .option('-v, --verbose', 'Enable verbose ingestion warnings (default: false)')
33
31
  .option('--max-file-size <kb>', 'Skip files larger than this (KB). Default: 512. Hard cap: 32768 (tree-sitter limit).')
32
+ .option('--no-setup', 'Skip the first-run editor setup (auto-runs once when ~/.codragraph/registry.json is missing)')
33
+ .option('--compress <encoding>', 'Compress per-row content (RFC 0001 Phase 2). One of: none (default), brotli, zstd. zstd requires Node ≥ 22.15.', 'none')
34
34
  .addHelpText('after', '\nEnvironment variables:\n' +
35
35
  ' CODRAGRAPH_NO_GITIGNORE=1 Skip .gitignore parsing (still reads .codragraphignore)\n' +
36
36
  ' CODRAGRAPH_MAX_FILE_SIZE=N Override large-file skip threshold (KB). Default 512, max 32768.\n' +
37
37
  '\nTip: `.codragraphignore` supports `.gitignore`-style negation. Add e.g.\n' +
38
38
  ' `!__tests__/` to index a directory that is auto-filtered by default (#771).')
39
39
  .action(createLazyAction(() => import('./analyze.js'), 'analyzeCommand'));
40
+ program
41
+ .command('profile-heap [path]')
42
+ .description('Run analyze with heap-profile instrumentation (RFC 0002 Phase 1). ' +
43
+ 'Writes per-phase v8 heap snapshots + a JSONL RSS timeline under ' +
44
+ '.codragraph/heap-profiles/, then prints a summary table.')
45
+ .option('-f, --force', 'Force full re-index (analyze flag, passed through)')
46
+ .option('--skip-git', 'Index a folder without requiring a .git directory')
47
+ .option('--no-setup', 'Skip first-run editor setup')
48
+ .option('--no-summary', 'Skip the post-run summary table (raw artifacts only)')
49
+ .action(createLazyAction(() => import('./profile-heap.js'), 'profileHeapCommand'));
40
50
  program
41
51
  .command('index [path...]')
42
52
  .description('Register an existing .codragraph/ folder into the global registry (no re-analysis needed)')
@@ -195,12 +205,13 @@ program
195
205
  .command('diff <from> <to>')
196
206
  .description('Structural diff between two graph commits or branches')
197
207
  .option('--semantic', 'Use the semantic differ (added APIs, classified modifications, processes)')
208
+ .option('--json', 'Emit machine-readable JSON instead of human-readable text (for CI / GitHub Action consumers)')
198
209
  .action(async (from, to, opts) => {
199
210
  const mod = await import('./graphstore.js');
200
211
  if (opts.semantic)
201
- await mod.diffSemanticCommand(from, to);
212
+ await mod.diffSemanticCommand(from, to, { json: opts.json });
202
213
  else
203
- await mod.diffCommand(from, to);
214
+ await mod.diffCommand(from, to, { json: opts.json });
204
215
  });
205
216
  program
206
217
  .command('merge <branch>')