@codragraph/cli 1.6.4 → 2.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (105) hide show
  1. package/README.md +34 -0
  2. package/dist/_shared/cgdb/schema-constants.d.ts +16 -0
  3. package/dist/_shared/cgdb/schema-constants.d.ts.map +1 -0
  4. package/dist/_shared/cgdb/schema-constants.js +67 -0
  5. package/dist/_shared/cgdb/schema-constants.js.map +1 -0
  6. package/dist/_shared/index.d.ts +2 -2
  7. package/dist/_shared/index.js +1 -1
  8. package/dist/cli/analyze.d.ts +22 -0
  9. package/dist/cli/analyze.js +109 -6
  10. package/dist/cli/compress-stats.d.ts +29 -0
  11. package/dist/cli/compress-stats.js +97 -0
  12. package/dist/cli/graphstore.d.ts +6 -2
  13. package/dist/cli/graphstore.js +45 -23
  14. package/dist/cli/index-repo.js +3 -3
  15. package/dist/cli/index.js +16 -2
  16. package/dist/cli/profile-heap.d.ts +35 -0
  17. package/dist/cli/profile-heap.js +126 -0
  18. package/dist/cli/setup.d.ts +13 -0
  19. package/dist/cli/setup.js +22 -11
  20. package/dist/cli/skill-gen.d.ts +14 -2
  21. package/dist/cli/skill-gen.js +52 -19
  22. package/dist/cli/tool.js +4 -0
  23. package/dist/cli/wiki.js +3 -3
  24. package/dist/core/augmentation/engine.js +7 -7
  25. package/dist/core/cgdb/cgdb-adapter.d.ts +176 -0
  26. package/dist/core/cgdb/cgdb-adapter.js +1320 -0
  27. package/dist/core/cgdb/content-read.d.ts +46 -0
  28. package/dist/core/cgdb/content-read.js +64 -0
  29. package/dist/core/cgdb/csv-generator.d.ts +29 -0
  30. package/dist/core/cgdb/csv-generator.js +492 -0
  31. package/dist/core/cgdb/pool-adapter.d.ts +93 -0
  32. package/dist/core/cgdb/pool-adapter.js +550 -0
  33. package/dist/core/cgdb/schema.d.ts +62 -0
  34. package/dist/core/cgdb/schema.js +502 -0
  35. package/dist/core/embeddings/embedding-pipeline.js +27 -10
  36. package/dist/core/graphstore/cgdb-row-source.d.ts +19 -0
  37. package/dist/core/graphstore/cgdb-row-source.js +141 -0
  38. package/dist/core/graphstore/index.d.ts +1 -1
  39. package/dist/core/graphstore/index.js +3 -3
  40. package/dist/core/group/bridge-db.d.ts +2 -2
  41. package/dist/core/group/bridge-db.js +123 -36
  42. package/dist/core/group/bridge-schema.d.ts +4 -4
  43. package/dist/core/group/bridge-schema.js +4 -4
  44. package/dist/core/group/cross-impact.js +3 -3
  45. package/dist/core/group/sync.js +4 -4
  46. package/dist/core/lbug/content-read.d.ts +46 -0
  47. package/dist/core/lbug/content-read.js +64 -0
  48. package/dist/core/lbug/csv-generator.d.ts +2 -6
  49. package/dist/core/lbug/csv-generator.js +45 -12
  50. package/dist/core/lbug/lbug-adapter.d.ts +4 -1
  51. package/dist/core/lbug/lbug-adapter.js +153 -21
  52. package/dist/core/lbug/schema.d.ts +7 -7
  53. package/dist/core/lbug/schema.js +18 -0
  54. package/dist/core/run-analyze.d.ts +13 -0
  55. package/dist/core/run-analyze.js +114 -27
  56. package/dist/core/search/bm25-index.d.ts +3 -3
  57. package/dist/core/search/bm25-index.js +75 -23
  58. package/dist/core/search/hybrid-search.js +2 -2
  59. package/dist/core/wiki/generator.d.ts +2 -2
  60. package/dist/core/wiki/generator.js +4 -4
  61. package/dist/core/wiki/graph-queries.d.ts +2 -2
  62. package/dist/core/wiki/graph-queries.js +5 -5
  63. package/dist/mcp/core/cgdb-adapter.d.ts +5 -0
  64. package/dist/mcp/core/cgdb-adapter.js +5 -0
  65. package/dist/mcp/core/embedder.js +1 -1
  66. package/dist/mcp/local/local-backend.d.ts +2 -2
  67. package/dist/mcp/local/local-backend.js +36 -19
  68. package/dist/mcp/server.js +3 -3
  69. package/dist/mcp/tools.js +1 -1
  70. package/dist/server/analyze-worker.js +2 -2
  71. package/dist/server/api.js +34 -33
  72. package/dist/storage/repo-manager.d.ts +42 -3
  73. package/dist/storage/repo-manager.js +23 -4
  74. package/hooks/claude/codragraph-hook.cjs +98 -5
  75. package/package.json +4 -4
  76. package/scripts/build-tree-sitter-proto.cjs +15 -3
  77. package/scripts/build.js +8 -9
  78. package/scripts/patch-tree-sitter-swift.cjs +17 -4
  79. package/skills/codragraph-api-surface.md +110 -0
  80. package/skills/codragraph-config-audit.md +146 -0
  81. package/skills/codragraph-cross-repo-impact.md +135 -0
  82. package/skills/codragraph-data-lineage.md +137 -0
  83. package/skills/codragraph-dead-code.md +119 -0
  84. package/skills/codragraph-gh-actions-debug.md +162 -0
  85. package/skills/codragraph-gh-issue-workflow.md +178 -0
  86. package/skills/codragraph-gh-pr-workflow.md +176 -0
  87. package/skills/codragraph-gh-release-workflow.md +187 -0
  88. package/skills/codragraph-git-bisect.md +176 -0
  89. package/skills/codragraph-git-force-push.md +147 -0
  90. package/skills/codragraph-git-history-rewrite.md +174 -0
  91. package/skills/codragraph-git-rebase-vs-merge.md +138 -0
  92. package/skills/codragraph-git-recovery.md +181 -0
  93. package/skills/codragraph-git-worktree.md +145 -0
  94. package/skills/codragraph-migration-tracking.md +130 -0
  95. package/skills/codragraph-notebook-context.md +136 -0
  96. package/skills/codragraph-observability-coverage.md +125 -0
  97. package/skills/codragraph-onboarding.md +129 -0
  98. package/skills/codragraph-perf-hotspots.md +132 -0
  99. package/skills/codragraph-project-switcher.md +116 -0
  100. package/skills/codragraph-security-audit.md +144 -0
  101. package/skills/codragraph-sql-tracing.md +122 -0
  102. package/skills/codragraph-supply-chain-audit.md +153 -0
  103. package/skills/codragraph-test-coverage.md +97 -0
  104. package/vendor/tree-sitter-proto/bindings/node/index.js +3 -3
  105. package/vendor/tree-sitter-proto/src/node-types.json +1 -1
package/README.md CHANGED
@@ -155,6 +155,9 @@ codragraph analyze --embeddings # Enable embedding generation (slower, better
155
155
  codragraph analyze --skip-agents-md # Preserve custom AGENTS.md/CLAUDE.md codragraph section edits
156
156
  codragraph analyze --verbose # Log skipped files when parsers are unavailable
157
157
  codragraph analyze --max-file-size 1024 # Skip files larger than N KB (default: 512, cap: 32768)
158
+ codragraph analyze --compress brotli # Per-row body compression. Also: zstd, none.
159
+ codragraph profile-heap [path] # Run analyze with v8 heap-snapshot instrumentation
160
+ codragraph profile-heap --no-summary # Same, but skip the post-run RSS / heapUsed table
158
161
  codragraph mcp # Start MCP server (stdio) — serves all indexed repos
159
162
  codragraph serve # Start local HTTP server (multi-repo) for web UI
160
163
  codragraph index # Register an existing .codragraph/ folder into the global registry
@@ -306,6 +309,37 @@ echo "vendor/" >> .codragraphignore
306
309
  echo "dist/" >> .codragraphignore
307
310
  ```
308
311
 
312
+ If you want to know **which phase** is dragging the heap up before
313
+ deciding what to mitigate, run `codragraph profile-heap`. It writes a
314
+ v8 heap snapshot at every phase boundary plus a JSONL timeline of
315
+ `process.memoryUsage()` and prints a per-phase RSS / `heapUsed` table:
316
+
317
+ ```bash
318
+ codragraph profile-heap # writes .codragraph/heap-profiles/
319
+ # → load any .heapsnapshot in Chrome DevTools → Memory → Load
320
+ ```
321
+
322
+ Each snapshot is 100–500 MB, so the command is opt-in only. The JSONL
323
+ timeline is small enough to share for triage even when the snapshots
324
+ are too big.
325
+
326
+ ### Index size — opt-in per-row compression
327
+
328
+ For repos where `.codragraph/cgdb` itself has grown large:
329
+
330
+ ```bash
331
+ codragraph analyze --compress brotli # Node ≥ 18, brotli quality 6
332
+ codragraph analyze --compress zstd # Node ≥ 22.15, zstd level 3
333
+ codragraph analyze --compress none # explicit default
334
+ ```
335
+
336
+ `--compress` routes every node-row content field through the matching
337
+ encoder before it's written to the CSV / cgdb; readers decode
338
+ transparently via the per-row `contentEncoding` tag. With the flag
339
+ unset, the on-disk layout is byte-identical to pre-1.8 indexes. Pre-1.8
340
+ indexes auto-trigger a full re-analyze the first time a 1.8+ CLI runs
341
+ against them (one-time cost, surfaced in the analyze log).
342
+
309
343
  ### Large files are being skipped
310
344
 
311
345
  By default the walker skips files larger than **512 KB** (see log line `Skipped N large files (>512KB)`). Raise the threshold via either the CLI flag or the environment variable — both accept a value in **KB**:
@@ -0,0 +1,16 @@
1
+ /**
2
+ * LadybugDB schema constants — single source of truth.
3
+ *
4
+ * NODE_TABLES and REL_TYPES define what the knowledge graph can contain.
5
+ * Both CLI and web must agree on these for data compatibility.
6
+ *
7
+ * Full DDL schemas remain in each package's own schema.ts because
8
+ * the CLI uses native LadybugDB and the web uses WASM.
9
+ */
10
+ export declare const NODE_TABLES: readonly ["File", "Folder", "Function", "Class", "Interface", "Method", "CodeElement", "Community", "Process", "Section", "Struct", "Enum", "Macro", "Typedef", "Union", "Namespace", "Trait", "Impl", "TypeAlias", "Const", "Static", "Variable", "Property", "Record", "Delegate", "Annotation", "Constructor", "Template", "Module", "Route", "Tool"];
11
+ export type NodeTableName = (typeof NODE_TABLES)[number];
12
+ export declare const REL_TABLE_NAME = "CodeRelation";
13
+ export declare const REL_TYPES: readonly ["CONTAINS", "DEFINES", "IMPORTS", "CALLS", "EXTENDS", "IMPLEMENTS", "HAS_METHOD", "HAS_PROPERTY", "ACCESSES", "METHOD_OVERRIDES", "OVERRIDES", "METHOD_IMPLEMENTS", "MEMBER_OF", "STEP_IN_PROCESS", "HANDLES_ROUTE", "FETCHES", "HANDLES_TOOL", "ENTRY_POINT_OF", "WRAPS", "QUERIES"];
14
+ export type RelType = (typeof REL_TYPES)[number];
15
+ export declare const EMBEDDING_TABLE_NAME = "CodeEmbedding";
16
+ //# sourceMappingURL=schema-constants.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"schema-constants.d.ts","sourceRoot":"","sources":["../../src/cgdb/schema-constants.ts"],"names":[],"mappings":"AAAA;;;;;;;;GAQG;AAEH,eAAO,MAAM,WAAW,0VAgCd,CAAC;AAEX,MAAM,MAAM,aAAa,GAAG,CAAC,OAAO,WAAW,CAAC,CAAC,MAAM,CAAC,CAAC;AAEzD,eAAO,MAAM,cAAc,iBAAiB,CAAC;AAE7C,eAAO,MAAM,SAAS,iSAqBZ,CAAC;AAEX,MAAM,MAAM,OAAO,GAAG,CAAC,OAAO,SAAS,CAAC,CAAC,MAAM,CAAC,CAAC;AAEjD,eAAO,MAAM,oBAAoB,kBAAkB,CAAC"}
@@ -0,0 +1,67 @@
1
+ /**
2
+ * LadybugDB schema constants — single source of truth.
3
+ *
4
+ * NODE_TABLES and REL_TYPES define what the knowledge graph can contain.
5
+ * Both CLI and web must agree on these for data compatibility.
6
+ *
7
+ * Full DDL schemas remain in each package's own schema.ts because
8
+ * the CLI uses native LadybugDB and the web uses WASM.
9
+ */
10
+ export const NODE_TABLES = [
11
+ 'File',
12
+ 'Folder',
13
+ 'Function',
14
+ 'Class',
15
+ 'Interface',
16
+ 'Method',
17
+ 'CodeElement',
18
+ 'Community',
19
+ 'Process',
20
+ 'Section',
21
+ 'Struct',
22
+ 'Enum',
23
+ 'Macro',
24
+ 'Typedef',
25
+ 'Union',
26
+ 'Namespace',
27
+ 'Trait',
28
+ 'Impl',
29
+ 'TypeAlias',
30
+ 'Const',
31
+ 'Static',
32
+ 'Variable',
33
+ 'Property',
34
+ 'Record',
35
+ 'Delegate',
36
+ 'Annotation',
37
+ 'Constructor',
38
+ 'Template',
39
+ 'Module',
40
+ 'Route',
41
+ 'Tool',
42
+ ];
43
+ export const REL_TABLE_NAME = 'CodeRelation';
44
+ export const REL_TYPES = [
45
+ 'CONTAINS',
46
+ 'DEFINES',
47
+ 'IMPORTS',
48
+ 'CALLS',
49
+ 'EXTENDS',
50
+ 'IMPLEMENTS',
51
+ 'HAS_METHOD',
52
+ 'HAS_PROPERTY',
53
+ 'ACCESSES',
54
+ 'METHOD_OVERRIDES',
55
+ 'OVERRIDES', // Legacy compat alias — kept until all stored indexes are migrated
56
+ 'METHOD_IMPLEMENTS',
57
+ 'MEMBER_OF',
58
+ 'STEP_IN_PROCESS',
59
+ 'HANDLES_ROUTE',
60
+ 'FETCHES',
61
+ 'HANDLES_TOOL',
62
+ 'ENTRY_POINT_OF',
63
+ 'WRAPS',
64
+ 'QUERIES',
65
+ ];
66
+ export const EMBEDDING_TABLE_NAME = 'CodeEmbedding';
67
+ //# sourceMappingURL=schema-constants.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"schema-constants.js","sourceRoot":"","sources":["../../src/cgdb/schema-constants.ts"],"names":[],"mappings":"AAAA;;;;;;;;GAQG;AAEH,MAAM,CAAC,MAAM,WAAW,GAAG;IACzB,MAAM;IACN,QAAQ;IACR,UAAU;IACV,OAAO;IACP,WAAW;IACX,QAAQ;IACR,aAAa;IACb,WAAW;IACX,SAAS;IACT,SAAS;IACT,QAAQ;IACR,MAAM;IACN,OAAO;IACP,SAAS;IACT,OAAO;IACP,WAAW;IACX,OAAO;IACP,MAAM;IACN,WAAW;IACX,OAAO;IACP,QAAQ;IACR,UAAU;IACV,UAAU;IACV,QAAQ;IACR,UAAU;IACV,YAAY;IACZ,aAAa;IACb,UAAU;IACV,QAAQ;IACR,OAAO;IACP,MAAM;CACE,CAAC;AAIX,MAAM,CAAC,MAAM,cAAc,GAAG,cAAc,CAAC;AAE7C,MAAM,CAAC,MAAM,SAAS,GAAG;IACvB,UAAU;IACV,SAAS;IACT,SAAS;IACT,OAAO;IACP,SAAS;IACT,YAAY;IACZ,YAAY;IACZ,cAAc;IACd,UAAU;IACV,kBAAkB;IAClB,WAAW,EAAE,mEAAmE;IAChF,mBAAmB;IACnB,WAAW;IACX,iBAAiB;IACjB,eAAe;IACf,SAAS;IACT,cAAc;IACd,gBAAgB;IAChB,OAAO;IACP,SAAS;CACD,CAAC;AAIX,MAAM,CAAC,MAAM,oBAAoB,GAAG,eAAe,CAAC"}
@@ -1,6 +1,6 @@
1
1
  export type { NodeLabel, NodeProperties, RelationshipType, GraphNode, GraphRelationship, } from './graph/types.js';
2
- export { NODE_TABLES, REL_TABLE_NAME, REL_TYPES, EMBEDDING_TABLE_NAME, } from './lbug/schema-constants.js';
3
- export type { NodeTableName, RelType } from './lbug/schema-constants.js';
2
+ export { NODE_TABLES, REL_TABLE_NAME, REL_TYPES, EMBEDDING_TABLE_NAME, } from './cgdb/schema-constants.js';
3
+ export type { NodeTableName, RelType } from './cgdb/schema-constants.js';
4
4
  export { SupportedLanguages } from './languages.js';
5
5
  export { getLanguageFromFilename, getSyntaxLanguageFromFilename } from './language-detection.js';
6
6
  export type { MroStrategy } from './mro-strategy.js';
@@ -1,5 +1,5 @@
1
1
  // Schema constants
2
- export { NODE_TABLES, REL_TABLE_NAME, REL_TYPES, EMBEDDING_TABLE_NAME, } from './lbug/schema-constants.js';
2
+ export { NODE_TABLES, REL_TABLE_NAME, REL_TYPES, EMBEDDING_TABLE_NAME, } from './cgdb/schema-constants.js';
3
3
  // Language support
4
4
  export { SupportedLanguages } from './languages.js';
5
5
  export { getLanguageFromFilename, getSyntaxLanguageFromFilename } from './language-detection.js';
@@ -39,5 +39,27 @@ export interface AnalyzeOptions {
39
39
  * `CODRAGRAPH_MAX_FILE_SIZE` for the rest of the pipeline.
40
40
  */
41
41
  maxFileSize?: string;
42
+ /**
43
+ * First-run auto-setup gate. Default `true` (commander injects this from the
44
+ * `--no-setup` flag — see CLI registration). When `true`, `analyze` detects a
45
+ * missing `~/.codragraph/registry.json` and runs editor setup before indexing,
46
+ * making `npx @codragraph/cli analyze` a true zero-install entry. Pass
47
+ * `--no-setup` to opt out (CI, headless servers, automated pipelines).
48
+ */
49
+ setup?: boolean;
50
+ /**
51
+ * Comma-separated list of editor targets for `--skills` output. Valid values
52
+ * are `claude`, `cursor`, `opencode`, `codex`. Default: `claude` (matches
53
+ * pre-flag behavior). Unknown values are reported and ignored.
54
+ */
55
+ skillTargets?: string;
56
+ /**
57
+ * RFC 0001 Phase 2 — opt-in per-row content compression. Accepts
58
+ * `'none'` (default), `'brotli'` (Node ≥ 18), or `'zstd'` (Node ≥
59
+ * 22.15). Compressed indexes are still queryable via the standard
60
+ * read path; decode happens at every external-consumer boundary
61
+ * (MCP, HTTP API, embeddings, CLI tools).
62
+ */
63
+ compress?: 'none' | 'brotli' | 'zstd';
42
64
  }
43
65
  export declare const analyzeCommand: (inputPath?: string, options?: AnalyzeOptions) => Promise<void>;
@@ -11,7 +11,8 @@ import path from 'path';
11
11
  import { execFileSync } from 'child_process';
12
12
  import v8 from 'v8';
13
13
  import cliProgress from 'cli-progress';
14
- import { closeLbug } from '../core/lbug/lbug-adapter.js';
14
+ import * as fsSync from 'node:fs';
15
+ import { closeCgdb } from '../core/cgdb/cgdb-adapter.js';
15
16
  import { getStoragePaths, getGlobalRegistryPath, RegistryNameCollisionError, } from '../storage/repo-manager.js';
16
17
  import { getGitRoot, hasGitDir } from '../storage/git.js';
17
18
  import { runFullAnalysis } from '../core/run-analyze.js';
@@ -52,9 +53,77 @@ export const analyzeCommand = async (inputPath, options) => {
52
53
  if (options?.verbose) {
53
54
  process.env.CODRAGRAPH_VERBOSE = '1';
54
55
  }
56
+ // RFC 0001 Phase 2 — validate --compress before doing any work. Catching
57
+ // a typo or an unsupported encoding here is much friendlier than failing
58
+ // mid-analyze with an opaque CSV-write error. Node-version gating for
59
+ // zstd lives in @codragraph/graphstore via isEncodingSupported, but we
60
+ // import the check here so the CLI can offer the brotli fallback hint.
61
+ if (options?.compress && options.compress !== 'none') {
62
+ if (options.compress !== 'brotli' && options.compress !== 'zstd') {
63
+ console.error(` --compress must be one of: none, brotli, zstd (got: ${options.compress})`);
64
+ process.exitCode = 2;
65
+ return;
66
+ }
67
+ if (options.compress === 'zstd') {
68
+ const { isEncodingSupported } = await import('@codragraph/graphstore');
69
+ if (!isEncodingSupported('zstd')) {
70
+ console.error(' --compress zstd requires Node ≥ 22.15.0 (native node:zlib zstd).\n' +
71
+ ` Detected Node ${process.version}. Use --compress brotli instead, or upgrade Node.`);
72
+ process.exitCode = 2;
73
+ return;
74
+ }
75
+ }
76
+ // RFC 0001 Phase 2.5 — BM25 / FTS now drops `content` from its
77
+ // property list when meta.compress is non-'none' (see
78
+ // `core/search/bm25-index.ts`), so search inside compressed bodies
79
+ // gracefully falls back to name-only matches instead of tokenising
80
+ // base64 garbage. Surface the trade-off so users know what they're
81
+ // opting into.
82
+ console.warn(` Note: --compress ${options.compress} reduces .codragraph/cgdb size.\n` +
83
+ ` BM25 search will index symbol names only (function bodies are not tokenised\n` +
84
+ ` when compressed); embeddings, graph queries, and \`context\` / \`impact\` are\n` +
85
+ ` unaffected. Run with --compress none if you rely on full-text search inside\n` +
86
+ ` source bodies.`);
87
+ }
55
88
  if (options?.maxFileSize) {
56
89
  process.env.CODRAGRAPH_MAX_FILE_SIZE = options.maxFileSize;
57
90
  }
91
+ // ── Auto-reindex coalesce-file cleanup ─────────────────────────────
92
+ // When the Claude Code PostToolUse hook spawns us in background mode, it
93
+ // passes the coalesce file path through this env var. We delete it on every
94
+ // exit path so the next commit immediately triggers a new reindex (rather
95
+ // than being blocked by a 10-min mtime TTL). The hook's TTL is just a
96
+ // crash safety net — this is the happy path.
97
+ const reindexLockPath = process.env.CODRAGRAPH_REINDEX_LOCK_PATH || '';
98
+ if (reindexLockPath) {
99
+ process.on('exit', () => {
100
+ try {
101
+ fsSync.unlinkSync(reindexLockPath);
102
+ }
103
+ catch {
104
+ /* already gone or unreadable — fine */
105
+ }
106
+ });
107
+ }
108
+ // ── First-run auto-setup ───────────────────────────────────────────
109
+ // Makes `npx @codragraph/cli analyze` a true one-command entry. We detect
110
+ // first-run by the absence of the global registry — analyze writes to it on
111
+ // every successful index, so it's a reliable "this user has never run us
112
+ // before" signal. Opt out with `--no-setup` for CI / headless contexts;
113
+ // commander maps `--no-setup` to `options.setup === false`.
114
+ if (options?.setup !== false) {
115
+ let registryExists = true;
116
+ try {
117
+ await fs.access(getGlobalRegistryPath());
118
+ }
119
+ catch {
120
+ registryExists = false;
121
+ }
122
+ if (!registryExists) {
123
+ const { runSetup } = await import('./setup.js');
124
+ await runSetup({ skipNextSteps: true, compactHeader: true });
125
+ }
126
+ }
58
127
  console.log('\n CodraGraph Analyzer\n');
59
128
  let repoPath;
60
129
  if (inputPath) {
@@ -113,7 +182,7 @@ export const analyzeCommand = async (inputPath, options) => {
113
182
  aborted = true;
114
183
  bar.stop();
115
184
  console.log('\n Interrupted — cleaning up...');
116
- closeLbug()
185
+ closeCgdb()
117
186
  .catch(() => { })
118
187
  .finally(() => process.exit(130));
119
188
  };
@@ -168,6 +237,9 @@ export const analyzeCommand = async (inputPath, options) => {
168
237
  // be able to accept the duplicate name without also paying the
169
238
  // cost of a full pipeline re-index. See #829 review round 2.
170
239
  allowDuplicateName: options?.allowDuplicateName,
240
+ // RFC 0001 Phase 2 — pass through the per-row encoding choice.
241
+ // Default 'none' / undefined keeps the pre-Phase-2 wire layout.
242
+ compress: options?.compress,
171
243
  }, {
172
244
  onProgress: (_phase, percent, message) => {
173
245
  updateBar(percent, message);
@@ -190,9 +262,23 @@ export const analyzeCommand = async (inputPath, options) => {
190
262
  if (options?.skills && result.pipelineResult) {
191
263
  updateBar(99, 'Generating skill files...');
192
264
  try {
193
- const { generateSkillFiles } = await import('./skill-gen.js');
265
+ const { generateSkillFiles, SKILL_TARGETS } = await import('./skill-gen.js');
194
266
  const { generateAIContextFiles } = await import('./ai-context.js');
195
- const skillResult = await generateSkillFiles(repoPath, result.repoName, result.pipelineResult);
267
+ // Parse --skill-targets CSV; default to ['claude'] when omitted.
268
+ // Unknown tokens are reported once and dropped — we don't fail the
269
+ // whole analyze for a typo here, but we do want the user to see it.
270
+ const requestedTargets = (options?.skillTargets || 'claude')
271
+ .split(',')
272
+ .map((s) => s.trim().toLowerCase())
273
+ .filter(Boolean);
274
+ const validTargets = requestedTargets.filter((t) => SKILL_TARGETS.includes(t));
275
+ const invalidTargets = requestedTargets.filter((t) => !SKILL_TARGETS.includes(t));
276
+ if (invalidTargets.length > 0) {
277
+ barLog(` Skills: unknown target(s) ignored: ${invalidTargets.join(', ')} ` +
278
+ `(valid: ${SKILL_TARGETS.join(', ')})`);
279
+ }
280
+ const targetsToUse = validTargets.length > 0 ? validTargets : ['claude'];
281
+ const skillResult = await generateSkillFiles(repoPath, result.repoName, result.pipelineResult, targetsToUse);
196
282
  if (skillResult.skills.length > 0) {
197
283
  barLog(` Generated ${skillResult.skills.length} skill files`);
198
284
  // Re-generate AI context files now that we have skill info
@@ -235,11 +321,28 @@ export const analyzeCommand = async (inputPath, options) => {
235
321
  console.log(`\n Repository indexed successfully (${totalTime}s)\n`);
236
322
  console.log(` ${(s.nodes ?? 0).toLocaleString()} nodes | ${(s.edges ?? 0).toLocaleString()} edges | ${s.communities ?? 0} clusters | ${s.processes ?? 0} flows`);
237
323
  console.log(` ${repoPath}`);
324
+ // Surface @codragraph/compress's value prop with concrete numbers: how
325
+ // many tokens of distilled context did we generate. Best-effort — never
326
+ // fail the analyze for a stat read.
238
327
  try {
239
- await fs.access(getGlobalRegistryPath());
328
+ const { estimateTokens } = await import('./compress-stats.js');
329
+ const candidates = ['AGENTS.md', 'CLAUDE.md'];
330
+ const sizes = [];
331
+ for (const file of candidates) {
332
+ try {
333
+ const content = await fs.readFile(path.join(repoPath, file), 'utf-8');
334
+ sizes.push(`${file} ~${estimateTokens(content).toLocaleString()} tokens`);
335
+ }
336
+ catch {
337
+ /* file not generated for this run — skip */
338
+ }
339
+ }
340
+ if (sizes.length > 0) {
341
+ console.log(` @codragraph/compress: ${sizes.join(' | ')}`);
342
+ }
240
343
  }
241
344
  catch {
242
- console.log('\n Tip: Run `codragraph setup` to configure MCP for your editor.');
345
+ /* compress-stats import failed non-fatal */
243
346
  }
244
347
  console.log('');
245
348
  }
@@ -0,0 +1,29 @@
1
+ /** chars/4 token estimate. Matches @codragraph/compress's `estimateTokens`. */
2
+ export declare function estimateTokens(text: string): number;
3
+ /**
4
+ * Walk a result object and collect every file path we can find. Looks for
5
+ * `filePath`, `file_path`, and `file` keys at any depth. Used to estimate
6
+ * the raw-grep baseline (sum of source bytes the agent would have read
7
+ * without CodraGraph).
8
+ */
9
+ export declare function collectFilePaths(obj: unknown, paths?: Set<string>): Set<string>;
10
+ /**
11
+ * Estimate raw-grep-equivalent token count by summing on-disk byte sizes of
12
+ * the referenced files. Returns null if any file is missing or unreadable —
13
+ * in that case we silently skip the comparison rather than show a misleading
14
+ * number.
15
+ */
16
+ export declare function estimateRawGrepTokens(filePaths: Iterable<string>): number | null;
17
+ /**
18
+ * Format a one-line token-savings summary suitable for stderr display.
19
+ * If a raw baseline is provided AND it's larger than the structured response,
20
+ * the line includes the savings percentage. Otherwise it only reports
21
+ * the structured token count.
22
+ */
23
+ export declare function formatTokenLine(structuredTokens: number, rawTokens?: number | null): string;
24
+ /**
25
+ * Compute and print the token-savings line for a tool result. Best-effort:
26
+ * never throws, never blocks output. Goes to stderr so JSON consumers piping
27
+ * stdout to jq stay clean.
28
+ */
29
+ export declare function emitTokenStats(result: unknown): void;
@@ -0,0 +1,97 @@
1
+ /**
2
+ * Token-savings reporter for CLI output.
3
+ *
4
+ * Surfaces the @codragraph/compress value proposition on every `query`,
5
+ * `context`, `impact`, and `analyze` invocation: how many tokens of
6
+ * structured context did we return vs the equivalent raw-grep response.
7
+ *
8
+ * Uses the same chars/4 heuristic as @codragraph/compress's `estimateTokens`
9
+ * for cross-package consistency. Inlined rather than imported because pulling
10
+ * in @codragraph/compress as a runtime dep also pulls @codragraph/harness as a
11
+ * transitive — too heavy for what is logically a one-line approximation. When
12
+ * we add real LLM compression (`--compress` opt-in), the package import will
13
+ * follow.
14
+ */
15
+ import * as fsSync from 'node:fs';
16
+ /** chars/4 token estimate. Matches @codragraph/compress's `estimateTokens`. */
17
+ export function estimateTokens(text) {
18
+ return Math.max(0, Math.floor(text.trim().length / 4));
19
+ }
20
+ /**
21
+ * Walk a result object and collect every file path we can find. Looks for
22
+ * `filePath`, `file_path`, and `file` keys at any depth. Used to estimate
23
+ * the raw-grep baseline (sum of source bytes the agent would have read
24
+ * without CodraGraph).
25
+ */
26
+ export function collectFilePaths(obj, paths = new Set()) {
27
+ if (!obj || typeof obj !== 'object')
28
+ return paths;
29
+ if (Array.isArray(obj)) {
30
+ for (const item of obj)
31
+ collectFilePaths(item, paths);
32
+ return paths;
33
+ }
34
+ for (const [key, value] of Object.entries(obj)) {
35
+ if ((key === 'filePath' || key === 'file_path' || key === 'file') &&
36
+ typeof value === 'string' &&
37
+ value.length > 0) {
38
+ paths.add(value);
39
+ }
40
+ else if (typeof value === 'object') {
41
+ collectFilePaths(value, paths);
42
+ }
43
+ }
44
+ return paths;
45
+ }
46
+ /**
47
+ * Estimate raw-grep-equivalent token count by summing on-disk byte sizes of
48
+ * the referenced files. Returns null if any file is missing or unreadable —
49
+ * in that case we silently skip the comparison rather than show a misleading
50
+ * number.
51
+ */
52
+ export function estimateRawGrepTokens(filePaths) {
53
+ let totalChars = 0;
54
+ for (const fp of filePaths) {
55
+ try {
56
+ const stat = fsSync.statSync(fp);
57
+ if (!stat.isFile())
58
+ return null;
59
+ totalChars += stat.size;
60
+ }
61
+ catch {
62
+ return null;
63
+ }
64
+ }
65
+ return Math.floor(totalChars / 4);
66
+ }
67
+ /**
68
+ * Format a one-line token-savings summary suitable for stderr display.
69
+ * If a raw baseline is provided AND it's larger than the structured response,
70
+ * the line includes the savings percentage. Otherwise it only reports
71
+ * the structured token count.
72
+ */
73
+ export function formatTokenLine(structuredTokens, rawTokens) {
74
+ if (rawTokens && rawTokens > structuredTokens) {
75
+ const savings = Math.round((1 - structuredTokens / rawTokens) * 100);
76
+ return (` @codragraph/compress: ~${structuredTokens.toLocaleString()} tokens of structured context ` +
77
+ `(vs ~${rawTokens.toLocaleString()} tokens of raw source — ${savings}% smaller).`);
78
+ }
79
+ return ` @codragraph/compress: ~${structuredTokens.toLocaleString()} tokens of structured context.`;
80
+ }
81
+ /**
82
+ * Compute and print the token-savings line for a tool result. Best-effort:
83
+ * never throws, never blocks output. Goes to stderr so JSON consumers piping
84
+ * stdout to jq stay clean.
85
+ */
86
+ export function emitTokenStats(result) {
87
+ try {
88
+ const structured = typeof result === 'string' ? result : JSON.stringify(result);
89
+ const sTokens = estimateTokens(structured);
90
+ const files = collectFilePaths(result);
91
+ const rawTokens = files.size > 0 ? estimateRawGrepTokens(files) : null;
92
+ process.stderr.write('\n' + formatTokenLine(sTokens, rawTokens) + '\n');
93
+ }
94
+ catch {
95
+ /* never let stats break the actual output */
96
+ }
97
+ }
@@ -12,7 +12,9 @@ export declare const logCommand: (opts?: {
12
12
  limit?: string;
13
13
  }) => Promise<void>;
14
14
  export declare const branchListCommand: () => Promise<void>;
15
- export declare const diffCommand: (from: string, to: string) => Promise<void>;
15
+ export declare const diffCommand: (from: string, to: string, opts?: {
16
+ json?: boolean;
17
+ }) => Promise<void>;
16
18
  export declare const commitCommand: (opts?: {
17
19
  message?: string;
18
20
  }) => Promise<void>;
@@ -36,5 +38,7 @@ export declare const mergeCommand: (target: string, opts?: {
36
38
  export declare const gcCommand: (opts?: {
37
39
  dryRun?: boolean;
38
40
  }) => Promise<void>;
39
- export declare const diffSemanticCommand: (from: string, to: string) => Promise<void>;
41
+ export declare const diffSemanticCommand: (from: string, to: string, opts?: {
42
+ json?: boolean;
43
+ }) => Promise<void>;
40
44
  export { DEFAULT_BRANCH };