@codragraph/cli 1.6.4 → 2.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +34 -0
- package/dist/_shared/cgdb/schema-constants.d.ts +16 -0
- package/dist/_shared/cgdb/schema-constants.d.ts.map +1 -0
- package/dist/_shared/cgdb/schema-constants.js +67 -0
- package/dist/_shared/cgdb/schema-constants.js.map +1 -0
- package/dist/_shared/index.d.ts +2 -2
- package/dist/_shared/index.js +1 -1
- package/dist/cli/analyze.d.ts +22 -0
- package/dist/cli/analyze.js +109 -6
- package/dist/cli/compress-stats.d.ts +29 -0
- package/dist/cli/compress-stats.js +97 -0
- package/dist/cli/graphstore.d.ts +6 -2
- package/dist/cli/graphstore.js +45 -23
- package/dist/cli/index-repo.js +3 -3
- package/dist/cli/index.js +16 -2
- package/dist/cli/profile-heap.d.ts +35 -0
- package/dist/cli/profile-heap.js +126 -0
- package/dist/cli/setup.d.ts +13 -0
- package/dist/cli/setup.js +22 -11
- package/dist/cli/skill-gen.d.ts +14 -2
- package/dist/cli/skill-gen.js +52 -19
- package/dist/cli/tool.js +4 -0
- package/dist/cli/wiki.js +3 -3
- package/dist/core/augmentation/engine.js +7 -7
- package/dist/core/cgdb/cgdb-adapter.d.ts +176 -0
- package/dist/core/cgdb/cgdb-adapter.js +1320 -0
- package/dist/core/cgdb/content-read.d.ts +46 -0
- package/dist/core/cgdb/content-read.js +64 -0
- package/dist/core/cgdb/csv-generator.d.ts +29 -0
- package/dist/core/cgdb/csv-generator.js +492 -0
- package/dist/core/cgdb/pool-adapter.d.ts +93 -0
- package/dist/core/cgdb/pool-adapter.js +550 -0
- package/dist/core/cgdb/schema.d.ts +62 -0
- package/dist/core/cgdb/schema.js +502 -0
- package/dist/core/embeddings/embedding-pipeline.js +27 -10
- package/dist/core/graphstore/cgdb-row-source.d.ts +19 -0
- package/dist/core/graphstore/cgdb-row-source.js +141 -0
- package/dist/core/graphstore/index.d.ts +1 -1
- package/dist/core/graphstore/index.js +3 -3
- package/dist/core/group/bridge-db.d.ts +2 -2
- package/dist/core/group/bridge-db.js +123 -36
- package/dist/core/group/bridge-schema.d.ts +4 -4
- package/dist/core/group/bridge-schema.js +4 -4
- package/dist/core/group/cross-impact.js +3 -3
- package/dist/core/group/sync.js +4 -4
- package/dist/core/lbug/content-read.d.ts +46 -0
- package/dist/core/lbug/content-read.js +64 -0
- package/dist/core/lbug/csv-generator.d.ts +2 -6
- package/dist/core/lbug/csv-generator.js +45 -12
- package/dist/core/lbug/lbug-adapter.d.ts +4 -1
- package/dist/core/lbug/lbug-adapter.js +153 -21
- package/dist/core/lbug/schema.d.ts +7 -7
- package/dist/core/lbug/schema.js +18 -0
- package/dist/core/run-analyze.d.ts +13 -0
- package/dist/core/run-analyze.js +114 -27
- package/dist/core/search/bm25-index.d.ts +3 -3
- package/dist/core/search/bm25-index.js +75 -23
- package/dist/core/search/hybrid-search.js +2 -2
- package/dist/core/wiki/generator.d.ts +2 -2
- package/dist/core/wiki/generator.js +4 -4
- package/dist/core/wiki/graph-queries.d.ts +2 -2
- package/dist/core/wiki/graph-queries.js +5 -5
- package/dist/mcp/core/cgdb-adapter.d.ts +5 -0
- package/dist/mcp/core/cgdb-adapter.js +5 -0
- package/dist/mcp/core/embedder.js +1 -1
- package/dist/mcp/local/local-backend.d.ts +2 -2
- package/dist/mcp/local/local-backend.js +36 -19
- package/dist/mcp/server.js +3 -3
- package/dist/mcp/tools.js +1 -1
- package/dist/server/analyze-worker.js +2 -2
- package/dist/server/api.js +34 -33
- package/dist/storage/repo-manager.d.ts +42 -3
- package/dist/storage/repo-manager.js +23 -4
- package/hooks/claude/codragraph-hook.cjs +98 -5
- package/package.json +4 -4
- package/scripts/build-tree-sitter-proto.cjs +15 -3
- package/scripts/build.js +8 -9
- package/scripts/patch-tree-sitter-swift.cjs +17 -4
- package/skills/codragraph-api-surface.md +110 -0
- package/skills/codragraph-config-audit.md +146 -0
- package/skills/codragraph-cross-repo-impact.md +135 -0
- package/skills/codragraph-data-lineage.md +137 -0
- package/skills/codragraph-dead-code.md +119 -0
- package/skills/codragraph-gh-actions-debug.md +162 -0
- package/skills/codragraph-gh-issue-workflow.md +178 -0
- package/skills/codragraph-gh-pr-workflow.md +176 -0
- package/skills/codragraph-gh-release-workflow.md +187 -0
- package/skills/codragraph-git-bisect.md +176 -0
- package/skills/codragraph-git-force-push.md +147 -0
- package/skills/codragraph-git-history-rewrite.md +174 -0
- package/skills/codragraph-git-rebase-vs-merge.md +138 -0
- package/skills/codragraph-git-recovery.md +181 -0
- package/skills/codragraph-git-worktree.md +145 -0
- package/skills/codragraph-migration-tracking.md +130 -0
- package/skills/codragraph-notebook-context.md +136 -0
- package/skills/codragraph-observability-coverage.md +125 -0
- package/skills/codragraph-onboarding.md +129 -0
- package/skills/codragraph-perf-hotspots.md +132 -0
- package/skills/codragraph-project-switcher.md +116 -0
- package/skills/codragraph-security-audit.md +144 -0
- package/skills/codragraph-sql-tracing.md +122 -0
- package/skills/codragraph-supply-chain-audit.md +153 -0
- package/skills/codragraph-test-coverage.md +97 -0
- package/vendor/tree-sitter-proto/bindings/node/index.js +3 -3
- package/vendor/tree-sitter-proto/src/node-types.json +1 -1
package/README.md
CHANGED
|
@@ -155,6 +155,9 @@ codragraph analyze --embeddings # Enable embedding generation (slower, better
|
|
|
155
155
|
codragraph analyze --skip-agents-md # Preserve custom AGENTS.md/CLAUDE.md codragraph section edits
|
|
156
156
|
codragraph analyze --verbose # Log skipped files when parsers are unavailable
|
|
157
157
|
codragraph analyze --max-file-size 1024 # Skip files larger than N KB (default: 512, cap: 32768)
|
|
158
|
+
codragraph analyze --compress brotli # Per-row body compression. Also: zstd, none.
|
|
159
|
+
codragraph profile-heap [path] # Run analyze with V8 heap-snapshot instrumentation
|
|
160
|
+
codragraph profile-heap --no-summary # Same, but skip the post-run RSS / heapUsed table
|
|
158
161
|
codragraph mcp # Start MCP server (stdio) — serves all indexed repos
|
|
159
162
|
codragraph serve # Start local HTTP server (multi-repo) for web UI
|
|
160
163
|
codragraph index # Register an existing .codragraph/ folder into the global registry
|
|
@@ -306,6 +309,37 @@ echo "vendor/" >> .codragraphignore
|
|
|
306
309
|
echo "dist/" >> .codragraphignore
|
|
307
310
|
```
|
|
308
311
|
|
|
312
|
+
If you want to know **which phase** is dragging the heap up before
|
|
313
|
+
deciding what to mitigate, run `codragraph profile-heap`. It writes a
|
|
314
|
+
V8 heap snapshot at every phase boundary plus a JSONL timeline of
|
|
315
|
+
`process.memoryUsage()` and prints a per-phase RSS / `heapUsed` table:
|
|
316
|
+
|
|
317
|
+
```bash
|
|
318
|
+
codragraph profile-heap # writes .codragraph/heap-profiles/
|
|
319
|
+
# → load any .heapsnapshot in Chrome DevTools → Memory → Load
|
|
320
|
+
```
|
|
321
|
+
|
|
322
|
+
Each snapshot is 100–500 MB, so the command is opt-in only. The JSONL
|
|
323
|
+
timeline is small enough to share for triage even when the snapshots
|
|
324
|
+
are too big.
|
|
325
|
+
|
|
326
|
+
### Index size — opt-in per-row compression
|
|
327
|
+
|
|
328
|
+
For repos where `.codragraph/cgdb` itself has grown large:
|
|
329
|
+
|
|
330
|
+
```bash
|
|
331
|
+
codragraph analyze --compress brotli # Node ≥ 18, brotli quality 6
|
|
332
|
+
codragraph analyze --compress zstd # Node ≥ 22.15, zstd level 3
|
|
333
|
+
codragraph analyze --compress none # explicit default
|
|
334
|
+
```
|
|
335
|
+
|
|
336
|
+
`--compress` routes every node-row content field through the matching
|
|
337
|
+
encoder before it's written to the CSV / cgdb; readers decode
|
|
338
|
+
transparently via the per-row `contentEncoding` tag. With the flag
|
|
339
|
+
unset, the on-disk layout is byte-identical to pre-1.8 indexes. Pre-1.8
|
|
340
|
+
indexes auto-trigger a full re-analyze the first time a 1.8+ CLI runs
|
|
341
|
+
against them (one-time cost, surfaced in the analyze log).
|
|
342
|
+
|
|
309
343
|
### Large files are being skipped
|
|
310
344
|
|
|
311
345
|
By default the walker skips files larger than **512 KB** (see log line `Skipped N large files (>512KB)`). Raise the threshold via either the CLI flag or the environment variable — both accept a value in **KB**:
|
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* LadybugDB schema constants — single source of truth.
|
|
3
|
+
*
|
|
4
|
+
* NODE_TABLES and REL_TYPES define what the knowledge graph can contain.
|
|
5
|
+
* Both CLI and web must agree on these for data compatibility.
|
|
6
|
+
*
|
|
7
|
+
* Full DDL schemas remain in each package's own schema.ts because
|
|
8
|
+
* the CLI uses native LadybugDB and the web uses WASM.
|
|
9
|
+
*/
|
|
10
|
+
export declare const NODE_TABLES: readonly ["File", "Folder", "Function", "Class", "Interface", "Method", "CodeElement", "Community", "Process", "Section", "Struct", "Enum", "Macro", "Typedef", "Union", "Namespace", "Trait", "Impl", "TypeAlias", "Const", "Static", "Variable", "Property", "Record", "Delegate", "Annotation", "Constructor", "Template", "Module", "Route", "Tool"];
|
|
11
|
+
export type NodeTableName = (typeof NODE_TABLES)[number];
|
|
12
|
+
export declare const REL_TABLE_NAME = "CodeRelation";
|
|
13
|
+
export declare const REL_TYPES: readonly ["CONTAINS", "DEFINES", "IMPORTS", "CALLS", "EXTENDS", "IMPLEMENTS", "HAS_METHOD", "HAS_PROPERTY", "ACCESSES", "METHOD_OVERRIDES", "OVERRIDES", "METHOD_IMPLEMENTS", "MEMBER_OF", "STEP_IN_PROCESS", "HANDLES_ROUTE", "FETCHES", "HANDLES_TOOL", "ENTRY_POINT_OF", "WRAPS", "QUERIES"];
|
|
14
|
+
export type RelType = (typeof REL_TYPES)[number];
|
|
15
|
+
export declare const EMBEDDING_TABLE_NAME = "CodeEmbedding";
|
|
16
|
+
//# sourceMappingURL=schema-constants.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"schema-constants.d.ts","sourceRoot":"","sources":["../../src/cgdb/schema-constants.ts"],"names":[],"mappings":"AAAA;;;;;;;;GAQG;AAEH,eAAO,MAAM,WAAW,0VAgCd,CAAC;AAEX,MAAM,MAAM,aAAa,GAAG,CAAC,OAAO,WAAW,CAAC,CAAC,MAAM,CAAC,CAAC;AAEzD,eAAO,MAAM,cAAc,iBAAiB,CAAC;AAE7C,eAAO,MAAM,SAAS,iSAqBZ,CAAC;AAEX,MAAM,MAAM,OAAO,GAAG,CAAC,OAAO,SAAS,CAAC,CAAC,MAAM,CAAC,CAAC;AAEjD,eAAO,MAAM,oBAAoB,kBAAkB,CAAC"}
|
|
@@ -0,0 +1,67 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* LadybugDB schema constants — single source of truth.
|
|
3
|
+
*
|
|
4
|
+
* NODE_TABLES and REL_TYPES define what the knowledge graph can contain.
|
|
5
|
+
* Both CLI and web must agree on these for data compatibility.
|
|
6
|
+
*
|
|
7
|
+
* Full DDL schemas remain in each package's own schema.ts because
|
|
8
|
+
* the CLI uses native LadybugDB and the web uses WASM.
|
|
9
|
+
*/
|
|
10
|
+
// Node labels the knowledge graph can contain. Element order is significant:
// it is reproduced exactly from the original one-per-line listing.
export const NODE_TABLES = [
    // files, generic code elements, and derived groupings
    'File', 'Folder', 'Function', 'Class', 'Interface', 'Method',
    'CodeElement', 'Community', 'Process', 'Section',
    // language-specific declarations
    'Struct', 'Enum', 'Macro', 'Typedef', 'Union', 'Namespace',
    'Trait', 'Impl', 'TypeAlias', 'Const', 'Static', 'Variable',
    'Property', 'Record', 'Delegate', 'Annotation', 'Constructor',
    'Template', 'Module',
    // integration surfaces
    'Route', 'Tool',
];
// Relationship table name.
export const REL_TABLE_NAME = 'CodeRelation';
// Relationship type tags, in the same order as the original listing.
export const REL_TYPES = [
    'CONTAINS', 'DEFINES', 'IMPORTS', 'CALLS', 'EXTENDS', 'IMPLEMENTS',
    'HAS_METHOD', 'HAS_PROPERTY', 'ACCESSES', 'METHOD_OVERRIDES',
    'OVERRIDES', // Legacy compat alias — kept until all stored indexes are migrated
    'METHOD_IMPLEMENTS', 'MEMBER_OF', 'STEP_IN_PROCESS', 'HANDLES_ROUTE',
    'FETCHES', 'HANDLES_TOOL', 'ENTRY_POINT_OF', 'WRAPS', 'QUERIES',
];
// Embedding table name.
export const EMBEDDING_TABLE_NAME = 'CodeEmbedding';
|
|
67
|
+
//# sourceMappingURL=schema-constants.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"schema-constants.js","sourceRoot":"","sources":["../../src/cgdb/schema-constants.ts"],"names":[],"mappings":"AAAA;;;;;;;;GAQG;AAEH,MAAM,CAAC,MAAM,WAAW,GAAG;IACzB,MAAM;IACN,QAAQ;IACR,UAAU;IACV,OAAO;IACP,WAAW;IACX,QAAQ;IACR,aAAa;IACb,WAAW;IACX,SAAS;IACT,SAAS;IACT,QAAQ;IACR,MAAM;IACN,OAAO;IACP,SAAS;IACT,OAAO;IACP,WAAW;IACX,OAAO;IACP,MAAM;IACN,WAAW;IACX,OAAO;IACP,QAAQ;IACR,UAAU;IACV,UAAU;IACV,QAAQ;IACR,UAAU;IACV,YAAY;IACZ,aAAa;IACb,UAAU;IACV,QAAQ;IACR,OAAO;IACP,MAAM;CACE,CAAC;AAIX,MAAM,CAAC,MAAM,cAAc,GAAG,cAAc,CAAC;AAE7C,MAAM,CAAC,MAAM,SAAS,GAAG;IACvB,UAAU;IACV,SAAS;IACT,SAAS;IACT,OAAO;IACP,SAAS;IACT,YAAY;IACZ,YAAY;IACZ,cAAc;IACd,UAAU;IACV,kBAAkB;IAClB,WAAW,EAAE,mEAAmE;IAChF,mBAAmB;IACnB,WAAW;IACX,iBAAiB;IACjB,eAAe;IACf,SAAS;IACT,cAAc;IACd,gBAAgB;IAChB,OAAO;IACP,SAAS;CACD,CAAC;AAIX,MAAM,CAAC,MAAM,oBAAoB,GAAG,eAAe,CAAC"}
|
package/dist/_shared/index.d.ts
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
export type { NodeLabel, NodeProperties, RelationshipType, GraphNode, GraphRelationship, } from './graph/types.js';
|
|
2
|
-
export { NODE_TABLES, REL_TABLE_NAME, REL_TYPES, EMBEDDING_TABLE_NAME, } from './
|
|
3
|
-
export type { NodeTableName, RelType } from './
|
|
2
|
+
export { NODE_TABLES, REL_TABLE_NAME, REL_TYPES, EMBEDDING_TABLE_NAME, } from './cgdb/schema-constants.js';
|
|
3
|
+
export type { NodeTableName, RelType } from './cgdb/schema-constants.js';
|
|
4
4
|
export { SupportedLanguages } from './languages.js';
|
|
5
5
|
export { getLanguageFromFilename, getSyntaxLanguageFromFilename } from './language-detection.js';
|
|
6
6
|
export type { MroStrategy } from './mro-strategy.js';
|
package/dist/_shared/index.js
CHANGED
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
// Schema constants
|
|
2
|
-
export { NODE_TABLES, REL_TABLE_NAME, REL_TYPES, EMBEDDING_TABLE_NAME, } from './
|
|
2
|
+
export { NODE_TABLES, REL_TABLE_NAME, REL_TYPES, EMBEDDING_TABLE_NAME, } from './cgdb/schema-constants.js';
|
|
3
3
|
// Language support
|
|
4
4
|
export { SupportedLanguages } from './languages.js';
|
|
5
5
|
export { getLanguageFromFilename, getSyntaxLanguageFromFilename } from './language-detection.js';
|
package/dist/cli/analyze.d.ts
CHANGED
|
@@ -39,5 +39,27 @@ export interface AnalyzeOptions {
|
|
|
39
39
|
* `CODRAGRAPH_MAX_FILE_SIZE` for the rest of the pipeline.
|
|
40
40
|
*/
|
|
41
41
|
maxFileSize?: string;
|
|
42
|
+
/**
|
|
43
|
+
* First-run auto-setup gate. Default `true` (commander injects this from the
|
|
44
|
+
* `--no-setup` flag — see CLI registration). When `true`, `analyze` detects a
|
|
45
|
+
* missing `~/.codragraph/registry.json` and runs editor setup before indexing,
|
|
46
|
+
* making `npx @codragraph/cli analyze` a true zero-install entry. Pass
|
|
47
|
+
* `--no-setup` to opt out (CI, headless servers, automated pipelines).
|
|
48
|
+
*/
|
|
49
|
+
setup?: boolean;
|
|
50
|
+
/**
|
|
51
|
+
* Comma-separated list of editor targets for `--skills` output. Valid values
|
|
52
|
+
* are `claude`, `cursor`, `opencode`, `codex`. Default: `claude` (matches
|
|
53
|
+
* pre-flag behavior). Unknown values are reported and ignored.
|
|
54
|
+
*/
|
|
55
|
+
skillTargets?: string;
|
|
56
|
+
/**
|
|
57
|
+
* RFC 0001 Phase 2 — opt-in per-row content compression. Accepts
|
|
58
|
+
* `'none'` (default), `'brotli'` (Node ≥ 18), or `'zstd'` (Node ≥
|
|
59
|
+
* 22.15). Compressed indexes are still queryable via the standard
|
|
60
|
+
* read path; decode happens at every external-consumer boundary
|
|
61
|
+
* (MCP, HTTP API, embeddings, CLI tools).
|
|
62
|
+
*/
|
|
63
|
+
compress?: 'none' | 'brotli' | 'zstd';
|
|
42
64
|
}
|
|
43
65
|
export declare const analyzeCommand: (inputPath?: string, options?: AnalyzeOptions) => Promise<void>;
|
package/dist/cli/analyze.js
CHANGED
|
@@ -11,7 +11,8 @@ import path from 'path';
|
|
|
11
11
|
import { execFileSync } from 'child_process';
|
|
12
12
|
import v8 from 'v8';
|
|
13
13
|
import cliProgress from 'cli-progress';
|
|
14
|
-
import
|
|
14
|
+
import * as fsSync from 'node:fs';
|
|
15
|
+
import { closeCgdb } from '../core/cgdb/cgdb-adapter.js';
|
|
15
16
|
import { getStoragePaths, getGlobalRegistryPath, RegistryNameCollisionError, } from '../storage/repo-manager.js';
|
|
16
17
|
import { getGitRoot, hasGitDir } from '../storage/git.js';
|
|
17
18
|
import { runFullAnalysis } from '../core/run-analyze.js';
|
|
@@ -52,9 +53,77 @@ export const analyzeCommand = async (inputPath, options) => {
|
|
|
52
53
|
if (options?.verbose) {
|
|
53
54
|
process.env.CODRAGRAPH_VERBOSE = '1';
|
|
54
55
|
}
|
|
56
|
+
// RFC 0001 Phase 2 — validate --compress before doing any work. Catching
|
|
57
|
+
// a typo or an unsupported encoding here is much friendlier than failing
|
|
58
|
+
// mid-analyze with an opaque CSV-write error. Node-version gating for
|
|
59
|
+
// zstd lives in @codragraph/graphstore via isEncodingSupported, but we
|
|
60
|
+
// import the check here so the CLI can offer the brotli fallback hint.
|
|
61
|
+
if (options?.compress && options.compress !== 'none') {
|
|
62
|
+
if (options.compress !== 'brotli' && options.compress !== 'zstd') {
|
|
63
|
+
console.error(` --compress must be one of: none, brotli, zstd (got: ${options.compress})`);
|
|
64
|
+
process.exitCode = 2;
|
|
65
|
+
return;
|
|
66
|
+
}
|
|
67
|
+
if (options.compress === 'zstd') {
|
|
68
|
+
const { isEncodingSupported } = await import('@codragraph/graphstore');
|
|
69
|
+
if (!isEncodingSupported('zstd')) {
|
|
70
|
+
console.error(' --compress zstd requires Node ≥ 22.15.0 (native node:zlib zstd).\n' +
|
|
71
|
+
` Detected Node ${process.version}. Use --compress brotli instead, or upgrade Node.`);
|
|
72
|
+
process.exitCode = 2;
|
|
73
|
+
return;
|
|
74
|
+
}
|
|
75
|
+
}
|
|
76
|
+
// RFC 0001 Phase 2.5 — BM25 / FTS now drops `content` from its
|
|
77
|
+
// property list when meta.compress is non-'none' (see
|
|
78
|
+
// `core/search/bm25-index.ts`), so search inside compressed bodies
|
|
79
|
+
// gracefully falls back to name-only matches instead of tokenising
|
|
80
|
+
// base64 garbage. Surface the trade-off so users know what they're
|
|
81
|
+
// opting into.
|
|
82
|
+
console.warn(` Note: --compress ${options.compress} reduces .codragraph/cgdb size.\n` +
|
|
83
|
+
` BM25 search will index symbol names only (function bodies are not tokenised\n` +
|
|
84
|
+
` when compressed); embeddings, graph queries, and \`context\` / \`impact\` are\n` +
|
|
85
|
+
` unaffected. Run with --compress none if you rely on full-text search inside\n` +
|
|
86
|
+
` source bodies.`);
|
|
87
|
+
}
|
|
55
88
|
if (options?.maxFileSize) {
|
|
56
89
|
process.env.CODRAGRAPH_MAX_FILE_SIZE = options.maxFileSize;
|
|
57
90
|
}
|
|
91
|
+
// ── Auto-reindex coalesce-file cleanup ─────────────────────────────
|
|
92
|
+
// When the Claude Code PostToolUse hook spawns us in background mode, it
|
|
93
|
+
// passes the coalesce file path through this env var. We delete it on every
|
|
94
|
+
// exit path so the next commit immediately triggers a new reindex (rather
|
|
95
|
+
// than being blocked by a 10-min mtime TTL). The hook's TTL is just a
|
|
96
|
+
// crash safety net — this is the happy path.
|
|
97
|
+
const reindexLockPath = process.env.CODRAGRAPH_REINDEX_LOCK_PATH || '';
|
|
98
|
+
if (reindexLockPath) {
|
|
99
|
+
process.on('exit', () => {
|
|
100
|
+
try {
|
|
101
|
+
fsSync.unlinkSync(reindexLockPath);
|
|
102
|
+
}
|
|
103
|
+
catch {
|
|
104
|
+
/* already gone or unreadable — fine */
|
|
105
|
+
}
|
|
106
|
+
});
|
|
107
|
+
}
|
|
108
|
+
// ── First-run auto-setup ───────────────────────────────────────────
|
|
109
|
+
// Makes `npx @codragraph/cli analyze` a true one-command entry. We detect
|
|
110
|
+
// first-run by the absence of the global registry — analyze writes to it on
|
|
111
|
+
// every successful index, so it's a reliable "this user has never run us
|
|
112
|
+
// before" signal. Opt out with `--no-setup` for CI / headless contexts;
|
|
113
|
+
// commander maps `--no-setup` to `options.setup === false`.
|
|
114
|
+
if (options?.setup !== false) {
|
|
115
|
+
let registryExists = true;
|
|
116
|
+
try {
|
|
117
|
+
await fs.access(getGlobalRegistryPath());
|
|
118
|
+
}
|
|
119
|
+
catch {
|
|
120
|
+
registryExists = false;
|
|
121
|
+
}
|
|
122
|
+
if (!registryExists) {
|
|
123
|
+
const { runSetup } = await import('./setup.js');
|
|
124
|
+
await runSetup({ skipNextSteps: true, compactHeader: true });
|
|
125
|
+
}
|
|
126
|
+
}
|
|
58
127
|
console.log('\n CodraGraph Analyzer\n');
|
|
59
128
|
let repoPath;
|
|
60
129
|
if (inputPath) {
|
|
@@ -113,7 +182,7 @@ export const analyzeCommand = async (inputPath, options) => {
|
|
|
113
182
|
aborted = true;
|
|
114
183
|
bar.stop();
|
|
115
184
|
console.log('\n Interrupted — cleaning up...');
|
|
116
|
-
|
|
185
|
+
closeCgdb()
|
|
117
186
|
.catch(() => { })
|
|
118
187
|
.finally(() => process.exit(130));
|
|
119
188
|
};
|
|
@@ -168,6 +237,9 @@ export const analyzeCommand = async (inputPath, options) => {
|
|
|
168
237
|
// be able to accept the duplicate name without also paying the
|
|
169
238
|
// cost of a full pipeline re-index. See #829 review round 2.
|
|
170
239
|
allowDuplicateName: options?.allowDuplicateName,
|
|
240
|
+
// RFC 0001 Phase 2 — pass through the per-row encoding choice.
|
|
241
|
+
// Default 'none' / undefined keeps the pre-Phase-2 wire layout.
|
|
242
|
+
compress: options?.compress,
|
|
171
243
|
}, {
|
|
172
244
|
onProgress: (_phase, percent, message) => {
|
|
173
245
|
updateBar(percent, message);
|
|
@@ -190,9 +262,23 @@ export const analyzeCommand = async (inputPath, options) => {
|
|
|
190
262
|
if (options?.skills && result.pipelineResult) {
|
|
191
263
|
updateBar(99, 'Generating skill files...');
|
|
192
264
|
try {
|
|
193
|
-
const { generateSkillFiles } = await import('./skill-gen.js');
|
|
265
|
+
const { generateSkillFiles, SKILL_TARGETS } = await import('./skill-gen.js');
|
|
194
266
|
const { generateAIContextFiles } = await import('./ai-context.js');
|
|
195
|
-
|
|
267
|
+
// Parse --skill-targets CSV; default to ['claude'] when omitted.
|
|
268
|
+
// Unknown tokens are reported once and dropped — we don't fail the
|
|
269
|
+
// whole analyze for a typo here, but we do want the user to see it.
|
|
270
|
+
const requestedTargets = (options?.skillTargets || 'claude')
|
|
271
|
+
.split(',')
|
|
272
|
+
.map((s) => s.trim().toLowerCase())
|
|
273
|
+
.filter(Boolean);
|
|
274
|
+
const validTargets = requestedTargets.filter((t) => SKILL_TARGETS.includes(t));
|
|
275
|
+
const invalidTargets = requestedTargets.filter((t) => !SKILL_TARGETS.includes(t));
|
|
276
|
+
if (invalidTargets.length > 0) {
|
|
277
|
+
barLog(` Skills: unknown target(s) ignored: ${invalidTargets.join(', ')} ` +
|
|
278
|
+
`(valid: ${SKILL_TARGETS.join(', ')})`);
|
|
279
|
+
}
|
|
280
|
+
const targetsToUse = validTargets.length > 0 ? validTargets : ['claude'];
|
|
281
|
+
const skillResult = await generateSkillFiles(repoPath, result.repoName, result.pipelineResult, targetsToUse);
|
|
196
282
|
if (skillResult.skills.length > 0) {
|
|
197
283
|
barLog(` Generated ${skillResult.skills.length} skill files`);
|
|
198
284
|
// Re-generate AI context files now that we have skill info
|
|
@@ -235,11 +321,28 @@ export const analyzeCommand = async (inputPath, options) => {
|
|
|
235
321
|
console.log(`\n Repository indexed successfully (${totalTime}s)\n`);
|
|
236
322
|
console.log(` ${(s.nodes ?? 0).toLocaleString()} nodes | ${(s.edges ?? 0).toLocaleString()} edges | ${s.communities ?? 0} clusters | ${s.processes ?? 0} flows`);
|
|
237
323
|
console.log(` ${repoPath}`);
|
|
324
|
+
// Surface @codragraph/compress's value prop with concrete numbers: how
|
|
325
|
+
// many tokens of distilled context did we generate. Best-effort — never
|
|
326
|
+
// fail the analyze for a stat read.
|
|
238
327
|
try {
|
|
239
|
-
await
|
|
328
|
+
const { estimateTokens } = await import('./compress-stats.js');
|
|
329
|
+
const candidates = ['AGENTS.md', 'CLAUDE.md'];
|
|
330
|
+
const sizes = [];
|
|
331
|
+
for (const file of candidates) {
|
|
332
|
+
try {
|
|
333
|
+
const content = await fs.readFile(path.join(repoPath, file), 'utf-8');
|
|
334
|
+
sizes.push(`${file} ~${estimateTokens(content).toLocaleString()} tokens`);
|
|
335
|
+
}
|
|
336
|
+
catch {
|
|
337
|
+
/* file not generated for this run — skip */
|
|
338
|
+
}
|
|
339
|
+
}
|
|
340
|
+
if (sizes.length > 0) {
|
|
341
|
+
console.log(` @codragraph/compress: ${sizes.join(' | ')}`);
|
|
342
|
+
}
|
|
240
343
|
}
|
|
241
344
|
catch {
|
|
242
|
-
|
|
345
|
+
/* compress-stats import failed — non-fatal */
|
|
243
346
|
}
|
|
244
347
|
console.log('');
|
|
245
348
|
}
|
|
@@ -0,0 +1,29 @@
|
|
|
1
|
+
/** chars/4 token estimate. Matches @codragraph/compress's `estimateTokens`. */
|
|
2
|
+
export declare function estimateTokens(text: string): number;
|
|
3
|
+
/**
|
|
4
|
+
* Walk a result object and collect every file path we can find. Looks for
|
|
5
|
+
* `filePath`, `file_path`, and `file` keys at any depth. Used to estimate
|
|
6
|
+
* the raw-grep baseline (sum of source bytes the agent would have read
|
|
7
|
+
* without CodraGraph).
|
|
8
|
+
*/
|
|
9
|
+
export declare function collectFilePaths(obj: unknown, paths?: Set<string>): Set<string>;
|
|
10
|
+
/**
|
|
11
|
+
* Estimate raw-grep-equivalent token count by summing on-disk byte sizes of
|
|
12
|
+
* the referenced files. Returns null if any file is missing or unreadable —
|
|
13
|
+
* in that case we silently skip the comparison rather than show a misleading
|
|
14
|
+
* number.
|
|
15
|
+
*/
|
|
16
|
+
export declare function estimateRawGrepTokens(filePaths: Iterable<string>): number | null;
|
|
17
|
+
/**
|
|
18
|
+
* Format a one-line token-savings summary suitable for stderr display.
|
|
19
|
+
* If a raw baseline is provided AND it's larger than the structured response,
|
|
20
|
+
* the line includes the savings percentage. Otherwise it only reports
|
|
21
|
+
* the structured token count.
|
|
22
|
+
*/
|
|
23
|
+
export declare function formatTokenLine(structuredTokens: number, rawTokens?: number | null): string;
|
|
24
|
+
/**
|
|
25
|
+
* Compute and print the token-savings line for a tool result. Best-effort:
|
|
26
|
+
* never throws, never blocks output. Goes to stderr so JSON consumers piping
|
|
27
|
+
* stdout to jq stay clean.
|
|
28
|
+
*/
|
|
29
|
+
export declare function emitTokenStats(result: unknown): void;
|
|
@@ -0,0 +1,97 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Token-savings reporter for CLI output.
|
|
3
|
+
*
|
|
4
|
+
* Surfaces the @codragraph/compress value proposition on every `query`,
|
|
5
|
+
* `context`, `impact`, and `analyze` invocation: how many tokens of
|
|
6
|
+
* structured context did we return vs the equivalent raw-grep response.
|
|
7
|
+
*
|
|
8
|
+
* Uses the same chars/4 heuristic as @codragraph/compress's `estimateTokens`
|
|
9
|
+
* for cross-package consistency. Inlined rather than imported because pulling
|
|
10
|
+
* in @codragraph/compress as a runtime dep also pulls @codragraph/harness as a
|
|
11
|
+
* transitive — too heavy for what is logically a one-line approximation. When
|
|
12
|
+
* we add real LLM compression (opt-in; distinct from `analyze --compress` row encoding), the package import will
|
|
13
|
+
* follow.
|
|
14
|
+
*/
|
|
15
|
+
import * as fsSync from 'node:fs';
|
|
16
|
+
/**
 * Rough token estimate: one token per four characters of the trimmed text,
 * rounded down. Intended to mirror @codragraph/compress's `estimateTokens`.
 *
 * @param text String to measure; leading/trailing whitespace is not counted.
 * @returns A non-negative integer token estimate.
 */
export function estimateTokens(text) {
    const trimmedLength = text.trim().length;
    const approx = Math.floor(trimmedLength / 4);
    return approx > 0 ? approx : 0;
}
|
|
20
|
+
/**
|
|
21
|
+
* Walk a result object and collect every file path we can find. Looks for
|
|
22
|
+
* `filePath`, `file_path`, and `file` keys at any depth. Used to estimate
|
|
23
|
+
* the raw-grep baseline (sum of source bytes the agent would have read
|
|
24
|
+
* without CodraGraph).
|
|
25
|
+
*/
|
|
26
|
+
export function collectFilePaths(obj, paths = new Set()) {
|
|
27
|
+
if (!obj || typeof obj !== 'object')
|
|
28
|
+
return paths;
|
|
29
|
+
if (Array.isArray(obj)) {
|
|
30
|
+
for (const item of obj)
|
|
31
|
+
collectFilePaths(item, paths);
|
|
32
|
+
return paths;
|
|
33
|
+
}
|
|
34
|
+
for (const [key, value] of Object.entries(obj)) {
|
|
35
|
+
if ((key === 'filePath' || key === 'file_path' || key === 'file') &&
|
|
36
|
+
typeof value === 'string' &&
|
|
37
|
+
value.length > 0) {
|
|
38
|
+
paths.add(value);
|
|
39
|
+
}
|
|
40
|
+
else if (typeof value === 'object') {
|
|
41
|
+
collectFilePaths(value, paths);
|
|
42
|
+
}
|
|
43
|
+
}
|
|
44
|
+
return paths;
|
|
45
|
+
}
|
|
46
|
+
/**
 * Estimate the raw-grep-equivalent token count for a set of files by adding
 * up their on-disk sizes (as reported by `stat`) and dividing by four.
 *
 * The estimate is all-or-nothing: if any entry cannot be stat'ed or is not a
 * regular file, `null` is returned so callers skip the comparison rather than
 * show a misleading number.
 *
 * @param filePaths Iterable of paths to measure.
 * @returns Floor of total size / 4, or `null` when any path is unusable.
 */
export function estimateRawGrepTokens(filePaths) {
    let sizeTotal = 0;
    for (const candidate of filePaths) {
        let size;
        try {
            const info = fsSync.statSync(candidate);
            size = info.isFile() ? info.size : null;
        }
        catch {
            size = null;
        }
        if (size === null)
            return null;
        sizeTotal += size;
    }
    return Math.floor(sizeTotal / 4);
}
|
|
67
|
+
/**
 * Build the one-line token summary shown to the user.
 *
 * When a raw baseline is supplied and is strictly larger than the structured
 * count, the line also reports the baseline and the rounded percentage saved.
 * In every other case (no baseline, zero, or baseline not larger) only the
 * structured count is reported.
 *
 * @param structuredTokens Token estimate for the structured response.
 * @param rawTokens        Optional raw-source token estimate (may be null).
 * @returns The formatted summary line, without a trailing newline.
 */
export function formatTokenLine(structuredTokens, rawTokens) {
    const head = ` @codragraph/compress: ~${structuredTokens.toLocaleString()} tokens of structured context`;
    const hasLargerBaseline = Boolean(rawTokens) && rawTokens > structuredTokens;
    if (!hasLargerBaseline) {
        return `${head}.`;
    }
    const percentSaved = Math.round((1 - structuredTokens / rawTokens) * 100);
    return `${head} (vs ~${rawTokens.toLocaleString()} tokens of raw source — ${percentSaved}% smaller).`;
}
|
|
81
|
+
/**
 * Print the token-savings line for a tool result to stderr.
 *
 * The result is measured as-is when it is a string, otherwise via its JSON
 * serialisation. Any file paths found inside it feed the raw-source baseline.
 * Output goes to stderr so stdout stays clean for JSON consumers (e.g. piping
 * to jq). Best-effort: any failure while computing or writing the line is
 * swallowed so stats can never break the real output.
 *
 * @param result The tool result to summarise.
 */
export function emitTokenStats(result) {
    try {
        const serialized = typeof result !== 'string' ? JSON.stringify(result) : result;
        const structuredCount = estimateTokens(serialized);
        const referenced = collectFilePaths(result);
        let baseline = null;
        if (referenced.size > 0) {
            baseline = estimateRawGrepTokens(referenced);
        }
        const line = formatTokenLine(structuredCount, baseline);
        process.stderr.write('\n' + line + '\n');
    }
    catch {
        /* never let stats break the actual output */
    }
}
|
package/dist/cli/graphstore.d.ts
CHANGED
|
@@ -12,7 +12,9 @@ export declare const logCommand: (opts?: {
|
|
|
12
12
|
limit?: string;
|
|
13
13
|
}) => Promise<void>;
|
|
14
14
|
export declare const branchListCommand: () => Promise<void>;
|
|
15
|
-
export declare const diffCommand: (from: string, to: string
|
|
15
|
+
export declare const diffCommand: (from: string, to: string, opts?: {
|
|
16
|
+
json?: boolean;
|
|
17
|
+
}) => Promise<void>;
|
|
16
18
|
export declare const commitCommand: (opts?: {
|
|
17
19
|
message?: string;
|
|
18
20
|
}) => Promise<void>;
|
|
@@ -36,5 +38,7 @@ export declare const mergeCommand: (target: string, opts?: {
|
|
|
36
38
|
export declare const gcCommand: (opts?: {
|
|
37
39
|
dryRun?: boolean;
|
|
38
40
|
}) => Promise<void>;
|
|
39
|
-
export declare const diffSemanticCommand: (from: string, to: string
|
|
41
|
+
export declare const diffSemanticCommand: (from: string, to: string, opts?: {
|
|
42
|
+
json?: boolean;
|
|
43
|
+
}) => Promise<void>;
|
|
40
44
|
export { DEFAULT_BRANCH };
|