npm - @codragraph/cli - Versions diffs - 1.6.4 → 2.0.0 - Mend

@codragraph/cli 1.6.4 → 2.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (61) hide show

package/README.md +34 -0
package/dist/cli/analyze.d.ts +22 -0
package/dist/cli/analyze.js +107 -4
package/dist/cli/compress-stats.d.ts +29 -0
package/dist/cli/compress-stats.js +97 -0
package/dist/cli/graphstore.d.ts +6 -2
package/dist/cli/graphstore.js +24 -2
package/dist/cli/index.js +16 -2
package/dist/cli/profile-heap.d.ts +35 -0
package/dist/cli/profile-heap.js +126 -0
package/dist/cli/setup.d.ts +13 -0
package/dist/cli/setup.js +22 -11
package/dist/cli/skill-gen.d.ts +14 -2
package/dist/cli/skill-gen.js +52 -19
package/dist/cli/tool.js +4 -0
package/dist/core/embeddings/embedding-pipeline.js +24 -7
package/dist/core/group/bridge-db.js +111 -24
package/dist/core/lbug/content-read.d.ts +46 -0
package/dist/core/lbug/content-read.js +64 -0
package/dist/core/lbug/csv-generator.d.ts +2 -6
package/dist/core/lbug/csv-generator.js +45 -12
package/dist/core/lbug/lbug-adapter.d.ts +4 -1
package/dist/core/lbug/lbug-adapter.js +153 -21
package/dist/core/lbug/schema.d.ts +7 -7
package/dist/core/lbug/schema.js +18 -0
package/dist/core/run-analyze.d.ts +13 -0
package/dist/core/run-analyze.js +91 -4
package/dist/core/search/bm25-index.js +67 -15
package/dist/mcp/local/local-backend.js +22 -5
package/dist/server/api.js +4 -3
package/dist/storage/repo-manager.d.ts +39 -0
package/dist/storage/repo-manager.js +19 -0
package/hooks/claude/codragraph-hook.cjs +95 -2
package/package.json +4 -4
package/scripts/build-tree-sitter-proto.cjs +15 -3
package/scripts/patch-tree-sitter-swift.cjs +17 -4
package/skills/codragraph-api-surface.md +110 -0
package/skills/codragraph-config-audit.md +146 -0
package/skills/codragraph-cross-repo-impact.md +135 -0
package/skills/codragraph-data-lineage.md +137 -0
package/skills/codragraph-dead-code.md +119 -0
package/skills/codragraph-gh-actions-debug.md +162 -0
package/skills/codragraph-gh-issue-workflow.md +178 -0
package/skills/codragraph-gh-pr-workflow.md +176 -0
package/skills/codragraph-gh-release-workflow.md +187 -0
package/skills/codragraph-git-bisect.md +176 -0
package/skills/codragraph-git-force-push.md +147 -0
package/skills/codragraph-git-history-rewrite.md +174 -0
package/skills/codragraph-git-rebase-vs-merge.md +138 -0
package/skills/codragraph-git-recovery.md +181 -0
package/skills/codragraph-git-worktree.md +145 -0
package/skills/codragraph-migration-tracking.md +130 -0
package/skills/codragraph-notebook-context.md +136 -0
package/skills/codragraph-observability-coverage.md +125 -0
package/skills/codragraph-onboarding.md +129 -0
package/skills/codragraph-perf-hotspots.md +132 -0
package/skills/codragraph-project-switcher.md +116 -0
package/skills/codragraph-security-audit.md +144 -0
package/skills/codragraph-sql-tracing.md +122 -0
package/skills/codragraph-supply-chain-audit.md +153 -0
package/skills/codragraph-test-coverage.md +97 -0

package/README.md CHANGED Viewed

@@ -155,6 +155,9 @@ codragraph analyze --embeddings    # Enable embedding generation (slower, better
 codragraph analyze --skip-agents-md  # Preserve custom AGENTS.md/CLAUDE.md codragraph section edits
 codragraph analyze --verbose       # Log skipped files when parsers are unavailable
 codragraph analyze --max-file-size 1024  # Skip files larger than N KB (default: 512, cap: 32768)
+codragraph analyze --compress brotli  # Per-row body compression. Also: zstd, none.
+codragraph profile-heap [path]     # Run analyze with v8 heap-snapshot instrumentation
+codragraph profile-heap --no-summary  # Same, but skip the post-run RSS / heapUsed table
 codragraph mcp                     # Start MCP server (stdio) — serves all indexed repos
 codragraph serve                   # Start local HTTP server (multi-repo) for web UI
 codragraph index                   # Register an existing .codragraph/ folder into the global registry
@@ -306,6 +309,37 @@ echo "vendor/" >> .codragraphignore
 echo "dist/" >> .codragraphignore
 ```
+If you want to know **which phase** is dragging the heap up before
+deciding what to mitigate, run `codragraph profile-heap`. It writes a
+v8 heap snapshot at every phase boundary plus a JSONL timeline of
+`process.memoryUsage()` and prints a per-phase RSS / `heapUsed` table:
+```bash
+codragraph profile-heap                       # writes .codragraph/heap-profiles/
+# → load any .heapsnapshot in Chrome DevTools → Memory → Load
+```
+Each snapshot is 100–500 MB, so the command is opt-in only. The JSONL
+timeline is small enough to share for triage even when the snapshots
+are too big.
+### Index size — opt-in per-row compression
+For repos where `.codragraph/lbug` itself has grown large:
+```bash
+codragraph analyze --compress brotli   # Node ≥ 18, brotli quality 6
+codragraph analyze --compress zstd     # Node ≥ 22.15, zstd level 3
+codragraph analyze --compress none     # explicit default
+```
+`--compress` routes every node-row content field through the matching
+encoder before it's written to the CSV / lbug; readers decode
+transparently via the per-row `contentEncoding` tag. With the flag
+unset, the on-disk layout is byte-identical to pre-1.8 indexes. Pre-1.8
+indexes auto-trigger a full re-analyze the first time a 1.8+ CLI runs
+against them (one-time cost, surfaced in the analyze log).
 ### Large files are being skipped
 By default the walker skips files larger than **512 KB** (see log line `Skipped N large files (>512KB)`). Raise the threshold via either the CLI flag or the environment variable — both accept a value in **KB**:

package/dist/cli/analyze.d.ts CHANGED Viewed

@@ -39,5 +39,27 @@ export interface AnalyzeOptions {
      * `CODRAGRAPH_MAX_FILE_SIZE` for the rest of the pipeline.
      */
     maxFileSize?: string;
+    /**
+     * First-run auto-setup gate. Default `true` (commander injects this from the
+     * `--no-setup` flag — see CLI registration). When `true`, `analyze` detects a
+     * missing `~/.codragraph/registry.json` and runs editor setup before indexing,
+     * making `npx @codragraph/cli analyze` a true zero-install entry. Pass
+     * `--no-setup` to opt out (CI, headless servers, automated pipelines).
+     */
+    setup?: boolean;
+    /**
+     * Comma-separated list of editor targets for `--skills` output. Valid values
+     * are `claude`, `cursor`, `opencode`, `codex`. Default: `claude` (pre-flag
+     * behavior). Unknown values are reported and ignored; if none remain, `claude` is used.
+     */
+    skillTargets?: string;
+    /**
+     * RFC 0001 Phase 2 — opt-in per-row content compression. Accepts
+     * `'none'` (default), `'brotli'` (Node ≥ 18), or `'zstd'` (Node ≥
+     * 22.15). Compressed indexes are still queryable via the standard
+     * read path; decode happens at every external-consumer boundary
+     * (MCP, HTTP API, embeddings, CLI tools).
+     */
+    compress?: 'none' | 'brotli' | 'zstd';
 }
 export declare const analyzeCommand: (inputPath?: string, options?: AnalyzeOptions) => Promise<void>;

package/dist/cli/analyze.js CHANGED Viewed

@@ -11,6 +11,7 @@ import path from 'path';
 import { execFileSync } from 'child_process';
 import v8 from 'v8';
 import cliProgress from 'cli-progress';
+import * as fsSync from 'node:fs';
 import { closeLbug } from '../core/lbug/lbug-adapter.js';
 import { getStoragePaths, getGlobalRegistryPath, RegistryNameCollisionError, } from '../storage/repo-manager.js';
 import { getGitRoot, hasGitDir } from '../storage/git.js';
@@ -52,9 +53,77 @@ export const analyzeCommand = async (inputPath, options) => {
     if (options?.verbose) {
         process.env.CODRAGRAPH_VERBOSE = '1';
     }
+    // RFC 0001 Phase 2 — validate --compress before doing any work. Catching
+    // a typo or an unsupported encoding here is much friendlier than failing
+    // mid-analyze with an opaque CSV-write error. Node-version gating for
+    // zstd lives in @codragraph/graphstore via isEncodingSupported, but we
+    // import the check here so the CLI can offer the brotli fallback hint.
+    if (options?.compress && options.compress !== 'none') {
+        if (options.compress !== 'brotli' && options.compress !== 'zstd') {
+            console.error(`  --compress must be one of: none, brotli, zstd (got: ${options.compress})`);
+            process.exitCode = 2;
+            return;
+        }
+        if (options.compress === 'zstd') {
+            const { isEncodingSupported } = await import('@codragraph/graphstore');
+            if (!isEncodingSupported('zstd')) {
+                console.error('  --compress zstd requires Node ≥ 22.15.0 (native node:zlib zstd).\n' +
+                    `  Detected Node ${process.version}. Use --compress brotli instead, or upgrade Node.`);
+                process.exitCode = 2;
+                return;
+            }
+        }
+        // RFC 0001 Phase 2.5 — BM25 / FTS now drops `content` from its
+        // property list when meta.compress is non-'none' (see
+        // `core/search/bm25-index.ts`), so search inside compressed bodies
+        // gracefully falls back to name-only matches instead of tokenising
+        // base64 garbage. Surface the trade-off so users know what they're
+        // opting into.
+        console.warn(`  Note: --compress ${options.compress} reduces .codragraph/lbug size.\n` +
+            `  BM25 search will index symbol names only (function bodies are not tokenised\n` +
+            `  when compressed); embeddings, graph queries, and \`context\` / \`impact\` are\n` +
+            `  unaffected. Run with --compress none if you rely on full-text search inside\n` +
+            `  source bodies.`);
+    }
     if (options?.maxFileSize) {
         process.env.CODRAGRAPH_MAX_FILE_SIZE = options.maxFileSize;
     }
+    // ── Auto-reindex coalesce-file cleanup ─────────────────────────────
+    // When the Claude Code PostToolUse hook spawns us in background mode, it
+    // passes the coalesce file path through this env var. We delete it on every
+    // exit path so the next commit immediately triggers a new reindex (rather
+    // than being blocked by a 10-min mtime TTL). The hook's TTL is just a
+    // crash safety net — this is the happy path.
+    const reindexLockPath = process.env.CODRAGRAPH_REINDEX_LOCK_PATH || '';
+    if (reindexLockPath) {
+        process.on('exit', () => {
+            try {
+                fsSync.unlinkSync(reindexLockPath);
+            }
+            catch {
+                /* already gone or unreadable — fine */
+            }
+        });
+    }
+    // ── First-run auto-setup ───────────────────────────────────────────
+    // Makes `npx @codragraph/cli analyze` a true one-command entry. We detect
+    // first-run by the absence of the global registry — analyze writes to it on
+    // every successful index, so it's a reliable "this user has never run us
+    // before" signal. Opt out with `--no-setup` for CI / headless contexts;
+    // commander maps `--no-setup` to `options.setup === false`.
+    if (options?.setup !== false) {
+        let registryExists = true;
+        try {
+            await fs.access(getGlobalRegistryPath());
+        }
+        catch {
+            registryExists = false;
+        }
+        if (!registryExists) {
+            const { runSetup } = await import('./setup.js');
+            await runSetup({ skipNextSteps: true, compactHeader: true });
+        }
+    }
     console.log('\n  CodraGraph Analyzer\n');
     let repoPath;
     if (inputPath) {
@@ -168,6 +237,9 @@ export const analyzeCommand = async (inputPath, options) => {
             // be able to accept the duplicate name without also paying the
             // cost of a full pipeline re-index. See #829 review round 2.
             allowDuplicateName: options?.allowDuplicateName,
+            // RFC 0001 Phase 2 — pass through the per-row encoding choice.
+            // Default 'none' / undefined keeps the pre-Phase-2 wire layout.
+            compress: options?.compress,
         }, {
             onProgress: (_phase, percent, message) => {
                 updateBar(percent, message);
@@ -190,9 +262,23 @@ export const analyzeCommand = async (inputPath, options) => {
         if (options?.skills && result.pipelineResult) {
             updateBar(99, 'Generating skill files...');
             try {
-                const { generateSkillFiles } = await import('./skill-gen.js');
+                const { generateSkillFiles, SKILL_TARGETS } = await import('./skill-gen.js');
                 const { generateAIContextFiles } = await import('./ai-context.js');
-                const skillResult = await generateSkillFiles(repoPath, result.repoName, result.pipelineResult);
+                // Parse --skill-targets CSV; default to ['claude'] when omitted.
+                // Unknown tokens are reported once and dropped — we don't fail the
+                // whole analyze for a typo here, but we do want the user to see it.
+                const requestedTargets = (options?.skillTargets || 'claude')
+                    .split(',')
+                    .map((s) => s.trim().toLowerCase())
+                    .filter(Boolean);
+                const validTargets = requestedTargets.filter((t) => SKILL_TARGETS.includes(t));
+                const invalidTargets = requestedTargets.filter((t) => !SKILL_TARGETS.includes(t));
+                if (invalidTargets.length > 0) {
+                    barLog(`  Skills: unknown target(s) ignored: ${invalidTargets.join(', ')} ` +
+                        `(valid: ${SKILL_TARGETS.join(', ')})`);
+                }
+                const targetsToUse = validTargets.length > 0 ? validTargets : ['claude'];
+                const skillResult = await generateSkillFiles(repoPath, result.repoName, result.pipelineResult, targetsToUse);
                 if (skillResult.skills.length > 0) {
                     barLog(`  Generated ${skillResult.skills.length} skill files`);
                     // Re-generate AI context files now that we have skill info
@@ -235,11 +321,28 @@ export const analyzeCommand = async (inputPath, options) => {
         console.log(`\n  Repository indexed successfully (${totalTime}s)\n`);
         console.log(`  ${(s.nodes ?? 0).toLocaleString()} nodes | ${(s.edges ?? 0).toLocaleString()} edges | ${s.communities ?? 0} clusters | ${s.processes ?? 0} flows`);
         console.log(`  ${repoPath}`);
+        // Surface @codragraph/compress's value prop with concrete numbers: how
+        // many tokens of distilled context did we generate. Best-effort — never
+        // fail the analyze for a stat read.
         try {
-            await fs.access(getGlobalRegistryPath());
+            const { estimateTokens } = await import('./compress-stats.js');
+            const candidates = ['AGENTS.md', 'CLAUDE.md'];
+            const sizes = [];
+            for (const file of candidates) {
+                try {
+                    const content = await fs.readFile(path.join(repoPath, file), 'utf-8');
+                    sizes.push(`${file} ~${estimateTokens(content).toLocaleString()} tokens`);
+                }
+                catch {
+                    /* file not generated for this run — skip */
+                }
+            }
+            if (sizes.length > 0) {
+                console.log(`  @codragraph/compress: ${sizes.join(' | ')}`);
+            }
         }
         catch {
-            console.log('\n  Tip: Run `codragraph setup` to configure MCP for your editor.');
+            /* compress-stats import failed — non-fatal */
         }
         console.log('');
     }

package/dist/cli/compress-stats.d.ts ADDED Viewed

@@ -0,0 +1,29 @@
+/** chars/4 token estimate (whitespace-trimmed length, rounded down). Matches @codragraph/compress's `estimateTokens`. */
+export declare function estimateTokens(text: string): number;
+/**
+ * Walk a result object and collect every file path we can find. Looks for
+ * `filePath`, `file_path`, and `file` keys at any depth. Used to estimate
+ * the raw-grep baseline (sum of source bytes the agent would have read
+ * without CodraGraph).
+ */
+export declare function collectFilePaths(obj: unknown, paths?: Set<string>): Set<string>;
+/**
+ * Estimate raw-grep-equivalent token count by summing on-disk byte sizes of
+ * the referenced files and dividing by 4. Returns null if any path is missing,
+ * unreadable, or not a regular file — in that case we silently skip the
+ * comparison rather than show a misleading number.
+ */
+export declare function estimateRawGrepTokens(filePaths: Iterable<string>): number | null;
+/**
+ * Format a one-line token-savings summary suitable for stderr display.
+ * If a raw baseline is provided AND it's larger than the structured response,
+ * the line includes the savings percentage. Otherwise it only reports
+ * the structured token count.
+ */
+export declare function formatTokenLine(structuredTokens: number, rawTokens?: number | null): string;
+/**
+ * Compute and print the token-savings line for a tool result. Best-effort:
+ * never throws, never blocks output. Goes to stderr so JSON consumers piping
+ * stdout to jq stay clean.
+ */
+export declare function emitTokenStats(result: unknown): void;

package/dist/cli/compress-stats.js ADDED Viewed

@@ -0,0 +1,97 @@
+/**
+ * Token-savings reporter for CLI output.
+ *
+ * Surfaces the @codragraph/compress value proposition on every `query`,
+ * `context`, `impact`, and `analyze` invocation: how many tokens of
+ * structured context we returned versus the equivalent raw-grep response.
+ *
+ * Uses the same chars/4 heuristic as @codragraph/compress's `estimateTokens`
+ * for cross-package consistency. Inlined rather than imported because pulling
+ * in @codragraph/compress as a runtime dependency also pulls in
+ * @codragraph/harness transitively — too heavy for what is logically a
+ * one-line approximation. When we add real LLM compression (`--compress`
+ * opt-in), the package import will follow.
+ */
+import * as fsSync from 'node:fs';
+/** chars/4 token estimate (whitespace-trimmed length, rounded down). Matches @codragraph/compress's `estimateTokens`. */
+export function estimateTokens(text) {
+    return Math.max(0, Math.floor(text.trim().length / 4));
+}
+/**
+ * Walk a result object and collect every file path we can find. Looks for
+ * `filePath`, `file_path`, and `file` keys at any depth. Used to estimate
+ * the raw-grep baseline (sum of source bytes the agent would have read
+ * without CodraGraph).
+ */
+export function collectFilePaths(obj, paths = new Set()) {
+    if (!obj || typeof obj !== 'object')
+        return paths;
+    if (Array.isArray(obj)) {
+        for (const item of obj)
+            collectFilePaths(item, paths);
+        return paths;
+    }
+    for (const [key, value] of Object.entries(obj)) {
+        if ((key === 'filePath' || key === 'file_path' || key === 'file') &&
+            typeof value === 'string' &&
+            value.length > 0) {
+            paths.add(value);
+        }
+        else if (typeof value === 'object') {
+            collectFilePaths(value, paths);
+        }
+    }
+    return paths;
+}
+/**
+ * Estimate raw-grep-equivalent token count by summing on-disk byte sizes of
+ * the referenced files and dividing by 4. Returns null if any path is missing,
+ * unreadable, or not a regular file — in that case we silently skip the
+ * comparison rather than show a misleading number.
+ */
+export function estimateRawGrepTokens(filePaths) {
+    let totalChars = 0;
+    for (const fp of filePaths) {
+        try {
+            const stat = fsSync.statSync(fp);
+            if (!stat.isFile())
+                return null;
+            totalChars += stat.size;
+        }
+        catch {
+            return null;
+        }
+    }
+    return Math.floor(totalChars / 4);
+}
+/**
+ * Format a one-line token-savings summary suitable for stderr display.
+ * If a raw baseline is provided AND it's larger than the structured response,
+ * the line includes the savings percentage. Otherwise it only reports
+ * the structured token count.
+ */
+export function formatTokenLine(structuredTokens, rawTokens) {
+    if (rawTokens && rawTokens > structuredTokens) {
+        const savings = Math.round((1 - structuredTokens / rawTokens) * 100);
+        return (`  @codragraph/compress: ~${structuredTokens.toLocaleString()} tokens of structured context ` +
+            `(vs ~${rawTokens.toLocaleString()} tokens of raw source — ${savings}% smaller).`);
+    }
+    return `  @codragraph/compress: ~${structuredTokens.toLocaleString()} tokens of structured context.`;
+}
+/**
+ * Compute and print the token-savings line for a tool result. Best-effort:
+ * never throws, never blocks output. Goes to stderr so JSON consumers piping
+ * stdout to jq stay clean.
+ */
+export function emitTokenStats(result) {
+    try {
+        const structured = typeof result === 'string' ? result : JSON.stringify(result);
+        const sTokens = estimateTokens(structured);
+        const files = collectFilePaths(result);
+        const rawTokens = files.size > 0 ? estimateRawGrepTokens(files) : null;
+        process.stderr.write('\n' + formatTokenLine(sTokens, rawTokens) + '\n');
+    }
+    catch {
+        /* never let stats break the actual output */
+    }
+}

package/dist/cli/graphstore.d.ts CHANGED Viewed

@@ -12,7 +12,9 @@ export declare const logCommand: (opts?: {
     limit?: string;
 }) => Promise<void>;
 export declare const branchListCommand: () => Promise<void>;
-export declare const diffCommand: (from: string, to: string) => Promise<void>;
+export declare const diffCommand: (from: string, to: string, opts?: {
+    json?: boolean;
+}) => Promise<void>;
 export declare const commitCommand: (opts?: {
     message?: string;
 }) => Promise<void>;
@@ -36,5 +38,7 @@ export declare const mergeCommand: (target: string, opts?: {
 export declare const gcCommand: (opts?: {
     dryRun?: boolean;
 }) => Promise<void>;
-export declare const diffSemanticCommand: (from: string, to: string) => Promise<void>;
+export declare const diffSemanticCommand: (from: string, to: string, opts?: {
+    json?: boolean;
+}) => Promise<void>;
 export { DEFAULT_BRANCH };

package/dist/cli/graphstore.js CHANGED Viewed

@@ -84,7 +84,7 @@ export const branchListCommand = async () => {
 // ──────────────────────────────────────────────────────────────────────
 // codragraph diff <from> <to>
 // ──────────────────────────────────────────────────────────────────────
-export const diffCommand = async (from, to) => {
+export const diffCommand = async (from, to, opts = {}) => {
     const ctx = await resolveGraphstore(process.cwd());
     const fromCommitId = await resolveCommitTarget(ctx, from);
     const toCommitId = await resolveCommitTarget(ctx, to);
@@ -95,6 +95,17 @@ export const diffCommand = async (from, to) => {
         from: fromCommit.snapshot,
         to: toCommit.snapshot,
     });
+    // --json: emit a machine-readable payload for downstream consumers
+    // (GitHub Action comment formatter, IDE plugins, etc). Keep human and
+    // JSON paths separate — never sneak JSON into the human path's stdout.
+    if (opts.json) {
+        process.stdout.write(JSON.stringify({
+            from: { commit: fromCommitId, message: fromCommit.message },
+            to: { commit: toCommitId, message: toCommit.message },
+            diff,
+        }, null, 2) + '\n');
+        return;
+    }
     process.stdout.write(`From: ${fromCommitId.slice(7, 7 + 12)}  ${fromCommit.message}\n`);
     process.stdout.write(`To:   ${toCommitId.slice(7, 7 + 12)}  ${toCommit.message}\n\n`);
     let totalAdded = 0;
@@ -581,7 +592,7 @@ const formatBytes = (n) => {
 // classified modifications, added/removed APIs, and process changes. We
 // expose it as a separate module-local helper so the CLI handler can
 // dispatch on the flag.
-export const diffSemanticCommand = async (from, to) => {
+export const diffSemanticCommand = async (from, to, opts = {}) => {
     const ctx = await resolveGraphstore(process.cwd());
     const fromCommit = await readCommit(ctx.cas, await resolveCommitTarget(ctx, from));
     const toCommit = await readCommit(ctx.cas, await resolveCommitTarget(ctx, to));
@@ -590,6 +601,17 @@ export const diffSemanticCommand = async (from, to) => {
         from: fromCommit.snapshot,
         to: toCommit.snapshot,
     });
+    // --json: same envelope as plain diff, but `from`/`to` carry the ref as given
+    // (`ref`, not a resolved `commit` id) and the payload key is `semantic`. The
+    // PR-review GitHub Action renders its Markdown comment from this directly.
+    if (opts.json) {
+        process.stdout.write(JSON.stringify({
+            from: { ref: from, message: fromCommit.message },
+            to: { ref: to, message: toCommit.message },
+            semantic: d,
+        }, null, 2) + '\n');
+        return;
+    }
     process.stdout.write(`From: ${from}  (${fromCommit.message})\n`);
     process.stdout.write(`To:   ${to}    (${toCommit.message})\n\n`);
     if (d.addedAPIs.length > 0) {

package/dist/cli/index.js CHANGED Viewed

@@ -19,6 +19,7 @@ program
     .option('-f, --force', 'Force full re-index even if up to date')
     .option('--embeddings', 'Enable embedding generation for semantic search (off by default)')
     .option('--skills', 'Generate repo-specific skill files from detected communities')
+    .option('--skill-targets <list>', 'CSV of editor targets for --skills (claude, cursor, opencode, codex). Default: claude.')
     .option('--skip-agents-md', 'Skip updating the codragraph section in AGENTS.md and CLAUDE.md')
     .option('--no-stats', 'Omit volatile file/symbol counts from AGENTS.md and CLAUDE.md')
     .option('--skip-git', 'Index a folder without requiring a .git directory')
@@ -28,12 +29,24 @@ program
     'Leaves `-r <name>` ambiguous for the two paths; use -r <path> to disambiguate.')
     .option('-v, --verbose', 'Enable verbose ingestion warnings (default: false)')
     .option('--max-file-size <kb>', 'Skip files larger than this (KB). Default: 512. Hard cap: 32768 (tree-sitter limit).')
+    .option('--no-setup', 'Skip the first-run editor setup (auto-runs once when ~/.codragraph/registry.json is missing)')
+    .option('--compress <encoding>', 'Compress per-row content (RFC 0001 Phase 2). One of: none (default), brotli, zstd. zstd requires Node ≥ 22.15.', 'none')
     .addHelpText('after', '\nEnvironment variables:\n' +
     '  CODRAGRAPH_NO_GITIGNORE=1   Skip .gitignore parsing (still reads .codragraphignore)\n' +
     '  CODRAGRAPH_MAX_FILE_SIZE=N  Override large-file skip threshold (KB). Default 512, max 32768.\n' +
     '\nTip: `.codragraphignore` supports `.gitignore`-style negation. Add e.g.\n' +
     '     `!__tests__/` to index a directory that is auto-filtered by default (#771).')
     .action(createLazyAction(() => import('./analyze.js'), 'analyzeCommand'));
+program
+    .command('profile-heap [path]')
+    .description('Run analyze with heap-profile instrumentation (RFC 0002 Phase 1). ' +
+    'Writes per-phase v8 heap snapshots + a JSONL RSS timeline under ' +
+    '.codragraph/heap-profiles/, then prints a summary table.')
+    .option('-f, --force', 'Force full re-index (analyze flag, passed through)')
+    .option('--skip-git', 'Index a folder without requiring a .git directory')
+    .option('--no-setup', 'Skip first-run editor setup')
+    .option('--no-summary', 'Skip the post-run summary table (raw artifacts only)')
+    .action(createLazyAction(() => import('./profile-heap.js'), 'profileHeapCommand'));
 program
     .command('index [path...]')
     .description('Register an existing .codragraph/ folder into the global registry (no re-analysis needed)')
@@ -192,12 +205,13 @@ program
     .command('diff <from> <to>')
     .description('Structural diff between two graph commits or branches')
     .option('--semantic', 'Use the semantic differ (added APIs, classified modifications, processes)')
+    .option('--json', 'Emit machine-readable JSON instead of human-readable text (for CI / GitHub Action consumers)')
     .action(async (from, to, opts) => {
     const mod = await import('./graphstore.js');
     if (opts.semantic)
-        await mod.diffSemanticCommand(from, to);
+        await mod.diffSemanticCommand(from, to, { json: opts.json });
     else
-        await mod.diffCommand(from, to);
+        await mod.diffCommand(from, to, { json: opts.json });
 });
 program
     .command('merge <branch>')

package/dist/cli/profile-heap.d.ts ADDED Viewed

@@ -0,0 +1,35 @@
+/**
+ * profile-heap — RFC 0002 Phase 1 entry point.
+ *
+ * A thin wrapper around `analyze` that flips on the heap-profile
+ * instrumentation already living in `runFullAnalysis`, then prints a
+ * per-phase RSS / heapUsed summary table after the run finishes.
+ *
+ * Why a dedicated subcommand instead of just documenting the env var?
+ *   - Discoverability: `codragraph --help` lists it next to `analyze`.
+ *   - One-shot UX: users (and the maintainer) get a useful summary table
+ *     without having to spelunk through Chrome DevTools to compare
+ *     snapshots. The `.heapsnapshot` files are still written for deep
+ *     dives; the summary just makes the cheap signal (RSS curve, heapUsed
+ *     curve) visible at a glance.
+ *   - Phase 1 of RFC 0002 is profile-first by design — we ship the tool
+ *     before any mitigation. Don't add compression, eviction, or streaming
+ *     refactors here; that's Phase 2+ once we know which phase is the
+ *     actual bottleneck.
+ *
+ * Side effects: writes `.codragraph/heap-profiles/<ts>-<phase>.heapsnapshot`
+ * (one per phase boundary, ~100-500MB each) plus a small
+ * `profile-summary.jsonl` timeline. Disk usage adds up fast on large
+ * repos — clean up between runs if you don't need the raw snapshots.
+ */
+import { type AnalyzeOptions } from './analyze.js';
+export interface ProfileHeapOptions extends AnalyzeOptions {
+    /**
+     * Commander injects this from the `--no-summary` flag — see CLI
+     * registration. `--no-summary` ⇒ `summary === false`. The dual-name
+     * convention (positive flag name, negated value) is a commander
+     * footgun: a `noSummary?: boolean` field would silently never fire.
+     */
+    summary?: boolean;
+}
+export declare const profileHeapCommand: (inputPath?: string, options?: ProfileHeapOptions) => Promise<void>;

package/dist/cli/profile-heap.js ADDED Viewed

@@ -0,0 +1,126 @@
+/**
+ * profile-heap — RFC 0002 Phase 1 entry point.
+ *
+ * A thin wrapper around `analyze` that flips on the heap-profile
+ * instrumentation already living in `runFullAnalysis`, then prints a
+ * per-phase RSS / heapUsed summary table after the run finishes.
+ *
+ * Why a dedicated subcommand instead of just documenting the env var?
+ *   - Discoverability: `codragraph --help` lists it next to `analyze`.
+ *   - One-shot UX: users (and the maintainer) get a useful summary table
+ *     without having to spelunk through Chrome DevTools to compare
+ *     snapshots. The `.heapsnapshot` files are still written for deep
+ *     dives; the summary just makes the cheap signal (RSS curve, heapUsed
+ *     curve) visible at a glance.
+ *   - Phase 1 of RFC 0002 is profile-first by design — we ship the tool
+ *     before any mitigation. Don't add compression, eviction, or streaming
+ *     refactors here; that's Phase 2+ once we know which phase is the
+ *     actual bottleneck.
+ *
+ * Side effects: writes `.codragraph/heap-profiles/<ts>-<phase>.heapsnapshot`
+ * (one per phase boundary, ~100-500MB each) plus a small
+ * `profile-summary.jsonl` timeline. Disk usage adds up fast on large
+ * repos — clean up between runs if you don't need the raw snapshots.
+ */
+import path from 'path';
+import * as fsSync from 'node:fs';
+import { getGitRoot, hasGitDir } from '../storage/git.js';
+import { analyzeCommand } from './analyze.js';
+/**
+ * Run `analyze` with heap profiling switched on, then print the per-phase
+ * memory summary read back from `profile-summary.jsonl`.
+ *
+ * @param inputPath Repository path. When omitted, the git root of the
+ *   current working directory is used (or the cwd itself when
+ *   `options.skipGit` is set).
+ * @param options Analyze options plus `summary`; `summary === false`
+ *   (from `--no-summary`) suppresses the table.
+ * @returns Resolves once analyze has finished and any summary was printed.
+ */
+export const profileHeapCommand = async (inputPath, options) => {
+    // Flip on the instrumentation BEFORE delegating to analyze. The env var
+    // is read by `runFullAnalysis` at orchestrator entry, so it must be set
+    // here. Setting it on every profile-heap invocation also guarantees that
+    // a leftover `unset` from a prior shell session can't disable profiling
+    // in this run.
+    process.env.CODRAGRAPH_HEAP_PROFILE = '1';
+    // Resolve the repo path the same way `analyze` does so we can locate the
+    // summary file after the run. Mirroring this avoids touching analyze's
+    // resolution logic, which already handles --skip-git, gitRoot, etc.
+    let repoPath;
+    if (inputPath) {
+        repoPath = path.resolve(inputPath);
+    }
+    else {
+        const gitRoot = getGitRoot(process.cwd());
+        if (!gitRoot && !options?.skipGit) {
+            // Let analyze produce its standard error message + exit code rather
+            // than duplicating the message here.
+            await analyzeCommand(inputPath, options);
+            return;
+        }
+        repoPath = gitRoot ?? path.resolve(process.cwd());
+    }
+    if (!hasGitDir(repoPath) && !options?.skipGit) {
+        await analyzeCommand(inputPath, options);
+        return;
+    }
+    // Detect whether we're the outer (pre-re-exec) process. analyzeCommand
+    // calls ensureHeap() which `execFileSync`s a child with
+    // --max-old-space-size=8192 on first invocation; that child runs the
+    // instrumented codepath and prints its own summary before exiting. If
+    // we don't bail here, the outer process re-reads the just-written
+    // summary file and prints it a second time.
+    //
+    // Capture the flag BEFORE the await so a future change to NODE_OPTIONS
+    // mid-flight can't confuse us. (execFileSync's child env doesn't
+    // propagate back to process.env, but be defensive.)
+    const isInnerProcess = (process.env.NODE_OPTIONS || '').includes('--max-old-space-size');
+    await analyzeCommand(inputPath, options);
+    // Outer process: the inner already printed the summary on its way out.
+    if (!isInnerProcess)
+        return;
+    // `--no-summary` → commander sets options.summary === false.
+    if (options?.summary === false)
+        return;
+    const summaryPath = path.join(repoPath, '.codragraph', 'heap-profiles', 'profile-summary.jsonl');
+    if (!fsSync.existsSync(summaryPath)) {
+        // Only the process that ran the instrumented analysis gets this far
+        // (the outer, pre-re-exec process returned above), so a missing file
+        // means this run wrote no timeline. Re-running would not change that,
+        // so point the user at the analyze output instead.
+        console.log(`\n  Heap profile summary not found at ${summaryPath}.\n` +
+            `  The instrumented run finished without writing a timeline; analyze may have\n` +
+            `  exited early or failed before the first phase boundary. Check the output above.\n`);
+        return;
+    }
+    const lines = fsSync
+        .readFileSync(summaryPath, 'utf8')
+        .split('\n')
+        .filter((l) => l.trim().length > 0);
+    const entries = [];
+    for (const line of lines) {
+        let parsed;
+        try {
+            parsed = JSON.parse(line);
+        }
+        catch {
+            /* skip malformed lines — best-effort */
+            continue;
+        }
+        // Syntactically valid JSON can still be the wrong shape (`null`, a
+        // bare number, an object without `phase`). Skip those too so the
+        // table printer never dereferences a missing field.
+        if (isTimelineEntry(parsed))
+            entries.push(parsed);
+    }
+    if (entries.length === 0) {
+        console.log(`\n  Heap profile summary at ${summaryPath} is empty.\n`);
+        return;
+    }
+    printSummary(entries, summaryPath);
+};
+/**
+ * True when `value` has the fields the summary table reads: a string
+ * `phase` and finite numeric `ts`, `rss` and `heapUsed`.
+ */
+function isTimelineEntry(value) {
+    return (typeof value === 'object' &&
+        value !== null &&
+        typeof value.phase === 'string' &&
+        Number.isFinite(value.ts) &&
+        Number.isFinite(value.rss) &&
+        Number.isFinite(value.heapUsed));
+}
+/**
+ * Print the heap-profile timeline as a fixed-width table, followed by the
+ * peak figures and the locations of the raw artifacts.
+ *
+ * @param entries Parsed timeline records; each is read for `phase`, `ts`,
+ *   `rss`, `heapUsed` and `snapshotFile`. Must contain at least one record,
+ *   since the first record's `ts` is the zero point for the Δt column.
+ * @param summaryPath Path of the timeline file; its directory is reported
+ *   as the snapshots directory.
+ */
+function printSummary(entries, summaryPath) {
+    // Bytes → whole megabytes, rendered as a string.
+    const toMb = (bytes) => (bytes / 1024 / 1024).toFixed(0);
+    const divider = '  ────────────────────';
+    // Single pass over the records to find both peaks.
+    let peakRss = 0;
+    let peakHeapUsed = 0;
+    for (const record of entries) {
+        if (record.rss > peakRss) {
+            peakRss = record.rss;
+        }
+        if (record.heapUsed > peakHeapUsed) {
+            peakHeapUsed = record.heapUsed;
+        }
+    }
+    const startTs = entries[0].ts;
+    const headerCells = [
+        '  Phase'.padEnd(28),
+        '  Δt(s)'.padEnd(10),
+        '  RSS(MB)'.padEnd(12),
+        '  heapUsed(MB)'.padEnd(16),
+        '  Snapshot',
+    ];
+    console.log('\n  Heap-profile summary');
+    console.log(divider);
+    console.log(headerCells.join(''));
+    entries.forEach((record) => {
+        // Elapsed seconds since the first record, one decimal place.
+        const elapsed = ((record.ts - startTs) / 1000).toFixed(1);
+        const rssCell = toMb(record.rss);
+        const heapCell = toMb(record.heapUsed);
+        console.log(`  ${record.phase.padEnd(26)}  ${elapsed.padStart(6)}    ${rssCell.padStart(7)}    ${heapCell.padStart(11)}      ${record.snapshotFile}`);
+    });
+    console.log(divider);
+    console.log(`  peak RSS:       ${toMb(peakRss)} MB`);
+    console.log(`  peak heapUsed:  ${toMb(peakHeapUsed)} MB`);
+    console.log(`  raw timeline:   ${summaryPath}`);
+    console.log(`  snapshots dir:  ${path.dirname(summaryPath)} (open .heapsnapshot files in Chrome DevTools → Memory → Load)\n`);
+}

package/dist/cli/setup.d.ts CHANGED Viewed

@@ -5,4 +5,17 @@
  * Detects installed AI editors and writes the appropriate MCP config
  * so the CodraGraph MCP server is available in all projects.
  */
+/**
+ * Outcome of a setup run, as three lists of strings.
+ * NOTE(review): the entries are presumably editor/integration names or
+ * messages — confirm against the implementation in setup.js.
+ */
+interface SetupResult {
+    configured: string[];
+    skipped: string[];
+    errors: string[];
+}
+/** Output-trimming switches for `runSetup`; both are optional. */
+export interface RunSetupOptions {
+    /** Suppress the trailing "Next steps" block (used when analyze auto-runs setup). */
+    skipNextSteps?: boolean;
+    /** Suppress the "CodraGraph Setup" header (used when analyze auto-runs setup). */
+    compactHeader?: boolean;
+}
+/**
+ * Programmatic entry point for setup.
+ *
+ * @param options Optional output-trimming switches.
+ * @returns A promise resolving to the `SetupResult` for the run.
+ */
+export declare const runSetup: (options?: RunSetupOptions) => Promise<SetupResult>;
 export declare const setupCommand: () => Promise<void>;
+export {};