npm - @graphpilot-oss/graphpilot - Versions diffs - 0.0.1 - Mend

@graphpilot-oss/graphpilot 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (130) hide show

package/.editorconfig +15 -0
package/.github/CODEOWNERS +22 -0
package/.github/FUNDING.yml +1 -0
package/.github/ISSUE_TEMPLATE/bug_report.md +33 -0
package/.github/ISSUE_TEMPLATE/config.yml +5 -0
package/.github/ISSUE_TEMPLATE/feature_request.md +23 -0
package/.github/PULL_REQUEST_TEMPLATE.md +19 -0
package/.github/dependabot.yml +15 -0
package/.github/workflows/ci.yml +62 -0
package/.github/workflows/release.yml +50 -0
package/.prettierignore +19 -0
package/.prettierrc.json +20 -0
package/CHANGELOG.md +138 -0
package/CODE_OF_CONDUCT.md +83 -0
package/CONTRIBUTING.md +111 -0
package/LICENSE +201 -0
package/README.md +132 -0
package/SECURITY.md +44 -0
package/assets/logo.png +0 -0
package/assets/logo.svg +1 -0
package/bench/README.md +544 -0
package/bench/results/agent-tier-2026-05-22.md +28 -0
package/bench/results/agent-tier-summary.md +44 -0
package/bench/results/baseline-tier-2026-05-22.md +23 -0
package/bench/results/baseline.json +810 -0
package/bench/results/baseline.md +28 -0
package/bench/run-agent-tier-automated.ts +234 -0
package/bench/run-agent-tier.md +125 -0
package/bench/run-baseline-tier.ts +200 -0
package/bench/run.ts +210 -0
package/bench/runner-baseline.ts +177 -0
package/bench/runner-graphpilot.ts +131 -0
package/bench/score-agent-tier.ts +191 -0
package/bench/score.ts +59 -0
package/bench/tasks.ts +236 -0
package/dist/cli.d.ts +2 -0
package/dist/cli.js +162 -0
package/dist/cli.js.map +1 -0
package/dist/edges.d.ts +57 -0
package/dist/edges.js +170 -0
package/dist/edges.js.map +1 -0
package/dist/git.d.ts +95 -0
package/dist/git.js +247 -0
package/dist/git.js.map +1 -0
package/dist/graph-schema.d.ts +36 -0
package/dist/graph-schema.js +208 -0
package/dist/graph-schema.js.map +1 -0
package/dist/impact.d.ts +99 -0
package/dist/impact.js +123 -0
package/dist/impact.js.map +1 -0
package/dist/indexer.d.ts +28 -0
package/dist/indexer.js +111 -0
package/dist/indexer.js.map +1 -0
package/dist/interactions.d.ts +46 -0
package/dist/interactions.js +0 -0
package/dist/interactions.js.map +1 -0
package/dist/mcp.d.ts +3 -0
package/dist/mcp.js +567 -0
package/dist/mcp.js.map +1 -0
package/dist/parser.d.ts +24 -0
package/dist/parser.js +128 -0
package/dist/parser.js.map +1 -0
package/dist/provenance.d.ts +74 -0
package/dist/provenance.js +95 -0
package/dist/provenance.js.map +1 -0
package/dist/query.d.ts +68 -0
package/dist/query.js +127 -0
package/dist/query.js.map +1 -0
package/dist/redact.d.ts +30 -0
package/dist/redact.js +117 -0
package/dist/redact.js.map +1 -0
package/dist/storage.d.ts +42 -0
package/dist/storage.js +85 -0
package/dist/storage.js.map +1 -0
package/dist/symbols.d.ts +20 -0
package/dist/symbols.js +140 -0
package/dist/symbols.js.map +1 -0
package/dist/validation.d.ts +9 -0
package/dist/validation.js +65 -0
package/dist/validation.js.map +1 -0
package/dist/validators.d.ts +55 -0
package/dist/validators.js +205 -0
package/dist/validators.js.map +1 -0
package/dist/watcher.d.ts +86 -0
package/dist/watcher.js +310 -0
package/dist/watcher.js.map +1 -0
package/docs/architecture.md +311 -0
package/docs/limitations.md +156 -0
package/docs/mcp-setup.md +231 -0
package/docs/quickstart.md +202 -0
package/eslint.config.js +148 -0
package/lefthook.yml +81 -0
package/package.json +56 -0
package/pnpm-workspace.yaml +6 -0
package/scripts/smoke-stdio.mjs +97 -0
package/src/cli.ts +171 -0
package/src/edges.ts +202 -0
package/src/git.ts +255 -0
package/src/graph-schema.ts +229 -0
package/src/impact.ts +218 -0
package/src/indexer.ts +152 -0
package/src/interactions.ts +0 -0
package/src/mcp.ts +652 -0
package/src/parser.ts +138 -0
package/src/provenance.ts +115 -0
package/src/query.ts +148 -0
package/src/redact.ts +122 -0
package/src/storage.ts +115 -0
package/src/symbols.ts +173 -0
package/src/validation.ts +69 -0
package/src/validators.ts +253 -0
package/src/watcher.ts +383 -0
package/tests/edges.test.ts +175 -0
package/tests/fixtures/sample.ts +32 -0
package/tests/git.test.ts +303 -0
package/tests/graph-schema.test.ts +321 -0
package/tests/impact.test.ts +454 -0
package/tests/interactions.test.ts +180 -0
package/tests/lint-policy.test.ts +106 -0
package/tests/mcp-stdio.test.ts +171 -0
package/tests/mcp.test.ts +335 -0
package/tests/parser.test.ts +31 -0
package/tests/provenance.test.ts +132 -0
package/tests/query.test.ts +160 -0
package/tests/redact.test.ts +167 -0
package/tests/security.test.ts +144 -0
package/tests/symbols.test.ts +78 -0
package/tests/validators.test.ts +193 -0
package/tests/watcher.test.ts +250 -0
package/tsconfig.json +18 -0

package/src/parser.ts ADDED Viewed

@@ -0,0 +1,138 @@
+import Parser from 'tree-sitter';
+// @ts-ignore — tree-sitter-typescript ships JS, has no types
+import TS from 'tree-sitter-typescript';
+import { readFileSync, statSync } from 'node:fs';
+import { extname } from 'node:path';
+import { MAX_FILE_BYTES } from './validation.js';
+/** Result of parsing one source file: the syntax tree plus the text it was built from. */
+export interface ParsedFile {
+  /** Path exactly as it was handed to `parseFile` / `parseSource`. */
+  path: string;
+  /** Language tag; `detectLang` derives it from the extension and `getParser` keys its cache on it. */
+  lang: 'typescript' | 'tsx' | 'javascript' | 'jsx';
+  /** Tree returned by the tree-sitter parser for `source`. */
+  tree: Parser.Tree;
+  /** The complete text that was parsed. */
+  source: string;
+}
+/** One lazily-created parser per language tag, reused for every file of that language. */
+const PARSER_CACHE = new Map<string, Parser>();
+/**
+ * Return the cached tree-sitter parser for `lang`, creating it on first use.
+ *
+ * Grammar selection:
+ *   - `typescript`, `javascript` → TypeScript grammar
+ *   - `tsx`, `jsx`               → TSX grammar
+ *
+ * `.jsx` files contain JSX by definition, and only the TSX grammar of
+ * tree-sitter-typescript understands JSX elements; the plain TypeScript
+ * grammar treats a leading `<Foo>` as a type assertion and yields error nodes.
+ *
+ * NOTE(review): `.js` files that embed JSX still go through the TypeScript
+ * grammar here — confirm whether they should use the TSX grammar as well.
+ *
+ * @param lang Language tag as produced by `detectLang`.
+ * @returns A parser with the matching grammar already set.
+ */
+function getParser(lang: ParsedFile['lang']): Parser {
+  const cached = PARSER_CACHE.get(lang);
+  if (cached) return cached;
+  const p = new Parser();
+  // Cast around the peer-dep type-version skew between tree-sitter and
+  // tree-sitter-typescript. Runtime is fine; only the .d.ts files disagree.
+  const langs = TS as { typescript: Parser.Language; tsx: Parser.Language };
+  switch (lang) {
+    case 'typescript':
+    case 'javascript':
+      p.setLanguage(langs.typescript);
+      break;
+    case 'tsx':
+    case 'jsx':
+      p.setLanguage(langs.tsx);
+      break;
+  }
+  PARSER_CACHE.set(lang, p);
+  return p;
+}
+/**
+ * Map a file path to the language tag used for parsing, looking only at its
+ * extension (compared case-insensitively).
+ *
+ * `.mts` / `.cts` are TypeScript's ES-module / CommonJS extensions and are
+ * treated like `.ts`, mirroring how `.mjs` / `.cjs` are treated like `.js`.
+ *
+ * @param path File path or bare file name.
+ * @returns The language tag, or null when the extension is not one we parse.
+ */
+export function detectLang(path: string): ParsedFile['lang'] | null {
+  switch (extname(path).toLowerCase()) {
+    case '.ts':
+    case '.mts':
+    case '.cts':
+      return 'typescript';
+    case '.tsx':
+      return 'tsx';
+    case '.js':
+    case '.mjs':
+    case '.cjs':
+      return 'javascript';
+    case '.jsx':
+      return 'jsx';
+    default:
+      return null;
+  }
+}
+/**
+ * Read a file from disk and parse it.
+ *
+ * Returns null (rather than throwing on filesystem errors) when:
+ *   - the extension is not recognised by `detectLang`,
+ *   - the path cannot be stat'ed or is not a regular file,
+ *   - the file is larger than `MAX_FILE_BYTES`,
+ *   - the file cannot be read.
+ *
+ * @param path Path of the file to parse.
+ * @returns The parsed file, or null if it was skipped for one of the reasons above.
+ */
+export function parseFile(path: string): ParsedFile | null {
+  const lang = detectLang(path);
+  if (!lang) return null;
+  // Defence against T1 (resource exhaustion): skip oversized files rather than
+  // OOM the process. The threshold is MAX_FILE_BYTES from ./validation.js.
+  let source: string;
+  try {
+    const stats = statSync(path);
+    // A directory (or other non-regular entry) named e.g. `foo.ts` passes the
+    // extension check, but reading it would throw.
+    if (!stats.isFile() || stats.size > MAX_FILE_BYTES) return null;
+    // The read stays inside the try: the file can be deleted or become
+    // unreadable between the stat and the read.
+    source = readFileSync(path, 'utf8');
+  } catch {
+    return null;
+  }
+  return parseSource(path, source, lang);
+}
+/**
+ * Parse in-memory source text with the parser registered for `lang`.
+ *
+ * @param path   Recorded on the result as-is; not read from disk here.
+ * @param source Text to parse.
+ * @param lang   Language tag selecting the parser.
+ * @returns The inputs bundled together with the resulting tree.
+ */
+export function parseSource(path: string, source: string, lang: ParsedFile['lang']): ParsedFile {
+  const parser = getParser(lang);
+  const parsed: ParsedFile = { path, lang, tree: parser.parse(source), source };
+  return parsed;
+}
+/**
+ * Lazily visit `node` and all of its descendants, parents before children and
+ * siblings in their original order (depth-first, pre-order).
+ *
+ * Uses an explicit stack instead of recursion so that very deep trees cannot
+ * overflow the JS call stack. Defence against T1.
+ */
+export function* walk(node: Parser.SyntaxNode): Generator<Parser.SyntaxNode> {
+  const pending: Parser.SyntaxNode[] = [node];
+  let current: Parser.SyntaxNode | undefined;
+  while ((current = pending.pop()) !== undefined) {
+    yield current;
+    // Queue children last-to-first: the stack is LIFO, so the first child is
+    // the next node handed out, which keeps source order.
+    let index = current.childCount;
+    while (index-- > 0) {
+      const kid = current.child(index);
+      if (kid) pending.push(kid);
+    }
+  }
+}
+/**
+ * Collect the name of every function-like node in a parsed file, in traversal
+ * (source) order. Which node kinds count as functions is decided by
+ * `functionNameOf`; nodes for which it yields no name are skipped.
+ */
+export function listFunctions(parsed: ParsedFile): string[] {
+  const perNode = Array.from(walk(parsed.tree.rootNode), (candidate) => functionNameOf(candidate));
+  return perNode.filter((found): found is string => Boolean(found));
+}
+/**
+ * Return the declared name of a function-like node, or null when the node is
+ * not a function or has no name.
+ *
+ * Handled shapes:
+ *   - declarations with their own `name` field: function / generator
+ *     declarations, method definitions, function signatures;
+ *   - `variable_declarator` nodes whose value is an arrow function, a function
+ *     expression or a generator function expression (`const g = function* () {}`),
+ *     in which case the variable's name is used.
+ *
+ * @param node Any syntax node.
+ * @returns The name text, or null.
+ */
+function functionNameOf(node: Parser.SyntaxNode): string | null {
+  switch (node.type) {
+    case 'function_declaration':
+    case 'generator_function_declaration':
+    case 'method_definition':
+    case 'function_signature': {
+      const nameNode = node.childForFieldName('name');
+      return nameNode?.text ?? null;
+    }
+    case 'variable_declarator': {
+      // const foo = () => ... | const foo = function() {} | const foo = function*() {}
+      const valueNode = node.childForFieldName('value');
+      if (!valueNode) return null;
+      switch (valueNode.type) {
+        case 'arrow_function':
+        case 'function_expression':
+        // NOTE(review): 'function' is presumably the node name used by older
+        // grammar releases for function expressions — kept from the original.
+        case 'function':
+        // Generator declarations are recognised above; generator *expressions*
+        // assigned to a variable must be recognised too.
+        case 'generator_function': {
+          const nameNode = node.childForFieldName('name');
+          return nameNode?.text ?? null;
+        }
+        default:
+          return null;
+      }
+    }
+    default:
+      return null;
+  }
+}

package/src/provenance.ts ADDED Viewed

@@ -0,0 +1,115 @@
+/**
+ * Provenance / evidence anchors for tool responses.
+ *
+ * Why this exists: agents hallucinate. The most common Cursor / Claude
+ * Code failure pattern is "the model called a function that doesn't
+ * exist" — and the user has no quick way to verify a tool's claim
+ * before acting on it.
+ *
+ * Solution: every result we return carries an evidence anchor — a
+ * structured `{file, line, sha, excerpt}` reference that the agent
+ * can include verbatim in its reply. The user (or the agent itself)
+ * can then jump to the file/line and confirm the cited code matches
+ * the claim. If we ever fabricate, the anchor exposes us instantly.
+ *
+ * Format design:
+ *   - Path is relative to the indexed root (portable).
+ *   - Line is 1-indexed (matches editor convention).
+ *   - SHA is optional — null when the indexed repo isn't a git repo.
+ *     When present, it's the 7-char short SHA of the index time.
+ *   - Excerpt is optional and short (~200 chars) — for symbol records
+ *     it's the signature line. For edges it's the call expression.
+ *
+ * Per the differentiation research, this is the SINGLE feature no
+ * competitor in the 15+ landscape has shipped. See
+ * .notes/differentiation-research-2026-05-21.md §1.3.
+ */
+import type { SymbolRecord } from './symbols.js';
+import type { CallEdge } from './edges.js';
+/**
+ * A single evidence anchor pointing at a specific location in the repo.
+ * Only `file` and `line` are required; everything else is optional detail.
+ */
+export interface Provenance {
+  /** Relative path from the indexed repo root, e.g. "src/auth.ts". */
+  file: string;
+  /** 1-indexed line number. */
+  line: number;
+  /** Optional 1-indexed column. */
+  column?: number;
+  /** End line, when the entity spans multiple lines. */
+  endLine?: number;
+  /**
+   * Short git SHA (7 chars) at index time. Null if not a git repo.
+   * The type also permits leaving the field out entirely.
+   */
+  sha?: string | null;
+  /** Short text excerpt — symbol signature, call expression, etc. */
+  excerpt?: string;
+}
+/** Upper bound on excerpt length in UTF-16 code units, ellipsis included. */
+const MAX_EXCERPT_LEN = 200;
+/**
+ * Normalise an excerpt for inclusion in a provenance record.
+ *
+ * Leading/trailing whitespace is removed. Missing, empty or whitespace-only
+ * input yields undefined so the field can be omitted. Text longer than
+ * `MAX_EXCERPT_LEN` is cut and terminated with a single '…' so the result
+ * never exceeds the limit.
+ *
+ * @param s Raw excerpt text, possibly undefined.
+ * @returns The trimmed (and possibly shortened) excerpt, or undefined.
+ */
+function clipExcerpt(s: string | undefined): string | undefined {
+  if (!s) return undefined;
+  const trimmed = s.trim();
+  if (trimmed.length === 0) return undefined;
+  if (trimmed.length <= MAX_EXCERPT_LEN) return trimmed;
+  let end = MAX_EXCERPT_LEN - 1;
+  // Strings are indexed in UTF-16 code units. If the cut would land between
+  // the two halves of a surrogate pair (emoji, astral-plane characters), drop
+  // the dangling high surrogate instead of emitting malformed text.
+  const lastUnit = trimmed.charCodeAt(end - 1);
+  if (lastUnit >= 0xd800 && lastUnit <= 0xdbff) end -= 1;
+  return trimmed.slice(0, end) + '…';
+}
+/**
+ * Build the evidence anchor for a symbol: its location fields are copied
+ * over and its stored signature, passed through `clipExcerpt`, becomes the
+ * excerpt.
+ *
+ * NOTE(review): the signature is presumably already secret-redacted (T3)
+ * before it reaches this function — nothing here redacts it.
+ *
+ * @param s   Symbol to anchor.
+ * @param sha Commit SHA to attach, or null when there is none.
+ */
+export function symbolProvenance(s: SymbolRecord, sha: string | null): Provenance {
+  const { file, line, column, endLine, signature } = s;
+  const anchor: Provenance = {
+    file,
+    line,
+    column,
+    endLine,
+    sha: sha ?? null,
+    excerpt: clipExcerpt(signature),
+  };
+  return anchor;
+}
+/**
+ * Build the evidence anchor for a call edge. The location is taken from the
+ * edge itself, i.e. the CALL SITE, not the place where the callee is defined.
+ * For the callee's definition, look up the symbol behind `edge.toId` (when it
+ * is non-null) and use `symbolProvenance` on it.
+ *
+ * @param e   Call edge to anchor.
+ * @param sha Commit SHA to attach, or null when there is none.
+ */
+export function edgeProvenance(e: CallEdge, sha: string | null): Provenance {
+  const { file, line, column } = e;
+  const commit = sha ?? null;
+  // Deliberately no `excerpt`: the text of the call line is not stored on the
+  // edge. (Future v0.2: capture it at index time so the agent sees the actual
+  // call expression.)
+  return { file, line, column, sha: commit };
+}
+/**
+ * Render a Provenance as one compact line, `file:line[:column][ @ sha]`,
+ * for use inside tool text responses.
+ *
+ * The column is appended whenever it is defined (including 0); the SHA only
+ * when it is a non-empty string.
+ *
+ * Examples:
+ *   src/auth.ts:42                    (no sha — not a git repo)
+ *   src/auth.ts:42 @ ab12cd3          (with sha)
+ *   src/auth.ts:42:5 @ ab12cd3        (with column)
+ */
+export function formatProvenance(p: Provenance): string {
+  const columnPart = p.column !== undefined ? ':' + p.column : '';
+  const shaPart = p.sha ? ' @ ' + p.sha : '';
+  return `${p.file}:${p.line}${columnPart}${shaPart}`;
+}
+/**
+ * Wrap the single-line provenance string in an `[evidence: …]` tag that an
+ * agent can quote verbatim in its reply, so a reader or downstream tool can
+ * look the location up.
+ *
+ * Example:
+ *   [evidence: src/auth.ts:42 @ ab12cd3]
+ */
+export function formatEvidenceTag(p: Provenance): string {
+  const location = formatProvenance(p);
+  return '[evidence: ' + location + ']';
+}

package/src/query.ts ADDED Viewed

@@ -0,0 +1,148 @@
+import type { Graph } from './storage.js';
+import type { SymbolRecord } from './symbols.js';
+import type { CallEdge } from './edges.js';
+/** Options for name-based symbol lookup (`GraphIndex.findByName`). */
+export interface RecallOptions {
+  /** Max results. Default 10. Capped at 100. */
+  limit?: number;
+  /**
+   * If true, match if the query is a substring of the symbol name.
+   * If false (default), require exact case-insensitive match.
+   * Substring matching is case-insensitive as well: both sides are lower-cased.
+   */
+  substring?: boolean;
+}
+/** Options for edge lookups (`GraphIndex.callers` / `GraphIndex.callees`). */
+export interface EdgeQueryOptions {
+  /** Max edges to return. Default 50. Capped at 500. */
+  limit?: number;
+  /**
+   * If true, include edges where the callee is unresolved (toId === null).
+   * Default true — agents usually want to see those too.
+   * Only `callees` reads this flag; `callers` is keyed by callee id and so
+   * only ever contains resolved edges.
+   */
+  includeUnresolved?: boolean;
+}
+/** Upper bound on symbols returned by a single `findByName` call. */
+const HARD_RESULT_CAP = 100;
+/** Upper bound on edges returned by a single `callers` / `callees` call. */
+const HARD_EDGE_CAP = 500;
+/**
+ * Turn a caller-supplied limit into a safe, non-negative integer bound.
+ *
+ *   - undefined / null / NaN → `fallback`
+ *   - fractional values      → rounded down
+ *   - negative values        → 0
+ *   - anything above `cap`   → `cap`
+ *
+ * Without this, a negative limit reaches `slice(0, -n)` (which drops elements
+ * from the END instead of returning nothing) and NaN defeats `>=` comparisons
+ * used as loop exits.
+ */
+function clampLimit(requested: number | undefined, fallback: number, cap: number): number {
+  const wanted = requested ?? fallback;
+  const usable = Number.isNaN(wanted) ? fallback : wanted;
+  return Math.max(0, Math.min(Math.floor(usable), cap));
+}
+/**
+ * Pre-computed lookup tables over a Graph. Build once after loading.
+ *
+ * Exact-name, id, caller and callee lookups are a single map access plus a
+ * slice of the result. Substring search is the exception: it scans the
+ * distinct lower-cased names until enough matches are found.
+ *
+ * The whole index is built in one pass on construction. For graphpilot's own
+ * code (50 symbols, 155 edges) build time is <1ms. For a 50k-symbol repo
+ * we'd expect ~20ms — still negligible compared to a Claude Code round trip.
+ */
+export class GraphIndex {
+  /** Symbols grouped by lower-cased name, in graph order within each group. */
+  private readonly byNameLower: Map<string, SymbolRecord[]> = new Map();
+  /** Symbols keyed by id; on duplicate ids the last one in the graph wins. */
+  private readonly byId: Map<string, SymbolRecord> = new Map();
+  /** Edges keyed by callee id — answers "who calls X?". */
+  private readonly callersOf: Map<string, CallEdge[]> = new Map();
+  /** Edges keyed by caller id — answers "what does X call?". */
+  private readonly calleesOf: Map<string, CallEdge[]> = new Map();
+  /**
+   * @param graph Graph to index. It is kept by reference and not copied; the
+   *   lookup tables are not rebuilt if it is mutated afterwards.
+   */
+  constructor(public readonly graph: Graph) {
+    for (const s of graph.symbols) {
+      this.byId.set(s.id, s);
+      const key = s.name.toLowerCase();
+      const list = this.byNameLower.get(key);
+      if (list) list.push(s);
+      else this.byNameLower.set(key, [s]);
+    }
+    for (const e of graph.edges) {
+      // callers index — only resolved edges have a toId
+      if (e.toId) {
+        const list = this.callersOf.get(e.toId);
+        if (list) list.push(e);
+        else this.callersOf.set(e.toId, [e]);
+      }
+      // callees index — every edge has a fromId
+      const list = this.calleesOf.get(e.fromId);
+      if (list) list.push(e);
+      else this.calleesOf.set(e.fromId, [e]);
+    }
+  }
+  /**
+   * Find symbols by name. Default behaviour is exact case-insensitive match;
+   * pass `substring: true` to enable (case-insensitive) substring search.
+   *
+   * In exact mode, symbols whose name matches the query's case exactly come
+   * first, then the case-folded matches. Substring mode returns matches in
+   * index order without further ranking.
+   *
+   * @param query Name or name fragment; an empty query yields no results.
+   * @param opts  See `RecallOptions`. `limit` defaults to 10 and is clamped
+   *   to the range 0..100.
+   */
+  findByName(query: string, opts: RecallOptions = {}): SymbolRecord[] {
+    const limit = clampLimit(opts.limit, 10, HARD_RESULT_CAP);
+    if (!query || limit === 0) return [];
+    if (opts.substring) {
+      const q = query.toLowerCase();
+      const results: SymbolRecord[] = [];
+      for (const [name, syms] of this.byNameLower) {
+        if (!name.includes(q)) continue;
+        for (const s of syms) {
+          results.push(s);
+          if (results.length >= limit) return results;
+        }
+      }
+      return results;
+    }
+    // Exact case-insensitive — fast path through the map.
+    const candidates = this.byNameLower.get(query.toLowerCase()) ?? [];
+    if (candidates.length === 0) return [];
+    // Prefer exact case match first, then the rest.
+    const exact = candidates.filter((s) => s.name === query);
+    const rest = candidates.filter((s) => s.name !== query);
+    return [...exact, ...rest].slice(0, limit);
+  }
+  /** Look up a symbol by its id. Returns null if not found. */
+  findById(id: string): SymbolRecord | null {
+    return this.byId.get(id) ?? null;
+  }
+  /**
+   * Resolve a name (or id) to a unique symbol. Used by tools that take a
+   * "symbol" argument — accepts either a bare name or a full id.
+   *
+   * Anything containing both '#' and '@' is treated as an id and looked up
+   * only by id (no fallback to name search).
+   *
+   * Ambiguity policy: if more than one symbol matches the name, returns the
+   * first one (same heuristic as the resolver). Caller can disambiguate by
+   * passing the full id.
+   */
+  resolveSymbol(nameOrId: string): SymbolRecord | null {
+    if (nameOrId.includes('#') && nameOrId.includes('@')) {
+      return this.findById(nameOrId);
+    }
+    const matches = this.findByName(nameOrId);
+    return matches[0] ?? null;
+  }
+  /**
+   * Edges where this symbol is the target — "who calls X?".
+   * `opts.limit` defaults to 50 and is clamped to 0..500.
+   */
+  callers(symbolId: string, opts: EdgeQueryOptions = {}): CallEdge[] {
+    const limit = clampLimit(opts.limit, 50, HARD_EDGE_CAP);
+    return (this.callersOf.get(symbolId) ?? []).slice(0, limit);
+  }
+  /**
+   * Edges where this symbol is the source — "what does X call?".
+   * `opts.limit` defaults to 50 and is clamped to 0..500. Pass
+   * `includeUnresolved: false` to drop edges whose `toId` is null; the limit
+   * is applied after that filter.
+   */
+  callees(symbolId: string, opts: EdgeQueryOptions = {}): CallEdge[] {
+    const limit = clampLimit(opts.limit, 50, HARD_EDGE_CAP);
+    const all = this.calleesOf.get(symbolId) ?? [];
+    if (opts.includeUnresolved === false) {
+      return all.filter((e) => e.toId !== null).slice(0, limit);
+    }
+    return all.slice(0, limit);
+  }
+  /**
+   * Convenience: how many symbols / edges are indexed. Counted from the
+   * underlying graph on every access (one pass over the edges).
+   */
+  get stats(): { symbols: number; edges: number; resolvedEdges: number } {
+    let resolved = 0;
+    for (const e of this.graph.edges) if (e.toId) resolved++;
+    return {
+      symbols: this.graph.symbols.length,
+      edges: this.graph.edges.length,
+      resolvedEdges: resolved,
+    };
+  }
+}

package/src/redact.ts ADDED Viewed

@@ -0,0 +1,122 @@
+/**
+ * Secret-pattern redaction for symbol signatures before they're stored.
+ *
+ * Code occasionally contains literal secrets — `const API_KEY = "sk-..."`,
+ * embedded JWTs, AWS access keys, GitHub tokens. When we extract a symbol's
+ * signature line into graph.json, those literals come with it. We redact
+ * them here so:
+ *   1. The on-disk graph.json doesn't carry plaintext secrets
+ *   2. The MCP tool output sent to the agent doesn't expose them either
+ *
+ * This is NOT a full secret scanner — it covers well-known fixed-prefix
+ * formats. Determined leakage (custom-format secrets) will still escape,
+ * but the common ones are covered.
+ *
+ * Pattern coverage and replacement tokens are intentionally short so a
+ * signature stays readable after redaction.
+ */
+/** One redaction rule: what to look for, what to put in its place, and a name for diagnostics. */
+interface SecretPattern {
+  /** Regex that locates the secret; the entries below all carry the `g` flag so every occurrence is replaced. */
+  pattern: RegExp;
+  /** Replacement string handed to `String.prototype.replace` (may use `$1`-style group references). */
+  replacement: string;
+  /** Short identifier reported by `detectSecrets` when the pattern matches. */
+  label: string;
+}
+/**
+ * Redaction rules, applied in array order. Specific, prefix-based formats come
+ * first; the generic quoted-long-token rule is last so it only sees what the
+ * specific rules left behind.
+ */
+const SECRET_PATTERNS: readonly SecretPattern[] = [
+  // PEM private-key headers — match the BEGIN line before generic long-token
+  // catches it, so we get the precise label. Only the header line itself is
+  // rewritten by this rule; the key body is not matched here.
+  {
+    pattern: /-----BEGIN [A-Z ]*PRIVATE KEY-----/g,
+    replacement: '-----BEGIN ***REDACTED*** PRIVATE KEY-----',
+    label: 'pem-private-key',
+  },
+  // JSON Web Tokens: three base64url segments separated by dots, the first
+  // starting with `eyJ`, each segment at least 8 characters after that.
+  {
+    pattern: /eyJ[A-Za-z0-9_-]{8,}\.[A-Za-z0-9_-]{8,}\.[A-Za-z0-9_-]{8,}/g,
+    replacement: '***JWT-REDACTED***',
+    label: 'jwt',
+  },
+  // OpenAI / Anthropic style API keys: `sk-` then 20+ characters from
+  // [A-Za-z0-9_-]. Also covers `sk-ant-...`, `sk-proj-...` etc.
+  {
+    pattern: /\bsk-[A-Za-z0-9_-]{20,}\b/g,
+    replacement: 'sk-***REDACTED***',
+    label: 'sk-token',
+  },
+  // GitHub personal access tokens.
+  { pattern: /\bghp_[A-Za-z0-9]{30,}\b/g, replacement: 'ghp_***REDACTED***', label: 'github-pat' },
+  // GitHub server tokens.
+  {
+    pattern: /\bghs_[A-Za-z0-9]{30,}\b/g,
+    replacement: 'ghs_***REDACTED***',
+    label: 'github-server',
+  },
+  // GitHub user / OAuth tokens.
+  {
+    pattern: /\bgho_[A-Za-z0-9]{30,}\b/g,
+    replacement: 'gho_***REDACTED***',
+    label: 'github-oauth',
+  },
+  // GitHub refresh tokens.
+  {
+    pattern: /\bghr_[A-Za-z0-9]{30,}\b/g,
+    replacement: 'ghr_***REDACTED***',
+    label: 'github-refresh',
+  },
+  // AWS access key IDs.
+  { pattern: /\bAKIA[0-9A-Z]{16}\b/g, replacement: 'AKIA***REDACTED***', label: 'aws-akia' },
+  // AWS short-term session tokens (less specific, but the leading prefix is unique).
+  { pattern: /\bASIA[0-9A-Z]{16}\b/g, replacement: 'ASIA***REDACTED***', label: 'aws-asia' },
+  // Slack tokens.
+  {
+    pattern: /\bxox[abprs]-[A-Za-z0-9-]{10,}\b/g,
+    replacement: 'xox*-***REDACTED***',
+    label: 'slack',
+  },
+  // Stripe live secret keys.
+  {
+    pattern: /\bsk_live_[A-Za-z0-9]{20,}\b/g,
+    replacement: 'sk_live_***REDACTED***',
+    label: 'stripe-live',
+  },
+  // Generic long high-entropy token inside a string literal.
+  // Heuristic: 40+ chars of alphanumeric / underscore / plus / slash /
+  // equals / hyphen, immediately surrounded by matching single or double
+  // quotes. Keeps false positives down because random function names rarely
+  // live inside quotes. The quotes themselves are preserved via `$1`.
+  {
+    pattern: /(["'])([A-Za-z0-9_+/=-]{40,})\1/g,
+    replacement: '$1***REDACTED-LONG-TOKEN***$1',
+    label: 'long-token-in-string',
+  },
+];
+/**
+ * Apply every rule in `SECRET_PATTERNS` to the text, one after another, and
+ * return the result. Empty input is returned unchanged.
+ *
+ * Rules run in array order, each on the output of the previous one — so the
+ * specific formats (e.g. JWT) are replaced before the generic long-token
+ * catch-all gets a chance to swallow them.
+ */
+export function redactSecrets(text: string): string {
+  if (!text) return text;
+  return SECRET_PATTERNS.reduce(
+    (redacted, rule) => redacted.replace(rule.pattern, rule.replacement),
+    text,
+  );
+}
+/**
+ * Test helper / introspection: report the labels of all rules that match the
+ * input, in rule order. Returns an empty list for empty input.
+ * Intended for diagnostics; not on a hot path.
+ */
+export function detectSecrets(text: string): string[] {
+  if (!text) return [];
+  return SECRET_PATTERNS.filter(({ pattern }) => {
+    // The shared regexes are /g, and `test` on a /g regex advances lastIndex.
+    // Probe with a throwaway copy so the shared instances are never touched.
+    const probe = new RegExp(pattern.source, pattern.flags);
+    return probe.test(text);
+  }).map(({ label }) => label);
+}

package/src/storage.ts ADDED Viewed

@@ -0,0 +1,115 @@
+import { createHash } from 'node:crypto';
+import { homedir } from 'node:os';
+import { join } from 'node:path';
+import { mkdirSync, writeFileSync, readFileSync, existsSync, chmodSync, renameSync } from 'node:fs';
+import type { SymbolRecord } from './symbols.js';
+import type { CallEdge } from './edges.js';
+import { validateGraph } from './graph-schema.js';
+/** True on Windows; `saveGraph` uses it to skip the explicit chmod calls there. */
+const isWindows = process.platform === 'win32';
+/** On-disk shape of an index (`graph.json`), as written by `saveGraph` and returned by `loadGraph`. */
+export interface Graph {
+  /** Schema version; currently always 1. */
+  version: 1;
+  /** Identifier of the repo — presumably `repoIdFor(rootPath)`; confirm against the indexer. */
+  repoId: string;
+  /** Root path of the indexed repo; `saveGraph` derives the storage location from it. */
+  rootPath: string;
+  /** When the index was built — presumably an ISO-8601 timestamp; TODO confirm. */
+  indexedAt: string;
+  /** Number of files that went into the index. */
+  filesIndexed: number;
+  /** Symbol count — presumably equal to `symbols.length`; verify against the writer. */
+  symbolCount: number;
+  /** Edge count — presumably equal to `edges.length`; verify against the writer. */
+  edgeCount: number;
+  /** All indexed symbols. */
+  symbols: SymbolRecord[];
+  /** All indexed call edges. */
+  edges: CallEdge[];
+  /**
+   * Optional git provenance — set when the indexed root lives inside a
+   * git worktree. Both fields may be null even within a git repo (e.g.
+   * detached HEAD has no branch; an empty repo has no SHA). Older
+   * graph.json files written before the v0.1.5 pivot won't have these
+   * fields; the schema validator treats them as optional so old graphs
+   * still load.
+   */
+  indexedSha?: string | null;
+  indexedBranch?: string | null;
+}
+/**
+ * Derive a stable identifier for a repo from its root path: the first 16 hex
+ * characters of the SHA-256 digest of the path string. The path is hashed
+ * exactly as given (no normalisation).
+ */
+export function repoIdFor(absRootPath: string): string {
+  const digestHex = createHash('sha256').update(absRootPath).digest('hex');
+  return digestHex.substring(0, 16);
+}
+/** Directory holding a repo's index: `<home>/.graphpilot/<repoId>`. */
+export function repoDir(absRootPath: string): string {
+  const storeRoot = join(homedir(), '.graphpilot');
+  return join(storeRoot, repoIdFor(absRootPath));
+}
+/** Full path of the `graph.json` file inside the repo's index directory. */
+export function graphPath(absRootPath: string): string {
+  const dir = repoDir(absRootPath);
+  return join(dir, 'graph.json');
+}
+/**
+ * Serialise the graph as pretty-printed JSON into the index directory derived
+ * from `graph.rootPath`, creating the directory if needed.
+ *
+ * @param graph Graph to persist.
+ * @returns Path of the written `graph.json`.
+ */
+export function saveGraph(graph: Graph): string {
+  const { rootPath } = graph;
+  const storeDir = repoDir(rootPath);
+  // T7 defence: directory 0700, file 0600, so other users on a shared machine
+  // cannot read the index. The `mode` option of mkdir / writeFile only takes
+  // effect when the entry is created, hence the explicit chmod afterwards to
+  // tighten anything that already existed (e.g. an index written before this
+  // protection landed). On Windows these modes are silently ignored, which is
+  // fine — NTFS ACLs are handled by the user profile boundary.
+  mkdirSync(storeDir, { recursive: true, mode: 0o700 });
+  if (!isWindows) chmodSync(storeDir, 0o700);
+  const finalPath = graphPath(rootPath);
+  // Atomic write: the content goes to a sibling .tmp file which is then
+  // renamed over the real one. A crash mid-write therefore never leaves a
+  // truncated graph.json that would later fail T4's schema validator and
+  // force a full re-index — important for watch mode, which writes often.
+  const stagingPath = `${finalPath}.tmp`;
+  const serialized = JSON.stringify(graph, null, 2);
+  writeFileSync(stagingPath, serialized, { encoding: 'utf8', mode: 0o600 });
+  if (!isWindows) chmodSync(stagingPath, 0o600);
+  renameSync(stagingPath, finalPath);
+  return finalPath;
+}
+/**
+ * Load a repo's graph from disk and run it through `validateGraph`.
+ *
+ * Returns null — without throwing on bad data — when the file does not exist,
+ * cannot be read, is not valid JSON, or is rejected by the validator, so the
+ * MCP tool layer can surface "no index" cleanly. Invalid JSON and validation
+ * failures are reported on stderr; a missing or unreadable file is silent.
+ *
+ * T4 defence: anything from disk is untrusted, so only the value returned by
+ * `validateGraph` is ever handed to callers, never the raw parsed JSON.
+ * NOTE(review): string sanitisation (control chars, length caps) is presumably
+ * done inside `validateGraph` in ./graph-schema.js — it does not happen here.
+ */
+export function loadGraph(absRootPath: string): Graph | null {
+  const file = graphPath(absRootPath);
+  if (!existsSync(file)) return null;
+  let text: string;
+  try {
+    text = readFileSync(file, 'utf8');
+  } catch {
+    return null;
+  }
+  let data: unknown;
+  try {
+    data = JSON.parse(text);
+  } catch {
+    process.stderr.write(`[graphpilot] graph.json is not valid JSON: ${file}\n`);
+    return null;
+  }
+  const problems: string[] = [];
+  const graph = validateGraph(data, problems);
+  if (graph) return graph;
+  // One indented bullet per validation error, after a header naming the file.
+  const details = problems.map((problem) => `  - ${problem}`).join('\n');
+  process.stderr.write(
+    `[graphpilot] graph.json failed schema validation: ${file}\n` + details + '\n',
+  );
+  return null;
+}