npm - seer-mcp - Versions diffs - 0.1.0 - Mend

seer-mcp 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (371) hide show

package/.vscode/settings.json +3 -0
package/LICENSE +176 -0
package/README.md +272 -0
package/README_dev.md +199 -0
package/dist/bundle/ci.d.ts +47 -0
package/dist/bundle/ci.d.ts.map +1 -0
package/dist/bundle/ci.js +113 -0
package/dist/bundle/ci.js.map +1 -0
package/dist/bundle/contract.d.ts +111 -0
package/dist/bundle/contract.d.ts.map +1 -0
package/dist/bundle/contract.js +352 -0
package/dist/bundle/contract.js.map +1 -0
package/dist/bundle/export.d.ts +36 -0
package/dist/bundle/export.d.ts.map +1 -0
package/dist/bundle/export.js +152 -0
package/dist/bundle/export.js.map +1 -0
package/dist/bundle/external.d.ts +66 -0
package/dist/bundle/external.d.ts.map +1 -0
package/dist/bundle/external.js +238 -0
package/dist/bundle/external.js.map +1 -0
package/dist/bundle/format.d.ts +94 -0
package/dist/bundle/format.d.ts.map +1 -0
package/dist/bundle/format.js +42 -0
package/dist/bundle/format.js.map +1 -0
package/dist/bundle/import.d.ts +49 -0
package/dist/bundle/import.d.ts.map +1 -0
package/dist/bundle/import.js +116 -0
package/dist/bundle/import.js.map +1 -0
package/dist/cli/index.d.ts +3 -0
package/dist/cli/index.d.ts.map +1 -0
package/dist/cli/index.js +1402 -0
package/dist/cli/index.js.map +1 -0
package/dist/cli/init.d.ts +48 -0
package/dist/cli/init.d.ts.map +1 -0
package/dist/cli/init.js +284 -0
package/dist/cli/init.js.map +1 -0
package/dist/db/schema.d.ts +3 -0
package/dist/db/schema.d.ts.map +1 -0
package/dist/db/schema.js +616 -0
package/dist/db/schema.js.map +1 -0
package/dist/db/store.d.ts +1011 -0
package/dist/db/store.d.ts.map +1 -0
package/dist/db/store.js +3888 -0
package/dist/db/store.js.map +1 -0
package/dist/graph/pagerank.d.ts +9 -0
package/dist/graph/pagerank.d.ts.map +1 -0
package/dist/graph/pagerank.js +47 -0
package/dist/graph/pagerank.js.map +1 -0
package/dist/indexer/architecture.d.ts +72 -0
package/dist/indexer/architecture.d.ts.map +1 -0
package/dist/indexer/architecture.js +112 -0
package/dist/indexer/architecture.js.map +1 -0
package/dist/indexer/behavior.d.ts +75 -0
package/dist/indexer/behavior.d.ts.map +1 -0
package/dist/indexer/behavior.js +395 -0
package/dist/indexer/behavior.js.map +1 -0
package/dist/indexer/boundaries.d.ts +60 -0
package/dist/indexer/boundaries.d.ts.map +1 -0
package/dist/indexer/boundaries.js +366 -0
package/dist/indexer/boundaries.js.map +1 -0
package/dist/indexer/churn.d.ts +15 -0
package/dist/indexer/churn.d.ts.map +1 -0
package/dist/indexer/churn.js +49 -0
package/dist/indexer/churn.js.map +1 -0
package/dist/indexer/classify.d.ts +9 -0
package/dist/indexer/classify.d.ts.map +1 -0
package/dist/indexer/classify.js +90 -0
package/dist/indexer/classify.js.map +1 -0
package/dist/indexer/context.d.ts +176 -0
package/dist/indexer/context.d.ts.map +1 -0
package/dist/indexer/context.js +193 -0
package/dist/indexer/context.js.map +1 -0
package/dist/indexer/continuity.d.ts +67 -0
package/dist/indexer/continuity.d.ts.map +1 -0
package/dist/indexer/continuity.js +288 -0
package/dist/indexer/continuity.js.map +1 -0
package/dist/indexer/detectchanges.d.ts +32 -0
package/dist/indexer/detectchanges.d.ts.map +1 -0
package/dist/indexer/detectchanges.js +74 -0
package/dist/indexer/detectchanges.js.map +1 -0
package/dist/indexer/discovery.d.ts +37 -0
package/dist/indexer/discovery.d.ts.map +1 -0
package/dist/indexer/discovery.js +136 -0
package/dist/indexer/discovery.js.map +1 -0
package/dist/indexer/externaldeps.d.ts +18 -0
package/dist/indexer/externaldeps.d.ts.map +1 -0
package/dist/indexer/externaldeps.js +288 -0
package/dist/indexer/externaldeps.js.map +1 -0
package/dist/indexer/freshness.d.ts +48 -0
package/dist/indexer/freshness.d.ts.map +1 -0
package/dist/indexer/freshness.js +128 -0
package/dist/indexer/freshness.js.map +1 -0
package/dist/indexer/git.d.ts +144 -0
package/dist/indexer/git.d.ts.map +1 -0
package/dist/indexer/git.js +444 -0
package/dist/indexer/git.js.map +1 -0
package/dist/indexer/index.d.ts +145 -0
package/dist/indexer/index.d.ts.map +1 -0
package/dist/indexer/index.js +930 -0
package/dist/indexer/index.js.map +1 -0
package/dist/indexer/modules.d.ts +62 -0
package/dist/indexer/modules.d.ts.map +1 -0
package/dist/indexer/modules.js +293 -0
package/dist/indexer/modules.js.map +1 -0
package/dist/indexer/preflight.d.ts +154 -0
package/dist/indexer/preflight.d.ts.map +1 -0
package/dist/indexer/preflight.js +399 -0
package/dist/indexer/preflight.js.map +1 -0
package/dist/indexer/protoScanner.d.ts +34 -0
package/dist/indexer/protoScanner.d.ts.map +1 -0
package/dist/indexer/protoScanner.js +133 -0
package/dist/indexer/protoScanner.js.map +1 -0
package/dist/indexer/risk.d.ts +115 -0
package/dist/indexer/risk.d.ts.map +1 -0
package/dist/indexer/risk.js +194 -0
package/dist/indexer/risk.js.map +1 -0
package/dist/indexer/serviceHostScanner.d.ts +25 -0
package/dist/indexer/serviceHostScanner.d.ts.map +1 -0
package/dist/indexer/serviceHostScanner.js +95 -0
package/dist/indexer/serviceHostScanner.js.map +1 -0
package/dist/indexer/serviceLinks.d.ts +105 -0
package/dist/indexer/serviceLinks.d.ts.map +1 -0
package/dist/indexer/serviceLinks.js +509 -0
package/dist/indexer/serviceLinks.js.map +1 -0
package/dist/indexer/shapehash.d.ts +98 -0
package/dist/indexer/shapehash.d.ts.map +1 -0
package/dist/indexer/shapehash.js +354 -0
package/dist/indexer/shapehash.js.map +1 -0
package/dist/indexer/skeleton.d.ts +15 -0
package/dist/indexer/skeleton.d.ts.map +1 -0
package/dist/indexer/skeleton.js +136 -0
package/dist/indexer/skeleton.js.map +1 -0
package/dist/indexer/symbolhistory.d.ts +41 -0
package/dist/indexer/symbolhistory.d.ts.map +1 -0
package/dist/indexer/symbolhistory.js +124 -0
package/dist/indexer/symbolhistory.js.map +1 -0
package/dist/indexer/watcher.d.ts +68 -0
package/dist/indexer/watcher.d.ts.map +1 -0
package/dist/indexer/watcher.js +179 -0
package/dist/indexer/watcher.js.map +1 -0
package/dist/mcp/server.d.ts +80 -0
package/dist/mcp/server.d.ts.map +1 -0
package/dist/mcp/server.js +1610 -0
package/dist/mcp/server.js.map +1 -0
package/dist/parser/index.d.ts +8 -0
package/dist/parser/index.d.ts.map +1 -0
package/dist/parser/index.js +33 -0
package/dist/parser/index.js.map +1 -0
package/dist/parser/languages/cpp.d.ts +3 -0
package/dist/parser/languages/cpp.d.ts.map +1 -0
package/dist/parser/languages/cpp.js +350 -0
package/dist/parser/languages/cpp.js.map +1 -0
package/dist/parser/languages/csharp.d.ts +3 -0
package/dist/parser/languages/csharp.d.ts.map +1 -0
package/dist/parser/languages/csharp.js +239 -0
package/dist/parser/languages/csharp.js.map +1 -0
package/dist/parser/languages/go.d.ts +3 -0
package/dist/parser/languages/go.d.ts.map +1 -0
package/dist/parser/languages/go.js +259 -0
package/dist/parser/languages/go.js.map +1 -0
package/dist/parser/languages/java.d.ts +3 -0
package/dist/parser/languages/java.d.ts.map +1 -0
package/dist/parser/languages/java.js +391 -0
package/dist/parser/languages/java.js.map +1 -0
package/dist/parser/languages/python.d.ts +3 -0
package/dist/parser/languages/python.d.ts.map +1 -0
package/dist/parser/languages/python.js +396 -0
package/dist/parser/languages/python.js.map +1 -0
package/dist/parser/languages/rust.d.ts +3 -0
package/dist/parser/languages/rust.d.ts.map +1 -0
package/dist/parser/languages/rust.js +159 -0
package/dist/parser/languages/rust.js.map +1 -0
package/dist/parser/languages/typescript.d.ts +3 -0
package/dist/parser/languages/typescript.d.ts.map +1 -0
package/dist/parser/languages/typescript.js +1442 -0
package/dist/parser/languages/typescript.js.map +1 -0
package/dist/parser/parserContext.d.ts +77 -0
package/dist/parser/parserContext.d.ts.map +1 -0
package/dist/parser/parserContext.js +354 -0
package/dist/parser/parserContext.js.map +1 -0
package/dist/parser/walker.d.ts +81 -0
package/dist/parser/walker.d.ts.map +1 -0
package/dist/parser/walker.js +217 -0
package/dist/parser/walker.js.map +1 -0
package/dist/parser/worker.d.ts +66 -0
package/dist/parser/worker.d.ts.map +1 -0
package/dist/parser/worker.js +129 -0
package/dist/parser/worker.js.map +1 -0
package/dist/parser/workerpool.d.ts +107 -0
package/dist/parser/workerpool.d.ts.map +1 -0
package/dist/parser/workerpool.js +383 -0
package/dist/parser/workerpool.js.map +1 -0
package/dist/scip/format.d.ts +87 -0
package/dist/scip/format.d.ts.map +1 -0
package/dist/scip/format.js +31 -0
package/dist/scip/format.js.map +1 -0
package/dist/scip/import.d.ts +37 -0
package/dist/scip/import.d.ts.map +1 -0
package/dist/scip/import.js +180 -0
package/dist/scip/import.js.map +1 -0
package/dist/types.d.ts +392 -0
package/dist/types.d.ts.map +1 -0
package/dist/types.js +4 -0
package/dist/types.js.map +1 -0
package/docs/architecture.md +105 -0
package/docs/benchmarks/methodology.md +134 -0
package/docs/benchmarks/raw-results.md +71 -0
package/docs/benchmarks.md +74 -0
package/docs/cli.md +148 -0
package/docs/examples/behavior-tests.md +70 -0
package/docs/examples/change-history.md +85 -0
package/docs/examples/pre-edit-context.md +81 -0
package/docs/examples/service-links.md +88 -0
package/docs/examples.md +80 -0
package/docs/faq.md +70 -0
package/docs/internals.md +104 -0
package/docs/languages.md +70 -0
package/docs/limits.md +52 -0
package/docs/mcp.md +199 -0
package/docs/quickstart.md +119 -0
package/docs/testing.md +123 -0
package/docs/tools.md +115 -0
package/package.json +52 -0
package/research-codebase.md +578 -0
package/seer-cli-docs.md +326 -0
package/seer-master-guide.md +246 -0
package/src/bundle/ci.ts +141 -0
package/src/bundle/contract.ts +387 -0
package/src/bundle/export.ts +175 -0
package/src/bundle/external.ts +285 -0
package/src/bundle/format.ts +92 -0
package/src/bundle/import.ts +157 -0
package/src/cli/index.ts +1249 -0
package/src/cli/init.ts +389 -0
package/src/db/schema.ts +614 -0
package/src/db/store.ts +4306 -0
package/src/graph/pagerank.ts +53 -0
package/src/indexer/architecture.ts +148 -0
package/src/indexer/behavior.ts +466 -0
package/src/indexer/boundaries.ts +374 -0
package/src/indexer/churn.ts +58 -0
package/src/indexer/classify.ts +96 -0
package/src/indexer/context.ts +340 -0
package/src/indexer/continuity.ts +322 -0
package/src/indexer/detectchanges.ts +94 -0
package/src/indexer/discovery.ts +176 -0
package/src/indexer/externaldeps.ts +243 -0
package/src/indexer/freshness.ts +166 -0
package/src/indexer/git.ts +453 -0
package/src/indexer/index.ts +1092 -0
package/src/indexer/modules.ts +358 -0
package/src/indexer/preflight.ts +548 -0
package/src/indexer/protoScanner.ts +147 -0
package/src/indexer/risk.ts +304 -0
package/src/indexer/serviceHostScanner.ts +92 -0
package/src/indexer/serviceLinks.ts +543 -0
package/src/indexer/shapehash.ts +370 -0
package/src/indexer/skeleton.ts +169 -0
package/src/indexer/symbolhistory.ts +172 -0
package/src/indexer/watcher.ts +206 -0
package/src/mcp/server.ts +1659 -0
package/src/parser/index.ts +37 -0
package/src/parser/languages/cpp.ts +361 -0
package/src/parser/languages/csharp.ts +235 -0
package/src/parser/languages/go.ts +259 -0
package/src/parser/languages/java.ts +382 -0
package/src/parser/languages/python.ts +370 -0
package/src/parser/languages/rust.ts +164 -0
package/src/parser/languages/typescript.ts +1435 -0
package/src/parser/parserContext.ts +392 -0
package/src/parser/walker.ts +306 -0
package/src/parser/worker.ts +181 -0
package/src/parser/workerpool.ts +448 -0
package/src/scip/format.ts +83 -0
package/src/scip/import.ts +216 -0
package/src/types.ts +457 -0
package/tests/benchmark-service-links.ts +244 -0
package/tests/bug-regressions.ts +626 -0
package/tests/filters.ts +264 -0
package/tests/fixtures/Counter.tsx +38 -0
package/tests/fixtures/caller.ts +7 -0
package/tests/fixtures/collisions.ts +23 -0
package/tests/fixtures/local_helper.ts +5 -0
package/tests/fixtures/overloads.java +17 -0
package/tests/fixtures/remote_helper.ts +4 -0
package/tests/fixtures/sample.c +15 -0
package/tests/fixtures/sample.cpp +47 -0
package/tests/fixtures/sample.cs +62 -0
package/tests/fixtures/sample.go +68 -0
package/tests/fixtures/sample.h +30 -0
package/tests/fixtures/sample.java +85 -0
package/tests/fixtures/sample.py +46 -0
package/tests/fixtures/sample.rs +78 -0
package/tests/fixtures/sample.ts +76 -0
package/tests/fixtures-service/HttpClients.cs +30 -0
package/tests/fixtures-service/HttpClients.java +24 -0
package/tests/fixtures-service/billing.ts +15 -0
package/tests/fixtures-service/docker-compose.yml +15 -0
package/tests/fixtures-service/gateway.ts +10 -0
package/tests/fixtures-service/get_user.ts +11 -0
package/tests/fixtures-service/graphql_client.ts +63 -0
package/tests/fixtures-service/graphql_server.ts +30 -0
package/tests/fixtures-service/grpc_client.go +30 -0
package/tests/fixtures-service/http_clients.go +23 -0
package/tests/fixtures-service/http_clients.py +38 -0
package/tests/fixtures-service/http_clients.ts +49 -0
package/tests/fixtures-service/k8s/payment-service.yaml +22 -0
package/tests/fixtures-service/k8s_calls.ts +20 -0
package/tests/fixtures-service/messaging.ts +87 -0
package/tests/fixtures-service/trpc_client.ts +39 -0
package/tests/fixtures-service/trpc_server.ts +39 -0
package/tests/fixtures-service/user_service.proto +33 -0
package/tests/fixtures-trackcd/Cargo.toml +11 -0
package/tests/fixtures-trackcd/SpringController.java +36 -0
package/tests/fixtures-trackcd/auth_service.ts +19 -0
package/tests/fixtures-trackcd/complex_module.py +50 -0
package/tests/fixtures-trackcd/express_app.js +30 -0
package/tests/fixtures-trackcd/fastapi_app.py +49 -0
package/tests/fixtures-trackcd/fastify_object_routes.js +32 -0
package/tests/fixtures-trackcd/go.mod +8 -0
package/tests/fixtures-trackcd/package.json +15 -0
package/tests/fixtures-trackcd/requirements.txt +4 -0
package/tests/fixtures-trackcd/tests/auth_service.test.ts +13 -0
package/tests/fixtures-tracke/auth/AuthService.ts +23 -0
package/tests/fixtures-tracke/auth/crypto.ts +7 -0
package/tests/fixtures-tracke/billing/Billing.ts +20 -0
package/tests/fixtures-tracke/billing/Invoice.ts +10 -0
package/tests/fixtures-tracke/billing/server.ts +17 -0
package/tests/fixtures-tracke/package.json +7 -0
package/tests/fixtures-tracke/tests/auth.test.ts +23 -0
package/tests/fixtures-tracke/tests/billing.test.ts +14 -0
package/tests/fixtures-trackf/package.json +5 -0
package/tests/fixtures-trackf/src/auth.ts +26 -0
package/tests/fixtures-trackf/src/handlers.ts +35 -0
package/tests/fixtures-tracki/billing/routes.ts +12 -0
package/tests/fixtures-tracki/gateway/client.ts +13 -0
package/tests/git-features.ts +267 -0
package/tests/init.ts +141 -0
package/tests/mcp-jit.ts +130 -0
package/tests/mcp-smoke.ts +191 -0
package/tests/mcp-trackcd.ts +169 -0
package/tests/mcp-tracke.ts +229 -0
package/tests/mcp-trackf.ts +330 -0
package/tests/mcp-trackg.ts +219 -0
package/tests/mcp-tracki.ts +174 -0
package/tests/mcp-watcher.ts +126 -0
package/tests/optspec.ts +194 -0
package/tests/parallel-index.ts +333 -0
package/tests/parallel-read.ts +125 -0
package/tests/parallel-recovery.ts +241 -0
package/tests/perf-callers.ts +145 -0
package/tests/query-parity.ts +184 -0
package/tests/query-perf.ts +55 -0
package/tests/scale-parallel-parity.ts +225 -0
package/tests/scale-test.ts +523 -0
package/tests/smoke.ts +396 -0
package/tests/trackcd.ts +325 -0
package/tests/tracke-collisions.ts +255 -0
package/tests/tracke.ts +314 -0
package/tests/trackf-bugs.ts +406 -0
package/tests/trackf.ts +390 -0
package/tests/trackg.ts +1372 -0
package/tests/tracki-boundaries.ts +202 -0
package/tests/tracki-continuity.ts +253 -0
package/tests/tracki-contract-diff.ts +249 -0
package/tests/tracki-external-bundles.ts +341 -0
package/tests/tracki-preflight.ts +251 -0
package/tests/verify-roles.ts +51 -0
package/tests/worker-parity.ts +286 -0
package/tests/worker-pool.ts +262 -0
package/tsconfig.json +20 -0

package/src/indexer/shapehash.ts ADDED Viewed

@@ -0,0 +1,370 @@
+import fs from 'fs';
+import { Store } from '../db/store.js';
+/**
+ * Track-F structural SimHash duplicate detection.
+ *
+ * For each function/method/constructor symbol we compute a 64-bit SimHash
+ * over its STRUCTURAL token stream — identifier names are folded into their
+ * "kind" (NAME / NUMBER / STRING / KEYWORD / OP) so two functions that do
+ * the same shape with different variable names still match. This is the
+ * classic Charikar SimHash construction, sized so two near-duplicates
+ * differ in only a small Hamming distance.
+ *
+ * Why structural and not exact-tree? Exact-tree hashes (Merkle over the AST)
+ * find verbatim copies; that's a small fraction of real-world duplication. A
+ * SimHash over tokens with a sliding 3-gram window catches:
+ *   - genuine copy-paste with renames
+ *   - near-duplicate boilerplate (CRUD handlers, parser branches)
+ *   - structural twins across files / languages with similar syntactic shape
+ *
+ * The trade-off is exact-equality false positives (two unrelated 3-line
+ * helpers can hash close). We mitigate by:
+ *   1. Requiring LOC >= MIN_LOC (default 4) to avoid trivial pairs.
+ *   2. Computing the hash only over function/method/constructor symbols.
+ *   3. Returning Hamming distance with every pair so the caller can filter.
+ *
+ * SCIP-merged symbols keep the tree-sitter hash; SCIP-only symbols never get
+ * a hash because we don't see their bodies.
+ */
+/** Minimum symbol LOC used by buildShapeHashes / findDuplicates when `minLoc` is omitted. */
+const MIN_LOC_DEFAULT = 4;
+/** Width of the sliding token window whose joined labels are hashed. */
+const NGRAM_SIZE = 3;
+/** Number of bits in the fingerprint; one vote counter is kept per bit. */
+const HASH_BITS = 64;
+/** Tokens we recognize when computing the shape hash. */
+type TokenKind = 'NAME' | 'NUMBER' | 'STRING' | 'KEYWORD' | 'OP';
+interface ShapeToken {
+  kind: TokenKind;
+  /**
+   * Lexeme, kept only where it carries structure. tokenize() stores the
+   * single operator / punctuation character for OP tokens and the empty
+   * string for NAME / NUMBER / STRING so that those fold together —
+   * structure first, content second.
+   *
+   * NOTE(review): tokenize() in this file never emits KEYWORD; keywords come
+   * out as NAME with an empty lexeme, so `if`, `for` and `while` are NOT
+   * distinguished by this field today.
+   */
+  text: string;
+}
+/**
+ * Language-agnostic structural tokenizer that feeds the shape hash.
+ *
+ * One left-to-right scan over the UTF-16 code units of `source`:
+ *   - space / tab / LF / CR                  → skipped
+ *   - `//` or `#` up to (not including) LF   → skipped as a line comment
+ *   - slash-star up to star-slash            → skipped as a block comment
+ *   - text between matching ' " or ` quotes  → one STRING, lexeme dropped;
+ *     a backslash also skips the code unit after it
+ *   - [A-Za-z_$][A-Za-z0-9_$]*               → one NAME, lexeme dropped
+ *   - a digit, then digits / '.' / '_' / 'x' / 'X' → one NUMBER, lexeme dropped
+ *   - anything else                          → one OP per code unit, lexeme kept
+ *
+ * Known limits of this scan (deliberately left as-is so fingerprints stay
+ * stable):
+ *   - Keywords are not recognized; they come out as NAME and are told apart
+ *     from identifiers only by the operators around them.
+ *   - `#` always opens a line comment, whatever the language.
+ *   - Hex digits a–f and exponent letters end a NUMBER, so `0xff` yields
+ *     NUMBER followed by NAME.
+ *   - An unterminated string or block comment runs to the end of the input
+ *     (the string still yields its STRING token).
+ *   - Code units outside ASCII fall through to OP.
+ *
+ * @param source Text to tokenize, typically one function body.
+ * @returns Tokens in source order; a fresh object per token.
+ */
+export function tokenize(source: string): ShapeToken[] {
+  const TAB = 9, LF = 10, CR = 13, SPACE = 32;
+  const DQUOTE = 34, HASH = 35, SQUOTE = 39, STAR = 42, DOT = 46, SLASH = 47;
+  const ZERO = 48, NINE = 57, UPPER_X = 88, BACKSLASH = 92, UNDERSCORE = 95;
+  const BACKTICK = 96, LOWER_X = 120;
+  const out: ShapeToken[] = [];
+  const len = source.length;
+  const at = (k: number): number => source.charCodeAt(k);
+  let pos = 0;
+  while (pos < len) {
+    const ch = at(pos);
+    const next = at(pos + 1); // NaN past the end; every comparison is then false
+    if (ch === SPACE || ch === TAB || ch === LF || ch === CR) {
+      pos += 1;
+    } else if ((ch === SLASH && next === SLASH) || ch === HASH) {
+      // Line comment: stop ON the newline; the whitespace branch eats it.
+      while (pos < len && at(pos) !== LF) pos += 1;
+    } else if (ch === SLASH && next === STAR) {
+      pos += 2;
+      while (pos < len && (at(pos) !== STAR || at(pos + 1) !== SLASH)) pos += 1;
+      pos += 2; // step over the closing pair (harmless when unterminated)
+    } else if (ch === DQUOTE || ch === SQUOTE || ch === BACKTICK) {
+      pos += 1;
+      let closed = false;
+      while (!closed && pos < len) {
+        const inner = at(pos);
+        if (inner === BACKSLASH) {
+          pos += 2; // escape: the escaped code unit can never close the string
+        } else {
+          pos += 1;
+          closed = inner === ch;
+        }
+      }
+      out.push({ kind: 'STRING', text: '' });
+    } else if (isIdStart(ch)) {
+      let end = pos + 1;
+      while (end < len && isIdContinue(at(end))) end += 1;
+      out.push({ kind: 'NAME', text: '' });
+      pos = end;
+    } else if (ch >= ZERO && ch <= NINE) {
+      let end = pos + 1;
+      for (; end < len; end += 1) {
+        const d = at(end);
+        const numeric = (d >= ZERO && d <= NINE)
+          || d === DOT || d === UNDERSCORE || d === LOWER_X || d === UPPER_X;
+        if (!numeric) break;
+      }
+      out.push({ kind: 'NUMBER', text: '' });
+      pos = end;
+    } else {
+      // Operator / punctuation: one code unit, lexeme kept so '{' ≠ '}'.
+      out.push({ kind: 'OP', text: source[pos] });
+      pos += 1;
+    }
+  }
+  return out;
+}
+/** True when code unit `c` may begin an identifier: A–Z, a–z, `_` or `$`. */
+function isIdStart(c: number): boolean {
+  if (c === 95 || c === 36) return true; // '_' or '$'
+  const upper = c >= 65 && c <= 90;
+  const lower = c >= 97 && c <= 122;
+  return upper || lower;
+}
+/** True when code unit `c` may continue an identifier: an id-start unit or 0–9. */
+function isIdContinue(c: number): boolean {
+  const digit = c >= 48 && c <= 57;
+  return digit || isIdStart(c);
+}
+/**
+ * Compute the 64-bit structural SimHash of a function body.
+ *
+ * Algorithm (standard Charikar SimHash):
+ *   1. Tokenize; NAME / NUMBER / STRING contribute only their kind, OP tokens
+ *      contribute `OP:<lexeme>`.
+ *   2. Slide an NGRAM_SIZE-token window over the labels.
+ *   3. Hash each window (labels joined with '|') with fnv64.
+ *   4. For each bit position, vote +1 if the bit is set in the window hash,
+ *      -1 if it is not.
+ *   5. Output bit i is 1 iff the vote total for bit i is > 0.
+ *
+ * @param body      Source text of the symbol.
+ * @param minTokens Minimum token count for the body to be hashed (default 8).
+ * @returns The fingerprint as an unsigned bigint, or null when the body has
+ *          fewer than `minTokens` tokens or too few tokens to form a single
+ *          n-gram. The second condition matters only when a caller passes
+ *          `minTokens` below NGRAM_SIZE: without it such bodies would all
+ *          receive the fingerprint 0n and be reported as duplicates of each
+ *          other.
+ */
+export function computeShapeHash(body: string, minTokens = 8): bigint | null {
+  const tokens = tokenize(body);
+  if (tokens.length < minTokens) return null;
+  // No window fits → no votes → every counter stays 0 → meaningless 0n hash.
+  if (tokens.length < NGRAM_SIZE) return null;
+  const labels = tokens.map(tok => (tok.kind === 'OP' ? `OP:${tok.text}` : tok.kind));
+  const counters = new Int32Array(HASH_BITS);
+  for (let end = NGRAM_SIZE; end <= labels.length; end++) {
+    let bits = fnv64(labels.slice(end - NGRAM_SIZE, end).join('|'));
+    // Walk the hash from the least-significant bit upwards.
+    for (let b = 0; b < HASH_BITS; b++) {
+      counters[b] += (bits & 1n) === 1n ? 1 : -1;
+      bits >>= 1n;
+    }
+  }
+  let out = 0n;
+  for (let b = 0; b < HASH_BITS; b++) {
+    if (counters[b] > 0) out |= (1n << BigInt(b));
+  }
+  return out;
+}
+/**
+ * FNV-1a 64-bit hash of `s`. Each UTF-16 code unit contributes only its low
+ * byte, so the result is deterministic and dependency-free but is not a hash
+ * of the UTF-8 encoding for non-ASCII input.
+ */
+function fnv64(s: string): bigint {
+  const OFFSET_BASIS = 0xcbf29ce484222325n;
+  const PRIME = 0x100000001b3n;
+  const MASK = 0xFFFFFFFFFFFFFFFFn;
+  let acc = OFFSET_BASIS;
+  let idx = 0;
+  while (idx < s.length) {
+    const lowByte = s.charCodeAt(idx) & 0xff;
+    // XOR first, then multiply (the "1a" order); mask back down to 64 bits.
+    acc = ((acc ^ BigInt(lowByte)) * PRIME) & MASK;
+    idx += 1;
+  }
+  return acc;
+}
+/**
+ * Hamming distance between two 64-bit fingerprints.
+ *
+ * Only the low 64 bits of `a ^ b` are counted, so the signed
+ * (two's-complement) and unsigned spellings of the same 64-bit value compare
+ * as equal. This matters because fingerprints are written to the database in
+ * signed form: without the mask a negative XOR would never reach 0 under
+ * `x &= x - 1n` and the loop would not terminate.
+ *
+ * @returns Number of differing bit positions, 0–64.
+ */
+export function hammingDistance(a: bigint, b: bigint): number {
+  let diff = BigInt.asUintN(64, a ^ b);
+  let count = 0;
+  while (diff !== 0n) {
+    diff &= (diff - 1n); // clear the lowest set bit
+    count++;
+  }
+  return count;
+}
+/** Summary returned by buildShapeHashes. */
+export interface BuildShapeHashResult {
+  /** Symbols whose shape_hash column was written during this pass. */
+  symbolsHashed: number;
+  /** Symbols skipped because their file could not be read or no hash could be computed. */
+  symbolsSkipped: number;
+  /** Wall-clock duration of the pass in milliseconds (Date.now() delta). */
+  elapsedMs: number;
+}
+/**
+ * Compute and persist shape hashes for function-like symbols in the DB.
+ *
+ * Selects every `function` / `method` / `constructor` symbol that is not a
+ * bare declaration and has `loc >= minLoc`, ordered by file path, so each
+ * file is read AND split into lines exactly once; every symbol of that file
+ * is then sliced from the cached line array.
+ *
+ * Unless `force` is set, symbols that already have a shape_hash are left
+ * alone. The original author's note says re-parsed files get the column reset
+ * by the Store (rows deleted and reinserted); that behavior is not visible
+ * from this file.
+ *
+ * @param store   Index store; must report schema v7 or nothing is done.
+ * @param options minLoc — minimum symbol LOC (default MIN_LOC_DEFAULT);
+ *                force  — also re-hash symbols that already have a hash;
+ *                log    — optional progress sink.
+ * @returns Counts of hashed / skipped symbols and the elapsed time.
+ */
+export function buildShapeHashes(
+  store: Store,
+  options: { minLoc?: number; force?: boolean; log?: (m: string) => void } = {},
+): BuildShapeHashResult {
+  const start = Date.now();
+  const minLoc = options.minLoc ?? MIN_LOC_DEFAULT;
+  const log = options.log ?? (() => { /* */ });
+  if (!store.hasV7()) {
+    log('shape hashes require schema v7; skipping');
+    return { symbolsHashed: 0, symbolsSkipped: 0, elapsedMs: Date.now() - start };
+  }
+  // Pull every function/method/constructor symbol with loc >= minLoc that
+  // doesn't already have a shape_hash (or all of them when forced).
+  const where = options.force ? '' : 'AND s.shape_hash IS NULL';
+  const rows = store.rawDb().prepare(`
+    SELECT s.id, s.line_start AS lineStart, s.line_end AS lineEnd, s.loc, f.path AS filePath
+    FROM symbols s JOIN files f ON f.id = s.file_id
+    WHERE s.kind IN ('function','method','constructor')
+      AND s.symbol_role <> 'declaration'
+      AND s.loc >= ?
+      ${where}
+    ORDER BY f.path
+  `).all(minLoc) as Array<Record<string, unknown>>;
+  let symbolsHashed = 0;
+  let symbolsSkipped = 0;
+  let lastFile = '';
+  // Lines of `lastFile`, or null when that file could not be read.
+  let lastLines: string[] | null = null;
+  // Prepare the update once and reuse it for every row.
+  const setHash = store.rawDb().prepare(
+    'UPDATE symbols SET shape_hash = ? WHERE id = ?',
+  );
+  // Map the unsigned 64-bit fingerprint onto the signed range for storage.
+  const toSigned = (u: bigint): bigint => {
+    const MAX = 0x7FFFFFFFFFFFFFFFn;
+    return u > MAX ? u - 0x10000000000000000n : u;
+  };
+  for (const r of rows) {
+    const filePath = String(r.filePath);
+    if (filePath !== lastFile) {
+      lastFile = filePath;
+      // Best effort: an unreadable file just marks its symbols as skipped.
+      try { lastLines = fs.readFileSync(filePath, 'utf-8').split(/\r?\n/); }
+      catch { lastLines = null; }
+    }
+    if (lastLines == null) { symbolsSkipped++; continue; }
+    const body = sliceLines(lastLines, Number(r.lineStart), Number(r.lineEnd));
+    const hash = computeShapeHash(body);
+    if (hash == null) { symbolsSkipped++; continue; }
+    setHash.run(toSigned(hash), Number(r.id));
+    symbolsHashed++;
+  }
+  log(`Hashed ${symbolsHashed} symbols (${symbolsSkipped} skipped)`);
+  return { symbolsHashed, symbolsSkipped, elapsedMs: Date.now() - start };
+}
+/**
+ * Join lines `startLine..endLine` (inclusive, treated as 0-based indices).
+ *
+ * `source` may be the raw file text or an already split line array; passing
+ * the array avoids re-splitting a file once per symbol. Trailing-newline
+ * edge cases are ignored because tokenize() skips whitespace anyway.
+ *
+ * NOTE(review): buildSkeleton reads SymbolRow.lineStart as 1-based
+ * (`lines[row.lineStart - 1]`) while this helper receives the raw
+ * `symbols.line_start` column as 0-based — confirm the Store converts
+ * between the two, otherwise one of the callers is off by one line.
+ */
+function sliceLines(
+  source: string | readonly string[], startLine: number, endLine: number,
+): string {
+  const lines = typeof source === 'string' ? source.split(/\r?\n/) : source;
+  return lines.slice(startLine, endLine + 1).join('\n');
+}
+/** One group of structurally near-duplicate symbols returned by findDuplicates. */
+export interface DuplicateCluster {
+  /** Shape hash of the cluster's anchor (the first member by candidate order). */
+  fingerprint: bigint;
+  /** Members, sorted by ascending Hamming distance from the anchor. */
+  symbols: Array<{
+    id: number; name: string; qualifiedName: string | null; kind: string;
+    file: string; lineStart: number; lineEnd: number; loc: number | null;
+    /** Hamming distance between this member's hash and the anchor's hash. */
+    hammingFromAnchor: number;
+  }>;
+}
+/** Tuning knobs for findDuplicates; every field is optional. */
+export interface FindDuplicatesOptions {
+  /** Maximum Hamming distance two symbols may differ to count as duplicates. Default 6. */
+  maxDistance?: number;
+  /** Minimum LOC for a symbol to be considered. Default MIN_LOC_DEFAULT. */
+  minLoc?: number;
+  /** Include test files. Default false. */
+  includeTests?: boolean;
+  /** Hard cap on clusters returned. Default 200. */
+  maxClusters?: number;
+}
+/**
+ * Find clusters of structurally near-duplicate symbols.
+ *
+ * Candidates (at most 50 000, filtered by `minLoc` / `includeTests`) are
+ * compared pairwise by Hamming distance; every pair within `maxDistance` is
+ * merged with union-find, so a cluster is a connected component of the
+ * "distance ≤ N" graph (membership is transitive — two members may be
+ * further apart than `maxDistance`). The comparison is O(N²); larger
+ * codebases should pre-bucket on hash prefixes, which is not done here.
+ *
+ * Clusters are ordered largest first, ties broken by lowest fingerprint, and
+ * only THEN cut to `maxClusters`, so the cap always keeps the largest
+ * clusters rather than whichever were discovered first.
+ *
+ * @param store   Index store providing listSymbolsWithShapeHash.
+ * @param options See FindDuplicatesOptions for the knobs and their defaults.
+ * @returns Clusters of two or more symbols; empty when fewer than two
+ *          candidates exist.
+ */
+export function findDuplicates(
+  store: Store, options: FindDuplicatesOptions = {},
+): DuplicateCluster[] {
+  const maxDistance = options.maxDistance ?? 6;
+  const minLoc = options.minLoc ?? MIN_LOC_DEFAULT;
+  const includeTests = options.includeTests ?? false;
+  const maxClusters = options.maxClusters ?? 200;
+  const candidates = store.listSymbolsWithShapeHash({
+    minLoc, includeTests, limit: 50000,
+  });
+  if (candidates.length < 2) return [];
+  // Union-find over candidate indices.
+  const parent = new Int32Array(candidates.length);
+  for (let i = 0; i < parent.length; i++) parent[i] = i;
+  function find(x: number): number {
+    while (parent[x] !== x) {
+      parent[x] = parent[parent[x]]; // path compression (halving)
+      x = parent[x];
+    }
+    return x;
+  }
+  function union(a: number, b: number): void {
+    const ra = find(a), rb = find(b);
+    if (ra !== rb) parent[ra] = rb;
+  }
+  // Pairwise, N² for now. Bigger codebases should bucket first.
+  for (let i = 0; i < candidates.length; i++) {
+    const a = candidates[i];
+    for (let j = i + 1; j < candidates.length; j++) {
+      const b = candidates[j];
+      // Skip pairs from the same symbol (same id). Two rows can share the
+      // same id when one is a tree-sitter / scip-merge overlap.
+      if (a.id === b.id) continue;
+      const d = hammingDistance(a.shapeHash, b.shapeHash);
+      if (d <= maxDistance) union(i, j);
+    }
+  }
+  // Group candidate indices by their union-find root.
+  const clusters = new Map<number, number[]>();
+  for (let i = 0; i < candidates.length; i++) {
+    const r = find(i);
+    let bucket = clusters.get(r);
+    if (!bucket) { bucket = []; clusters.set(r, bucket); }
+    bucket.push(i);
+  }
+  const out: DuplicateCluster[] = [];
+  for (const indices of clusters.values()) {
+    if (indices.length < 2) continue; // singletons are not duplicates
+    const anchor = candidates[indices[0]];
+    out.push({
+      fingerprint: anchor.shapeHash,
+      symbols: indices.map(i => {
+        const s = candidates[i];
+        return {
+          id: s.id, name: s.name, qualifiedName: s.qualifiedName, kind: s.kind,
+          file: s.filePath, lineStart: s.lineStart, lineEnd: s.lineEnd, loc: s.loc,
+          hammingFromAnchor: hammingDistance(anchor.shapeHash, s.shapeHash),
+        };
+      }).sort((x, y) => x.hammingFromAnchor - y.hammingFromAnchor),
+    });
+  }
+  // Largest clusters first; ties broken by lowest fingerprint. The comparator
+  // returns 0 for equal keys so the ordering is consistent.
+  out.sort((a, b) => {
+    const bySize = b.symbols.length - a.symbols.length;
+    if (bySize !== 0) return bySize;
+    if (a.fingerprint === b.fingerprint) return 0;
+    return a.fingerprint < b.fingerprint ? -1 : 1;
+  });
+  // Apply the cap after sorting so it keeps the largest clusters.
+  return out.length > maxClusters ? out.slice(0, Math.max(0, maxClusters)) : out;
+}

package/src/indexer/skeleton.ts ADDED Viewed

@@ -0,0 +1,169 @@
+/**
+ * Deterministic skeleton renderer (AI-agent optimization §3).
+ *
+ * Renders a file as a *structural skeleton*: every symbol's header (signature)
+ * is kept, bodies are collapsed to a fold marker carrying the exact collapsed
+ * line count. This is deterministic SOURCE ELISION — not summarization — so it
+ * stays inside Seer's zero-AI / reproducible contract: the same DB + same file
+ * bytes always render byte-identical output.
+ *
+ * Inputs are entirely owned by Seer already: per-symbol line ranges and kinds
+ * from the index, plus the file bytes on disk (read only for the header lines
+ * and an optional focused body). Nesting is reconstructed from line-range
+ * containment, so it works for every language without per-grammar logic.
+ */
+import fs from 'fs';
+import type { Store } from '../db/store.js';
+import type { SymbolRow } from '../types.js';
+/**
+ * Outcome of buildSkeleton. On failure only `ok: false` and `reason` are
+ * set; on success every other field is populated.
+ */
+export interface SkeletonResult {
+  /** False when no indexed file matched or the file could not be read. */
+  ok: boolean;
+  /** Path of the matched file as stored in the index. */
+  file?: string;
+  /** Relative path of the matched file as stored in the index. */
+  relPath?: string;
+  /** Language recorded for the matched file. */
+  language?: string;
+  /** Number of symbol rows fetched for the file. */
+  symbolCount?: number;
+  /** The requested focus symbol, or null when none was requested. */
+  focus?: string | null;
+  /** Rendered skeleton text, lines joined with '\n'. */
+  skeleton?: string;
+  /** Human-readable failure explanation (set only when `ok` is false). */
+  reason?: string;
+}
+/** Kinds whose bodies are worth collapsing into a fold marker. */
+const BODY_KINDS = new Set(['function', 'method', 'constructor']);
+/** One symbol in the containment forest built from line ranges. */
+interface Node {
+  /** The indexed symbol this node represents. */
+  row: SymbolRow;
+  /** Symbols whose line range lies inside this symbol's range. */
+  children: Node[];
+}
+/** Canonical form for path comparison: forward slashes, lower case. */
+function norm(p: string): string {
+  const forwardSlashed = p.split('\\').join('/');
+  return forwardSlashed.toLowerCase();
+}
+/**
+ * Resolve `file` to an indexed file row.
+ *
+ * Two passes over store.listFiles(), both on normalized paths:
+ *   1. exact match against the row's path or relPath;
+ *   2. suffix match on a `/` boundary (the query is given a leading `/` if it
+ *      lacks one), so a trailing path fragment is enough.
+ * The first row satisfying a pass wins; null when neither pass matches.
+ */
+function matchFile(
+  store: Store,
+  file: string,
+): { id: number; path: string; relPath: string; language: string } | null {
+  const wanted = norm(file);
+  const indexed = store.listFiles();
+  for (const f of indexed) {
+    if (norm(f.path) === wanted || norm(f.relPath) === wanted) return f;
+  }
+  // Boundary-aligned suffix: "/x.ts" must not match "/prefix-x.ts".
+  const suffix = wanted.startsWith('/') ? wanted : '/' + wanted;
+  for (const f of indexed) {
+    if (norm(f.path).endsWith(suffix) || norm(f.relPath).endsWith(suffix)) return f;
+  }
+  return null;
+}
+/**
+ * Build the containment forest from line ranges: each symbol is parented to
+ * the tightest symbol whose range encloses it.
+ *
+ * Rows are visited by start line ascending, then end line descending, then
+ * id, so a container is always seen before its members. A stack holds the
+ * chain of currently open containers.
+ */
+function buildForest(rows: SymbolRow[]): Node[] {
+  const ordered = rows.slice();
+  ordered.sort(
+    (a, b) => a.lineStart - b.lineStart || b.lineEnd - a.lineEnd || a.id - b.id,
+  );
+  const encloses = (outer: SymbolRow, inner: SymbolRow): boolean =>
+    outer.lineStart <= inner.lineStart &&
+    inner.lineEnd <= outer.lineEnd &&
+    outer !== inner;
+  const roots: Node[] = [];
+  const open: Node[] = [];
+  for (const row of ordered) {
+    const node: Node = { row, children: [] };
+    // Close every open container that does not enclose this symbol.
+    let top = open[open.length - 1];
+    while (top !== undefined && !encloses(top.row, row)) {
+      open.pop();
+      top = open[open.length - 1];
+    }
+    const siblings = top === undefined ? roots : top.children;
+    siblings.push(node);
+    open.push(node);
+  }
+  return roots;
+}
+/**
+ * Display header for a symbol: the stored signature when it is non-empty,
+ * otherwise the source line at `row.lineStart` (read as 1-based). Either way
+ * the text is trimmed and a trailing `{` block opener is removed.
+ */
+function headerFor(row: SymbolRow, lines: string[]): string {
+  const stripOpener = (text: string): string => text.replace(/\s*\{\s*$/, '').trim();
+  const stored = row.signature?.trim();
+  if (stored) return stripOpener(stored);
+  const firstLine = lines[row.lineStart - 1] ?? '';
+  return stripOpener(firstLine.trim());
+}
+/**
+ * Append the skeleton lines for `node` (and, for containers, its subtree) to
+ * `out`, indented two spaces per nesting level.
+ *
+ *   - Focused symbol (name or qualified name equals `focus`): header marked
+ *     with "◀ focus", then its source lines verbatim; children are not
+ *     rendered separately.
+ *   - Symbol with children: header, then each child one level deeper.
+ *   - Leaf of a body kind with at least one line between its first and last
+ *     line: header plus a fold marker carrying that line count.
+ *   - Any other leaf: header only.
+ */
+function render(
+  node: Node,
+  lines: string[],
+  depth: number,
+  focus: string | null,
+  out: string[],
+): void {
+  const row = node.row;
+  const pad = '  '.repeat(depth);
+  const title = `${pad}${headerFor(row, lines)}  [L${row.lineStart}-${row.lineEnd}]`;
+  const focused = focus != null && (focus === row.name || focus === row.qualifiedName);
+  if (focused) {
+    // Expanded: the caller asked for this symbol, so show its real source.
+    out.push(`${title}  ◀ focus`);
+    lines
+      .slice(row.lineStart - 1, row.lineEnd)
+      .forEach(sourceLine => { out.push(`${pad}  ${sourceLine}`); });
+    return;
+  }
+  if (node.children.length > 0) {
+    // Container: header, then its members.
+    out.push(title);
+    node.children.forEach(child => { render(child, lines, depth + 1, focus, out); });
+    return;
+  }
+  // Leaf: lines strictly between the first and last line of the symbol.
+  const folded = row.lineEnd - row.lineStart - 1;
+  const collapsible = BODY_KINDS.has(row.kind) && folded > 0;
+  out.push(collapsible ? `${title}  { … ${folded} lines … }` : title);
+}
+/**
+ * Render the structural skeleton of one indexed file.
+ *
+ * Resolves `file` against the index, reads the file from disk, fetches up to
+ * 5000 symbol rows for it, nests them by line-range containment and renders
+ * one header line per symbol (bodies folded, except for the focus symbol).
+ *
+ * @param store Index store providing the file list and per-file symbols.
+ * @param file  Absolute path, relative path, or trailing path fragment.
+ * @param opts  focusSymbol — name or qualified name of a symbol to expand.
+ * @returns `ok: false` with a `reason` when no file matches or it cannot be
+ *          read; otherwise the file metadata and the rendered skeleton.
+ */
+export function buildSkeleton(
+  store: Store,
+  file: string,
+  opts: { focusSymbol?: string } = {},
+): SkeletonResult {
+  const match = matchFile(store, file);
+  if (match === null) {
+    return { ok: false, reason: `no indexed file matching "${file}"` };
+  }
+  let text: string;
+  try {
+    text = fs.readFileSync(match.path, 'utf8');
+  } catch (err) {
+    return { ok: false, reason: `cannot read ${match.path}: ${(err as Error).message}` };
+  }
+  // Drop a leading BOM so line 1 matches the indexer's view.
+  if (text.charCodeAt(0) === 0xfeff) text = text.slice(1);
+  const lines = text.split(/\r?\n/);
+  const rows = store.listSymbolsInFile(match.path, 5000);
+  const focus = opts.focusSymbol ?? null;
+  const ruleWidth = Math.min(60, Math.max(20, match.relPath.length + 16));
+  const out: string[] = [
+    `${match.relPath}  (${match.language}, ${rows.length} symbols)`,
+    '─'.repeat(ruleWidth),
+  ];
+  buildForest(rows).forEach(root => { render(root, lines, 0, focus, out); });
+  if (rows.length === 0) out.push('(no symbols indexed in this file)');
+  return {
+    ok: true,
+    file: match.path,
+    relPath: match.relPath,
+    language: match.language,
+    symbolCount: rows.length,
+    focus,
+    skeleton: out.join('\n'),
+  };
+}