@mneme-ai/core 0.8.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (142) hide show
  1. package/README.md +31 -0
  2. package/dist/correlate/index.d.ts +44 -0
  3. package/dist/correlate/index.d.ts.map +1 -0
  4. package/dist/correlate/index.js +21 -0
  5. package/dist/correlate/index.js.map +1 -0
  6. package/dist/enrich/index.d.ts +35 -0
  7. package/dist/enrich/index.d.ts.map +1 -0
  8. package/dist/enrich/index.js +69 -0
  9. package/dist/enrich/index.js.map +1 -0
  10. package/dist/entities/cosine-clones.d.ts +6 -0
  11. package/dist/entities/cosine-clones.d.ts.map +1 -0
  12. package/dist/entities/cosine-clones.js +142 -0
  13. package/dist/entities/cosine-clones.js.map +1 -0
  14. package/dist/entities/cosine-clones.test.d.ts +2 -0
  15. package/dist/entities/cosine-clones.test.d.ts.map +1 -0
  16. package/dist/entities/cosine-clones.test.js +109 -0
  17. package/dist/entities/cosine-clones.test.js.map +1 -0
  18. package/dist/entities/index.d.ts +74 -0
  19. package/dist/entities/index.d.ts.map +1 -0
  20. package/dist/entities/index.js +24 -0
  21. package/dist/entities/index.js.map +1 -0
  22. package/dist/entities/python-parser.d.ts +16 -0
  23. package/dist/entities/python-parser.d.ts.map +1 -0
  24. package/dist/entities/python-parser.js +248 -0
  25. package/dist/entities/python-parser.js.map +1 -0
  26. package/dist/entities/typescript-parser.d.ts +18 -0
  27. package/dist/entities/typescript-parser.d.ts.map +1 -0
  28. package/dist/entities/typescript-parser.js +220 -0
  29. package/dist/entities/typescript-parser.js.map +1 -0
  30. package/dist/entities/typescript-parser.test.d.ts +2 -0
  31. package/dist/entities/typescript-parser.test.d.ts.map +1 -0
  32. package/dist/entities/typescript-parser.test.js +103 -0
  33. package/dist/entities/typescript-parser.test.js.map +1 -0
  34. package/dist/git/blame.d.ts +9 -0
  35. package/dist/git/blame.d.ts.map +1 -0
  36. package/dist/git/blame.js +56 -0
  37. package/dist/git/blame.js.map +1 -0
  38. package/dist/git/exec.d.ts +13 -0
  39. package/dist/git/exec.d.ts.map +1 -0
  40. package/dist/git/exec.js +40 -0
  41. package/dist/git/exec.js.map +1 -0
  42. package/dist/git/github.d.ts +62 -0
  43. package/dist/git/github.d.ts.map +1 -0
  44. package/dist/git/github.js +115 -0
  45. package/dist/git/github.js.map +1 -0
  46. package/dist/git/github.test.d.ts +2 -0
  47. package/dist/git/github.test.d.ts.map +1 -0
  48. package/dist/git/github.test.js +86 -0
  49. package/dist/git/github.test.js.map +1 -0
  50. package/dist/git/gitlab.d.ts +66 -0
  51. package/dist/git/gitlab.d.ts.map +1 -0
  52. package/dist/git/gitlab.js +121 -0
  53. package/dist/git/gitlab.js.map +1 -0
  54. package/dist/git/gitlab.test.d.ts +2 -0
  55. package/dist/git/gitlab.test.d.ts.map +1 -0
  56. package/dist/git/gitlab.test.js +122 -0
  57. package/dist/git/gitlab.test.js.map +1 -0
  58. package/dist/git/index.d.ts +7 -0
  59. package/dist/git/index.d.ts.map +1 -0
  60. package/dist/git/index.js +7 -0
  61. package/dist/git/index.js.map +1 -0
  62. package/dist/git/log.d.ts +11 -0
  63. package/dist/git/log.d.ts.map +1 -0
  64. package/dist/git/log.js +107 -0
  65. package/dist/git/log.js.map +1 -0
  66. package/dist/git/log.test.d.ts +2 -0
  67. package/dist/git/log.test.d.ts.map +1 -0
  68. package/dist/git/log.test.js +88 -0
  69. package/dist/git/log.test.js.map +1 -0
  70. package/dist/git/repo.d.ts +12 -0
  71. package/dist/git/repo.d.ts.map +1 -0
  72. package/dist/git/repo.js +50 -0
  73. package/dist/git/repo.js.map +1 -0
  74. package/dist/git/repo.test.d.ts +2 -0
  75. package/dist/git/repo.test.d.ts.map +1 -0
  76. package/dist/git/repo.test.js +35 -0
  77. package/dist/git/repo.test.js.map +1 -0
  78. package/dist/index.d.ts +10 -0
  79. package/dist/index.d.ts.map +1 -0
  80. package/dist/index.js +10 -0
  81. package/dist/index.js.map +1 -0
  82. package/dist/indexer/index.d.ts +2 -0
  83. package/dist/indexer/index.d.ts.map +1 -0
  84. package/dist/indexer/index.js +2 -0
  85. package/dist/indexer/index.js.map +1 -0
  86. package/dist/indexer/indexer.d.ts +22 -0
  87. package/dist/indexer/indexer.d.ts.map +1 -0
  88. package/dist/indexer/indexer.js +107 -0
  89. package/dist/indexer/indexer.js.map +1 -0
  90. package/dist/indexer/indexer.test.d.ts +2 -0
  91. package/dist/indexer/indexer.test.d.ts.map +1 -0
  92. package/dist/indexer/indexer.test.js +80 -0
  93. package/dist/indexer/indexer.test.js.map +1 -0
  94. package/dist/retrieve/index.d.ts +3 -0
  95. package/dist/retrieve/index.d.ts.map +1 -0
  96. package/dist/retrieve/index.js +3 -0
  97. package/dist/retrieve/index.js.map +1 -0
  98. package/dist/retrieve/rerank.d.ts +44 -0
  99. package/dist/retrieve/rerank.d.ts.map +1 -0
  100. package/dist/retrieve/rerank.js +68 -0
  101. package/dist/retrieve/rerank.js.map +1 -0
  102. package/dist/retrieve/rerank.test.d.ts +2 -0
  103. package/dist/retrieve/rerank.test.d.ts.map +1 -0
  104. package/dist/retrieve/rerank.test.js +52 -0
  105. package/dist/retrieve/rerank.test.js.map +1 -0
  106. package/dist/retrieve/search.d.ts +31 -0
  107. package/dist/retrieve/search.d.ts.map +1 -0
  108. package/dist/retrieve/search.js +170 -0
  109. package/dist/retrieve/search.js.map +1 -0
  110. package/dist/retrieve/search.test.d.ts +2 -0
  111. package/dist/retrieve/search.test.d.ts.map +1 -0
  112. package/dist/retrieve/search.test.js +105 -0
  113. package/dist/retrieve/search.test.js.map +1 -0
  114. package/dist/store/index.d.ts +3 -0
  115. package/dist/store/index.d.ts.map +1 -0
  116. package/dist/store/index.js +3 -0
  117. package/dist/store/index.js.map +1 -0
  118. package/dist/store/schema.d.ts +11 -0
  119. package/dist/store/schema.d.ts.map +1 -0
  120. package/dist/store/schema.js +129 -0
  121. package/dist/store/schema.js.map +1 -0
  122. package/dist/store/sqlite.d.ts +51 -0
  123. package/dist/store/sqlite.d.ts.map +1 -0
  124. package/dist/store/sqlite.js +262 -0
  125. package/dist/store/sqlite.js.map +1 -0
  126. package/dist/store/sqlite.test.d.ts +2 -0
  127. package/dist/store/sqlite.test.d.ts.map +1 -0
  128. package/dist/store/sqlite.test.js +128 -0
  129. package/dist/store/sqlite.test.js.map +1 -0
  130. package/dist/types.d.ts +115 -0
  131. package/dist/types.d.ts.map +1 -0
  132. package/dist/types.js +6 -0
  133. package/dist/types.js.map +1 -0
  134. package/dist/util/index.d.ts +15 -0
  135. package/dist/util/index.d.ts.map +1 -0
  136. package/dist/util/index.js +65 -0
  137. package/dist/util/index.js.map +1 -0
  138. package/dist/util/index.test.d.ts +2 -0
  139. package/dist/util/index.test.d.ts.map +1 -0
  140. package/dist/util/index.test.js +37 -0
  141. package/dist/util/index.test.js.map +1 -0
  142. package/package.json +62 -0
package/README.md ADDED
@@ -0,0 +1,31 @@
1
+ # @mneme-ai/core
2
+
3
+ Core indexing, retrieval, and graph engine for [Mneme](https://github.com/patsa2561-art/mneme-ai).
4
+
5
+ ```ts
6
+ import { git, indexer, store, retrieve } from "@mneme-ai/core";
7
+
8
+ const s = new store.MnemeStore("./.mneme/mneme.db");
9
+ const idx = new indexer.Indexer({ cwd: process.cwd(), store: s });
10
+ await idx.run();
11
+
12
+ const results = await retrieve.search("why does parseAmount use try/catch?", {
13
+ store: s,
14
+ topK: 5,
15
+ });
16
+ ```
17
+
18
+ ## What's in here
19
+
20
+ - `git/` — log/blame parser, repo metadata, GitHub + GitLab adapters
21
+ - `store/` — better-sqlite3 wrapper with FTS5 + BLOB embeddings
22
+ - `indexer/` — chunker + embedder driver
23
+ - `retrieve/` — hybrid search (BM25 + vector cosine fused via Reciprocal Rank Fusion) + reranker contracts
24
+ - `correlate/` — incident correlation contracts (Phase 3)
25
+ - `entities/` — symbol-level memory contracts (Phase 2)
26
+
27
+ See the [main README](https://github.com/patsa2561-art/mneme-ai#readme) for the full picture and [ARCHITECTURE.md](https://github.com/patsa2561-art/mneme-ai/blob/main/ARCHITECTURE.md) for the data-flow diagram.
28
+
29
+ ## License
30
+
31
+ MIT.
@@ -0,0 +1,44 @@
1
+ /**
2
+ * Phase 3 — Error correlation engine.
3
+ *
4
+ * The differentiator. Given:
5
+ * - commits (from git)
6
+ * - incidents (from pager / manual JSON)
7
+ * - entities (phase 2)
8
+ *
9
+ * produce Correlation rows that answer:
10
+ * "every time PaymentService changes, OrderQueue throws within 48h"
11
+ *
12
+ * Phase 3 will fill in:
13
+ * - temporal correlation (commit window → incident spike)
14
+ * - structural correlation (file overlap, call-graph proximity)
15
+ * - semantic correlation (commit message vs stack trace embeddings)
16
+ *
17
+ * For now this module exports the contracts that the @mneme-ai/correlator package implements.
18
+ */
19
+ import type { Commit, Correlation, Incident } from "../types.js";
20
/**
 * Contract for an engine that links commits to incidents.
 * This module only declares the shape; it ships no implementation.
 */
export interface CorrelationEngine {
    /** Identifier of the engine implementation. NOTE(review): presumably used for attribution/logging — confirm. */
    readonly name: string;
    /** Build correlations between a window of commits and a window of incidents. */
    correlate(input: CorrelateInput): Promise<Correlation[]>;
}
25
/** Input handed to `CorrelationEngine.correlate`. */
export interface CorrelateInput {
    /** Commits to correlate against the incidents. */
    commits: Commit[];
    /** Incidents that may be attributed to those commits. */
    incidents: Incident[];
    /**
     * Window in milliseconds — how long after a commit an incident may be attributed to it.
     * NOTE(review): presumably falls back to DEFAULT_WINDOW_MS when omitted — confirm in the engine implementation.
     */
    windowMs?: number;
}
31
/** Contract for a component that loads incidents from one origin. */
export interface IncidentAdapter {
    /** Which of the supported incident origins this adapter represents. */
    readonly source: "sentry" | "datadog" | "manual" | "github" | "log";
    /** Load incidents according to `opts`; resolves to the list of incidents found. */
    fetch(opts: FetchIncidentOptions): Promise<Incident[]>;
}
35
/** Options for `IncidentAdapter.fetch`. Every field is optional. */
export interface FetchIncidentOptions {
    /** Lower time bound. NOTE(review): presumably an ISO-8601 timestamp — confirm against the adapters. */
    since?: string;
    /** Upper time bound. NOTE(review): same format assumption as `since` — confirm. */
    until?: string;
    /** Project identifier at the incident source. NOTE(review): which adapters require it is not visible here. */
    projectId?: string;
    /** Credential for the incident source's API. */
    apiKey?: string;
    /** Organization slug at the incident source. NOTE(review): which adapters use it is not visible here. */
    orgSlug?: string;
}
42
/** Default temporal window: 7 days, expressed in milliseconds. */
export declare const DEFAULT_WINDOW_MS: number;
44
+ //# sourceMappingURL=index.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../src/correlate/index.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;GAiBG;AAEH,OAAO,KAAK,EAAE,MAAM,EAAE,WAAW,EAAE,QAAQ,EAAE,MAAM,aAAa,CAAC;AAEjE,MAAM,WAAW,iBAAiB;IAChC,QAAQ,CAAC,IAAI,EAAE,MAAM,CAAC;IACtB,gFAAgF;IAChF,SAAS,CAAC,KAAK,EAAE,cAAc,GAAG,OAAO,CAAC,WAAW,EAAE,CAAC,CAAC;CAC1D;AAED,MAAM,WAAW,cAAc;IAC7B,OAAO,EAAE,MAAM,EAAE,CAAC;IAClB,SAAS,EAAE,QAAQ,EAAE,CAAC;IACtB,4FAA4F;IAC5F,QAAQ,CAAC,EAAE,MAAM,CAAC;CACnB;AAED,MAAM,WAAW,eAAe;IAC9B,QAAQ,CAAC,MAAM,EAAE,QAAQ,GAAG,SAAS,GAAG,QAAQ,GAAG,QAAQ,GAAG,KAAK,CAAC;IACpE,KAAK,CAAC,IAAI,EAAE,oBAAoB,GAAG,OAAO,CAAC,QAAQ,EAAE,CAAC,CAAC;CACxD;AAED,MAAM,WAAW,oBAAoB;IACnC,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,OAAO,CAAC,EAAE,MAAM,CAAC;CAClB;AAED,uCAAuC;AACvC,eAAO,MAAM,iBAAiB,QAA0B,CAAC"}
@@ -0,0 +1,21 @@
1
+ /**
2
+ * Phase 3 — Error correlation engine.
3
+ *
4
+ * The differentiator. Given:
5
+ * - commits (from git)
6
+ * - incidents (from pager / manual JSON)
7
+ * - entities (phase 2)
8
+ *
9
+ * produce Correlation rows that answer:
10
+ * "every time PaymentService changes, OrderQueue throws within 48h"
11
+ *
12
+ * Phase 3 will fill in:
13
+ * - temporal correlation (commit window → incident spike)
14
+ * - structural correlation (file overlap, call-graph proximity)
15
+ * - semantic correlation (commit message vs stack trace embeddings)
16
+ *
17
+ * For now this module exports the contracts that the @mneme-ai/correlator package implements.
18
+ */
19
/** Default temporal attribution window: seven days, expressed in milliseconds. */
export const DEFAULT_WINDOW_MS = 1000 * 60 * 60 * 24 * 7;
21
+ //# sourceMappingURL=index.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"index.js","sourceRoot":"","sources":["../../src/correlate/index.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;GAiBG;AA8BH,uCAAuC;AACvC,MAAM,CAAC,MAAM,iBAAiB,GAAG,CAAC,GAAG,EAAE,GAAG,EAAE,GAAG,EAAE,GAAG,IAAI,CAAC"}
@@ -0,0 +1,35 @@
1
+ /**
2
+ * Enrichment — synthesize missing context for commits with poor messages.
3
+ *
4
+ * The honest framing: we never *replace* git history. The original commit
5
+ * message stays untouched. We add a `synthesized_note` row keyed by commit
6
+ * hash, marked with the model that produced it, and search treats it as a
7
+ * separate kind of chunk so users can always tell synthesized text from
8
+ * original text.
9
+ *
10
+ * The system prompt is intentionally cautious — it tells the model to *infer
11
+ * conservatively from the diff* and to admit uncertainty rather than
12
+ * confabulate. We mark notes that fall back to "unknown" so they don't
13
+ * pollute search.
14
+ */
15
+ import type { Commit } from "../types.js";
16
/**
 * System prompt for commit-note synthesis. It asks for a 2-4 sentence note on
 * the likely motivation of a change, inferred conservatively from the diff,
 * and prescribes an exact fallback sentence for when intent cannot be inferred.
 */
export declare const SYNTHESIZE_SYSTEM_PROMPT = "You are reading a git commit and the diff it produced.\nYour job is to write a 2-4 sentence note explaining the most likely WHY behind the change,\ninferred CONSERVATIVELY from the diff alone \u2014 not invented.\n\nRules:\n1. Refer only to what the diff and adjacent commits actually contain. Never make up facts.\n2. If the diff is too small or generic to infer intent, say exactly: \"Cannot determine purpose from diff alone.\"\n3. Use plain prose, no bullet lists, no markdown headings.\n4. Avoid filler (\"This commit changes...\"). Open with the inferred motivation.\n5. Mention specific function/file names from the diff when they help; never invent names.\n\nOutput ONLY the note. No preamble, no signoff, no quotes.";
17
/** Everything `buildSynthesisPrompt` needs to describe one commit to the model. */
export interface CommitContextForSynthesis {
    /** The commit whose message is being enriched. */
    commit: Commit;
    /** Diff text for the commit; the prompt includes at most its first 4000 characters. */
    diff: string;
    /** Subjects of a few neighboring commits, for temporal context; only the first 5 are used. */
    neighborSubjects?: string[];
}
23
/**
 * Build the prompt text for one commit: header lines (commit id, author, date,
 * subject, body when non-blank, up to 8 file paths, up to 5 neighbor subjects)
 * followed by the diff, cut to 4000 characters.
 */
export declare function buildSynthesisPrompt(input: CommitContextForSynthesis): string;
24
/**
 * Heuristic: a commit "needs healing" when its message gives no real signal.
 *
 * Evaluated in this order:
 * 1. A trimmed body longer than 30 characters counts as real context: returns false.
 * 2. A trimmed subject shorter than `subjectMinLen` characters (default 20): returns true.
 * 3. A subject that consists solely of a generic word ("update", "fix", "wip", etc.): returns true.
 * 4. Anything else: returns false.
 *
 * Tunable so users can heal the worst offenders first.
 */
export declare function needsHealing(commit: Commit, subjectMinLen?: number): boolean;
33
/**
 * Return true if the synthesized text is a polite "don't know" — should not be persisted as memory.
 * That covers an empty note, a note shorter than 30 characters after trimming,
 * a note containing the prompt's fallback sentence, or one that opens with
 * "I cannot" / "Unable to" (all compared case-insensitively).
 */
export declare function isUncertain(note: string): boolean;
35
+ //# sourceMappingURL=index.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../src/enrich/index.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;GAaG;AACH,OAAO,KAAK,EAAE,MAAM,EAAE,MAAM,aAAa,CAAC;AAE1C,eAAO,MAAM,wBAAwB,ytBAWqB,CAAC;AAE3D,MAAM,WAAW,yBAAyB;IACxC,MAAM,EAAE,MAAM,CAAC;IACf,IAAI,EAAE,MAAM,CAAC;IACb,yDAAyD;IACzD,gBAAgB,CAAC,EAAE,MAAM,EAAE,CAAC;CAC7B;AAED,wBAAgB,oBAAoB,CAAC,KAAK,EAAE,yBAAyB,GAAG,MAAM,CAqB7E;AAED;;;;;;;GAOG;AACH,wBAAgB,YAAY,CAAC,MAAM,EAAE,MAAM,EAAE,aAAa,SAAK,GAAG,OAAO,CAQxE;AAED,wGAAwG;AACxG,wBAAgB,WAAW,CAAC,IAAI,EAAE,MAAM,GAAG,OAAO,CAOjD"}
@@ -0,0 +1,69 @@
1
/**
 * System prompt for commit-note synthesis.
 * The fallback sentence in rule 2 is the phrase `isUncertain` searches for
 * (case-insensitively), so the two must be changed together.
 */
export const SYNTHESIZE_SYSTEM_PROMPT = `You are reading a git commit and the diff it produced.
Your job is to write a 2-4 sentence note explaining the most likely WHY behind the change,
inferred CONSERVATIVELY from the diff alone — not invented.

Rules:
1. Refer only to what the diff and adjacent commits actually contain. Never make up facts.
2. If the diff is too small or generic to infer intent, say exactly: "Cannot determine purpose from diff alone."
3. Use plain prose, no bullet lists, no markdown headings.
4. Avoid filler ("This commit changes..."). Open with the inferred motivation.
5. Mention specific function/file names from the diff when they help; never invent names.

Output ONLY the note. No preamble, no signoff, no quotes.`;
13
/**
 * Build the prompt text that describes one commit to the synthesis model.
 *
 * Layout: commit id, author, date and subject; then the body (only when
 * non-blank), up to 8 changed file paths and up to 5 neighboring commit
 * subjects; then a blank line, a fixed "Diff" label and the diff itself.
 *
 * The diff is cut to at most 4000 UTF-16 code units. The cut never lands in
 * the middle of a surrogate pair, so the prompt cannot end the diff on a lone
 * (invalid) half character.
 *
 * @param input - The commit, its diff, and optional neighbor subjects.
 * @returns The prompt as a single newline-joined string.
 */
export function buildSynthesisPrompt(input) {
    const MAX_FILES = 8;
    const MAX_NEIGHBORS = 5;
    const MAX_DIFF_CHARS = 4000;
    const { commit, diff, neighborSubjects = [] } = input;
    // Tolerate a commit without a file list, mirroring the optional-body guard below.
    const files = commit.files ?? [];
    const lines = [];
    lines.push(`Commit: ${commit.shortHash || commit.hash.slice(0, 7)}`);
    lines.push(`Author: ${commit.authorName} <${commit.authorEmail}>`);
    lines.push(`Date: ${commit.authorDate}`);
    lines.push(`Subject: ${commit.subject || "(empty)"}`);
    if (commit.body && commit.body.trim()) {
        lines.push(`Body: ${commit.body}`);
    }
    if (files.length) {
        lines.push(`Files: ${files.slice(0, MAX_FILES).join(", ")}${files.length > MAX_FILES ? " (+more)" : ""}`);
    }
    if (neighborSubjects.length) {
        lines.push(`Neighboring commits:`);
        for (const s of neighborSubjects.slice(0, MAX_NEIGHBORS))
            lines.push(` - ${s}`);
    }
    lines.push("");
    // The label is emitted unconditionally, even when the diff fits without truncation.
    lines.push("Diff (truncated to 4000 chars):");
    if (diff.length > MAX_DIFF_CHARS) {
        let cut = MAX_DIFF_CHARS;
        // If the last kept code unit is a high surrogate, its partner would be
        // cut off; drop it too rather than emit half a character.
        const lastKept = diff.charCodeAt(cut - 1);
        if (lastKept >= 0xd800 && lastKept <= 0xdbff)
            cut -= 1;
        lines.push(diff.slice(0, cut) + "\n…(truncated)");
    }
    else {
        lines.push(diff);
    }
    return lines.join("\n");
}
36
/**
 * Heuristic: a commit "needs healing" when its message gives no real signal.
 *
 * Evaluated in this order:
 * 1. A trimmed body longer than 30 characters counts as real context: false.
 * 2. A trimmed subject shorter than `subjectMinLen` characters: true.
 * 3. A subject that consists solely of a generic word or phrase ("update",
 *    "fix", "wip", etc., optionally followed by a period): true.
 * 4. Anything else: false.
 *
 * A missing subject or body is treated as empty instead of throwing, in line
 * with how `buildSynthesisPrompt` treats both fields as optional.
 *
 * Tunable so users can heal the worst offenders first.
 *
 * @param commit - Commit whose `subject` and `body` are inspected.
 * @param subjectMinLen - Subjects shorter than this are considered uninformative.
 * @returns True when the commit message should be enriched.
 */
export function needsHealing(commit, subjectMinLen = 20) {
    const subject = (commit.subject ?? "").trim();
    const body = (commit.body ?? "").trim();
    if (body.length > 30)
        return false; // already has a body
    if (subject.length < subjectMinLen)
        return true;
    // Only reachable for long-enough subjects, i.e. when the caller lowered subjectMinLen.
    const generic = /^(?:wip|update[sd]?|fix|fixed|fixes|tweak|small\s+change|misc|stuff|refactor|cleanup|chore|adjust|tweaks)\.?$/i;
    return generic.test(subject);
}
56
/**
 * Decide whether a synthesized note is effectively a "don't know" and should
 * therefore not be persisted as memory.
 *
 * True for an empty note, a note shorter than 30 characters after trimming,
 * a note containing the prompt's fallback sentence, or one that opens with
 * "i cannot" / "unable to" (all compared in lower case).
 */
export function isUncertain(note) {
    const normalized = note ? note.trim().toLowerCase() : "";
    return (normalized.length < 30 ||
        normalized.includes("cannot determine purpose from diff alone") ||
        normalized.startsWith("i cannot") ||
        normalized.startsWith("unable to"));
}
69
+ //# sourceMappingURL=index.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"index.js","sourceRoot":"","sources":["../../src/enrich/index.ts"],"names":[],"mappings":"AAgBA,MAAM,CAAC,MAAM,wBAAwB,GAAG;;;;;;;;;;;0DAWkB,CAAC;AAS3D,MAAM,UAAU,oBAAoB,CAAC,KAAgC;IACnE,MAAM,EAAE,MAAM,EAAE,IAAI,EAAE,gBAAgB,GAAG,EAAE,EAAE,GAAG,KAAK,CAAC;IACtD,MAAM,KAAK,GAAa,EAAE,CAAC;IAC3B,KAAK,CAAC,IAAI,CAAC,WAAW,MAAM,CAAC,SAAS,IAAI,MAAM,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC,EAAE,CAAC,CAAC,EAAE,CAAC,CAAC;IACrE,KAAK,CAAC,IAAI,CAAC,WAAW,MAAM,CAAC,UAAU,KAAK,MAAM,CAAC,WAAW,GAAG,CAAC,CAAC;IACnE,KAAK,CAAC,IAAI,CAAC,WAAW,MAAM,CAAC,UAAU,EAAE,CAAC,CAAC;IAC3C,KAAK,CAAC,IAAI,CAAC,YAAY,MAAM,CAAC,OAAO,IAAI,SAAS,EAAE,CAAC,CAAC;IACtD,IAAI,MAAM,CAAC,IAAI,IAAI,MAAM,CAAC,IAAI,CAAC,IAAI,EAAE,EAAE,CAAC;QACtC,KAAK,CAAC,IAAI,CAAC,SAAS,MAAM,CAAC,IAAI,EAAE,CAAC,CAAC;IACrC,CAAC;IACD,IAAI,MAAM,CAAC,KAAK,CAAC,MAAM,EAAE,CAAC;QACxB,KAAK,CAAC,IAAI,CAAC,UAAU,MAAM,CAAC,KAAK,CAAC,KAAK,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,GAAG,MAAM,CAAC,KAAK,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,UAAU,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC;IAC1G,CAAC;IACD,IAAI,gBAAgB,CAAC,MAAM,EAAE,CAAC;QAC5B,KAAK,CAAC,IAAI,CAAC,sBAAsB,CAAC,CAAC;QACnC,KAAK,MAAM,CAAC,IAAI,gBAAgB,CAAC,KAAK,CAAC,CAAC,EAAE,CAAC,CAAC;YAAE,KAAK,CAAC,IAAI,CAAC,OAAO,CAAC,EAAE,CAAC,CAAC;IACvE,CAAC;IACD,KAAK,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;IACf,KAAK,CAAC,IAAI,CAAC,iCAAiC,CAAC,CAAC;IAC9C,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,MAAM,GAAG,IAAI,CAAC,CAAC,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC,EAAE,IAAI,CAAC,GAAG,gBAAgB,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC;IAC/E,OAAO,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;AAC1B,CAAC;AAED;;;;;;;GAOG;AACH,MAAM,UAAU,YAAY,CAAC,MAAc,EAAE,aAAa,GAAG,EAAE;IAC7D,MAAM,OAAO,GAAG,MAAM,CAAC,OAAO,CAAC,IAAI,EAAE,CAAC;IACtC,MAAM,IAAI,GAAG,MAAM,CAAC,IAAI,CAAC,IAAI,EAAE,CAAC;IAChC,IAAI,IAAI,CAAC,MAAM,GAAG,EAAE;QAAE,OAAO,KAAK,CAAC,CAAC,qBAAqB;IACzD,IAAI,OAAO,CAAC,MAAM,GAAG,aAAa;QAAE,OAAO,IAAI,CAAC;IAChD,MAAM,OAAO,GAAG,gHAAgH,CAAC;IACjI,IAAI,OAAO,CAAC,IAAI,CAAC,OAAO,CAAC;QAAE,OAAO,IAAI,CAAC;IACvC,OAAO,KAAK,CAAC;AACf,CAAC;AAED,wGAAwG;AACxG,MAAM,UAAU,WAAW,CAAC,IAAY;
IACtC,IAAI,CAAC,IAAI;QAAE,OAAO,IAAI,CAAC;IACvB,MAAM,CAAC,GAAG,IAAI,CAAC,IAAI,EAAE,CAAC,WAAW,EAAE,CAAC;IACpC,IAAI,CAAC,CAAC,MAAM,GAAG,EAAE;QAAE,OAAO,IAAI,CAAC;IAC/B,IAAI,CAAC,CAAC,QAAQ,CAAC,0CAA0C,CAAC;QAAE,OAAO,IAAI,CAAC;IACxE,IAAI,CAAC,CAAC,UAAU,CAAC,UAAU,CAAC,IAAI,CAAC,CAAC,UAAU,CAAC,WAAW,CAAC;QAAE,OAAO,IAAI,CAAC;IACvE,OAAO,KAAK,CAAC;AACf,CAAC"}
@@ -0,0 +1,6 @@
1
+ import { type CloneDetector, type DetectOptions, type EntityCluster } from "./index.js";
2
/**
 * Clone detector that links entities whose embeddings have a cosine
 * similarity at or above a threshold and reports each connected group of
 * linked entities as one cluster.
 */
export declare class CosineCloneDetector implements CloneDetector {
    /** Fixed identifier of this detector. */
    readonly name = "cosine-connected-components-v1";
    /**
     * Cluster the entities in `opts`. Resolves to an empty list when fewer
     * than two entities carry an embedding.
     */
    detect(opts: DetectOptions): Promise<EntityCluster[]>;
}
6
+ //# sourceMappingURL=cosine-clones.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"cosine-clones.d.ts","sourceRoot":"","sources":["../../src/entities/cosine-clones.ts"],"names":[],"mappings":"AAoBA,OAAO,EAGL,KAAK,aAAa,EAClB,KAAK,aAAa,EAClB,KAAK,aAAa,EACnB,MAAM,YAAY,CAAC;AAMpB,qBAAa,mBAAoB,YAAW,aAAa;IACvD,QAAQ,CAAC,IAAI,oCAAoC;IAE3C,MAAM,CAAC,IAAI,EAAE,aAAa,GAAG,OAAO,CAAC,aAAa,EAAE,CAAC;CAoE5D"}
@@ -0,0 +1,142 @@
1
+ /**
2
+ * Cosine-similarity clone detector.
3
+ *
4
+ * 1. For each entity pair, compute cosine of their embeddings.
5
+ * 2. Build a similarity graph: edge A↔B if cos(A,B) ≥ threshold.
6
+ * 3. Connected-component-cluster the graph (union-find).
7
+ * 4. Drop singletons; cap cluster size at maxClusterSize.
8
+ * 5. Compute cohesion = mean pairwise cosine within cluster.
9
+ *
10
+ * Why connected components and not HDBSCAN: the threshold is the user's lever.
11
+ * Connected components are monotone in the threshold — slide it up, get fewer
12
+ * tighter clusters; slide it down, get bigger looser ones. Users can reason
13
+ * about that. HDBSCAN's `min_cluster_size` and `min_samples` are not
14
+ * intuitive for "I want clones tighter than 0.85".
15
+ *
16
+ * Complexity: O(N²). Fine up to ~5,000 entities (~12 M comparisons, sub-second
17
+ * on a laptop). Beyond that, swap in approximate-NN — same surface, faster core.
18
+ */
19
+ import { createHash } from "node:crypto";
20
+ import { DEFAULT_CLONE_THRESHOLD, DEFAULT_MAX_CLUSTER_SIZE, } from "./index.js";
21
/**
 * Clone detector based on cosine similarity and connected components.
 *
 * Two entities are linked when the cosine of their embeddings is at or above
 * the threshold; entities linked directly or transitively form one cluster.
 */
export class CosineCloneDetector {
    name = "cosine-connected-components-v1";
    /**
     * Detect clusters of similar entities.
     *
     * @param opts - `entities` to scan plus optional `threshold` and
     *   `maxClusterSize`; the module defaults apply when they are omitted.
     * @returns Clusters with at least two members. Each carries an id derived
     *   from its member ids, its cohesion (mean cosine over every pair of
     *   reported members, including pairs below the threshold that were joined
     *   transitively) and its members without their embeddings. Clusters are
     *   ordered largest first, ties broken by higher cohesion.
     */
    async detect(opts) {
        const threshold = opts.threshold ?? DEFAULT_CLONE_THRESHOLD;
        const maxClusterSize = opts.maxClusterSize ?? DEFAULT_MAX_CLUSTER_SIZE;
        // Entities without a usable embedding cannot be compared; ignore them.
        const withVecs = opts.entities.filter(hasEmbedding);
        if (withVecs.length < 2)
            return [];
        // Pre-normalize so that cosine similarity reduces to a dot product.
        const norms = withVecs.map((e) => normalize(e.embedding));
        // Union-find over entity indices; every index starts as its own root.
        const parent = new Int32Array(withVecs.length);
        for (let i = 0; i < parent.length; i++)
            parent[i] = i;
        const find = (x) => {
            while (parent[x] !== x) {
                // Path halving: point x at its grandparent while walking up.
                parent[x] = parent[parent[x]];
                x = parent[x];
            }
            return x;
        };
        const union = (a, b) => {
            const ra = find(a);
            const rb = find(b);
            if (ra !== rb)
                parent[rb] = ra;
        };
        // Pair-wise cosine via dot of normalized vectors. Only edges (pairs at
        // or above the threshold) are stored here, keyed "smaller-larger".
        const simMap = new Map();
        for (let i = 0; i < norms.length; i++) {
            for (let j = i + 1; j < norms.length; j++) {
                const sim = dot(norms[i], norms[j]);
                if (sim >= threshold) {
                    union(i, j);
                    simMap.set(`${i}-${j}`, sim);
                }
            }
        }
        // Group indices by root; indices are visited in ascending order, so
        // every group is itself ascending.
        const groups = new Map();
        for (let i = 0; i < withVecs.length; i++) {
            const root = find(i);
            const g = groups.get(root);
            if (g)
                g.push(i);
            else
                groups.set(root, [i]);
        }
        const clusters = [];
        for (const [, indices] of groups) {
            if (indices.length < 2)
                continue;
            const limited = indices.slice(0, maxClusterSize);
            // A cap below 2 would leave a singleton (or nothing); that is not a clone cluster.
            if (limited.length < 2)
                continue;
            // Back-fill the pairs that were joined only transitively so that
            // cohesion is the mean over ALL member pairs, not just the edges.
            for (let p = 0; p < limited.length; p++) {
                for (let q = p + 1; q < limited.length; q++) {
                    const key = `${limited[p]}-${limited[q]}`;
                    if (!simMap.has(key))
                        simMap.set(key, dot(norms[limited[p]], norms[limited[q]]));
                }
            }
            const cohesion = pairwiseMean(limited, simMap);
            const members = limited.map((i) => stripVec(withVecs[i]));
            clusters.push({
                id: clusterId(members.map((m) => m.id)),
                cohesion,
                members,
            });
        }
        // Ordering: largest cluster first; ties broken by higher cohesion.
        clusters.sort((a, b) => {
            if (b.members.length !== a.members.length)
                return b.members.length - a.members.length;
            return b.cohesion - a.cohesion;
        });
        return clusters;
    }
}
91
/** True when the entity carries a non-empty Float32Array in `embedding`. */
function hasEmbedding(e) {
    const vec = e.embedding;
    if (!(vec instanceof Float32Array))
        return false;
    return vec.length > 0;
}
94
/**
 * Scale a vector to unit Euclidean length.
 * Returns a new Float32Array, except for a zero-length (all-zero or empty)
 * vector, which is returned as-is because it cannot be scaled.
 */
function normalize(v) {
    const sumOfSquares = v.reduce((acc, x) => acc + x * x, 0);
    const length = Math.sqrt(sumOfSquares);
    if (length === 0)
        return v;
    return Float32Array.from(v, (x) => x / length);
}
106
/**
 * Dot product of two vectors.
 * Vectors of different lengths are treated as incomparable and yield 0.
 */
function dot(a, b) {
    if (a.length !== b.length)
        return 0;
    return a.reduce((acc, x, i) => acc + x * b[i], 0);
}
114
/**
 * Mean of the recorded similarities between every pair of the given indices.
 * Similarities are looked up under the key "smaller-larger"; pairs without an
 * entry in `simMap` are left out of the mean. Returns 0 when no pair has one.
 */
function pairwiseMean(indices, simMap) {
    const found = [];
    indices.forEach((first, pos) => {
        for (const second of indices.slice(pos + 1)) {
            const [lo, hi] = first < second ? [first, second] : [second, first];
            const sim = simMap.get(`${lo}-${hi}`);
            if (sim !== undefined)
                found.push(sim);
        }
    });
    if (found.length === 0)
        return 0;
    return found.reduce((acc, s) => acc + s, 0) / found.length;
}
133
/** Shallow-copy an entity without its `embedding` property; the input is not modified. */
function stripVec(e) {
    const { embedding: dropped, ...withoutVec } = e;
    void dropped; // extracted only so it is left out of the copy
    return withoutVec;
}
138
/**
 * Derive a cluster id from its member ids: "c_" followed by the first 12 hex
 * characters of the SHA-1 of the sorted ids joined with "|". Sorting a copy
 * makes the id independent of member order and leaves the input untouched.
 */
function clusterId(memberIds) {
    const canonical = [...memberIds].sort().join("|");
    const digest = createHash("sha1").update(canonical).digest("hex");
    return `c_${digest.slice(0, 12)}`;
}
142
+ //# sourceMappingURL=cosine-clones.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"cosine-clones.js","sourceRoot":"","sources":["../../src/entities/cosine-clones.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;GAiBG;AACH,OAAO,EAAE,UAAU,EAAE,MAAM,aAAa,CAAC;AAEzC,OAAO,EACL,uBAAuB,EACvB,wBAAwB,GAIzB,MAAM,YAAY,CAAC;AAMpB,MAAM,OAAO,mBAAmB;IACrB,IAAI,GAAG,gCAAgC,CAAC;IAEjD,KAAK,CAAC,MAAM,CAAC,IAAmB;QAC9B,MAAM,SAAS,GAAG,IAAI,CAAC,SAAS,IAAI,uBAAuB,CAAC;QAC5D,MAAM,cAAc,GAAG,IAAI,CAAC,cAAc,IAAI,wBAAwB,CAAC;QAEvE,MAAM,QAAQ,GAAG,IAAI,CAAC,QAAQ,CAAC,MAAM,CAAC,YAAY,CAAC,CAAC;QACpD,IAAI,QAAQ,CAAC,MAAM,GAAG,CAAC;YAAE,OAAO,EAAE,CAAC;QAEnC,0CAA0C;QAC1C,MAAM,KAAK,GAAG,QAAQ,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,SAAS,CAAC,CAAC,CAAC,SAAS,CAAC,CAAC,CAAC;QAE1D,wBAAwB;QACxB,MAAM,MAAM,GAAG,IAAI,UAAU,CAAC,QAAQ,CAAC,MAAM,CAAC,CAAC;QAC/C,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,MAAM,CAAC,MAAM,EAAE,CAAC,EAAE;YAAE,MAAM,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC;QACtD,MAAM,IAAI,GAAG,CAAC,CAAS,EAAU,EAAE;YACjC,OAAO,MAAM,CAAC,CAAC,CAAE,KAAK,CAAC,EAAE,CAAC;gBACxB,MAAM,CAAC,CAAC,CAAC,GAAG,MAAM,CAAC,MAAM,CAAC,CAAC,CAAE,CAAE,CAAC;gBAChC,CAAC,GAAG,MAAM,CAAC,CAAC,CAAE,CAAC;YACjB,CAAC;YACD,OAAO,CAAC,CAAC;QACX,CAAC,CAAC;QACF,MAAM,KAAK,GAAG,CAAC,CAAS,EAAE,CAAS,EAAQ,EAAE;YAC3C,MAAM,EAAE,GAAG,IAAI,CAAC,CAAC,CAAC,CAAC;YACnB,MAAM,EAAE,GAAG,IAAI,CAAC,CAAC,CAAC,CAAC;YACnB,IAAI,EAAE,KAAK,EAAE;gBAAE,MAAM,CAAC,EAAE,CAAC,GAAG,EAAE,CAAC;QACjC,CAAC,CAAC;QAEF,2EAA2E;QAC3E,MAAM,MAAM,GAAG,IAAI,GAAG,EAAkB,CAAC,CAAC,iBAAiB;QAC3D,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,KAAK,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;YACtC,KAAK,IAAI,CAAC,GAAG,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,KAAK,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;gBAC1C,MAAM,GAAG,GAAG,GAAG,CAAC,KAAK,CAAC,CAAC,CAAE,EAAE,KAAK,CAAC,CAAC,CAAE,CAAC,CAAC;gBACtC,IAAI,GAAG,IAAI,SAAS,EAAE,CAAC;oBACrB,KAAK,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC;oBACZ,MAAM,CAAC,GAAG,CAAC,GAAG,CAAC,IAAI,CAAC,EAAE,EAAE,GAAG,CAAC,CAAC;gBAC/B,CAAC;YACH,CAAC;QACH,CAAC;QAED,iBAAiB;QACjB,MAAM,MAAM,GAAG,IAAI,GAAG,EAAoB,CAAC;QAC3C,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,QAAQ,CAAC,MAAM,EAAE,CAAC,EAAE,EA
AE,CAAC;YACzC,MAAM,IAAI,GAAG,IAAI,CAAC,CAAC,CAAC,CAAC;YACrB,MAAM,CAAC,GAAG,MAAM,CAAC,GAAG,CAAC,IAAI,CAAC,CAAC;YAC3B,IAAI,CAAC;gBAAE,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;;gBACZ,MAAM,CAAC,GAAG,CAAC,IAAI,EAAE,CAAC,CAAC,CAAC,CAAC,CAAC;QAC7B,CAAC;QAED,MAAM,QAAQ,GAAoB,EAAE,CAAC;QACrC,KAAK,MAAM,CAAC,EAAE,OAAO,CAAC,IAAI,MAAM,EAAE,CAAC;YACjC,IAAI,OAAO,CAAC,MAAM,GAAG,CAAC;gBAAE,SAAS;YACjC,MAAM,OAAO,GAAG,OAAO,CAAC,KAAK,CAAC,CAAC,EAAE,cAAc,CAAC,CAAC;YACjD,MAAM,QAAQ,GAAG,YAAY,CAAC,OAAO,EAAE,MAAM,CAAC,CAAC;YAC/C,MAAM,OAAO,GAAG,OAAO,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,QAAQ,CAAC,QAAQ,CAAC,CAAC,CAAE,CAAC,CAAC,CAAC;YAC3D,QAAQ,CAAC,IAAI,CAAC;gBACZ,EAAE,EAAE,SAAS,CAAC,OAAO,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC;gBACvC,QAAQ;gBACR,OAAO;aACR,CAAC,CAAC;QACL,CAAC;QAED,qEAAqE;QACrE,QAAQ,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE;YACrB,IAAI,CAAC,CAAC,OAAO,CAAC,MAAM,KAAK,CAAC,CAAC,OAAO,CAAC,MAAM;gBAAE,OAAO,CAAC,CAAC,OAAO,CAAC,MAAM,GAAG,CAAC,CAAC,OAAO,CAAC,MAAM,CAAC;YACtF,OAAO,CAAC,CAAC,QAAQ,GAAG,CAAC,CAAC,QAAQ,CAAC;QACjC,CAAC,CAAC,CAAC;QAEH,OAAO,QAAQ,CAAC;IAClB,CAAC;CACF;AAED,SAAS,YAAY,CAAC,CAAS;IAC7B,OAAO,CAAC,CAAC,SAAS,YAAY,YAAY,IAAI,CAAC,CAAC,SAAS,CAAC,MAAM,GAAG,CAAC,CAAC;AACvE,CAAC;AAED,SAAS,SAAS,CAAC,CAAe;IAChC,IAAI,CAAC,GAAG,CAAC,CAAC;IACV,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,CAAC,CAAC,MAAM,EAAE,CAAC,EAAE;QAAE,CAAC,IAAI,CAAC,CAAC,CAAC,CAAE,GAAG,CAAC,CAAC,CAAC,CAAE,CAAC;IACtD,MAAM,IAAI,GAAG,IAAI,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;IAC1B,IAAI,IAAI,KAAK,CAAC;QAAE,OAAO,CAAC,CAAC;IACzB,MAAM,GAAG,GAAG,IAAI,YAAY,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC;IACvC,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,CAAC,CAAC,MAAM,EAAE,CAAC,EAAE;QAAE,GAAG,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,CAAE,GAAG,IAAI,CAAC;IACzD,OAAO,GAAG,CAAC;AACb,CAAC;AAED,SAAS,GAAG,CAAC,CAAe,EAAE,CAAe;IAC3C,IAAI,CAAC,CAAC,MAAM,KAAK,CAAC,CAAC,MAAM;QAAE,OAAO,CAAC,CAAC;IACpC,IAAI,CAAC,GAAG,CAAC,CAAC;IACV,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,CAAC,CAAC,MAAM,EAAE,CAAC,EAAE;QAAE,CAAC,IAAI,CAAC,CAAC,CAAC,CAAE,GAAG,CAAC,CAAC,CAAC,
CAAE,CAAC;IACtD,OAAO,CAAC,CAAC;AACX,CAAC;AAED,SAAS,YAAY,CAAC,OAAiB,EAAE,MAA2B;IAClE,IAAI,OAAO,CAAC,MAAM,GAAG,CAAC;QAAE,OAAO,CAAC,CAAC;IACjC,IAAI,KAAK,GAAG,CAAC,CAAC;IACd,IAAI,KAAK,GAAG,CAAC,CAAC;IACd,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,OAAO,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;QACxC,KAAK,IAAI,CAAC,GAAG,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,OAAO,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;YAC5C,MAAM,CAAC,GAAG,OAAO,CAAC,CAAC,CAAE,CAAC;YACtB,MAAM,CAAC,GAAG,OAAO,CAAC,CAAC,CAAE,CAAC;YACtB,MAAM,GAAG,GAAG,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,GAAG,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC,CAAC,GAAG,CAAC,IAAI,CAAC,EAAE,CAAC;YAC9C,MAAM,GAAG,GAAG,MAAM,CAAC,GAAG,CAAC,GAAG,CAAC,CAAC;YAC5B,IAAI,GAAG,KAAK,SAAS,EAAE,CAAC;gBACtB,KAAK,IAAI,GAAG,CAAC;gBACb,KAAK,EAAE,CAAC;YACV,CAAC;QACH,CAAC;IACH,CAAC;IACD,OAAO,KAAK,GAAG,CAAC,CAAC,CAAC,CAAC,KAAK,GAAG,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC;AACvC,CAAC;AAED,SAAS,QAAQ,CAAmB,CAAI;IACtC,MAAM,EAAE,SAAS,EAAE,GAAG,IAAI,EAAE,GAAG,CAAW,CAAC;IAC3C,KAAK,SAAS,CAAC;IACf,OAAO,IAAI,CAAC;AACd,CAAC;AAED,SAAS,SAAS,CAAC,SAAmB;IACpC,MAAM,MAAM,GAAG,CAAC,GAAG,SAAS,CAAC,CAAC,IAAI,EAAE,CAAC;IACrC,OAAO,IAAI,GAAG,UAAU,CAAC,MAAM,CAAC,CAAC,MAAM,CAAC,MAAM,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC,KAAK,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC;AACvF,CAAC"}
@@ -0,0 +1,2 @@
1
+ export {};
2
+ //# sourceMappingURL=cosine-clones.test.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"cosine-clones.test.d.ts","sourceRoot":"","sources":["../../src/entities/cosine-clones.test.ts"],"names":[],"mappings":""}
@@ -0,0 +1,109 @@
1
+ import { describe, it, expect } from "vitest";
2
+ import { CosineCloneDetector } from "./cosine-clones.js";
3
+ const e = (id, vec) => ({ // Test fixture: builds a "function" entity literal whose id doubles as its name and file stem.
4
+ id,
5
+ kind: "function",
6
+ name: id,
7
+ filePath: `${id}.ts`,
8
+ startLine: 1,
9
+ endLine: 10,
10
+ language: "typescript",
11
+ embedding: Float32Array.from(vec), // vec is copied into a Float32Array as given; no normalisation happens in the fixture
12
+ });
13
+ describe("CosineCloneDetector", () => { // Behavioural tests for CosineCloneDetector.detect, driven by tiny hand-written embeddings.
14
+ const det = new CosineCloneDetector(); // a single detector instance is shared by every case below
15
+ it("returns no clusters when fewer than 2 entities have embeddings", async () => {
16
+ expect(await det.detect({ entities: [] })).toEqual([]);
17
+ expect(await det.detect({ entities: [e("a", [1, 0, 0])] })).toEqual([]);
18
+ });
19
+ it("clusters two near-identical entities", async () => {
20
+ const out = await det.detect({
21
+ entities: [e("a", [1, 0, 0]), e("b", [1, 0.01, 0])],
22
+ threshold: 0.99,
23
+ });
24
+ expect(out).toHaveLength(1);
25
+ expect(out[0].members.map((m) => m.id).sort()).toEqual(["a", "b"]); // ids are sorted so the check does not depend on member order
26
+ });
27
+ it("does not cluster orthogonal entities at any threshold", async () => { // NOTE(review): the title says "any threshold" but only 0.5 is exercised
28
+ const out = await det.detect({
29
+ entities: [e("a", [1, 0]), e("b", [0, 1])],
30
+ threshold: 0.5,
31
+ });
32
+ expect(out).toEqual([]);
33
+ });
34
+ it("connects via transitive similarity (A↔B, B↔C → cluster {A,B,C})", async () => {
35
+ const out = await det.detect({
36
+ entities: [e("a", [1, 0, 0]), e("b", [0.95, 0.31, 0]), e("c", [0.85, 0.5, 0])], // cos(a,b) ≈ 0.95 and cos(b,c) ≈ 0.98 clear the threshold; cos(a,c) ≈ 0.86 does not, so c can only join through b
37
+ threshold: 0.92,
38
+ });
39
+ expect(out).toHaveLength(1);
40
+ expect(out[0].members).toHaveLength(3);
41
+ });
42
+ it("threshold is monotone — higher threshold produces fewer clusters", async () => { // NOTE(review): the assertion compares total clustered members with <=, not the number of clusters
43
+ const ents = [
44
+ e("a", [1, 0, 0]),
45
+ e("b", [0.95, 0.31, 0]),
46
+ e("c", [0, 1, 0]),
47
+ e("d", [0, 0.95, 0.31]),
48
+ ];
49
+ const loose = await det.detect({ entities: ents, threshold: 0.5 });
50
+ const strict = await det.detect({ entities: ents, threshold: 0.99 });
51
+ const looseCount = loose.reduce((n, c) => n + c.members.length, 0); // total members across all clusters at the loose threshold
52
+ const strictCount = strict.reduce((n, c) => n + c.members.length, 0); // same total at the strict threshold
53
+ expect(strictCount).toBeLessThanOrEqual(looseCount);
54
+ });
55
+ it("respects maxClusterSize cap", async () => {
56
+ const ents = Array.from({ length: 8 }, (_, i) => e(`x${i}`, [1, i * 0.001, 0])); // eight nearly parallel vectors x0..x7
57
+ const out = await det.detect({ entities: ents, threshold: 0.9, maxClusterSize: 3 });
58
+ expect(out[0].members.length).toBe(3); // NOTE(review): only the first cluster's size is checked; the remaining entities are not asserted on
59
+ });
60
+ it("computes cohesion ∈ [threshold, 1]", async () => {
61
+ const out = await det.detect({
62
+ entities: [e("a", [1, 0, 0]), e("b", [1, 0.05, 0])],
63
+ threshold: 0.9,
64
+ });
65
+ const c = out[0];
66
+ expect(c.cohesion).toBeGreaterThanOrEqual(0.9);
67
+ expect(c.cohesion).toBeLessThanOrEqual(1);
68
+ });
69
+ it("produces deterministic cluster ids (sorted member ids)", async () => {
70
+ const a = await det.detect({
71
+ entities: [e("a", [1, 0, 0]), e("b", [1, 0.01, 0])],
72
+ threshold: 0.99,
73
+ });
74
+ const b = await det.detect({
75
+ entities: [e("b", [1, 0.01, 0]), e("a", [1, 0, 0])], // same two entities as above, in swapped input order
76
+ threshold: 0.99,
77
+ });
78
+ expect(a[0].id).toBe(b[0].id);
79
+ });
80
+ it("ignores entities without embeddings", async () => {
81
+ const noEmbed = { // entity literal that deliberately has no embedding field
82
+ id: "no",
83
+ kind: "function",
84
+ name: "no",
85
+ filePath: "no.ts",
86
+ startLine: 1,
87
+ endLine: 1,
88
+ language: "typescript",
89
+ };
90
+ const out = await det.detect({
91
+ entities: [e("a", [1, 0]), e("b", [1, 0.01]), noEmbed],
92
+ threshold: 0.99,
93
+ });
94
+ expect(out[0].members.find((m) => m.id === "no")).toBeUndefined();
95
+ });
96
+ it("output is sorted by cluster size desc, then cohesion desc", async () => { // NOTE(review): only the size ordering is asserted; the cohesion tie-break is not covered
97
+ const ents = [
98
+ e("a1", [1, 0, 0]),
99
+ e("a2", [1, 0.01, 0]),
100
+ e("a3", [1, 0.02, 0]),
101
+ e("b1", [0, 1, 0]),
102
+ e("b2", [0, 1, 0.01]),
103
+ ];
104
+ const out = await det.detect({ entities: ents, threshold: 0.99 });
105
+ expect(out).toHaveLength(2);
106
+ expect(out[0].members.length).toBeGreaterThanOrEqual(out[1].members.length);
107
+ });
108
+ });
109
+ //# sourceMappingURL=cosine-clones.test.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"cosine-clones.test.js","sourceRoot":"","sources":["../../src/entities/cosine-clones.test.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,QAAQ,EAAE,EAAE,EAAE,MAAM,EAAE,MAAM,QAAQ,CAAC;AAC9C,OAAO,EAAE,mBAAmB,EAAE,MAAM,oBAAoB,CAAC;AAGzD,MAAM,CAAC,GAAG,CAAC,EAAU,EAAE,GAAa,EAAU,EAAE,CAAC,CAAC;IAChD,EAAE;IACF,IAAI,EAAE,UAAU;IAChB,IAAI,EAAE,EAAE;IACR,QAAQ,EAAE,GAAG,EAAE,KAAK;IACpB,SAAS,EAAE,CAAC;IACZ,OAAO,EAAE,EAAE;IACX,QAAQ,EAAE,YAAY;IACtB,SAAS,EAAE,YAAY,CAAC,IAAI,CAAC,GAAG,CAAC;CAClC,CAAC,CAAC;AAEH,QAAQ,CAAC,qBAAqB,EAAE,GAAG,EAAE;IACnC,MAAM,GAAG,GAAG,IAAI,mBAAmB,EAAE,CAAC;IAEtC,EAAE,CAAC,gEAAgE,EAAE,KAAK,IAAI,EAAE;QAC9E,MAAM,CAAC,MAAM,GAAG,CAAC,MAAM,CAAC,EAAE,QAAQ,EAAE,EAAE,EAAE,CAAC,CAAC,CAAC,OAAO,CAAC,EAAE,CAAC,CAAC;QACvD,MAAM,CAAC,MAAM,GAAG,CAAC,MAAM,CAAC,EAAE,QAAQ,EAAE,CAAC,CAAC,CAAC,GAAG,EAAE,CAAC,CAAC,EAAE,CAAC,EAAE,CAAC,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,OAAO,CAAC,EAAE,CAAC,CAAC;IAC1E,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,sCAAsC,EAAE,KAAK,IAAI,EAAE;QACpD,MAAM,GAAG,GAAG,MAAM,GAAG,CAAC,MAAM,CAAC;YAC3B,QAAQ,EAAE,CAAC,CAAC,CAAC,GAAG,EAAE,CAAC,CAAC,EAAE,CAAC,EAAE,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,GAAG,EAAE,CAAC,CAAC,EAAE,IAAI,EAAE,CAAC,CAAC,CAAC,CAAC;YACnD,SAAS,EAAE,IAAI;SAChB,CAAC,CAAC;QACH,MAAM,CAAC,GAAG,CAAC,CAAC,YAAY,CAAC,CAAC,CAAC,CAAC;QAC5B,MAAM,CAAC,GAAG,CAAC,CAAC,CAAE,CAAC,OAAO,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,IAAI,EAAE,CAAC,CAAC,OAAO,CAAC,CAAC,GAAG,EAAE,GAAG,CAAC,CAAC,CAAC;IACtE,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,uDAAuD,EAAE,KAAK,IAAI,EAAE;QACrE,MAAM,GAAG,GAAG,MAAM,GAAG,CAAC,MAAM,CAAC;YAC3B,QAAQ,EAAE,CAAC,CAAC,CAAC,GAAG,EAAE,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,GAAG,EAAE,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,CAAC;YAC1C,SAAS,EAAE,GAAG;SACf,CAAC,CAAC;QACH,MAAM,CAAC,GAAG,CAAC,CAAC,OAAO,CAAC,EAAE,CAAC,CAAC;IAC1B,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,iEAAiE,EAAE,KAAK,IAAI,EAAE;QAC/E,MAAM,GAAG,GAAG,MAAM,GAAG,CAAC,MAAM,CAAC;YAC3B,QAAQ,EAAE,CAAC,CAAC,CAAC,GAAG,EAAE,CAAC,CAAC,EAAE,CAAC,EAAE,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,GAAG,EAAE,CAAC,IAAI,EAAE,
IAAI,EAAE,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,GAAG,EAAE,CAAC,IAAI,EAAE,GAAG,EAAE,CAAC,CAAC,CAAC,CAAC;YAC9E,SAAS,EAAE,IAAI;SAChB,CAAC,CAAC;QACH,MAAM,CAAC,GAAG,CAAC,CAAC,YAAY,CAAC,CAAC,CAAC,CAAC;QAC5B,MAAM,CAAC,GAAG,CAAC,CAAC,CAAE,CAAC,OAAO,CAAC,CAAC,YAAY,CAAC,CAAC,CAAC,CAAC;IAC1C,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,kEAAkE,EAAE,KAAK,IAAI,EAAE;QAChF,MAAM,IAAI,GAAG;YACX,CAAC,CAAC,GAAG,EAAE,CAAC,CAAC,EAAE,CAAC,EAAE,CAAC,CAAC,CAAC;YACjB,CAAC,CAAC,GAAG,EAAE,CAAC,IAAI,EAAE,IAAI,EAAE,CAAC,CAAC,CAAC;YACvB,CAAC,CAAC,GAAG,EAAE,CAAC,CAAC,EAAE,CAAC,EAAE,CAAC,CAAC,CAAC;YACjB,CAAC,CAAC,GAAG,EAAE,CAAC,CAAC,EAAE,IAAI,EAAE,IAAI,CAAC,CAAC;SACxB,CAAC;QACF,MAAM,KAAK,GAAG,MAAM,GAAG,CAAC,MAAM,CAAC,EAAE,QAAQ,EAAE,IAAI,EAAE,SAAS,EAAE,GAAG,EAAE,CAAC,CAAC;QACnE,MAAM,MAAM,GAAG,MAAM,GAAG,CAAC,MAAM,CAAC,EAAE,QAAQ,EAAE,IAAI,EAAE,SAAS,EAAE,IAAI,EAAE,CAAC,CAAC;QACrE,MAAM,UAAU,GAAG,KAAK,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,GAAG,CAAC,CAAC,OAAO,CAAC,MAAM,EAAE,CAAC,CAAC,CAAC;QACnE,MAAM,WAAW,GAAG,MAAM,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,GAAG,CAAC,CAAC,OAAO,CAAC,MAAM,EAAE,CAAC,CAAC,CAAC;QACrE,MAAM,CAAC,WAAW,CAAC,CAAC,mBAAmB,CAAC,UAAU,CAAC,CAAC;IACtD,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,6BAA6B,EAAE,KAAK,IAAI,EAAE;QAC3C,MAAM,IAAI,GAAG,KAAK,CAAC,IAAI,CAAC,EAAE,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAC9C,CAAC,CAAC,IAAI,CAAC,EAAE,EAAE,CAAC,CAAC,EAAE,CAAC,GAAG,KAAK,EAAE,CAAC,CAAC,CAAC,CAC9B,CAAC;QACF,MAAM,GAAG,GAAG,MAAM,GAAG,CAAC,MAAM,CAAC,EAAE,QAAQ,EAAE,IAAI,EAAE,SAAS,EAAE,GAAG,EAAE,cAAc,EAAE,CAAC,EAAE,CAAC,CAAC;QACpF,MAAM,CAAC,GAAG,CAAC,CAAC,CAAE,CAAC,OAAO,CAAC,MAAM,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;IACzC,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,oCAAoC,EAAE,KAAK,IAAI,EAAE;QAClD,MAAM,GAAG,GAAG,MAAM,GAAG,CAAC,MAAM,CAAC;YAC3B,QAAQ,EAAE,CAAC,CAAC,CAAC,GAAG,EAAE,CAAC,CAAC,EAAE,CAAC,EAAE,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,GAAG,EAAE,CAAC,CAAC,EAAE,IAAI,EAAE,CAAC,CAAC,CAAC,CAAC;YACnD,SAAS,EAAE,GAAG;SACf,CAAC,CAAC;QACH,MAAM,CAAC,GAAG,GAAG,CAAC,CAAC,CAAE,CAAC;QAClB,MAAM,CAAC,CAAC,CAAC,QAAQ,CAAC,CAAC
,sBAAsB,CAAC,GAAG,CAAC,CAAC;QAC/C,MAAM,CAAC,CAAC,CAAC,QAAQ,CAAC,CAAC,mBAAmB,CAAC,CAAC,CAAC,CAAC;IAC5C,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,wDAAwD,EAAE,KAAK,IAAI,EAAE;QACtE,MAAM,CAAC,GAAG,MAAM,GAAG,CAAC,MAAM,CAAC;YACzB,QAAQ,EAAE,CAAC,CAAC,CAAC,GAAG,EAAE,CAAC,CAAC,EAAE,CAAC,EAAE,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,GAAG,EAAE,CAAC,CAAC,EAAE,IAAI,EAAE,CAAC,CAAC,CAAC,CAAC;YACnD,SAAS,EAAE,IAAI;SAChB,CAAC,CAAC;QACH,MAAM,CAAC,GAAG,MAAM,GAAG,CAAC,MAAM,CAAC;YACzB,QAAQ,EAAE,CAAC,CAAC,CAAC,GAAG,EAAE,CAAC,CAAC,EAAE,IAAI,EAAE,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,GAAG,EAAE,CAAC,CAAC,EAAE,CAAC,EAAE,CAAC,CAAC,CAAC,CAAC;YACnD,SAAS,EAAE,IAAI;SAChB,CAAC,CAAC;QACH,MAAM,CAAC,CAAC,CAAC,CAAC,CAAE,CAAC,EAAE,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,CAAE,CAAC,EAAE,CAAC,CAAC;IAClC,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,qCAAqC,EAAE,KAAK,IAAI,EAAE;QACnD,MAAM,OAAO,GAAW;YACtB,EAAE,EAAE,IAAI;YACR,IAAI,EAAE,UAAU;YAChB,IAAI,EAAE,IAAI;YACV,QAAQ,EAAE,OAAO;YACjB,SAAS,EAAE,CAAC;YACZ,OAAO,EAAE,CAAC;YACV,QAAQ,EAAE,YAAY;SACvB,CAAC;QACF,MAAM,GAAG,GAAG,MAAM,GAAG,CAAC,MAAM,CAAC;YAC3B,QAAQ,EAAE,CAAC,CAAC,CAAC,GAAG,EAAE,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,GAAG,EAAE,CAAC,CAAC,EAAE,IAAI,CAAC,CAAC,EAAE,OAAO,CAAC;YACtD,SAAS,EAAE,IAAI;SAChB,CAAC,CAAC;QACH,MAAM,CAAC,GAAG,CAAC,CAAC,CAAE,CAAC,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,EAAE,KAAK,IAAI,CAAC,CAAC,CAAC,aAAa,EAAE,CAAC;IACrE,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,2DAA2D,EAAE,KAAK,IAAI,EAAE;QACzE,MAAM,IAAI,GAAG;YACX,CAAC,CAAC,IAAI,EAAE,CAAC,CAAC,EAAE,CAAC,EAAE,CAAC,CAAC,CAAC;YAClB,CAAC,CAAC,IAAI,EAAE,CAAC,CAAC,EAAE,IAAI,EAAE,CAAC,CAAC,CAAC;YACrB,CAAC,CAAC,IAAI,EAAE,CAAC,CAAC,EAAE,IAAI,EAAE,CAAC,CAAC,CAAC;YACrB,CAAC,CAAC,IAAI,EAAE,CAAC,CAAC,EAAE,CAAC,EAAE,CAAC,CAAC,CAAC;YAClB,CAAC,CAAC,IAAI,EAAE,CAAC,CAAC,EAAE,CAAC,EAAE,IAAI,CAAC,CAAC;SACtB,CAAC;QACF,MAAM,GAAG,GAAG,MAAM,GAAG,CAAC,MAAM,CAAC,EAAE,QAAQ,EAAE,IAAI,EAAE,SAAS,EAAE,IAAI,EAAE,CAAC,CAAC;QAClE,MAAM,CAAC,GAAG,CAAC,CAAC,YAAY,CAAC,CAAC,CAAC,CAAC;QAC5B,MAAM,CAAC,GAAG,CAAC,CAAC,CAAE,CAAC,OAAO,CAAC,MAAM,CAAC,CAAC,sBAAsB,CAAC,GAAG,CAAC,
CAAC,CAAE,CAAC,OAAO,CAAC,MAAM,CAAC,CAAC;IAChF,CAAC,CAAC,CAAC;AACL,CAAC,CAAC,CAAC"}
@@ -0,0 +1,74 @@
1
+ /**
2
+ * Phase 2 — entity-level memory.
3
+ *
4
+ * Phase 1 indexes commits and PR text. Phase 2 indexes the *symbols inside the
5
+ * code itself* — functions, classes, modules, exported types — so we can answer
6
+ * questions like:
7
+ *
8
+ * "Which functions in this repo do roughly the same thing as parseAmount?"
9
+ * "Find all places that re-implement retry logic."
10
+ * "Where is this concept handled outside of its declared module?"
11
+ *
12
+ * The contracts live here. The parsers and clusterers plug in by implementing
13
+ * these interfaces. v0.2 will ship a tree-sitter-backed EntityParser and a
14
+ * cosine-clustering CloneDetector. Both are forward-compatible with this surface.
15
+ */
16
+ import type { Entity } from "../types.js";
17
+ /**
18
+ * Walks a workspace and emits every interesting symbol it finds.
19
+ *
20
+ * Implementations should:
21
+ * - Use tree-sitter or an equivalent AST library — regex is not enough
22
+ * - Skip generated, vendored, and node_modules paths by default
23
+ * - Be incremental-friendly: callers may only re-parse changed files
24
+ */
25
+ export interface EntityParser {
26
+ readonly name: string; // label identifying the parser implementation
27
+ readonly languages: string[]; // languages this parser supports; ParseOptions.languages defaults to all of them
28
+ parseRepo(opts: ParseOptions): AsyncIterable<Entity>; // asynchronous stream of entities for the workspace described by opts
29
+ parseFile(filePath: string, source: string): Iterable<Entity>; // synchronous parse of one file, given its path and source text
30
+ }
31
+ export interface ParseOptions { // Options accepted by EntityParser.parseRepo.
32
+ cwd: string; // base directory; entries in `paths` are relative to it
33
+ /** Limit to specific paths (relative to cwd). */
34
+ paths?: string[];
35
+ /** Languages to consider; default = all that the parser supports. */
36
+ languages?: string[];
37
+ onProgress?: (filesParsed: number) => void; // optional callback receiving a files-parsed count; NOTE(review): how often it fires is not specified here
38
+ }
39
+ /**
40
+ * Groups entities by semantic similarity. The output is a set of clusters, each
41
+ * containing entities that "do roughly the same thing". The detector itself
42
+ * decides how strict the threshold is.
43
+ *
44
+ * detect({ entities, threshold: 0.85 })
45
+ */
46
+ export interface CloneDetector {
47
+ readonly name: string; // label identifying the detector implementation
48
+ detect(opts: DetectOptions): Promise<EntityCluster[]>; // resolves to the clusters found among opts.entities
49
+ }
50
+ export interface DetectOptions { // Options accepted by CloneDetector.detect.
51
+ entities: Entity[]; // candidate entities to group
52
+ /** Cosine threshold (0..1). Higher = stricter. NOTE(review): DEFAULT_CLONE_THRESHOLD (0.85) is presumably the fallback when omitted — confirm in the detector. */
53
+ threshold?: number;
54
+ /** Cap on cluster size to keep results scannable. NOTE(review): DEFAULT_MAX_CLUSTER_SIZE (12) is presumably the fallback when omitted — confirm in the detector. */
55
+ maxClusterSize?: number;
56
+ }
57
+ export interface EntityCluster { // One group of similar entities, as returned by CloneDetector.detect.
58
+ /** Stable id derived from member entity ids. */
59
+ id: string;
60
+ /** Cluster cohesion score (mean pairwise cosine, 0..1). */
61
+ cohesion: number;
62
+ members: Entity[]; // the entities grouped into this cluster
63
+ /** Optional one-line summary the detector chose to attach. */
64
+ label?: string;
65
+ }
66
+ /**
67
+ * Default thresholds used across the codebase.
68
+ */
69
+ export declare const DEFAULT_CLONE_THRESHOLD = 0.85; // cosine similarity cutoff, on the same 0..1 scale as DetectOptions.threshold
70
+ export declare const DEFAULT_MAX_CLUSTER_SIZE = 12; // member-count cap, matching DetectOptions.maxClusterSize
71
+ export { TypeScriptParser, entityEmbeddingText } from "./typescript-parser.js";
72
+ export { PythonParser } from "./python-parser.js";
73
+ export { CosineCloneDetector } from "./cosine-clones.js";
74
+ //# sourceMappingURL=index.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../src/entities/index.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;GAcG;AAEH,OAAO,KAAK,EAAE,MAAM,EAAE,MAAM,aAAa,CAAC;AAE1C;;;;;;;GAOG;AACH,MAAM,WAAW,YAAY;IAC3B,QAAQ,CAAC,IAAI,EAAE,MAAM,CAAC;IACtB,QAAQ,CAAC,SAAS,EAAE,MAAM,EAAE,CAAC;IAC7B,SAAS,CAAC,IAAI,EAAE,YAAY,GAAG,aAAa,CAAC,MAAM,CAAC,CAAC;IACrD,SAAS,CAAC,QAAQ,EAAE,MAAM,EAAE,MAAM,EAAE,MAAM,GAAG,QAAQ,CAAC,MAAM,CAAC,CAAC;CAC/D;AAED,MAAM,WAAW,YAAY;IAC3B,GAAG,EAAE,MAAM,CAAC;IACZ,iDAAiD;IACjD,KAAK,CAAC,EAAE,MAAM,EAAE,CAAC;IACjB,qEAAqE;IACrE,SAAS,CAAC,EAAE,MAAM,EAAE,CAAC;IACrB,UAAU,CAAC,EAAE,CAAC,WAAW,EAAE,MAAM,KAAK,IAAI,CAAC;CAC5C;AAED;;;;;;GAMG;AACH,MAAM,WAAW,aAAa;IAC5B,QAAQ,CAAC,IAAI,EAAE,MAAM,CAAC;IACtB,MAAM,CAAC,IAAI,EAAE,aAAa,GAAG,OAAO,CAAC,aAAa,EAAE,CAAC,CAAC;CACvD;AAED,MAAM,WAAW,aAAa;IAC5B,QAAQ,EAAE,MAAM,EAAE,CAAC;IACnB,kDAAkD;IAClD,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,qDAAqD;IACrD,cAAc,CAAC,EAAE,MAAM,CAAC;CACzB;AAED,MAAM,WAAW,aAAa;IAC5B,gDAAgD;IAChD,EAAE,EAAE,MAAM,CAAC;IACX,2DAA2D;IAC3D,QAAQ,EAAE,MAAM,CAAC;IACjB,OAAO,EAAE,MAAM,EAAE,CAAC;IAClB,8DAA8D;IAC9D,KAAK,CAAC,EAAE,MAAM,CAAC;CAChB;AAED;;GAEG;AACH,eAAO,MAAM,uBAAuB,OAAO,CAAC;AAC5C,eAAO,MAAM,wBAAwB,KAAK,CAAC;AAE3C,OAAO,EAAE,gBAAgB,EAAE,mBAAmB,EAAE,MAAM,wBAAwB,CAAC;AAC/E,OAAO,EAAE,YAAY,EAAE,MAAM,oBAAoB,CAAC;AAClD,OAAO,EAAE,mBAAmB,EAAE,MAAM,oBAAoB,CAAC"}
@@ -0,0 +1,24 @@
1
+ /**
2
+ * Phase 2 — entity-level memory.
3
+ *
4
+ * Phase 1 indexes commits and PR text. Phase 2 indexes the *symbols inside the
5
+ * code itself* — functions, classes, modules, exported types — so we can answer
6
+ * questions like:
7
+ *
8
+ * "Which functions in this repo do roughly the same thing as parseAmount?"
9
+ * "Find all places that re-implement retry logic."
10
+ * "Where is this concept handled outside of its declared module?"
11
+ *
12
+ * The contracts live here. The parsers and clusterers plug in by implementing
13
+ * these interfaces. v0.2 will ship a tree-sitter-backed EntityParser and a
14
+ * cosine-clustering CloneDetector. Both are forward-compatible with this surface.
15
+ */
16
+ /**
17
+ * Default thresholds used across the codebase.
18
+ */
19
+ export const DEFAULT_CLONE_THRESHOLD = 0.85; // default cosine similarity cutoff for clone detection
20
+ export const DEFAULT_MAX_CLUSTER_SIZE = 12; // default cap on the number of members in one cluster
21
+ export { TypeScriptParser, entityEmbeddingText } from "./typescript-parser.js";
22
+ export { PythonParser } from "./python-parser.js";
23
+ export { CosineCloneDetector } from "./cosine-clones.js";
24
+ //# sourceMappingURL=index.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"index.js","sourceRoot":"","sources":["../../src/entities/index.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;GAcG;AA0DH;;GAEG;AACH,MAAM,CAAC,MAAM,uBAAuB,GAAG,IAAI,CAAC;AAC5C,MAAM,CAAC,MAAM,wBAAwB,GAAG,EAAE,CAAC;AAE3C,OAAO,EAAE,gBAAgB,EAAE,mBAAmB,EAAE,MAAM,wBAAwB,CAAC;AAC/E,OAAO,EAAE,YAAY,EAAE,MAAM,oBAAoB,CAAC;AAClD,OAAO,EAAE,mBAAmB,EAAE,MAAM,oBAAoB,CAAC"}
@@ -0,0 +1,16 @@
1
+ import type { Entity } from "../types.js";
2
+ import type { EntityParser, ParseOptions } from "./index.js";
3
+ export declare class PythonParser implements EntityParser { // EntityParser implementation for Python; declaration only, the bodies are not visible here
4
+ readonly name = "python-ast";
5
+ readonly languages: string[];
6
+ private pythonCmd; // NOTE(review): presumably the interpreter command selected by preload() — confirm in the implementation
7
+ /** Detect which Python binary works on this machine. */
8
+ preload(): Promise<void>;
9
+ parseRepo(opts: ParseOptions): AsyncIterable<Entity>;
10
+ parseFile(_filePath: string, _source: string): Iterable<Entity>; // NOTE(review): underscore-prefixed parameters suggest the synchronous path does not use its inputs — confirm before relying on it
11
+ parseSourceAsync(filePath: string, source: string): Promise<Entity[]>; // Promise-based per-file entry point returning the parsed entities as an array
12
+ private parseSource;
13
+ private runPython;
14
+ private probe;
15
+ }
16
+ //# sourceMappingURL=python-parser.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"python-parser.d.ts","sourceRoot":"","sources":["../../src/entities/python-parser.ts"],"names":[],"mappings":"AA2BA,OAAO,KAAK,EAAE,MAAM,EAAE,MAAM,aAAa,CAAC;AAC1C,OAAO,KAAK,EAAE,YAAY,EAAE,YAAY,EAAE,MAAM,YAAY,CAAC;AAmF7D,qBAAa,YAAa,YAAW,YAAY;IAC/C,QAAQ,CAAC,IAAI,gBAAgB;IAC7B,QAAQ,CAAC,SAAS,WAAc;IAEhC,OAAO,CAAC,SAAS,CAAuB;IAExC,wDAAwD;IAClD,OAAO,IAAI,OAAO,CAAC,IAAI,CAAC;IAcvB,SAAS,CAAC,IAAI,EAAE,YAAY,GAAG,aAAa,CAAC,MAAM,CAAC;IAuB3D,SAAS,CAAC,SAAS,EAAE,MAAM,EAAE,OAAO,EAAE,MAAM,GAAG,QAAQ,CAAC,MAAM,CAAC;IAOzD,gBAAgB,CAAC,QAAQ,EAAE,MAAM,EAAE,MAAM,EAAE,MAAM,GAAG,OAAO,CAAC,MAAM,EAAE,CAAC;YAK7D,WAAW;IAoCzB,OAAO,CAAC,SAAS;IAejB,OAAO,CAAC,KAAK;CAad"}