ragcode-context-engine 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (174) hide show
  1. package/LICENSE +21 -0
  2. package/README.md +366 -0
  3. package/README.zh-CN.md +363 -0
  4. package/dist/src/cli/configure/app.d.ts +6 -0
  5. package/dist/src/cli/configure/app.js +81 -0
  6. package/dist/src/cli/configure/run.d.ts +5 -0
  7. package/dist/src/cli/configure/run.js +85 -0
  8. package/dist/src/cli/configure/state.d.ts +42 -0
  9. package/dist/src/cli/configure/state.js +174 -0
  10. package/dist/src/cli/configure.d.ts +31 -0
  11. package/dist/src/cli/configure.js +101 -0
  12. package/dist/src/cli/index.d.ts +2 -0
  13. package/dist/src/cli/index.js +503 -0
  14. package/dist/src/cli/tui/index-progress.d.ts +12 -0
  15. package/dist/src/cli/tui/index-progress.js +49 -0
  16. package/dist/src/cli/tui/watch-status.d.ts +10 -0
  17. package/dist/src/cli/tui/watch-status.js +27 -0
  18. package/dist/src/cli/update.d.ts +18 -0
  19. package/dist/src/cli/update.js +111 -0
  20. package/dist/src/config/dotenv.d.ts +1 -0
  21. package/dist/src/config/dotenv.js +14 -0
  22. package/dist/src/config/graph-runtime.d.ts +13 -0
  23. package/dist/src/config/graph-runtime.js +29 -0
  24. package/dist/src/config/runtime-config.d.ts +87 -0
  25. package/dist/src/config/runtime-config.js +215 -0
  26. package/dist/src/config/semantic-runtime.d.ts +24 -0
  27. package/dist/src/config/semantic-runtime.js +89 -0
  28. package/dist/src/context/context-builder.d.ts +20 -0
  29. package/dist/src/context/context-builder.js +277 -0
  30. package/dist/src/context/expansion-policy.d.ts +6 -0
  31. package/dist/src/context/expansion-policy.js +49 -0
  32. package/dist/src/context/skeletonizer.d.ts +2 -0
  33. package/dist/src/context/skeletonizer.js +79 -0
  34. package/dist/src/context/snippet-renderer.d.ts +2 -0
  35. package/dist/src/context/snippet-renderer.js +67 -0
  36. package/dist/src/core/contracts.d.ts +74 -0
  37. package/dist/src/core/contracts.js +1 -0
  38. package/dist/src/core/engine.d.ts +64 -0
  39. package/dist/src/core/engine.js +442 -0
  40. package/dist/src/core/types.d.ts +490 -0
  41. package/dist/src/core/types.js +1 -0
  42. package/dist/src/diagnostics/doctor.d.ts +66 -0
  43. package/dist/src/diagnostics/doctor.js +193 -0
  44. package/dist/src/diagnostics/embedding-test.d.ts +24 -0
  45. package/dist/src/diagnostics/embedding-test.js +83 -0
  46. package/dist/src/graph/diff-files.d.ts +1 -0
  47. package/dist/src/graph/diff-files.js +14 -0
  48. package/dist/src/graph/impact-report.d.ts +10 -0
  49. package/dist/src/graph/impact-report.js +173 -0
  50. package/dist/src/graph/in-memory-graph-store.d.ts +36 -0
  51. package/dist/src/graph/in-memory-graph-store.js +395 -0
  52. package/dist/src/graph/owner-ranking.d.ts +2 -0
  53. package/dist/src/graph/owner-ranking.js +41 -0
  54. package/dist/src/graph/sqlite-graph-store.d.ts +51 -0
  55. package/dist/src/graph/sqlite-graph-store.js +724 -0
  56. package/dist/src/graph/sqlite-statements.d.ts +36 -0
  57. package/dist/src/graph/sqlite-statements.js +105 -0
  58. package/dist/src/graph/target-matcher.d.ts +13 -0
  59. package/dist/src/graph/target-matcher.js +64 -0
  60. package/dist/src/index.d.ts +32 -0
  61. package/dist/src/index.js +32 -0
  62. package/dist/src/indexing/analyzers/fallback-analyzer.d.ts +6 -0
  63. package/dist/src/indexing/analyzers/fallback-analyzer.js +45 -0
  64. package/dist/src/indexing/analyzers/go-treesitter-analyzer.d.ts +2 -0
  65. package/dist/src/indexing/analyzers/go-treesitter-analyzer.js +87 -0
  66. package/dist/src/indexing/analyzers/java-treesitter-analyzer.d.ts +2 -0
  67. package/dist/src/indexing/analyzers/java-treesitter-analyzer.js +88 -0
  68. package/dist/src/indexing/analyzers/python-treesitter-analyzer.d.ts +2 -0
  69. package/dist/src/indexing/analyzers/python-treesitter-analyzer.js +96 -0
  70. package/dist/src/indexing/analyzers/registry.d.ts +5 -0
  71. package/dist/src/indexing/analyzers/registry.js +23 -0
  72. package/dist/src/indexing/analyzers/rust-treesitter-analyzer.d.ts +2 -0
  73. package/dist/src/indexing/analyzers/rust-treesitter-analyzer.js +96 -0
  74. package/dist/src/indexing/analyzers/tree-sitter-base.d.ts +30 -0
  75. package/dist/src/indexing/analyzers/tree-sitter-base.js +163 -0
  76. package/dist/src/indexing/analyzers/types.d.ts +17 -0
  77. package/dist/src/indexing/analyzers/types.js +1 -0
  78. package/dist/src/indexing/analyzers/typescript-analyzer.d.ts +5 -0
  79. package/dist/src/indexing/analyzers/typescript-analyzer.js +199 -0
  80. package/dist/src/indexing/ast-analyzer.d.ts +11 -0
  81. package/dist/src/indexing/ast-analyzer.js +11 -0
  82. package/dist/src/indexing/chunker.d.ts +11 -0
  83. package/dist/src/indexing/chunker.js +157 -0
  84. package/dist/src/indexing/ignore-policy.d.ts +6 -0
  85. package/dist/src/indexing/ignore-policy.js +40 -0
  86. package/dist/src/indexing/indexer.d.ts +13 -0
  87. package/dist/src/indexing/indexer.js +189 -0
  88. package/dist/src/indexing/language.d.ts +3 -0
  89. package/dist/src/indexing/language.js +24 -0
  90. package/dist/src/indexing/scanner.d.ts +13 -0
  91. package/dist/src/indexing/scanner.js +87 -0
  92. package/dist/src/lsp/definition-resolver.d.ts +6 -0
  93. package/dist/src/lsp/definition-resolver.js +60 -0
  94. package/dist/src/lsp/typescript-language-service.d.ts +21 -0
  95. package/dist/src/lsp/typescript-language-service.js +82 -0
  96. package/dist/src/mcp/server.d.ts +11 -0
  97. package/dist/src/mcp/server.js +64 -0
  98. package/dist/src/mcp/tools.d.ts +266 -0
  99. package/dist/src/mcp/tools.js +309 -0
  100. package/dist/src/project/project-identity.d.ts +2 -0
  101. package/dist/src/project/project-identity.js +24 -0
  102. package/dist/src/project/project-registry.d.ts +12 -0
  103. package/dist/src/project/project-registry.js +49 -0
  104. package/dist/src/project/workspace-resolver.d.ts +20 -0
  105. package/dist/src/project/workspace-resolver.js +62 -0
  106. package/dist/src/retrieval/graph-reranker.d.ts +11 -0
  107. package/dist/src/retrieval/graph-reranker.js +0 -0
  108. package/dist/src/retrieval/hybrid-retriever.d.ts +31 -0
  109. package/dist/src/retrieval/hybrid-retriever.js +111 -0
  110. package/dist/src/retrieval/path-classification.d.ts +6 -0
  111. package/dist/src/retrieval/path-classification.js +22 -0
  112. package/dist/src/retrieval/query-matching.d.ts +22 -0
  113. package/dist/src/retrieval/query-matching.js +166 -0
  114. package/dist/src/retrieval/query-planner.d.ts +5 -0
  115. package/dist/src/retrieval/query-planner.js +77 -0
  116. package/dist/src/retrieval/ranking-signals.d.ts +19 -0
  117. package/dist/src/retrieval/ranking-signals.js +97 -0
  118. package/dist/src/retrieval/topology-distance.d.ts +21 -0
  119. package/dist/src/retrieval/topology-distance.js +116 -0
  120. package/dist/src/reuse/reuse-detector.d.ts +12 -0
  121. package/dist/src/reuse/reuse-detector.js +564 -0
  122. package/dist/src/semantic/deterministic-embedding.d.ts +7 -0
  123. package/dist/src/semantic/deterministic-embedding.js +31 -0
  124. package/dist/src/semantic/in-memory-semantic-store.d.ts +11 -0
  125. package/dist/src/semantic/in-memory-semantic-store.js +65 -0
  126. package/dist/src/semantic/lance-semantic-store.d.ts +131 -0
  127. package/dist/src/semantic/lance-semantic-store.js +623 -0
  128. package/dist/src/semantic/openai-compatible-embedding.d.ts +19 -0
  129. package/dist/src/semantic/openai-compatible-embedding.js +75 -0
  130. package/dist/src/service/service-identity.d.ts +13 -0
  131. package/dist/src/service/service-identity.js +48 -0
  132. package/dist/src/service/service-manager.d.ts +29 -0
  133. package/dist/src/service/service-manager.js +231 -0
  134. package/dist/src/service/service-templates.d.ts +22 -0
  135. package/dist/src/service/service-templates.js +101 -0
  136. package/dist/src/subgraph/impact-explainer.d.ts +2 -0
  137. package/dist/src/subgraph/impact-explainer.js +54 -0
  138. package/dist/src/subgraph/node-expander.d.ts +13 -0
  139. package/dist/src/subgraph/node-expander.js +139 -0
  140. package/dist/src/subgraph/output-preset.d.ts +3 -0
  141. package/dist/src/subgraph/output-preset.js +102 -0
  142. package/dist/src/subgraph/subgraph-builder.d.ts +17 -0
  143. package/dist/src/subgraph/subgraph-builder.js +688 -0
  144. package/dist/src/topology/export-index.d.ts +7 -0
  145. package/dist/src/topology/export-index.js +14 -0
  146. package/dist/src/topology/framework-topology.d.ts +3 -0
  147. package/dist/src/topology/framework-topology.js +460 -0
  148. package/dist/src/topology/import-resolver.d.ts +2 -0
  149. package/dist/src/topology/import-resolver.js +29 -0
  150. package/dist/src/topology/orm-topology.d.ts +3 -0
  151. package/dist/src/topology/orm-topology.js +200 -0
  152. package/dist/src/topology/runtime-topology.d.ts +3 -0
  153. package/dist/src/topology/runtime-topology.js +204 -0
  154. package/dist/src/topology/symbol-resolver.d.ts +6 -0
  155. package/dist/src/topology/symbol-resolver.js +74 -0
  156. package/dist/src/topology/test-topology.d.ts +2 -0
  157. package/dist/src/topology/test-topology.js +82 -0
  158. package/dist/src/utils/hash.d.ts +2 -0
  159. package/dist/src/utils/hash.js +7 -0
  160. package/dist/src/utils/path.d.ts +2 -0
  161. package/dist/src/utils/path.js +7 -0
  162. package/dist/src/watch/event-journal.d.ts +17 -0
  163. package/dist/src/watch/event-journal.js +81 -0
  164. package/dist/src/watch/file-event-coalescer.d.ts +9 -0
  165. package/dist/src/watch/file-event-coalescer.js +39 -0
  166. package/dist/src/watch/index-scheduler.d.ts +52 -0
  167. package/dist/src/watch/index-scheduler.js +190 -0
  168. package/dist/src/watch/watch-daemon.d.ts +73 -0
  169. package/dist/src/watch/watch-daemon.js +368 -0
  170. package/dist/src/watch/watcher-liveness.d.ts +47 -0
  171. package/dist/src/watch/watcher-liveness.js +168 -0
  172. package/dist/src/web/server.d.ts +1 -0
  173. package/dist/src/web/server.js +375 -0
  174. package/package.json +94 -0
@@ -0,0 +1,564 @@
1
+ import ts from "typescript";
2
+ import { renderSnippet } from "../context/snippet-renderer.js";
3
+ import { sha256 } from "../utils/hash.js";
4
/**
 * Assembles the reuse report for one query.
 *
 * Candidate drafts are collected from three sources (retrieval hits, owner rankings and
 * symbol-name similarity), widened with structural duplicates, then finalized, ranked and
 * truncated before the summary fields are derived from the ranked list.
 *
 * @param {object} input - Reads `query`, `hits`, `owners`, `symbols`, `edges`, `chunks`,
 *   plus the optional `limit` (defaults to 8) and `reuseGuard` flag.
 * @returns {object} Report with `query`, `decision`, `confidence`, `candidates`,
 *   `duplicateRisk`, `reuseGuard`, `missingEvidence` and `nextQueries`.
 */
export function buildReuseCandidateReport(input) {
    const maxCandidates = input.limit ?? 8;
    const draftsByKey = new Map();
    const symbolsById = new Map(input.symbols.map((symbol) => [symbol.id, symbol]));
    const queryTerms = expandedQueryTerms(input.query);
    const structureIndex = buildStructureIndex(input.symbols, input.edges, input.chunks, symbolsById);

    // Source 1: retrieval hits. The hit score is doubled and floored at 0.1.
    for (const hit of input.hits) {
        const matchedSymbol = symbolForHit(hit, input.symbols);
        const { filePath, symbolName } = hit.chunk;
        addDraft(draftsByKey, {
            filePath,
            symbolName,
            symbol: matchedSymbol,
            score: Math.max(0.1, hit.score * 2),
            reason: hit.reason,
            hit
        });
    }

    // Source 2: owner ranking. Owners without symbols fall back to the first two
    // non-file symbols of their file, or to a file-level draft when none exist.
    for (const owner of input.owners) {
        const rankedSymbols = owner.symbols.length > 0
            ? owner.symbols
            : input.symbols
                .filter((symbol) => symbol.filePath === owner.filePath && symbol.kind !== "file")
                .slice(0, 2);
        if (rankedSymbols.length > 0) {
            for (const symbol of rankedSymbols) {
                addDraft(draftsByKey, {
                    filePath: symbol.filePath,
                    symbolName: symbol.name,
                    symbol,
                    score: owner.score + 0.5,
                    reason: `Owner candidate: ${owner.reasons.join("; ")}`
                });
            }
        }
        else {
            addDraft(draftsByKey, {
                filePath: owner.filePath,
                score: owner.score,
                reason: owner.reasons.join("; ")
            });
        }
    }

    // Source 3: lexical similarity between each non-file symbol and the expanded query terms.
    for (const symbol of input.symbols) {
        if (symbol.kind === "file")
            continue;
        const similarity = symbolSimilarity(symbol, queryTerms);
        if (similarity <= 0)
            continue;
        addDraft(draftsByKey, {
            filePath: symbol.filePath,
            symbolName: symbol.name,
            symbol,
            score: similarity,
            reason: `Symbol/API similarity to query terms: ${symbol.name}`
        });
    }

    expandStructuralDuplicates(draftsByKey, structureIndex);

    // Highest score first; ties are ordered by file path.
    const candidates = [...draftsByKey.values()]
        .map((draft) => finalizeCandidate(draft, input.edges, input.chunks, symbolsById, input.query, structureIndex))
        .sort((left, right) => right.score - left.score || left.filePath.localeCompare(right.filePath))
        .slice(0, maxCandidates);

    return {
        query: input.query,
        decision: decisionFor(candidates),
        confidence: reportConfidence(candidates),
        candidates,
        duplicateRisk: duplicateRiskFor(candidates),
        reuseGuard: reuseGuardFor(candidates, Boolean(input.reuseGuard)),
        missingEvidence: missingEvidenceFor(candidates),
        nextQueries: nextQueriesFor(candidates)
    };
}
76
/**
 * Merges one piece of evidence into the draft keyed by file path and symbol name
 * (file-level evidence uses the `__file__` placeholder).
 *
 * Scores accumulate, the first symbol seen is kept, `exported` is sticky once true,
 * and every reason (and hit, when supplied) is appended.
 *
 * @param {Map<string, object>} drafts - Draft accumulator, mutated in place.
 * @param {object} input - Evidence with `filePath`, `score`, `reason` and optional
 *   `symbolName`, `symbol` and `hit`.
 * @returns {void}
 */
function addDraft(drafts, input) {
    const draftKey = `${input.filePath}:${input.symbolName ?? "__file__"}`;
    const isExported = Boolean(input.symbol?.exported);
    let entry = drafts.get(draftKey);
    if (entry == null) {
        entry = {
            filePath: input.filePath,
            symbolName: input.symbolName,
            symbol: input.symbol,
            score: 0,
            exported: isExported,
            reasons: [],
            snippets: []
        };
    }
    entry.score += input.score;
    entry.symbol = entry.symbol ?? input.symbol;
    entry.exported = entry.exported || isExported;
    entry.reasons.push(input.reason);
    if (input.hit) {
        entry.snippets.push(input.hit);
    }
    drafts.set(draftKey, entry);
}
95
/**
 * Turns an accumulated draft into a ranked candidate record.
 *
 * The final score is the draft score plus bonuses for being exported (1.2), for inbound
 * `calls` edges (0.4 each, capped at 2), for outbound `tested_by` edges (0.8 each,
 * capped at 1.5) and for structural signals.
 *
 * @param {object} draft - Draft produced by `addDraft`.
 * @param {object[]} edges - Graph edges; only `calls` and `tested_by` kinds are counted.
 * @param {object[]} chunks - Chunks used to pick a snippet.
 * @param {Map<string, object>} symbolsById - Unused here; kept for signature parity.
 * @param {string} query - Original query, forwarded to snippet rendering.
 * @param {object} structureIndex - Index exposing a `bySymbolId` map.
 * @returns {object} Candidate with score, confidence, counts, signals, reasons and snippet.
 */
function finalizeCandidate(draft, edges, chunks, symbolsById, query, structureIndex) {
    const symbolId = draft.symbol?.id;
    let callerCount = 0;
    let relatedTestCount = 0;
    let structure;
    if (symbolId) {
        // One sweep over the edges gathers both counters.
        for (const edge of edges) {
            if (edge.targetId === symbolId && edge.kind === "calls") {
                callerCount += 1;
            }
            if (edge.sourceId === symbolId && edge.kind === "tested_by") {
                relatedTestCount += 1;
            }
        }
        structure = structureIndex.bySymbolId.get(symbolId);
    }
    const structuralSignals = structuralSignalsFor(structure);
    const structuralScore = structuralScoreFor(structuralSignals);

    const exportBonus = draft.exported ? 1.2 : 0;
    const callerBonus = Math.min(2, callerCount * 0.4);
    const testBonus = Math.min(1.5, relatedTestCount * 0.8);
    const score = draft.score + exportBonus + callerBonus + testBonus + structuralScore;

    const kind = candidateKind(draft);
    const snippet = candidateSnippet(draft, chunks, query);

    let confidence = "low";
    if (score >= 3.5) {
        confidence = "high";
    }
    else if (score >= 2) {
        confidence = "medium";
    }

    const whyReuse = [];
    whyReuse.push(draft.exported
        ? "Exported/public symbol is available to call or wrap."
        : "Candidate is private; prefer extending nearby code before creating a duplicate.");
    whyReuse.push(callerCount > 0
        ? `${callerCount} indexed caller(s) already depend on it.`
        : "No indexed callers were found.");
    whyReuse.push(relatedTestCount > 0
        ? `${relatedTestCount} related test edge(s) cover it.`
        : "No explicit tested_by edge was found.");
    whyReuse.push(...whyReuseFromStructure(structuralSignals));

    return {
        filePath: draft.filePath,
        symbolName: draft.symbolName,
        kind,
        score: Number(score.toFixed(3)),
        confidence,
        exported: draft.exported,
        callerCount,
        relatedTestCount,
        structuralSignals,
        // De-duplicated, at most six reasons.
        reasons: [...new Set(draft.reasons)].slice(0, 6),
        whyReuse,
        snippet
    };
}
131
/**
 * Picks the snippet to show for a candidate.
 *
 * Prefers the highest-scoring retrieval hit recorded on the draft (the earliest one wins
 * ties). Without hits, falls back to a chunk with the same file and symbol name, then to
 * any chunk of the same file, rendered as a synthetic "graph" hit.
 *
 * The best hit is selected with a linear scan rather than `Array.prototype.sort`, which
 * would reorder the caller-owned `draft.snippets` array in place.
 *
 * @param {object} draft - Draft with `filePath`, `symbolName`, `score` and `snippets`.
 * @param {object[]} chunks - Indexed chunks to fall back on.
 * @param {string} query - Original query, forwarded to `renderSnippet`.
 * @returns {*} Whatever `renderSnippet` returns, or `undefined` when nothing matches.
 */
function candidateSnippet(draft, chunks, query) {
    let bestHit;
    for (const hit of draft.snippets) {
        if (bestHit === undefined || hit.score > bestHit.score) {
            bestHit = hit;
        }
    }
    if (bestHit)
        return renderSnippet(bestHit, query, "review");
    const chunk = chunks.find((item) => item.filePath === draft.filePath && item.symbolName === draft.symbolName)
        ?? chunks.find((item) => item.filePath === draft.filePath);
    if (!chunk)
        return undefined;
    return renderSnippet({
        chunk,
        score: draft.score,
        source: "graph",
        reason: `Reuse candidate ${draft.symbolName ?? draft.filePath}`
    }, query, "review");
}
146
/**
 * Classifies a draft into a coarse candidate kind from its file path, symbol name and
 * symbol kind. Rules are evaluated top to bottom and the first match wins.
 *
 * React hooks and components are recognised in JavaScript sources (`.js`/`.jsx`) as well
 * as TypeScript ones, matching the extensions already accepted by the test-file rule.
 *
 * @param {object} draft - Draft with `filePath` and optional `symbolName` / `symbol.kind`.
 * @returns {string} One of `test_fixture`, `react_hook`, `component`, `api_wrapper`,
 *   `type_or_schema`, `config_constant`, `service_method`, `helper` or `unknown`.
 */
function candidateKind(draft) {
    const symbol = draft.symbolName ?? "";
    const file = draft.filePath;
    if (/(^|\/)(__tests__|tests?)(\/|$)|\.(test|spec)\.[jt]sx?$/.test(file))
        return "test_fixture";
    // Hooks are named `useXxx` and may live in any JS/TS module.
    if (/\.[jt]sx?$/.test(file) && /^use[A-Z]/.test(symbol))
        return "react_hook";
    // Components are capitalised symbols in JSX-capable files.
    if (/\.[jt]sx$/.test(file) && /^[A-Z]/.test(symbol))
        return "component";
    if (/api|client|sdk|fetch/i.test(file) || /api|client|fetch/i.test(symbol))
        return "api_wrapper";
    if (draft.symbol?.kind === "type" || /schema|type|interface/i.test(file))
        return "type_or_schema";
    // SCREAMING_SNAKE_CASE names are treated as constants.
    if (/config|constant|env/i.test(file) || /^[A-Z0-9_]+$/.test(symbol))
        return "config_constant";
    if (/service|billing|repo|repository/i.test(file) || draft.symbol?.kind === "method")
        return "service_method";
    if (draft.symbol?.kind === "function")
        return "helper";
    return "unknown";
}
167
/**
 * Tokenizes the query and adds fixed synonym groups for the topics it mentions
 * (rate limiting, payments, authentication).
 *
 * @param {string} query - Free-text query.
 * @returns {string[]} Unique terms: query tokens first, then any added synonyms.
 */
function expandedQueryTerms(query) {
    // Each entry pairs a trigger pattern with the terms it contributes.
    const synonymGroups = [
        [/rate[-\s]?limit|limiting|throttle/, ["rate", "limit", "limiter", "throttle", "throttlerequest", "token", "bucket", "tokenbucket"]],
        [/payment|billing|checkout/, ["payment", "billing", "checkout", "charge", "invoice"]],
        [/auth|login|session/, ["auth", "login", "session", "user"]]
    ];
    const terms = new Set(tokenize(query));
    const lowered = query.toLowerCase();
    for (const [trigger, related] of synonymGroups) {
        if (!trigger.test(lowered)) {
            continue;
        }
        for (const term of related) {
            terms.add(term);
        }
    }
    return [...terms];
}
185
/**
 * Scores how well a symbol matches the query terms.
 *
 * Each term found as a substring of the symbol's name, split name, file path or signature
 * contributes `min(1.2, term.length / 5)`. Exported symbols get an extra 0.5, but only
 * when at least one term matched: the caller treats any positive score as a match, so an
 * unconditional bonus would report every exported symbol as similar to the query.
 *
 * @param {object} symbol - Symbol with `name`, `filePath`, optional `signature`, `exported`.
 * @param {string[]} terms - Lower-case query terms.
 * @returns {number} 0 when nothing matched, otherwise the accumulated score.
 */
function symbolSimilarity(symbol, terms) {
    const haystack = `${symbol.name} ${splitIdentifier(symbol.name).join(" ")} ${symbol.filePath} ${symbol.signature ?? ""}`.toLowerCase();
    let score = 0;
    for (const term of terms) {
        if (haystack.includes(term))
            score += Math.min(1.2, term.length / 5);
    }
    // The export bonus boosts real matches; it must not create one.
    if (score > 0 && symbol.exported)
        score += 0.5;
    return score;
}
196
/**
 * Derives the overall recommendation from the top-ranked candidate.
 *
 * @param {object[]} candidates - Candidates sorted best-first.
 * @returns {string} `implement_new` without candidates; otherwise `reuse`, `extend`,
 *   `wrap` or `uncertain` depending on the top candidate's confidence and export status.
 */
function decisionFor(candidates) {
    const [best] = candidates;
    if (!best) {
        return "implement_new";
    }
    switch (best.confidence) {
        case "high":
            return best.exported ? "reuse" : "extend";
        case "medium":
            return best.exported ? "wrap" : "uncertain";
        default:
            return "uncertain";
    }
}
208
/**
 * Reports the confidence of the whole result, which is the top candidate's confidence.
 *
 * @param {object[]} candidates - Candidates sorted best-first.
 * @returns {string} The first candidate's `confidence`, or `"low"` when there is none.
 */
function reportConfidence(candidates) {
    const [best] = candidates;
    return best ? best.confidence : "low";
}
214
/**
 * Estimates how likely new code would duplicate something already indexed.
 *
 * `high` when any high-confidence candidate has a confirmed body duplicate, when the top
 * candidate is high-confidence, or when at least two other candidates score within 1 of
 * the top. `medium` when the top candidate is medium-confidence or more than one
 * candidate exists. Otherwise `low`.
 *
 * @param {object[]} candidates - Candidates sorted best-first.
 * @returns {"low"|"medium"|"high"} Risk level.
 */
function duplicateRiskFor(candidates) {
    if (candidates.length === 0) {
        return "low";
    }
    const [top] = candidates;
    const nearTopCount = candidates.reduce(
        (count, candidate, position) => (position > 0 && top.score - candidate.score < 1 ? count + 1 : count),
        0
    );
    const hasConfirmedDuplicate = candidates.some(
        (candidate) => candidate.structuralSignals.bodyDuplicateCount > 0 && candidate.confidence === "high"
    );
    if (hasConfirmedDuplicate || top.confidence === "high" || nearTopCount >= 2) {
        return "high";
    }
    return top.confidence === "medium" || candidates.length > 1 ? "medium" : "low";
}
227
/**
 * Decides whether new implementation work should be allowed, reviewed or blocked.
 *
 * Exported high-confidence candidates with a confirmed body duplicate block new work when
 * the guard is enabled and require review when it is not. With the guard disabled, any
 * exported high-confidence candidate still requires review. Everything else is allowed.
 *
 * @param {object[]} candidates - Finalized candidates.
 * @param {boolean} enabled - Whether the caller switched the reuse guard on.
 * @returns {{status: string, reason: string, candidates: object[]}} Guard verdict with at
 *   most five supporting candidates.
 */
function reuseGuardFor(candidates, enabled) {
    const isStrongExport = (candidate) => candidate.exported && candidate.confidence === "high";

    const structuralBlockers = candidates
        .filter((candidate) => isStrongExport(candidate) && candidate.structuralSignals.bodyDuplicateCount > 0)
        .slice(0, 5);
    if (structuralBlockers.length > 0) {
        if (enabled) {
            return {
                status: "block_new",
                reason: "reuse_guard is enabled and high-confidence normalized duplicate implementations already exist. Reuse or extend them instead of implementing a new copy.",
                candidates: guardCandidates(structuralBlockers)
            };
        }
        return {
            status: "review_required",
            reason: "High-confidence normalized duplicate implementations already exist; enable reuseGuard to hard block new duplicate work.",
            candidates: guardCandidates(structuralBlockers)
        };
    }

    if (!enabled) {
        const highReuse = candidates.filter(isStrongExport).slice(0, 5);
        if (highReuse.length > 0) {
            return {
                status: "review_required",
                reason: "High-confidence reusable candidates exist. Review them before implementing new code.",
                candidates: guardCandidates(highReuse)
            };
        }
    }

    const allowReason = enabled
        ? "No high-confidence structural duplicate blocks new implementation."
        : "No reuse guard blockers were found.";
    return { status: "allow_new", reason: allowReason, candidates: [] };
}
254
/**
 * Projects candidates down to the four fields exposed in a reuse-guard verdict.
 *
 * @param {object[]} candidates - Finalized candidates.
 * @returns {{filePath: *, symbolName: *, score: *, confidence: *}[]} Trimmed copies.
 */
function guardCandidates(candidates) {
    const summaries = [];
    for (const { filePath, symbolName, score, confidence } of candidates) {
        summaries.push({ filePath, symbolName, score, confidence });
    }
    return summaries;
}
262
/**
 * Lists the kinds of evidence the candidate set is lacking.
 *
 * @param {object[]} candidates - Finalized candidates.
 * @returns {string[]} Human-readable gaps; empty when nothing is missing.
 */
function missingEvidenceFor(candidates) {
    if (candidates.length === 0) {
        return ["No indexed reusable candidate matched the request."];
    }
    const nothingExported = candidates.every((candidate) => !candidate.exported);
    const nothingTested = candidates.every((candidate) => !(candidate.relatedTestCount > 0));
    return [
        ...(nothingExported ? ["No exported reusable candidate was found."] : []),
        ...(nothingTested ? ["No candidate has explicit tested_by evidence."] : [])
    ];
}
272
/**
 * Suggests follow-up commands for the leading candidates.
 *
 * Each of the first five candidates yields an `expand_node` and an `explain_impact`
 * query; the combined list is cut off at eight entries.
 *
 * @param {object[]} candidates - Candidates sorted best-first.
 * @returns {string[]} Up to eight follow-up query strings.
 */
function nextQueriesFor(candidates) {
    const queries = [];
    for (const candidate of candidates.slice(0, 5)) {
        const symbolSuffix = candidate.symbolName ? `:${candidate.symbolName}` : "";
        const nodeRef = `${candidate.filePath}${symbolSuffix}`;
        queries.push(`expand_node ${nodeRef}`, `explain_impact ${nodeRef}`);
    }
    return queries.slice(0, 8);
}
278
/**
 * Lower-cases the text and extracts its runs of ASCII letters, digits and underscores.
 *
 * @param {string} query - Text to tokenize.
 * @returns {string[]} Tokens in order of appearance; empty when none are found.
 */
function tokenize(query) {
    const tokens = query.toLowerCase().match(/[a-z0-9_]+/gi);
    return tokens ?? [];
}
281
/**
 * Splits an identifier into lower-case words at camelCase boundaries and at any
 * character that is not an ASCII letter or digit.
 *
 * @param {string} value - Identifier such as `getUserById` or `token_bucket`.
 * @returns {string[]} Lower-case word parts, in order.
 */
function splitIdentifier(value) {
    // Insert a space wherever a lower-case letter or digit is followed by an upper-case letter.
    const spaced = value.replace(/([a-z0-9])([A-Z])/g, "$1 $2");
    const words = [];
    for (const piece of spaced.split(/[^a-z0-9]+/i)) {
        if (piece) {
            words.push(piece.toLowerCase());
        }
    }
    return words;
}
288
/**
 * Finds the indexed symbol a search hit refers to, matching on file path and name.
 *
 * @param {object} hit - Search hit whose `chunk` carries `filePath` and `symbolName`.
 * @param {object[]} symbols - Indexed symbols.
 * @returns {object|undefined} The first matching symbol, or `undefined` when the hit has
 *   no symbol name or nothing matches.
 */
function symbolForHit(hit, symbols) {
    const { filePath, symbolName } = hit.chunk;
    if (!symbolName) {
        return undefined;
    }
    return symbols.find((candidate) => candidate.filePath === filePath && candidate.name === symbolName);
}
293
/**
 * Adds (or boosts) a draft for every symbol that shares a normalized body fingerprint
 * with an already collected draft.
 *
 * Each duplicate receives `max(1.5, seedScore * 0.6)`, where `seedScore` is the seed
 * draft's score as it stood before this pass began. Scores are snapshotted up front
 * because `addDraft` mutates the draft objects in place: without the snapshot, two drafts
 * that are duplicates of each other would feed the boost just received back into one
 * another, making the outcome depend on Map insertion order.
 *
 * @param {Map<string, object>} drafts - Draft accumulator, mutated in place.
 * @param {object} structureIndex - Index exposing `bySymbolId` and `byFingerprint` maps.
 * @returns {void}
 */
function expandStructuralDuplicates(drafts, structureIndex) {
    const seeds = [];
    for (const draft of drafts.values()) {
        if (draft.symbol) {
            seeds.push({ symbol: draft.symbol, score: draft.score });
        }
    }
    for (const seed of seeds) {
        const fingerprint = structureIndex.bySymbolId.get(seed.symbol.id)?.bodyFingerprint;
        if (!fingerprint)
            continue;
        const duplicates = structureIndex.byFingerprint.get(fingerprint) ?? [];
        for (const duplicate of duplicates) {
            // The fingerprint group contains the seed itself.
            if (duplicate.symbol.id === seed.symbol.id)
                continue;
            addDraft(drafts, {
                filePath: duplicate.symbol.filePath,
                symbolName: duplicate.symbol.name,
                symbol: duplicate.symbol,
                score: Math.max(1.5, seed.score * 0.6),
                reason: `Normalized body fingerprint matches ${seed.symbol.name}.`
            });
        }
    }
}
315
/**
 * Builds the structural lookup used for duplicate detection.
 *
 * Every non-file symbol gets a structure record (its chunk, normalized body fingerprint,
 * signature tokens, imports and callees). Records sharing a fingerprint are then compared
 * with each other to fill in `duplicateCount` and the three similarity signals.
 *
 * @param {object[]} symbols - Indexed symbols.
 * @param {object[]} edges - Graph edges; only `imports` and `calls` kinds are used.
 * @param {object[]} chunks - Indexed code chunks.
 * @param {Map<string, object>} symbolsById - Symbols keyed by id, used to name callees.
 * @returns {{bySymbolId: Map, byFingerprint: Map}} Records by symbol id and by fingerprint.
 */
function buildStructureIndex(symbols, edges, chunks, symbolsById) {
    const bySymbolId = new Map();
    const byFingerprint = new Map();

    // Bucket chunks and edges once so each symbol only inspects the slice belonging to its
    // own file (or its own id) instead of rescanning the full arrays.
    const chunksByFile = groupBy(chunks, (chunk) => chunk.filePath);
    const importEdgesByFile = new Map();
    const callEdgesByFile = new Map();
    const callEdgesBySourceId = new Map();
    for (const edge of edges) {
        switch (edge.kind) {
            case "imports": {
                const sourceFile = stringMetadata(edge, "sourceFile");
                if (sourceFile) {
                    pushToGroup(importEdgesByFile, sourceFile, edge);
                }
                break;
            }
            case "calls": {
                const sourceFile = stringMetadata(edge, "sourceFile");
                if (sourceFile) {
                    pushToGroup(callEdgesByFile, sourceFile, edge);
                }
                // Call edges are also indexed by source id, with or without file metadata.
                pushToGroup(callEdgesBySourceId, edge.sourceId, edge);
                break;
            }
            default:
                break;
        }
    }

    for (const symbol of symbols) {
        if (symbol.kind === "file") {
            continue;
        }
        const { filePath } = symbol;
        const chunk = chunkForSymbol(symbol, chunksByFile.get(filePath) ?? []);
        const bodyFingerprint = chunk ? normalizedBodyFingerprint(chunk) : undefined;
        const structure = {
            symbol,
            chunk,
            bodyFingerprint,
            duplicateCount: 0,
            signatureTokens: normalizedSignatureTokens(symbol.signature ?? ""),
            imports: importsForSymbol(importEdgesByFile.get(filePath) ?? []),
            callees: calleesForSymbol(symbol, callEdgesByFile.get(filePath) ?? [], callEdgesBySourceId.get(symbol.id) ?? [], symbolsById),
            signatureSimilarity: 0,
            importOverlap: 0,
            calleeOverlap: 0
        };
        bySymbolId.set(symbol.id, structure);
        if (!bodyFingerprint) {
            continue;
        }
        const sameShape = byFingerprint.get(bodyFingerprint);
        if (sameShape) {
            sameShape.push(structure);
        }
        else {
            byFingerprint.set(bodyFingerprint, [structure]);
        }
    }

    const poolLimit = FINGERPRINT_COMPARISON_WINDOW + 1;
    for (const group of byFingerprint.values()) {
        if (group.length < 2) {
            continue;
        }
        // Comparing every pair in a group is quadratic, so oversized groups are compared
        // against a fixed-size leading window only. For those groups the signals are a
        // sampled lower bound, which is acceptable: a shape shared by hundreds of functions
        // is boilerplate rather than a meaningful reuse target.
        const pool = group.length > poolLimit ? group.slice(0, poolLimit) : group;
        for (const structure of group) {
            const peers = pool.filter((peer) => peer.symbol.id !== structure.symbol.id);
            // Identifiers and literals are normalized away, so a shared fingerprint alone
            // over-matches (e.g. `enable(id)` vs `remove(id)`). Only peers whose callee sets
            // overlap with a Jaccard index of at least 0.5 count as confirmed duplicates.
            let confirmed = 0;
            for (const peer of peers) {
                if (jaccard(structure.callees, peer.callees) >= 0.5) {
                    confirmed += 1;
                }
            }
            structure.duplicateCount = confirmed;
            structure.signatureSimilarity = maxSimilarity(structure.signatureTokens, peers.map((peer) => peer.signatureTokens));
            structure.importOverlap = maxSimilarity(structure.imports, peers.map((peer) => peer.imports));
            structure.calleeOverlap = maxSimilarity(structure.callees, peers.map((peer) => peer.callees));
        }
    }
    return { bySymbolId, byFingerprint };
}
388
/**
 * Buckets items into a Map keyed by `keyOf(item)`, preserving encounter order both for
 * the keys and within each bucket.
 *
 * @param {Iterable<*>} items - Items to group.
 * @param {(item: *) => *} keyOf - Computes the bucket key for an item.
 * @returns {Map<*, Array<*>>} Buckets by key.
 */
function groupBy(items, keyOf) {
    const buckets = new Map();
    for (const item of items) {
        const key = keyOf(item);
        const bucket = buckets.get(key);
        if (bucket) {
            bucket.push(item);
        }
        else {
            buckets.set(key, [item]);
        }
    }
    return buckets;
}
394
/**
 * Appends an item to the bucket stored under `key`, creating the bucket on first use.
 *
 * @param {Map<*, Array<*>>} groups - Bucket map, mutated in place.
 * @param {*} key - Bucket key.
 * @param {*} item - Item to append.
 * @returns {void}
 */
function pushToGroup(groups, key, item) {
    const bucket = groups.get(key);
    if (!bucket) {
        groups.set(key, [item]);
        return;
    }
    bucket.push(item);
}
401
/**
 * Picks the chunk that holds a symbol's body from the chunks of its file.
 *
 * Preference order:
 *   1. a chunk with the symbol's name whose line range encloses the symbol;
 *   2. any chunk with the symbol's name;
 *   3. any chunk whose line range encloses the symbol.
 *
 * Step 1 matters when a file has several symbols with the same name (for example the
 * same method name on two classes): matching on name alone would hand each of them the
 * first such chunk, and with it the wrong body fingerprint.
 *
 * @param {object} symbol - Symbol with `name`, `startLine` and `endLine`.
 * @param {object[]} fileChunks - Chunks belonging to the symbol's file.
 * @returns {object|undefined} The chosen chunk, or `undefined` when none fits.
 */
function chunkForSymbol(symbol, fileChunks) {
    const hasSymbolName = (chunk) => chunk.symbolName === symbol.name;
    const enclosesSymbol = (chunk) => chunk.startLine <= symbol.startLine && chunk.endLine >= symbol.endLine;
    return fileChunks.find((chunk) => hasSymbolName(chunk) && enclosesSymbol(chunk))
        ?? fileChunks.find(hasSymbolName)
        ?? fileChunks.find(enclosesSymbol);
}
405
/**
 * Collects the import names visible from a file's import edges: each edge's `source`
 * metadata plus the `imported` and `local` names of its `bindings`.
 *
 * @param {object[]} fileImportEdges - `imports` edges originating from one file.
 * @returns {Set<string>} Distinct module sources and binding names.
 */
function importsForSymbol(fileImportEdges) {
    const names = new Set();
    for (const edge of fileImportEdges) {
        const moduleSource = stringMetadata(edge, "source");
        if (moduleSource) {
            names.add(moduleSource);
        }
        const bindings = edge.metadata?.bindings;
        if (!Array.isArray(bindings)) {
            continue;
        }
        for (const binding of bindings) {
            // Skip anything that is not a binding record.
            if (!binding || typeof binding !== "object") {
                continue;
            }
            for (const field of ["imported", "local"]) {
                const name = binding[field];
                if (typeof name === "string") {
                    names.add(name);
                }
            }
        }
    }
    return names;
}
426
/**
 * Collects the names of everything a symbol calls.
 *
 * Two kinds of call edge are considered: edges whose source id is this symbol, and
 * edges from the same file whose `line` metadata falls inside the symbol's line range.
 * A callee's name is the edge's `targetName` metadata or, failing that, the name of the
 * symbol registered under the edge's `targetId`.
 *
 * @param {object} symbol - Symbol with `startLine` and `endLine`.
 * @param {object[]} fileCallEdges - `calls` edges originating from the symbol's file.
 * @param {object[]} sourceIdCallEdges - `calls` edges whose source id is this symbol.
 * @param {Map<string, object>} symbolsById - Symbols keyed by id.
 * @returns {Set<string>} Distinct callee names.
 */
function calleesForSymbol(symbol, fileCallEdges, sourceIdCallEdges, symbolsById) {
    const calleeNameOf = (edge) => stringMetadata(edge, "targetName") ?? symbolsById.get(edge.targetId)?.name;
    const isInsideSymbol = (edge) => {
        const line = numberMetadata(edge, "line");
        return line !== undefined && line >= symbol.startLine && line <= symbol.endLine;
    };
    // Edges attributed by source id come first, then those located only by file and line.
    const relevantEdges = [...sourceIdCallEdges, ...fileCallEdges.filter(isInsideSymbol)];
    const callees = new Set();
    for (const edge of relevantEdges) {
        const name = calleeNameOf(edge);
        if (name) {
            callees.add(name);
        }
    }
    return callees;
}
444
/**
 * Projects a structure record onto the signal object attached to a candidate, rounding
 * the similarity values and substituting zeros when no record exists.
 *
 * @param {object|undefined} structure - Record from the structure index, if any.
 * @returns {{bodyFingerprint: *, bodyDuplicateCount: number, signatureSimilarity: number,
 *   importOverlap: number, calleeOverlap: number}} Candidate-facing signals.
 */
function structuralSignalsFor(structure) {
    const record = structure ?? {};
    const rounded = (value) => roundSignal(value ?? 0);
    return {
        bodyFingerprint: record.bodyFingerprint,
        bodyDuplicateCount: record.duplicateCount ?? 0,
        signatureSimilarity: rounded(record.signatureSimilarity),
        importOverlap: rounded(record.importOverlap),
        calleeOverlap: rounded(record.calleeOverlap)
    };
}
453
/**
 * Converts structural signals into a score bonus: 2.5 for any confirmed body duplicate,
 * 0.7 for signature similarity of at least 0.75, and 0.5 each for import and callee
 * overlap of at least 0.5.
 *
 * @param {object} signals - Signals produced by `structuralSignalsFor`.
 * @returns {number} Sum of the bonuses that apply.
 */
function structuralScoreFor(signals) {
    const duplicateBonus = signals.bodyDuplicateCount > 0 ? 2.5 : 0;
    const signatureBonus = signals.signatureSimilarity >= 0.75 ? 0.7 : 0;
    const importBonus = signals.importOverlap >= 0.5 ? 0.5 : 0;
    const calleeBonus = signals.calleeOverlap >= 0.5 ? 0.5 : 0;
    return duplicateBonus + signatureBonus + importBonus + calleeBonus;
}
465
/**
 * Produces the human-readable explanations for whichever structural signals cross their
 * thresholds (the same thresholds used for scoring).
 *
 * @param {object} signals - Signals produced by `structuralSignalsFor`.
 * @returns {string[]} Explanations in a fixed order; empty when no signal qualifies.
 */
function whyReuseFromStructure(signals) {
    const { bodyDuplicateCount, signatureSimilarity, importOverlap, calleeOverlap } = signals;
    const explanations = [
        [bodyDuplicateCount > 0, `Normalized body fingerprint matches ${bodyDuplicateCount} other indexed symbol(s).`],
        [signatureSimilarity >= 0.75, `Signature shape similarity is ${signatureSimilarity}.`],
        [importOverlap >= 0.5, `Import overlap with duplicate implementation is ${importOverlap}.`],
        [calleeOverlap >= 0.5, `Callee overlap with duplicate implementation is ${calleeOverlap}.`]
    ];
    return explanations.filter(([applies]) => applies).map(([, message]) => message);
}
477
// A fingerprint depends only on (language, script kind, content), and contentHash already keys
// the content — so cache by hash to skip re-running ts.createSourceFile on every chunk for every
// reuse query. The per-chunk AST parse is the dominant fixed cost of buildStructureIndex
// (~1.1s per 4k chunks measured); repeated queries in one process now pay it once.
const fingerprintCache = new Map();
const FINGERPRINT_CACHE_MAX_ENTRIES = 100_000;
/**
 * Returns the normalized body fingerprint of a chunk, memoized per
 * (language, script kind, content hash).
 *
 * Chunks without a usable `contentHash` bypass the cache: their key would not identify
 * the content, so unrelated chunks would be handed each other's fingerprint and be
 * reported as structural duplicates.
 *
 * @param {object} chunk - Chunk with `language`, `filePath`, `content` and `contentHash`.
 * @returns {string} Fingerprint computed by `computeBodyFingerprint`.
 */
function normalizedBodyFingerprint(chunk) {
    if (chunk.contentHash == null || chunk.contentHash === "")
        return computeBodyFingerprint(chunk);
    const key = `${chunk.language}|${scriptKindForPath(chunk.filePath)}|${chunk.contentHash}`;
    const cached = fingerprintCache.get(key);
    if (cached !== undefined)
        return cached;
    const fingerprint = computeBodyFingerprint(chunk);
    // Bounded memory: once the cap is reached the whole cache is dropped and refilled.
    if (fingerprintCache.size >= FINGERPRINT_CACHE_MAX_ENTRIES)
        fingerprintCache.clear();
    fingerprintCache.set(key, fingerprint);
    return fingerprint;
}
494
/**
 * Computes a fingerprint of a chunk's shape with identifiers and literals abstracted away.
 *
 * TypeScript/JavaScript chunks are parsed and reduced to a pre-order list of syntax-kind
 * names, with identifiers collapsed to `Identifier` and string, numeric and boolean
 * literals to `Literal`. Other languages use a regex-based approximation on the raw text.
 *
 * @param {object} chunk - Chunk with `language`, `filePath` and `content`.
 * @returns {string} First 32 characters of the SHA-256 of the normalized shape.
 */
function computeBodyFingerprint(chunk) {
    const isScript = chunk.language === "typescript" || chunk.language === "javascript";
    if (!isScript) {
        // Textual fallback: words, then numbers and quoted strings, then whitespace.
        const collapsed = chunk.content
            .replace(/[A-Za-z_$][\w$]*/g, "Identifier")
            .replace(/\d+(?:\.\d+)?|(['\"]).*?\1/g, "Literal")
            .replace(/\s+/g, " ")
            .trim();
        return sha256(collapsed).slice(0, 32);
    }
    const sourceFile = ts.createSourceFile(chunk.filePath, chunk.content, ts.ScriptTarget.Latest, true, scriptKindForPath(chunk.filePath));
    const shape = [];
    const isLiteral = (node) => ts.isStringLiteralLike(node)
        || ts.isNumericLiteral(node)
        || node.kind === ts.SyntaxKind.TrueKeyword
        || node.kind === ts.SyntaxKind.FalseKeyword;
    // Must not return a value: ts.forEachChild stops at the first truthy callback result.
    function walk(node) {
        const isWrapper = node.kind === ts.SyntaxKind.SourceFile || node.kind === ts.SyntaxKind.EndOfFileToken;
        if (!isWrapper) {
            if (ts.isIdentifier(node)) {
                shape.push("Identifier");
                return;
            }
            if (isLiteral(node)) {
                shape.push("Literal");
                return;
            }
            shape.push(ts.SyntaxKind[node.kind] ?? String(node.kind));
        }
        ts.forEachChild(node, walk);
    }
    ts.forEachChild(sourceFile, walk);
    return sha256(shape.join("|")).slice(0, 32);
}
519
/**
 * Reduces a signature to the set of its shape tokens.
 *
 * Quoted strings and numbers become `literal`, preserved keywords (see
 * `signatureKeywordTokens`) stay as they are, and every other word becomes `identifier`.
 *
 * The substitution is done in a single pass. Replacing literals first and words
 * afterwards would rewrite the inserted `Literal` placeholder as just another identifier,
 * so signatures with and without default values would look the same.
 *
 * @param {string} signature - Declaration signature text; may be empty.
 * @returns {Set<string>} Lower-case shape tokens.
 */
function normalizedSignatureTokens(signature) {
    const normalized = signature.replace(/(['\"]).*?\1|\b\d+(?:\.\d+)?\b|[A-Za-z_$][\w$]*/g, (token) => {
        const first = token[0];
        // Words never start with a quote or a digit, so those matches are literals.
        if (first === "'" || first === "\"" || (first >= "0" && first <= "9"))
            return " Literal ";
        return signatureKeywordTokens.has(token) ? token : "Identifier";
    });
    return new Set(tokenize(normalized));
}
526
/**
 * Finds the highest Jaccard similarity between a base set and any of the candidate sets.
 *
 * @param {Set<*>} base - Reference set.
 * @param {Iterable<Set<*>>} candidates - Sets to compare against.
 * @returns {number} Best similarity, or 0 when there are no candidates.
 */
function maxSimilarity(base, candidates) {
    let best = 0;
    for (const other of candidates) {
        const similarity = jaccard(base, other);
        best = Math.max(best, similarity);
    }
    return best;
}
532
/**
 * Jaccard index of two sets: shared members divided by distinct members overall.
 * Two empty sets score 0 rather than being treated as identical.
 *
 * @param {Set<*>} left - First set.
 * @param {Set<*>} right - Second set.
 * @returns {number} Value between 0 and 1.
 */
function jaccard(left, right) {
    if (left.size === 0 && right.size === 0) {
        return 0;
    }
    let shared = 0;
    left.forEach((member) => {
        if (right.has(member)) {
            shared += 1;
        }
    });
    // |A ∪ B| = |A| + |B| - |A ∩ B|
    const unionSize = left.size + right.size - shared;
    return shared / unionSize;
}
543
/**
 * Rounds a signal to three decimal places.
 *
 * @param {number} value - Raw signal.
 * @returns {number} Rounded value.
 */
function roundSignal(value) {
    const fixed = value.toFixed(3);
    return Number.parseFloat(fixed);
}
546
/**
 * Reads a metadata entry from an edge, accepting it only when it is a string.
 *
 * @param {object} edge - Edge whose optional `metadata` object is inspected.
 * @param {string} key - Metadata key.
 * @returns {string|undefined} The value, or `undefined` when absent or not a string.
 */
function stringMetadata(edge, key) {
    const entry = edge.metadata?.[key];
    if (typeof entry !== "string") {
        return undefined;
    }
    return entry;
}
550
/**
 * Reads a metadata entry from an edge, accepting it only when it is a number.
 *
 * @param {object} edge - Edge whose optional `metadata` object is inspected.
 * @param {string} key - Metadata key.
 * @returns {number|undefined} The value, or `undefined` when absent or not a number.
 */
function numberMetadata(edge, key) {
    const entry = edge.metadata?.[key];
    if (typeof entry !== "number") {
        return undefined;
    }
    return entry;
}
554
/**
 * Chooses the TypeScript `ScriptKind` for a file from its extension.
 *
 * @param {string} filePath - Path of the file being parsed.
 * @returns {number} `TSX` for `.tsx`, `JSX` for `.jsx`, `JS` for `.js`/`.mjs`/`.cjs`,
 *   and `TS` for everything else.
 */
function scriptKindForPath(filePath) {
    const hasExtension = (extension) => filePath.endsWith(extension);
    if (hasExtension(".tsx")) {
        return ts.ScriptKind.TSX;
    }
    if (hasExtension(".jsx")) {
        return ts.ScriptKind.JSX;
    }
    const isPlainJavaScript = [".js", ".mjs", ".cjs"].some(hasExtension);
    return isPlainJavaScript ? ts.ScriptKind.JS : ts.ScriptKind.TS;
}
563
+ const signatureKeywordTokens = new Set(["export", "default", "async", "function", "class", "interface", "type", "const", "let", "var", "string", "number", "boolean", "void", "Promise"]);
564
+ const FINGERPRINT_COMPARISON_WINDOW = 64;
@@ -0,0 +1,7 @@
1
+ import type { EmbeddingProvider } from "../core/contracts.js";
2
/**
 * Embedding provider that derives vectors purely from the input text by
 * hashing its tokens into a fixed number of buckets.
 */
export declare class DeterministicEmbeddingProvider implements EmbeddingProvider {
    /** Length of every vector produced by this provider. */
    readonly dimensions: number;
    /**
     * @param dimensions - Vector length; the implementation defaults to 64.
     */
    constructor(dimensions?: number);
    /** Embeds a single text into a vector of `dimensions` entries. */
    embed(text: string): Promise<Array<number>>;
    /** Embeds each text, returning the vectors in input order. */
    embedBatch(texts: Array<string>): Promise<Array<Array<number>>>;
}
@@ -0,0 +1,31 @@
1
/**
 * Embedding provider that needs no model: each token of the text is hashed
 * into one of `dimensions` buckets and the bucket counts are normalized.
 */
export class DeterministicEmbeddingProvider {
    /** Length of every vector produced by this provider. */
    dimensions;
    /**
     * @param {number} [dimensions=64] - Number of buckets per vector.
     */
    constructor(dimensions = 64) {
        this.dimensions = dimensions;
    }
    /**
     * Builds a bag-of-hashed-tokens vector for `text`.
     *
     * The text is lower-cased and split on runs of characters other than
     * letters, digits and underscores; empty pieces are dropped.
     *
     * @param {string} text - Text to embed.
     * @returns {Promise<number[]>} The result of `normalize` on the counts.
     */
    async embed(text) {
        const buckets = new Array(this.dimensions).fill(0);
        const tokens = text
            .toLowerCase()
            .split(/[^a-z0-9_]+/i)
            .filter((piece) => piece.length > 0);
        tokens.forEach((token) => {
            const slot = hashToken(token) % this.dimensions;
            buckets[slot] += 1;
        });
        return normalize(buckets);
    }
    /**
     * Embeds every text with `embed`.
     *
     * @param {string[]} texts - Texts to embed.
     * @returns {Promise<number[][]>} Vectors in the same order as `texts`.
     */
    async embedBatch(texts) {
        const pending = texts.map((entry) => this.embed(entry));
        return Promise.all(pending);
    }
}
18
/**
 * 32-bit FNV-1a hash over the UTF-16 code units of `token`.
 *
 * @param {string} token - Text to hash.
 * @returns {number} Unsigned 32-bit hash value.
 */
function hashToken(token) {
    const FNV_OFFSET_BASIS = 2166136261;
    const FNV_PRIME = 16777619;
    let state = FNV_OFFSET_BASIS;
    let position = 0;
    while (position < token.length) {
        // XOR the code unit in first, then multiply; Math.imul keeps the
        // product within 32 bits.
        state = Math.imul(state ^ token.charCodeAt(position), FNV_PRIME);
        position += 1;
    }
    // Reinterpret the signed 32-bit state as an unsigned integer.
    return state >>> 0;
}
26
/**
 * Scales a vector to unit Euclidean length.
 *
 * @param {number[]} vector - Components to scale.
 * @returns {number[]} A new array with each component divided by the vector's
 *   magnitude, or the input array itself when the magnitude is 0.
 */
function normalize(vector) {
    let sumOfSquares = 0;
    vector.forEach((component) => {
        sumOfSquares += component * component;
    });
    const length = Math.sqrt(sumOfSquares);
    // A zero vector has no direction; hand it back untouched.
    return length === 0 ? vector : vector.map((component) => component / length);
}
@@ -0,0 +1,11 @@
1
+ import type { EmbeddingProvider, SemanticStore } from "../core/contracts.js";
2
+ import type { CodeChunk, SearchHit, SearchQuery } from "../core/types.js";
3
/**
 * Semantic store that keeps embedded chunks in process memory, grouped by
 * repository root.
 */
export declare class InMemorySemanticStore implements SemanticStore {
    /** Stored records, keyed by repository root. */
    private readonly repos;
    /** Replaces everything stored for `repoRoot` with an empty list. */
    resetRepo(repoRoot: string): Promise<void>;
    /** Drops the records of one file within one project of a repository. */
    deleteFile(repoRoot: string, projectId: string, filePath: string): Promise<void>;
    /** Embeds the chunks with `provider` and inserts or replaces them by chunk id. */
    upsertChunks(chunks: Array<CodeChunk>, provider: EmbeddingProvider, _generation?: number): Promise<void>;
    /** Returns stored chunks ranked by similarity to the embedded query text. */
    search(query: SearchQuery, provider: EmbeddingProvider): Promise<Array<SearchHit>>;
}
10
/**
 * Builds the text that is embedded for a chunk: its file path, symbol name,
 * language and content, one per line, with empty parts omitted.
 */
export declare function renderChunkForEmbedding(chunk: CodeChunk): string;
/**
 * Cosine similarity of two vectors over their common leading components;
 * 0 when either has zero magnitude over that range.
 */
export declare function cosineSimilarity(a: Array<number>, b: Array<number>): number;
@@ -0,0 +1,65 @@
1
/**
 * Semantic store that keeps `{ chunk, embedding }` records in process memory,
 * grouped by repository root.
 */
export class InMemorySemanticStore {
    /** Map from repository root to its list of stored records. */
    repos = new Map();
    /**
     * Replaces everything stored for a repository with an empty list.
     *
     * @param {string} repoRoot - Repository to clear.
     * @returns {Promise<void>}
     */
    async resetRepo(repoRoot) {
        this.repos.set(repoRoot, []);
    }
    /**
     * Removes every record whose chunk matches both `projectId` and
     * `filePath`. An entry for `repoRoot` exists afterwards even if the
     * repository was unknown before the call.
     *
     * @param {string} repoRoot - Repository the file belongs to.
     * @param {string} projectId - Project the file belongs to.
     * @param {string} filePath - Path of the file to drop.
     * @returns {Promise<void>}
     */
    async deleteFile(repoRoot, projectId, filePath) {
        const belongsToFile = (record) => record.chunk.projectId === projectId && record.chunk.filePath === filePath;
        const current = this.repos.get(repoRoot) ?? [];
        this.repos.set(repoRoot, current.filter((record) => !belongsToFile(record)));
    }
    /**
     * Embeds the chunks one after another and stores them under their own
     * `repoRoot`, replacing any stored record that has the same chunk id.
     *
     * @param {object[]} chunks - Chunks to embed and store.
     * @param {{ embed(text: string): Promise<number[]> }} provider - Embedder.
     * @param {number} [_generation] - Accepted but not used by this store.
     * @returns {Promise<void>}
     */
    async upsertChunks(chunks, provider, _generation) {
        // Phase 1: embed everything first, so a failing embed leaves the
        // store untouched.
        const pendingByRepo = new Map();
        for (const chunk of chunks) {
            const embedding = await provider.embed(renderChunkForEmbedding(chunk));
            if (!pendingByRepo.has(chunk.repoRoot)) {
                pendingByRepo.set(chunk.repoRoot, []);
            }
            pendingByRepo.get(chunk.repoRoot).push({ chunk, embedding });
        }
        // Phase 2: merge by chunk id. A replaced record keeps its original
        // position; new ids are appended.
        for (const [repoRoot, incoming] of pendingByRepo) {
            const merged = new Map();
            for (const stored of this.repos.get(repoRoot) ?? []) {
                merged.set(stored.chunk.id, stored);
            }
            for (const fresh of incoming) {
                merged.set(fresh.chunk.id, fresh);
            }
            this.repos.set(repoRoot, Array.from(merged.values()));
        }
    }
    /**
     * Ranks the stored chunks of a repository by cosine similarity to the
     * embedded query text.
     *
     * @param {{ repoRoot?: string, query: string, projectId?: string, limit?: number }} query
     *   Search request; `limit` defaults to 20 and a truthy `projectId`
     *   restricts the results to that project.
     * @param {{ embed(text: string): Promise<number[]> }} provider - Embedder.
     * @returns {Promise<object[]>} Hits with a score above 0, best first.
     * @throws {Error} When `query.repoRoot` is missing (via `requireRepoRoot`).
     */
    async search(query, provider) {
        const repoRoot = requireRepoRoot(query.repoRoot);
        const queryEmbedding = await provider.embed(query.query);
        const limit = query.limit ?? 20;
        const scored = [];
        for (const record of this.repos.get(repoRoot) ?? []) {
            if (query.projectId && record.chunk.projectId !== query.projectId) {
                continue;
            }
            const score = cosineSimilarity(queryEmbedding, record.embedding);
            if (score > 0) {
                scored.push({ record, score });
            }
        }
        scored.sort((first, second) => second.score - first.score);
        return scored.slice(0, limit).map(({ record, score }) => ({
            chunk: record.chunk,
            score,
            source: "semantic",
            reason: "Vector similarity match"
        }));
    }
}
44
/**
 * Guards against searching without a repository root.
 *
 * @param {string | undefined} repoRoot - Value to validate.
 * @returns {string} `repoRoot` unchanged when it is truthy.
 * @throws {Error} When `repoRoot` is falsy.
 */
function requireRepoRoot(repoRoot) {
    if (repoRoot) {
        return repoRoot;
    }
    throw new Error("Internal error: semantic search requires a resolved repoRoot.");
}
49
/**
 * Builds the text that gets embedded for a chunk.
 *
 * @param {{ filePath?: string, symbolName?: string, language?: string, content?: string }} chunk
 *   Chunk to render.
 * @returns {string} File path, symbol name, language and content joined by
 *   newlines, in that order, with falsy parts left out.
 */
export function renderChunkForEmbedding(chunk) {
    const { filePath, symbolName, language, content } = chunk;
    const sections = [filePath, symbolName, language, content].filter((section) => Boolean(section));
    return sections.join("\n");
}
52
/**
 * Cosine similarity of two numeric vectors.
 *
 * Only the first `min(a.length, b.length)` components of each vector take
 * part, both in the dot product and in the magnitudes.
 *
 * @param {number[]} a - First vector.
 * @param {number[]} b - Second vector.
 * @returns {number} The cosine of the angle between the compared prefixes,
 *   or 0 when either prefix has zero magnitude (including empty input).
 */
export function cosineSimilarity(a, b) {
    const sharedLength = Math.min(a.length, b.length);
    let dotProduct = 0;
    let squaredNormA = 0;
    let squaredNormB = 0;
    for (let index = 0; index < sharedLength; index += 1) {
        const x = a[index];
        const y = b[index];
        dotProduct += x * y;
        squaredNormA += x * x;
        squaredNormB += y * y;
    }
    const hasZeroVector = squaredNormA === 0 || squaredNormB === 0;
    return hasZeroVector ? 0 : dotProduct / (Math.sqrt(squaredNormA) * Math.sqrt(squaredNormB));
}