rbxstudio-mcp 2.3.2 → 2.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (175) hide show
  1. package/README.md +67 -14
  2. package/dist/__tests__/bridge-service.test.js +25 -13
  3. package/dist/__tests__/bridge-service.test.js.map +1 -1
  4. package/dist/__tests__/bridge-session.test.d.ts +2 -0
  5. package/dist/__tests__/bridge-session.test.d.ts.map +1 -0
  6. package/dist/__tests__/bridge-session.test.js +171 -0
  7. package/dist/__tests__/bridge-session.test.js.map +1 -0
  8. package/dist/__tests__/chunker.test.d.ts +2 -0
  9. package/dist/__tests__/chunker.test.d.ts.map +1 -0
  10. package/dist/__tests__/chunker.test.js +201 -0
  11. package/dist/__tests__/chunker.test.js.map +1 -0
  12. package/dist/__tests__/docs-core.test.d.ts +2 -0
  13. package/dist/__tests__/docs-core.test.d.ts.map +1 -0
  14. package/dist/__tests__/docs-core.test.js +137 -0
  15. package/dist/__tests__/docs-core.test.js.map +1 -0
  16. package/dist/__tests__/docs-fetcher.test.d.ts +2 -0
  17. package/dist/__tests__/docs-fetcher.test.d.ts.map +1 -0
  18. package/dist/__tests__/docs-fetcher.test.js +173 -0
  19. package/dist/__tests__/docs-fetcher.test.js.map +1 -0
  20. package/dist/__tests__/helpers.d.ts +8 -0
  21. package/dist/__tests__/helpers.d.ts.map +1 -0
  22. package/dist/__tests__/helpers.js +23 -0
  23. package/dist/__tests__/helpers.js.map +1 -0
  24. package/dist/__tests__/http-routes.test.d.ts +2 -0
  25. package/dist/__tests__/http-routes.test.d.ts.map +1 -0
  26. package/dist/__tests__/http-routes.test.js +233 -0
  27. package/dist/__tests__/http-routes.test.js.map +1 -0
  28. package/dist/__tests__/http-server.test.js +13 -6
  29. package/dist/__tests__/http-server.test.js.map +1 -1
  30. package/dist/__tests__/integration.test.js +9 -4
  31. package/dist/__tests__/integration.test.js.map +1 -1
  32. package/dist/__tests__/semantic-search.test.d.ts +2 -0
  33. package/dist/__tests__/semantic-search.test.d.ts.map +1 -0
  34. package/dist/__tests__/semantic-search.test.js +202 -0
  35. package/dist/__tests__/semantic-search.test.js.map +1 -0
  36. package/dist/__tests__/smoke.test.js +7 -3
  37. package/dist/__tests__/smoke.test.js.map +1 -1
  38. package/dist/__tests__/studio-client.test.d.ts +2 -0
  39. package/dist/__tests__/studio-client.test.d.ts.map +1 -0
  40. package/dist/__tests__/studio-client.test.js +25 -0
  41. package/dist/__tests__/studio-client.test.js.map +1 -0
  42. package/dist/__tests__/tool-nudges.test.d.ts +2 -0
  43. package/dist/__tests__/tool-nudges.test.d.ts.map +1 -0
  44. package/dist/__tests__/tool-nudges.test.js +60 -0
  45. package/dist/__tests__/tool-nudges.test.js.map +1 -0
  46. package/dist/__tests__/tool-registry.test.d.ts +2 -0
  47. package/dist/__tests__/tool-registry.test.d.ts.map +1 -0
  48. package/dist/__tests__/tool-registry.test.js +365 -0
  49. package/dist/__tests__/tool-registry.test.js.map +1 -0
  50. package/dist/__tests__/tools-bridge.test.d.ts +2 -0
  51. package/dist/__tests__/tools-bridge.test.d.ts.map +1 -0
  52. package/dist/__tests__/tools-bridge.test.js +396 -0
  53. package/dist/__tests__/tools-bridge.test.js.map +1 -0
  54. package/dist/__tests__/tools-docs.test.d.ts +2 -0
  55. package/dist/__tests__/tools-docs.test.d.ts.map +1 -0
  56. package/dist/__tests__/tools-docs.test.js +112 -0
  57. package/dist/__tests__/tools-docs.test.js.map +1 -0
  58. package/dist/__tests__/tools-guards.test.d.ts +2 -0
  59. package/dist/__tests__/tools-guards.test.d.ts.map +1 -0
  60. package/dist/__tests__/tools-guards.test.js +131 -0
  61. package/dist/__tests__/tools-guards.test.js.map +1 -0
  62. package/dist/__tests__/tools-runtime.test.d.ts +2 -0
  63. package/dist/__tests__/tools-runtime.test.d.ts.map +1 -0
  64. package/dist/__tests__/tools-runtime.test.js +214 -0
  65. package/dist/__tests__/tools-runtime.test.js.map +1 -0
  66. package/dist/__tests__/tools-visual.test.d.ts +2 -0
  67. package/dist/__tests__/tools-visual.test.d.ts.map +1 -0
  68. package/dist/__tests__/tools-visual.test.js +149 -0
  69. package/dist/__tests__/tools-visual.test.js.map +1 -0
  70. package/dist/bridge-service.d.ts +99 -12
  71. package/dist/bridge-service.d.ts.map +1 -1
  72. package/dist/bridge-service.js +238 -21
  73. package/dist/bridge-service.js.map +1 -1
  74. package/dist/docs/cache.d.ts +50 -0
  75. package/dist/docs/cache.d.ts.map +1 -0
  76. package/dist/docs/cache.js +123 -0
  77. package/dist/docs/cache.js.map +1 -0
  78. package/dist/docs/embeddings/chunker.d.ts +120 -0
  79. package/dist/docs/embeddings/chunker.d.ts.map +1 -0
  80. package/dist/docs/embeddings/chunker.js +395 -0
  81. package/dist/docs/embeddings/chunker.js.map +1 -0
  82. package/dist/docs/embeddings/embedder.d.ts +41 -0
  83. package/dist/docs/embeddings/embedder.d.ts.map +1 -0
  84. package/dist/docs/embeddings/embedder.js +113 -0
  85. package/dist/docs/embeddings/embedder.js.map +1 -0
  86. package/dist/docs/embeddings/index.d.ts +102 -0
  87. package/dist/docs/embeddings/index.d.ts.map +1 -0
  88. package/dist/docs/embeddings/index.js +250 -0
  89. package/dist/docs/embeddings/index.js.map +1 -0
  90. package/dist/docs/embeddings/manager.d.ts +68 -0
  91. package/dist/docs/embeddings/manager.d.ts.map +1 -0
  92. package/dist/docs/embeddings/manager.js +97 -0
  93. package/dist/docs/embeddings/manager.js.map +1 -0
  94. package/dist/docs/fetcher.d.ts +29 -0
  95. package/dist/docs/fetcher.d.ts.map +1 -0
  96. package/dist/docs/fetcher.js +244 -0
  97. package/dist/docs/fetcher.js.map +1 -0
  98. package/dist/docs/reference.d.ts +37 -0
  99. package/dist/docs/reference.d.ts.map +1 -0
  100. package/dist/docs/reference.js +108 -0
  101. package/dist/docs/reference.js.map +1 -0
  102. package/dist/docs/search.d.ts +194 -0
  103. package/dist/docs/search.d.ts.map +1 -0
  104. package/dist/docs/search.js +733 -0
  105. package/dist/docs/search.js.map +1 -0
  106. package/dist/http-server.d.ts.map +1 -1
  107. package/dist/http-server.js +52 -5
  108. package/dist/http-server.js.map +1 -1
  109. package/dist/index.d.ts +8 -9
  110. package/dist/index.d.ts.map +1 -1
  111. package/dist/index.js +35 -1035
  112. package/dist/index.js.map +1 -1
  113. package/dist/instructions.d.ts +15 -0
  114. package/dist/instructions.d.ts.map +1 -0
  115. package/dist/instructions.js +26 -0
  116. package/dist/instructions.js.map +1 -0
  117. package/dist/tools/defs/attributes.d.ts +6 -0
  118. package/dist/tools/defs/attributes.d.ts.map +1 -0
  119. package/dist/tools/defs/attributes.js +85 -0
  120. package/dist/tools/defs/attributes.js.map +1 -0
  121. package/dist/tools/defs/docs.d.ts +17 -0
  122. package/dist/tools/defs/docs.d.ts.map +1 -0
  123. package/dist/tools/defs/docs.js +151 -0
  124. package/dist/tools/defs/docs.js.map +1 -0
  125. package/dist/tools/defs/execute.d.ts +6 -0
  126. package/dist/tools/defs/execute.d.ts.map +1 -0
  127. package/dist/tools/defs/execute.js +21 -0
  128. package/dist/tools/defs/execute.js.map +1 -0
  129. package/dist/tools/defs/inspection.d.ts +7 -0
  130. package/dist/tools/defs/inspection.d.ts.map +1 -0
  131. package/dist/tools/defs/inspection.js +202 -0
  132. package/dist/tools/defs/inspection.js.map +1 -0
  133. package/dist/tools/defs/objects.d.ts +6 -0
  134. package/dist/tools/defs/objects.d.ts.map +1 -0
  135. package/dist/tools/defs/objects.js +111 -0
  136. package/dist/tools/defs/objects.js.map +1 -0
  137. package/dist/tools/defs/properties.d.ts +6 -0
  138. package/dist/tools/defs/properties.d.ts.map +1 -0
  139. package/dist/tools/defs/properties.js +71 -0
  140. package/dist/tools/defs/properties.js.map +1 -0
  141. package/dist/tools/defs/runtime.d.ts +6 -0
  142. package/dist/tools/defs/runtime.d.ts.map +1 -0
  143. package/dist/tools/defs/runtime.js +145 -0
  144. package/dist/tools/defs/runtime.js.map +1 -0
  145. package/dist/tools/defs/scripts.d.ts +18 -0
  146. package/dist/tools/defs/scripts.d.ts.map +1 -0
  147. package/dist/tools/defs/scripts.js +163 -0
  148. package/dist/tools/defs/scripts.js.map +1 -0
  149. package/dist/tools/defs/tags.d.ts +6 -0
  150. package/dist/tools/defs/tags.d.ts.map +1 -0
  151. package/dist/tools/defs/tags.js +74 -0
  152. package/dist/tools/defs/tags.js.map +1 -0
  153. package/dist/tools/defs/visual.d.ts +7 -0
  154. package/dist/tools/defs/visual.d.ts.map +1 -0
  155. package/dist/tools/defs/visual.js +208 -0
  156. package/dist/tools/defs/visual.js.map +1 -0
  157. package/dist/tools/index.d.ts +101 -25
  158. package/dist/tools/index.d.ts.map +1 -1
  159. package/dist/tools/index.js +580 -63
  160. package/dist/tools/index.js.map +1 -1
  161. package/dist/tools/nudges.d.ts +25 -0
  162. package/dist/tools/nudges.d.ts.map +1 -0
  163. package/dist/tools/nudges.js +34 -0
  164. package/dist/tools/nudges.js.map +1 -0
  165. package/dist/tools/registry.d.ts +20 -0
  166. package/dist/tools/registry.d.ts.map +1 -0
  167. package/dist/tools/registry.js +65 -0
  168. package/dist/tools/registry.js.map +1 -0
  169. package/dist/tools/types.d.ts +24 -0
  170. package/dist/tools/types.d.ts.map +1 -0
  171. package/dist/tools/types.js +2 -0
  172. package/dist/tools/types.js.map +1 -0
  173. package/package.json +7 -6
  174. package/studio-plugin/MCPPlugin.rbxmx +3 -238
  175. package/studio-plugin/plugin.luau +2041 -365
@@ -0,0 +1,41 @@
1
+ /**
2
+ * Thin wrapper around @huggingface/transformers' feature-extraction
3
+ * pipeline.
4
+ *
5
+ * Why a wrapper instead of using `pipeline()` directly at call sites?
6
+ * 1. The pipeline is HEAVY to load (~90MB fp32 model download + ~200ms
7
+ * ORT init). We want exactly one instance per process, created
8
+ * lazily on first use.
9
+ * 2. The library is pure ESM and import-heavy; isolating it here
10
+ * means the rest of the codebase doesn't pay the cost on startup
11
+ * and tests that don't need embeddings don't load it.
12
+ * 3. We want a stable surface (encode(string[]) → Float32Array[])
13
+ * regardless of which Transformers.js version we're on.
14
+ *
15
+ * Model: sentence-transformers/all-MiniLM-L6-v2 (via the Xenova ONNX
16
+ * port). 384-dim normalized sentence embeddings, ~90MB on disk as fp32 (~25MB as q8),
17
+ * ~5–10ms per encode on a modern CPU. Trained on 1B+ sentence pairs
18
+ * with contrastive loss → great general-purpose retrieval quality at
19
+ * tiny model size.
20
+ */
21
+ export declare const EMBED_DIM = 384;
22
+ export declare const EMBED_MODEL = "Xenova/all-MiniLM-L6-v2";
23
+ export declare function __setEmbedderForTests(fn: ((texts: string[]) => Promise<Float32Array[]>) | null): void;
24
+ /**
25
+ * Encode an array of strings into normalized 384-dim Float32 vectors.
26
+ *
27
+ * Normalization is done by the model (pooling: 'mean', normalize: true)
28
+ * so cosine similarity == plain dot product, which is hot-path-cheap.
29
+ *
30
+ * Batched internally — pass big arrays (50–200 items per call is fine)
31
+ * to amortize tensor allocation overhead.
32
+ */
33
+ export declare function encode(texts: string[]): Promise<Float32Array[]>;
34
+ /** Convenience: encode a single string. */
35
+ export declare function encodeOne(text: string): Promise<Float32Array>;
36
+ /**
37
+ * Dot product of two normalized vectors == cosine similarity. Tight
38
+ * loop, no allocations, called millions of times during query.
39
+ */
40
+ export declare function dot(a: Float32Array, b: Float32Array): number;
41
+ //# sourceMappingURL=embedder.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"embedder.d.ts","sourceRoot":"","sources":["../../../src/docs/embeddings/embedder.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;GAmBG;AAEH,eAAO,MAAM,SAAS,MAAM,CAAC;AAC7B,eAAO,MAAM,WAAW,4BAA4B,CAAC;AAerD,wBAAgB,qBAAqB,CACnC,EAAE,EAAE,CAAC,CAAC,KAAK,EAAE,MAAM,EAAE,KAAK,OAAO,CAAC,YAAY,EAAE,CAAC,CAAC,GAAG,IAAI,GACxD,IAAI,CAKN;AAuBD;;;;;;;;GAQG;AACH,wBAAsB,MAAM,CAAC,KAAK,EAAE,MAAM,EAAE,GAAG,OAAO,CAAC,YAAY,EAAE,CAAC,CA6BrE;AAED,2CAA2C;AAC3C,wBAAsB,SAAS,CAAC,IAAI,EAAE,MAAM,GAAG,OAAO,CAAC,YAAY,CAAC,CAGnE;AAED;;;GAGG;AACH,wBAAgB,GAAG,CAAC,CAAC,EAAE,YAAY,EAAE,CAAC,EAAE,YAAY,GAAG,MAAM,CAK5D"}
@@ -0,0 +1,113 @@
1
+ /**
2
+ * Thin wrapper around @huggingface/transformers' feature-extraction
3
+ * pipeline.
4
+ *
5
+ * Why a wrapper instead of using `pipeline()` directly at call sites?
6
+ * 1. The pipeline is HEAVY to load (~90MB fp32 model download + ~200ms
7
+ * ORT init). We want exactly one instance per process, created
8
+ * lazily on first use.
9
+ * 2. The library is pure ESM and import-heavy; isolating it here
10
+ * means the rest of the codebase doesn't pay the cost on startup
11
+ * and tests that don't need embeddings don't load it.
12
+ * 3. We want a stable surface (encode(string[]) → Float32Array[])
13
+ * regardless of which Transformers.js version we're on.
14
+ *
15
+ * Model: sentence-transformers/all-MiniLM-L6-v2 (via the Xenova ONNX
16
+ * port). 384-dim normalized sentence embeddings, ~90MB on disk as fp32 (~25MB as q8),
17
+ * ~5–10ms per encode on a modern CPU. Trained on 1B+ sentence pairs
18
+ * with contrastive loss → great general-purpose retrieval quality at
19
+ * tiny model size.
20
+ */
21
+ export const EMBED_DIM = 384;
22
+ export const EMBED_MODEL = 'Xenova/all-MiniLM-L6-v2';
23
+ /**
24
+ * Cached pipeline promise. We store the promise (not the resolved
25
+ * value) so concurrent first-callers all wait on the same
26
+ * initialization instead of each kicking off their own.
27
+ */
28
+ let pipelinePromise = null;
29
+ /**
30
+ * For the test/dev path where we want to inject a stub instead of
31
+ * downloading 25MB of weights into a temp dir.
32
+ */
33
+ let overrideEmbed = null;
34
/**
 * Test hook: install (or, with `null`, remove) a stub that `encode()`
 * calls instead of the real model.
 *
 * @param fn Replacement embedder, or `null` to go back to the real one.
 */
export function __setEmbedderForTests(fn) {
    // Forget any cached pipeline promise whenever the stub changes, so
    // the next un-stubbed encode() goes through loadPipeline() afresh.
    pipelinePromise = null;
    overrideEmbed = fn;
}
40
/**
 * Lazily create the shared feature-extraction pipeline.
 *
 * The in-flight promise is cached in `pipelinePromise`, so concurrent
 * first callers all wait on the same initialization. If initialization
 * fails (import error, model download error, …) the cached promise is
 * dropped again, so a later call retries instead of being handed the
 * same rejection for the rest of the process lifetime.
 *
 * @returns Promise for the callable returned by
 *   `pipeline('feature-extraction', EMBED_MODEL, …)`.
 */
async function loadPipeline() {
    if (pipelinePromise)
        return pipelinePromise;
    const pending = (async () => {
        // Dynamic import keeps the heavy ESM out of the module-load graph
        // until someone actually needs an embedding. The whole transformers
        // package weighs ~30MB of JS — not something we want loaded just
        // because a user called `search_roblox_docs` with one keyword.
        const t = await import('@huggingface/transformers');
        // `pipeline('feature-extraction', ...)` returns a callable that
        // takes a string or string[] and returns a Tensor.
        return await t.pipeline('feature-extraction', EMBED_MODEL, {
            // fp32 is the safest dtype on Node — q4f16 hit ORT graph-fusion
            // bugs (see HF issue #1567). Size on disk is ~90MB for fp32 vs
            // ~25MB for q8; for a one-time download we accept that cost in
            // exchange for not having to debug runtime errors later.
            dtype: 'fp32',
        });
    })();
    pipelinePromise = pending;
    // Do not keep a failed initialization cached. Only clear the slot if
    // it still holds *this* attempt — it may have been reset or replaced
    // (e.g. by __setEmbedderForTests) while we were loading.
    pending.catch(() => {
        if (pipelinePromise === pending)
            pipelinePromise = null;
    });
    return pending;
}
61
/**
 * Encode an array of strings into normalized 384-dim Float32 vectors.
 *
 * Normalization is requested from the pipeline (pooling: 'mean',
 * normalize: true) so cosine similarity == plain dot product, which is
 * hot-path-cheap.
 *
 * The whole array is handed to the pipeline in a single call — pass big
 * arrays (50–200 items per call is fine) to amortize tensor allocation
 * overhead.
 *
 * @param texts Strings to embed. An empty array short-circuits to `[]`.
 * @returns One freshly-allocated Float32Array per input, in input order.
 *   When a test stub is installed its result is returned as-is.
 * @throws Error if the returned tensor's dimension is not EMBED_DIM, or
 *   if it does not contain exactly one row per input.
 */
export async function encode(texts) {
    if (texts.length === 0)
        return [];
    if (overrideEmbed)
        return await overrideEmbed(texts);
    const pipe = await loadPipeline();
    const output = await pipe(texts, { pooling: 'mean', normalize: true });
    // Transformers.js returns a single Tensor of shape [N, EMBED_DIM]
    // even for single-input calls. `.tolist()` would give a nested
    // array; for perf we slice the flat backing buffer ourselves.
    const data = output.data instanceof Float32Array
        ? output.data
        : Float32Array.from(output.data);
    const dim = output.dims?.[1] ?? EMBED_DIM;
    if (dim !== EMBED_DIM) {
        // Defensive: if HF ever updates the model and dim changes, we'd
        // index incompatible vectors and silently return garbage. Loud
        // failure is better.
        throw new Error(`Embedding dim mismatch: model returned ${dim}, expected ${EMBED_DIM}. ` +
            `Bump EMBED_DIM and rebuild the docs index.`);
    }
    // Same reasoning for the row count: slicing past the end of `data`
    // would hand back short or empty vectors without any error.
    const expectedLength = texts.length * dim;
    if (data.length !== expectedLength) {
        throw new Error(`Embedding shape mismatch: model returned ${data.length} values for ` +
            `${texts.length} inputs, expected ${expectedLength}.`);
    }
    const out = new Array(texts.length);
    for (let i = 0; i < texts.length; i++) {
        // TypedArray.slice copies, so callers never alias the tensor buffer.
        out[i] = data.slice(i * dim, (i + 1) * dim);
    }
    return out;
}
97
/**
 * Single-string shorthand for `encode`: wraps `text` in a one-element
 * array and returns the first resulting vector.
 */
export async function encodeOne(text) {
    const vectors = await encode([text]);
    return vectors[0];
}
102
/**
 * Plain dot product over the first `a.length` components of `a` and `b`.
 * For unit-length vectors this equals their cosine similarity. Runs in
 * the query hot path, so it allocates nothing.
 */
export function dot(a, b) {
    const len = a.length;
    let sum = 0;
    let k = 0;
    // Accumulate front-to-back; the order matters for float rounding.
    while (k < len) {
        sum += a[k] * b[k];
        k += 1;
    }
    return sum;
}
113
+ //# sourceMappingURL=embedder.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"embedder.js","sourceRoot":"","sources":["../../../src/docs/embeddings/embedder.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;GAmBG;AAEH,MAAM,CAAC,MAAM,SAAS,GAAG,GAAG,CAAC;AAC7B,MAAM,CAAC,MAAM,WAAW,GAAG,yBAAyB,CAAC;AAErD;;;;GAIG;AACH,IAAI,eAAe,GAAwB,IAAI,CAAC;AAEhD;;;GAGG;AACH,IAAI,aAAa,GAA0D,IAAI,CAAC;AAEhF,MAAM,UAAU,qBAAqB,CACnC,EAAyD;IAEzD,aAAa,GAAG,EAAE,CAAC;IACnB,iEAAiE;IACjE,iEAAiE;IACjE,eAAe,GAAG,IAAI,CAAC;AACzB,CAAC;AAED,KAAK,UAAU,YAAY;IACzB,IAAI,eAAe;QAAE,OAAO,eAAe,CAAC;IAC5C,eAAe,GAAG,CAAC,KAAK,IAAI,EAAE;QAC5B,kEAAkE;QAClE,oEAAoE;QACpE,iEAAiE;QACjE,+DAA+D;QAC/D,MAAM,CAAC,GAAG,MAAM,MAAM,CAAC,2BAA2B,CAAC,CAAC;QACpD,gEAAgE;QAChE,mDAAmD;QACnD,OAAO,MAAM,CAAC,CAAC,QAAQ,CAAC,oBAAoB,EAAE,WAAW,EAAE;YACzD,gEAAgE;YAChE,+DAA+D;YAC/D,+DAA+D;YAC/D,yDAAyD;YACzD,KAAK,EAAE,MAAM;SACd,CAAC,CAAC;IACL,CAAC,CAAC,EAAE,CAAC;IACL,OAAO,eAAe,CAAC;AACzB,CAAC;AAED;;;;;;;;GAQG;AACH,MAAM,CAAC,KAAK,UAAU,MAAM,CAAC,KAAe;IAC1C,IAAI,KAAK,CAAC,MAAM,KAAK,CAAC;QAAE,OAAO,EAAE,CAAC;IAClC,IAAI,aAAa;QAAE,OAAO,MAAM,aAAa,CAAC,KAAK,CAAC,CAAC;IAErD,MAAM,IAAI,GAAG,MAAM,YAAY,EAAE,CAAC;IAClC,MAAM,MAAM,GAAQ,MAAM,IAAI,CAAC,KAAK,EAAE,EAAE,OAAO,EAAE,MAAM,EAAE,SAAS,EAAE,IAAI,EAAE,CAAC,CAAC;IAE5E,kEAAkE;IAClE,+DAA+D;IAC/D,8DAA8D;IAC9D,MAAM,IAAI,GACR,MAAM,CAAC,IAAI,YAAY,YAAY;QACjC,CAAC,CAAC,MAAM,CAAC,IAAI;QACb,CAAC,CAAC,YAAY,CAAC,IAAI,CAAC,MAAM,CAAC,IAAI,CAAC,CAAC;IACrC,MAAM,GAAG,GAAG,MAAM,CAAC,IAAI,EAAE,CAAC,CAAC,CAAC,IAAI,SAAS,CAAC;IAC1C,IAAI,GAAG,KAAK,SAAS,EAAE,CAAC;QACtB,gEAAgE;QAChE,+DAA+D;QAC/D,qBAAqB;QACrB,MAAM,IAAI,KAAK,CACb,0CAA0C,GAAG,cAAc,SAAS,IAAI;YACtE,4CAA4C,CAC/C,CAAC;IACJ,CAAC;IACD,MAAM,GAAG,GAAmB,IAAI,KAAK,CAAC,KAAK,CAAC,MAAM,CAAC,CAAC;IACpD,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,KAAK,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;QACtC,GAAG,CAAC,CAAC,CAAC,GAAG,IAAI,CAAC,QAAQ,CAAC,CAAC,GAAG,GAAG,EAAE,CAAC,CAAC,GAAG,CAAC,CAAC,GAAG,GAAG,CAAC,CAAC,KAAK,EAAE,CAAC,CAAC,OAAO;IACjE,CAAC;IACD,OAAO,GAAG,CAAC;AACb,CAAC;AAED,2CAA2C;AAC3C,MAAM,CAAC,KAAK,UAAU,SAAS,CAAC,IAAY;IAC1C,MAAM,CAAC,GA
AG,CAAC,GAAG,MAAM,MAAM,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC;IACnC,OAAO,GAAG,CAAC;AACb,CAAC;AAED;;;GAGG;AACH,MAAM,UAAU,GAAG,CAAC,CAAe,EAAE,CAAe;IAClD,MAAM,CAAC,GAAG,CAAC,CAAC,MAAM,CAAC;IACnB,IAAI,CAAC,GAAG,CAAC,CAAC;IACV,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,CAAC,EAAE,CAAC,EAAE;QAAE,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC;IAC7C,OAAO,CAAC,CAAC;AACX,CAAC"}
@@ -0,0 +1,102 @@
1
+ import { type Chunk } from './chunker.js';
2
+ export interface IndexMeta {
3
+ schemaVersion: number;
4
+ /** Docs SHA this index was built against. Used to detect staleness. */
5
+ sha: string;
6
+ /** Embedding model identifier; mismatched models are incompatible vectors. */
7
+ model: string;
8
+ /** Vector dimension (defensive — matches embedder.EMBED_DIM at build time). */
9
+ dim: number;
10
+ /** Number of chunks indexed. */
11
+ chunkCount: number;
12
+ /** ISO timestamp of the build. */
13
+ builtAt: string;
14
+ /** ms spent building (informational). */
15
+ buildDurationMs: number;
16
+ }
17
+ export interface DocsIndex {
18
+ meta: IndexMeta;
19
+ chunks: Chunk[];
20
+ /** Packed N*dim vectors. */
21
+ vectors: Float32Array;
22
+ }
23
+ export interface QueryHit {
24
+ chunk: Chunk;
25
+ /** Raw cosine score in [-1, 1] (in practice [0, 1] for these vectors). */
26
+ score: number;
27
+ }
28
+ export declare function indexDir(cacheDir: string): string;
29
+ export interface BuildOptions {
30
+ /** Override the embedding model identifier recorded in meta. */
31
+ model?: string;
32
+ /**
33
+ * Chunks per embed call. Larger = fewer round-trips, but each call
34
+ * holds the batch in memory. 64 is a comfortable middle.
35
+ */
36
+ batchSize?: number;
37
+ /** Optional progress callback for long builds. */
38
+ onProgress?: (done: number, total: number) => void;
39
+ }
40
+ /**
41
+ * Build (or rebuild) the vector index for the docs tree in `cacheDir`.
42
+ * Writes atomically: builds into a tmp dir then renames into place
43
+ * so a partial/aborted build never leaves the index half-written.
44
+ */
45
+ export declare function buildIndex(cacheDir: string, docsSha: string, options?: BuildOptions): Promise<IndexMeta>;
46
+ /**
47
+ * Try to load the on-disk index. Returns null if absent, corrupt, or
48
+ * mismatched against `expectedSha` / model / dim. Callers should treat
49
+ * null as "needs rebuild".
50
+ */
51
+ export declare function loadIndex(cacheDir: string, expectedSha?: string, expectedModel?: string): Promise<DocsIndex | null>;
52
+ export interface QueryOptions {
53
+ /** Number of results to return. Default 10. */
54
+ topK?: number;
55
+ /**
56
+ * MMR diversity weight in [0, 1]. 0 = pure relevance, 1 = pure
57
+ * diversity. Default 0.3 — small bias toward diversity so we don't
58
+ * return five near-identical chunks from the same page.
59
+ */
60
+ diversity?: number;
61
+ /**
62
+ * Restrict to chunks whose `path` starts with this prefix.
63
+ * Mirrors `SearchOptions.scope` from the keyword search.
64
+ */
65
+ scope?: string;
66
+ /**
67
+ * Restrict to chunks of these kinds. Default: all kinds.
68
+ * Useful for "search only API reference" (yaml-member / yaml-preamble).
69
+ */
70
+ kinds?: Chunk['kind'][];
71
+ /**
72
+ * Initial candidate pool size before MMR rerank. Should be >= topK.
73
+ * Default 4× topK, capped at 200. Bigger = better diversity options
74
+ * but more cosine math at query time.
75
+ */
76
+ poolSize?: number;
77
+ }
78
+ /**
79
+ * Run a semantic query against the index. Returns top-K hits ranked
80
+ * by cosine similarity and diversified with MMR.
81
+ *
82
+ * MMR (Maximal Marginal Relevance) algorithm:
83
+ * 1. Pick the highest-cosine candidate as the first result.
84
+ * 2. For each subsequent slot, score remaining candidates as
85
+ * λ * cosine_to_query - (1 - λ) * max_cosine_to_already_picked
86
+ * and pick the highest.
87
+ * 3. Repeat until we have K results.
88
+ *
89
+ * This guarantees that a query like "Motor6D" doesn't return six
90
+ * near-duplicate snippets from the same Motor6D.yaml file — we get
91
+ * the top member, then the next-best chunk that's *also* different
92
+ * from what we've already shown.
93
+ */
94
+ export declare function query(index: DocsIndex, queryText: string, options?: QueryOptions): Promise<QueryHit[]>;
95
+ /**
96
+ * Has an index been built for this cache dir? Cheap stat — doesn't
97
+ * touch the vector file.
98
+ */
99
+ export declare function indexExists(cacheDir: string): Promise<boolean>;
100
+ /** Wipe the index directory. */
101
+ export declare function clearIndex(cacheDir: string): Promise<void>;
102
+ //# sourceMappingURL=index.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../../src/docs/embeddings/index.ts"],"names":[],"mappings":"AAEA,OAAO,EAAY,KAAK,KAAK,EAAE,MAAM,cAAc,CAAC;AAgCpD,MAAM,WAAW,SAAS;IACxB,aAAa,EAAE,MAAM,CAAC;IACtB,uEAAuE;IACvE,GAAG,EAAE,MAAM,CAAC;IACZ,8EAA8E;IAC9E,KAAK,EAAE,MAAM,CAAC;IACd,+EAA+E;IAC/E,GAAG,EAAE,MAAM,CAAC;IACZ,gCAAgC;IAChC,UAAU,EAAE,MAAM,CAAC;IACnB,kCAAkC;IAClC,OAAO,EAAE,MAAM,CAAC;IAChB,yCAAyC;IACzC,eAAe,EAAE,MAAM,CAAC;CACzB;AAED,MAAM,WAAW,SAAS;IACxB,IAAI,EAAE,SAAS,CAAC;IAChB,MAAM,EAAE,KAAK,EAAE,CAAC;IAChB,4BAA4B;IAC5B,OAAO,EAAE,YAAY,CAAC;CACvB;AAED,MAAM,WAAW,QAAQ;IACvB,KAAK,EAAE,KAAK,CAAC;IACb,0EAA0E;IAC1E,KAAK,EAAE,MAAM,CAAC;CACf;AAED,wBAAgB,QAAQ,CAAC,QAAQ,EAAE,MAAM,GAAG,MAAM,CAEjD;AAgBD,MAAM,WAAW,YAAY;IAC3B,gEAAgE;IAChE,KAAK,CAAC,EAAE,MAAM,CAAC;IACf;;;OAGG;IACH,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,kDAAkD;IAClD,UAAU,CAAC,EAAE,CAAC,IAAI,EAAE,MAAM,EAAE,KAAK,EAAE,MAAM,KAAK,IAAI,CAAC;CACpD;AAED;;;;GAIG;AACH,wBAAsB,UAAU,CAC9B,QAAQ,EAAE,MAAM,EAChB,OAAO,EAAE,MAAM,EACf,OAAO,GAAE,YAAiB,GACzB,OAAO,CAAC,SAAS,CAAC,CAqDpB;AAID;;;;GAIG;AACH,wBAAsB,SAAS,CAC7B,QAAQ,EAAE,MAAM,EAChB,WAAW,CAAC,EAAE,MAAM,EACpB,aAAa,CAAC,EAAE,MAAM,GACrB,OAAO,CAAC,SAAS,GAAG,IAAI,CAAC,CA6C3B;AAID,MAAM,WAAW,YAAY;IAC3B,+CAA+C;IAC/C,IAAI,CAAC,EAAE,MAAM,CAAC;IACd;;;;OAIG;IACH,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB;;;OAGG;IACH,KAAK,CAAC,EAAE,MAAM,CAAC;IACf;;;OAGG;IACH,KAAK,CAAC,EAAE,KAAK,CAAC,MAAM,CAAC,EAAE,CAAC;IACxB;;;;OAIG;IACH,QAAQ,CAAC,EAAE,MAAM,CAAC;CACnB;AAED;;;;;;;;;;;;;;;GAeG;AACH,wBAAsB,KAAK,CACzB,KAAK,EAAE,SAAS,EAChB,SAAS,EAAE,MAAM,EACjB,OAAO,GAAE,YAAiB,GACzB,OAAO,CAAC,QAAQ,EAAE,CAAC,CAyErB;AAQD;;;GAGG;AACH,wBAAsB,WAAW,CAAC,QAAQ,EAAE,MAAM,GAAG,OAAO,CAAC,OAAO,CAAC,CAOpE;AAED,gCAAgC;AAChC,wBAAsB,UAAU,CAAC,QAAQ,EAAE,MAAM,GAAG,OAAO,CAAC,IAAI,CAAC,CAEhE"}
@@ -0,0 +1,250 @@
1
+ import { promises as fs } from 'fs';
2
+ import * as path from 'path';
3
+ import { chunkAll } from './chunker.js';
4
+ import { dot, encode, encodeOne, EMBED_DIM } from './embedder.js';
5
+ /**
6
+ * Vector index for the Roblox creator-docs mirror.
7
+ *
8
+ * On-disk layout (sits next to the cached docs tree):
9
+ *
10
+ * <cacheDir>/
11
+ * content/ ← (existing) extracted docs
12
+ * index/
13
+ * meta.json ← schema version, chunk count, sha
14
+ * chunks.json ← Chunk[] metadata (path, lines, …)
15
+ * vectors.bin ← Float32Array, N*EMBED_DIM packed
16
+ *
17
+ * Why a separate `index/` dir instead of jamming it into `meta.json`?
18
+ * 1. We keep it adjacent to the data it indexes — the docs cache
19
+ * already knows how to wipe itself; we just nuke `index/` alongside.
20
+ * 2. `vectors.bin` is a flat Float32 dump (~4.6MB per 3k chunks, so ~29MB at the ~19k chunks noted below) —
21
+ * loading it with `fs.readFile` + `new Float32Array(buf.buffer)`
22
+ * is sub-10ms vs. parsing the same data out of JSON.
23
+ *
24
+ * Why flat cosine instead of HNSW / IVF?
25
+ * At ~19k chunks × 384 dims (measured on the real docs tree),
26
+ * brute-force takes ~5-15ms in JS — still well within "interactive"
27
+ * for a tool call. HNSW would add a dependency and complexity for
28
+ * savings that don't move the user-visible needle. If the index
29
+ * grows past ~100k chunks we should reconsider.
30
+ */
31
+ const INDEX_SCHEMA_VERSION = 2;
32
/** Directory, inside the docs cache dir, that holds the index files. */
export function indexDir(cacheDir) {
    const dir = path.join(cacheDir, 'index');
    return dir;
}
/** Path of the index's `meta.json`. */
function indexMetaPath(cacheDir) {
    const base = indexDir(cacheDir);
    return path.join(base, 'meta.json');
}
/** Path of the index's `chunks.json`. */
function indexChunksPath(cacheDir) {
    const base = indexDir(cacheDir);
    return path.join(base, 'chunks.json');
}
/** Path of the index's `vectors.bin`. */
function indexVectorsPath(cacheDir) {
    const base = indexDir(cacheDir);
    return path.join(base, 'vectors.bin');
}
44
/**
 * Build (or rebuild) the vector index for the docs tree in `cacheDir`.
 *
 * All files are first written into a sibling temp directory; only once
 * they are complete is the old index directory removed and the temp
 * directory renamed into place. An aborted build therefore never leaves
 * a half-written index behind, though there is a brief window between
 * the remove and the rename in which no index exists.
 *
 * @param cacheDir Docs cache directory; the index lands in `<cacheDir>/index`.
 * @param docsSha Docs SHA recorded in the index metadata.
 * @param options Optional `model` label for the metadata, `batchSize`
 *   (clamped to an integer in 1..256, default 64) and `onProgress(done, total)`.
 * @returns The metadata that was written to `meta.json`.
 * @throws Error if no chunks are produced, or if the embedder returns
 *   the wrong number or size of vectors for a batch. File-system errors
 *   propagate after the temp directory has been cleaned up.
 */
export async function buildIndex(cacheDir, docsSha, options = {}) {
    const t0 = Date.now();
    const model = options.model ?? 'Xenova/all-MiniLM-L6-v2';
    // Force an integer batch size: NaN would end the loop below after
    // the first batch, and a fractional step would misalign row offsets.
    const rawBatch = Number(options.batchSize ?? 64);
    const batchSize = Number.isNaN(rawBatch)
        ? 64
        : Math.max(1, Math.min(Math.trunc(rawBatch), 256));
    const chunks = await chunkAll(cacheDir);
    if (chunks.length === 0) {
        throw new Error(`No chunks produced from ${cacheDir}/content — is the docs cache empty?`);
    }
    // Embed in batches. The packed Float32Array is allocated up-front
    // so each batch writes directly into its slice — no per-batch
    // concat / copy.
    const vectors = new Float32Array(chunks.length * EMBED_DIM);
    for (let start = 0; start < chunks.length; start += batchSize) {
        const batch = chunks.slice(start, start + batchSize);
        const vecs = await encode(batch.map((c) => c.text));
        // A missing or short vector would otherwise stay as zeros in
        // `vectors` and be indexed as if it were a real embedding.
        if (vecs.length !== batch.length) {
            throw new Error(`Embedder returned ${vecs.length} vectors for a batch of ${batch.length} chunks.`);
        }
        for (let j = 0; j < vecs.length; j++) {
            if (vecs[j].length !== EMBED_DIM) {
                throw new Error(`Embedder returned a vector of length ${vecs[j].length}, expected ${EMBED_DIM}.`);
            }
            vectors.set(vecs[j], (start + j) * EMBED_DIM);
        }
        options.onProgress?.(Math.min(start + batchSize, chunks.length), chunks.length);
    }
    // Stage everything in a tmp dir, then swap it into place.
    const dir = indexDir(cacheDir);
    const tmp = `${dir}.tmp-${process.pid}-${Date.now()}`;
    let meta;
    try {
        await fs.mkdir(tmp, { recursive: true });
        meta = {
            schemaVersion: INDEX_SCHEMA_VERSION,
            sha: docsSha,
            model,
            dim: EMBED_DIM,
            chunkCount: chunks.length,
            builtAt: new Date().toISOString(),
            buildDurationMs: Date.now() - t0,
        };
        await fs.writeFile(path.join(tmp, 'meta.json'), JSON.stringify(meta, null, 2), 'utf8');
        await fs.writeFile(path.join(tmp, 'chunks.json'), JSON.stringify(chunks), 'utf8');
        await fs.writeFile(path.join(tmp, 'vectors.bin'), Buffer.from(vectors.buffer, vectors.byteOffset, vectors.byteLength));
        // Wipe old, rename new. Two-step because some platforms (Windows)
        // don't allow renaming over an existing directory.
        await fs.rm(dir, { recursive: true, force: true });
        await fs.rename(tmp, dir);
    }
    catch (err) {
        // Best-effort cleanup so failed builds don't pile up tmp dirs;
        // the original error is what the caller needs to see.
        await fs.rm(tmp, { recursive: true, force: true }).catch(() => { });
        throw err;
    }
    return meta;
}
91
+ // ---------- Load / check ----------
92
/**
 * Try to load the on-disk index. Returns null if it is absent, corrupt,
 * or mismatched against the schema version, `expectedSha`,
 * `expectedModel` or EMBED_DIM. Callers should treat null as
 * "needs rebuild".
 *
 * @param cacheDir Docs cache directory containing `index/`.
 * @param expectedSha If given, and the stored meta has a non-empty
 *   `sha`, the two must match.
 * @param expectedModel If given, must equal the stored `model`.
 * @returns `{ meta, chunks, vectors }`, or null when a rebuild is needed.
 */
export async function loadIndex(cacheDir, expectedSha, expectedModel) {
    let metaRaw;
    let chunksRaw;
    let vecBuf;
    try {
        [metaRaw, chunksRaw, vecBuf] = await Promise.all([
            fs.readFile(indexMetaPath(cacheDir), 'utf8'),
            fs.readFile(indexChunksPath(cacheDir), 'utf8'),
            fs.readFile(indexVectorsPath(cacheDir)),
        ]);
    }
    catch {
        return null;
    }
    let meta;
    try {
        meta = JSON.parse(metaRaw);
    }
    catch {
        return null;
    }
    // Valid JSON is not necessarily an object (e.g. the literal `null`);
    // treat that as corrupt rather than throwing on property access.
    if (meta === null || typeof meta !== 'object')
        return null;
    if (meta.schemaVersion !== INDEX_SCHEMA_VERSION)
        return null;
    if (expectedSha && meta.sha && meta.sha !== expectedSha)
        return null;
    if (expectedModel && meta.model !== expectedModel)
        return null;
    if (meta.dim !== EMBED_DIM)
        return null;
    let chunks;
    try {
        chunks = JSON.parse(chunksRaw);
    }
    catch {
        return null;
    }
    if (!Array.isArray(chunks) || chunks.length !== meta.chunkCount)
        return null;
    const valueCount = meta.chunkCount * meta.dim;
    const bytesPerValue = Float32Array.BYTES_PER_ELEMENT;
    if (vecBuf.byteLength !== valueCount * bytesPerValue)
        return null;
    // Wrap the underlying buffer as a Float32Array — zero-copy when we
    // can. `vecBuf.buffer` may be larger than vecBuf if Node pooled it,
    // so honor byteOffset. A Float32Array view needs a 4-byte-aligned
    // offset; if the Buffer is not aligned, copy its bytes out instead.
    const vectors = vecBuf.byteOffset % bytesPerValue === 0
        ? new Float32Array(vecBuf.buffer, vecBuf.byteOffset, valueCount)
        : new Float32Array(vecBuf.buffer.slice(vecBuf.byteOffset, vecBuf.byteOffset + vecBuf.byteLength));
    return { meta, chunks, vectors };
}
144
/**
 * Run a semantic query against the index and return up to `topK` hits,
 * ranked by similarity to the query and diversified with MMR.
 *
 * Similarity is the raw `dot()` of the query embedding with each stored
 * chunk vector (this equals cosine similarity only if both sides are
 * unit-normalized — presumably the embedder does that; confirm there).
 *
 * MMR (Maximal Marginal Relevance):
 *   1. Pick the highest-scoring candidate as the first result.
 *   2. For each later slot, score every remaining candidate as
 *        λ * sim_to_query - (1 - λ) * max_sim_to_already_picked
 *      and pick the highest.
 *   3. Repeat until K results are chosen or the pool is exhausted.
 *
 * The redundancy term penalizes chunks that closely resemble something
 * already picked, so a query like "Motor6D" is steered away from
 * returning several near-duplicate snippets of the same file. With
 * `diversity = 0` the penalty vanishes and the result is pure relevance.
 *
 * @param index      Loaded index; reads `chunks`, `vectors` and `meta.dim`.
 * @param queryText  Text to embed. Empty text yields `[]`.
 * @param options    Optional tuning:
 *   - `topK`      results wanted (default 10, limited to 1..100)
 *   - `diversity` 0..1, weight of the redundancy penalty (default 0.3)
 *   - `poolSize`  candidates fed to MMR (default `topK * 4`, capped at
 *                 `min(200, chunk count)`, never below `topK`)
 *   - `scope`     keep only chunks whose `path` starts with this prefix
 *   - `kinds`     keep only chunks whose `kind` is in this list
 *   A NaN value for any numeric option falls back to its default.
 * @returns `{ chunk, score }` objects in MMR pick order; `score` is the
 *          chunk's similarity to the query, not its MMR score.
 */
export async function query(index, queryText, options = {}) {
    if (!queryText || index.chunks.length === 0)
        return [];
    // Resolve a numeric option, substituting the default when it is
    // missing or NaN. Without this, a NaN topK/poolSize makes poolSize
    // NaN, `slice(0, NaN)` yields an empty pool, and `pool[0].idx` throws;
    // a NaN diversity turns every MMR score into NaN so only one hit is
    // ever returned.
    const numberOr = (value, fallback) => {
        const n = Number(value ?? fallback);
        return Number.isNaN(n) ? fallback : n;
    };
    const topK = Math.max(1, Math.min(numberOr(options.topK, 10), 100));
    const diversity = clamp(numberOr(options.diversity, 0.3), 0, 1);
    const poolSize = Math.max(topK, Math.min(numberOr(options.poolSize, topK * 4), Math.min(200, index.chunks.length)));
    const qvec = await encodeOne(queryText);
    const dim = index.meta.dim;
    // Zero-copy view of one chunk's vector inside the packed array.
    const vectorAt = (chunkIdx) => index.vectors.subarray(chunkIdx * dim, (chunkIdx + 1) * dim);
    // Score every chunk that passes the scope / kind filter: one
    // dim-length dot product per surviving chunk (a plain linear scan).
    const scope = options.scope;
    const kinds = options.kinds ? new Set(options.kinds) : null;
    const scored = [];
    for (let i = 0; i < index.chunks.length; i++) {
        const c = index.chunks[i];
        if (scope && !c.path.startsWith(scope))
            continue;
        if (kinds && !kinds.has(c.kind))
            continue;
        scored.push({ idx: i, score: dot(qvec, vectorAt(i)) });
    }
    if (scored.length === 0)
        return [];
    // Keep the best `poolSize` candidates. A full sort is simple and
    // sufficient here; a heap would avoid sorting the tail if this ever
    // shows up in profiles.
    scored.sort((a, b) => b.score - a.score);
    const pool = scored.slice(0, poolSize);
    // λ in textbook MMR is the relevance weight. We expose the inverse:
    // diversity=0 → λ=1 → pure relevance, diversity=1 → λ=0 → pure
    // diversity.
    const lambda = 1 - diversity;
    // `picked` holds positions inside `pool`; the top-scoring candidate
    // always goes first.
    const picked = [0];
    const used = new Uint8Array(pool.length);
    used[0] = 1;
    // Running maximum similarity of each candidate to anything already
    // picked. Each round only has to compare against the newest pick,
    // instead of recomputing dot products against every earlier pick.
    const maxSimToPicked = new Float64Array(pool.length).fill(-Infinity);
    let lastPicked = 0;
    while (picked.length < topK && picked.length < pool.length) {
        const lastVec = vectorAt(pool[lastPicked].idx);
        let bestI = -1;
        let bestScore = -Infinity;
        for (let i = 0; i < pool.length; i++) {
            if (used[i])
                continue;
            const sim = dot(vectorAt(pool[i].idx), lastVec);
            if (sim > maxSimToPicked[i])
                maxSimToPicked[i] = sim;
            const mmrScore = lambda * pool[i].score - (1 - lambda) * maxSimToPicked[i];
            if (mmrScore > bestScore) {
                bestScore = mmrScore;
                bestI = i;
            }
        }
        // No candidate produced a comparable score (e.g. all NaN): stop
        // rather than loop forever.
        if (bestI < 0)
            break;
        picked.push(bestI);
        used[bestI] = 1;
        lastPicked = bestI;
    }
    return picked.map((p) => ({
        chunk: index.chunks[pool[p].idx],
        score: pool[p].score,
    }));
}
229
/**
 * Constrain `n` to the closed interval [lo, hi].
 *
 * NaN cannot be ordered, and Math.min/Math.max simply propagate it, so
 * a NaN input is mapped to `lo` to keep the result inside the interval.
 *
 * @param n   Value to constrain.
 * @param lo  Lower bound (also the result for NaN input).
 * @param hi  Upper bound.
 * @returns `n` limited to [lo, hi].
 */
function clamp(n, lo, hi) {
    const bounded = Math.min(hi, Math.max(lo, n));
    return Number.isNaN(bounded) ? lo : bounded;
}
232
+ // ---------- Convenience ----------
233
/**
 * Report whether an index has been written for this cache dir.
 *
 * Only the metadata file is stat'ed; the chunk and vector files are
 * never opened, so this stays cheap.
 *
 * @param cacheDir  Cache directory whose index metadata path is checked.
 * @returns true when the metadata path exists and is a regular file;
 *          false otherwise, including when the stat call fails.
 */
export async function indexExists(cacheDir) {
    let metaStat;
    try {
        metaStat = await fs.stat(indexMetaPath(cacheDir));
    }
    catch {
        // Missing file, permission error, bad path: all mean "no index".
        return false;
    }
    return metaStat.isFile();
}
246
/**
 * Remove the index directory for `cacheDir` along with everything in it.
 *
 * `force: true` makes this a no-op when the directory does not exist.
 *
 * @param cacheDir  Cache directory whose index directory is deleted.
 */
export async function clearIndex(cacheDir) {
    const target = indexDir(cacheDir);
    await fs.rm(target, { force: true, recursive: true });
}
250
+ //# sourceMappingURL=index.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"index.js","sourceRoot":"","sources":["../../../src/docs/embeddings/index.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,QAAQ,IAAI,EAAE,EAAE,MAAM,IAAI,CAAC;AACpC,OAAO,KAAK,IAAI,MAAM,MAAM,CAAC;AAC7B,OAAO,EAAE,QAAQ,EAAc,MAAM,cAAc,CAAC;AACpD,OAAO,EAAE,GAAG,EAAE,MAAM,EAAE,SAAS,EAAE,SAAS,EAAE,MAAM,eAAe,CAAC;AAElE;;;;;;;;;;;;;;;;;;;;;;;;;GAyBG;AAEH,MAAM,oBAAoB,GAAG,CAAC,CAAC;AA+B/B,MAAM,UAAU,QAAQ,CAAC,QAAgB;IACvC,OAAO,IAAI,CAAC,IAAI,CAAC,QAAQ,EAAE,OAAO,CAAC,CAAC;AACtC,CAAC;AAED,SAAS,aAAa,CAAC,QAAgB;IACrC,OAAO,IAAI,CAAC,IAAI,CAAC,QAAQ,CAAC,QAAQ,CAAC,EAAE,WAAW,CAAC,CAAC;AACpD,CAAC;AAED,SAAS,eAAe,CAAC,QAAgB;IACvC,OAAO,IAAI,CAAC,IAAI,CAAC,QAAQ,CAAC,QAAQ,CAAC,EAAE,aAAa,CAAC,CAAC;AACtD,CAAC;AAED,SAAS,gBAAgB,CAAC,QAAgB;IACxC,OAAO,IAAI,CAAC,IAAI,CAAC,QAAQ,CAAC,QAAQ,CAAC,EAAE,aAAa,CAAC,CAAC;AACtD,CAAC;AAgBD;;;;GAIG;AACH,MAAM,CAAC,KAAK,UAAU,UAAU,CAC9B,QAAgB,EAChB,OAAe,EACf,UAAwB,EAAE;IAE1B,MAAM,EAAE,GAAG,IAAI,CAAC,GAAG,EAAE,CAAC;IACtB,MAAM,KAAK,GAAG,OAAO,CAAC,KAAK,IAAI,yBAAyB,CAAC;IACzD,MAAM,SAAS,GAAG,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,IAAI,CAAC,GAAG,CAAC,OAAO,CAAC,SAAS,IAAI,EAAE,EAAE,GAAG,CAAC,CAAC,CAAC;IAEtE,MAAM,MAAM,GAAG,MAAM,QAAQ,CAAC,QAAQ,CAAC,CAAC;IACxC,IAAI,MAAM,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;QACxB,MAAM,IAAI,KAAK,CACb,2BAA2B,QAAQ,qCAAqC,CACzE,CAAC;IACJ,CAAC;IAED,kEAAkE;IAClE,8DAA8D;IAC9D,iBAAiB;IACjB,MAAM,OAAO,GAAG,IAAI,YAAY,CAAC,MAAM,CAAC,MAAM,GAAG,SAAS,CAAC,CAAC;IAC5D,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,MAAM,CAAC,MAAM,EAAE,CAAC,IAAI,SAAS,EAAE,CAAC;QAClD,MAAM,KAAK,GAAG,MAAM,CAAC,KAAK,CAAC,CAAC,EAAE,CAAC,GAAG,SAAS,CAAC,CAAC;QAC7C,MAAM,IAAI,GAAG,MAAM,MAAM,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC;QACpD,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,IAAI,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;YACrC,OAAO,CAAC,GAAG,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,GAAG,CAAC,CAAC,GAAG,SAAS,CAAC,CAAC;QAC5C,CAAC;QACD,OAAO,CAAC,UAAU,EAAE,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,GAAG,SAAS,EAAE,MAAM,CAAC,MAAM,CAAC,EAAE,MAAM,CAAC,MAAM,CAAC,CAAC;IAC9E,CAAC;IAED,yC
AAyC;IACzC,MAAM,GAAG,GAAG,QAAQ,CAAC,QAAQ,CAAC,CAAC;IAC/B,MAAM,GAAG,GAAG,GAAG,GAAG,QAAQ,OAAO,CAAC,GAAG,IAAI,IAAI,CAAC,GAAG,EAAE,EAAE,CAAC;IACtD,MAAM,EAAE,CAAC,KAAK,CAAC,GAAG,EAAE,EAAE,SAAS,EAAE,IAAI,EAAE,CAAC,CAAC;IAEzC,MAAM,IAAI,GAAc;QACtB,aAAa,EAAE,oBAAoB;QACnC,GAAG,EAAE,OAAO;QACZ,KAAK;QACL,GAAG,EAAE,SAAS;QACd,UAAU,EAAE,MAAM,CAAC,MAAM;QACzB,OAAO,EAAE,IAAI,IAAI,EAAE,CAAC,WAAW,EAAE;QACjC,eAAe,EAAE,IAAI,CAAC,GAAG,EAAE,GAAG,EAAE;KACjC,CAAC;IAEF,MAAM,EAAE,CAAC,SAAS,CAAC,IAAI,CAAC,IAAI,CAAC,GAAG,EAAE,WAAW,CAAC,EAAE,IAAI,CAAC,SAAS,CAAC,IAAI,EAAE,IAAI,EAAE,CAAC,CAAC,EAAE,MAAM,CAAC,CAAC;IACvF,MAAM,EAAE,CAAC,SAAS,CAAC,IAAI,CAAC,IAAI,CAAC,GAAG,EAAE,aAAa,CAAC,EAAE,IAAI,CAAC,SAAS,CAAC,MAAM,CAAC,EAAE,MAAM,CAAC,CAAC;IAClF,MAAM,EAAE,CAAC,SAAS,CAChB,IAAI,CAAC,IAAI,CAAC,GAAG,EAAE,aAAa,CAAC,EAC7B,MAAM,CAAC,IAAI,CAAC,OAAO,CAAC,MAAM,EAAE,OAAO,CAAC,UAAU,EAAE,OAAO,CAAC,UAAU,CAAC,CACpE,CAAC;IAEF,kEAAkE;IAClE,mDAAmD;IACnD,MAAM,EAAE,CAAC,EAAE,CAAC,GAAG,EAAE,EAAE,SAAS,EAAE,IAAI,EAAE,KAAK,EAAE,IAAI,EAAE,CAAC,CAAC;IACnD,MAAM,EAAE,CAAC,MAAM,CAAC,GAAG,EAAE,GAAG,CAAC,CAAC;IAE1B,OAAO,IAAI,CAAC;AACd,CAAC;AAED,qCAAqC;AAErC;;;;GAIG;AACH,MAAM,CAAC,KAAK,UAAU,SAAS,CAC7B,QAAgB,EAChB,WAAoB,EACpB,aAAsB;IAEtB,IAAI,OAAe,CAAC;IACpB,IAAI,SAAiB,CAAC;IACtB,IAAI,MAAc,CAAC;IACnB,IAAI,CAAC;QACH,CAAC,OAAO,EAAE,SAAS,EAAE,MAAM,CAAC,GAAG,MAAM,OAAO,CAAC,GAAG,CAAC;YAC/C,EAAE,CAAC,QAAQ,CAAC,aAAa,CAAC,QAAQ,CAAC,EAAE,MAAM,CAAC;YAC5C,EAAE,CAAC,QAAQ,CAAC,eAAe,CAAC,QAAQ,CAAC,EAAE,MAAM,CAAC;YAC9C,EAAE,CAAC,QAAQ,CAAC,gBAAgB,CAAC,QAAQ,CAAC,CAAC;SACxC,CAAC,CAAC;IACL,CAAC;IAAC,MAAM,CAAC;QACP,OAAO,IAAI,CAAC;IACd,CAAC;IAED,IAAI,IAAe,CAAC;IACpB,IAAI,CAAC;QACH,IAAI,GAAG,IAAI,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC;IAC7B,CAAC;IAAC,MAAM,CAAC;QACP,OAAO,IAAI,CAAC;IACd,CAAC;IACD,IAAI,IAAI,CAAC,aAAa,KAAK,oBAAoB;QAAE,OAAO,IAAI,CAAC;IAC7D,IAAI,WAAW,IAAI,IAAI,CAAC,GAAG,IAAI,IAAI,CAAC,GAAG,KAAK,WAAW;QAAE,OAAO,IAAI,CAAC;IACrE,IAAI,aAAa,IAAI,IAAI,CAAC,KAAK,KAAK,aAAa;QAAE,OAAO,IAAI,CAAC;IAC/D,IAAI,IAAI,CAAC,GAAG,KAAK,SAAS;QAAE,OAAO,IAAI,CAAC;IAExC,IAAI,MAAe,
CAAC;IACpB,IAAI,CAAC;QACH,MAAM,GAAG,IAAI,CAAC,KAAK,CAAC,SAAS,CAAC,CAAC;IACjC,CAAC;IAAC,MAAM,CAAC;QACP,OAAO,IAAI,CAAC;IACd,CAAC;IACD,IAAI,CAAC,KAAK,CAAC,OAAO,CAAC,MAAM,CAAC,IAAI,MAAM,CAAC,MAAM,KAAK,IAAI,CAAC,UAAU;QAAE,OAAO,IAAI,CAAC;IAE7E,4DAA4D;IAC5D,kEAAkE;IAClE,+BAA+B;IAC/B,MAAM,aAAa,GAAG,IAAI,CAAC,UAAU,GAAG,IAAI,CAAC,GAAG,GAAG,CAAC,CAAC;IACrD,IAAI,MAAM,CAAC,UAAU,KAAK,aAAa;QAAE,OAAO,IAAI,CAAC;IACrD,MAAM,OAAO,GAAG,IAAI,YAAY,CAC9B,MAAM,CAAC,MAAM,EACb,MAAM,CAAC,UAAU,EACjB,IAAI,CAAC,UAAU,GAAG,IAAI,CAAC,GAAG,CAC3B,CAAC;IAEF,OAAO,EAAE,IAAI,EAAE,MAAM,EAAE,OAAO,EAAE,CAAC;AACnC,CAAC;AA+BD;;;;;;;;;;;;;;;GAeG;AACH,MAAM,CAAC,KAAK,UAAU,KAAK,CACzB,KAAgB,EAChB,SAAiB,EACjB,UAAwB,EAAE;IAE1B,IAAI,CAAC,SAAS,IAAI,KAAK,CAAC,MAAM,CAAC,MAAM,KAAK,CAAC;QAAE,OAAO,EAAE,CAAC;IAEvD,MAAM,IAAI,GAAG,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,IAAI,CAAC,GAAG,CAAC,OAAO,CAAC,IAAI,IAAI,EAAE,EAAE,GAAG,CAAC,CAAC,CAAC;IAC5D,MAAM,SAAS,GAAG,KAAK,CAAC,OAAO,CAAC,SAAS,IAAI,GAAG,EAAE,CAAC,EAAE,CAAC,CAAC,CAAC;IACxD,MAAM,QAAQ,GAAG,IAAI,CAAC,GAAG,CACvB,IAAI,EACJ,IAAI,CAAC,GAAG,CAAC,OAAO,CAAC,QAAQ,IAAI,IAAI,GAAG,CAAC,EAAE,IAAI,CAAC,GAAG,CAAC,GAAG,EAAE,KAAK,CAAC,MAAM,CAAC,MAAM,CAAC,CAAC,CAC3E,CAAC;IAEF,MAAM,IAAI,GAAG,MAAM,SAAS,CAAC,SAAS,CAAC,CAAC;IAExC,yDAAyD;IACzD,kEAAkE;IAClE,MAAM,GAAG,GAAG,KAAK,CAAC,IAAI,CAAC,GAAG,CAAC;IAC3B,MAAM,KAAK,GAAG,OAAO,CAAC,KAAK,CAAC;IAC5B,MAAM,KAAK,GAAG,OAAO,CAAC,KAAK,CAAC,CAAC,CAAC,IAAI,GAAG,CAAC,OAAO,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC;IAE5D,MAAM,MAAM,GAAqC,EAAE,CAAC;IACpD,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,KAAK,CAAC,MAAM,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;QAC7C,MAAM,CAAC,GAAG,KAAK,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC;QAC1B,IAAI,KAAK,IAAI,CAAC,CAAC,CAAC,IAAI,CAAC,UAAU,CAAC,KAAK,CAAC;YAAE,SAAS;QACjD,IAAI,KAAK,IAAI,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,CAAC,IAAI,CAAC;YAAE,SAAS;QAC1C,MAAM,GAAG,GAAG,KAAK,CAAC,OAAO,CAAC,QAAQ,CAAC,CAAC,GAAG,GAAG,EAAE,CAAC,CAAC,GAAG,CAAC,CAAC,GAAG,GAAG,CAAC,CAAC;QAC3D,MAAM,CAAC,IAAI,CAAC,EAAE,GAAG,EAAE,CAAC,EAAE,KAAK,EAAE,GAAG,CAAC,IAAI,EAAE,GAAG,CAAC,EAAE,CAAC,CAAC;IACjD,CA
AC;IACD,IAAI,MAAM,CAAC,MAAM,KAAK,CAAC;QAAE,OAAO,EAAE,CAAC;IAEnC,mEAAmE;IACnE,wDAAwD;IACxD,MAAM,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,KAAK,GAAG,CAAC,CAAC,KAAK,CAAC,CAAC;IACzC,MAAM,IAAI,GAAG,MAAM,CAAC,KAAK,CAAC,CAAC,EAAE,QAAQ,CAAC,CAAC;IAEvC,wDAAwD;IACxD,gEAAgE;IAChE,sCAAsC;IACtC,MAAM,MAAM,GAAG,CAAC,GAAG,SAAS,CAAC;IAC7B,MAAM,MAAM,GAAa,EAAE,CAAC,CAAC,0BAA0B;IACvD,MAAM,IAAI,GAAG,IAAI,UAAU,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC;IAEzC,iDAAiD;IACjD,MAAM,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;IACf,IAAI,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC;IAEZ,OAAO,MAAM,CAAC,MAAM,GAAG,IAAI,IAAI,MAAM,CAAC,MAAM,GAAG,IAAI,CAAC,MAAM,EAAE,CAAC;QAC3D,IAAI,KAAK,GAAG,CAAC,CAAC,CAAC;QACf,IAAI,SAAS,GAAG,CAAC,QAAQ,CAAC;QAC1B,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,IAAI,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;YACrC,IAAI,IAAI,CAAC,CAAC,CAAC;gBAAE,SAAS;YACtB,MAAM,IAAI,GAAG,IAAI,CAAC,CAAC,CAAC,CAAC;YACrB,MAAM,OAAO,GAAG,KAAK,CAAC,OAAO,CAAC,QAAQ,CAAC,IAAI,CAAC,GAAG,GAAG,GAAG,EAAE,CAAC,IAAI,CAAC,GAAG,GAAG,CAAC,CAAC,GAAG,GAAG,CAAC,CAAC;YAC7E,6CAA6C;YAC7C,IAAI,MAAM,GAAG,CAAC,QAAQ,CAAC;YACvB,KAAK,MAAM,EAAE,IAAI,MAAM,EAAE,CAAC;gBACxB,MAAM,SAAS,GAAG,KAAK,CAAC,OAAO,CAAC,QAAQ,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC,GAAG,GAAG,GAAG,EAAE,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC,GAAG,GAAG,CAAC,CAAC,GAAG,GAAG,CAAC,CAAC;gBACvF,MAAM,GAAG,GAAG,GAAG,CAAC,OAAO,EAAE,SAAS,CAAC,CAAC;gBACpC,IAAI,GAAG,GAAG,MAAM;oBAAE,MAAM,GAAG,GAAG,CAAC;YACjC,CAAC;YACD,MAAM,QAAQ,GAAG,MAAM,GAAG,IAAI,CAAC,KAAK,GAAG,CAAC,CAAC,GAAG,MAAM,CAAC,GAAG,MAAM,CAAC;YAC7D,IAAI,QAAQ,GAAG,SAAS,EAAE,CAAC;gBACzB,SAAS,GAAG,QAAQ,CAAC;gBACrB,KAAK,GAAG,CAAC,CAAC;YACZ,CAAC;QACH,CAAC;QACD,IAAI,KAAK,GAAG,CAAC;YAAE,MAAM;QACrB,MAAM,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;QACnB,IAAI,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC;IAClB,CAAC;IAED,OAAO,MAAM,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC;QACxB,KAAK,EAAE,KAAK,CAAC,MAAM,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,GAAG,CAAC;QAChC,KAAK,EAAE,IAAI,CAAC,CAAC,CAAC,CAAC,KAAK;KACrB,CAAC,CAAC,CAAC;AACN,CAAC;AAED,SAAS,KAAK,CAAC,CAAS,EAAE,EAAU,EAAE,EAAU;IAC9C,OAAO,IAAI,CAAC,GAAG,CAAC,EAAE,EAAE,
IAAI,CAAC,GAAG,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,CAAC;AACvC,CAAC;AAED,oCAAoC;AAEpC;;;GAGG;AACH,MAAM,CAAC,KAAK,UAAU,WAAW,CAAC,QAAgB;IAChD,IAAI,CAAC;QACH,MAAM,IAAI,GAAG,MAAM,EAAE,CAAC,IAAI,CAAC,aAAa,CAAC,QAAQ,CAAC,CAAC,CAAC;QACpD,OAAO,IAAI,CAAC,MAAM,EAAE,CAAC;IACvB,CAAC;IAAC,MAAM,CAAC;QACP,OAAO,KAAK,CAAC;IACf,CAAC;AACH,CAAC;AAED,gCAAgC;AAChC,MAAM,CAAC,KAAK,UAAU,UAAU,CAAC,QAAgB;IAC/C,MAAM,EAAE,CAAC,EAAE,CAAC,QAAQ,CAAC,QAAQ,CAAC,EAAE,EAAE,SAAS,EAAE,IAAI,EAAE,KAAK,EAAE,IAAI,EAAE,CAAC,CAAC;AACpE,CAAC"}
@@ -0,0 +1,68 @@
1
+ import { type DocsIndex, type IndexMeta } from './index.js';
2
+ /**
3
+ * Process-wide manager for the semantic docs index.
4
+ *
5
+ * Responsibilities:
6
+ * 1. Lazy-load the index on first semantic query so plain keyword
7
+ * searches don't pay the cost.
8
+ * 2. Cache the loaded index in memory (packed Float32 vectors are
9
+ * ~29MB for the measured ~19k chunks).
10
+ * 3. Coalesce concurrent first-callers onto the same load/build
11
+ * promise so we don't run two builds in parallel.
12
+ * 4. Rebuild on docs-SHA mismatch (the fetcher handed us a new
13
+ * docs cache → invalidate stale vectors).
14
+ *
15
+ * NOT this module's job:
16
+ * - Deciding *when* to rebuild based on TTL — that's the fetcher.
17
+ * - Knowing about the keyword search — `search.ts` calls this lazily
18
+ * and falls back to keyword-only if the load/build fails.
19
+ */
20
+ interface CachedEntry { // One cached index together with the (cacheDir, sha) pair it belongs to.
20
+ cacheDir: string; // Cache directory this entry was obtained for.
21
+ sha: string; // Docs SHA paired with the index (getOrBuild looks entries up by cacheDir + sha).
22
+ index: DocsIndex; // The in-memory index itself.
23
+ }
25
+ /**
25
+ * Test seam: lets unit tests inject a precanned index and skip the
26
+ * load/build path entirely. The keyword + hybrid rerank machinery
27
+ * uses `getOrBuild` directly, so injecting here is enough to
28
+ * deterministically exercise reranking.
+ *
+ * Not intended for production callers.
+ *
+ * @param entry  Entry to install as the cached index. Passing `null`
+ *               presumably clears any injected entry — confirm against
+ *               the implementation.
29
+ */
30
+ export declare function __setIndexForTests(entry: CachedEntry | null): void;
32
+ export interface GetOrBuildOptions { // Optional behavior switches for `getOrBuild`.
32
+ /** If true, suppress build-on-miss and only attempt a load; a miss then yields null instead of a build. */
33
+ loadOnly?: boolean;
34
+ /** Build progress hook (passed through to buildIndex). Receives (done, total); the unit is presumably chunks embedded — confirm in buildIndex. */
35
+ onProgress?: (done: number, total: number) => void;
36
+ }
38
+ /**
38
+ * Return a usable in-memory index for `cacheDir`@`docsSha`, building
39
+ * it if necessary. Returns null when the index can't be obtained — the
40
+ * caller should fall back to keyword-only mode in that case.
41
+ *
42
+ * Decision tree:
43
+ * 1. We already have a hot index for this (cacheDir, sha) → return it.
44
+ * 2. Otherwise try to loadIndex() from disk — if it matches the sha,
45
+ * cache + return it.
46
+ * 3. Otherwise, if loadOnly: return null.
47
+ * 4. Otherwise build a fresh index. Heavy: ~25MB model download +
48
+ * embedding ~19k chunks. Measured at ~8 minutes on a single CPU
49
+ * box; faster on multi-core / GPU. Subsequent process restarts
50
+ * load from disk in <100ms.
51
+ *
52
+ * Errors during build are swallowed (returned as null) because we
53
+ * don't want a semantic-index failure to break the keyword search.
+ *
+ * @param cacheDir  Docs cache directory the index lives under.
+ * @param docsSha   SHA of the docs the index must match.
+ * @param options   Optional `loadOnly` switch and `onProgress` hook.
+ * @returns The index, or null when it could not be loaded or built.
54
+ */
55
+ export declare function getOrBuild(cacheDir: string, docsSha: string, options?: GetOrBuildOptions): Promise<DocsIndex | null>;
57
+ /**
57
+ * Drop the in-memory cache. Used when the docs cache itself is
58
+ * re-fetched and we know the on-disk vectors are now stale. The
59
+ * fetcher (or its caller) is expected to nuke the index directory
60
+ * separately if it wants the on-disk vectors gone too — typically
61
+ * `buildIndex` will overwrite atomically on the next call anyway.
+ *
+ * Takes no arguments, so it is not scoped to one cache dir or SHA.
+ * Files on disk are not touched by this call.
62
+ */
63
+ export declare function invalidate(): void;
65
+ /** Diagnostic: is anything cached right now? Presumably returns the cached index's metadata, and null when nothing is cached — confirm against the implementation. */
65
+ export declare function currentMeta(): IndexMeta | null;
67
+ export {};
68
+ //# sourceMappingURL=manager.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"manager.d.ts","sourceRoot":"","sources":["../../../src/docs/embeddings/manager.ts"],"names":[],"mappings":"AAAA,OAAO,EAAyB,KAAK,SAAS,EAAE,KAAK,SAAS,EAAE,MAAM,YAAY,CAAC;AAGnF;;;;;;;;;;;;;;;;;GAiBG;AAEH,UAAU,WAAW;IACnB,QAAQ,EAAE,MAAM,CAAC;IACjB,GAAG,EAAE,MAAM,CAAC;IACZ,KAAK,EAAE,SAAS,CAAC;CAClB;AAMD;;;;;GAKG;AACH,wBAAgB,kBAAkB,CAAC,KAAK,EAAE,WAAW,GAAG,IAAI,GAAG,IAAI,CAGlE;AAED,MAAM,WAAW,iBAAiB;IAChC,+DAA+D;IAC/D,QAAQ,CAAC,EAAE,OAAO,CAAC;IACnB,0DAA0D;IAC1D,UAAU,CAAC,EAAE,CAAC,IAAI,EAAE,MAAM,EAAE,KAAK,EAAE,MAAM,KAAK,IAAI,CAAC;CACpD;AAED;;;;;;;;;;;;;;;;;GAiBG;AACH,wBAAsB,UAAU,CAC9B,QAAQ,EAAE,MAAM,EAChB,OAAO,EAAE,MAAM,EACf,OAAO,GAAE,iBAAsB,GAC9B,OAAO,CAAC,SAAS,GAAG,IAAI,CAAC,CAoD3B;AAED;;;;;;GAMG;AACH,wBAAgB,UAAU,IAAI,IAAI,CAEjC;AAED,gDAAgD;AAChD,wBAAgB,WAAW,IAAI,SAAS,GAAG,IAAI,CAE9C"}