rag-lite-ts 1.0.1 → 1.0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (182) hide show
  1. package/README.md +94 -65
  2. package/dist/cli/indexer.d.ts.map +1 -1
  3. package/dist/cli/indexer.js +78 -50
  4. package/dist/cli/indexer.js.map +1 -1
  5. package/dist/cli/search.d.ts.map +1 -1
  6. package/dist/cli/search.js +13 -30
  7. package/dist/cli/search.js.map +1 -1
  8. package/dist/cli.js +2 -2
  9. package/dist/cli.js.map +1 -1
  10. package/dist/config.d.ts +34 -73
  11. package/dist/config.d.ts.map +1 -1
  12. package/dist/config.js +50 -255
  13. package/dist/config.js.map +1 -1
  14. package/dist/core/adapters.d.ts +93 -0
  15. package/dist/core/adapters.d.ts.map +1 -0
  16. package/dist/core/adapters.js +139 -0
  17. package/dist/core/adapters.js.map +1 -0
  18. package/dist/core/chunker.d.ts +117 -0
  19. package/dist/core/chunker.d.ts.map +1 -0
  20. package/dist/core/chunker.js +73 -0
  21. package/dist/core/chunker.js.map +1 -0
  22. package/dist/core/config.d.ts +102 -0
  23. package/dist/core/config.d.ts.map +1 -0
  24. package/dist/core/config.js +240 -0
  25. package/dist/core/config.js.map +1 -0
  26. package/dist/{db.d.ts → core/db.d.ts} +25 -9
  27. package/dist/core/db.d.ts.map +1 -0
  28. package/dist/{db.js → core/db.js} +86 -16
  29. package/dist/core/db.js.map +1 -0
  30. package/dist/{error-handler.d.ts → core/error-handler.d.ts} +23 -2
  31. package/dist/core/error-handler.d.ts.map +1 -0
  32. package/dist/{error-handler.js → core/error-handler.js} +51 -8
  33. package/dist/core/error-handler.js.map +1 -0
  34. package/dist/core/index.d.ts +57 -0
  35. package/dist/core/index.d.ts.map +1 -0
  36. package/dist/core/index.js +66 -0
  37. package/dist/core/index.js.map +1 -0
  38. package/dist/core/ingestion.d.ts +143 -0
  39. package/dist/core/ingestion.d.ts.map +1 -0
  40. package/dist/core/ingestion.js +347 -0
  41. package/dist/core/ingestion.js.map +1 -0
  42. package/dist/core/interfaces.d.ts +408 -0
  43. package/dist/core/interfaces.d.ts.map +1 -0
  44. package/dist/core/interfaces.js +106 -0
  45. package/dist/core/interfaces.js.map +1 -0
  46. package/dist/{path-manager.d.ts → core/path-manager.d.ts} +5 -0
  47. package/dist/core/path-manager.d.ts.map +1 -0
  48. package/dist/{path-manager.js → core/path-manager.js} +5 -0
  49. package/dist/core/path-manager.js.map +1 -0
  50. package/dist/core/search-example.d.ts +25 -0
  51. package/dist/core/search-example.d.ts.map +1 -0
  52. package/dist/core/search-example.js +138 -0
  53. package/dist/core/search-example.js.map +1 -0
  54. package/dist/core/search-pipeline-example.d.ts +21 -0
  55. package/dist/core/search-pipeline-example.d.ts.map +1 -0
  56. package/dist/core/search-pipeline-example.js +188 -0
  57. package/dist/core/search-pipeline-example.js.map +1 -0
  58. package/dist/core/search-pipeline.d.ts +111 -0
  59. package/dist/core/search-pipeline.d.ts.map +1 -0
  60. package/dist/core/search-pipeline.js +287 -0
  61. package/dist/core/search-pipeline.js.map +1 -0
  62. package/dist/core/search.d.ts +104 -0
  63. package/dist/core/search.d.ts.map +1 -0
  64. package/dist/core/search.js +218 -0
  65. package/dist/core/search.js.map +1 -0
  66. package/dist/core/types.d.ts +63 -0
  67. package/dist/core/types.d.ts.map +1 -0
  68. package/dist/core/types.js +6 -0
  69. package/dist/core/types.js.map +1 -0
  70. package/dist/{vector-index.d.ts → core/vector-index.d.ts} +4 -0
  71. package/dist/core/vector-index.d.ts.map +1 -0
  72. package/dist/{vector-index.js → core/vector-index.js} +19 -0
  73. package/dist/core/vector-index.js.map +1 -0
  74. package/dist/dom-polyfills.d.ts +6 -0
  75. package/dist/dom-polyfills.d.ts.map +1 -0
  76. package/dist/dom-polyfills.js +40 -0
  77. package/dist/dom-polyfills.js.map +1 -0
  78. package/dist/examples/clean-api-examples.d.ts +44 -0
  79. package/dist/examples/clean-api-examples.d.ts.map +1 -0
  80. package/dist/examples/clean-api-examples.js +206 -0
  81. package/dist/examples/clean-api-examples.js.map +1 -0
  82. package/dist/factories/index.d.ts +43 -0
  83. package/dist/factories/index.d.ts.map +1 -0
  84. package/dist/factories/index.js +44 -0
  85. package/dist/factories/index.js.map +1 -0
  86. package/dist/factories/text-factory.d.ts +466 -0
  87. package/dist/factories/text-factory.d.ts.map +1 -0
  88. package/dist/factories/text-factory.js +719 -0
  89. package/dist/factories/text-factory.js.map +1 -0
  90. package/dist/file-processor.d.ts +2 -2
  91. package/dist/file-processor.d.ts.map +1 -1
  92. package/dist/file-processor.js +3 -3
  93. package/dist/file-processor.js.map +1 -1
  94. package/dist/index-manager.d.ts +3 -2
  95. package/dist/index-manager.d.ts.map +1 -1
  96. package/dist/index-manager.js +13 -11
  97. package/dist/index-manager.js.map +1 -1
  98. package/dist/index.d.ts +63 -8
  99. package/dist/index.d.ts.map +1 -1
  100. package/dist/index.js +91 -16
  101. package/dist/index.js.map +1 -1
  102. package/dist/indexer.js +1 -1
  103. package/dist/indexer.js.map +1 -1
  104. package/dist/ingestion.d.ts +30 -156
  105. package/dist/ingestion.d.ts.map +1 -1
  106. package/dist/ingestion.js +58 -675
  107. package/dist/ingestion.js.map +1 -1
  108. package/dist/mcp-server.js +86 -55
  109. package/dist/mcp-server.js.map +1 -1
  110. package/dist/preprocess.js +1 -1
  111. package/dist/preprocess.js.map +1 -1
  112. package/dist/search-standalone.js +1 -1
  113. package/dist/search-standalone.js.map +1 -1
  114. package/dist/search.d.ts +32 -76
  115. package/dist/search.d.ts.map +1 -1
  116. package/dist/search.js +80 -428
  117. package/dist/search.js.map +1 -1
  118. package/dist/text/chunker.d.ts +32 -0
  119. package/dist/text/chunker.d.ts.map +1 -0
  120. package/dist/{chunker.js → text/chunker.js} +98 -75
  121. package/dist/text/chunker.js.map +1 -0
  122. package/dist/{embedder.d.ts → text/embedder.d.ts} +22 -1
  123. package/dist/text/embedder.d.ts.map +1 -0
  124. package/dist/{embedder.js → text/embedder.js} +71 -4
  125. package/dist/text/embedder.js.map +1 -0
  126. package/dist/text/index.d.ts +7 -0
  127. package/dist/text/index.d.ts.map +1 -0
  128. package/dist/text/index.js +8 -0
  129. package/dist/text/index.js.map +1 -0
  130. package/dist/text/preprocessors/index.d.ts +17 -0
  131. package/dist/text/preprocessors/index.d.ts.map +1 -0
  132. package/dist/text/preprocessors/index.js +38 -0
  133. package/dist/text/preprocessors/index.js.map +1 -0
  134. package/dist/text/preprocessors/mdx.d.ts +25 -0
  135. package/dist/text/preprocessors/mdx.d.ts.map +1 -0
  136. package/dist/text/preprocessors/mdx.js +101 -0
  137. package/dist/text/preprocessors/mdx.js.map +1 -0
  138. package/dist/text/preprocessors/mermaid.d.ts +68 -0
  139. package/dist/text/preprocessors/mermaid.d.ts.map +1 -0
  140. package/dist/text/preprocessors/mermaid.js +330 -0
  141. package/dist/text/preprocessors/mermaid.js.map +1 -0
  142. package/dist/text/preprocessors/registry.d.ts +56 -0
  143. package/dist/text/preprocessors/registry.d.ts.map +1 -0
  144. package/dist/text/preprocessors/registry.js +180 -0
  145. package/dist/text/preprocessors/registry.js.map +1 -0
  146. package/dist/text/reranker.d.ts +60 -0
  147. package/dist/text/reranker.d.ts.map +1 -0
  148. package/dist/{reranker.js → text/reranker.js} +134 -19
  149. package/dist/text/reranker.js.map +1 -0
  150. package/dist/{tokenizer.d.ts → text/tokenizer.d.ts} +1 -0
  151. package/dist/text/tokenizer.d.ts.map +1 -0
  152. package/dist/{tokenizer.js → text/tokenizer.js} +7 -2
  153. package/dist/text/tokenizer.js.map +1 -0
  154. package/dist/types.d.ts +1 -1
  155. package/dist/types.d.ts.map +1 -1
  156. package/package.json +2 -2
  157. package/dist/chunker.d.ts +0 -47
  158. package/dist/chunker.d.ts.map +0 -1
  159. package/dist/chunker.js.map +0 -1
  160. package/dist/db.d.ts.map +0 -1
  161. package/dist/db.js.map +0 -1
  162. package/dist/embedder.d.ts.map +0 -1
  163. package/dist/embedder.js.map +0 -1
  164. package/dist/error-handler.d.ts.map +0 -1
  165. package/dist/error-handler.js.map +0 -1
  166. package/dist/path-manager.d.ts.map +0 -1
  167. package/dist/path-manager.js.map +0 -1
  168. package/dist/reranker.d.ts +0 -40
  169. package/dist/reranker.d.ts.map +0 -1
  170. package/dist/reranker.js.map +0 -1
  171. package/dist/resource-manager-demo.d.ts +0 -7
  172. package/dist/resource-manager-demo.d.ts.map +0 -1
  173. package/dist/resource-manager-demo.js +0 -52
  174. package/dist/resource-manager-demo.js.map +0 -1
  175. package/dist/resource-manager.d.ts +0 -129
  176. package/dist/resource-manager.d.ts.map +0 -1
  177. package/dist/resource-manager.js +0 -389
  178. package/dist/resource-manager.js.map +0 -1
  179. package/dist/tokenizer.d.ts.map +0 -1
  180. package/dist/tokenizer.js.map +0 -1
  181. package/dist/vector-index.d.ts.map +0 -1
  182. package/dist/vector-index.js.map +0 -1
@@ -1,11 +1,10 @@
1
- import { countTokens } from './tokenizer.js';
2
1
  /**
3
- * Default chunking configuration
2
+ * Text-specific chunking implementation
3
+ * Implements the ChunkingStrategy interface for text content
4
4
  */
5
- export const DEFAULT_CHUNK_CONFIG = {
6
- chunkSize: 250, // Target 200-300 tokens
7
- chunkOverlap: 50
8
- };
5
+ import '../dom-polyfills.js';
6
+ import { DEFAULT_CHUNK_CONFIG } from '../core/chunker.js';
7
+ import { countTokens } from './tokenizer.js';
9
8
  /**
10
9
  * Split text at paragraph boundaries (double newlines)
11
10
  * This is the first tier of the chunking strategy
@@ -173,84 +172,108 @@ async function createOverlapText(text, overlapTokens) {
173
172
  return overlapText;
174
173
  }
175
174
  /**
176
- * Main chunking function implementing the three-tier strategy:
177
- * 1. Split at paragraph boundaries (double newlines)
178
- * 2. If paragraphs are too large, split at sentence boundaries
179
- * 3. If sentences are still too large, use fixed-size chunking
180
- *
181
- * @param document - Document to chunk
182
- * @param config - Chunking configuration
183
- * @returns Array of chunks with metadata
175
+ * Text chunking strategy implementation
184
176
  */
185
- export async function chunkDocument(document, config = DEFAULT_CHUNK_CONFIG) {
186
- console.log(`📝 Chunking document "${document.title}" with config: chunkSize=${config.chunkSize}, chunkOverlap=${config.chunkOverlap}`);
187
- if (!document.content || document.content.trim().length === 0) {
188
- return [];
177
+ export class TextChunkingStrategy {
178
+ appliesTo(contentType) {
179
+ return contentType === 'text';
189
180
  }
190
- // Tier 1: Split into paragraphs
191
- const paragraphs = splitIntoParagraphs(document.content);
192
- // Tier 2: For large paragraphs, split into sentences
193
- const segments = [];
194
- for (const paragraph of paragraphs) {
195
- const paragraphTokens = await countTokens(paragraph);
196
- if (paragraphTokens <= config.chunkSize) {
197
- // Paragraph is small enough, use as-is
198
- segments.push(paragraph);
181
+ async chunk(document, config) {
182
+ console.log(`📝 Chunking document "${document.title}" with config: chunkSize=${config.chunkSize}, chunkOverlap=${config.chunkOverlap}`);
183
+ if (!document.content || document.content.trim().length === 0) {
184
+ return [];
199
185
  }
200
- else {
201
- // Paragraph is too large, split into sentences
202
- const sentences = splitIntoSentences(paragraph);
203
- // Group sentences that fit within token limits
204
- let currentGroup = '';
205
- let currentTokens = 0;
206
- for (const sentence of sentences) {
207
- const sentenceTokens = await countTokens(sentence);
208
- // If single sentence exceeds limit, it will be handled in createChunksFromSegments
209
- if (sentenceTokens > config.chunkSize) {
210
- // Save current group if it has content
211
- if (currentGroup.trim()) {
212
- segments.push(currentGroup.trim());
213
- currentGroup = '';
214
- currentTokens = 0;
186
+ // Tier 1: Split into paragraphs
187
+ const paragraphs = splitIntoParagraphs(document.content);
188
+ // Tier 2: For large paragraphs, split into sentences
189
+ const segments = [];
190
+ for (const paragraph of paragraphs) {
191
+ const paragraphTokens = await countTokens(paragraph);
192
+ if (paragraphTokens <= config.chunkSize) {
193
+ // Paragraph is small enough, use as-is
194
+ segments.push(paragraph);
195
+ }
196
+ else {
197
+ // Paragraph is too large, split into sentences
198
+ const sentences = splitIntoSentences(paragraph);
199
+ // Group sentences that fit within token limits
200
+ let currentGroup = '';
201
+ let currentTokens = 0;
202
+ for (const sentence of sentences) {
203
+ const sentenceTokens = await countTokens(sentence);
204
+ // If single sentence exceeds limit, it will be handled in createChunksFromSegments
205
+ if (sentenceTokens > config.chunkSize) {
206
+ // Save current group if it has content
207
+ if (currentGroup.trim()) {
208
+ segments.push(currentGroup.trim());
209
+ currentGroup = '';
210
+ currentTokens = 0;
211
+ }
212
+ // Add the large sentence as its own segment (will be split later)
213
+ segments.push(sentence);
214
+ continue;
215
215
  }
216
- // Add the large sentence as its own segment (will be split later)
217
- segments.push(sentence);
218
- continue;
219
- }
220
- const potentialGroup = currentGroup ? `${currentGroup} ${sentence}` : sentence;
221
- const potentialTokens = await countTokens(potentialGroup);
222
- if (potentialTokens <= config.chunkSize) {
223
- currentGroup = potentialGroup;
224
- currentTokens = potentialTokens;
225
- }
226
- else {
227
- // Save current group and start new one
228
- if (currentGroup.trim()) {
229
- segments.push(currentGroup.trim());
216
+ const potentialGroup = currentGroup ? `${currentGroup} ${sentence}` : sentence;
217
+ const potentialTokens = await countTokens(potentialGroup);
218
+ if (potentialTokens <= config.chunkSize) {
219
+ currentGroup = potentialGroup;
220
+ currentTokens = potentialTokens;
221
+ }
222
+ else {
223
+ // Save current group and start new one
224
+ if (currentGroup.trim()) {
225
+ segments.push(currentGroup.trim());
226
+ }
227
+ currentGroup = sentence;
228
+ currentTokens = sentenceTokens;
230
229
  }
231
- currentGroup = sentence;
232
- currentTokens = sentenceTokens;
233
230
  }
234
- }
235
- // Add final group if it has content
236
- if (currentGroup.trim()) {
237
- segments.push(currentGroup.trim());
231
+ // Add final group if it has content
232
+ if (currentGroup.trim()) {
233
+ segments.push(currentGroup.trim());
234
+ }
238
235
  }
239
236
  }
237
+ // Tier 3: Create final chunks with overlap handling
238
+ const chunkTexts = await createChunksFromSegments(segments, config);
239
+ // Convert to GenericChunk objects
240
+ const chunks = [];
241
+ for (let i = 0; i < chunkTexts.length; i++) {
242
+ const content = chunkTexts[i];
243
+ chunks.push({
244
+ content,
245
+ contentType: document.contentType,
246
+ chunkIndex: i,
247
+ metadata: {
248
+ tokenCount: await countTokens(content),
249
+ ...document.metadata
250
+ }
251
+ });
252
+ }
253
+ return chunks;
240
254
  }
241
- // Tier 3: Create final chunks with overlap handling
242
- const chunkTexts = await createChunksFromSegments(segments, config);
243
- // Convert to Chunk objects with metadata
244
- const chunks = [];
245
- for (let i = 0; i < chunkTexts.length; i++) {
246
- const text = chunkTexts[i];
247
- const tokenCount = await countTokens(text);
248
- chunks.push({
249
- text,
250
- chunkIndex: i,
251
- tokenCount
252
- });
253
- }
255
+ }
256
+ /**
257
+ * Text document chunking function
258
+ * Converts between text-specific and generic interfaces
259
+ */
260
+ export async function chunkDocument(document, config = DEFAULT_CHUNK_CONFIG) {
261
+ const strategy = new TextChunkingStrategy();
262
+ // Convert Document to GenericDocument
263
+ const genericDocument = {
264
+ source: document.source,
265
+ title: document.title,
266
+ content: document.content,
267
+ contentType: 'text'
268
+ };
269
+ // Use the strategy to chunk
270
+ const genericChunks = await strategy.chunk(genericDocument, config);
271
+ // Convert GenericChunk back to Chunk format
272
+ const chunks = genericChunks.map(chunk => ({
273
+ text: chunk.content,
274
+ chunkIndex: chunk.chunkIndex,
275
+ tokenCount: chunk.metadata?.tokenCount || 0
276
+ }));
254
277
  return chunks;
255
278
  }
256
279
  //# sourceMappingURL=chunker.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"chunker.js","sourceRoot":"","sources":["../../src/text/chunker.ts"],"names":[],"mappings":"AAAA;;;GAGG;AAEH,OAAO,qBAAqB,CAAC;AAC7B,OAAO,EAKL,oBAAoB,EACrB,MAAM,oBAAoB,CAAC;AAC5B,OAAO,EAAE,WAAW,EAAE,MAAM,gBAAgB,CAAC;AAiB7C;;;GAGG;AACH,SAAS,mBAAmB,CAAC,IAAY;IACvC,qDAAqD;IACrD,OAAO,IAAI;SACR,KAAK,CAAC,SAAS,CAAC;SAChB,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC;SAClB,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC;AAC/B,CAAC;AAED;;;GAGG;AACH,SAAS,kBAAkB,CAAC,IAAY;IACtC,+EAA+E;IAC/E,6CAA6C;IAC7C,MAAM,SAAS,GAAG,IAAI;SACnB,KAAK,CAAC,eAAe,CAAC;SACtB,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC;SAClB,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC;IAE7B,OAAO,SAAS,CAAC;AACnB,CAAC;AAED;;;GAGG;AACH,KAAK,UAAU,wBAAwB,CAAC,IAAY,EAAE,SAAiB,EAAE,aAAqB;IAC5F,MAAM,MAAM,GAAa,EAAE,CAAC;IAC5B,MAAM,KAAK,GAAG,IAAI,CAAC,KAAK,CAAC,KAAK,CAAC,CAAC;IAEhC,IAAI,YAAY,GAAG,EAAE,CAAC;IACtB,IAAI,aAAa,GAAG,CAAC,CAAC;IACtB,IAAI,CAAC,GAAG,CAAC,CAAC;IAEV,OAAO,CAAC,GAAG,KAAK,CAAC,MAAM,EAAE,CAAC;QACxB,MAAM,IAAI,GAAG,KAAK,CAAC,CAAC,CAAC,CAAC;QACtB,MAAM,SAAS,GAAG,YAAY,CAAC,CAAC,CAAC,GAAG,YAAY,IAAI,IAAI,EAAE,CAAC,CAAC,CAAC,IAAI,CAAC;QAClE,MAAM,UAAU,GAAG,MAAM,WAAW,CAAC,SAAS,CAAC,CAAC;QAEhD,IAAI,UAAU,IAAI,SAAS,EAAE,CAAC;YAC5B,YAAY,GAAG,SAAS,CAAC;YACzB,aAAa,GAAG,UAAU,CAAC;YAC3B,CAAC,EAAE,CAAC;QACN,CAAC;aAAM,CAAC;YACN,iCAAiC;YACjC,IAAI,YAAY,EAAE,CAAC;gBACjB,MAAM,CAAC,IAAI,CAAC,YAAY,CAAC,CAAC;gBAE1B,gCAAgC;gBAChC,IAAI,aAAa,GAAG,CAAC,IAAI,MAAM,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;oBAC3C,MAAM,WAAW,GAAG,MAAM,sBAAsB,CAAC,YAAY,EAAE,aAAa,CAAC,CAAC;oBAC9E,YAAY,GAAG,WAAW,CAAC;oBAC3B,aAAa,GAAG,MAAM,WAAW,CAAC,YAAY,CAAC,CAAC;gBAClD,CAAC;qBAAM,CAAC;oBACN,YAAY,GAAG,EAAE,CAAC;oBAClB,aAAa,GAAG,CAAC,CAAC;gBACpB,CAAC;YACH,CAAC;iBAAM,CAAC;gBACN,2CAA2C;gBAC3C,MAAM,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;gBAClB,CAAC,EAAE,CAAC;YACN,CAAC;QACH,CAAC;IACH,CAAC;IAED,oCAAoC;IACpC,IAAI,YAAY,CAAC,IAAI,EAAE,EAAE,CAAC;QACxB,MAAM,CAAC,IAAI,CAAC,YAAY,CAAC,IAAI,EAAE,CAAC,CAAC;IACnC,CAAC;I
AED,OAAO,MAAM,CAAC;AAChB,CAAC;AAED;;GAEG;AACH,KAAK,UAAU,sBAAsB,CAAC,IAAY,EAAE,aAAqB;IACvE,MAAM,KAAK,GAAG,IAAI,CAAC,KAAK,CAAC,KAAK,CAAC,CAAC;IAChC,IAAI,WAAW,GAAG,EAAE,CAAC;IACrB,IAAI,MAAM,GAAG,CAAC,CAAC;IAEf,8BAA8B;IAC9B,KAAK,IAAI,CAAC,GAAG,KAAK,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC,IAAI,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC;QAC3C,MAAM,IAAI,GAAG,KAAK,CAAC,CAAC,CAAC,CAAC;QACtB,MAAM,QAAQ,GAAG,IAAI,GAAG,CAAC,WAAW,CAAC,CAAC,CAAC,GAAG,GAAG,WAAW,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC;QAC/D,MAAM,UAAU,GAAG,MAAM,WAAW,CAAC,QAAQ,CAAC,CAAC;QAE/C,IAAI,UAAU,IAAI,aAAa,EAAE,CAAC;YAChC,WAAW,GAAG,QAAQ,CAAC;YACvB,MAAM,GAAG,UAAU,CAAC;QACtB,CAAC;aAAM,CAAC;YACN,MAAM;QACR,CAAC;IACH,CAAC;IAED,OAAO,WAAW,CAAC;AACrB,CAAC;AAED;;GAEG;AACH,KAAK,UAAU,wBAAwB,CACrC,QAAkB,EAClB,MAAmB;IAEnB,MAAM,MAAM,GAAa,EAAE,CAAC;IAC5B,IAAI,YAAY,GAAG,EAAE,CAAC;IACtB,IAAI,aAAa,GAAG,CAAC,CAAC;IAEtB,KAAK,MAAM,OAAO,IAAI,QAAQ,EAAE,CAAC;QAC/B,MAAM,aAAa,GAAG,MAAM,WAAW,CAAC,OAAO,CAAC,CAAC;QAEjD,wEAAwE;QACxE,IAAI,aAAa,GAAG,MAAM,CAAC,SAAS,EAAE,CAAC;YACrC,uCAAuC;YACvC,IAAI,YAAY,CAAC,IAAI,EAAE,EAAE,CAAC;gBACxB,MAAM,CAAC,IAAI,CAAC,YAAY,CAAC,IAAI,EAAE,CAAC,CAAC;gBACjC,YAAY,GAAG,EAAE,CAAC;gBAClB,aAAa,GAAG,CAAC,CAAC;YACpB,CAAC;YAED,oEAAoE;YACpE,MAAM,SAAS,GAAG,MAAM,wBAAwB,CAAC,OAAO,EAAE,MAAM,CAAC,SAAS,EAAE,MAAM,CAAC,YAAY,CAAC,CAAC;YACjG,MAAM,CAAC,IAAI,CAAC,GAAG,SAAS,CAAC,CAAC;YAC1B,SAAS;QACX,CAAC;QAED,4DAA4D;QAC5D,MAAM,cAAc,GAAG,YAAY,CAAC,CAAC,CAAC,GAAG,YAAY,OAAO,OAAO,EAAE,CAAC,CAAC,CAAC,OAAO,CAAC;QAChF,MAAM,eAAe,GAAG,MAAM,WAAW,CAAC,cAAc,CAAC,CAAC;QAE1D,IAAI,eAAe,IAAI,MAAM,CAAC,SAAS,EAAE,CAAC;YACxC,uBAAuB;YACvB,YAAY,GAAG,cAAc,CAAC;YAC9B,aAAa,GAAG,eAAe,CAAC;QAClC,CAAC;aAAM,CAAC;YACN,yCAAyC;YACzC,IAAI,YAAY,CAAC,IAAI,EAAE,EAAE,CAAC;gBACxB,MAAM,CAAC,IAAI,CAAC,YAAY,CAAC,IAAI,EAAE,CAAC,CAAC;YACnC,CAAC;YAED,2CAA2C;YAC3C,IAAI,MAAM,CAAC,YAAY,GAAG,CAAC,IAAI,YAAY,EAAE,CAAC;gBAC5C,MAAM,WAAW,GAAG,MAAM,iBAAiB,CAAC,YAAY,EAAE,MAAM,CAAC,YAAY,CAAC,CAAC;gBAC/E,YAAY,GAAG,WAAW,CAAC,CAAC,CAAC,GAAG,WAAW,OAAO,OAAO,EAAE,CAAC,CAAC,CAAC,OAAO,CAAC;YACxE,CAAC;iBAAM,CAAC;gBACN,YAAY,GAAG
,OAAO,CAAC;YACzB,CAAC;YACD,aAAa,GAAG,MAAM,WAAW,CAAC,YAAY,CAAC,CAAC;QAClD,CAAC;IACH,CAAC;IAED,oCAAoC;IACpC,IAAI,YAAY,CAAC,IAAI,EAAE,EAAE,CAAC;QACxB,MAAM,CAAC,IAAI,CAAC,YAAY,CAAC,IAAI,EAAE,CAAC,CAAC;IACnC,CAAC;IAED,OAAO,MAAM,CAAC;AAChB,CAAC;AAED;;GAEG;AACH,KAAK,UAAU,iBAAiB,CAAC,IAAY,EAAE,aAAqB;IAClE,2FAA2F;IAC3F,MAAM,SAAS,GAAG,kBAAkB,CAAC,IAAI,CAAC,CAAC;IAC3C,IAAI,WAAW,GAAG,EAAE,CAAC;IACrB,IAAI,MAAM,GAAG,CAAC,CAAC;IAEf,KAAK,IAAI,CAAC,GAAG,SAAS,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC,IAAI,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC;QAC/C,MAAM,QAAQ,GAAG,SAAS,CAAC,CAAC,CAAC,CAAC;QAC9B,MAAM,cAAc,GAAG,MAAM,WAAW,CAAC,QAAQ,CAAC,CAAC;QAEnD,IAAI,MAAM,GAAG,cAAc,IAAI,aAAa,EAAE,CAAC;YAC7C,WAAW,GAAG,QAAQ,GAAG,CAAC,WAAW,CAAC,CAAC,CAAC,GAAG,GAAG,WAAW,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC;YAChE,MAAM,IAAI,cAAc,CAAC;QAC3B,CAAC;aAAM,CAAC;YACN,MAAM;QACR,CAAC;IACH,CAAC;IAED,OAAO,WAAW,CAAC;AACrB,CAAC;AAED;;GAEG;AACH,MAAM,OAAO,oBAAoB;IAC/B,SAAS,CAAC,WAAmB;QAC3B,OAAO,WAAW,KAAK,MAAM,CAAC;IAChC,CAAC;IAED,KAAK,CAAC,KAAK,CAAC,QAAyB,EAAE,MAAmB;QACxD,OAAO,CAAC,GAAG,CAAC,yBAAyB,QAAQ,CAAC,KAAK,4BAA4B,MAAM,CAAC,SAAS,kBAAkB,MAAM,CAAC,YAAY,EAAE,CAAC,CAAC;QAExI,IAAI,CAAC,QAAQ,CAAC,OAAO,IAAI,QAAQ,CAAC,OAAO,CAAC,IAAI,EAAE,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;YAC9D,OAAO,EAAE,CAAC;QACZ,CAAC;QAED,gCAAgC;QAChC,MAAM,UAAU,GAAG,mBAAmB,CAAC,QAAQ,CAAC,OAAO,CAAC,CAAC;QAEzD,qDAAqD;QACrD,MAAM,QAAQ,GAAa,EAAE,CAAC;QAE9B,KAAK,MAAM,SAAS,IAAI,UAAU,EAAE,CAAC;YACnC,MAAM,eAAe,GAAG,MAAM,WAAW,CAAC,SAAS,CAAC,CAAC;YAErD,IAAI,eAAe,IAAI,MAAM,CAAC,SAAS,EAAE,CAAC;gBACxC,uCAAuC;gBACvC,QAAQ,CAAC,IAAI,CAAC,SAAS,CAAC,CAAC;YAC3B,CAAC;iBAAM,CAAC;gBACN,+CAA+C;gBAC/C,MAAM,SAAS,GAAG,kBAAkB,CAAC,SAAS,CAAC,CAAC;gBAEhD,+CAA+C;gBAC/C,IAAI,YAAY,GAAG,EAAE,CAAC;gBACtB,IAAI,aAAa,GAAG,CAAC,CAAC;gBAEtB,KAAK,MAAM,QAAQ,IAAI,SAAS,EAAE,CAAC;oBACjC,MAAM,cAAc,GAAG,MAAM,WAAW,CAAC,QAAQ,CAAC,CAAC;oBAEnD,mFAAmF;oBACnF,IAAI,cAAc,GAAG,MAAM,CAAC,SAAS,EAAE,CAAC;wBACtC,uCAAuC;wBACvC,IAAI,YAAY,CAAC,IAAI,EAAE,EAAE,CAAC;4BACxB,QAAQ,CAAC,IAAI,CAAC,YAAY,CAAC,IAAI,EAAE,CAAC,CAAC;4BACnC,YAAY,GAAG,EAAE,CAAC;4BAC
lB,aAAa,GAAG,CAAC,CAAC;wBACpB,CAAC;wBACD,kEAAkE;wBAClE,QAAQ,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAC;wBACxB,SAAS;oBACX,CAAC;oBAED,MAAM,cAAc,GAAG,YAAY,CAAC,CAAC,CAAC,GAAG,YAAY,IAAI,QAAQ,EAAE,CAAC,CAAC,CAAC,QAAQ,CAAC;oBAC/E,MAAM,eAAe,GAAG,MAAM,WAAW,CAAC,cAAc,CAAC,CAAC;oBAE1D,IAAI,eAAe,IAAI,MAAM,CAAC,SAAS,EAAE,CAAC;wBACxC,YAAY,GAAG,cAAc,CAAC;wBAC9B,aAAa,GAAG,eAAe,CAAC;oBAClC,CAAC;yBAAM,CAAC;wBACN,uCAAuC;wBACvC,IAAI,YAAY,CAAC,IAAI,EAAE,EAAE,CAAC;4BACxB,QAAQ,CAAC,IAAI,CAAC,YAAY,CAAC,IAAI,EAAE,CAAC,CAAC;wBACrC,CAAC;wBACD,YAAY,GAAG,QAAQ,CAAC;wBACxB,aAAa,GAAG,cAAc,CAAC;oBACjC,CAAC;gBACH,CAAC;gBAED,oCAAoC;gBACpC,IAAI,YAAY,CAAC,IAAI,EAAE,EAAE,CAAC;oBACxB,QAAQ,CAAC,IAAI,CAAC,YAAY,CAAC,IAAI,EAAE,CAAC,CAAC;gBACrC,CAAC;YACH,CAAC;QACH,CAAC;QAED,oDAAoD;QACpD,MAAM,UAAU,GAAG,MAAM,wBAAwB,CAAC,QAAQ,EAAE,MAAM,CAAC,CAAC;QAEpE,kCAAkC;QAClC,MAAM,MAAM,GAAmB,EAAE,CAAC;QAClC,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,UAAU,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;YAC3C,MAAM,OAAO,GAAG,UAAU,CAAC,CAAC,CAAC,CAAC;YAE9B,MAAM,CAAC,IAAI,CAAC;gBACV,OAAO;gBACP,WAAW,EAAE,QAAQ,CAAC,WAAW;gBACjC,UAAU,EAAE,CAAC;gBACb,QAAQ,EAAE;oBACR,UAAU,EAAE,MAAM,WAAW,CAAC,OAAO,CAAC;oBACtC,GAAG,QAAQ,CAAC,QAAQ;iBACrB;aACF,CAAC,CAAC;QACL,CAAC;QAED,OAAO,MAAM,CAAC;IAChB,CAAC;CACF;AAED;;;GAGG;AACH,MAAM,CAAC,KAAK,UAAU,aAAa,CACjC,QAAkB,EAClB,SAAsB,oBAAoB;IAE1C,MAAM,QAAQ,GAAG,IAAI,oBAAoB,EAAE,CAAC;IAE5C,sCAAsC;IACtC,MAAM,eAAe,GAAoB;QACvC,MAAM,EAAE,QAAQ,CAAC,MAAM;QACvB,KAAK,EAAE,QAAQ,CAAC,KAAK;QACrB,OAAO,EAAE,QAAQ,CAAC,OAAO;QACzB,WAAW,EAAE,MAAM;KACpB,CAAC;IAEF,4BAA4B;IAC5B,MAAM,aAAa,GAAG,MAAM,QAAQ,CAAC,KAAK,CAAC,eAAe,EAAE,MAAM,CAAC,CAAC;IAEpE,4CAA4C;IAC5C,MAAM,MAAM,GAAY,aAAa,CAAC,GAAG,CAAC,KAAK,CAAC,EAAE,CAAC,CAAC;QAClD,IAAI,EAAE,KAAK,CAAC,OAAO;QACnB,UAAU,EAAE,KAAK,CAAC,UAAU;QAC5B,UAAU,EAAE,KAAK,CAAC,QAAQ,EAAE,UAAU,IAAI,CAAC;KAC5C,CAAC,CAAC,CAAC;IAEJ,OAAO,MAAM,CAAC;AAChB,CAAC"}
@@ -1,4 +1,5 @@
1
- import type { EmbeddingResult } from './types.js';
1
+ import '../dom-polyfills.js';
2
+ import type { EmbeddingResult, EmbedFunction } from '../core/types.js';
2
3
  /**
3
4
  * Embedding engine using transformers.js for generating embeddings
4
5
  */
@@ -98,4 +99,24 @@ export declare function getEmbeddingEngine(modelName?: string, batchSize?: numbe
98
99
  * @returns Promise resolving to the loaded embedding engine
99
100
  */
100
101
  export declare function initializeEmbeddingEngine(modelName?: string, batchSize?: number): Promise<EmbeddingEngine>;
102
+ /**
103
+
104
+ * Create an EmbedFunction implementation using the text embedding engine
105
+ * This function implements the core EmbedFunction interface for dependency injection
106
+ * @param modelName - Optional model name override
107
+ * @param batchSize - Optional batch size override
108
+ * @returns EmbedFunction that can be injected into core components
109
+ */
110
+ export declare function createTextEmbedFunction(modelName?: string, batchSize?: number): EmbedFunction;
111
+ /**
112
+ * Create a text embedding engine factory function
113
+ * @param modelName - Optional model name override
114
+ * @param batchSize - Optional batch size override
115
+ * @returns Factory function that creates initialized embedding engines
116
+ */
117
+ export declare function createTextEmbedder(modelName?: string, batchSize?: number): {
118
+ embedSingle(text: string): Promise<EmbeddingResult>;
119
+ embedBatch(texts: string[]): Promise<EmbeddingResult[]>;
120
+ embedDocumentBatch(chunks: string[]): Promise<EmbeddingResult[]>;
121
+ };
101
122
  //# sourceMappingURL=embedder.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"embedder.d.ts","sourceRoot":"","sources":["../../src/text/embedder.ts"],"names":[],"mappings":"AAAA,OAAO,qBAAqB,CAAC;AAI7B,OAAO,KAAK,EAAE,eAAe,EAAE,aAAa,EAAE,MAAM,kBAAkB,CAAC;AAUvE;;GAEG;AACH,qBAAa,eAAe;IAC1B,OAAO,CAAC,KAAK,CAAoB;IACjC,OAAO,CAAC,YAAY,CAAuB;IAC3C,OAAO,CAAC,QAAQ,CAAC,SAAS,CAAS;IACnC,OAAO,CAAC,QAAQ,CAAC,SAAS,CAAS;gBAEvB,SAAS,CAAC,EAAE,MAAM,EAAE,SAAS,CAAC,EAAE,MAAM;IAelD;;;OAGG;IACG,SAAS,IAAI,OAAO,CAAC,IAAI,CAAC;IAgEhC;;;;OAIG;IACG,UAAU,CAAC,KAAK,EAAE,MAAM,EAAE,GAAG,OAAO,CAAC,eAAe,EAAE,CAAC;IAqB7D;;;;;OAKG;YACW,6BAA6B;IAmC3C;;OAEG;YACW,8BAA8B;IAgC5C;;;;;OAKG;YACW,kBAAkB;IAqBhC;;;;OAIG;IACG,WAAW,CAAC,IAAI,EAAE,MAAM,GAAG,OAAO,CAAC,eAAe,CAAC;IAQzD;;;;;OAKG;IACG,kBAAkB,CAAC,MAAM,EAAE,MAAM,EAAE,GAAG,OAAO,CAAC,eAAe,EAAE,CAAC;IA+CtE;;;OAGG;IACH,eAAe,IAAI,MAAM;IAOzB;;;OAGG;IACH,QAAQ,IAAI,OAAO;IAInB;;;OAGG;IACH,YAAY,IAAI,MAAM;IAItB;;;OAGG;IACH,YAAY,IAAI,MAAM;IAItB;;;;OAIG;IACH,OAAO,CAAC,oBAAoB;IAa5B;;;;;OAKG;IACH,OAAO,CAAC,mBAAmB;CAM5B;AAQD;;;;;GAKG;AACH,wBAAgB,kBAAkB,CAAC,SAAS,CAAC,EAAE,MAAM,EAAE,SAAS,CAAC,EAAE,MAAM,GAAG,eAAe,CAS1F;AAED;;;;;GAKG;AACH,wBAAsB,yBAAyB,CAAC,SAAS,CAAC,EAAE,MAAM,EAAE,SAAS,CAAC,EAAE,MAAM,GAAG,OAAO,CAAC,eAAe,CAAC,CAQhH;AACD;;;;;;;GAOG;AACH,wBAAgB,uBAAuB,CAAC,SAAS,CAAC,EAAE,MAAM,EAAE,SAAS,CAAC,EAAE,MAAM,GAAG,aAAa,CAyB7F;AAED;;;;;GAKG;AACH,wBAAgB,kBAAkB,CAAC,SAAS,CAAC,EAAE,MAAM,EAAE,SAAS,CAAC,EAAE,MAAM;sBAE7C,MAAM,GAAG,OAAO,CAAC,eAAe,CAAC;sBAKjC,MAAM,EAAE,GAAG,OAAO,CAAC,eAAe,EAAE,CAAC;+BAK5B,MAAM,EAAE,GAAG,OAAO,CAAC,eAAe,EAAE,CAAC;EAKzE"}
@@ -1,7 +1,7 @@
1
- import { pipeline } from '@huggingface/transformers';
1
+ import '../dom-polyfills.js';
2
2
  import { createHash } from 'crypto';
3
- import { config } from './config.js';
4
- import { handleError, ErrorCategory, ErrorSeverity, safeExecute } from './error-handler.js';
3
+ import { config } from '../core/config.js';
4
+ import { handleError, ErrorCategory, ErrorSeverity, safeExecute } from '../core/error-handler.js';
5
5
  /**
6
6
  * List of supported embedding models
7
7
  */
@@ -34,9 +34,25 @@ export class EmbeddingEngine {
34
34
  async loadModel() {
35
35
  await safeExecute(async () => {
36
36
  console.log(`Loading embedding model: ${this.modelName}`);
37
- // Initialize the feature extraction pipeline
37
+ // Ensure DOM polyfills are set up before importing transformers
38
+ if (typeof globalThis.self === 'undefined') {
39
+ globalThis.self = globalThis;
40
+ }
41
+ if (typeof global.self === 'undefined') {
42
+ global.self = global;
43
+ }
44
+ // Additional polyfills that might be needed
45
+ if (typeof globalThis.window === 'undefined') {
46
+ globalThis.window = {};
47
+ }
48
+ if (typeof globalThis.document === 'undefined') {
49
+ globalThis.document = {};
50
+ }
51
+ console.log('Embedder polyfills set up. self is now:', typeof self !== 'undefined' ? 'defined' : 'undefined');
52
+ // Initialize the feature extraction pipeline using dynamic import
38
53
  // Let transformers.js handle model caching automatically
39
54
  try {
55
+ const { pipeline } = await import('@huggingface/transformers');
40
56
  this.model = await pipeline('feature-extraction', this.modelName, {
41
57
  cache_dir: config.model_cache_path,
42
58
  local_files_only: false,
@@ -320,4 +336,55 @@ export async function initializeEmbeddingEngine(modelName, batchSize) {
320
336
  }
321
337
  return engine;
322
338
  }
339
+ /**
340
+
341
+ * Create an EmbedFunction implementation using the text embedding engine
342
+ * This function implements the core EmbedFunction interface for dependency injection
343
+ * @param modelName - Optional model name override
344
+ * @param batchSize - Optional batch size override
345
+ * @returns EmbedFunction that can be injected into core components
346
+ */
347
+ export function createTextEmbedFunction(modelName, batchSize) {
348
+ let engine = null;
349
+ const embedFunction = async (query, contentType) => {
350
+ // Only support text content type
351
+ if (contentType && contentType !== 'text') {
352
+ throw new Error(`Text embedder only supports 'text' content type, got: ${contentType}`);
353
+ }
354
+ // Initialize engine if not already done
355
+ if (!engine) {
356
+ engine = await initializeEmbeddingEngine(modelName, batchSize);
357
+ }
358
+ // Use the existing embedSingle method
359
+ const result = await engine.embedSingle(query);
360
+ // Add contentType to the result
361
+ return {
362
+ ...result,
363
+ contentType: 'text'
364
+ };
365
+ };
366
+ return embedFunction;
367
+ }
368
+ /**
369
+ * Create a text embedding engine factory function
370
+ * @param modelName - Optional model name override
371
+ * @param batchSize - Optional batch size override
372
+ * @returns Factory function that creates initialized embedding engines
373
+ */
374
+ export function createTextEmbedder(modelName, batchSize) {
375
+ return {
376
+ async embedSingle(text) {
377
+ const engine = await initializeEmbeddingEngine(modelName, batchSize);
378
+ return engine.embedSingle(text);
379
+ },
380
+ async embedBatch(texts) {
381
+ const engine = await initializeEmbeddingEngine(modelName, batchSize);
382
+ return engine.embedBatch(texts);
383
+ },
384
+ async embedDocumentBatch(chunks) {
385
+ const engine = await initializeEmbeddingEngine(modelName, batchSize);
386
+ return engine.embedDocumentBatch(chunks);
387
+ }
388
+ };
389
+ }
323
390
  //# sourceMappingURL=embedder.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"embedder.js","sourceRoot":"","sources":["../../src/text/embedder.ts"],"names":[],"mappings":"AAAA,OAAO,qBAAqB,CAAC;AAC7B,OAAO,EAAE,UAAU,EAAE,MAAM,QAAQ,CAAC;AACpC,OAAO,EAAE,MAAM,EAAE,MAAM,mBAAmB,CAAC;AAC3C,OAAO,EAAE,WAAW,EAAE,aAAa,EAAE,aAAa,EAAe,WAAW,EAAE,MAAM,0BAA0B,CAAC;AAG/G;;GAEG;AACH,MAAM,gBAAgB,GAAG;IACvB,wCAAwC;IACxC,0BAA0B;CAC3B,CAAC;AAEF;;GAEG;AACH,MAAM,OAAO,eAAe;IAClB,KAAK,GAAe,IAAI,CAAC;IACzB,YAAY,GAAkB,IAAI,CAAC;IAC1B,SAAS,CAAS;IAClB,SAAS,CAAS;IAEnC,YAAY,SAAkB,EAAE,SAAkB;QAChD,IAAI,CAAC,SAAS,GAAG,SAAS,IAAI,MAAM,CAAC,eAAe,CAAC;QACrD,IAAI,CAAC,SAAS,GAAG,SAAS,IAAI,MAAM,CAAC,UAAU,CAAC;QAEhD,uCAAuC;QACvC,IAAI,CAAC,gBAAgB,CAAC,QAAQ,CAAC,IAAI,CAAC,SAAS,CAAC,EAAE,CAAC;YAC/C,MAAM,IAAI,KAAK,CACb,sBAAsB,IAAI,CAAC,SAAS,IAAI;gBACxC,qBAAqB,gBAAgB,CAAC,IAAI,CAAC,IAAI,CAAC,EAAE,CACnD,CAAC;QACJ,CAAC;QAED,OAAO,CAAC,GAAG,CAAC,8CAA8C,IAAI,CAAC,SAAS,gBAAgB,IAAI,CAAC,SAAS,EAAE,CAAC,CAAC;IAC5G,CAAC;IAED;;;OAGG;IACH,KAAK,CAAC,SAAS;QACb,MAAM,WAAW,CACf,KAAK,IAAI,EAAE;YACT,OAAO,CAAC,GAAG,CAAC,4BAA4B,IAAI,CAAC,SAAS,EAAE,CAAC,CAAC;YAE1D,gEAAgE;YAChE,IAAI,OAAQ,UAAkB,CAAC,IAAI,KAAK,WAAW,EAAE,CAAC;gBACnD,UAAkB,CAAC,IAAI,GAAG,UAAU,CAAC;YACxC,CAAC;YACD,IAAI,OAAQ,MAAc,CAAC,IAAI,KAAK,WAAW,EAAE,CAAC;gBAC/C,MAAc,CAAC,IAAI,GAAG,MAAM,CAAC;YAChC,CAAC;YAED,4CAA4C;YAC5C,IAAI,OAAQ,UAAkB,CAAC,MAAM,KAAK,WAAW,EAAE,CAAC;gBACrD,UAAkB,CAAC,MAAM,GAAG,EAAE,CAAC;YAClC,CAAC;YACD,IAAI,OAAQ,UAAkB,CAAC,QAAQ,KAAK,WAAW,EAAE,CAAC;gBACvD,UAAkB,CAAC,QAAQ,GAAG,EAAE,CAAC;YACpC,CAAC;YAED,OAAO,CAAC,GAAG,CAAC,yCAAyC,EAAE,OAAO,IAAI,KAAK,WAAW,CAAC,CAAC,CAAC,SAAS,CAAC,CAAC,CAAC,WAAW,CAAC,CAAC;YAE9G,kEAAkE;YAClE,yDAAyD;YACzD,IAAI,CAAC;gBACH,MAAM,EAAE,QAAQ,EAAE,GAAG,MAAM,MAAM,CAAC,2BAA2B,CAAC,CAAC;gBAC/D,IAAI,CAAC,KAAK,GAAG,MAAM,QAAQ,CAAC,oBAAoB,EAAE,IAAI,CAAC,SAAS,EAAE;oBAChE,SAAS,EAAE,MAAM,CAAC,gBAAgB;oBAClC,gBAAgB,EAAE,KAAK;oBACvB,KAAK,EAAE,MAAM,CAAC,+CAA+C;iBAC9D,CAAC,CAAC;YACL,CAAC;YAAC,OAAO,KAAK,EAAE,CAAC;gBACf,sDAAsD;gBACtD,IAAI,KAAK,YAAY,KAAK,IAAI,CAC5B,KAAK,CAAC,OAAO,CAAC,QAAQ,CAAC,SAAS,CAAC;oB
ACjC,KAAK,CAAC,OAAO,CAAC,QAAQ,CAAC,UAAU,CAAC;oBAClC,KAAK,CAAC,OAAO,CAAC,QAAQ,CAAC,OAAO,CAAC;oBAC/B,KAAK,CAAC,OAAO,CAAC,QAAQ,CAAC,WAAW,CAAC;oBACnC,KAAK,CAAC,OAAO,CAAC,QAAQ,CAAC,cAAc,CAAC;oBACtC,KAAK,CAAC,OAAO,CAAC,QAAQ,CAAC,SAAS,CAAC,CAClC,EAAE,CAAC;oBACF,MAAM,IAAI,KAAK,CACb,6BAA6B,IAAI,CAAC,SAAS,KAAK;wBAChD,wFAAwF,CACzF,CAAC;gBACJ,CAAC;gBACD,MAAM,KAAK,CAAC;YACd,CAAC;YAED,8BAA8B;YAC9B,IAAI,CAAC,YAAY,GAAG,IAAI,CAAC,oBAAoB,EAAE,CAAC;YAEhD,OAAO,CAAC,GAAG,CAAC,uCAAuC,IAAI,CAAC,YAAY,EAAE,CAAC,CAAC;QAC1E,CAAC,EACD,eAAe,EACf;YACE,QAAQ,EAAE,aAAa,CAAC,KAAK;YAC7B,QAAQ,EAAE,aAAa,CAAC,KAAK;YAC7B,QAAQ,EAAE,CAAC;SACZ,CACF,CAAC;IACJ,CAAC;IAED;;;;OAIG;IACH,KAAK,CAAC,UAAU,CAAC,KAAe;QAC9B,IAAI,CAAC,IAAI,CAAC,KAAK,EAAE,CAAC;YAChB,MAAM,IAAI,KAAK,CAAC,2CAA2C,CAAC,CAAC;QAC/D,CAAC;QAED,IAAI,KAAK,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;YACvB,OAAO,EAAE,CAAC;QACZ,CAAC;QAED,4DAA4D;QAC5D,MAAM,OAAO,GAAsB,EAAE,CAAC;QAEtC,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,KAAK,CAAC,MAAM,EAAE,CAAC,IAAI,IAAI,CAAC,SAAS,EAAE,CAAC;YACtD,MAAM,KAAK,GAAG,KAAK,CAAC,KAAK,CAAC,CAAC,EAAE,CAAC,GAAG,IAAI,CAAC,SAAS,CAAC,CAAC;YACjD,MAAM,YAAY,GAAG,MAAM,IAAI,CAAC,6BAA6B,CAAC,KAAK,EAAE,CAAC,CAAC,CAAC;YACxE,OAAO,CAAC,IAAI,CAAC,GAAG,YAAY,CAAC,CAAC;QAChC,CAAC;QAED,OAAO,OAAO,CAAC;IACjB,CAAC;IAED;;;;;OAKG;IACK,KAAK,CAAC,6BAA6B,CAAC,KAAe,EAAE,UAAkB;QAC7E,OAAO,MAAM,WAAW,CACtB,KAAK,IAAI,EAAE;YACT,wCAAwC;YACxC,MAAM,UAAU,GAAG,MAAM,IAAI,CAAC,KAAM,CAAC,KAAK,EAAE;gBAC1C,OAAO,EAAE,MAAM;gBACf,SAAS,EAAE,IAAI;aAChB,CAAC,CAAC;YAEH,oCAAoC;YACpC,MAAM,OAAO,GAAsB,EAAE,CAAC;YACtC,MAAM,aAAa,GAAG,UAAU,CAAC,MAAM,EAAE,CAAC;YAE1C,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,KAAK,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;gBACtC,MAAM,YAAY,GAAG,IAAI,CAAC,mBAAmB,CAAC,KAAK,CAAC,CAAC,CAAC,EAAE,UAAU,GAAG,CAAC,CAAC,CAAC;gBACxE,MAAM,MAAM,GAAG,IAAI,YAAY,CAAC,aAAa,CAAC,CAAC,CAAC,CAAC,CAAC;gBAElD,OAAO,CAAC,IAAI,CAAC;oBACX,YAAY;oBACZ,MAAM;iBACP,CAAC,CAAC;YACL,CAAC;YAED,OAAO,OAAO,CAAC;QACjB,CAAC,EACD,oBAAoB,KAAK,CAAC,MAAM,UAAU,EAC1C;YACE,QAAQ,EAAE,aAAa,CAAC,SAAS;YACjC,QAAQ,EAAE,aAAa,CAAC,K
AAK;YAC7B,SAAS,EAAE,IAAI;YACf,aAAa,EAAE,EAAE;SAClB,CACF,IAAI,MAAM,IAAI,CAAC,8BAA8B,CAAC,KAAK,EAAE,UAAU,CAAC,CAAC;IACpE,CAAC;IAED;;OAEG;IACK,KAAK,CAAC,8BAA8B,CAAC,KAAe,EAAE,UAAkB;QAC9E,WAAW,CACT,+BAA+B,KAAK,CAAC,MAAM,gDAAgD,EAC3F,4BAA4B,EAC5B;YACE,QAAQ,EAAE,aAAa,CAAC,SAAS;YACjC,QAAQ,EAAE,aAAa,CAAC,OAAO;YAC/B,SAAS,EAAE,IAAI;SAChB,CACF,CAAC;QAEF,MAAM,OAAO,GAAsB,EAAE,CAAC;QAEtC,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,KAAK,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;YACtC,MAAM,YAAY,GAAG,MAAM,WAAW,CACpC,GAAG,EAAE,CAAC,IAAI,CAAC,kBAAkB,CAAC,KAAK,CAAC,CAAC,CAAC,EAAE,UAAU,GAAG,CAAC,CAAC,EACvD,+BAA+B,UAAU,GAAG,CAAC,GAAG,EAChD;gBACE,QAAQ,EAAE,aAAa,CAAC,SAAS;gBACjC,QAAQ,EAAE,aAAa,CAAC,OAAO;gBAC/B,SAAS,EAAE,IAAI;aAChB,CACF,CAAC;YAEF,IAAI,YAAY,EAAE,CAAC;gBACjB,OAAO,CAAC,IAAI,CAAC,YAAY,CAAC,CAAC;YAC7B,CAAC;QACH,CAAC;QAED,OAAO,OAAO,CAAC;IACjB,CAAC;IAED;;;;;OAKG;IACK,KAAK,CAAC,kBAAkB,CAAC,IAAY,EAAE,KAAa;QAC1D,IAAI,CAAC;YACH,MAAM,UAAU,GAAG,MAAM,IAAI,CAAC,KAAM,CAAC,CAAC,IAAI,CAAC,EAAE;gBAC3C,OAAO,EAAE,MAAM;gBACf,SAAS,EAAE,IAAI;aAChB,CAAC,CAAC;YAEH,MAAM,aAAa,GAAG,UAAU,CAAC,MAAM,EAAE,CAAC;YAC1C,MAAM,YAAY,GAAG,IAAI,CAAC,mBAAmB,CAAC,IAAI,EAAE,KAAK,CAAC,CAAC;YAC3D,MAAM,MAAM,GAAG,IAAI,YAAY,CAAC,aAAa,CAAC,CAAC,CAAC,CAAC,CAAC;YAElD,OAAO;gBACL,YAAY;gBACZ,MAAM;aACP,CAAC;QACJ,CAAC;QAAC,OAAO,KAAK,EAAE,CAAC;YACf,kCAAkC;YAClC,MAAM,KAAK,CAAC;QACd,CAAC;IACH,CAAC;IAED;;;;OAIG;IACH,KAAK,CAAC,WAAW,CAAC,IAAY;QAC5B,MAAM,OAAO,GAAG,MAAM,IAAI,CAAC,UAAU,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC;QAC9C,IAAI,OAAO,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;YACzB,MAAM,IAAI,KAAK,CAAC,8CAA8C,CAAC,CAAC;QAClE,CAAC;QACD,OAAO,OAAO,CAAC,CAAC,CAAC,CAAC;IACpB,CAAC;IAED;;;;;OAKG;IACH,KAAK,CAAC,kBAAkB,CAAC,MAAgB;QACvC,IAAI,CAAC,IAAI,CAAC,KAAK,EAAE,CAAC;YAChB,MAAM,IAAI,KAAK,CAAC,2CAA2C,CAAC,CAAC;QAC/D,CAAC;QAED,IAAI,MAAM,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;YACxB,OAAO,EAAE,CAAC;QACZ,CAAC;QAED,OAAO,CAAC,GAAG,CAAC,cAAc,MAAM,CAAC,MAAM,SAAS,MAAM,CAAC,MAAM,KAAK,CAAC,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,GAAG,kBAAkB,IAAI,CAAC,SAAS,KAAK,CAAC,CAAC;QAErH,MAAM,OAAO,GAAsB,EAAE,CA
AC;QACtC,MAAM,YAAY,GAAG,IAAI,CAAC,IAAI,CAAC,MAAM,CAAC,MAAM,GAAG,IAAI,CAAC,SAAS,CAAC,CAAC;QAC/D,IAAI,eAAe,GAAG,CAAC,CAAC;QACxB,IAAI,aAAa,GAAG,CAAC,CAAC;QAEtB,KAAK,IAAI,UAAU,GAAG,CAAC,EAAE,UAAU,GAAG,YAAY,EAAE,UAAU,EAAE,EAAE,CAAC;YACjE,MAAM,QAAQ,GAAG,UAAU,GAAG,IAAI,CAAC,SAAS,CAAC;YAC7C,MAAM,MAAM,GAAG,IAAI,CAAC,GAAG,CAAC,QAAQ,GAAG,IAAI,CAAC,SAAS,EAAE,MAAM,CAAC,MAAM,CAAC,CAAC;YAClE,MAAM,KAAK,GAAG,MAAM,CAAC,KAAK,CAAC,QAAQ,EAAE,MAAM,CAAC,CAAC;YAE7C,IAAI,CAAC;gBACH,MAAM,YAAY,GAAG,MAAM,IAAI,CAAC,6BAA6B,CAAC,KAAK,EAAE,QAAQ,CAAC,CAAC;gBAC/E,OAAO,CAAC,IAAI,CAAC,GAAG,YAAY,CAAC,CAAC;gBAC9B,eAAe,IAAI,YAAY,CAAC,MAAM,CAAC;gBACvC,aAAa,IAAI,CAAC,KAAK,CAAC,MAAM,GAAG,YAAY,CAAC,MAAM,CAAC,CAAC;gBAEtD,sEAAsE;gBACtE,MAAM,gBAAgB,GAAG,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,IAAI,CAAC,KAAK,CAAC,YAAY,GAAG,EAAE,CAAC,CAAC,CAAC,CAAC,yBAAyB;gBAC9F,IAAI,CAAC,UAAU,GAAG,CAAC,CAAC,GAAG,gBAAgB,KAAK,CAAC,IAAI,UAAU,KAAK,YAAY,GAAG,CAAC,EAAE,CAAC;oBACjF,MAAM,UAAU,GAAG,IAAI,CAAC,KAAK,CAAC,CAAC,CAAC,UAAU,GAAG,CAAC,CAAC,GAAG,YAAY,CAAC,GAAG,GAAG,CAAC,CAAC;oBACvE,OAAO,CAAC,GAAG,CAAC,aAAa,eAAe,OAAO,MAAM,CAAC,MAAM,YAAY,UAAU,KAAK,aAAa,GAAG,CAAC,CAAC,CAAC,CAAC,MAAM,aAAa,UAAU,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC;gBACnJ,CAAC;YACH,CAAC;YAAC,OAAO,KAAK,EAAE,CAAC;gBACf,OAAO,CAAC,KAAK,CAAC,2BAA2B,UAAU,GAAG,CAAC,IAAI,YAAY,GAAG,EAAE,KAAK,YAAY,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC,CAAC;gBACpI,aAAa,IAAI,KAAK,CAAC,MAAM,CAAC;YAChC,CAAC;QACH,CAAC;QAED,IAAI,aAAa,GAAG,CAAC,EAAE,CAAC;YACtB,OAAO,CAAC,GAAG,CAAC,yBAAyB,eAAe,gBAAgB,aAAa,wBAAwB,CAAC,CAAC;QAC7G,CAAC;aAAM,CAAC;YACN,OAAO,CAAC,GAAG,CAAC,yBAAyB,eAAe,gCAAgC,CAAC,CAAC;QACxF,CAAC;QACD,OAAO,OAAO,CAAC;IACjB,CAAC;IAED;;;OAGG;IACH,eAAe;QACb,IAAI,CAAC,IAAI,CAAC,YAAY,EAAE,CAAC;YACvB,MAAM,IAAI,KAAK,CAAC,2CAA2C,CAAC,CAAC;QAC/D,CAAC;QACD,OAAO,IAAI,CAAC,YAAY,CAAC;IAC3B,CAAC;IAED;;;OAGG;IACH,QAAQ;QACN,OAAO,IAAI,CAAC,KAAK,KAAK,IAAI,CAAC;IAC7B,CAAC;IAED;;;OAGG;IACH,YAAY;QACV,OAAO,IAAI,CAAC,SAAS,CAAC;IACxB,CAAC;IAED;;;OAGG;IACH,YAAY;QACV,OAAO,IAAI,CAAC,SAAS,CAAC;IACxB
,CAAC;IAED;;;;OAIG;IACK,oBAAoB;QAC1B,oEAAoE;QACpE,6EAA6E;QAC7E,MAAM,UAAU,GAAG,IAAI,CAAC,SAAS,CAAC;YAChC,KAAK,EAAE,IAAI,CAAC,SAAS;YACrB,oDAAoD;YACpD,SAAS,EAAE,KAAK;YAChB,QAAQ,EAAE,MAAM;SACjB,CAAC,CAAC;QACH,MAAM,IAAI,GAAG,UAAU,CAAC,QAAQ,CAAC,CAAC,MAAM,CAAC,UAAU,CAAC,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC,SAAS,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC;QACpF,OAAO,GAAG,IAAI,CAAC,SAAS,CAAC,OAAO,CAAC,GAAG,EAAE,GAAG,CAAC,IAAI,IAAI,EAAE,CAAC;IACvD,CAAC;IAED;;;;;OAKG;IACK,mBAAmB,CAAC,IAAY,EAAE,KAAa;QACrD,qDAAqD;QACrD,oFAAoF;QACpF,MAAM,WAAW,GAAG,UAAU,CAAC,QAAQ,CAAC,CAAC,MAAM,CAAC,IAAI,CAAC,IAAI,EAAE,CAAC,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC;QAC3E,OAAO,WAAW,CAAC,SAAS,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC;IACtC,CAAC;CACF;AAED;;;GAGG;AACH,IAAI,uBAAuB,GAA2B,IAAI,CAAC;AAE3D;;;;;GAKG;AACH,MAAM,UAAU,kBAAkB,CAAC,SAAkB,EAAE,SAAkB;IACvE,mEAAmE;IACnE,sEAAsE;IACtE,IAAI,SAAS,IAAI,SAAS,EAAE,CAAC;QAC3B,uBAAuB,GAAG,IAAI,eAAe,CAAC,SAAS,EAAE,SAAS,CAAC,CAAC;IACtE,CAAC;SAAM,IAAI,CAAC,uBAAuB,EAAE,CAAC;QACpC,uBAAuB,GAAG,IAAI,eAAe,EAAE,CAAC;IAClD,CAAC;IACD,OAAO,uBAAuB,CAAC;AACjC,CAAC;AAED;;;;;GAKG;AACH,MAAM,CAAC,KAAK,UAAU,yBAAyB,CAAC,SAAkB,EAAE,SAAkB;IACpF,MAAM,MAAM,GAAG,kBAAkB,CAAC,SAAS,EAAE,SAAS,CAAC,CAAC;IAExD,IAAI,CAAC,MAAM,CAAC,QAAQ,EAAE,EAAE,CAAC;QACvB,MAAM,MAAM,CAAC,SAAS,EAAE,CAAC;IAC3B,CAAC;IAED,OAAO,MAAM,CAAC;AAChB,CAAC;AACD;;;;;;;GAOG;AACH,MAAM,UAAU,uBAAuB,CAAC,SAAkB,EAAE,SAAkB;IAC5E,IAAI,MAAM,GAA2B,IAAI,CAAC;IAE1C,MAAM,aAAa,GAAkB,KAAK,EAAE,KAAa,EAAE,WAAoB,EAA4B,EAAE;QAC3G,iCAAiC;QACjC,IAAI,WAAW,IAAI,WAAW,KAAK,MAAM,EAAE,CAAC;YAC1C,MAAM,IAAI,KAAK,CAAC,yDAAyD,WAAW,EAAE,CAAC,CAAC;QAC1F,CAAC;QAED,wCAAwC;QACxC,IAAI,CAAC,MAAM,EAAE,CAAC;YACZ,MAAM,GAAG,MAAM,yBAAyB,CAAC,SAAS,EAAE,SAAS,CAAC,CAAC;QACjE,CAAC;QAED,sCAAsC;QACtC,MAAM,MAAM,GAAG,MAAM,MAAM,CAAC,WAAW,CAAC,KAAK,CAAC,CAAC;QAE/C,gCAAgC;QAChC,OAAO;YACL,GAAG,MAAM;YACT,WAAW,EAAE,MAAM;SACpB,CAAC;IACJ,CAAC,CAAC;IAEF,OAAO,aAAa,CAAC;AACvB,CAAC;AAED;;;;;GAKG;AACH,MAAM,UAAU,kBAAkB,CAAC,SAAkB,EAAE,SAAkB;IACvE,OAAO;QACL,KAAK,CAAC,WAAW,CAAC,IAAY;YAC5B,MAAM,MAAM,GAAG,MAAM,yBAAyB,CAAC,SAAS,EAA
E,SAAS,CAAC,CAAC;YACrE,OAAO,MAAM,CAAC,WAAW,CAAC,IAAI,CAAC,CAAC;QAClC,CAAC;QAED,KAAK,CAAC,UAAU,CAAC,KAAe;YAC9B,MAAM,MAAM,GAAG,MAAM,yBAAyB,CAAC,SAAS,EAAE,SAAS,CAAC,CAAC;YACrE,OAAO,MAAM,CAAC,UAAU,CAAC,KAAK,CAAC,CAAC;QAClC,CAAC;QAED,KAAK,CAAC,kBAAkB,CAAC,MAAgB;YACvC,MAAM,MAAM,GAAG,MAAM,yBAAyB,CAAC,SAAS,EAAE,SAAS,CAAC,CAAC;YACrE,OAAO,MAAM,CAAC,kBAAkB,CAAC,MAAM,CAAC,CAAC;QAC3C,CAAC;KACF,CAAC;AACJ,CAAC"}
@@ -0,0 +1,7 @@
1
+ export { EmbeddingEngine, getEmbeddingEngine, initializeEmbeddingEngine, createTextEmbedFunction, createTextEmbedder } from './embedder.js';
2
+ export { CrossEncoderReranker, createTextRerankFunction, createTextReranker } from './reranker.js';
3
+ export { countTokens, getTokenizer, resetTokenizer } from './tokenizer.js';
4
+ export { chunkDocument, type Chunk, type Document } from '../core/chunker.js';
5
+ export { type ChunkConfig } from '../core/chunker.js';
6
+ export * from './preprocessors/index.js';
7
+ //# sourceMappingURL=index.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../src/text/index.ts"],"names":[],"mappings":"AACA,OAAO,EACL,eAAe,EACf,kBAAkB,EAClB,yBAAyB,EACzB,uBAAuB,EACvB,kBAAkB,EACnB,MAAM,eAAe,CAAC;AACvB,OAAO,EACL,oBAAoB,EACpB,wBAAwB,EACxB,kBAAkB,EACnB,MAAM,eAAe,CAAC;AACvB,OAAO,EAAE,WAAW,EAAE,YAAY,EAAE,cAAc,EAAE,MAAM,gBAAgB,CAAC;AAC3E,OAAO,EAAE,aAAa,EAAE,KAAK,KAAK,EAAE,KAAK,QAAQ,EAAE,MAAM,oBAAoB,CAAC;AAC9E,OAAO,EAAE,KAAK,WAAW,EAAE,MAAM,oBAAoB,CAAC;AAGtD,cAAc,0BAA0B,CAAC"}
@@ -0,0 +1,8 @@
1
+ // Text implementation layer exports
2
+ export { EmbeddingEngine, getEmbeddingEngine, initializeEmbeddingEngine, createTextEmbedFunction, createTextEmbedder } from './embedder.js';
3
+ export { CrossEncoderReranker, createTextRerankFunction, createTextReranker } from './reranker.js';
4
+ export { countTokens, getTokenizer, resetTokenizer } from './tokenizer.js';
5
+ export { chunkDocument } from '../core/chunker.js';
6
+ // Re-export preprocessors
7
+ export * from './preprocessors/index.js';
8
+ //# sourceMappingURL=index.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"index.js","sourceRoot":"","sources":["../../src/text/index.ts"],"names":[],"mappings":"AAAA,oCAAoC;AACpC,OAAO,EACL,eAAe,EACf,kBAAkB,EAClB,yBAAyB,EACzB,uBAAuB,EACvB,kBAAkB,EACnB,MAAM,eAAe,CAAC;AACvB,OAAO,EACL,oBAAoB,EACpB,wBAAwB,EACxB,kBAAkB,EACnB,MAAM,eAAe,CAAC;AACvB,OAAO,EAAE,WAAW,EAAE,YAAY,EAAE,cAAc,EAAE,MAAM,gBAAgB,CAAC;AAC3E,OAAO,EAAE,aAAa,EAA6B,MAAM,oBAAoB,CAAC;AAG9E,0BAA0B;AAC1B,cAAc,0BAA0B,CAAC"}
@@ -0,0 +1,17 @@
1
+ import { PreprocessorRegistry } from './registry.js';
2
+ export { PreprocessorRegistry, ContentTypeDetector } from './registry.js';
3
+ export { MdxPreprocessor } from './mdx.js';
4
+ export { MermaidPreprocessor } from './mermaid.js';
5
+ /**
6
+ * Global preprocessor registry instance
7
+ */
8
+ export declare const preprocessorRegistry: PreprocessorRegistry;
9
+ /**
10
+ * Validate that all required preprocessors are available in the registry
11
+ */
12
+ export declare function validatePreprocessorConfiguration(requiredPreprocessors: string[]): void;
13
+ /**
14
+ * Get all available preprocessor names
15
+ */
16
+ export declare function getAvailablePreprocessors(): string[];
17
+ //# sourceMappingURL=index.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../../src/text/preprocessors/index.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,oBAAoB,EAAE,MAAM,eAAe,CAAC;AAKrD,OAAO,EAAE,oBAAoB,EAAE,mBAAmB,EAAE,MAAM,eAAe,CAAC;AAC1E,OAAO,EAAE,eAAe,EAAE,MAAM,UAAU,CAAC;AAC3C,OAAO,EAAE,mBAAmB,EAAE,MAAM,cAAc,CAAC;AAenD;;GAEG;AACH,eAAO,MAAM,oBAAoB,sBAA+B,CAAC;AAEjE;;GAEG;AACH,wBAAgB,iCAAiC,CAAC,qBAAqB,EAAE,MAAM,EAAE,GAAG,IAAI,CAOvF;AAED;;GAEG;AACH,wBAAgB,yBAAyB,IAAI,MAAM,EAAE,CAEpD"}
@@ -0,0 +1,38 @@
1
+ import { PreprocessorRegistry } from './registry.js';
2
+ import { MdxPreprocessor } from './mdx.js';
3
+ import { MermaidPreprocessor } from './mermaid.js';
4
+ // Export all preprocessor classes
5
+ export { PreprocessorRegistry, ContentTypeDetector } from './registry.js';
6
+ export { MdxPreprocessor } from './mdx.js';
7
+ export { MermaidPreprocessor } from './mermaid.js';
8
/**
 * Build a fresh PreprocessorRegistry and register one instance of each
 * built-in preprocessor ('mdx', then 'mermaid') before returning it.
 */
function createPreprocessorRegistry() {
    // Name/class pairs, listed in the order they are registered.
    const builtIns = [
        ['mdx', MdxPreprocessor],
        ['mermaid', MermaidPreprocessor],
    ];
    const registry = new PreprocessorRegistry();
    for (const [name, PreprocessorClass] of builtIns) {
        registry.register(name, new PreprocessorClass());
    }
    return registry;
}
/**
 * Global preprocessor registry instance, created once when this module loads
 */
export const preprocessorRegistry = createPreprocessorRegistry();
22
/**
 * Check the given preprocessor names against the global registry.
 *
 * Returns nothing when the registry reports the list as valid; otherwise throws
 * an Error naming the missing preprocessors and the registered ones.
 */
export function validatePreprocessorConfiguration(requiredPreprocessors) {
    const outcome = preprocessorRegistry.validatePreprocessors(requiredPreprocessors);
    if (outcome.valid) {
        return;
    }
    const missingList = outcome.missing.join(', ');
    const availableList = preprocessorRegistry.getRegisteredNames().join(', ');
    throw new Error(`Missing required preprocessors: ${missingList}. Available: ${availableList}`);
}
32
/**
 * Return the names currently registered in the global preprocessor registry
 */
export function getAvailablePreprocessors() {
    const registeredNames = preprocessorRegistry.getRegisteredNames();
    return registeredNames;
}
38
+ //# sourceMappingURL=index.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"index.js","sourceRoot":"","sources":["../../../src/text/preprocessors/index.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,oBAAoB,EAAE,MAAM,eAAe,CAAC;AACrD,OAAO,EAAE,eAAe,EAAE,MAAM,UAAU,CAAC;AAC3C,OAAO,EAAE,mBAAmB,EAAE,MAAM,cAAc,CAAC;AAEnD,kCAAkC;AAClC,OAAO,EAAE,oBAAoB,EAAE,mBAAmB,EAAE,MAAM,eAAe,CAAC;AAC1E,OAAO,EAAE,eAAe,EAAE,MAAM,UAAU,CAAC;AAC3C,OAAO,EAAE,mBAAmB,EAAE,MAAM,cAAc,CAAC;AAEnD;;GAEG;AACH,SAAS,0BAA0B;IACjC,MAAM,QAAQ,GAAG,IAAI,oBAAoB,EAAE,CAAC;IAE5C,kCAAkC;IAClC,QAAQ,CAAC,QAAQ,CAAC,KAAK,EAAE,IAAI,eAAe,EAAE,CAAC,CAAC;IAChD,QAAQ,CAAC,QAAQ,CAAC,SAAS,EAAE,IAAI,mBAAmB,EAAE,CAAC,CAAC;IAExD,OAAO,QAAQ,CAAC;AAClB,CAAC;AAED;;GAEG;AACH,MAAM,CAAC,MAAM,oBAAoB,GAAG,0BAA0B,EAAE,CAAC;AAEjE;;GAEG;AACH,MAAM,UAAU,iCAAiC,CAAC,qBAA+B;IAC/E,MAAM,UAAU,GAAG,oBAAoB,CAAC,qBAAqB,CAAC,qBAAqB,CAAC,CAAC;IAErF,IAAI,CAAC,UAAU,CAAC,KAAK,EAAE,CAAC;QACtB,MAAM,WAAW,GAAG,UAAU,CAAC,OAAO,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;QAClD,MAAM,IAAI,KAAK,CAAC,mCAAmC,WAAW,gBAAgB,oBAAoB,CAAC,kBAAkB,EAAE,CAAC,IAAI,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;IACxI,CAAC;AACH,CAAC;AAED;;GAEG;AACH,MAAM,UAAU,yBAAyB;IACvC,OAAO,oBAAoB,CAAC,kBAAkB,EAAE,CAAC;AACnD,CAAC"}
@@ -0,0 +1,25 @@
1
+ import { Preprocessor, PreprocessorOptions } from '../../types.js';
2
+ /**
3
+ * MDX preprocessor for handling JSX content in Markdown files
4
+ * Ports the existing cleanMdxContent logic with mode-aware behavior
5
+ */
6
+ export declare class MdxPreprocessor implements Preprocessor {
7
+ /**
8
+ * Check if this preprocessor applies to the given language/content type
9
+ * Applies only when the language identifier is exactly 'mdx'
10
+ */
11
+ appliesTo(language: string): boolean;
12
+ /**
13
+ * Process MDX content based on the specified mode
14
+ */
15
+ process(content: string, options: PreprocessorOptions): string;
16
+ /**
17
+ * Strip JSX content entirely - ported from cleanMdxContent logic
18
+ */
19
+ private stripJsx;
20
+ /**
21
+ * Replace JSX with descriptive placeholders
22
+ */
23
+ private replaceWithPlaceholders;
24
+ }
25
+ //# sourceMappingURL=mdx.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"mdx.d.ts","sourceRoot":"","sources":["../../../src/text/preprocessors/mdx.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,YAAY,EAAE,mBAAmB,EAAE,MAAM,gBAAgB,CAAC;AAGnE;;;GAGG;AACH,qBAAa,eAAgB,YAAW,YAAY;IAClD;;;OAGG;IACH,SAAS,CAAC,QAAQ,EAAE,MAAM,GAAG,OAAO;IAIpC;;OAEG;IACH,OAAO,CAAC,OAAO,EAAE,MAAM,EAAE,OAAO,EAAE,mBAAmB,GAAG,MAAM;IAmB9D;;OAEG;IACH,OAAO,CAAC,QAAQ;IAyChB;;OAEG;IACH,OAAO,CAAC,uBAAuB;CAiChC"}
@@ -0,0 +1,101 @@
1
+ import { ContentTypeDetector } from './registry.js';
2
+ /**
3
+ * MDX preprocessor for handling JSX content in Markdown files
4
+ * Ports the existing cleanMdxContent logic with mode-aware behavior
5
+ */
6
+ export class MdxPreprocessor {
7
+ /**
8
+ * Check if this preprocessor applies to the given language/content type
9
+ * Applies to .mdx files and content with JSX syntax
10
+ */
11
+ appliesTo(language) {
12
+ return language === 'mdx';
13
+ }
14
+ /**
15
+ * Process MDX content based on the specified mode
16
+ */
17
+ process(content, options) {
18
+ // Only process if content actually contains JSX
19
+ if (!ContentTypeDetector.hasJsxContent(content)) {
20
+ return content;
21
+ }
22
+ switch (options.mode) {
23
+ case 'strip':
24
+ return this.stripJsx(content);
25
+ case 'keep':
26
+ return content; // Keep JSX as-is
27
+ case 'placeholder':
28
+ return this.replaceWithPlaceholders(content);
29
+ default:
30
+ console.log(`Unknown MDX processing mode: ${options.mode}, using placeholder`);
31
+ return this.replaceWithPlaceholders(content);
32
+ }
33
+ }
34
+ /**
35
+ * Strip JSX content entirely - ported from cleanMdxContent logic
36
+ */
37
+ stripJsx(content) {
38
+ let cleaned = content;
39
+ // Remove JSX import statements (requirement 11.1)
40
+ // Matches: import ... from '...' or import ... from "..."
41
+ cleaned = cleaned.replace(/^import\s+.*?from\s+['"][^'"]*['"];?\s*$/gm, '');
42
+ // Remove JSX export statements (requirement 11.2)
43
+ // Handle both single-line and multi-line exports
44
+ // Multi-line function exports: export default function() { ... }
45
+ cleaned = cleaned.replace(/^export\s+default\s+function[^{]*\{[\s\S]*?\n\}\s*$/gm, '');
46
+ // Object exports: export const metadata = { ... }
47
+ cleaned = cleaned.replace(/^export\s+const\s+[^=]*=\s*\{[\s\S]*?\}\s*;?\s*$/gm, '');
48
+ // Single line exports: export const x = ...; or export default ...
49
+ cleaned = cleaned.replace(/^export\s+(?:default\s+)?(?:const|let|var|function|class)\s+[^;{]*;?\s*$/gm, '');
50
+ // Simple exports: export default Component
51
+ cleaned = cleaned.replace(/^export\s+default\s+[^;{]*;?\s*$/gm, '');
52
+ // Remove JSX components (requirements 11.3, 11.4)
53
+ // Self-closing tags: <Component />
54
+ cleaned = cleaned.replace(/<[A-Z][a-zA-Z0-9]*[^>]*\/>/g, '');
55
+ // Opening and closing tags with content: <Component>content</Component>
56
+ // This handles nested components by replacing the outermost ones first
57
+ let previousLength;
58
+ do {
59
+ previousLength = cleaned.length;
60
+ cleaned = cleaned.replace(/<[A-Z][a-zA-Z0-9]*[^>]*>.*?<\/[A-Z][a-zA-Z0-9]*>/gs, '');
61
+ } while (cleaned.length !== previousLength);
62
+ // Clean up multiple consecutive newlines and trim
63
+ cleaned = cleaned.replace(/\n\s*\n\s*\n/g, '\n\n').trim();
64
+ // Ensure we never return empty content (requirement 6.4)
65
+ if (!cleaned.trim()) {
66
+ return '[content removed]';
67
+ }
68
+ return cleaned;
69
+ }
70
+ /**
71
+ * Replace JSX with descriptive placeholders
72
+ */
73
+ replaceWithPlaceholders(content) {
74
+ let cleaned = content;
75
+ // Replace JSX import statements
76
+ cleaned = cleaned.replace(/^import\s+.*?from\s+['"][^'"]*['"];?\s*$/gm, '[import removed]');
77
+ // Replace JSX export statements
78
+ // Multi-line function exports: export default function() { ... }
79
+ cleaned = cleaned.replace(/^export\s+default\s+function[^{]*\{[\s\S]*?\n\}\s*$/gm, '[export removed]');
80
+ // Object exports: export const metadata = { ... }
81
+ cleaned = cleaned.replace(/^export\s+const\s+[^=]*=\s*\{[\s\S]*?\}\s*;?\s*$/gm, '[export removed]');
82
+ // Single line exports: export const x = ...; or export default ...
83
+ cleaned = cleaned.replace(/^export\s+(?:default\s+)?(?:const|let|var|function|class)\s+[^;{]*;?\s*$/gm, '[export removed]');
84
+ // Simple exports: export default Component
85
+ cleaned = cleaned.replace(/^export\s+default\s+[^;{]*;?\s*$/gm, '[export removed]');
86
+ // Replace JSX components with placeholder
87
+ // Self-closing tags: <Component />
88
+ cleaned = cleaned.replace(/<[A-Z][a-zA-Z0-9]*[^>]*\/>/g, '[component removed]');
89
+ // Opening and closing tags with content: <Component>content</Component>
90
+ // This handles nested components by replacing the outermost ones first
91
+ let previousLength;
92
+ do {
93
+ previousLength = cleaned.length;
94
+ cleaned = cleaned.replace(/<[A-Z][a-zA-Z0-9]*[^>]*>.*?<\/[A-Z][a-zA-Z0-9]*>/gs, '[component removed]');
95
+ } while (cleaned.length !== previousLength);
96
+ // Clean up multiple consecutive newlines and trim
97
+ cleaned = cleaned.replace(/\n\s*\n\s*\n/g, '\n\n').trim();
98
+ return cleaned;
99
+ }
100
+ }
101
+ //# sourceMappingURL=mdx.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"mdx.js","sourceRoot":"","sources":["../../../src/text/preprocessors/mdx.ts"],"names":[],"mappings":"AACA,OAAO,EAAE,mBAAmB,EAAE,MAAM,eAAe,CAAC;AAEpD;;;GAGG;AACH,MAAM,OAAO,eAAe;IAC1B;;;OAGG;IACH,SAAS,CAAC,QAAgB;QACxB,OAAO,QAAQ,KAAK,KAAK,CAAC;IAC5B,CAAC;IAED;;OAEG;IACH,OAAO,CAAC,OAAe,EAAE,OAA4B;QACnD,gDAAgD;QAChD,IAAI,CAAC,mBAAmB,CAAC,aAAa,CAAC,OAAO,CAAC,EAAE,CAAC;YAChD,OAAO,OAAO,CAAC;QACjB,CAAC;QAED,QAAQ,OAAO,CAAC,IAAI,EAAE,CAAC;YACrB,KAAK,OAAO;gBACV,OAAO,IAAI,CAAC,QAAQ,CAAC,OAAO,CAAC,CAAC;YAChC,KAAK,MAAM;gBACT,OAAO,OAAO,CAAC,CAAC,iBAAiB;YACnC,KAAK,aAAa;gBAChB,OAAO,IAAI,CAAC,uBAAuB,CAAC,OAAO,CAAC,CAAC;YAC/C;gBACE,OAAO,CAAC,GAAG,CAAC,gCAAgC,OAAO,CAAC,IAAI,qBAAqB,CAAC,CAAC;gBAC/E,OAAO,IAAI,CAAC,uBAAuB,CAAC,OAAO,CAAC,CAAC;QACjD,CAAC;IACH,CAAC;IAED;;OAEG;IACK,QAAQ,CAAC,OAAe;QAC9B,IAAI,OAAO,GAAG,OAAO,CAAC;QAEtB,kDAAkD;QAClD,0DAA0D;QAC1D,OAAO,GAAG,OAAO,CAAC,OAAO,CAAC,4CAA4C,EAAE,EAAE,CAAC,CAAC;QAE5E,oDAAoD;QACpD,iDAAiD;QACjD,iEAAiE;QACjE,OAAO,GAAG,OAAO,CAAC,OAAO,CAAC,uDAAuD,EAAE,EAAE,CAAC,CAAC;QACvF,kDAAkD;QAClD,OAAO,GAAG,OAAO,CAAC,OAAO,CAAC,oDAAoD,EAAE,EAAE,CAAC,CAAC;QACpF,mEAAmE;QACnE,OAAO,GAAG,OAAO,CAAC,OAAO,CAAC,4EAA4E,EAAE,EAAE,CAAC,CAAC;QAC5G,2CAA2C;QAC3C,OAAO,GAAG,OAAO,CAAC,OAAO,CAAC,oCAAoC,EAAE,EAAE,CAAC,CAAC;QAEpE,kDAAkD;QAClD,mCAAmC;QACnC,OAAO,GAAG,OAAO,CAAC,OAAO,CAAC,6BAA6B,EAAE,EAAE,CAAC,CAAC;QAE7D,wEAAwE;QACxE,uEAAuE;QACvE,IAAI,cAAc,CAAC;QACnB,GAAG,CAAC;YACF,cAAc,GAAG,OAAO,CAAC,MAAM,CAAC;YAChC,OAAO,GAAG,OAAO,CAAC,OAAO,CAAC,oDAAoD,EAAE,EAAE,CAAC,CAAC;QACtF,CAAC,QAAQ,OAAO,CAAC,MAAM,KAAK,cAAc,EAAE;QAE5C,kDAAkD;QAClD,OAAO,GAAG,OAAO,CAAC,OAAO,CAAC,eAAe,EAAE,MAAM,CAAC,CAAC,IAAI,EAAE,CAAC;QAE1D,yDAAyD;QACzD,IAAI,CAAC,OAAO,CAAC,IAAI,EAAE,EAAE,CAAC;YACpB,OAAO,mBAAmB,CAAC;QAC7B,CAAC;QAED,OAAO,OAAO,CAAC;IACjB,CAAC;IAED;;OAEG;IACK,uBAAuB,CAAC,OAAe;QAC7C,IAAI,OAAO,GAAG,OAAO,CAAC;QAEtB,gCAAgC;QAChC,OAAO,GAAG,OAAO,CAAC,OAAO,CAAC,4CAA4C,EAAE,kBAAkB,CAAC,CAAC;QAE5F,kCAAkC;QAClC,iEAAiE;QACjE,OAAO,GAAG,OAAO,CAAC,OAAO,CAAC,uDAAuD,EAAE,kBAAkB,CAAC,CAAC;QACvG,
kDAAkD;QAClD,OAAO,GAAG,OAAO,CAAC,OAAO,CAAC,oDAAoD,EAAE,kBAAkB,CAAC,CAAC;QACpG,mEAAmE;QACnE,OAAO,GAAG,OAAO,CAAC,OAAO,CAAC,4EAA4E,EAAE,kBAAkB,CAAC,CAAC;QAC5H,2CAA2C;QAC3C,OAAO,GAAG,OAAO,CAAC,OAAO,CAAC,oCAAoC,EAAE,kBAAkB,CAAC,CAAC;QAEpF,0CAA0C;QAC1C,mCAAmC;QACnC,OAAO,GAAG,OAAO,CAAC,OAAO,CAAC,6BAA6B,EAAE,qBAAqB,CAAC,CAAC;QAEhF,wEAAwE;QACxE,uEAAuE;QACvE,IAAI,cAAc,CAAC;QACnB,GAAG,CAAC;YACF,cAAc,GAAG,OAAO,CAAC,MAAM,CAAC;YAChC,OAAO,GAAG,OAAO,CAAC,OAAO,CAAC,oDAAoD,EAAE,qBAAqB,CAAC,CAAC;QACzG,CAAC,QAAQ,OAAO,CAAC,MAAM,KAAK,cAAc,EAAE;QAE5C,kDAAkD;QAClD,OAAO,GAAG,OAAO,CAAC,OAAO,CAAC,eAAe,EAAE,MAAM,CAAC,CAAC,IAAI,EAAE,CAAC;QAE1D,OAAO,OAAO,CAAC;IACjB,CAAC;CACF"}