rag-lite-ts 1.0.1 → 1.0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (182) hide show
  1. package/README.md +94 -65
  2. package/dist/cli/indexer.d.ts.map +1 -1
  3. package/dist/cli/indexer.js +78 -50
  4. package/dist/cli/indexer.js.map +1 -1
  5. package/dist/cli/search.d.ts.map +1 -1
  6. package/dist/cli/search.js +13 -30
  7. package/dist/cli/search.js.map +1 -1
  8. package/dist/cli.js +2 -2
  9. package/dist/cli.js.map +1 -1
  10. package/dist/config.d.ts +34 -73
  11. package/dist/config.d.ts.map +1 -1
  12. package/dist/config.js +50 -255
  13. package/dist/config.js.map +1 -1
  14. package/dist/core/adapters.d.ts +93 -0
  15. package/dist/core/adapters.d.ts.map +1 -0
  16. package/dist/core/adapters.js +139 -0
  17. package/dist/core/adapters.js.map +1 -0
  18. package/dist/core/chunker.d.ts +117 -0
  19. package/dist/core/chunker.d.ts.map +1 -0
  20. package/dist/core/chunker.js +73 -0
  21. package/dist/core/chunker.js.map +1 -0
  22. package/dist/core/config.d.ts +102 -0
  23. package/dist/core/config.d.ts.map +1 -0
  24. package/dist/core/config.js +240 -0
  25. package/dist/core/config.js.map +1 -0
  26. package/dist/{db.d.ts → core/db.d.ts} +25 -9
  27. package/dist/core/db.d.ts.map +1 -0
  28. package/dist/{db.js → core/db.js} +86 -16
  29. package/dist/core/db.js.map +1 -0
  30. package/dist/{error-handler.d.ts → core/error-handler.d.ts} +23 -2
  31. package/dist/core/error-handler.d.ts.map +1 -0
  32. package/dist/{error-handler.js → core/error-handler.js} +51 -8
  33. package/dist/core/error-handler.js.map +1 -0
  34. package/dist/core/index.d.ts +57 -0
  35. package/dist/core/index.d.ts.map +1 -0
  36. package/dist/core/index.js +66 -0
  37. package/dist/core/index.js.map +1 -0
  38. package/dist/core/ingestion.d.ts +143 -0
  39. package/dist/core/ingestion.d.ts.map +1 -0
  40. package/dist/core/ingestion.js +347 -0
  41. package/dist/core/ingestion.js.map +1 -0
  42. package/dist/core/interfaces.d.ts +408 -0
  43. package/dist/core/interfaces.d.ts.map +1 -0
  44. package/dist/core/interfaces.js +106 -0
  45. package/dist/core/interfaces.js.map +1 -0
  46. package/dist/{path-manager.d.ts → core/path-manager.d.ts} +5 -0
  47. package/dist/core/path-manager.d.ts.map +1 -0
  48. package/dist/{path-manager.js → core/path-manager.js} +5 -0
  49. package/dist/core/path-manager.js.map +1 -0
  50. package/dist/core/search-example.d.ts +25 -0
  51. package/dist/core/search-example.d.ts.map +1 -0
  52. package/dist/core/search-example.js +138 -0
  53. package/dist/core/search-example.js.map +1 -0
  54. package/dist/core/search-pipeline-example.d.ts +21 -0
  55. package/dist/core/search-pipeline-example.d.ts.map +1 -0
  56. package/dist/core/search-pipeline-example.js +188 -0
  57. package/dist/core/search-pipeline-example.js.map +1 -0
  58. package/dist/core/search-pipeline.d.ts +111 -0
  59. package/dist/core/search-pipeline.d.ts.map +1 -0
  60. package/dist/core/search-pipeline.js +287 -0
  61. package/dist/core/search-pipeline.js.map +1 -0
  62. package/dist/core/search.d.ts +104 -0
  63. package/dist/core/search.d.ts.map +1 -0
  64. package/dist/core/search.js +218 -0
  65. package/dist/core/search.js.map +1 -0
  66. package/dist/core/types.d.ts +63 -0
  67. package/dist/core/types.d.ts.map +1 -0
  68. package/dist/core/types.js +6 -0
  69. package/dist/core/types.js.map +1 -0
  70. package/dist/{vector-index.d.ts → core/vector-index.d.ts} +4 -0
  71. package/dist/core/vector-index.d.ts.map +1 -0
  72. package/dist/{vector-index.js → core/vector-index.js} +19 -0
  73. package/dist/core/vector-index.js.map +1 -0
  74. package/dist/dom-polyfills.d.ts +6 -0
  75. package/dist/dom-polyfills.d.ts.map +1 -0
  76. package/dist/dom-polyfills.js +40 -0
  77. package/dist/dom-polyfills.js.map +1 -0
  78. package/dist/examples/clean-api-examples.d.ts +44 -0
  79. package/dist/examples/clean-api-examples.d.ts.map +1 -0
  80. package/dist/examples/clean-api-examples.js +206 -0
  81. package/dist/examples/clean-api-examples.js.map +1 -0
  82. package/dist/factories/index.d.ts +43 -0
  83. package/dist/factories/index.d.ts.map +1 -0
  84. package/dist/factories/index.js +44 -0
  85. package/dist/factories/index.js.map +1 -0
  86. package/dist/factories/text-factory.d.ts +466 -0
  87. package/dist/factories/text-factory.d.ts.map +1 -0
  88. package/dist/factories/text-factory.js +719 -0
  89. package/dist/factories/text-factory.js.map +1 -0
  90. package/dist/file-processor.d.ts +2 -2
  91. package/dist/file-processor.d.ts.map +1 -1
  92. package/dist/file-processor.js +3 -3
  93. package/dist/file-processor.js.map +1 -1
  94. package/dist/index-manager.d.ts +3 -2
  95. package/dist/index-manager.d.ts.map +1 -1
  96. package/dist/index-manager.js +13 -11
  97. package/dist/index-manager.js.map +1 -1
  98. package/dist/index.d.ts +63 -8
  99. package/dist/index.d.ts.map +1 -1
  100. package/dist/index.js +91 -16
  101. package/dist/index.js.map +1 -1
  102. package/dist/indexer.js +1 -1
  103. package/dist/indexer.js.map +1 -1
  104. package/dist/ingestion.d.ts +30 -156
  105. package/dist/ingestion.d.ts.map +1 -1
  106. package/dist/ingestion.js +58 -675
  107. package/dist/ingestion.js.map +1 -1
  108. package/dist/mcp-server.js +86 -55
  109. package/dist/mcp-server.js.map +1 -1
  110. package/dist/preprocess.js +1 -1
  111. package/dist/preprocess.js.map +1 -1
  112. package/dist/search-standalone.js +1 -1
  113. package/dist/search-standalone.js.map +1 -1
  114. package/dist/search.d.ts +32 -76
  115. package/dist/search.d.ts.map +1 -1
  116. package/dist/search.js +80 -428
  117. package/dist/search.js.map +1 -1
  118. package/dist/text/chunker.d.ts +32 -0
  119. package/dist/text/chunker.d.ts.map +1 -0
  120. package/dist/{chunker.js → text/chunker.js} +98 -75
  121. package/dist/text/chunker.js.map +1 -0
  122. package/dist/{embedder.d.ts → text/embedder.d.ts} +22 -1
  123. package/dist/text/embedder.d.ts.map +1 -0
  124. package/dist/{embedder.js → text/embedder.js} +71 -4
  125. package/dist/text/embedder.js.map +1 -0
  126. package/dist/text/index.d.ts +7 -0
  127. package/dist/text/index.d.ts.map +1 -0
  128. package/dist/text/index.js +8 -0
  129. package/dist/text/index.js.map +1 -0
  130. package/dist/text/preprocessors/index.d.ts +17 -0
  131. package/dist/text/preprocessors/index.d.ts.map +1 -0
  132. package/dist/text/preprocessors/index.js +38 -0
  133. package/dist/text/preprocessors/index.js.map +1 -0
  134. package/dist/text/preprocessors/mdx.d.ts +25 -0
  135. package/dist/text/preprocessors/mdx.d.ts.map +1 -0
  136. package/dist/text/preprocessors/mdx.js +101 -0
  137. package/dist/text/preprocessors/mdx.js.map +1 -0
  138. package/dist/text/preprocessors/mermaid.d.ts +68 -0
  139. package/dist/text/preprocessors/mermaid.d.ts.map +1 -0
  140. package/dist/text/preprocessors/mermaid.js +330 -0
  141. package/dist/text/preprocessors/mermaid.js.map +1 -0
  142. package/dist/text/preprocessors/registry.d.ts +56 -0
  143. package/dist/text/preprocessors/registry.d.ts.map +1 -0
  144. package/dist/text/preprocessors/registry.js +180 -0
  145. package/dist/text/preprocessors/registry.js.map +1 -0
  146. package/dist/text/reranker.d.ts +60 -0
  147. package/dist/text/reranker.d.ts.map +1 -0
  148. package/dist/{reranker.js → text/reranker.js} +134 -19
  149. package/dist/text/reranker.js.map +1 -0
  150. package/dist/{tokenizer.d.ts → text/tokenizer.d.ts} +1 -0
  151. package/dist/text/tokenizer.d.ts.map +1 -0
  152. package/dist/{tokenizer.js → text/tokenizer.js} +7 -2
  153. package/dist/text/tokenizer.js.map +1 -0
  154. package/dist/types.d.ts +1 -1
  155. package/dist/types.d.ts.map +1 -1
  156. package/package.json +2 -2
  157. package/dist/chunker.d.ts +0 -47
  158. package/dist/chunker.d.ts.map +0 -1
  159. package/dist/chunker.js.map +0 -1
  160. package/dist/db.d.ts.map +0 -1
  161. package/dist/db.js.map +0 -1
  162. package/dist/embedder.d.ts.map +0 -1
  163. package/dist/embedder.js.map +0 -1
  164. package/dist/error-handler.d.ts.map +0 -1
  165. package/dist/error-handler.js.map +0 -1
  166. package/dist/path-manager.d.ts.map +0 -1
  167. package/dist/path-manager.js.map +0 -1
  168. package/dist/reranker.d.ts +0 -40
  169. package/dist/reranker.d.ts.map +0 -1
  170. package/dist/reranker.js.map +0 -1
  171. package/dist/resource-manager-demo.d.ts +0 -7
  172. package/dist/resource-manager-demo.d.ts.map +0 -1
  173. package/dist/resource-manager-demo.js +0 -52
  174. package/dist/resource-manager-demo.js.map +0 -1
  175. package/dist/resource-manager.d.ts +0 -129
  176. package/dist/resource-manager.d.ts.map +0 -1
  177. package/dist/resource-manager.js +0 -389
  178. package/dist/resource-manager.js.map +0 -1
  179. package/dist/tokenizer.d.ts.map +0 -1
  180. package/dist/tokenizer.js.map +0 -1
  181. package/dist/vector-index.d.ts.map +0 -1
  182. package/dist/vector-index.js.map +0 -1
@@ -0,0 +1,93 @@
1
+ /**
2
+ * CORE MODULE — Shared between text-only (rag-lite-ts) and future multimodal (rag-lite-mm)
3
+ * Model-agnostic. No transformer or modality-specific logic.
4
+ *
5
+ * This module provides adapter utilities to convert existing implementations to the new
6
+ * dependency injection interfaces. These adapters enable:
7
+ *
8
+ * 1. Integration with existing embedding and reranking implementations
9
+ * 2. Support for dependency injection patterns
10
+ * 3. Wrapping of third-party libraries to match core interfaces
11
+ * 4. Testing with mock implementations
12
+ *
13
+ * ADAPTER PATTERN USAGE:
14
+ * ```typescript
15
+ * // Convert embedder to dependency injection
16
+ * const embedder = new TextEmbeddingEngine();
17
+ * const embedFn = EmbeddingAdapter.fromEmbedder(embedder);
18
+ * const search = new SearchEngine(embedFn, indexManager, db);
19
+ *
20
+ * // Convert reranker to dependency injection
21
+ * const reranker = new CrossEncoderReranker();
22
+ * const rerankFn = RerankingAdapter.fromReranker(reranker);
23
+ * const rerankingSearch = new SearchEngine(embedFn, indexManager, db, rerankFn);
24
+ *
25
+ * // Create full interfaces for advanced usage
26
+ * const embeddingInterface = EmbeddingAdapter.createInterface(
27
+ * embedder,
28
+ * ['text'],
29
+ * 384,
30
+ * 'all-MiniLM-L6-v2'
31
+ * );
32
+ * ```
33
+ */
34
+ import type { EmbedFunction, RerankFunction, EmbeddingQueryInterface, RerankingInterface } from './interfaces.js';
35
+ /**
36
+ * Adapter to convert embedding engines to core EmbedFunction
37
+ * Enables integration while supporting dependency injection
38
+ *
39
+ * USAGE EXAMPLES:
40
+ * ```typescript
41
+ * // Basic adapter usage
42
+ * const embedder = new TextEmbeddingEngine();
43
+ * const embedFn = EmbeddingAdapter.fromEmbedder(embedder);
44
+ *
45
+ * // Use in SearchEngine
46
+ * const search = new SearchEngine(embedFn, indexManager, db);
47
+ *
48
+ * // Create full interface for advanced features
49
+ * const embeddingInterface = EmbeddingAdapter.createInterface(
50
+ * embedder,
51
+ * ['text', 'code'], // Supported content types
52
+ * 384, // Embedding dimensions
53
+ * 'all-MiniLM-L6-v2' // Model identifier
54
+ * );
55
+ *
56
+ * // Use interface for validation and metadata
57
+ * if (embeddingInterface.supportedContentTypes.includes('text')) {
58
+ * const result = await embeddingInterface.embedQuery('test query');
59
+ * }
60
+ * ```
61
+ */
62
+ export declare class EmbeddingAdapter {
63
+ /**
64
+ * Convert an embedding engine to an EmbedFunction
65
+ * @param embedder - Embedder with embedSingle method
66
+ * @returns EmbedFunction compatible with core dependency injection
67
+ */
68
+ static fromEmbedder(embedder: any): EmbedFunction;
69
+ /**
70
+ * Create an EmbeddingQueryInterface from an embedder
71
+ * @param embedder - Embedder with embedSingle method
72
+ * @param supportedContentTypes - Content types this embedder supports
73
+ * @param embeddingDimensions - Dimensions of embedding vectors
74
+ * @param modelIdentifier - Model name or identifier
75
+ * @returns Full EmbeddingQueryInterface with metadata
76
+ */
77
+ static createInterface(embedder: any, supportedContentTypes?: string[], embeddingDimensions?: number, modelIdentifier?: string): EmbeddingQueryInterface;
78
+ }
79
+ /**
80
+ * Adapter to convert rerankers to core RerankFunction
81
+ * Enables integration while supporting dependency injection
82
+ */
83
+ export declare class RerankingAdapter {
84
+ /**
85
+ * Convert a reranker to a RerankFunction
86
+ */
87
+ static fromReranker(reranker: any): RerankFunction;
88
+ /**
89
+ * Create a RerankingInterface from a reranker
90
+ */
91
+ static createInterface(reranker: any, supportedContentTypes?: string[], modelIdentifier?: string): RerankingInterface;
92
+ }
93
+ //# sourceMappingURL=adapters.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"adapters.d.ts","sourceRoot":"","sources":["../../src/core/adapters.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GAgCG;AAEH,OAAO,KAAK,EACV,aAAa,EACb,cAAc,EACd,uBAAuB,EACvB,kBAAkB,EAEnB,MAAM,iBAAiB,CAAC;AAGzB;;;;;;;;;;;;;;;;;;;;;;;;;;GA0BG;AACH,qBAAa,gBAAgB;IAC3B;;;;OAIG;IACH,MAAM,CAAC,YAAY,CAAC,QAAQ,EAAE,GAAG,GAAG,aAAa;IAOjD;;;;;;;OAOG;IACH,MAAM,CAAC,eAAe,CACpB,QAAQ,EAAE,GAAG,EACb,qBAAqB,GAAE,MAAM,EAAa,EAC1C,mBAAmB,GAAE,MAAY,EACjC,eAAe,GAAE,MAAkB,GAClC,uBAAuB;CAQ3B;AAED;;;GAGG;AACH,qBAAa,gBAAgB;IAC3B;;OAEG;IACH,MAAM,CAAC,YAAY,CAAC,QAAQ,EAAE,GAAG,GAAG,cAAc;IAoClD;;OAEG;IACH,MAAM,CAAC,eAAe,CACpB,QAAQ,EAAE,GAAG,EACb,qBAAqB,GAAE,MAAM,EAAa,EAC1C,eAAe,GAAE,MAAkB,GAClC,kBAAkB;CAQtB"}
@@ -0,0 +1,139 @@
1
+ /**
2
+ * CORE MODULE — Shared between text-only (rag-lite-ts) and future multimodal (rag-lite-mm)
3
+ * Model-agnostic. No transformer or modality-specific logic.
4
+ *
5
+ * This module provides adapter utilities to convert existing implementations to the new
6
+ * dependency injection interfaces. These adapters enable:
7
+ *
8
+ * 1. Integration with existing embedding and reranking implementations
9
+ * 2. Support for dependency injection patterns
10
+ * 3. Wrapping of third-party libraries to match core interfaces
11
+ * 4. Testing with mock implementations
12
+ *
13
+ * ADAPTER PATTERN USAGE:
14
+ * ```typescript
15
+ * // Convert embedder to dependency injection
16
+ * const embedder = new TextEmbeddingEngine();
17
+ * const embedFn = EmbeddingAdapter.fromEmbedder(embedder);
18
+ * const search = new SearchEngine(embedFn, indexManager, db);
19
+ *
20
+ * // Convert reranker to dependency injection
21
+ * const reranker = new CrossEncoderReranker();
22
+ * const rerankFn = RerankingAdapter.fromReranker(reranker);
23
+ * const rerankingSearch = new SearchEngine(embedFn, indexManager, db, rerankFn);
24
+ *
25
+ * // Create full interfaces for advanced usage
26
+ * const embeddingInterface = EmbeddingAdapter.createInterface(
27
+ * embedder,
28
+ * ['text'],
29
+ * 384,
30
+ * 'all-MiniLM-L6-v2'
31
+ * );
32
+ * ```
33
+ */
34
+ /**
35
+ * Adapter to convert embedding engines to core EmbedFunction
36
+ * Enables integration while supporting dependency injection
37
+ *
38
+ * USAGE EXAMPLES:
39
+ * ```typescript
40
+ * // Basic adapter usage
41
+ * const embedder = new TextEmbeddingEngine();
42
+ * const embedFn = EmbeddingAdapter.fromEmbedder(embedder);
43
+ *
44
+ * // Use in SearchEngine
45
+ * const search = new SearchEngine(embedFn, indexManager, db);
46
+ *
47
+ * // Create full interface for advanced features
48
+ * const embeddingInterface = EmbeddingAdapter.createInterface(
49
+ * embedder,
50
+ * ['text', 'code'], // Supported content types
51
+ * 384, // Embedding dimensions
52
+ * 'all-MiniLM-L6-v2' // Model identifier
53
+ * );
54
+ *
55
+ * // Use interface for validation and metadata
56
+ * if (embeddingInterface.supportedContentTypes.includes('text')) {
57
+ * const result = await embeddingInterface.embedQuery('test query');
58
+ * }
59
+ * ```
60
+ */
61
+ export class EmbeddingAdapter {
62
+ /**
63
+ * Convert an embedding engine to an EmbedFunction
64
+ * @param embedder - Embedder with embedSingle method
65
+ * @returns EmbedFunction compatible with core dependency injection
66
+ */
67
+ static fromEmbedder(embedder) {
68
+ return async (query, contentType) => {
69
+ // Call the embedSingle method
70
+ return await embedder.embedSingle(query);
71
+ };
72
+ }
73
+ /**
74
+ * Create an EmbeddingQueryInterface from an embedder
75
+ * @param embedder - Embedder with embedSingle method
76
+ * @param supportedContentTypes - Content types this embedder supports
77
+ * @param embeddingDimensions - Dimensions of embedding vectors
78
+ * @param modelIdentifier - Model name or identifier
79
+ * @returns Full EmbeddingQueryInterface with metadata
80
+ */
81
+ static createInterface(embedder, supportedContentTypes = ['text'], embeddingDimensions = 384, modelIdentifier = 'unknown') {
82
+ return {
83
+ embedQuery: this.fromEmbedder(embedder),
84
+ supportedContentTypes,
85
+ embeddingDimensions,
86
+ modelIdentifier
87
+ };
88
+ }
89
+ }
90
+ /**
91
+ * Adapter to convert rerankers to core RerankFunction
92
+ * Enables integration while supporting dependency injection
93
+ */
94
+ export class RerankingAdapter {
95
+ /**
96
+ * Convert a reranker to a RerankFunction
97
+ */
98
+ static fromReranker(reranker) {
99
+ return async (query, results, contentType) => {
100
+ // Convert core SearchResult format to reranker format
101
+ const rerankResults = results.map(result => ({
102
+ text: result.content,
103
+ score: result.score,
104
+ document: {
105
+ id: result.document.id,
106
+ source: result.document.source,
107
+ title: result.document.title
108
+ }
109
+ }));
110
+ // Call rerank method
111
+ const reranked = await reranker.rerank(query, rerankResults);
112
+ // Convert back to core SearchResult format
113
+ return reranked.map((result, index) => ({
114
+ content: result.text,
115
+ score: result.score,
116
+ contentType: results[index]?.contentType || 'text',
117
+ document: {
118
+ id: result.document.id,
119
+ source: result.document.source,
120
+ title: result.document.title,
121
+ contentType: results[index]?.document.contentType || 'text'
122
+ },
123
+ metadata: results[index]?.metadata
124
+ }));
125
+ };
126
+ }
127
+ /**
128
+ * Create a RerankingInterface from a reranker
129
+ */
130
+ static createInterface(reranker, supportedContentTypes = ['text'], modelIdentifier = 'unknown') {
131
+ return {
132
+ rerankResults: this.fromReranker(reranker),
133
+ supportedContentTypes,
134
+ isEnabled: reranker && reranker.isLoaded && reranker.isLoaded(),
135
+ modelIdentifier
136
+ };
137
+ }
138
+ }
139
+ //# sourceMappingURL=adapters.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"adapters.js","sourceRoot":"","sources":["../../src/core/adapters.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GAgCG;AAWH;;;;;;;;;;;;;;;;;;;;;;;;;;GA0BG;AACH,MAAM,OAAO,gBAAgB;IAC3B;;;;OAIG;IACH,MAAM,CAAC,YAAY,CAAC,QAAa;QAC/B,OAAO,KAAK,EAAE,KAAa,EAAE,WAAoB,EAA4B,EAAE;YAC7E,8BAA8B;YAC9B,OAAO,MAAM,QAAQ,CAAC,WAAW,CAAC,KAAK,CAAC,CAAC;QAC3C,CAAC,CAAC;IACJ,CAAC;IAED;;;;;;;OAOG;IACH,MAAM,CAAC,eAAe,CACpB,QAAa,EACb,wBAAkC,CAAC,MAAM,CAAC,EAC1C,sBAA8B,GAAG,EACjC,kBAA0B,SAAS;QAEnC,OAAO;YACL,UAAU,EAAE,IAAI,CAAC,YAAY,CAAC,QAAQ,CAAC;YACvC,qBAAqB;YACrB,mBAAmB;YACnB,eAAe;SAChB,CAAC;IACJ,CAAC;CACF;AAED;;;GAGG;AACH,MAAM,OAAO,gBAAgB;IAC3B;;OAEG;IACH,MAAM,CAAC,YAAY,CAAC,QAAa;QAC/B,OAAO,KAAK,EACV,KAAa,EACb,OAAuB,EACvB,WAAoB,EACK,EAAE;YAC3B,sDAAsD;YACtD,MAAM,aAAa,GAAG,OAAO,CAAC,GAAG,CAAC,MAAM,CAAC,EAAE,CAAC,CAAC;gBAC3C,IAAI,EAAE,MAAM,CAAC,OAAO;gBACpB,KAAK,EAAE,MAAM,CAAC,KAAK;gBACnB,QAAQ,EAAE;oBACR,EAAE,EAAE,MAAM,CAAC,QAAQ,CAAC,EAAE;oBACtB,MAAM,EAAE,MAAM,CAAC,QAAQ,CAAC,MAAM;oBAC9B,KAAK,EAAE,MAAM,CAAC,QAAQ,CAAC,KAAK;iBAC7B;aACF,CAAC,CAAC,CAAC;YAEJ,qBAAqB;YACrB,MAAM,QAAQ,GAAG,MAAM,QAAQ,CAAC,MAAM,CAAC,KAAK,EAAE,aAAa,CAAC,CAAC;YAE7D,2CAA2C;YAC3C,OAAO,QAAQ,CAAC,GAAG,CAAC,CAAC,MAAW,EAAE,KAAa,EAAE,EAAE,CAAC,CAAC;gBACnD,OAAO,EAAE,MAAM,CAAC,IAAI;gBACpB,KAAK,EAAE,MAAM,CAAC,KAAK;gBACnB,WAAW,EAAE,OAAO,CAAC,KAAK,CAAC,EAAE,WAAW,IAAI,MAAM;gBAClD,QAAQ,EAAE;oBACR,EAAE,EAAE,MAAM,CAAC,QAAQ,CAAC,EAAE;oBACtB,MAAM,EAAE,MAAM,CAAC,QAAQ,CAAC,MAAM;oBAC9B,KAAK,EAAE,MAAM,CAAC,QAAQ,CAAC,KAAK;oBAC5B,WAAW,EAAE,OAAO,CAAC,KAAK,CAAC,EAAE,QAAQ,CAAC,WAAW,IAAI,MAAM;iBAC5D;gBACD,QAAQ,EAAE,OAAO,CAAC,KAAK,CAAC,EAAE,QAAQ;aACnC,CAAC,CAAC,CAAC;QACN,CAAC,CAAC;IACJ,CAAC;IAED;;OAEG;IACH,MAAM,CAAC,eAAe,CACpB,QAAa,EACb,wBAAkC,CAAC,MAAM,CAAC,EAC1C,kBAA0B,SAAS;QAEnC,OAAO;YACL,aAAa,EAAE,IAAI,CAAC,YAAY,CAAC,QAAQ,CAAC;YAC1C,qBAAqB;YACrB,SAAS,EAAE,QAAQ,IAAI,QAAQ,CAAC,QAAQ,IAAI,QAAQ,CAAC,QAAQ,EAAE;YAC/D,eAAe;SAChB,CAAC;IACJ,CAAC;CACF"}
@@ -0,0 +1,117 @@
1
+ /**
2
+ * CORE MODULE — Shared between text-only (rag-lite-ts) and future multimodal (rag-lite-mm)
3
+ * Model-agnostic. No transformer or modality-specific logic.
4
+ */
5
+ /**
6
+ * Configuration for chunking behavior
7
+ */
8
+ export interface ChunkConfig {
9
+ /** Target chunk size in tokens (200-300 recommended) */
10
+ chunkSize: number;
11
+ /** Overlap between chunks in tokens (50 recommended) */
12
+ chunkOverlap: number;
13
+ }
14
+ /**
15
+ * Generic document interface that can represent different content types
16
+ */
17
+ export interface GenericDocument {
18
+ /** Source path or identifier */
19
+ source: string;
20
+ /** Document title */
21
+ title: string;
22
+ /** Content (text, image path, etc.) */
23
+ content: string;
24
+ /** Content type identifier (text, image, etc.) */
25
+ contentType: string;
26
+ /** Optional metadata for the document */
27
+ metadata?: Record<string, any>;
28
+ }
29
+ /**
30
+ * Generic chunk interface that can represent different content types
31
+ */
32
+ export interface GenericChunk {
33
+ /** The content of the chunk (text, image path, etc.) */
34
+ content: string;
35
+ /** Content type identifier (text, image, etc.) */
36
+ contentType: string;
37
+ /** Index of this chunk within the document */
38
+ chunkIndex: number;
39
+ /** Optional metadata for the chunk */
40
+ metadata?: Record<string, any>;
41
+ }
42
+ /**
43
+ * Strategy interface for chunking different content types
44
+ */
45
+ export interface ChunkingStrategy {
46
+ /**
47
+ * Check if this strategy applies to the given content type
48
+ */
49
+ appliesTo(contentType: string): boolean;
50
+ /**
51
+ * Chunk a document using this strategy
52
+ */
53
+ chunk(document: GenericDocument, config: ChunkConfig): Promise<GenericChunk[]>;
54
+ }
55
+ /**
56
+ * Registry for chunking strategies
57
+ */
58
+ export declare class ChunkingStrategyRegistry {
59
+ private strategies;
60
+ /**
61
+ * Register a chunking strategy
62
+ */
63
+ register(strategy: ChunkingStrategy): void;
64
+ /**
65
+ * Find the appropriate strategy for a content type
66
+ */
67
+ findStrategy(contentType: string): ChunkingStrategy | undefined;
68
+ /**
69
+ * Get all registered strategies
70
+ */
71
+ getStrategies(): ChunkingStrategy[];
72
+ }
73
+ /**
74
+ * Default chunking configuration
75
+ */
76
+ export declare const DEFAULT_CHUNK_CONFIG: ChunkConfig;
77
+ /**
78
+ * Global chunking strategy registry
79
+ */
80
+ export declare const chunkingRegistry: ChunkingStrategyRegistry;
81
+ /**
82
+ * Generic chunking function that uses registered strategies
83
+ */
84
+ export declare function chunkGenericDocument(document: GenericDocument, config?: ChunkConfig): Promise<GenericChunk[]>;
85
+ /**
86
+ * Document interface for text chunking
87
+ */
88
+ export interface Document {
89
+ /** Source path or identifier */
90
+ source: string;
91
+ /** Document title */
92
+ title: string;
93
+ /** Full text content */
94
+ content: string;
95
+ }
96
+ /**
97
+ * Chunk interface for text chunking results
98
+ */
99
+ export interface Chunk {
100
+ /** The text content of the chunk */
101
+ text: string;
102
+ /** Index of this chunk within the document */
103
+ chunkIndex: number;
104
+ /** Number of tokens in this chunk */
105
+ tokenCount: number;
106
+ }
107
+ /**
108
+ * Text document chunking function
109
+ * Uses the text chunking strategy from the text implementation layer
110
+ */
111
+ export declare function chunkDocument(document: Document, config?: ChunkConfig): Promise<Chunk[]>;
112
+ /**
113
+ * Register the text chunking strategy with the global registry
114
+ * This should be called during application initialization
115
+ */
116
+ export declare function registerTextChunkingStrategy(): Promise<void>;
117
+ //# sourceMappingURL=chunker.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"chunker.d.ts","sourceRoot":"","sources":["../../src/core/chunker.ts"],"names":[],"mappings":"AAAA;;;GAGG;AAEH;;GAEG;AACH,MAAM,WAAW,WAAW;IAC1B,wDAAwD;IACxD,SAAS,EAAE,MAAM,CAAC;IAClB,wDAAwD;IACxD,YAAY,EAAE,MAAM,CAAC;CACtB;AAED;;GAEG;AACH,MAAM,WAAW,eAAe;IAC9B,gCAAgC;IAChC,MAAM,EAAE,MAAM,CAAC;IACf,qBAAqB;IACrB,KAAK,EAAE,MAAM,CAAC;IACd,uCAAuC;IACvC,OAAO,EAAE,MAAM,CAAC;IAChB,kDAAkD;IAClD,WAAW,EAAE,MAAM,CAAC;IACpB,yCAAyC;IACzC,QAAQ,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,GAAG,CAAC,CAAC;CAChC;AAED;;GAEG;AACH,MAAM,WAAW,YAAY;IAC3B,wDAAwD;IACxD,OAAO,EAAE,MAAM,CAAC;IAChB,kDAAkD;IAClD,WAAW,EAAE,MAAM,CAAC;IACpB,8CAA8C;IAC9C,UAAU,EAAE,MAAM,CAAC;IACnB,sCAAsC;IACtC,QAAQ,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,GAAG,CAAC,CAAC;CAChC;AAED;;GAEG;AACH,MAAM,WAAW,gBAAgB;IAC/B;;OAEG;IACH,SAAS,CAAC,WAAW,EAAE,MAAM,GAAG,OAAO,CAAC;IAExC;;OAEG;IACH,KAAK,CAAC,QAAQ,EAAE,eAAe,EAAE,MAAM,EAAE,WAAW,GAAG,OAAO,CAAC,YAAY,EAAE,CAAC,CAAC;CAChF;AAED;;GAEG;AACH,qBAAa,wBAAwB;IACnC,OAAO,CAAC,UAAU,CAA0B;IAE5C;;OAEG;IACH,QAAQ,CAAC,QAAQ,EAAE,gBAAgB,GAAG,IAAI;IAI1C;;OAEG;IACH,YAAY,CAAC,WAAW,EAAE,MAAM,GAAG,gBAAgB,GAAG,SAAS;IAI/D;;OAEG;IACH,aAAa,IAAI,gBAAgB,EAAE;CAGpC;AAED;;GAEG;AACH,eAAO,MAAM,oBAAoB,EAAE,WAGlC,CAAC;AAEF;;GAEG;AACH,eAAO,MAAM,gBAAgB,0BAAiC,CAAC;AAE/D;;GAEG;AACH,wBAAsB,oBAAoB,CACxC,QAAQ,EAAE,eAAe,EACzB,MAAM,GAAE,WAAkC,GACzC,OAAO,CAAC,YAAY,EAAE,CAAC,CAQzB;AAOD;;GAEG;AACH,MAAM,WAAW,QAAQ;IACvB,gCAAgC;IAChC,MAAM,EAAE,MAAM,CAAC;IACf,qBAAqB;IACrB,KAAK,EAAE,MAAM,CAAC;IACd,wBAAwB;IACxB,OAAO,EAAE,MAAM,CAAC;CACjB;AAED;;GAEG;AACH,MAAM,WAAW,KAAK;IACpB,oCAAoC;IACpC,IAAI,EAAE,MAAM,CAAC;IACb,8CAA8C;IAC9C,UAAU,EAAE,MAAM,CAAC;IACnB,qCAAqC;IACrC,UAAU,EAAE,MAAM,CAAC;CACpB;AAED;;;GAGG;AACH,wBAAsB,aAAa,CACjC,QAAQ,EAAE,QAAQ,EAClB,MAAM,GAAE,WAAkC,GACzC,OAAO,CAAC,KAAK,EAAE,CAAC,CAIlB;AACD;;;GAGG;AACH,wBAAsB,4BAA4B,IAAI,OAAO,CAAC,IAAI,CAAC,CAIlE"}
@@ -0,0 +1,73 @@
1
+ /**
2
+ * CORE MODULE — Shared between text-only (rag-lite-ts) and future multimodal (rag-lite-mm)
3
+ * Model-agnostic. No transformer or modality-specific logic.
4
+ */
5
+ /**
6
+ * Registry for chunking strategies
7
+ */
8
+ export class ChunkingStrategyRegistry {
9
+ strategies = [];
10
+ /**
11
+ * Register a chunking strategy
12
+ */
13
+ register(strategy) {
14
+ this.strategies.push(strategy);
15
+ }
16
+ /**
17
+ * Find the appropriate strategy for a content type
18
+ */
19
+ findStrategy(contentType) {
20
+ return this.strategies.find(strategy => strategy.appliesTo(contentType));
21
+ }
22
+ /**
23
+ * Get all registered strategies
24
+ */
25
+ getStrategies() {
26
+ return [...this.strategies];
27
+ }
28
+ }
29
+ /**
30
+ * Default chunking configuration
31
+ */
32
+ export const DEFAULT_CHUNK_CONFIG = {
33
+ chunkSize: 250, // Target 200-300 tokens
34
+ chunkOverlap: 50
35
+ };
36
+ /**
37
+ * Global chunking strategy registry
38
+ */
39
+ export const chunkingRegistry = new ChunkingStrategyRegistry();
40
/**
 * Promise for the built-in text strategy registration.
 * Shared by every caller so the strategy is imported and registered once;
 * reset to `undefined` when registration fails so a later call can retry.
 */
let textStrategyRegistration;
/**
 * Generic chunking function that uses registered strategies.
 *
 * The built-in text strategy is registered asynchronously when this module is
 * loaded, so a call made immediately after import may find the registry still
 * empty. When no strategy matches, this function waits for that registration
 * and looks again before reporting a missing strategy.
 *
 * @param document - Document to chunk; `document.contentType` selects the strategy
 * @param config - Chunking configuration (defaults to DEFAULT_CHUNK_CONFIG)
 * @returns The chunks produced by the matching strategy
 * @throws {Error} If no registered strategy applies to the document's content type
 */
export async function chunkGenericDocument(document, config = DEFAULT_CHUNK_CONFIG) {
    let strategy = chunkingRegistry.findStrategy(document.contentType);
    if (!strategy) {
        try {
            await registerTextChunkingStrategy();
        }
        catch (error) {
            // Keep the original "no strategy" error below as the thrown error;
            // surface the registration failure as a warning.
            console.warn('Failed to register text chunking strategy:', error);
        }
        strategy = chunkingRegistry.findStrategy(document.contentType);
    }
    if (!strategy) {
        throw new Error(`No chunking strategy found for content type: ${document.contentType}`);
    }
    return strategy.chunk(document, config);
}
/**
 * Text document chunking function.
 * Delegates to `chunkDocument` from the text implementation layer.
 *
 * @param document - Text document to chunk
 * @param config - Chunking configuration (defaults to DEFAULT_CHUNK_CONFIG)
 * @returns The chunks produced by the text chunker
 */
export async function chunkDocument(document, config = DEFAULT_CHUNK_CONFIG) {
    // Import the text chunker implementation dynamically to avoid circular dependencies
    const { chunkDocument: textChunkDocument } = await import('../text/chunker.js');
    return textChunkDocument(document, config);
}
/**
 * Register the text chunking strategy with the global registry.
 *
 * Safe to call any number of times: the first call imports and registers the
 * strategy, and later calls return the same promise, so calling this during
 * application initialization does not add a second copy next to the one
 * registered at module load.
 *
 * @returns Promise that resolves once the text strategy is registered
 */
export async function registerTextChunkingStrategy() {
    if (!textStrategyRegistration) {
        textStrategyRegistration = (async () => {
            const { TextChunkingStrategy } = await import('../text/chunker.js');
            chunkingRegistry.register(new TextChunkingStrategy());
        })().catch(error => {
            // Forget the failed attempt so a later call can retry the import.
            textStrategyRegistration = undefined;
            throw error;
        });
    }
    return textStrategyRegistration;
}
// Auto-register the text strategy when this module is loaded
// This ensures text chunking works out of the box
registerTextChunkingStrategy().catch(error => {
    console.warn('Failed to register text chunking strategy:', error);
});
73
+ //# sourceMappingURL=chunker.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"chunker.js","sourceRoot":"","sources":["../../src/core/chunker.ts"],"names":[],"mappings":"AAAA;;;GAGG;AAyDH;;GAEG;AACH,MAAM,OAAO,wBAAwB;IAC3B,UAAU,GAAuB,EAAE,CAAC;IAE5C;;OAEG;IACH,QAAQ,CAAC,QAA0B;QACjC,IAAI,CAAC,UAAU,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAC;IACjC,CAAC;IAED;;OAEG;IACH,YAAY,CAAC,WAAmB;QAC9B,OAAO,IAAI,CAAC,UAAU,CAAC,IAAI,CAAC,QAAQ,CAAC,EAAE,CAAC,QAAQ,CAAC,SAAS,CAAC,WAAW,CAAC,CAAC,CAAC;IAC3E,CAAC;IAED;;OAEG;IACH,aAAa;QACX,OAAO,CAAC,GAAG,IAAI,CAAC,UAAU,CAAC,CAAC;IAC9B,CAAC;CACF;AAED;;GAEG;AACH,MAAM,CAAC,MAAM,oBAAoB,GAAgB;IAC/C,SAAS,EAAE,GAAG,EAAE,wBAAwB;IACxC,YAAY,EAAE,EAAE;CACjB,CAAC;AAEF;;GAEG;AACH,MAAM,CAAC,MAAM,gBAAgB,GAAG,IAAI,wBAAwB,EAAE,CAAC;AAE/D;;GAEG;AACH,MAAM,CAAC,KAAK,UAAU,oBAAoB,CACxC,QAAyB,EACzB,SAAsB,oBAAoB;IAE1C,MAAM,QAAQ,GAAG,gBAAgB,CAAC,YAAY,CAAC,QAAQ,CAAC,WAAW,CAAC,CAAC;IAErE,IAAI,CAAC,QAAQ,EAAE,CAAC;QACd,MAAM,IAAI,KAAK,CAAC,gDAAgD,QAAQ,CAAC,WAAW,EAAE,CAAC,CAAC;IAC1F,CAAC;IAED,OAAO,QAAQ,CAAC,KAAK,CAAC,QAAQ,EAAE,MAAM,CAAC,CAAC;AAC1C,CAAC;AA+BD;;;GAGG;AACH,MAAM,CAAC,KAAK,UAAU,aAAa,CACjC,QAAkB,EAClB,SAAsB,oBAAoB;IAE1C,oFAAoF;IACpF,MAAM,EAAE,aAAa,EAAE,iBAAiB,EAAE,GAAG,MAAM,MAAM,CAAC,oBAAoB,CAAC,CAAC;IAChF,OAAO,iBAAiB,CAAC,QAAQ,EAAE,MAAM,CAAC,CAAC;AAC7C,CAAC;AACD;;;GAGG;AACH,MAAM,CAAC,KAAK,UAAU,4BAA4B;IAChD,MAAM,EAAE,oBAAoB,EAAE,GAAG,MAAM,MAAM,CAAC,oBAAoB,CAAC,CAAC;IACpE,MAAM,YAAY,GAAG,IAAI,oBAAoB,EAAE,CAAC;IAChD,gBAAgB,CAAC,QAAQ,CAAC,YAAY,CAAC,CAAC;AAC1C,CAAC;AAED,6DAA6D;AAC7D,kDAAkD;AAClD,4BAA4B,EAAE,CAAC,KAAK,CAAC,KAAK,CAAC,EAAE;IAC3C,OAAO,CAAC,IAAI,CAAC,4CAA4C,EAAE,KAAK,CAAC,CAAC;AACpE,CAAC,CAAC,CAAC"}
@@ -0,0 +1,102 @@
1
+ /**
2
+ * CORE MODULE — Shared between text-only (rag-lite-ts) and future multimodal (rag-lite-mm)
3
+ * Model-agnostic. No transformer or modality-specific logic.
4
+ */
5
+ /**
6
+ * Core configuration interface - model-agnostic settings
7
+ */
8
+ export interface CoreConfig {
9
+ chunk_size: number;
10
+ chunk_overlap: number;
11
+ batch_size: number;
12
+ top_k: number;
13
+ db_file: string;
14
+ index_file: string;
15
+ model_cache_path?: string;
16
+ path_storage_strategy: 'absolute' | 'relative';
17
+ embedding_model: string;
18
+ rerank_enabled: boolean;
19
+ preprocessing?: any;
20
+ }
21
+ /**
22
+ * Extensible configuration interface that can be extended by implementations
23
+ */
24
+ export interface ExtensibleConfig<T = {}> extends CoreConfig {
25
+ implementation: T;
26
+ }
27
+ /**
28
+ * Standard exit codes for different error conditions
29
+ */
30
+ export declare const EXIT_CODES: {
31
+ readonly SUCCESS: 0;
32
+ readonly GENERAL_ERROR: 1;
33
+ readonly INVALID_ARGUMENTS: 2;
34
+ readonly CONFIGURATION_ERROR: 3;
35
+ readonly FILE_NOT_FOUND: 4;
36
+ readonly DATABASE_ERROR: 5;
37
+ readonly MODEL_ERROR: 6;
38
+ readonly INDEX_ERROR: 7;
39
+ readonly PERMISSION_ERROR: 8;
40
+ };
41
+ /**
42
+ * Configuration validation error with specific exit code
43
+ */
44
+ export declare class ConfigurationError extends Error {
45
+ exitCode: number;
46
+ constructor(message: string, exitCode?: number);
47
+ }
48
+ /**
49
+ * Get the default model cache path as specified in the requirements
50
+ * @returns Default cache path (~/.raglite/models/)
51
+ */
52
+ export declare function getDefaultModelCachePath(): string;
53
+ /**
54
+ * Validates core configuration fields
55
+ * @param config - Configuration object to validate
56
+ * @throws {ConfigurationError} If configuration is invalid
57
+ */
58
+ export declare function validateCoreConfig(config: any): asserts config is CoreConfig;
59
+ /**
60
+ * Model defaults interface for different embedding models
61
+ */
62
+ export interface ModelDefaults {
63
+ dimensions: number;
64
+ chunk_size: number;
65
+ chunk_overlap: number;
66
+ batch_size: number;
67
+ }
68
+ /**
69
+ * Get default configuration for different embedding models
70
+ * @param modelName - Name of the embedding model
71
+ * @returns Model-specific defaults
72
+ */
73
+ export declare function getModelDefaults(modelName?: string): ModelDefaults;
74
+ /**
75
+ * Default core configuration object
76
+ * Model-agnostic settings that can be used by core modules
77
+ */
78
+ export declare const config: CoreConfig;
79
+ /**
80
+ * Validate preprocessing configuration
81
+ */
82
+ export declare function validatePreprocessingConfig(config: any): asserts config is any;
83
+ /**
84
+ * Merge preprocessing configurations with mode defaults
85
+ */
86
+ export declare function mergePreprocessingConfig(config: any): any;
87
+ /**
88
+ * Utility function to handle unrecoverable errors with descriptive messages
89
+ * Logs error and exits immediately with appropriate exit code
90
+ * @param error - Error object or message
91
+ * @param context - Context where the error occurred
92
+ * @param exitCode - Exit code to use (defaults to GENERAL_ERROR)
93
+ */
94
+ export declare function handleUnrecoverableError(error: Error | string, context: string, exitCode?: number): never;
95
+ /**
96
+ * Utility function for safe error logging with context
97
+ * @param error - Error to log
98
+ * @param context - Context where error occurred
99
+ * @param skipError - Whether to skip this error and continue (default: false)
100
+ */
101
+ export declare function logError(error: Error | string, context: string, skipError?: boolean): void;
102
+ //# sourceMappingURL=config.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"config.d.ts","sourceRoot":"","sources":["../../src/core/config.ts"],"names":[],"mappings":"AAAA;;;GAGG;AAKH;;GAEG;AACH,MAAM,WAAW,UAAU;IACzB,UAAU,EAAE,MAAM,CAAC;IACnB,aAAa,EAAE,MAAM,CAAC;IACtB,UAAU,EAAE,MAAM,CAAC;IACnB,KAAK,EAAE,MAAM,CAAC;IACd,OAAO,EAAE,MAAM,CAAC;IAChB,UAAU,EAAE,MAAM,CAAC;IACnB,gBAAgB,CAAC,EAAE,MAAM,CAAC;IAC1B,qBAAqB,EAAE,UAAU,GAAG,UAAU,CAAC;IAC/C,eAAe,EAAE,MAAM,CAAC;IACxB,cAAc,EAAE,OAAO,CAAC;IACxB,aAAa,CAAC,EAAE,GAAG,CAAC;CACrB;AAED;;GAEG;AACH,MAAM,WAAW,gBAAgB,CAAC,CAAC,GAAG,EAAE,CAAE,SAAQ,UAAU;IAC1D,cAAc,EAAE,CAAC,CAAC;CACnB;AAED;;GAEG;AACH,eAAO,MAAM,UAAU;;;;;;;;;;CAUb,CAAC;AAEX;;GAEG;AACH,qBAAa,kBAAmB,SAAQ,KAAK;IACP,QAAQ,EAAE,MAAM;gBAAxC,OAAO,EAAE,MAAM,EAAS,QAAQ,GAAE,MAAuC;CAItF;AAED;;;GAGG;AACH,wBAAgB,wBAAwB,IAAI,MAAM,CAEjD;AAED;;;;GAIG;AACH,wBAAgB,kBAAkB,CAAC,MAAM,EAAE,GAAG,GAAG,OAAO,CAAC,MAAM,IAAI,UAAU,CA+D5E;AAED;;GAEG;AACH,MAAM,WAAW,aAAa;IAC5B,UAAU,EAAE,MAAM,CAAC;IACnB,UAAU,EAAE,MAAM,CAAC;IACnB,aAAa,EAAE,MAAM,CAAC;IACtB,UAAU,EAAE,MAAM,CAAC;CACpB;AAED;;;;GAIG;AACH,wBAAgB,gBAAgB,CAAC,SAAS,CAAC,EAAE,MAAM,GAAG,aAAa,CA6BlE;AAED;;;GAGG;AACH,eAAO,MAAM,MAAM,EAAE,UAYpB,CAAC;AAEF;;GAEG;AACH,wBAAgB,2BAA2B,CAAC,MAAM,EAAE,GAAG,GAAG,OAAO,CAAC,MAAM,IAAI,GAAG,CAgC9E;AAED;;GAEG;AACH,wBAAgB,wBAAwB,CAAC,MAAM,EAAE,GAAG,GAAG,GAAG,CAazD;AAED;;;;;;GAMG;AACH,wBAAgB,wBAAwB,CACtC,KAAK,EAAE,KAAK,GAAG,MAAM,EACrB,OAAO,EAAE,MAAM,EACf,QAAQ,GAAE,MAAiC,GAC1C,KAAK,CAgCP;AAED;;;;;GAKG;AACH,wBAAgB,QAAQ,CAAC,KAAK,EAAE,KAAK,GAAG,MAAM,EAAE,OAAO,EAAE,MAAM,EAAE,SAAS,GAAE,OAAe,GAAG,IAAI,CAQjG"}