rag-lite-ts 2.1.1 → 2.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/{core → cjs/core}/model-validator.js +1 -1
- package/dist/{core → cjs/core}/vector-index.js +4 -2
- package/dist/esm/api-errors.d.ts +90 -0
- package/dist/esm/api-errors.js +320 -0
- package/dist/esm/cli/indexer.d.ts +11 -0
- package/dist/esm/cli/indexer.js +471 -0
- package/dist/esm/cli/search.d.ts +7 -0
- package/dist/esm/cli/search.js +332 -0
- package/dist/esm/cli.d.ts +3 -0
- package/dist/esm/cli.js +529 -0
- package/dist/esm/config.d.ts +51 -0
- package/dist/esm/config.js +79 -0
- package/dist/esm/core/abstract-embedder.d.ts +125 -0
- package/dist/esm/core/abstract-embedder.js +264 -0
- package/dist/esm/core/actionable-error-messages.d.ts +60 -0
- package/dist/esm/core/actionable-error-messages.js +397 -0
- package/dist/esm/core/adapters.d.ts +93 -0
- package/dist/esm/core/adapters.js +139 -0
- package/dist/esm/core/batch-processing-optimizer.d.ts +155 -0
- package/dist/esm/core/batch-processing-optimizer.js +536 -0
- package/dist/esm/core/binary-index-format.d.ts +78 -0
- package/dist/esm/core/binary-index-format.js +291 -0
- package/dist/esm/core/chunker.d.ts +119 -0
- package/dist/esm/core/chunker.js +73 -0
- package/dist/esm/core/cli-database-utils.d.ts +53 -0
- package/dist/esm/core/cli-database-utils.js +239 -0
- package/dist/esm/core/config.d.ts +102 -0
- package/dist/esm/core/config.js +247 -0
- package/dist/esm/core/content-errors.d.ts +111 -0
- package/dist/esm/core/content-errors.js +362 -0
- package/dist/esm/core/content-manager.d.ts +335 -0
- package/dist/esm/core/content-manager.js +1476 -0
- package/dist/esm/core/content-performance-optimizer.d.ts +150 -0
- package/dist/esm/core/content-performance-optimizer.js +516 -0
- package/dist/esm/core/content-resolver.d.ts +104 -0
- package/dist/esm/core/content-resolver.js +285 -0
- package/dist/esm/core/cross-modal-search.d.ts +164 -0
- package/dist/esm/core/cross-modal-search.js +342 -0
- package/dist/esm/core/database-connection-manager.d.ts +109 -0
- package/dist/esm/core/database-connection-manager.js +310 -0
- package/dist/esm/core/db.d.ts +213 -0
- package/dist/esm/core/db.js +895 -0
- package/dist/esm/core/embedder-factory.d.ts +154 -0
- package/dist/esm/core/embedder-factory.js +311 -0
- package/dist/esm/core/error-handler.d.ts +112 -0
- package/dist/esm/core/error-handler.js +239 -0
- package/dist/esm/core/index.d.ts +59 -0
- package/dist/esm/core/index.js +69 -0
- package/dist/esm/core/ingestion.d.ts +202 -0
- package/dist/esm/core/ingestion.js +901 -0
- package/dist/esm/core/interfaces.d.ts +408 -0
- package/dist/esm/core/interfaces.js +106 -0
- package/dist/esm/core/lazy-dependency-loader.d.ts +147 -0
- package/dist/esm/core/lazy-dependency-loader.js +435 -0
- package/dist/esm/core/mode-detection-service.d.ts +150 -0
- package/dist/esm/core/mode-detection-service.js +565 -0
- package/dist/esm/core/mode-model-validator.d.ts +92 -0
- package/dist/esm/core/mode-model-validator.js +203 -0
- package/dist/esm/core/model-registry.d.ts +116 -0
- package/dist/esm/core/model-registry.js +411 -0
- package/dist/esm/core/model-validator.d.ts +217 -0
- package/dist/esm/core/model-validator.js +782 -0
- package/dist/esm/core/path-manager.d.ts +47 -0
- package/dist/esm/core/path-manager.js +71 -0
- package/dist/esm/core/raglite-paths.d.ts +121 -0
- package/dist/esm/core/raglite-paths.js +145 -0
- package/dist/esm/core/reranking-config.d.ts +42 -0
- package/dist/esm/core/reranking-config.js +147 -0
- package/dist/esm/core/reranking-factory.d.ts +92 -0
- package/dist/esm/core/reranking-factory.js +410 -0
- package/dist/esm/core/reranking-strategies.d.ts +310 -0
- package/dist/esm/core/reranking-strategies.js +650 -0
- package/dist/esm/core/resource-cleanup.d.ts +163 -0
- package/dist/esm/core/resource-cleanup.js +371 -0
- package/dist/esm/core/resource-manager.d.ts +212 -0
- package/dist/esm/core/resource-manager.js +564 -0
- package/dist/esm/core/search-pipeline.d.ts +111 -0
- package/dist/esm/core/search-pipeline.js +287 -0
- package/dist/esm/core/search.d.ts +141 -0
- package/dist/esm/core/search.js +320 -0
- package/dist/esm/core/streaming-operations.d.ts +145 -0
- package/dist/esm/core/streaming-operations.js +409 -0
- package/dist/esm/core/types.d.ts +66 -0
- package/dist/esm/core/types.js +6 -0
- package/dist/esm/core/universal-embedder.d.ts +177 -0
- package/dist/esm/core/universal-embedder.js +139 -0
- package/dist/esm/core/validation-messages.d.ts +99 -0
- package/dist/esm/core/validation-messages.js +334 -0
- package/dist/esm/core/vector-index.d.ts +72 -0
- package/dist/esm/core/vector-index.js +333 -0
- package/dist/esm/dom-polyfills.d.ts +6 -0
- package/dist/esm/dom-polyfills.js +37 -0
- package/dist/esm/factories/index.d.ts +27 -0
- package/dist/esm/factories/index.js +29 -0
- package/dist/esm/factories/ingestion-factory.d.ts +200 -0
- package/dist/esm/factories/ingestion-factory.js +477 -0
- package/dist/esm/factories/search-factory.d.ts +154 -0
- package/dist/esm/factories/search-factory.js +344 -0
- package/dist/esm/file-processor.d.ts +147 -0
- package/dist/esm/file-processor.js +963 -0
- package/dist/esm/index-manager.d.ts +116 -0
- package/dist/esm/index-manager.js +598 -0
- package/dist/esm/index.d.ts +75 -0
- package/dist/esm/index.js +110 -0
- package/dist/esm/indexer.d.ts +7 -0
- package/dist/esm/indexer.js +54 -0
- package/dist/esm/ingestion.d.ts +63 -0
- package/dist/esm/ingestion.js +124 -0
- package/dist/esm/mcp-server.d.ts +46 -0
- package/dist/esm/mcp-server.js +1820 -0
- package/dist/esm/multimodal/clip-embedder.d.ts +327 -0
- package/dist/esm/multimodal/clip-embedder.js +996 -0
- package/dist/esm/multimodal/index.d.ts +6 -0
- package/dist/esm/multimodal/index.js +6 -0
- package/dist/esm/preprocess.d.ts +19 -0
- package/dist/esm/preprocess.js +203 -0
- package/dist/esm/preprocessors/index.d.ts +17 -0
- package/dist/esm/preprocessors/index.js +38 -0
- package/dist/esm/preprocessors/mdx.d.ts +25 -0
- package/dist/esm/preprocessors/mdx.js +101 -0
- package/dist/esm/preprocessors/mermaid.d.ts +68 -0
- package/dist/esm/preprocessors/mermaid.js +329 -0
- package/dist/esm/preprocessors/registry.d.ts +56 -0
- package/dist/esm/preprocessors/registry.js +179 -0
- package/dist/esm/run-error-recovery-tests.d.ts +7 -0
- package/dist/esm/run-error-recovery-tests.js +101 -0
- package/dist/esm/search-standalone.d.ts +7 -0
- package/dist/esm/search-standalone.js +117 -0
- package/dist/esm/search.d.ts +99 -0
- package/dist/esm/search.js +177 -0
- package/dist/esm/test-utils.d.ts +18 -0
- package/dist/esm/test-utils.js +27 -0
- package/dist/esm/text/chunker.d.ts +33 -0
- package/dist/esm/text/chunker.js +279 -0
- package/dist/esm/text/embedder.d.ts +111 -0
- package/dist/esm/text/embedder.js +386 -0
- package/dist/esm/text/index.d.ts +8 -0
- package/dist/esm/text/index.js +9 -0
- package/dist/esm/text/preprocessors/index.d.ts +17 -0
- package/dist/esm/text/preprocessors/index.js +38 -0
- package/dist/esm/text/preprocessors/mdx.d.ts +25 -0
- package/dist/esm/text/preprocessors/mdx.js +101 -0
- package/dist/esm/text/preprocessors/mermaid.d.ts +68 -0
- package/dist/esm/text/preprocessors/mermaid.js +330 -0
- package/dist/esm/text/preprocessors/registry.d.ts +56 -0
- package/dist/esm/text/preprocessors/registry.js +180 -0
- package/dist/esm/text/reranker.d.ts +49 -0
- package/dist/esm/text/reranker.js +274 -0
- package/dist/esm/text/sentence-transformer-embedder.d.ts +96 -0
- package/dist/esm/text/sentence-transformer-embedder.js +340 -0
- package/dist/esm/text/tokenizer.d.ts +22 -0
- package/dist/esm/text/tokenizer.js +64 -0
- package/dist/esm/types.d.ts +83 -0
- package/dist/esm/types.js +3 -0
- package/dist/esm/utils/vector-math.d.ts +31 -0
- package/dist/esm/utils/vector-math.js +70 -0
- package/package.json +30 -12
- /package/dist/{api-errors.d.ts → cjs/api-errors.d.ts} +0 -0
- /package/dist/{api-errors.js → cjs/api-errors.js} +0 -0
- /package/dist/{cli → cjs/cli}/indexer.d.ts +0 -0
- /package/dist/{cli → cjs/cli}/indexer.js +0 -0
- /package/dist/{cli → cjs/cli}/search.d.ts +0 -0
- /package/dist/{cli → cjs/cli}/search.js +0 -0
- /package/dist/{cli.d.ts → cjs/cli.d.ts} +0 -0
- /package/dist/{cli.js → cjs/cli.js} +0 -0
- /package/dist/{config.d.ts → cjs/config.d.ts} +0 -0
- /package/dist/{config.js → cjs/config.js} +0 -0
- /package/dist/{core → cjs/core}/abstract-embedder.d.ts +0 -0
- /package/dist/{core → cjs/core}/abstract-embedder.js +0 -0
- /package/dist/{core → cjs/core}/actionable-error-messages.d.ts +0 -0
- /package/dist/{core → cjs/core}/actionable-error-messages.js +0 -0
- /package/dist/{core → cjs/core}/adapters.d.ts +0 -0
- /package/dist/{core → cjs/core}/adapters.js +0 -0
- /package/dist/{core → cjs/core}/batch-processing-optimizer.d.ts +0 -0
- /package/dist/{core → cjs/core}/batch-processing-optimizer.js +0 -0
- /package/dist/{core → cjs/core}/binary-index-format.d.ts +0 -0
- /package/dist/{core → cjs/core}/binary-index-format.js +0 -0
- /package/dist/{core → cjs/core}/chunker.d.ts +0 -0
- /package/dist/{core → cjs/core}/chunker.js +0 -0
- /package/dist/{core → cjs/core}/cli-database-utils.d.ts +0 -0
- /package/dist/{core → cjs/core}/cli-database-utils.js +0 -0
- /package/dist/{core → cjs/core}/config.d.ts +0 -0
- /package/dist/{core → cjs/core}/config.js +0 -0
- /package/dist/{core → cjs/core}/content-errors.d.ts +0 -0
- /package/dist/{core → cjs/core}/content-errors.js +0 -0
- /package/dist/{core → cjs/core}/content-manager.d.ts +0 -0
- /package/dist/{core → cjs/core}/content-manager.js +0 -0
- /package/dist/{core → cjs/core}/content-performance-optimizer.d.ts +0 -0
- /package/dist/{core → cjs/core}/content-performance-optimizer.js +0 -0
- /package/dist/{core → cjs/core}/content-resolver.d.ts +0 -0
- /package/dist/{core → cjs/core}/content-resolver.js +0 -0
- /package/dist/{core → cjs/core}/cross-modal-search.d.ts +0 -0
- /package/dist/{core → cjs/core}/cross-modal-search.js +0 -0
- /package/dist/{core → cjs/core}/database-connection-manager.d.ts +0 -0
- /package/dist/{core → cjs/core}/database-connection-manager.js +0 -0
- /package/dist/{core → cjs/core}/db.d.ts +0 -0
- /package/dist/{core → cjs/core}/db.js +0 -0
- /package/dist/{core → cjs/core}/embedder-factory.d.ts +0 -0
- /package/dist/{core → cjs/core}/embedder-factory.js +0 -0
- /package/dist/{core → cjs/core}/error-handler.d.ts +0 -0
- /package/dist/{core → cjs/core}/error-handler.js +0 -0
- /package/dist/{core → cjs/core}/index.d.ts +0 -0
- /package/dist/{core → cjs/core}/index.js +0 -0
- /package/dist/{core → cjs/core}/ingestion.d.ts +0 -0
- /package/dist/{core → cjs/core}/ingestion.js +0 -0
- /package/dist/{core → cjs/core}/interfaces.d.ts +0 -0
- /package/dist/{core → cjs/core}/interfaces.js +0 -0
- /package/dist/{core → cjs/core}/lazy-dependency-loader.d.ts +0 -0
- /package/dist/{core → cjs/core}/lazy-dependency-loader.js +0 -0
- /package/dist/{core → cjs/core}/mode-detection-service.d.ts +0 -0
- /package/dist/{core → cjs/core}/mode-detection-service.js +0 -0
- /package/dist/{core → cjs/core}/mode-model-validator.d.ts +0 -0
- /package/dist/{core → cjs/core}/mode-model-validator.js +0 -0
- /package/dist/{core → cjs/core}/model-registry.d.ts +0 -0
- /package/dist/{core → cjs/core}/model-registry.js +0 -0
- /package/dist/{core → cjs/core}/model-validator.d.ts +0 -0
- /package/dist/{core → cjs/core}/path-manager.d.ts +0 -0
- /package/dist/{core → cjs/core}/path-manager.js +0 -0
- /package/dist/{core → cjs/core}/raglite-paths.d.ts +0 -0
- /package/dist/{core → cjs/core}/raglite-paths.js +0 -0
- /package/dist/{core → cjs/core}/reranking-config.d.ts +0 -0
- /package/dist/{core → cjs/core}/reranking-config.js +0 -0
- /package/dist/{core → cjs/core}/reranking-factory.d.ts +0 -0
- /package/dist/{core → cjs/core}/reranking-factory.js +0 -0
- /package/dist/{core → cjs/core}/reranking-strategies.d.ts +0 -0
- /package/dist/{core → cjs/core}/reranking-strategies.js +0 -0
- /package/dist/{core → cjs/core}/resource-cleanup.d.ts +0 -0
- /package/dist/{core → cjs/core}/resource-cleanup.js +0 -0
- /package/dist/{core → cjs/core}/resource-manager.d.ts +0 -0
- /package/dist/{core → cjs/core}/resource-manager.js +0 -0
- /package/dist/{core → cjs/core}/search-pipeline.d.ts +0 -0
- /package/dist/{core → cjs/core}/search-pipeline.js +0 -0
- /package/dist/{core → cjs/core}/search.d.ts +0 -0
- /package/dist/{core → cjs/core}/search.js +0 -0
- /package/dist/{core → cjs/core}/streaming-operations.d.ts +0 -0
- /package/dist/{core → cjs/core}/streaming-operations.js +0 -0
- /package/dist/{core → cjs/core}/types.d.ts +0 -0
- /package/dist/{core → cjs/core}/types.js +0 -0
- /package/dist/{core → cjs/core}/universal-embedder.d.ts +0 -0
- /package/dist/{core → cjs/core}/universal-embedder.js +0 -0
- /package/dist/{core → cjs/core}/validation-messages.d.ts +0 -0
- /package/dist/{core → cjs/core}/validation-messages.js +0 -0
- /package/dist/{core → cjs/core}/vector-index.d.ts +0 -0
- /package/dist/{dom-polyfills.d.ts → cjs/dom-polyfills.d.ts} +0 -0
- /package/dist/{dom-polyfills.js → cjs/dom-polyfills.js} +0 -0
- /package/dist/{factories → cjs/factories}/index.d.ts +0 -0
- /package/dist/{factories → cjs/factories}/index.js +0 -0
- /package/dist/{factories → cjs/factories}/ingestion-factory.d.ts +0 -0
- /package/dist/{factories → cjs/factories}/ingestion-factory.js +0 -0
- /package/dist/{factories → cjs/factories}/search-factory.d.ts +0 -0
- /package/dist/{factories → cjs/factories}/search-factory.js +0 -0
- /package/dist/{file-processor.d.ts → cjs/file-processor.d.ts} +0 -0
- /package/dist/{file-processor.js → cjs/file-processor.js} +0 -0
- /package/dist/{index-manager.d.ts → cjs/index-manager.d.ts} +0 -0
- /package/dist/{index-manager.js → cjs/index-manager.js} +0 -0
- /package/dist/{index.d.ts → cjs/index.d.ts} +0 -0
- /package/dist/{index.js → cjs/index.js} +0 -0
- /package/dist/{indexer.d.ts → cjs/indexer.d.ts} +0 -0
- /package/dist/{indexer.js → cjs/indexer.js} +0 -0
- /package/dist/{ingestion.d.ts → cjs/ingestion.d.ts} +0 -0
- /package/dist/{ingestion.js → cjs/ingestion.js} +0 -0
- /package/dist/{mcp-server.d.ts → cjs/mcp-server.d.ts} +0 -0
- /package/dist/{mcp-server.js → cjs/mcp-server.js} +0 -0
- /package/dist/{multimodal → cjs/multimodal}/clip-embedder.d.ts +0 -0
- /package/dist/{multimodal → cjs/multimodal}/clip-embedder.js +0 -0
- /package/dist/{multimodal → cjs/multimodal}/index.d.ts +0 -0
- /package/dist/{multimodal → cjs/multimodal}/index.js +0 -0
- /package/dist/{preprocess.d.ts → cjs/preprocess.d.ts} +0 -0
- /package/dist/{preprocess.js → cjs/preprocess.js} +0 -0
- /package/dist/{preprocessors → cjs/preprocessors}/index.d.ts +0 -0
- /package/dist/{preprocessors → cjs/preprocessors}/index.js +0 -0
- /package/dist/{preprocessors → cjs/preprocessors}/mdx.d.ts +0 -0
- /package/dist/{preprocessors → cjs/preprocessors}/mdx.js +0 -0
- /package/dist/{preprocessors → cjs/preprocessors}/mermaid.d.ts +0 -0
- /package/dist/{preprocessors → cjs/preprocessors}/mermaid.js +0 -0
- /package/dist/{preprocessors → cjs/preprocessors}/registry.d.ts +0 -0
- /package/dist/{preprocessors → cjs/preprocessors}/registry.js +0 -0
- /package/dist/{run-error-recovery-tests.d.ts → cjs/run-error-recovery-tests.d.ts} +0 -0
- /package/dist/{run-error-recovery-tests.js → cjs/run-error-recovery-tests.js} +0 -0
- /package/dist/{search-standalone.d.ts → cjs/search-standalone.d.ts} +0 -0
- /package/dist/{search-standalone.js → cjs/search-standalone.js} +0 -0
- /package/dist/{search.d.ts → cjs/search.d.ts} +0 -0
- /package/dist/{search.js → cjs/search.js} +0 -0
- /package/dist/{test-utils.d.ts → cjs/test-utils.d.ts} +0 -0
- /package/dist/{test-utils.js → cjs/test-utils.js} +0 -0
- /package/dist/{text → cjs/text}/chunker.d.ts +0 -0
- /package/dist/{text → cjs/text}/chunker.js +0 -0
- /package/dist/{text → cjs/text}/embedder.d.ts +0 -0
- /package/dist/{text → cjs/text}/embedder.js +0 -0
- /package/dist/{text → cjs/text}/index.d.ts +0 -0
- /package/dist/{text → cjs/text}/index.js +0 -0
- /package/dist/{text → cjs/text}/preprocessors/index.d.ts +0 -0
- /package/dist/{text → cjs/text}/preprocessors/index.js +0 -0
- /package/dist/{text → cjs/text}/preprocessors/mdx.d.ts +0 -0
- /package/dist/{text → cjs/text}/preprocessors/mdx.js +0 -0
- /package/dist/{text → cjs/text}/preprocessors/mermaid.d.ts +0 -0
- /package/dist/{text → cjs/text}/preprocessors/mermaid.js +0 -0
- /package/dist/{text → cjs/text}/preprocessors/registry.d.ts +0 -0
- /package/dist/{text → cjs/text}/preprocessors/registry.js +0 -0
- /package/dist/{text → cjs/text}/reranker.d.ts +0 -0
- /package/dist/{text → cjs/text}/reranker.js +0 -0
- /package/dist/{text → cjs/text}/sentence-transformer-embedder.d.ts +0 -0
- /package/dist/{text → cjs/text}/sentence-transformer-embedder.js +0 -0
- /package/dist/{text → cjs/text}/tokenizer.d.ts +0 -0
- /package/dist/{text → cjs/text}/tokenizer.js +0 -0
- /package/dist/{types.d.ts → cjs/types.d.ts} +0 -0
- /package/dist/{types.js → cjs/types.js} +0 -0
- /package/dist/{utils → cjs/utils}/vector-math.d.ts +0 -0
- /package/dist/{utils → cjs/utils}/vector-math.js +0 -0
|
@@ -0,0 +1,117 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
/**
|
|
3
|
+
* Standalone search script for direct node execution
|
|
4
|
+
* Usage: node search.js <query> [--top-k <number>] [--rerank|--no-rerank]
|
|
5
|
+
*/
|
|
6
|
+
import { runSearch } from './cli/search.js';
|
|
7
|
+
import { EXIT_CODES, ConfigurationError } from './core/config.js';
|
|
8
|
+
/**
 * Print the usage/help text of the standalone search script to stderr,
 * one console.error call per line.
 */
function printUsage() {
    const usageLines = [
        'RAG-lite TS Document Search',
        '',
        'Usage: node search.js <query> [options]',
        '',
        'Arguments:',
        ' <query> Search query (wrap in quotes if it contains spaces)',
        '',
        'Options:',
        ' --top-k <number> Number of results to return (default: 10, max: 100)',
        ' --rerank Enable reranking for better results',
        ' --no-rerank Disable reranking',
        '',
        'Examples:',
        ' node search.js "machine learning"',
        ' node search.js "API documentation" --top-k 10',
        ' node search.js "tutorial" --rerank',
        ' node search.js "how to install" --top-k 20 --rerank',
        '',
        'Note: Make sure you have ingested documents first using:',
        ' node indexer.js <path>',
    ];
    for (const line of usageLines) {
        console.error(line);
    }
}
/**
 * Print the shared "--top-k" example lines that follow a --top-k error message.
 */
function printTopKExamples() {
    console.error('');
    console.error('Examples:');
    console.error(' --top-k 5 # Return 5 results');
    console.error(' --top-k 20 # Return 20 results');
}
/**
 * Parse the command line (process.argv without the first two entries) into
 * a query string and an options object.
 *
 * Every argument that is not a recognised flag is treated as part of the
 * query; the parts are joined with single spaces. Recognised flags are
 * `--top-k <number>`, `--rerank`, `--no-rerank` and `--help` / `-h`.
 *
 * On invalid input (no arguments, bad --top-k value, unknown `--` option,
 * empty query, query longer than 500 characters after trimming) a message is
 * written to stderr and the process exits with EXIT_CODES.INVALID_ARGUMENTS.
 * `--help` / `-h` prints the usage text and exits with status 0.
 *
 * @returns {{ query: string, options: { 'top-k'?: number, rerank?: boolean } }}
 */
function parseArgs() {
    const args = process.argv.slice(2);
    if (args.length === 0) {
        printUsage();
        process.exit(EXIT_CODES.INVALID_ARGUMENTS);
    }
    const queryParts = [];
    const options = {};
    for (let i = 0; i < args.length; i++) {
        const arg = args[i];
        if (arg === '--top-k') {
            const nextArg = args[i + 1];
            if (!nextArg) {
                console.error('Error: --top-k requires a numeric value');
                printTopKExamples();
                process.exit(EXIT_CODES.INVALID_ARGUMENTS);
            }
            const topK = parseInt(nextArg, 10);
            if (isNaN(topK) || topK <= 0) {
                console.error('Error: --top-k must be a positive number');
                printTopKExamples();
                process.exit(EXIT_CODES.INVALID_ARGUMENTS);
            }
            if (topK > 100) {
                // Large values are allowed; the user is only warned.
                console.warn(`Warning: Large --top-k value (${topK}) may impact performance. Consider using a smaller value.`);
            }
            options['top-k'] = topK;
            i++; // Skip the value that was just consumed
        }
        else if (arg === '--rerank') {
            options.rerank = true;
        }
        else if (arg === '--no-rerank') {
            options.rerank = false;
        }
        else if (arg === '--help' || arg === '-h') {
            // Show help and exit directly. Calling parseArgs() again here would
            // re-read the same argv, reach this branch again and recurse until
            // the call stack overflows.
            printUsage();
            process.exit(0);
        }
        else if (arg.startsWith('--')) {
            console.error(`Error: Unknown option '${arg}'`);
            console.error('');
            console.error('Available options:');
            console.error(' --top-k <number> Number of results to return');
            console.error(' --rerank Enable reranking');
            console.error(' --no-rerank Disable reranking');
            console.error(' --help, -h Show this help');
            process.exit(EXIT_CODES.INVALID_ARGUMENTS);
        }
        else {
            queryParts.push(arg);
        }
    }
    const query = queryParts.join(' ');
    if (!query.trim()) {
        console.error('Error: Query cannot be empty');
        console.error('');
        console.error('Please provide a search query:');
        console.error(' node search.js "your search terms"');
        process.exit(EXIT_CODES.INVALID_ARGUMENTS);
    }
    if (query.trim().length > 500) {
        console.error('Error: Query is too long (maximum 500 characters)');
        console.error('');
        console.error('Please use a shorter, more specific query.');
        process.exit(EXIT_CODES.INVALID_ARGUMENTS);
    }
    return { query, options };
}
|
|
102
|
+
/**
 * Entry point: parse the command line, then run the search with the
 * resulting query and options.
 */
async function main() {
    const parsed = parseArgs();
    await runSearch(parsed.query, parsed.options);
}
// Run the script. Any rejection is reported on stderr and turned into an exit
// code: a ConfigurationError supplies its own exitCode, everything else maps
// to EXIT_CODES.GENERAL_ERROR.
main().catch((error) => {
    if (!(error instanceof ConfigurationError)) {
        const detail = error instanceof Error ? error.message : String(error);
        console.error('Fatal Error:', detail);
        process.exit(EXIT_CODES.GENERAL_ERROR);
        return;
    }
    console.error('Configuration Error:');
    console.error(error.message);
    process.exit(error.exitCode);
});
|
|
117
|
+
//# sourceMappingURL=search-standalone.js.map
|
|
@@ -0,0 +1,99 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Public API SearchEngine - Simple constructor with Chameleon Architecture
|
|
3
|
+
*
|
|
4
|
+
* This class provides a clean, simple API that automatically adapts to the mode
|
|
5
|
+
* (text or multimodal) stored in the database during ingestion. The system detects
|
|
6
|
+
* the mode and creates the appropriate embedder and reranker without user intervention.
|
|
7
|
+
*
|
|
8
|
+
* Chameleon Architecture Features:
|
|
9
|
+
* - Automatic mode detection from database configuration
|
|
10
|
+
* - Seamless switching between text and multimodal modes
|
|
11
|
+
* - Appropriate embedder selection (sentence-transformer or CLIP)
|
|
12
|
+
* - Mode-specific reranking strategies
|
|
13
|
+
*
|
|
14
|
+
* @example
|
|
15
|
+
* ```typescript
|
|
16
|
+
* // Simple usage - mode automatically detected from database
|
|
17
|
+
* const search = new SearchEngine('./index.bin', './db.sqlite');
|
|
18
|
+
* const results = await search.search('query');
|
|
19
|
+
*
|
|
20
|
+
* // Works for both text and multimodal databases
|
|
21
|
+
* // Text mode: uses sentence-transformer embeddings
|
|
22
|
+
* // Multimodal mode: uses CLIP embeddings for cross-modal search
|
|
23
|
+
*
|
|
24
|
+
* // With options (advanced)
|
|
25
|
+
* const search = new SearchEngine('./index.bin', './db.sqlite', {
|
|
26
|
+
* enableReranking: true
|
|
27
|
+
* });
|
|
28
|
+
* ```
|
|
29
|
+
*/
|
|
30
|
+
import type { SearchResult, SearchOptions, EmbedFunction, RerankFunction } from './core/types.js';
|
|
31
|
+
/**
 * Optional settings accepted by the public `SearchEngine` constructor.
 * Every field may be omitted.
 */
export interface SearchEngineOptions {
    /** Overrides the name of the embedding model. */
    embeddingModel?: string;
    /** Overrides the batch size used for embedding. */
    batchSize?: number;
    /** Overrides the name of the reranking model. */
    rerankingModel?: string;
    /** Turns reranking on or off (default: true). */
    enableReranking?: boolean;
    /** Number of top results to return (default: taken from config). */
    topK?: number;
    /** Advanced: caller-supplied embedding function. */
    embedFn?: EmbedFunction;
    /** Advanced: caller-supplied reranking function. */
    rerankFn?: RerankFunction;
}
|
|
47
|
+
/**
 * Public search API. Constructed from an index path and a database path,
 * with optional `SearchEngineOptions`.
 */
export declare class SearchEngine {
    private indexPath;
    private dbPath;
    private options;
    private coreEngine;
    private initPromise;
    constructor(indexPath: string, dbPath: string, options?: SearchEngineOptions);
    /**
     * Sets up the underlying engine, either through the polymorphic factory
     * or through direct injection.
     *
     * Chameleon Architecture behaviour:
     * - the mode (text or multimodal) is detected from the database
     * - the embedder is chosen to match the detected mode
     * - reranking strategies are mode-specific
     * - callers see the same polymorphic interface in either mode
     */
    private initialize;
    /**
     * Runs a semantic search for `query`.
     */
    search(query: string, options?: SearchOptions): Promise<SearchResult[]>;
    /**
     * Fetches a single content item by ID.
     * @param contentId - ID of the content to fetch
     * @param format - Output format ('file' for CLI clients, 'base64' for MCP clients)
     * @returns Promise resolving to the content in the requested format
     */
    getContent(contentId: string, format?: 'file' | 'base64'): Promise<string>;
    /**
     * Fetches several content items in one batch.
     * @param contentIds - IDs of the content items to fetch
     * @param format - Output format ('file' for CLI clients, 'base64' for MCP clients)
     * @returns Promise resolving to an array of content in the requested format
     */
    getContentBatch(contentIds: string[], format?: 'file' | 'base64'): Promise<string[]>;
    /**
     * Fetches the metadata of a content item, for enhancing results.
     * @param contentId - ID of the content whose metadata is wanted
     * @returns Promise resolving to the content metadata
     */
    getContentMetadata(contentId: string): Promise<import('./core/content-resolver.js').ContentMetadata>;
    /**
     * Checks that a content item exists and is accessible.
     * @param contentId - ID of the content to check
     * @returns Promise resolving to true when the content exists, false otherwise
     */
    verifyContentExists(contentId: string): Promise<boolean>;
    /**
     * Releases resources.
     */
    cleanup(): Promise<void>;
}
|
|
99
|
+
//# sourceMappingURL=search.d.ts.map
|
|
@@ -0,0 +1,177 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Public API SearchEngine - Simple constructor with Chameleon Architecture
|
|
3
|
+
*
|
|
4
|
+
* This class provides a clean, simple API that automatically adapts to the mode
|
|
5
|
+
* (text or multimodal) stored in the database during ingestion. The system detects
|
|
6
|
+
* the mode and creates the appropriate embedder and reranker without user intervention.
|
|
7
|
+
*
|
|
8
|
+
* Chameleon Architecture Features:
|
|
9
|
+
* - Automatic mode detection from database configuration
|
|
10
|
+
* - Seamless switching between text and multimodal modes
|
|
11
|
+
* - Appropriate embedder selection (sentence-transformer or CLIP)
|
|
12
|
+
* - Mode-specific reranking strategies
|
|
13
|
+
*
|
|
14
|
+
* @example
|
|
15
|
+
* ```typescript
|
|
16
|
+
* // Simple usage - mode automatically detected from database
|
|
17
|
+
* const search = new SearchEngine('./index.bin', './db.sqlite');
|
|
18
|
+
* const results = await search.search('query');
|
|
19
|
+
*
|
|
20
|
+
* // Works for both text and multimodal databases
|
|
21
|
+
* // Text mode: uses sentence-transformer embeddings
|
|
22
|
+
* // Multimodal mode: uses CLIP embeddings for cross-modal search
|
|
23
|
+
*
|
|
24
|
+
* // With options (advanced)
|
|
25
|
+
* const search = new SearchEngine('./index.bin', './db.sqlite', {
|
|
26
|
+
* enableReranking: true
|
|
27
|
+
* });
|
|
28
|
+
* ```
|
|
29
|
+
*/
|
|
30
|
+
import { SearchEngine as CoreSearchEngine } from './core/search.js';
|
|
31
|
+
export class SearchEngine {
|
|
32
|
+
indexPath;
|
|
33
|
+
dbPath;
|
|
34
|
+
options;
|
|
35
|
+
coreEngine = null;
|
|
36
|
+
initPromise = null;
|
|
37
|
+
constructor(indexPath, dbPath, options = {}) {
|
|
38
|
+
this.indexPath = indexPath;
|
|
39
|
+
this.dbPath = dbPath;
|
|
40
|
+
this.options = options;
|
|
41
|
+
// Validate required parameters
|
|
42
|
+
if (!indexPath || typeof indexPath !== 'string' || indexPath.trim() === '') {
|
|
43
|
+
throw new Error('Both indexPath and dbPath are required.\n' +
|
|
44
|
+
'Example: const search = new SearchEngine("./index.bin", "./db.sqlite");\n' +
|
|
45
|
+
'Or use: const search = await SearchFactory.create("./index.bin", "./db.sqlite");');
|
|
46
|
+
}
|
|
47
|
+
if (!dbPath || typeof dbPath !== 'string' || dbPath.trim() === '') {
|
|
48
|
+
throw new Error('Both indexPath and dbPath are required.\n' +
|
|
49
|
+
'Example: const search = new SearchEngine("./index.bin", "./db.sqlite");\n' +
|
|
50
|
+
'Or use: const search = await SearchFactory.create("./index.bin", "./db.sqlite");');
|
|
51
|
+
}
|
|
52
|
+
}
|
|
53
|
+
/**
|
|
54
|
+
* Initialize the search engine using polymorphic factory or direct injection
|
|
55
|
+
*
|
|
56
|
+
* Chameleon Architecture Implementation:
|
|
57
|
+
* - Automatically detects mode from database (text or multimodal)
|
|
58
|
+
* - Creates appropriate embedder based on detected mode
|
|
59
|
+
* - Applies mode-specific reranking strategies
|
|
60
|
+
* - Provides seamless polymorphic behavior
|
|
61
|
+
*/
|
|
62
|
+
async initialize() {
|
|
63
|
+
if (this.coreEngine) {
|
|
64
|
+
return; // Already initialized
|
|
65
|
+
}
|
|
66
|
+
if (this.initPromise) {
|
|
67
|
+
return this.initPromise; // Initialization in progress
|
|
68
|
+
}
|
|
69
|
+
this.initPromise = (async () => {
|
|
70
|
+
// If custom functions are provided, use direct dependency injection
|
|
71
|
+
if (this.options.embedFn || this.options.rerankFn) {
|
|
72
|
+
const { IndexManager } = await import('./index-manager.js');
|
|
73
|
+
const { openDatabase } = await import('./core/db.js');
|
|
74
|
+
const { createTextEmbedFunction } = await import('./text/embedder.js');
|
|
75
|
+
const { existsSync } = await import('fs');
|
|
76
|
+
// Validate files exist
|
|
77
|
+
if (!existsSync(this.indexPath)) {
|
|
78
|
+
throw new Error(`Vector index not found at: ${this.indexPath}`);
|
|
79
|
+
}
|
|
80
|
+
if (!existsSync(this.dbPath)) {
|
|
81
|
+
throw new Error(`Database not found at: ${this.dbPath}`);
|
|
82
|
+
}
|
|
83
|
+
// Use custom embedFn or create default
|
|
84
|
+
const embedFn = this.options.embedFn || createTextEmbedFunction(this.options.embeddingModel);
|
|
85
|
+
// Get model defaults for dimensions
|
|
86
|
+
const { getModelDefaults, config } = await import('./core/config.js');
|
|
87
|
+
const modelDefaults = getModelDefaults(this.options.embeddingModel || config.embedding_model);
|
|
88
|
+
// Initialize dependencies
|
|
89
|
+
const db = await openDatabase(this.dbPath);
|
|
90
|
+
const indexManager = new IndexManager(this.indexPath, this.dbPath, modelDefaults.dimensions, this.options.embeddingModel);
|
|
91
|
+
await indexManager.initialize();
|
|
92
|
+
// Create ContentResolver for unified content system
|
|
93
|
+
const { ContentResolver } = await import('./core/content-resolver.js');
|
|
94
|
+
const contentResolver = new ContentResolver(db);
|
|
95
|
+
// Create core engine with dependency injection
|
|
96
|
+
this.coreEngine = new CoreSearchEngine(embedFn, indexManager, db, this.options.rerankFn, contentResolver);
|
|
97
|
+
}
|
|
98
|
+
else {
|
|
99
|
+
// Use core polymorphic factory for automatic mode detection (Chameleon Architecture)
|
|
100
|
+
// This enables SearchEngine to automatically adapt to text or multimodal mode
|
|
101
|
+
// based on the configuration stored in the database during ingestion
|
|
102
|
+
const { SearchFactory } = await import('./factories/search-factory.js');
|
|
103
|
+
this.coreEngine = await SearchFactory.create(this.indexPath, this.dbPath);
|
|
104
|
+
}
|
|
105
|
+
})();
|
|
106
|
+
return this.initPromise;
|
|
107
|
+
}
|
|
108
|
+
/**
|
|
109
|
+
* Perform semantic search
|
|
110
|
+
*/
|
|
111
|
+
async search(query, options) {
|
|
112
|
+
await this.initialize();
|
|
113
|
+
if (!this.coreEngine) {
|
|
114
|
+
throw new Error('SearchEngine failed to initialize');
|
|
115
|
+
}
|
|
116
|
+
return this.coreEngine.search(query, options);
|
|
117
|
+
}
|
|
118
|
+
/**
|
|
119
|
+
* Retrieve content by ID in the specified format
|
|
120
|
+
* @param contentId - Content ID to retrieve
|
|
121
|
+
* @param format - Format to return ('file' for CLI clients, 'base64' for MCP clients)
|
|
122
|
+
* @returns Promise that resolves to content in requested format
|
|
123
|
+
*/
|
|
124
|
+
async getContent(contentId, format = 'file') {
|
|
125
|
+
await this.initialize();
|
|
126
|
+
if (!this.coreEngine) {
|
|
127
|
+
throw new Error('SearchEngine failed to initialize');
|
|
128
|
+
}
|
|
129
|
+
return this.coreEngine.getContent(contentId, format);
|
|
130
|
+
}
|
|
131
|
+
/**
|
|
132
|
+
* Retrieve multiple content items efficiently in batch
|
|
133
|
+
* @param contentIds - Array of content IDs to retrieve
|
|
134
|
+
* @param format - Format to return ('file' for CLI clients, 'base64' for MCP clients)
|
|
135
|
+
* @returns Promise that resolves to array of content in requested format
|
|
136
|
+
*/
|
|
137
|
+
async getContentBatch(contentIds, format = 'file') {
|
|
138
|
+
await this.initialize();
|
|
139
|
+
if (!this.coreEngine) {
|
|
140
|
+
throw new Error('SearchEngine failed to initialize');
|
|
141
|
+
}
|
|
142
|
+
return this.coreEngine.getContentBatch(contentIds, format);
|
|
143
|
+
}
|
|
144
|
+
/**
|
|
145
|
+
* Retrieve content metadata for result enhancement
|
|
146
|
+
* @param contentId - Content ID to get metadata for
|
|
147
|
+
* @returns Promise that resolves to content metadata
|
|
148
|
+
*/
|
|
149
|
+
async getContentMetadata(contentId) {
|
|
150
|
+
await this.initialize();
|
|
151
|
+
if (!this.coreEngine) {
|
|
152
|
+
throw new Error('SearchEngine failed to initialize');
|
|
153
|
+
}
|
|
154
|
+
return this.coreEngine.getContentMetadata(contentId);
|
|
155
|
+
}
|
|
156
|
+
/**
|
|
157
|
+
* Verify that content exists and is accessible
|
|
158
|
+
* @param contentId - Content ID to verify
|
|
159
|
+
* @returns Promise that resolves to true if content exists, false otherwise
|
|
160
|
+
*/
|
|
161
|
+
async verifyContentExists(contentId) {
|
|
162
|
+
await this.initialize();
|
|
163
|
+
if (!this.coreEngine) {
|
|
164
|
+
throw new Error('SearchEngine failed to initialize');
|
|
165
|
+
}
|
|
166
|
+
return this.coreEngine.verifyContentExists(contentId);
|
|
167
|
+
}
|
|
168
|
+
/**
|
|
169
|
+
* Clean up resources
|
|
170
|
+
*/
|
|
171
|
+
async cleanup() {
|
|
172
|
+
if (this.coreEngine) {
|
|
173
|
+
await this.coreEngine.cleanup();
|
|
174
|
+
}
|
|
175
|
+
}
|
|
176
|
+
}
|
|
177
|
+
//# sourceMappingURL=search.js.map
|
|
@@ -0,0 +1,18 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Test utilities for multi-model support
|
|
3
|
+
* Provides common configurations and helpers for testing with different embedding models
|
|
4
|
+
*/
|
|
5
|
+
/**
 * Shape of one embedding-model configuration record used by the test utilities.
 */
export interface TestModel {
    /** Model identifier; the key `getTestModel` looks records up by. */
    name: string;
    /** Presumably the embedding vector size for this model — TODO confirm. */
    dimensions: number;
    /** Chunk size to use with this model; unit is not stated here — TODO confirm. */
    chunkSize: number;
    /** Batch size to use with this model — presumably items per embedding call; confirm. */
    batchSize: number;
}
|
|
11
|
+
/** Model configuration records available to tests; `getTestModel` looks entries up in this list by name. */
export declare const TEST_MODELS: TestModel[];
|
|
12
|
+
/**
|
|
13
|
+
* Retrieve model configuration by name
|
|
14
|
+
* @param modelName - The name of the model to retrieve
|
|
15
|
+
* @returns Model configuration object or undefined if not found
|
|
16
|
+
*/
|
|
17
|
+
// The result is `undefined` for an unknown name, so callers must narrow before use.
export declare function getTestModel(modelName: string): TestModel | undefined;
|
|
18
|
+
//# sourceMappingURL=test-utils.d.ts.map
|
|
@@ -0,0 +1,27 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Test utilities for multi-model support
|
|
3
|
+
* Provides common configurations and helpers for testing with different embedding models
|
|
4
|
+
*/
|
|
5
|
+
export const TEST_MODELS = [
    // One record per model; every record carries the same four keys in the same order.
    { name: 'sentence-transformers/all-MiniLM-L6-v2', dimensions: 384, chunkSize: 250, batchSize: 16 },
    { name: 'Xenova/all-mpnet-base-v2', dimensions: 768, chunkSize: 400, batchSize: 8 },
];
|
|
19
|
+
/**
|
|
20
|
+
* Retrieve model configuration by name
|
|
21
|
+
* @param modelName - The name of the model to retrieve
|
|
22
|
+
* @returns Model configuration object or undefined if not found
|
|
23
|
+
*/
|
|
24
|
+
export function getTestModel(modelName) {
    // Linear scan; the first record whose name matches exactly wins.
    for (const candidate of TEST_MODELS) {
        if (candidate.name === modelName) {
            return candidate;
        }
    }
    // No record carries that name.
    return undefined;
}
|
|
27
|
+
//# sourceMappingURL=test-utils.js.map
|
|
@@ -0,0 +1,33 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Text-specific chunking implementation
|
|
3
|
+
* Implements the ChunkingStrategy interface for text content
|
|
4
|
+
*/
|
|
5
|
+
import '../dom-polyfills.js';
|
|
6
|
+
import { ChunkingStrategy, GenericDocument, GenericChunk, ChunkConfig } from '../core/chunker.js';
|
|
7
|
+
/**
|
|
8
|
+
* Document interface for text chunking
|
|
9
|
+
*/
|
|
10
|
+
export interface Document {
    /** Where the document came from — presumably a path or URI; confirm against callers. */
    source: string;
    /** Title of the document. */
    title: string;
    /** Text content of the document that gets chunked. */
    content: string;
    /** Optional free-form key/value data attached to the document. */
    metadata?: Record<string, any>;
}
|
|
16
|
+
/**
 * One chunk of text, the element type of the array `chunkDocument` resolves to.
 */
export interface Chunk {
    /** Text of this chunk. */
    text: string;
    /** Index of this chunk — presumably its position within the source document; confirm. */
    chunkIndex: number;
    /** Token count recorded for `text`; the tokenizer used is not visible here. */
    tokenCount: number;
}
|
|
21
|
+
/**
|
|
22
|
+
* Text chunking strategy implementation
|
|
23
|
+
*/
|
|
24
|
+
export declare class TextChunkingStrategy implements ChunkingStrategy {
    /**
     * Reports whether this strategy handles the given content type.
     * NOTE(review): presumably true only for text content — confirm against the implementation.
     * @param contentType - Content type string to test
     * @returns Whether the strategy applies to `contentType`
     */
    appliesTo(contentType: string): boolean;
    /**
     * Splits a generic document into generic chunks.
     * @param document - Document to split
     * @param config - Chunking configuration to apply
     * @returns Promise that resolves to the produced chunks
     */
    chunk(document: GenericDocument, config: ChunkConfig): Promise<GenericChunk[]>;
}
|
|
28
|
+
/**
|
|
29
|
+
* Text document chunking function
|
|
30
|
+
* Converts between text-specific and generic interfaces
|
|
31
|
+
*/
|
|
32
|
+
// `config` is optional; the defaults applied when it is omitted are not visible in this declaration.
export declare function chunkDocument(document: Document, config?: ChunkConfig): Promise<Chunk[]>;
|
|
33
|
+
//# sourceMappingURL=chunker.d.ts.map
|