rag-lite-ts 1.0.2 → 2.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +605 -93
- package/dist/cli/indexer.js +192 -4
- package/dist/cli/search.js +50 -11
- package/dist/cli.js +183 -26
- package/dist/core/abstract-embedder.d.ts +125 -0
- package/dist/core/abstract-embedder.js +264 -0
- package/dist/core/actionable-error-messages.d.ts +60 -0
- package/dist/core/actionable-error-messages.js +397 -0
- package/dist/core/batch-processing-optimizer.d.ts +155 -0
- package/dist/core/batch-processing-optimizer.js +541 -0
- package/dist/core/binary-index-format.d.ts +52 -0
- package/dist/core/binary-index-format.js +122 -0
- package/dist/core/chunker.d.ts +2 -0
- package/dist/core/cli-database-utils.d.ts +53 -0
- package/dist/core/cli-database-utils.js +239 -0
- package/dist/core/config.js +10 -3
- package/dist/core/content-errors.d.ts +111 -0
- package/dist/core/content-errors.js +362 -0
- package/dist/core/content-manager.d.ts +343 -0
- package/dist/core/content-manager.js +1504 -0
- package/dist/core/content-performance-optimizer.d.ts +150 -0
- package/dist/core/content-performance-optimizer.js +516 -0
- package/dist/core/content-resolver.d.ts +104 -0
- package/dist/core/content-resolver.js +285 -0
- package/dist/core/cross-modal-search.d.ts +164 -0
- package/dist/core/cross-modal-search.js +342 -0
- package/dist/core/database-connection-manager.d.ts +109 -0
- package/dist/core/database-connection-manager.js +304 -0
- package/dist/core/db.d.ts +141 -2
- package/dist/core/db.js +631 -89
- package/dist/core/embedder-factory.d.ts +176 -0
- package/dist/core/embedder-factory.js +338 -0
- package/dist/core/index.d.ts +3 -1
- package/dist/core/index.js +4 -1
- package/dist/core/ingestion.d.ts +85 -15
- package/dist/core/ingestion.js +510 -45
- package/dist/core/lazy-dependency-loader.d.ts +152 -0
- package/dist/core/lazy-dependency-loader.js +453 -0
- package/dist/core/mode-detection-service.d.ts +150 -0
- package/dist/core/mode-detection-service.js +565 -0
- package/dist/core/mode-model-validator.d.ts +92 -0
- package/dist/core/mode-model-validator.js +203 -0
- package/dist/core/model-registry.d.ts +120 -0
- package/dist/core/model-registry.js +415 -0
- package/dist/core/model-validator.d.ts +217 -0
- package/dist/core/model-validator.js +782 -0
- package/dist/core/polymorphic-search-factory.d.ts +154 -0
- package/dist/core/polymorphic-search-factory.js +344 -0
- package/dist/core/raglite-paths.d.ts +121 -0
- package/dist/core/raglite-paths.js +145 -0
- package/dist/core/reranking-config.d.ts +42 -0
- package/dist/core/reranking-config.js +156 -0
- package/dist/core/reranking-factory.d.ts +92 -0
- package/dist/core/reranking-factory.js +591 -0
- package/dist/core/reranking-strategies.d.ts +325 -0
- package/dist/core/reranking-strategies.js +720 -0
- package/dist/core/resource-cleanup.d.ts +163 -0
- package/dist/core/resource-cleanup.js +371 -0
- package/dist/core/resource-manager.d.ts +212 -0
- package/dist/core/resource-manager.js +564 -0
- package/dist/core/search.d.ts +28 -1
- package/dist/core/search.js +83 -5
- package/dist/core/streaming-operations.d.ts +145 -0
- package/dist/core/streaming-operations.js +409 -0
- package/dist/core/types.d.ts +3 -0
- package/dist/core/universal-embedder.d.ts +177 -0
- package/dist/core/universal-embedder.js +139 -0
- package/dist/core/validation-messages.d.ts +99 -0
- package/dist/core/validation-messages.js +334 -0
- package/dist/core/vector-index.d.ts +1 -1
- package/dist/core/vector-index.js +37 -39
- package/dist/factories/index.d.ts +3 -1
- package/dist/factories/index.js +2 -0
- package/dist/factories/polymorphic-factory.d.ts +50 -0
- package/dist/factories/polymorphic-factory.js +159 -0
- package/dist/factories/text-factory.d.ts +128 -34
- package/dist/factories/text-factory.js +346 -97
- package/dist/file-processor.d.ts +88 -2
- package/dist/file-processor.js +720 -17
- package/dist/index.d.ts +32 -0
- package/dist/index.js +29 -0
- package/dist/ingestion.d.ts +16 -0
- package/dist/ingestion.js +21 -0
- package/dist/mcp-server.d.ts +35 -3
- package/dist/mcp-server.js +1107 -31
- package/dist/multimodal/clip-embedder.d.ts +327 -0
- package/dist/multimodal/clip-embedder.js +992 -0
- package/dist/multimodal/index.d.ts +6 -0
- package/dist/multimodal/index.js +6 -0
- package/dist/run-error-recovery-tests.d.ts +7 -0
- package/dist/run-error-recovery-tests.js +101 -0
- package/dist/search.d.ts +60 -9
- package/dist/search.js +82 -11
- package/dist/test-utils.d.ts +8 -26
- package/dist/text/chunker.d.ts +1 -0
- package/dist/text/embedder.js +15 -8
- package/dist/text/index.d.ts +1 -0
- package/dist/text/index.js +1 -0
- package/dist/text/reranker.d.ts +1 -2
- package/dist/text/reranker.js +17 -47
- package/dist/text/sentence-transformer-embedder.d.ts +96 -0
- package/dist/text/sentence-transformer-embedder.js +340 -0
- package/dist/types.d.ts +39 -0
- package/dist/utils/vector-math.d.ts +31 -0
- package/dist/utils/vector-math.js +70 -0
- package/package.json +27 -6
- package/dist/api-errors.d.ts.map +0 -1
- package/dist/api-errors.js.map +0 -1
- package/dist/cli/indexer.d.ts.map +0 -1
- package/dist/cli/indexer.js.map +0 -1
- package/dist/cli/search.d.ts.map +0 -1
- package/dist/cli/search.js.map +0 -1
- package/dist/cli.d.ts.map +0 -1
- package/dist/cli.js.map +0 -1
- package/dist/config.d.ts.map +0 -1
- package/dist/config.js.map +0 -1
- package/dist/core/adapters.d.ts.map +0 -1
- package/dist/core/adapters.js.map +0 -1
- package/dist/core/chunker.d.ts.map +0 -1
- package/dist/core/chunker.js.map +0 -1
- package/dist/core/config.d.ts.map +0 -1
- package/dist/core/config.js.map +0 -1
- package/dist/core/db.d.ts.map +0 -1
- package/dist/core/db.js.map +0 -1
- package/dist/core/error-handler.d.ts.map +0 -1
- package/dist/core/error-handler.js.map +0 -1
- package/dist/core/index.d.ts.map +0 -1
- package/dist/core/index.js.map +0 -1
- package/dist/core/ingestion.d.ts.map +0 -1
- package/dist/core/ingestion.js.map +0 -1
- package/dist/core/interfaces.d.ts.map +0 -1
- package/dist/core/interfaces.js.map +0 -1
- package/dist/core/path-manager.d.ts.map +0 -1
- package/dist/core/path-manager.js.map +0 -1
- package/dist/core/search-example.d.ts +0 -25
- package/dist/core/search-example.d.ts.map +0 -1
- package/dist/core/search-example.js +0 -138
- package/dist/core/search-example.js.map +0 -1
- package/dist/core/search-pipeline-example.d.ts +0 -21
- package/dist/core/search-pipeline-example.d.ts.map +0 -1
- package/dist/core/search-pipeline-example.js +0 -188
- package/dist/core/search-pipeline-example.js.map +0 -1
- package/dist/core/search-pipeline.d.ts.map +0 -1
- package/dist/core/search-pipeline.js.map +0 -1
- package/dist/core/search.d.ts.map +0 -1
- package/dist/core/search.js.map +0 -1
- package/dist/core/types.d.ts.map +0 -1
- package/dist/core/types.js.map +0 -1
- package/dist/core/vector-index.d.ts.map +0 -1
- package/dist/core/vector-index.js.map +0 -1
- package/dist/dom-polyfills.d.ts.map +0 -1
- package/dist/dom-polyfills.js.map +0 -1
- package/dist/examples/clean-api-examples.d.ts +0 -44
- package/dist/examples/clean-api-examples.d.ts.map +0 -1
- package/dist/examples/clean-api-examples.js +0 -206
- package/dist/examples/clean-api-examples.js.map +0 -1
- package/dist/factories/index.d.ts.map +0 -1
- package/dist/factories/index.js.map +0 -1
- package/dist/factories/text-factory.d.ts.map +0 -1
- package/dist/factories/text-factory.js.map +0 -1
- package/dist/file-processor.d.ts.map +0 -1
- package/dist/file-processor.js.map +0 -1
- package/dist/index-manager.d.ts.map +0 -1
- package/dist/index-manager.js.map +0 -1
- package/dist/index.d.ts.map +0 -1
- package/dist/index.js.map +0 -1
- package/dist/indexer.d.ts.map +0 -1
- package/dist/indexer.js.map +0 -1
- package/dist/ingestion.d.ts.map +0 -1
- package/dist/ingestion.js.map +0 -1
- package/dist/mcp-server.d.ts.map +0 -1
- package/dist/mcp-server.js.map +0 -1
- package/dist/preprocess.d.ts.map +0 -1
- package/dist/preprocess.js.map +0 -1
- package/dist/preprocessors/index.d.ts.map +0 -1
- package/dist/preprocessors/index.js.map +0 -1
- package/dist/preprocessors/mdx.d.ts.map +0 -1
- package/dist/preprocessors/mdx.js.map +0 -1
- package/dist/preprocessors/mermaid.d.ts.map +0 -1
- package/dist/preprocessors/mermaid.js.map +0 -1
- package/dist/preprocessors/registry.d.ts.map +0 -1
- package/dist/preprocessors/registry.js.map +0 -1
- package/dist/search-standalone.d.ts.map +0 -1
- package/dist/search-standalone.js.map +0 -1
- package/dist/search.d.ts.map +0 -1
- package/dist/search.js.map +0 -1
- package/dist/test-utils.d.ts.map +0 -1
- package/dist/test-utils.js.map +0 -1
- package/dist/text/chunker.d.ts.map +0 -1
- package/dist/text/chunker.js.map +0 -1
- package/dist/text/embedder.d.ts.map +0 -1
- package/dist/text/embedder.js.map +0 -1
- package/dist/text/index.d.ts.map +0 -1
- package/dist/text/index.js.map +0 -1
- package/dist/text/preprocessors/index.d.ts.map +0 -1
- package/dist/text/preprocessors/index.js.map +0 -1
- package/dist/text/preprocessors/mdx.d.ts.map +0 -1
- package/dist/text/preprocessors/mdx.js.map +0 -1
- package/dist/text/preprocessors/mermaid.d.ts.map +0 -1
- package/dist/text/preprocessors/mermaid.js.map +0 -1
- package/dist/text/preprocessors/registry.d.ts.map +0 -1
- package/dist/text/preprocessors/registry.js.map +0 -1
- package/dist/text/reranker.d.ts.map +0 -1
- package/dist/text/reranker.js.map +0 -1
- package/dist/text/tokenizer.d.ts.map +0 -1
- package/dist/text/tokenizer.js.map +0 -1
- package/dist/types.d.ts.map +0 -1
- package/dist/types.js.map +0 -1
|
@@ -0,0 +1,104 @@
|
|
|
1
|
+
/**
 * Content Resolver - Handles content retrieval and format adaptation for unified content system
 * Resolves content IDs to actual content locations and adapts format based on client needs
 * Supports efficient batch retrieval operations and handles missing content gracefully
 */
|
|
6
|
+
import { DatabaseConnection, type ContentMetadata } from './db.js';
|
|
7
|
+
export type { ContentMetadata };
|
|
8
|
+
/**
 * A single entry in a batch content retrieval request.
 */
export interface ContentRequest {
    /** ID of the content item to retrieve. */
    contentId: string;
    /**
     * Format the content should be returned in:
     * 'file' (a file path, for CLI clients) or 'base64' (encoded bytes, for MCP clients).
     */
    format: 'file' | 'base64';
}
|
|
15
|
+
/**
 * Outcome of resolving one entry of a batch content retrieval request.
 */
export interface ContentResult {
    /** ID of the content item this result belongs to. */
    contentId: string;
    /** True when the content was retrieved, false when retrieval failed. */
    success: boolean;
    /** Retrieved content in the requested format; set when `success` is true. */
    content?: string;
    /** Error message describing the failure; set when `success` is false. */
    error?: string;
}
|
|
24
|
+
/**
 * ContentResolver class for handling content retrieval and format conversion.
 * Implements format-adaptive content retrieval for CLI and MCP clients.
 */
export declare class ContentResolver {
    private db;
    private streamingOps;
    private performanceOptimizer;
    /**
     * @param db - Database connection used for content metadata lookups
     */
    constructor(db: DatabaseConnection);
    /**
     * Retrieves content by ID and adapts format based on client needs.
     * @param contentId - Content ID to retrieve
     * @param format - Format to return ('file' for CLI clients, 'base64' for MCP clients); defaults to 'file'
     * @returns Promise that resolves to the content in the requested format
     *          (a file path for 'file', a base64 string for 'base64')
     * @throws ContentRetrievalError when `format` is neither 'file' nor 'base64'
     * @throws ContentNotFoundError when no metadata exists for the ID or the content file is missing
     */
    getContent(contentId: string, format?: 'file' | 'base64'): Promise<string>;
    /**
     * Retrieves multiple content items in batch. Failures of individual items are
     * reported through `success: false` results rather than by rejecting.
     * @param requests - Array of content requests with IDs and formats
     * @returns Promise that resolves to an array of content results
     */
    getContentBatch(requests: ContentRequest[]): Promise<ContentResult[]>;
    /**
     * Fallback batch processing implementation, used when the optimized batch path fails.
     * @param requests - Array of content requests
     * @returns Promise that resolves to an array of content results
     */
    private getContentBatchFallback;
    /**
     * Retrieves content metadata without loading the actual content.
     * @param contentId - Content ID to get metadata for
     * @returns Promise that resolves to the content metadata
     * @throws ContentNotFoundError when no metadata exists for the ID
     */
    getContentMetadata(contentId: string): Promise<ContentMetadata>;
    /**
     * Verifies that content exists and is accessible.
     * @param contentId - Content ID to verify
     * @returns Promise that resolves to true if the content's file exists, false otherwise
     *          (lookup and file-system errors are reported as false)
     */
    verifyContentExists(contentId: string): Promise<boolean>;
    /**
     * Gets performance statistics for batch operations and content retrieval.
     * @returns Performance statistics
     */
    getPerformanceStats(): {
        batchOperations: {
            totalOperations: number;
            averageDuration: number;
            totalBytesProcessed: number;
            averageSpeed: number;
            errorRate: number;
        };
        contentRetrieval: {
            totalRetrievals: number;
            averageDuration: number;
            cacheHitRate: number;
        };
    };
    /**
     * Clears the performance optimizer's hash cache.
     * NOTE(review): the implementation only calls `clearHashCache()`; whether that
     * also resets collected metrics is not visible here - confirm before relying on it.
     */
    clearPerformanceCaches(): void;
    /**
     * Returns content as a file path for CLI clients.
     * @param metadata - Content metadata
     * @returns File path that can be accessed directly
     */
    private getContentAsFilePath;
    /**
     * Returns content as a base64 string for MCP clients.
     * @param metadata - Content metadata
     * @returns Base64-encoded content
     */
    private getContentAsBase64;
    /**
     * Cleans up resources to prevent memory leaks and hanging processes.
     * Should be called when the ContentResolver is no longer needed.
     */
    cleanup(): void;
}
|
|
104
|
+
//# sourceMappingURL=content-resolver.d.ts.map
|
|
@@ -0,0 +1,285 @@
|
|
|
1
|
+
/**
 * Content Resolver - Handles content retrieval and format adaptation for unified content system
 * Resolves content IDs to actual content locations and adapts format based on client needs
 * Supports efficient batch retrieval operations and handles missing content gracefully
 */
|
|
6
|
+
import { promises as fs } from 'fs';
|
|
7
|
+
import { getContentMetadata } from './db.js';
|
|
8
|
+
import { ContentNotFoundError, ContentRetrievalError, ContentErrorHandler } from './content-errors.js';
|
|
9
|
+
import { withTimeout, SafeBuffer } from './resource-cleanup.js';
|
|
10
|
+
import { createStreamingOperations, formatBytes, formatProcessingTime, calculateProcessingSpeed } from './streaming-operations.js';
|
|
11
|
+
import { createContentPerformanceOptimizer, formatProcessingSpeed } from './content-performance-optimizer.js';
|
|
12
|
+
/** Files larger than this many bytes are base64-encoded through the streaming reader. */
const STREAMING_THRESHOLD_BYTES = 10 * 1024 * 1024;
/** Time budget (ms) for streaming base64 conversion of a large file (5 minutes). */
const STREAMING_BASE64_TIMEOUT_MS = 300000;
/** Time budget (ms) for in-memory base64 conversion of a small file. */
const BASE64_TIMEOUT_MS = 30000;
/**
 * Reports whether `filePath` refers to an existing regular file.
 * Any stat failure (missing path, permission error, ...) is reported as false.
 */
async function isRegularFile(filePath) {
    try {
        const stats = await fs.stat(filePath);
        return stats.isFile();
    }
    catch {
        return false;
    }
}
/**
 * Resolves one batch request through `resolver.getContent`, turning a thrown
 * error into a `success: false` result so one bad item cannot fail the batch.
 */
async function resolveRequest(resolver, request) {
    try {
        const content = await resolver.getContent(request.contentId, request.format);
        return {
            contentId: request.contentId,
            success: true,
            content
        };
    }
    catch (error) {
        return {
            contentId: request.contentId,
            success: false,
            error: error instanceof Error ? error.message : 'Unknown error'
        };
    }
}
/**
 * ContentResolver class for handling content retrieval and format conversion
 * Implements format-adaptive content retrieval for CLI and MCP clients
 */
export class ContentResolver {
    db;
    streamingOps;
    performanceOptimizer;
    /**
     * @param db - Database connection used for content metadata lookups
     */
    constructor(db) {
        this.db = db;
        // Initialize streaming operations for content retrieval
        this.streamingOps = createStreamingOperations({
            chunkSize: 256 * 1024, // 256KB chunks for retrieval operations
            enableProgress: false,
            enableHashing: false,
            timeout: 300000 // 5 minutes
        });
        // Initialize performance optimizer for batch operations and caching
        this.performanceOptimizer = createContentPerformanceOptimizer({
            hashCacheSize: 500, // Smaller cache for resolver
            hashCacheTTL: 30 * 60 * 1000, // 30 minutes TTL
            maxConcurrentOperations: 15, // Higher concurrency for retrieval
            batchSize: 25,
            fileBufferSize: 256 * 1024,
            enableAsyncIO: true,
            enableMetrics: true,
            metricsRetentionTime: 12 * 60 * 60 * 1000 // 12 hours
        });
    }
    /**
     * Retrieves content by ID and adapts format based on client needs
     * @param contentId - Content ID to retrieve
     * @param format - Format to return ('file' for CLI clients, 'base64' for MCP clients)
     * @returns Promise that resolves to content in requested format
     * @throws ContentRetrievalError when `format` is neither 'file' nor 'base64'
     * @throws ContentNotFoundError when metadata is missing or the content file does not exist
     */
    async getContent(contentId, format = 'file') {
        try {
            // Validate format parameter
            if (format !== 'file' && format !== 'base64') {
                throw new ContentRetrievalError(contentId, format, 'Format must be either "file" or "base64"', 'format_validation');
            }
            // Get content metadata with timeout
            const metadata = await withTimeout(getContentMetadata(this.db, contentId), 10000, // 10 second timeout for database query
            'Database query for content metadata timed out');
            if (!metadata) {
                throw new ContentNotFoundError(contentId, undefined, 'metadata_lookup');
            }
            // Check the file directly from the metadata already loaded above;
            // going through verifyContentExists() would query the database a second time.
            const contentExists = await withTimeout(isRegularFile(metadata.contentPath), 5000, // 5 second timeout for file verification
            'Content file verification timed out');
            if (!contentExists) {
                throw new ContentNotFoundError(contentId, metadata.displayName, 'file_verification');
            }
            // Return content in requested format with timeout
            if (format === 'file') {
                return await withTimeout(this.getContentAsFilePath(metadata), 5000, // 5 second timeout for file path resolution
                'File path resolution timed out');
            }
            // Large files are streamed with their own 5 minute budget inside
            // getContentAsBase64; a fixed 30 second outer limit would cut that short.
            // The small grace period lets the inner, more specific timeout fire first.
            const base64Timeout = metadata.fileSize > STREAMING_THRESHOLD_BYTES
                ? STREAMING_BASE64_TIMEOUT_MS + 5000
                : BASE64_TIMEOUT_MS;
            return await withTimeout(this.getContentAsBase64(metadata), base64Timeout, 'Base64 conversion timed out');
        }
        catch (error) {
            if (error instanceof ContentNotFoundError || error instanceof ContentRetrievalError) {
                throw error; // Re-throw content-specific errors
            }
            // NOTE(review): handleContentError presumably always throws; if it ever
            // returns, this method resolves to undefined - confirm against content-errors.js.
            ContentErrorHandler.handleContentError(error, 'content retrieval', 'getContent');
        }
    }
    /**
     * Retrieves multiple content items efficiently in batch with performance optimizations
     * @param requests - Array of content requests with IDs and formats
     * @returns Promise that resolves to array of content results
     */
    async getContentBatch(requests) {
        // Nothing to resolve: skip the optimizer entirely.
        if (requests.length === 0) {
            return [];
        }
        const startTime = Date.now();
        try {
            // Use performance optimizer for batch processing
            const batchResult = await this.performanceOptimizer.processBatchOptimized(requests, (request) => resolveRequest(this, request), {
                batchSize: 25, // Optimized batch size for content retrieval
                maxConcurrency: 15, // Higher concurrency for I/O operations
                enableMetrics: true
            });
            // Log performance metrics for large batches
            if (requests.length > 10) {
                const duration = Date.now() - startTime;
                const speed = batchResult.averageSpeed;
                console.log(`Batch retrieval completed: ${requests.length} items in ${formatProcessingTime(duration)} ` +
                    `(${batchResult.successCount} success, ${batchResult.errorCount} errors, ${formatProcessingSpeed(speed)})`);
            }
            return batchResult.results;
        }
        catch (error) {
            // Fallback to original implementation if optimization fails
            console.warn('Batch optimization failed, using fallback:', error);
            return this.getContentBatchFallback(requests);
        }
    }
    /**
     * Fallback batch processing implementation
     * Processes the requests in consecutive groups of 10, each group in parallel.
     * @param requests - Array of content requests
     * @returns Promise that resolves to array of content results
     */
    async getContentBatchFallback(requests) {
        const concurrencyLimit = 10;
        const results = [];
        for (let start = 0; start < requests.length; start += concurrencyLimit) {
            const group = requests.slice(start, start + concurrencyLimit);
            const groupResults = await Promise.all(group.map((request) => resolveRequest(this, request)));
            results.push(...groupResults);
        }
        return results;
    }
    /**
     * Retrieves content metadata without loading the actual content
     * @param contentId - Content ID to get metadata for
     * @returns Promise that resolves to content metadata
     * @throws ContentNotFoundError when no metadata exists for the ID
     */
    async getContentMetadata(contentId) {
        const metadata = await getContentMetadata(this.db, contentId);
        if (!metadata) {
            throw new ContentNotFoundError(contentId, undefined, 'metadata_retrieval');
        }
        return metadata;
    }
    /**
     * Verifies that content exists and is accessible
     * @param contentId - Content ID to verify
     * @returns Promise that resolves to true if content exists, false otherwise
     */
    async verifyContentExists(contentId) {
        try {
            const metadata = await getContentMetadata(this.db, contentId);
            if (!metadata) {
                return false;
            }
            // Check if the content file exists and is accessible
            return await isRegularFile(metadata.contentPath);
        }
        catch {
            // A failed metadata lookup is reported as "does not exist".
            return false;
        }
    }
    /**
     * Gets performance statistics for batch operations and content retrieval
     * @returns Performance statistics
     */
    getPerformanceStats() {
        const batchStats = this.performanceOptimizer.getPerformanceStats('batch_processing');
        const retrievalStats = this.performanceOptimizer.getPerformanceStats('file_read');
        return {
            batchOperations: batchStats,
            contentRetrieval: {
                totalRetrievals: retrievalStats.totalOperations,
                averageDuration: retrievalStats.averageDuration,
                cacheHitRate: retrievalStats.cacheHitRate
            }
        };
    }
    /**
     * Clears the performance optimizer's hash cache.
     * NOTE(review): only clearHashCache() is called here; whether that also resets
     * collected metrics is not visible from this file - confirm.
     */
    clearPerformanceCaches() {
        this.performanceOptimizer.clearHashCache();
    }
    // =============================================================================
    // PRIVATE METHODS
    // =============================================================================
    /**
     * Returns content as file path for CLI clients
     * @param metadata - Content metadata
     * @returns File path that can be accessed directly
     */
    async getContentAsFilePath(metadata) {
        // For both filesystem and content_dir storage, return the content path
        // CLI clients can access files directly regardless of storage type
        return metadata.contentPath;
    }
    /**
     * Returns content as base64 string for MCP clients with optimized I/O
     * @param metadata - Content metadata
     * @returns Base64-encoded content ready for display
     * @throws ContentRetrievalError when reading or converting the file fails
     */
    async getContentAsBase64(metadata) {
        let safeBuffer = null;
        try {
            // Use optimized file reading for better performance
            if (metadata.fileSize > STREAMING_THRESHOLD_BYTES) { // Use streaming for files > 10MB
                const startTime = Date.now();
                const base64Content = await withTimeout(this.streamingOps.readFileAsBase64Streaming(metadata.contentPath), STREAMING_BASE64_TIMEOUT_MS, // 5 minute timeout for large file base64 conversion
                'Streaming base64 conversion timed out');
                const processingTime = Date.now() - startTime;
                const speed = calculateProcessingSpeed(metadata.fileSize, processingTime);
                // Log performance metrics for large files
                if (metadata.fileSize > 50 * 1024 * 1024) {
                    console.log(`Optimized base64 conversion completed: ${formatBytes(metadata.fileSize)} in ${formatProcessingTime(processingTime)} (${formatProcessingSpeed(speed)})`);
                }
                return base64Content;
            }
            // For smaller files, use traditional method with memory management
            const content = await fs.readFile(metadata.contentPath);
            // Use safe buffer for memory management (don't clear original for normal operations)
            safeBuffer = new SafeBuffer(content, { clearOriginal: false });
            // Convert to base64
            return safeBuffer.get().toString('base64');
        }
        catch (error) {
            throw new ContentRetrievalError(metadata.id, 'base64', `Failed to read content file: ${error instanceof Error ? error.message : 'Unknown error'}`, 'file_reading');
        }
        finally {
            // Clear sensitive buffer data
            if (safeBuffer) {
                safeBuffer.clear();
            }
        }
    }
    /**
     * Cleanup resources to prevent memory leaks and hanging processes
     * Should be called when ContentResolver is no longer needed
     */
    cleanup() {
        // Clean up performance optimizer interval that prevents process exit
        if (this.performanceOptimizer && typeof this.performanceOptimizer.cleanup === 'function') {
            this.performanceOptimizer.cleanup();
        }
    }
}
|
|
285
|
+
//# sourceMappingURL=content-resolver.js.map
|
|
@@ -0,0 +1,164 @@
|
|
|
1
|
+
/**
 * Cross-Modal Search Implementation
 *
 * This module extends the core SearchEngine to provide cross-modal search capabilities
 * that enable text queries to find images and image queries to find text content.
 *
 * Task 4.2: Implement cross-modal search functionality
 * - Update search logic to handle mixed content type results
 * - Ensure ranking works properly across text and image content
 * - Test text queries finding relevant images and vice versa
 *
 * Requirements addressed:
 * - 6.1: Enable text queries to find relevant image content
 * - 6.2: Enable image queries to find relevant text content
 * - 6.3: Rank mixed content types by semantic similarity
 */
|
|
17
|
+
import { SearchEngine } from './search.js';
|
|
18
|
+
import type { SearchResult, SearchOptions } from './types.js';
|
|
19
|
+
import type { EmbedFunction, RerankFunction } from './interfaces.js';
|
|
20
|
+
/**
 * Extended search options for cross-modal search.
 * Adds cross-modal controls on top of the base SearchOptions.
 */
export interface CrossModalSearchOptions extends SearchOptions {
    /** Content types to include in search results */
    includeContentTypes?: string[];
    /** Whether to enable cross-modal ranking */
    enableCrossModalRanking?: boolean;
    /** Minimum similarity threshold for cross-modal results */
    crossModalThreshold?: number;
}
|
|
31
|
+
/**
 * Cross-modal search result with enhanced metadata.
 * Extends the base SearchResult with optional cross-modal annotations.
 */
export interface CrossModalSearchResult extends SearchResult {
    /** Whether this result is from a different modality than the query */
    isCrossModal?: boolean;
    /** Semantic similarity score for cross-modal ranking */
    semanticSimilarity?: number;
    /** Original vector search score before cross-modal adjustments */
    originalScore?: number;
}
|
|
42
|
+
/**
 * Cross-Modal Search Engine
 *
 * Extends the core SearchEngine to provide cross-modal search capabilities.
 * This implementation enables:
 * - Text queries finding semantically similar images
 * - Image queries finding semantically similar text
 * - Mixed content type results ranked by semantic similarity
 * - Unified embedding space leveraging CLIP models
 */
export declare class CrossModalSearchEngine extends SearchEngine {
    private embedder?;
    /**
     * @param embedFn - Embedding function used for queries
     * @param indexManager - Vector index manager
     * @param db - Database connection
     * @param rerankFn - Optional reranking function
     * @param contentResolver - Optional content resolver for the unified content system
     * @param embedder - Optional reference to the embedder for cross-modal operations
     */
    constructor(embedFn: EmbedFunction, indexManager: any, db: any, rerankFn?: RerankFunction, contentResolver?: any, embedder?: any);
    /**
     * Perform cross-modal search that can find content across different modalities
     *
     * This method extends the base search functionality to:
     * 1. Detect query content type (text or image path)
     * 2. Generate appropriate embeddings for the query
     * 3. Search across all content types in the unified embedding space
     * 4. Rank results by semantic similarity regardless of content type
     * 5. Apply cross-modal ranking adjustments
     *
     * @param query - Search query (text string or image path)
     * @param options - Cross-modal search options
     * @returns Promise resolving to cross-modal search results
     */
    crossModalSearch(query: string, options?: CrossModalSearchOptions): Promise<CrossModalSearchResult[]>;
    /**
     * Search for images using text queries
     *
     * @param textQuery - Text description to search for
     * @param options - Search options
     * @returns Promise resolving to image search results
     */
    searchImagesWithText(textQuery: string, options?: CrossModalSearchOptions): Promise<CrossModalSearchResult[]>;
    /**
     * Search for text using image queries
     *
     * @param imagePath - Path to image file to search with
     * @param options - Search options
     * @returns Promise resolving to text search results
     */
    searchTextWithImage(imagePath: string, options?: CrossModalSearchOptions): Promise<CrossModalSearchResult[]>;
    /**
     * Search across all content types with unified ranking
     *
     * @param query - Search query (text or image path)
     * @param options - Search options
     * @returns Promise resolving to mixed content type results
     */
    searchUnified(query: string, options?: CrossModalSearchOptions): Promise<CrossModalSearchResult[]>;
    /**
     * Detect the content type of a query
     * @private
     */
    private detectQueryContentType;
    /**
     * Enhance search results with cross-modal information
     * @private
     */
    private enhanceResultsWithCrossModalInfo;
    /**
     * Apply cross-modal ranking to results
     * @private
     */
    private applyCrossModalRanking;
    /**
     * Calculate semantic similarity between query and result content
     * @private
     */
    private calculateSemanticSimilarity;
    /**
     * Get cross-modal search statistics
     *
     * @returns Promise resolving to chunk counts per modality, whether the engine
     *          is cross-modal capable, and the supported content types
     */
    getCrossModalStats(): Promise<{
        totalChunks: number;
        textChunks: number;
        imageChunks: number;
        crossModalCapable: boolean;
        supportedContentTypes: string[];
    }>;
}
|
|
125
|
+
/**
 * Factory function to create a cross-modal search engine
 *
 * @param embedFn - Embedding function that supports multiple content types
 * @param indexManager - Vector index manager
 * @param db - Database connection
 * @param rerankFn - Optional reranking function
 * @param contentResolver - Content resolver for unified content system
 * @param embedder - Reference to the embedder for cross-modal operations
 * @returns CrossModalSearchEngine instance
 */
export declare function createCrossModalSearchEngine(embedFn: EmbedFunction, indexManager: any, db: any, rerankFn?: RerankFunction, contentResolver?: any, embedder?: any): CrossModalSearchEngine;
|
|
137
|
+
/**
 * Utility function to check if a search engine supports cross-modal search
 *
 * Acts as a type guard: when it returns true, `searchEngine` is narrowed to
 * CrossModalSearchEngine. The input is typed `unknown` so callers may pass any
 * value while the unchecked value cannot be used before narrowing.
 *
 * @param searchEngine - Search engine to check
 * @returns True if the engine supports cross-modal search
 */
export declare function supportsCrossModalSearch(searchEngine: unknown): searchEngine is CrossModalSearchEngine;
|
|
144
|
+
/**
 * Cross-modal search result analyzer
 * Provides static utilities for analyzing cross-modal search results
 */
export declare class CrossModalResultAnalyzer {
    /**
     * Analyze cross-modal search results
     *
     * @param results - Cross-modal search results to analyze
     * @returns Aggregate figures: total result count, cross-modal and same-modal
     *          counts, average semantic similarity, and a count per content type
     */
    static analyzeResults(results: CrossModalSearchResult[]): {
        totalResults: number;
        crossModalResults: number;
        sameModalResults: number;
        averageSemanticSimilarity: number;
        contentTypeDistribution: Record<string, number>;
    };
    /**
     * Generate a summary report of cross-modal search results
     *
     * @param results - Cross-modal search results to summarize
     * @param query - The query that produced the results
     * @returns The report as a string
     */
    static generateReport(results: CrossModalSearchResult[], query: string): string;
}
|
|
164
|
+
//# sourceMappingURL=cross-modal-search.d.ts.map
|