voctar 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (81) hide show
  1. package/LICENSE +21 -0
  2. package/README.md +102 -0
  3. package/dist/index.d.ts +6 -0
  4. package/dist/index.d.ts.map +1 -0
  5. package/dist/index.js +29 -0
  6. package/dist/index.js.map +1 -0
  7. package/dist/src/chunking/index.d.ts +48 -0
  8. package/dist/src/chunking/index.d.ts.map +1 -0
  9. package/dist/src/chunking/index.js +123 -0
  10. package/dist/src/chunking/index.js.map +1 -0
  11. package/dist/src/chunking/strategies/fixed.d.ts +14 -0
  12. package/dist/src/chunking/strategies/fixed.d.ts.map +1 -0
  13. package/dist/src/chunking/strategies/fixed.js +111 -0
  14. package/dist/src/chunking/strategies/fixed.js.map +1 -0
  15. package/dist/src/chunking/strategies/paragraph.d.ts +6 -0
  16. package/dist/src/chunking/strategies/paragraph.d.ts.map +1 -0
  17. package/dist/src/chunking/strategies/paragraph.js +84 -0
  18. package/dist/src/chunking/strategies/paragraph.js.map +1 -0
  19. package/dist/src/chunking/strategies/recursive.d.ts +17 -0
  20. package/dist/src/chunking/strategies/recursive.d.ts.map +1 -0
  21. package/dist/src/chunking/strategies/recursive.js +192 -0
  22. package/dist/src/chunking/strategies/recursive.js.map +1 -0
  23. package/dist/src/chunking/strategies/semantic.d.ts +96 -0
  24. package/dist/src/chunking/strategies/semantic.d.ts.map +1 -0
  25. package/dist/src/chunking/strategies/semantic.js +587 -0
  26. package/dist/src/chunking/strategies/semantic.js.map +1 -0
  27. package/dist/src/chunking/strategies/sentence.d.ts +7 -0
  28. package/dist/src/chunking/strategies/sentence.d.ts.map +1 -0
  29. package/dist/src/chunking/strategies/sentence.js +116 -0
  30. package/dist/src/chunking/strategies/sentence.js.map +1 -0
  31. package/dist/src/chunking/types.d.ts +45 -0
  32. package/dist/src/chunking/types.d.ts.map +1 -0
  33. package/dist/src/chunking/types.js +4 -0
  34. package/dist/src/chunking/types.js.map +1 -0
  35. package/dist/src/chunking/utils/tokenizer.d.ts +10 -0
  36. package/dist/src/chunking/utils/tokenizer.d.ts.map +1 -0
  37. package/dist/src/chunking/utils/tokenizer.js +50 -0
  38. package/dist/src/chunking/utils/tokenizer.js.map +1 -0
  39. package/dist/src/providers/embeddings/index.d.ts +3 -0
  40. package/dist/src/providers/embeddings/index.d.ts.map +1 -0
  41. package/dist/src/providers/embeddings/index.js +7 -0
  42. package/dist/src/providers/embeddings/index.js.map +1 -0
  43. package/dist/src/providers/embeddings/openai.d.ts +21 -0
  44. package/dist/src/providers/embeddings/openai.d.ts.map +1 -0
  45. package/dist/src/providers/embeddings/openai.js +86 -0
  46. package/dist/src/providers/embeddings/openai.js.map +1 -0
  47. package/dist/src/providers/index.d.ts +3 -0
  48. package/dist/src/providers/index.d.ts.map +1 -0
  49. package/dist/src/providers/index.js +20 -0
  50. package/dist/src/providers/index.js.map +1 -0
  51. package/dist/src/providers/stores/index.d.ts +6 -0
  52. package/dist/src/providers/stores/index.d.ts.map +1 -0
  53. package/dist/src/providers/stores/index.js +11 -0
  54. package/dist/src/providers/stores/index.js.map +1 -0
  55. package/dist/src/providers/stores/memory.d.ts +18 -0
  56. package/dist/src/providers/stores/memory.d.ts.map +1 -0
  57. package/dist/src/providers/stores/memory.js +169 -0
  58. package/dist/src/providers/stores/memory.js.map +1 -0
  59. package/dist/src/providers/stores/qdrant.d.ts +28 -0
  60. package/dist/src/providers/stores/qdrant.d.ts.map +1 -0
  61. package/dist/src/providers/stores/qdrant.js +223 -0
  62. package/dist/src/providers/stores/qdrant.js.map +1 -0
  63. package/dist/src/providers/stores/sqlite.d.ts +38 -0
  64. package/dist/src/providers/stores/sqlite.d.ts.map +1 -0
  65. package/dist/src/providers/stores/sqlite.js +306 -0
  66. package/dist/src/providers/stores/sqlite.js.map +1 -0
  67. package/dist/src/types.d.ts +111 -0
  68. package/dist/src/types.d.ts.map +1 -0
  69. package/dist/src/types.js +32 -0
  70. package/dist/src/types.js.map +1 -0
  71. package/dist/src/vector.d.ts +74 -0
  72. package/dist/src/vector.d.ts.map +1 -0
  73. package/dist/src/vector.js +505 -0
  74. package/dist/src/vector.js.map +1 -0
  75. package/docs/API.md +361 -0
  76. package/docs/CHUNKING.md +280 -0
  77. package/docs/CUSTOM_PROVIDERS.md +101 -0
  78. package/docs/README.md +11 -0
  79. package/docs/STORAGE_BACKENDS.md +189 -0
  80. package/docs/assets/vectar.png +0 -0
  81. package/package.json +46 -0
@@ -0,0 +1,116 @@
1
+ "use strict";
2
+ Object.defineProperty(exports, "__esModule", { value: true });
3
+ exports.SentenceChunkingStrategy = void 0;
4
+ // Sentence-based chunking strategy
5
+ const uuid_1 = require("uuid");
6
+ const tokenizer_1 = require("../utils/tokenizer");
7
/**
 * Chunking strategy that packs whole sentences into token-bounded chunks.
 *
 * Sentences are never split: a chunk is closed as soon as adding the next
 * sentence would push it past the token budget, and the next chunk is seeded
 * with the last `overlap` sentences of the one just closed.
 */
class SentenceChunkingStrategy {
    /**
     * @returns {string} The strategy identifier, always 'sentence'.
     */
    getName() {
        return 'sentence';
    }
    /**
     * Split `text` into sentence-aligned chunks.
     *
     * @param {string} text - Text to chunk.
     * @param {string} documentId - Copied into every chunk's metadata.
     * @param {object} [options] - Chunking options.
     * @param {number} [options.tokenLimit=8192] - Upper bound applied to maxChunkSize.
     * @param {number} [options.maxChunkSize=1000] - Token budget per chunk.
     * @param {number} [options.overlap=1] - Number of trailing sentences repeated
     *   at the start of the next chunk; 0 (or a negative value) disables overlap.
     * @param {object} [options.metadata] - Extra keys spread into each chunk's
     *   metadata (spread last, so they can override the computed keys).
     * @returns {Array<{id: string, text: string, metadata: object}>} Chunks in
     *   document order. `startChar`/`endChar` are offsets into the stream of
     *   sentences joined by single spaces, not into the original `text`.
     */
    chunk(text, documentId, options = {}) {
        const tokenLimit = options.tokenLimit ?? 8192;
        const maxSize = Math.min(options.maxChunkSize ?? 1000, tokenLimit);
        const overlap = options.overlap ?? 1;
        const chunks = [];
        let currentChunk = [];
        let currentTokens = 0;
        let startChar = 0;
        // Emits the sentences gathered so far as one chunk and returns its end offset.
        const flush = () => {
            const chunkText = currentChunk.join(' ').trim();
            const endChar = startChar + chunkText.length;
            chunks.push({
                id: (0, uuid_1.v4)(),
                text: chunkText,
                metadata: {
                    documentId,
                    chunkIndex: chunks.length,
                    totalChunks: 0, // Patched once the final count is known
                    startChar,
                    endChar,
                    sentences: currentChunk.length,
                    ...options.metadata,
                },
            });
            return endChar;
        };
        for (const sentence of this.splitIntoSentences(text)) {
            const sentenceTokens = (0, tokenizer_1.countTokens)(sentence);
            const wouldOverflow = currentTokens + sentenceTokens > maxSize;
            if (wouldOverflow && currentChunk.length > 0) {
                const endChar = flush();
                // slice(-0) is slice(0) and would copy the whole chunk, so a
                // zero (or negative) overlap must be handled explicitly.
                const overlapSentences = overlap > 0 ? currentChunk.slice(-overlap) : [];
                const overlapText = overlapSentences.join(' ');
                currentChunk = [...overlapSentences, sentence];
                currentTokens = (0, tokenizer_1.countTokens)(overlapText) + sentenceTokens;
                // With overlap the next chunk starts where the repeated sentences
                // began; without it, just past the joining space.
                startChar = overlapSentences.length > 0
                    ? endChar - overlapText.length
                    : endChar + 1;
            }
            else {
                currentChunk.push(sentence);
                currentTokens += sentenceTokens;
            }
        }
        // Whatever is left becomes the final chunk.
        if (currentChunk.length > 0) {
            flush();
        }
        for (const chunk of chunks) {
            chunk.metadata.totalChunks = chunks.length;
        }
        return chunks;
    }
    /**
     * Split text into trimmed sentences.
     *
     * A boundary is a run of '.', '!' or '?' followed by whitespace. A period
     * that terminates one of the known abbreviations (Mr., Mrs., Dr., Ms., vs.,
     * etc., e.g., i.e.) is not treated as a boundary. The text itself is never
     * rewritten, so abbreviations keep their periods in the output.
     *
     * @param {string} text - Text to split.
     * @returns {string[]} Non-empty sentences in order of appearance.
     */
    splitIntoSentences(text) {
        if (!text) {
            return [];
        }
        const boundary = /[.!?]+\s+/g;
        // \b keeps the abbreviations from matching inside longer words ("ATMs.").
        const abbreviationAtEnd = /\b(?:Mr|Mrs|Dr|Ms|vs|etc|e\.g|i\.e)\.$/;
        const sentences = [];
        let start = 0;
        for (const match of text.matchAll(boundary)) {
            const punctuationEnd = match.index + match[0].trimEnd().length;
            const candidate = text.slice(start, punctuationEnd);
            if (abbreviationAtEnd.test(candidate)) {
                continue; // Abbreviation period, keep accumulating this sentence
            }
            const sentence = candidate.trim();
            if (sentence) {
                sentences.push(sentence);
            }
            start = match.index + match[0].length;
        }
        // Trailing text without a terminating boundary
        const tail = text.slice(start).trim();
        if (tail) {
            sentences.push(tail);
        }
        return sentences;
    }
}
115
+ exports.SentenceChunkingStrategy = SentenceChunkingStrategy;
116
+ //# sourceMappingURL=sentence.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"sentence.js","sourceRoot":"","sources":["../../../../src/chunking/strategies/sentence.ts"],"names":[],"mappings":";;;AAAA,mCAAmC;AACnC,+BAAoC;AAEpC,kDAAiD;AAEjD,MAAa,wBAAwB;IACnC,OAAO;QACL,OAAO,UAAU,CAAC;IACpB,CAAC;IAED,KAAK,CAAC,IAAY,EAAE,UAAkB,EAAE,OAAwB;QAC9D,uDAAuD;QACvD,MAAM,UAAU,GAAI,OAAe,CAAC,UAAU,IAAI,IAAI,CAAC;QACvD,MAAM,OAAO,GAAG,IAAI,CAAC,GAAG,CAAC,OAAO,CAAC,YAAY,IAAI,IAAI,EAAE,UAAU,CAAC,CAAC;QACnE,MAAM,OAAO,GAAG,OAAO,CAAC,OAAO,IAAI,CAAC,CAAC,CAAC,iCAAiC;QAEvE,4BAA4B;QAC5B,MAAM,SAAS,GAAG,IAAI,CAAC,kBAAkB,CAAC,IAAI,CAAC,CAAC;QAEhD,MAAM,MAAM,GAAY,EAAE,CAAC;QAC3B,IAAI,YAAY,GAAa,EAAE,CAAC;QAChC,IAAI,aAAa,GAAG,CAAC,CAAC;QACtB,IAAI,UAAU,GAAG,CAAC,CAAC;QACnB,IAAI,SAAS,GAAG,CAAC,CAAC;QAElB,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,SAAS,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;YAC1C,MAAM,QAAQ,GAAG,SAAS,CAAC,CAAC,CAAC,CAAC;YAC9B,MAAM,cAAc,GAAG,IAAA,uBAAW,EAAC,QAAQ,CAAC,CAAC;YAE7C,+FAA+F;YAC/F,IAAI,aAAa,GAAG,cAAc,GAAG,OAAO,IAAI,YAAY,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;gBACxE,MAAM,SAAS,GAAG,YAAY,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,IAAI,EAAE,CAAC;gBAChD,MAAM,OAAO,GAAG,SAAS,GAAG,SAAS,CAAC,MAAM,CAAC;gBAE7C,MAAM,CAAC,IAAI,CAAC;oBACV,EAAE,EAAE,IAAA,SAAM,GAAE;oBACZ,IAAI,EAAE,SAAS;oBACf,QAAQ,EAAE;wBACR,UAAU;wBACV,UAAU;wBACV,WAAW,EAAE,CAAC,EAAE,wBAAwB;wBACxC,SAAS;wBACT,OAAO;wBACP,SAAS,EAAE,YAAY,CAAC,MAAM;wBAC9B,GAAG,OAAO,CAAC,QAAQ;qBACpB;iBACF,CAAC,CAAC;gBAEH,oCAAoC;gBACpC,MAAM,gBAAgB,GAAG,YAAY,CAAC,KAAK,CAAC,CAAC,OAAO,CAAC,CAAC;gBACtD,YAAY,GAAG,CAAC,GAAG,gBAAgB,EAAE,QAAQ,CAAC,CAAC;gBAC/C,aAAa,GAAG,IAAA,uBAAW,EAAC,gBAAgB,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,GAAG,cAAc,CAAC;gBACzE,SAAS,GAAG,OAAO,GAAG,CAAC,gBAAgB,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,MAAM,CAAC,CAAC;gBAC1D,UAAU,EAAE,CAAC;YACf,CAAC;iBAAM,CAAC;gBACN,YAAY,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAC;gBAC5B,aAAa,IAAI,cAAc,CAAC;YAClC,CAAC;QACH,CAAC;QAED,uCAAuC;QACvC,IAAI,YAAY,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;YAC5B,MAAM,SAAS,GAAG,YAAY,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,IAAI,EAAE,CAAC;YAChD,MAAM,OAAO,GAAG,SAAS,GAAG,SAAS,CAAC,MAAM,CAAC;YAE7C,MAAM,CAAC,IA
AI,CAAC;gBACV,EAAE,EAAE,IAAA,SAAM,GAAE;gBACZ,IAAI,EAAE,SAAS;gBACf,QAAQ,EAAE;oBACR,UAAU;oBACV,UAAU;oBACV,WAAW,EAAE,CAAC;oBACd,SAAS;oBACT,OAAO;oBACP,SAAS,EAAE,YAAY,CAAC,MAAM;oBAC9B,GAAG,OAAO,CAAC,QAAQ;iBACpB;aACF,CAAC,CAAC;QACL,CAAC;QAED,qBAAqB;QACrB,MAAM,CAAC,OAAO,CAAC,KAAK,CAAC,EAAE;YACrB,KAAK,CAAC,QAAQ,CAAC,WAAW,GAAG,MAAM,CAAC,MAAM,CAAC;QAC7C,CAAC,CAAC,CAAC;QAEH,OAAO,MAAM,CAAC;IAChB,CAAC;IAEO,kBAAkB,CAAC,IAAY;QACrC,gEAAgE;QAChE,+BAA+B;QAC/B,MAAM,SAAS,GAAa,EAAE,CAAC;QAE/B,qDAAqD;QACrD,IAAI,UAAU,GAAG,IAAI;aAClB,OAAO,CAAC,OAAO,EAAE,IAAI,CAAC;aACtB,OAAO,CAAC,QAAQ,EAAE,KAAK,CAAC;aACxB,OAAO,CAAC,OAAO,EAAE,IAAI,CAAC;aACtB,OAAO,CAAC,OAAO,EAAE,IAAI,CAAC;aACtB,OAAO,CAAC,OAAO,EAAE,IAAI,CAAC;aACtB,OAAO,CAAC,QAAQ,EAAE,KAAK,CAAC;aACxB,OAAO,CAAC,SAAS,EAAE,IAAI,CAAC;aACxB,OAAO,CAAC,SAAS,EAAE,IAAI,CAAC,CAAC;QAE5B,+BAA+B;QAC/B,MAAM,KAAK,GAAG,UAAU,CAAC,KAAK,CAAC,iBAAiB,CAAC,CAAC;QAElD,IAAI,eAAe,GAAG,EAAE,CAAC;QAEzB,KAAK,MAAM,IAAI,IAAI,KAAK,EAAE,CAAC;YACzB,IAAI,eAAe,CAAC,IAAI,CAAC,IAAI,CAAC,EAAE,CAAC;gBAC/B,eAAe,IAAI,IAAI,CAAC,IAAI,EAAE,CAAC;gBAC/B,IAAI,eAAe,CAAC,IAAI,EAAE,EAAE,CAAC;oBAC3B,SAAS,CAAC,IAAI,CAAC,eAAe,CAAC,IAAI,EAAE,CAAC,CAAC;gBACzC,CAAC;gBACD,eAAe,GAAG,EAAE,CAAC;YACvB,CAAC;iBAAM,CAAC;gBACN,eAAe,IAAI,IAAI,CAAC;YAC1B,CAAC;QACH,CAAC;QAED,4BAA4B;QAC5B,IAAI,eAAe,CAAC,IAAI,EAAE,EAAE,CAAC;YAC3B,SAAS,CAAC,IAAI,CAAC,eAAe,CAAC,IAAI,EAAE,CAAC,CAAC;QACzC,CAAC;QAED,OAAO,SAAS,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC;IAC7C,CAAC;CACF;AA3HD,4DA2HC"}
@@ -0,0 +1,45 @@
1
+ export interface Chunk { // One piece of a document, as returned by ChunkingStrategy.chunk().
2
+ id: string; // Chunk identifier (the sentence strategy assigns a UUID v4).
3
+ text: string; // Text content of the chunk.
4
+ metadata: ChunkMetadata; // Position and bookkeeping data for this chunk.
5
+ }
6
+ export interface ChunkMetadata { // Bookkeeping attached to every Chunk.
7
+ documentId: string; // Id of the document the chunk was cut from (passed to chunk()).
8
+ chunkIndex: number; // Zero-based position of the chunk within its document.
9
+ totalChunks: number; // Number of chunks produced for the document; the sentence strategy fills it in after all chunks exist.
10
+ startChar: number; // Start offset of the chunk. NOTE(review): the sentence strategy derives it from joined sentence lengths, not from the source text - confirm for other strategies.
11
+ endChar: number; // End offset of the chunk (startChar + chunk text length in the sentence strategy).
12
+ tokens?: number; // Optional token count. NOTE(review): not set by the sentence strategy - confirm which strategies populate it.
13
+ [key: string]: any; // Extra keys, e.g. `sentences` and caller-supplied options.metadata in the sentence strategy.
14
+ }
15
+ export interface ChunkingOptions { // Options accepted by ChunkingStrategy.chunk(); every field is optional.
16
+ strategy?: 'fixed' | 'recursive' | 'semantic' | 'sentence' | 'paragraph'; // Name of the chunking strategy to use.
17
+ maxChunkSize?: number; // Size budget per chunk; the sentence strategy measures it in tokens and defaults to 1000.
18
+ overlap?: number; // Overlap between consecutive chunks; the sentence strategy counts it in sentences and defaults to 1.
19
+ preserveFormatting?: boolean; // NOTE(review): not read by any code visible here - confirm meaning against the strategies that use it.
20
+ metadata?: Record<string, any>; // Extra keys copied into each chunk's metadata.
21
+ separator?: string | string[]; // NOTE(review): presumably the split separator(s) for separator-based strategies - confirm.
22
+ tokenLimit?: number; // Upper bound applied to maxChunkSize; the sentence strategy defaults to 8192.
23
+ softLimit?: number; // NOTE(review): this and the options below are not read by any code visible here; presumably consumed by the semantic strategy - confirm.
24
+ hardLimit?: number;
25
+ similarityThreshold?: number;
26
+ contentType?: 'conversation' | 'text';
27
+ contextOverlapPercent?: number;
28
+ smartOverlap?: boolean;
29
+ volatilityWindow?: number;
30
+ generateHeaders?: boolean;
31
+ stripNoise?: boolean;
32
+ noisePatterns?: RegExp[];
33
+ addRoleMarkers?: boolean;
34
+ embeddingProvider?: any;
35
+ }
36
+ export interface ChunkingStrategy { // Contract implemented by each chunking strategy (e.g. SentenceChunkingStrategy).
37
+ chunk(text: string, documentId: string, options: ChunkingOptions): Chunk[]; // Splits `text` into chunks tagged with `documentId`.
38
+ getName(): string; // Strategy identifier, e.g. 'sentence'.
39
+ }
40
+ export interface DocumentChunkResult { // Chunks of one document together with document-level metadata.
41
+ documentId: string; // Id of the chunked document.
42
+ chunks: Chunk[]; // Chunks belonging to the document.
43
+ metadata: Record<string, any>; // NOTE(review): contents are not defined by any code visible here - confirm against the producer.
44
+ }
45
+ //# sourceMappingURL=types.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"types.d.ts","sourceRoot":"","sources":["../../../src/chunking/types.ts"],"names":[],"mappings":"AAEA,MAAM,WAAW,KAAK;IACpB,EAAE,EAAE,MAAM,CAAC;IACX,IAAI,EAAE,MAAM,CAAC;IACb,QAAQ,EAAE,aAAa,CAAC;CACzB;AAED,MAAM,WAAW,aAAa;IAC5B,UAAU,EAAE,MAAM,CAAC;IACnB,UAAU,EAAE,MAAM,CAAC;IACnB,WAAW,EAAE,MAAM,CAAC;IACpB,SAAS,EAAE,MAAM,CAAC;IAClB,OAAO,EAAE,MAAM,CAAC;IAChB,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,CAAC,GAAG,EAAE,MAAM,GAAG,GAAG,CAAC;CACpB;AAED,MAAM,WAAW,eAAe;IAC9B,QAAQ,CAAC,EAAE,OAAO,GAAG,WAAW,GAAG,UAAU,GAAG,UAAU,GAAG,WAAW,CAAC;IACzE,YAAY,CAAC,EAAE,MAAM,CAAC;IACtB,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,kBAAkB,CAAC,EAAE,OAAO,CAAC;IAC7B,QAAQ,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,GAAG,CAAC,CAAC;IAC/B,SAAS,CAAC,EAAE,MAAM,GAAG,MAAM,EAAE,CAAC;IAC9B,UAAU,CAAC,EAAE,MAAM,CAAC;IAGpB,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,mBAAmB,CAAC,EAAE,MAAM,CAAC;IAC7B,WAAW,CAAC,EAAE,cAAc,GAAG,MAAM,CAAC;IACtC,qBAAqB,CAAC,EAAE,MAAM,CAAC;IAC/B,YAAY,CAAC,EAAE,OAAO,CAAC;IACvB,gBAAgB,CAAC,EAAE,MAAM,CAAC;IAC1B,eAAe,CAAC,EAAE,OAAO,CAAC;IAC1B,UAAU,CAAC,EAAE,OAAO,CAAC;IACrB,aAAa,CAAC,EAAE,MAAM,EAAE,CAAC;IACzB,cAAc,CAAC,EAAE,OAAO,CAAC;IACzB,iBAAiB,CAAC,EAAE,GAAG,CAAC;CACzB;AAED,MAAM,WAAW,gBAAgB;IAC/B,KAAK,CAAC,IAAI,EAAE,MAAM,EAAE,UAAU,EAAE,MAAM,EAAE,OAAO,EAAE,eAAe,GAAG,KAAK,EAAE,CAAC;IAC3E,OAAO,IAAI,MAAM,CAAC;CACnB;AAED,MAAM,WAAW,mBAAmB;IAClC,UAAU,EAAE,MAAM,CAAC;IACnB,MAAM,EAAE,KAAK,EAAE,CAAC;IAChB,QAAQ,EAAE,MAAM,CAAC,MAAM,EAAE,GAAG,CAAC,CAAC;CAC/B"}
@@ -0,0 +1,4 @@
1
+ "use strict";
2
+ // Chunking types and interfaces
3
+ Object.defineProperty(exports, "__esModule", { value: true });
4
+ //# sourceMappingURL=types.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"types.js","sourceRoot":"","sources":["../../../src/chunking/types.ts"],"names":[],"mappings":";AAAA,gCAAgC"}
@@ -0,0 +1,10 @@
1
+ /**
2
+ * Count tokens in text accurately using tiktoken
3
+ */
4
+ export declare function countTokens(text: string): number;
5
+ /**
6
+ * Estimate tokens (fallback method)
7
+ * Use this only if tiktoken is not available
8
+ */
9
+ export declare function estimateTokens(text: string): number;
10
+ //# sourceMappingURL=tokenizer.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"tokenizer.d.ts","sourceRoot":"","sources":["../../../../src/chunking/utils/tokenizer.ts"],"names":[],"mappings":"AAoBA;;GAEG;AACH,wBAAgB,WAAW,CAAC,IAAI,EAAE,MAAM,GAAG,MAAM,CAahD;AAED;;;GAGG;AACH,wBAAgB,cAAc,CAAC,IAAI,EAAE,MAAM,GAAG,MAAM,CAMnD"}
@@ -0,0 +1,50 @@
1
+ "use strict";
2
+ Object.defineProperty(exports, "__esModule", { value: true });
3
+ exports.countTokens = countTokens;
4
+ exports.estimateTokens = estimateTokens;
5
+ // Token counting utility for chunking strategies
6
+ // Uses tiktoken for accurate token counting
7
+ const tiktoken_1 = require("tiktoken");
8
+ // Cache encoding to avoid recreating it
9
+ let cachedEncoding = null;
10
/**
 * Return the shared tiktoken encoder, creating it on first use.
 *
 * The encoder is the one tiktoken associates with the 'gpt-4' model; per the
 * original notes this is cl100k_base, which the text-embedding-3 models also
 * use. The instance is memoized in `cachedEncoding` so it is built only once.
 */
function getEmbeddingEncoding() {
    if (cachedEncoding) {
        return cachedEncoding;
    }
    const { encoding_for_model: encodingForModel } = tiktoken_1;
    cachedEncoding = encodingForModel('gpt-4');
    return cachedEncoding;
}
22
/**
 * Count the tokens in `text` with the shared tiktoken encoder.
 *
 * Empty or missing text counts as 0. If obtaining or running the encoder
 * throws, the count falls back to a rough estimate of one token per four
 * characters (rounded up).
 */
function countTokens(text) {
    const isEmpty = !text || text.length === 0;
    if (isEmpty) {
        return 0;
    }
    try {
        return getEmbeddingEncoding().encode(text).length;
    }
    catch (error) {
        // Best-effort fallback: roughly 4 characters per token for English text
        const approximateCharsPerToken = 4;
        return Math.ceil(text.length / approximateCharsPerToken);
    }
}
39
/**
 * Approximate the token count of `text` without a tokenizer.
 *
 * Uses the rule of thumb of one token per four characters, rounded up.
 * Empty or missing text yields 0. Intended as the fallback for when tiktoken
 * is not available.
 */
function estimateTokens(text) {
    const hasContent = text && text.length !== 0;
    return hasContent ? Math.ceil(text.length / 4) : 0;
}
50
+ //# sourceMappingURL=tokenizer.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"tokenizer.js","sourceRoot":"","sources":["../../../../src/chunking/utils/tokenizer.ts"],"names":[],"mappings":";;AAuBA,kCAaC;AAMD,wCAMC;AAhDD,iDAAiD;AACjD,4CAA4C;AAC5C,uCAA8C;AAE9C,wCAAwC;AACxC,IAAI,cAAc,GAAiD,IAAI,CAAC;AAExE;;;GAGG;AACH,SAAS,oBAAoB;IAC3B,IAAI,CAAC,cAAc,EAAE,CAAC;QACpB,oEAAoE;QACpE,4DAA4D;QAC5D,cAAc,GAAG,IAAA,6BAAkB,EAAC,OAAO,CAAC,CAAC;IAC/C,CAAC;IACD,OAAO,cAAc,CAAC;AACxB,CAAC;AAED;;GAEG;AACH,SAAgB,WAAW,CAAC,IAAY;IACtC,IAAI,CAAC,IAAI,IAAI,IAAI,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;QAC/B,OAAO,CAAC,CAAC;IACX,CAAC;IAED,IAAI,CAAC;QACH,MAAM,QAAQ,GAAG,oBAAoB,EAAE,CAAC;QACxC,OAAO,QAAQ,CAAC,MAAM,CAAC,IAAI,CAAC,CAAC,MAAM,CAAC;IACtC,CAAC;IAAC,OAAO,KAAK,EAAE,CAAC;QACf,8CAA8C;QAC9C,+DAA+D;QAC/D,OAAO,IAAI,CAAC,IAAI,CAAC,IAAI,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC;IACpC,CAAC;AACH,CAAC;AAED;;;GAGG;AACH,SAAgB,cAAc,CAAC,IAAY;IACzC,IAAI,CAAC,IAAI,IAAI,IAAI,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;QAC/B,OAAO,CAAC,CAAC;IACX,CAAC;IACD,+DAA+D;IAC/D,OAAO,IAAI,CAAC,IAAI,CAAC,IAAI,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC;AACpC,CAAC"}
@@ -0,0 +1,3 @@
1
+ export { OpenAIEmbeddingProvider } from './openai';
2
+ export type { OpenAIEmbeddingConfig } from './openai';
3
+ //# sourceMappingURL=index.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../../../src/providers/embeddings/index.ts"],"names":[],"mappings":"AACA,OAAO,EAAE,uBAAuB,EAAE,MAAM,UAAU,CAAC;AACnD,YAAY,EAAE,qBAAqB,EAAE,MAAM,UAAU,CAAC"}
@@ -0,0 +1,7 @@
1
+ "use strict";
2
+ Object.defineProperty(exports, "__esModule", { value: true });
3
+ exports.OpenAIEmbeddingProvider = void 0;
4
+ // Embedding providers export
5
+ var openai_1 = require("./openai");
6
+ Object.defineProperty(exports, "OpenAIEmbeddingProvider", { enumerable: true, get: function () { return openai_1.OpenAIEmbeddingProvider; } });
7
+ //# sourceMappingURL=index.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"index.js","sourceRoot":"","sources":["../../../../src/providers/embeddings/index.ts"],"names":[],"mappings":";;;AAAA,6BAA6B;AAC7B,mCAAmD;AAA1C,iHAAA,uBAAuB,OAAA"}
@@ -0,0 +1,21 @@
1
+ import type { EmbeddingProvider } from '../../types';
2
+ export interface OpenAIEmbeddingConfig { // Constructor options for OpenAIEmbeddingProvider.
3
+ apiKey: string; // API key handed to the OpenAI client.
4
+ model?: string; // Embedding model name; defaults to 'text-embedding-3-small'.
5
+ dimension?: number; // Embedding vector size; defaults to 1536.
6
+ maxRetries?: number; // Retry count handed to the OpenAI client; defaults to 3.
7
+ }
8
+ export declare class OpenAIEmbeddingProvider implements EmbeddingProvider { // EmbeddingProvider backed by the OpenAI embeddings endpoint.
9
+ private client; // OpenAI SDK client created in the constructor.
10
+ private model; // Embedding model name used for every request.
11
+ private dimension; // Embedding size reported by getDimension().
12
+ private maxRetries; // Retry count passed to the client.
13
+ constructor(config: OpenAIEmbeddingConfig); // Builds the client and applies the config defaults.
14
+ embed(text: string): Promise<number[]>; // Embeds one text; failures are rethrown as VectorEmbeddingError.
15
+ embedBatch(texts: string[]): Promise<number[][]>; // Embeds texts in requests of up to 100 inputs; resolves to [] for an empty list.
16
+ getDimension(): number; // Configured embedding size.
17
+ getModelName(): string; // Configured model name.
18
+ getTokenLimit(): number; // 8191 for text-embedding-ada-002 model names, otherwise 8192.
19
+ private normalizeText; // Trims and collapses whitespace before a text is sent.
20
+ }
21
+ //# sourceMappingURL=openai.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"openai.d.ts","sourceRoot":"","sources":["../../../../src/providers/embeddings/openai.ts"],"names":[],"mappings":"AAEA,OAAO,KAAK,EAAE,iBAAiB,EAAE,MAAM,aAAa,CAAC;AAGrD,MAAM,WAAW,qBAAqB;IACpC,MAAM,EAAE,MAAM,CAAC;IACf,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,UAAU,CAAC,EAAE,MAAM,CAAC;CACrB;AAED,qBAAa,uBAAwB,YAAW,iBAAiB;IAC/D,OAAO,CAAC,MAAM,CAAS;IACvB,OAAO,CAAC,KAAK,CAAS;IACtB,OAAO,CAAC,SAAS,CAAS;IAC1B,OAAO,CAAC,UAAU,CAAS;gBAEf,MAAM,EAAE,qBAAqB;IAUnC,KAAK,CAAC,IAAI,EAAE,MAAM,GAAG,OAAO,CAAC,MAAM,EAAE,CAAC;IAkBtC,UAAU,CAAC,KAAK,EAAE,MAAM,EAAE,GAAG,OAAO,CAAC,MAAM,EAAE,EAAE,CAAC;IAoCtD,YAAY,IAAI,MAAM;IAItB,YAAY,IAAI,MAAM;IAItB,aAAa,IAAI,MAAM;IAevB,OAAO,CAAC,aAAa;CAMtB"}
@@ -0,0 +1,86 @@
1
+ "use strict";
2
+ Object.defineProperty(exports, "__esModule", { value: true });
3
+ exports.OpenAIEmbeddingProvider = void 0;
4
+ // OpenAI embedding provider
5
+ const openai_1 = require("openai");
6
+ const types_1 = require("../../types");
7
/**
 * Embedding provider backed by the OpenAI embeddings endpoint.
 *
 * Texts are whitespace-normalized before being sent. Any failure while
 * requesting embeddings is rethrown as a VectorEmbeddingError carrying the
 * original error.
 */
class OpenAIEmbeddingProvider {
    /**
     * @param {object} config - Provider configuration.
     * @param {string} config.apiKey - API key handed to the OpenAI client.
     * @param {string} [config.model='text-embedding-3-small'] - Embedding model.
     * @param {number} [config.dimension=1536] - Requested embedding size.
     * @param {number} [config.maxRetries=3] - Retry count handed to the client.
     */
    constructor(config) {
        const maxRetries = config.maxRetries ?? 3;
        this.client = new openai_1.OpenAI({
            apiKey: config.apiKey,
            maxRetries,
        });
        this.model = config.model ?? 'text-embedding-3-small';
        this.dimension = config.dimension ?? 1536;
        this.maxRetries = maxRetries;
    }
    /**
     * Embed a single text.
     *
     * @param {string} text - Text to embed.
     * @returns {Promise<number[]>} The embedding vector.
     * @throws {VectorEmbeddingError} If the request or response handling fails.
     */
    async embed(text) {
        try {
            const request = this.buildRequest(this.normalizeText(text));
            const response = await this.client.embeddings.create(request);
            return response.data[0].embedding;
        }
        catch (error) {
            throw new types_1.VectorEmbeddingError(`Failed to generate embedding: ${error instanceof Error ? error.message : 'Unknown error'}`, error instanceof Error ? error : undefined);
        }
    }
    /**
     * Embed many texts, preserving input order.
     *
     * @param {string[]} texts - Texts to embed.
     * @returns {Promise<number[][]>} One vector per input; [] for an empty list.
     * @throws {VectorEmbeddingError} If any request fails.
     */
    async embedBatch(texts) {
        if (texts.length === 0) {
            return [];
        }
        try {
            // OpenAI supports up to 2048 inputs per request, but we'll be conservative
            const batchSize = 100;
            const results = [];
            // Batches are requested one after another so results stay in input order.
            for (let offset = 0; offset < texts.length; offset += batchSize) {
                const normalized = texts
                    .slice(offset, offset + batchSize)
                    .map((t) => this.normalizeText(t));
                const response = await this.client.embeddings.create(this.buildRequest(normalized));
                results.push(...response.data.map((d) => d.embedding));
            }
            return results;
        }
        catch (error) {
            throw new types_1.VectorEmbeddingError(`Failed to generate batch embeddings: ${error instanceof Error ? error.message : 'Unknown error'}`, error instanceof Error ? error : undefined);
        }
    }
    /** @returns {number} Configured embedding size. */
    getDimension() {
        return this.dimension;
    }
    /** @returns {string} Configured model name. */
    getModelName() {
        return this.model;
    }
    /**
     * @returns {number} Maximum input tokens for the configured model: 8191
     *   for text-embedding-ada-002, 8192 for text-embedding-3 models and as the
     *   default for anything else.
     */
    getTokenLimit() {
        if (this.model.includes('text-embedding-3')) {
            return 8192;
        }
        if (this.model.includes('text-embedding-ada-002')) {
            return 8191;
        }
        // Default for other models
        return 8192;
    }
    /**
     * Build the payload for embeddings.create.
     *
     * `dimensions` is left out for text-embedding-ada-002, which does not
     * accept that parameter; every other model keeps receiving it.
     *
     * @param {string|string[]} input - Normalized text(s) to embed.
     * @returns {object} Request body.
     */
    buildRequest(input) {
        const request = { model: this.model, input };
        if (!this.model.includes('text-embedding-ada-002')) {
            request.dimensions = this.dimension;
        }
        return request;
    }
    /**
     * Trim the text, cap blank-line runs at one empty line, and collapse runs
     * of spaces/tabs to a single space.
     *
     * Newlines are excluded from the second pass; matching them there would
     * undo the paragraph breaks the first pass keeps.
     *
     * @param {string} text - Raw text.
     * @returns {string} Normalized text.
     */
    normalizeText(text) {
        return text
            .trim()
            .replace(/\n{3,}/g, '\n\n') // Replace 3+ newlines with 2
            .replace(/[^\S\n]{2,}/g, ' '); // Collapse runs of non-newline whitespace
    }
}
85
+ exports.OpenAIEmbeddingProvider = OpenAIEmbeddingProvider;
86
+ //# sourceMappingURL=openai.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"openai.js","sourceRoot":"","sources":["../../../../src/providers/embeddings/openai.ts"],"names":[],"mappings":";;;AAAA,4BAA4B;AAC5B,mCAAgC;AAEhC,uCAAmD;AASnD,MAAa,uBAAuB;IAMlC,YAAY,MAA6B;QACvC,IAAI,CAAC,MAAM,GAAG,IAAI,eAAM,CAAC;YACvB,MAAM,EAAE,MAAM,CAAC,MAAM;YACrB,UAAU,EAAE,MAAM,CAAC,UAAU,IAAI,CAAC;SACnC,CAAC,CAAC;QACH,IAAI,CAAC,KAAK,GAAG,MAAM,CAAC,KAAK,IAAI,wBAAwB,CAAC;QACtD,IAAI,CAAC,SAAS,GAAG,MAAM,CAAC,SAAS,IAAI,IAAI,CAAC;QAC1C,IAAI,CAAC,UAAU,GAAG,MAAM,CAAC,UAAU,IAAI,CAAC,CAAC;IAC3C,CAAC;IAED,KAAK,CAAC,KAAK,CAAC,IAAY;QACtB,IAAI,CAAC;YACH,MAAM,UAAU,GAAG,IAAI,CAAC,aAAa,CAAC,IAAI,CAAC,CAAC;YAC5C,MAAM,QAAQ,GAAG,MAAM,IAAI,CAAC,MAAM,CAAC,UAAU,CAAC,MAAM,CAAC;gBACnD,KAAK,EAAE,IAAI,CAAC,KAAK;gBACjB,KAAK,EAAE,UAAU;gBACjB,UAAU,EAAE,IAAI,CAAC,SAAS;aAC3B,CAAC,CAAC;YAEH,OAAO,QAAQ,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,SAAS,CAAC;QACpC,CAAC;QAAC,OAAO,KAAK,EAAE,CAAC;YACf,MAAM,IAAI,4BAAoB,CAC5B,iCAAiC,KAAK,YAAY,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC,eAAe,EAAE,EAC3F,KAAK,YAAY,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,SAAS,CAC3C,CAAC;QACJ,CAAC;IACH,CAAC;IAED,KAAK,CAAC,UAAU,CAAC,KAAe;QAC9B,IAAI,KAAK,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;YACvB,OAAO,EAAE,CAAC;QACZ,CAAC;QAED,IAAI,CAAC;YACH,2EAA2E;YAC3E,MAAM,SAAS,GAAG,GAAG,CAAC;YACtB,MAAM,OAAO,GAAe,EAAE,CAAC;YAE/B,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,KAAK,CAAC,MAAM,EAAE,CAAC,IAAI,SAAS,EAAE,CAAC;gBACjD,OAAO,CAAC,IAAI,CAAC,KAAK,CAAC,KAAK,CAAC,CAAC,EAAE,CAAC,GAAG,SAAS,CAAC,CAAC,CAAC;YAC9C,CAAC;YAED,MAAM,OAAO,GAAe,EAAE,CAAC;YAE/B,KAAK,MAAM,KAAK,IAAI,OAAO,EAAE,CAAC;gBAC5B,MAAM,UAAU,GAAG,KAAK,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,IAAI,CAAC,aAAa,CAAC,CAAC,CAAC,CAAC,CAAC;gBACzD,MAAM,QAAQ,GAAG,MAAM,IAAI,CAAC,MAAM,CAAC,UAAU,CAAC,MAAM,CAAC;oBACnD,KAAK,EAAE,IAAI,CAAC,KAAK;oBACjB,KAAK,EAAE,UAAU;oBACjB,UAAU,EAAE,IAAI,CAAC,SAAS;iBAC3B,CAAC,CAAC;gBAEH,OAAO,CAAC,IAAI,CAAC,GAAG,QAAQ,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,SAAS,CAAC,CAAC,CAAC;YACvD,CAAC;YAED,OAAO,OAAO,CAAC;QACjB,CAAC;QAAC,OAAO,KAAK,EAAE,CAAC;YACf,MAAM,IAAI,4BA
AoB,CAC5B,wCAAwC,KAAK,YAAY,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC,eAAe,EAAE,EAClG,KAAK,YAAY,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,SAAS,CAC3C,CAAC;QACJ,CAAC;IACH,CAAC;IAED,YAAY;QACV,OAAO,IAAI,CAAC,SAAS,CAAC;IACxB,CAAC;IAED,YAAY;QACV,OAAO,IAAI,CAAC,KAAK,CAAC;IACpB,CAAC;IAED,aAAa;QACX,sDAAsD;QACtD,iEAAiE;QACjE,sCAAsC;QACtC,0DAA0D;QAC1D,IAAI,IAAI,CAAC,KAAK,CAAC,QAAQ,CAAC,kBAAkB,CAAC,EAAE,CAAC;YAC5C,OAAO,IAAI,CAAC;QACd,CAAC;QACD,IAAI,IAAI,CAAC,KAAK,CAAC,QAAQ,CAAC,wBAAwB,CAAC,EAAE,CAAC;YAClD,OAAO,IAAI,CAAC;QACd,CAAC;QACD,2BAA2B;QAC3B,OAAO,IAAI,CAAC;IACd,CAAC;IAEO,aAAa,CAAC,IAAY;QAChC,OAAO,IAAI;aACR,IAAI,EAAE;aACN,OAAO,CAAC,SAAS,EAAE,MAAM,CAAC,CAAC,6BAA6B;aACxD,OAAO,CAAC,SAAS,EAAE,GAAG,CAAC,CAAC,CAAC,4CAA4C;IAC1E,CAAC;CACF;AAnGD,0DAmGC"}
@@ -0,0 +1,3 @@
1
+ export * from './embeddings';
2
+ export * from './stores';
3
+ //# sourceMappingURL=index.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../../src/providers/index.ts"],"names":[],"mappings":"AACA,cAAc,cAAc,CAAC;AAC7B,cAAc,UAAU,CAAC"}
@@ -0,0 +1,20 @@
1
+ "use strict";
2
// Compiler-emitted re-export helpers (this file is generated from src/providers/index.ts).
// __createBinding(o, m, k, k2): exposes property `k` of module `m` on `o` under the name `k2`
// (defaults to `k`). An already-installed this.__createBinding is reused if present.
+ var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) {
3
+ if (k2 === undefined) k2 = k;
4
+ var desc = Object.getOwnPropertyDescriptor(m, k);
5
// Use a live getter when there is no own descriptor, when the property is an accessor on a
// non-ES-module object, or when it is a writable/configurable data property; otherwise the
// existing descriptor is copied as-is.
+ if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) {
6
+ desc = { enumerable: true, get: function() { return m[k]; } };
7
+ }
8
+ Object.defineProperty(o, k2, desc);
9
// Fallback when Object.create is unavailable: copy the current value with a plain assignment.
+ }) : (function(o, m, k, k2) {
10
+ if (k2 === undefined) k2 = k;
11
+ o[k2] = m[k];
12
+ }));
13
// __exportStar(m, exports): binds every enumerable key of `m` except "default" onto `exports`,
// skipping keys that `exports` already owns.
+ var __exportStar = (this && this.__exportStar) || function(m, exports) {
14
+ for (var p in m) if (p !== "default" && !Object.prototype.hasOwnProperty.call(exports, p)) __createBinding(exports, m, p);
15
+ };
16
+ Object.defineProperty(exports, "__esModule", { value: true });
17
+ // Vector service providers
18
+ __exportStar(require("./embeddings"), exports);
19
+ __exportStar(require("./stores"), exports);
20
+ //# sourceMappingURL=index.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"index.js","sourceRoot":"","sources":["../../../src/providers/index.ts"],"names":[],"mappings":";;;;;;;;;;;;;;;;AAAA,2BAA2B;AAC3B,+CAA6B;AAC7B,2CAAyB"}
@@ -0,0 +1,6 @@
1
+ export { QdrantVectorStoreProvider } from './qdrant';
2
+ export type { QdrantConfig } from './qdrant';
3
+ export { InMemoryVectorStoreProvider } from './memory';
4
+ export { SQLiteVectorStoreProvider } from './sqlite';
5
+ export type { SQLiteConfig } from './sqlite';
6
+ //# sourceMappingURL=index.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../../../src/providers/stores/index.ts"],"names":[],"mappings":"AACA,OAAO,EAAE,yBAAyB,EAAE,MAAM,UAAU,CAAC;AACrD,YAAY,EAAE,YAAY,EAAE,MAAM,UAAU,CAAC;AAC7C,OAAO,EAAE,2BAA2B,EAAE,MAAM,UAAU,CAAC;AACvD,OAAO,EAAE,yBAAyB,EAAE,MAAM,UAAU,CAAC;AACrD,YAAY,EAAE,YAAY,EAAE,MAAM,UAAU,CAAC"}
@@ -0,0 +1,11 @@
1
+ "use strict";
2
+ Object.defineProperty(exports, "__esModule", { value: true });
3
+ exports.SQLiteVectorStoreProvider = exports.InMemoryVectorStoreProvider = exports.QdrantVectorStoreProvider = void 0;
4
+ // Vector store providers export
5
+ var qdrant_1 = require("./qdrant");
6
+ Object.defineProperty(exports, "QdrantVectorStoreProvider", { enumerable: true, get: function () { return qdrant_1.QdrantVectorStoreProvider; } });
7
+ var memory_1 = require("./memory");
8
+ Object.defineProperty(exports, "InMemoryVectorStoreProvider", { enumerable: true, get: function () { return memory_1.InMemoryVectorStoreProvider; } });
9
+ var sqlite_1 = require("./sqlite");
10
+ Object.defineProperty(exports, "SQLiteVectorStoreProvider", { enumerable: true, get: function () { return sqlite_1.SQLiteVectorStoreProvider; } });
11
+ //# sourceMappingURL=index.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"index.js","sourceRoot":"","sources":["../../../../src/providers/stores/index.ts"],"names":[],"mappings":";;;AAAA,gCAAgC;AAChC,mCAAqD;AAA5C,mHAAA,yBAAyB,OAAA;AAElC,mCAAuD;AAA9C,qHAAA,2BAA2B,OAAA;AACpC,mCAAqD;AAA5C,mHAAA,yBAAyB,OAAA"}
@@ -0,0 +1,18 @@
1
+ import type { VectorStoreProvider, VectorPoint, SearchOptions, SearchResult, CollectionConfig } from '../../types';
2
+ export declare class InMemoryVectorStoreProvider implements VectorStoreProvider { // VectorStoreProvider that keeps all points in process memory.
3
+ private collections; // Map of collection name -> Map of point id -> stored point.
4
+ private collectionConfigs; // Map of collection name -> { dimension, distance }.
5
+ ensureCollection(name: string, dimension: number, config?: CollectionConfig): Promise<void>; // Creates the collection if missing; distance defaults to 'cosine'.
6
+ upsert(collection: string, points: VectorPoint[]): Promise<void>; // Stores points by id; throws VectorStoreError if the collection does not exist.
7
+ search(collection: string, vector: number[], options?: SearchOptions): Promise<SearchResult[]>; // Scores stored points against `vector`; limit defaults to 10 and scoreThreshold to 0.
8
+ delete(collection: string, ids: string[]): Promise<void>; // NOTE(review): implementation not visible here - presumably removes the given ids; confirm.
9
+ deleteCollection(collection: string): Promise<void>; // NOTE(review): implementation not visible here - presumably drops the collection; confirm.
10
+ getIdsByFilter(collection: string, filter: Record<string, any>, limit?: number): Promise<string[]>; // NOTE(review): implementation not visible here - presumably ids of points whose payload matches `filter`; confirm.
11
+ private calculateSimilarity; // Called by search() with (query vector, point vector, configured distance).
12
+ private cosineSimilarity;
13
+ private euclideanDistance;
14
+ private dotProduct;
15
+ private matchesFilter; // Called by search() with (point payload, options.filter) to skip non-matching points.
16
+ private buildFilter;
17
+ }
18
+ //# sourceMappingURL=memory.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"memory.d.ts","sourceRoot":"","sources":["../../../../src/providers/stores/memory.ts"],"names":[],"mappings":"AACA,OAAO,KAAK,EAAE,mBAAmB,EAAE,WAAW,EAAE,aAAa,EAAE,YAAY,EAAE,gBAAgB,EAAE,MAAM,aAAa,CAAC;AAOnH,qBAAa,2BAA4B,YAAW,mBAAmB;IACrE,OAAO,CAAC,WAAW,CAA+C;IAClE,OAAO,CAAC,iBAAiB,CAA8D;IAEjF,gBAAgB,CAAC,IAAI,EAAE,MAAM,EAAE,SAAS,EAAE,MAAM,EAAE,MAAM,CAAC,EAAE,gBAAgB,GAAG,OAAO,CAAC,IAAI,CAAC;IAU3F,MAAM,CAAC,UAAU,EAAE,MAAM,EAAE,MAAM,EAAE,WAAW,EAAE,GAAG,OAAO,CAAC,IAAI,CAAC;IAchE,MAAM,CAAC,UAAU,EAAE,MAAM,EAAE,MAAM,EAAE,MAAM,EAAE,EAAE,OAAO,GAAE,aAAkB,GAAG,OAAO,CAAC,YAAY,EAAE,CAAC;IAmDlG,MAAM,CAAC,UAAU,EAAE,MAAM,EAAE,GAAG,EAAE,MAAM,EAAE,GAAG,OAAO,CAAC,IAAI,CAAC;IAWxD,gBAAgB,CAAC,UAAU,EAAE,MAAM,GAAG,OAAO,CAAC,IAAI,CAAC;IAKnD,cAAc,CAAC,UAAU,EAAE,MAAM,EAAE,MAAM,EAAE,MAAM,CAAC,MAAM,EAAE,GAAG,CAAC,EAAE,KAAK,GAAE,MAAc,GAAG,OAAO,CAAC,MAAM,EAAE,CAAC;IAqB/G,OAAO,CAAC,mBAAmB;IAiB3B,OAAO,CAAC,gBAAgB;IAOxB,OAAO,CAAC,iBAAiB;IAIzB,OAAO,CAAC,UAAU;IAIlB,OAAO,CAAC,aAAa;IAgCrB,OAAO,CAAC,WAAW;CAYpB"}
@@ -0,0 +1,169 @@
1
+ "use strict";
2
+ Object.defineProperty(exports, "__esModule", { value: true });
3
+ exports.InMemoryVectorStoreProvider = void 0;
4
+ const types_1 = require("../../types");
5
/**
 * Vector store provider that keeps every collection in process memory.
 *
 * Points are held in a `Map` per collection, keyed by point id. Searching is
 * a linear scan that scores each stored vector against the query vector using
 * the distance metric recorded when the collection was created.
 */
class InMemoryVectorStoreProvider {
    constructor() {
        // collection name -> Map(point id -> stored point)
        this.collections = new Map();
        // collection name -> { dimension, distance }
        this.collectionConfigs = new Map();
    }

    /**
     * Creates the collection when it does not exist yet. An existing
     * collection (and its recorded configuration) is left untouched.
     *
     * @param {string} name - Collection name.
     * @param {number} dimension - Vector dimension recorded for the collection.
     * @param {{ distance?: string }} [config] - Optional settings; `distance`
     *   defaults to `'cosine'`.
     * @returns {Promise<void>}
     */
    async ensureCollection(name, dimension, config) {
        if (this.collections.has(name)) {
            return;
        }
        this.collections.set(name, new Map());
        this.collectionConfigs.set(name, {
            dimension,
            distance: config?.distance ?? 'cosine',
        });
    }

    /**
     * Inserts the points, overwriting any stored point that has the same id.
     *
     * @param {string} collection - Target collection name.
     * @param {Array<object>} points - Points with `id`, `vector` and optional `payload`.
     * @returns {Promise<void>}
     * @throws {VectorStoreError} When the collection does not exist.
     */
    async upsert(collection, points) {
        const store = this.collections.get(collection);
        if (!store) {
            throw new types_1.VectorStoreError(`Collection '${collection}' does not exist`);
        }
        for (const point of points) {
            // `text` is lifted out of the payload so search results can return it directly.
            store.set(point.id, {
                ...point,
                text: point.payload?.text || '',
            });
        }
    }

    /**
     * Scores every stored point against `vector` and returns the best matches.
     *
     * @param {string} collection - Collection to search.
     * @param {number[]} vector - Query vector.
     * @param {object} [options] - `limit` (default 10; negative values are
     *   treated as 0), `scoreThreshold` (default 0) and an optional payload `filter`.
     * @returns {Promise<Array<object>>} Results sorted by descending score; each
     *   has `id`, `text`, `score`, `createdAt` and `metadata` (the stored payload).
     * @throws {VectorSearchError} On any failure, including a missing collection
     *   or mismatched vector lengths. The original error is passed as the second
     *   constructor argument.
     */
    async search(collection, vector, options = {}) {
        try {
            const store = this.collections.get(collection);
            if (!store) {
                throw new types_1.VectorSearchError(`Collection '${collection}' does not exist`);
            }
            const config = this.collectionConfigs.get(collection);
            if (!config) {
                throw new types_1.VectorSearchError(`Collection config not found for '${collection}'`);
            }
            // A negative limit would make slice() drop items from the end
            // instead of returning nothing, so clamp it to zero.
            const limit = Math.max(0, options.limit ?? 10);
            const scoreThreshold = options.scoreThreshold ?? 0.0;
            const results = [];
            for (const [id, point] of store.entries()) {
                if (options.filter && !this.matchesFilter(point.payload, options.filter)) {
                    continue;
                }
                const score = this.calculateSimilarity(vector, point.vector, config.distance);
                if (score < scoreThreshold) {
                    continue;
                }
                const payload = point.payload || {};
                const system = payload.system || {};
                results.push({
                    id,
                    text: point.text,
                    score,
                    // Falls back to "now" when the payload carries no creation time.
                    createdAt: system.createdAt || Date.now(),
                    metadata: payload,
                });
            }
            results.sort((a, b) => b.score - a.score);
            return results.slice(0, limit);
        }
        catch (error) {
            throw new types_1.VectorSearchError(`Failed to search in collection '${collection}': ${error instanceof Error ? error.message : 'Unknown error'}`, error instanceof Error ? error : undefined);
        }
    }

    /**
     * Removes the given ids from the collection; unknown ids are ignored.
     *
     * @param {string} collection - Collection name.
     * @param {string[]} ids - Point ids to remove.
     * @returns {Promise<void>}
     * @throws {VectorStoreError} When the collection does not exist.
     */
    async delete(collection, ids) {
        const store = this.collections.get(collection);
        if (!store) {
            throw new types_1.VectorStoreError(`Collection '${collection}' does not exist`);
        }
        for (const id of ids) {
            store.delete(id);
        }
    }

    /**
     * Drops the collection and its configuration. Does nothing if it is absent.
     *
     * @param {string} collection - Collection name.
     * @returns {Promise<void>}
     */
    async deleteCollection(collection) {
        this.collections.delete(collection);
        this.collectionConfigs.delete(collection);
    }

    /**
     * Collects the ids of points whose payload satisfies `filter`.
     *
     * @param {string} collection - Collection name.
     * @param {Object<string, *>} filter - Payload filter (see `matchesFilter`).
     * @param {number} [limit=10000] - Maximum number of ids to return.
     * @returns {Promise<string[]>} Matching ids in insertion order.
     * @throws {VectorStoreError} When the collection does not exist.
     */
    async getIdsByFilter(collection, filter, limit = 10000) {
        const store = this.collections.get(collection);
        if (!store) {
            throw new types_1.VectorStoreError(`Collection '${collection}' does not exist`);
        }
        const matchingIds = [];
        for (const [id, point] of store.entries()) {
            if (matchingIds.length >= limit) {
                break;
            }
            if (this.matchesFilter(point.payload, filter)) {
                matchingIds.push(id);
            }
        }
        return matchingIds;
    }

    /**
     * Scores two vectors with the requested metric; higher means more similar.
     * Unknown metrics fall back to cosine similarity.
     *
     * @param {number[]} vec1
     * @param {number[]} vec2
     * @param {string} distance - `'cosine'`, `'euclidean'` or `'dot'`.
     * @returns {number}
     * @throws {Error} When the vectors differ in length.
     */
    calculateSimilarity(vec1, vec2, distance) {
        if (vec1.length !== vec2.length) {
            throw new Error('Vector dimensions do not match');
        }
        switch (distance) {
            case 'euclidean':
                // Map a distance in [0, inf) onto a similarity in (0, 1].
                return 1 / (1 + this.euclideanDistance(vec1, vec2));
            case 'dot':
                return this.dotProduct(vec1, vec2);
            case 'cosine':
            default:
                return this.cosineSimilarity(vec1, vec2);
        }
    }

    /**
     * Cosine of the angle between two vectors.
     *
     * Returns 0 when either vector has zero magnitude: the angle is undefined
     * there, and dividing would give NaN, which fails every score comparison
     * and silently hides the point from search results.
     *
     * @param {number[]} vec1
     * @param {number[]} vec2
     * @returns {number}
     */
    cosineSimilarity(vec1, vec2) {
        let dot = 0;
        let squares1 = 0;
        let squares2 = 0;
        for (let i = 0; i < vec1.length; i++) {
            dot += vec1[i] * vec2[i];
            squares1 += vec1[i] * vec1[i];
        }
        for (const val of vec2) {
            squares2 += val * val;
        }
        const denominator = Math.sqrt(squares1) * Math.sqrt(squares2);
        if (denominator === 0) {
            return 0;
        }
        return dot / denominator;
    }

    /**
     * Straight-line distance between two vectors.
     *
     * @param {number[]} vec1
     * @param {number[]} vec2
     * @returns {number}
     */
    euclideanDistance(vec1, vec2) {
        return Math.sqrt(vec1.reduce((sum, val, i) => sum + Math.pow(val - vec2[i], 2), 0));
    }

    /**
     * Sum of the element-wise products of two vectors.
     *
     * @param {number[]} vec1
     * @param {number[]} vec2
     * @returns {number}
     */
    dotProduct(vec1, vec2) {
        return vec1.reduce((sum, val, i) => sum + val * vec2[i], 0);
    }

    /**
     * Tests a payload against a filter. Every filter entry must match.
     *
     * Keys are dot-separated paths into the payload (normalised by
     * `buildFilter`). Only the payload's own properties are followed, so
     * inherited names such as `constructor` or `toString` never match.
     * An array value means "any of these"; an empty array only requires the
     * path to exist. Any other value is compared with `===`.
     *
     * @param {object} [payload] - Stored payload; a missing payload never matches.
     * @param {Object<string, *>} filter
     * @returns {boolean}
     */
    matchesFilter(payload, filter) {
        if (!payload) {
            return false;
        }
        const hasOwn = Object.prototype.hasOwnProperty;
        const normalized = this.buildFilter(filter);
        for (const [path, expected] of Object.entries(normalized)) {
            let current = payload;
            for (const segment of path.split('.')) {
                const canDescend = current && typeof current === 'object' && hasOwn.call(current, segment);
                if (!canDescend) {
                    return false;
                }
                current = current[segment];
            }
            if (Array.isArray(expected)) {
                if (expected.length > 0 && !expected.includes(current)) {
                    return false;
                }
            }
            else if (current !== expected) {
                return false;
            }
        }
        return true;
    }

    /**
     * Normalises a caller-supplied filter: the `text` key is dropped, and keys
     * without a dot are prefixed with `metadata.` so they address
     * `payload.metadata`.
     *
     * @param {Object<string, *>} filter
     * @returns {Object<string, *>} Filter keyed by full payload paths.
     */
    buildFilter(filter) {
        const normalized = {};
        for (const [key, value] of Object.entries(filter)) {
            if (key === 'text') {
                continue; // Skip text field as it's the main content
            }
            const path = key.includes('.') ? key : `metadata.${key}`;
            normalized[path] = value;
        }
        return normalized;
    }
}
168
+ exports.InMemoryVectorStoreProvider = InMemoryVectorStoreProvider;
169
+ //# sourceMappingURL=memory.js.map