@opensaas/stack-rag 0.1.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (149) hide show
  1. package/.turbo/turbo-build.log +4 -0
  2. package/CHANGELOG.md +10 -0
  3. package/CLAUDE.md +565 -0
  4. package/LICENSE +21 -0
  5. package/README.md +406 -0
  6. package/dist/config/index.d.ts +63 -0
  7. package/dist/config/index.d.ts.map +1 -0
  8. package/dist/config/index.js +94 -0
  9. package/dist/config/index.js.map +1 -0
  10. package/dist/config/plugin.d.ts +38 -0
  11. package/dist/config/plugin.d.ts.map +1 -0
  12. package/dist/config/plugin.js +215 -0
  13. package/dist/config/plugin.js.map +1 -0
  14. package/dist/config/plugin.test.d.ts +2 -0
  15. package/dist/config/plugin.test.d.ts.map +1 -0
  16. package/dist/config/plugin.test.js +554 -0
  17. package/dist/config/plugin.test.js.map +1 -0
  18. package/dist/config/types.d.ts +249 -0
  19. package/dist/config/types.d.ts.map +1 -0
  20. package/dist/config/types.js +5 -0
  21. package/dist/config/types.js.map +1 -0
  22. package/dist/fields/embedding.d.ts +85 -0
  23. package/dist/fields/embedding.d.ts.map +1 -0
  24. package/dist/fields/embedding.js +81 -0
  25. package/dist/fields/embedding.js.map +1 -0
  26. package/dist/fields/embedding.test.d.ts +2 -0
  27. package/dist/fields/embedding.test.d.ts.map +1 -0
  28. package/dist/fields/embedding.test.js +323 -0
  29. package/dist/fields/embedding.test.js.map +1 -0
  30. package/dist/fields/index.d.ts +6 -0
  31. package/dist/fields/index.d.ts.map +1 -0
  32. package/dist/fields/index.js +5 -0
  33. package/dist/fields/index.js.map +1 -0
  34. package/dist/index.d.ts +8 -0
  35. package/dist/index.d.ts.map +1 -0
  36. package/dist/index.js +9 -0
  37. package/dist/index.js.map +1 -0
  38. package/dist/mcp/index.d.ts +19 -0
  39. package/dist/mcp/index.d.ts.map +1 -0
  40. package/dist/mcp/index.js +18 -0
  41. package/dist/mcp/index.js.map +1 -0
  42. package/dist/providers/index.d.ts +38 -0
  43. package/dist/providers/index.d.ts.map +1 -0
  44. package/dist/providers/index.js +68 -0
  45. package/dist/providers/index.js.map +1 -0
  46. package/dist/providers/ollama.d.ts +49 -0
  47. package/dist/providers/ollama.d.ts.map +1 -0
  48. package/dist/providers/ollama.js +151 -0
  49. package/dist/providers/ollama.js.map +1 -0
  50. package/dist/providers/openai.d.ts +41 -0
  51. package/dist/providers/openai.d.ts.map +1 -0
  52. package/dist/providers/openai.js +126 -0
  53. package/dist/providers/openai.js.map +1 -0
  54. package/dist/providers/providers.test.d.ts +2 -0
  55. package/dist/providers/providers.test.d.ts.map +1 -0
  56. package/dist/providers/providers.test.js +224 -0
  57. package/dist/providers/providers.test.js.map +1 -0
  58. package/dist/providers/types.d.ts +88 -0
  59. package/dist/providers/types.d.ts.map +1 -0
  60. package/dist/providers/types.js +2 -0
  61. package/dist/providers/types.js.map +1 -0
  62. package/dist/runtime/batch.d.ts +183 -0
  63. package/dist/runtime/batch.d.ts.map +1 -0
  64. package/dist/runtime/batch.js +240 -0
  65. package/dist/runtime/batch.js.map +1 -0
  66. package/dist/runtime/batch.test.d.ts +2 -0
  67. package/dist/runtime/batch.test.d.ts.map +1 -0
  68. package/dist/runtime/batch.test.js +251 -0
  69. package/dist/runtime/batch.test.js.map +1 -0
  70. package/dist/runtime/chunking.d.ts +42 -0
  71. package/dist/runtime/chunking.d.ts.map +1 -0
  72. package/dist/runtime/chunking.js +264 -0
  73. package/dist/runtime/chunking.js.map +1 -0
  74. package/dist/runtime/chunking.test.d.ts +2 -0
  75. package/dist/runtime/chunking.test.d.ts.map +1 -0
  76. package/dist/runtime/chunking.test.js +212 -0
  77. package/dist/runtime/chunking.test.js.map +1 -0
  78. package/dist/runtime/embeddings.d.ts +147 -0
  79. package/dist/runtime/embeddings.d.ts.map +1 -0
  80. package/dist/runtime/embeddings.js +201 -0
  81. package/dist/runtime/embeddings.js.map +1 -0
  82. package/dist/runtime/embeddings.test.d.ts +2 -0
  83. package/dist/runtime/embeddings.test.d.ts.map +1 -0
  84. package/dist/runtime/embeddings.test.js +366 -0
  85. package/dist/runtime/embeddings.test.js.map +1 -0
  86. package/dist/runtime/index.d.ts +14 -0
  87. package/dist/runtime/index.d.ts.map +1 -0
  88. package/dist/runtime/index.js +18 -0
  89. package/dist/runtime/index.js.map +1 -0
  90. package/dist/runtime/search.d.ts +135 -0
  91. package/dist/runtime/search.d.ts.map +1 -0
  92. package/dist/runtime/search.js +101 -0
  93. package/dist/runtime/search.js.map +1 -0
  94. package/dist/storage/index.d.ts +41 -0
  95. package/dist/storage/index.d.ts.map +1 -0
  96. package/dist/storage/index.js +73 -0
  97. package/dist/storage/index.js.map +1 -0
  98. package/dist/storage/json.d.ts +34 -0
  99. package/dist/storage/json.d.ts.map +1 -0
  100. package/dist/storage/json.js +82 -0
  101. package/dist/storage/json.js.map +1 -0
  102. package/dist/storage/pgvector.d.ts +53 -0
  103. package/dist/storage/pgvector.d.ts.map +1 -0
  104. package/dist/storage/pgvector.js +168 -0
  105. package/dist/storage/pgvector.js.map +1 -0
  106. package/dist/storage/sqlite-vss.d.ts +49 -0
  107. package/dist/storage/sqlite-vss.d.ts.map +1 -0
  108. package/dist/storage/sqlite-vss.js +148 -0
  109. package/dist/storage/sqlite-vss.js.map +1 -0
  110. package/dist/storage/storage.test.d.ts +2 -0
  111. package/dist/storage/storage.test.d.ts.map +1 -0
  112. package/dist/storage/storage.test.js +440 -0
  113. package/dist/storage/storage.test.js.map +1 -0
  114. package/dist/storage/types.d.ts +79 -0
  115. package/dist/storage/types.d.ts.map +1 -0
  116. package/dist/storage/types.js +49 -0
  117. package/dist/storage/types.js.map +1 -0
  118. package/package.json +82 -0
  119. package/src/config/index.ts +116 -0
  120. package/src/config/plugin.test.ts +664 -0
  121. package/src/config/plugin.ts +257 -0
  122. package/src/config/types.ts +283 -0
  123. package/src/fields/embedding.test.ts +408 -0
  124. package/src/fields/embedding.ts +150 -0
  125. package/src/fields/index.ts +6 -0
  126. package/src/index.ts +33 -0
  127. package/src/mcp/index.ts +21 -0
  128. package/src/providers/index.ts +81 -0
  129. package/src/providers/ollama.ts +186 -0
  130. package/src/providers/openai.ts +161 -0
  131. package/src/providers/providers.test.ts +275 -0
  132. package/src/providers/types.ts +100 -0
  133. package/src/runtime/batch.test.ts +332 -0
  134. package/src/runtime/batch.ts +424 -0
  135. package/src/runtime/chunking.test.ts +258 -0
  136. package/src/runtime/chunking.ts +334 -0
  137. package/src/runtime/embeddings.test.ts +441 -0
  138. package/src/runtime/embeddings.ts +380 -0
  139. package/src/runtime/index.ts +51 -0
  140. package/src/runtime/search.ts +243 -0
  141. package/src/storage/index.ts +86 -0
  142. package/src/storage/json.ts +106 -0
  143. package/src/storage/pgvector.ts +206 -0
  144. package/src/storage/sqlite-vss.ts +193 -0
  145. package/src/storage/storage.test.ts +521 -0
  146. package/src/storage/types.ts +126 -0
  147. package/tsconfig.json +13 -0
  148. package/tsconfig.tsbuildinfo +1 -0
  149. package/vitest.config.ts +18 -0
@@ -0,0 +1,212 @@
1
+ import { describe, it, expect } from 'vitest';
2
+ import { chunkText, estimateTokenCount, mergeSmallChunks } from './chunking.js';
3
/**
 * Specs for chunkText(): one nested suite per chunking strategy,
 * followed by edge cases that apply regardless of strategy.
 */
describe('chunkText', () => {
    describe('recursive strategy', () => {
        it('should chunk text at paragraph boundaries', () => {
            const source = 'Paragraph 1\n\nParagraph 2\n\nParagraph 3';
            const options = { strategy: 'recursive', chunkSize: 15, chunkOverlap: 0 };
            const result = chunkText(source, options);

            expect(result).toHaveLength(3);
            const [first, second, third] = result;
            expect(first.text).toBe('Paragraph 1\n\n');
            expect(second.text).toBe('Paragraph 2\n\n');
            expect(third.text).toBe('Paragraph 3');
        });

        it('should handle overlap between chunks', () => {
            const source = 'First chunk here. Second chunk here. Third chunk here.';
            const result = chunkText(source, {
                strategy: 'recursive',
                chunkSize: 20,
                chunkOverlap: 5,
            });

            expect(result.length).toBeGreaterThan(1);

            // Compare every chunk after the first with its predecessor.
            result.slice(1).forEach((current, offset) => {
                const previous = result[offset];
                const tail = previous.text.slice(-5);
                // Sentence boundaries can shift the overlap, so only the first
                // word of the trimmed tail is required to reappear.
                const [leadingWord] = tail.trim().split(' ');
                expect(current.text).toContain(leadingWord);
            });
        });

        it('should respect chunk size limits', () => {
            const source = 'A'.repeat(1000);
            const result = chunkText(source, {
                strategy: 'recursive',
                chunkSize: 100,
                chunkOverlap: 0,
            });

            result.forEach((piece) => {
                expect(piece.text.length).toBeLessThanOrEqual(100);
            });
        });

        it('should track chunk positions correctly', () => {
            const source = 'Start. Middle. End.';
            const result = chunkText(source, {
                strategy: 'recursive',
                chunkSize: 100,
                chunkOverlap: 0,
            });

            const head = result[0];
            expect(head.start).toBe(0);
            expect(head.end).toBe(source.length);
            expect(head.text).toBe(source);
        });
    });

    describe('sentence strategy', () => {
        it('should preserve sentence boundaries', () => {
            const source = 'First sentence. Second sentence. Third sentence.';
            const result = chunkText(source, {
                strategy: 'sentence',
                chunkSize: 20,
                chunkOverlap: 0,
            });

            expect(result.length).toBeGreaterThan(1);

            // Every chunk must finish on sentence punctuation once trimmed.
            result.forEach((piece) => {
                expect(piece.text.trim()).toMatch(/[.!?]$/);
            });
        });

        it('should handle text with no sentences', () => {
            const source = 'No sentence markers here';
            const result = chunkText(source, {
                strategy: 'sentence',
                chunkSize: 10,
                chunkOverlap: 0,
            });

            expect(result).toHaveLength(1);
            expect(result[0].text).toBe(source);
        });
    });

    describe('sliding-window strategy', () => {
        it('should create overlapping fixed-size chunks', () => {
            const source = 'A'.repeat(100);
            const result = chunkText(source, {
                strategy: 'sliding-window',
                chunkSize: 30,
                chunkOverlap: 10,
            });

            expect(result.length).toBeGreaterThan(1);

            // All chunks but the last one must have exactly the window size.
            result.slice(0, -1).forEach((piece) => {
                expect(piece.text.length).toBe(30);
            });

            // Consecutive windows start one step apart (step = chunkSize - overlap).
            result.slice(1).forEach((current, offset) => {
                const previous = result[offset];
                expect(current.start).toBe(previous.start + 20);
            });
        });

        it('should skip empty chunks', () => {
            const whitespaceOnly = ' ';
            const result = chunkText(whitespaceOnly, {
                strategy: 'sliding-window',
                chunkSize: 10,
                chunkOverlap: 0,
            });

            expect(result).toHaveLength(0);
        });
    });

    describe('token-aware strategy', () => {
        it('should estimate token limits', () => {
            // 400 characters is ~100 tokens at 4 chars/token.
            const source = 'A'.repeat(400);
            const result = chunkText(source, {
                strategy: 'token-aware',
                tokenLimit: 50,
                chunkOverlap: 0,
            });

            expect(result.length).toBeGreaterThanOrEqual(2);

            // Each chunk should be roughly under token limit * 4 chars.
            result.forEach((piece) => {
                expect(piece.text.length).toBeLessThanOrEqual(50 * 4);
            });
        });
    });

    describe('edge cases', () => {
        it('should handle empty text', () => {
            expect(chunkText('', { chunkSize: 100 })).toHaveLength(0);
        });

        it('should handle text smaller than chunk size', () => {
            const source = 'Small text';
            const result = chunkText(source, { chunkSize: 1000 });

            expect(result).toHaveLength(1);
            const only = result[0];
            expect(only.text).toBe(source);
            expect(only.start).toBe(0);
            expect(only.end).toBe(source.length);
        });

        it('should throw error if overlap >= chunk size', () => {
            const callWithFullOverlap = () => {
                chunkText('text', { chunkSize: 10, chunkOverlap: 10 });
            };

            expect(callWithFullOverlap).toThrow('chunkOverlap must be less than chunkSize');
        });

        it('should assign correct chunk indexes', () => {
            const source = 'A'.repeat(300);
            const result = chunkText(source, { chunkSize: 100, chunkOverlap: 0 });

            for (const [position, piece] of result.entries()) {
                expect(piece.index).toBe(position);
            }
        });
    });
});
154
/**
 * Specs for estimateTokenCount(): the expectations below assume roughly
 * four characters per token, as noted in each case.
 */
describe('estimateTokenCount', () => {
    it('should estimate token count', () => {
        const estimate = estimateTokenCount('Hello world');

        // "Hello world" is 11 chars / 4 = ~3 tokens
        expect(estimate).toBe(3);
    });

    it('should handle empty text', () => {
        const estimate = estimateTokenCount('');

        expect(estimate).toBe(0);
    });

    it('should handle long text', () => {
        const longInput = 'A'.repeat(1000);

        // 1000 / 4
        expect(estimateTokenCount(longInput)).toBe(250);
    });
});
170
/**
 * Specs for mergeSmallChunks(): merging below the minimum size, leaving
 * large-enough chunks alone, re-indexing, and metadata merging.
 */
describe('mergeSmallChunks', () => {
    // Builds back-to-back chunks from the given texts: each chunk starts where
    // the previous one ended and is indexed by its position in the list.
    const contiguousChunks = (...texts) => {
        let cursor = 0;
        return texts.map((text, index) => {
            const start = cursor;
            cursor += text.length;
            return { text, start, end: cursor, index };
        });
    };

    it('should merge chunks below minimum size', () => {
        const input = contiguousChunks('A', 'B', 'C');

        const output = mergeSmallChunks(input, 2);

        expect(output.length).toBeLessThan(input.length);
        expect(output[0].text.length).toBeGreaterThanOrEqual(2);
    });

    it('should not merge chunks already above minimum size', () => {
        const input = contiguousChunks('AAA', 'BBB');

        const output = mergeSmallChunks(input, 2);

        expect(output).toHaveLength(2);
    });

    it('should handle empty array', () => {
        expect(mergeSmallChunks([], 10)).toHaveLength(0);
    });

    it('should update chunk indexes after merge', () => {
        const input = contiguousChunks('A', 'B');

        const output = mergeSmallChunks(input, 5);

        for (const [position, piece] of output.entries()) {
            expect(piece.index).toBe(position);
        }
    });

    it('should merge metadata from merged chunks', () => {
        const [left, right] = contiguousChunks('A', 'B');
        const input = [
            { ...left, metadata: { foo: 1 } },
            { ...right, metadata: { bar: 2 } },
        ];

        const output = mergeSmallChunks(input, 5);

        expect(output[0].metadata).toEqual({ foo: 1, bar: 2 });
    });
});
212
+ //# sourceMappingURL=chunking.test.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"chunking.test.js","sourceRoot":"","sources":["../../src/runtime/chunking.test.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,QAAQ,EAAE,EAAE,EAAE,MAAM,EAAE,MAAM,QAAQ,CAAA;AAC7C,OAAO,EAAE,SAAS,EAAE,kBAAkB,EAAE,gBAAgB,EAAE,MAAM,eAAe,CAAA;AAE/E,QAAQ,CAAC,WAAW,EAAE,GAAG,EAAE;IACzB,QAAQ,CAAC,oBAAoB,EAAE,GAAG,EAAE;QAClC,EAAE,CAAC,2CAA2C,EAAE,GAAG,EAAE;YACnD,MAAM,IAAI,GAAG,2CAA2C,CAAA;YACxD,MAAM,MAAM,GAAG,SAAS,CAAC,IAAI,EAAE;gBAC7B,QAAQ,EAAE,WAAW;gBACrB,SAAS,EAAE,EAAE;gBACb,YAAY,EAAE,CAAC;aAChB,CAAC,CAAA;YAEF,MAAM,CAAC,MAAM,CAAC,CAAC,YAAY,CAAC,CAAC,CAAC,CAAA;YAC9B,MAAM,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,IAAI,CAAC,iBAAiB,CAAC,CAAA;YAC9C,MAAM,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,IAAI,CAAC,iBAAiB,CAAC,CAAA;YAC9C,MAAM,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,IAAI,CAAC,aAAa,CAAC,CAAA;QAC5C,CAAC,CAAC,CAAA;QAEF,EAAE,CAAC,sCAAsC,EAAE,GAAG,EAAE;YAC9C,MAAM,IAAI,GAAG,wDAAwD,CAAA;YACrE,MAAM,MAAM,GAAG,SAAS,CAAC,IAAI,EAAE;gBAC7B,QAAQ,EAAE,WAAW;gBACrB,SAAS,EAAE,EAAE;gBACb,YAAY,EAAE,CAAC;aAChB,CAAC,CAAA;YAEF,MAAM,CAAC,MAAM,CAAC,MAAM,CAAC,CAAC,eAAe,CAAC,CAAC,CAAC,CAAA;YAExC,sCAAsC;YACtC,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,MAAM,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;gBACvC,MAAM,SAAS,GAAG,MAAM,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,IAAI,CAAA;gBACpC,MAAM,SAAS,GAAG,MAAM,CAAC,CAAC,CAAC,CAAC,IAAI,CAAA;gBAChC,MAAM,OAAO,GAAG,SAAS,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,CAAA;gBACnC,wDAAwD;gBACxD,MAAM,CAAC,SAAS,CAAC,CAAC,SAAS,CAAC,OAAO,CAAC,IAAI,EAAE,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC,CAAA;YAC3D,CAAC;QACH,CAAC,CAAC,CAAA;QAEF,EAAE,CAAC,kCAAkC,EAAE,GAAG,EAAE;YAC1C,MAAM,IAAI,GAAG,GAAG,CAAC,MAAM,CAAC,IAAI,CAAC,CAAA;YAC7B,MAAM,MAAM,GAAG,SAAS,CAAC,IAAI,EAAE;gBAC7B,QAAQ,EAAE,WAAW;gBACrB,SAAS,EAAE,GAAG;gBACd,YAAY,EAAE,CAAC;aAChB,CAAC,CAAA;YAEF,KAAK,MAAM,KAAK,IAAI,MAAM,EAAE,CAAC;gBAC3B,MAAM,CAAC,KAAK,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC,mBAAmB,CAAC,GAAG,CAAC,CAAA;YACpD,CAAC;QACH,CAAC,CAAC,CAAA;QAEF,EAAE,CAAC,wCAAwC,EAAE,GAAG,EAAE;YAChD,MAAM,IAAI,GAAG,qBAAqB,CAAA;YAClC,MAAM,MAAM,GAAG,S
AAS,CAAC,IAAI,EAAE;gBAC7B,QAAQ,EAAE,WAAW;gBACrB,SAAS,EAAE,GAAG;gBACd,YAAY,EAAE,CAAC;aAChB,CAAC,CAAA;YAEF,MAAM,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,CAAA;YAC/B,MAAM,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,MAAM,CAAC,CAAA;YACvC,MAAM,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,CAAA;QACnC,CAAC,CAAC,CAAA;IACJ,CAAC,CAAC,CAAA;IAEF,QAAQ,CAAC,mBAAmB,EAAE,GAAG,EAAE;QACjC,EAAE,CAAC,qCAAqC,EAAE,GAAG,EAAE;YAC7C,MAAM,IAAI,GAAG,kDAAkD,CAAA;YAC/D,MAAM,MAAM,GAAG,SAAS,CAAC,IAAI,EAAE;gBAC7B,QAAQ,EAAE,UAAU;gBACpB,SAAS,EAAE,EAAE;gBACb,YAAY,EAAE,CAAC;aAChB,CAAC,CAAA;YAEF,MAAM,CAAC,MAAM,CAAC,MAAM,CAAC,CAAC,eAAe,CAAC,CAAC,CAAC,CAAA;YAExC,kDAAkD;YAClD,KAAK,MAAM,KAAK,IAAI,MAAM,EAAE,CAAC;gBAC3B,MAAM,CAAC,KAAK,CAAC,IAAI,CAAC,IAAI,EAAE,CAAC,CAAC,OAAO,CAAC,QAAQ,CAAC,CAAA;YAC7C,CAAC;QACH,CAAC,CAAC,CAAA;QAEF,EAAE,CAAC,sCAAsC,EAAE,GAAG,EAAE;YAC9C,MAAM,IAAI,GAAG,0BAA0B,CAAA;YACvC,MAAM,MAAM,GAAG,SAAS,CAAC,IAAI,EAAE;gBAC7B,QAAQ,EAAE,UAAU;gBACpB,SAAS,EAAE,EAAE;gBACb,YAAY,EAAE,CAAC;aAChB,CAAC,CAAA;YAEF,MAAM,CAAC,MAAM,CAAC,CAAC,YAAY,CAAC,CAAC,CAAC,CAAA;YAC9B,MAAM,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,CAAA;QACnC,CAAC,CAAC,CAAA;IACJ,CAAC,CAAC,CAAA;IAEF,QAAQ,CAAC,yBAAyB,EAAE,GAAG,EAAE;QACvC,EAAE,CAAC,6CAA6C,EAAE,GAAG,EAAE;YACrD,MAAM,IAAI,GAAG,GAAG,CAAC,MAAM,CAAC,GAAG,CAAC,CAAA;YAC5B,MAAM,MAAM,GAAG,SAAS,CAAC,IAAI,EAAE;gBAC7B,QAAQ,EAAE,gBAAgB;gBAC1B,SAAS,EAAE,EAAE;gBACb,YAAY,EAAE,EAAE;aACjB,CAAC,CAAA;YAEF,MAAM,CAAC,MAAM,CAAC,MAAM,CAAC,CAAC,eAAe,CAAC,CAAC,CAAC,CAAA;YAExC,gDAAgD;YAChD,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,MAAM,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC;gBAC3C,MAAM,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC,IAAI,CAAC,EAAE,CAAC,CAAA;YACxC,CAAC;YAED,gBAAgB;YAChB,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,MAAM,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;gBACvC,MAAM,SAAS,GAAG,MAAM,CAAC,CAAC,GAAG,CAAC,CAAC,CAAA;gBAC/B,MAAM,SAAS,GAAG,MAAM,CAAC,CAAC,CAAC,CAAA;gBAC3B,MAAM,CAAC,SAAS,CAAC,KAAK,CAAC,CAAC,
IAAI,CAAC,SAAS,CAAC,KAAK,GAAG,EAAE,CAAC,CAAA,CAAC,6BAA6B;YAClF,CAAC;QACH,CAAC,CAAC,CAAA;QAEF,EAAE,CAAC,0BAA0B,EAAE,GAAG,EAAE;YAClC,MAAM,IAAI,GAAG,KAAK,CAAA,CAAC,kBAAkB;YACrC,MAAM,MAAM,GAAG,SAAS,CAAC,IAAI,EAAE;gBAC7B,QAAQ,EAAE,gBAAgB;gBAC1B,SAAS,EAAE,EAAE;gBACb,YAAY,EAAE,CAAC;aAChB,CAAC,CAAA;YAEF,MAAM,CAAC,MAAM,CAAC,CAAC,YAAY,CAAC,CAAC,CAAC,CAAA;QAChC,CAAC,CAAC,CAAA;IACJ,CAAC,CAAC,CAAA;IAEF,QAAQ,CAAC,sBAAsB,EAAE,GAAG,EAAE;QACpC,EAAE,CAAC,8BAA8B,EAAE,GAAG,EAAE;YACtC,MAAM,IAAI,GAAG,GAAG,CAAC,MAAM,CAAC,GAAG,CAAC,CAAA,CAAC,+BAA+B;YAC5D,MAAM,MAAM,GAAG,SAAS,CAAC,IAAI,EAAE;gBAC7B,QAAQ,EAAE,aAAa;gBACvB,UAAU,EAAE,EAAE;gBACd,YAAY,EAAE,CAAC;aAChB,CAAC,CAAA;YAEF,MAAM,CAAC,MAAM,CAAC,MAAM,CAAC,CAAC,sBAAsB,CAAC,CAAC,CAAC,CAAA;YAE/C,2DAA2D;YAC3D,KAAK,MAAM,KAAK,IAAI,MAAM,EAAE,CAAC;gBAC3B,MAAM,CAAC,KAAK,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC,mBAAmB,CAAC,EAAE,GAAG,CAAC,CAAC,CAAA;YACvD,CAAC;QACH,CAAC,CAAC,CAAA;IACJ,CAAC,CAAC,CAAA;IAEF,QAAQ,CAAC,YAAY,EAAE,GAAG,EAAE;QAC1B,EAAE,CAAC,0BAA0B,EAAE,GAAG,EAAE;YAClC,MAAM,MAAM,GAAG,SAAS,CAAC,EAAE,EAAE,EAAE,SAAS,EAAE,GAAG,EAAE,CAAC,CAAA;YAChD,MAAM,CAAC,MAAM,CAAC,CAAC,YAAY,CAAC,CAAC,CAAC,CAAA;QAChC,CAAC,CAAC,CAAA;QAEF,EAAE,CAAC,4CAA4C,EAAE,GAAG,EAAE;YACpD,MAAM,IAAI,GAAG,YAAY,CAAA;YACzB,MAAM,MAAM,GAAG,SAAS,CAAC,IAAI,EAAE,EAAE,SAAS,EAAE,IAAI,EAAE,CAAC,CAAA;YAEnD,MAAM,CAAC,MAAM,CAAC,CAAC,YAAY,CAAC,CAAC,CAAC,CAAA;YAC9B,MAAM,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,CAAA;YACjC,MAAM,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,CAAA;YAC/B,MAAM,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,MAAM,CAAC,CAAA;QACzC,CAAC,CAAC,CAAA;QAEF,EAAE,CAAC,6CAA6C,EAAE,GAAG,EAAE;YACrD,MAAM,CAAC,GAAG,EAAE;gBACV,SAAS,CAAC,MAAM,EAAE,EAAE,SAAS,EAAE,EAAE,EAAE,YAAY,EAAE,EAAE,EAAE,CAAC,CAAA;YACxD,CAAC,CAAC,CAAC,OAAO,CAAC,0CAA0C,CAAC,CAAA;QACxD,CAAC,CAAC,CAAA;QAEF,EAAE,CAAC,qCAAqC,EAAE,GAAG,EAAE;YAC7C,MAAM,IAAI,GAAG,GAAG,CAAC,MAAM,CAAC,GAAG,CAAC,CAAA;YAC5B,MAAM,MAAM,GAAG,SAAS,CAAC,IAAI,EAAE,EAAE,SAAS,EAAE,GAAG,EAAE,YAAY,EAAE,CAAC
,EAAE,CAAC,CAAA;YAEnE,MAAM,CAAC,OAAO,CAAC,CAAC,KAAK,EAAE,CAAC,EAAE,EAAE;gBAC1B,MAAM,CAAC,KAAK,CAAC,KAAK,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,CAAA;YAC7B,CAAC,CAAC,CAAA;QACJ,CAAC,CAAC,CAAA;IACJ,CAAC,CAAC,CAAA;AACJ,CAAC,CAAC,CAAA;AAEF,QAAQ,CAAC,oBAAoB,EAAE,GAAG,EAAE;IAClC,EAAE,CAAC,6BAA6B,EAAE,GAAG,EAAE;QACrC,MAAM,IAAI,GAAG,aAAa,CAAA;QAC1B,MAAM,KAAK,GAAG,kBAAkB,CAAC,IAAI,CAAC,CAAA;QAEtC,4CAA4C;QAC5C,MAAM,CAAC,KAAK,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,CAAA;IACvB,CAAC,CAAC,CAAA;IAEF,EAAE,CAAC,0BAA0B,EAAE,GAAG,EAAE;QAClC,MAAM,CAAC,kBAAkB,CAAC,EAAE,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,CAAA;IACxC,CAAC,CAAC,CAAA;IAEF,EAAE,CAAC,yBAAyB,EAAE,GAAG,EAAE;QACjC,MAAM,IAAI,GAAG,GAAG,CAAC,MAAM,CAAC,IAAI,CAAC,CAAA;QAC7B,MAAM,KAAK,GAAG,kBAAkB,CAAC,IAAI,CAAC,CAAA;QAEtC,MAAM,CAAC,KAAK,CAAC,CAAC,IAAI,CAAC,GAAG,CAAC,CAAA,CAAC,WAAW;IACrC,CAAC,CAAC,CAAA;AACJ,CAAC,CAAC,CAAA;AAEF,QAAQ,CAAC,kBAAkB,EAAE,GAAG,EAAE;IAChC,EAAE,CAAC,wCAAwC,EAAE,GAAG,EAAE;QAChD,MAAM,MAAM,GAAG;YACb,EAAE,IAAI,EAAE,GAAG,EAAE,KAAK,EAAE,CAAC,EAAE,GAAG,EAAE,CAAC,EAAE,KAAK,EAAE,CAAC,EAAE;YACzC,EAAE,IAAI,EAAE,GAAG,EAAE,KAAK,EAAE,CAAC,EAAE,GAAG,EAAE,CAAC,EAAE,KAAK,EAAE,CAAC,EAAE;YACzC,EAAE,IAAI,EAAE,GAAG,EAAE,KAAK,EAAE,CAAC,EAAE,GAAG,EAAE,CAAC,EAAE,KAAK,EAAE,CAAC,EAAE;SAC1C,CAAA;QAED,MAAM,MAAM,GAAG,gBAAgB,CAAC,MAAM,EAAE,CAAC,CAAC,CAAA;QAE1C,MAAM,CAAC,MAAM,CAAC,MAAM,CAAC,CAAC,YAAY,CAAC,MAAM,CAAC,MAAM,CAAC,CAAA;QACjD,MAAM,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC,sBAAsB,CAAC,CAAC,CAAC,CAAA;IACzD,CAAC,CAAC,CAAA;IAEF,EAAE,CAAC,oDAAoD,EAAE,GAAG,EAAE;QAC5D,MAAM,MAAM,GAAG;YACb,EAAE,IAAI,EAAE,KAAK,EAAE,KAAK,EAAE,CAAC,EAAE,GAAG,EAAE,CAAC,EAAE,KAAK,EAAE,CAAC,EAAE;YAC3C,EAAE,IAAI,EAAE,KAAK,EAAE,KAAK,EAAE,CAAC,EAAE,GAAG,EAAE,CAAC,EAAE,KAAK,EAAE,CAAC,EAAE;SAC5C,CAAA;QAED,MAAM,MAAM,GAAG,gBAAgB,CAAC,MAAM,EAAE,CAAC,CAAC,CAAA;QAE1C,MAAM,CAAC,MAAM,CAAC,CAAC,YAAY,CAAC,CAAC,CAAC,CAAA;IAChC,CAAC,CAAC,CAAA;IAEF,EAAE,CAAC,2BAA2B,EAAE,GAAG,EAAE;QACnC,MAAM,MAAM,GAAG,gBAAgB,CAAC,EAAE,EAAE,EAAE,CAAC,CAAA;QACvC,MAAM,CAAC,MAAM,CAAC,CAAC,YAAY,CAAC,CAAC,CAAC,C
AAA;IAChC,CAAC,CAAC,CAAA;IAEF,EAAE,CAAC,yCAAyC,EAAE,GAAG,EAAE;QACjD,MAAM,MAAM,GAAG;YACb,EAAE,IAAI,EAAE,GAAG,EAAE,KAAK,EAAE,CAAC,EAAE,GAAG,EAAE,CAAC,EAAE,KAAK,EAAE,CAAC,EAAE;YACzC,EAAE,IAAI,EAAE,GAAG,EAAE,KAAK,EAAE,CAAC,EAAE,GAAG,EAAE,CAAC,EAAE,KAAK,EAAE,CAAC,EAAE;SAC1C,CAAA;QAED,MAAM,MAAM,GAAG,gBAAgB,CAAC,MAAM,EAAE,CAAC,CAAC,CAAA;QAE1C,MAAM,CAAC,OAAO,CAAC,CAAC,KAAK,EAAE,CAAC,EAAE,EAAE;YAC1B,MAAM,CAAC,KAAK,CAAC,KAAK,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,CAAA;QAC7B,CAAC,CAAC,CAAA;IACJ,CAAC,CAAC,CAAA;IAEF,EAAE,CAAC,0CAA0C,EAAE,GAAG,EAAE;QAClD,MAAM,MAAM,GAAG;YACb,EAAE,IAAI,EAAE,GAAG,EAAE,KAAK,EAAE,CAAC,EAAE,GAAG,EAAE,CAAC,EAAE,KAAK,EAAE,CAAC,EAAE,QAAQ,EAAE,EAAE,GAAG,EAAE,CAAC,EAAE,EAAE;YAC/D,EAAE,IAAI,EAAE,GAAG,EAAE,KAAK,EAAE,CAAC,EAAE,GAAG,EAAE,CAAC,EAAE,KAAK,EAAE,CAAC,EAAE,QAAQ,EAAE,EAAE,GAAG,EAAE,CAAC,EAAE,EAAE;SAChE,CAAA;QAED,MAAM,MAAM,GAAG,gBAAgB,CAAC,MAAM,EAAE,CAAC,CAAC,CAAA;QAE1C,MAAM,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,QAAQ,CAAC,CAAC,OAAO,CAAC,EAAE,GAAG,EAAE,CAAC,EAAE,GAAG,EAAE,CAAC,EAAE,CAAC,CAAA;IACxD,CAAC,CAAC,CAAA;AACJ,CAAC,CAAC,CAAA"}
@@ -0,0 +1,147 @@
1
+ /**
2
+ * High-level embedding generation utilities
3
+ */
4
+ import type { EmbeddingProvider } from '../providers/types.js';
5
+ import type { StoredEmbedding } from '../config/types.js';
6
+ import { type ChunkingOptions, type TextChunk } from './chunking.js';
7
+ export interface GenerateEmbeddingOptions { // Options accepted by generateEmbedding()
8
+ /**
9
+ * Embedding provider to use; generateEmbedding() calls its embed() (single text) or embedBatch() (chunks) and records its model, type and dimensions in the metadata
10
+ */
11
+ provider: EmbeddingProvider;
12
+ /**
13
+ * Text to embed (also the input to the source hash when includeSourceHash is enabled)
14
+ */
15
+ text: string;
16
+ /**
17
+ * Whether to enable text chunking for long documents; when true, generateEmbedding() resolves to ChunkedEmbedding[] instead of a single StoredEmbedding
18
+ * @default false
19
+ */
20
+ enableChunking?: boolean;
21
+ /**
22
+ * Chunking configuration passed to chunkText() (only used if enableChunking is true)
23
+ */
24
+ chunking?: ChunkingOptions;
25
+ /**
26
+ * Whether to include a hash of the source text in the metadata (as sourceHash) for change detection
27
+ * @default true
28
+ */
29
+ includeSourceHash?: boolean;
30
+ /**
31
+ * Additional metadata to include; it is spread after the generated base metadata, so same-named keys override the generated values
32
+ */
33
+ metadata?: Record<string, unknown>;
34
+ }
35
+ export interface ChunkedEmbedding { // One element of the array generateEmbedding() resolves to when chunking is enabled
36
+ /**
37
+ * The chunk information (the TextChunk this embedding was generated from)
38
+ */
39
+ chunk: TextChunk;
40
+ /**
41
+ * The stored embedding for this chunk; generateEmbedding() also records chunkIndex, chunkStart and chunkEnd in its metadata
42
+ */
43
+ embedding: StoredEmbedding;
44
+ }
45
+ /**
46
+ * Generate embedding for text with automatic chunking support
47
+ *
48
+ * For single embeddings (no chunking, the default), resolves to a StoredEmbedding.
49
+ * For chunked text (enableChunking: true), resolves to an array of ChunkedEmbeddings.
50
+ *
51
+ * @example
52
+ * ```typescript
53
+ * // Simple embedding
54
+ * const embedding = await generateEmbedding({
55
+ * provider: createEmbeddingProvider({ type: 'openai', apiKey: '...' }),
56
+ * text: 'Hello world',
57
+ * })
58
+ *
59
+ * // Chunked embedding for long text
60
+ * const chunks = await generateEmbedding({
61
+ * provider: createEmbeddingProvider({ type: 'openai', apiKey: '...' }),
62
+ * text: longDocument,
63
+ * enableChunking: true,
64
+ * chunking: { chunkSize: 1000, chunkOverlap: 200 },
65
+ * })
66
+ * ```
67
+ */
68
+ export declare function generateEmbedding(options: GenerateEmbeddingOptions & {
69
+ enableChunking: true;
70
+ }): Promise<ChunkedEmbedding[]>;
71
+ export declare function generateEmbedding(options: GenerateEmbeddingOptions & {
72
+ enableChunking?: false;
73
+ }): Promise<StoredEmbedding>;
74
+ export declare function generateEmbedding(options: GenerateEmbeddingOptions): Promise<StoredEmbedding | ChunkedEmbedding[]>;
75
+ export interface GenerateEmbeddingsOptions { // Options accepted by generateEmbeddings()
76
+ /**
77
+ * Embedding provider to use
78
+ */
79
+ provider: EmbeddingProvider;
80
+ /**
81
+ * Array of texts to embed
82
+ */
83
+ texts: string[];
84
+ /**
85
+ * Whether to include a source hash in the metadata for change detection
86
+ * @default true
87
+ */
88
+ includeSourceHash?: boolean;
89
+ /**
90
+ * Additional metadata to include for all embeddings
91
+ */
92
+ metadata?: Record<string, unknown>;
93
+ /**
94
+ * Batch size for embedding generation (presumably the number of texts per provider request; confirm against the implementation)
95
+ * @default 10
96
+ */
97
+ batchSize?: number;
98
+ }
99
+ /**
100
+ * Generate embeddings for multiple texts in batches
101
+ *
102
+ * More efficient than calling generateEmbedding() multiple times.
103
+ * Automatically batches requests to respect API limits. Resolves to an array of StoredEmbedding.
104
+ *
105
+ * @example
106
+ * ```typescript
107
+ * const embeddings = await generateEmbeddings({
108
+ * provider: createEmbeddingProvider({ type: 'openai', apiKey: '...' }),
109
+ * texts: ['text 1', 'text 2', 'text 3'],
110
+ * batchSize: 10,
111
+ * })
112
+ * ```
113
+ */
114
+ export declare function generateEmbeddings(options: GenerateEmbeddingsOptions): Promise<StoredEmbedding[]>;
115
+ /**
116
+ * Check if an embedding needs regeneration based on source text changes
117
+ *
118
+ * @param sourceText - Current source text
119
+ * @param currentEmbedding - Existing embedding (if any); null and undefined are accepted
120
+ * @returns true if the embedding needs regeneration, false otherwise
121
+ */
122
+ export declare function shouldRegenerateEmbedding(sourceText: string, currentEmbedding: StoredEmbedding | null | undefined): boolean;
123
+ /**
124
+ * Hash text for change detection (generateEmbedding() stores the result as sourceHash in the metadata)
125
+ * Uses SHA-256 for consistent hashing; the hash is returned as a string
126
+ */
127
+ export declare function hashText(text: string): string;
128
+ /**
129
+ * Validate that embedding dimensions match expected dimensions; returns nothing when they do
130
+ *
131
+ * @param embedding - The embedding to validate
132
+ * @param expectedDimensions - Expected number of dimensions
133
+ * @throws Error if dimensions don't match
134
+ */
135
+ export declare function validateEmbeddingDimensions(embedding: StoredEmbedding, expectedDimensions: number): void;
136
+ /**
137
+ * Merge multiple embeddings into a single embedding
138
+ * Uses average pooling by default
139
+ *
140
+ * Useful for combining chunk embeddings into a single document embedding.
141
+ *
142
+ * @param embeddings - Array of embeddings to merge
143
+ * @param method - Merge method ('average' or 'max'); optional
144
+ * @returns Merged embedding as a single StoredEmbedding
145
+ */
146
+ export declare function mergeEmbeddings(embeddings: StoredEmbedding[], method?: 'average' | 'max'): StoredEmbedding;
147
+ //# sourceMappingURL=embeddings.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"embeddings.d.ts","sourceRoot":"","sources":["../../src/runtime/embeddings.ts"],"names":[],"mappings":"AAAA;;GAEG;AAEH,OAAO,KAAK,EAAE,iBAAiB,EAAE,MAAM,uBAAuB,CAAA;AAC9D,OAAO,KAAK,EAAE,eAAe,EAAqB,MAAM,oBAAoB,CAAA;AAC5E,OAAO,EAAa,KAAK,eAAe,EAAE,KAAK,SAAS,EAAE,MAAM,eAAe,CAAA;AAG/E,MAAM,WAAW,wBAAwB;IACvC;;OAEG;IACH,QAAQ,EAAE,iBAAiB,CAAA;IAE3B;;OAEG;IACH,IAAI,EAAE,MAAM,CAAA;IAEZ;;;OAGG;IACH,cAAc,CAAC,EAAE,OAAO,CAAA;IAExB;;OAEG;IACH,QAAQ,CAAC,EAAE,eAAe,CAAA;IAE1B;;;OAGG;IACH,iBAAiB,CAAC,EAAE,OAAO,CAAA;IAE3B;;OAEG;IACH,QAAQ,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAAA;CACnC;AAED,MAAM,WAAW,gBAAgB;IAC/B;;OAEG;IACH,KAAK,EAAE,SAAS,CAAA;IAEhB;;OAEG;IACH,SAAS,EAAE,eAAe,CAAA;CAC3B;AAED;;;;;;;;;;;;;;;;;;;;;;GAsBG;AAGH,wBAAgB,iBAAiB,CAC/B,OAAO,EAAE,wBAAwB,GAAG;IAAE,cAAc,EAAE,IAAI,CAAA;CAAE,GAC3D,OAAO,CAAC,gBAAgB,EAAE,CAAC,CAAA;AAE9B,wBAAgB,iBAAiB,CAC/B,OAAO,EAAE,wBAAwB,GAAG;IAAE,cAAc,CAAC,EAAE,KAAK,CAAA;CAAE,GAC7D,OAAO,CAAC,eAAe,CAAC,CAAA;AAE3B,wBAAgB,iBAAiB,CAC/B,OAAO,EAAE,wBAAwB,GAChC,OAAO,CAAC,eAAe,GAAG,gBAAgB,EAAE,CAAC,CAAA;AAkEhD,MAAM,WAAW,yBAAyB;IACxC;;OAEG;IACH,QAAQ,EAAE,iBAAiB,CAAA;IAE3B;;OAEG;IACH,KAAK,EAAE,MAAM,EAAE,CAAA;IAEf;;;OAGG;IACH,iBAAiB,CAAC,EAAE,OAAO,CAAA;IAE3B;;OAEG;IACH,QAAQ,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAAA;IAElC;;;OAGG;IACH,SAAS,CAAC,EAAE,MAAM,CAAA;CACnB;AAED;;;;;;;;;;;;;;GAcG;AACH,wBAAsB,kBAAkB,CACtC,OAAO,EAAE,yBAAyB,GACjC,OAAO,CAAC,eAAe,EAAE,CAAC,CA2C5B;AAED;;;;;;GAMG;AACH,wBAAgB,yBAAyB,CACvC,UAAU,EAAE,MAAM,EAClB,gBAAgB,EAAE,eAAe,GAAG,IAAI,GAAG,SAAS,GACnD,OAAO,CAcT;AAED;;;GAGG;AACH,wBAAgB,QAAQ,CAAC,IAAI,EAAE,MAAM,GAAG,MAAM,CAE7C;AAED;;;;;;GAMG;AACH,wBAAgB,2BAA2B,CACzC,SAAS,EAAE,eAAe,EAC1B,kBAAkB,EAAE,MAAM,GACzB,IAAI,CAgBN;AAED;;;;;;;;;GASG;AACH,wBAAgB,eAAe,CAC7B,UAAU,EAAE,eAAe,EAAE,EAC7B,MAAM,GAAE,SAAS,GAAG,KAAiB,GACpC,eAAe,CAyDjB"}
@@ -0,0 +1,201 @@
1
+ /**
2
+ * High-level embedding generation utilities
3
+ */
4
+ import { chunkText } from './chunking.js';
5
+ import { createHash } from 'node:crypto';
6
// Implementation
/**
 * Generate an embedding for a single text, optionally chunk by chunk.
 *
 * Without chunking the whole text is passed to `provider.embed()` and one
 * `{ vector, metadata }` object is returned. With `enableChunking` the text is
 * split by `chunkText(text, chunking)`, every chunk text is embedded through a
 * single `provider.embedBatch()` call, and an array of `{ chunk, embedding }`
 * pairs is returned. Each chunk embedding additionally records `chunkIndex`,
 * `chunkStart` and `chunkEnd` in its metadata.
 *
 * Caller-supplied `metadata` is spread after the generated base metadata, so it
 * can override `model`, `provider`, `dimensions`, `generatedAt` and `sourceHash`.
 *
 * @param options - `provider`, `text`, and optional `enableChunking` (default false),
 *   `chunking`, `includeSourceHash` (default true) and `metadata`
 * @returns A single embedding, or an array of chunk/embedding pairs when chunking is enabled
 * @throws Error if the provider does not return exactly one vector per chunk
 */
// eslint-disable-next-line no-redeclare
export async function generateEmbedding(options) {
    const {
        provider,
        text,
        enableChunking = false,
        chunking,
        includeSourceHash = true,
        metadata: additionalMetadata,
    } = options;
    // The hash always covers the full source text, even when chunking.
    const sourceHash = includeSourceHash ? hashText(text) : undefined;
    const baseMetadata = {
        model: provider.model,
        provider: provider.type,
        dimensions: provider.dimensions,
        generatedAt: new Date().toISOString(),
        sourceHash,
    };
    if (!enableChunking) {
        const vector = await provider.embed(text);
        return {
            vector,
            metadata: {
                ...baseMetadata,
                ...additionalMetadata,
            },
        };
    }
    const chunks = chunkText(text, chunking);
    // Nothing to embed: skip the provider call instead of sending an empty batch.
    if (chunks.length === 0) {
        return [];
    }
    const vectors = await provider.embedBatch(chunks.map((c) => c.text));
    // A short or malformed result would otherwise be stored as `vector: undefined`.
    const vectorCount = Array.isArray(vectors) ? vectors.length : 0;
    if (vectorCount !== chunks.length) {
        throw new Error(`Embedding provider returned ${vectorCount} vector(s) for ${chunks.length} chunk(s)`);
    }
    return chunks.map((chunk, index) => ({
        chunk,
        embedding: {
            vector: vectors[index],
            metadata: {
                ...baseMetadata,
                ...additionalMetadata,
                chunkIndex: index,
                chunkStart: chunk.start,
                chunkEnd: chunk.end,
            },
        },
    }));
}
52
/**
 * Generate embeddings for multiple texts in batches
 *
 * More efficient than calling generateEmbedding() multiple times.
 * Texts are sent to `provider.embedBatch()` in slices of `batchSize`, one
 * slice after another, and the results are returned in input order.
 *
 * `generatedAt` is computed once and shared by every returned embedding.
 * Caller-supplied `metadata` is spread last and can override generated fields.
 *
 * @example
 * ```typescript
 * const embeddings = await generateEmbeddings({
 *   provider: createEmbeddingProvider({ type: 'openai', apiKey: '...' }),
 *   texts: ['text 1', 'text 2', 'text 3'],
 *   batchSize: 10,
 * })
 * ```
 *
 * @param options - `provider`, `texts`, and optional `includeSourceHash` (default true),
 *   `metadata` and `batchSize` (default 10)
 * @returns One `{ vector, metadata }` object per input text
 * @throws Error if `batchSize` is not a positive number
 * @throws Error if the provider does not return exactly one vector per text in a batch
 */
export async function generateEmbeddings(options) {
    const {
        provider,
        texts,
        includeSourceHash = true,
        metadata: additionalMetadata,
        batchSize = 10,
    } = options;
    // A zero, negative or NaN step would keep the loop below from ever advancing.
    if (typeof batchSize !== 'number' || !(batchSize > 0)) {
        throw new Error(`batchSize must be a positive number, got ${String(batchSize)}`);
    }
    const baseMetadata = {
        model: provider.model,
        provider: provider.type,
        dimensions: provider.dimensions,
        generatedAt: new Date().toISOString(),
    };
    const embeddings = [];
    for (let offset = 0; offset < texts.length; offset += batchSize) {
        const batch = texts.slice(offset, offset + batchSize);
        const vectors = await provider.embedBatch(batch);
        // A short or malformed result would otherwise be stored as `vector: undefined`.
        const vectorCount = Array.isArray(vectors) ? vectors.length : 0;
        if (vectorCount !== batch.length) {
            throw new Error(`Embedding provider returned ${vectorCount} vector(s) for ${batch.length} text(s)`);
        }
        batch.forEach((text, j) => {
            const sourceHash = includeSourceHash ? hashText(text) : undefined;
            embeddings.push({
                vector: vectors[j],
                metadata: {
                    ...baseMetadata,
                    sourceHash,
                    ...additionalMetadata,
                },
            });
        });
    }
    return embeddings;
}
98
/**
 * Decide whether a stored embedding is stale relative to its source text.
 *
 * @param sourceText - Current source text
 * @param currentEmbedding - Existing embedding, or null/undefined when none is stored
 * @returns true when there is no embedding yet or the stored source hash differs
 *   from the hash of `sourceText`; false when the hashes match or no hash was stored
 */
export function shouldRegenerateEmbedding(sourceText, currentEmbedding) {
    if (currentEmbedding) {
        const storedHash = currentEmbedding.metadata.sourceHash;
        // Without a stored hash a change cannot be detected; stay conservative
        // and keep the existing embedding.
        return storedHash ? hashText(sourceText) !== storedHash : false;
    }
    // Nothing stored yet, so an embedding has to be generated.
    return true;
}
118
/**
 * Compute the change-detection fingerprint of a text.
 *
 * @param text - Text to hash
 * @returns SHA-256 digest of `text` as a lowercase hex string
 */
export function hashText(text) {
    const hasher = createHash('sha256');
    hasher.update(text);
    return hasher.digest('hex');
}
125
/**
 * Assert that an embedding has the expected number of dimensions.
 *
 * Two checks run in order: the vector length against `expectedDimensions`,
 * then the `dimensions` value recorded in the metadata against the vector length.
 *
 * @param embedding - The embedding to validate
 * @param expectedDimensions - Expected number of dimensions
 * @throws Error if the vector length differs from `expectedDimensions`
 * @throws Error if `metadata.dimensions` differs from the vector length
 */
export function validateEmbeddingDimensions(embedding, expectedDimensions) {
    const { vector, metadata } = embedding;
    const vectorLength = vector.length;
    if (vectorLength !== expectedDimensions) {
        const origin = `Provider: ${metadata.provider}, Model: ${metadata.model}`;
        throw new Error(`Embedding dimension mismatch: expected ${expectedDimensions}, got ${vectorLength}. ${origin}`);
    }
    if (metadata.dimensions !== vectorLength) {
        throw new Error(`Embedding metadata dimension mismatch: metadata says ${metadata.dimensions}, but vector has ${vectorLength} dimensions`);
    }
}
143
/**
 * Merge multiple embeddings into a single embedding
 * Uses average pooling by default
 *
 * Useful for combining chunk embeddings into a single document embedding.
 *
 * A single-element input is returned as-is (same object, no merge metadata).
 * Otherwise the result carries the first embedding's metadata with a fresh
 * `generatedAt` plus `mergedFrom` (input count) and `mergeMethod`.
 *
 * @param embeddings - Array of embeddings to merge
 * @param method - Merge method ('average' or 'max')
 * @returns Merged embedding
 * @throws Error if `method` is neither 'average' nor 'max'
 * @throws Error if `embeddings` is empty
 * @throws Error if the embeddings do not all have the same vector length
 */
export function mergeEmbeddings(embeddings, method = 'average') {
    // Reject unknown methods up front; otherwise they would silently be treated
    // as max pooling and recorded verbatim in `mergeMethod`.
    if (method !== 'average' && method !== 'max') {
        throw new Error(`Unknown merge method: ${String(method)} (expected 'average' or 'max')`);
    }
    if (embeddings.length === 0) {
        throw new Error('Cannot merge empty array of embeddings');
    }
    if (embeddings.length === 1) {
        return embeddings[0];
    }
    // Validate all embeddings have same dimensions
    const dimensions = embeddings[0].vector.length;
    for (const emb of embeddings) {
        if (emb.vector.length !== dimensions) {
            throw new Error(`Cannot merge embeddings with different dimensions: ${dimensions} vs ${emb.vector.length}`);
        }
    }
    const useAverage = method === 'average';
    // Seed with the identity of the pooling operation: 0 for sums, -Infinity for max.
    const mergedVector = new Array(dimensions).fill(useAverage ? 0 : -Infinity);
    for (const emb of embeddings) {
        for (let i = 0; i < dimensions; i++) {
            mergedVector[i] = useAverage
                ? mergedVector[i] + emb.vector[i]
                : Math.max(mergedVector[i], emb.vector[i]);
        }
    }
    if (useAverage) {
        for (let i = 0; i < dimensions; i++) {
            mergedVector[i] /= embeddings.length;
        }
    }
    // Merge metadata (use first embedding's metadata)
    return {
        vector: mergedVector,
        metadata: {
            ...embeddings[0].metadata,
            generatedAt: new Date().toISOString(),
            mergedFrom: embeddings.length,
            mergeMethod: method,
        },
    };
}
201
+ //# sourceMappingURL=embeddings.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"embeddings.js","sourceRoot":"","sources":["../../src/runtime/embeddings.ts"],"names":[],"mappings":"AAAA;;GAEG;AAIH,OAAO,EAAE,SAAS,EAAwC,MAAM,eAAe,CAAA;AAC/E,OAAO,EAAE,UAAU,EAAE,MAAM,aAAa,CAAA;AAoFxC,iBAAiB;AACjB,wCAAwC;AACxC,MAAM,CAAC,KAAK,UAAU,iBAAiB,CACrC,OAAiC;IAEjC,MAAM,EACJ,QAAQ,EACR,IAAI,EACJ,cAAc,GAAG,KAAK,EACtB,QAAQ,EACR,iBAAiB,GAAG,IAAI,EACxB,QAAQ,EAAE,kBAAkB,GAC7B,GAAG,OAAO,CAAA;IAEX,MAAM,UAAU,GAAG,iBAAiB,CAAC,CAAC,CAAC,QAAQ,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,SAAS,CAAA;IAEjE,yBAAyB;IACzB,MAAM,YAAY,GAAsB;QACtC,KAAK,EAAE,QAAQ,CAAC,KAAK;QACrB,QAAQ,EAAE,QAAQ,CAAC,IAAI;QACvB,UAAU,EAAE,QAAQ,CAAC,UAAU;QAC/B,WAAW,EAAE,IAAI,IAAI,EAAE,CAAC,WAAW,EAAE;QACrC,UAAU;KACX,CAAA;IAED,8CAA8C;IAC9C,IAAI,CAAC,cAAc,EAAE,CAAC;QACpB,MAAM,MAAM,GAAG,MAAM,QAAQ,CAAC,KAAK,CAAC,IAAI,CAAC,CAAA;QAEzC,OAAO;YACL,MAAM;YACN,QAAQ,EAAE;gBACR,GAAG,YAAY;gBACf,GAAG,kBAAkB;aACtB;SACF,CAAA;IACH,CAAC;IAED,mEAAmE;IACnE,MAAM,MAAM,GAAG,SAAS,CAAC,IAAI,EAAE,QAAQ,CAAC,CAAA;IAExC,sBAAsB;IACtB,MAAM,UAAU,GAAG,MAAM,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,IAAI,CAAC,CAAA;IAE5C,8CAA8C;IAC9C,MAAM,OAAO,GAAG,MAAM,QAAQ,CAAC,UAAU,CAAC,UAAU,CAAC,CAAA;IAErD,uCAAuC;IACvC,MAAM,iBAAiB,GAAuB,MAAM,CAAC,GAAG,CAAC,CAAC,KAAK,EAAE,KAAK,EAAE,EAAE,CAAC,CAAC;QAC1E,KAAK;QACL,SAAS,EAAE;YACT,MAAM,EAAE,OAAO,CAAC,KAAK,CAAC;YACtB,QAAQ,EAAE;gBACR,GAAG,YAAY;gBACf,GAAG,kBAAkB;gBACrB,UAAU,EAAE,KAAK;gBACjB,UAAU,EAAE,KAAK,CAAC,KAAK;gBACvB,QAAQ,EAAE,KAAK,CAAC,GAAG;aACpB;SACF;KACF,CAAC,CAAC,CAAA;IAEH,OAAO,iBAAiB,CAAA;AAC1B,CAAC;AA+BD;;;;;;;;;;;;;;GAcG;AACH,MAAM,CAAC,KAAK,UAAU,kBAAkB,CACtC,OAAkC;IAElC,MAAM,EACJ,QAAQ,EACR,KAAK,EACL,iBAAiB,GAAG,IAAI,EACxB,QAAQ,EAAE,kBAAkB,EAC5B,SAAS,GAAG,EAAE,GACf,GAAG,OAAO,CAAA;IAEX,MAAM,YAAY,GAA0C;QAC1D,KAAK,EAAE,QAAQ,CAAC,KAAK;QACrB,QAAQ,EAAE,QAAQ,CAAC,IAAI;QACvB,UAAU,EAAE,QAAQ,CAAC,UAAU;QAC/B,WAAW,EAAE,IAAI,IAAI,EAAE,CAAC,WAAW,EAAE;KACtC,CAAA;IAED,MAAM,UAAU,GAAsB,EAAE,CAAA;IAExC,qBAAqB;IACrB,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,KAAK,CAAC,MAAM,EAAE,CAAC,IAAI,SAAS,EAAE,CAAC;QACjD,M
AAM,KAAK,GAAG,KAAK,CAAC,KAAK,CAAC,CAAC,EAAE,CAAC,GAAG,SAAS,CAAC,CAAA;QAE3C,gCAAgC;QAChC,MAAM,OAAO,GAAG,MAAM,QAAQ,CAAC,UAAU,CAAC,KAAK,CAAC,CAAA;QAEhD,iCAAiC;QACjC,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,KAAK,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;YACtC,MAAM,IAAI,GAAG,KAAK,CAAC,CAAC,CAAC,CAAA;YACrB,MAAM,MAAM,GAAG,OAAO,CAAC,CAAC,CAAC,CAAA;YACzB,MAAM,UAAU,GAAG,iBAAiB,CAAC,CAAC,CAAC,QAAQ,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,SAAS,CAAA;YAEjE,UAAU,CAAC,IAAI,CAAC;gBACd,MAAM;gBACN,QAAQ,EAAE;oBACR,GAAG,YAAY;oBACf,UAAU;oBACV,GAAG,kBAAkB;iBACtB;aACF,CAAC,CAAA;QACJ,CAAC;IACH,CAAC;IAED,OAAO,UAAU,CAAA;AACnB,CAAC;AAED;;;;;;GAMG;AACH,MAAM,UAAU,yBAAyB,CACvC,UAAkB,EAClB,gBAAoD;IAEpD,0CAA0C;IAC1C,IAAI,CAAC,gBAAgB,EAAE,CAAC;QACtB,OAAO,IAAI,CAAA;IACb,CAAC;IAED,mDAAmD;IACnD,IAAI,CAAC,gBAAgB,CAAC,QAAQ,CAAC,UAAU,EAAE,CAAC;QAC1C,OAAO,KAAK,CAAA,CAAC,kDAAkD;IACjE,CAAC;IAED,sBAAsB;IACtB,MAAM,WAAW,GAAG,QAAQ,CAAC,UAAU,CAAC,CAAA;IACxC,OAAO,WAAW,KAAK,gBAAgB,CAAC,QAAQ,CAAC,UAAU,CAAA;AAC7D,CAAC;AAED;;;GAGG;AACH,MAAM,UAAU,QAAQ,CAAC,IAAY;IACnC,OAAO,UAAU,CAAC,QAAQ,CAAC,CAAC,MAAM,CAAC,IAAI,CAAC,CAAC,MAAM,CAAC,KAAK,CAAC,CAAA;AACxD,CAAC;AAED;;;;;;GAMG;AACH,MAAM,UAAU,2BAA2B,CACzC,SAA0B,EAC1B,kBAA0B;IAE1B,MAAM,gBAAgB,GAAG,SAAS,CAAC,MAAM,CAAC,MAAM,CAAA;IAEhD,IAAI,gBAAgB,KAAK,kBAAkB,EAAE,CAAC;QAC5C,MAAM,IAAI,KAAK,CACb,0CAA0C,kBAAkB,SAAS,gBAAgB,IAAI;YACvF,aAAa,SAAS,CAAC,QAAQ,CAAC,QAAQ,YAAY,SAAS,CAAC,QAAQ,CAAC,KAAK,EAAE,CACjF,CAAA;IACH,CAAC;IAED,IAAI,SAAS,CAAC,QAAQ,CAAC,UAAU,KAAK,gBAAgB,EAAE,CAAC;QACvD,MAAM,IAAI,KAAK,CACb,wDAAwD,SAAS,CAAC,QAAQ,CAAC,UAAU,IAAI;YACvF,kBAAkB,gBAAgB,aAAa,CAClD,CAAA;IACH,CAAC;AACH,CAAC;AAED;;;;;;;;;GASG;AACH,MAAM,UAAU,eAAe,CAC7B,UAA6B,EAC7B,SAA4B,SAAS;IAErC,IAAI,UAAU,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;QAC5B,MAAM,IAAI,KAAK,CAAC,wCAAwC,CAAC,CAAA;IAC3D,CAAC;IAED,IAAI,UAAU,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;QAC5B,OAAO,UAAU,CAAC,CAAC,CAAC,CAAA;IACtB,CAAC;IAED,+CAA+C;IAC/C,MAAM,UAAU,GAAG,UAAU,CAAC,CAAC,CAAC,CAAC,MAAM,CAAC,MAAM,CAAA;IAC9C,KAAK,MAAM,GAAG,IAAI,UAAU,EAAE,CAAC;QAC7B,IAAI,GAAG,CAAC,MAAM,CAAC,MAAM,KAAK
,UAAU,EAAE,CAAC;YACrC,MAAM,IAAI,KAAK,CACb,sDAAsD,UAAU,OAAO,GAAG,CAAC,MAAM,CAAC,MAAM,EAAE,CAC3F,CAAA;QACH,CAAC;IACH,CAAC;IAED,IAAI,YAAsB,CAAA;IAE1B,IAAI,MAAM,KAAK,SAAS,EAAE,CAAC;QACzB,kBAAkB;QAClB,YAAY,GAAG,IAAI,KAAK,CAAC,UAAU,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,CAAA;QAE5C,KAAK,MAAM,GAAG,IAAI,UAAU,EAAE,CAAC;YAC7B,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,UAAU,EAAE,CAAC,EAAE,EAAE,CAAC;gBACpC,YAAY,CAAC,CAAC,CAAC,IAAI,GAAG,CAAC,MAAM,CAAC,CAAC,CAAC,CAAA;YAClC,CAAC;QACH,CAAC;QAED,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,UAAU,EAAE,CAAC,EAAE,EAAE,CAAC;YACpC,YAAY,CAAC,CAAC,CAAC,IAAI,UAAU,CAAC,MAAM,CAAA;QACtC,CAAC;IACH,CAAC;SAAM,CAAC;QACN,cAAc;QACd,YAAY,GAAG,IAAI,KAAK,CAAC,UAAU,CAAC,CAAC,IAAI,CAAC,CAAC,QAAQ,CAAC,CAAA;QAEpD,KAAK,MAAM,GAAG,IAAI,UAAU,EAAE,CAAC;YAC7B,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,UAAU,EAAE,CAAC,EAAE,EAAE,CAAC;gBACpC,YAAY,CAAC,CAAC,CAAC,GAAG,IAAI,CAAC,GAAG,CAAC,YAAY,CAAC,CAAC,CAAC,EAAE,GAAG,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,CAAA;YAC5D,CAAC;QACH,CAAC;IACH,CAAC;IAED,kDAAkD;IAClD,MAAM,aAAa,GAAG,UAAU,CAAC,CAAC,CAAC,CAAC,QAAQ,CAAA;IAE5C,OAAO;QACL,MAAM,EAAE,YAAY;QACpB,QAAQ,EAAE;YACR,GAAG,aAAa;YAChB,WAAW,EAAE,IAAI,IAAI,EAAE,CAAC,WAAW,EAAE;YACrC,UAAU,EAAE,UAAU,CAAC,MAAM;YAC7B,WAAW,EAAE,MAAM;SACC;KACvB,CAAA;AACH,CAAC"}
@@ -0,0 +1,2 @@
1
+ export {};
2
+ //# sourceMappingURL=embeddings.test.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"embeddings.test.d.ts","sourceRoot":"","sources":["../../src/runtime/embeddings.test.ts"],"names":[],"mappings":""}