@futdevpro/nts-dynamo 1.14.64 → 1.14.67

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (41) hide show
  1. package/build/_modules/local-vector-search/_enums/lvs-search-mode.enum.d.ts +19 -0
  2. package/build/_modules/local-vector-search/_enums/lvs-search-mode.enum.d.ts.map +1 -0
  3. package/build/_modules/local-vector-search/_enums/lvs-search-mode.enum.js +23 -0
  4. package/build/_modules/local-vector-search/_enums/lvs-search-mode.enum.js.map +1 -0
  5. package/build/_modules/local-vector-search/_models/lvs-search-result.interface.d.ts +17 -0
  6. package/build/_modules/local-vector-search/_models/lvs-search-result.interface.d.ts.map +1 -0
  7. package/build/_modules/local-vector-search/_models/lvs-search-result.interface.js +3 -0
  8. package/build/_modules/local-vector-search/_models/lvs-search-result.interface.js.map +1 -0
  9. package/build/_modules/local-vector-search/_services/lvs-doc-chunk-data.service.d.ts +27 -0
  10. package/build/_modules/local-vector-search/_services/lvs-doc-chunk-data.service.d.ts.map +1 -0
  11. package/build/_modules/local-vector-search/_services/lvs-doc-chunk-data.service.js +198 -0
  12. package/build/_modules/local-vector-search/_services/lvs-doc-chunk-data.service.js.map +1 -0
  13. package/build/_modules/local-vector-search/_services/lvs-local-vector-search.data-service.d.ts +130 -0
  14. package/build/_modules/local-vector-search/_services/lvs-local-vector-search.data-service.d.ts.map +1 -0
  15. package/build/_modules/local-vector-search/_services/lvs-local-vector-search.data-service.js +241 -0
  16. package/build/_modules/local-vector-search/_services/lvs-local-vector-search.data-service.js.map +1 -0
  17. package/build/_modules/local-vector-search/_services/lvs-vector-pool.control-service.d.ts +71 -0
  18. package/build/_modules/local-vector-search/_services/lvs-vector-pool.control-service.d.ts.map +1 -0
  19. package/build/_modules/local-vector-search/_services/lvs-vector-pool.control-service.js +182 -0
  20. package/build/_modules/local-vector-search/_services/lvs-vector-pool.control-service.js.map +1 -0
  21. package/build/_modules/local-vector-search/_services/lvs-vector-pool.control-service.spec.d.ts +2 -0
  22. package/build/_modules/local-vector-search/_services/lvs-vector-pool.control-service.spec.d.ts.map +1 -0
  23. package/build/_modules/local-vector-search/_services/lvs-vector-pool.control-service.spec.js +294 -0
  24. package/build/_modules/local-vector-search/_services/lvs-vector-pool.control-service.spec.js.map +1 -0
  25. package/build/_modules/local-vector-search/index.d.ts +6 -0
  26. package/build/_modules/local-vector-search/index.d.ts.map +1 -0
  27. package/build/_modules/local-vector-search/index.js +12 -0
  28. package/build/_modules/local-vector-search/index.js.map +1 -0
  29. package/build/_services/base/data.service.d.ts +1 -1
  30. package/build/_services/base/data.service.d.ts.map +1 -1
  31. package/build/_services/base/data.service.js +7 -1
  32. package/build/_services/base/data.service.js.map +1 -1
  33. package/package.json +12 -1
  34. package/src/_modules/local-vector-search/_enums/lvs-search-mode.enum.ts +19 -0
  35. package/src/_modules/local-vector-search/_models/lvs-search-result.interface.ts +17 -0
  36. package/src/_modules/local-vector-search/_services/lvs-doc-chunk-data.service.ts +276 -0
  37. package/src/_modules/local-vector-search/_services/lvs-local-vector-search.data-service.ts +330 -0
  38. package/src/_modules/local-vector-search/_services/lvs-vector-pool.control-service.spec.ts +393 -0
  39. package/src/_modules/local-vector-search/_services/lvs-vector-pool.control-service.ts +220 -0
  40. package/src/_modules/local-vector-search/index.ts +12 -0
  41. package/src/_services/base/data.service.ts +12 -3
@@ -0,0 +1,330 @@
1
+ import { DyFM_Log, DyFM_Error } from '@futdevpro/fsm-dynamo';
2
+ import {
3
+ DyFM_DataModel_Params,
4
+ DyFM_DataProperty_Params,
5
+ DyFM_DBFilter,
6
+ DyFM_Metadata,
7
+ } from '@futdevpro/fsm-dynamo';
8
+ import { DyFM_OAI_Settings } from '@futdevpro/fsm-dynamo/ai/open-ai';
9
+
10
+ import { LVS_Search_Mode } from '../_enums/lvs-search-mode.enum';
11
+ import { LVS_SearchResult } from '../_models/lvs-search-result.interface';
12
+ import { LVS_VectorPool_ControlService } from './lvs-vector-pool.control-service';
13
+ import { DyNTS_OAI_VectorDataService } from '../../ai/_modules/open-ai/_services/data-services/oai-vector-data.service';
14
+ import { DyNTS_global_settings } from '../../../_collections/global-settings.const';
15
+
/**
 * Local Vector Data Service
 *
 * Extends {@link DyNTS_OAI_VectorDataService} so that `vectorSearch` runs an
 * in-memory similarity search instead of a MongoDB vector search.
 *
 * For every search the service loads the candidate documents from the database,
 * puts their stored embeddings into an in-memory vector pool and lets the pool
 * rank them against the embedded query, using the requested
 * {@link LVS_Search_Mode} (cosine similarity or L2 distance).
 *
 * @example
 * ```typescript
 * const service = new DyNTS_LVS_VectorDataService(
 *   data,
 *   dataParams,
 *   openAISettings,
 *   issuer
 * );
 *
 * const results = await service.vectorSearch({
 *   input: 'search query',
 *   searchInKey: 'contentVectorized',
 *   limit: 10,
 *   filterBy: { category: 'docs' }
 * });
 * ```
 */
export class DyNTS_LVS_VectorDataService<T extends DyFM_Metadata> extends DyNTS_OAI_VectorDataService<T> {

  /**
   * Search mode used when `vectorSearch` is called without `searchMode`.
   * Cosine similarity is typically preferred for text embeddings.
   */
  defaultSearchMode: LVS_Search_Mode = LVS_Search_Mode.cosineSimilarity;

  /**
   * Whether to use L2 normalization when storing vectors.
   *
   * NOTE(review): this flag is not read anywhere inside this class and is not
   * handed to the vector pool here — confirm whether it is consumed elsewhere
   * or still needs to be wired in.
   */
  useL2Normalization: boolean = true;

  /**
   * Instance-based vector pool: every service instance owns its own pool.
   *
   * The pool is only populated inside the synchronous section of
   * `vectorSearch` and is cleared again before that section ends, so
   * overlapping searches on the same instance never see each other's vectors.
   */
  private vectorPool: LVS_VectorPool_ControlService = new LVS_VectorPool_ControlService();

  constructor(
    /**
     * Initial data, this will be used by functions on default
     */
    data: T,
    /**
     * DB data params will be used to connect to usable dbService on GlobalService
     */
    dataParams: DyFM_DataModel_Params<T>,
    /**
     * OpenAI settings
     */
    openAISettings: DyFM_OAI_Settings,
    /**
     * Initial set for issuer to be able to follow the issuer's activity
     */
    issuer: string
  ) {
    super(data, dataParams, openAISettings, issuer);
  }

  /**
   * Overrides the parent `vectorSearch` to perform the search locally, in memory.
   *
   * Process:
   * 1. Validate `input` and `searchInKey`
   * 2. Load data from the DB using `filterBy` (if provided) or `getAll()`
   * 3. Collect the stored vectors of `searchInKey` (items without `_id` or
   *    without an array value under `searchInKey` are skipped)
   * 4. Vectorize the input query
   * 5. Fill the vector pool, search it and clear it again — without awaiting
   *    in between, so concurrent calls cannot interleave on the shared pool
   * 6. Map the search results back to the full data objects
   *
   * The passed `set` object is not modified.
   *
   * @param set Search parameters
   * @returns The matching data objects, in the order returned by the vector pool
   *   search; an empty array when no data or no usable vectors were found
   * @throws DyFM_Error Every error raised inside the method — including the
   *   validation errors (`DyNTS-LVS-VS1`..`VS3`) — is caught and re-thrown
   *   wrapped with the error code `DyNTS-LVS-VS0`.
   */
  override async vectorSearch(
    set: {
      input: string;
      /** this should be the vectorized property key */
      searchInKey: string;
      /** Maximum number of results; defaults to 3 */
      limit?: number;
      /**
       * Number of candidates that are used to find the best match
       * (Not used in local search, but kept for compatibility)
       */
      numberOfCandidates?: number;
      /**
       * Filter to narrow down candidates from DB before vector search
       */
      filterBy?: DyFM_DBFilter<T>;
      /**
       * Search mode (cosine similarity or L2 distance)
       * Defaults to this.defaultSearchMode
       */
      searchMode?: LVS_Search_Mode;
    },
  ): Promise<T[]> {
    try {
      if (!set.input) {
        throw new DyFM_Error({
          ...this.getDefaultErrorSettings(
            'vectorSearch',
            new Error('input is required')
          ),
          errorCode: `${DyNTS_global_settings.systemShortCodeName}|DyNTS-LVS-VS1`,
        });
      }

      const { input, searchInKey, filterBy, searchMode } = set;
      // Default locally instead of writing the default back into the caller's object.
      const limit: number = set.limit ?? 3;

      // Validate that searchInKey is a known property of the data model
      const property: DyFM_DataProperty_Params<any, T> =
        this.dataParams.properties[searchInKey];

      if (!property) {
        throw new DyFM_Error({
          ...this.getDefaultErrorSettings(
            'vectorSearch',
            new Error(
              `Property "${searchInKey}" not found! ` +
              `while searching "${this.dataParams.dataName}" ` +
              `(The searchable properties are: ` +
              `${this.vectorizedProperties.map(p => `"${p.key}"`).join(', ')})`
            )
          ),
          errorCode: `${DyNTS_global_settings.systemShortCodeName}|DyNTS-LVS-VS2`,
          __localStack: this.dataParams.stackLocation,
        });
      }

      // Check that the property is one of the vectorized properties
      if (!this.vectorizedProperties.some(
        (p: DyFM_DataProperty_Params<any, T>) => p.key === searchInKey
      )) {
        throw new DyFM_Error({
          ...this.getDefaultErrorSettings(
            'vectorSearch',
            new Error(
              `Property "${searchInKey}" is not a vectorized property! ` +
              `(The vectorized properties are: ` +
              `${this.vectorizedProperties.map(p => `"${p.key}"`).join(', ')})`
            )
          ),
          errorCode: `${DyNTS_global_settings.systemShortCodeName}|DyNTS-LVS-VS3`,
          __localStack: this.dataParams.stackLocation,
        });
      }

      // 1. Load the data from the DB (narrowed by filterBy when given)
      let dataList: T[];
      if (filterBy) {
        if (this.debugLog) {
          DyFM_Log.log(`Local vector search: filtering data with filterBy...`);
        }
        dataList = await this.findDataList(filterBy, true);
      } else {
        if (this.debugLog) {
          DyFM_Log.log(`Local vector search: loading all data...`);
        }
        dataList = await this.getAll(true);
      }

      if (dataList.length === 0) {
        if (this.debugLog) {
          DyFM_Log.log(`Local vector search: no data found`);
        }
        return [];
      }

      // 2. Collect the vectors that will go into the pool
      if (this.debugLog) {
        DyFM_Log.log(
          `Local vector search: building vector pool from ${dataList.length} items...`
        );
      }

      // The shared pool is deliberately NOT touched yet: the query still has to
      // be vectorized (an await), and filling the pool before that would let a
      // concurrent vectorSearch call on this instance clear or replace it.
      const dataMap: Map<string, T> = new Map<string, T>();
      const poolEntries: Array<{ id: string; vector: number[] }> = [];
      for (const dataItem of dataList) {
        if (!dataItem._id) {
          // Without an _id the result could not be mapped back, so skip it
          if (this.debugLog) {
            DyFM_Log.warn(
              `Local vector search: skipping item without _id`
            );
          }
          continue;
        }

        const vectorizedValue: number[] | undefined =
          dataItem[searchInKey] as number[] | undefined;

        if (!vectorizedValue || !Array.isArray(vectorizedValue)) {
          // No stored vector for this item, skip it
          if (this.debugLog) {
            DyFM_Log.warn(
              `Local vector search: skipping item "${dataItem._id}" ` +
              `without vectorized value for "${searchInKey}"`
            );
          }
          continue;
        }

        poolEntries.push({ id: dataItem._id, vector: vectorizedValue });
        dataMap.set(dataItem._id, dataItem);
      }

      if (dataMap.size === 0) {
        if (this.debugLog) {
          DyFM_Log.log(`Local vector search: no valid vectors found`);
        }
        return [];
      }

      // 3. Vectorize the query input
      if (this.debugLog) {
        DyFM_Log.log(`Local vector search: vectorizing query input...`);
      }

      const queryVector: number[] = await this.vectorize(
        input,
        property.embeddingModel
      );

      // 4. Run the local vector search
      const mode: LVS_Search_Mode = searchMode ?? this.defaultSearchMode;

      if (this.debugLog) {
        DyFM_Log.log(
          `Local vector search: searching with mode "${mode}", limit: ${limit}...`
        );
      }

      // Fill -> search -> clear runs synchronously (no await inside), so no
      // other call can observe or modify the pool while it holds our vectors.
      let searchResults: LVS_SearchResult[];
      try {
        this.vectorPool.clearPool();
        for (const entry of poolEntries) {
          this.vectorPool.addVector(entry.id, entry.vector);
        }
        searchResults = this.vectorPool.search(
          queryVector,
          limit,
          mode
        );
      } finally {
        // Always release the pool's memory, on success and on failure
        this.vectorPool.clearPool();
      }

      if (this.debugLog) {
        DyFM_Log.log(
          `Local vector search: found ${searchResults.length} results`
        );
      }

      // 5. Map the search results back to the full data objects
      const results: T[] = [];
      for (const searchResult of searchResults) {
        const dataItem: T | undefined = dataMap.get(searchResult.id);
        if (dataItem) {
          results.push(dataItem);
        }
      }

      return results;
    } catch (error) {
      // The pool needs no clean-up here: it is only populated inside the
      // synchronous section above, whose finally block has already cleared it.
      throw new DyFM_Error({
        ...this.getDefaultErrorSettings('vectorSearch', error),
        errorCode: `${DyNTS_global_settings.systemShortCodeName}|DyNTS-LVS-VS0`,
      });
    }
  }
}
297
+
298
+ /**
299
+ * @example
300
+ * ```typescript
301
+ * // Create a local vector data service
302
+ * const localVectorService = new DyNTS_LVS_VectorDataService<CodeChunk>(
303
+ * new CodeChunk(),
304
+ * codeChunk_dataParams,
305
+ * openAISettings,
306
+ * 'example-issuer'
307
+ * );
308
+ *
309
+ * // Perform vector search with filterBy
310
+ * const results = await localVectorService.vectorSearch({
311
+ * input: 'How to implement authentication?',
312
+ * searchInKey: 'chunkParentedContentVectorized',
313
+ * limit: 5,
314
+ * filterBy: {
315
+ * // Filter by category before vector search
316
+ * category: 'documentation'
317
+ * },
318
+ * searchMode: LVS_Search_Mode.cosineSimilarity
319
+ * });
320
+ *
321
+ * // Results are sorted by similarity score (highest first for cosine)
322
+ * console.log(`Found ${results.length} similar chunks`);
323
+ *
324
+ * // Access the results
325
+ * for (const result of results) {
326
+ * console.log(`Chunk ID: ${result._id}, Content: ${result.chunkContent}`);
327
+ * }
328
+ * ```
329
+ */
330
+