@futdevpro/nts-dynamo 1.15.23 → 1.15.29

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (68) hide show
  1. package/README.md +4 -0
  2. package/__documentations/2026-05-17-oai-compatible-providers.md +229 -0
  3. package/_specifications/BACKLOG.md +28 -0
  4. package/build/_models/interfaces/compare-data-options.interface.d.ts +27 -0
  5. package/build/_models/interfaces/compare-data-options.interface.d.ts.map +1 -0
  6. package/build/_models/interfaces/compare-data-options.interface.js +3 -0
  7. package/build/_models/interfaces/compare-data-options.interface.js.map +1 -0
  8. package/build/_models/interfaces/compare-data-result.interface.d.ts +13 -0
  9. package/build/_models/interfaces/compare-data-result.interface.d.ts.map +1 -0
  10. package/build/_models/interfaces/compare-data-result.interface.js +3 -0
  11. package/build/_models/interfaces/compare-data-result.interface.js.map +1 -0
  12. package/build/_modules/ai/_models/interfaces/dynts-ai-cost-event-callback.interface.d.ts +14 -0
  13. package/build/_modules/ai/_models/interfaces/dynts-ai-cost-event-callback.interface.d.ts.map +1 -0
  14. package/build/_modules/ai/_models/interfaces/dynts-ai-cost-event-callback.interface.js +3 -0
  15. package/build/_modules/ai/_models/interfaces/dynts-ai-cost-event-callback.interface.js.map +1 -0
  16. package/build/_modules/ai/_models/interfaces/dynts-ai-cost-event.interface.d.ts +50 -0
  17. package/build/_modules/ai/_models/interfaces/dynts-ai-cost-event.interface.d.ts.map +1 -0
  18. package/build/_modules/ai/_models/interfaces/dynts-ai-cost-event.interface.js +3 -0
  19. package/build/_modules/ai/_models/interfaces/dynts-ai-cost-event.interface.js.map +1 -0
  20. package/build/_modules/ai/_modules/open-ai/_services/oai-embedding.control-service.d.ts.map +1 -1
  21. package/build/_modules/ai/_modules/open-ai/_services/oai-embedding.control-service.js +32 -0
  22. package/build/_modules/ai/_modules/open-ai/_services/oai-embedding.control-service.js.map +1 -1
  23. package/build/_modules/ai/_modules/open-ai/_services/oai-llm-chat.service-base.d.ts.map +1 -1
  24. package/build/_modules/ai/_modules/open-ai/_services/oai-llm-chat.service-base.js +20 -2
  25. package/build/_modules/ai/_modules/open-ai/_services/oai-llm-chat.service-base.js.map +1 -1
  26. package/build/_modules/ai/_modules/open-ai/_services/oai-llm.service-base.d.ts +4 -1
  27. package/build/_modules/ai/_modules/open-ai/_services/oai-llm.service-base.d.ts.map +1 -1
  28. package/build/_modules/ai/_modules/open-ai/_services/oai-llm.service-base.js +28 -1
  29. package/build/_modules/ai/_modules/open-ai/_services/oai-llm.service-base.js.map +1 -1
  30. package/build/_modules/ai/_services/ai-provider.service-base.d.ts +21 -0
  31. package/build/_modules/ai/_services/ai-provider.service-base.d.ts.map +1 -1
  32. package/build/_modules/ai/_services/ai-provider.service-base.js +32 -0
  33. package/build/_modules/ai/_services/ai-provider.service-base.js.map +1 -1
  34. package/build/_modules/local-vector-search/_enums/lvs-search-mode.enum.d.ts +17 -1
  35. package/build/_modules/local-vector-search/_enums/lvs-search-mode.enum.d.ts.map +1 -1
  36. package/build/_modules/local-vector-search/_enums/lvs-search-mode.enum.js +16 -0
  37. package/build/_modules/local-vector-search/_enums/lvs-search-mode.enum.js.map +1 -1
  38. package/build/_modules/local-vector-search/_services/lvs-bm25.util.d.ts +89 -0
  39. package/build/_modules/local-vector-search/_services/lvs-bm25.util.d.ts.map +1 -0
  40. package/build/_modules/local-vector-search/_services/lvs-bm25.util.js +190 -0
  41. package/build/_modules/local-vector-search/_services/lvs-bm25.util.js.map +1 -0
  42. package/build/_modules/local-vector-search/_services/lvs-local-vector-search.data-service.d.ts +18 -2
  43. package/build/_modules/local-vector-search/_services/lvs-local-vector-search.data-service.d.ts.map +1 -1
  44. package/build/_modules/local-vector-search/_services/lvs-local-vector-search.data-service.js +57 -3
  45. package/build/_modules/local-vector-search/_services/lvs-local-vector-search.data-service.js.map +1 -1
  46. package/build/_services/base/data.service.d.ts +63 -0
  47. package/build/_services/base/data.service.d.ts.map +1 -1
  48. package/build/_services/base/data.service.js +189 -0
  49. package/build/_services/base/data.service.js.map +1 -1
  50. package/package.json +1 -1
  51. package/src/_models/interfaces/compare-data-options.interface.ts +27 -0
  52. package/src/_models/interfaces/compare-data-result.interface.ts +12 -0
  53. package/src/_modules/ai/_models/interfaces/dynts-ai-cost-event-callback.interface.ts +14 -0
  54. package/src/_modules/ai/_models/interfaces/dynts-ai-cost-event.interface.ts +56 -0
  55. package/src/_modules/ai/_modules/open-ai/_services/oai-embedding.control-service.spec.ts +92 -0
  56. package/src/_modules/ai/_modules/open-ai/_services/oai-embedding.control-service.ts +38 -4
  57. package/src/_modules/ai/_modules/open-ai/_services/oai-llm-chat.service-base.ts +24 -5
  58. package/src/_modules/ai/_modules/open-ai/_services/oai-llm.service-base.spec.ts +52 -0
  59. package/src/_modules/ai/_modules/open-ai/_services/oai-llm.service-base.ts +39 -10
  60. package/src/_modules/ai/_services/ai-provider.service-base.spec.ts +79 -0
  61. package/src/_modules/ai/_services/ai-provider.service-base.ts +41 -3
  62. package/src/_modules/local-vector-search/_enums/lvs-search-mode.enum.ts +16 -0
  63. package/src/_modules/local-vector-search/_services/lvs-bm25.util.spec.ts +159 -0
  64. package/src/_modules/local-vector-search/_services/lvs-bm25.util.ts +206 -0
  65. package/src/_modules/local-vector-search/_services/lvs-local-vector-search.data-service.spec.ts +135 -0
  66. package/src/_modules/local-vector-search/_services/lvs-local-vector-search.data-service.ts +95 -9
  67. package/src/_services/base/data.service.spec.ts +181 -0
  68. package/src/_services/base/data.service.ts +196 -2
@@ -0,0 +1,206 @@
/**
 * BM25 text-search ranking utility for the LVS hybrid search (FR-004).
 *
 * Pure TypeScript, dependency-free. Scores are computed from an in-memory
 * corpus; no index is persisted (the hybrid call rebuilds the corpus from the
 * candidate documents on every invocation — for a small LVS corpus of roughly
 * 100..10000 documents this is fast and simple).
 *
 * Canonical params: k1=1.2, b=0.75 (the usual defaults in "Lucene-like"
 * implementations). They are NOT exposed — if needed, they can be made
 * configurable in a following iteration (FR-002).
 *
 * Tokenizer: `text.toLowerCase().match(TOKEN_REGEX) ?? []`. Case-insensitive;
 * a token is a maximal run of Unicode letters, combining marks, digits and
 * underscores. `UserController` stays one token (good for identifier
 * matching), `auth-flow` splits into two tokens (auth + flow), and accented
 * words such as `létezik` stay whole instead of being cut at the accent.
 *
 * IDF formula (smoothed, Lucene-style): `log((N - df + 0.5) / (df + 0.5) + 1)`.
 * The +1 inside the log guarantees that very common words still get a small
 * positive IDF rather than a negative one — important for the hybrid
 * score-merge, so that a document is never pulled down for containing a
 * common word. (This is not the "BM25+" variant, which instead adds a lower
 * bound to the term-frequency component.)
 */


/** BM25 k1 parameter (term saturation control). Canonical default. */
const BM25_K1: number = 1.2;

/** BM25 b parameter (length normalization weight, 0=off, 1=full). Canonical default. */
const BM25_B: number = 0.75;

/**
 * Token regex — Unicode letters (`\p{L}`), combining marks (`\p{M}`), numbers
 * (`\p{N}`) and underscore. For ASCII input this matches exactly what `\w+`
 * matches; unlike `\w+` it does not break words at non-ASCII letters.
 * The `u` flag is required for the `\p{…}` property escapes.
 */
const TOKEN_REGEX: RegExp = /[\p{L}\p{M}\p{N}_]+/gu;
32
+
33
+
34
+ /**
35
+ * BM25 score of a single document against a query, in the context of a
36
+ * pre-built corpus.
37
+ */
38
+ export interface DyNTS_LVS_BM25_DocScore {
39
+ /** Document identifier (as in the `id` of an LVS_SearchResult). */
40
+ id: string;
41
+ /** Raw BM25 score (0..∞). NOT normalized. */
42
+ score: number;
43
+ }
44
+
45
+
/**
 * A built BM25 corpus — the index of one fixed set of documents. `score()`
 * evaluates a query against that index and returns one score per document.
 *
 * A corpus is meant to be single-use within one hybrid search call; it does
 * not need caching, because construction is O(N * |doc|), which is negligible
 * for small N.
 */
export class DyNTS_LVS_BM25_Corpus {

  /**
   * Doc-length: id -> token count. Also the authoritative list of indexed
   * ids; its insertion order is the output order of `score()`.
   */
  private readonly docLengths: Map<string, number> = new Map<string, number>();
  /** Term -> document frequency (number of docs containing the term at least once). */
  private readonly termDocFreq: Map<string, number> = new Map<string, number>();
  /** Term -> id -> term frequency within that doc. */
  private readonly termFreqByDoc: Map<string, Map<string, number>> = new Map<string, Map<string, number>>();
  /** Average document length (token count). */
  private avgDocLength: number = 0;
  /** Total number of indexed documents. */
  private docCount: number = 0;

  /**
   * Builds a new corpus from the given id/text pairs.
   *
   * Does NOT throw on empty input — an empty corpus is valid (`score()` then
   * returns an empty array). Entries that are nullish or whose `id` / `text`
   * is not a string are skipped. If the same `id` occurs more than once, the
   * last text wins and the document is indexed exactly once, so document
   * frequencies and the average length stay consistent with `size()`.
   *
   * @param docs Documents to index.
   */
  constructor(docs: { id: string; text: string }[]) {
    if (!Array.isArray(docs) || docs.length === 0) { return; }

    // Collapse duplicate ids before indexing. Indexing both copies would count
    // the id twice in df and in the length total while it is only one document.
    const textById: Map<string, string> = new Map<string, string>();
    for (const doc of docs) {
      if (!doc || typeof doc.id !== 'string' || typeof doc.text !== 'string') { continue; }
      textById.set(doc.id, doc.text);
    }

    let totalLength: number = 0;
    for (const [docId, text] of textById) {
      const tokens: string[] = DyNTS_LVS_BM25_Corpus.tokenize(text);
      this.docLengths.set(docId, tokens.length);
      totalLength += tokens.length;

      // Term frequency within this document
      const localTf: Map<string, number> = new Map<string, number>();
      for (const tok of tokens) {
        localTf.set(tok, (localTf.get(tok) ?? 0) + 1);
      }

      for (const [term, tf] of localTf) {
        // Document frequency: +1 per distinct term, per document
        this.termDocFreq.set(term, (this.termDocFreq.get(term) ?? 0) + 1);
        // Reverse index: term -> (doc id -> tf)
        let perDoc: Map<string, number> | undefined = this.termFreqByDoc.get(term);
        if (!perDoc) {
          perDoc = new Map<string, number>();
          this.termFreqByDoc.set(term, perDoc);
        }
        perDoc.set(docId, tf);
      }
    }

    this.docCount = this.docLengths.size;
    this.avgDocLength = this.docCount > 0 ? totalLength / this.docCount : 0;
  }


  /**
   * Public tokenizer — exposed so that specs and callers can apply the same
   * normalization as the corpus: lower-case the text, then collect every
   * match of the module-level `TOKEN_REGEX`.
   *
   * @param text Text to tokenize.
   * @returns The tokens in order of appearance; `[]` for a non-string or
   *   empty input, or when nothing matches.
   */
  static tokenize(text: string): string[] {
    if (typeof text !== 'string' || text.length === 0) { return []; }
    return text.toLowerCase().match(TOKEN_REGEX) ?? [];
  }


  /**
   * Returns the number of indexed documents: valid entries with unique ids.
   * Documents whose text produced no tokens are counted as well.
   */
  size(): number {
    return this.docCount;
  }


  /**
   * BM25 score of every document for the given query.
   *
   * Empty query (no tokens) → every score is 0 (degenerate case; the caller
   * handles it if needed). Empty corpus → empty array.
   *
   * @param query Free-text query; tokenized with `tokenize()`.
   * @returns One entry per indexed document, in indexing order, with the raw
   *   (non-normalized) score.
   */
  score(query: string): DyNTS_LVS_BM25_DocScore[] {
    if (this.docCount === 0) { return []; }

    // Distinct query terms only (repeating a term does not increase its weight)
    const uniqueTerms: string[] = Array.from(
      new Set<string>(DyNTS_LVS_BM25_Corpus.tokenize(query)),
    );

    // Pre-compute IDF + postings for the query terms that occur in the corpus;
    // a term absent from every document cannot contribute to any score.
    const activeTerms: { idf: number; postings: Map<string, number> }[] = [];
    for (const term of uniqueTerms) {
      const postings: Map<string, number> | undefined = this.termFreqByDoc.get(term);
      if (!postings) { continue; }
      const df: number = this.termDocFreq.get(term) ?? 0;
      // Smoothed IDF: log((N - df + 0.5) / (df + 0.5) + 1), always positive
      const idf: number = Math.log((this.docCount - df + 0.5) / (df + 0.5) + 1);
      activeTerms.push({ idf: idf, postings: postings });
    }

    // Per-document BM25 score
    const results: DyNTS_LVS_BM25_DocScore[] = [];
    for (const [docId, docLen] of this.docLengths) {
      // Length normalization depends only on the document, so compute it once.
      // `|| 1` guards the division when every document is empty (average 0).
      const norm: number = 1 - BM25_B + BM25_B * (docLen / (this.avgDocLength || 1));
      let score: number = 0;
      for (const { idf, postings } of activeTerms) {
        const tf: number = postings.get(docId) ?? 0;
        if (tf === 0) { continue; }
        score += idf * (tf * (BM25_K1 + 1)) / (tf + BM25_K1 * norm);
      }
      results.push({ id: docId, score: score });
    }

    return results;
  }
}
172
+
173
+
/**
 * Min-max normalization onto the [0,1] range. The hybrid score-merge needs the
 * BM25 scores of the candidate set brought into the 0..1 band before they are
 * combined with the cosine score.
 *
 * Edge cases:
 *  - Empty (or non-array) input → [].
 *  - All scores identical (max - min === 0) → every score becomes 0.0 (NOT 0.5
 *    or 1.0; when there is no discrimination, no signal is added).
 *  - Negative scores (the IDF used by the corpus is always positive, but this
 *    is defensive): min-max works the same way.
 *  - Non-finite scores (NaN, ±Infinity) are ignored when determining min and
 *    max and are themselves mapped to 0.0, so a single bad value cannot turn
 *    the output into NaN and break a numeric sort on the merged score.
 *
 * @param scores Raw scores; the array and its items are not modified.
 * @returns A new array with the same ids in the same order and scores in [0,1].
 */
export function dyNTS_LVS_BM25_minMaxNormalize(
  scores: DyNTS_LVS_BM25_DocScore[],
): DyNTS_LVS_BM25_DocScore[] {
  if (!Array.isArray(scores) || scores.length === 0) { return []; }

  let min: number = Infinity;
  let max: number = -Infinity;
  for (const s of scores) {
    if (!Number.isFinite(s.score)) { continue; }
    if (s.score < min) { min = s.score; }
    if (s.score > max) { max = s.score; }
  }

  const range: number = max - min;
  // No usable spread: all finite scores equal, no finite score at all
  // (range is -Infinity), or the subtraction overflowed (range is Infinity).
  const hasSpread: boolean = Number.isFinite(range) && range > 0;

  return scores.map((s: DyNTS_LVS_BM25_DocScore): DyNTS_LVS_BM25_DocScore => ({
    id: s.id,
    score: hasSpread && Number.isFinite(s.score) ? (s.score - min) / range : 0,
  }));
}
@@ -341,5 +341,140 @@ describe('| DyNTS_LVS_VectorDataService', () => {
341
341
  expect(result[1]._id).toBe('data-1');
342
342
  });
343
343
  });
344
+
345
+ describe('| vectorSearch hybrid (FR-004)', () => {
346
+ const buildHybridCorpus = (): TestDataModel[] => {
347
+ const d1: TestDataModel = new TestDataModel();
348
+ d1._id = 'doc-user';
349
+ d1.content = 'the UserController handles authentication flow';
350
+ d1.contentVectorized = [0.4, 0.5, 0.6];
351
+ const d2: TestDataModel = new TestDataModel();
352
+ d2._id = 'doc-recipe';
353
+ d2.content = 'cooking recipes for desserts and cakes';
354
+ d2.contentVectorized = [0.45, 0.55, 0.65];
355
+ const d3: TestDataModel = new TestDataModel();
356
+ d3._id = 'doc-db';
357
+ d3.content = 'database setup guide for MongoDB';
358
+ d3.contentVectorized = [0.42, 0.52, 0.62];
359
+ return [d1, d2, d3];
360
+ };
361
+
362
+ it('| throws ha textSearchKey hianyzik hybrid modban (VS4)', async () => {
363
+ spyOn(service, 'getAll').and.returnValue(Promise.resolve(buildHybridCorpus()));
364
+ spyOn(service, 'vectorize').and.returnValue(Promise.resolve([0.4, 0.5, 0.6]));
365
+ try {
366
+ await service.vectorSearch({
367
+ input: 'UserController',
368
+ searchInKey: 'contentVectorized',
369
+ searchMode: LVS_Search_Mode.hybrid,
370
+ });
371
+ fail('Should have thrown an error');
372
+ } catch (err) {
373
+ expect(err).toBeInstanceOf(DyFM_Error);
374
+ expect((err as DyFM_Error)._errorCode).toContain('DyNTS-LVS-VS4');
375
+ }
376
+ });
377
+
378
+ it('| throws ha hybridWeight invalid (negativ) (VS5)', async () => {
379
+ spyOn(service, 'getAll').and.returnValue(Promise.resolve(buildHybridCorpus()));
380
+ spyOn(service, 'vectorize').and.returnValue(Promise.resolve([0.4, 0.5, 0.6]));
381
+ try {
382
+ await service.vectorSearch({
383
+ input: 'UserController',
384
+ searchInKey: 'contentVectorized',
385
+ searchMode: LVS_Search_Mode.hybrid,
386
+ textSearchKey: 'content',
387
+ hybridWeight: { vector: -0.5, text: 1.5 },
388
+ });
389
+ fail('Should have thrown an error');
390
+ } catch (err) {
391
+ expect(err).toBeInstanceOf(DyFM_Error);
392
+ expect((err as DyFM_Error)._errorCode).toContain('DyNTS-LVS-VS5');
393
+ }
394
+ });
395
+
396
+ it('| basic hybrid: text-relevant doc top-en', async () => {
397
+ spyOn(service, 'getAll').and.returnValue(Promise.resolve(buildHybridCorpus()));
398
+ spyOn(service, 'vectorize').and.returnValue(Promise.resolve([0.4, 0.5, 0.6]));
399
+ const result: TestDataModel[] = await service.vectorSearch({
400
+ input: 'UserController',
401
+ searchInKey: 'contentVectorized',
402
+ searchMode: LVS_Search_Mode.hybrid,
403
+ textSearchKey: 'content',
404
+ limit: 3,
405
+ });
406
+ expect(result.length).toBe(3);
407
+ expect(result[0]._id).toBe('doc-user');
408
+ });
409
+
410
+ it('| weight {vector:1, text:0} → effektivan pure cosine', async () => {
411
+ spyOn(service, 'getAll').and.returnValue(Promise.resolve(buildHybridCorpus()));
412
+ spyOn(service, 'vectorize').and.returnValue(Promise.resolve([0.42, 0.52, 0.62]));
413
+ const result: TestDataModel[] = await service.vectorSearch({
414
+ input: 'UserController',
415
+ searchInKey: 'contentVectorized',
416
+ searchMode: LVS_Search_Mode.hybrid,
417
+ textSearchKey: 'content',
418
+ hybridWeight: { vector: 1, text: 0 },
419
+ limit: 3,
420
+ });
421
+ expect(result.length).toBe(3);
422
+ expect(result[0]._id).toBe('doc-db');
423
+ });
424
+
425
+ it('| weight {vector:0, text:1} → effektivan pure BM25', async () => {
426
+ spyOn(service, 'getAll').and.returnValue(Promise.resolve(buildHybridCorpus()));
427
+ spyOn(service, 'vectorize').and.returnValue(Promise.resolve([0.45, 0.55, 0.65]));
428
+ const result: TestDataModel[] = await service.vectorSearch({
429
+ input: 'authentication',
430
+ searchInKey: 'contentVectorized',
431
+ searchMode: LVS_Search_Mode.hybrid,
432
+ textSearchKey: 'content',
433
+ hybridWeight: { vector: 0, text: 1 },
434
+ limit: 3,
435
+ });
436
+ expect(result.length).toBe(3);
437
+ expect(result[0]._id).toBe('doc-user');
438
+ });
439
+
440
+ it('| all-zero BM25 fallback → cosine-rendezes marad', async () => {
441
+ spyOn(service, 'getAll').and.returnValue(Promise.resolve(buildHybridCorpus()));
442
+ spyOn(service, 'vectorize').and.returnValue(Promise.resolve([0.45, 0.55, 0.65]));
443
+ const result: TestDataModel[] = await service.vectorSearch({
444
+ input: 'xyzzy-nonexistent-token',
445
+ searchInKey: 'contentVectorized',
446
+ searchMode: LVS_Search_Mode.hybrid,
447
+ textSearchKey: 'content',
448
+ limit: 3,
449
+ });
450
+ expect(result.length).toBe(3);
451
+ expect(result[0]._id).toBe('doc-recipe');
452
+ });
453
+
454
+ it('| limit honored hybrid modban', async () => {
455
+ spyOn(service, 'getAll').and.returnValue(Promise.resolve(buildHybridCorpus()));
456
+ spyOn(service, 'vectorize').and.returnValue(Promise.resolve([0.4, 0.5, 0.6]));
457
+ const result: TestDataModel[] = await service.vectorSearch({
458
+ input: 'UserController',
459
+ searchInKey: 'contentVectorized',
460
+ searchMode: LVS_Search_Mode.hybrid,
461
+ textSearchKey: 'content',
462
+ limit: 1,
463
+ });
464
+ expect(result.length).toBe(1);
465
+ });
466
+
467
+ it('| ures candidate-szet → ures eredmeny', async () => {
468
+ spyOn(service, 'getAll').and.returnValue(Promise.resolve([]));
469
+ spyOn(service, 'vectorize').and.returnValue(Promise.resolve([0.4, 0.5, 0.6]));
470
+ const result: TestDataModel[] = await service.vectorSearch({
471
+ input: 'UserController',
472
+ searchInKey: 'contentVectorized',
473
+ searchMode: LVS_Search_Mode.hybrid,
474
+ textSearchKey: 'content',
475
+ });
476
+ expect(result.length).toBe(0);
477
+ });
478
+ });
344
479
  });
345
480
 
@@ -10,6 +10,11 @@ import { DyFM_OAI_Settings } from '@futdevpro/fsm-dynamo/ai/open-ai';
10
10
  import { LVS_Search_Mode } from '../_enums/lvs-search-mode.enum';
11
11
  import { LVS_SearchResult } from '../_models/lvs-search-result.interface';
12
12
  import { LVS_VectorPool_ControlService } from './lvs-vector-pool.control-service';
13
+ import {
14
+ DyNTS_LVS_BM25_Corpus,
15
+ DyNTS_LVS_BM25_DocScore,
16
+ dyNTS_LVS_BM25_minMaxNormalize,
17
+ } from './lvs-bm25.util';
13
18
  import { DyNTS_OAI_VectorDataService } from '../../ai/_modules/open-ai/_services/data-services/oai-vector-data.service';
14
19
  import { DyNTS_global_settings } from '../../../_collections/global-settings.const';
15
20
 
@@ -109,10 +114,23 @@ export class DyNTS_LVS_VectorDataService<T extends DyFM_Metadata> extends DyNTS_
109
114
  */
110
115
  filterBy?: DyFM_DBFilter<T>;
111
116
  /**
112
- * Search mode (cosine similarity or L2 distance)
113
- * Defaults to this.defaultSearchMode
117
+ * Search mode (cosine similarity, L2 distance, or hybrid).
118
+ * Defaults to this.defaultSearchMode.
119
+ *
120
+ * `hybrid` mode combines cosine similarity (vector half) with BM25
121
+ * text scoring (text half) — `textSearchKey` is REQUIRED in hybrid mode.
114
122
  */
115
123
  searchMode?: LVS_Search_Mode;
124
+ /**
125
+ * Csak `hybrid` modban — weighted score-merge a cosine es a BM25 kozott.
126
+ * Default: { vector: 0.5, text: 0.5 }. Mindkettonek 0..1 tartomany javasolt.
127
+ */
128
+ hybridWeight?: { vector: number; text: number };
129
+ /**
130
+ * Csak `hybrid` modban — KOTELEZO; melyik string property-n fut a BM25
131
+ * text-search. NEM kell hogy a vectorized property legyen.
132
+ */
133
+ textSearchKey?: keyof T;
116
134
  },
117
135
  ): Promise<T[]> {
118
136
  try {
@@ -127,7 +145,33 @@ export class DyNTS_LVS_VectorDataService<T extends DyFM_Metadata> extends DyNTS_
127
145
  }
128
146
 
129
147
  set.limit ??= 3;
130
- const { input, searchInKey, limit, filterBy, searchMode } = set;
148
+ const { input, searchInKey, limit, filterBy, searchMode, hybridWeight, textSearchKey } = set;
149
+ const effectiveMode: LVS_Search_Mode = searchMode ?? this.defaultSearchMode;
150
+
151
+ // Hybrid mode korai validacio
152
+ if (effectiveMode === LVS_Search_Mode.hybrid) {
153
+ if (!textSearchKey) {
154
+ throw new DyFM_Error({
155
+ ...this.getDefaultErrorSettings(
156
+ 'vectorSearch',
157
+ new Error('textSearchKey is required when searchMode is hybrid'),
158
+ ),
159
+ errorCode: `${DyNTS_global_settings.systemShortCodeName}|DyNTS-LVS-VS4`,
160
+ });
161
+ }
162
+ if (hybridWeight) {
163
+ const w: { vector: number; text: number } = hybridWeight;
164
+ if (!Number.isFinite(w.vector) || !Number.isFinite(w.text) || w.vector < 0 || w.text < 0) {
165
+ throw new DyFM_Error({
166
+ ...this.getDefaultErrorSettings(
167
+ 'vectorSearch',
168
+ new Error('hybridWeight.vector and .text must be non-negative finite numbers'),
169
+ ),
170
+ errorCode: `${DyNTS_global_settings.systemShortCodeName}|DyNTS-LVS-VS5`,
171
+ });
172
+ }
173
+ }
174
+ }
131
175
 
132
176
  // Validáljuk, hogy a searchInKey létezik-e
133
177
  const property: DyFM_DataProperty_Params<any, T> =
@@ -250,7 +294,7 @@ export class DyNTS_LVS_VectorDataService<T extends DyFM_Metadata> extends DyNTS_
250
294
  );
251
295
 
252
296
  // 4. Végrehajtjuk a local vector search-t
253
- const mode: LVS_Search_Mode = searchMode ?? this.defaultSearchMode;
297
+ const mode: LVS_Search_Mode = effectiveMode;
254
298
 
255
299
  if (this.debugLog) {
256
300
  DyFM_Log.log(
@@ -258,11 +302,53 @@ export class DyNTS_LVS_VectorDataService<T extends DyFM_Metadata> extends DyNTS_
258
302
  );
259
303
  }
260
304
 
261
- const searchResults: LVS_SearchResult[] = this.vectorPool.search(
262
- queryVector,
263
- limit,
264
- mode
265
- );
305
+ let searchResults: LVS_SearchResult[];
306
+
307
+ if (mode === LVS_Search_Mode.hybrid) {
308
+ // Hybrid: cosine ALL candidate-re + BM25 ALL candidate-re + min-max norm + weighted sum
309
+ const allCandidatesCosine: LVS_SearchResult[] = this.vectorPool.search(
310
+ queryVector,
311
+ dataMap.size,
312
+ LVS_Search_Mode.cosineSimilarity,
313
+ );
314
+
315
+ // BM25 corpus epitese a `textSearchKey` property-bol
316
+ const docs: { id: string; text: string }[] = [];
317
+ for (const [docId, dataItem] of dataMap) {
318
+ const textValue: unknown = dataItem[textSearchKey as keyof T];
319
+ docs.push({
320
+ id: docId,
321
+ text: typeof textValue === 'string' ? textValue : '',
322
+ });
323
+ }
324
+ const bm25Corpus: DyNTS_LVS_BM25_Corpus = new DyNTS_LVS_BM25_Corpus(docs);
325
+ const bm25Raw: DyNTS_LVS_BM25_DocScore[] = bm25Corpus.score(input);
326
+ const bm25Normalized: DyNTS_LVS_BM25_DocScore[] = dyNTS_LVS_BM25_minMaxNormalize(bm25Raw);
327
+
328
+ const bm25ScoreById: Map<string, number> = new Map<string, number>();
329
+ for (const s of bm25Normalized) {
330
+ bm25ScoreById.set(s.id, s.score);
331
+ }
332
+
333
+ const wVector: number = hybridWeight?.vector ?? 0.5;
334
+ const wText: number = hybridWeight?.text ?? 0.5;
335
+
336
+ const merged: LVS_SearchResult[] = allCandidatesCosine.map((c: LVS_SearchResult): LVS_SearchResult => {
337
+ const bm25Score: number = bm25ScoreById.get(c.id) ?? 0;
338
+ return {
339
+ id: c.id,
340
+ score: wVector * c.score + wText * bm25Score,
341
+ };
342
+ });
343
+ merged.sort((a: LVS_SearchResult, b: LVS_SearchResult) => b.score - a.score);
344
+ searchResults = merged.slice(0, limit);
345
+ } else {
346
+ searchResults = this.vectorPool.search(
347
+ queryVector,
348
+ limit,
349
+ mode,
350
+ );
351
+ }
266
352
 
267
353
  if (this.debugLog) {
268
354
  DyFM_Log.log(
@@ -489,5 +489,186 @@ describe('| DyNTS_DataService', () => {
489
489
  }).toThrow();
490
490
  });
491
491
  });
492
+
493
+ // ════════════════════════════════════════════════════════════════════════
494
+ // FR-001 — Generic compareData()
495
+ // ════════════════════════════════════════════════════════════════════════
496
+
497
+ describe('| compareData() — FR-001', (): void => {
498
+ let svc: DyNTS_DataService<TestMetadata>;
499
+
500
+ beforeEach((): void => {
501
+ // Re-use existing TestMetadata + testDataParams.
502
+ const t: TestMetadata = new TestMetadata();
503
+ svc = new DyNTS_DataService<TestMetadata>(t, testDataParams, 'test-issuer');
504
+ });
505
+
506
+ it('| equal POJO returns { result: "equal" } — sin changedFields', (): void => {
507
+ const a: TestMetadata = Object.assign(new TestMetadata(), { name: 'alice', email: 'a@x' });
508
+ const b: TestMetadata = Object.assign(new TestMetadata(), { name: 'alice', email: 'a@x' });
509
+ const r = svc.compareData(a, b);
510
+ expect(r.result).toBe('equal');
511
+ expect(r.changedFields).toBeUndefined();
512
+ });
513
+
514
+ it('| modified single field — changedFields tartalmazza azt', (): void => {
515
+ const a: TestMetadata = Object.assign(new TestMetadata(), { name: 'alice', email: 'a@x' });
516
+ const b: TestMetadata = Object.assign(new TestMetadata(), { name: 'bob', email: 'a@x' });
517
+ const r = svc.compareData(a, b);
518
+ expect(r.result).toBe('modified');
519
+ expect(r.changedFields).toEqual(['name']);
520
+ });
521
+
522
+ it('| modified multi-field — changedFields TELJES listat ad (nem early-return)', (): void => {
523
+ const a: TestMetadata = Object.assign(new TestMetadata(), { name: 'alice', email: 'a@x', userId: 'u1' });
524
+ const b: TestMetadata = Object.assign(new TestMetadata(), { name: 'bob', email: 'b@y', userId: 'u2' });
525
+ const r = svc.compareData(a, b);
526
+ expect(r.result).toBe('modified');
527
+ expect(r.changedFields?.sort()).toEqual(['email', 'name', 'userId'] as any);
528
+ });
529
+
530
+ it('| auto-discovery SKIP-eli a metadata fields-et (_id, __created, __lastModified)', (): void => {
531
+ const now: Date = new Date();
532
+ const a: TestMetadata = Object.assign(new TestMetadata(), { name: 'alice', _id: 'X', __created: now, __lastModified: now });
533
+ const b: TestMetadata = Object.assign(new TestMetadata(), { name: 'alice', _id: 'Y', __created: new Date(2000, 0, 1), __lastModified: new Date(2001, 0, 1) });
534
+ const r = svc.compareData(a, b);
535
+ expect(r.result).toBe('equal');
536
+ });
537
+
538
+ it('| explicit fields override — KIFEJEZETT _id-val VIZSGALJA a skip-listet is', (): void => {
539
+ const a: TestMetadata = Object.assign(new TestMetadata(), { name: 'alice', _id: 'X' });
540
+ const b: TestMetadata = Object.assign(new TestMetadata(), { name: 'alice', _id: 'Y' });
541
+ const r = svc.compareData(a, b, { fields: ['_id' as keyof TestMetadata] });
542
+ expect(r.result).toBe('modified');
543
+ expect(r.changedFields).toEqual(['_id' as keyof TestMetadata]);
544
+ });
545
+
546
+ it('| explicit fields scope-szukit — csak a listazott fields szamit', (): void => {
547
+ const a: TestMetadata = Object.assign(new TestMetadata(), { name: 'alice', email: 'a@x' });
548
+ const b: TestMetadata = Object.assign(new TestMetadata(), { name: 'alice', email: 'b@y' });
549
+ const r = svc.compareData(a, b, { fields: ['name'] });
550
+ expect(r.result).toBe('equal'); // email change ignored, only 'name' checked
551
+ });
552
+
553
+ it('| customComparator override (case-insensitive string)', (): void => {
554
+ const a: TestMetadata = Object.assign(new TestMetadata(), { name: 'Alice' });
555
+ const b: TestMetadata = Object.assign(new TestMetadata(), { name: 'alice' });
556
+ const r = svc.compareData(a, b, {
557
+ customComparators: { name: (x: string, y: string) => x.toLowerCase() === y.toLowerCase() },
558
+ });
559
+ expect(r.result).toBe('equal');
560
+ });
561
+
562
+ it('| customComparator — array-set-equality eltero sorrendre', (): void => {
563
+ class WithArr extends DyFM_Metadata {
564
+ name?: string;
565
+ tags?: string[];
566
+ }
567
+ const arrSvc = new DyNTS_DataService<WithArr>(
568
+ new WithArr(),
569
+ new DyFM_DataModel_Params<WithArr>({
570
+ dataName: 'with_arr',
571
+ properties: { name: { key: 'name', type: DyFM_BasicProperty_Type.string } },
572
+ }),
573
+ 'test-issuer',
574
+ );
575
+ const a: WithArr = Object.assign(new WithArr(), { name: 'x', tags: ['a', 'b', 'c'] });
576
+ const b: WithArr = Object.assign(new WithArr(), { name: 'x', tags: ['c', 'b', 'a'] });
577
+ const r = arrSvc.compareData(a, b, {
578
+ customComparators: {
579
+ tags: (x: string[], y: string[]) => {
580
+ const s: Set<string> = new Set<string>(x);
581
+ return y.length === x.length && y.every((v: string) => s.has(v));
582
+ },
583
+ },
584
+ });
585
+ expect(r.result).toBe('equal');
586
+ });
587
+
588
+ it('| deep-equal: Date instances ugyanazon time-stamp-pel equal-nek szamitanak', (): void => {
589
+ class WithDate extends DyFM_Metadata {
590
+ when?: Date;
591
+ }
592
+ const dateSvc = new DyNTS_DataService<WithDate>(
593
+ new WithDate(),
594
+ new DyFM_DataModel_Params<WithDate>({
595
+ dataName: 'with_date',
596
+ properties: {},
597
+ }),
598
+ 'test-issuer',
599
+ );
600
+ const t: number = 1_700_000_000_000;
601
+ const a: WithDate = Object.assign(new WithDate(), { when: new Date(t) });
602
+ const b: WithDate = Object.assign(new WithDate(), { when: new Date(t) });
603
+ expect(dateSvc.compareData(a, b).result).toBe('equal');
604
+ });
605
+
606
+ it('| deep-equal: nested object', (): void => {
607
+ class WithNested extends DyFM_Metadata {
608
+ nested?: { a: number; b: { c: string } };
609
+ }
610
+ const nSvc = new DyNTS_DataService<WithNested>(
611
+ new WithNested(),
612
+ new DyFM_DataModel_Params<WithNested>({ dataName: 'with_nested', properties: {} }),
613
+ 'test-issuer',
614
+ );
615
+ const a: WithNested = Object.assign(new WithNested(), { nested: { a: 1, b: { c: 'x' } } });
616
+ const b: WithNested = Object.assign(new WithNested(), { nested: { a: 1, b: { c: 'x' } } });
617
+ const c: WithNested = Object.assign(new WithNested(), { nested: { a: 1, b: { c: 'y' } } });
618
+ expect(nSvc.compareData(a, b).result).toBe('equal');
619
+ expect(nSvc.compareData(a, c).result).toBe('modified');
620
+ });
621
+
622
+ it('| deep-equal: array index-szerinti compare (eltero sorrend → modified default-ban)', (): void => {
623
+ class WithArr2 extends DyFM_Metadata {
624
+ items?: number[];
625
+ }
626
+ const aSvc = new DyNTS_DataService<WithArr2>(
627
+ new WithArr2(),
628
+ new DyFM_DataModel_Params<WithArr2>({ dataName: 'with_arr2', properties: {} }),
629
+ 'test-issuer',
630
+ );
631
+ const a: WithArr2 = Object.assign(new WithArr2(), { items: [1, 2, 3] });
632
+ const b: WithArr2 = Object.assign(new WithArr2(), { items: [1, 2, 3] });
633
+ const c: WithArr2 = Object.assign(new WithArr2(), { items: [3, 2, 1] });
634
+ expect(aSvc.compareData(a, b).result).toBe('equal');
635
+ expect(aSvc.compareData(a, c).result).toBe('modified');
636
+ });
637
+
638
+ it('| throw 400 ha newData null', (): void => {
639
+ const b: TestMetadata = Object.assign(new TestMetadata(), { name: 'x' });
640
+ let thrown: any = null;
641
+ try { svc.compareData(null as any, b); } catch (e) { thrown = e; }
642
+ expect(thrown).not.toBeNull();
643
+ expect(DyFM_Error.getErrorStatus(thrown)).toBe(400);
644
+ expect(DyFM_Error.getErrorCode(thrown)).toContain('DyNTS-DS0-CD1');
645
+ });
646
+
647
+ it('| throw 400 ha oldData undefined', (): void => {
648
+ const a: TestMetadata = Object.assign(new TestMetadata(), { name: 'x' });
649
+ let thrown: any = null;
650
+ try { svc.compareData(a, undefined as any); } catch (e) { thrown = e; }
651
+ expect(thrown).not.toBeNull();
652
+ expect(DyFM_Error.getErrorStatus(thrown)).toBe(400);
653
+ });
654
+
655
+ it('| throw 400 ha options.fields ures array', (): void => {
656
+ const a: TestMetadata = Object.assign(new TestMetadata(), { name: 'alice' });
657
+ const b: TestMetadata = Object.assign(new TestMetadata(), { name: 'alice' });
658
+ let thrown: any = null;
659
+ try { svc.compareData(a, b, { fields: [] }); } catch (e) { thrown = e; }
660
+ expect(thrown).not.toBeNull();
661
+ expect(DyFM_Error.getErrorStatus(thrown)).toBe(400);
662
+ expect(DyFM_Error.getErrorCode(thrown)).toContain('DyNTS-DS0-CD2');
663
+ });
664
+
665
+ it('| asymmetric keys: ha az egyik objektum-bol hianyzik a key, modified-nek szamit', (): void => {
666
+ const a: TestMetadata = Object.assign(new TestMetadata(), { name: 'alice', email: 'a@x' });
667
+ const b: TestMetadata = Object.assign(new TestMetadata(), { name: 'alice' }); // no email
668
+ const r = svc.compareData(a, b);
669
+ expect(r.result).toBe('modified');
670
+ expect(r.changedFields).toEqual(['email']);
671
+ });
672
+ });
492
673
  });
493
674