@futdevpro/nts-dynamo 1.15.24 → 1.15.31

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (67) hide show
  1. package/.husky/pre-commit +1 -0
  2. package/_specifications/BACKLOG.md +42 -0
  3. package/build/_models/interfaces/compare-data-options.interface.d.ts +27 -0
  4. package/build/_models/interfaces/compare-data-options.interface.d.ts.map +1 -0
  5. package/build/_models/interfaces/compare-data-options.interface.js +3 -0
  6. package/build/_models/interfaces/compare-data-options.interface.js.map +1 -0
  7. package/build/_models/interfaces/compare-data-result.interface.d.ts +13 -0
  8. package/build/_models/interfaces/compare-data-result.interface.d.ts.map +1 -0
  9. package/build/_models/interfaces/compare-data-result.interface.js +3 -0
  10. package/build/_models/interfaces/compare-data-result.interface.js.map +1 -0
  11. package/build/_modules/ai/_models/interfaces/dynts-ai-cost-event-callback.interface.d.ts +14 -0
  12. package/build/_modules/ai/_models/interfaces/dynts-ai-cost-event-callback.interface.d.ts.map +1 -0
  13. package/build/_modules/ai/_models/interfaces/dynts-ai-cost-event-callback.interface.js +3 -0
  14. package/build/_modules/ai/_models/interfaces/dynts-ai-cost-event-callback.interface.js.map +1 -0
  15. package/build/_modules/ai/_models/interfaces/dynts-ai-cost-event.interface.d.ts +50 -0
  16. package/build/_modules/ai/_models/interfaces/dynts-ai-cost-event.interface.d.ts.map +1 -0
  17. package/build/_modules/ai/_models/interfaces/dynts-ai-cost-event.interface.js +3 -0
  18. package/build/_modules/ai/_models/interfaces/dynts-ai-cost-event.interface.js.map +1 -0
  19. package/build/_modules/ai/_modules/open-ai/_services/oai-embedding.control-service.d.ts.map +1 -1
  20. package/build/_modules/ai/_modules/open-ai/_services/oai-embedding.control-service.js +32 -0
  21. package/build/_modules/ai/_modules/open-ai/_services/oai-embedding.control-service.js.map +1 -1
  22. package/build/_modules/ai/_modules/open-ai/_services/oai-llm-chat.service-base.d.ts.map +1 -1
  23. package/build/_modules/ai/_modules/open-ai/_services/oai-llm-chat.service-base.js +20 -2
  24. package/build/_modules/ai/_modules/open-ai/_services/oai-llm-chat.service-base.js.map +1 -1
  25. package/build/_modules/ai/_modules/open-ai/_services/oai-llm.service-base.d.ts +4 -1
  26. package/build/_modules/ai/_modules/open-ai/_services/oai-llm.service-base.d.ts.map +1 -1
  27. package/build/_modules/ai/_modules/open-ai/_services/oai-llm.service-base.js +28 -1
  28. package/build/_modules/ai/_modules/open-ai/_services/oai-llm.service-base.js.map +1 -1
  29. package/build/_modules/ai/_services/ai-provider.service-base.d.ts +21 -0
  30. package/build/_modules/ai/_services/ai-provider.service-base.d.ts.map +1 -1
  31. package/build/_modules/ai/_services/ai-provider.service-base.js +32 -0
  32. package/build/_modules/ai/_services/ai-provider.service-base.js.map +1 -1
  33. package/build/_modules/local-vector-search/_enums/lvs-search-mode.enum.d.ts +17 -1
  34. package/build/_modules/local-vector-search/_enums/lvs-search-mode.enum.d.ts.map +1 -1
  35. package/build/_modules/local-vector-search/_enums/lvs-search-mode.enum.js +16 -0
  36. package/build/_modules/local-vector-search/_enums/lvs-search-mode.enum.js.map +1 -1
  37. package/build/_modules/local-vector-search/_services/lvs-bm25.util.d.ts +89 -0
  38. package/build/_modules/local-vector-search/_services/lvs-bm25.util.d.ts.map +1 -0
  39. package/build/_modules/local-vector-search/_services/lvs-bm25.util.js +190 -0
  40. package/build/_modules/local-vector-search/_services/lvs-bm25.util.js.map +1 -0
  41. package/build/_modules/local-vector-search/_services/lvs-local-vector-search.data-service.d.ts +18 -2
  42. package/build/_modules/local-vector-search/_services/lvs-local-vector-search.data-service.d.ts.map +1 -1
  43. package/build/_modules/local-vector-search/_services/lvs-local-vector-search.data-service.js +57 -3
  44. package/build/_modules/local-vector-search/_services/lvs-local-vector-search.data-service.js.map +1 -1
  45. package/build/_services/base/data.service.d.ts +63 -0
  46. package/build/_services/base/data.service.d.ts.map +1 -1
  47. package/build/_services/base/data.service.js +189 -0
  48. package/build/_services/base/data.service.js.map +1 -1
  49. package/package.json +3 -3
  50. package/src/_models/interfaces/compare-data-options.interface.ts +27 -0
  51. package/src/_models/interfaces/compare-data-result.interface.ts +12 -0
  52. package/src/_modules/ai/_models/interfaces/dynts-ai-cost-event-callback.interface.ts +14 -0
  53. package/src/_modules/ai/_models/interfaces/dynts-ai-cost-event.interface.ts +56 -0
  54. package/src/_modules/ai/_modules/open-ai/_services/oai-embedding.control-service.spec.ts +92 -0
  55. package/src/_modules/ai/_modules/open-ai/_services/oai-embedding.control-service.ts +38 -4
  56. package/src/_modules/ai/_modules/open-ai/_services/oai-llm-chat.service-base.ts +24 -5
  57. package/src/_modules/ai/_modules/open-ai/_services/oai-llm.service-base.spec.ts +52 -0
  58. package/src/_modules/ai/_modules/open-ai/_services/oai-llm.service-base.ts +39 -10
  59. package/src/_modules/ai/_services/ai-provider.service-base.spec.ts +79 -0
  60. package/src/_modules/ai/_services/ai-provider.service-base.ts +41 -3
  61. package/src/_modules/local-vector-search/_enums/lvs-search-mode.enum.ts +16 -0
  62. package/src/_modules/local-vector-search/_services/lvs-bm25.util.spec.ts +159 -0
  63. package/src/_modules/local-vector-search/_services/lvs-bm25.util.ts +206 -0
  64. package/src/_modules/local-vector-search/_services/lvs-local-vector-search.data-service.spec.ts +135 -0
  65. package/src/_modules/local-vector-search/_services/lvs-local-vector-search.data-service.ts +95 -9
  66. package/src/_services/base/data.service.spec.ts +181 -0
  67. package/src/_services/base/data.service.ts +196 -2
@@ -0,0 +1,159 @@
1
+ import {
2
+ DyNTS_LVS_BM25_Corpus,
3
+ DyNTS_LVS_BM25_DocScore,
4
+ dyNTS_LVS_BM25_minMaxNormalize,
5
+ } from './lvs-bm25.util';
6
+
7
+
8
describe('| DyNTS_LVS_BM25_Corpus.tokenize', (): void => {
  // Table of tokenizer expectations: spec title, raw input, expected token list.
  const tokenizeCases: { title: string; input: string; expected: string[] }[] = [
    // Lowercasing plus splitting on non-word characters.
    { title: '| lowercase + split on \\w+ boundaries', input: 'Hello, World!', expected: ['hello', 'world'] },
    // A PascalCase identifier stays a single token.
    { title: '| identifier marad egy tokenkent (PascalCase)', input: 'UserController', expected: ['usercontroller'] },
    // A hyphenated name splits into two tokens.
    { title: '| hyphenated nev ket tokenre esik', input: 'auth-flow', expected: ['auth', 'flow'] },
    // Empty string yields no tokens.
    { title: '| ures string → ures array', input: '', expected: [] },
    // Whitespace / punctuation only yields no tokens.
    { title: '| csak whitespace/punctuation → ures array', input: ' ,.! ', expected: [] },
    // Underscore is a word character, so snake_case stays one token.
    { title: '| underscore megmarad (snake_case is egy token)', input: 'user_controller', expected: ['user_controller'] },
  ];

  for (const testCase of tokenizeCases) {
    it(testCase.title, (): void => {
      const tokens: string[] = DyNTS_LVS_BM25_Corpus.tokenize(testCase.input);
      expect(tokens).toEqual(testCase.expected);
    });
  }
});
33
+
34
+
35
describe('| DyNTS_LVS_BM25_Corpus.score', (): void => {
  /** Builds a corpus from `[id, text]` pairs, in the given order. */
  const corpusOf = (...pairs: [string, string][]): DyNTS_LVS_BM25_Corpus => {
    return new DyNTS_LVS_BM25_Corpus(
      pairs.map((pair: [string, string]): { id: string; text: string } => ({ id: pair[0], text: pair[1] })),
    );
  };

  /** Looks up the result entry of document `id`. */
  const entryFor = (results: DyNTS_LVS_BM25_DocScore[], id: string): DyNTS_LVS_BM25_DocScore => {
    return results.find((entry: DyNTS_LVS_BM25_DocScore) => entry.id === id)!;
  };

  /** True when every entry carries a score of exactly 0. */
  const allZero = (results: DyNTS_LVS_BM25_DocScore[]): boolean => {
    return results.every((entry: DyNTS_LVS_BM25_DocScore) => entry.score === 0);
  };

  it('| ures corpus → ures array', (): void => {
    const emptyCorpus: DyNTS_LVS_BM25_Corpus = corpusOf();
    expect(emptyCorpus.size()).toBe(0);
    expect(emptyCorpus.score('anything')).toEqual([]);
  });

  it('| ures query → minden doc 0 score-t kap', (): void => {
    const scored: DyNTS_LVS_BM25_DocScore[] = corpusOf(['a', 'foo bar'], ['b', 'baz']).score('');
    expect(scored.length).toBe(2);
    expect(allZero(scored)).toBe(true);
  });

  it('| query NEM matchelo termmel → minden score 0', (): void => {
    const scored: DyNTS_LVS_BM25_DocScore[] = corpusOf(['a', 'foo bar'], ['b', 'baz']).score('xyz');
    expect(allZero(scored)).toBe(true);
  });

  it('| relevant doc magasabb score-t kap mint nem-relevant', (): void => {
    const scored: DyNTS_LVS_BM25_DocScore[] = corpusOf(
      ['a', 'the UserController handles authentication'],
      ['b', 'cooking recipes for desserts'],
      ['c', 'database setup guide'],
    ).score('UserController');
    const relevant: DyNTS_LVS_BM25_DocScore = entryFor(scored, 'a');
    const irrelevant: DyNTS_LVS_BM25_DocScore = entryFor(scored, 'b');
    expect(relevant.score).toBeGreaterThan(0);
    expect(irrelevant.score).toBe(0);
    expect(relevant.score).toBeGreaterThan(irrelevant.score);
  });

  it('| rarer term (alacsonyabb df) magasabb IDF-et ad → magasabb score', (): void => {
    // 'common' occurs in every doc → low IDF; 'rare' occurs only in doc 'a' → high IDF.
    const corpus: DyNTS_LVS_BM25_Corpus = corpusOf(
      ['a', 'common rare'],
      ['b', 'common'],
      ['c', 'common'],
      ['d', 'common'],
    );
    const scoredCommon: DyNTS_LVS_BM25_DocScore[] = corpus.score('common');
    const scoredRare: DyNTS_LVS_BM25_DocScore[] = corpus.score('rare');
    const commonScore: number = entryFor(scoredCommon, 'a').score;
    const rareScore: number = entryFor(scoredRare, 'a').score;
    expect(rareScore).toBeGreaterThan(commonScore);
  });

  it('| query-term ismetles NEM no monotonan a score-on (term saturation k1)', (): void => {
    // Doc 'a' contains 'foo' once, doc 'b' contains it four times.
    const scored: DyNTS_LVS_BM25_DocScore[] = corpusOf(
      ['a', 'foo bar baz qux'],
      ['b', 'foo foo foo foo bar baz qux'],
    ).score('foo');
    const single: number = entryFor(scored, 'a').score;
    const repeated: number = entryFor(scored, 'b').score;
    // 'b' beats 'a', but by less than a factor of 4 (saturation).
    expect(repeated).toBeGreaterThan(single);
    expect(repeated).toBeLessThan(4 * single);
  });

  it('| case-insensitive — uppercase query matchel lowercase doc-ot', (): void => {
    const scored: DyNTS_LVS_BM25_DocScore[] = corpusOf(['a', 'usercontroller is great']).score('USERCONTROLLER');
    expect(scored[0].score).toBeGreaterThan(0);
  });

  it('| ket query term aggregalja az IDF-eket additivan', (): void => {
    const corpus: DyNTS_LVS_BM25_Corpus = corpusOf(
      ['a', 'auth flow handler'],
      ['b', 'auth only'],
      ['c', 'flow only'],
    );
    const twoTerms: number = entryFor(corpus.score('auth flow'), 'a').score;
    const oneTerm: number = entryFor(corpus.score('auth'), 'a').score;
    expect(twoTerms).toBeGreaterThan(oneTerm);
  });

  it('| invalid input doc (null/missing id/text) silently skipd', (): void => {
    // Malformed entries are built inline: they cannot be expressed as `[id, text]` pairs.
    const mixedInput: { id: string; text: string }[] = [
      { id: 'a', text: 'hello' },
      // eslint-disable-next-line @typescript-eslint/no-explicit-any
      null as any,
      // eslint-disable-next-line @typescript-eslint/no-explicit-any
      { id: 'b' } as any,
    ];
    const corpus: DyNTS_LVS_BM25_Corpus = new DyNTS_LVS_BM25_Corpus(mixedInput);
    expect(corpus.size()).toBe(1);
  });
});
134
+
135
+
136
describe('| dyNTS_LVS_BM25_minMaxNormalize', (): void => {
  /** Normalized score of document `id` within `results`. */
  const normalizedScore = (results: DyNTS_LVS_BM25_DocScore[], id: string): number => {
    return results.find((entry) => entry.id === id)!.score;
  };

  it('| ures input → ures array', (): void => {
    const normalized: DyNTS_LVS_BM25_DocScore[] = dyNTS_LVS_BM25_minMaxNormalize([]);
    expect(normalized).toEqual([]);
  });

  it('| min 0, max 10 → normalizalva [0..1]', (): void => {
    const rawScores: DyNTS_LVS_BM25_DocScore[] = [
      { id: 'a', score: 0 },
      { id: 'b', score: 5 },
      { id: 'c', score: 10 },
    ];
    const normalized: DyNTS_LVS_BM25_DocScore[] = dyNTS_LVS_BM25_minMaxNormalize(rawScores);
    expect(normalizedScore(normalized, 'a')).toBeCloseTo(0, 5);
    expect(normalizedScore(normalized, 'b')).toBeCloseTo(0.5, 5);
    expect(normalizedScore(normalized, 'c')).toBeCloseTo(1, 5);
  });

  it('| azonos score → minden 0 (NEM 0.5, nincs jel diszkriminaciora)', (): void => {
    const rawScores: DyNTS_LVS_BM25_DocScore[] = [
      { id: 'a', score: 3 },
      { id: 'b', score: 3 },
    ];
    const normalized: DyNTS_LVS_BM25_DocScore[] = dyNTS_LVS_BM25_minMaxNormalize(rawScores);
    expect(normalized.every((entry) => entry.score === 0)).toBe(true);
  });
});
@@ -0,0 +1,206 @@
1
/**
 * BM25 text-search ranking utility for the LVS hybrid search (FR-004).
 *
 * Pure TypeScript, dependency-free. Scores are computed from an in-memory corpus;
 * NO index is persisted (each hybrid call rebuilds the corpus from the candidate
 * documents — for a small LVS corpus (~100..10000 documents) this is fast and
 * simple).
 *
 * Canonical params: k1=1.2, b=0.75 (the industry-standard defaults in "Lucene-like"
 * implementations). They are NOT exposed — if needed, they can be made
 * configurable in the next iteration of FR-002.
 *
 * Tokenizer: `text.toLowerCase().match(/\w+/g) ?? []`. Case-insensitive, splitting
 * on alphanumeric+underscore boundaries. `UserController` stays a single token
 * (good for identifier matching), `auth-flow` splits into two tokens (auth + flow).
 *
 * IDF formula (Lucene-style smoothed IDF): `log((N - df + 0.5) / (df + 0.5) + 1)`.
 * The +1 inside the log guarantees that common words also get a positive (small)
 * IDF, NOT a negative one — this matters for the hybrid score merge, so that a
 * document is not pulled down merely because it contains a common word.
 */
22
+
23
+
24
/** BM25 k1 parameter (term-frequency saturation control). Canonical default. */
const BM25_K1: number = 1.2;

/** BM25 b parameter (length-normalization weight; 0 = off, 1 = full). Canonical default. */
const BM25_B: number = 0.75;

/**
 * Token regex — runs of `\w` characters. Without the `u` flag, `\w` is ASCII-only
 * (`[A-Za-z0-9_]`), so accented letters act as token separators.
 */
const TOKEN_REGEX: RegExp = /\w+/g;


/**
 * BM25 score of a single document against a query, in the context of a
 * pre-built corpus.
 */
export interface DyNTS_LVS_BM25_DocScore {
  /** Document identifier (as in the `id` of LVS_SearchResult). */
  id: string;
  /** Raw BM25 score (0..∞). NOT normalized. */
  score: number;
}


/**
 * A built BM25 corpus — the index of one specific set of documents. `score()`
 * runs a query against that index and returns one score per indexed document.
 *
 * The index is built once in the constructor and never mutated afterwards;
 * construction is O(N * |doc|).
 */
export class DyNTS_LVS_BM25_Corpus {

  /**
   * Document length: id -> token count. Also serves as the registry of indexed
   * ids; its insertion order is the order of the entries returned by `score()`.
   */
  private readonly docLengths: Map<string, number> = new Map<string, number>();
  /** Term -> document frequency (number of documents containing the term at least once). */
  private readonly termDocFreq: Map<string, number> = new Map<string, number>();
  /** Term -> (id -> term frequency within that document). */
  private readonly termFreqByDoc: Map<string, Map<string, number>> = new Map<string, Map<string, number>>();
  /** Average document length (token count). */
  private avgDocLength: number = 0;
  /** Number of indexed documents. */
  private docCount: number = 0;

  /**
   * Builds a new corpus from the given id/text pairs.
   *
   * Does NOT throw on empty input — an empty corpus is valid. Entries that are
   * null/undefined or whose `id` / `text` is not a string are silently skipped.
   * When an id occurs more than once, only its FIRST occurrence is indexed, so
   * the document count, document frequencies and average length always describe
   * the same set of documents.
   *
   * @param docs documents to index
   */
  constructor(docs: { id: string; text: string }[]) {
    if (!Array.isArray(docs) || docs.length === 0) { return; }

    let totalLength: number = 0;
    for (const doc of docs) {
      if (!doc || typeof doc.id !== 'string' || typeof doc.text !== 'string') { continue; }
      // Indexing a repeated id again would count its terms and length twice
      // while the document count only grows by one.
      if (this.docLengths.has(doc.id)) { continue; }

      const tokens: string[] = DyNTS_LVS_BM25_Corpus.tokenize(doc.text);
      this.docLengths.set(doc.id, tokens.length);
      totalLength += tokens.length;

      // Term frequency within this document
      const localTf: Map<string, number> = new Map<string, number>();
      for (const tok of tokens) {
        localTf.set(tok, (localTf.get(tok) ?? 0) + 1);
      }

      for (const [term, tf] of localTf) {
        // Document frequency: +1 per distinct term, per document
        this.termDocFreq.set(term, (this.termDocFreq.get(term) ?? 0) + 1);
        // Reverse index: term -> (doc id -> tf)
        let perDoc: Map<string, number> | undefined = this.termFreqByDoc.get(term);
        if (!perDoc) {
          perDoc = new Map<string, number>();
          this.termFreqByDoc.set(term, perDoc);
        }
        perDoc.set(doc.id, tf);
      }
    }

    this.docCount = this.docLengths.size;
    this.avgDocLength = this.docCount > 0 ? totalLength / this.docCount : 0;
  }


  /**
   * Public tokenizer — exposed so that specs and callers can apply exactly the
   * same normalization as the corpus: lowercase, then split into `\w+` runs.
   *
   * @param text raw text
   * @returns the lowercase tokens; `[]` for a non-string or empty input
   */
  static tokenize(text: string): string[] {
    if (typeof text !== 'string' || text.length === 0) { return []; }
    return text.toLowerCase().match(TOKEN_REGEX) ?? [];
  }


  /**
   * Returns the number of indexed documents. Documents whose text produced no
   * tokens are counted as well; only invalid or duplicate entries are left out.
   */
  size(): number {
    return this.docCount;
  }


  /**
   * BM25 score of every indexed document for the given query.
   *
   * Empty corpus → empty array. A query without tokens, or without any term
   * known to the corpus, yields a 0 score for every document.
   *
   * @param query raw query text (tokenized with `tokenize`)
   * @returns one entry per indexed document, in indexing order
   */
  score(query: string): DyNTS_LVS_BM25_DocScore[] {
    if (this.docCount === 0) { return []; }

    // Distinct query terms only — repeating a term in the query adds no weight.
    const uniqueTerms: string[] = Array.from(new Set<string>(DyNTS_LVS_BM25_Corpus.tokenize(query)));

    // Resolve IDF and postings once per term; terms unknown to the corpus can
    // never contribute to any score, so they are dropped here.
    const matchedTerms: { idf: number; postings: Map<string, number> }[] = [];
    for (const term of uniqueTerms) {
      const postings: Map<string, number> | undefined = this.termFreqByDoc.get(term);
      if (!postings) { continue; }
      const df: number = this.termDocFreq.get(term) ?? 0;
      // Smoothed IDF: log((N - df + 0.5) / (df + 0.5) + 1) — positive for df <= N
      const idf: number = Math.log((this.docCount - df + 0.5) / (df + 0.5) + 1);
      matchedTerms.push({ idf: idf, postings: postings });
    }

    // Guard against a zero average (every document empty) in the length ratio.
    const avgLength: number = this.avgDocLength || 1;

    const results: DyNTS_LVS_BM25_DocScore[] = [];
    for (const [docId, docLen] of this.docLengths) {
      // Length normalization depends only on the document, not on the term.
      const norm: number = 1 - BM25_B + BM25_B * (docLen / avgLength);
      let score: number = 0;
      for (const matched of matchedTerms) {
        const tf: number = matched.postings.get(docId) ?? 0;
        if (tf === 0) { continue; }
        score += matched.idf * (tf * (BM25_K1 + 1)) / (tf + BM25_K1 * norm);
      }
      results.push({ id: docId, score: score });
    }

    return results;
  }
}
172
+
173
+
174
/**
 * Min-max normalization onto the [0, 1] range. The hybrid score merge needs the
 * BM25 scores of the candidate set rescaled into a 0..1 band.
 *
 * Edge cases:
 *  - Empty (or non-array) input → [].
 *  - All scores equal (max - min === 0) → every score becomes 0 (NOT 0.5 or 1;
 *    without any discrimination no signal should be added).
 *  - Negative scores: handled by the same min-max formula.
 *
 * @param scores raw per-document scores
 * @returns a new array with the same ids, in the same order, with rescaled scores
 */
export function dyNTS_LVS_BM25_minMaxNormalize(
  scores: DyNTS_LVS_BM25_DocScore[],
): DyNTS_LVS_BM25_DocScore[] {
  if (!Array.isArray(scores) || scores.length === 0) { return []; }

  // Single pass collecting [lowest, highest] with strict comparisons.
  const bounds: [number, number] = scores.reduce(
    (acc: [number, number], entry: DyNTS_LVS_BM25_DocScore): [number, number] => [
      entry.score < acc[0] ? entry.score : acc[0],
      entry.score > acc[1] ? entry.score : acc[1],
    ],
    [Infinity, -Infinity],
  );
  const lowest: number = bounds[0];
  const spread: number = bounds[1] - lowest;
  // A zero spread means there is nothing to discriminate on: emit flat zeros.
  const isFlat: boolean = spread === 0;

  return scores.map((entry: DyNTS_LVS_BM25_DocScore): DyNTS_LVS_BM25_DocScore => {
    return {
      id: entry.id,
      score: isFlat ? 0 : (entry.score - lowest) / spread,
    };
  });
}
@@ -341,5 +341,140 @@ describe('| DyNTS_LVS_VectorDataService', () => {
341
341
  expect(result[1]._id).toBe('data-1');
342
342
  });
343
343
  });
344
+
345
describe('| vectorSearch hybrid (FR-004)', () => {
  /** Parameter object accepted by `service.vectorSearch`. */
  type HybridSearchParams = Parameters<typeof service.vectorSearch>[0];

  /** Creates one stored document with the given id, text and pre-computed vector. */
  const makeDoc = (id: string, content: string, vector: number[]): TestDataModel => {
    const doc: TestDataModel = new TestDataModel();
    doc._id = id;
    doc.content = content;
    doc.contentVectorized = vector;
    return doc;
  };

  /** Three-document fixture: one identifier-heavy doc and two unrelated ones. */
  const buildHybridCorpus = (): TestDataModel[] => [
    makeDoc('doc-user', 'the UserController handles authentication flow', [0.4, 0.5, 0.6]),
    makeDoc('doc-recipe', 'cooking recipes for desserts and cakes', [0.45, 0.55, 0.65]),
    makeDoc('doc-db', 'database setup guide for MongoDB', [0.42, 0.52, 0.62]),
  ];

  /** Stubs the stored documents (`getAll`) and the query embedding (`vectorize`). */
  const stubService = (queryVector: number[], stored: TestDataModel[] = buildHybridCorpus()): void => {
    spyOn(service, 'getAll').and.returnValue(Promise.resolve(stored));
    spyOn(service, 'vectorize').and.returnValue(Promise.resolve(queryVector));
  };

  /** Runs the search and expects it to reject with a DyFM_Error carrying `code`. */
  const expectErrorCode = async (params: HybridSearchParams, code: string): Promise<void> => {
    try {
      await service.vectorSearch(params);
      fail('Should have thrown an error');
    } catch (err) {
      expect(err).toBeInstanceOf(DyFM_Error);
      expect((err as DyFM_Error)._errorCode).toContain(code);
    }
  };

  it('| throws ha textSearchKey hianyzik hybrid modban (VS4)', async () => {
    stubService([0.4, 0.5, 0.6]);
    await expectErrorCode(
      {
        input: 'UserController',
        searchInKey: 'contentVectorized',
        searchMode: LVS_Search_Mode.hybrid,
      },
      'DyNTS-LVS-VS4',
    );
  });

  it('| throws ha hybridWeight invalid (negativ) (VS5)', async () => {
    stubService([0.4, 0.5, 0.6]);
    await expectErrorCode(
      {
        input: 'UserController',
        searchInKey: 'contentVectorized',
        searchMode: LVS_Search_Mode.hybrid,
        textSearchKey: 'content',
        hybridWeight: { vector: -0.5, text: 1.5 },
      },
      'DyNTS-LVS-VS5',
    );
  });

  it('| basic hybrid: text-relevant doc top-en', async () => {
    stubService([0.4, 0.5, 0.6]);
    const found: TestDataModel[] = await service.vectorSearch({
      input: 'UserController',
      searchInKey: 'contentVectorized',
      searchMode: LVS_Search_Mode.hybrid,
      textSearchKey: 'content',
      limit: 3,
    });
    expect(found.length).toBe(3);
    expect(found[0]._id).toBe('doc-user');
  });

  it('| weight {vector:1, text:0} → effektivan pure cosine', async () => {
    // The query vector equals the stored vector of 'doc-db'.
    stubService([0.42, 0.52, 0.62]);
    const found: TestDataModel[] = await service.vectorSearch({
      input: 'UserController',
      searchInKey: 'contentVectorized',
      searchMode: LVS_Search_Mode.hybrid,
      textSearchKey: 'content',
      hybridWeight: { vector: 1, text: 0 },
      limit: 3,
    });
    expect(found.length).toBe(3);
    expect(found[0]._id).toBe('doc-db');
  });

  it('| weight {vector:0, text:1} → effektivan pure BM25', async () => {
    // The query vector equals the stored vector of 'doc-recipe'; only the text half carries weight.
    stubService([0.45, 0.55, 0.65]);
    const found: TestDataModel[] = await service.vectorSearch({
      input: 'authentication',
      searchInKey: 'contentVectorized',
      searchMode: LVS_Search_Mode.hybrid,
      textSearchKey: 'content',
      hybridWeight: { vector: 0, text: 1 },
      limit: 3,
    });
    expect(found.length).toBe(3);
    expect(found[0]._id).toBe('doc-user');
  });

  it('| all-zero BM25 fallback → cosine-rendezes marad', async () => {
    // None of the query tokens occurs in any stored document.
    stubService([0.45, 0.55, 0.65]);
    const found: TestDataModel[] = await service.vectorSearch({
      input: 'xyzzy-nonexistent-token',
      searchInKey: 'contentVectorized',
      searchMode: LVS_Search_Mode.hybrid,
      textSearchKey: 'content',
      limit: 3,
    });
    expect(found.length).toBe(3);
    expect(found[0]._id).toBe('doc-recipe');
  });

  it('| limit honored hybrid modban', async () => {
    stubService([0.4, 0.5, 0.6]);
    const found: TestDataModel[] = await service.vectorSearch({
      input: 'UserController',
      searchInKey: 'contentVectorized',
      searchMode: LVS_Search_Mode.hybrid,
      textSearchKey: 'content',
      limit: 1,
    });
    expect(found.length).toBe(1);
  });

  it('| ures candidate-szet → ures eredmeny', async () => {
    stubService([0.4, 0.5, 0.6], []);
    const found: TestDataModel[] = await service.vectorSearch({
      input: 'UserController',
      searchInKey: 'contentVectorized',
      searchMode: LVS_Search_Mode.hybrid,
      textSearchKey: 'content',
    });
    expect(found.length).toBe(0);
  });
});
344
479
  });
345
480
 
@@ -10,6 +10,11 @@ import { DyFM_OAI_Settings } from '@futdevpro/fsm-dynamo/ai/open-ai';
10
10
  import { LVS_Search_Mode } from '../_enums/lvs-search-mode.enum';
11
11
  import { LVS_SearchResult } from '../_models/lvs-search-result.interface';
12
12
  import { LVS_VectorPool_ControlService } from './lvs-vector-pool.control-service';
13
+ import {
14
+ DyNTS_LVS_BM25_Corpus,
15
+ DyNTS_LVS_BM25_DocScore,
16
+ dyNTS_LVS_BM25_minMaxNormalize,
17
+ } from './lvs-bm25.util';
13
18
  import { DyNTS_OAI_VectorDataService } from '../../ai/_modules/open-ai/_services/data-services/oai-vector-data.service';
14
19
  import { DyNTS_global_settings } from '../../../_collections/global-settings.const';
15
20
 
@@ -109,10 +114,23 @@ export class DyNTS_LVS_VectorDataService<T extends DyFM_Metadata> extends DyNTS_
109
114
  */
110
115
  filterBy?: DyFM_DBFilter<T>;
111
116
  /**
112
- * Search mode (cosine similarity or L2 distance)
113
- * Defaults to this.defaultSearchMode
117
+ * Search mode (cosine similarity, L2 distance, or hybrid).
118
+ * Defaults to this.defaultSearchMode.
119
+ *
120
+ * `hybrid` mode combines cosine similarity (vector half) with BM25
121
+ * text scoring (text half) — `textSearchKey` is REQUIRED in hybrid mode.
114
122
  */
115
123
  searchMode?: LVS_Search_Mode;
124
+ /**
125
+ * Csak `hybrid` modban — weighted score-merge a cosine es a BM25 kozott.
126
+ * Default: { vector: 0.5, text: 0.5 }. Mindkettonek 0..1 tartomany javasolt.
127
+ */
128
+ hybridWeight?: { vector: number; text: number };
129
+ /**
130
+ * Csak `hybrid` modban — KOTELEZO; melyik string property-n fut a BM25
131
+ * text-search. NEM kell hogy a vectorized property legyen.
132
+ */
133
+ textSearchKey?: keyof T;
116
134
  },
117
135
  ): Promise<T[]> {
118
136
  try {
@@ -127,7 +145,33 @@ export class DyNTS_LVS_VectorDataService<T extends DyFM_Metadata> extends DyNTS_
127
145
  }
128
146
 
129
147
  set.limit ??= 3;
130
- const { input, searchInKey, limit, filterBy, searchMode } = set;
148
+ const { input, searchInKey, limit, filterBy, searchMode, hybridWeight, textSearchKey } = set;
149
+ const effectiveMode: LVS_Search_Mode = searchMode ?? this.defaultSearchMode;
150
+
151
+ // Hybrid mode korai validacio
152
+ if (effectiveMode === LVS_Search_Mode.hybrid) {
153
+ if (!textSearchKey) {
154
+ throw new DyFM_Error({
155
+ ...this.getDefaultErrorSettings(
156
+ 'vectorSearch',
157
+ new Error('textSearchKey is required when searchMode is hybrid'),
158
+ ),
159
+ errorCode: `${DyNTS_global_settings.systemShortCodeName}|DyNTS-LVS-VS4`,
160
+ });
161
+ }
162
+ if (hybridWeight) {
163
+ const w: { vector: number; text: number } = hybridWeight;
164
+ if (!Number.isFinite(w.vector) || !Number.isFinite(w.text) || w.vector < 0 || w.text < 0) {
165
+ throw new DyFM_Error({
166
+ ...this.getDefaultErrorSettings(
167
+ 'vectorSearch',
168
+ new Error('hybridWeight.vector and .text must be non-negative finite numbers'),
169
+ ),
170
+ errorCode: `${DyNTS_global_settings.systemShortCodeName}|DyNTS-LVS-VS5`,
171
+ });
172
+ }
173
+ }
174
+ }
131
175
 
132
176
  // Validáljuk, hogy a searchInKey létezik-e
133
177
  const property: DyFM_DataProperty_Params<any, T> =
@@ -250,7 +294,7 @@ export class DyNTS_LVS_VectorDataService<T extends DyFM_Metadata> extends DyNTS_
250
294
  );
251
295
 
252
296
  // 4. Végrehajtjuk a local vector search-t
253
- const mode: LVS_Search_Mode = searchMode ?? this.defaultSearchMode;
297
+ const mode: LVS_Search_Mode = effectiveMode;
254
298
 
255
299
  if (this.debugLog) {
256
300
  DyFM_Log.log(
@@ -258,11 +302,53 @@ export class DyNTS_LVS_VectorDataService<T extends DyFM_Metadata> extends DyNTS_
258
302
  );
259
303
  }
260
304
 
261
- const searchResults: LVS_SearchResult[] = this.vectorPool.search(
262
- queryVector,
263
- limit,
264
- mode
265
- );
305
+ let searchResults: LVS_SearchResult[];
306
+
307
+ if (mode === LVS_Search_Mode.hybrid) {
308
+ // Hybrid: cosine ALL candidate-re + BM25 ALL candidate-re + min-max norm + weighted sum
309
+ const allCandidatesCosine: LVS_SearchResult[] = this.vectorPool.search(
310
+ queryVector,
311
+ dataMap.size,
312
+ LVS_Search_Mode.cosineSimilarity,
313
+ );
314
+
315
+ // BM25 corpus epitese a `textSearchKey` property-bol
316
+ const docs: { id: string; text: string }[] = [];
317
+ for (const [docId, dataItem] of dataMap) {
318
+ const textValue: unknown = dataItem[textSearchKey as keyof T];
319
+ docs.push({
320
+ id: docId,
321
+ text: typeof textValue === 'string' ? textValue : '',
322
+ });
323
+ }
324
+ const bm25Corpus: DyNTS_LVS_BM25_Corpus = new DyNTS_LVS_BM25_Corpus(docs);
325
+ const bm25Raw: DyNTS_LVS_BM25_DocScore[] = bm25Corpus.score(input);
326
+ const bm25Normalized: DyNTS_LVS_BM25_DocScore[] = dyNTS_LVS_BM25_minMaxNormalize(bm25Raw);
327
+
328
+ const bm25ScoreById: Map<string, number> = new Map<string, number>();
329
+ for (const s of bm25Normalized) {
330
+ bm25ScoreById.set(s.id, s.score);
331
+ }
332
+
333
+ const wVector: number = hybridWeight?.vector ?? 0.5;
334
+ const wText: number = hybridWeight?.text ?? 0.5;
335
+
336
+ const merged: LVS_SearchResult[] = allCandidatesCosine.map((c: LVS_SearchResult): LVS_SearchResult => {
337
+ const bm25Score: number = bm25ScoreById.get(c.id) ?? 0;
338
+ return {
339
+ id: c.id,
340
+ score: wVector * c.score + wText * bm25Score,
341
+ };
342
+ });
343
+ merged.sort((a: LVS_SearchResult, b: LVS_SearchResult) => b.score - a.score);
344
+ searchResults = merged.slice(0, limit);
345
+ } else {
346
+ searchResults = this.vectorPool.search(
347
+ queryVector,
348
+ limit,
349
+ mode,
350
+ );
351
+ }
266
352
 
267
353
  if (this.debugLog) {
268
354
  DyFM_Log.log(