@futdevpro/nts-dynamo 1.11.5 → 1.11.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.github/workflows/main.yml +6 -0
- package/build/_collections/get-environment-settings.util.js +1 -1
- package/build/_collections/get-environment-settings.util.js.map +1 -1
- package/build/_modules/discord-assistant/_services/dias-chunk.data-service.js +1 -1
- package/build/_modules/discord-assistant/_services/dias-chunk.data-service.js.map +1 -1
- package/build/_modules/open-ai/_collections/oai-chunking.util.d.ts +11 -1
- package/build/_modules/open-ai/_collections/oai-chunking.util.d.ts.map +1 -1
- package/build/_modules/open-ai/_collections/oai-chunking.util.js +82 -50
- package/build/_modules/open-ai/_collections/oai-chunking.util.js.map +1 -1
- package/build/_modules/open-ai/_models/interfaces/oai-document-page.interface.d.ts +2 -1
- package/build/_modules/open-ai/_models/interfaces/oai-document-page.interface.d.ts.map +1 -1
- package/build/_modules/open-ai/_models/interfaces/oai-page-compare-result.interface.d.ts +3 -1
- package/build/_modules/open-ai/_models/interfaces/oai-page-compare-result.interface.d.ts.map +1 -1
- package/build/_modules/open-ai/_models/oai-doc-chunk.data-model.d.ts +15 -2
- package/build/_modules/open-ai/_models/oai-doc-chunk.data-model.d.ts.map +1 -1
- package/build/_modules/open-ai/_models/oai-doc-chunk.data-model.js +21 -8
- package/build/_modules/open-ai/_models/oai-doc-chunk.data-model.js.map +1 -1
- package/build/_modules/open-ai/_services/oai-chunk.service-base.d.ts +3 -1
- package/build/_modules/open-ai/_services/oai-chunk.service-base.d.ts.map +1 -1
- package/build/_modules/open-ai/_services/oai-chunk.service-base.js +36 -14
- package/build/_modules/open-ai/_services/oai-chunk.service-base.js.map +1 -1
- package/build/_modules/open-ai/_services/oai-vector-data.service.d.ts.map +1 -1
- package/build/_modules/open-ai/_services/oai-vector-data.service.js +15 -6
- package/build/_modules/open-ai/_services/oai-vector-data.service.js.map +1 -1
- package/build/_services/base/data.service.d.ts.map +1 -1
- package/build/_services/base/data.service.js +3 -1
- package/build/_services/base/data.service.js.map +1 -1
- package/package.json +4 -3
- package/src/_collections/get-environment-settings.util.ts +1 -1
- package/src/_collections/sample.env +1 -1
- package/src/_modules/discord-assistant/_services/dias-chunk.data-service.ts +1 -1
- package/src/_modules/open-ai/_collections/oai-chunking.util.ts +125 -67
- package/src/_modules/open-ai/_models/interfaces/oai-document-page.interface.ts +2 -1
- package/src/_modules/open-ai/_models/interfaces/oai-page-compare-result.interface.ts +3 -1
- package/src/_modules/open-ai/_models/oai-doc-chunk.data-model.ts +27 -13
- package/src/_modules/open-ai/_services/oai-chunk.service-base.ts +42 -6
- package/src/_modules/open-ai/_services/oai-vector-data.service.ts +16 -6
- package/src/_services/base/data.service.ts +3 -1
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import { DyFM_AnyError, DyFM_Error, DyFM_Error_Settings, DyFM_Log } from '@futdevpro/fsm-dynamo';
|
|
1
|
+
import { DyFM_AnyError, DyFM_Error, DyFM_Error_Settings, DyFM_Log, DyFM_wait } from '@futdevpro/fsm-dynamo';
|
|
2
2
|
import { DyNTS_OAI_Chunk } from '../_models/oai-doc-chunk.data-model';
|
|
3
3
|
import { DyNTS_OAI_DocumentPage } from '../_models/interfaces/oai-document-page.interface';
|
|
4
4
|
import { DyNTS_global_settings } from '../../../_collections/global-settings.const';
|
|
@@ -14,6 +14,8 @@ export class DyNTS_OAI_Chunking_Util {
|
|
|
14
14
|
`We encountered an unhandled Control Service Error, ` +
|
|
15
15
|
`\nplease contact the responsible development team.`;
|
|
16
16
|
|
|
17
|
+
protected static readonly debugLog: boolean = false;
|
|
18
|
+
|
|
17
19
|
/**
|
|
18
20
|
* Markdown tartalom chunking-ja a leghosszabb lehetséges chunk-ok létrehozásához
|
|
19
21
|
* @param page - ClickUp dokumentum oldal
|
|
@@ -22,11 +24,12 @@ export class DyNTS_OAI_Chunking_Util {
|
|
|
22
24
|
*/
|
|
23
25
|
static async chunkMdContent<
|
|
24
26
|
T_Chunk extends DyNTS_OAI_Chunk,
|
|
25
|
-
T_Page extends DyNTS_OAI_DocumentPage<T_Chunk
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
27
|
+
T_Page extends DyNTS_OAI_DocumentPage<T_Chunk>,
|
|
28
|
+
>(
|
|
29
|
+
page: T_Page,
|
|
30
|
+
getPageLink: (page: T_Page, issuer: string) => string,
|
|
31
|
+
issuer: string,
|
|
32
|
+
debugLog?: boolean
|
|
30
33
|
): Promise<T_Chunk[]> {
|
|
31
34
|
try {
|
|
32
35
|
if (page.content.includes('#####')) {
|
|
@@ -62,15 +65,15 @@ export class DyNTS_OAI_Chunking_Util {
|
|
|
62
65
|
if (chunkContent.length > 0) {
|
|
63
66
|
const chunkWithHierarchy = this.addHeaderHierarchy(chunkContent, headerHierarchy, currentPosition, currentPosition + leftovers.length, lastHeaders);
|
|
64
67
|
const chunkPath = this.buildChunkPath(page.name, headerHierarchy, currentPosition, currentPosition + leftovers.length, lastHeaders);
|
|
65
|
-
const headParents = headerHierarchy.map(header => header.header);
|
|
68
|
+
const headParents = this.getRelevantHeaders(headerHierarchy, currentPosition, currentPosition + leftovers.length, lastHeaders); // headerHierarchy.map(header => header.header);
|
|
66
69
|
const allFlaggedParents = [ ...page.allFlaggedParents, ...headParents ];
|
|
67
70
|
|
|
68
71
|
const chunk: T_Chunk = new DyNTS_OAI_Chunk({
|
|
69
|
-
filePathParents: page.
|
|
72
|
+
filePathParents: page.parentedPath,
|
|
70
73
|
chunkHeadParents: headParents,
|
|
71
74
|
|
|
72
|
-
allFlaggedParents: allFlaggedParents,
|
|
73
|
-
|
|
75
|
+
allFlaggedParents: [ ...page.allFlaggedParents, ...headParents ],
|
|
76
|
+
path: [ ...page.parentedPath, ...headParents ].join('/'),
|
|
74
77
|
|
|
75
78
|
documentName: page.documentId,
|
|
76
79
|
pageName: page.name,
|
|
@@ -79,9 +82,26 @@ export class DyNTS_OAI_Chunking_Util {
|
|
|
79
82
|
|
|
80
83
|
chunkOriginalContent: chunkContent,
|
|
81
84
|
|
|
82
|
-
|
|
85
|
+
chunkContent: chunkWithHierarchy,
|
|
83
86
|
}) as T_Chunk;
|
|
84
|
-
|
|
87
|
+
if (this.debugLog || debugLog) {
|
|
88
|
+
DyFM_Log.H_info(
|
|
89
|
+
'Full content (before assembleChunkWithHeaders)',
|
|
90
|
+
`Chunk ${chunkIndex}: ${chunk.path.replaceAll('/', '/\n').split('/')}`,
|
|
91
|
+
{ chunk: chunk.chunkContent }
|
|
92
|
+
);
|
|
93
|
+
}
|
|
94
|
+
chunk.chunkContent = this.assembleChunkWithHeaders(chunk, issuer);
|
|
95
|
+
if (this.debugLog || debugLog) {
|
|
96
|
+
DyFM_Log.H_info(
|
|
97
|
+
'Full content (after assembleChunkWithHeaders)',
|
|
98
|
+
`Chunk ${chunkIndex}: ${chunk.path.replaceAll('/', '/\n').split('/')}`,
|
|
99
|
+
{
|
|
100
|
+
chunk: chunk.chunkContent,
|
|
101
|
+
}
|
|
102
|
+
);
|
|
103
|
+
}
|
|
104
|
+
await DyFM_wait(100_000);
|
|
85
105
|
chunks.push(chunk);
|
|
86
106
|
}
|
|
87
107
|
break;
|
|
@@ -108,18 +128,18 @@ export class DyNTS_OAI_Chunking_Util {
|
|
|
108
128
|
|
|
109
129
|
const chunkWithHierarchy = this.addHeaderHierarchy(chunkContent, headerHierarchy, currentPosition, currentPosition + breakPoint.index, lastHeaders);
|
|
110
130
|
const chunkPath = this.buildChunkPath(page.name, headerHierarchy, currentPosition, currentPosition + breakPoint.index, lastHeaders);
|
|
111
|
-
const headParents = headerHierarchy.map(header => header.header);
|
|
131
|
+
const headParents = this.getRelevantHeaders(headerHierarchy, currentPosition, currentPosition + breakPoint.index, lastHeaders); // headerHierarchy.map(header => header.header);
|
|
112
132
|
const allFlaggedParents = [ ...page.allFlaggedParents, ...headParents ];
|
|
113
133
|
|
|
114
134
|
/* console.log(`Chunk path:`, chunkPath);
|
|
115
135
|
console.log(`Chunk with hierarchy preview:`, chunkWithHierarchy.substring(0, 100) + '...'); */
|
|
116
136
|
|
|
117
137
|
const chunk: T_Chunk = new DyNTS_OAI_Chunk({
|
|
118
|
-
filePathParents: page.
|
|
138
|
+
filePathParents: page.parentedPath,
|
|
119
139
|
chunkHeadParents: headParents,
|
|
120
140
|
|
|
121
|
-
allFlaggedParents: allFlaggedParents,
|
|
122
|
-
|
|
141
|
+
allFlaggedParents: [ ...page.allFlaggedParents, ...headParents ],
|
|
142
|
+
path: [ ...page.parentedPath, ...headParents ].join('/'),
|
|
123
143
|
|
|
124
144
|
documentName: page.documentId,
|
|
125
145
|
pageName: page.name,
|
|
@@ -128,9 +148,25 @@ export class DyNTS_OAI_Chunking_Util {
|
|
|
128
148
|
|
|
129
149
|
chunkOriginalContent: chunkContent,
|
|
130
150
|
|
|
131
|
-
|
|
151
|
+
chunkContent: chunkWithHierarchy,
|
|
132
152
|
}) as T_Chunk;
|
|
133
|
-
|
|
153
|
+
if (this.debugLog || debugLog) {
|
|
154
|
+
DyFM_Log.H_info(
|
|
155
|
+
'Break point (before assembleChunkWithHeaders)',
|
|
156
|
+
`Chunk ${chunkIndex}: ${chunk.path.replaceAll('/', '/\n').split('/')}`,
|
|
157
|
+
{ chunk: chunk.chunkContent }
|
|
158
|
+
);
|
|
159
|
+
}
|
|
160
|
+
chunk.chunkContent = this.assembleChunkWithHeaders(chunk, issuer);
|
|
161
|
+
if (this.debugLog || debugLog) {
|
|
162
|
+
DyFM_Log.H_info(
|
|
163
|
+
'Break point (after assembleChunkWithHeaders)',
|
|
164
|
+
`Chunk ${chunkIndex}: ${chunk.path.replaceAll('/', '/\n').split('/')}`,
|
|
165
|
+
{
|
|
166
|
+
chunk: chunk.chunkContent,
|
|
167
|
+
}
|
|
168
|
+
);
|
|
169
|
+
}
|
|
134
170
|
chunks.push(chunk);
|
|
135
171
|
chunkIndex++;
|
|
136
172
|
currentPosition += breakPoint.index;
|
|
@@ -146,15 +182,15 @@ export class DyNTS_OAI_Chunking_Util {
|
|
|
146
182
|
const chunkContent = leftovers.substring(0, maxChunkSize).trim();
|
|
147
183
|
const chunkWithHierarchy = this.addHeaderHierarchy(chunkContent, headerHierarchy, currentPosition, currentPosition + maxChunkSize, lastHeaders);
|
|
148
184
|
const chunkPath = this.buildChunkPath(page.name, headerHierarchy, currentPosition, currentPosition + maxChunkSize, lastHeaders);
|
|
149
|
-
const headParents = headerHierarchy.map(header => header.header);
|
|
185
|
+
const headParents = this.getRelevantHeaders(headerHierarchy, currentPosition, currentPosition + maxChunkSize, lastHeaders); // headerHierarchy.map(header => header.header);
|
|
150
186
|
const allFlaggedParents = [ ...page.allFlaggedParents, ...headParents ];
|
|
151
187
|
|
|
152
188
|
const chunk: T_Chunk = new DyNTS_OAI_Chunk({
|
|
153
|
-
filePathParents: page.
|
|
189
|
+
filePathParents: page.parentedPath,
|
|
154
190
|
chunkHeadParents: headParents,
|
|
155
191
|
|
|
156
|
-
allFlaggedParents: allFlaggedParents,
|
|
157
|
-
|
|
192
|
+
allFlaggedParents: [ ...page.allFlaggedParents, ...headParents ],
|
|
193
|
+
path: [ ...page.parentedPath, ...headParents ].join('/'),
|
|
158
194
|
|
|
159
195
|
documentName: page.documentId,
|
|
160
196
|
pageName: page.name,
|
|
@@ -163,9 +199,24 @@ export class DyNTS_OAI_Chunking_Util {
|
|
|
163
199
|
|
|
164
200
|
chunkOriginalContent: chunkContent,
|
|
165
201
|
|
|
166
|
-
|
|
202
|
+
chunkContent: chunkWithHierarchy,
|
|
167
203
|
}) as T_Chunk;
|
|
168
|
-
|
|
204
|
+
if (this.debugLog || debugLog) {
|
|
205
|
+
DyFM_Log.H_info(
|
|
206
|
+
'Max chunk size',
|
|
207
|
+
`Chunk ${chunkIndex}: ${chunk.path.replaceAll('/', '/\n').split('/')}`,
|
|
208
|
+
{ chunk: chunk.chunkContent }
|
|
209
|
+
);
|
|
210
|
+
}
|
|
211
|
+
chunk.chunkContent = this.assembleChunkWithHeaders(chunk, issuer);
|
|
212
|
+
if (this.debugLog || debugLog) {
|
|
213
|
+
DyFM_Log.H_info(
|
|
214
|
+
`Chunk ${chunkIndex}: ${chunk.path.replaceAll('/', '/\n').split('/')}`,
|
|
215
|
+
{
|
|
216
|
+
chunk: chunk.chunkContent,
|
|
217
|
+
}
|
|
218
|
+
);
|
|
219
|
+
}
|
|
169
220
|
chunks.push(chunk);
|
|
170
221
|
chunkIndex++;
|
|
171
222
|
currentPosition += maxChunkSize;
|
|
@@ -357,6 +408,49 @@ export class DyNTS_OAI_Chunking_Util {
|
|
|
357
408
|
}
|
|
358
409
|
}
|
|
359
410
|
|
|
411
|
+
/**
|
|
412
|
+
* Visszaadja a chunk-hoz tartozó releváns szülő fejléceket a hierarchia alapján.
|
|
413
|
+
* @param headerHierarchy - A feldolgozott fejléc-hierarchia
|
|
414
|
+
* @param startIndex - A chunk kezdőindexe az eredeti tartalomban
|
|
415
|
+
* @param endIndex - A chunk végindexe az eredeti tartalomban
|
|
416
|
+
* @param lastHeaders - Az utolsó ismert fejlécek szintenként
|
|
417
|
+
* @returns A releváns szülő fejlécek tömbje
|
|
418
|
+
*/
|
|
419
|
+
private static getRelevantHeaders(
|
|
420
|
+
headerHierarchy: Array<{index: number, level: number, header: string}>,
|
|
421
|
+
startIndex: number,
|
|
422
|
+
endIndex: number,
|
|
423
|
+
lastHeaders: Map<number, string>
|
|
424
|
+
): string[] {
|
|
425
|
+
// Keressük meg az első fejlécet a chunkban
|
|
426
|
+
const firstHeaderInChunk = headerHierarchy.find(header =>
|
|
427
|
+
header.index >= startIndex && header.index < endIndex
|
|
428
|
+
);
|
|
429
|
+
|
|
430
|
+
if (!firstHeaderInChunk) {
|
|
431
|
+
// Nincs fejléc a chunkban, a legmagasabb szintű utolsó fejlécet használjuk
|
|
432
|
+
let highestLevel = 0;
|
|
433
|
+
let highestHeader = '';
|
|
434
|
+
for (const [level, header] of lastHeaders.entries()) {
|
|
435
|
+
if (level > highestLevel) {
|
|
436
|
+
highestLevel = level;
|
|
437
|
+
highestHeader = header;
|
|
438
|
+
}
|
|
439
|
+
}
|
|
440
|
+
return highestHeader ? [highestHeader] : [];
|
|
441
|
+
}
|
|
442
|
+
|
|
443
|
+
// Szülő fejlécek összegyűjtése az első fejléc szintje alapján
|
|
444
|
+
const relevantHeaders: string[] = [];
|
|
445
|
+
for (let level = 1; level < firstHeaderInChunk.level; level++) {
|
|
446
|
+
const parentHeader = lastHeaders.get(level);
|
|
447
|
+
if (parentHeader) {
|
|
448
|
+
relevantHeaders.push(parentHeader);
|
|
449
|
+
}
|
|
450
|
+
}
|
|
451
|
+
return relevantHeaders;
|
|
452
|
+
}
|
|
453
|
+
|
|
360
454
|
/**
|
|
361
455
|
* Build hierarchical chunk path based on header structure
|
|
362
456
|
* @param pageName - The page name
|
|
@@ -432,51 +526,10 @@ export class DyNTS_OAI_Chunking_Util {
|
|
|
432
526
|
endIndex: number,
|
|
433
527
|
lastHeaders: Map<number, string>
|
|
434
528
|
): string {
|
|
435
|
-
|
|
436
|
-
const firstHeaderInChunk = headerHierarchy.find(header =>
|
|
437
|
-
header.index >= startIndex && header.index < endIndex
|
|
438
|
-
);
|
|
439
|
-
|
|
440
|
-
if (!firstHeaderInChunk) {
|
|
441
|
-
// No header in chunk, use the highest level from lastHeaders as parent
|
|
442
|
-
const relevantHeaders: string[] = [];
|
|
443
|
-
|
|
444
|
-
// Find the highest level header from lastHeaders
|
|
445
|
-
let highestLevel = 0;
|
|
446
|
-
let highestHeader = '';
|
|
447
|
-
for (const [level, header] of lastHeaders.entries()) {
|
|
448
|
-
if (level > highestLevel) {
|
|
449
|
-
highestLevel = level;
|
|
450
|
-
highestHeader = header;
|
|
451
|
-
}
|
|
452
|
-
}
|
|
453
|
-
|
|
454
|
-
if (highestHeader) {
|
|
455
|
-
relevantHeaders.push(highestHeader);
|
|
456
|
-
}
|
|
457
|
-
|
|
458
|
-
if (relevantHeaders.length > 0) {
|
|
459
|
-
return relevantHeaders.join('\n') + '\n\n' + chunkContent;
|
|
460
|
-
}
|
|
461
|
-
|
|
462
|
-
return chunkContent;
|
|
463
|
-
}
|
|
464
|
-
|
|
465
|
-
// Build hierarchy from stored last headers (only parent headers, not from chunk content)
|
|
466
|
-
const relevantHeaders: string[] = [];
|
|
467
|
-
|
|
468
|
-
// Add parent headers based on the first header in chunk level
|
|
469
|
-
for (let level = 1; level < firstHeaderInChunk.level; level++) {
|
|
470
|
-
const parentHeader = lastHeaders.get(level);
|
|
471
|
-
if (parentHeader) {
|
|
472
|
-
relevantHeaders.push(parentHeader);
|
|
473
|
-
}
|
|
474
|
-
}
|
|
475
|
-
|
|
529
|
+
const relevantHeaders = this.getRelevantHeaders(headerHierarchy, startIndex, endIndex, lastHeaders);
|
|
476
530
|
if (relevantHeaders.length > 0) {
|
|
477
531
|
return relevantHeaders.join('\n') + '\n\n' + chunkContent;
|
|
478
532
|
}
|
|
479
|
-
|
|
480
533
|
return chunkContent;
|
|
481
534
|
}
|
|
482
535
|
|
|
@@ -512,4 +565,9 @@ export class DyNTS_OAI_Chunking_Util {
|
|
|
512
565
|
error: error,
|
|
513
566
|
};
|
|
514
567
|
}
|
|
515
|
-
}
|
|
568
|
+
}
|
|
569
|
+
|
|
570
|
+
|
|
571
|
+
function isLast<T>(element: T, array: T[]): boolean {
|
|
572
|
+
return array.indexOf(element) === array.length - 1;
|
|
573
|
+
}
|
|
@@ -15,8 +15,9 @@ export interface DyNTS_OAI_DocumentPage<
|
|
|
15
15
|
allFlaggedParents: string[];
|
|
16
16
|
allFlaggedParentsMerged: string;
|
|
17
17
|
|
|
18
|
+
filePath: string;
|
|
18
19
|
/** Path to the page in the document, starting with the document name */
|
|
19
|
-
|
|
20
|
+
parentedPath: string[];
|
|
20
21
|
|
|
21
22
|
chunks: T_Chunk[];
|
|
22
23
|
content: string;
|
|
@@ -1,5 +1,7 @@
|
|
|
1
1
|
import { DyNTS_OAI_Chunk } from '../oai-doc-chunk.data-model';
|
|
2
|
-
import { DyNTS_OAI_ChunkCompareResult
|
|
2
|
+
import { DyNTS_OAI_ChunkCompareResult } from './oai-chunk-compare-result.interface';
|
|
3
|
+
import { DyNTS_OAI_CompareResult_Type } from '../../_enums/oai-compare-result-type.enum';
|
|
4
|
+
import { DyNTS_OAI_DocumentPage } from './oai-document-page.interface';
|
|
3
5
|
|
|
4
6
|
export interface DyNTS_OAI_PageCompareResult<
|
|
5
7
|
T_Chunk extends DyNTS_OAI_Chunk = DyNTS_OAI_Chunk,
|
|
@@ -16,10 +16,21 @@ export class DyNTS_OAI_Chunk extends DyFM_Metadata {
|
|
|
16
16
|
* */
|
|
17
17
|
allFlaggedParents: string[] = [];
|
|
18
18
|
/**
|
|
19
|
+
* allFlaggedParentsMerged
|
|
19
20
|
* All parents of the chunk, flagged, merged into a with '/' as separator
|
|
21
|
+
* like: System:FDP/Project:Organizer/Side:Backend/Module:Notes/Document:Note Data Model
|
|
22
|
+
* or: System:FDP/Project:FDP Documentations/Document:Specifications/Services/Service:Organizer/Module:Chat/Page:Chat
|
|
23
|
+
* */
|
|
24
|
+
path: string;
|
|
25
|
+
/**
|
|
26
|
+
* The page name with the full path
|
|
27
|
+
* System/Project/Side/Module/Document/Page
|
|
28
|
+
* */
|
|
29
|
+
/* pageNameWithFullPath: string; */
|
|
30
|
+
|
|
31
|
+
/**
|
|
32
|
+
* The document name
|
|
20
33
|
* */
|
|
21
|
-
allFlaggedParentsMerged: string;
|
|
22
|
-
|
|
23
34
|
documentName: string;
|
|
24
35
|
pageName: string;
|
|
25
36
|
pageLink: string;
|
|
@@ -28,7 +39,10 @@ export class DyNTS_OAI_Chunk extends DyFM_Metadata {
|
|
|
28
39
|
|
|
29
40
|
chunkOriginalContent: string;
|
|
30
41
|
|
|
31
|
-
|
|
42
|
+
/**
|
|
43
|
+
* chunk Parented Content
|
|
44
|
+
*/
|
|
45
|
+
chunkContent: string;
|
|
32
46
|
chunkParentedContentVectorized?: number[];
|
|
33
47
|
|
|
34
48
|
constructor(
|
|
@@ -54,7 +68,7 @@ export const chunk_propertyParams: DyFM_DataProperties<DyNTS_OAI_Chunk> = {
|
|
|
54
68
|
type: 'string[]',
|
|
55
69
|
required: true,
|
|
56
70
|
},
|
|
57
|
-
|
|
71
|
+
path: {
|
|
58
72
|
type: 'string', index: true,
|
|
59
73
|
required: true,
|
|
60
74
|
},
|
|
@@ -68,8 +82,7 @@ export const chunk_propertyParams: DyFM_DataProperties<DyNTS_OAI_Chunk> = {
|
|
|
68
82
|
required: true,
|
|
69
83
|
},
|
|
70
84
|
pageLink: {
|
|
71
|
-
type: 'string',
|
|
72
|
-
required: true,
|
|
85
|
+
type: 'string',
|
|
73
86
|
},
|
|
74
87
|
chunkIndex: {
|
|
75
88
|
type: 'number', index: true,
|
|
@@ -81,21 +94,22 @@ export const chunk_propertyParams: DyFM_DataProperties<DyNTS_OAI_Chunk> = {
|
|
|
81
94
|
required: true,
|
|
82
95
|
},
|
|
83
96
|
|
|
84
|
-
|
|
85
|
-
type: 'string',
|
|
97
|
+
chunkContent: {
|
|
98
|
+
type: 'string',
|
|
86
99
|
required: true,
|
|
87
100
|
},
|
|
88
101
|
chunkParentedContentVectorized: {
|
|
89
102
|
type: 'number[]',
|
|
90
103
|
required: true,
|
|
91
104
|
embeddingModel: DyFM_OpenAIModel.textEmbedding_3Large,
|
|
92
|
-
vectorizedFrom: [ '
|
|
105
|
+
vectorizedFrom: [ 'path', 'chunkContent' ],
|
|
93
106
|
vectorizeUseIndex: 'chunkParentedContentVectorized',
|
|
94
107
|
vectorizeAlways: true,
|
|
95
108
|
},
|
|
96
109
|
}
|
|
97
110
|
|
|
98
|
-
export const DyNTS_OAI_defaultChunk_dataParams: DyFM_DataModel_Params<DyNTS_OAI_Chunk> =
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
|
|
111
|
+
export const DyNTS_OAI_defaultChunk_dataParams: DyFM_DataModel_Params<DyNTS_OAI_Chunk> =
|
|
112
|
+
new DyFM_DataModel_Params<DyNTS_OAI_Chunk>({
|
|
113
|
+
dataName: 'doc_chunk',
|
|
114
|
+
properties: chunk_propertyParams,
|
|
115
|
+
});
|
|
@@ -1,9 +1,11 @@
|
|
|
1
|
-
import { DyFM_Array, DyFM_DataModel_Params, DyFM_DataProperties, DyFM_DBFilterSimple, DyFM_Error, DyFM_Log, DyFM_Metadata } from '@futdevpro/fsm-dynamo';
|
|
1
|
+
import { DyFM_Array, DyFM_DataModel_Params, DyFM_DataProperties, DyFM_DBFilterSimple, DyFM_Error, DyFM_Log, DyFM_Metadata, DyFM_wait } from '@futdevpro/fsm-dynamo';
|
|
2
2
|
import { DyFM_OpenAI_Settings, DyFM_OpenAIModel } from '@futdevpro/fsm-dynamo/open-ai';
|
|
3
3
|
import { DyNTS_OAI_VectorDataService } from './oai-vector-data.service';
|
|
4
4
|
import { DyNTS_OAI_Chunking_Util } from '../_collections/oai-chunking.util';
|
|
5
5
|
import { DyNTS_OAI_Chunk } from '../_models/oai-doc-chunk.data-model';
|
|
6
|
-
import { DyNTS_OAI_ChunkCompareResult
|
|
6
|
+
import { DyNTS_OAI_ChunkCompareResult } from '../_models/interfaces/oai-chunk-compare-result.interface';
|
|
7
|
+
import { DyNTS_OAI_CompareResult_Type } from '../_enums/oai-compare-result-type.enum';
|
|
8
|
+
import { DyNTS_OAI_DocumentPage } from '../_models/interfaces/oai-document-page.interface';
|
|
7
9
|
import { DyNTS_OAI_PageCompareResult } from '../_models/interfaces/oai-page-compare-result.interface';
|
|
8
10
|
|
|
9
11
|
|
|
@@ -12,6 +14,8 @@ export class DyNTS_OAI_Chunk_DataServiceBase<
|
|
|
12
14
|
T_Page extends DyNTS_OAI_DocumentPage<T_Chunk> = DyNTS_OAI_DocumentPage<T_Chunk>,
|
|
13
15
|
> extends DyNTS_OAI_VectorDataService<T_Chunk> {
|
|
14
16
|
|
|
17
|
+
debugLog: boolean = true;
|
|
18
|
+
|
|
15
19
|
constructor(
|
|
16
20
|
dataParams: DyFM_DataModel_Params<T_Chunk>,
|
|
17
21
|
openAiSettings: DyFM_OpenAI_Settings,
|
|
@@ -38,6 +42,7 @@ export class DyNTS_OAI_Chunk_DataServiceBase<
|
|
|
38
42
|
if (pageCompareResult.oldChunks.length > pageCompareResult.newChunks.length) {
|
|
39
43
|
// delete extra chunks
|
|
40
44
|
for (let i = 0; i < pageCompareResult.oldChunks.length; i++) {
|
|
45
|
+
|
|
41
46
|
if (i < pageCompareResult.newChunks.length) {
|
|
42
47
|
pageCompareResult.newChunks[i]._id = pageCompareResult.oldChunks[i]._id;
|
|
43
48
|
} else {
|
|
@@ -54,6 +59,18 @@ export class DyNTS_OAI_Chunk_DataServiceBase<
|
|
|
54
59
|
if (
|
|
55
60
|
pageCompareResult.chunkCompareResults[chunk.chunkIndex].result !== DyNTS_OAI_CompareResult_Type.equal
|
|
56
61
|
) {
|
|
62
|
+
/* chunk.allFlaggedParents = chunk.allFlaggedParents.map(parent => parent.trim())
|
|
63
|
+
chunk.na */
|
|
64
|
+
|
|
65
|
+
chunk.path = chunk.allFlaggedParents.join('/');
|
|
66
|
+
|
|
67
|
+
if (this.debugLog) {
|
|
68
|
+
DyFM_Log.H_info(`✅ saving Chunk: ${pageCompareResult.subjectPage.name}; ${chunk.chunkIndex}`, {
|
|
69
|
+
chunk: chunk,
|
|
70
|
+
});
|
|
71
|
+
await DyFM_wait(10_000);
|
|
72
|
+
}
|
|
73
|
+
|
|
57
74
|
await this.saveData(chunk, false, true);
|
|
58
75
|
DyFM_Log.info(`✅ Chunk saved: ${pageCompareResult.subjectPage.name}; ${chunk.chunkIndex}`)
|
|
59
76
|
}
|
|
@@ -71,8 +88,18 @@ export class DyNTS_OAI_Chunk_DataServiceBase<
|
|
|
71
88
|
|
|
72
89
|
async comparePage(page: T_Page, issuer: string): Promise<DyNTS_OAI_PageCompareResult<T_Chunk, T_Page>> {
|
|
73
90
|
try {
|
|
91
|
+
if (!page.allFlaggedParentsMerged) {
|
|
92
|
+
throw new DyFM_Error({
|
|
93
|
+
...this.getDefaultErrorSettings(
|
|
94
|
+
'comparePage',
|
|
95
|
+
new Error(`allFlaggedParentsMerged is not set (${page.name})`),
|
|
96
|
+
),
|
|
97
|
+
errorCode: 'DCH-CSB-CP0',
|
|
98
|
+
});
|
|
99
|
+
}
|
|
100
|
+
|
|
74
101
|
const oldChunks = await this.findDataList({
|
|
75
|
-
|
|
102
|
+
path: page.allFlaggedParentsMerged,
|
|
76
103
|
} as DyFM_DBFilterSimple<T_Chunk>);
|
|
77
104
|
|
|
78
105
|
const compareResults: DyNTS_OAI_ChunkCompareResult<T_Chunk>[] = await this.compareChunks(
|
|
@@ -143,8 +170,17 @@ export class DyNTS_OAI_Chunk_DataServiceBase<
|
|
|
143
170
|
return [];
|
|
144
171
|
}
|
|
145
172
|
|
|
173
|
+
if (chunks.some(chunk => !chunk.path)) {
|
|
174
|
+
throw new DyFM_Error({
|
|
175
|
+
...this.getDefaultErrorSettings(
|
|
176
|
+
'compareChunks', new Error('allFlaggedParentsMerged is not set'),
|
|
177
|
+
),
|
|
178
|
+
errorCode: 'DCH-CSB-CC0',
|
|
179
|
+
});
|
|
180
|
+
}
|
|
181
|
+
|
|
146
182
|
oldChunks ??= await this.findDataList({
|
|
147
|
-
|
|
183
|
+
path: chunks[0].path,
|
|
148
184
|
} as DyFM_DBFilterSimple<T_Chunk>);
|
|
149
185
|
oldChunks.sort((a, b) => a.chunkIndex - b.chunkIndex);
|
|
150
186
|
|
|
@@ -168,7 +204,7 @@ export class DyNTS_OAI_Chunk_DataServiceBase<
|
|
|
168
204
|
): Promise<DyNTS_OAI_ChunkCompareResult<T_Chunk>> {
|
|
169
205
|
try {
|
|
170
206
|
oldChunk ??= await this.findData({
|
|
171
|
-
|
|
207
|
+
path: newChunk.path,
|
|
172
208
|
chunkIndex: newChunk.chunkIndex,
|
|
173
209
|
} as DyFM_DBFilterSimple<T_Chunk>);
|
|
174
210
|
|
|
@@ -187,7 +223,7 @@ export class DyNTS_OAI_Chunk_DataServiceBase<
|
|
|
187
223
|
};
|
|
188
224
|
}
|
|
189
225
|
|
|
190
|
-
if (oldChunk.
|
|
226
|
+
if (oldChunk.chunkContent === newChunk.chunkContent) {
|
|
191
227
|
return {
|
|
192
228
|
result: DyNTS_OAI_CompareResult_Type.equal,
|
|
193
229
|
oldChunk: oldChunk,
|
|
@@ -162,7 +162,7 @@ export class DyNTS_OAI_VectorDataService<T extends DyFM_Metadata> extends DyNTS_
|
|
|
162
162
|
dataParams.properties[key].embeddingModel ??= model;
|
|
163
163
|
}); */
|
|
164
164
|
|
|
165
|
-
this.
|
|
165
|
+
this.vectorizedProperties.forEach((property: DyFM_DataProperty_Params<any>): void => {
|
|
166
166
|
property.embeddingModel ??= model;
|
|
167
167
|
});
|
|
168
168
|
|
|
@@ -186,7 +186,7 @@ export class DyNTS_OAI_VectorDataService<T extends DyFM_Metadata> extends DyNTS_
|
|
|
186
186
|
try {
|
|
187
187
|
let existingData: T;
|
|
188
188
|
|
|
189
|
-
if (!forceVectorize) {
|
|
189
|
+
if (!forceVectorize && newData?._id) {
|
|
190
190
|
const existingData: T = await this.getDataById(newData._id, true, true);
|
|
191
191
|
}
|
|
192
192
|
|
|
@@ -235,6 +235,7 @@ export class DyNTS_OAI_VectorDataService<T extends DyFM_Metadata> extends DyNTS_
|
|
|
235
235
|
),
|
|
236
236
|
|
|
237
237
|
errorCode: `${DyNTS_global_settings.systemShortCodeName}|DyNTS-VDB-VDB1`,
|
|
238
|
+
__localStack: this.dataParams.stackLocation,
|
|
238
239
|
});
|
|
239
240
|
}
|
|
240
241
|
|
|
@@ -251,9 +252,11 @@ export class DyNTS_OAI_VectorDataService<T extends DyFM_Metadata> extends DyNTS_
|
|
|
251
252
|
/* alwaysVectorize?: boolean */
|
|
252
253
|
): Promise<string> {
|
|
253
254
|
if (vectorizedProperty.vectorizedFrom.length === 1) {
|
|
254
|
-
const value: unknown = data[vectorizedProperty.vectorizedFrom[0]];
|
|
255
|
+
const value: unknown = data?.[vectorizedProperty.vectorizedFrom[0]];
|
|
255
256
|
|
|
256
|
-
if (
|
|
257
|
+
if (value === undefined) {
|
|
258
|
+
return undefined;
|
|
259
|
+
} else if (typeof value === 'string') {
|
|
257
260
|
return value;
|
|
258
261
|
} else if (Array.isArray(value) || typeof value === 'object') {
|
|
259
262
|
return JSON.stringify(value);
|
|
@@ -264,7 +267,9 @@ export class DyNTS_OAI_VectorDataService<T extends DyFM_Metadata> extends DyNTS_
|
|
|
264
267
|
const object: any = {};
|
|
265
268
|
|
|
266
269
|
vectorizedProperty.vectorizedFrom.forEach((key: string): void => {
|
|
267
|
-
|
|
270
|
+
if (data?.[key] !== undefined) {
|
|
271
|
+
object[key] = data[key];
|
|
272
|
+
}
|
|
268
273
|
});
|
|
269
274
|
|
|
270
275
|
return JSON.stringify(object);
|
|
@@ -343,10 +348,15 @@ export class DyNTS_OAI_VectorDataService<T extends DyFM_Metadata> extends DyNTS_
|
|
|
343
348
|
throw new DyFM_Error({
|
|
344
349
|
...this.getDefaultErrorSettings(
|
|
345
350
|
'vectorSearch',
|
|
346
|
-
new Error(
|
|
351
|
+
new Error(
|
|
352
|
+
`Property "${searchInKey}" not found! ` +
|
|
353
|
+
`while searching "${this.dataParams.dataName}" ` +
|
|
354
|
+
`(The searchable properties are: ${this.vectorizedProperties.map(p => `"${p.key}"`).join(', ')})`
|
|
355
|
+
)
|
|
347
356
|
),
|
|
348
357
|
|
|
349
358
|
errorCode: `${DyNTS_global_settings.systemShortCodeName}|DyNTS-VDB-VS2`,
|
|
359
|
+
__localStack: this.dataParams.stackLocation,
|
|
350
360
|
});
|
|
351
361
|
}
|
|
352
362
|
|
|
@@ -1190,13 +1190,14 @@ export class DyNTS_DataService<T extends DyFM_Metadata> {
|
|
|
1190
1190
|
'validateForSave',
|
|
1191
1191
|
new Error(
|
|
1192
1192
|
`validateForSave failed, "${element.key}" is missing! ` +
|
|
1193
|
-
`(${this.dataParams.dataName})`
|
|
1193
|
+
`(index or required in "${this.dataParams.dataName}" dataParams) `
|
|
1194
1194
|
)
|
|
1195
1195
|
),
|
|
1196
1196
|
|
|
1197
1197
|
status: 522,
|
|
1198
1198
|
errorCode: `${DyNTS_global_settings.systemShortCodeName}|DyNTS-DS0-VD1`,
|
|
1199
1199
|
userMessage: this.defaultValidationErrorUserMsg,
|
|
1200
|
+
__localStack: this.dataParams.stackLocation,
|
|
1200
1201
|
additionalContent: {
|
|
1201
1202
|
data: data,
|
|
1202
1203
|
},
|
|
@@ -1219,6 +1220,7 @@ export class DyNTS_DataService<T extends DyFM_Metadata> {
|
|
|
1219
1220
|
status: 522,
|
|
1220
1221
|
errorCode: `${DyNTS_global_settings.systemShortCodeName}|DyNTS-DS0-VD2`,
|
|
1221
1222
|
userMessage: this.defaultValidationErrorUserMsg,
|
|
1223
|
+
__localStack: this.dataParams.stackLocation,
|
|
1222
1224
|
additionalContent: {
|
|
1223
1225
|
data: data,
|
|
1224
1226
|
},
|