@futdevpro/nts-dynamo 1.11.5 → 1.11.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/build/_modules/discord-assistant/_services/dias-chunk.data-service.js +1 -1
- package/build/_modules/discord-assistant/_services/dias-chunk.data-service.js.map +1 -1
- package/build/_modules/open-ai/_collections/oai-chunking.util.d.ts +10 -0
- package/build/_modules/open-ai/_collections/oai-chunking.util.d.ts.map +1 -1
- package/build/_modules/open-ai/_collections/oai-chunking.util.js +78 -46
- package/build/_modules/open-ai/_collections/oai-chunking.util.js.map +1 -1
- package/build/_modules/open-ai/_models/interfaces/oai-page-compare-result.interface.d.ts +3 -1
- package/build/_modules/open-ai/_models/interfaces/oai-page-compare-result.interface.d.ts.map +1 -1
- package/build/_modules/open-ai/_models/oai-doc-chunk.data-model.d.ts +15 -2
- package/build/_modules/open-ai/_models/oai-doc-chunk.data-model.d.ts.map +1 -1
- package/build/_modules/open-ai/_models/oai-doc-chunk.data-model.js +21 -8
- package/build/_modules/open-ai/_models/oai-doc-chunk.data-model.js.map +1 -1
- package/build/_modules/open-ai/_services/oai-chunk.service-base.d.ts +3 -1
- package/build/_modules/open-ai/_services/oai-chunk.service-base.d.ts.map +1 -1
- package/build/_modules/open-ai/_services/oai-chunk.service-base.js +36 -14
- package/build/_modules/open-ai/_services/oai-chunk.service-base.js.map +1 -1
- package/build/_modules/open-ai/_services/oai-vector-data.service.d.ts.map +1 -1
- package/build/_modules/open-ai/_services/oai-vector-data.service.js +8 -3
- package/build/_modules/open-ai/_services/oai-vector-data.service.js.map +1 -1
- package/build/_services/base/data.service.d.ts.map +1 -1
- package/build/_services/base/data.service.js +3 -1
- package/build/_services/base/data.service.js.map +1 -1
- package/package.json +4 -3
- package/src/_modules/discord-assistant/_services/dias-chunk.data-service.ts +1 -1
- package/src/_modules/open-ai/_collections/oai-chunking.util.ts +116 -59
- package/src/_modules/open-ai/_models/interfaces/oai-page-compare-result.interface.ts +3 -1
- package/src/_modules/open-ai/_models/oai-doc-chunk.data-model.ts +27 -13
- package/src/_modules/open-ai/_services/oai-chunk.service-base.ts +42 -6
- package/src/_modules/open-ai/_services/oai-vector-data.service.ts +7 -3
- package/src/_services/base/data.service.ts +3 -1
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import { DyFM_AnyError, DyFM_Error, DyFM_Error_Settings, DyFM_Log } from '@futdevpro/fsm-dynamo';
|
|
1
|
+
import { DyFM_AnyError, DyFM_Error, DyFM_Error_Settings, DyFM_Log, DyFM_wait } from '@futdevpro/fsm-dynamo';
|
|
2
2
|
import { DyNTS_OAI_Chunk } from '../_models/oai-doc-chunk.data-model';
|
|
3
3
|
import { DyNTS_OAI_DocumentPage } from '../_models/interfaces/oai-document-page.interface';
|
|
4
4
|
import { DyNTS_global_settings } from '../../../_collections/global-settings.const';
|
|
@@ -14,6 +14,8 @@ export class DyNTS_OAI_Chunking_Util {
|
|
|
14
14
|
`We encountered an unhandled Control Service Error, ` +
|
|
15
15
|
`\nplease contact the responsible development team.`;
|
|
16
16
|
|
|
17
|
+
protected static readonly debugLog: boolean = false;
|
|
18
|
+
|
|
17
19
|
/**
|
|
18
20
|
* Markdown tartalom chunking-ja a leghosszabb lehetséges chunk-ok létrehozásához
|
|
19
21
|
* @param page - ClickUp dokumentum oldal
|
|
@@ -62,15 +64,15 @@ export class DyNTS_OAI_Chunking_Util {
|
|
|
62
64
|
if (chunkContent.length > 0) {
|
|
63
65
|
const chunkWithHierarchy = this.addHeaderHierarchy(chunkContent, headerHierarchy, currentPosition, currentPosition + leftovers.length, lastHeaders);
|
|
64
66
|
const chunkPath = this.buildChunkPath(page.name, headerHierarchy, currentPosition, currentPosition + leftovers.length, lastHeaders);
|
|
65
|
-
const headParents = headerHierarchy.map(header => header.header);
|
|
67
|
+
const headParents = this.getRelevantHeaders(headerHierarchy, currentPosition, currentPosition + leftovers.length, lastHeaders); // headerHierarchy.map(header => header.header);
|
|
66
68
|
const allFlaggedParents = [ ...page.allFlaggedParents, ...headParents ];
|
|
67
69
|
|
|
68
70
|
const chunk: T_Chunk = new DyNTS_OAI_Chunk({
|
|
69
71
|
filePathParents: page.path,
|
|
70
72
|
chunkHeadParents: headParents,
|
|
71
73
|
|
|
72
|
-
allFlaggedParents: allFlaggedParents,
|
|
73
|
-
|
|
74
|
+
allFlaggedParents: [ ...page.allFlaggedParents, ...headParents ],
|
|
75
|
+
path: [ ...page.path, ...headParents ].join('/'),
|
|
74
76
|
|
|
75
77
|
documentName: page.documentId,
|
|
76
78
|
pageName: page.name,
|
|
@@ -79,9 +81,26 @@ export class DyNTS_OAI_Chunking_Util {
|
|
|
79
81
|
|
|
80
82
|
chunkOriginalContent: chunkContent,
|
|
81
83
|
|
|
82
|
-
|
|
84
|
+
chunkContent: chunkWithHierarchy,
|
|
83
85
|
}) as T_Chunk;
|
|
84
|
-
|
|
86
|
+
if (this.debugLog) {
|
|
87
|
+
DyFM_Log.H_info(
|
|
88
|
+
'Full content (before assembleChunkWithHeaders)',
|
|
89
|
+
`Chunk ${chunkIndex}: ${chunk.path.replaceAll('/', '/\n').split('/')}`,
|
|
90
|
+
{ chunk: chunk.chunkContent }
|
|
91
|
+
);
|
|
92
|
+
}
|
|
93
|
+
chunk.chunkContent = this.assembleChunkWithHeaders(chunk, issuer);
|
|
94
|
+
if (this.debugLog) {
|
|
95
|
+
DyFM_Log.H_info(
|
|
96
|
+
'Full content (after assembleChunkWithHeaders)',
|
|
97
|
+
`Chunk ${chunkIndex}: ${chunk.path.replaceAll('/', '/\n').split('/')}`,
|
|
98
|
+
{
|
|
99
|
+
chunk: chunk.chunkContent,
|
|
100
|
+
}
|
|
101
|
+
);
|
|
102
|
+
}
|
|
103
|
+
await DyFM_wait(100_000);
|
|
85
104
|
chunks.push(chunk);
|
|
86
105
|
}
|
|
87
106
|
break;
|
|
@@ -108,7 +127,7 @@ export class DyNTS_OAI_Chunking_Util {
|
|
|
108
127
|
|
|
109
128
|
const chunkWithHierarchy = this.addHeaderHierarchy(chunkContent, headerHierarchy, currentPosition, currentPosition + breakPoint.index, lastHeaders);
|
|
110
129
|
const chunkPath = this.buildChunkPath(page.name, headerHierarchy, currentPosition, currentPosition + breakPoint.index, lastHeaders);
|
|
111
|
-
const headParents = headerHierarchy.map(header => header.header);
|
|
130
|
+
const headParents = this.getRelevantHeaders(headerHierarchy, currentPosition, currentPosition + breakPoint.index, lastHeaders); // headerHierarchy.map(header => header.header);
|
|
112
131
|
const allFlaggedParents = [ ...page.allFlaggedParents, ...headParents ];
|
|
113
132
|
|
|
114
133
|
/* console.log(`Chunk path:`, chunkPath);
|
|
@@ -118,8 +137,8 @@ export class DyNTS_OAI_Chunking_Util {
|
|
|
118
137
|
filePathParents: page.path,
|
|
119
138
|
chunkHeadParents: headParents,
|
|
120
139
|
|
|
121
|
-
allFlaggedParents: allFlaggedParents,
|
|
122
|
-
|
|
140
|
+
allFlaggedParents: [ ...page.allFlaggedParents, ...headParents ],
|
|
141
|
+
path: [ ...page.path, ...headParents ].join('/'),
|
|
123
142
|
|
|
124
143
|
documentName: page.documentId,
|
|
125
144
|
pageName: page.name,
|
|
@@ -128,9 +147,25 @@ export class DyNTS_OAI_Chunking_Util {
|
|
|
128
147
|
|
|
129
148
|
chunkOriginalContent: chunkContent,
|
|
130
149
|
|
|
131
|
-
|
|
150
|
+
chunkContent: chunkWithHierarchy,
|
|
132
151
|
}) as T_Chunk;
|
|
133
|
-
|
|
152
|
+
if (this.debugLog) {
|
|
153
|
+
DyFM_Log.H_info(
|
|
154
|
+
'Break point (before assembleChunkWithHeaders)',
|
|
155
|
+
`Chunk ${chunkIndex}: ${chunk.path.replaceAll('/', '/\n').split('/')}`,
|
|
156
|
+
{ chunk: chunk.chunkContent }
|
|
157
|
+
);
|
|
158
|
+
}
|
|
159
|
+
chunk.chunkContent = this.assembleChunkWithHeaders(chunk, issuer);
|
|
160
|
+
if (this.debugLog) {
|
|
161
|
+
DyFM_Log.H_info(
|
|
162
|
+
'Break point (after assembleChunkWithHeaders)',
|
|
163
|
+
`Chunk ${chunkIndex}: ${chunk.path.replaceAll('/', '/\n').split('/')}`,
|
|
164
|
+
{
|
|
165
|
+
chunk: chunk.chunkContent,
|
|
166
|
+
}
|
|
167
|
+
);
|
|
168
|
+
}
|
|
134
169
|
chunks.push(chunk);
|
|
135
170
|
chunkIndex++;
|
|
136
171
|
currentPosition += breakPoint.index;
|
|
@@ -146,15 +181,15 @@ export class DyNTS_OAI_Chunking_Util {
|
|
|
146
181
|
const chunkContent = leftovers.substring(0, maxChunkSize).trim();
|
|
147
182
|
const chunkWithHierarchy = this.addHeaderHierarchy(chunkContent, headerHierarchy, currentPosition, currentPosition + maxChunkSize, lastHeaders);
|
|
148
183
|
const chunkPath = this.buildChunkPath(page.name, headerHierarchy, currentPosition, currentPosition + maxChunkSize, lastHeaders);
|
|
149
|
-
const headParents = headerHierarchy.map(header => header.header);
|
|
184
|
+
const headParents = this.getRelevantHeaders(headerHierarchy, currentPosition, currentPosition + maxChunkSize, lastHeaders); // headerHierarchy.map(header => header.header);
|
|
150
185
|
const allFlaggedParents = [ ...page.allFlaggedParents, ...headParents ];
|
|
151
186
|
|
|
152
187
|
const chunk: T_Chunk = new DyNTS_OAI_Chunk({
|
|
153
188
|
filePathParents: page.path,
|
|
154
189
|
chunkHeadParents: headParents,
|
|
155
190
|
|
|
156
|
-
allFlaggedParents: allFlaggedParents,
|
|
157
|
-
|
|
191
|
+
allFlaggedParents: [ ...page.allFlaggedParents, ...headParents ],
|
|
192
|
+
path: [ ...page.path, ...headParents ].join('/'),
|
|
158
193
|
|
|
159
194
|
documentName: page.documentId,
|
|
160
195
|
pageName: page.name,
|
|
@@ -163,9 +198,24 @@ export class DyNTS_OAI_Chunking_Util {
|
|
|
163
198
|
|
|
164
199
|
chunkOriginalContent: chunkContent,
|
|
165
200
|
|
|
166
|
-
|
|
201
|
+
chunkContent: chunkWithHierarchy,
|
|
167
202
|
}) as T_Chunk;
|
|
168
|
-
|
|
203
|
+
if (this.debugLog) {
|
|
204
|
+
DyFM_Log.H_info(
|
|
205
|
+
'Max chunk size',
|
|
206
|
+
`Chunk ${chunkIndex}: ${chunk.path.replaceAll('/', '/\n').split('/')}`,
|
|
207
|
+
{ chunk: chunk.chunkContent }
|
|
208
|
+
);
|
|
209
|
+
}
|
|
210
|
+
chunk.chunkContent = this.assembleChunkWithHeaders(chunk, issuer);
|
|
211
|
+
if (this.debugLog) {
|
|
212
|
+
DyFM_Log.H_info(
|
|
213
|
+
`Chunk ${chunkIndex}: ${chunk.path.replaceAll('/', '/\n').split('/')}`,
|
|
214
|
+
{
|
|
215
|
+
chunk: chunk.chunkContent,
|
|
216
|
+
}
|
|
217
|
+
);
|
|
218
|
+
}
|
|
169
219
|
chunks.push(chunk);
|
|
170
220
|
chunkIndex++;
|
|
171
221
|
currentPosition += maxChunkSize;
|
|
@@ -357,6 +407,49 @@ export class DyNTS_OAI_Chunking_Util {
|
|
|
357
407
|
}
|
|
358
408
|
}
|
|
359
409
|
|
|
410
|
+
/**
 * Returns the parent headers that are relevant for a chunk, derived from the header hierarchy.
 *
 * If a header starts inside the chunk range, the stored last headers of every level
 * strictly above that header's level (1 .. level - 1) are returned in ascending level order;
 * levels with no stored (or empty) header are skipped.
 * If no header starts inside the chunk range, only the stored header with the largest
 * level number is returned (or an empty array when none is stored).
 *
 * @param headerHierarchy - The processed header hierarchy (position, level and text of each header)
 * @param startIndex - Start index of the chunk in the original content (inclusive)
 * @param endIndex - End index of the chunk in the original content (exclusive)
 * @param lastHeaders - The last known header text per level
 * @returns The array of relevant parent headers
 */
private static getRelevantHeaders(
  headerHierarchy: Array<{index: number, level: number, header: string}>,
  startIndex: number,
  endIndex: number,
  lastHeaders: Map<number, string>
): string[] {
  // Locate the first header whose position falls inside [startIndex, endIndex).
  const leadingHeader = headerHierarchy.find(
    ({ index }) => startIndex <= index && index < endIndex
  );

  if (leadingHeader) {
    // Collect the stored parents for every level above the leading header's level.
    const parents: string[] = [];
    for (let depth = 1; depth < leadingHeader.level; depth++) {
      const title = lastHeaders.get(depth);
      if (title) {
        parents.push(title);
      }
    }
    return parents;
  }

  // No header inside the chunk: fall back to the stored header with the largest level number.
  let deepestLevel = 0;
  let deepestTitle = '';
  lastHeaders.forEach((title, level) => {
    if (level > deepestLevel) {
      deepestLevel = level;
      deepestTitle = title;
    }
  });

  return deepestTitle ? [deepestTitle] : [];
}
|
|
452
|
+
|
|
360
453
|
/**
|
|
361
454
|
* Build hierarchical chunk path based on header structure
|
|
362
455
|
* @param pageName - The page name
|
|
@@ -432,51 +525,10 @@ export class DyNTS_OAI_Chunking_Util {
|
|
|
432
525
|
endIndex: number,
|
|
433
526
|
lastHeaders: Map<number, string>
|
|
434
527
|
): string {
|
|
435
|
-
|
|
436
|
-
const firstHeaderInChunk = headerHierarchy.find(header =>
|
|
437
|
-
header.index >= startIndex && header.index < endIndex
|
|
438
|
-
);
|
|
439
|
-
|
|
440
|
-
if (!firstHeaderInChunk) {
|
|
441
|
-
// No header in chunk, use the highest level from lastHeaders as parent
|
|
442
|
-
const relevantHeaders: string[] = [];
|
|
443
|
-
|
|
444
|
-
// Find the highest level header from lastHeaders
|
|
445
|
-
let highestLevel = 0;
|
|
446
|
-
let highestHeader = '';
|
|
447
|
-
for (const [level, header] of lastHeaders.entries()) {
|
|
448
|
-
if (level > highestLevel) {
|
|
449
|
-
highestLevel = level;
|
|
450
|
-
highestHeader = header;
|
|
451
|
-
}
|
|
452
|
-
}
|
|
453
|
-
|
|
454
|
-
if (highestHeader) {
|
|
455
|
-
relevantHeaders.push(highestHeader);
|
|
456
|
-
}
|
|
457
|
-
|
|
458
|
-
if (relevantHeaders.length > 0) {
|
|
459
|
-
return relevantHeaders.join('\n') + '\n\n' + chunkContent;
|
|
460
|
-
}
|
|
461
|
-
|
|
462
|
-
return chunkContent;
|
|
463
|
-
}
|
|
464
|
-
|
|
465
|
-
// Build hierarchy from stored last headers (only parent headers, not from chunk content)
|
|
466
|
-
const relevantHeaders: string[] = [];
|
|
467
|
-
|
|
468
|
-
// Add parent headers based on the first header in chunk level
|
|
469
|
-
for (let level = 1; level < firstHeaderInChunk.level; level++) {
|
|
470
|
-
const parentHeader = lastHeaders.get(level);
|
|
471
|
-
if (parentHeader) {
|
|
472
|
-
relevantHeaders.push(parentHeader);
|
|
473
|
-
}
|
|
474
|
-
}
|
|
475
|
-
|
|
528
|
+
const relevantHeaders = this.getRelevantHeaders(headerHierarchy, startIndex, endIndex, lastHeaders);
|
|
476
529
|
if (relevantHeaders.length > 0) {
|
|
477
530
|
return relevantHeaders.join('\n') + '\n\n' + chunkContent;
|
|
478
531
|
}
|
|
479
|
-
|
|
480
532
|
return chunkContent;
|
|
481
533
|
}
|
|
482
534
|
|
|
@@ -512,4 +564,9 @@ export class DyNTS_OAI_Chunking_Util {
|
|
|
512
564
|
error: error,
|
|
513
565
|
};
|
|
514
566
|
}
|
|
515
|
-
}
|
|
567
|
+
}
|
|
568
|
+
|
|
569
|
+
|
|
570
|
+
/**
 * Tells whether `element` occupies the last position of `array`.
 *
 * The comparison is by strict equality against the final slot, so:
 * - an empty array never has a last element (returns `false`),
 * - a value that also occurs earlier in the array is still recognised when it is last.
 *
 * Note: because the check is value-based, an earlier duplicate of the last value
 * also yields `true`; use an index comparison when positions must be distinguished.
 *
 * @param element - The value to look for in the last position
 * @param array - The array to inspect
 * @returns `true` when the array is non-empty and its final item is strictly equal to `element`
 */
function isLast<T>(element: T, array: T[]): boolean {
  return array.length > 0 && array[array.length - 1] === element;
}
|
|
@@ -1,5 +1,7 @@
|
|
|
1
1
|
import { DyNTS_OAI_Chunk } from '../oai-doc-chunk.data-model';
|
|
2
|
-
import { DyNTS_OAI_ChunkCompareResult
|
|
2
|
+
import { DyNTS_OAI_ChunkCompareResult } from './oai-chunk-compare-result.interface';
|
|
3
|
+
import { DyNTS_OAI_CompareResult_Type } from '../../_enums/oai-compare-result-type.enum';
|
|
4
|
+
import { DyNTS_OAI_DocumentPage } from './oai-document-page.interface';
|
|
3
5
|
|
|
4
6
|
export interface DyNTS_OAI_PageCompareResult<
|
|
5
7
|
T_Chunk extends DyNTS_OAI_Chunk = DyNTS_OAI_Chunk,
|
|
@@ -16,10 +16,21 @@ export class DyNTS_OAI_Chunk extends DyFM_Metadata {
|
|
|
16
16
|
* */
|
|
17
17
|
allFlaggedParents: string[] = [];
|
|
18
18
|
/**
|
|
19
|
+
* allFlaggedParentsMerged
|
|
19
20
|
* All parents of the chunk, flagged, merged into a single string with '/' as separator
|
|
21
|
+
* like: System:FDP/Project:Organizer/Side:Backend/Module:Notes/Document:Note Data Model
|
|
22
|
+
* or: System:FDP/Project:FDP Documentations/Document:Specifications/Services/Service:Organizer/Module:Chat/Page:Chat
|
|
23
|
+
* */
|
|
24
|
+
path: string;
|
|
25
|
+
/**
|
|
26
|
+
* The page name with the full path
|
|
27
|
+
* System/Project/Side/Module/Document/Page
|
|
28
|
+
* */
|
|
29
|
+
/* pageNameWithFullPath: string; */
|
|
30
|
+
|
|
31
|
+
/**
|
|
32
|
+
* The document name
|
|
20
33
|
* */
|
|
21
|
-
allFlaggedParentsMerged: string;
|
|
22
|
-
|
|
23
34
|
documentName: string;
|
|
24
35
|
pageName: string;
|
|
25
36
|
pageLink: string;
|
|
@@ -28,7 +39,10 @@ export class DyNTS_OAI_Chunk extends DyFM_Metadata {
|
|
|
28
39
|
|
|
29
40
|
chunkOriginalContent: string;
|
|
30
41
|
|
|
31
|
-
|
|
42
|
+
/**
|
|
43
|
+
* chunk Parented Content
|
|
44
|
+
*/
|
|
45
|
+
chunkContent: string;
|
|
32
46
|
chunkParentedContentVectorized?: number[];
|
|
33
47
|
|
|
34
48
|
constructor(
|
|
@@ -54,7 +68,7 @@ export const chunk_propertyParams: DyFM_DataProperties<DyNTS_OAI_Chunk> = {
|
|
|
54
68
|
type: 'string[]',
|
|
55
69
|
required: true,
|
|
56
70
|
},
|
|
57
|
-
|
|
71
|
+
path: {
|
|
58
72
|
type: 'string', index: true,
|
|
59
73
|
required: true,
|
|
60
74
|
},
|
|
@@ -68,8 +82,7 @@ export const chunk_propertyParams: DyFM_DataProperties<DyNTS_OAI_Chunk> = {
|
|
|
68
82
|
required: true,
|
|
69
83
|
},
|
|
70
84
|
pageLink: {
|
|
71
|
-
type: 'string',
|
|
72
|
-
required: true,
|
|
85
|
+
type: 'string',
|
|
73
86
|
},
|
|
74
87
|
chunkIndex: {
|
|
75
88
|
type: 'number', index: true,
|
|
@@ -81,21 +94,22 @@ export const chunk_propertyParams: DyFM_DataProperties<DyNTS_OAI_Chunk> = {
|
|
|
81
94
|
required: true,
|
|
82
95
|
},
|
|
83
96
|
|
|
84
|
-
|
|
85
|
-
type: 'string',
|
|
97
|
+
chunkContent: {
|
|
98
|
+
type: 'string',
|
|
86
99
|
required: true,
|
|
87
100
|
},
|
|
88
101
|
chunkParentedContentVectorized: {
|
|
89
102
|
type: 'number[]',
|
|
90
103
|
required: true,
|
|
91
104
|
embeddingModel: DyFM_OpenAIModel.textEmbedding_3Large,
|
|
92
|
-
vectorizedFrom: [ '
|
|
105
|
+
vectorizedFrom: [ 'path', 'chunkContent' ],
|
|
93
106
|
vectorizeUseIndex: 'chunkParentedContentVectorized',
|
|
94
107
|
vectorizeAlways: true,
|
|
95
108
|
},
|
|
96
109
|
}
|
|
97
110
|
|
|
98
|
-
export const DyNTS_OAI_defaultChunk_dataParams: DyFM_DataModel_Params<DyNTS_OAI_Chunk> =
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
|
|
111
|
+
/**
 * Default data-model parameters for document chunks:
 * data name `doc_chunk`, with the property definitions taken from `chunk_propertyParams`.
 */
export const DyNTS_OAI_defaultChunk_dataParams: DyFM_DataModel_Params<DyNTS_OAI_Chunk> = new DyFM_DataModel_Params<DyNTS_OAI_Chunk>(
  {
    dataName: 'doc_chunk',
    properties: chunk_propertyParams,
  }
);
|
|
@@ -1,9 +1,11 @@
|
|
|
1
|
-
import { DyFM_Array, DyFM_DataModel_Params, DyFM_DataProperties, DyFM_DBFilterSimple, DyFM_Error, DyFM_Log, DyFM_Metadata } from '@futdevpro/fsm-dynamo';
|
|
1
|
+
import { DyFM_Array, DyFM_DataModel_Params, DyFM_DataProperties, DyFM_DBFilterSimple, DyFM_Error, DyFM_Log, DyFM_Metadata, DyFM_wait } from '@futdevpro/fsm-dynamo';
|
|
2
2
|
import { DyFM_OpenAI_Settings, DyFM_OpenAIModel } from '@futdevpro/fsm-dynamo/open-ai';
|
|
3
3
|
import { DyNTS_OAI_VectorDataService } from './oai-vector-data.service';
|
|
4
4
|
import { DyNTS_OAI_Chunking_Util } from '../_collections/oai-chunking.util';
|
|
5
5
|
import { DyNTS_OAI_Chunk } from '../_models/oai-doc-chunk.data-model';
|
|
6
|
-
import { DyNTS_OAI_ChunkCompareResult
|
|
6
|
+
import { DyNTS_OAI_ChunkCompareResult } from '../_models/interfaces/oai-chunk-compare-result.interface';
|
|
7
|
+
import { DyNTS_OAI_CompareResult_Type } from '../_enums/oai-compare-result-type.enum';
|
|
8
|
+
import { DyNTS_OAI_DocumentPage } from '../_models/interfaces/oai-document-page.interface';
|
|
7
9
|
import { DyNTS_OAI_PageCompareResult } from '../_models/interfaces/oai-page-compare-result.interface';
|
|
8
10
|
|
|
9
11
|
|
|
@@ -12,6 +14,8 @@ export class DyNTS_OAI_Chunk_DataServiceBase<
|
|
|
12
14
|
T_Page extends DyNTS_OAI_DocumentPage<T_Chunk> = DyNTS_OAI_DocumentPage<T_Chunk>,
|
|
13
15
|
> extends DyNTS_OAI_VectorDataService<T_Chunk> {
|
|
14
16
|
|
|
17
|
+
debugLog: boolean = true;
|
|
18
|
+
|
|
15
19
|
constructor(
|
|
16
20
|
dataParams: DyFM_DataModel_Params<T_Chunk>,
|
|
17
21
|
openAiSettings: DyFM_OpenAI_Settings,
|
|
@@ -38,6 +42,7 @@ export class DyNTS_OAI_Chunk_DataServiceBase<
|
|
|
38
42
|
if (pageCompareResult.oldChunks.length > pageCompareResult.newChunks.length) {
|
|
39
43
|
// delete extra chunks
|
|
40
44
|
for (let i = 0; i < pageCompareResult.oldChunks.length; i++) {
|
|
45
|
+
|
|
41
46
|
if (i < pageCompareResult.newChunks.length) {
|
|
42
47
|
pageCompareResult.newChunks[i]._id = pageCompareResult.oldChunks[i]._id;
|
|
43
48
|
} else {
|
|
@@ -54,6 +59,18 @@ export class DyNTS_OAI_Chunk_DataServiceBase<
|
|
|
54
59
|
if (
|
|
55
60
|
pageCompareResult.chunkCompareResults[chunk.chunkIndex].result !== DyNTS_OAI_CompareResult_Type.equal
|
|
56
61
|
) {
|
|
62
|
+
/* chunk.allFlaggedParents = chunk.allFlaggedParents.map(parent => parent.trim())
|
|
63
|
+
chunk.na */
|
|
64
|
+
|
|
65
|
+
chunk.path = chunk.allFlaggedParents.join('/');
|
|
66
|
+
|
|
67
|
+
if (this.debugLog) {
|
|
68
|
+
DyFM_Log.H_info(`✅ saving Chunk: ${pageCompareResult.subjectPage.name}; ${chunk.chunkIndex}`, {
|
|
69
|
+
chunk: chunk,
|
|
70
|
+
});
|
|
71
|
+
await DyFM_wait(10_000);
|
|
72
|
+
}
|
|
73
|
+
|
|
57
74
|
await this.saveData(chunk, false, true);
|
|
58
75
|
DyFM_Log.info(`✅ Chunk saved: ${pageCompareResult.subjectPage.name}; ${chunk.chunkIndex}`)
|
|
59
76
|
}
|
|
@@ -71,8 +88,18 @@ export class DyNTS_OAI_Chunk_DataServiceBase<
|
|
|
71
88
|
|
|
72
89
|
async comparePage(page: T_Page, issuer: string): Promise<DyNTS_OAI_PageCompareResult<T_Chunk, T_Page>> {
|
|
73
90
|
try {
|
|
91
|
+
if (!page.allFlaggedParentsMerged) {
|
|
92
|
+
throw new DyFM_Error({
|
|
93
|
+
...this.getDefaultErrorSettings(
|
|
94
|
+
'comparePage',
|
|
95
|
+
new Error(`allFlaggedParentsMerged is not set (${page.name})`),
|
|
96
|
+
),
|
|
97
|
+
errorCode: 'DCH-CSB-CP0',
|
|
98
|
+
});
|
|
99
|
+
}
|
|
100
|
+
|
|
74
101
|
const oldChunks = await this.findDataList({
|
|
75
|
-
|
|
102
|
+
path: page.allFlaggedParentsMerged,
|
|
76
103
|
} as DyFM_DBFilterSimple<T_Chunk>);
|
|
77
104
|
|
|
78
105
|
const compareResults: DyNTS_OAI_ChunkCompareResult<T_Chunk>[] = await this.compareChunks(
|
|
@@ -143,8 +170,17 @@ export class DyNTS_OAI_Chunk_DataServiceBase<
|
|
|
143
170
|
return [];
|
|
144
171
|
}
|
|
145
172
|
|
|
173
|
+
if (chunks.some(chunk => !chunk.path)) {
|
|
174
|
+
throw new DyFM_Error({
|
|
175
|
+
...this.getDefaultErrorSettings(
|
|
176
|
+
'compareChunks', new Error('allFlaggedParentsMerged is not set'),
|
|
177
|
+
),
|
|
178
|
+
errorCode: 'DCH-CSB-CC0',
|
|
179
|
+
});
|
|
180
|
+
}
|
|
181
|
+
|
|
146
182
|
oldChunks ??= await this.findDataList({
|
|
147
|
-
|
|
183
|
+
path: chunks[0].path,
|
|
148
184
|
} as DyFM_DBFilterSimple<T_Chunk>);
|
|
149
185
|
oldChunks.sort((a, b) => a.chunkIndex - b.chunkIndex);
|
|
150
186
|
|
|
@@ -168,7 +204,7 @@ export class DyNTS_OAI_Chunk_DataServiceBase<
|
|
|
168
204
|
): Promise<DyNTS_OAI_ChunkCompareResult<T_Chunk>> {
|
|
169
205
|
try {
|
|
170
206
|
oldChunk ??= await this.findData({
|
|
171
|
-
|
|
207
|
+
path: newChunk.path,
|
|
172
208
|
chunkIndex: newChunk.chunkIndex,
|
|
173
209
|
} as DyFM_DBFilterSimple<T_Chunk>);
|
|
174
210
|
|
|
@@ -187,7 +223,7 @@ export class DyNTS_OAI_Chunk_DataServiceBase<
|
|
|
187
223
|
};
|
|
188
224
|
}
|
|
189
225
|
|
|
190
|
-
if (oldChunk.
|
|
226
|
+
if (oldChunk.chunkContent === newChunk.chunkContent) {
|
|
191
227
|
return {
|
|
192
228
|
result: DyNTS_OAI_CompareResult_Type.equal,
|
|
193
229
|
oldChunk: oldChunk,
|
|
@@ -251,9 +251,11 @@ export class DyNTS_OAI_VectorDataService<T extends DyFM_Metadata> extends DyNTS_
|
|
|
251
251
|
/* alwaysVectorize?: boolean */
|
|
252
252
|
): Promise<string> {
|
|
253
253
|
if (vectorizedProperty.vectorizedFrom.length === 1) {
|
|
254
|
-
const value: unknown = data[vectorizedProperty.vectorizedFrom[0]];
|
|
254
|
+
const value: unknown = data?.[vectorizedProperty.vectorizedFrom[0]];
|
|
255
255
|
|
|
256
|
-
if (
|
|
256
|
+
if (value === undefined) {
|
|
257
|
+
return undefined;
|
|
258
|
+
} else if (typeof value === 'string') {
|
|
257
259
|
return value;
|
|
258
260
|
} else if (Array.isArray(value) || typeof value === 'object') {
|
|
259
261
|
return JSON.stringify(value);
|
|
@@ -264,7 +266,9 @@ export class DyNTS_OAI_VectorDataService<T extends DyFM_Metadata> extends DyNTS_
|
|
|
264
266
|
const object: any = {};
|
|
265
267
|
|
|
266
268
|
vectorizedProperty.vectorizedFrom.forEach((key: string): void => {
|
|
267
|
-
|
|
269
|
+
if (data?.[key] !== undefined) {
|
|
270
|
+
object[key] = data[key];
|
|
271
|
+
}
|
|
268
272
|
});
|
|
269
273
|
|
|
270
274
|
return JSON.stringify(object);
|
|
@@ -1190,13 +1190,14 @@ export class DyNTS_DataService<T extends DyFM_Metadata> {
|
|
|
1190
1190
|
'validateForSave',
|
|
1191
1191
|
new Error(
|
|
1192
1192
|
`validateForSave failed, "${element.key}" is missing! ` +
|
|
1193
|
-
`(${this.dataParams.dataName})`
|
|
1193
|
+
`(index or required in "${this.dataParams.dataName}" dataParams) `
|
|
1194
1194
|
)
|
|
1195
1195
|
),
|
|
1196
1196
|
|
|
1197
1197
|
status: 522,
|
|
1198
1198
|
errorCode: `${DyNTS_global_settings.systemShortCodeName}|DyNTS-DS0-VD1`,
|
|
1199
1199
|
userMessage: this.defaultValidationErrorUserMsg,
|
|
1200
|
+
__localStack: this.dataParams.stackLocation,
|
|
1200
1201
|
additionalContent: {
|
|
1201
1202
|
data: data,
|
|
1202
1203
|
},
|
|
@@ -1219,6 +1220,7 @@ export class DyNTS_DataService<T extends DyFM_Metadata> {
|
|
|
1219
1220
|
status: 522,
|
|
1220
1221
|
errorCode: `${DyNTS_global_settings.systemShortCodeName}|DyNTS-DS0-VD2`,
|
|
1221
1222
|
userMessage: this.defaultValidationErrorUserMsg,
|
|
1223
|
+
__localStack: this.dataParams.stackLocation,
|
|
1222
1224
|
additionalContent: {
|
|
1223
1225
|
data: data,
|
|
1224
1226
|
},
|