@futdevpro/nts-dynamo 1.11.4 → 1.11.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/build/_modules/discord-assistant/_services/dias-chunk.data-service.d.ts +1 -1
- package/build/_modules/discord-assistant/_services/dias-chunk.data-service.d.ts.map +1 -1
- package/build/_modules/discord-assistant/_services/dias-chunk.data-service.js +1 -1
- package/build/_modules/discord-assistant/_services/dias-chunk.data-service.js.map +1 -1
- package/build/_modules/open-ai/_collections/oai-chunking.util.d.ts +11 -1
- package/build/_modules/open-ai/_collections/oai-chunking.util.d.ts.map +1 -1
- package/build/_modules/open-ai/_collections/oai-chunking.util.js +78 -46
- package/build/_modules/open-ai/_collections/oai-chunking.util.js.map +1 -1
- package/build/_modules/open-ai/_models/interfaces/oai-chunk-compare-result.interface.d.ts +2 -2
- package/build/_modules/open-ai/_models/interfaces/oai-chunk-compare-result.interface.d.ts.map +1 -1
- package/build/_modules/open-ai/_models/interfaces/oai-document-page.interface.d.ts +2 -1
- package/build/_modules/open-ai/_models/interfaces/oai-document-page.interface.d.ts.map +1 -1
- package/build/_modules/open-ai/_models/interfaces/oai-page-compare-result.interface.d.ts +4 -2
- package/build/_modules/open-ai/_models/interfaces/oai-page-compare-result.interface.d.ts.map +1 -1
- package/build/_modules/open-ai/_models/oai-doc-chunk.data-model.d.ts +15 -2
- package/build/_modules/open-ai/_models/oai-doc-chunk.data-model.d.ts.map +1 -1
- package/build/_modules/open-ai/_models/oai-doc-chunk.data-model.js +21 -8
- package/build/_modules/open-ai/_models/oai-doc-chunk.data-model.js.map +1 -1
- package/build/_modules/open-ai/_services/oai-chunk.service-base.d.ts +6 -4
- package/build/_modules/open-ai/_services/oai-chunk.service-base.d.ts.map +1 -1
- package/build/_modules/open-ai/_services/oai-chunk.service-base.js +36 -14
- package/build/_modules/open-ai/_services/oai-chunk.service-base.js.map +1 -1
- package/build/_modules/open-ai/_services/oai-vector-data.service.d.ts.map +1 -1
- package/build/_modules/open-ai/_services/oai-vector-data.service.js +8 -3
- package/build/_modules/open-ai/_services/oai-vector-data.service.js.map +1 -1
- package/build/_services/base/data.service.d.ts.map +1 -1
- package/build/_services/base/data.service.js +3 -1
- package/build/_services/base/data.service.js.map +1 -1
- package/package.json +4 -3
- package/src/_modules/discord-assistant/_services/dias-chunk.data-service.ts +4 -4
- package/src/_modules/open-ai/_collections/oai-chunking.util.ts +122 -62
- package/src/_modules/open-ai/_models/interfaces/oai-chunk-compare-result.interface.ts +4 -2
- package/src/_modules/open-ai/_models/interfaces/oai-document-page.interface.ts +5 -1
- package/src/_modules/open-ai/_models/interfaces/oai-page-compare-result.interface.ts +5 -3
- package/src/_modules/open-ai/_models/oai-doc-chunk.data-model.ts +27 -13
- package/src/_modules/open-ai/_services/oai-chunk.service-base.ts +46 -10
- package/src/_modules/open-ai/_services/oai-vector-data.service.ts +7 -3
- package/src/_services/base/data.service.ts +3 -1
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import { DyFM_AnyError, DyFM_Error, DyFM_Error_Settings, DyFM_Log } from '@futdevpro/fsm-dynamo';
|
|
1
|
+
import { DyFM_AnyError, DyFM_Error, DyFM_Error_Settings, DyFM_Log, DyFM_wait } from '@futdevpro/fsm-dynamo';
|
|
2
2
|
import { DyNTS_OAI_Chunk } from '../_models/oai-doc-chunk.data-model';
|
|
3
3
|
import { DyNTS_OAI_DocumentPage } from '../_models/interfaces/oai-document-page.interface';
|
|
4
4
|
import { DyNTS_global_settings } from '../../../_collections/global-settings.const';
|
|
@@ -14,15 +14,20 @@ export class DyNTS_OAI_Chunking_Util {
|
|
|
14
14
|
`We encountered an unhandled Control Service Error, ` +
|
|
15
15
|
`\nplease contact the responsible development team.`;
|
|
16
16
|
|
|
17
|
+
protected static readonly debugLog: boolean = false;
|
|
18
|
+
|
|
17
19
|
/**
|
|
18
20
|
* Markdown tartalom chunking-ja a leghosszabb lehetséges chunk-ok létrehozásához
|
|
19
21
|
* @param page - ClickUp dokumentum oldal
|
|
20
22
|
* @param issuer - Kérés kezdeményezője
|
|
21
23
|
* @returns Promise<CU_DocChunk[]> - Létrehozott chunk-ok listája
|
|
22
24
|
*/
|
|
23
|
-
static async chunkMdContent<
|
|
24
|
-
|
|
25
|
-
|
|
25
|
+
static async chunkMdContent<
|
|
26
|
+
T_Chunk extends DyNTS_OAI_Chunk,
|
|
27
|
+
T_Page extends DyNTS_OAI_DocumentPage<T_Chunk>
|
|
28
|
+
>(
|
|
29
|
+
page: T_Page,
|
|
30
|
+
getPageLink: (page: T_Page, issuer: string) => string,
|
|
26
31
|
issuer: string
|
|
27
32
|
): Promise<T_Chunk[]> {
|
|
28
33
|
try {
|
|
@@ -59,15 +64,15 @@ export class DyNTS_OAI_Chunking_Util {
|
|
|
59
64
|
if (chunkContent.length > 0) {
|
|
60
65
|
const chunkWithHierarchy = this.addHeaderHierarchy(chunkContent, headerHierarchy, currentPosition, currentPosition + leftovers.length, lastHeaders);
|
|
61
66
|
const chunkPath = this.buildChunkPath(page.name, headerHierarchy, currentPosition, currentPosition + leftovers.length, lastHeaders);
|
|
62
|
-
const headParents = headerHierarchy.map(header => header.header);
|
|
67
|
+
const headParents = this.getRelevantHeaders(headerHierarchy, currentPosition, currentPosition + leftovers.length, lastHeaders); // headerHierarchy.map(header => header.header);
|
|
63
68
|
const allFlaggedParents = [ ...page.allFlaggedParents, ...headParents ];
|
|
64
69
|
|
|
65
70
|
const chunk: T_Chunk = new DyNTS_OAI_Chunk({
|
|
66
71
|
filePathParents: page.path,
|
|
67
72
|
chunkHeadParents: headParents,
|
|
68
73
|
|
|
69
|
-
allFlaggedParents: allFlaggedParents,
|
|
70
|
-
|
|
74
|
+
allFlaggedParents: [ ...page.allFlaggedParents, ...headParents ],
|
|
75
|
+
path: [ ...page.path, ...headParents ].join('/'),
|
|
71
76
|
|
|
72
77
|
documentName: page.documentId,
|
|
73
78
|
pageName: page.name,
|
|
@@ -76,9 +81,26 @@ export class DyNTS_OAI_Chunking_Util {
|
|
|
76
81
|
|
|
77
82
|
chunkOriginalContent: chunkContent,
|
|
78
83
|
|
|
79
|
-
|
|
84
|
+
chunkContent: chunkWithHierarchy,
|
|
80
85
|
}) as T_Chunk;
|
|
81
|
-
|
|
86
|
+
if (this.debugLog) {
|
|
87
|
+
DyFM_Log.H_info(
|
|
88
|
+
'Full content (before assembleChunkWithHeaders)',
|
|
89
|
+
`Chunk ${chunkIndex}: ${chunk.path.replaceAll('/', '/\n').split('/')}`,
|
|
90
|
+
{ chunk: chunk.chunkContent }
|
|
91
|
+
);
|
|
92
|
+
}
|
|
93
|
+
chunk.chunkContent = this.assembleChunkWithHeaders(chunk, issuer);
|
|
94
|
+
if (this.debugLog) {
|
|
95
|
+
DyFM_Log.H_info(
|
|
96
|
+
'Full content (after assembleChunkWithHeaders)',
|
|
97
|
+
`Chunk ${chunkIndex}: ${chunk.path.replaceAll('/', '/\n').split('/')}`,
|
|
98
|
+
{
|
|
99
|
+
chunk: chunk.chunkContent,
|
|
100
|
+
}
|
|
101
|
+
);
|
|
102
|
+
}
|
|
103
|
+
await DyFM_wait(100_000);
|
|
82
104
|
chunks.push(chunk);
|
|
83
105
|
}
|
|
84
106
|
break;
|
|
@@ -105,7 +127,7 @@ export class DyNTS_OAI_Chunking_Util {
|
|
|
105
127
|
|
|
106
128
|
const chunkWithHierarchy = this.addHeaderHierarchy(chunkContent, headerHierarchy, currentPosition, currentPosition + breakPoint.index, lastHeaders);
|
|
107
129
|
const chunkPath = this.buildChunkPath(page.name, headerHierarchy, currentPosition, currentPosition + breakPoint.index, lastHeaders);
|
|
108
|
-
const headParents = headerHierarchy.map(header => header.header);
|
|
130
|
+
const headParents = this.getRelevantHeaders(headerHierarchy, currentPosition, currentPosition + breakPoint.index, lastHeaders); // headerHierarchy.map(header => header.header);
|
|
109
131
|
const allFlaggedParents = [ ...page.allFlaggedParents, ...headParents ];
|
|
110
132
|
|
|
111
133
|
/* console.log(`Chunk path:`, chunkPath);
|
|
@@ -115,8 +137,8 @@ export class DyNTS_OAI_Chunking_Util {
|
|
|
115
137
|
filePathParents: page.path,
|
|
116
138
|
chunkHeadParents: headParents,
|
|
117
139
|
|
|
118
|
-
allFlaggedParents: allFlaggedParents,
|
|
119
|
-
|
|
140
|
+
allFlaggedParents: [ ...page.allFlaggedParents, ...headParents ],
|
|
141
|
+
path: [ ...page.path, ...headParents ].join('/'),
|
|
120
142
|
|
|
121
143
|
documentName: page.documentId,
|
|
122
144
|
pageName: page.name,
|
|
@@ -125,9 +147,25 @@ export class DyNTS_OAI_Chunking_Util {
|
|
|
125
147
|
|
|
126
148
|
chunkOriginalContent: chunkContent,
|
|
127
149
|
|
|
128
|
-
|
|
150
|
+
chunkContent: chunkWithHierarchy,
|
|
129
151
|
}) as T_Chunk;
|
|
130
|
-
|
|
152
|
+
if (this.debugLog) {
|
|
153
|
+
DyFM_Log.H_info(
|
|
154
|
+
'Break point (before assembleChunkWithHeaders)',
|
|
155
|
+
`Chunk ${chunkIndex}: ${chunk.path.replaceAll('/', '/\n').split('/')}`,
|
|
156
|
+
{ chunk: chunk.chunkContent }
|
|
157
|
+
);
|
|
158
|
+
}
|
|
159
|
+
chunk.chunkContent = this.assembleChunkWithHeaders(chunk, issuer);
|
|
160
|
+
if (this.debugLog) {
|
|
161
|
+
DyFM_Log.H_info(
|
|
162
|
+
'Break point (after assembleChunkWithHeaders)',
|
|
163
|
+
`Chunk ${chunkIndex}: ${chunk.path.replaceAll('/', '/\n').split('/')}`,
|
|
164
|
+
{
|
|
165
|
+
chunk: chunk.chunkContent,
|
|
166
|
+
}
|
|
167
|
+
);
|
|
168
|
+
}
|
|
131
169
|
chunks.push(chunk);
|
|
132
170
|
chunkIndex++;
|
|
133
171
|
currentPosition += breakPoint.index;
|
|
@@ -143,15 +181,15 @@ export class DyNTS_OAI_Chunking_Util {
|
|
|
143
181
|
const chunkContent = leftovers.substring(0, maxChunkSize).trim();
|
|
144
182
|
const chunkWithHierarchy = this.addHeaderHierarchy(chunkContent, headerHierarchy, currentPosition, currentPosition + maxChunkSize, lastHeaders);
|
|
145
183
|
const chunkPath = this.buildChunkPath(page.name, headerHierarchy, currentPosition, currentPosition + maxChunkSize, lastHeaders);
|
|
146
|
-
const headParents = headerHierarchy.map(header => header.header);
|
|
184
|
+
const headParents = this.getRelevantHeaders(headerHierarchy, currentPosition, currentPosition + maxChunkSize, lastHeaders); // headerHierarchy.map(header => header.header);
|
|
147
185
|
const allFlaggedParents = [ ...page.allFlaggedParents, ...headParents ];
|
|
148
186
|
|
|
149
187
|
const chunk: T_Chunk = new DyNTS_OAI_Chunk({
|
|
150
188
|
filePathParents: page.path,
|
|
151
189
|
chunkHeadParents: headParents,
|
|
152
190
|
|
|
153
|
-
allFlaggedParents: allFlaggedParents,
|
|
154
|
-
|
|
191
|
+
allFlaggedParents: [ ...page.allFlaggedParents, ...headParents ],
|
|
192
|
+
path: [ ...page.path, ...headParents ].join('/'),
|
|
155
193
|
|
|
156
194
|
documentName: page.documentId,
|
|
157
195
|
pageName: page.name,
|
|
@@ -160,9 +198,24 @@ export class DyNTS_OAI_Chunking_Util {
|
|
|
160
198
|
|
|
161
199
|
chunkOriginalContent: chunkContent,
|
|
162
200
|
|
|
163
|
-
|
|
201
|
+
chunkContent: chunkWithHierarchy,
|
|
164
202
|
}) as T_Chunk;
|
|
165
|
-
|
|
203
|
+
if (this.debugLog) {
|
|
204
|
+
DyFM_Log.H_info(
|
|
205
|
+
'Max chunk size',
|
|
206
|
+
`Chunk ${chunkIndex}: ${chunk.path.replaceAll('/', '/\n').split('/')}`,
|
|
207
|
+
{ chunk: chunk.chunkContent }
|
|
208
|
+
);
|
|
209
|
+
}
|
|
210
|
+
chunk.chunkContent = this.assembleChunkWithHeaders(chunk, issuer);
|
|
211
|
+
if (this.debugLog) {
|
|
212
|
+
DyFM_Log.H_info(
|
|
213
|
+
`Chunk ${chunkIndex}: ${chunk.path.replaceAll('/', '/\n').split('/')}`,
|
|
214
|
+
{
|
|
215
|
+
chunk: chunk.chunkContent,
|
|
216
|
+
}
|
|
217
|
+
);
|
|
218
|
+
}
|
|
166
219
|
chunks.push(chunk);
|
|
167
220
|
chunkIndex++;
|
|
168
221
|
currentPosition += maxChunkSize;
|
|
@@ -354,6 +407,49 @@ export class DyNTS_OAI_Chunking_Util {
|
|
|
354
407
|
}
|
|
355
408
|
}
|
|
356
409
|
|
|
410
|
+
/**
|
|
411
|
+
* Visszaadja a chunk-hoz tartozó releváns szülő fejléceket a hierarchia alapján.
|
|
412
|
+
* @param headerHierarchy - A feldolgozott fejléc-hierarchia
|
|
413
|
+
* @param startIndex - A chunk kezdőindexe az eredeti tartalomban
|
|
414
|
+
* @param endIndex - A chunk végindexe az eredeti tartalomban
|
|
415
|
+
* @param lastHeaders - Az utolsó ismert fejlécek szintenként
|
|
416
|
+
* @returns A releváns szülő fejlécek tömbje
|
|
417
|
+
*/
|
|
418
|
+
private static getRelevantHeaders(
|
|
419
|
+
headerHierarchy: Array<{index: number, level: number, header: string}>,
|
|
420
|
+
startIndex: number,
|
|
421
|
+
endIndex: number,
|
|
422
|
+
lastHeaders: Map<number, string>
|
|
423
|
+
): string[] {
|
|
424
|
+
// Keressük meg az első fejlécet a chunkban
|
|
425
|
+
const firstHeaderInChunk = headerHierarchy.find(header =>
|
|
426
|
+
header.index >= startIndex && header.index < endIndex
|
|
427
|
+
);
|
|
428
|
+
|
|
429
|
+
if (!firstHeaderInChunk) {
|
|
430
|
+
// Nincs fejléc a chunkban, a legmagasabb szintű utolsó fejlécet használjuk
|
|
431
|
+
let highestLevel = 0;
|
|
432
|
+
let highestHeader = '';
|
|
433
|
+
for (const [level, header] of lastHeaders.entries()) {
|
|
434
|
+
if (level > highestLevel) {
|
|
435
|
+
highestLevel = level;
|
|
436
|
+
highestHeader = header;
|
|
437
|
+
}
|
|
438
|
+
}
|
|
439
|
+
return highestHeader ? [highestHeader] : [];
|
|
440
|
+
}
|
|
441
|
+
|
|
442
|
+
// Szülő fejlécek összegyűjtése az első fejléc szintje alapján
|
|
443
|
+
const relevantHeaders: string[] = [];
|
|
444
|
+
for (let level = 1; level < firstHeaderInChunk.level; level++) {
|
|
445
|
+
const parentHeader = lastHeaders.get(level);
|
|
446
|
+
if (parentHeader) {
|
|
447
|
+
relevantHeaders.push(parentHeader);
|
|
448
|
+
}
|
|
449
|
+
}
|
|
450
|
+
return relevantHeaders;
|
|
451
|
+
}
|
|
452
|
+
|
|
357
453
|
/**
|
|
358
454
|
* Build hierarchical chunk path based on header structure
|
|
359
455
|
* @param pageName - The page name
|
|
@@ -429,51 +525,10 @@ export class DyNTS_OAI_Chunking_Util {
|
|
|
429
525
|
endIndex: number,
|
|
430
526
|
lastHeaders: Map<number, string>
|
|
431
527
|
): string {
|
|
432
|
-
|
|
433
|
-
const firstHeaderInChunk = headerHierarchy.find(header =>
|
|
434
|
-
header.index >= startIndex && header.index < endIndex
|
|
435
|
-
);
|
|
436
|
-
|
|
437
|
-
if (!firstHeaderInChunk) {
|
|
438
|
-
// No header in chunk, use the highest level from lastHeaders as parent
|
|
439
|
-
const relevantHeaders: string[] = [];
|
|
440
|
-
|
|
441
|
-
// Find the highest level header from lastHeaders
|
|
442
|
-
let highestLevel = 0;
|
|
443
|
-
let highestHeader = '';
|
|
444
|
-
for (const [level, header] of lastHeaders.entries()) {
|
|
445
|
-
if (level > highestLevel) {
|
|
446
|
-
highestLevel = level;
|
|
447
|
-
highestHeader = header;
|
|
448
|
-
}
|
|
449
|
-
}
|
|
450
|
-
|
|
451
|
-
if (highestHeader) {
|
|
452
|
-
relevantHeaders.push(highestHeader);
|
|
453
|
-
}
|
|
454
|
-
|
|
455
|
-
if (relevantHeaders.length > 0) {
|
|
456
|
-
return relevantHeaders.join('\n') + '\n\n' + chunkContent;
|
|
457
|
-
}
|
|
458
|
-
|
|
459
|
-
return chunkContent;
|
|
460
|
-
}
|
|
461
|
-
|
|
462
|
-
// Build hierarchy from stored last headers (only parent headers, not from chunk content)
|
|
463
|
-
const relevantHeaders: string[] = [];
|
|
464
|
-
|
|
465
|
-
// Add parent headers based on the first header in chunk level
|
|
466
|
-
for (let level = 1; level < firstHeaderInChunk.level; level++) {
|
|
467
|
-
const parentHeader = lastHeaders.get(level);
|
|
468
|
-
if (parentHeader) {
|
|
469
|
-
relevantHeaders.push(parentHeader);
|
|
470
|
-
}
|
|
471
|
-
}
|
|
472
|
-
|
|
528
|
+
const relevantHeaders = this.getRelevantHeaders(headerHierarchy, startIndex, endIndex, lastHeaders);
|
|
473
529
|
if (relevantHeaders.length > 0) {
|
|
474
530
|
return relevantHeaders.join('\n') + '\n\n' + chunkContent;
|
|
475
531
|
}
|
|
476
|
-
|
|
477
532
|
return chunkContent;
|
|
478
533
|
}
|
|
479
534
|
|
|
@@ -509,4 +564,9 @@ export class DyNTS_OAI_Chunking_Util {
|
|
|
509
564
|
error: error,
|
|
510
565
|
};
|
|
511
566
|
}
|
|
512
|
-
}
|
|
567
|
+
}
|
|
568
|
+
|
|
569
|
+
|
|
570
|
+
function isLast<T>(element: T, array: T[]): boolean {
|
|
571
|
+
return array.indexOf(element) === array.length - 1;
|
|
572
|
+
}
|
|
@@ -4,9 +4,11 @@ import { DyNTS_OAI_CompareResult_Type } from '../../_enums/oai-compare-result-ty
|
|
|
4
4
|
import { DyNTS_OAI_Chunk } from '../oai-doc-chunk.data-model';
|
|
5
5
|
|
|
6
6
|
|
|
7
|
-
export interface DyNTS_OAI_ChunkCompareResult<
|
|
7
|
+
export interface DyNTS_OAI_ChunkCompareResult<
|
|
8
|
+
T_Chunk extends DyNTS_OAI_Chunk = DyNTS_OAI_Chunk
|
|
9
|
+
> {
|
|
8
10
|
result: DyNTS_OAI_CompareResult_Type;
|
|
9
|
-
oldChunk:
|
|
11
|
+
oldChunk: T_Chunk;
|
|
10
12
|
}
|
|
11
13
|
|
|
12
14
|
|
|
@@ -4,7 +4,9 @@ import { DyNTS_OAI_Chunk } from '../oai-doc-chunk.data-model';
|
|
|
4
4
|
|
|
5
5
|
|
|
6
6
|
|
|
7
|
-
export interface DyNTS_OAI_DocumentPage<
|
|
7
|
+
export interface DyNTS_OAI_DocumentPage<
|
|
8
|
+
T_Chunk extends DyNTS_OAI_Chunk = DyNTS_OAI_Chunk
|
|
9
|
+
> {
|
|
8
10
|
documentId: string;
|
|
9
11
|
pageId: string;
|
|
10
12
|
|
|
@@ -18,6 +20,8 @@ export interface DyNTS_OAI_DocumentPage<T_Chunk extends DyNTS_OAI_Chunk> {
|
|
|
18
20
|
|
|
19
21
|
chunks: T_Chunk[];
|
|
20
22
|
content: string;
|
|
23
|
+
|
|
24
|
+
pages?: DyNTS_OAI_DocumentPage<T_Chunk>[];
|
|
21
25
|
}
|
|
22
26
|
|
|
23
27
|
|
|
@@ -1,9 +1,11 @@
|
|
|
1
1
|
import { DyNTS_OAI_Chunk } from '../oai-doc-chunk.data-model';
|
|
2
|
-
import { DyNTS_OAI_ChunkCompareResult
|
|
2
|
+
import { DyNTS_OAI_ChunkCompareResult } from './oai-chunk-compare-result.interface';
|
|
3
|
+
import { DyNTS_OAI_CompareResult_Type } from '../../_enums/oai-compare-result-type.enum';
|
|
4
|
+
import { DyNTS_OAI_DocumentPage } from './oai-document-page.interface';
|
|
3
5
|
|
|
4
6
|
export interface DyNTS_OAI_PageCompareResult<
|
|
5
|
-
|
|
6
|
-
|
|
7
|
+
T_Chunk extends DyNTS_OAI_Chunk = DyNTS_OAI_Chunk,
|
|
8
|
+
T_Doc extends DyNTS_OAI_DocumentPage<T_Chunk> = DyNTS_OAI_DocumentPage<T_Chunk>,
|
|
7
9
|
> {
|
|
8
10
|
result: DyNTS_OAI_CompareResult_Type;
|
|
9
11
|
subjectPage: T_Doc;
|
|
@@ -16,10 +16,21 @@ export class DyNTS_OAI_Chunk extends DyFM_Metadata {
|
|
|
16
16
|
* */
|
|
17
17
|
allFlaggedParents: string[] = [];
|
|
18
18
|
/**
|
|
19
|
+
* allFlaggedParentsMerged
|
|
19
20
|
* All parents of the chunk, flagged, merged into a with '/' as separator
|
|
21
|
+
* like: System:FDP/Project:Organizer/Side:Backend/Module:Notes/Document:Note Data Model
|
|
22
|
+
* or: System:FDP/Project:FDP Documentations/Document:Specifications/Services/Service:Organizer/Module:Chat/Page:Chat
|
|
23
|
+
* */
|
|
24
|
+
path: string;
|
|
25
|
+
/**
|
|
26
|
+
* The page name with the full path
|
|
27
|
+
* System/Project/Side/Module/Document/Page
|
|
28
|
+
* */
|
|
29
|
+
/* pageNameWithFullPath: string; */
|
|
30
|
+
|
|
31
|
+
/**
|
|
32
|
+
* The document name
|
|
20
33
|
* */
|
|
21
|
-
allFlaggedParentsMerged: string;
|
|
22
|
-
|
|
23
34
|
documentName: string;
|
|
24
35
|
pageName: string;
|
|
25
36
|
pageLink: string;
|
|
@@ -28,7 +39,10 @@ export class DyNTS_OAI_Chunk extends DyFM_Metadata {
|
|
|
28
39
|
|
|
29
40
|
chunkOriginalContent: string;
|
|
30
41
|
|
|
31
|
-
|
|
42
|
+
/**
|
|
43
|
+
* chunk Parented Content
|
|
44
|
+
*/
|
|
45
|
+
chunkContent: string;
|
|
32
46
|
chunkParentedContentVectorized?: number[];
|
|
33
47
|
|
|
34
48
|
constructor(
|
|
@@ -54,7 +68,7 @@ export const chunk_propertyParams: DyFM_DataProperties<DyNTS_OAI_Chunk> = {
|
|
|
54
68
|
type: 'string[]',
|
|
55
69
|
required: true,
|
|
56
70
|
},
|
|
57
|
-
|
|
71
|
+
path: {
|
|
58
72
|
type: 'string', index: true,
|
|
59
73
|
required: true,
|
|
60
74
|
},
|
|
@@ -68,8 +82,7 @@ export const chunk_propertyParams: DyFM_DataProperties<DyNTS_OAI_Chunk> = {
|
|
|
68
82
|
required: true,
|
|
69
83
|
},
|
|
70
84
|
pageLink: {
|
|
71
|
-
type: 'string',
|
|
72
|
-
required: true,
|
|
85
|
+
type: 'string',
|
|
73
86
|
},
|
|
74
87
|
chunkIndex: {
|
|
75
88
|
type: 'number', index: true,
|
|
@@ -81,21 +94,22 @@ export const chunk_propertyParams: DyFM_DataProperties<DyNTS_OAI_Chunk> = {
|
|
|
81
94
|
required: true,
|
|
82
95
|
},
|
|
83
96
|
|
|
84
|
-
|
|
85
|
-
type: 'string',
|
|
97
|
+
chunkContent: {
|
|
98
|
+
type: 'string',
|
|
86
99
|
required: true,
|
|
87
100
|
},
|
|
88
101
|
chunkParentedContentVectorized: {
|
|
89
102
|
type: 'number[]',
|
|
90
103
|
required: true,
|
|
91
104
|
embeddingModel: DyFM_OpenAIModel.textEmbedding_3Large,
|
|
92
|
-
vectorizedFrom: [ '
|
|
105
|
+
vectorizedFrom: [ 'path', 'chunkContent' ],
|
|
93
106
|
vectorizeUseIndex: 'chunkParentedContentVectorized',
|
|
94
107
|
vectorizeAlways: true,
|
|
95
108
|
},
|
|
96
109
|
}
|
|
97
110
|
|
|
98
|
-
export const DyNTS_OAI_defaultChunk_dataParams: DyFM_DataModel_Params<DyNTS_OAI_Chunk> =
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
|
|
111
|
+
export const DyNTS_OAI_defaultChunk_dataParams: DyFM_DataModel_Params<DyNTS_OAI_Chunk> =
|
|
112
|
+
new DyFM_DataModel_Params<DyNTS_OAI_Chunk>({
|
|
113
|
+
dataName: 'doc_chunk',
|
|
114
|
+
properties: chunk_propertyParams,
|
|
115
|
+
});
|
|
@@ -1,17 +1,21 @@
|
|
|
1
|
-
import { DyFM_Array, DyFM_DataModel_Params, DyFM_DataProperties, DyFM_DBFilterSimple, DyFM_Error, DyFM_Log, DyFM_Metadata } from '@futdevpro/fsm-dynamo';
|
|
1
|
+
import { DyFM_Array, DyFM_DataModel_Params, DyFM_DataProperties, DyFM_DBFilterSimple, DyFM_Error, DyFM_Log, DyFM_Metadata, DyFM_wait } from '@futdevpro/fsm-dynamo';
|
|
2
2
|
import { DyFM_OpenAI_Settings, DyFM_OpenAIModel } from '@futdevpro/fsm-dynamo/open-ai';
|
|
3
3
|
import { DyNTS_OAI_VectorDataService } from './oai-vector-data.service';
|
|
4
4
|
import { DyNTS_OAI_Chunking_Util } from '../_collections/oai-chunking.util';
|
|
5
5
|
import { DyNTS_OAI_Chunk } from '../_models/oai-doc-chunk.data-model';
|
|
6
|
-
import { DyNTS_OAI_ChunkCompareResult
|
|
6
|
+
import { DyNTS_OAI_ChunkCompareResult } from '../_models/interfaces/oai-chunk-compare-result.interface';
|
|
7
|
+
import { DyNTS_OAI_CompareResult_Type } from '../_enums/oai-compare-result-type.enum';
|
|
8
|
+
import { DyNTS_OAI_DocumentPage } from '../_models/interfaces/oai-document-page.interface';
|
|
7
9
|
import { DyNTS_OAI_PageCompareResult } from '../_models/interfaces/oai-page-compare-result.interface';
|
|
8
10
|
|
|
9
11
|
|
|
10
12
|
export class DyNTS_OAI_Chunk_DataServiceBase<
|
|
11
|
-
|
|
12
|
-
|
|
13
|
+
T_Chunk extends DyNTS_OAI_Chunk = DyNTS_OAI_Chunk,
|
|
14
|
+
T_Page extends DyNTS_OAI_DocumentPage<T_Chunk> = DyNTS_OAI_DocumentPage<T_Chunk>,
|
|
13
15
|
> extends DyNTS_OAI_VectorDataService<T_Chunk> {
|
|
14
16
|
|
|
17
|
+
debugLog: boolean = true;
|
|
18
|
+
|
|
15
19
|
constructor(
|
|
16
20
|
dataParams: DyFM_DataModel_Params<T_Chunk>,
|
|
17
21
|
openAiSettings: DyFM_OpenAI_Settings,
|
|
@@ -26,7 +30,7 @@ export class DyNTS_OAI_Chunk_DataServiceBase<
|
|
|
26
30
|
}
|
|
27
31
|
|
|
28
32
|
async updatePage(
|
|
29
|
-
pageCompareResult: DyNTS_OAI_PageCompareResult<
|
|
33
|
+
pageCompareResult: DyNTS_OAI_PageCompareResult<T_Chunk, T_Page>,
|
|
30
34
|
issuer: string
|
|
31
35
|
): Promise<void> {
|
|
32
36
|
try {
|
|
@@ -38,6 +42,7 @@ export class DyNTS_OAI_Chunk_DataServiceBase<
|
|
|
38
42
|
if (pageCompareResult.oldChunks.length > pageCompareResult.newChunks.length) {
|
|
39
43
|
// delete extra chunks
|
|
40
44
|
for (let i = 0; i < pageCompareResult.oldChunks.length; i++) {
|
|
45
|
+
|
|
41
46
|
if (i < pageCompareResult.newChunks.length) {
|
|
42
47
|
pageCompareResult.newChunks[i]._id = pageCompareResult.oldChunks[i]._id;
|
|
43
48
|
} else {
|
|
@@ -54,6 +59,18 @@ export class DyNTS_OAI_Chunk_DataServiceBase<
|
|
|
54
59
|
if (
|
|
55
60
|
pageCompareResult.chunkCompareResults[chunk.chunkIndex].result !== DyNTS_OAI_CompareResult_Type.equal
|
|
56
61
|
) {
|
|
62
|
+
/* chunk.allFlaggedParents = chunk.allFlaggedParents.map(parent => parent.trim())
|
|
63
|
+
chunk.na */
|
|
64
|
+
|
|
65
|
+
chunk.path = chunk.allFlaggedParents.join('/');
|
|
66
|
+
|
|
67
|
+
if (this.debugLog) {
|
|
68
|
+
DyFM_Log.H_info(`✅ saving Chunk: ${pageCompareResult.subjectPage.name}; ${chunk.chunkIndex}`, {
|
|
69
|
+
chunk: chunk,
|
|
70
|
+
});
|
|
71
|
+
await DyFM_wait(10_000);
|
|
72
|
+
}
|
|
73
|
+
|
|
57
74
|
await this.saveData(chunk, false, true);
|
|
58
75
|
DyFM_Log.info(`✅ Chunk saved: ${pageCompareResult.subjectPage.name}; ${chunk.chunkIndex}`)
|
|
59
76
|
}
|
|
@@ -69,10 +86,20 @@ export class DyNTS_OAI_Chunk_DataServiceBase<
|
|
|
69
86
|
}
|
|
70
87
|
}
|
|
71
88
|
|
|
72
|
-
async comparePage(page: T_Page, issuer: string): Promise<DyNTS_OAI_PageCompareResult<
|
|
89
|
+
async comparePage(page: T_Page, issuer: string): Promise<DyNTS_OAI_PageCompareResult<T_Chunk, T_Page>> {
|
|
73
90
|
try {
|
|
91
|
+
if (!page.allFlaggedParentsMerged) {
|
|
92
|
+
throw new DyFM_Error({
|
|
93
|
+
...this.getDefaultErrorSettings(
|
|
94
|
+
'comparePage',
|
|
95
|
+
new Error(`allFlaggedParentsMerged is not set (${page.name})`),
|
|
96
|
+
),
|
|
97
|
+
errorCode: 'DCH-CSB-CP0',
|
|
98
|
+
});
|
|
99
|
+
}
|
|
100
|
+
|
|
74
101
|
const oldChunks = await this.findDataList({
|
|
75
|
-
|
|
102
|
+
path: page.allFlaggedParentsMerged,
|
|
76
103
|
} as DyFM_DBFilterSimple<T_Chunk>);
|
|
77
104
|
|
|
78
105
|
const compareResults: DyNTS_OAI_ChunkCompareResult<T_Chunk>[] = await this.compareChunks(
|
|
@@ -143,8 +170,17 @@ export class DyNTS_OAI_Chunk_DataServiceBase<
|
|
|
143
170
|
return [];
|
|
144
171
|
}
|
|
145
172
|
|
|
173
|
+
if (chunks.some(chunk => !chunk.path)) {
|
|
174
|
+
throw new DyFM_Error({
|
|
175
|
+
...this.getDefaultErrorSettings(
|
|
176
|
+
'compareChunks', new Error('allFlaggedParentsMerged is not set'),
|
|
177
|
+
),
|
|
178
|
+
errorCode: 'DCH-CSB-CC0',
|
|
179
|
+
});
|
|
180
|
+
}
|
|
181
|
+
|
|
146
182
|
oldChunks ??= await this.findDataList({
|
|
147
|
-
|
|
183
|
+
path: chunks[0].path,
|
|
148
184
|
} as DyFM_DBFilterSimple<T_Chunk>);
|
|
149
185
|
oldChunks.sort((a, b) => a.chunkIndex - b.chunkIndex);
|
|
150
186
|
|
|
@@ -168,7 +204,7 @@ export class DyNTS_OAI_Chunk_DataServiceBase<
|
|
|
168
204
|
): Promise<DyNTS_OAI_ChunkCompareResult<T_Chunk>> {
|
|
169
205
|
try {
|
|
170
206
|
oldChunk ??= await this.findData({
|
|
171
|
-
|
|
207
|
+
path: newChunk.path,
|
|
172
208
|
chunkIndex: newChunk.chunkIndex,
|
|
173
209
|
} as DyFM_DBFilterSimple<T_Chunk>);
|
|
174
210
|
|
|
@@ -187,7 +223,7 @@ export class DyNTS_OAI_Chunk_DataServiceBase<
|
|
|
187
223
|
};
|
|
188
224
|
}
|
|
189
225
|
|
|
190
|
-
if (oldChunk.
|
|
226
|
+
if (oldChunk.chunkContent === newChunk.chunkContent) {
|
|
191
227
|
return {
|
|
192
228
|
result: DyNTS_OAI_CompareResult_Type.equal,
|
|
193
229
|
oldChunk: oldChunk,
|
|
@@ -251,9 +251,11 @@ export class DyNTS_OAI_VectorDataService<T extends DyFM_Metadata> extends DyNTS_
|
|
|
251
251
|
/* alwaysVectorize?: boolean */
|
|
252
252
|
): Promise<string> {
|
|
253
253
|
if (vectorizedProperty.vectorizedFrom.length === 1) {
|
|
254
|
-
const value: unknown = data[vectorizedProperty.vectorizedFrom[0]];
|
|
254
|
+
const value: unknown = data?.[vectorizedProperty.vectorizedFrom[0]];
|
|
255
255
|
|
|
256
|
-
if (
|
|
256
|
+
if (value === undefined) {
|
|
257
|
+
return undefined;
|
|
258
|
+
} else if (typeof value === 'string') {
|
|
257
259
|
return value;
|
|
258
260
|
} else if (Array.isArray(value) || typeof value === 'object') {
|
|
259
261
|
return JSON.stringify(value);
|
|
@@ -264,7 +266,9 @@ export class DyNTS_OAI_VectorDataService<T extends DyFM_Metadata> extends DyNTS_
|
|
|
264
266
|
const object: any = {};
|
|
265
267
|
|
|
266
268
|
vectorizedProperty.vectorizedFrom.forEach((key: string): void => {
|
|
267
|
-
|
|
269
|
+
if (data?.[key] !== undefined) {
|
|
270
|
+
object[key] = data[key];
|
|
271
|
+
}
|
|
268
272
|
});
|
|
269
273
|
|
|
270
274
|
return JSON.stringify(object);
|
|
@@ -1190,13 +1190,14 @@ export class DyNTS_DataService<T extends DyFM_Metadata> {
|
|
|
1190
1190
|
'validateForSave',
|
|
1191
1191
|
new Error(
|
|
1192
1192
|
`validateForSave failed, "${element.key}" is missing! ` +
|
|
1193
|
-
`(${this.dataParams.dataName})`
|
|
1193
|
+
`(index or required in "${this.dataParams.dataName}" dataParams) `
|
|
1194
1194
|
)
|
|
1195
1195
|
),
|
|
1196
1196
|
|
|
1197
1197
|
status: 522,
|
|
1198
1198
|
errorCode: `${DyNTS_global_settings.systemShortCodeName}|DyNTS-DS0-VD1`,
|
|
1199
1199
|
userMessage: this.defaultValidationErrorUserMsg,
|
|
1200
|
+
__localStack: this.dataParams.stackLocation,
|
|
1200
1201
|
additionalContent: {
|
|
1201
1202
|
data: data,
|
|
1202
1203
|
},
|
|
@@ -1219,6 +1220,7 @@ export class DyNTS_DataService<T extends DyFM_Metadata> {
|
|
|
1219
1220
|
status: 522,
|
|
1220
1221
|
errorCode: `${DyNTS_global_settings.systemShortCodeName}|DyNTS-DS0-VD2`,
|
|
1221
1222
|
userMessage: this.defaultValidationErrorUserMsg,
|
|
1223
|
+
__localStack: this.dataParams.stackLocation,
|
|
1222
1224
|
additionalContent: {
|
|
1223
1225
|
data: data,
|
|
1224
1226
|
},
|