@futdevpro/nts-dynamo 1.11.1 → 1.11.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/build/_modules/discord-assistant/_collections/dias.util.d.ts +6 -0
- package/build/_modules/discord-assistant/_collections/dias.util.d.ts.map +1 -0
- package/build/_modules/discord-assistant/_collections/dias.util.js +36 -0
- package/build/_modules/discord-assistant/_collections/dias.util.js.map +1 -0
- package/build/_modules/discord-assistant/_models/dias-knowledge.data-model.d.ts +1 -2
- package/build/_modules/discord-assistant/_models/dias-knowledge.data-model.d.ts.map +1 -1
- package/build/_modules/discord-assistant/_models/dias-knowledge.data-model.js +16 -10
- package/build/_modules/discord-assistant/_models/dias-knowledge.data-model.js.map +1 -1
- package/build/_modules/discord-assistant/_services/dias-chunk.data-service.d.ts +9 -5
- package/build/_modules/discord-assistant/_services/dias-chunk.data-service.d.ts.map +1 -1
- package/build/_modules/discord-assistant/_services/dias-chunk.data-service.js +18 -6
- package/build/_modules/discord-assistant/_services/dias-chunk.data-service.js.map +1 -1
- package/build/_modules/discord-assistant/_services/dias-io.control-service.d.ts.map +1 -1
- package/build/_modules/discord-assistant/_services/dias-io.control-service.js +2 -1
- package/build/_modules/discord-assistant/_services/dias-io.control-service.js.map +1 -1
- package/build/_modules/discord-assistant/_services/dias.service-base.d.ts +0 -2
- package/build/_modules/discord-assistant/_services/dias.service-base.d.ts.map +1 -1
- package/build/_modules/discord-assistant/_services/dias.service-base.js +0 -28
- package/build/_modules/discord-assistant/_services/dias.service-base.js.map +1 -1
- package/build/_modules/open-ai/_collections/oai-chunking.util.d.ts +3 -3
- package/build/_modules/open-ai/_collections/oai-chunking.util.d.ts.map +1 -1
- package/build/_modules/open-ai/_collections/oai-chunking.util.js +36 -24
- package/build/_modules/open-ai/_collections/oai-chunking.util.js.map +1 -1
- package/build/_modules/open-ai/_models/interfaces/oai-chunk-compare-result.interface.d.ts +3 -3
- package/build/_modules/open-ai/_models/interfaces/oai-chunk-compare-result.interface.d.ts.map +1 -1
- package/build/_modules/open-ai/_models/interfaces/oai-document-page.interface.d.ts +9 -10
- package/build/_modules/open-ai/_models/interfaces/oai-document-page.interface.d.ts.map +1 -1
- package/build/_modules/open-ai/_models/interfaces/oai-page-compare-result.interface.d.ts +7 -9
- package/build/_modules/open-ai/_models/interfaces/oai-page-compare-result.interface.d.ts.map +1 -1
- package/build/_modules/open-ai/_models/oai-doc-chunk.data-model.d.ts +25 -15
- package/build/_modules/open-ai/_models/oai-doc-chunk.data-model.d.ts.map +1 -1
- package/build/_modules/open-ai/_models/oai-doc-chunk.data-model.js +73 -65
- package/build/_modules/open-ai/_models/oai-doc-chunk.data-model.js.map +1 -1
- package/build/_modules/open-ai/_services/oai-chunk.service-base.d.ts +10 -9
- package/build/_modules/open-ai/_services/oai-chunk.service-base.d.ts.map +1 -1
- package/build/_modules/open-ai/_services/oai-chunk.service-base.js +18 -33
- package/build/_modules/open-ai/_services/oai-chunk.service-base.js.map +1 -1
- package/build/_modules/open-ai/_services/oai-llm-chat.service-base.d.ts +20 -0
- package/build/_modules/open-ai/_services/oai-llm-chat.service-base.d.ts.map +1 -1
- package/build/_modules/open-ai/_services/oai-llm-chat.service-base.js +12 -0
- package/build/_modules/open-ai/_services/oai-llm-chat.service-base.js.map +1 -1
- package/build/_modules/open-ai/_services/oai-llm.service-base.d.ts +4 -1
- package/build/_modules/open-ai/_services/oai-llm.service-base.d.ts.map +1 -1
- package/build/_modules/open-ai/_services/oai-llm.service-base.js +3 -0
- package/build/_modules/open-ai/_services/oai-llm.service-base.js.map +1 -1
- package/build/_modules/open-ai/_services/oai-vector-data.service.d.ts +4 -1
- package/build/_modules/open-ai/_services/oai-vector-data.service.d.ts.map +1 -1
- package/build/_modules/open-ai/_services/oai-vector-data.service.js +80 -24
- package/build/_modules/open-ai/_services/oai-vector-data.service.js.map +1 -1
- package/build/_modules/open-ai/index.d.ts +2 -0
- package/build/_modules/open-ai/index.d.ts.map +1 -1
- package/build/_modules/open-ai/index.js +2 -0
- package/build/_modules/open-ai/index.js.map +1 -1
- package/build/_services/base/data.service.d.ts +6 -0
- package/build/_services/base/data.service.d.ts.map +1 -1
- package/build/_services/base/data.service.js +6 -0
- package/build/_services/base/data.service.js.map +1 -1
- package/build/_services/base/db.service.d.ts +10 -0
- package/build/_services/base/db.service.d.ts.map +1 -1
- package/build/_services/base/db.service.js +10 -0
- package/build/_services/base/db.service.js.map +1 -1
- package/package.json +3 -3
- package/src/_modules/discord-assistant/_collections/dias.util.ts +43 -0
- package/src/_modules/discord-assistant/_models/dias-knowledge.data-model.ts +12 -5
- package/src/_modules/discord-assistant/_services/dias-chunk.data-service.ts +33 -16
- package/src/_modules/discord-assistant/_services/dias-io.control-service.ts +6 -1
- package/src/_modules/discord-assistant/_services/dias.service-base.ts +0 -32
- package/src/_modules/open-ai/_collections/oai-chunking.util.ts +51 -35
- package/src/_modules/open-ai/_models/interfaces/oai-chunk-compare-result.interface.ts +6 -3
- package/src/_modules/open-ai/_models/interfaces/oai-document-page.interface.ts +13 -10
- package/src/_modules/open-ai/_models/interfaces/oai-page-compare-result.interface.ts +10 -13
- package/src/_modules/open-ai/_models/oai-doc-chunk.data-model.ts +81 -72
- package/src/_modules/open-ai/_services/oai-chunk.service-base.ts +36 -39
- package/src/_modules/open-ai/_services/oai-llm-chat.service-base.ts +12 -0
- package/src/_modules/open-ai/_services/oai-llm.service-base.ts +4 -1
- package/src/_modules/open-ai/_services/oai-vector-data.service.ts +124 -37
- package/src/_modules/open-ai/index.ts +2 -0
- package/src/_services/base/data.service.ts +6 -0
- package/src/_services/base/db.service.ts +10 -0
|
@@ -1,16 +1,20 @@
|
|
|
1
|
-
|
|
2
|
-
import {
|
|
3
|
-
import {
|
|
4
|
-
import { DyFM_Error, DyFM_Log } from '@futdevpro/fsm-dynamo';
|
|
1
|
+
import { DyNTS_OAI_Chunk, DyNTS_OAI_defaultChunk_dataParams } from '../../open-ai/_models/oai-doc-chunk.data-model';
|
|
2
|
+
import { DyNTS_OAI_Chunk_DataServiceBase } from '../../open-ai/_services/oai-chunk.service-base';
|
|
3
|
+
import { DyFM_DataModel_Params, DyFM_Error, DyFM_Log } from '@futdevpro/fsm-dynamo';
|
|
5
4
|
import { DyFM_OpenAI_Settings, DyFM_OpenAIModel } from '@futdevpro/fsm-dynamo/open-ai';
|
|
6
5
|
import { DyNTS_DiAs_Main_ControlService } from './dias-main.control-service';
|
|
7
6
|
import { DyNTS_OAI_LLMChat_ServiceBase } from '../../open-ai/_services/oai-llm-chat.service-base';
|
|
8
7
|
import { DyNTS_OAI_GPT_Message } from '../../open-ai/_models/interfaces/oai-gpt-message.interface';
|
|
9
8
|
import { DyNTS_OAI_LLMDefaultPredefined_Requests } from '../../open-ai/_collections/oai-llm-predefined-requests.conts';
|
|
10
9
|
import { DyNTS_OAI_Document_Util } from '../../open-ai/_collections/oai-document.util';
|
|
10
|
+
import { DyNTS_OAI_DocumentPage } from '../../open-ai/_models/interfaces/oai-document-page.interface';
|
|
11
|
+
import { DyNTS_global_settings } from '../../../_collections/global-settings.const';
|
|
11
12
|
|
|
12
13
|
|
|
13
|
-
export abstract class DyNTS_DiAs_Chunk_DataService
|
|
14
|
+
export abstract class DyNTS_DiAs_Chunk_DataService<
|
|
15
|
+
T_Page extends DyNTS_OAI_DocumentPage<T_Chunk>,
|
|
16
|
+
T_Chunk extends DyNTS_OAI_Chunk
|
|
17
|
+
> extends DyNTS_OAI_Chunk_DataServiceBase<T_Page, T_Chunk> {
|
|
14
18
|
|
|
15
19
|
protected abstract readonly mainDiscordBot_CS: DyNTS_DiAs_Main_ControlService;
|
|
16
20
|
|
|
@@ -20,10 +24,28 @@ export abstract class DyNTS_DiAs_Chunk_DataService extends DyNTS_OAI_Chunk_Servi
|
|
|
20
24
|
|
|
21
25
|
constructor(
|
|
22
26
|
issuer: string,
|
|
27
|
+
dataParams?: DyFM_DataModel_Params<T_Chunk>,
|
|
28
|
+
openAiSettings?: DyFM_OpenAI_Settings,
|
|
23
29
|
) {
|
|
24
|
-
super(
|
|
30
|
+
super(
|
|
31
|
+
dataParams ?? DyNTS_OAI_defaultChunk_dataParams as DyFM_DataModel_Params<T_Chunk>,
|
|
32
|
+
openAiSettings ??
|
|
33
|
+
new DyFM_OpenAI_Settings({
|
|
34
|
+
openAIConfig: {
|
|
35
|
+
apiKey: DyNTS_global_settings.env_settings.openAi.apiKey,
|
|
36
|
+
organization: DyNTS_global_settings.env_settings.openAi.organization,
|
|
37
|
+
project: DyNTS_global_settings.env_settings.openAi.project,
|
|
38
|
+
},
|
|
39
|
+
defaultSettings: {
|
|
40
|
+
useModel: DyFM_OpenAIModel.textEmbedding_3Large,
|
|
41
|
+
},
|
|
42
|
+
}),
|
|
43
|
+
issuer
|
|
44
|
+
);
|
|
25
45
|
}
|
|
26
46
|
|
|
47
|
+
abstract getPageLink(page: T_Page, issuer: string): string;
|
|
48
|
+
|
|
27
49
|
async resolveConversation(
|
|
28
50
|
conversation: DyNTS_OAI_GPT_Message[],
|
|
29
51
|
): Promise<string> {
|
|
@@ -71,7 +93,7 @@ export abstract class DyNTS_DiAs_Chunk_DataService extends DyNTS_OAI_Chunk_Servi
|
|
|
71
93
|
numberOfCandidates: number = 100,
|
|
72
94
|
): Promise<string> {
|
|
73
95
|
try {
|
|
74
|
-
const searchResults:
|
|
96
|
+
const searchResults: T_Chunk[] = await this.vectorSearchUsingConversation(
|
|
75
97
|
conversation,
|
|
76
98
|
limit,
|
|
77
99
|
numberOfCandidates,
|
|
@@ -79,13 +101,8 @@ export abstract class DyNTS_DiAs_Chunk_DataService extends DyNTS_OAI_Chunk_Servi
|
|
|
79
101
|
|
|
80
102
|
const parsedSearchResults: string = searchResults.map((result) => {
|
|
81
103
|
return '' +
|
|
82
|
-
'\npage: ' + result.
|
|
83
|
-
'\npageLink: ' +
|
|
84
|
-
result.documentName,
|
|
85
|
-
result.workspaceId,
|
|
86
|
-
result.documentId,
|
|
87
|
-
result.documentPageId
|
|
88
|
-
) +
|
|
104
|
+
'\npage: ' + result.pageName +
|
|
105
|
+
'\npageLink: ' + result.pageLink +
|
|
89
106
|
'\ncontent: ' +
|
|
90
107
|
`\n${result.chunkParentedContent.replace(/\n/g, '\n ')}`
|
|
91
108
|
}).join('\n\n');
|
|
@@ -117,7 +134,7 @@ export abstract class DyNTS_DiAs_Chunk_DataService extends DyNTS_OAI_Chunk_Servi
|
|
|
117
134
|
conversation: DyNTS_OAI_GPT_Message[],
|
|
118
135
|
limit: number = 3,
|
|
119
136
|
numberOfCandidates: number = 100,
|
|
120
|
-
): Promise<
|
|
137
|
+
): Promise<T_Chunk[]> {
|
|
121
138
|
try {
|
|
122
139
|
const lastMessage: DyNTS_OAI_GPT_Message = conversation[conversation.length - 1];
|
|
123
140
|
|
|
@@ -134,7 +151,7 @@ export abstract class DyNTS_DiAs_Chunk_DataService extends DyNTS_OAI_Chunk_Servi
|
|
|
134
151
|
|
|
135
152
|
DyFM_Log.testInfo('🔍 Document search question:', searchQuestion);
|
|
136
153
|
|
|
137
|
-
const searchResults:
|
|
154
|
+
const searchResults: T_Chunk[] = await this.vectorSearch(
|
|
138
155
|
searchQuestion + ' ' + lastMessage.content,
|
|
139
156
|
'chunkParentedContentVectorized',
|
|
140
157
|
limit,
|
|
@@ -7,6 +7,7 @@ import { DyNTS_DiAs_Main_ControlService } from './dias-main.control-service'
|
|
|
7
7
|
import { DyNTS_DiBo_global_settings } from '../../discord-bot/_collections/dibo-global-settings.conts'
|
|
8
8
|
import { DyNTS_OAI_LLMChat_ServiceBase } from '../../open-ai/_services/oai-llm-chat.service-base'
|
|
9
9
|
import { DyNTS_OAI_GPT_Message } from '../../open-ai/_models/interfaces/oai-gpt-message.interface'
|
|
10
|
+
import { DyNTS_DiAs_Util } from '../_collections/dias.util'
|
|
10
11
|
|
|
11
12
|
export abstract class DyNTS_DiAs_IO_ControlService extends DyNTS_DiBo_IO_ControlService {
|
|
12
13
|
|
|
@@ -27,7 +28,11 @@ export abstract class DyNTS_DiAs_IO_ControlService extends DyNTS_DiBo_IO_Control
|
|
|
27
28
|
try {
|
|
28
29
|
const messages: Message[] = await this.mainDiscordBot_CS.gatherMessages(message);
|
|
29
30
|
const oaiMessages: DyNTS_OAI_GPT_Message[] =
|
|
30
|
-
|
|
31
|
+
DyNTS_DiAs_Util.convertDiscordMessagesToOAIConversation(
|
|
32
|
+
messages,
|
|
33
|
+
this.mainDiscordBot_CS.botClientId,
|
|
34
|
+
this.mainDiscordBot_CS.botDisplayName,
|
|
35
|
+
);
|
|
31
36
|
|
|
32
37
|
const gptResult: string = await this.llmChat_CS.resolveConversation(
|
|
33
38
|
oaiMessages,
|
|
@@ -46,36 +46,4 @@ export abstract class DyNTS_DiAs_ServiceBase extends DyNTS_DiBo_Main_ControlServ
|
|
|
46
46
|
|
|
47
47
|
return messages;
|
|
48
48
|
}
|
|
49
|
-
|
|
50
|
-
async convertDiscordMessagesToOAIConversation(
|
|
51
|
-
messages: Message[]
|
|
52
|
-
): Promise<DyNTS_OAI_GPT_Message[]> {
|
|
53
|
-
return messages.map(message => {
|
|
54
|
-
if (
|
|
55
|
-
(message?.author &&
|
|
56
|
-
(message.author.bot ||
|
|
57
|
-
message.author.id === this.botClientId ||
|
|
58
|
-
message.author.displayName === this.botDisplayName))
|
|
59
|
-
) {
|
|
60
|
-
if (DyNTS_DiAs_global_settings.userTranslationFlags.some(
|
|
61
|
-
flag => message.content.includes(flag)
|
|
62
|
-
)) {
|
|
63
|
-
return {
|
|
64
|
-
role: DyNTS_OAI_GPT_Message_Role.user,
|
|
65
|
-
content: message.content,
|
|
66
|
-
};
|
|
67
|
-
} else {
|
|
68
|
-
return {
|
|
69
|
-
role: DyNTS_OAI_GPT_Message_Role.assistant,
|
|
70
|
-
content: message.content,
|
|
71
|
-
};
|
|
72
|
-
}
|
|
73
|
-
} else {
|
|
74
|
-
return {
|
|
75
|
-
role: DyNTS_OAI_GPT_Message_Role.user,
|
|
76
|
-
content: message.content,
|
|
77
|
-
};
|
|
78
|
-
}
|
|
79
|
-
})
|
|
80
|
-
}
|
|
81
49
|
}
|
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
import { DyFM_AnyError, DyFM_Error, DyFM_Error_Settings, DyFM_Log } from '@futdevpro/fsm-dynamo';
|
|
2
|
-
import {
|
|
2
|
+
import { DyNTS_OAI_Chunk } from '../_models/oai-doc-chunk.data-model';
|
|
3
3
|
import { DyNTS_OAI_DocumentPage } from '../_models/interfaces/oai-document-page.interface';
|
|
4
4
|
import { DyNTS_global_settings } from '../../../_collections/global-settings.const';
|
|
5
5
|
|
|
@@ -20,10 +20,11 @@ export class DyNTS_OAI_Chunking_Util {
|
|
|
20
20
|
* @param issuer - Initiator of the request
|
|
21
21
|
* @returns Promise<CU_DocChunk[]> - List of the created chunks
|
|
22
22
|
*/
|
|
23
|
-
static async chunkMdContent(
|
|
24
|
-
page: DyNTS_OAI_DocumentPage
|
|
23
|
+
static async chunkMdContent<T_Chunk extends DyNTS_OAI_Chunk>(
|
|
24
|
+
page: DyNTS_OAI_DocumentPage<T_Chunk>,
|
|
25
|
+
getPageLink: (page: DyNTS_OAI_DocumentPage<T_Chunk>, issuer: string) => string,
|
|
25
26
|
issuer: string
|
|
26
|
-
): Promise<
|
|
27
|
+
): Promise<T_Chunk[]> {
|
|
27
28
|
try {
|
|
28
29
|
if (page.content.includes('#####')) {
|
|
29
30
|
DyFM_Log.warn('lvl5+ headers are not supported yet for page: ' + page.name);
|
|
@@ -34,7 +35,7 @@ export class DyNTS_OAI_Chunking_Util {
|
|
|
34
35
|
let leftovers = page.content;
|
|
35
36
|
const maxChunkSize = DyNTS_global_settings.docChunking.maxChunkSize;
|
|
36
37
|
const maxChunkCount = DyNTS_global_settings.docChunking.maxChunkCount;
|
|
37
|
-
const chunks:
|
|
38
|
+
const chunks: T_Chunk[] = [];
|
|
38
39
|
let chunkIndex = 0;
|
|
39
40
|
let currentPosition = 0; // Track current position in original content
|
|
40
41
|
|
|
@@ -58,20 +59,25 @@ export class DyNTS_OAI_Chunking_Util {
|
|
|
58
59
|
if (chunkContent.length > 0) {
|
|
59
60
|
const chunkWithHierarchy = this.addHeaderHierarchy(chunkContent, headerHierarchy, currentPosition, currentPosition + leftovers.length, lastHeaders);
|
|
60
61
|
const chunkPath = this.buildChunkPath(page.name, headerHierarchy, currentPosition, currentPosition + leftovers.length, lastHeaders);
|
|
62
|
+
const headParents = headerHierarchy.map(header => header.header);
|
|
63
|
+
const allFlaggedParents = [ ...page.allFlaggedParents, ...headParents ];
|
|
61
64
|
|
|
62
|
-
const chunk = new
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
documentPageId: page._id,
|
|
65
|
+
const chunk: T_Chunk = new DyNTS_OAI_Chunk({
|
|
66
|
+
filePathParents: page.path,
|
|
67
|
+
chunkHeadParents: headParents,
|
|
66
68
|
|
|
67
|
-
|
|
68
|
-
|
|
69
|
+
allFlaggedParents: allFlaggedParents,
|
|
70
|
+
allFlaggedParentsMerged: allFlaggedParents.join('/'),
|
|
69
71
|
|
|
72
|
+
documentName: page.documentId,
|
|
73
|
+
pageName: page.name,
|
|
74
|
+
pageLink: getPageLink(page, issuer),
|
|
70
75
|
chunkIndex: chunkIndex,
|
|
71
|
-
chunkPath: chunkPath,
|
|
72
76
|
|
|
73
77
|
chunkOriginalContent: chunkContent,
|
|
74
|
-
|
|
78
|
+
|
|
79
|
+
chunkParentedContent: chunkWithHierarchy,
|
|
80
|
+
}) as T_Chunk;
|
|
75
81
|
chunk.chunkParentedContent = this.assembleChunkWithHeaders(chunk, issuer);
|
|
76
82
|
chunks.push(chunk);
|
|
77
83
|
}
|
|
@@ -99,23 +105,28 @@ export class DyNTS_OAI_Chunking_Util {
|
|
|
99
105
|
|
|
100
106
|
const chunkWithHierarchy = this.addHeaderHierarchy(chunkContent, headerHierarchy, currentPosition, currentPosition + breakPoint.index, lastHeaders);
|
|
101
107
|
const chunkPath = this.buildChunkPath(page.name, headerHierarchy, currentPosition, currentPosition + breakPoint.index, lastHeaders);
|
|
108
|
+
const headParents = headerHierarchy.map(header => header.header);
|
|
109
|
+
const allFlaggedParents = [ ...page.allFlaggedParents, ...headParents ];
|
|
102
110
|
|
|
103
111
|
/* console.log(`Chunk path:`, chunkPath);
|
|
104
112
|
console.log(`Chunk with hierarchy preview:`, chunkWithHierarchy.substring(0, 100) + '...'); */
|
|
105
113
|
|
|
106
|
-
const chunk = new
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
documentPageId: page._id,
|
|
110
|
-
|
|
111
|
-
documentName: page.name,
|
|
112
|
-
documentPath: page.path,
|
|
114
|
+
const chunk: T_Chunk = new DyNTS_OAI_Chunk({
|
|
115
|
+
filePathParents: page.path,
|
|
116
|
+
chunkHeadParents: headParents,
|
|
113
117
|
|
|
118
|
+
allFlaggedParents: allFlaggedParents,
|
|
119
|
+
allFlaggedParentsMerged: allFlaggedParents.join('/'),
|
|
120
|
+
|
|
121
|
+
documentName: page.documentId,
|
|
122
|
+
pageName: page.name,
|
|
123
|
+
pageLink: getPageLink(page, issuer),
|
|
114
124
|
chunkIndex: chunkIndex,
|
|
115
|
-
|
|
116
|
-
|
|
125
|
+
|
|
117
126
|
chunkOriginalContent: chunkContent,
|
|
118
|
-
|
|
127
|
+
|
|
128
|
+
chunkParentedContent: chunkWithHierarchy,
|
|
129
|
+
}) as T_Chunk;
|
|
119
130
|
chunk.chunkParentedContent = this.assembleChunkWithHeaders(chunk, issuer);
|
|
120
131
|
chunks.push(chunk);
|
|
121
132
|
chunkIndex++;
|
|
@@ -132,20 +143,25 @@ export class DyNTS_OAI_Chunking_Util {
|
|
|
132
143
|
const chunkContent = leftovers.substring(0, maxChunkSize).trim();
|
|
133
144
|
const chunkWithHierarchy = this.addHeaderHierarchy(chunkContent, headerHierarchy, currentPosition, currentPosition + maxChunkSize, lastHeaders);
|
|
134
145
|
const chunkPath = this.buildChunkPath(page.name, headerHierarchy, currentPosition, currentPosition + maxChunkSize, lastHeaders);
|
|
146
|
+
const headParents = headerHierarchy.map(header => header.header);
|
|
147
|
+
const allFlaggedParents = [ ...page.allFlaggedParents, ...headParents ];
|
|
135
148
|
|
|
136
|
-
const chunk = new
|
|
137
|
-
|
|
138
|
-
|
|
139
|
-
documentPageId: page._id,
|
|
149
|
+
const chunk: T_Chunk = new DyNTS_OAI_Chunk({
|
|
150
|
+
filePathParents: page.path,
|
|
151
|
+
chunkHeadParents: headParents,
|
|
140
152
|
|
|
141
|
-
|
|
142
|
-
|
|
153
|
+
allFlaggedParents: allFlaggedParents,
|
|
154
|
+
allFlaggedParentsMerged: allFlaggedParents.join('/'),
|
|
143
155
|
|
|
156
|
+
documentName: page.documentId,
|
|
157
|
+
pageName: page.name,
|
|
158
|
+
pageLink: getPageLink(page, issuer),
|
|
144
159
|
chunkIndex: chunkIndex,
|
|
145
|
-
chunkPath: chunkPath,
|
|
146
160
|
|
|
147
161
|
chunkOriginalContent: chunkContent,
|
|
148
|
-
|
|
162
|
+
|
|
163
|
+
chunkParentedContent: chunkWithHierarchy,
|
|
164
|
+
}) as T_Chunk;
|
|
149
165
|
chunk.chunkParentedContent = this.assembleChunkWithHeaders(chunk, issuer);
|
|
150
166
|
chunks.push(chunk);
|
|
151
167
|
chunkIndex++;
|
|
@@ -190,12 +206,12 @@ export class DyNTS_OAI_Chunking_Util {
|
|
|
190
206
|
}
|
|
191
207
|
}
|
|
192
208
|
|
|
193
|
-
private static assembleChunkWithHeaders(chunk:
|
|
209
|
+
private static assembleChunkWithHeaders(chunk: DyNTS_OAI_Chunk, issuer: string): string {
|
|
194
210
|
try {
|
|
195
|
-
const fileHeaders = chunk.
|
|
211
|
+
const fileHeaders = chunk.filePathParents.map(
|
|
196
212
|
(header, index) => `${'>'.repeat(index + 1)} **${header}**`
|
|
197
213
|
).join('\n');
|
|
198
|
-
const headers = chunk.
|
|
214
|
+
const headers = chunk.chunkHeadParents.filter(
|
|
199
215
|
path => !chunk.chunkOriginalContent.includes(path)
|
|
200
216
|
).join('\n');
|
|
201
217
|
return `${fileHeaders}\n\n${headers}\n\n${chunk.chunkOriginalContent}`;
|
|
@@ -208,7 +224,7 @@ export class DyNTS_OAI_Chunking_Util {
|
|
|
208
224
|
}
|
|
209
225
|
|
|
210
226
|
static reassembleChunksToContent(
|
|
211
|
-
chunks:
|
|
227
|
+
chunks: DyNTS_OAI_Chunk[],
|
|
212
228
|
issuer: string
|
|
213
229
|
): string {
|
|
214
230
|
try {
|
|
@@ -1,10 +1,13 @@
|
|
|
1
1
|
|
|
2
2
|
|
|
3
3
|
import { DyNTS_OAI_CompareResult_Type } from '../../_enums/oai-compare-result-type.enum';
|
|
4
|
-
import {
|
|
4
|
+
import { DyNTS_OAI_Chunk } from '../oai-doc-chunk.data-model';
|
|
5
5
|
|
|
6
6
|
|
|
7
|
-
export interface DyNTS_OAI_ChunkCompareResult {
|
|
7
|
+
export interface DyNTS_OAI_ChunkCompareResult<T extends DyNTS_OAI_Chunk> {
|
|
8
8
|
result: DyNTS_OAI_CompareResult_Type;
|
|
9
|
-
oldChunk:
|
|
9
|
+
oldChunk: T;
|
|
10
10
|
}
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
|
|
@@ -1,20 +1,23 @@
|
|
|
1
1
|
import { DyFM_Metadata } from '@futdevpro/fsm-dynamo';
|
|
2
|
-
import {
|
|
2
|
+
import { DyNTS_OAI_Chunk } from '../oai-doc-chunk.data-model';
|
|
3
3
|
|
|
4
4
|
|
|
5
5
|
|
|
6
6
|
|
|
7
|
-
export interface DyNTS_OAI_DocumentPage extends
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
parentPageId: string;
|
|
7
|
+
export interface DyNTS_OAI_DocumentPage<T_Chunk extends DyNTS_OAI_Chunk> {
|
|
8
|
+
documentId: string;
|
|
9
|
+
pageId: string;
|
|
11
10
|
|
|
11
|
+
name: string;
|
|
12
|
+
|
|
13
|
+
allFlaggedParents: string[];
|
|
14
|
+
allFlaggedParentsMerged: string;
|
|
15
|
+
|
|
16
|
+
/** Path to the page in the document, starting with the document name */
|
|
12
17
|
path: string[];
|
|
13
|
-
documentParent: string;
|
|
14
18
|
|
|
15
|
-
|
|
19
|
+
chunks: T_Chunk[];
|
|
16
20
|
content: string;
|
|
17
|
-
|
|
18
|
-
pages: DyNTS_OAI_DocumentPage[];
|
|
19
|
-
chunks: DyNTS_OAI_DocChunk[];
|
|
20
21
|
}
|
|
22
|
+
|
|
23
|
+
|
|
@@ -1,18 +1,15 @@
|
|
|
1
|
+
import { DyNTS_OAI_Chunk } from '../oai-doc-chunk.data-model';
|
|
2
|
+
import { DyNTS_OAI_ChunkCompareResult, DyNTS_OAI_CompareResult_Type, DyNTS_OAI_DocumentPage } from '../..';
|
|
1
3
|
|
|
2
|
-
|
|
3
|
-
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
export interface DyNTS_OAI_PageCompareResult {
|
|
4
|
+
export interface DyNTS_OAI_PageCompareResult<
|
|
5
|
+
T_Doc extends DyNTS_OAI_DocumentPage<T_Chunk>,
|
|
6
|
+
T_Chunk extends DyNTS_OAI_Chunk
|
|
7
|
+
> {
|
|
11
8
|
result: DyNTS_OAI_CompareResult_Type;
|
|
12
|
-
subjectPage:
|
|
9
|
+
subjectPage: T_Doc;
|
|
13
10
|
oldPageContent: string;
|
|
14
|
-
chunkCompareResults: DyNTS_OAI_ChunkCompareResult[];
|
|
11
|
+
chunkCompareResults: DyNTS_OAI_ChunkCompareResult<T_Chunk>[];
|
|
15
12
|
|
|
16
|
-
newChunks:
|
|
17
|
-
oldChunks:
|
|
13
|
+
newChunks: T_Chunk[];
|
|
14
|
+
oldChunks: T_Chunk[];
|
|
18
15
|
}
|
|
@@ -1,92 +1,101 @@
|
|
|
1
1
|
|
|
2
2
|
|
|
3
3
|
|
|
4
|
-
import { DyFM_DataModel_Params, DyFM_Metadata } from '@futdevpro/fsm-dynamo';
|
|
4
|
+
import { DyFM_DataModel_Params, DyFM_DataProperties, DyFM_Metadata } from '@futdevpro/fsm-dynamo';
|
|
5
5
|
import { DyFM_OpenAIModel } from '@futdevpro/fsm-dynamo/open-ai';
|
|
6
6
|
|
|
7
|
+
export class DyNTS_OAI_Chunk extends DyFM_Metadata {
|
|
8
|
+
/** Path to the page where the chunk is located ending with the page name */
|
|
9
|
+
filePathParents: string[] = [];
|
|
10
|
+
/** Headers that are parents of the chunk */
|
|
11
|
+
chunkHeadParents: string[] = [];
|
|
7
12
|
|
|
13
|
+
/**
|
|
14
|
+
* All parents of the chunk, flagged, from the highest level to the lowest local level
|
|
15
|
+
* like: System:FDP, Project:Organizer, Side:Backend, Module:Chat, etc.
|
|
16
|
+
* */
|
|
17
|
+
allFlaggedParents: string[] = [];
|
|
18
|
+
/**
|
|
19
|
+
* All parents of the chunk, flagged, merged into a single string with '/' as separator
|
|
20
|
+
* */
|
|
21
|
+
allFlaggedParentsMerged: string;
|
|
22
|
+
|
|
23
|
+
documentName: string;
|
|
24
|
+
pageName: string;
|
|
25
|
+
pageLink: string;
|
|
26
|
+
/** Index of the chunk in the page */
|
|
27
|
+
chunkIndex: number;
|
|
8
28
|
|
|
9
|
-
|
|
10
|
-
workspaceId?: string;
|
|
11
|
-
documentId?: string;
|
|
12
|
-
documentPageId?: string;
|
|
13
|
-
mainDocumentName?: string;
|
|
14
|
-
documentName?: string;
|
|
15
|
-
documentPath?: string[];
|
|
29
|
+
chunkOriginalContent: string;
|
|
16
30
|
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
chunkPath?: string[];
|
|
20
|
-
|
|
21
|
-
chunkOriginalContent?: string;
|
|
22
|
-
|
|
23
|
-
chunkParentedContent?: string;
|
|
24
|
-
chunkParentedContentVectorized?: number[] = [];
|
|
31
|
+
chunkParentedContent: string;
|
|
32
|
+
chunkParentedContentVectorized?: number[];
|
|
25
33
|
|
|
26
34
|
constructor(
|
|
27
|
-
set
|
|
35
|
+
set: DyNTS_OAI_Chunk
|
|
28
36
|
) {
|
|
29
37
|
super(set);
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
Object.assign(this, set);
|
|
33
|
-
}
|
|
38
|
+
|
|
39
|
+
Object.assign(this, set);
|
|
34
40
|
}
|
|
35
41
|
}
|
|
36
42
|
|
|
37
|
-
export const
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
},
|
|
56
|
-
documentName: {
|
|
57
|
-
type: 'string',
|
|
58
|
-
required: true,
|
|
59
|
-
},
|
|
60
|
-
documentPath: {
|
|
61
|
-
type: 'string[]',
|
|
62
|
-
required: true,
|
|
63
|
-
},
|
|
43
|
+
export const chunk_propertyParams: DyFM_DataProperties<DyNTS_OAI_Chunk> = {
|
|
44
|
+
filePathParents: {
|
|
45
|
+
type: 'string[]',
|
|
46
|
+
required: true,
|
|
47
|
+
},
|
|
48
|
+
chunkHeadParents: {
|
|
49
|
+
type: 'string[]',
|
|
50
|
+
required: true,
|
|
51
|
+
},
|
|
52
|
+
|
|
53
|
+
allFlaggedParents: {
|
|
54
|
+
type: 'string[]',
|
|
55
|
+
required: true,
|
|
56
|
+
},
|
|
57
|
+
allFlaggedParentsMerged: {
|
|
58
|
+
type: 'string', index: true,
|
|
59
|
+
required: true,
|
|
60
|
+
},
|
|
64
61
|
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
62
|
+
documentName: {
|
|
63
|
+
type: 'string', index: true,
|
|
64
|
+
required: true,
|
|
65
|
+
},
|
|
66
|
+
pageName: {
|
|
67
|
+
type: 'string', index: true,
|
|
68
|
+
required: true,
|
|
69
|
+
},
|
|
70
|
+
pageLink: {
|
|
71
|
+
type: 'string', index: true,
|
|
72
|
+
required: true,
|
|
73
|
+
},
|
|
74
|
+
chunkIndex: {
|
|
75
|
+
type: 'number', index: true,
|
|
76
|
+
required: true,
|
|
77
|
+
},
|
|
73
78
|
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
|
|
79
|
+
chunkOriginalContent: {
|
|
80
|
+
type: 'string',
|
|
81
|
+
required: true,
|
|
82
|
+
},
|
|
78
83
|
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
},
|
|
84
|
+
chunkParentedContent: {
|
|
85
|
+
type: 'string', index: true,
|
|
86
|
+
required: true,
|
|
87
|
+
},
|
|
88
|
+
chunkParentedContentVectorized: {
|
|
89
|
+
type: 'number[]',
|
|
90
|
+
required: true,
|
|
91
|
+
embeddingModel: DyFM_OpenAIModel.textEmbedding_3Large,
|
|
92
|
+
vectorizedFrom: [ 'chunkParentedContent' ],
|
|
93
|
+
vectorizeUseIndex: 'chunkParentedContentVectorized',
|
|
94
|
+
vectorizeAlways: true,
|
|
91
95
|
},
|
|
96
|
+
}
|
|
97
|
+
|
|
98
|
+
export const DyNTS_OAI_defaultChunk_dataParams: DyFM_DataModel_Params<DyNTS_OAI_Chunk> = new DyFM_DataModel_Params<DyNTS_OAI_Chunk>({
|
|
99
|
+
dataName: 'doc_chunk',
|
|
100
|
+
properties: chunk_propertyParams,
|
|
92
101
|
});
|