@rws-framework/ai-tools 1.0.0 → 1.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +2 -3
- package/src/models/convo/ConvoLoader.ts +15 -79
package/package.json
CHANGED
@@ -1,7 +1,7 @@
 {
   "name": "@rws-framework/ai-tools",
   "private": false,
-  "version": "1.0.0",
+  "version": "1.0.1",
   "description": "",
   "main": "src/index.ts",
   "scripts": {},
@@ -14,7 +14,6 @@
     "@nestjs/core": "^10.3.2",
     "@rws-framework/console": "*",
     "@rws-framework/server": "^3.*",
-    "langchain": "^0.3.15",
     "uuid": "^9.0.0",
     "xml2js": "^0.6.2"
   },
@@ -40,6 +39,6 @@
   },
   "repository": {
     "type": "git",
-    "url": "https://github.com/
+    "url": "https://github.com/rws-framework/ai-tools.git"
   }
 }
package/src/models/convo/ConvoLoader.ts
CHANGED

@@ -6,16 +6,14 @@ import RWSPrompt, { IRWSPromptJSON, ILLMChunk } from '../../models/prompts/_prom
 import {VectorStoreService} from '../../services/VectorStoreService';
 import RWSVectorStore, { VectorDocType } from '../../models/convo/VectorStore';
 
-import { Document } from 'langchain/
-import {
-
-import { RecursiveCharacterTextSplitter } from 'langchain/text_splitter';
+import { Document } from '@langchain/core/documents';
+import { UnstructuredLoader } from '@langchain/community/document_loaders/fs/unstructured';
+
 import { BaseChatModel } from "@langchain/core/language_models/chat_models";
 import { BaseLanguageModelInterface, BaseLanguageModelInput } from '@langchain/core/language_models/base';
-import {
-import { RunnableConfig, Runnable } from '@langchain/core/runnables';
+import { Runnable } from '@langchain/core/runnables';
 import { BaseMessage } from '@langchain/core/messages';
-
+
 
 import { v4 as uuid } from 'uuid';
 import xml2js from 'xml2js';
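Note on the import hunk above: 1.0.1 drops the monolithic langchain package (also removed from package.json dependencies) in favor of the scoped @langchain/core and @langchain/community entrypoints, and brings in UnstructuredLoader as the document loader. A minimal standalone sketch of that loader, under the assumption that an Unstructured API endpoint is reachable with its default configuration (the file path and function name are illustrative, not from this package):

    import { Document } from '@langchain/core/documents';
    import { UnstructuredLoader } from '@langchain/community/document_loaders/fs/unstructured';

    // Hypothetical usage; ConvoLoader constructs it the same way further
    // down in this diff: new UnstructuredLoader(filePath).
    async function loadDocs(filePath: string): Promise<Document[]> {
        const loader = new UnstructuredLoader(filePath);
        // load() parses the file via the Unstructured API and returns
        // one Document per extracted element.
        return loader.load();
    }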
@@ -69,16 +67,15 @@ type LLMType = BaseLanguageModelInterface | Runnable<BaseLanguageModelInput, str
 
 @InjectServices([VectorStoreService])
 class ConvoLoader<LLMChat extends BaseChatModel> {
-    private loader:
-    private docSplitter: RecursiveCharacterTextSplitter;
+    private loader: UnstructuredLoader;
+    // private docSplitter: RecursiveCharacterTextSplitter;
 
     private embeddings: IEmbeddingsHandler<any>;
 
     private docs: Document[] = [];
     private _initiated = false;
     private store: RWSVectorStore;
-    private convo_id: string;
-    private llmChain: BaseChain;
+    private convo_id: string;
     private llmChat: LLMChat;
     private chatConstructor: new (config: any) => LLMChat;
     private thePrompt: RWSPrompt;
@@ -125,18 +122,18 @@ class ConvoLoader<LLMChat extends BaseChatModel> {
 
         if(!fs.existsSync(splitDir)){
             console.log(`Split dir ${ConsoleService.color().magentaBright(splitDir)} doesn't exist. Splitting docs...`);
-            this.loader = new
+            this.loader = new UnstructuredLoader(filePath);
 
-            this.docSplitter = new RecursiveCharacterTextSplitter({
-                chunkSize: params.chunkSize, // The size of the chunk that should be split.
-                chunkOverlap: params.chunkOverlap, // Adding overalap so that if a text is broken inbetween, next document may have part of the previous document
-                separators: params.separators // In this case we are assuming that /n/n would mean one whole sentence. In case there is no nearing /n/n then "." will be used instead. This can be anything that helps derive a complete sentence .
-            });
+            // this.docSplitter = new RecursiveCharacterTextSplitter({
+            //     chunkSize: params.chunkSize, // The size of the chunk that should be split.
+            //     chunkOverlap: params.chunkOverlap, // Adding overalap so that if a text is broken inbetween, next document may have part of the previous document
+            //     separators: params.separators // In this case we are assuming that /n/n would mean one whole sentence. In case there is no nearing /n/n then "." will be used instead. This can be anything that helps derive a complete sentence .
+            // });
 
             fs.mkdirSync(splitDir, { recursive: true });
 
             const orgDocs = await this.loader.load();
-            const splitDocs = await this.docSplitter.splitDocuments(orgDocs);
+            const splitDocs: any[] = [];//await this.docSplitter.splitDocuments(orgDocs);
 
             const avgCharCountPre = this.avgDocLength(orgDocs);
             const avgCharCountPost = this.avgDocLength(splitDocs);
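The hunk above disables splitting entirely: the RecursiveCharacterTextSplitter block is commented out and splitDocs becomes an empty array, so avgDocLength over it divides zero by zero and avgCharCountPost will be NaN. For reference, a sketch of the step the commented code used to perform, written against the current @langchain/textsplitters package (an assumption; the removed 1.0.0 code imported the splitter from the legacy langchain/text_splitter entrypoint, and the values here are illustrative stand-ins for params):

    import { RecursiveCharacterTextSplitter } from '@langchain/textsplitters';
    import type { Document } from '@langchain/core/documents';

    async function splitDocuments(orgDocs: Document[]): Promise<Document[]> {
        const splitter = new RecursiveCharacterTextSplitter({
            chunkSize: 2000,               // max characters per chunk (stand-in for params.chunkSize)
            chunkOverlap: 200,             // overlap so text cut at a boundary survives into the next chunk
            separators: ['\n\n', '.', ' '] // prefer paragraph breaks, then sentence ends, then spaces
        });
        return splitter.splitDocuments(orgDocs);
    }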
@@ -214,39 +211,6 @@ class ConvoLoader<LLMChat extends BaseChatModel> {
         return documents.reduce((sum, doc: Document) => sum + doc.pageContent.length, 0) / documents.length;
     };
 
-    async call(values: ChainValues, cfg: any, debugCallback: (debugData: IConvoDebugXMLData) => Promise<IConvoDebugXMLData> = null): Promise<RWSPrompt>
-    {
-        const output = await (this.chain()).invoke(values, cfg) as IChainCallOutput;
-        this.thePrompt.listen(output.text);
-
-        await this.debugCall(debugCallback);
-
-        return this.thePrompt;
-    }
-
-    async callStream(values: ChainValues, callback: (streamChunk: ILLMChunk) => void, end: () => void = () => {}, cfg: Partial<RunnableConfig> = {}, debugCallback?: (debugData: IConvoDebugXMLData) => Promise<IConvoDebugXMLData>): Promise<RWSPrompt>
-    {
-        const _self = this;
-
-        await this.chain().invoke(values, { callbacks: [{
-            handleLLMNewToken(token: string) {
-                callback({
-                    content: token,
-                    status: 'rws_streaming'
-                });
-
-                _self.thePrompt.listen(token, true);
-            }
-        }
-        ]});
-
-        end();
-
-        this.debugCall(debugCallback);
-
-        return this.thePrompt;
-    }
-
     async similaritySearch(query: string, splitCount: number): Promise<string>
     {
         console.log('Store is ready. Searching for embedds...');
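The removed call/callStream pair above delivered tokens through the legacy chain's handleLLMNewToken callback. With the chain layer gone, the same token-by-token delivery is available directly on any BaseChatModel via stream(); a minimal sketch, assuming a ready model instance and a plain string input (not code from this package):

    import type { BaseChatModel } from '@langchain/core/language_models/chat_models';

    async function streamChat(
        model: BaseChatModel,
        input: string,
        onToken: (token: string) => void
    ): Promise<void> {
        // stream() yields AIMessageChunk objects as the model produces tokens
        const stream = await model.stream(input);
        for await (const chunk of stream) {
            if (typeof chunk.content === 'string') {
                onToken(chunk.content);
            }
        }
    }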
@@ -275,34 +239,6 @@ class ConvoLoader<LLMChat extends BaseChatModel> {
         }
     }
 
-    chain(): BaseChain
-    {
-        if(this.llmChain){
-            return this.llmChain;
-        }
-
-        if(!this.thePrompt){
-            throw new Error('No prompt initialized for conversation');
-        }
-
-        const chainParams: { prompt: PromptTemplate, values?: ChainValues } = {
-            prompt: this.thePrompt.getMultiTemplate()
-        };
-
-        this.createChain(chainParams);
-
-        return this.llmChain;
-    }
-
-    private async createChain(input: { prompt: PromptTemplate, values?: ChainValues }): Promise<BaseChain>
-    {
-        this.llmChain = new ConversationChain({
-            llm: this.llmChat as any,
-            prompt: input.prompt as any,
-        });
-
-        return this.llmChain;
-    }
 
     async waitForInit(): Promise<ConvoLoader<LLMChat> | null>
     {