@drax/ai-back 0.13.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/config/OpenAiConfig.js +7 -0
- package/dist/factory/AiProviderFactory.js +17 -0
- package/dist/factory/OpenAiProviderFactory.js +13 -0
- package/dist/index.js +5 -0
- package/dist/interfaces/IAIProvider.js +1 -0
- package/dist/providers/OpenAiProvider.js +75 -0
- package/dist/services/KnowledgeService.js +52 -0
- package/dist/vectors/ChromaVector.js +65 -0
- package/package.json +39 -0
- package/src/config/OpenAiConfig.ts +12 -0
- package/src/factory/AiProviderFactory.ts +25 -0
- package/src/factory/OpenAiProviderFactory.ts +24 -0
- package/src/index.ts +20 -0
- package/src/interfaces/IAIProvider.ts +41 -0
- package/src/providers/OpenAiProvider.ts +106 -0
- package/src/services/KnowledgeService.ts +66 -0
- package/src/vectors/ChromaVector.ts +80 -0
- package/test/KnowledgeService.test.ts +36 -0
- package/test/OpenAiProvider.test.ts +184 -0
- package/tsconfig.json +16 -0
- package/tsconfig.tsbuildinfo +1 -0
- package/types/config/OpenAiConfig.d.ts +7 -0
- package/types/config/OpenAiConfig.d.ts.map +1 -0
- package/types/factory/AiProviderFactory.d.ts +8 -0
- package/types/factory/AiProviderFactory.d.ts.map +1 -0
- package/types/factory/OpenAiProviderFactory.d.ts +8 -0
- package/types/factory/OpenAiProviderFactory.d.ts.map +1 -0
- package/types/index.d.ts +8 -0
- package/types/index.d.ts.map +1 -0
- package/types/interfaces/IAIProvider.d.ts +34 -0
- package/types/interfaces/IAIProvider.d.ts.map +1 -0
- package/types/providers/OpenAiProvider.d.ts +17 -0
- package/types/providers/OpenAiProvider.d.ts.map +1 -0
- package/types/services/KnowledgeService.d.ts +10 -0
- package/types/services/KnowledgeService.d.ts.map +1 -0
- package/types/vectors/ChromaVector.d.ts +21 -0
- package/types/vectors/ChromaVector.d.ts.map +1 -0
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
import OpenAiProviderFactory from "./OpenAiProviderFactory.js";
|
|
2
|
+
/**
 * Resolves the AI provider implementation registered under a provider name.
 */
class AiProviderFactory {
    /**
     * Returns the provider instance for `provider`.
     *
     * The name is validated on every call. Previously the first resolved
     * provider was cached and returned for any later name, so an unsupported
     * name silently received the OpenAI provider.
     *
     * @param {string} provider - Provider identifier; only 'OpenAi' is supported.
     * @returns The instance returned by OpenAiProviderFactory.instance().
     * @throws {Error} If `provider` is not a supported identifier.
     */
    static instance(provider) {
        switch (provider) {
            case 'OpenAi':
                // OpenAiProviderFactory keeps its own cached instance, so no
                // second cache is needed here.
                return OpenAiProviderFactory.instance();
            default:
                throw new Error(`Unsupported AI provider: ${provider}`);
        }
    }
}
|
|
16
|
+
export default AiProviderFactory;
|
|
17
|
+
export { AiProviderFactory };
|
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
import { DraxConfig } from "@drax/common-back";
|
|
2
|
+
import OpenAiConfig from "../config/OpenAiConfig.js";
|
|
3
|
+
import OpenAiProvider from "../providers/OpenAiProvider.js";
|
|
4
|
+
/**
 * Lazily builds and caches a single OpenAiProvider configured via DraxConfig.
 */
class OpenAiProviderFactory {
    /**
     * Returns the cached provider. On the first call it is constructed from
     * the values DraxConfig.getOrLoad yields for OpenAiConfig.OpenAiApiKey and
     * OpenAiConfig.OpenAiModel.
     */
    static instance() {
        if (OpenAiProviderFactory.singleton) {
            return OpenAiProviderFactory.singleton;
        }
        const apiKey = DraxConfig.getOrLoad(OpenAiConfig.OpenAiApiKey);
        const model = DraxConfig.getOrLoad(OpenAiConfig.OpenAiModel);
        OpenAiProviderFactory.singleton = new OpenAiProvider(apiKey, model);
        return OpenAiProviderFactory.singleton;
    }
}
|
|
12
|
+
export default OpenAiProviderFactory;
|
|
13
|
+
export { OpenAiProviderFactory };
|
package/dist/index.js
ADDED
|
@@ -0,0 +1,5 @@
|
|
|
1
|
+
import { OpenAiConfig } from "./config/OpenAiConfig.js";
|
|
2
|
+
import { OpenAiProviderFactory } from "./factory/OpenAiProviderFactory.js";
|
|
3
|
+
import { OpenAiProvider } from "./providers/OpenAiProvider.js";
|
|
4
|
+
import { KnowledgeService } from "./services/KnowledgeService.js";
|
|
5
|
+
export { OpenAiConfig, OpenAiProviderFactory, OpenAiProvider, KnowledgeService };
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
export {};
|
|
@@ -0,0 +1,75 @@
|
|
|
1
|
+
import OpenAI from "openai";
|
|
2
|
+
import { zodResponseFormat } from "openai/helpers/zod";
|
|
3
|
+
/**
 * Wrapper around the OpenAI SDK client for embeddings and chat completions.
 */
class OpenAiProvider {
    /**
     * @param {string} apiKey - OpenAI API key; must be non-empty.
     * @param {string} model - Default chat model used when prompt() receives no `model`.
     * @throws {Error} If either argument is missing.
     */
    constructor(apiKey, model) {
        if (!apiKey) {
            throw new Error("OpenAI apiKey required");
        }
        if (!model) {
            throw new Error("OpenAI model required");
        }
        this._apiKey = apiKey;
        this._model = model;
    }

    /** Default chat model; throws if it has been cleared after construction. */
    get model() {
        if (!this._model) {
            throw new Error("OpenAI model not found");
        }
        return this._model;
    }

    /** OpenAI SDK client, created on first access and reused afterwards. */
    get client() {
        if (!this._client) {
            this._client = new OpenAI({
                apiKey: this._apiKey,
            });
        }
        return this._client;
    }

    /**
     * Requests an embedding vector for `text`.
     *
     * @param {{text: string, model?: string}} params - `model` defaults to "text-embedding-ada-002".
     * @returns {Promise<number[]>} The embedding of the first item in the response.
     * @throws {Error} If the response contains no embedding.
     */
    async generateEmbedding({ text, model = "text-embedding-ada-002" }) {
        const response = await this.client.embeddings.create({
            model: model,
            input: text,
        });
        const first = response.data?.[0];
        if (!first) {
            throw new Error("OpenAI returned no embedding");
        }
        return first.embedding;
    }

    /**
     * Sends a chat completion request and reports the reply with token usage and timing.
     *
     * The system prompt is extended with the memory and knowledge-base sections
     * when those are non-empty. `history` is placed between the system prompt
     * and the user input.
     *
     * @param input - Prompt parameters; `systemPrompt` is mandatory.
     * @returns Output text, token counts (0 when the API reports no usage) and elapsed milliseconds.
     * @throws {Error} If `systemPrompt` is missing.
     */
    async prompt(input) {
        if (!input.systemPrompt) {
            throw new Error("systemPrompt required");
        }
        const model = input.model ?? this.model;
        let systemPrompt = input.systemPrompt;
        if (input.memory && input.memory.length > 0) {
            systemPrompt += `\n\n ${input.memoryHeader ?? '[MEMORIA]'}\n ${input.memory.map(m => `${m.key}: ${m.value}`).join('\n')}`;
        }
        if (input.knowledgeBase && input.knowledgeBase.length > 0) {
            systemPrompt += `\n\n${input.knowledgeBaseHeader ?? '[BASE DE CONOCIMIENTO]'}\n ${input.knowledgeBase.join('\n')}`;
        }

        const messages = [
            { role: 'system', content: systemPrompt },
            ...(input.history ? input.history : []),
        ];
        // userInput is optional; sending a user message whose content is
        // undefined produces an invalid request, so it is only appended when given.
        if (input.userInput != null) {
            messages.push({ role: 'user', content: input.userInput });
        }

        // jsonSchema takes precedence over zodSchema when both are supplied
        // (same outcome as the previous spread order, now explicit).
        let responseFormat;
        if (input.jsonSchema) {
            responseFormat = input.jsonSchema;
        }
        else if (input.zodSchema) {
            responseFormat = zodResponseFormat(input.zodSchema, "event");
        }

        const startTime = performance.now();
        const chatCompletion = await this.client.chat.completions.create({
            messages,
            ...(responseFormat ? { response_format: responseFormat } : {}),
            model: model,
        });
        const time = performance.now() - startTime;

        // usage and choices may be absent from a response; avoid a TypeError.
        const usage = chatCompletion.usage;
        return {
            output: chatCompletion.choices?.[0]?.message?.content ?? null,
            tokens: usage?.total_tokens ?? 0,
            inputTokens: usage?.prompt_tokens ?? 0,
            outputTokens: usage?.completion_tokens ?? 0,
            time
        };
    }
}
|
|
74
|
+
export default OpenAiProvider;
|
|
75
|
+
export { OpenAiProvider };
|
|
@@ -0,0 +1,52 @@
|
|
|
1
|
+
import fs from 'fs/promises';
|
|
2
|
+
import { getTextExtractor } from 'office-text-extractor';
|
|
3
|
+
import { JSDOM } from 'jsdom';
|
|
4
|
+
/**
 * Loads text from files, PDFs and web pages and prepares it for indexing.
 */
class KnowledgeService {
    /**
     * Reads a UTF-8 text file and returns its sanitized content.
     * @param {string} filePath - Path of the file to read.
     */
    async getFromTxt(filePath) {
        const content = await fs.readFile(filePath, 'utf-8');
        return this.sanitizeText(content);
    }

    /**
     * Extracts text through office-text-extractor and returns it sanitized.
     * @param {string} path - Location handed to the extractor as `input`.
     * @param {'url'|'file'} type - How `path` is interpreted; defaults to 'url'.
     */
    async getFromPdf(path, type = 'url') {
        const extractor = getTextExtractor();
        const text = await extractor.extractText({ input: path, type: type });
        return this.sanitizeText(text);
    }

    /**
     * Downloads a page and returns the sanitized text content of its body.
     * @param {string} url - Address to fetch.
     * @throws {Error} If the HTTP response status is not ok.
     */
    async getFromUrl(url) {
        const response = await fetch(url);
        if (!response.ok) {
            throw new Error(`HTTP error! status: ${response.status}`);
        }
        const html = await response.text();
        const dom = new JSDOM(html);
        const content = dom.window.document.body.textContent || '';
        return this.sanitizeText(content);
    }

    /**
     * Lower-cases the text and collapses every whitespace run into one space.
     * @param {string} text
     * @returns {string}
     */
    sanitizeText(text) {
        // Normalization
        text = text.toLowerCase();
        // Light clean-up (whitespace runs only)
        text = text.replace(/\s+/g, " ").trim();
        return text;
    }

    /**
     * Groups sentences into chunks of at most `chunkSize` characters.
     *
     * Sentences are never split, so a single sentence longer than `chunkSize`
     * becomes an oversized chunk of its own. Empty input yields an empty array.
     *
     * @param {string} text - Text to split.
     * @param {number} chunkSize - Maximum chunk length in characters (default 512).
     * @returns {string[]} Chunks with sentences joined by single spaces.
     */
    chunkTextBySentence(text, chunkSize = 512) {
        // `[.!?]*` rather than `+`: trailing text without a terminator must be
        // kept as a final sentence instead of being dropped.
        const sentences = text.match(/[^.!?]+[.!?]*/g) ?? [text];
        const chunks = [];
        let currentChunk = '';
        for (const rawSentence of sentences) {
            // Matches carry the whitespace that followed the previous sentence;
            // trimming avoids double spaces when joining.
            const sentence = rawSentence.trim();
            if (sentence === '') {
                continue;
            }
            const potentialChunk = currentChunk === '' ? sentence : `${currentChunk} ${sentence}`;
            if (potentialChunk.length > chunkSize && currentChunk !== '') {
                chunks.push(currentChunk);
                currentChunk = sentence;
            }
            else {
                currentChunk = potentialChunk;
            }
        }
        if (currentChunk !== '') {
            chunks.push(currentChunk);
        }
        return chunks;
    }
}
|
|
51
|
+
export default KnowledgeService;
|
|
52
|
+
export { KnowledgeService };
|
|
@@ -0,0 +1,65 @@
|
|
|
1
|
+
import { ChromaClient } from 'chromadb';
|
|
2
|
+
/**
 * Small facade over a Chroma client that caches opened collections by name.
 */
class ChromaVector {
    /**
     * @param {string} path - Chroma server address (default 'http://localhost:8000').
     * @param {string} [database] - Database name passed through to the client.
     */
    constructor(path = 'http://localhost:8000', database) {
        this.client = new ChromaClient({ path, database });
        this.collections = new Map();
    }

    /**
     * Makes sure `collectionName` is opened and cached.
     *
     * Uses getOrCreateCollection so that a collection already present on the
     * server (for example after a process restart) is reused instead of
     * making creation fail.
     */
    async initializeCollection(collectionName) {
        if (this.collections.has(collectionName)) {
            return;
        }
        const collection = await this.client.getOrCreateCollection({
            name: collectionName,
        });
        this.collections.set(collectionName, collection);
    }

    /** Opens the collection if needed and returns it; throws if it is still missing. */
    async requireCollection(collectionName) {
        await this.initializeCollection(collectionName);
        const collection = this.collections.get(collectionName);
        if (!collection) {
            throw new Error(`Collection ${collectionName} not found`);
        }
        return collection;
    }

    /**
     * Adds one document; its embedding is forwarded only when provided.
     * @param {string} collectionName
     * @param {{id: string, content: string, metadata?: object, embedding?: number[]}} document
     */
    async addDocument(collectionName, document) {
        const collection = await this.requireCollection(collectionName);
        await collection.add({
            ids: [document.id],
            documents: [document.content],
            metadatas: [document.metadata || {}],
            embeddings: document.embedding ? [document.embedding] : undefined,
        });
    }

    /**
     * Runs a text query and returns the documents matched for it.
     * @param {string} collectionName
     * @param {string} query - Query text.
     * @param {number} topK - Maximum number of results (default 5).
     */
    async queryCollection(collectionName, query, topK = 5) {
        const collection = await this.requireCollection(collectionName);
        const results = await collection.query({
            queryTexts: [query],
            nResults: topK,
        });
        return results.documents[0] || [];
    }

    /**
     * Deletes a document by id. The collection is opened first, as in
     * addDocument/queryCollection, so this also works before any other call.
     */
    async deleteDocument(collectionName, documentId) {
        const collection = await this.requireCollection(collectionName);
        await collection.delete({
            ids: [documentId],
        });
    }

    /** Returns whatever the client's listCollections() reports. */
    async listCollections() {
        return await this.client.listCollections();
    }

    /** Deletes the collection on the server and drops it from the local cache. */
    async deleteCollection(collectionName) {
        await this.client.deleteCollection({ name: collectionName });
        this.collections.delete(collectionName);
    }
}
|
|
64
|
+
export default ChromaVector;
|
|
65
|
+
export { ChromaVector };
|
package/package.json
ADDED
|
@@ -0,0 +1,39 @@
|
|
|
1
|
+
{
|
|
2
|
+
"name": "@drax/ai-back",
|
|
3
|
+
"publishConfig": {
|
|
4
|
+
"access": "public"
|
|
5
|
+
},
|
|
6
|
+
"version": "0.13.0",
|
|
7
|
+
"description": "Ai utils",
|
|
8
|
+
"main": "dist/index.js",
|
|
9
|
+
"types": "types/index.d.ts",
|
|
10
|
+
"type": "module",
|
|
11
|
+
"scripts": {
|
|
12
|
+
"prepublish": "tsc",
|
|
13
|
+
"tscrun": "tsc",
|
|
14
|
+
"clean": "rm -rf dist",
|
|
15
|
+
"tsc": "tsc -b tsconfig.json",
|
|
16
|
+
"test": "vitest test/**"
|
|
17
|
+
},
|
|
18
|
+
"author": "Cristian Incarnato & Drax Team",
|
|
19
|
+
"license": "ISC",
|
|
20
|
+
"peerDependencies": {
|
|
21
|
+
"chromadb": "^2.2.0",
|
|
22
|
+
"jsdom": "^26.0.0",
|
|
23
|
+
"office-text-extractor": "^3.0.3",
|
|
24
|
+
"openai": "^4.72.0",
|
|
25
|
+
"zod": "^3.24.2"
|
|
26
|
+
},
|
|
27
|
+
"devDependencies": {
|
|
28
|
+
"@types/node": "^20.12.10",
|
|
29
|
+
"chromadb": "^2.2.0",
|
|
30
|
+
"jsdom": "^26.0.0",
|
|
31
|
+
"office-text-extractor": "^3.0.3",
|
|
32
|
+
"openai": "^4.72.0",
|
|
33
|
+
"ts-node": "^10.9.2",
|
|
34
|
+
"tsc-alias": "^1.8.10",
|
|
35
|
+
"typescript": "^5.6.2",
|
|
36
|
+
"vitest": "^3.0.8"
|
|
37
|
+
},
|
|
38
|
+
"gitHead": "ff7df3bb96303a056ad2e1ee70dc9dfa4a43ef9e"
|
|
39
|
+
}
|
|
@@ -0,0 +1,25 @@
|
|
|
1
|
+
import type {IAIProvider} from "../interfaces/IAIProvider"
|
|
2
|
+
import OpenAiProviderFactory from "./OpenAiProviderFactory.js";
|
|
3
|
+
|
|
4
|
+
/**
 * Resolves the AI provider implementation registered under a provider name.
 */
class AiProviderFactory {

    /**
     * Returns the provider instance for `provider`.
     *
     * The name is validated on every call. Previously the first resolved
     * provider was cached and returned for any later name, so an unsupported
     * name silently received the OpenAI provider.
     *
     * @param provider - Provider identifier; only 'OpenAi' is supported.
     * @returns The instance returned by OpenAiProviderFactory.instance().
     * @throws Error if `provider` is not a supported identifier.
     */
    public static instance(provider: string): IAIProvider {
        switch (provider) {
            case 'OpenAi':
                // OpenAiProviderFactory keeps its own cached instance, so no
                // second cache is needed here.
                return OpenAiProviderFactory.instance();
            default:
                throw new Error(`Unsupported AI provider: ${provider}`);
        }
    }
}
|
|
20
|
+
|
|
21
|
+
export default AiProviderFactory
|
|
22
|
+
export {
|
|
23
|
+
AiProviderFactory
|
|
24
|
+
}
|
|
25
|
+
|
|
@@ -0,0 +1,24 @@
|
|
|
1
|
+
import {DraxConfig} from "@drax/common-back";
|
|
2
|
+
import OpenAiConfig from "../config/OpenAiConfig.js";
|
|
3
|
+
import type {IAIProvider} from "../interfaces/IAIProvider"
|
|
4
|
+
import OpenAiProvider from "../providers/OpenAiProvider.js";
|
|
5
|
+
|
|
6
|
+
/**
 * Lazily builds and caches a single OpenAiProvider configured via DraxConfig.
 */
class OpenAiProviderFactory {
    private static singleton: IAIProvider;

    /**
     * Returns the cached provider. On the first call it is constructed from
     * the values DraxConfig.getOrLoad yields for OpenAiConfig.OpenAiApiKey and
     * OpenAiConfig.OpenAiModel.
     */
    public static instance(): IAIProvider {
        if (OpenAiProviderFactory.singleton) {
            return OpenAiProviderFactory.singleton;
        }
        const apiKey = DraxConfig.getOrLoad(OpenAiConfig.OpenAiApiKey);
        const model = DraxConfig.getOrLoad(OpenAiConfig.OpenAiModel);
        OpenAiProviderFactory.singleton = new OpenAiProvider(apiKey, model);
        return OpenAiProviderFactory.singleton;
    }
}
|
|
19
|
+
|
|
20
|
+
export default OpenAiProviderFactory
|
|
21
|
+
export {
|
|
22
|
+
OpenAiProviderFactory
|
|
23
|
+
}
|
|
24
|
+
|
package/src/index.ts
ADDED
|
@@ -0,0 +1,20 @@
|
|
|
1
|
+
import {OpenAiConfig} from "./config/OpenAiConfig.js";
|
|
2
|
+
import {OpenAiProviderFactory} from "./factory/OpenAiProviderFactory.js";
|
|
3
|
+
import {OpenAiProvider} from "./providers/OpenAiProvider.js";
|
|
4
|
+
import {KnowledgeService} from "./services/KnowledgeService.js";
|
|
5
|
+
import type {IAIProvider, IPromptResponse, IPromptParams, IPromptMessage, IPromptMemory} from "./interfaces/IAIProvider.js";
|
|
6
|
+
|
|
7
|
+
export type {
|
|
8
|
+
IAIProvider,
|
|
9
|
+
IPromptParams,
|
|
10
|
+
IPromptMessage,
|
|
11
|
+
IPromptMemory,
|
|
12
|
+
IPromptResponse,
|
|
13
|
+
}
|
|
14
|
+
|
|
15
|
+
export {
|
|
16
|
+
OpenAiConfig,
|
|
17
|
+
OpenAiProviderFactory,
|
|
18
|
+
OpenAiProvider,
|
|
19
|
+
KnowledgeService
|
|
20
|
+
}
|
|
@@ -0,0 +1,41 @@
|
|
|
1
|
+
import { ZodSchema } from 'zod'
|
|
2
|
+
|
|
3
|
+
/** Author of a chat message. */
type Role = 'user' | 'assistant' | 'system';

/** One message of a conversation history. */
interface IPromptMessage {
    role: Role;
    content: string;
}

/** A key/value fact; OpenAiProvider renders it as a `key: value` line in the system prompt. */
interface IPromptMemory {
    key: string;
    value: string;
}

/** Parameters accepted by {@link IAIProvider.prompt}. */
interface IPromptParams {
    /** Base system prompt; OpenAiProvider rejects an empty value. */
    systemPrompt: string;
    /** Text of the user's message. */
    userInput?: string;
    /** Earlier messages; OpenAiProvider places them between the system prompt and the user input. */
    history?: IPromptMessage[];
    /** Facts appended to the system prompt under `memoryHeader`. */
    memory?: IPromptMemory[];
    /**
     * Heading for the memory section, e.g. '[MEMORY]' or '[MEMORIA]'.
     * OpenAiProvider falls back to '[MEMORIA]' when omitted.
     * (Typed as plain `string`: literal members in a union with `string`
     * are absorbed by it and add nothing.)
     */
    memoryHeader?: string;
    /** Snippets appended to the system prompt under `knowledgeBaseHeader`. */
    knowledgeBase?: string[];
    /**
     * Heading for the knowledge-base section, e.g. '[KNOWLEDGE BASE]' or
     * '[BASE DE CONOCIMIENTO]'. OpenAiProvider falls back to
     * '[BASE DE CONOCIMIENTO]' when omitted.
     */
    knowledgeBaseHeader?: string;
    /** Zod schema; OpenAiProvider converts it into a `response_format`. */
    zodSchema?: ZodSchema<any>;
    /** Object OpenAiProvider sends as `response_format`; wins over `zodSchema` when both are set. */
    jsonSchema?: object;
    /** Model for this call; OpenAiProvider uses its configured model when omitted. */
    model?: string;
}

/** Result of {@link IAIProvider.prompt}. */
interface IPromptResponse {
    /** Content returned by the provider. */
    output: any;
    /** Total tokens reported for the request. */
    tokens: number;
    /** Tokens reported for the prompt. */
    inputTokens: number;
    /** Tokens reported for the completion. */
    outputTokens: number;
    /** Elapsed time of the request; OpenAiProvider measures it with performance.now(). */
    time: number;
}

/** Contract implemented by every AI provider. */
interface IAIProvider {
    prompt(input: IPromptParams): Promise<IPromptResponse>;
}
|
|
40
|
+
|
|
41
|
+
export type {IAIProvider, IPromptParams, IPromptResponse, IPromptMessage, IPromptMemory}
|
|
@@ -0,0 +1,106 @@
|
|
|
1
|
+
import OpenAI from "openai";
|
|
2
|
+
import type {ZodTypeAny} from "zod"
|
|
3
|
+
import { zodResponseFormat } from "openai/helpers/zod";
|
|
4
|
+
import type {IAIProvider, IPromptParams, IPromptResponse} from "../interfaces/IAIProvider";
|
|
5
|
+
|
|
6
|
+
/**
 * Wrapper around the OpenAI SDK client for embeddings and chat completions.
 */
class OpenAiProvider implements IAIProvider {
    protected _apiKey: string
    protected _model: string
    // Holds the lazily created OpenAI client (see the `client` getter).
    protected _client: any

    /**
     * @param apiKey - OpenAI API key; must be non-empty.
     * @param model - Default chat model used when prompt() receives no `model`.
     * @throws Error if either argument is missing.
     */
    constructor(apiKey: string, model: string) {
        if (!apiKey) {
            throw new Error("OpenAI apiKey required")
        }
        if (!model) {
            throw new Error("OpenAI model required")
        }

        this._apiKey = apiKey
        this._model = model
    }

    /** Default chat model; throws if it has been cleared after construction. */
    get model() {
        if (!this._model) {
            throw new Error("OpenAI model not found")
        }
        return this._model;
    }

    /** OpenAI SDK client, created on first access and reused afterwards. */
    get client() {
        if (!this._client) {
            this._client = new OpenAI({
                apiKey: this._apiKey,
            });
        }

        return this._client
    }

    /**
     * Requests an embedding vector for `text`.
     *
     * @param params - `model` is optional and defaults to "text-embedding-ada-002".
     * @returns The embedding of the first item in the response.
     * @throws Error if the response contains no embedding.
     */
    async generateEmbedding({text, model = "text-embedding-ada-002"}: {text: string, model?: string}): Promise<number[]> {
        const response = await this.client.embeddings.create({
            model: model,
            input: text,
        });
        const first = response.data?.[0]
        if (!first) {
            throw new Error("OpenAI returned no embedding")
        }
        return first.embedding;
    }

    /**
     * Sends a chat completion request and reports the reply with token usage and timing.
     *
     * The system prompt is extended with the memory and knowledge-base sections
     * when those are non-empty. `history` is placed between the system prompt
     * and the user input.
     *
     * @param input - Prompt parameters; `systemPrompt` is mandatory.
     * @returns Output text, token counts (0 when the API reports no usage) and elapsed milliseconds.
     * @throws Error if `systemPrompt` is missing.
     */
    async prompt(input: IPromptParams): Promise<IPromptResponse> {

        if (!input.systemPrompt) {
            throw new Error("systemPrompt required")
        }

        const model = input.model ?? this.model

        let systemPrompt = input.systemPrompt

        if (input.memory && input.memory.length > 0) {
            systemPrompt += `\n\n ${input.memoryHeader ?? '[MEMORIA]'}\n ${input.memory.map(m => `${m.key}: ${m.value}`).join('\n')}`
        }

        if (input.knowledgeBase && input.knowledgeBase.length > 0) {
            systemPrompt += `\n\n${input.knowledgeBaseHeader ?? '[BASE DE CONOCIMIENTO]'}\n ${input.knowledgeBase.join('\n')}`
        }

        const messages: NonNullable<IPromptParams["history"]> = [
            {role: 'system', content: systemPrompt},
            ...(input.history ? input.history : []),
        ]
        // userInput is optional; sending a user message whose content is
        // undefined produces an invalid request, so it is only appended when given.
        if (input.userInput != null) {
            messages.push({role: 'user', content: input.userInput})
        }

        // jsonSchema takes precedence over zodSchema when both are supplied
        // (same outcome as the previous spread order, now explicit).
        let responseFormat: object | undefined
        if (input.jsonSchema) {
            responseFormat = input.jsonSchema
        } else if (input.zodSchema) {
            responseFormat = zodResponseFormat(input.zodSchema, "event")
        }

        const startTime = performance.now()

        const chatCompletion = await this.client.chat.completions.create({
            messages,
            ...(responseFormat ? {response_format: responseFormat} : {}),
            model: model,
        });

        const time = performance.now() - startTime

        // usage and choices may be absent from a response; avoid a TypeError.
        const usage = chatCompletion.usage

        return {
            output: chatCompletion.choices?.[0]?.message?.content ?? null,
            tokens: usage?.total_tokens ?? 0,
            inputTokens: usage?.prompt_tokens ?? 0,
            outputTokens: usage?.completion_tokens ?? 0,
            time
        }
    }

}
|
|
103
|
+
|
|
104
|
+
|
|
105
|
+
export default OpenAiProvider
|
|
106
|
+
export {OpenAiProvider}
|
|
@@ -0,0 +1,66 @@
|
|
|
1
|
+
import fs from 'fs/promises';
|
|
2
|
+
import {getTextExtractor} from 'office-text-extractor'
|
|
3
|
+
import {JSDOM} from 'jsdom';
|
|
4
|
+
|
|
5
|
+
/**
 * Loads text from files, PDFs and web pages and prepares it for indexing.
 */
class KnowledgeService {

    /**
     * Reads a UTF-8 text file and returns its sanitized content.
     * @param filePath - Path of the file to read.
     */
    async getFromTxt(filePath: string): Promise<string> {
        const content = await fs.readFile(filePath, 'utf-8');
        return this.sanitizeText(content);
    }

    /**
     * Extracts text through office-text-extractor and returns it sanitized.
     * @param path - Location handed to the extractor as `input`.
     * @param type - How `path` is interpreted; defaults to 'url'.
     */
    async getFromPdf(path: string, type: 'url'|'file' = 'url'): Promise<string> {
        const extractor = getTextExtractor()
        const text = await extractor.extractText({input: path, type: type});
        return this.sanitizeText(text);
    }

    /**
     * Downloads a page and returns the sanitized text content of its body.
     * @param url - Address to fetch.
     * @throws Error if the HTTP response status is not ok.
     */
    async getFromUrl(url: string): Promise<string> {
        const response = await fetch(url);
        if (!response.ok) {
            throw new Error(`HTTP error! status: ${response.status}`);
        }
        const html = await response.text();
        const dom = new JSDOM(html);
        const content = dom.window.document.body.textContent || '';
        return this.sanitizeText(content);
    }

    /**
     * Lower-cases the text and collapses every whitespace run into one space.
     */
    sanitizeText(text: string): string {
        // Normalization
        text = text.toLowerCase();

        // Light clean-up (whitespace runs only)
        text = text.replace(/\s+/g, " ").trim();

        return text
    }

    /**
     * Groups sentences into chunks of at most `chunkSize` characters.
     *
     * Sentences are never split, so a single sentence longer than `chunkSize`
     * becomes an oversized chunk of its own. Empty input yields an empty array.
     *
     * @param text - Text to split.
     * @param chunkSize - Maximum chunk length in characters (default 512).
     * @returns Chunks with sentences joined by single spaces.
     */
    chunkTextBySentence(text: string, chunkSize: number = 512): string[] {
        // `[.!?]*` rather than `+`: trailing text without a terminator must be
        // kept as a final sentence instead of being dropped.
        const sentences: string[] = text.match(/[^.!?]+[.!?]*/g) ?? [text];
        const chunks: string[] = [];
        let currentChunk = '';

        for (const rawSentence of sentences) {
            // Matches carry the whitespace that followed the previous sentence;
            // trimming avoids double spaces when joining.
            const sentence = rawSentence.trim();
            if (sentence === '') {
                continue;
            }
            const potentialChunk = currentChunk === '' ? sentence : `${currentChunk} ${sentence}`;
            if (potentialChunk.length > chunkSize && currentChunk !== '') {
                chunks.push(currentChunk);
                currentChunk = sentence;
            } else {
                currentChunk = potentialChunk;
            }
        }

        if (currentChunk !== '') {
            chunks.push(currentChunk);
        }

        return chunks;
    }
}
|
|
64
|
+
|
|
65
|
+
export default KnowledgeService;
|
|
66
|
+
export {KnowledgeService}
|
|
@@ -0,0 +1,80 @@
|
|
|
1
|
+
import {ChromaClient, Collection, Document} from 'chromadb';
|
|
2
|
+
|
|
3
|
+
/** Document stored in a Chroma collection. */
interface VectorDocument {
    id: string;
    content: string;
    metadata?: Record<string, any>;
    /** Precomputed embedding; forwarded to Chroma only when present. */
    embedding?: number[];
}

/**
 * Small facade over a Chroma client that caches opened collections by name.
 */
class ChromaVector {
    private client: ChromaClient;
    private collections: Map<string, Collection>;

    /**
     * @param path - Chroma server address (default 'http://localhost:8000').
     * @param database - Database name passed through to the client.
     */
    constructor(path: string = 'http://localhost:8000', database ?: string) {
        this.client = new ChromaClient({path, database});
        this.collections = new Map();
    }

    /**
     * Makes sure `collectionName` is opened and cached.
     *
     * Uses getOrCreateCollection so that a collection already present on the
     * server (for example after a process restart) is reused instead of
     * making creation fail.
     */
    async initializeCollection(collectionName: string): Promise<void> {
        if (this.collections.has(collectionName)) {
            return;
        }
        const collection = await this.client.getOrCreateCollection({
            name: collectionName,
        });
        this.collections.set(collectionName, collection);
    }

    /** Opens the collection if needed and returns it; throws if it is still missing. */
    private async requireCollection(collectionName: string): Promise<Collection> {
        await this.initializeCollection(collectionName);
        const collection = this.collections.get(collectionName);
        if (!collection) {
            throw new Error(`Collection ${collectionName} not found`);
        }
        return collection;
    }

    /** Adds one document; its embedding is forwarded only when provided. */
    async addDocument(collectionName: string, document: VectorDocument): Promise<void> {
        const collection = await this.requireCollection(collectionName);
        await collection.add({
            ids: [document.id],
            documents: [document.content],
            metadatas: [document.metadata || {}],
            embeddings: document.embedding ? [document.embedding] : undefined,
        });
    }

    /**
     * Runs a text query and returns the documents matched for it.
     * @param query - Query text.
     * @param topK - Maximum number of results (default 5).
     */
    async queryCollection(collectionName: string, query: string, topK: number = 5): Promise<Document[]> {
        const collection = await this.requireCollection(collectionName);
        const results = await collection.query({
            queryTexts: [query],
            nResults: topK,
        });
        return results.documents[0] || [];
    }

    /**
     * Deletes a document by id. The collection is opened first, as in
     * addDocument/queryCollection, so this also works before any other call.
     */
    async deleteDocument(collectionName: string, documentId: string): Promise<void> {
        const collection = await this.requireCollection(collectionName);
        await collection.delete({
            ids: [documentId],
        });
    }

    /** Returns whatever the client's listCollections() reports. */
    async listCollections(): Promise<string[]> {
        return await this.client.listCollections();
    }

    /** Deletes the collection on the server and drops it from the local cache. */
    async deleteCollection(collectionName: string): Promise<void> {
        await this.client.deleteCollection({ name: collectionName });
        this.collections.delete(collectionName);
    }
}
|
|
78
|
+
|
|
79
|
+
export default ChromaVector;
|
|
80
|
+
export {ChromaVector}
|