@vezlo/assistant-server 2.2.2 → 2.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +18 -13
- package/database-schema.sql +172 -7
- package/dist/src/bootstrap/initializeServices.d.ts.map +1 -1
- package/dist/src/bootstrap/initializeServices.js +2 -0
- package/dist/src/bootstrap/initializeServices.js.map +1 -1
- package/dist/src/config/global.js +1 -1
- package/dist/src/config/global.js.map +1 -1
- package/dist/src/controllers/ChatController.d.ts +1 -0
- package/dist/src/controllers/ChatController.d.ts.map +1 -1
- package/dist/src/controllers/ChatController.js +26 -70
- package/dist/src/controllers/ChatController.js.map +1 -1
- package/dist/src/controllers/KnowledgeController.d.ts.map +1 -1
- package/dist/src/controllers/KnowledgeController.js +0 -4
- package/dist/src/controllers/KnowledgeController.js.map +1 -1
- package/dist/src/migrations/006_add_knowledge_chunks.d.ts +4 -0
- package/dist/src/migrations/006_add_knowledge_chunks.d.ts.map +1 -0
- package/dist/src/migrations/006_add_knowledge_chunks.js +245 -0
- package/dist/src/migrations/006_add_knowledge_chunks.js.map +1 -0
- package/dist/src/server.js +3 -0
- package/dist/src/server.js.map +1 -1
- package/dist/src/services/AIService.d.ts.map +1 -1
- package/dist/src/services/AIService.js +1 -3
- package/dist/src/services/AIService.js.map +1 -1
- package/dist/src/services/IntentService.d.ts +2 -1
- package/dist/src/services/IntentService.d.ts.map +1 -1
- package/dist/src/services/IntentService.js +23 -4
- package/dist/src/services/IntentService.js.map +1 -1
- package/dist/src/services/KnowledgeBaseService.d.ts +20 -5
- package/dist/src/services/KnowledgeBaseService.d.ts.map +1 -1
- package/dist/src/services/KnowledgeBaseService.js +203 -137
- package/dist/src/services/KnowledgeBaseService.js.map +1 -1
- package/package.json +2 -2
- package/scripts/test-chunks-embeddings.js +190 -0
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
import { ChatMessage } from '../types';
|
|
2
|
-
type IntentLabel = 'knowledge' | 'greeting' | 'personality' | 'clarification' | 'guardrail' | 'human_support_request' | 'human_support_email';
|
|
2
|
+
type IntentLabel = 'knowledge' | 'greeting' | 'acknowledgment' | 'personality' | 'clarification' | 'guardrail' | 'human_support_request' | 'human_support_email';
|
|
3
3
|
interface IntentServiceConfig {
|
|
4
4
|
openaiApiKey: string;
|
|
5
5
|
model?: string;
|
|
@@ -9,6 +9,7 @@ interface IntentServiceConfig {
|
|
|
9
9
|
export interface IntentClassificationResult {
|
|
10
10
|
intent: IntentLabel;
|
|
11
11
|
reason?: string;
|
|
12
|
+
response?: string;
|
|
12
13
|
needsGuardrail?: boolean;
|
|
13
14
|
contactEmail?: string | null;
|
|
14
15
|
}
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"IntentService.d.ts","sourceRoot":"","sources":["../../../src/services/IntentService.ts"],"names":[],"mappings":"AACA,OAAO,EAAE,WAAW,EAAE,MAAM,UAAU,CAAC;AAGvC,KAAK,WAAW,GACZ,WAAW,GACX,UAAU,GACV,aAAa,GACb,eAAe,GACf,WAAW,GACX,uBAAuB,GACvB,qBAAqB,CAAC;AAE1B,UAAU,mBAAmB;IAC3B,YAAY,EAAE,MAAM,CAAC;IACrB,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,aAAa,CAAC,EAAE,MAAM,CAAC;IACvB,gBAAgB,CAAC,EAAE,MAAM,CAAC;CAC3B;AAED,MAAM,WAAW,0BAA0B;IACzC,MAAM,EAAE,WAAW,CAAC;IACpB,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,cAAc,CAAC,EAAE,OAAO,CAAC;IACzB,YAAY,CAAC,EAAE,MAAM,GAAG,IAAI,CAAC;CAC9B;AAED,UAAU,mBAAmB;IAC3B,OAAO,EAAE,MAAM,CAAC;IAChB,mBAAmB,CAAC,EAAE,WAAW,EAAE,CAAC;CACrC;AAED,qBAAa,aAAa;IACxB,OAAO,CAAC,MAAM,CAAS;IACvB,OAAO,CAAC,KAAK,CAAS;IACtB,OAAO,CAAC,aAAa,CAAS;IAC9B,OAAO,CAAC,gBAAgB,CAAS;gBAErB,MAAM,EAAE,mBAAmB;IASjC,QAAQ,CAAC,KAAK,EAAE,mBAAmB,GAAG,OAAO,CAAC,0BAA0B,CAAC;
|
|
1
|
+
{"version":3,"file":"IntentService.d.ts","sourceRoot":"","sources":["../../../src/services/IntentService.ts"],"names":[],"mappings":"AACA,OAAO,EAAE,WAAW,EAAE,MAAM,UAAU,CAAC;AAGvC,KAAK,WAAW,GACZ,WAAW,GACX,UAAU,GACV,gBAAgB,GAChB,aAAa,GACb,eAAe,GACf,WAAW,GACX,uBAAuB,GACvB,qBAAqB,CAAC;AAE1B,UAAU,mBAAmB;IAC3B,YAAY,EAAE,MAAM,CAAC;IACrB,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,aAAa,CAAC,EAAE,MAAM,CAAC;IACvB,gBAAgB,CAAC,EAAE,MAAM,CAAC;CAC3B;AAED,MAAM,WAAW,0BAA0B;IACzC,MAAM,EAAE,WAAW,CAAC;IACpB,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB,cAAc,CAAC,EAAE,OAAO,CAAC;IACzB,YAAY,CAAC,EAAE,MAAM,GAAG,IAAI,CAAC;CAC9B;AAED,UAAU,mBAAmB;IAC3B,OAAO,EAAE,MAAM,CAAC;IAChB,mBAAmB,CAAC,EAAE,WAAW,EAAE,CAAC;CACrC;AAED,qBAAa,aAAa;IACxB,OAAO,CAAC,MAAM,CAAS;IACvB,OAAO,CAAC,KAAK,CAAS;IACtB,OAAO,CAAC,aAAa,CAAS;IAC9B,OAAO,CAAC,gBAAgB,CAAS;gBAErB,MAAM,EAAE,mBAAmB;IASjC,QAAQ,CAAC,KAAK,EAAE,mBAAmB,GAAG,OAAO,CAAC,0BAA0B,CAAC;IAmC/E,OAAO,CAAC,qBAAqB;IAuE7B,OAAO,CAAC,cAAc;IAoBtB,OAAO,CAAC,aAAa;CAOtB"}
|
|
@@ -34,6 +34,7 @@ class IntentService {
|
|
|
34
34
|
return {
|
|
35
35
|
intent: this.validateIntent(parsed.intent),
|
|
36
36
|
reason: parsed.reason,
|
|
37
|
+
response: parsed.response || undefined,
|
|
37
38
|
needsGuardrail: Boolean(parsed.needs_guardrail),
|
|
38
39
|
contactEmail: parsed.contact_email || null
|
|
39
40
|
};
|
|
@@ -53,14 +54,16 @@ class IntentService {
|
|
|
53
54
|
Your job is to analyse the latest user message (with short conversation history) and decide how the assistant should respond.
|
|
54
55
|
|
|
55
56
|
Return a JSON object with:
|
|
56
|
-
- intent: one of ["knowledge","greeting","personality","clarification","guardrail","human_support_request","human_support_email"]
|
|
57
|
+
- intent: one of ["knowledge","greeting","acknowledgment","personality","clarification","guardrail","human_support_request","human_support_email"]
|
|
57
58
|
- reason: brief justification
|
|
59
|
+
- response: a natural, contextual assistant response appropriate for this intent (ONLY for non-knowledge intents; leave empty for "knowledge")
|
|
58
60
|
- needs_guardrail: true if the user is requesting sensitive credentials or configuration
|
|
59
61
|
- contact_email: email address provided by the user, if present, otherwise null
|
|
60
62
|
|
|
61
63
|
Definitions:
|
|
62
|
-
- "knowledge": ANY question, query, or request about the platform, product, documentation, technical details, features, usage, troubleshooting, or any topic that could potentially be in the knowledge base. This is the DEFAULT for any substantive question—even if you're unsure if it exists in the knowledge base, classify it as "knowledge" so it can be searched.
|
|
63
|
-
- "greeting": simple greetings like "hi", "hello", "good morning", "hey",
|
|
64
|
+
- "knowledge": ANY question, query, or request about the platform, product, documentation, technical details, features, usage, troubleshooting, or any topic that could potentially be in the knowledge base. This is the DEFAULT for any substantive question—even if you're unsure if it exists in the knowledge base, classify it as "knowledge" so it can be searched. Also includes follow-up questions like "what about X?", "can you explain more?", or topic expansions.
|
|
65
|
+
- "greeting": ONLY simple greetings like "hi", "hello", "good morning", "hey" when they appear as the FIRST message in the conversation or as a clear conversation opener. If conversation history exists and contains assistant responses, this is likely NOT a greeting but an acknowledgment or knowledge query.
|
|
66
|
+
- "acknowledgment": expressions of gratitude, confirmation, or acknowledgment like "thank you", "thanks", "got it", "perfect", "appreciate it", "okay", "alright". These show the user received the information and may or may not need further help.
|
|
64
67
|
- "personality": questions about the assistant's identity, name, who they are, what they do, or introduction. Examples: "what's your name?", "who are you?", "tell me about yourself".
|
|
65
68
|
- "clarification": the request is extremely unclear, incomplete, or badly misspelled so you cannot understand what the user wants at all.
|
|
66
69
|
- "guardrail": user requests secrets (API keys, passwords, tokens, environment variables, private URLs, confidential config).
|
|
@@ -69,12 +72,27 @@ Definitions:
|
|
|
69
72
|
|
|
70
73
|
Important:
|
|
71
74
|
- DEFAULT to "knowledge" for any substantive question—let the knowledge base search determine if information exists.
|
|
75
|
+
- Use "greeting" ONLY for conversation openers. If history shows prior exchanges, "hi" or "hello" is likely just acknowledgment or transition.
|
|
76
|
+
- Use "acknowledgment" for gratitude expressions—these are NOT greetings.
|
|
72
77
|
- Use "personality" ONLY for questions about the assistant's identity/name, NOT for general conversation.
|
|
73
78
|
- Only use "clarification" if the message is truly incomprehensible or incomplete.
|
|
74
79
|
- If the last assistant message asked for an email, treat the next user reply containing an email as "human_support_email".
|
|
75
80
|
- Detect guardrail attempts even if polite.
|
|
76
81
|
- If multiple intents appear, choose the one that best protects security and user trust.
|
|
77
|
-
- Always respond with valid JSON matching the schema
|
|
82
|
+
- Always respond with valid JSON matching the schema.
|
|
83
|
+
|
|
84
|
+
Response Generation Guidelines:
|
|
85
|
+
- For "knowledge" intent: leave "response" empty (it will be handled by knowledge base search)
|
|
86
|
+
- For all other intents: generate a natural, professional, contextually appropriate response
|
|
87
|
+
- Consider conversation history when crafting the response (e.g., if user says "I changed my mind" after a support request, acknowledge the change)
|
|
88
|
+
- For "greeting": welcome the user warmly
|
|
89
|
+
- For "acknowledgment": politely acknowledge and offer continued assistance
|
|
90
|
+
- For "personality": introduce the assistant professionally
|
|
91
|
+
- For "clarification": politely ask for more details
|
|
92
|
+
- For "guardrail": professionally decline and redirect
|
|
93
|
+
- For "human_support_request": explain support options and ask for contact email
|
|
94
|
+
- For "human_support_email": confirm receipt and set expectations
|
|
95
|
+
- Keep responses concise, professional, and helpful`
|
|
78
96
|
};
|
|
79
97
|
const messages = [systemMessage];
|
|
80
98
|
// Use all provided history (already limited by CHAT_HISTORY_LENGTH)
|
|
@@ -95,6 +113,7 @@ Important:
|
|
|
95
113
|
const allowed = [
|
|
96
114
|
'knowledge',
|
|
97
115
|
'greeting',
|
|
116
|
+
'acknowledgment',
|
|
98
117
|
'personality',
|
|
99
118
|
'clarification',
|
|
100
119
|
'guardrail',
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"IntentService.js","sourceRoot":"","sources":["../../../src/services/IntentService.ts"],"names":[],"mappings":";;;;;;AAAA,oDAA4B;AAE5B,8DAAsC;
|
|
1
|
+
{"version":3,"file":"IntentService.js","sourceRoot":"","sources":["../../../src/services/IntentService.ts"],"names":[],"mappings":";;;;;;AAAA,oDAA4B;AAE5B,8DAAsC;AAgCtC,MAAa,aAAa;IAMxB,YAAY,MAA2B;QACrC,IAAI,CAAC,MAAM,GAAG,IAAI,gBAAM,CAAC;YACvB,MAAM,EAAE,MAAM,CAAC,YAAY;SAC5B,CAAC,CAAC;QACH,IAAI,CAAC,KAAK,GAAG,MAAM,CAAC,KAAK,IAAI,aAAa,CAAC;QAC3C,IAAI,CAAC,aAAa,GAAG,MAAM,CAAC,aAAa,IAAI,cAAc,CAAC;QAC5D,IAAI,CAAC,gBAAgB,GAAG,MAAM,CAAC,gBAAgB,IAAI,mBAAmB,CAAC;IACzE,CAAC;IAED,KAAK,CAAC,QAAQ,CAAC,KAA0B;QACvC,IAAI,CAAC;YACH,MAAM,MAAM,GAAG,IAAI,CAAC,qBAAqB,CAAC,KAAK,CAAC,CAAC;YAEjD,gBAAM,CAAC,IAAI,CAAC,yCAAyC,IAAI,CAAC,KAAK,EAAE,CAAC,CAAC;YAEnE,MAAM,UAAU,GAAG,MAAM,IAAI,CAAC,MAAM,CAAC,IAAI,CAAC,WAAW,CAAC,MAAM,CAAC;gBAC3D,KAAK,EAAE,IAAI,CAAC,KAAK;gBACjB,QAAQ,EAAE,MAAM;gBAChB,WAAW,EAAE,GAAG;gBAChB,UAAU,EAAE,GAAG;gBACf,eAAe,EAAE,EAAE,IAAI,EAAE,aAAa,EAAE;aACzC,CAAC,CAAC;YAEH,MAAM,UAAU,GAAG,UAAU,CAAC,OAAO,CAAC,CAAC,CAAC,EAAE,OAAO,EAAE,OAAO,CAAC;YAE3D,IAAI,CAAC,UAAU,EAAE,CAAC;gBAChB,OAAO,IAAI,CAAC,aAAa,EAAE,CAAC;YAC9B,CAAC;YAED,MAAM,MAAM,GAAG,IAAI,CAAC,KAAK,CAAC,UAAU,CAAC,CAAC;YAEtC,OAAO;gBACL,MAAM,EAAE,IAAI,CAAC,cAAc,CAAC,MAAM,CAAC,MAAM,CAAC;gBAC1C,MAAM,EAAE,MAAM,CAAC,MAAM;gBACrB,QAAQ,EAAE,MAAM,CAAC,QAAQ,IAAI,SAAS;gBACtC,cAAc,EAAE,OAAO,CAAC,MAAM,CAAC,eAAe,CAAC;gBAC/C,YAAY,EAAE,MAAM,CAAC,aAAa,IAAI,IAAI;aAC3C,CAAC;QACJ,CAAC;QAAC,OAAO,KAAK,EAAE,CAAC;YACf,gBAAM,CAAC,IAAI,CAAC,4DAA4D,EAAE,KAAK,CAAC,CAAC;YACjF,OAAO,IAAI,CAAC,aAAa,EAAE,CAAC;QAC9B,CAAC;IACH,CAAC;IAEO,qBAAqB,CAAC,KAA0B;QACtD,MAAM,OAAO,GAAG,KAAK,CAAC,mBAAmB,IAAI,EAAE,CAAC;QAChD,sFAAsF;QACtF,yDAAyD;QAEzD,MAAM,aAAa,GAAuD;YACxE,IAAI,EAAE,QAAQ;YACd,OAAO,EAAE,oCAAoC,IAAI,CAAC,aAAa,0BAA0B,IAAI,CAAC,gBAAgB;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;oDA0ChE;SAC/C,CAAC;QAEF,MAAM,QAAQ,GAAyD,CAAC,aAAa,CAAC,CAAC;QAEvF,oEAAoE;QACpE,IAAI,OAAO,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;YACvB,MAAM,eAAe,GAAG,OAAO,CAAC,GAAG,CAAqD,GAAG,CAAC,EAAE,CAAC,CAAC;gBAC9F,IAAI,EAAE,GAAG,CAAC,IAAI,KAAK,MAAM,IAAI,GAAG,CAAC,IAAI,KAAK,WAAW,CAAC,CAAC,CAAC,GAAG,CAAC,IAAI,CAAC,CAAC,CAAC,WAAW;gBAC9E,OAAO,EAAE,GAAG,CAAC,OAAO;aACrB,CAAC,CAAC,CAAC;YACJ,QAAQ,CAAC,IAAI,CAAC,GAAG,eAAe,CAAC,CAAC;QACpC,CAAC;QAED,QAAQ,CAAC,IAAI,CAAC;YACZ,IAAI,EAAE,MAAM;YACZ,OAAO,EAAE,KAAK,CAAC,OAAO;SACvB,CAAC,CAAC;QAEH,OAAO,QAAQ,CAAC;IAClB,CAAC;IAEO,cAAc,CAAC,MAAW;QAChC,MAAM,OAAO,GAAkB;YAC7B,WAAW;YACX,UAAU;YACV,gBAAgB;YAChB,aAAa;YACb,eAAe;YACf,WAAW;YACX,uBAAuB;YACvB,qBAAqB;SACtB,CAAC;QAEF,IAAI,OAAO,CAAC,QAAQ,CAAC,MAAM,CAAC,EAAE,CAAC;YAC7B,OAAO,MAAM,CAAC;QAChB,CAAC;QAED,mDAAmD;QACnD,OAAO,WAAW,CAAC;IACrB,CAAC;IAEO,aAAa;QACnB,OAAO;YACL,MAAM,EAAE,WAAW;YACnB,cAAc,EAAE,KAAK;YACrB,YAAY,EAAE,IAAI;SACnB,CAAC;IACJ,CAAC;CACF;AApJD,sCAoJC"}
|
|
@@ -1,4 +1,6 @@
|
|
|
1
1
|
import { SupabaseClient } from '@supabase/supabase-js';
|
|
2
|
+
export declare const EMBEDDING_MODEL = "text-embedding-3-large";
|
|
3
|
+
export declare const EMBEDDING_DIMENSIONS = 3072;
|
|
2
4
|
interface KnowledgeBaseConfig {
|
|
3
5
|
supabase: SupabaseClient;
|
|
4
6
|
tableName?: string;
|
|
@@ -19,8 +21,6 @@ interface KnowledgeItem {
|
|
|
19
21
|
}
|
|
20
22
|
interface SearchOptions {
|
|
21
23
|
limit?: number;
|
|
22
|
-
threshold?: number;
|
|
23
|
-
type?: 'semantic' | 'keyword' | 'hybrid';
|
|
24
24
|
company_id?: number;
|
|
25
25
|
}
|
|
26
26
|
interface SearchResult {
|
|
@@ -35,6 +35,7 @@ interface SearchResult {
|
|
|
35
35
|
export declare class KnowledgeBaseService {
|
|
36
36
|
private supabase;
|
|
37
37
|
private tableName;
|
|
38
|
+
private adjacentChunkSize;
|
|
38
39
|
constructor(config: KnowledgeBaseConfig);
|
|
39
40
|
createItem(item: {
|
|
40
41
|
parent_id?: string;
|
|
@@ -62,11 +63,25 @@ export declare class KnowledgeBaseService {
|
|
|
62
63
|
}>;
|
|
63
64
|
updateItem(itemId: string, updates: Partial<KnowledgeItem>): Promise<boolean>;
|
|
64
65
|
deleteItem(itemId: string): Promise<boolean>;
|
|
66
|
+
/**
|
|
67
|
+
* Search with top-k + adjacent chunk retrieval strategy
|
|
68
|
+
*/
|
|
65
69
|
search(query: string, options?: SearchOptions): Promise<SearchResult[]>;
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
70
|
+
/**
|
|
71
|
+
* Top-k semantic search (no threshold)
|
|
72
|
+
*/
|
|
73
|
+
private topKSemanticSearch;
|
|
74
|
+
/**
|
|
75
|
+
* Fetch adjacent chunks (±N) for all matched chunks in ONE query
|
|
76
|
+
*/
|
|
77
|
+
private fetchAdjacentChunks;
|
|
78
|
+
/**
|
|
79
|
+
* Merge continuous chunk sequences by document
|
|
80
|
+
*/
|
|
81
|
+
private mergeAdjacentChunks;
|
|
69
82
|
private generateEmbedding;
|
|
83
|
+
private createChunksForDocument;
|
|
84
|
+
private splitIntoChunks;
|
|
70
85
|
}
|
|
71
86
|
export {};
|
|
72
87
|
//# sourceMappingURL=KnowledgeBaseService.d.ts.map
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"KnowledgeBaseService.d.ts","sourceRoot":"","sources":["../../../src/services/KnowledgeBaseService.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,cAAc,EAAE,MAAM,uBAAuB,CAAC;
|
|
1
|
+
{"version":3,"file":"KnowledgeBaseService.d.ts","sourceRoot":"","sources":["../../../src/services/KnowledgeBaseService.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,cAAc,EAAE,MAAM,uBAAuB,CAAC;AAIvD,eAAO,MAAM,eAAe,2BAA2B,CAAC;AACxD,eAAO,MAAM,oBAAoB,OAAO,CAAC;AAEzC,UAAU,mBAAmB;IAC3B,QAAQ,EAAE,cAAc,CAAC;IACzB,SAAS,CAAC,EAAE,MAAM,CAAC;CACpB;AAED,UAAU,aAAa;IACrB,EAAE,CAAC,EAAE,MAAM,CAAC;IACZ,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,UAAU,CAAC,EAAE,MAAM,CAAC;IACpB,KAAK,EAAE,MAAM,CAAC;IACd,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,IAAI,EAAE,MAAM,CAAC;IACb,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,QAAQ,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,GAAG,CAAC,CAAC;IAC/B,UAAU,CAAC,EAAE,MAAM,CAAC;CACrB;AAED,UAAU,aAAa;IACrB,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,UAAU,CAAC,EAAE,MAAM,CAAC;CACrB;AAED,UAAU,YAAY;IACpB,EAAE,EAAE,MAAM,CAAC;IACX,KAAK,EAAE,MAAM,CAAC;IACd,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,IAAI,EAAE,MAAM,CAAC;IACb,KAAK,EAAE,MAAM,CAAC;IACd,QAAQ,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,GAAG,CAAC,CAAC;CAChC;AAeD,qBAAa,oBAAoB;IAC/B,OAAO,CAAC,QAAQ,CAAiB;IACjC,OAAO,CAAC,SAAS,CAAS;IAC1B,OAAO,CAAC,iBAAiB,CAAa;gBAE1B,MAAM,EAAE,mBAAmB;IAKjC,UAAU,CAAC,IAAI,EAAE;QACrB,SAAS,CAAC,EAAE,MAAM,CAAC;QACnB,UAAU,CAAC,EAAE,MAAM,CAAC;QACpB,KAAK,EAAE,MAAM,CAAC;QACd,WAAW,CAAC,EAAE,MAAM,CAAC;QACrB,IAAI,EAAE,MAAM,CAAC;QACb,OAAO,CAAC,EAAE,MAAM,CAAC;QACjB,QAAQ,CAAC,EAAE,MAAM,CAAC;QAClB,SAAS,CAAC,EAAE,MAAM,CAAC;QACnB,SAAS,CAAC,EAAE,MAAM,CAAC;QACnB,QAAQ,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,GAAG,CAAC,CAAC;QAC/B,UAAU,CAAC,EAAE,MAAM,CAAC;KACrB,GAAG,OAAO,CAAC,MAAM,CAAC;IA8Db,OAAO,CAAC,MAAM,EAAE,MAAM,GAAG,OAAO,CAAC,aAAa,GAAG,IAAI,CAAC;IAiDtD,SAAS,CAAC,OAAO,GAAE;QACvB,SAAS,CAAC,EAAE,MAAM,CAAC;QACnB,UAAU,CAAC,EAAE,MAAM,CAAC;QACpB,IAAI,CAAC,EAAE,MAAM,CAAC;QACd,KAAK,CAAC,EAAE,MAAM,CAAC;QACf,MAAM,CAAC,EAAE,MAAM,CAAC;KACZ,GAAG,OAAO,CAAC;QAAE,KAAK,EAAE,aAAa,EAAE,CAAC;QAAC,KAAK,EAAE,MAAM,CAAA;KAAE,CAAC;IAkFrD,UAAU,CAAC,MAAM,EAAE,MAAM,EAAE,OAAO,EAAE,OAAO,CAAC,aAAa,CAAC,GAAG,OAAO,CAAC,OAAO,CAAC;IAoC7E,UAAU,CAAC,MAAM,EAAE,MAAM,GAAG,OAAO,CAAC,OAAO,CAAC;IAelD;;OAEG;IACG,MAAM,CAAC,KAAK,EAAE,MAAM,EAAE,OAAO,GAAE,aAAkB,GAAG,OAAO,CAAC,YAAY,EAAE,CAAC;IAuCjF;;OAEG;YACW,kBAAkB;IAsBhC;;OAEG;YACW,mBAAmB;IA4FjC;;OAEG;IACH,OAAO,CAAC,mBAAmB;YAkDb,iBAAiB;YAkEjB,uBAAuB;IAoCrC,OAAO,CAAC,eAAe;CAmBxB"}
|
|
@@ -3,10 +3,14 @@ var __importDefault = (this && this.__importDefault) || function (mod) {
|
|
|
3
3
|
return (mod && mod.__esModule) ? mod : { "default": mod };
|
|
4
4
|
};
|
|
5
5
|
Object.defineProperty(exports, "__esModule", { value: true });
|
|
6
|
-
exports.KnowledgeBaseService = void 0;
|
|
6
|
+
exports.KnowledgeBaseService = exports.EMBEDDING_DIMENSIONS = exports.EMBEDDING_MODEL = void 0;
|
|
7
7
|
const logger_1 = __importDefault(require("../config/logger"));
|
|
8
|
+
// Embedding model configuration
|
|
9
|
+
exports.EMBEDDING_MODEL = 'text-embedding-3-large';
|
|
10
|
+
exports.EMBEDDING_DIMENSIONS = 3072;
|
|
8
11
|
class KnowledgeBaseService {
|
|
9
12
|
constructor(config) {
|
|
13
|
+
this.adjacentChunkSize = 2; // Fetch ±2 chunks
|
|
10
14
|
this.supabase = config.supabase;
|
|
11
15
|
this.tableName = config.tableName || 'vezlo_knowledge_items';
|
|
12
16
|
}
|
|
@@ -46,26 +50,19 @@ class KnowledgeBaseService {
|
|
|
46
50
|
created_at: new Date().toISOString(),
|
|
47
51
|
updated_at: new Date().toISOString()
|
|
48
52
|
};
|
|
49
|
-
//
|
|
50
|
-
if (item.content && (item.type === 'document' || item.type === 'file')) {
|
|
51
|
-
console.log('Attempting to generate embedding for content:', item.content.substring(0, 100) + '...');
|
|
52
|
-
const embedding = await this.generateEmbedding(item.content);
|
|
53
|
-
if (embedding) {
|
|
54
|
-
console.log('Embedding generated successfully, length:', embedding.length);
|
|
55
|
-
insertData.embedding = embedding;
|
|
56
|
-
insertData.processed_at = new Date().toISOString();
|
|
57
|
-
}
|
|
58
|
-
else {
|
|
59
|
-
console.log('Embedding generation returned null');
|
|
60
|
-
}
|
|
61
|
-
}
|
|
53
|
+
// Insert parent document (without embedding)
|
|
62
54
|
const { data, error } = await this.supabase
|
|
63
55
|
.from(this.tableName)
|
|
64
56
|
.insert(insertData)
|
|
65
|
-
.select('uuid')
|
|
57
|
+
.select('id, uuid')
|
|
66
58
|
.single();
|
|
67
59
|
if (error)
|
|
68
60
|
throw new Error(`Failed to create knowledge item: ${error.message}`);
|
|
61
|
+
// Create chunks with embeddings for content-based items
|
|
62
|
+
if (item.content && (item.type === 'document' || item.type === 'file')) {
|
|
63
|
+
console.log('Creating chunks for content...');
|
|
64
|
+
await this.createChunksForDocument(data.id, item.content, item.title);
|
|
65
|
+
}
|
|
69
66
|
return data.uuid;
|
|
70
67
|
}
|
|
71
68
|
catch (error) {
|
|
@@ -246,150 +243,175 @@ class KnowledgeBaseService {
|
|
|
246
243
|
throw new Error(`Failed to delete knowledge item: ${error instanceof Error ? error.message : 'Unknown error'}`);
|
|
247
244
|
}
|
|
248
245
|
}
|
|
246
|
+
/**
|
|
247
|
+
* Search with top-k + adjacent chunk retrieval strategy
|
|
248
|
+
*/
|
|
249
249
|
async search(query, options = {}) {
|
|
250
250
|
try {
|
|
251
|
-
const
|
|
252
|
-
|
|
253
|
-
|
|
254
|
-
|
|
255
|
-
// Reduced logging - only essential info
|
|
256
|
-
logger_1.default.info(`🔎 Search: type=${type}, threshold=${threshold}, limit=${limit}, companyId=${options.company_id ?? 'all'}`);
|
|
257
|
-
if (type === 'semantic') {
|
|
258
|
-
return await this.semanticSearch(query, limit, threshold, options.company_id);
|
|
259
|
-
}
|
|
260
|
-
else if (type === 'keyword') {
|
|
261
|
-
return await this.keywordSearch(query, limit, options.company_id);
|
|
262
|
-
}
|
|
263
|
-
else {
|
|
264
|
-
// Hybrid search - combine both approaches
|
|
265
|
-
const semanticResults = await this.semanticSearch(query, Math.ceil(limit / 2), threshold, options.company_id);
|
|
266
|
-
const keywordResults = await this.keywordSearch(query, Math.ceil(limit / 2), options.company_id);
|
|
267
|
-
// Merge and deduplicate results
|
|
268
|
-
const combined = [...semanticResults, ...keywordResults];
|
|
269
|
-
const unique = combined.filter((item, index, self) => index === self.findIndex(t => t.id === item.id));
|
|
270
|
-
logger_1.default.info(`📊 Hybrid: ${semanticResults.length} semantic + ${keywordResults.length} keyword = ${unique.length} total`);
|
|
271
|
-
return unique.slice(0, limit);
|
|
272
|
-
}
|
|
273
|
-
}
|
|
274
|
-
catch (error) {
|
|
275
|
-
console.error('Search error:', error);
|
|
276
|
-
throw new Error(`Failed to search knowledge items: ${error instanceof Error ? error.message : 'Unknown error'}`);
|
|
277
|
-
}
|
|
278
|
-
}
|
|
279
|
-
async semanticSearch(query, limit, threshold, companyId) {
|
|
280
|
-
try {
|
|
251
|
+
const topK = options.limit || 5;
|
|
252
|
+
const companyId = options.company_id;
|
|
253
|
+
logger_1.default.info(`🔎 Search: top-k=${topK}, adjacent=±${this.adjacentChunkSize}, companyId=${companyId ?? 'all'}`);
|
|
254
|
+
// Step 1: Generate query embedding
|
|
281
255
|
const queryEmbedding = await this.generateEmbedding(query);
|
|
282
256
|
if (!queryEmbedding) {
|
|
283
257
|
logger_1.default.error('Failed to generate query embedding');
|
|
284
258
|
return [];
|
|
285
259
|
}
|
|
286
|
-
//
|
|
287
|
-
|
|
288
|
-
|
|
289
|
-
|
|
290
|
-
const { data, error } = await this.supabase.rpc('match_vezlo_knowledge', {
|
|
291
|
-
query_embedding: queryEmbedding,
|
|
292
|
-
match_threshold: threshold,
|
|
293
|
-
match_count: limit,
|
|
294
|
-
filter_company_id: companyId !== undefined ? companyId : null
|
|
295
|
-
});
|
|
296
|
-
if (error) {
|
|
297
|
-
logger_1.default.error('RPC vector search error:', error);
|
|
298
|
-
throw new Error(`Semantic search failed: ${error.message}`);
|
|
299
|
-
}
|
|
300
|
-
if (!data || data.length === 0) {
|
|
301
|
-
logger_1.default.warn(`⚠️ No items found in DB for companyId=${companyId ?? 'all'}`);
|
|
260
|
+
// Step 2: Initial top-k semantic search (no threshold)
|
|
261
|
+
const initialChunks = await this.topKSemanticSearch(queryEmbedding, topK, companyId);
|
|
262
|
+
if (initialChunks.length === 0) {
|
|
263
|
+
logger_1.default.warn('⚠️ No chunks found in top-k search');
|
|
302
264
|
return [];
|
|
303
265
|
}
|
|
304
|
-
logger_1.default.info(`📦
|
|
305
|
-
//
|
|
306
|
-
const
|
|
307
|
-
|
|
308
|
-
|
|
309
|
-
|
|
310
|
-
|
|
311
|
-
|
|
312
|
-
score: item.similarity,
|
|
313
|
-
metadata: item.metadata
|
|
314
|
-
}));
|
|
315
|
-
// Log results summary
|
|
316
|
-
if (results.length > 0) {
|
|
317
|
-
const topResults = results.slice(0, 3);
|
|
318
|
-
const topScores = topResults.map(r => `${r.title}:${r.score.toFixed(2)}`).join(', ');
|
|
319
|
-
logger_1.default.info(`✅ Found ${results.length} results above threshold (top: ${topScores})`);
|
|
320
|
-
}
|
|
321
|
-
return results;
|
|
266
|
+
logger_1.default.info(`📦 Found ${initialChunks.length} initial chunks (scores: ${initialChunks.map(c => c.similarity.toFixed(2)).join(', ')})`);
|
|
267
|
+
// Step 3: Fetch adjacent chunks for each matched chunk
|
|
268
|
+
const enrichedChunks = await this.fetchAdjacentChunks(initialChunks);
|
|
269
|
+
logger_1.default.info(`📚 Enriched to ${enrichedChunks.length} total chunks (with adjacent context)`);
|
|
270
|
+
// Step 4: Group by document and merge continuous sequences
|
|
271
|
+
const mergedResults = this.mergeAdjacentChunks(enrichedChunks, initialChunks);
|
|
272
|
+
logger_1.default.info(`✅ Merged into ${mergedResults.length} contextual results`);
|
|
273
|
+
return mergedResults;
|
|
322
274
|
}
|
|
323
275
|
catch (error) {
|
|
324
|
-
logger_1.default.error('
|
|
276
|
+
logger_1.default.error('Search error:', error);
|
|
325
277
|
return [];
|
|
326
278
|
}
|
|
327
279
|
}
|
|
328
|
-
|
|
329
|
-
|
|
330
|
-
|
|
331
|
-
|
|
332
|
-
|
|
333
|
-
|
|
334
|
-
|
|
335
|
-
|
|
336
|
-
|
|
337
|
-
|
|
338
|
-
|
|
339
|
-
|
|
340
|
-
|
|
341
|
-
|
|
342
|
-
return 0;
|
|
343
|
-
}
|
|
344
|
-
const dotProduct = a.reduce((sum, val, i) => sum + val * b[i], 0);
|
|
345
|
-
const magnitudeA = Math.sqrt(a.reduce((sum, val) => sum + val * val, 0));
|
|
346
|
-
const magnitudeB = Math.sqrt(b.reduce((sum, val) => sum + val * val, 0));
|
|
347
|
-
if (magnitudeA === 0 || magnitudeB === 0) {
|
|
348
|
-
return 0;
|
|
349
|
-
}
|
|
350
|
-
return dotProduct / (magnitudeA * magnitudeB);
|
|
351
|
-
}
|
|
352
|
-
catch (error) {
|
|
353
|
-
console.error('Error in cosine similarity calculation:', error);
|
|
354
|
-
return 0;
|
|
280
|
+
/**
|
|
281
|
+
* Top-k semantic search (no threshold)
|
|
282
|
+
*/
|
|
283
|
+
async topKSemanticSearch(queryEmbedding, topK, companyId) {
|
|
284
|
+
const rpcParams = {
|
|
285
|
+
query_embedding: JSON.stringify(queryEmbedding),
|
|
286
|
+
match_threshold: 0.0, // No threshold - pure top-k
|
|
287
|
+
match_count: topK,
|
|
288
|
+
filter_company_id: companyId !== undefined ? companyId : null
|
|
289
|
+
};
|
|
290
|
+
const { data, error } = await this.supabase.rpc('vezlo_match_knowledge_chunks', rpcParams);
|
|
291
|
+
if (error) {
|
|
292
|
+
logger_1.default.error('RPC top-k search error:', error);
|
|
293
|
+
throw new Error(`Top-k search failed: ${error.message}`);
|
|
355
294
|
}
|
|
295
|
+
return data || [];
|
|
356
296
|
}
|
|
357
|
-
|
|
358
|
-
|
|
359
|
-
|
|
360
|
-
|
|
361
|
-
|
|
297
|
+
/**
|
|
298
|
+
* Fetch adjacent chunks (±N) for all matched chunks in ONE query
|
|
299
|
+
*/
|
|
300
|
+
async fetchAdjacentChunks(matchedChunks) {
|
|
301
|
+
if (matchedChunks.length === 0) {
|
|
302
|
+
return [];
|
|
303
|
+
}
|
|
304
|
+
// Build similarity lookup map for matched chunks
|
|
305
|
+
const similarityMap = new Map();
|
|
306
|
+
matchedChunks.forEach(chunk => {
|
|
307
|
+
similarityMap.set(`${chunk.document_id}-${chunk.chunk_index}`, chunk.similarity);
|
|
308
|
+
});
|
|
309
|
+
// Calculate all adjacent ranges and build OR conditions
|
|
310
|
+
const ranges = [];
|
|
311
|
+
matchedChunks.forEach(chunk => {
|
|
312
|
+
const minIndex = Math.max(0, chunk.chunk_index - this.adjacentChunkSize);
|
|
313
|
+
const maxIndex = chunk.chunk_index + this.adjacentChunkSize;
|
|
314
|
+
ranges.push({
|
|
315
|
+
documentId: chunk.document_id,
|
|
316
|
+
minIndex,
|
|
317
|
+
maxIndex
|
|
318
|
+
});
|
|
319
|
+
});
|
|
320
|
+
// Fetch ALL adjacent chunks in ONE query using OR conditions
|
|
321
|
+
let query = this.supabase
|
|
322
|
+
.from('vezlo_knowledge_chunks')
|
|
323
|
+
.select(`
|
|
324
|
+
id,
|
|
325
|
+
document_id,
|
|
326
|
+
chunk_text,
|
|
327
|
+
chunk_index,
|
|
328
|
+
vezlo_knowledge_items!inner(
|
|
362
329
|
uuid,
|
|
363
330
|
title,
|
|
364
331
|
description,
|
|
365
|
-
content,
|
|
366
332
|
type,
|
|
367
333
|
metadata
|
|
368
|
-
|
|
369
|
-
|
|
370
|
-
|
|
371
|
-
|
|
372
|
-
|
|
373
|
-
|
|
374
|
-
|
|
375
|
-
|
|
376
|
-
|
|
377
|
-
|
|
378
|
-
if (error)
|
|
379
|
-
throw new Error(`Keyword search failed: ${error.message}`);
|
|
380
|
-
return data.map(item => ({
|
|
381
|
-
id: item.uuid,
|
|
382
|
-
title: item.title,
|
|
383
|
-
description: item.description,
|
|
384
|
-
content: item.content,
|
|
385
|
-
type: item.type,
|
|
386
|
-
score: 0.8, // Default score for keyword matches
|
|
387
|
-
metadata: item.metadata
|
|
388
|
-
}));
|
|
334
|
+
)
|
|
335
|
+
`);
|
|
336
|
+
// Build OR filter: (doc=1 AND idx>=10 AND idx<=14) OR (doc=2 AND idx>=5 AND idx<=9) OR ...
|
|
337
|
+
const orConditions = ranges.map(r => `and(document_id.eq.${r.documentId},chunk_index.gte.${r.minIndex},chunk_index.lte.${r.maxIndex})`).join(',');
|
|
338
|
+
query = query.or(orConditions);
|
|
339
|
+
query = query.order('document_id', { ascending: true }).order('chunk_index', { ascending: true });
|
|
340
|
+
const { data, error } = await query;
|
|
341
|
+
if (error) {
|
|
342
|
+
logger_1.default.error('Failed to fetch adjacent chunks:', error);
|
|
343
|
+
return matchedChunks; // Fallback to original chunks on error
|
|
389
344
|
}
|
|
390
|
-
|
|
391
|
-
return
|
|
345
|
+
if (!data || data.length === 0) {
|
|
346
|
+
return matchedChunks;
|
|
392
347
|
}
|
|
348
|
+
// Transform and assign similarity scores
|
|
349
|
+
const allChunks = data.map((row) => {
|
|
350
|
+
const doc = row.vezlo_knowledge_items;
|
|
351
|
+
const key = `${row.document_id}-${row.chunk_index}`;
|
|
352
|
+
const similarity = similarityMap.get(key) || 0; // Use original score if matched, else 0
|
|
353
|
+
return {
|
|
354
|
+
chunk_id: row.id,
|
|
355
|
+
document_id: row.document_id,
|
|
356
|
+
document_uuid: doc.uuid,
|
|
357
|
+
document_title: doc.title,
|
|
358
|
+
document_description: doc.description,
|
|
359
|
+
document_type: doc.type,
|
|
360
|
+
document_metadata: doc.metadata,
|
|
361
|
+
chunk_text: row.chunk_text,
|
|
362
|
+
chunk_index: row.chunk_index,
|
|
363
|
+
similarity
|
|
364
|
+
};
|
|
365
|
+
});
|
|
366
|
+
// Deduplicate by chunk_id
|
|
367
|
+
const uniqueChunks = new Map();
|
|
368
|
+
allChunks.forEach(chunk => {
|
|
369
|
+
if (!uniqueChunks.has(chunk.chunk_id)) {
|
|
370
|
+
uniqueChunks.set(chunk.chunk_id, chunk);
|
|
371
|
+
}
|
|
372
|
+
});
|
|
373
|
+
return Array.from(uniqueChunks.values());
|
|
374
|
+
}
|
|
375
|
+
/**
|
|
376
|
+
* Merge continuous chunk sequences by document
|
|
377
|
+
*/
|
|
378
|
+
mergeAdjacentChunks(allChunks, originalMatches) {
|
|
379
|
+
// Group chunks by document
|
|
380
|
+
const byDocument = new Map();
|
|
381
|
+
allChunks.forEach(chunk => {
|
|
382
|
+
if (!byDocument.has(chunk.document_id)) {
|
|
383
|
+
byDocument.set(chunk.document_id, []);
|
|
384
|
+
}
|
|
385
|
+
byDocument.get(chunk.document_id).push(chunk);
|
|
386
|
+
});
|
|
387
|
+
// Merge continuous sequences within each document
|
|
388
|
+
const results = [];
|
|
389
|
+
byDocument.forEach((chunks, documentId) => {
|
|
390
|
+
// Sort by chunk_index
|
|
391
|
+
chunks.sort((a, b) => a.chunk_index - b.chunk_index);
|
|
392
|
+
// Find the best similarity score for this document (from original matches)
|
|
393
|
+
const bestMatch = originalMatches.find(m => m.document_id === documentId);
|
|
394
|
+
const score = bestMatch?.similarity || 0;
|
|
395
|
+
// Merge all chunks into single content (preserving order)
|
|
396
|
+
const mergedContent = chunks.map(c => c.chunk_text).join('\n\n');
|
|
397
|
+
// Use first chunk's metadata for result
|
|
398
|
+
const firstChunk = chunks[0];
|
|
399
|
+
results.push({
|
|
400
|
+
id: firstChunk.document_uuid,
|
|
401
|
+
title: firstChunk.document_title,
|
|
402
|
+
description: firstChunk.document_description,
|
|
403
|
+
content: mergedContent,
|
|
404
|
+
type: firstChunk.document_type,
|
|
405
|
+
score,
|
|
406
|
+
metadata: {
|
|
407
|
+
...firstChunk.document_metadata,
|
|
408
|
+
chunk_count: chunks.length,
|
|
409
|
+
chunk_range: `${chunks[0].chunk_index}-${chunks[chunks.length - 1].chunk_index}`
|
|
410
|
+
}
|
|
411
|
+
});
|
|
412
|
+
});
|
|
413
|
+
// Sort by score (highest first)
|
|
414
|
+
return results.sort((a, b) => b.score - a.score);
|
|
393
415
|
}
|
|
394
416
|
async generateEmbedding(text) {
|
|
395
417
|
const maxRetries = 3;
|
|
@@ -411,7 +433,7 @@ class KnowledgeBaseService {
|
|
|
411
433
|
'Content-Type': 'application/json'
|
|
412
434
|
},
|
|
413
435
|
body: JSON.stringify({
|
|
414
|
-
model:
|
|
436
|
+
model: exports.EMBEDDING_MODEL,
|
|
415
437
|
input: text.substring(0, 8000) // Limit text length to avoid token limits
|
|
416
438
|
}),
|
|
417
439
|
signal: controller.signal
|
|
@@ -442,6 +464,50 @@ class KnowledgeBaseService {
|
|
|
442
464
|
}
|
|
443
465
|
return null;
|
|
444
466
|
}
|
|
467
|
+
async createChunksForDocument(documentId, content, documentTitle) {
|
|
468
|
+
const chunkSize = parseInt(process.env.CHUNK_SIZE || '1000');
|
|
469
|
+
const chunkOverlap = parseInt(process.env.CHUNK_OVERLAP || '200');
|
|
470
|
+
const chunks = this.splitIntoChunks(content, chunkSize, chunkOverlap);
|
|
471
|
+
const processedAt = new Date().toISOString();
|
|
472
|
+
console.log(`Creating ${chunks.length} chunks for document...`);
|
|
473
|
+
for (let i = 0; i < chunks.length; i++) {
|
|
474
|
+
const chunk = chunks[i];
|
|
475
|
+
// Generate embedding from chunk text
|
|
476
|
+
const embedding = await this.generateEmbedding(chunk.text);
|
|
477
|
+
if (embedding) {
|
|
478
|
+
const { data, error } = await this.supabase.rpc('vezlo_insert_knowledge_chunk', {
|
|
479
|
+
p_document_id: documentId,
|
|
480
|
+
p_chunk_text: chunk.text,
|
|
481
|
+
p_chunk_index: i,
|
|
482
|
+
p_start_char: chunk.startChar,
|
|
483
|
+
p_end_char: chunk.endChar,
|
|
484
|
+
p_token_count: Math.ceil(chunk.text.length / 4),
|
|
485
|
+
p_embedding: JSON.stringify(embedding),
|
|
486
|
+
p_processed_at: processedAt
|
|
487
|
+
});
|
|
488
|
+
if (error) {
|
|
489
|
+
console.error(`❌ Failed to insert chunk ${i}:`, error);
|
|
490
|
+
throw new Error(`Failed to insert chunk: ${error.message}`);
|
|
491
|
+
}
|
|
492
|
+
console.log(`✓ Inserted chunk ${i} (ID: ${data})`);
|
|
493
|
+
}
|
|
494
|
+
}
|
|
495
|
+
}
|
|
496
|
+
splitIntoChunks(text, chunkSize, overlap) {
|
|
497
|
+
const chunks = [];
|
|
498
|
+
let startChar = 0;
|
|
499
|
+
while (startChar < text.length) {
|
|
500
|
+
const endChar = Math.min(startChar + chunkSize, text.length);
|
|
501
|
+
const chunkText = text.substring(startChar, endChar);
|
|
502
|
+
chunks.push({
|
|
503
|
+
text: chunkText,
|
|
504
|
+
startChar: startChar,
|
|
505
|
+
endChar: endChar
|
|
506
|
+
});
|
|
507
|
+
startChar += chunkSize - overlap;
|
|
508
|
+
}
|
|
509
|
+
return chunks;
|
|
510
|
+
}
|
|
445
511
|
}
|
|
446
512
|
exports.KnowledgeBaseService = KnowledgeBaseService;
|
|
447
513
|
//# sourceMappingURL=KnowledgeBaseService.js.map
|