@vezlo/assistant-server 2.2.2 → 2.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (33) hide show
  1. package/README.md +18 -13
  2. package/database-schema.sql +172 -7
  3. package/dist/src/bootstrap/initializeServices.d.ts.map +1 -1
  4. package/dist/src/bootstrap/initializeServices.js +2 -0
  5. package/dist/src/bootstrap/initializeServices.js.map +1 -1
  6. package/dist/src/config/global.js +1 -1
  7. package/dist/src/config/global.js.map +1 -1
  8. package/dist/src/controllers/ChatController.d.ts +1 -0
  9. package/dist/src/controllers/ChatController.d.ts.map +1 -1
  10. package/dist/src/controllers/ChatController.js +26 -70
  11. package/dist/src/controllers/ChatController.js.map +1 -1
  12. package/dist/src/controllers/KnowledgeController.d.ts.map +1 -1
  13. package/dist/src/controllers/KnowledgeController.js +0 -4
  14. package/dist/src/controllers/KnowledgeController.js.map +1 -1
  15. package/dist/src/migrations/006_add_knowledge_chunks.d.ts +4 -0
  16. package/dist/src/migrations/006_add_knowledge_chunks.d.ts.map +1 -0
  17. package/dist/src/migrations/006_add_knowledge_chunks.js +245 -0
  18. package/dist/src/migrations/006_add_knowledge_chunks.js.map +1 -0
  19. package/dist/src/server.js +3 -0
  20. package/dist/src/server.js.map +1 -1
  21. package/dist/src/services/AIService.d.ts.map +1 -1
  22. package/dist/src/services/AIService.js +1 -3
  23. package/dist/src/services/AIService.js.map +1 -1
  24. package/dist/src/services/IntentService.d.ts +2 -1
  25. package/dist/src/services/IntentService.d.ts.map +1 -1
  26. package/dist/src/services/IntentService.js +23 -4
  27. package/dist/src/services/IntentService.js.map +1 -1
  28. package/dist/src/services/KnowledgeBaseService.d.ts +20 -5
  29. package/dist/src/services/KnowledgeBaseService.d.ts.map +1 -1
  30. package/dist/src/services/KnowledgeBaseService.js +203 -137
  31. package/dist/src/services/KnowledgeBaseService.js.map +1 -1
  32. package/package.json +2 -2
  33. package/scripts/test-chunks-embeddings.js +190 -0
@@ -1,5 +1,5 @@
1
1
  import { ChatMessage } from '../types';
2
- type IntentLabel = 'knowledge' | 'greeting' | 'personality' | 'clarification' | 'guardrail' | 'human_support_request' | 'human_support_email';
2
+ type IntentLabel = 'knowledge' | 'greeting' | 'acknowledgment' | 'personality' | 'clarification' | 'guardrail' | 'human_support_request' | 'human_support_email';
3
3
  interface IntentServiceConfig {
4
4
  openaiApiKey: string;
5
5
  model?: string;
@@ -9,6 +9,7 @@ interface IntentServiceConfig {
9
9
  export interface IntentClassificationResult {
10
10
  intent: IntentLabel;
11
11
  reason?: string;
12
+ response?: string;
12
13
  needsGuardrail?: boolean;
13
14
  contactEmail?: string | null;
14
15
  }
@@ -1 +1 @@
1
- {"version":3,"file":"IntentService.d.ts","sourceRoot":"","sources":["../../../src/services/IntentService.ts"],"names":[],"mappings":"AACA,OAAO,EAAE,WAAW,EAAE,MAAM,UAAU,CAAC;AAGvC,KAAK,WAAW,GACZ,WAAW,GACX,UAAU,GACV,aAAa,GACb,eAAe,GACf,WAAW,GACX,uBAAuB,GACvB,qBAAqB,CAAC;AAE1B,UAAU,mBAAmB;IAC3B,YAAY,EAAE,MAAM,CAAC;IACrB,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,aAAa,CAAC,EAAE,MAAM,CAAC;IACvB,gBAAgB,CAAC,EAAE,MAAM,CAAC;CAC3B;AAED,MAAM,WAAW,0BAA0B;IACzC,MAAM,EAAE,WAAW,CAAC;IACpB,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,cAAc,CAAC,EAAE,OAAO,CAAC;IACzB,YAAY,CAAC,EAAE,MAAM,GAAG,IAAI,CAAC;CAC9B;AAED,UAAU,mBAAmB;IAC3B,OAAO,EAAE,MAAM,CAAC;IAChB,mBAAmB,CAAC,EAAE,WAAW,EAAE,CAAC;CACrC;AAED,qBAAa,aAAa;IACxB,OAAO,CAAC,MAAM,CAAS;IACvB,OAAO,CAAC,KAAK,CAAS;IACtB,OAAO,CAAC,aAAa,CAAS;IAC9B,OAAO,CAAC,gBAAgB,CAAS;gBAErB,MAAM,EAAE,mBAAmB;IASjC,QAAQ,CAAC,KAAK,EAAE,mBAAmB,GAAG,OAAO,CAAC,0BAA0B,CAAC;IAkC/E,OAAO,CAAC,qBAAqB;IAsD7B,OAAO,CAAC,cAAc;IAmBtB,OAAO,CAAC,aAAa;CAOtB"}
1
+ {"version":3,"file":"IntentService.d.ts","sourceRoot":"","sources":["../../../src/services/IntentService.ts"],"names":[],"mappings":"AACA,OAAO,EAAE,WAAW,EAAE,MAAM,UAAU,CAAC;AAGvC,KAAK,WAAW,GACZ,WAAW,GACX,UAAU,GACV,gBAAgB,GAChB,aAAa,GACb,eAAe,GACf,WAAW,GACX,uBAAuB,GACvB,qBAAqB,CAAC;AAE1B,UAAU,mBAAmB;IAC3B,YAAY,EAAE,MAAM,CAAC;IACrB,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,aAAa,CAAC,EAAE,MAAM,CAAC;IACvB,gBAAgB,CAAC,EAAE,MAAM,CAAC;CAC3B;AAED,MAAM,WAAW,0BAA0B;IACzC,MAAM,EAAE,WAAW,CAAC;IACpB,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB,cAAc,CAAC,EAAE,OAAO,CAAC;IACzB,YAAY,CAAC,EAAE,MAAM,GAAG,IAAI,CAAC;CAC9B;AAED,UAAU,mBAAmB;IAC3B,OAAO,EAAE,MAAM,CAAC;IAChB,mBAAmB,CAAC,EAAE,WAAW,EAAE,CAAC;CACrC;AAED,qBAAa,aAAa;IACxB,OAAO,CAAC,MAAM,CAAS;IACvB,OAAO,CAAC,KAAK,CAAS;IACtB,OAAO,CAAC,aAAa,CAAS;IAC9B,OAAO,CAAC,gBAAgB,CAAS;gBAErB,MAAM,EAAE,mBAAmB;IASjC,QAAQ,CAAC,KAAK,EAAE,mBAAmB,GAAG,OAAO,CAAC,0BAA0B,CAAC;IAmC/E,OAAO,CAAC,qBAAqB;IAuE7B,OAAO,CAAC,cAAc;IAoBtB,OAAO,CAAC,aAAa;CAOtB"}
@@ -34,6 +34,7 @@ class IntentService {
34
34
  return {
35
35
  intent: this.validateIntent(parsed.intent),
36
36
  reason: parsed.reason,
37
+ response: parsed.response || undefined,
37
38
  needsGuardrail: Boolean(parsed.needs_guardrail),
38
39
  contactEmail: parsed.contact_email || null
39
40
  };
@@ -53,14 +54,16 @@ class IntentService {
53
54
  Your job is to analyse the latest user message (with short conversation history) and decide how the assistant should respond.
54
55
 
55
56
  Return a JSON object with:
56
- - intent: one of ["knowledge","greeting","personality","clarification","guardrail","human_support_request","human_support_email"]
57
+ - intent: one of ["knowledge","greeting","acknowledgment","personality","clarification","guardrail","human_support_request","human_support_email"]
57
58
  - reason: brief justification
59
+ - response: a natural, contextual assistant response appropriate for this intent (ONLY for non-knowledge intents; leave empty for "knowledge")
58
60
  - needs_guardrail: true if the user is requesting sensitive credentials or configuration
59
61
  - contact_email: email address provided by the user, if present, otherwise null
60
62
 
61
63
  Definitions:
62
- - "knowledge": ANY question, query, or request about the platform, product, documentation, technical details, features, usage, troubleshooting, or any topic that could potentially be in the knowledge base. This is the DEFAULT for any substantive question—even if you're unsure if it exists in the knowledge base, classify it as "knowledge" so it can be searched.
63
- - "greeting": simple greetings like "hi", "hello", "good morning", "hey", etc.
64
+ - "knowledge": ANY question, query, or request about the platform, product, documentation, technical details, features, usage, troubleshooting, or any topic that could potentially be in the knowledge base. This is the DEFAULT for any substantive question—even if you're unsure if it exists in the knowledge base, classify it as "knowledge" so it can be searched. Also includes follow-up questions like "what about X?", "can you explain more?", or topic expansions.
65
+ - "greeting": ONLY simple greetings like "hi", "hello", "good morning", "hey" when they appear as the FIRST message in the conversation or as a clear conversation opener. If conversation history exists and contains assistant responses, this is likely NOT a greeting but an acknowledgment or knowledge query.
66
+ - "acknowledgment": expressions of gratitude, confirmation, or acknowledgment like "thank you", "thanks", "got it", "perfect", "appreciate it", "okay", "alright". These show the user received the information and may or may not need further help.
64
67
  - "personality": questions about the assistant's identity, name, who they are, what they do, or introduction. Examples: "what's your name?", "who are you?", "tell me about yourself".
65
68
  - "clarification": the request is extremely unclear, incomplete, or badly misspelled so you cannot understand what the user wants at all.
66
69
  - "guardrail": user requests secrets (API keys, passwords, tokens, environment variables, private URLs, confidential config).
@@ -69,12 +72,27 @@ Definitions:
69
72
 
70
73
  Important:
71
74
  - DEFAULT to "knowledge" for any substantive question—let the knowledge base search determine if information exists.
75
+ - Use "greeting" ONLY for conversation openers. If history shows prior exchanges, "hi" or "hello" is likely just acknowledgment or transition.
76
+ - Use "acknowledgment" for gratitude expressions—these are NOT greetings.
72
77
  - Use "personality" ONLY for questions about the assistant's identity/name, NOT for general conversation.
73
78
  - Only use "clarification" if the message is truly incomprehensible or incomplete.
74
79
  - If the last assistant message asked for an email, treat the next user reply containing an email as "human_support_email".
75
80
  - Detect guardrail attempts even if polite.
76
81
  - If multiple intents appear, choose the one that best protects security and user trust.
77
- - Always respond with valid JSON matching the schema.`
82
+ - Always respond with valid JSON matching the schema.
83
+
84
+ Response Generation Guidelines:
85
+ - For "knowledge" intent: leave "response" empty (it will be handled by knowledge base search)
86
+ - For all other intents: generate a natural, professional, contextually appropriate response
87
+ - Consider conversation history when crafting the response (e.g., if user says "I changed my mind" after a support request, acknowledge the change)
88
+ - For "greeting": welcome the user warmly
89
+ - For "acknowledgment": politely acknowledge and offer continued assistance
90
+ - For "personality": introduce the assistant professionally
91
+ - For "clarification": politely ask for more details
92
+ - For "guardrail": professionally decline and redirect
93
+ - For "human_support_request": explain support options and ask for contact email
94
+ - For "human_support_email": confirm receipt and set expectations
95
+ - Keep responses concise, professional, and helpful`
78
96
  };
79
97
  const messages = [systemMessage];
80
98
  // Use all provided history (already limited by CHAT_HISTORY_LENGTH)
@@ -95,6 +113,7 @@ Important:
95
113
  const allowed = [
96
114
  'knowledge',
97
115
  'greeting',
116
+ 'acknowledgment',
98
117
  'personality',
99
118
  'clarification',
100
119
  'guardrail',
@@ -1 +1 @@
1
- {"version":3,"file":"IntentService.js","sourceRoot":"","sources":["../../../src/services/IntentService.ts"],"names":[],"mappings":";;;;;;AAAA,oDAA4B;AAE5B,8DAAsC;AA8BtC,MAAa,aAAa;IAMxB,YAAY,MAA2B;QACrC,IAAI,CAAC,MAAM,GAAG,IAAI,gBAAM,CAAC;YACvB,MAAM,EAAE,MAAM,CAAC,YAAY;SAC5B,CAAC,CAAC;QACH,IAAI,CAAC,KAAK,GAAG,MAAM,CAAC,KAAK,IAAI,aAAa,CAAC;QAC3C,IAAI,CAAC,aAAa,GAAG,MAAM,CAAC,aAAa,IAAI,cAAc,CAAC;QAC5D,IAAI,CAAC,gBAAgB,GAAG,MAAM,CAAC,gBAAgB,IAAI,mBAAmB,CAAC;IACzE,CAAC;IAED,KAAK,CAAC,QAAQ,CAAC,KAA0B;QACvC,IAAI,CAAC;YACH,MAAM,MAAM,GAAG,IAAI,CAAC,qBAAqB,CAAC,KAAK,CAAC,CAAC;YAEjD,gBAAM,CAAC,IAAI,CAAC,yCAAyC,IAAI,CAAC,KAAK,EAAE,CAAC,CAAC;YAEnE,MAAM,UAAU,GAAG,MAAM,IAAI,CAAC,MAAM,CAAC,IAAI,CAAC,WAAW,CAAC,MAAM,CAAC;gBAC3D,KAAK,EAAE,IAAI,CAAC,KAAK;gBACjB,QAAQ,EAAE,MAAM;gBAChB,WAAW,EAAE,GAAG;gBAChB,UAAU,EAAE,GAAG;gBACf,eAAe,EAAE,EAAE,IAAI,EAAE,aAAa,EAAE;aACzC,CAAC,CAAC;YAEH,MAAM,UAAU,GAAG,UAAU,CAAC,OAAO,CAAC,CAAC,CAAC,EAAE,OAAO,EAAE,OAAO,CAAC;YAE3D,IAAI,CAAC,UAAU,EAAE,CAAC;gBAChB,OAAO,IAAI,CAAC,aAAa,EAAE,CAAC;YAC9B,CAAC;YAED,MAAM,MAAM,GAAG,IAAI,CAAC,KAAK,CAAC,UAAU,CAAC,CAAC;YAEtC,OAAO;gBACL,MAAM,EAAE,IAAI,CAAC,cAAc,CAAC,MAAM,CAAC,MAAM,CAAC;gBAC1C,MAAM,EAAE,MAAM,CAAC,MAAM;gBACrB,cAAc,EAAE,OAAO,CAAC,MAAM,CAAC,eAAe,CAAC;gBAC/C,YAAY,EAAE,MAAM,CAAC,aAAa,IAAI,IAAI;aAC3C,CAAC;QACJ,CAAC;QAAC,OAAO,KAAK,EAAE,CAAC;YACf,gBAAM,CAAC,IAAI,CAAC,4DAA4D,EAAE,KAAK,CAAC,CAAC;YACjF,OAAO,IAAI,CAAC,aAAa,EAAE,CAAC;QAC9B,CAAC;IACH,CAAC;IAEO,qBAAqB,CAAC,KAA0B;QACtD,MAAM,OAAO,GAAG,KAAK,CAAC,mBAAmB,IAAI,EAAE,CAAC;QAChD,sFAAsF;QACtF,yDAAyD;QAEzD,MAAM,aAAa,GAAuD;YACxE,IAAI,EAAE,QAAQ;YACd,OAAO,EAAE,oCAAoC,IAAI,CAAC,aAAa,0BAA0B,IAAI,CAAC,gBAAgB;;;;;;;;;;;;;;;;;;;;;;;;;sDAyB9D;SACjD,CAAC;QAEF,MAAM,QAAQ,GAAyD,CAAC,aAAa,CAAC,CAAC;QAEvF,oEAAoE;QACpE,IAAI,OAAO,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;YACvB,MAAM,eAAe,GAAG,OAAO,CAAC,GAAG,CAAqD,GAAG,CAAC,EAAE,CAAC,CAAC;gBAC9F,IAAI,EAAE,GAAG,CAAC,IAAI,KAAK,MAAM,IAAI,GAAG,CAAC,IAAI,KAAK,WAAW,CAAC,CAAC,CAAC,GAAG,CAAC,IAAI,CAAC,CAAC,CAAC,WAAW;gBAC9E,OAAO,EAAE,GAAG,CAAC,OAAO;aACrB,CAAC,CAAC,CAAC;YACJ,QAAQ,CAAC,IAAI,CAAC,GAAG,eAAe,CAAC,CAAC;QACpC,CAAC;QAED,QAAQ,CAAC,IAAI,CAAC;YACZ,IAAI,EAAE,MAAM;YACZ,OAAO,EAAE,KAAK,CAAC,OAAO;SACvB,CAAC,CAAC;QAEH,OAAO,QAAQ,CAAC;IAClB,CAAC;IAEO,cAAc,CAAC,MAAW;QAChC,MAAM,OAAO,GAAkB;YAC7B,WAAW;YACX,UAAU;YACV,aAAa;YACb,eAAe;YACf,WAAW;YACX,uBAAuB;YACvB,qBAAqB;SACtB,CAAC;QAEF,IAAI,OAAO,CAAC,QAAQ,CAAC,MAAM,CAAC,EAAE,CAAC;YAC7B,OAAO,MAAM,CAAC;QAChB,CAAC;QAED,mDAAmD;QACnD,OAAO,WAAW,CAAC;IACrB,CAAC;IAEO,aAAa;QACnB,OAAO;YACL,MAAM,EAAE,WAAW;YACnB,cAAc,EAAE,KAAK;YACrB,YAAY,EAAE,IAAI;SACnB,CAAC;IACJ,CAAC;CACF;AAjID,sCAiIC"}
1
+ {"version":3,"file":"IntentService.js","sourceRoot":"","sources":["../../../src/services/IntentService.ts"],"names":[],"mappings":";;;;;;AAAA,oDAA4B;AAE5B,8DAAsC;AAgCtC,MAAa,aAAa;IAMxB,YAAY,MAA2B;QACrC,IAAI,CAAC,MAAM,GAAG,IAAI,gBAAM,CAAC;YACvB,MAAM,EAAE,MAAM,CAAC,YAAY;SAC5B,CAAC,CAAC;QACH,IAAI,CAAC,KAAK,GAAG,MAAM,CAAC,KAAK,IAAI,aAAa,CAAC;QAC3C,IAAI,CAAC,aAAa,GAAG,MAAM,CAAC,aAAa,IAAI,cAAc,CAAC;QAC5D,IAAI,CAAC,gBAAgB,GAAG,MAAM,CAAC,gBAAgB,IAAI,mBAAmB,CAAC;IACzE,CAAC;IAED,KAAK,CAAC,QAAQ,CAAC,KAA0B;QACvC,IAAI,CAAC;YACH,MAAM,MAAM,GAAG,IAAI,CAAC,qBAAqB,CAAC,KAAK,CAAC,CAAC;YAEjD,gBAAM,CAAC,IAAI,CAAC,yCAAyC,IAAI,CAAC,KAAK,EAAE,CAAC,CAAC;YAEnE,MAAM,UAAU,GAAG,MAAM,IAAI,CAAC,MAAM,CAAC,IAAI,CAAC,WAAW,CAAC,MAAM,CAAC;gBAC3D,KAAK,EAAE,IAAI,CAAC,KAAK;gBACjB,QAAQ,EAAE,MAAM;gBAChB,WAAW,EAAE,GAAG;gBAChB,UAAU,EAAE,GAAG;gBACf,eAAe,EAAE,EAAE,IAAI,EAAE,aAAa,EAAE;aACzC,CAAC,CAAC;YAEH,MAAM,UAAU,GAAG,UAAU,CAAC,OAAO,CAAC,CAAC,CAAC,EAAE,OAAO,EAAE,OAAO,CAAC;YAE3D,IAAI,CAAC,UAAU,EAAE,CAAC;gBAChB,OAAO,IAAI,CAAC,aAAa,EAAE,CAAC;YAC9B,CAAC;YAED,MAAM,MAAM,GAAG,IAAI,CAAC,KAAK,CAAC,UAAU,CAAC,CAAC;YAEtC,OAAO;gBACL,MAAM,EAAE,IAAI,CAAC,cAAc,CAAC,MAAM,CAAC,MAAM,CAAC;gBAC1C,MAAM,EAAE,MAAM,CAAC,MAAM;gBACrB,QAAQ,EAAE,MAAM,CAAC,QAAQ,IAAI,SAAS;gBACtC,cAAc,EAAE,OAAO,CAAC,MAAM,CAAC,eAAe,CAAC;gBAC/C,YAAY,EAAE,MAAM,CAAC,aAAa,IAAI,IAAI;aAC3C,CAAC;QACJ,CAAC;QAAC,OAAO,KAAK,EAAE,CAAC;YACf,gBAAM,CAAC,IAAI,CAAC,4DAA4D,EAAE,KAAK,CAAC,CAAC;YACjF,OAAO,IAAI,CAAC,aAAa,EAAE,CAAC;QAC9B,CAAC;IACH,CAAC;IAEO,qBAAqB,CAAC,KAA0B;QACtD,MAAM,OAAO,GAAG,KAAK,CAAC,mBAAmB,IAAI,EAAE,CAAC;QAChD,sFAAsF;QACtF,yDAAyD;QAEzD,MAAM,aAAa,GAAuD;YACxE,IAAI,EAAE,QAAQ;YACd,OAAO,EAAE,oCAAoC,IAAI,CAAC,aAAa,0BAA0B,IAAI,CAAC,gBAAgB;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;oDA0ChE;SAC/C,CAAC;QAEF,MAAM,QAAQ,GAAyD,CAAC,aAAa,CAAC,CAAC;QAEvF,oEAAoE;QACpE,IAAI,OAAO,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;YACvB,MAAM,eAAe,GAAG,OAAO,CAAC,GAAG,CAAqD,GAAG,CAAC,EAAE,CAAC,CAAC;gBAC9F,IAAI,EAAE,GAAG,CAAC,IAAI,KAAK,MAAM,IAAI,GAAG,CAAC,IAAI,KAAK,WAAW,CAAC,CAAC,CAAC,GAAG,CAAC,IAAI,CAAC,CAAC,CAAC,WAAW;gBAC9E,OAAO,EAAE,GAAG,CAAC,OAAO;aACrB,CAAC,CAAC,CAAC;YACJ,QAAQ,CAAC,IAAI,CAAC,GAAG,eAAe,CAAC,CAAC;QACpC,CAAC;QAED,QAAQ,CAAC,IAAI,CAAC;YACZ,IAAI,EAAE,MAAM;YACZ,OAAO,EAAE,KAAK,CAAC,OAAO;SACvB,CAAC,CAAC;QAEH,OAAO,QAAQ,CAAC;IAClB,CAAC;IAEO,cAAc,CAAC,MAAW;QAChC,MAAM,OAAO,GAAkB;YAC7B,WAAW;YACX,UAAU;YACV,gBAAgB;YAChB,aAAa;YACb,eAAe;YACf,WAAW;YACX,uBAAuB;YACvB,qBAAqB;SACtB,CAAC;QAEF,IAAI,OAAO,CAAC,QAAQ,CAAC,MAAM,CAAC,EAAE,CAAC;YAC7B,OAAO,MAAM,CAAC;QAChB,CAAC;QAED,mDAAmD;QACnD,OAAO,WAAW,CAAC;IACrB,CAAC;IAEO,aAAa;QACnB,OAAO;YACL,MAAM,EAAE,WAAW;YACnB,cAAc,EAAE,KAAK;YACrB,YAAY,EAAE,IAAI;SACnB,CAAC;IACJ,CAAC;CACF;AApJD,sCAoJC"}
@@ -1,4 +1,6 @@
1
1
  import { SupabaseClient } from '@supabase/supabase-js';
2
+ export declare const EMBEDDING_MODEL = "text-embedding-3-large";
3
+ export declare const EMBEDDING_DIMENSIONS = 3072;
2
4
  interface KnowledgeBaseConfig {
3
5
  supabase: SupabaseClient;
4
6
  tableName?: string;
@@ -19,8 +21,6 @@ interface KnowledgeItem {
19
21
  }
20
22
  interface SearchOptions {
21
23
  limit?: number;
22
- threshold?: number;
23
- type?: 'semantic' | 'keyword' | 'hybrid';
24
24
  company_id?: number;
25
25
  }
26
26
  interface SearchResult {
@@ -35,6 +35,7 @@ interface SearchResult {
35
35
  export declare class KnowledgeBaseService {
36
36
  private supabase;
37
37
  private tableName;
38
+ private adjacentChunkSize;
38
39
  constructor(config: KnowledgeBaseConfig);
39
40
  createItem(item: {
40
41
  parent_id?: string;
@@ -62,11 +63,25 @@ export declare class KnowledgeBaseService {
62
63
  }>;
63
64
  updateItem(itemId: string, updates: Partial<KnowledgeItem>): Promise<boolean>;
64
65
  deleteItem(itemId: string): Promise<boolean>;
66
+ /**
67
+ * Search with top-k + adjacent chunk retrieval strategy
68
+ */
65
69
  search(query: string, options?: SearchOptions): Promise<SearchResult[]>;
66
- private semanticSearch;
67
- private cosineSimilarity;
68
- private keywordSearch;
70
+ /**
71
+ * Top-k semantic search (no threshold)
72
+ */
73
+ private topKSemanticSearch;
74
+ /**
75
+ * Fetch adjacent chunks (±N) for all matched chunks in ONE query
76
+ */
77
+ private fetchAdjacentChunks;
78
+ /**
79
+ * Merge continuous chunk sequences by document
80
+ */
81
+ private mergeAdjacentChunks;
69
82
  private generateEmbedding;
83
+ private createChunksForDocument;
84
+ private splitIntoChunks;
70
85
  }
71
86
  export {};
72
87
  //# sourceMappingURL=KnowledgeBaseService.d.ts.map
@@ -1 +1 @@
1
- {"version":3,"file":"KnowledgeBaseService.d.ts","sourceRoot":"","sources":["../../../src/services/KnowledgeBaseService.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,cAAc,EAAE,MAAM,uBAAuB,CAAC;AAGvD,UAAU,mBAAmB;IAC3B,QAAQ,EAAE,cAAc,CAAC;IACzB,SAAS,CAAC,EAAE,MAAM,CAAC;CACpB;AAED,UAAU,aAAa;IACrB,EAAE,CAAC,EAAE,MAAM,CAAC;IACZ,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,UAAU,CAAC,EAAE,MAAM,CAAC;IACpB,KAAK,EAAE,MAAM,CAAC;IACd,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,IAAI,EAAE,MAAM,CAAC;IACb,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,QAAQ,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,GAAG,CAAC,CAAC;IAC/B,UAAU,CAAC,EAAE,MAAM,CAAC;CACrB;AAED,UAAU,aAAa;IACrB,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,IAAI,CAAC,EAAE,UAAU,GAAG,SAAS,GAAG,QAAQ,CAAC;IACzC,UAAU,CAAC,EAAE,MAAM,CAAC;CACrB;AAED,UAAU,YAAY;IACpB,EAAE,EAAE,MAAM,CAAC;IACX,KAAK,EAAE,MAAM,CAAC;IACd,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,IAAI,EAAE,MAAM,CAAC;IACb,KAAK,EAAE,MAAM,CAAC;IACd,QAAQ,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,GAAG,CAAC,CAAC;CAChC;AAED,qBAAa,oBAAoB;IAC/B,OAAO,CAAC,QAAQ,CAAiB;IACjC,OAAO,CAAC,SAAS,CAAS;gBAEd,MAAM,EAAE,mBAAmB;IAKjC,UAAU,CAAC,IAAI,EAAE;QACrB,SAAS,CAAC,EAAE,MAAM,CAAC;QACnB,UAAU,CAAC,EAAE,MAAM,CAAC;QACpB,KAAK,EAAE,MAAM,CAAC;QACd,WAAW,CAAC,EAAE,MAAM,CAAC;QACrB,IAAI,EAAE,MAAM,CAAC;QACb,OAAO,CAAC,EAAE,MAAM,CAAC;QACjB,QAAQ,CAAC,EAAE,MAAM,CAAC;QAClB,SAAS,CAAC,EAAE,MAAM,CAAC;QACnB,SAAS,CAAC,EAAE,MAAM,CAAC;QACnB,QAAQ,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,GAAG,CAAC,CAAC;QAC/B,UAAU,CAAC,EAAE,MAAM,CAAC;KACrB,GAAG,OAAO,CAAC,MAAM,CAAC;IAmEb,OAAO,CAAC,MAAM,EAAE,MAAM,GAAG,OAAO,CAAC,aAAa,GAAG,IAAI,CAAC;IAiDtD,SAAS,CAAC,OAAO,GAAE;QACvB,SAAS,CAAC,EAAE,MAAM,CAAC;QACnB,UAAU,CAAC,EAAE,MAAM,CAAC;QACpB,IAAI,CAAC,EAAE,MAAM,CAAC;QACd,KAAK,CAAC,EAAE,MAAM,CAAC;QACf,MAAM,CAAC,EAAE,MAAM,CAAC;KACZ,GAAG,OAAO,CAAC;QAAE,KAAK,EAAE,aAAa,EAAE,CAAC;QAAC,KAAK,EAAE,MAAM,CAAA;KAAE,CAAC;IAkFrD,UAAU,CAAC,MAAM,EAAE,MAAM,EAAE,OAAO,EAAE,OAAO,CAAC,aAAa,CAAC,GAAG,OAAO,CAAC,OAAO,CAAC;IAoC7E,UAAU,CAAC,MAAM,EAAE,MAAM,GAAG,OAAO,CAAC,OAAO,CAAC;IAe5C,MAAM,CAAC,KAAK,EAAE,MAAM,EAAE,OAAO,GAAE,aAAkB,GAAG,OAAO,CAAC,YAAY,EAAE,CAAC;YAoCnE,cAAc;IA0D5B,OAAO,CAAC,gBAAgB;YAiCV,aAAa;YAyCb,iBAAiB;CAiEhC"}
1
+ {"version":3,"file":"KnowledgeBaseService.d.ts","sourceRoot":"","sources":["../../../src/services/KnowledgeBaseService.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,cAAc,EAAE,MAAM,uBAAuB,CAAC;AAIvD,eAAO,MAAM,eAAe,2BAA2B,CAAC;AACxD,eAAO,MAAM,oBAAoB,OAAO,CAAC;AAEzC,UAAU,mBAAmB;IAC3B,QAAQ,EAAE,cAAc,CAAC;IACzB,SAAS,CAAC,EAAE,MAAM,CAAC;CACpB;AAED,UAAU,aAAa;IACrB,EAAE,CAAC,EAAE,MAAM,CAAC;IACZ,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,UAAU,CAAC,EAAE,MAAM,CAAC;IACpB,KAAK,EAAE,MAAM,CAAC;IACd,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,IAAI,EAAE,MAAM,CAAC;IACb,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,QAAQ,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,GAAG,CAAC,CAAC;IAC/B,UAAU,CAAC,EAAE,MAAM,CAAC;CACrB;AAED,UAAU,aAAa;IACrB,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,UAAU,CAAC,EAAE,MAAM,CAAC;CACrB;AAED,UAAU,YAAY;IACpB,EAAE,EAAE,MAAM,CAAC;IACX,KAAK,EAAE,MAAM,CAAC;IACd,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,IAAI,EAAE,MAAM,CAAC;IACb,KAAK,EAAE,MAAM,CAAC;IACd,QAAQ,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,GAAG,CAAC,CAAC;CAChC;AAeD,qBAAa,oBAAoB;IAC/B,OAAO,CAAC,QAAQ,CAAiB;IACjC,OAAO,CAAC,SAAS,CAAS;IAC1B,OAAO,CAAC,iBAAiB,CAAa;gBAE1B,MAAM,EAAE,mBAAmB;IAKjC,UAAU,CAAC,IAAI,EAAE;QACrB,SAAS,CAAC,EAAE,MAAM,CAAC;QACnB,UAAU,CAAC,EAAE,MAAM,CAAC;QACpB,KAAK,EAAE,MAAM,CAAC;QACd,WAAW,CAAC,EAAE,MAAM,CAAC;QACrB,IAAI,EAAE,MAAM,CAAC;QACb,OAAO,CAAC,EAAE,MAAM,CAAC;QACjB,QAAQ,CAAC,EAAE,MAAM,CAAC;QAClB,SAAS,CAAC,EAAE,MAAM,CAAC;QACnB,SAAS,CAAC,EAAE,MAAM,CAAC;QACnB,QAAQ,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,GAAG,CAAC,CAAC;QAC/B,UAAU,CAAC,EAAE,MAAM,CAAC;KACrB,GAAG,OAAO,CAAC,MAAM,CAAC;IA8Db,OAAO,CAAC,MAAM,EAAE,MAAM,GAAG,OAAO,CAAC,aAAa,GAAG,IAAI,CAAC;IAiDtD,SAAS,CAAC,OAAO,GAAE;QACvB,SAAS,CAAC,EAAE,MAAM,CAAC;QACnB,UAAU,CAAC,EAAE,MAAM,CAAC;QACpB,IAAI,CAAC,EAAE,MAAM,CAAC;QACd,KAAK,CAAC,EAAE,MAAM,CAAC;QACf,MAAM,CAAC,EAAE,MAAM,CAAC;KACZ,GAAG,OAAO,CAAC;QAAE,KAAK,EAAE,aAAa,EAAE,CAAC;QAAC,KAAK,EAAE,MAAM,CAAA;KAAE,CAAC;IAkFrD,UAAU,CAAC,MAAM,EAAE,MAAM,EAAE,OAAO,EAAE,OAAO,CAAC,aAAa,CAAC,GAAG,OAAO,CAAC,OAAO,CAAC;IAoC7E,UAAU,CAAC,MAAM,EAAE,MAAM,GAAG,OAAO,CAAC,OAAO,CAAC;IAelD;;OAEG;IACG,MAAM,CAAC,KAAK,EAAE,MAAM,EAAE,OAAO,GAAE,aAAkB,GAAG,OAAO,CAAC,YAAY,EAAE,CAAC;IAuCjF;;OAEG;YACW,kBAAkB;IAsBhC;;OAEG;YACW,mBAAmB;IA4FjC;;OAEG;IACH,OAAO,CAAC,mBAAmB;YAkDb,iBAAiB;YAkEjB,uBAAuB;IAoCrC,OAAO,CAAC,eAAe;CAmBxB"}
@@ -3,10 +3,14 @@ var __importDefault = (this && this.__importDefault) || function (mod) {
3
3
  return (mod && mod.__esModule) ? mod : { "default": mod };
4
4
  };
5
5
  Object.defineProperty(exports, "__esModule", { value: true });
6
- exports.KnowledgeBaseService = void 0;
6
+ exports.KnowledgeBaseService = exports.EMBEDDING_DIMENSIONS = exports.EMBEDDING_MODEL = void 0;
7
7
  const logger_1 = __importDefault(require("../config/logger"));
8
+ // Embedding model configuration
9
+ exports.EMBEDDING_MODEL = 'text-embedding-3-large';
10
+ exports.EMBEDDING_DIMENSIONS = 3072;
8
11
  class KnowledgeBaseService {
9
12
  constructor(config) {
13
+ this.adjacentChunkSize = 2; // Fetch ±2 chunks
10
14
  this.supabase = config.supabase;
11
15
  this.tableName = config.tableName || 'vezlo_knowledge_items';
12
16
  }
@@ -46,26 +50,19 @@ class KnowledgeBaseService {
46
50
  created_at: new Date().toISOString(),
47
51
  updated_at: new Date().toISOString()
48
52
  };
49
- // Generate embedding for content-based items
50
- if (item.content && (item.type === 'document' || item.type === 'file')) {
51
- console.log('Attempting to generate embedding for content:', item.content.substring(0, 100) + '...');
52
- const embedding = await this.generateEmbedding(item.content);
53
- if (embedding) {
54
- console.log('Embedding generated successfully, length:', embedding.length);
55
- insertData.embedding = embedding;
56
- insertData.processed_at = new Date().toISOString();
57
- }
58
- else {
59
- console.log('Embedding generation returned null');
60
- }
61
- }
53
+ // Insert parent document (without embedding)
62
54
  const { data, error } = await this.supabase
63
55
  .from(this.tableName)
64
56
  .insert(insertData)
65
- .select('uuid')
57
+ .select('id, uuid')
66
58
  .single();
67
59
  if (error)
68
60
  throw new Error(`Failed to create knowledge item: ${error.message}`);
61
+ // Create chunks with embeddings for content-based items
62
+ if (item.content && (item.type === 'document' || item.type === 'file')) {
63
+ console.log('Creating chunks for content...');
64
+ await this.createChunksForDocument(data.id, item.content, item.title);
65
+ }
69
66
  return data.uuid;
70
67
  }
71
68
  catch (error) {
@@ -246,150 +243,175 @@ class KnowledgeBaseService {
246
243
  throw new Error(`Failed to delete knowledge item: ${error instanceof Error ? error.message : 'Unknown error'}`);
247
244
  }
248
245
  }
246
+ /**
247
+ * Search with top-k + adjacent chunk retrieval strategy
248
+ */
249
249
  async search(query, options = {}) {
250
250
  try {
251
- const limit = options.limit || 5;
252
- // Balanced precision/recall (0.5 is industry standard)
253
- const threshold = options.threshold || 0.5;
254
- const type = options.type || 'semantic'; // Modern RAG best practice: semantic-first
255
- // Reduced logging - only essential info
256
- logger_1.default.info(`🔎 Search: type=${type}, threshold=${threshold}, limit=${limit}, companyId=${options.company_id ?? 'all'}`);
257
- if (type === 'semantic') {
258
- return await this.semanticSearch(query, limit, threshold, options.company_id);
259
- }
260
- else if (type === 'keyword') {
261
- return await this.keywordSearch(query, limit, options.company_id);
262
- }
263
- else {
264
- // Hybrid search - combine both approaches
265
- const semanticResults = await this.semanticSearch(query, Math.ceil(limit / 2), threshold, options.company_id);
266
- const keywordResults = await this.keywordSearch(query, Math.ceil(limit / 2), options.company_id);
267
- // Merge and deduplicate results
268
- const combined = [...semanticResults, ...keywordResults];
269
- const unique = combined.filter((item, index, self) => index === self.findIndex(t => t.id === item.id));
270
- logger_1.default.info(`📊 Hybrid: ${semanticResults.length} semantic + ${keywordResults.length} keyword = ${unique.length} total`);
271
- return unique.slice(0, limit);
272
- }
273
- }
274
- catch (error) {
275
- console.error('Search error:', error);
276
- throw new Error(`Failed to search knowledge items: ${error instanceof Error ? error.message : 'Unknown error'}`);
277
- }
278
- }
279
- async semanticSearch(query, limit, threshold, companyId) {
280
- try {
251
+ const topK = options.limit || 5;
252
+ const companyId = options.company_id;
253
+ logger_1.default.info(`🔎 Search: top-k=${topK}, adjacent=±${this.adjacentChunkSize}, companyId=${companyId ?? 'all'}`);
254
+ // Step 1: Generate query embedding
281
255
  const queryEmbedding = await this.generateEmbedding(query);
282
256
  if (!queryEmbedding) {
283
257
  logger_1.default.error('Failed to generate query embedding');
284
258
  return [];
285
259
  }
286
- // Use optimized RPC function for vector search
287
- // This uses pgvector's <=> operator directly in the database for efficient
288
- // nearest-neighbor search, avoiding the need to fetch all records and calculate
289
- // similarity in Node.js
290
- const { data, error } = await this.supabase.rpc('match_vezlo_knowledge', {
291
- query_embedding: queryEmbedding,
292
- match_threshold: threshold,
293
- match_count: limit,
294
- filter_company_id: companyId !== undefined ? companyId : null
295
- });
296
- if (error) {
297
- logger_1.default.error('RPC vector search error:', error);
298
- throw new Error(`Semantic search failed: ${error.message}`);
299
- }
300
- if (!data || data.length === 0) {
301
- logger_1.default.warn(`⚠️ No items found in DB for companyId=${companyId ?? 'all'}`);
260
+ // Step 2: Initial top-k semantic search (no threshold)
261
+ const initialChunks = await this.topKSemanticSearch(queryEmbedding, topK, companyId);
262
+ if (initialChunks.length === 0) {
263
+ logger_1.default.warn('⚠️ No chunks found in top-k search');
302
264
  return [];
303
265
  }
304
- logger_1.default.info(`📦 RPC returned ${data.length} items`);
305
- // Transform RPC results to SearchResult format
306
- const results = data.map((item) => ({
307
- id: item.uuid,
308
- title: item.title,
309
- description: item.description,
310
- content: item.content,
311
- type: item.type,
312
- score: item.similarity,
313
- metadata: item.metadata
314
- }));
315
- // Log results summary
316
- if (results.length > 0) {
317
- const topResults = results.slice(0, 3);
318
- const topScores = topResults.map(r => `${r.title}:${r.score.toFixed(2)}`).join(', ');
319
- logger_1.default.info(`✅ Found ${results.length} results above threshold (top: ${topScores})`);
320
- }
321
- return results;
266
+ logger_1.default.info(`📦 Found ${initialChunks.length} initial chunks (scores: ${initialChunks.map(c => c.similarity.toFixed(2)).join(', ')})`);
267
+ // Step 3: Fetch adjacent chunks for each matched chunk
268
+ const enrichedChunks = await this.fetchAdjacentChunks(initialChunks);
269
+ logger_1.default.info(`📚 Enriched to ${enrichedChunks.length} total chunks (with adjacent context)`);
270
+ // Step 4: Group by document and merge continuous sequences
271
+ const mergedResults = this.mergeAdjacentChunks(enrichedChunks, initialChunks);
272
+ logger_1.default.info(`✅ Merged into ${mergedResults.length} contextual results`);
273
+ return mergedResults;
322
274
  }
323
275
  catch (error) {
324
- logger_1.default.error('Semantic search error:', error);
276
+ logger_1.default.error('Search error:', error);
325
277
  return [];
326
278
  }
327
279
  }
328
- // Add cosine similarity function (from original implementation)
329
- cosineSimilarity(a, b) {
330
- try {
331
- // Validate inputs
332
- if (!Array.isArray(a) || !Array.isArray(b)) {
333
- console.error('Cosine similarity: inputs are not arrays', typeof a, typeof b);
334
- return 0;
335
- }
336
- if (a.length !== b.length) {
337
- console.error('Cosine similarity: arrays have different lengths', a.length, b.length);
338
- return 0;
339
- }
340
- if (a.length === 0) {
341
- console.error('Cosine similarity: arrays are empty');
342
- return 0;
343
- }
344
- const dotProduct = a.reduce((sum, val, i) => sum + val * b[i], 0);
345
- const magnitudeA = Math.sqrt(a.reduce((sum, val) => sum + val * val, 0));
346
- const magnitudeB = Math.sqrt(b.reduce((sum, val) => sum + val * val, 0));
347
- if (magnitudeA === 0 || magnitudeB === 0) {
348
- return 0;
349
- }
350
- return dotProduct / (magnitudeA * magnitudeB);
351
- }
352
- catch (error) {
353
- console.error('Error in cosine similarity calculation:', error);
354
- return 0;
280
+ /**
281
+ * Top-k semantic search (no threshold)
282
+ */
283
+ async topKSemanticSearch(queryEmbedding, topK, companyId) {
284
+ const rpcParams = {
285
+ query_embedding: JSON.stringify(queryEmbedding),
286
+ match_threshold: 0.0, // No threshold - pure top-k
287
+ match_count: topK,
288
+ filter_company_id: companyId !== undefined ? companyId : null
289
+ };
290
+ const { data, error } = await this.supabase.rpc('vezlo_match_knowledge_chunks', rpcParams);
291
+ if (error) {
292
+ logger_1.default.error('RPC top-k search error:', error);
293
+ throw new Error(`Top-k search failed: ${error.message}`);
355
294
  }
295
+ return data || [];
356
296
  }
357
- async keywordSearch(query, limit, companyId) {
358
- try {
359
- let dbQuery = this.supabase
360
- .from(this.tableName)
361
- .select(`
297
+ /**
298
+ * Fetch adjacent chunks (±N) for all matched chunks in ONE query
299
+ */
300
+ async fetchAdjacentChunks(matchedChunks) {
301
+ if (matchedChunks.length === 0) {
302
+ return [];
303
+ }
304
+ // Build similarity lookup map for matched chunks
305
+ const similarityMap = new Map();
306
+ matchedChunks.forEach(chunk => {
307
+ similarityMap.set(`${chunk.document_id}-${chunk.chunk_index}`, chunk.similarity);
308
+ });
309
+ // Calculate all adjacent ranges and build OR conditions
310
+ const ranges = [];
311
+ matchedChunks.forEach(chunk => {
312
+ const minIndex = Math.max(0, chunk.chunk_index - this.adjacentChunkSize);
313
+ const maxIndex = chunk.chunk_index + this.adjacentChunkSize;
314
+ ranges.push({
315
+ documentId: chunk.document_id,
316
+ minIndex,
317
+ maxIndex
318
+ });
319
+ });
320
+ // Fetch ALL adjacent chunks in ONE query using OR conditions
321
+ let query = this.supabase
322
+ .from('vezlo_knowledge_chunks')
323
+ .select(`
324
+ id,
325
+ document_id,
326
+ chunk_text,
327
+ chunk_index,
328
+ vezlo_knowledge_items!inner(
362
329
  uuid,
363
330
  title,
364
331
  description,
365
- content,
366
332
  type,
367
333
  metadata
368
- `)
369
- .textSearch('title,description,content', query, {
370
- type: 'websearch',
371
- config: 'english'
372
- })
373
- .limit(limit);
374
- if (companyId) {
375
- dbQuery = dbQuery.eq('company_id', companyId);
376
- }
377
- const { data, error } = await dbQuery;
378
- if (error)
379
- throw new Error(`Keyword search failed: ${error.message}`);
380
- return data.map(item => ({
381
- id: item.uuid,
382
- title: item.title,
383
- description: item.description,
384
- content: item.content,
385
- type: item.type,
386
- score: 0.8, // Default score for keyword matches
387
- metadata: item.metadata
388
- }));
334
+ )
335
+ `);
336
+ // Build OR filter: (doc=1 AND idx>=10 AND idx<=14) OR (doc=2 AND idx>=5 AND idx<=9) OR ...
337
+ const orConditions = ranges.map(r => `and(document_id.eq.${r.documentId},chunk_index.gte.${r.minIndex},chunk_index.lte.${r.maxIndex})`).join(',');
338
+ query = query.or(orConditions);
339
+ query = query.order('document_id', { ascending: true }).order('chunk_index', { ascending: true });
340
+ const { data, error } = await query;
341
+ if (error) {
342
+ logger_1.default.error('Failed to fetch adjacent chunks:', error);
343
+ return matchedChunks; // Fallback to original chunks on error
389
344
  }
390
- catch (error) {
391
- return [];
345
+ if (!data || data.length === 0) {
346
+ return matchedChunks;
392
347
  }
348
+ // Transform and assign similarity scores
349
+ const allChunks = data.map((row) => {
350
+ const doc = row.vezlo_knowledge_items;
351
+ const key = `${row.document_id}-${row.chunk_index}`;
352
+ const similarity = similarityMap.get(key) || 0; // Use original score if matched, else 0
353
+ return {
354
+ chunk_id: row.id,
355
+ document_id: row.document_id,
356
+ document_uuid: doc.uuid,
357
+ document_title: doc.title,
358
+ document_description: doc.description,
359
+ document_type: doc.type,
360
+ document_metadata: doc.metadata,
361
+ chunk_text: row.chunk_text,
362
+ chunk_index: row.chunk_index,
363
+ similarity
364
+ };
365
+ });
366
+ // Deduplicate by chunk_id
367
+ const uniqueChunks = new Map();
368
+ allChunks.forEach(chunk => {
369
+ if (!uniqueChunks.has(chunk.chunk_id)) {
370
+ uniqueChunks.set(chunk.chunk_id, chunk);
371
+ }
372
+ });
373
+ return Array.from(uniqueChunks.values());
374
+ }
375
+ /**
376
+ * Merge continuous chunk sequences by document
377
+ */
378
+ mergeAdjacentChunks(allChunks, originalMatches) {
379
+ // Group chunks by document
380
+ const byDocument = new Map();
381
+ allChunks.forEach(chunk => {
382
+ if (!byDocument.has(chunk.document_id)) {
383
+ byDocument.set(chunk.document_id, []);
384
+ }
385
+ byDocument.get(chunk.document_id).push(chunk);
386
+ });
387
+ // Merge continuous sequences within each document
388
+ const results = [];
389
+ byDocument.forEach((chunks, documentId) => {
390
+ // Sort by chunk_index
391
+ chunks.sort((a, b) => a.chunk_index - b.chunk_index);
392
+ // Find the best similarity score for this document (from original matches)
393
+ const bestMatch = originalMatches.find(m => m.document_id === documentId);
394
+ const score = bestMatch?.similarity || 0;
395
+ // Merge all chunks into single content (preserving order)
396
+ const mergedContent = chunks.map(c => c.chunk_text).join('\n\n');
397
+ // Use first chunk's metadata for result
398
+ const firstChunk = chunks[0];
399
+ results.push({
400
+ id: firstChunk.document_uuid,
401
+ title: firstChunk.document_title,
402
+ description: firstChunk.document_description,
403
+ content: mergedContent,
404
+ type: firstChunk.document_type,
405
+ score,
406
+ metadata: {
407
+ ...firstChunk.document_metadata,
408
+ chunk_count: chunks.length,
409
+ chunk_range: `${chunks[0].chunk_index}-${chunks[chunks.length - 1].chunk_index}`
410
+ }
411
+ });
412
+ });
413
+ // Sort by score (highest first)
414
+ return results.sort((a, b) => b.score - a.score);
393
415
  }
394
416
  async generateEmbedding(text) {
395
417
  const maxRetries = 3;
@@ -411,7 +433,7 @@ class KnowledgeBaseService {
411
433
  'Content-Type': 'application/json'
412
434
  },
413
435
  body: JSON.stringify({
414
- model: 'text-embedding-ada-002',
436
+ model: exports.EMBEDDING_MODEL,
415
437
  input: text.substring(0, 8000) // Limit text length to avoid token limits
416
438
  }),
417
439
  signal: controller.signal
@@ -442,6 +464,50 @@ class KnowledgeBaseService {
442
464
  }
443
465
  return null;
444
466
  }
467
+ async createChunksForDocument(documentId, content, documentTitle) {
468
+ const chunkSize = parseInt(process.env.CHUNK_SIZE || '1000');
469
+ const chunkOverlap = parseInt(process.env.CHUNK_OVERLAP || '200');
470
+ const chunks = this.splitIntoChunks(content, chunkSize, chunkOverlap);
471
+ const processedAt = new Date().toISOString();
472
+ console.log(`Creating ${chunks.length} chunks for document...`);
473
+ for (let i = 0; i < chunks.length; i++) {
474
+ const chunk = chunks[i];
475
+ // Generate embedding from chunk text
476
+ const embedding = await this.generateEmbedding(chunk.text);
477
+ if (embedding) {
478
+ const { data, error } = await this.supabase.rpc('vezlo_insert_knowledge_chunk', {
479
+ p_document_id: documentId,
480
+ p_chunk_text: chunk.text,
481
+ p_chunk_index: i,
482
+ p_start_char: chunk.startChar,
483
+ p_end_char: chunk.endChar,
484
+ p_token_count: Math.ceil(chunk.text.length / 4),
485
+ p_embedding: JSON.stringify(embedding),
486
+ p_processed_at: processedAt
487
+ });
488
+ if (error) {
489
+ console.error(`❌ Failed to insert chunk ${i}:`, error);
490
+ throw new Error(`Failed to insert chunk: ${error.message}`);
491
+ }
492
+ console.log(`✓ Inserted chunk ${i} (ID: ${data})`);
493
+ }
494
+ }
495
+ }
496
+ splitIntoChunks(text, chunkSize, overlap) {
497
+ const chunks = [];
498
+ let startChar = 0;
499
+ while (startChar < text.length) {
500
+ const endChar = Math.min(startChar + chunkSize, text.length);
501
+ const chunkText = text.substring(startChar, endChar);
502
+ chunks.push({
503
+ text: chunkText,
504
+ startChar: startChar,
505
+ endChar: endChar
506
+ });
507
+ startChar += chunkSize - overlap;
508
+ }
509
+ return chunks;
510
+ }
445
511
  }
446
512
  exports.KnowledgeBaseService = KnowledgeBaseService;
447
513
  //# sourceMappingURL=KnowledgeBaseService.js.map