ambivo-agents 1.0.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,935 @@
1
+ # ambivo_agents/agents/knowledge_base.py
2
+ """
3
+ LLM-Aware Knowledge Base Agent with conversation history and intelligent intent detection
4
+ Updated for consistency with other agents
5
+ """
6
+
7
+ import asyncio
8
+ import json
9
+ import uuid
10
+ import time
11
+ import tempfile
12
+ import requests
13
+ from pathlib import Path
14
+ from typing import Dict, List, Any, Optional
15
+ from datetime import datetime
16
+
17
+ from ..core.base import BaseAgent, AgentRole, AgentMessage, MessageType, ExecutionContext, AgentTool
18
+ from ..config.loader import load_config, get_config_section
19
+ from ..core.history import KnowledgeBaseAgentHistoryMixin, ContextType
20
+
21
+
22
+ class QdrantServiceAdapter:
23
+ """Adapter for Knowledge Base functionality using YAML configuration"""
24
+
25
def __init__(self):
    """Create a Qdrant client from the ``knowledge_base`` YAML config section.

    Reads ``qdrant_url`` (required) and ``qdrant_api_key`` (optional) and
    stores the resulting client on ``self.client``.

    Raises:
        ValueError: ``qdrant_url`` is missing or empty.
        ImportError: the ``qdrant-client`` package cannot be imported.
        ConnectionError: the client could not be constructed.
    """
    kb_config = get_config_section('knowledge_base', load_config())

    self.qdrant_url = kb_config.get('qdrant_url')
    self.qdrant_api_key = kb_config.get('qdrant_api_key')

    if not self.qdrant_url:
        raise ValueError("qdrant_url is required in knowledge_base configuration")

    # Only forward api_key when one is configured, as the original did.
    client_kwargs = {"url": self.qdrant_url}
    if self.qdrant_api_key:
        client_kwargs["api_key"] = self.qdrant_api_key

    try:
        import qdrant_client
        self.client = qdrant_client.QdrantClient(**client_kwargs)
    except ImportError as e:
        # Chain the cause so the real import failure stays visible.
        raise ImportError("qdrant-client package required for Knowledge Base functionality") from e
    except Exception as e:
        raise ConnectionError(f"Failed to connect to Qdrant: {e}") from e
51
+
52
def documents_from_text(self, input_text: str) -> list:
    """Split raw text into chunks and return them as llama-index documents.

    Chunking uses ``chunk_size`` (default 1024) and ``chunk_overlap``
    (default 20) from the ``knowledge_base`` config section.
    """
    from langchain.text_splitter import RecursiveCharacterTextSplitter
    from llama_index.core.readers import Document as LIDoc

    settings = get_config_section('knowledge_base', load_config())

    splitter = RecursiveCharacterTextSplitter(
        chunk_size=settings.get('chunk_size', 1024),
        chunk_overlap=settings.get('chunk_overlap', 20),
    )
    chunks = splitter.create_documents(texts=[input_text])

    # Convert each langchain chunk to the llama-index document type.
    return [LIDoc.from_langchain_format(chunk) for chunk in chunks]
73
+
74
def persist_embeddings(self, kb_name: str, doc_path: str = None,
                       documents=None, custom_meta: Dict[str, Any] = None) -> int:
    """Index documents into the Qdrant collection belonging to ``kb_name``.

    When ``documents`` is empty and ``doc_path`` is given, the file is loaded
    with ``UnstructuredLoader``. ``custom_meta`` is merged into each
    document's metadata. The collection name is
    ``<default_collection_prefix>_<kb_name>``.

    Returns:
        1 on success, 2 when there is nothing to index or any step fails
        (the failure is printed, not raised).
    """
    try:
        settings = get_config_section('knowledge_base', load_config())

        if not documents and doc_path:
            from langchain_unstructured import UnstructuredLoader
            from llama_index.core.readers import Document as LIDoc

            loaded = UnstructuredLoader(doc_path).load()
            documents = [LIDoc.from_langchain_format(item) for item in loaded]

        if not documents:
            return 2  # Error

        if custom_meta:
            for item in documents:
                if not hasattr(item, 'metadata'):
                    item.metadata = {}
                item.metadata.update(custom_meta)

        prefix = settings.get('default_collection_prefix', 'kb')
        target_collection = f"{prefix}_{kb_name}"

        from llama_index.core import VectorStoreIndex, StorageContext
        from llama_index.vector_stores.qdrant import QdrantVectorStore

        store = QdrantVectorStore(client=self.client, collection_name=target_collection)
        storage = StorageContext.from_defaults(vector_store=store)
        # Building the index is what writes the embeddings to the store.
        VectorStoreIndex.from_documents(documents, storage_context=storage)

        return 1  # Success

    except Exception as e:
        print(f"Error persisting embeddings: {e}")
        return 2  # Error
121
+
122
def conduct_query(self, query: str, kb_name: str, additional_prompt: str = None,
                  question_type: str = "free-text", option_list=None) -> tuple:
    """Run ``query`` against the collection for ``kb_name``.

    ``additional_prompt``, ``question_type`` and ``option_list`` are accepted
    but not used by this implementation.

    Returns:
        ``(answer, ans_dict_list)`` where ``ans_dict_list`` is a one-element
        list holding the answer, a source summary string and ``source_list``.
        On any exception the answer is a ``"Query error: ..."`` message and
        ``source_list`` is empty; nothing is raised.
    """
    try:
        settings = get_config_section('knowledge_base', load_config())

        prefix = settings.get('default_collection_prefix', 'kb')
        target_collection = f"{prefix}_{kb_name}"
        top_k = settings.get('similarity_top_k', 5)

        from llama_index.core import VectorStoreIndex
        from llama_index.vector_stores.qdrant import QdrantVectorStore
        from llama_index.core.indices.vector_store import VectorIndexRetriever
        from llama_index.core.query_engine import RetrieverQueryEngine
        from llama_index.core import get_response_synthesizer

        store = QdrantVectorStore(client=self.client, collection_name=target_collection)
        kb_index = VectorStoreIndex.from_vector_store(vector_store=store)
        engine = RetrieverQueryEngine(
            retriever=VectorIndexRetriever(similarity_top_k=top_k, index=kb_index),
            response_synthesizer=get_response_synthesizer(),
        )

        result = engine.query(query)
        answer = str(result)

        hits = getattr(result, 'source_nodes', None) or []
        source_list = [
            {
                "text": hit.node.get_text()[:200] + "...",
                "score": getattr(hit, 'score', 0.0),
                "metadata": getattr(hit.node, 'metadata', {}),
            }
            for hit in hits
        ]

        return answer, [{
            "answer": answer,
            "source": f"Found {len(source_list)} relevant sources",
            "source_list": source_list,
        }]

    except Exception as e:
        error_msg = f"Query error: {str(e)}"
        return error_msg, [{"answer": error_msg, "source": "", "source_list": []}]
179
+
180
+
181
+ class KnowledgeBaseAgent(BaseAgent, KnowledgeBaseAgentHistoryMixin):
182
+ """LLM-Aware Knowledge Base Agent with conversation context and intelligent routing"""
183
+
184
def __init__(self, agent_id: str = None, memory_manager=None, llm_service=None, **kwargs):
    """Set up the agent, its history mixin, the Qdrant adapter and its tools.

    Args:
        agent_id: Optional identifier; a ``kb_<8 chars>`` id is generated
            when omitted.
        memory_manager: Forwarded to ``BaseAgent``.
        llm_service: Forwarded to ``BaseAgent``.
        **kwargs: Forwarded to ``BaseAgent``.

    Raises:
        RuntimeError: the ``QdrantServiceAdapter`` could not be created.
    """
    if agent_id is None:
        agent_id = f"kb_{str(uuid.uuid4())[:8]}"

    super().__init__(
        agent_id=agent_id,
        role=AgentRole.RESEARCHER,
        memory_manager=memory_manager,
        llm_service=llm_service,
        name="Knowledge Base Agent",
        description="LLM-aware knowledge base agent with conversation history",
        **kwargs
    )

    self.setup_history_mixin()

    try:
        self.qdrant_service = QdrantServiceAdapter()
    except Exception as e:
        # Chain the cause so config/connection details are not lost.
        raise RuntimeError(f"Failed to initialize Knowledge Base service: {e}") from e

    self._add_knowledge_base_tools()
209
+
210
+ async def _llm_analyze_kb_intent(self, user_message: str, conversation_context: str = "") -> Dict[str, Any]:
211
+ """Use LLM to analyze knowledge base related intent"""
212
+ if not self.llm_service:
213
+ return self._keyword_based_kb_analysis(user_message)
214
+
215
+ prompt = f"""
216
+ Analyze this user message in the context of a knowledge base conversation and extract:
217
+ 1. Primary intent (ingest_document, ingest_text, query_kb, create_kb, manage_kb, help_request)
218
+ 2. Knowledge base name (if mentioned or inferrable)
219
+ 3. Document/file references (file paths, document names)
220
+ 4. Query content (if querying)
221
+ 5. Context references (referring to previous KB operations)
222
+ 6. Operation specifics (metadata, query type, etc.)
223
+
224
+ Conversation Context:
225
+ {conversation_context}
226
+
227
+ Current User Message: {user_message}
228
+
229
+ Respond in JSON format:
230
+ {{
231
+ "primary_intent": "ingest_document|ingest_text|query_kb|create_kb|manage_kb|help_request",
232
+ "kb_name": "knowledge_base_name or null",
233
+ "document_references": ["file1.pdf", "doc2.txt"],
234
+ "query_content": "the actual question to ask" or null,
235
+ "uses_context_reference": true/false,
236
+ "context_type": "previous_kb|previous_document|previous_query",
237
+ "operation_details": {{
238
+ "query_type": "free-text|multi-select|single-select|yes-no",
239
+ "custom_metadata": {{}},
240
+ "source_type": "file|url|text"
241
+ }},
242
+ "confidence": 0.0-1.0
243
+ }}
244
+ """
245
+
246
+ try:
247
+ response = await self.llm_service.generate_response(prompt)
248
+ import re
249
+ json_match = re.search(r'\{.*\}', response, re.DOTALL)
250
+ if json_match:
251
+ return json.loads(json_match.group())
252
+ else:
253
+ return self._extract_kb_intent_from_llm_response(response, user_message)
254
+ except Exception as e:
255
+ return self._keyword_based_kb_analysis(user_message)
256
+
257
def _keyword_based_kb_analysis(self, user_message: str) -> Dict[str, Any]:
    """Classify a KB request with keyword heuristics (no LLM).

    Returns a dict with the same keys the LLM analysis is asked to produce.
    Multi-word phrases are tested before the single keywords they contain.
    Previously ``'ingest text'`` was always shadowed by ``'ingest'``, and
    ``'what can'`` / ``'how to'`` by ``'what'`` / ``'how'``, which made those
    branches unreachable.
    """
    import re

    content_lower = user_message.lower()

    def mentions(*phrases):
        return any(phrase in content_lower for phrase in phrases)

    if mentions('add text', 'ingest text', 'text to'):
        intent = 'ingest_text'
    elif mentions('ingest', 'upload', 'add document', 'import', 'load'):
        intent = 'ingest_document'
    elif mentions('what can', 'how to'):
        intent = 'help_request'
    elif mentions('query', 'search', 'find', 'ask', 'what', 'how', 'where'):
        intent = 'query_kb'
    elif mentions('create', 'new kb', 'make kb', 'setup'):
        intent = 'create_kb'
    else:
        # Covers an explicit 'help' as well as anything unrecognised.
        intent = 'help_request'

    kb_names = self.extract_context_from_text(user_message, ContextType.KNOWLEDGE_BASE)
    documents = self.extract_context_from_text(user_message, ContextType.DOCUMENT_NAME)
    file_paths = self.extract_context_from_text(user_message, ContextType.FILE_PATH)
    all_documents = documents + file_paths

    query_content = self._extract_query_from_kb_message(user_message) if intent == 'query_kb' else None

    # Whole-word match: a plain substring test fired on words such as
    # "with" or "edit" because they contain "it".
    uses_context = re.search(r'\b(?:this|that|it)\b', content_lower) is not None

    return {
        "primary_intent": intent,
        "kb_name": kb_names[0] if kb_names else None,
        "document_references": all_documents,
        "query_content": query_content,
        "uses_context_reference": uses_context,
        "context_type": "previous_kb",
        "operation_details": {
            "query_type": "free-text",
            "custom_metadata": {},
            "source_type": "file"
        },
        "confidence": 0.7
    }
298
+
299
async def process_message(self, message: AgentMessage, context: ExecutionContext = None) -> AgentMessage:
    """Handle one incoming message and return the agent's reply.

    The message is stored, conversation state is updated, intent is
    analysed and the request is routed to a handler. The reply is stored
    before being returned. Any exception yields an ``ERROR``-typed response,
    which is returned without being stored.
    """
    self.memory.store_message(message)

    try:
        incoming_text = message.content
        self.update_conversation_state(incoming_text)

        history_summary = self._get_kb_conversation_context_summary()
        analysis = await self._llm_analyze_kb_intent(incoming_text, history_summary)
        reply_text = await self._route_kb_with_llm_analysis(analysis, incoming_text, context)

        reply = self.create_response(
            content=reply_text,
            recipient_id=message.sender_id,
            session_id=message.session_id,
            conversation_id=message.conversation_id
        )
        self.memory.store_message(reply)
        return reply

    except Exception as e:
        return self.create_response(
            content=f"Knowledge Base Agent error: {str(e)}",
            recipient_id=message.sender_id,
            message_type=MessageType.ERROR,
            session_id=message.session_id,
            conversation_id=message.conversation_id
        )
337
+
338
+ def _get_kb_conversation_context_summary(self) -> str:
339
+ """Get KB conversation context summary"""
340
+ try:
341
+ recent_history = self.get_conversation_history_with_context(
342
+ limit=3,
343
+ context_types=[ContextType.KNOWLEDGE_BASE, ContextType.DOCUMENT_NAME]
344
+ )
345
+
346
+ context_summary = []
347
+ for msg in recent_history:
348
+ if msg.get('message_type') == 'user_input':
349
+ extracted_context = msg.get('extracted_context', {})
350
+ kb_names = extracted_context.get('knowledge_base', [])
351
+ docs = extracted_context.get('document_name', [])
352
+
353
+ if kb_names:
354
+ context_summary.append(f"Previous KB: {kb_names[0]}")
355
+ if docs:
356
+ context_summary.append(f"Previous document: {docs[0]}")
357
+
358
+ # Add current state
359
+ current_kb = self.get_current_knowledge_base()
360
+ if current_kb:
361
+ context_summary.append(f"Current KB: {current_kb}")
362
+
363
+ return "\n".join(context_summary) if context_summary else "No previous KB context"
364
+ except:
365
+ return "No previous KB context"
366
+
367
+ async def _route_kb_with_llm_analysis(self, intent_analysis: Dict[str, Any], user_message: str,
368
+ context: ExecutionContext) -> str:
369
+ """Route KB request based on LLM intent analysis"""
370
+
371
+ primary_intent = intent_analysis.get("primary_intent", "help_request")
372
+ kb_name = intent_analysis.get("kb_name")
373
+ documents = intent_analysis.get("document_references", [])
374
+ query_content = intent_analysis.get("query_content")
375
+ uses_context = intent_analysis.get("uses_context_reference", False)
376
+ operation_details = intent_analysis.get("operation_details", {})
377
+
378
+ # Resolve context references if needed
379
+ if uses_context:
380
+ kb_name = kb_name or self.get_current_knowledge_base()
381
+ if not documents:
382
+ recent_doc = self.get_recent_document()
383
+ if recent_doc:
384
+ documents = [recent_doc]
385
+
386
+ # Route based on intent
387
+ if primary_intent == "help_request":
388
+ return await self._handle_kb_help_request(user_message)
389
+ elif primary_intent == "ingest_document":
390
+ return await self._handle_document_ingestion(kb_name, documents, operation_details, user_message)
391
+ elif primary_intent == "ingest_text":
392
+ return await self._handle_text_ingestion(kb_name, user_message, operation_details)
393
+ elif primary_intent == "query_kb":
394
+ return await self._handle_kb_query(kb_name, query_content, operation_details)
395
+ elif primary_intent == "create_kb":
396
+ return await self._handle_kb_creation(kb_name, user_message)
397
+ elif primary_intent == "manage_kb":
398
+ return await self._handle_kb_management(kb_name, user_message)
399
+ else:
400
+ return await self._handle_kb_help_request(user_message)
401
+
402
+ async def _handle_document_ingestion(self, kb_name: str, documents: List[str], operation_details: Dict[str, Any],
403
+ user_message: str) -> str:
404
+ """Handle document ingestion with LLM analysis"""
405
+
406
+ # Resolve missing parameters
407
+ if not kb_name:
408
+ available_kbs = self.conversation_state.knowledge_bases
409
+ if available_kbs:
410
+ return f"I can ingest documents! Which knowledge base?\n\n" \
411
+ f"**Available KBs:**\n" + "\n".join([f"• {kb}" for kb in available_kbs]) + \
412
+ f"\n\nOr specify a new KB name."
413
+ else:
414
+ return "I can ingest documents into knowledge bases. Please specify:\n\n" \
415
+ "1. **Knowledge base name** (I'll create it if it doesn't exist)\n" \
416
+ "2. **Document path** or just tell me which document\n\n" \
417
+ "Example: 'Ingest research.pdf into ai_papers'"
418
+
419
+ if not documents:
420
+ return f"I'll ingest into the **{kb_name}** knowledge base. Which document would you like to add?\n\n" \
421
+ f"Please provide the document path or tell me the filename."
422
+
423
+ # Perform ingestion
424
+ document_path = documents[0]
425
+
426
+ try:
427
+ # Check if it's a URL or file path
428
+ if document_path.startswith('http'):
429
+ result = await self._ingest_web_content(kb_name, document_path)
430
+ operation_type = "Web content"
431
+ else:
432
+ result = await self._ingest_document(kb_name, document_path)
433
+ operation_type = "Document"
434
+
435
+ if result['success']:
436
+ return f"✅ **{operation_type} Ingestion Completed**\n\n" \
437
+ f"📄 **Source:** {document_path}\n" \
438
+ f"🗃️ **Knowledge Base:** {kb_name}\n" \
439
+ f"⏱️ **Status:** Successfully processed and indexed\n\n" \
440
+ f"You can now query this knowledge base! 🎉"
441
+ else:
442
+ return f"❌ **Ingestion failed:** {result['error']}"
443
+
444
+ except Exception as e:
445
+ return f"❌ **Error during ingestion:** {str(e)}"
446
+
447
+ async def _handle_text_ingestion(self, kb_name: str, user_message: str, operation_details: Dict[str, Any]) -> str:
448
+ """Handle text ingestion with LLM analysis"""
449
+
450
+ if not kb_name:
451
+ return "I can ingest text into knowledge bases. Please specify which knowledge base to use."
452
+
453
+ # Extract text content from message (after removing command parts)
454
+ text_content = self._extract_text_for_ingestion(user_message)
455
+
456
+ if not text_content:
457
+ return f"I'll add text to the **{kb_name}** knowledge base. What text would you like me to ingest?"
458
+
459
+ try:
460
+ result = await self._ingest_text(kb_name, text_content)
461
+
462
+ if result['success']:
463
+ preview = text_content[:100] + "..." if len(text_content) > 100 else text_content
464
+ return f"✅ **Text Ingestion Completed**\n\n" \
465
+ f"📝 **Text Preview:** {preview}\n" \
466
+ f"🗃️ **Knowledge Base:** {kb_name}\n" \
467
+ f"📊 **Length:** {len(text_content)} characters\n\n" \
468
+ f"Text successfully indexed! 🎉"
469
+ else:
470
+ return f"❌ **Text ingestion failed:** {result['error']}"
471
+
472
+ except Exception as e:
473
+ return f"❌ **Error during text ingestion:** {str(e)}"
474
+
475
+ async def _handle_kb_query(self, kb_name: str, query_content: str, operation_details: Dict[str, Any]) -> str:
476
+ """Handle KB queries with LLM analysis"""
477
+
478
+ # Resolve missing parameters
479
+ if not kb_name:
480
+ available_kbs = self.conversation_state.knowledge_bases
481
+ if available_kbs:
482
+ return f"I can query knowledge bases! Which one?\n\n" \
483
+ f"**Available KBs from our conversation:**\n" + \
484
+ "\n".join([f"• {kb}" for kb in available_kbs]) + \
485
+ f"\n\nExample: 'Query {available_kbs[0]}: {query_content or 'your question'}'"
486
+ else:
487
+ return "I can query knowledge bases, but I need to know which one to search.\n\n" \
488
+ "Please specify: `Query [kb_name]: [your question]`"
489
+
490
+ if not query_content:
491
+ return f"I'll search the **{kb_name}** knowledge base. What would you like me to find?"
492
+
493
+ try:
494
+ query_type = operation_details.get("query_type", "free-text")
495
+ result = await self._query_knowledge_base(kb_name, query_content, question_type=query_type)
496
+
497
+ if result['success']:
498
+ answer = result['answer']
499
+ source_count = len(result.get('source_details', []))
500
+
501
+ return f"🔍 **Query Results from {kb_name}**\n\n" \
502
+ f"**Question:** {query_content}\n\n" \
503
+ f"**Answer:**\n{answer}\n\n" \
504
+ f"📊 **Sources:** {source_count} relevant documents found"
505
+ else:
506
+ return f"❌ **Query failed:** {result['error']}"
507
+
508
+ except Exception as e:
509
+ return f"❌ **Error during query:** {str(e)}"
510
+
511
+ async def _handle_kb_creation(self, kb_name: str, user_message: str) -> str:
512
+ """Handle KB creation requests"""
513
+
514
+ if not kb_name:
515
+ return "I can create knowledge bases! What would you like to name the new knowledge base?\n\n" \
516
+ "Example: 'Create a knowledge base called research_papers'"
517
+
518
+ # KB creation is implicit when first document is ingested
519
+ return f"Great! I'll create the **{kb_name}** knowledge base when you add the first document.\n\n" \
520
+ f"To get started:\n" \
521
+ f"• `Ingest document.pdf into {kb_name}`\n" \
522
+ f"• `Add text to {kb_name}: [your text content]`\n" \
523
+ f"• `Ingest https://example.com into {kb_name}`"
524
+
525
+ async def _handle_kb_management(self, kb_name: str, user_message: str) -> str:
526
+ """Handle KB management requests"""
527
+
528
+ available_kbs = self.conversation_state.knowledge_bases
529
+
530
+ if not available_kbs:
531
+ return "No knowledge bases found in our conversation. Create one by ingesting your first document!"
532
+
533
+ response = f"📚 **Knowledge Base Management**\n\n"
534
+ response += f"**Available Knowledge Bases:**\n"
535
+ for kb in available_kbs:
536
+ response += f"• {kb}\n"
537
+
538
+ response += f"\n**Management Options:**\n"
539
+ response += f"• Query: `Query {available_kbs[0]}: your question`\n"
540
+ response += f"• Add docs: `Ingest file.pdf into {available_kbs[0]}`\n"
541
+ response += f"• Add text: `Add text to {available_kbs[0]}: content`\n"
542
+
543
+ return response
544
+
545
+ async def _handle_kb_help_request(self, user_message: str) -> str:
546
+ """Handle KB help requests with conversation context"""
547
+
548
+ state = self.get_conversation_state()
549
+
550
+ response = ("I'm your Knowledge Base Agent! I can help you with:\n\n"
551
+ "📄 **Document Management**\n"
552
+ "- Ingest PDFs, DOCX, TXT, MD files\n"
553
+ "- Process web content from URLs\n"
554
+ "- Add text content directly\n\n"
555
+ "🔍 **Intelligent Search**\n"
556
+ "- Natural language queries\n"
557
+ "- Semantic similarity search\n"
558
+ "- Source attribution\n\n"
559
+ "🧠 **Smart Context Features**\n"
560
+ "- Remembers knowledge bases from conversation\n"
561
+ "- Understands 'that KB' and 'this document'\n"
562
+ "- Maintains working context\n\n")
563
+
564
+ # Add current context information
565
+ if state.knowledge_bases:
566
+ response += f"🗃️ **Your Knowledge Bases:**\n"
567
+ for kb in state.knowledge_bases[-3:]: # Show last 3
568
+ response += f" • {kb}\n"
569
+
570
+ if state.working_files:
571
+ response += f"\n📁 **Recent Documents:** {len(state.working_files)} files\n"
572
+
573
+ response += "\n💡 **Examples:**\n"
574
+ response += "• 'Ingest research.pdf into ai_papers'\n"
575
+ response += "• 'Query ai_papers: What are the main findings?'\n"
576
+ response += "• 'Add this text to the knowledge base: [content]'\n"
577
+ response += "\nI understand context from our conversation! 🚀"
578
+
579
+ return response
580
+
581
+ def _extract_query_from_kb_message(self, message: str) -> str:
582
+ """Extract query content from KB message"""
583
+ # Look for colon pattern first
584
+ import re
585
+ colon_match = re.search(r':\s*(.+)', message)
586
+ if colon_match:
587
+ return colon_match.group(1).strip()
588
+
589
+ # Remove KB operation keywords
590
+ query_keywords = ['query', 'search', 'find', 'ask', 'what', 'how', 'where', 'when', 'why']
591
+ words = message.split()
592
+ filtered_words = []
593
+
594
+ for word in words:
595
+ if word.lower() not in query_keywords and not word.lower().endswith('_kb'):
596
+ filtered_words.append(word)
597
+
598
+ return ' '.join(filtered_words).strip()
599
+
600
+ def _extract_text_for_ingestion(self, message: str) -> str:
601
+ """Extract text content for ingestion from message"""
602
+ # Look for colon pattern
603
+ import re
604
+ colon_match = re.search(r':\s*(.+)', message)
605
+ if colon_match:
606
+ return colon_match.group(1).strip()
607
+
608
+ # Remove ingestion keywords
609
+ ingest_keywords = ['ingest', 'add', 'upload', 'text', 'into', 'to']
610
+ words = message.split()
611
+ filtered_words = []
612
+ skip_next = False
613
+
614
+ for word in words:
615
+ if skip_next:
616
+ skip_next = False
617
+ continue
618
+
619
+ if word.lower() in ingest_keywords:
620
+ continue
621
+ elif word.lower().endswith('_kb') or word.lower().endswith('_base'):
622
+ continue
623
+ else:
624
+ filtered_words.append(word)
625
+
626
+ return ' '.join(filtered_words).strip()
627
+
628
+ def _extract_kb_intent_from_llm_response(self, llm_response: str, user_message: str) -> Dict[str, Any]:
629
+ """Extract KB intent from non-JSON LLM response"""
630
+ content_lower = llm_response.lower()
631
+
632
+ if 'ingest' in content_lower or 'upload' in content_lower:
633
+ intent = 'ingest_document'
634
+ elif 'query' in content_lower or 'search' in content_lower:
635
+ intent = 'query_kb'
636
+ elif 'create' in content_lower:
637
+ intent = 'create_kb'
638
+ else:
639
+ intent = 'help_request'
640
+
641
+ return {
642
+ "primary_intent": intent,
643
+ "kb_name": None,
644
+ "document_references": [],
645
+ "query_content": None,
646
+ "uses_context_reference": False,
647
+ "context_type": "none",
648
+ "operation_details": {"query_type": "free-text"},
649
+ "confidence": 0.6
650
+ }
651
+
652
+ # Tool implementations
653
def _add_knowledge_base_tools(self):
    """Register the agent's five tools via ``add_tool``.

    Registered in order: ``ingest_document``, ``ingest_text``,
    ``query_knowledge_base``, ``ingest_web_content`` and ``call_api``. Each
    gets a JSON-schema style ``parameters_schema``.
    """
    # (name, description, function, properties, required)
    tool_specs = [
        (
            "ingest_document",
            "Ingest a document into the knowledge base",
            self._ingest_document,
            {
                "kb_name": {"type": "string", "description": "Knowledge base name"},
                "doc_path": {"type": "string", "description": "Path to document file"},
                "custom_meta": {"type": "object", "description": "Custom metadata for the document"}
            },
            ["kb_name", "doc_path"],
        ),
        (
            "ingest_text",
            "Ingest a Text string into the knowledge base",
            self._ingest_text,
            {
                "kb_name": {"type": "string", "description": "Knowledge base name"},
                "input_text": {"type": "string", "description": "Text to Ingest"},
                "custom_meta": {"type": "object", "description": "Custom metadata for the text"}
            },
            ["kb_name", "input_text"],
        ),
        (
            "query_knowledge_base",
            "Query the knowledge base for information",
            self._query_knowledge_base,
            {
                "kb_name": {"type": "string", "description": "Knowledge base name"},
                "query": {"type": "string", "description": "Query string"},
                "question_type": {"type": "string",
                                  "enum": ["free-text", "multi-select", "single-select", "yes-no"],
                                  "default": "free-text"},
                "option_list": {"type": "array", "items": {"type": "string"},
                                "description": "Options for multi/single select questions"},
                "additional_prompt": {"type": "string", "description": "Additional prompt context"}
            },
            ["kb_name", "query"],
        ),
        (
            "ingest_web_content",
            "Ingest content from web URLs",
            self._ingest_web_content,
            {
                "kb_name": {"type": "string", "description": "Knowledge base name"},
                "url": {"type": "string", "description": "URL to ingest"},
                "custom_meta": {"type": "object", "description": "Custom metadata"}
            },
            ["kb_name", "url"],
        ),
        (
            "call_api",
            "Make API calls to external services",
            self._call_api,
            {
                "url": {"type": "string", "description": "API endpoint URL"},
                "method": {"type": "string", "enum": ["GET", "POST", "PUT", "DELETE"], "default": "GET"},
                "headers": {"type": "object", "description": "Request headers"},
                "payload": {"type": "object", "description": "Request payload for POST/PUT"},
                "timeout": {"type": "number", "default": 30}
            },
            ["url"],
        ),
    ]

    for tool_name, tool_description, handler, properties, required in tool_specs:
        self.add_tool(AgentTool(
            name=tool_name,
            description=tool_description,
            function=handler,
            parameters_schema={
                "type": "object",
                "properties": properties,
                "required": required
            }
        ))
742
+
743
+ async def _ingest_document(self, kb_name: str, doc_path: str, custom_meta: Dict[str, Any] = None) -> Dict[str, Any]:
744
+ """Ingest a document into the knowledge base"""
745
+ try:
746
+ if not Path(doc_path).exists():
747
+ return {"success": False, "error": f"File not found: {doc_path}"}
748
+
749
+ # Add metadata
750
+ if not custom_meta:
751
+ custom_meta = {}
752
+
753
+ custom_meta.update({
754
+ "ingestion_time": time.time(),
755
+ "agent_id": self.agent_id,
756
+ "file_path": doc_path
757
+ })
758
+
759
+ # Use existing persist_embeddings method
760
+ result = self.qdrant_service.persist_embeddings(
761
+ kb_name=kb_name,
762
+ doc_path=doc_path,
763
+ custom_meta=custom_meta
764
+ )
765
+
766
+ if result == 1:
767
+ return {
768
+ "success": True,
769
+ "message": f"Document {doc_path} successfully ingested into {kb_name}",
770
+ "kb_name": kb_name,
771
+ "file_path": doc_path
772
+ }
773
+ else:
774
+ return {
775
+ "success": False,
776
+ "error": f"Failed to ingest document {doc_path}"
777
+ }
778
+
779
+ except Exception as e:
780
+ return {"success": False, "error": str(e)}
781
+
782
+ async def _ingest_text(self, kb_name: str, input_text: str, custom_meta: Dict[str, Any] = None) -> Dict[str, Any]:
783
+ """Ingest text into the knowledge base"""
784
+ try:
785
+ # Add metadata
786
+ if not custom_meta:
787
+ custom_meta = {}
788
+
789
+ custom_meta.update({
790
+ "ingestion_time": time.time(),
791
+ "agent_id": self.agent_id,
792
+ })
793
+
794
+ document_list = self.qdrant_service.documents_from_text(input_text)
795
+
796
+ # Use existing persist_embeddings method
797
+ result = self.qdrant_service.persist_embeddings(
798
+ kb_name=kb_name,
799
+ doc_path=None,
800
+ documents=document_list,
801
+ custom_meta=custom_meta
802
+ )
803
+
804
+ if result == 1:
805
+ return {
806
+ "success": True,
807
+ "message": f"Text successfully ingested into {kb_name}",
808
+ "kb_name": kb_name,
809
+ }
810
+ else:
811
+ return {
812
+ "success": False,
813
+ "error": f"Failed to ingest text"
814
+ }
815
+
816
+ except Exception as e:
817
+ return {"success": False, "error": str(e)}
818
+
819
async def get_answer(self, kb_name: str, query: str, question_type: str = "free-text"):
    """Query ``kb_name`` and return just the answer text.

    Returns ``{"success": True, "answer": ...}``, or
    ``{"success": False, "error": ...}`` if the query call raises. The
    source details returned by ``conduct_query`` are discarded.
    """
    try:
        query_result = self.qdrant_service.conduct_query(
            query=query,
            kb_name=kb_name,
            question_type=question_type
        )
        # conduct_query yields (answer, details); only the answer is kept.
        answer_text, _details = query_result
        return {"success": True, "answer": answer_text}

    except Exception as e:
        return {"success": False, "error": str(e)}
836
+
837
+ async def _query_knowledge_base(self, kb_name: str, query: str, question_type: str = "free-text",
838
+ option_list: List[str] = None, additional_prompt: str = None) -> Dict[str, Any]:
839
+ """Query the knowledge base"""
840
+ try:
841
+ # Use existing conduct_query method
842
+ answer, ans_dict_list = self.qdrant_service.conduct_query(
843
+ query=query,
844
+ kb_name=kb_name,
845
+ additional_prompt=additional_prompt,
846
+ question_type=question_type,
847
+ option_list=option_list
848
+ )
849
+
850
+ return {
851
+ "success": True,
852
+ "answer": answer,
853
+ "source_details": ans_dict_list,
854
+ "kb_name": kb_name,
855
+ "query": query,
856
+ "question_type": question_type
857
+ }
858
+
859
+ except Exception as e:
860
+ return {"success": False, "error": str(e)}
861
+
862
async def _ingest_web_content(self, kb_name: str, url: str, custom_meta: Dict[str, Any] = None) -> Dict[str, Any]:
    """Fetch ``url`` and ingest the response body into ``kb_name``.

    The body is written to a temporary ``.txt`` file, which is ingested via
    ``_ingest_document`` and then removed. ``source_url``, ``fetch_time``
    and ``content_type`` are added to the metadata. The caller's
    ``custom_meta`` dict is not modified.

    Returns:
        The ``_ingest_document`` result, with ``url`` and a web-specific
        ``message`` added on success; ``{"success": False, "error": ...}``
        on any exception. Never raises.
    """
    try:
        response = requests.get(url, timeout=30)
        response.raise_for_status()

        tmp_path = None
        try:
            # Explicit UTF-8: the locale default encoding can fail on
            # pages with characters outside it.
            with tempfile.NamedTemporaryFile(mode='w', suffix='.txt', delete=False,
                                             encoding='utf-8') as tmp_file:
                tmp_path = tmp_file.name
                tmp_file.write(response.text)

            metadata = dict(custom_meta or {})
            metadata.update({
                "source_url": url,
                "fetch_time": time.time(),
                "content_type": response.headers.get('content-type', 'unknown')
            })

            result = await self._ingest_document(kb_name, tmp_path, metadata)
        finally:
            # Always remove the temp file, including when the write or
            # ingestion raised; it previously leaked on those paths.
            if tmp_path is not None and Path(tmp_path).exists():
                Path(tmp_path).unlink()

        if result["success"]:
            result["url"] = url
            result["message"] = f"Web content from {url} successfully ingested into {kb_name}"

        return result

    except Exception as e:
        return {"success": False, "error": str(e)}
898
+
899
async def _call_api(self, url: str, method: str = "GET", headers: Dict[str, str] = None,
                    payload: Dict[str, Any] = None, timeout: int = 30) -> Dict[str, Any]:
    """Make an HTTP request with ``requests`` and return a summary dict.

    ``payload`` is sent as JSON for POST and PUT only. The response body is
    returned parsed as JSON when possible, otherwise as text.

    Returns:
        ``{"success": True, "status_code", "response_data", "headers",
        "url", "method"}`` whenever a response is received, whatever its
        HTTP status; ``{"success": False, "error": ...}`` if the request
        raises.
    """
    try:
        verb = method.upper()
        request_kwargs = {
            "url": url,
            "method": verb,
            "timeout": timeout
        }

        if headers:
            request_kwargs["headers"] = headers

        if payload and verb in ["POST", "PUT"]:
            request_kwargs["json"] = payload

        response = requests.request(**request_kwargs)

        try:
            response_data = response.json()
        except ValueError:
            # JSON decode errors are ValueError subclasses; a bare
            # ``except:`` here also trapped KeyboardInterrupt/SystemExit.
            response_data = response.text

        return {
            "success": True,
            "status_code": response.status_code,
            "response_data": response_data,
            "headers": dict(response.headers),
            "url": url,
            "method": verb
        }

    except Exception as e:
        return {"success": False, "error": str(e)}