ambivo-agents 1.3.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,595 @@
1
+ # ambivo_agents/agents/knowledge_base.py
2
+ """
3
+ Knowledge Base Agent with Qdrant integration.
4
+ """
5
+
6
+ import asyncio
7
+ import json
8
+ import uuid
9
+ import time
10
+ import tempfile
11
+ import requests
12
+ from pathlib import Path
13
+ from typing import Dict, List, Any, Optional
14
+ from datetime import datetime
15
+
16
+
17
+ from ..core.base import BaseAgent, AgentRole, AgentMessage, MessageType, ExecutionContext, AgentTool
18
+ from ..config.loader import load_config, get_config_section
19
+
20
+
21
+ class QdrantServiceAdapter:
22
+ """Adapter for Knowledge Base functionality using YAML configuration"""
23
+
24
def __init__(self):
    """Read Qdrant connection settings from the YAML config and create a client.

    ``qdrant_url`` (required) and ``qdrant_api_key`` (optional) are read from
    the ``knowledge_base`` configuration section and kept on the instance.

    Raises:
        ValueError: if ``qdrant_url`` is missing or empty.
        ImportError: if the ``qdrant-client`` package cannot be imported.
        ConnectionError: if creating the client fails for any other reason.
    """
    config = load_config()
    kb_config = get_config_section('knowledge_base', config)

    self.qdrant_url = kb_config.get('qdrant_url')
    self.qdrant_api_key = kb_config.get('qdrant_api_key')

    if not self.qdrant_url:
        raise ValueError("qdrant_url is required in knowledge_base configuration")

    # Pass api_key only when one is configured, so a deployment without a
    # key gets the same constructor call as before.
    client_kwargs = {"url": self.qdrant_url}
    if self.qdrant_api_key:
        client_kwargs["api_key"] = self.qdrant_api_key

    try:
        import qdrant_client
        self.client = qdrant_client.QdrantClient(**client_kwargs)
    except ImportError as e:
        # Chain the cause so the traceback still shows which import failed.
        raise ImportError("qdrant-client package required for Knowledge Base functionality") from e
    except Exception as e:
        raise ConnectionError(f"Failed to connect to Qdrant: {e}") from e
50
+
51
def documents_from_text(self, input_text: str) -> list:
    """Split ``input_text`` into chunks and return them as llama-index documents.

    Chunking parameters are read from the ``knowledge_base`` config section:
    ``chunk_size`` (default 1024) and ``chunk_overlap`` (default 20).
    """
    from langchain.text_splitter import RecursiveCharacterTextSplitter
    from llama_index.core.readers import Document as LIDoc

    settings = get_config_section('knowledge_base', load_config())

    splitter = RecursiveCharacterTextSplitter(
        chunk_size=settings.get('chunk_size', 1024),
        chunk_overlap=settings.get('chunk_overlap', 20),
    )

    # langchain chunks -> llama-index documents, one per chunk
    converted = []
    for chunk in splitter.create_documents(texts=[input_text]):
        converted.append(LIDoc.from_langchain_format(chunk))
    return converted
72
+
73
def persist_embeddings(self, kb_name: str, doc_path: Optional[str] = None,
                       documents=None, custom_meta: Optional[Dict[str, Any]] = None) -> int:
    """Store documents in the Qdrant collection for ``kb_name``.

    Args:
        kb_name: Knowledge base name; the collection used is
            ``<default_collection_prefix>_<kb_name>`` (prefix defaults to ``kb``).
        doc_path: File to load when ``documents`` is empty or not given.
        documents: Already-built llama-index documents; takes precedence
            over ``doc_path``.
        custom_meta: Extra metadata merged into every document's metadata.

    Returns:
        1 on success, 2 on failure (nothing to store, or any exception).
        Exceptions are printed and reported through the return code rather
        than raised.
    """
    try:
        config = load_config()
        kb_config = get_config_section('knowledge_base', config)

        if not documents and doc_path:
            from langchain_unstructured import UnstructuredLoader
            from llama_index.core.readers import Document as LIDoc

            loader = UnstructuredLoader(doc_path)
            documents = [LIDoc.from_langchain_format(doc) for doc in loader.load()]

        if not documents:
            return 2  # Error: nothing to persist

        if custom_meta:
            for doc in documents:
                # Covers both a missing attribute and metadata set to None;
                # either would make the update() below fail.
                if getattr(doc, 'metadata', None) is None:
                    doc.metadata = {}
                doc.metadata.update(custom_meta)

        collection_prefix = kb_config.get('default_collection_prefix', 'kb')
        collection_name = f"{collection_prefix}_{kb_name}"

        from llama_index.core import VectorStoreIndex, StorageContext
        from llama_index.vector_stores.qdrant import QdrantVectorStore

        vector_store = QdrantVectorStore(
            client=self.client,
            collection_name=collection_name
        )
        storage_context = StorageContext.from_defaults(vector_store=vector_store)

        # Called for its effect on the vector store; the returned index
        # object is not needed here.
        VectorStoreIndex.from_documents(documents, storage_context=storage_context)

        return 1  # Success

    except Exception as e:
        print(f"Error persisting embeddings: {e}")
        return 2  # Error
123
+
124
def conduct_query(self, query: str, kb_name: str, additional_prompt: Optional[str] = None,
                  question_type: str = "free-text", option_list=None) -> tuple:
    """Answer ``query`` from the Qdrant collection belonging to ``kb_name``.

    A retriever query engine is built over the collection
    ``<default_collection_prefix>_<kb_name>`` with ``similarity_top_k``
    (default 5) taken from the ``knowledge_base`` config section.

    Args:
        query: Question to run against the knowledge base.
        kb_name: Knowledge base name.
        additional_prompt: Accepted but not used by this implementation.
        question_type: Accepted but not used by this implementation.
        option_list: Accepted but not used by this implementation.

    Returns:
        ``(answer, details)`` where ``answer`` is the response as a string
        and ``details`` is a one-element list holding a dict with the keys
        ``answer``, ``source`` and ``source_list``. On any exception no
        error is raised; both values carry a ``"Query error: ..."`` message.
    """
    snippet_limit = 200  # max characters of source text kept per hit

    try:
        config = load_config()
        kb_config = get_config_section('knowledge_base', config)

        collection_prefix = kb_config.get('default_collection_prefix', 'kb')
        collection_name = f"{collection_prefix}_{kb_name}"

        similarity_top_k = kb_config.get('similarity_top_k', 5)

        from llama_index.core import VectorStoreIndex
        from llama_index.vector_stores.qdrant import QdrantVectorStore
        from llama_index.core.indices.vector_store import VectorIndexRetriever
        from llama_index.core.query_engine import RetrieverQueryEngine
        from llama_index.core import get_response_synthesizer

        vector_store = QdrantVectorStore(
            client=self.client,
            collection_name=collection_name
        )

        index = VectorStoreIndex.from_vector_store(vector_store=vector_store)
        retriever = VectorIndexRetriever(similarity_top_k=similarity_top_k, index=index)
        response_synthesizer = get_response_synthesizer()
        query_engine = RetrieverQueryEngine(
            retriever=retriever,
            response_synthesizer=response_synthesizer
        )

        response = query_engine.query(query)
        answer = str(response)

        source_list = []
        for hit in getattr(response, 'source_nodes', None) or []:
            text = hit.node.get_text()
            snippet = text[:snippet_limit]
            # Mark the snippet as cut only when text was actually dropped.
            if len(text) > snippet_limit:
                snippet += "..."
            source_list.append({
                "text": snippet,
                "score": getattr(hit, 'score', 0.0),
                "metadata": getattr(hit.node, 'metadata', {})
            })

        ans_dict_list = [{
            "answer": answer,
            "source": f"Found {len(source_list)} relevant sources",
            "source_list": source_list
        }]

        return answer, ans_dict_list

    except Exception as e:
        error_msg = f"Query error: {str(e)}"
        return error_msg, [{"answer": error_msg, "source": "", "source_list": []}]
181
+
182
+
183
+ class KnowledgeBaseAgent(BaseAgent):
184
+ """Knowledge Base Agent that integrates with Qdrant infrastructure"""
185
+
186
def __init__(self, agent_id: str | None = None, memory_manager=None, llm_service=None, **kwargs):
    """Create the agent, connect the Qdrant adapter and register its tools.

    Args:
        agent_id: Agent identifier; when omitted, ``kb_`` plus the first
            eight characters of a random UUID is used.
        memory_manager: Passed through to ``BaseAgent``.
        llm_service: Passed through to ``BaseAgent``.
        **kwargs: Passed through to ``BaseAgent``.

    Raises:
        RuntimeError: if ``QdrantServiceAdapter`` cannot be constructed.
    """
    if agent_id is None:
        agent_id = f"kb_{str(uuid.uuid4())[:8]}"

    super().__init__(
        agent_id=agent_id,
        role=AgentRole.RESEARCHER,
        memory_manager=memory_manager,
        llm_service=llm_service,
        name="Knowledge Base Agent",
        description="Agent for knowledge base operations, document processing, and intelligent retrieval",
        **kwargs
    )

    try:
        self.qdrant_service = QdrantServiceAdapter()
    except Exception as e:
        # Chain the cause so the underlying config/import/connection error
        # stays visible in the traceback.
        raise RuntimeError(f"Failed to initialize Knowledge Base service: {e}") from e

    self._add_knowledge_base_tools()
208
+
209
def _add_knowledge_base_tools(self):
    """Register the agent's five tools, in a fixed order, via ``add_tool``.

    Each entry below is (name, description, handler, properties, required);
    the properties/required pair is wrapped into an ``object`` JSON schema.
    """
    tool_table = [
        (
            "ingest_document",
            "Ingest a document into the knowledge base",
            self._ingest_document,
            {
                "kb_name": {"type": "string", "description": "Knowledge base name"},
                "doc_path": {"type": "string", "description": "Path to document file"},
                "custom_meta": {"type": "object", "description": "Custom metadata for the document"},
            },
            ["kb_name", "doc_path"],
        ),
        (
            "ingest_text",
            "Ingest a Text string into the knowledge base",
            self._ingest_text,
            {
                "kb_name": {"type": "string", "description": "Knowledge base name"},
                "input_text": {"type": "string", "description": "Text to Ingest"},
                "custom_meta": {"type": "object", "description": "Custom metadata for the text"},
            },
            ["kb_name", "input_text"],
        ),
        (
            "query_knowledge_base",
            "Query the knowledge base for information",
            self._query_knowledge_base,
            {
                "kb_name": {"type": "string", "description": "Knowledge base name"},
                "query": {"type": "string", "description": "Query string"},
                "question_type": {
                    "type": "string",
                    "enum": ["free-text", "multi-select", "single-select", "yes-no"],
                    "default": "free-text",
                },
                "option_list": {
                    "type": "array",
                    "items": {"type": "string"},
                    "description": "Options for multi/single select questions",
                },
                "additional_prompt": {"type": "string", "description": "Additional prompt context"},
            },
            ["kb_name", "query"],
        ),
        (
            "ingest_web_content",
            "Ingest content from web URLs",
            self._ingest_web_content,
            {
                "kb_name": {"type": "string", "description": "Knowledge base name"},
                "url": {"type": "string", "description": "URL to ingest"},
                "custom_meta": {"type": "object", "description": "Custom metadata"},
            },
            ["kb_name", "url"],
        ),
        (
            "call_api",
            "Make API calls to external services",
            self._call_api,
            {
                "url": {"type": "string", "description": "API endpoint URL"},
                "method": {"type": "string", "enum": ["GET", "POST", "PUT", "DELETE"], "default": "GET"},
                "headers": {"type": "object", "description": "Request headers"},
                "payload": {"type": "object", "description": "Request payload for POST/PUT"},
                "timeout": {"type": "number", "default": 30},
            },
            ["url"],
        ),
    ]

    for tool_name, summary, handler, properties, required in tool_table:
        self.add_tool(AgentTool(
            name=tool_name,
            description=summary,
            function=handler,
            parameters_schema={
                "type": "object",
                "properties": properties,
                "required": required,
            },
        ))
298
+
299
async def _ingest_document(self, kb_name: str, doc_path: str,
                           custom_meta: Optional[Dict[str, Any]] = None) -> Dict[str, Any]:
    """Ingest the file at ``doc_path`` into the knowledge base ``kb_name``.

    Args:
        kb_name: Knowledge base name.
        doc_path: Path of the file to ingest; must exist.
        custom_meta: Optional extra metadata. It is copied, never modified;
            ``ingestion_time``, ``agent_id`` and ``file_path`` are added to
            the copy and override same-named caller keys.

    Returns:
        A dict with ``success`` and either ``message``/``kb_name``/``file_path``
        or ``error``. Exceptions are reported through ``error``, not raised.
    """
    try:
        if not Path(doc_path).exists():
            return {"success": False, "error": f"File not found: {doc_path}"}

        # Build a fresh dict so the caller's mapping is left untouched.
        metadata = {
            **(custom_meta or {}),
            "ingestion_time": time.time(),
            "agent_id": self.agent_id,
            "file_path": doc_path,
        }

        result = self.qdrant_service.persist_embeddings(
            kb_name=kb_name,
            doc_path=doc_path,
            custom_meta=metadata
        )

        # persist_embeddings reports 1 for success, anything else is failure
        if result == 1:
            return {
                "success": True,
                "message": f"Document {doc_path} successfully ingested into {kb_name}",
                "kb_name": kb_name,
                "file_path": doc_path
            }
        return {
            "success": False,
            "error": f"Failed to ingest document {doc_path}"
        }

    except Exception as e:
        return {"success": False, "error": str(e)}
337
+
338
async def _ingest_text(self, kb_name: str, input_text: str,
                       custom_meta: Optional[Dict[str, Any]] = None) -> Dict[str, Any]:
    """Ingest a raw text string into the knowledge base ``kb_name``.

    Args:
        kb_name: Knowledge base name.
        input_text: Text to split into documents and store.
        custom_meta: Optional extra metadata. It is copied, never modified;
            ``ingestion_time`` and ``agent_id`` are added to the copy and
            override same-named caller keys.

    Returns:
        A dict with ``success`` and either ``message``/``kb_name`` or
        ``error``. Exceptions are reported through ``error``, not raised.
    """
    try:
        # Build a fresh dict so the caller's mapping is left untouched.
        metadata = {
            **(custom_meta or {}),
            "ingestion_time": time.time(),
            "agent_id": self.agent_id,
        }

        document_list = self.qdrant_service.documents_from_text(input_text)

        result = self.qdrant_service.persist_embeddings(
            kb_name=kb_name,
            doc_path=None,
            documents=document_list,
            custom_meta=metadata
        )

        # persist_embeddings reports 1 for success, anything else is failure
        if result == 1:
            return {
                "success": True,
                "message": f"Text successfully ingested into {kb_name}",
                "kb_name": kb_name,
            }
        return {
            "success": False,
            "error": "Failed to ingest text"
        }

    except Exception as e:
        return {"success": False, "error": str(e)}
374
+
375
async def get_answer(self, kb_name: str, query: str, question_type: str = "free-text"):
    """Run ``query`` against ``kb_name`` and return only the answer text.

    Returns ``{"success": True, "answer": ...}``; the per-source details
    produced by the query are dropped. If the query call raises, returns
    ``{"success": False, "error": <message>}`` instead.
    """
    try:
        answer, _source_details = self.qdrant_service.conduct_query(
            query=query,
            kb_name=kb_name,
            question_type=question_type,
        )
    except Exception as exc:
        return {"success": False, "error": str(exc)}
    else:
        return {"success": True, "answer": answer}
396
async def _query_knowledge_base(self, kb_name: str, query: str, question_type: str = "free-text",
                                option_list: List[str] = None, additional_prompt: str = None) -> Dict[str, Any]:
    """Run ``query`` against ``kb_name`` and return the answer with its sources.

    All arguments are forwarded to the Qdrant adapter's ``conduct_query``.
    The result dict echoes ``kb_name``, ``query`` and ``question_type`` and
    carries the answer plus ``source_details``. If the call raises, returns
    ``{"success": False, "error": <message>}`` instead.
    """
    try:
        answer, source_details = self.qdrant_service.conduct_query(
            query=query,
            kb_name=kb_name,
            additional_prompt=additional_prompt,
            question_type=question_type,
            option_list=option_list,
        )
    except Exception as exc:
        return {"success": False, "error": str(exc)}

    outcome = {"success": True, "answer": answer, "source_details": source_details}
    outcome["kb_name"] = kb_name
    outcome["query"] = query
    outcome["question_type"] = question_type
    return outcome
420
+
421
async def _ingest_web_content(self, kb_name: str, url: str,
                              custom_meta: Optional[Dict[str, Any]] = None) -> Dict[str, Any]:
    """Download ``url`` and ingest its body text into ``kb_name``.

    The response text is written to a temporary ``.txt`` file, which is
    passed to ``_ingest_document`` and removed afterwards.

    Args:
        kb_name: Knowledge base name.
        url: Address to fetch with a 30 second timeout.
        custom_meta: Optional extra metadata. It is copied, never modified;
            ``source_url``, ``fetch_time`` and ``content_type`` are added.

    Returns:
        The ``_ingest_document`` result, with ``url`` and a web-specific
        ``message`` set on success. Exceptions (including HTTP error
        statuses) are reported as ``{"success": False, "error": ...}``.
    """
    # NOTE(review): the URL is fetched as given; if it can come from an
    # untrusted party, restrict the allowed hosts/schemes before calling.
    try:
        response = requests.get(url, timeout=30)
        response.raise_for_status()

        # Build a fresh dict so the caller's mapping is left untouched.
        metadata = {
            **(custom_meta or {}),
            "source_url": url,
            "fetch_time": time.time(),
            "content_type": response.headers.get('content-type', 'unknown'),
        }

        # Explicit UTF-8 so pages with characters outside the platform's
        # default encoding can still be written.
        tmp_file = tempfile.NamedTemporaryFile(
            mode='w', suffix='.txt', delete=False, encoding='utf-8'
        )
        tmp_path = Path(tmp_file.name)
        try:
            with tmp_file:
                tmp_file.write(response.text)
            result = await self._ingest_document(kb_name, str(tmp_path), metadata)
        finally:
            # delete=False means cleanup is ours; do it even if the write
            # or the ingest is interrupted.
            tmp_path.unlink(missing_ok=True)

        if result["success"]:
            result["url"] = url
            result["message"] = f"Web content from {url} successfully ingested into {kb_name}"

        return result

    except Exception as e:
        return {"success": False, "error": str(e)}
457
+
458
async def _call_api(self, url: str, method: str = "GET", headers: Optional[Dict[str, str]] = None,
                    payload: Optional[Dict[str, Any]] = None, timeout: int = 30) -> Dict[str, Any]:
    """Send one HTTP request and return its outcome as a dict.

    Args:
        url: Endpoint to call.
        method: HTTP verb; upper-cased before use.
        headers: Optional request headers.
        payload: Sent as the JSON body, only for POST and PUT and only
            when non-empty.
        timeout: Request timeout passed to ``requests``.

    Returns:
        On a completed request: ``success`` (True regardless of the HTTP
        status), ``status_code``, ``response_data`` (parsed JSON, or the
        raw text when the body is not JSON), ``headers``, ``url`` and
        ``method``. If the request raises: ``{"success": False, "error": ...}``.
    """
    try:
        verb = method.upper()
        kwargs = {
            "url": url,
            "method": verb,
            "timeout": timeout
        }

        if headers:
            kwargs["headers"] = headers

        if payload and verb in ["POST", "PUT"]:
            kwargs["json"] = payload

        response = requests.request(**kwargs)

        try:
            response_data = response.json()
        except ValueError:
            # JSON decode failures are ValueError subclasses; fall back to
            # the raw body without also swallowing KeyboardInterrupt etc.
            response_data = response.text

        return {
            "success": True,
            "status_code": response.status_code,
            "response_data": response_data,
            "headers": dict(response.headers),
            "url": url,
            "method": verb
        }

    except Exception as e:
        return {"success": False, "error": str(e)}
495
+
496
async def process_message(self, message: AgentMessage, context: ExecutionContext) -> AgentMessage:
    """Store the incoming message, pick a handler by keyword, and reply.

    The lower-cased content is searched for ingestion keywords first, then
    query keywords; anything else goes to the general handler. The reply is
    stored in memory before it is returned. If anything inside the routing
    step raises, an ERROR-type response describing the exception is
    returned instead (and is not stored).
    """
    ingest_keywords = ['ingest', 'upload', 'add document', 'add file']
    query_keywords = ['query', 'search', 'find', 'what', 'how', 'where', 'when']

    self.memory.store_message(message)

    try:
        lowered = message.content.lower()
        user_message = message.content

        # First matching keyword group wins; ingestion takes priority.
        if any(word in lowered for word in ingest_keywords):
            handler = self._handle_ingestion_request
        elif any(word in lowered for word in query_keywords):
            handler = self._handle_query_request
        else:
            handler = self._handle_general_request

        reply_text = await handler(user_message, context)

        reply = self.create_response(
            content=reply_text,
            recipient_id=message.sender_id,
            session_id=message.session_id,
            conversation_id=message.conversation_id,
        )
        self.memory.store_message(reply)
        return reply

    except Exception as exc:
        return self.create_response(
            content=f"Knowledge Base Agent error: {str(exc)}",
            recipient_id=message.sender_id,
            message_type=MessageType.ERROR,
            session_id=message.session_id,
            conversation_id=message.conversation_id,
        )
531
+
532
async def _handle_ingestion_request(self, user_message: str, context: ExecutionContext) -> str:
    """Return the fixed instructions for ingesting a document.

    Neither ``user_message`` nor ``context`` is inspected; the same text is
    returned for every request.
    """
    instructions = [
        "I can help you ingest documents into your knowledge base. Please provide:",
        "",
        "1. Knowledge base name",
        "2. Document path or URL",
        "3. Any custom metadata (optional)",
        "",
        "I support PDF, DOCX, TXT files and web URLs. Would you like to proceed?",
    ]
    return "\n".join(instructions)
539
+
540
async def _handle_query_request(self, user_message: str, context: ExecutionContext) -> str:
    """Reply to a message that looks like a knowledge-base query.

    Without an LLM service, fixed usage instructions are returned. With
    one, the user's message is embedded in a prompt and the LLM's response
    (generated with ``context.metadata``) is returned as-is. No knowledge
    base is queried here.
    """
    if not self.llm_service:
        return "\n".join([
            "I can help you query knowledge bases. Please specify:",
            "",
            "1. Knowledge base name",
            "2. Your question",
            "3. Question type (free-text, multiple choice, yes/no)",
            "",
            "Example: 'Query the company_docs knowledge base: What is our return policy?'",
        ])

    # Leading and trailing empty entries give the prompt its opening and
    # closing newline.
    prompt = "\n".join([
        "",
        "The user wants to query a knowledge base. Based on their message, help determine:",
        "1. What knowledge base they want to query (if mentioned)",
        "2. What their actual question is",
        "",
        f"User message: {user_message}",
        "",
        "Please provide a helpful response about how to query the knowledge base.",
        "",
    ])

    return await self.llm_service.generate_response(prompt, context.metadata)
561
+
562
async def _handle_general_request(self, user_message: str, context: ExecutionContext) -> str:
    """Reply to a message that matched neither ingestion nor query keywords.

    Without an LLM service, a fixed capability overview is returned. With
    one, the user's message is embedded in a prompt describing the agent
    and the LLM's response (generated with ``context.metadata``) is
    returned as-is.
    """
    if not self.llm_service:
        return "\n".join([
            "I'm your Knowledge Base Agent! I can help you with:",
            "",
            "📄 **Document Management**",
            "- Ingest PDFs, DOCX, TXT files",
            "- Process web content from URLs",
            "- Delete documents and manage collections",
            "",
            "🔍 **Intelligent Search**",
            "- Query knowledge bases with natural language",
            "- Support for different question types",
            "- Source attribution and relevance scoring",
            "",
            "🔧 **Integration Tools**",
            "- API calls to external services",
            "- Status monitoring and analytics",
            "",
            "How can I help you today?",
        ])

    # Leading and trailing empty entries give the prompt its opening and
    # closing newline.
    prompt = "\n".join([
        "",
        "You are a Knowledge Base Agent that helps with document management and retrieval.",
        "",
        "Your capabilities include:",
        "- Ingesting documents (PDF, DOCX, TXT, web URLs)",
        "- Querying knowledge bases with intelligent retrieval",
        "- Managing document lifecycle (add, update, delete)",
        "- Processing various file types",
        "- Making API calls and database queries",
        "",
        f"User message: {user_message}",
        "",
        "Provide a helpful response about how you can assist with their knowledge base needs.",
        "",
    ])

    return await self.llm_service.generate_response(prompt, context.metadata)