ambivo-agents 1.0.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ambivo_agents/__init__.py +91 -0
- ambivo_agents/agents/__init__.py +21 -0
- ambivo_agents/agents/assistant.py +203 -0
- ambivo_agents/agents/code_executor.py +133 -0
- ambivo_agents/agents/code_executor2.py +222 -0
- ambivo_agents/agents/knowledge_base.py +935 -0
- ambivo_agents/agents/media_editor.py +992 -0
- ambivo_agents/agents/moderator.py +617 -0
- ambivo_agents/agents/simple_web_search.py +404 -0
- ambivo_agents/agents/web_scraper.py +1027 -0
- ambivo_agents/agents/web_search.py +933 -0
- ambivo_agents/agents/youtube_download.py +784 -0
- ambivo_agents/cli.py +699 -0
- ambivo_agents/config/__init__.py +4 -0
- ambivo_agents/config/loader.py +301 -0
- ambivo_agents/core/__init__.py +33 -0
- ambivo_agents/core/base.py +1024 -0
- ambivo_agents/core/history.py +606 -0
- ambivo_agents/core/llm.py +333 -0
- ambivo_agents/core/memory.py +640 -0
- ambivo_agents/executors/__init__.py +8 -0
- ambivo_agents/executors/docker_executor.py +108 -0
- ambivo_agents/executors/media_executor.py +237 -0
- ambivo_agents/executors/youtube_executor.py +404 -0
- ambivo_agents/services/__init__.py +6 -0
- ambivo_agents/services/agent_service.py +605 -0
- ambivo_agents/services/factory.py +370 -0
- ambivo_agents-1.0.1.dist-info/METADATA +1090 -0
- ambivo_agents-1.0.1.dist-info/RECORD +33 -0
- ambivo_agents-1.0.1.dist-info/WHEEL +5 -0
- ambivo_agents-1.0.1.dist-info/entry_points.txt +3 -0
- ambivo_agents-1.0.1.dist-info/licenses/LICENSE +21 -0
- ambivo_agents-1.0.1.dist-info/top_level.txt +1 -0
@@ -0,0 +1,935 @@
|
|
1
|
+
# ambivo_agents/agents/knowledge_base.py
|
2
|
+
"""
|
3
|
+
LLM-Aware Knowledge Base Agent with conversation history and intelligent intent detection
|
4
|
+
Updated for consistency with other agents
|
5
|
+
"""
|
6
|
+
|
7
|
+
import asyncio
|
8
|
+
import json
|
9
|
+
import uuid
|
10
|
+
import time
|
11
|
+
import tempfile
|
12
|
+
import requests
|
13
|
+
from pathlib import Path
|
14
|
+
from typing import Dict, List, Any, Optional
|
15
|
+
from datetime import datetime
|
16
|
+
|
17
|
+
from ..core.base import BaseAgent, AgentRole, AgentMessage, MessageType, ExecutionContext, AgentTool
|
18
|
+
from ..config.loader import load_config, get_config_section
|
19
|
+
from ..core.history import KnowledgeBaseAgentHistoryMixin, ContextType
|
20
|
+
|
21
|
+
|
22
|
+
class QdrantServiceAdapter:
    """Adapter for Knowledge Base functionality using YAML configuration.

    Wraps a Qdrant client and exposes text chunking, embedding persistence
    and querying. All settings are read from the ``knowledge_base`` section
    of the loaded configuration.
    """

    def __init__(self):
        """Read the Qdrant settings and create the client.

        Raises:
            ValueError: If ``qdrant_url`` is missing from the configuration.
            ImportError: If the ``qdrant-client`` package cannot be imported.
            ConnectionError: If creating the client fails for any other reason.
        """
        # Load from YAML configuration
        config = load_config()
        kb_config = get_config_section('knowledge_base', config)

        self.qdrant_url = kb_config.get('qdrant_url')
        self.qdrant_api_key = kb_config.get('qdrant_api_key')

        if not self.qdrant_url:
            raise ValueError("qdrant_url is required in knowledge_base configuration")

        # Initialize Qdrant client
        try:
            import qdrant_client

            if self.qdrant_api_key:
                self.client = qdrant_client.QdrantClient(
                    url=self.qdrant_url,
                    api_key=self.qdrant_api_key
                )
            else:
                self.client = qdrant_client.QdrantClient(url=self.qdrant_url)

        except ImportError as e:
            # Chain the cause so the original import failure stays visible.
            raise ImportError("qdrant-client package required for Knowledge Base functionality") from e
        except Exception as e:
            raise ConnectionError(f"Failed to connect to Qdrant: {e}") from e

    def documents_from_text(self, input_text: str) -> list:
        """Split ``input_text`` into chunks and return them as llama-index documents.

        Chunk size and overlap come from the ``chunk_size`` (default 1024) and
        ``chunk_overlap`` (default 20) configuration keys.
        """
        from langchain.text_splitter import RecursiveCharacterTextSplitter
        from llama_index.core.readers import Document as LIDoc

        # Load chunk settings from config
        config = load_config()
        kb_config = get_config_section('knowledge_base', config)

        chunk_size = kb_config.get('chunk_size', 1024)
        chunk_overlap = kb_config.get('chunk_overlap', 20)

        text_splitter = RecursiveCharacterTextSplitter(
            chunk_size=chunk_size,
            chunk_overlap=chunk_overlap
        )
        splitted_documents = text_splitter.create_documents(texts=[input_text])

        # Convert to llama-index format
        return [LIDoc.from_langchain_format(doc) for doc in splitted_documents]

    def persist_embeddings(self, kb_name: str, doc_path: str = None,
                           documents=None, custom_meta: Dict[str, Any] = None) -> int:
        """Index documents into the Qdrant collection belonging to ``kb_name``.

        Args:
            kb_name: Knowledge base name; the collection is named
                ``<default_collection_prefix>_<kb_name>`` (prefix defaults to ``kb``).
            doc_path: File to load when ``documents`` is not supplied.
            documents: Already-built documents to index.
            custom_meta: Extra metadata merged into every document's metadata.

        Returns:
            ``1`` on success; ``2`` when there is nothing to index or when any
            step raises (the error is printed, not re-raised).
        """
        try:
            config = load_config()
            kb_config = get_config_section('knowledge_base', config)

            if not documents and doc_path:
                # Load document from file
                from langchain_unstructured import UnstructuredLoader
                from llama_index.core.readers import Document as LIDoc

                loader = UnstructuredLoader(doc_path)
                lang_docs = loader.load()
                documents = [LIDoc.from_langchain_format(doc) for doc in lang_docs]

            if not documents:
                return 2  # Error

            # Add custom metadata
            if custom_meta:
                for doc in documents:
                    if not hasattr(doc, 'metadata'):
                        doc.metadata = {}
                    doc.metadata.update(custom_meta)

            # Create collection name with prefix from config
            collection_prefix = kb_config.get('default_collection_prefix', 'kb')
            collection_name = f"{collection_prefix}_{kb_name}"

            # Create vector store and index
            from llama_index.core import VectorStoreIndex, StorageContext
            from llama_index.vector_stores.qdrant import QdrantVectorStore

            vector_store = QdrantVectorStore(
                client=self.client,
                collection_name=collection_name
            )

            storage_context = StorageContext.from_defaults(vector_store=vector_store)
            # Called for its side effect of writing to the vector store; the
            # returned index object is not needed here.
            VectorStoreIndex.from_documents(documents, storage_context=storage_context)

            return 1  # Success

        except Exception as e:
            print(f"Error persisting embeddings: {e}")
            return 2  # Error

    def conduct_query(self, query: str, kb_name: str, additional_prompt: str = None,
                      question_type: str = "free-text", option_list=None) -> tuple:
        """Query the knowledge base ``kb_name`` and return the answer with sources.

        Args:
            query: Question passed to the query engine.
            kb_name: Knowledge base name; resolved to a collection the same way
                as in ``persist_embeddings``.
            additional_prompt: Accepted for interface compatibility; not used
                by this implementation.
            question_type: Accepted for interface compatibility; not used by
                this implementation.
            option_list: Accepted for interface compatibility; not used by
                this implementation.

        Returns:
            ``(answer, details)`` where ``details`` is a one-element list holding
            a dict with ``answer``, ``source`` and ``source_list`` keys. On
            failure both carry a ``"Query error: ..."`` message instead of raising.
        """
        try:
            config = load_config()
            kb_config = get_config_section('knowledge_base', config)

            collection_prefix = kb_config.get('default_collection_prefix', 'kb')
            collection_name = f"{collection_prefix}_{kb_name}"

            similarity_top_k = kb_config.get('similarity_top_k', 5)

            # Create vector store and query engine
            from llama_index.core import VectorStoreIndex
            from llama_index.vector_stores.qdrant import QdrantVectorStore
            from llama_index.core.indices.vector_store import VectorIndexRetriever
            from llama_index.core.query_engine import RetrieverQueryEngine
            from llama_index.core import get_response_synthesizer

            vector_store = QdrantVectorStore(
                client=self.client,
                collection_name=collection_name
            )

            index = VectorStoreIndex.from_vector_store(vector_store=vector_store)
            retriever = VectorIndexRetriever(similarity_top_k=similarity_top_k, index=index)
            response_synthesizer = get_response_synthesizer()
            query_engine = RetrieverQueryEngine(
                retriever=retriever,
                response_synthesizer=response_synthesizer
            )

            # Execute query
            response = query_engine.query(query)
            answer = str(response)
            source_list = []

            if hasattr(response, 'source_nodes') and response.source_nodes:
                for node in response.source_nodes:
                    node_text = node.node.get_text()
                    # Only mark the preview as truncated when text was actually cut.
                    preview = node_text[:200] + "..." if len(node_text) > 200 else node_text
                    source_list.append({
                        "text": preview,
                        "score": getattr(node, 'score', 0.0),
                        "metadata": getattr(node.node, 'metadata', {})
                    })

            ans_dict_list = [{
                "answer": answer,
                "source": f"Found {len(source_list)} relevant sources",
                "source_list": source_list
            }]

            return answer, ans_dict_list

        except Exception as e:
            error_msg = f"Query error: {str(e)}"
            return error_msg, [{"answer": error_msg, "source": "", "source_list": []}]
|
179
|
+
|
180
|
+
|
181
|
+
class KnowledgeBaseAgent(BaseAgent, KnowledgeBaseAgentHistoryMixin):
|
182
|
+
"""LLM-Aware Knowledge Base Agent with conversation context and intelligent routing"""
|
183
|
+
|
184
|
+
def __init__(self, agent_id: str = None, memory_manager=None, llm_service=None, **kwargs):
    """Create the agent, its history mixin state, the Qdrant service and its tools.

    Args:
        agent_id: Identifier for the agent; a ``kb_<8 hex chars>`` id is
            generated when omitted.
        memory_manager: Passed through to the base agent.
        llm_service: Passed through to the base agent.
        **kwargs: Forwarded to the base agent constructor.

    Raises:
        RuntimeError: If the Qdrant service adapter cannot be created.
    """
    if agent_id is None:
        agent_id = f"kb_{str(uuid.uuid4())[:8]}"

    super().__init__(
        agent_id=agent_id,
        role=AgentRole.RESEARCHER,
        memory_manager=memory_manager,
        llm_service=llm_service,
        name="Knowledge Base Agent",
        description="LLM-aware knowledge base agent with conversation history",
        **kwargs
    )

    # Initialize history mixin
    self.setup_history_mixin()

    # Initialize Qdrant service
    try:
        self.qdrant_service = QdrantServiceAdapter()
    except Exception as e:
        # Chain the cause so configuration/connection failures keep their traceback.
        raise RuntimeError(f"Failed to initialize Knowledge Base service: {e}") from e

    # Add knowledge base tools
    self._add_knowledge_base_tools()
|
209
|
+
|
210
|
+
async def _llm_analyze_kb_intent(self, user_message: str, conversation_context: str = "") -> Dict[str, Any]:
    """Use the LLM to analyze knowledge base related intent.

    Args:
        user_message: The message to classify.
        conversation_context: Summary of earlier KB activity, embedded in the prompt.

    Returns:
        The intent dict decoded from the LLM reply. When no LLM service is
        configured, or the call/decoding raises, the keyword-based analysis
        is returned instead. When the reply contains no JSON object, the
        reply text itself is scanned for intent keywords.
    """
    if not self.llm_service:
        return self._keyword_based_kb_analysis(user_message)

    prompt = f"""
Analyze this user message in the context of a knowledge base conversation and extract:
1. Primary intent (ingest_document, ingest_text, query_kb, create_kb, manage_kb, help_request)
2. Knowledge base name (if mentioned or inferrable)
3. Document/file references (file paths, document names)
4. Query content (if querying)
5. Context references (referring to previous KB operations)
6. Operation specifics (metadata, query type, etc.)

Conversation Context:
{conversation_context}

Current User Message: {user_message}

Respond in JSON format:
{{
"primary_intent": "ingest_document|ingest_text|query_kb|create_kb|manage_kb|help_request",
"kb_name": "knowledge_base_name or null",
"document_references": ["file1.pdf", "doc2.txt"],
"query_content": "the actual question to ask" or null,
"uses_context_reference": true/false,
"context_type": "previous_kb|previous_document|previous_query",
"operation_details": {{
"query_type": "free-text|multi-select|single-select|yes-no",
"custom_metadata": {{}},
"source_type": "file|url|text"
}},
"confidence": 0.0-1.0
}}
"""

    try:
        import re

        response = await self.llm_service.generate_response(prompt)
        json_match = re.search(r'\{.*\}', response, re.DOTALL)
        if not json_match:
            return self._extract_kb_intent_from_llm_response(response, user_message)

        # Decode only the first JSON object starting at the first "{".
        # Unlike parsing the greedy regex capture, this tolerates replies
        # that contain further text (or braces) after the JSON object.
        parsed, _ = json.JSONDecoder().raw_decode(response[json_match.start():])
        return parsed
    except Exception:
        # Any LLM or decoding failure degrades to keyword heuristics.
        return self._keyword_based_kb_analysis(user_message)
|
256
|
+
|
257
|
+
def _keyword_based_kb_analysis(self, user_message: str) -> Dict[str, Any]:
    """Fallback keyword-based KB intent analysis.

    Classifies the message by substring keywords and extracts knowledge base
    names, document names and file paths via ``extract_context_from_text``.

    Args:
        user_message: The message to classify.

    Returns:
        An intent dict with the same keys the LLM analysis is asked to
        produce; ``confidence`` is fixed at 0.7.
    """
    import re

    content_lower = user_message.lower()

    def mentions(*phrases: str) -> bool:
        # True when any phrase occurs as a substring of the lowered message.
        return any(phrase in content_lower for phrase in phrases)

    # Determine intent. Specific multi-word phrases are tested before the
    # broader single keywords that contain them ('ingest text' vs 'ingest',
    # 'what can'/'how to' vs 'what'/'how'); otherwise those branches can
    # never be selected.
    if mentions('add text', 'ingest text', 'text to'):
        intent = 'ingest_text'
    elif mentions('ingest', 'upload', 'add document', 'import', 'load'):
        intent = 'ingest_document'
    elif mentions('what can', 'how to'):
        intent = 'help_request'
    elif mentions('query', 'search', 'find', 'ask', 'what', 'how', 'where'):
        intent = 'query_kb'
    elif mentions('create', 'new kb', 'make kb', 'setup'):
        intent = 'create_kb'
    else:
        # Covers an explicit 'help' as well as anything unrecognized.
        intent = 'help_request'

    # Extract KB names and documents
    kb_names = self.extract_context_from_text(user_message, ContextType.KNOWLEDGE_BASE)
    documents = self.extract_context_from_text(user_message, ContextType.DOCUMENT_NAME)
    file_paths = self.extract_context_from_text(user_message, ContextType.FILE_PATH)
    all_documents = documents + file_paths

    # Extract query content
    query_content = self._extract_query_from_kb_message(user_message) if intent == 'query_kb' else None

    # Match the pronouns as whole words; a plain substring test would fire
    # on words such as "with" or "edit".
    uses_context_reference = bool(re.search(r"\b(?:this|that|it)\b", content_lower))

    return {
        "primary_intent": intent,
        "kb_name": kb_names[0] if kb_names else None,
        "document_references": all_documents,
        "query_content": query_content,
        "uses_context_reference": uses_context_reference,
        "context_type": "previous_kb",
        "operation_details": {
            "query_type": "free-text",
            "custom_metadata": {},
            "source_type": "file"
        },
        "confidence": 0.7
    }
|
298
|
+
|
299
|
+
async def process_message(self, message: AgentMessage, context: ExecutionContext = None) -> AgentMessage:
    """Handle one incoming message and return the agent's reply.

    The message is stored in memory, the conversation state is updated, the
    intent is analyzed and the request is routed to the matching handler.
    The reply is stored in memory before being returned. Any exception raised
    while handling is converted into an ``ERROR`` response (which is not stored).
    """
    self.memory.store_message(message)

    try:
        incoming_text = message.content

        # Keep the tracked conversation state current before analysis.
        self.update_conversation_state(incoming_text)

        history_summary = self._get_kb_conversation_context_summary()
        analysis = await self._llm_analyze_kb_intent(incoming_text, history_summary)
        reply_text = await self._route_kb_with_llm_analysis(analysis, incoming_text, context)

        reply = self.create_response(
            content=reply_text,
            recipient_id=message.sender_id,
            session_id=message.session_id,
            conversation_id=message.conversation_id
        )
        self.memory.store_message(reply)
        return reply

    except Exception as e:
        return self.create_response(
            content=f"Knowledge Base Agent error: {str(e)}",
            recipient_id=message.sender_id,
            message_type=MessageType.ERROR,
            session_id=message.session_id,
            conversation_id=message.conversation_id
        )
|
337
|
+
|
338
|
+
def _get_kb_conversation_context_summary(self) -> str:
    """Get KB conversation context summary.

    Looks at up to three recent history entries carrying knowledge base or
    document context and reports the first KB name / document of each
    ``user_input`` entry, followed by the current knowledge base if one is set.

    Returns:
        Newline-separated summary lines, or ``"No previous KB context"`` when
        nothing is found or the history lookup fails.
    """
    try:
        recent_history = self.get_conversation_history_with_context(
            limit=3,
            context_types=[ContextType.KNOWLEDGE_BASE, ContextType.DOCUMENT_NAME]
        )

        context_summary = []
        for msg in recent_history:
            if msg.get('message_type') != 'user_input':
                continue

            extracted_context = msg.get('extracted_context', {})
            kb_names = extracted_context.get('knowledge_base', [])
            docs = extracted_context.get('document_name', [])

            if kb_names:
                context_summary.append(f"Previous KB: {kb_names[0]}")
            if docs:
                context_summary.append(f"Previous document: {docs[0]}")

        # Add current state
        current_kb = self.get_current_knowledge_base()
        if current_kb:
            context_summary.append(f"Current KB: {current_kb}")

        return "\n".join(context_summary) if context_summary else "No previous KB context"
    except Exception:
        # Best effort: context is optional, but do not swallow
        # KeyboardInterrupt/SystemExit as a bare ``except`` would.
        return "No previous KB context"
|
366
|
+
|
367
|
+
async def _route_kb_with_llm_analysis(self, intent_analysis: Dict[str, Any], user_message: str,
|
368
|
+
context: ExecutionContext) -> str:
|
369
|
+
"""Route KB request based on LLM intent analysis"""
|
370
|
+
|
371
|
+
primary_intent = intent_analysis.get("primary_intent", "help_request")
|
372
|
+
kb_name = intent_analysis.get("kb_name")
|
373
|
+
documents = intent_analysis.get("document_references", [])
|
374
|
+
query_content = intent_analysis.get("query_content")
|
375
|
+
uses_context = intent_analysis.get("uses_context_reference", False)
|
376
|
+
operation_details = intent_analysis.get("operation_details", {})
|
377
|
+
|
378
|
+
# Resolve context references if needed
|
379
|
+
if uses_context:
|
380
|
+
kb_name = kb_name or self.get_current_knowledge_base()
|
381
|
+
if not documents:
|
382
|
+
recent_doc = self.get_recent_document()
|
383
|
+
if recent_doc:
|
384
|
+
documents = [recent_doc]
|
385
|
+
|
386
|
+
# Route based on intent
|
387
|
+
if primary_intent == "help_request":
|
388
|
+
return await self._handle_kb_help_request(user_message)
|
389
|
+
elif primary_intent == "ingest_document":
|
390
|
+
return await self._handle_document_ingestion(kb_name, documents, operation_details, user_message)
|
391
|
+
elif primary_intent == "ingest_text":
|
392
|
+
return await self._handle_text_ingestion(kb_name, user_message, operation_details)
|
393
|
+
elif primary_intent == "query_kb":
|
394
|
+
return await self._handle_kb_query(kb_name, query_content, operation_details)
|
395
|
+
elif primary_intent == "create_kb":
|
396
|
+
return await self._handle_kb_creation(kb_name, user_message)
|
397
|
+
elif primary_intent == "manage_kb":
|
398
|
+
return await self._handle_kb_management(kb_name, user_message)
|
399
|
+
else:
|
400
|
+
return await self._handle_kb_help_request(user_message)
|
401
|
+
|
402
|
+
async def _handle_document_ingestion(self, kb_name: str, documents: List[str], operation_details: Dict[str, Any],
|
403
|
+
user_message: str) -> str:
|
404
|
+
"""Handle document ingestion with LLM analysis"""
|
405
|
+
|
406
|
+
# Resolve missing parameters
|
407
|
+
if not kb_name:
|
408
|
+
available_kbs = self.conversation_state.knowledge_bases
|
409
|
+
if available_kbs:
|
410
|
+
return f"I can ingest documents! Which knowledge base?\n\n" \
|
411
|
+
f"**Available KBs:**\n" + "\n".join([f"• {kb}" for kb in available_kbs]) + \
|
412
|
+
f"\n\nOr specify a new KB name."
|
413
|
+
else:
|
414
|
+
return "I can ingest documents into knowledge bases. Please specify:\n\n" \
|
415
|
+
"1. **Knowledge base name** (I'll create it if it doesn't exist)\n" \
|
416
|
+
"2. **Document path** or just tell me which document\n\n" \
|
417
|
+
"Example: 'Ingest research.pdf into ai_papers'"
|
418
|
+
|
419
|
+
if not documents:
|
420
|
+
return f"I'll ingest into the **{kb_name}** knowledge base. Which document would you like to add?\n\n" \
|
421
|
+
f"Please provide the document path or tell me the filename."
|
422
|
+
|
423
|
+
# Perform ingestion
|
424
|
+
document_path = documents[0]
|
425
|
+
|
426
|
+
try:
|
427
|
+
# Check if it's a URL or file path
|
428
|
+
if document_path.startswith('http'):
|
429
|
+
result = await self._ingest_web_content(kb_name, document_path)
|
430
|
+
operation_type = "Web content"
|
431
|
+
else:
|
432
|
+
result = await self._ingest_document(kb_name, document_path)
|
433
|
+
operation_type = "Document"
|
434
|
+
|
435
|
+
if result['success']:
|
436
|
+
return f"✅ **{operation_type} Ingestion Completed**\n\n" \
|
437
|
+
f"📄 **Source:** {document_path}\n" \
|
438
|
+
f"🗃️ **Knowledge Base:** {kb_name}\n" \
|
439
|
+
f"⏱️ **Status:** Successfully processed and indexed\n\n" \
|
440
|
+
f"You can now query this knowledge base! 🎉"
|
441
|
+
else:
|
442
|
+
return f"❌ **Ingestion failed:** {result['error']}"
|
443
|
+
|
444
|
+
except Exception as e:
|
445
|
+
return f"❌ **Error during ingestion:** {str(e)}"
|
446
|
+
|
447
|
+
async def _handle_text_ingestion(self, kb_name: str, user_message: str, operation_details: Dict[str, Any]) -> str:
|
448
|
+
"""Handle text ingestion with LLM analysis"""
|
449
|
+
|
450
|
+
if not kb_name:
|
451
|
+
return "I can ingest text into knowledge bases. Please specify which knowledge base to use."
|
452
|
+
|
453
|
+
# Extract text content from message (after removing command parts)
|
454
|
+
text_content = self._extract_text_for_ingestion(user_message)
|
455
|
+
|
456
|
+
if not text_content:
|
457
|
+
return f"I'll add text to the **{kb_name}** knowledge base. What text would you like me to ingest?"
|
458
|
+
|
459
|
+
try:
|
460
|
+
result = await self._ingest_text(kb_name, text_content)
|
461
|
+
|
462
|
+
if result['success']:
|
463
|
+
preview = text_content[:100] + "..." if len(text_content) > 100 else text_content
|
464
|
+
return f"✅ **Text Ingestion Completed**\n\n" \
|
465
|
+
f"📝 **Text Preview:** {preview}\n" \
|
466
|
+
f"🗃️ **Knowledge Base:** {kb_name}\n" \
|
467
|
+
f"📊 **Length:** {len(text_content)} characters\n\n" \
|
468
|
+
f"Text successfully indexed! 🎉"
|
469
|
+
else:
|
470
|
+
return f"❌ **Text ingestion failed:** {result['error']}"
|
471
|
+
|
472
|
+
except Exception as e:
|
473
|
+
return f"❌ **Error during text ingestion:** {str(e)}"
|
474
|
+
|
475
|
+
async def _handle_kb_query(self, kb_name: str, query_content: str, operation_details: Dict[str, Any]) -> str:
|
476
|
+
"""Handle KB queries with LLM analysis"""
|
477
|
+
|
478
|
+
# Resolve missing parameters
|
479
|
+
if not kb_name:
|
480
|
+
available_kbs = self.conversation_state.knowledge_bases
|
481
|
+
if available_kbs:
|
482
|
+
return f"I can query knowledge bases! Which one?\n\n" \
|
483
|
+
f"**Available KBs from our conversation:**\n" + \
|
484
|
+
"\n".join([f"• {kb}" for kb in available_kbs]) + \
|
485
|
+
f"\n\nExample: 'Query {available_kbs[0]}: {query_content or 'your question'}'"
|
486
|
+
else:
|
487
|
+
return "I can query knowledge bases, but I need to know which one to search.\n\n" \
|
488
|
+
"Please specify: `Query [kb_name]: [your question]`"
|
489
|
+
|
490
|
+
if not query_content:
|
491
|
+
return f"I'll search the **{kb_name}** knowledge base. What would you like me to find?"
|
492
|
+
|
493
|
+
try:
|
494
|
+
query_type = operation_details.get("query_type", "free-text")
|
495
|
+
result = await self._query_knowledge_base(kb_name, query_content, question_type=query_type)
|
496
|
+
|
497
|
+
if result['success']:
|
498
|
+
answer = result['answer']
|
499
|
+
source_count = len(result.get('source_details', []))
|
500
|
+
|
501
|
+
return f"🔍 **Query Results from {kb_name}**\n\n" \
|
502
|
+
f"**Question:** {query_content}\n\n" \
|
503
|
+
f"**Answer:**\n{answer}\n\n" \
|
504
|
+
f"📊 **Sources:** {source_count} relevant documents found"
|
505
|
+
else:
|
506
|
+
return f"❌ **Query failed:** {result['error']}"
|
507
|
+
|
508
|
+
except Exception as e:
|
509
|
+
return f"❌ **Error during query:** {str(e)}"
|
510
|
+
|
511
|
+
async def _handle_kb_creation(self, kb_name: str, user_message: str) -> str:
|
512
|
+
"""Handle KB creation requests"""
|
513
|
+
|
514
|
+
if not kb_name:
|
515
|
+
return "I can create knowledge bases! What would you like to name the new knowledge base?\n\n" \
|
516
|
+
"Example: 'Create a knowledge base called research_papers'"
|
517
|
+
|
518
|
+
# KB creation is implicit when first document is ingested
|
519
|
+
return f"Great! I'll create the **{kb_name}** knowledge base when you add the first document.\n\n" \
|
520
|
+
f"To get started:\n" \
|
521
|
+
f"• `Ingest document.pdf into {kb_name}`\n" \
|
522
|
+
f"• `Add text to {kb_name}: [your text content]`\n" \
|
523
|
+
f"• `Ingest https://example.com into {kb_name}`"
|
524
|
+
|
525
|
+
async def _handle_kb_management(self, kb_name: str, user_message: str) -> str:
|
526
|
+
"""Handle KB management requests"""
|
527
|
+
|
528
|
+
available_kbs = self.conversation_state.knowledge_bases
|
529
|
+
|
530
|
+
if not available_kbs:
|
531
|
+
return "No knowledge bases found in our conversation. Create one by ingesting your first document!"
|
532
|
+
|
533
|
+
response = f"📚 **Knowledge Base Management**\n\n"
|
534
|
+
response += f"**Available Knowledge Bases:**\n"
|
535
|
+
for kb in available_kbs:
|
536
|
+
response += f"• {kb}\n"
|
537
|
+
|
538
|
+
response += f"\n**Management Options:**\n"
|
539
|
+
response += f"• Query: `Query {available_kbs[0]}: your question`\n"
|
540
|
+
response += f"• Add docs: `Ingest file.pdf into {available_kbs[0]}`\n"
|
541
|
+
response += f"• Add text: `Add text to {available_kbs[0]}: content`\n"
|
542
|
+
|
543
|
+
return response
|
544
|
+
|
545
|
+
async def _handle_kb_help_request(self, user_message: str) -> str:
    """Build the help text, including the knowledge bases seen so far.

    Shows the capability overview, up to the last three knowledge bases from
    the conversation state, the number of working files, and example
    commands. ``user_message`` is accepted but not used.
    """
    state = self.get_conversation_state()

    parts = [
        "I'm your Knowledge Base Agent! I can help you with:\n\n"
        "📄 **Document Management**\n"
        "- Ingest PDFs, DOCX, TXT, MD files\n"
        "- Process web content from URLs\n"
        "- Add text content directly\n\n"
        "🔍 **Intelligent Search**\n"
        "- Natural language queries\n"
        "- Semantic similarity search\n"
        "- Source attribution\n\n"
        "🧠 **Smart Context Features**\n"
        "- Remembers knowledge bases from conversation\n"
        "- Understands 'that KB' and 'this document'\n"
        "- Maintains working context\n\n"
    ]

    # Add current context information
    if state.knowledge_bases:
        parts.append(f"🗃️ **Your Knowledge Bases:**\n")
        # Only the three most recent entries are listed.
        parts.extend(f" • {kb}\n" for kb in state.knowledge_bases[-3:])

    if state.working_files:
        parts.append(f"\n📁 **Recent Documents:** {len(state.working_files)} files\n")

    parts.extend([
        "\n💡 **Examples:**\n",
        "• 'Ingest research.pdf into ai_papers'\n",
        "• 'Query ai_papers: What are the main findings?'\n",
        "• 'Add this text to the knowledge base: [content]'\n",
        "\nI understand context from our conversation! 🚀",
    ])

    return "".join(parts)
|
580
|
+
|
581
|
+
def _extract_query_from_kb_message(self, message: str) -> str:
|
582
|
+
"""Extract query content from KB message"""
|
583
|
+
# Look for colon pattern first
|
584
|
+
import re
|
585
|
+
colon_match = re.search(r':\s*(.+)', message)
|
586
|
+
if colon_match:
|
587
|
+
return colon_match.group(1).strip()
|
588
|
+
|
589
|
+
# Remove KB operation keywords
|
590
|
+
query_keywords = ['query', 'search', 'find', 'ask', 'what', 'how', 'where', 'when', 'why']
|
591
|
+
words = message.split()
|
592
|
+
filtered_words = []
|
593
|
+
|
594
|
+
for word in words:
|
595
|
+
if word.lower() not in query_keywords and not word.lower().endswith('_kb'):
|
596
|
+
filtered_words.append(word)
|
597
|
+
|
598
|
+
return ' '.join(filtered_words).strip()
|
599
|
+
|
600
|
+
def _extract_text_for_ingestion(self, message: str) -> str:
|
601
|
+
"""Extract text content for ingestion from message"""
|
602
|
+
# Look for colon pattern
|
603
|
+
import re
|
604
|
+
colon_match = re.search(r':\s*(.+)', message)
|
605
|
+
if colon_match:
|
606
|
+
return colon_match.group(1).strip()
|
607
|
+
|
608
|
+
# Remove ingestion keywords
|
609
|
+
ingest_keywords = ['ingest', 'add', 'upload', 'text', 'into', 'to']
|
610
|
+
words = message.split()
|
611
|
+
filtered_words = []
|
612
|
+
skip_next = False
|
613
|
+
|
614
|
+
for word in words:
|
615
|
+
if skip_next:
|
616
|
+
skip_next = False
|
617
|
+
continue
|
618
|
+
|
619
|
+
if word.lower() in ingest_keywords:
|
620
|
+
continue
|
621
|
+
elif word.lower().endswith('_kb') or word.lower().endswith('_base'):
|
622
|
+
continue
|
623
|
+
else:
|
624
|
+
filtered_words.append(word)
|
625
|
+
|
626
|
+
return ' '.join(filtered_words).strip()
|
627
|
+
|
628
|
+
def _extract_kb_intent_from_llm_response(self, llm_response: str, user_message: str) -> Dict[str, Any]:
|
629
|
+
"""Extract KB intent from non-JSON LLM response"""
|
630
|
+
content_lower = llm_response.lower()
|
631
|
+
|
632
|
+
if 'ingest' in content_lower or 'upload' in content_lower:
|
633
|
+
intent = 'ingest_document'
|
634
|
+
elif 'query' in content_lower or 'search' in content_lower:
|
635
|
+
intent = 'query_kb'
|
636
|
+
elif 'create' in content_lower:
|
637
|
+
intent = 'create_kb'
|
638
|
+
else:
|
639
|
+
intent = 'help_request'
|
640
|
+
|
641
|
+
return {
|
642
|
+
"primary_intent": intent,
|
643
|
+
"kb_name": None,
|
644
|
+
"document_references": [],
|
645
|
+
"query_content": None,
|
646
|
+
"uses_context_reference": False,
|
647
|
+
"context_type": "none",
|
648
|
+
"operation_details": {"query_type": "free-text"},
|
649
|
+
"confidence": 0.6
|
650
|
+
}
|
651
|
+
|
652
|
+
# Tool implementations
|
653
|
+
def _add_knowledge_base_tools(self):
    """Register the knowledge base tools on this agent.

    Adds, in order: ``ingest_document``, ``ingest_text``,
    ``query_knowledge_base``, ``ingest_web_content`` and ``call_api``, each
    bound to the matching private method with a JSON-schema style
    parameter description.
    """

    def register(name, description, function, properties, required):
        # Build and add one tool immediately so registration order is kept.
        self.add_tool(AgentTool(
            name=name,
            description=description,
            function=function,
            parameters_schema={
                "type": "object",
                "properties": properties,
                "required": required
            }
        ))

    def string_prop(description):
        # Fresh dict per call so schemas never share mutable state.
        return {"type": "string", "description": description}

    def object_prop(description):
        return {"type": "object", "description": description}

    # Document ingestion tool
    register(
        "ingest_document",
        "Ingest a document into the knowledge base",
        self._ingest_document,
        {
            "kb_name": string_prop("Knowledge base name"),
            "doc_path": string_prop("Path to document file"),
            "custom_meta": object_prop("Custom metadata for the document")
        },
        ["kb_name", "doc_path"]
    )

    # Text ingestion tool
    register(
        "ingest_text",
        "Ingest a Text string into the knowledge base",
        self._ingest_text,
        {
            "kb_name": string_prop("Knowledge base name"),
            "input_text": string_prop("Text to Ingest"),
            "custom_meta": object_prop("Custom metadata for the text")
        },
        ["kb_name", "input_text"]
    )

    # Knowledge base query tool
    register(
        "query_knowledge_base",
        "Query the knowledge base for information",
        self._query_knowledge_base,
        {
            "kb_name": string_prop("Knowledge base name"),
            "query": string_prop("Query string"),
            "question_type": {"type": "string",
                              "enum": ["free-text", "multi-select", "single-select", "yes-no"],
                              "default": "free-text"},
            "option_list": {"type": "array", "items": {"type": "string"},
                            "description": "Options for multi/single select questions"},
            "additional_prompt": string_prop("Additional prompt context")
        },
        ["kb_name", "query"]
    )

    # Web content ingestion tool
    register(
        "ingest_web_content",
        "Ingest content from web URLs",
        self._ingest_web_content,
        {
            "kb_name": string_prop("Knowledge base name"),
            "url": string_prop("URL to ingest"),
            "custom_meta": object_prop("Custom metadata")
        },
        ["kb_name", "url"]
    )

    # API call tool
    register(
        "call_api",
        "Make API calls to external services",
        self._call_api,
        {
            "url": string_prop("API endpoint URL"),
            "method": {"type": "string", "enum": ["GET", "POST", "PUT", "DELETE"], "default": "GET"},
            "headers": object_prop("Request headers"),
            "payload": object_prop("Request payload for POST/PUT"),
            "timeout": {"type": "number", "default": 30}
        },
        ["url"]
    )
|
742
|
+
|
743
|
+
async def _ingest_document(self, kb_name: str, doc_path: str, custom_meta: Dict[str, Any] = None) -> Dict[str, Any]:
|
744
|
+
"""Ingest a document into the knowledge base"""
|
745
|
+
try:
|
746
|
+
if not Path(doc_path).exists():
|
747
|
+
return {"success": False, "error": f"File not found: {doc_path}"}
|
748
|
+
|
749
|
+
# Add metadata
|
750
|
+
if not custom_meta:
|
751
|
+
custom_meta = {}
|
752
|
+
|
753
|
+
custom_meta.update({
|
754
|
+
"ingestion_time": time.time(),
|
755
|
+
"agent_id": self.agent_id,
|
756
|
+
"file_path": doc_path
|
757
|
+
})
|
758
|
+
|
759
|
+
# Use existing persist_embeddings method
|
760
|
+
result = self.qdrant_service.persist_embeddings(
|
761
|
+
kb_name=kb_name,
|
762
|
+
doc_path=doc_path,
|
763
|
+
custom_meta=custom_meta
|
764
|
+
)
|
765
|
+
|
766
|
+
if result == 1:
|
767
|
+
return {
|
768
|
+
"success": True,
|
769
|
+
"message": f"Document {doc_path} successfully ingested into {kb_name}",
|
770
|
+
"kb_name": kb_name,
|
771
|
+
"file_path": doc_path
|
772
|
+
}
|
773
|
+
else:
|
774
|
+
return {
|
775
|
+
"success": False,
|
776
|
+
"error": f"Failed to ingest document {doc_path}"
|
777
|
+
}
|
778
|
+
|
779
|
+
except Exception as e:
|
780
|
+
return {"success": False, "error": str(e)}
|
781
|
+
|
782
|
+
async def _ingest_text(self, kb_name: str, input_text: str, custom_meta: Dict[str, Any] = None) -> Dict[str, Any]:
|
783
|
+
"""Ingest text into the knowledge base"""
|
784
|
+
try:
|
785
|
+
# Add metadata
|
786
|
+
if not custom_meta:
|
787
|
+
custom_meta = {}
|
788
|
+
|
789
|
+
custom_meta.update({
|
790
|
+
"ingestion_time": time.time(),
|
791
|
+
"agent_id": self.agent_id,
|
792
|
+
})
|
793
|
+
|
794
|
+
document_list = self.qdrant_service.documents_from_text(input_text)
|
795
|
+
|
796
|
+
# Use existing persist_embeddings method
|
797
|
+
result = self.qdrant_service.persist_embeddings(
|
798
|
+
kb_name=kb_name,
|
799
|
+
doc_path=None,
|
800
|
+
documents=document_list,
|
801
|
+
custom_meta=custom_meta
|
802
|
+
)
|
803
|
+
|
804
|
+
if result == 1:
|
805
|
+
return {
|
806
|
+
"success": True,
|
807
|
+
"message": f"Text successfully ingested into {kb_name}",
|
808
|
+
"kb_name": kb_name,
|
809
|
+
}
|
810
|
+
else:
|
811
|
+
return {
|
812
|
+
"success": False,
|
813
|
+
"error": f"Failed to ingest text"
|
814
|
+
}
|
815
|
+
|
816
|
+
except Exception as e:
|
817
|
+
return {"success": False, "error": str(e)}
|
818
|
+
|
819
|
+
async def get_answer(self, kb_name: str, query: str, question_type: str = "free-text"):
    """Ask the knowledge base a question and return only the answer text.

    Delegates to ``qdrant_service.conduct_query`` and discards the second
    element of the pair it returns.

    Returns:
        ``{"success": True, "answer": ...}`` when the query completes, or
        ``{"success": False, "error": ...}`` carrying the exception message
        when anything raises.
    """
    try:
        query_result = self.qdrant_service.conduct_query(
            query=query,
            kb_name=kb_name,
            question_type=question_type,
        )
        # conduct_query yields a pair; only the first element is reported.
        answer_text, _unused_sources = query_result
    except Exception as exc:
        return {"success": False, "error": str(exc)}

    return {"success": True, "answer": answer_text}
|
836
|
+
|
837
|
+
async def _query_knowledge_base(self, kb_name: str, query: str, question_type: str = "free-text",
|
838
|
+
option_list: List[str] = None, additional_prompt: str = None) -> Dict[str, Any]:
|
839
|
+
"""Query the knowledge base"""
|
840
|
+
try:
|
841
|
+
# Use existing conduct_query method
|
842
|
+
answer, ans_dict_list = self.qdrant_service.conduct_query(
|
843
|
+
query=query,
|
844
|
+
kb_name=kb_name,
|
845
|
+
additional_prompt=additional_prompt,
|
846
|
+
question_type=question_type,
|
847
|
+
option_list=option_list
|
848
|
+
)
|
849
|
+
|
850
|
+
return {
|
851
|
+
"success": True,
|
852
|
+
"answer": answer,
|
853
|
+
"source_details": ans_dict_list,
|
854
|
+
"kb_name": kb_name,
|
855
|
+
"query": query,
|
856
|
+
"question_type": question_type
|
857
|
+
}
|
858
|
+
|
859
|
+
except Exception as e:
|
860
|
+
return {"success": False, "error": str(e)}
|
861
|
+
|
862
|
+
async def _ingest_web_content(self, kb_name: str, url: str, custom_meta: Dict[str, Any] = None) -> Dict[str, Any]:
    """Fetch a URL and ingest its body text into the named knowledge base.

    The response text is written to a temporary ``.txt`` file, which is
    passed to ``_ingest_document`` and removed afterwards.

    Args:
        kb_name: Knowledge base name forwarded to ``_ingest_document``.
        url: Address fetched with ``requests.get`` (30 second timeout).
        custom_meta: Optional extra metadata. It is copied before
            ``source_url``, ``fetch_time`` and ``content_type`` are added,
            so the caller's dict is never modified.

    Returns:
        The dict produced by ``_ingest_document``; on success it is
        extended with ``url`` and a web-specific ``message``. If fetching
        or writing fails, a dict with ``success`` False and the exception
        text in ``error``.
    """
    # Tracked outside the try so the finally clause can always clean up.
    tmp_path = None
    try:
        # NOTE(review): requests.get is a synchronous call inside a
        # coroutine -- confirm that blocking here is acceptable for callers.
        response = requests.get(url, timeout=30)
        response.raise_for_status()

        # delete=False because the file must outlive the `with` block so
        # _ingest_document can read it by path.
        with tempfile.NamedTemporaryFile(mode='w', suffix='.txt', delete=False) as tmp_file:
            tmp_path = tmp_file.name
            tmp_file.write(response.text)

        # Copy so the provenance keys do not leak into the caller's dict.
        metadata = dict(custom_meta) if custom_meta else {}
        metadata.update({
            "source_url": url,
            "fetch_time": time.time(),
            "content_type": response.headers.get('content-type', 'unknown')
        })

        result = await self._ingest_document(kb_name, tmp_path, metadata)

        if result["success"]:
            result["url"] = url
            result["message"] = f"Web content from {url} successfully ingested into {kb_name}"

        return result

    except Exception as e:
        return {"success": False, "error": str(e)}

    finally:
        # Remove the temporary file on every path, including failures
        # between its creation and the ingestion call.
        if tmp_path is not None:
            try:
                Path(tmp_path).unlink()
            except OSError:
                # Best-effort cleanup: a file that is already gone must not
                # turn a finished ingestion into an error.
                pass
|
898
|
+
|
899
|
+
async def _call_api(self, url: str, method: str = "GET", headers: Dict[str, str] = None,
                    payload: Dict[str, Any] = None, timeout: int = 30) -> Dict[str, Any]:
    """Make an HTTP request to an external service via ``requests.request``.

    Args:
        url: Endpoint URL.
        method: HTTP verb; upper-cased before use.
        headers: Optional request headers; sent only when non-empty.
        payload: Optional body, sent as JSON and only for POST or PUT.
        timeout: Timeout passed through to ``requests.request``.

    Returns:
        When the request completes, a dict with ``success`` True,
        ``status_code``, ``response_data`` (parsed JSON, or the raw text if
        the body is not valid JSON), ``headers``, ``url`` and ``method``.
        ``success`` reflects that a response was received; it is not derived
        from the HTTP status code. If anything raises, a dict with
        ``success`` False and the exception text in ``error``.
    """
    try:
        http_method = method.upper()

        # Prepare request
        kwargs = {
            "url": url,
            "method": http_method,
            "timeout": timeout
        }

        if headers:
            kwargs["headers"] = headers

        if payload and http_method in ("POST", "PUT"):
            kwargs["json"] = payload

        # Make request
        response = requests.request(**kwargs)

        # Parse response: fall back to the raw text when the body is not
        # JSON. Only decode failures (ValueError subclasses) are caught, so
        # KeyboardInterrupt/SystemExit are no longer swallowed here.
        try:
            response_data = response.json()
        except ValueError:
            response_data = response.text

        return {
            "success": True,
            "status_code": response.status_code,
            "response_data": response_data,
            "headers": dict(response.headers),
            "url": url,
            "method": http_method
        }

    except Exception as e:
        return {"success": False, "error": str(e)}
|