quantalogic 0.59.3__py3-none-any.whl → 0.61.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- quantalogic/agent.py +268 -24
- quantalogic/agent_config.py +5 -5
- quantalogic/agent_factory.py +2 -2
- quantalogic/codeact/__init__.py +0 -0
- quantalogic/codeact/agent.py +499 -0
- quantalogic/codeact/cli.py +232 -0
- quantalogic/codeact/constants.py +9 -0
- quantalogic/codeact/events.py +78 -0
- quantalogic/codeact/llm_util.py +76 -0
- quantalogic/codeact/prompts/error_format.j2 +11 -0
- quantalogic/codeact/prompts/generate_action.j2 +26 -0
- quantalogic/codeact/prompts/generate_program.j2 +39 -0
- quantalogic/codeact/prompts/response_format.j2 +11 -0
- quantalogic/codeact/tools_manager.py +135 -0
- quantalogic/codeact/utils.py +135 -0
- quantalogic/coding_agent.py +2 -2
- quantalogic/create_custom_agent.py +26 -78
- quantalogic/prompts/chat_system_prompt.j2 +10 -7
- quantalogic/prompts/code_2_system_prompt.j2 +190 -0
- quantalogic/prompts/code_system_prompt.j2 +142 -0
- quantalogic/prompts/doc_system_prompt.j2 +178 -0
- quantalogic/prompts/legal_2_system_prompt.j2 +218 -0
- quantalogic/prompts/legal_system_prompt.j2 +140 -0
- quantalogic/prompts/system_prompt.j2 +6 -2
- quantalogic/prompts/tools_prompt.j2 +2 -4
- quantalogic/prompts.py +23 -4
- quantalogic/python_interpreter/__init__.py +23 -0
- quantalogic/python_interpreter/assignment_visitors.py +63 -0
- quantalogic/python_interpreter/base_visitors.py +20 -0
- quantalogic/python_interpreter/class_visitors.py +22 -0
- quantalogic/python_interpreter/comprehension_visitors.py +172 -0
- quantalogic/python_interpreter/context_visitors.py +59 -0
- quantalogic/python_interpreter/control_flow_visitors.py +88 -0
- quantalogic/python_interpreter/exception_visitors.py +109 -0
- quantalogic/python_interpreter/exceptions.py +39 -0
- quantalogic/python_interpreter/execution.py +202 -0
- quantalogic/python_interpreter/function_utils.py +386 -0
- quantalogic/python_interpreter/function_visitors.py +209 -0
- quantalogic/python_interpreter/import_visitors.py +28 -0
- quantalogic/python_interpreter/interpreter_core.py +358 -0
- quantalogic/python_interpreter/literal_visitors.py +74 -0
- quantalogic/python_interpreter/misc_visitors.py +148 -0
- quantalogic/python_interpreter/operator_visitors.py +108 -0
- quantalogic/python_interpreter/scope.py +10 -0
- quantalogic/python_interpreter/visit_handlers.py +110 -0
- quantalogic/server/agent_server.py +1 -1
- quantalogic/tools/__init__.py +6 -3
- quantalogic/tools/action_gen.py +366 -0
- quantalogic/tools/duckduckgo_search_tool.py +1 -0
- quantalogic/tools/execute_bash_command_tool.py +114 -57
- quantalogic/tools/file_tracker_tool.py +49 -0
- quantalogic/tools/google_packages/google_news_tool.py +3 -0
- quantalogic/tools/image_generation/dalle_e.py +89 -137
- quantalogic/tools/python_tool.py +13 -0
- quantalogic/tools/rag_tool/__init__.py +2 -9
- quantalogic/tools/rag_tool/document_rag_sources_.py +728 -0
- quantalogic/tools/rag_tool/ocr_pdf_markdown.py +144 -0
- quantalogic/tools/replace_in_file_tool.py +1 -1
- quantalogic/tools/{search_definition_names.py → search_definition_names_tool.py} +2 -2
- quantalogic/tools/terminal_capture_tool.py +293 -0
- quantalogic/tools/tool.py +120 -22
- quantalogic/tools/utilities/__init__.py +2 -0
- quantalogic/tools/utilities/download_file_tool.py +3 -5
- quantalogic/tools/utilities/llm_tool.py +283 -0
- quantalogic/tools/utilities/selenium_tool.py +296 -0
- quantalogic/tools/utilities/vscode_tool.py +1 -1
- quantalogic/tools/web_navigation/__init__.py +5 -0
- quantalogic/tools/web_navigation/web_tool.py +145 -0
- quantalogic/tools/write_file_tool.py +72 -36
- quantalogic/utils/__init__.py +0 -1
- quantalogic/utils/test_python_interpreter.py +119 -0
- {quantalogic-0.59.3.dist-info → quantalogic-0.61.0.dist-info}/METADATA +7 -2
- {quantalogic-0.59.3.dist-info → quantalogic-0.61.0.dist-info}/RECORD +76 -35
- quantalogic/tools/rag_tool/document_metadata.py +0 -15
- quantalogic/tools/rag_tool/query_response.py +0 -20
- quantalogic/tools/rag_tool/rag_tool.py +0 -566
- quantalogic/tools/rag_tool/rag_tool_beta.py +0 -264
- quantalogic/utils/python_interpreter.py +0 -905
- {quantalogic-0.59.3.dist-info → quantalogic-0.61.0.dist-info}/LICENSE +0 -0
- {quantalogic-0.59.3.dist-info → quantalogic-0.61.0.dist-info}/WHEEL +0 -0
- {quantalogic-0.59.3.dist-info → quantalogic-0.61.0.dist-info}/entry_points.txt +0 -0
@@ -1,566 +0,0 @@
|
|
1
|
-
"""RAG (Retrieval Augmented Generation) Tool using LlamaIndex.
|
2
|
-
|
3
|
-
This tool provides a flexible RAG implementation supporting multiple vector stores
|
4
|
-
and embedding models, with configurable document processing options.
|
5
|
-
"""
|
6
|
-
|
7
|
-
import datetime
|
8
|
-
import json
|
9
|
-
import os
|
10
|
-
import time
|
11
|
-
from enum import Enum
|
12
|
-
from typing import Any, Dict, List, Optional, Tuple
|
13
|
-
|
14
|
-
from loguru import logger
|
15
|
-
from pydantic import BaseModel, Field
|
16
|
-
|
17
|
-
from quantalogic.tools.tool import Tool, ToolArgument
|
18
|
-
|
19
|
-
from .document_metadata import DocumentMetadata
|
20
|
-
from .query_response import QueryResponse
|
21
|
-
|
22
|
-
|
23
|
-
class EmbeddingType(str, Enum):
    """Identifiers of the embedding back-ends this module can configure.

    Members are also plain strings, so a configuration value such as
    ``"openai"`` can be converted with ``EmbeddingType("openai")``.
    """

    # One member per branch handled by RagTool._setup_embedding_model.
    OPENAI = "openai"
    HUGGINGFACE = "huggingface"
    INSTRUCTOR = "instructor"
    BEDROCK = "bedrock"
|
29
|
-
|
30
|
-
class VectorStoreType(str, Enum):
    """Identifiers of the vector-store back-ends this module can configure.

    Members are also plain strings, so a configuration value such as
    ``"chroma"`` can be converted with ``VectorStoreType("chroma")``.
    """

    # One member per branch handled by RagTool._setup_vector_store.
    CHROMA = "chroma"
    FAISS = "faiss"
|
34
|
-
|
35
|
-
class RagToolConfig(BaseModel):
    """Validated bundle of the settings passed to ``RagTool.__init__``."""

    # Directory used for on-disk persistence; None means "not configured".
    persist_dir: Optional[str] = None
    # Chunking parameters forwarded to the node parser.
    chunk_size: int = 512
    chunk_overlap: int = 50
    # Retrieval defaults used when a query does not override them.
    similarity_top_k: int = 4
    similarity_threshold: float = 0.6
    # API key handed to the OpenAI embedding model.
    api_key: Optional[str] = None
    # Back-end selectors; expected to match VectorStoreType / EmbeddingType values.
    vector_store: str = "chroma"
    embedding_model: str = "openai"
    # Documents to index when the tool's components are first set up.
    document_paths: Optional[List[str]] = None
|
46
|
-
|
47
|
-
class RagTool(Tool):
    """Retrieval-augmented generation tool built on a LlamaIndex vector index.

    The heavy third-party dependencies are imported lazily the first time a
    method needs them (see ``_load_dependencies``), so constructing the tool
    is cheap. Documents can be indexed at set-up time (``document_paths``) or
    later through ``add_documents``; queries go through ``execute``.
    """

    name: str = "rag_tool"
    description: str = (
        "Advanced RAG tool with metadata tracking, source attribution, "
        "and configurable processing options."
    )
    arguments: List[ToolArgument] = [
        ToolArgument(
            name="query",
            arg_type="string",
            description="Query string for searching the index",
            required=True,
            example="What is the main topic?",
        ),
        ToolArgument(
            name="top_k",
            arg_type="int",
            description="Number of top results to consider",
            required=False,
            example="5",
        ),
        ToolArgument(
            name="similarity_threshold",
            arg_type="float",
            description="Minimum similarity score (0-1)",
            required=False,
            example="0.7",
        ),
    ]

    def __init__(
        self,
        vector_store: str = "chroma",
        embedding_model: str = "openai",
        persist_dir: Optional[str] = None,
        document_paths: Optional[List[str]] = None,
        chunk_size: int = 512,
        chunk_overlap: int = 50,
        similarity_top_k: int = 4,
        similarity_threshold: float = 0.6,
        api_key: Optional[str] = None,
    ):
        """Store the configuration; nothing heavy is loaded here.

        Args:
            vector_store: Type of vector store to use (a ``VectorStoreType`` value).
            embedding_model: Type of embedding model to use (an ``EmbeddingType`` value).
            persist_dir: Directory to persist the index. Required for the
                Chroma vector store.
            document_paths: Paths (files or directories) to index when the
                components are first set up.
            chunk_size: Size of text chunks for processing.
            chunk_overlap: Overlap between chunks.
            similarity_top_k: Default number of similar chunks to retrieve.
            similarity_threshold: Default minimum similarity score.
            api_key: OpenAI API key for embeddings.
        """
        super().__init__()

        self._config = RagToolConfig(
            persist_dir=persist_dir,
            chunk_size=chunk_size,
            chunk_overlap=chunk_overlap,
            similarity_top_k=similarity_top_k,
            similarity_threshold=similarity_threshold,
            api_key=api_key,
            vector_store=vector_store,
            embedding_model=embedding_model,
            document_paths=document_paths,
        )

        # Runtime state; populated by _setup_components on first use.
        self._index = None
        self._vector_store = None
        self._storage_context = None
        self._document_metadata = {}
        self._dependencies_loaded = False

    def _load_dependencies(self):
        """Import the heavy dependencies once and then set up the components.

        The imported names are published as module globals so the other
        methods can use them unqualified.
        """
        if not self._dependencies_loaded:
            global VectorStoreIndex, Document, StorageContext, SentenceSplitter, VectorIndexRetriever
            global SimilarityPostprocessor, KeywordNodePostprocessor, Settings, SimpleNodeParser
            global OpenAIEmbedding, HuggingFaceEmbedding, InstructorEmbedding, BedrockEmbedding
            global ChromaVectorStore, FaissVectorStore, PersistentClient

            # NOTE(review): several of these names may live in llama_index
            # sub-packages rather than llama_index.core in current releases;
            # verify against the pinned llama-index version.
            from chromadb import PersistentClient
            from llama_index.core import (
                Document,
                KeywordNodePostprocessor,
                SentenceSplitter,
                Settings,
                SimilarityPostprocessor,
                SimpleNodeParser,
                StorageContext,
                VectorIndexRetriever,
                VectorStoreIndex,
            )
            from llama_index.embeddings.bedrock import BedrockEmbedding
            from llama_index.embeddings.huggingface import HuggingFaceEmbedding
            from llama_index.embeddings.instructor import InstructorEmbedding
            from llama_index.embeddings.openai import OpenAIEmbedding
            from llama_index.vector_stores.chroma import ChromaVectorStore
            from llama_index.vector_stores.faiss import FaissVectorStore

            # The flag is set before _setup_components because that method
            # (and the helpers it calls) call back into _load_dependencies.
            self._dependencies_loaded = True
            self._setup_components()

    def _setup_components(self):
        """Create the vector store, storage context, settings and index."""
        self._load_dependencies()  # Ensure dependencies are loaded

        persist_dir = self._config.persist_dir
        # Must be evaluated before the vector store is set up, because that
        # step creates the directory and would make a first run look like a
        # reload.
        had_persisted_state = bool(persist_dir) and os.path.exists(persist_dir)

        # The vector store has to exist before the storage context is built
        # from it; otherwise the context is created with vector_store=None.
        self._vector_store = self._setup_vector_store(
            self._config.vector_store,
            persist_dir,
        )
        self._storage_context = StorageContext.from_defaults(
            vector_store=self._vector_store
        )

        # Configure embeddings
        embed_model = self._setup_embedding_model(self._config.embedding_model)

        # NOTE(review): Settings is used here as a constructible class with
        # an `instance` attribute; confirm this matches the installed
        # llama-index API.
        settings = Settings(
            embed_model=embed_model,
            node_parser=SimpleNodeParser.from_defaults(
                chunk_size=self._config.chunk_size,
                chunk_overlap=self._config.chunk_overlap,
            ),
            chunk_size=self._config.chunk_size,
            chunk_overlap=self._config.chunk_overlap,
        )
        Settings.instance = settings

        # Load existing index if one was persisted by an earlier run
        if had_persisted_state:
            try:
                storage_context = StorageContext.from_defaults(
                    vector_store=self._vector_store,
                    persist_dir=persist_dir,
                )
                self._index = VectorStoreIndex.load_from_storage(
                    storage_context,
                )
                logger.info(f"Loaded existing index from {self._config.persist_dir}")
            except Exception as e:
                # Best effort: a missing or corrupt store just means we
                # start without an index.
                logger.error(f"Error loading index: {str(e)}")
                self._index = None

        # Initialize with documents if provided
        if self._config.document_paths:
            self.initialize_with_documents(self._config.document_paths)

    def _setup_embedding_model(self, model_type: str) -> Any:
        """Set up the embedding model based on type.

        Args:
            model_type: Type of embedding model to use (case-insensitive).

        Returns:
            Configured embedding model instance.

        Raises:
            ValueError: If ``model_type`` is not an ``EmbeddingType`` value.
        """
        self._load_dependencies()  # Ensure dependencies are loaded
        model_type = EmbeddingType(model_type.lower())
        if model_type == EmbeddingType.OPENAI:
            return OpenAIEmbedding(api_key=self._config.api_key)
        elif model_type == EmbeddingType.HUGGINGFACE:
            return HuggingFaceEmbedding()
        elif model_type == EmbeddingType.INSTRUCTOR:
            return InstructorEmbedding()
        elif model_type == EmbeddingType.BEDROCK:
            return BedrockEmbedding()
        else:
            # Defensive: only reachable if a member is added to the enum
            # without a branch above.
            raise ValueError(f"Unsupported embedding model type: {model_type}")

    def _setup_vector_store(self, store_type: str, persist_dir: Optional[str]) -> Any:
        """Set up the vector store based on type.

        Args:
            store_type: Type of vector store to use (case-insensitive).
            persist_dir: Directory for persistence; may be None for stores
                that do not need one.

        Returns:
            Configured vector store instance.

        Raises:
            ValueError: If ``store_type`` is not a ``VectorStoreType`` value,
                or if Chroma is selected without a ``persist_dir``.
        """
        self._load_dependencies()  # Ensure dependencies are loaded
        store_type = VectorStoreType(store_type.lower())

        # Ensure the persist directory exists (os.makedirs rejects None)
        if persist_dir:
            os.makedirs(persist_dir, exist_ok=True)

        if store_type == VectorStoreType.CHROMA:
            if not persist_dir:
                raise ValueError(
                    "persist_dir is required when using the Chroma vector store"
                )
            # Chroma gets its own sub-directory inside persist_dir
            chroma_persist_dir = os.path.join(persist_dir, "chroma")
            os.makedirs(chroma_persist_dir, exist_ok=True)

            chroma_client = PersistentClient(
                path=chroma_persist_dir,
            )
            collection = chroma_client.create_collection(
                name="default_collection",
                get_or_create=True,
            )
            return ChromaVectorStore(
                chroma_collection=collection,
            )
        elif store_type == VectorStoreType.FAISS:
            # NOTE(review): FaissVectorStore may require a faiss index
            # argument; confirm against the installed integration.
            return FaissVectorStore()
        else:
            raise ValueError(f"Unsupported vector store type: {store_type}")

    def _load_existing_index(self):
        """Load the saved metadata and index from ``persist_dir``, if present.

        Failures are logged and leave ``self._index`` set to None. Does
        nothing when no ``persist_dir`` is configured.
        """
        self._load_dependencies()  # Ensure dependencies are loaded
        persist_dir = self._config.persist_dir
        if not persist_dir:
            return
        try:
            metadata_path = os.path.join(persist_dir, "metadata.json")
            if os.path.exists(metadata_path):
                with open(metadata_path, encoding="utf-8") as f:
                    self._document_metadata = json.load(f)

            if os.path.exists(os.path.join(persist_dir, "docstore.json")):
                self._index = VectorStoreIndex.load_from_storage(
                    storage_context=StorageContext.from_defaults(vector_store=self._vector_store),
                )
                logger.info(f"Loaded existing index from {self._config.persist_dir}")
        except Exception as e:
            logger.error(f"Failed to load existing index: {str(e)}")
            self._index = None

    def _json_default(self, value: Any) -> str:
        """Serialise date/datetime values for ``json.dump``; reject others."""
        if isinstance(value, (datetime.datetime, datetime.date)):
            return value.isoformat()
        raise TypeError(
            f"Object of type {type(value).__name__} is not JSON serializable"
        )

    def _save_metadata(self):
        """Write the tracked document metadata to ``persist_dir/metadata.json``.

        Does nothing when no ``persist_dir`` is configured. Errors are
        logged rather than raised.
        """
        persist_dir = self._config.persist_dir
        if not persist_dir:
            return
        try:
            metadata_path = os.path.join(persist_dir, "metadata.json")
            with open(metadata_path, 'w', encoding="utf-8") as f:
                # Metadata holds datetime values (creation / modification
                # time), which json cannot encode without a default hook.
                json.dump(self._document_metadata, f, default=self._json_default)
        except Exception as e:
            logger.error(f"Failed to save metadata: {str(e)}")

    def _expand_document_path(self, path: str) -> List[str]:
        """Return every file below ``path`` if it is a directory, else ``[path]``.

        A path that is not a directory is returned unchanged so that a
        missing file still fails loudly when it is processed.
        """
        if os.path.isdir(path):
            return [
                os.path.join(root, file_name)
                for root, _, files in os.walk(path)
                for file_name in files
            ]
        return [path]

    def _process_document(self, doc_path: str) -> List[Dict[str, Any]]:
        """Load one file and record its metadata.

        Args:
            doc_path: Path to the document.

        Returns:
            The documents produced by the reader for this file.
        """
        self._load_dependencies()  # Ensure dependencies are loaded
        file_stats = os.stat(doc_path)
        metadata = DocumentMetadata(
            source_path=doc_path,
            file_type=os.path.splitext(doc_path)[1],
            # `datetime` is the module here, so the class must be qualified.
            creation_date=datetime.datetime.fromtimestamp(file_stats.st_ctime),
            last_modified=datetime.datetime.fromtimestamp(file_stats.st_mtime),
            chunk_size=self._config.chunk_size,
            overlap=self._config.chunk_overlap,
        )

        # Load the document; every loaded piece carries the same metadata
        from llama_index.core import SimpleDirectoryReader  # Lazy import
        reader = SimpleDirectoryReader(
            input_files=[doc_path],
            file_metadata=lambda _path: metadata.dict(),
        )
        documents = reader.load_data()

        # Store metadata
        self._document_metadata[doc_path] = metadata.dict()
        return documents

    def add_documents(self, document_path: str, custom_metadata: Optional[Dict[str, Any]] = None) -> bool:
        """Add documents with metadata tracking.

        Args:
            document_path: Path to a document or a directory (walked
                recursively).
            custom_metadata: Optional custom metadata to associate with the
                documents added by this call.

        Returns:
            bool: True on success, False if the path does not exist or any
            step fails (the error is logged).
        """
        self._load_dependencies()  # Ensure dependencies are loaded
        try:
            if not os.path.exists(document_path):
                logger.error(f"Document path does not exist: {document_path}")
                return False

            # Process documents with metadata
            added_paths = self._expand_document_path(document_path)
            documents = []
            for doc_path in added_paths:
                documents.extend(self._process_document(doc_path))

            # Attach custom fields only to the documents added by this call,
            # not to everything tracked so far.
            if custom_metadata:
                for doc_path in added_paths:
                    self._document_metadata[doc_path]["custom_metadata"] = custom_metadata

            # Create or update index
            if self._index is None:
                self._index = VectorStoreIndex.from_documents(
                    documents,
                    storage_context=StorageContext.from_defaults(vector_store=self._vector_store),
                )
            else:
                self._index.insert_nodes(documents)

            # Save metadata
            self._save_metadata()
            return True

        except Exception as e:
            logger.error(f"Error adding documents: {str(e)}")
            return False

    def _create_retriever(self, top_k: int) -> 'VectorIndexRetriever':
        """Create a retriever over the current index.

        Args:
            top_k: Number of results the caller ultimately wants.

        Returns:
            Configured retriever instance.
        """
        self._load_dependencies()  # Ensure dependencies are loaded
        return VectorIndexRetriever(
            index=self._index,
            similarity_top_k=top_k * 2,  # Get more candidates for better filtering
            filters=None,
        )

    def _create_query_engine(self, retriever: 'VectorIndexRetriever', threshold: float):
        """Create a query engine that filters results by similarity.

        Args:
            retriever: Configured retriever instance.
            threshold: Similarity cutoff applied by the postprocessor.

        Returns:
            Configured query engine.
        """
        self._load_dependencies()  # Ensure dependencies are loaded
        return self._index.as_query_engine(
            retriever=retriever,
            node_postprocessors=[
                SimilarityPostprocessor(similarity_cutoff=threshold),
                KeywordNodePostprocessor(required_keywords=[]),
            ],
            response_mode="compact",
            # Relies on Settings.instance assigned in _setup_components.
            service_context=Settings.instance.service_context,
        )

    def _node_score(self, node: Any) -> float:
        """Return ``node.score``, treating a missing or None score as 0.0."""
        score = getattr(node, 'score', None)
        return 0.0 if score is None else score

    def _process_source_nodes(
        self,
        source_nodes: List[Any],
        top_k: int
    ) -> Tuple[List[Dict[str, Any]], List[float]]:
        """Extract source information from the best-scoring nodes.

        Args:
            source_nodes: List of source nodes returned by a query.
            top_k: Number of top results to return.

        Returns:
            Tuple of (sources, scores), both ordered by descending score.
        """
        self._load_dependencies()  # Ensure dependencies are loaded
        # Sort by score and take top_k; None scores sort as 0.0 instead of
        # raising a comparison error.
        nodes = sorted(source_nodes, key=self._node_score, reverse=True)[:top_k]

        sources = []
        scores = []

        for node in nodes:
            metadata = node.node.metadata
            sources.append({
                "content": node.node.text,
                "source_path": metadata.get("source_path", "Unknown"),
                "chunk_index": metadata.get("chunk_index", 0),
                "file_type": metadata.get("file_type", "Unknown"),
                "page_number": metadata.get("page_number", None),
                "section": metadata.get("section", None),
            })
            scores.append(self._node_score(node))

        return sources, scores

    def execute(
        self,
        query: str,
        top_k: Optional[int] = None,
        similarity_threshold: Optional[float] = None,
    ) -> QueryResponse:
        """Execute a query against the indexed documents.

        Args:
            query: Query string.
            top_k: Optional number of results to return; the configured
                default is used when omitted (or 0).
            similarity_threshold: Optional similarity threshold; the
                configured default is used only when this is None, so an
                explicit 0.0 is honoured.

        Returns:
            QueryResponse with answer and sources. Errors during the query
            are reported in the answer text rather than raised.
        """
        self._load_dependencies()  # Ensure dependencies are loaded
        start_time = time.time()
        try:
            if not self._index:
                logger.error("No index available. Please add documents first.")
                return QueryResponse(
                    answer="No documents have been indexed yet. Please add documents first.",
                    sources=[],
                    relevance_scores=[],
                    total_chunks_searched=0,
                    query_time_ms=round((time.time() - start_time) * 1000, 2)
                )

            # Configure parameters
            top_k = top_k or self._config.similarity_top_k
            if similarity_threshold is None:
                threshold = self._config.similarity_threshold
            else:
                threshold = similarity_threshold

            # Set up retrieval pipeline
            retriever = self._create_retriever(top_k)
            query_engine = self._create_query_engine(retriever, threshold)

            # Execute query
            response = query_engine.query(query)

            if not hasattr(response, 'source_nodes') or not response.source_nodes:
                logger.warning(
                    f"Query '{query}' returned no results "
                    f"(top_k={top_k}, threshold={threshold})"
                )
                return QueryResponse(
                    answer="No relevant information found. Try adjusting the similarity threshold or increasing top_k.",
                    sources=[],
                    relevance_scores=[],
                    total_chunks_searched=0,
                    query_time_ms=round((time.time() - start_time) * 1000, 2)
                )

            # Process results
            sources, scores = self._process_source_nodes(
                response.source_nodes,
                top_k
            )

            return QueryResponse(
                answer=str(response),
                sources=sources,
                relevance_scores=scores,
                total_chunks_searched=len(response.source_nodes),
                query_time_ms=round((time.time() - start_time) * 1000, 2)
            )

        except Exception as e:
            logger.error(f"Error in RAG query: {str(e)}")
            return QueryResponse(
                answer=f"An error occurred while processing your query: {str(e)}",
                sources=[],
                relevance_scores=[],
                total_chunks_searched=0,
                query_time_ms=round((time.time() - start_time) * 1000, 2)
            )

    def initialize_with_documents(self, document_paths: List[str]) -> None:
        """Build a fresh index from the given documents.

        Args:
            document_paths: Paths to documents to index; directories are
                walked recursively.

        Raises:
            RuntimeError: If loading or indexing fails; the original
                exception is chained as the cause.
        """
        self._load_dependencies()  # Ensure dependencies are loaded
        try:
            all_documents = []
            for entry in document_paths:
                for doc_path in self._expand_document_path(entry):
                    all_documents.extend(self._process_document(doc_path))

            if all_documents:
                self._index = VectorStoreIndex.from_documents(
                    all_documents,
                    storage_context=self._storage_context,
                )

                if self._config.persist_dir:
                    self._storage_context.persist(persist_dir=self._config.persist_dir)
                logger.info(f"Created and persisted new index with {len(all_documents)} documents")
            else:
                logger.warning("No valid documents found in provided paths")

        except Exception as e:
            logger.error(f"Error initializing with documents: {str(e)}")
            raise RuntimeError(f"Failed to initialize with documents: {str(e)}") from e
|
551
|
-
|
552
|
-
|
553
|
-
if __name__ == "__main__":
    # Demo: build a tool over a sample document set, then run one query.
    sample_paths = [
        "./docs/file1.pdf",
        "./docs/directory1",
    ]
    tool = RagTool(
        vector_store="chroma",
        embedding_model="openai",
        persist_dir="./storage/rag",
        document_paths=sample_paths,
    )

    # Dependencies load and documents are indexed on this first call.
    answer = tool.execute("What is the main topic?")
    print(answer)
|