quantalogic 0.59.2__py3-none-any.whl → 0.60.0__py3-none-any.whl

This diff shows the contents of publicly available package versions released to a supported registry. It is provided for informational purposes only and reflects the changes between the two versions as they appear in their respective public registries.
Files changed (42)
  1. quantalogic/agent.py +268 -24
  2. quantalogic/create_custom_agent.py +26 -78
  3. quantalogic/prompts/chat_system_prompt.j2 +10 -7
  4. quantalogic/prompts/code_2_system_prompt.j2 +190 -0
  5. quantalogic/prompts/code_system_prompt.j2 +142 -0
  6. quantalogic/prompts/doc_system_prompt.j2 +178 -0
  7. quantalogic/prompts/legal_2_system_prompt.j2 +218 -0
  8. quantalogic/prompts/legal_system_prompt.j2 +140 -0
  9. quantalogic/prompts/system_prompt.j2 +6 -2
  10. quantalogic/prompts/task_prompt.j2 +1 -1
  11. quantalogic/prompts/tools_prompt.j2 +2 -4
  12. quantalogic/prompts.py +23 -4
  13. quantalogic/server/agent_server.py +1 -1
  14. quantalogic/tools/__init__.py +2 -0
  15. quantalogic/tools/duckduckgo_search_tool.py +1 -0
  16. quantalogic/tools/execute_bash_command_tool.py +114 -57
  17. quantalogic/tools/file_tracker_tool.py +49 -0
  18. quantalogic/tools/google_packages/google_news_tool.py +3 -0
  19. quantalogic/tools/image_generation/dalle_e.py +89 -137
  20. quantalogic/tools/rag_tool/__init__.py +2 -9
  21. quantalogic/tools/rag_tool/document_rag_sources_.py +728 -0
  22. quantalogic/tools/rag_tool/ocr_pdf_markdown.py +144 -0
  23. quantalogic/tools/replace_in_file_tool.py +1 -1
  24. quantalogic/tools/terminal_capture_tool.py +293 -0
  25. quantalogic/tools/tool.py +4 -0
  26. quantalogic/tools/utilities/__init__.py +2 -0
  27. quantalogic/tools/utilities/download_file_tool.py +3 -5
  28. quantalogic/tools/utilities/llm_tool.py +283 -0
  29. quantalogic/tools/utilities/selenium_tool.py +296 -0
  30. quantalogic/tools/utilities/vscode_tool.py +1 -1
  31. quantalogic/tools/web_navigation/__init__.py +5 -0
  32. quantalogic/tools/web_navigation/web_tool.py +145 -0
  33. quantalogic/tools/write_file_tool.py +72 -36
  34. {quantalogic-0.59.2.dist-info → quantalogic-0.60.0.dist-info}/METADATA +2 -2
  35. {quantalogic-0.59.2.dist-info → quantalogic-0.60.0.dist-info}/RECORD +38 -29
  36. quantalogic/tools/rag_tool/document_metadata.py +0 -15
  37. quantalogic/tools/rag_tool/query_response.py +0 -20
  38. quantalogic/tools/rag_tool/rag_tool.py +0 -566
  39. quantalogic/tools/rag_tool/rag_tool_beta.py +0 -264
  40. {quantalogic-0.59.2.dist-info → quantalogic-0.60.0.dist-info}/LICENSE +0 -0
  41. {quantalogic-0.59.2.dist-info → quantalogic-0.60.0.dist-info}/WHEEL +0 -0
  42. {quantalogic-0.59.2.dist-info → quantalogic-0.60.0.dist-info}/entry_points.txt +0 -0
quantalogic/tools/rag_tool/rag_tool.py
@@ -1,566 +0,0 @@
- """RAG (Retrieval Augmented Generation) Tool using LlamaIndex.
-
- This tool provides a flexible RAG implementation supporting multiple vector stores
- and embedding models, with configurable document processing options.
- """
-
- import datetime
- import json
- import os
- import time
- from enum import Enum
- from typing import Any, Dict, List, Optional, Tuple
-
- from loguru import logger
- from pydantic import BaseModel, Field
-
- from quantalogic.tools.tool import Tool, ToolArgument
-
- from .document_metadata import DocumentMetadata
- from .query_response import QueryResponse
-
-
- class EmbeddingType(str, Enum):
-     """Supported embedding model types."""
-     OPENAI = "openai"
-     HUGGINGFACE = "huggingface"
-     INSTRUCTOR = "instructor"
-     BEDROCK = "bedrock"
-
- class VectorStoreType(str, Enum):
-     """Supported vector store types."""
-     CHROMA = "chroma"
-     FAISS = "faiss"
-
- class RagToolConfig(BaseModel):
-     """Configuration for RagTool."""
-     persist_dir: Optional[str] = None
-     chunk_size: int = Field(default=512)
-     chunk_overlap: int = Field(default=50)
-     similarity_top_k: int = Field(default=4)
-     similarity_threshold: float = Field(default=0.6)
-     api_key: Optional[str] = None
-     vector_store: str = Field(default="chroma")
-     embedding_model: str = Field(default="openai")
-     document_paths: Optional[List[str]] = None
-
- class RagTool(Tool):
-     """Enhanced RAG tool with advanced features and performance optimizations."""
-
-     name: str = "rag_tool"
-     description: str = (
-         "Advanced RAG tool with metadata tracking, source attribution, "
-         "and configurable processing options."
-     )
-     arguments: List[ToolArgument] = [
-         ToolArgument(
-             name="query",
-             arg_type="string",
-             description="Query string for searching the index",
-             required=True,
-             example="What is the main topic?",
-         ),
-         ToolArgument(
-             name="top_k",
-             arg_type="int",
-             description="Number of top results to consider",
-             required=False,
-             example="5",
-         ),
-         ToolArgument(
-             name="similarity_threshold",
-             arg_type="float",
-             description="Minimum similarity score (0-1)",
-             required=False,
-             example="0.7",
-         ),
-     ]
-
-     def __init__(
-         self,
-         vector_store: str = "chroma",
-         embedding_model: str = "openai",
-         persist_dir: str = None,
-         document_paths: List[str] = None,
-         chunk_size: int = 512,
-         chunk_overlap: int = 50,
-         similarity_top_k: int = 4,
-         similarity_threshold: float = 0.6,
-         api_key: str = None,
-     ):
-         """Initialize the RAG tool with custom settings.
-
-         Args:
-             vector_store: Type of vector store to use
-             embedding_model: Type of embedding model to use
-             persist_dir: Directory to persist the index
-             document_paths: List of paths to documents to index
-             chunk_size: Size of text chunks for processing
-             chunk_overlap: Overlap between chunks
-             similarity_top_k: Number of similar chunks to retrieve
-             similarity_threshold: Minimum similarity score threshold
-             api_key: OpenAI API key for embeddings
-         """
-         super().__init__()
-
-         # Initialize config
-         self._config = RagToolConfig(
-             persist_dir=persist_dir,
-             chunk_size=chunk_size,
-             chunk_overlap=chunk_overlap,
-             similarity_top_k=similarity_top_k,
-             similarity_threshold=similarity_threshold,
-             api_key=api_key,
-             vector_store=vector_store,
-             embedding_model=embedding_model,
-             document_paths=document_paths
-         )
-
-         # Store instance attributes without loading dependencies yet
-         self._index = None
-         self._vector_store = None
-         self._storage_context = None
-         self._document_metadata = {}
-         self._dependencies_loaded = False
-
-     def _load_dependencies(self):
-         """Lazily load heavy dependencies."""
-         if not self._dependencies_loaded:
-             global VectorStoreIndex, Document, StorageContext, SentenceSplitter, VectorIndexRetriever
-             global SimilarityPostprocessor, KeywordNodePostprocessor, Settings, SimpleNodeParser
-             global OpenAIEmbedding, HuggingFaceEmbedding, InstructorEmbedding, BedrockEmbedding
-             global ChromaVectorStore, FaissVectorStore, PersistentClient
-
-             from chromadb import PersistentClient
-             from llama_index.core import (
-                 Document,
-                 KeywordNodePostprocessor,
-                 SentenceSplitter,
-                 Settings,
-                 SimilarityPostprocessor,
-                 SimpleNodeParser,
-                 StorageContext,
-                 VectorIndexRetriever,
-                 VectorStoreIndex,
-             )
-             from llama_index.embeddings.bedrock import BedrockEmbedding
-             from llama_index.embeddings.huggingface import HuggingFaceEmbedding
-             from llama_index.embeddings.instructor import InstructorEmbedding
-             from llama_index.embeddings.openai import OpenAIEmbedding
-             from llama_index.vector_stores.chroma import ChromaVectorStore
-             from llama_index.vector_stores.faiss import FaissVectorStore
-
-             self._dependencies_loaded = True
-             self._setup_components()
-
-     def _setup_components(self):
-         """Configure embeddings and settings."""
-         self._load_dependencies() # Ensure dependencies are loaded
-
-         # Create storage context
-         self._storage_context = StorageContext.from_defaults(
-             vector_store=self._vector_store
-         )
-
-         # Configure embeddings
-         embed_model = self._setup_embedding_model(self._config.embedding_model)
-
-         # Initialize settings with our configuration
-         settings = Settings(
-             embed_model=embed_model,
-             node_parser=SimpleNodeParser.from_defaults(
-                 chunk_size=self._config.chunk_size,
-                 chunk_overlap=self._config.chunk_overlap
-             ),
-             chunk_size=self._config.chunk_size,
-             chunk_overlap=self._config.chunk_overlap,
-         )
-         Settings.instance = settings
-
-         # Load existing index if available
-         if self._config.persist_dir and os.path.exists(self._config.persist_dir):
-             try:
-                 storage_context = StorageContext.from_defaults(
-                     persist_dir=self._config.persist_dir
-                 )
-                 self._index = VectorStoreIndex.load_from_storage(
-                     storage_context,
-                 )
-                 logger.info(f"Loaded existing index from {self._config.persist_dir}")
-             except Exception as e:
-                 logger.error(f"Error loading index: {str(e)}")
-                 self._index = None
-
-         # Initialize vector store
-         self._vector_store = self._setup_vector_store(
-             self._config.vector_store,
-             self._config.persist_dir
-         )
-
-         # Initialize with documents if provided
-         if self._config.document_paths:
-             self.initialize_with_documents(self._config.document_paths)
-
-     def _setup_embedding_model(self, model_type: str) -> Any:
-         """Set up the embedding model based on type.
-
-         Args:
-             model_type: Type of embedding model to use
-
-         Returns:
-             Configured embedding model instance
-         """
-         self._load_dependencies() # Ensure dependencies are loaded
-         model_type = EmbeddingType(model_type.lower())
-         if model_type == EmbeddingType.OPENAI:
-             return OpenAIEmbedding(api_key=self._config.api_key)
-         elif model_type == EmbeddingType.HUGGINGFACE:
-             return HuggingFaceEmbedding()
-         elif model_type == EmbeddingType.INSTRUCTOR:
-             return InstructorEmbedding()
-         elif model_type == EmbeddingType.BEDROCK:
-             return BedrockEmbedding()
-         else:
-             raise ValueError(f"Unsupported embedding model type: {model_type}")
-
-     def _setup_vector_store(self, store_type: str, persist_dir: str) -> Any:
-         """Set up the vector store based on type.
-
-         Args:
-             store_type: Type of vector store to use
-             persist_dir: Directory for persistence
-
-         Returns:
-             Configured vector store instance
-         """
-         self._load_dependencies() # Ensure dependencies are loaded
-         store_type = VectorStoreType(store_type.lower())
-
-         # Ensure the persist directory exists
-         os.makedirs(persist_dir, exist_ok=True)
-
-         if store_type == VectorStoreType.CHROMA:
-             # Use PersistentClient with explicit settings
-             chroma_persist_dir = os.path.join(persist_dir, "chroma")
-             os.makedirs(chroma_persist_dir, exist_ok=True)
-
-             chroma_client = PersistentClient(
-                 path=chroma_persist_dir,
-             )
-             collection = chroma_client.create_collection(
-                 name="default_collection",
-                 get_or_create=True
-             )
-             return ChromaVectorStore(
-                 chroma_collection=collection,
-             )
-         elif store_type == VectorStoreType.FAISS:
-             return FaissVectorStore()
-         else:
-             raise ValueError(f"Unsupported vector store type: {store_type}")
-
-     def _load_existing_index(self):
-         """Load existing index and metadata if available."""
-         self._load_dependencies() # Ensure dependencies are loaded
-         try:
-             metadata_path = os.path.join(self._config.persist_dir, "metadata.json")
-             if os.path.exists(metadata_path):
-                 with open(metadata_path) as f:
-                     self._document_metadata = json.load(f)
-
-             if os.path.exists(os.path.join(self._config.persist_dir, "docstore.json")):
-                 self._index = VectorStoreIndex.load_from_storage(
-                     storage_context=StorageContext.from_defaults(vector_store=self._vector_store),
-                 )
-                 logger.info(f"Loaded existing index from {self._config.persist_dir}")
-         except Exception as e:
-             logger.error(f"Failed to load existing index: {str(e)}")
-             self._index = None
-
-     def _save_metadata(self):
-         """Save document metadata to disk."""
-         try:
-             metadata_path = os.path.join(self._config.persist_dir, "metadata.json")
-             with open(metadata_path, 'w') as f:
-                 json.dump(self._document_metadata, f)
-         except Exception as e:
-             logger.error(f"Failed to save metadata: {str(e)}")
-
-     def _process_document(self, doc_path: str) -> List[Dict[str, Any]]:
-         """Process a document with advanced chunking and metadata extraction.
-
-         Args:
-             doc_path: Path to the document
-
-         Returns:
-             List of processed document chunks
-         """
-         self._load_dependencies() # Ensure dependencies are loaded
-         file_stats = os.stat(doc_path)
-         metadata = DocumentMetadata(
-             source_path=doc_path,
-             file_type=os.path.splitext(doc_path)[1],
-             creation_date=datetime.fromtimestamp(file_stats.st_ctime),
-             last_modified=datetime.fromtimestamp(file_stats.st_mtime),
-             chunk_size=self._config.chunk_size,
-             overlap=self._config.chunk_overlap,
-         )
-
-         # Load and chunk document
-         from llama_index.core import SimpleDirectoryReader # Lazy import
-         reader = SimpleDirectoryReader(
-             input_files=[doc_path],
-             file_metadata=lambda x: metadata.dict(),
-         )
-         documents = reader.load_data()
-
-         # Store metadata
-         self._document_metadata[doc_path] = metadata.dict()
-         return documents
-
-     def add_documents(self, document_path: str, custom_metadata: Optional[Dict[str, Any]] = None) -> bool:
-         """Add documents with metadata tracking.
-
-         Args:
-             document_path: Path to document or directory
-             custom_metadata: Optional custom metadata to associate
-
-         Returns:
-             bool: Success status
-         """
-         self._load_dependencies() # Ensure dependencies are loaded
-         try:
-             if not os.path.exists(document_path):
-                 logger.error(f"Document path does not exist: {document_path}")
-                 return False
-
-             # Process documents with metadata
-             documents = []
-             if os.path.isfile(document_path):
-                 documents.extend(self._process_document(document_path))
-             else:
-                 for root, _, files in os.walk(document_path):
-                     for file in files:
-                         doc_path = os.path.join(root, file)
-                         documents.extend(self._process_document(doc_path))
-
-             # Update metadata with custom fields
-             if custom_metadata:
-                 for doc_path in self._document_metadata:
-                     self._document_metadata[doc_path]["custom_metadata"] = custom_metadata
-
-             # Create or update index
-             if self._index is None:
-                 self._index = VectorStoreIndex.from_documents(
-                     documents,
-                     storage_context=StorageContext.from_defaults(vector_store=self._vector_store),
-                 )
-             else:
-                 self._index.insert_nodes(documents)
-
-             # Save metadata
-             self._save_metadata()
-             return True
-
-         except Exception as e:
-             logger.error(f"Error adding documents: {str(e)}")
-             return False
-
-     def _create_retriever(self, top_k: int) -> 'VectorIndexRetriever':
-         """Create an optimized retriever for document search.
-
-         Args:
-             top_k: Number of results to retrieve
-
-         Returns:
-             Configured retriever instance
-         """
-         self._load_dependencies() # Ensure dependencies are loaded
-         return VectorIndexRetriever(
-             index=self._index,
-             similarity_top_k=top_k * 2, # Get more candidates for better filtering
-             filters=None
-         )
-
-     def _create_query_engine(self, retriever: 'VectorIndexRetriever', threshold: float):
-         """Create a query engine with advanced processing.
-
-         Args:
-             retriever: Configured retriever instance
-             threshold: Similarity threshold for filtering
-
-         Returns:
-             Configured query engine
-         """
-         self._load_dependencies() # Ensure dependencies are loaded
-         return self._index.as_query_engine(
-             retriever=retriever,
-             node_postprocessors=[
-                 SimilarityPostprocessor(similarity_cutoff=threshold),
-                 KeywordNodePostprocessor(required_keywords=[])
-             ],
-             response_mode="compact",
-             service_context=Settings.instance.service_context
-         )
-
-     def _process_source_nodes(
-         self,
-         source_nodes: List[Any],
-         top_k: int
-     ) -> Tuple[List[Dict[str, Any]], List[float]]:
-         """Process and extract information from source nodes.
-
-         Args:
-             source_nodes: List of source nodes
-             top_k: Number of top results to return
-
-         Returns:
-             Tuple of (sources, scores)
-         """
-         self._load_dependencies() # Ensure dependencies are loaded
-         # Sort by score and take top_k
-         nodes = sorted(
-             source_nodes,
-             key=lambda x: x.score if hasattr(x, 'score') else 0,
-             reverse=True
-         )[:top_k]
-
-         sources = []
-         scores = []
-
-         for node in nodes:
-             metadata = node.node.metadata
-             source_info = {
-                 "content": node.node.text,
-                 "source_path": metadata.get("source_path", "Unknown"),
-                 "chunk_index": metadata.get("chunk_index", 0),
-                 "file_type": metadata.get("file_type", "Unknown"),
-                 "page_number": metadata.get("page_number", None),
-                 "section": metadata.get("section", None)
-             }
-             sources.append(source_info)
-             scores.append(node.score if hasattr(node, 'score') else 0.0)
-
-         return sources, scores
-
-     def execute(
-         self,
-         query: str,
-         top_k: Optional[int] = None,
-         similarity_threshold: Optional[float] = None,
-     ) -> QueryResponse:
-         """Execute a query against the indexed documents.
-
-         Args:
-             query: Query string
-             top_k: Optional number of results to return
-             similarity_threshold: Optional similarity threshold
-
-         Returns:
-             QueryResponse with answer and sources
-         """
-         self._load_dependencies() # Ensure dependencies are loaded
-         start_time = time.time()
-         try:
-             if not self._index:
-                 logger.error("No index available. Please add documents first.")
-                 return QueryResponse(
-                     answer="No documents have been indexed yet. Please add documents first.",
-                     sources=[],
-                     relevance_scores=[],
-                     total_chunks_searched=0,
-                     query_time_ms=round((time.time() - start_time) * 1000, 2)
-                 )
-
-             # Configure parameters
-             top_k = top_k or self._config.similarity_top_k
-             threshold = similarity_threshold or self._config.similarity_threshold
-
-             # Set up retrieval pipeline
-             retriever = self._create_retriever(top_k)
-             query_engine = self._create_query_engine(retriever, threshold)
-
-             # Execute query
-             response = query_engine.query(query)
-
-             if not hasattr(response, 'source_nodes') or not response.source_nodes:
-                 logger.warning(
-                     f"Query '{query}' returned no results "
-                     f"(top_k={top_k}, threshold={threshold})"
-                 )
-                 return QueryResponse(
-                     answer="No relevant information found. Try adjusting the similarity threshold or increasing top_k.",
-                     sources=[],
-                     relevance_scores=[],
-                     total_chunks_searched=0,
-                     query_time_ms=round((time.time() - start_time) * 1000, 2)
-                 )
-
-             # Process results
-             sources, scores = self._process_source_nodes(
-                 response.source_nodes,
-                 top_k
-             )
-
-             return QueryResponse(
-                 answer=str(response),
-                 sources=sources,
-                 relevance_scores=scores,
-                 total_chunks_searched=len(response.source_nodes),
-                 query_time_ms=round((time.time() - start_time) * 1000, 2)
-             )
-
-         except Exception as e:
-             logger.error(f"Error in RAG query: {str(e)}")
-             return QueryResponse(
-                 answer=f"An error occurred while processing your query: {str(e)}",
-                 sources=[],
-                 relevance_scores=[],
-                 total_chunks_searched=0,
-                 query_time_ms=round((time.time() - start_time) * 1000, 2)
-             )
-
-     def initialize_with_documents(self, document_paths: List[str]) -> None:
-         """Initialize the index with the given documents.
-
-         Args:
-             document_paths: List of paths to documents to index
-         """
-         self._load_dependencies() # Ensure dependencies are loaded
-         try:
-             all_documents = []
-             for doc_path in document_paths:
-                 documents = self._process_document(doc_path)
-                 all_documents.extend(documents)
-
-             if all_documents:
-                 self._index = VectorStoreIndex.from_documents(
-                     all_documents,
-                     storage_context=self._storage_context,
-                 )
-
-                 if self._config.persist_dir:
-                     self._storage_context.persist(persist_dir=self._config.persist_dir)
-                     logger.info(f"Created and persisted new index with {len(all_documents)} documents")
-             else:
-                 logger.warning("No valid documents found in provided paths")
-
-         except Exception as e:
-             logger.error(f"Error initializing with documents: {str(e)}")
-             raise RuntimeError(f"Failed to initialize with documents: {str(e)}")
-
-
- if __name__ == "__main__":
-     # Example usage
-     tool = RagTool(
-         vector_store="chroma",
-         embedding_model="openai",
-         persist_dir="./storage/rag",
-         document_paths=[
-             "./docs/file1.pdf",
-             "./docs/directory1"
-         ]
-     )
-
-     # Query
-     print(tool.execute("What is the main topic?"))
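For reference, a minimal sketch of how the removed RagTool was driven from caller code, based only on the signatures shown in the diff above (the import path follows the module location quantalogic/tools/rag_tool/rag_tool.py in 0.59.2; file paths and parameter values are illustrative, not taken from the package):

    # Sketch of pre-0.60.0 usage; RagTool was removed in 0.60.0.
    from quantalogic.tools.rag_tool.rag_tool import RagTool

    # Index an illustrative document set into a persistent Chroma store.
    tool = RagTool(
        vector_store="chroma",
        embedding_model="openai",
        persist_dir="./storage/rag",
        document_paths=["./docs/file1.pdf"],
    )

    # Per-query overrides of the configured defaults
    # (similarity_top_k=4, similarity_threshold=0.6).
    response = tool.execute(
        "What is the main topic?",
        top_k=5,
        similarity_threshold=0.7,
    )
    print(response.answer)            # synthesized answer string
    print(response.relevance_scores)  # one score per returned source

The execute() entry point returns a QueryResponse whose answer, sources, relevance_scores, total_chunks_searched, and query_time_ms fields are all visible in the deleted code, so the attribute accesses above follow directly from it.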