praisonaiagents 0.0.29__py3-none-any.whl → 0.0.53__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,321 @@
1
+ import os
2
+ import logging
3
+ import uuid
4
+ import time
5
+ from .chunking import Chunking
6
+ from functools import cached_property
7
+ from rich.progress import Progress, SpinnerColumn, TextColumn, BarColumn, TaskProgressColumn
8
+
9
+ logger = logging.getLogger(__name__)
10
+
11
class CustomMemory:
    """Builds a mem0 ``Memory`` subclass whose add path skips the LLM.

    The class is never instantiated itself; ``from_config`` creates a
    dynamic subclass of ``mem0.Memory`` with ``_add_to_vector_store``
    replaced by the implementation defined here.
    """

    @classmethod
    def from_config(cls, config):
        """Return a patched ``Memory`` instance built from ``config``.

        ``mem0`` is imported lazily so that importing this module does not
        require the optional dependency.
        """
        from mem0 import Memory

        overrides = {'_add_to_vector_store': cls._add_to_vector_store}
        patched_cls = type('CustomMemory', (Memory,), overrides)
        return patched_cls.from_config(config)

    @staticmethod
    def _add_to_vector_store(self, messages, metadata, filters):
        """Store the joined message contents as a single memory.

        Declared as a staticmethod with an explicit ``self`` because the
        plain function is injected into the dynamic ``Memory`` subclass,
        where it is bound like a regular method.

        Args:
            self: The ``Memory`` instance the function ends up bound to.
            messages: Iterable of mappings, each with a ``"content"`` key.
            metadata: Passed through to ``self._create_memory``.
            filters: Accepted for signature compatibility; not used here.

        Returns:
            A one-element list describing the created memory
            (``id``, ``memory`` and ``event`` keys).
        """
        # The whole conversation becomes one "fact"; no LLM extraction step.
        joined_text = "\n".join(entry["content"] for entry in messages)

        # Embed once and hand the vector over keyed by the text itself.
        embeddings_by_text = {joined_text: self.embedding_model.embed(joined_text)}

        created_id = self._create_memory(
            data=joined_text,
            existing_embeddings=embeddings_by_text,
            metadata=metadata,
        )

        return [{"id": created_id, "memory": joined_text, "event": "ADD"}]
45
+
46
class Knowledge:
    """Knowledge store that ingests text or files into a mem0 ``Memory``.

    Heavy dependencies (chromadb, markitdown, mem0) are resolved lazily via
    ``cached_property`` so that constructing the object is cheap and only
    fails when the optional packages are actually needed.

    Args:
        config: Optional dict whose ``version``, ``vector_store``,
            ``embedder`` and ``llm`` entries override the defaults built in
            ``config``.
        verbose: Verbosity level; falsy values mean quiet. Messages passed
            to ``_log`` are emitted when ``verbose >= level``.
    """

    # Suffixes that make ``store`` treat a string as a file path.
    _FILE_PATH_SUFFIXES = ('.pdf', '.doc', '.docx', '.txt')

    # Inputs recognised as files by ``add``, grouped by kind. Only the
    # 'text' group is read directly; everything else goes through MarkItDown.
    _DOCUMENT_EXTENSIONS = {
        'document': ('.pdf', '.ppt', '.pptx', '.doc', '.docx', '.xls', '.xlsx'),
        'media': ('.jpg', '.jpeg', '.png', '.gif', '.bmp', '.tiff', '.mp3', '.wav', '.ogg', '.m4a'),
        'text': ('.txt', '.csv', '.json', '.xml', '.md', '.html', '.htm'),
        'archive': ('.zip',),
    }

    # Flattened view of the table above, usable directly with str.endswith.
    _SUPPORTED_EXTENSIONS = tuple(
        ext for exts in _DOCUMENT_EXTENSIONS.values() for ext in exts
    )

    def __init__(self, config=None, verbose=None):
        self._config = config
        self._verbose = verbose or 0
        os.environ['ANONYMIZED_TELEMETRY'] = 'False'  # Chromadb

        # Configure logging levels based on verbose setting
        if not self._verbose:
            # Suppress logs from all relevant dependencies
            for logger_name in [
                'mem0',
                'chromadb',
                'local_persistent_hnsw',
                '_client',
                'main'
            ]:
                logging.getLogger(logger_name).setLevel(logging.WARNING)

            # Disable OpenAI API request logging
            logging.getLogger('openai').setLevel(logging.WARNING)

            # Set root logger to warning to catch any uncategorized logs
            logging.getLogger().setLevel(logging.WARNING)

    @cached_property
    def _deps(self):
        """Import the optional dependencies once and cache them.

        Returns:
            Dict with the ``chromadb`` module and a ``MarkItDown`` instance
            (under the ``'markdown'`` key).

        Raises:
            ImportError: If the optional packages are missing; the original
                import failure is kept as ``__cause__``.
        """
        try:
            from markitdown import MarkItDown
            import chromadb
            return {
                'chromadb': chromadb,
                'markdown': MarkItDown()
            }
        except ImportError as e:
            raise ImportError(
                "Required packages not installed. Please install using: "
                'pip install "praisonaiagents[knowledge]"'
            ) from e

    @cached_property
    def config(self):
        """Build the effective mem0 configuration (defaults + user overrides).

        The default vector store is a persistent Chroma client rooted at
        ``.praison`` with a freshly generated collection name. A
        user-supplied ``client`` entry is ignored because the client is
        created here.
        """
        # Generate unique collection name for each instance (only if not provided in config)
        default_collection = f"test_{int(time.time())}_{str(uuid.uuid4())[:8]}"
        persist_dir = ".praison"

        # Create persistent client config
        base_config = {
            "vector_store": {
                "provider": "chroma",
                "config": {
                    "collection_name": default_collection,
                    "path": persist_dir,
                    "client": self._deps['chromadb'].PersistentClient(path=persist_dir),
                    "host": None,
                    "port": None
                }
            },
            "version": "v1.1",
            "custom_prompt": "Return {{\"facts\": [text]}} where text is the exact input provided and json response"
        }

        # If config is provided, merge it with base config
        if self._config:
            # Merge version if provided
            if "version" in self._config:
                base_config["version"] = self._config["version"]

            # Merge vector_store config
            if "vector_store" in self._config:
                if "provider" in self._config["vector_store"]:
                    base_config["vector_store"]["provider"] = self._config["vector_store"]["provider"]

                if "config" in self._config["vector_store"]:
                    config_copy = self._config["vector_store"]["config"].copy()
                    # Only exclude client as it's managed internally
                    if "client" in config_copy:
                        del config_copy["client"]
                    base_config["vector_store"]["config"].update(config_copy)

            # Merge embedder config if provided
            if "embedder" in self._config:
                base_config["embedder"] = self._config["embedder"]

            # Merge llm config if provided
            if "llm" in self._config:
                base_config["llm"] = self._config["llm"]
        return base_config

    @cached_property
    def memory(self):
        """Create the backing memory object, with a reduced-config fallback.

        If construction fails with an error mentioning ``list_collections``
        or ``Extra fields not allowed``, the vector-store config is trimmed
        to ``collection_name`` and ``path`` and a plain ``mem0.Memory`` is
        built instead. Any other error is re-raised.
        """
        try:
            return CustomMemory.from_config(self.config)
        except (NotImplementedError, ValueError) as e:
            if "list_collections" in str(e) or "Extra fields not allowed" in str(e):
                # Keep only allowed fields
                vector_store_config = {
                    "collection_name": self.config["vector_store"]["config"]["collection_name"],
                    "path": self.config["vector_store"]["config"]["path"]
                }
                self.config["vector_store"]["config"] = vector_store_config
                from mem0 import Memory
                return Memory.from_config(self.config)
            raise

    @cached_property
    def markdown(self):
        """The cached ``MarkItDown`` converter used for non-text files."""
        return self._deps['markdown']

    @cached_property
    def chunker(self):
        """Recursive chunker (512-size chunks, 50 overlap) for converted documents."""
        return Chunking(
            chunker_type='recursive',
            chunk_size=512,
            chunk_overlap=50
        )

    def _log(self, message, level=2):
        """Internal logging helper"""
        if self._verbose and self._verbose >= level:
            logger.info(message)

    def store(self, content, user_id=None, agent_id=None, run_id=None, metadata=None):
        """Store a memory.

        A string ending in one of ``_FILE_PATH_SUFFIXES`` is treated as a
        file path and routed through ``add``; anything else is stored as-is.

        Returns:
            The backend's result, or ``[]`` for blank input or when any
            error occurs (the error is logged, not raised).
        """
        try:
            if isinstance(content, str) and content.lower().endswith(self._FILE_PATH_SUFFIXES):
                self._log(f"Content appears to be a file path, processing file: {content}")
                return self.add(content, user_id=user_id, agent_id=agent_id, run_id=run_id, metadata=metadata)
        except Exception as e:
            logger.error(f"Error storing content: {str(e)}")
            return []

        return self._store_text(content, user_id=user_id, agent_id=agent_id, run_id=run_id, metadata=metadata)

    def _store_text(self, content, user_id=None, agent_id=None, run_id=None, metadata=None):
        """Store ``content`` verbatim, never interpreting it as a file path.

        Used for text that is already known to be content (e.g. chunks read
        from a file), so a chunk that merely ends in ``.pdf`` or ``.txt`` is
        not mistaken for a path and dropped.

        Returns:
            The backend's result, or ``[]`` for blank strings or on error.
        """
        try:
            if isinstance(content, str):
                content = content.strip()
                if not content:
                    return []

            result = self.memory.add(content, user_id=user_id, agent_id=agent_id, run_id=run_id, metadata=metadata)
            self._log(f"Store operation result: {result}")
            return result
        except Exception as e:
            logger.error(f"Error storing content: {str(e)}")
            return []

    def get_all(self, user_id=None, agent_id=None, run_id=None):
        """Retrieve all memories."""
        return self.memory.get_all(user_id=user_id, agent_id=agent_id, run_id=run_id)

    def get(self, memory_id):
        """Retrieve a specific memory by ID."""
        return self.memory.get(memory_id)

    def search(self, query, user_id=None, agent_id=None, run_id=None):
        """Search for memories related to a query."""
        return self.memory.search(query, user_id=user_id, agent_id=agent_id, run_id=run_id)

    def update(self, memory_id, data):
        """Update a memory."""
        return self.memory.update(memory_id, data)

    def history(self, memory_id):
        """Get the history of changes for a memory."""
        return self.memory.history(memory_id)

    def delete(self, memory_id):
        """Delete a memory."""
        self.memory.delete(memory_id)

    def delete_all(self, user_id=None, agent_id=None, run_id=None):
        """Delete all memories."""
        self.memory.delete_all(user_id=user_id, agent_id=agent_id, run_id=run_id)

    def reset(self):
        """Reset all memories."""
        self.memory.reset()

    def normalize_content(self, content):
        """Normalize content for consistent storage."""
        # Example normalization: strip whitespace, convert to lowercase
        return content.strip().lower()

    def add(self, file_path, user_id=None, agent_id=None, run_id=None, metadata=None):
        """Read file content and store it in memory.

        Args:
            file_path: Can be:
                - A string (or ``os.PathLike``) path to local file
                - A URL string (currently raises ``NotImplementedError``)
                - A list/tuple containing file paths and/or URLs
                Strings without a supported extension are stored as raw text.

        Returns:
            ``{'results': [...], 'relations': []}`` aggregated over all inputs.
        """
        if isinstance(file_path, (list, tuple)):
            results = []
            for path in file_path:
                result = self._process_single_input(path, user_id, agent_id, run_id, metadata)
                results.extend(result.get('results', []))
            return {'results': results, 'relations': []}

        return self._process_single_input(file_path, user_id, agent_id, run_id, metadata)

    @staticmethod
    def _file_metadata(metadata, input_path, file_ext):
        """Return a new metadata dict tagged with file type and name.

        A copy is made so the caller's dict (which ``add`` shares across
        several inputs) is never mutated.
        """
        merged = dict(metadata) if metadata else {}
        merged['file_type'] = file_ext.lstrip('.')
        merged['filename'] = os.path.basename(input_path)
        return merged

    @staticmethod
    def _extract_results(result):
        """Normalise a store result to a list of entries.

        Accepts the ``{'results': [...]}`` dict shape as well as a bare
        list/tuple; empty or unrecognised values yield ``[]``.
        """
        if not result:
            return []
        if isinstance(result, dict):
            return result.get('results', [])
        if isinstance(result, (list, tuple)):
            return list(result)
        return []

    def _read_file(self, input_path, metadata):
        """Load ``input_path`` and return ``(memories, metadata)`` for it.

        Text files are read as UTF-8 and normalised into a single memory;
        other supported files are converted with MarkItDown and chunked.

        Raises:
            FileNotFoundError: If the path does not exist.
            ValueError: If the file yields no content.
        """
        self._log(f"Processing as file path: {input_path}")
        if not os.path.exists(input_path):
            logger.error(f"File not found: {input_path}")
            raise FileNotFoundError(f"File not found: {input_path}")

        # The caller has already verified the path ends with a supported
        # extension, so the last dot-separated segment is that extension.
        file_ext = '.' + input_path.lower().split('.')[-1]

        if file_ext in self._DOCUMENT_EXTENSIONS['text']:
            with open(input_path, 'r', encoding='utf-8') as file:
                content = file.read().strip()
            if not content:
                raise ValueError("Empty text file")
            memories = [self.normalize_content(content)]
        else:
            # Use MarkItDown for documents and media
            result = self.markdown.convert(input_path)
            content = result.text_content
            if not content:
                raise ValueError("No content could be extracted from file")
            chunks = self.chunker.chunk(content)
            memories = [chunk.text.strip() if hasattr(chunk, 'text') else str(chunk).strip()
                        for chunk in chunks if chunk]

        return memories, self._file_metadata(metadata, input_path, file_ext)

    def _process_single_input(self, input_path, user_id=None, agent_id=None, run_id=None, metadata=None):
        """Process a single input which can be a file path or URL.

        Returns:
            ``{'results': [...], 'relations': []}`` for the stored memories.

        Raises:
            NotImplementedError: For ``http://`` / ``https://`` inputs.
            FileNotFoundError, ValueError: See ``_read_file``.
            Any exception is logged with traceback and re-raised.
        """
        try:
            # Accept pathlib.Path and friends by converting to a plain string.
            if isinstance(input_path, os.PathLike):
                input_path = os.fsdecode(input_path)

            # Check if input is URL
            if isinstance(input_path, str) and input_path.startswith(('http://', 'https://')):
                self._log(f"Processing URL: {input_path}")
                raise NotImplementedError("URL processing not yet implemented")

            if input_path.lower().endswith(self._SUPPORTED_EXTENSIONS):
                memories, metadata = self._read_file(input_path, metadata)
            else:
                # Treat as raw text content only if no file extension
                memories = [self.normalize_content(input_path)]

            # Create progress display
            progress = Progress(
                SpinnerColumn(),
                TextColumn("[progress.description]{task.description}"),
                BarColumn(),
                TaskProgressColumn(),
                transient=True
            )

            # Store memories with progress bar
            all_results = []
            with progress:
                store_task = progress.add_task(f"Adding to Knowledge from {os.path.basename(input_path)}", total=len(memories))
                for memory in memories:
                    if memory:
                        # Go straight to the text path: the chunk is content,
                        # not a candidate file path.
                        memory_result = self._store_text(memory, user_id=user_id, agent_id=agent_id,
                                                         run_id=run_id, metadata=metadata)
                        all_results.extend(self._extract_results(memory_result))
                    progress.advance(store_task)

            return {'results': all_results, 'relations': []}

        except Exception as e:
            logger.error(f"Error processing input {input_path}: {str(e)}", exc_info=True)
            raise
@@ -0,0 +1,20 @@
1
+ import logging
2
+ import warnings
3
+
4
# Quiet the noisiest third-party loggers before the LLM module is imported.
for _noisy_logger in ("litellm", "openai", "httpx", "httpcore", "pydantic"):
    logging.getLogger(_noisy_logger).setLevel(logging.ERROR)
del _noisy_logger  # keep the package namespace free of the loop variable

# Hide UserWarning instances raised from pydantic modules.
warnings.filterwarnings("ignore", category=UserWarning, module="pydantic")

# Default root logging to WARNING so INFO messages are not shown.
logging.basicConfig(level=logging.WARNING)
16
+
17
+ # Import after suppressing warnings
18
+ from .llm import LLM, LLMContextLengthExceededException
19
+
20
+ __all__ = ["LLM", "LLMContextLengthExceededException"]