noesium-0.1.0-py3-none-any.whl

This diff represents the content of a publicly available package version released to one of the supported registries. The information is provided for informational purposes only and reflects the package as it appears in its public registry.
Files changed (86)
  1. noesium/core/__init__.py +4 -0
  2. noesium/core/agent/__init__.py +14 -0
  3. noesium/core/agent/base.py +227 -0
  4. noesium/core/consts.py +6 -0
  5. noesium/core/goalith/conflict/conflict.py +104 -0
  6. noesium/core/goalith/conflict/detector.py +53 -0
  7. noesium/core/goalith/decomposer/__init__.py +6 -0
  8. noesium/core/goalith/decomposer/base.py +46 -0
  9. noesium/core/goalith/decomposer/callable_decomposer.py +65 -0
  10. noesium/core/goalith/decomposer/llm_decomposer.py +326 -0
  11. noesium/core/goalith/decomposer/prompts.py +140 -0
  12. noesium/core/goalith/decomposer/simple_decomposer.py +61 -0
  13. noesium/core/goalith/errors.py +22 -0
  14. noesium/core/goalith/goalgraph/graph.py +526 -0
  15. noesium/core/goalith/goalgraph/node.py +179 -0
  16. noesium/core/goalith/replanner/base.py +31 -0
  17. noesium/core/goalith/replanner/replanner.py +36 -0
  18. noesium/core/goalith/service.py +26 -0
  19. noesium/core/llm/__init__.py +154 -0
  20. noesium/core/llm/base.py +152 -0
  21. noesium/core/llm/litellm.py +528 -0
  22. noesium/core/llm/llamacpp.py +487 -0
  23. noesium/core/llm/message.py +184 -0
  24. noesium/core/llm/ollama.py +459 -0
  25. noesium/core/llm/openai.py +520 -0
  26. noesium/core/llm/openrouter.py +89 -0
  27. noesium/core/llm/prompt.py +551 -0
  28. noesium/core/memory/__init__.py +11 -0
  29. noesium/core/memory/base.py +464 -0
  30. noesium/core/memory/memu/__init__.py +24 -0
  31. noesium/core/memory/memu/config/__init__.py +26 -0
  32. noesium/core/memory/memu/config/activity/config.py +46 -0
  33. noesium/core/memory/memu/config/event/config.py +46 -0
  34. noesium/core/memory/memu/config/markdown_config.py +241 -0
  35. noesium/core/memory/memu/config/profile/config.py +48 -0
  36. noesium/core/memory/memu/llm_adapter.py +129 -0
  37. noesium/core/memory/memu/memory/__init__.py +31 -0
  38. noesium/core/memory/memu/memory/actions/__init__.py +40 -0
  39. noesium/core/memory/memu/memory/actions/add_activity_memory.py +299 -0
  40. noesium/core/memory/memu/memory/actions/base_action.py +342 -0
  41. noesium/core/memory/memu/memory/actions/cluster_memories.py +262 -0
  42. noesium/core/memory/memu/memory/actions/generate_suggestions.py +198 -0
  43. noesium/core/memory/memu/memory/actions/get_available_categories.py +66 -0
  44. noesium/core/memory/memu/memory/actions/link_related_memories.py +515 -0
  45. noesium/core/memory/memu/memory/actions/run_theory_of_mind.py +254 -0
  46. noesium/core/memory/memu/memory/actions/update_memory_with_suggestions.py +514 -0
  47. noesium/core/memory/memu/memory/embeddings.py +130 -0
  48. noesium/core/memory/memu/memory/file_manager.py +306 -0
  49. noesium/core/memory/memu/memory/memory_agent.py +578 -0
  50. noesium/core/memory/memu/memory/recall_agent.py +376 -0
  51. noesium/core/memory/memu/memory_store.py +628 -0
  52. noesium/core/memory/models.py +149 -0
  53. noesium/core/msgbus/__init__.py +12 -0
  54. noesium/core/msgbus/base.py +395 -0
  55. noesium/core/orchestrix/__init__.py +0 -0
  56. noesium/core/py.typed +0 -0
  57. noesium/core/routing/__init__.py +20 -0
  58. noesium/core/routing/base.py +66 -0
  59. noesium/core/routing/router.py +241 -0
  60. noesium/core/routing/strategies/__init__.py +9 -0
  61. noesium/core/routing/strategies/dynamic_complexity.py +361 -0
  62. noesium/core/routing/strategies/self_assessment.py +147 -0
  63. noesium/core/routing/types.py +38 -0
  64. noesium/core/toolify/__init__.py +39 -0
  65. noesium/core/toolify/base.py +360 -0
  66. noesium/core/toolify/config.py +138 -0
  67. noesium/core/toolify/mcp_integration.py +275 -0
  68. noesium/core/toolify/registry.py +214 -0
  69. noesium/core/toolify/toolkits/__init__.py +1 -0
  70. noesium/core/tracing/__init__.py +37 -0
  71. noesium/core/tracing/langgraph_hooks.py +308 -0
  72. noesium/core/tracing/opik_tracing.py +144 -0
  73. noesium/core/tracing/token_tracker.py +166 -0
  74. noesium/core/utils/__init__.py +10 -0
  75. noesium/core/utils/logging.py +172 -0
  76. noesium/core/utils/statistics.py +12 -0
  77. noesium/core/utils/typing.py +17 -0
  78. noesium/core/vector_store/__init__.py +79 -0
  79. noesium/core/vector_store/base.py +94 -0
  80. noesium/core/vector_store/pgvector.py +304 -0
  81. noesium/core/vector_store/weaviate.py +383 -0
  82. noesium-0.1.0.dist-info/METADATA +525 -0
  83. noesium-0.1.0.dist-info/RECORD +86 -0
  84. noesium-0.1.0.dist-info/WHEEL +5 -0
  85. noesium-0.1.0.dist-info/licenses/LICENSE +21 -0
  86. noesium-0.1.0.dist-info/top_level.txt +1 -0
noesium/core/llm/litellm.py
@@ -0,0 +1,528 @@
+ """
+ LiteLLM provider for Cogents.
+
+ This module provides:
+ - Unified interface to multiple LLM providers via LiteLLM
+ - Chat completion using various models through LiteLLM
+ - Image understanding using vision models
+ - Instructor integration for structured output
+
+ - Support for OpenAI, Anthropic, Cohere, Ollama, and many other providers
+ """
+
+ import base64
+ import os
+ import time
+ from pathlib import Path
+ from typing import Any, Dict, List, Optional, Tuple, Type, TypeVar, Union
+
+ import litellm
+ from instructor import Instructor, Mode, patch
+
+ from noesium.core.llm.base import BaseLLMClient
+ from noesium.core.tracing import configure_opik, estimate_token_usage, get_token_tracker, is_opik_enabled
+ from noesium.core.utils.logging import get_logger
+
+ # Only import OPIK if tracing is enabled
+ OPIK_AVAILABLE = False
+ track = lambda func: func  # Default no-op decorator
+ if os.getenv("COGENTS_OPIK_TRACING", "false").lower() == "true":
+     try:
+         from opik import track
+
+         OPIK_AVAILABLE = True
+     except ImportError:
+         pass
+
+
+ T = TypeVar("T")
+
+ logger = get_logger(__name__)
+
+
+ class LLMClient(BaseLLMClient):
+     """Client for interacting with multiple LLM services via LiteLLM."""
+
+     def __init__(
+         self,
+         base_url: Optional[str] = None,
+         api_key: Optional[str] = None,
+         instructor: bool = False,
+         chat_model: Optional[str] = None,
+         vision_model: Optional[str] = None,
+         embed_model: Optional[str] = None,
+         **kwargs,
+     ):
+         """
+         Initialize the LiteLLM client.
+
+         Args:
+             base_url: Base URL for custom API endpoints (optional)
+             api_key: API key for the provider (can be set via environment variables)
+             instructor: Whether to enable instructor for structured output
+             chat_model: Model to use for chat completions (e.g., "gpt-3.5-turbo", "claude-3-sonnet")
+             vision_model: Model to use for vision tasks (e.g., "gpt-4-vision-preview", "claude-3-sonnet")
+             **kwargs: Additional arguments
+         """
+         super().__init__(**kwargs)
+         # Configure Opik tracing for observability only if enabled
+         if OPIK_AVAILABLE:
+             configure_opik()
+             self._opik_provider = "litellm"
+         else:
+             self._opik_provider = None
+
+         # Set base URL if provided
+         self.base_url = base_url
+         if self.base_url:
+             litellm.api_base = self.base_url
+
+         # Set API key if provided
+         self.api_key = api_key
+         if self.api_key:
+             litellm.api_key = self.api_key
+
+         # Model configurations
+         self.chat_model = chat_model or os.getenv("LITELLM_CHAT_MODEL", "gpt-3.5-turbo")
+         self.vision_model = vision_model or os.getenv("LITELLM_VISION_MODEL", "gpt-4-vision-preview")
+         self.embed_model = embed_model or os.getenv("LITELLM_EMBED_MODEL", "text-embedding-ada-002")
+
+         # Initialize instructor if requested
+         self.instructor = None
+         if instructor:
+             try:
+                 from openai import OpenAI
+
+                 # Create a mock client for instructor
+                 mock_client = OpenAI(
+                     api_key="litellm",
+                     base_url="http://localhost:8000",  # LiteLLM proxy default
+                 )
+                 patched_client = patch(mock_client, mode=Mode.JSON)
+                 self.instructor = Instructor(
+                     client=patched_client,
+                     create=patched_client.chat.completions.create,
+                     mode=Mode.JSON,
+                 )
+             except ImportError:
+                 logger.warning("OpenAI package not available, structured completion will not work")
+
+         # Configure LiteLLM settings
+         litellm.drop_params = True  # Drop unsupported parameters
+         litellm.set_verbose = False  # Reduce verbosity
+
+     @track
+     def completion(
+         self,
+         messages: List[Dict[str, str]],
+         temperature: float = 0.7,
+         max_tokens: Optional[int] = None,
+         stream: bool = False,
+         **kwargs,
+     ) -> Union[str, Dict[str, Any]]:
+         """
+         Generate chat completion using LiteLLM.
+
+         Args:
+             messages: List of message dictionaries with 'role' and 'content' keys
+             temperature: Sampling temperature (0.0 to 2.0)
+             max_tokens: Maximum tokens to generate
+             stream: Whether to stream the response
+             **kwargs: Additional arguments
+
+         Returns:
+             Generated text response or streaming response
+         """
+         # Add Opik tracing metadata
+         opik_metadata = {}
+         if is_opik_enabled():
+             opik_metadata = {
+                 "provider": self._opik_provider,
+                 "model": self.chat_model,
+                 "temperature": temperature,
+                 "max_tokens": max_tokens,
+                 "stream": stream,
+                 "call_type": "completion",
+             }
+
+         try:
+             if self.debug:
+                 logger.debug(f"Chat completion: {messages}")
+
+             response = litellm.completion(
+                 model=self.chat_model,
+                 messages=messages,
+                 temperature=temperature,
+                 max_tokens=max_tokens,
+                 stream=stream,
+                 **kwargs,
+             )
+
+             if stream:
+                 return response
+             else:
+                 # Extract token usage if available
+                 try:
+                     if hasattr(response, "usage") and response.usage:
+                         usage_data = {
+                             "prompt_tokens": response.usage.prompt_tokens,
+                             "completion_tokens": response.usage.completion_tokens,
+                             "total_tokens": response.usage.total_tokens,
+                             "model_name": self.chat_model,
+                             "call_type": "completion",
+                         }
+                         from noesium.core.tracing import TokenUsage
+
+                         usage = TokenUsage(**usage_data)
+                         get_token_tracker().record_usage(usage)
+                         logger.debug(f"Token usage for completion: {usage.total_tokens} tokens")
+                     else:
+                         # Fallback to estimation
+                         prompt_text = "\n".join([msg.get("content", "") for msg in messages])
+                         completion_text = response.choices[0].message.content
+                         usage = estimate_token_usage(prompt_text, completion_text, self.chat_model, "completion")
+                         get_token_tracker().record_usage(usage)
+                         logger.debug(f"Estimated token usage for completion: {usage.total_tokens} tokens")
+                 except Exception as e:
+                     logger.debug(f"Could not track token usage: {e}")
+
+                 return response.choices[0].message.content
+
+         except Exception as e:
+             logger.error(f"Error in LiteLLM completion: {e}")
+             raise
+
+     @track
+     def structured_completion(
+         self,
+         messages: List[Dict[str, str]],
+         response_model: Type[T],
+         temperature: float = 0.7,
+         max_tokens: Optional[int] = None,
+         attempts: int = 2,
+         backoff: float = 0.5,
+         **kwargs,
+     ) -> T:
+         """
+         Generate structured completion using instructor with LiteLLM.
+
+         Args:
+             messages: List of message dictionaries with 'role' and 'content' keys
+             response_model: Pydantic model class for structured output
+             temperature: Sampling temperature (0.0 to 2.0)
+             max_tokens: Maximum tokens to generate
+             attempts: Number of attempts to make
+             backoff: Backoff factor for exponential backoff
+             **kwargs: Additional arguments to pass to instructor
+
+         Returns:
+             Structured response as the specified model type
+         """
+         if not self.instructor:
+             raise ValueError("Instructor is not enabled. Initialize LLMClient with instructor=True")
+
+         last_err = None
+         for i in range(attempts):
+             try:
+                 # Use LiteLLM directly with JSON mode for structured output
+                 # Add system message to enforce JSON structure
+                 structured_messages = messages.copy()
+                 if response_model.__doc__:
+                     schema_prompt = f"Respond with JSON matching this schema: {response_model.model_json_schema()}"
+                 else:
+                     schema_prompt = f"Respond with JSON matching this Pydantic model: {response_model.__name__}"
+
+                 # Add schema instruction to the last user message or create a new one
+                 if structured_messages and structured_messages[-1]["role"] == "user":
+                     structured_messages[-1]["content"] += f"\n\n{schema_prompt}"
+                 else:
+                     structured_messages.append({"role": "user", "content": schema_prompt})
+
+                 if self.debug:
+                     logger.debug(f"Structured completion: {structured_messages}")
+
+                 response = litellm.completion(
+                     model=self.chat_model,
+                     messages=structured_messages,
+                     temperature=temperature,
+                     max_tokens=max_tokens,
+                     response_format={"type": "json_object"} if "gpt" in self.chat_model.lower() else None,
+                     **kwargs,
+                 )
+
+                 # Parse the JSON response into the Pydantic model
+                 import json
+
+                 response_text = response.choices[0].message.content
+                 response_json = json.loads(response_text)
+                 result = response_model.model_validate(response_json)
+
+                 # Estimate token usage for logging
+                 try:
+                     prompt_text = "\n".join([msg.get("content", "") for msg in structured_messages])
+                     completion_text = response_text
+                     usage = estimate_token_usage(prompt_text, completion_text, self.chat_model, "structured")
+                     get_token_tracker().record_usage(usage)
+                     logger.debug(f"Estimated token usage for structured completion: {usage.total_tokens} tokens")
+                 except Exception as e:
+                     logger.debug(f"Could not estimate token usage: {e}")
+
+                 return result
+
+             except Exception as e:
+                 last_err = e
+                 if i < attempts - 1:
+                     time.sleep(backoff * (2**i))
+                 else:
+                     logger.error(f"Error in structured completion: {e}")
+                     raise
+         raise last_err
+
+     @track
+     def understand_image(
+         self,
+         image_path: Union[str, Path],
+         prompt: str,
+         temperature: float = 0.7,
+         max_tokens: Optional[int] = None,
+         **kwargs,
+     ) -> str:
+         """
+         Analyze an image using LiteLLM vision model.
+
+         Args:
+             image_path: Path to the image file
+             prompt: Text prompt describing what to analyze in the image
+             temperature: Sampling temperature
+             max_tokens: Maximum tokens to generate
+             **kwargs: Additional arguments
+
+         Returns:
+             Analysis of the image
+         """
+
+         try:
+             # Read and encode the image
+             image_path = Path(image_path)
+             if not image_path.exists():
+                 raise FileNotFoundError(f"Image file not found: {image_path}")
+
+             with open(image_path, "rb") as image_file:
+                 image_data = image_file.read()
+                 image_base64 = base64.b64encode(image_data).decode("utf-8")
+
+             # Determine the image format
+             image_format = image_path.suffix.lower().lstrip(".")
+             if image_format == "jpg":
+                 image_format = "jpeg"
+
+             # Prepare the message with image
+             messages = [
+                 {
+                     "role": "user",
+                     "content": [
+                         {"type": "text", "text": prompt},
+                         {"type": "image_url", "image_url": {"url": f"data:image/{image_format};base64,{image_base64}"}},
+                     ],
+                 }
+             ]
+
+             if self.debug:
+                 logger.debug(f"Understand image: {messages}")
+
+             response = litellm.completion(
+                 model=self.vision_model,
+                 messages=messages,
+                 temperature=temperature,
+                 max_tokens=max_tokens,
+                 **kwargs,
+             )
+
+             # Estimate token usage for logging
+             try:
+                 completion_text = response.choices[0].message.content
+                 usage = estimate_token_usage(prompt, completion_text, self.vision_model, "vision")
+                 get_token_tracker().record_usage(usage)
+                 logger.debug(f"Estimated token usage for vision: {usage.total_tokens} tokens")
+             except Exception as e:
+                 logger.debug(f"Could not estimate token usage: {e}")
+
+             return response.choices[0].message.content
+
+         except Exception as e:
+             logger.error(f"Error analyzing image with LiteLLM: {e}")
+             raise
+
+     @track
+     def understand_image_from_url(
+         self,
+         image_url: str,
+         prompt: str,
+         temperature: float = 0.7,
+         max_tokens: Optional[int] = None,
+         **kwargs,
+     ) -> str:
+         """
+         Analyze an image from URL using LiteLLM vision model.
+
+         Args:
+             image_url: URL of the image
+             prompt: Text prompt describing what to analyze in the image
+             temperature: Sampling temperature
+             max_tokens: Maximum tokens to generate
+             **kwargs: Additional arguments
+
+         Returns:
+             Analysis of the image
+         """
+
+         try:
+             # Prepare the message with image URL
+             messages = [
+                 {
+                     "role": "user",
+                     "content": [
+                         {"type": "text", "text": prompt},
+                         {"type": "image_url", "image_url": {"url": image_url}},
+                     ],
+                 }
+             ]
+
+             if self.debug:
+                 logger.debug(f"Understand image from url: {messages}")
+
+             response = litellm.completion(
+                 model=self.vision_model,
+                 messages=messages,
+                 temperature=temperature,
+                 max_tokens=max_tokens,
+                 **kwargs,
+             )
+
+             # Estimate token usage for logging
+             try:
+                 completion_text = response.choices[0].message.content
+                 usage = estimate_token_usage(prompt, completion_text, self.vision_model, "vision")
+                 get_token_tracker().record_usage(usage)
+                 logger.debug(f"Estimated token usage for vision: {usage.total_tokens} tokens")
+             except Exception as e:
+                 logger.debug(f"Could not estimate token usage: {e}")
+
+             return response.choices[0].message.content
+
+         except Exception as e:
+             logger.error(f"Error analyzing image from URL with LiteLLM: {e}")
+             raise
+
+     def embed(self, text: str) -> List[float]:
+         """
+         Generate embeddings using LiteLLM.
+
+         Args:
+             text: Text to embed
+
+         Returns:
+             List of embedding values
+         """
+         try:
+             response = litellm.embedding(
+                 model=self.embed_model,
+                 input=[text],
+                 dimensions=self.get_embedding_dimensions(),
+             )
+
+             embedding = response.data[0].embedding
+
+             # Validate embedding dimensions
+             expected_dims = self.get_embedding_dimensions()
+             if len(embedding) != expected_dims:
+                 logger.warning(
+                     f"Embedding has {len(embedding)} dimensions, expected {expected_dims}. "
+                     f"Consider setting COGENTS_EMBEDDING_DIMS={len(embedding)} or "
+                     f"using a different embedding model."
+                 )
+
+             return embedding
+
+         except Exception as e:
+             logger.error(f"Error generating embedding with LiteLLM: {e}")
+             raise
+
+     def embed_batch(self, chunks: List[str]) -> List[List[float]]:
+         """
+         Generate embeddings for multiple texts using LiteLLM.
+
+         Args:
+             chunks: List of texts to embed
+
+         Returns:
+             List of embedding lists
+         """
+         try:
+             response = litellm.embedding(
+                 model=self.embed_model,
+                 input=chunks,
+                 dimensions=self.get_embedding_dimensions(),
+             )
+
+             embeddings = [item.embedding for item in response.data]
+
+             # Validate embedding dimensions
+             expected_dims = self.get_embedding_dimensions()
+             for i, embedding in enumerate(embeddings):
+                 if len(embedding) != expected_dims:
+                     logger.warning(
+                         f"Embedding at index {i} has {len(embedding)} dimensions, expected {expected_dims}. "
+                         f"Consider setting COGENTS_EMBEDDING_DIMS={len(embedding)} or "
+                         f"using a different embedding model."
+                     )
+
+             return embeddings
+
+         except Exception as e:
+             logger.error(f"Error generating batch embeddings with LiteLLM: {e}")
+             # Fallback to individual calls
+             embeddings = []
+             for chunk in chunks:
+                 embedding = self.embed(chunk)
+                 embeddings.append(embedding)
+             return embeddings
+
+     def rerank(self, query: str, chunks: List[str]) -> List[Tuple[float, int, str]]:
+         """
+         Rerank chunks based on their relevance to the query.
+
+         Note: LiteLLM doesn't have a native reranking API, so this implementation
+         uses a simple similarity-based approach with embeddings.
+
+         Args:
+             query: The query to rank against
+             chunks: List of text chunks to rerank
+
+         Returns:
+             List of tuples (similarity_score, original_index, chunk_text)
+             sorted by similarity score in descending order
+         """
+         try:
+             # Get embeddings for query and chunks
+             query_embedding = self.embed(query)
+             chunk_embeddings = self.embed_batch(chunks)
+
+             from noesium.core.utils.statistics import cosine_similarity
+
+             # Calculate similarities and sort
+             similarities = []
+             for i, chunk_embedding in enumerate(chunk_embeddings):
+                 similarity = cosine_similarity(query_embedding, chunk_embedding)
+                 similarities.append((similarity, i, chunks[i]))
+
+             # Sort by similarity (descending)
+             similarities.sort(key=lambda x: x[0], reverse=True)
+
+             # Return sorted tuples
+             return similarities
+
+         except Exception as e:
+             logger.error(f"Error reranking with LiteLLM: {e}")
+             # Fallback: return original order with zero similarities
+             return [(0.0, i, chunk) for i, chunk in enumerate(chunks)]
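
Usage sketch (not part of the wheel): the snippet below illustrates how the LLMClient added in noesium/core/llm/litellm.py might be called, based only on the signatures shown in this diff. The model name, API key placeholder, and the CityInfo Pydantic model are illustrative assumptions, not package contents.

from pydantic import BaseModel

from noesium.core.llm.litellm import LLMClient


class CityInfo(BaseModel):
    """Basic facts about a city."""  # example response model, assumed for illustration

    name: str
    country: str


# Illustrative configuration: model name and API key are placeholders.
client = LLMClient(api_key="sk-...", chat_model="gpt-4o-mini", instructor=True)

# Plain chat completion returns the message content as a string.
answer = client.completion(
    messages=[{"role": "user", "content": "Name one city in Japan."}],
    temperature=0.2,
)

# Structured completion appends a JSON-schema instruction to the last user
# message and validates the model's JSON reply against the Pydantic model.
city = client.structured_completion(
    messages=[{"role": "user", "content": "Tell me about Tokyo."}],
    response_model=CityInfo,
)

# Embedding helpers; rerank() sorts chunks by cosine similarity to the query.
vector = client.embed("Tokyo is the capital of Japan.")
ranked = client.rerank("capital of Japan", ["Kyoto is in Japan.", "Tokyo is in Japan."])

Note that structured_completion requires the client to be constructed with instructor=True, even though the call itself goes through litellm.completion with a JSON response format rather than through the instructor client.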