noesium-0.1.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (86)
  1. noesium/core/__init__.py +4 -0
  2. noesium/core/agent/__init__.py +14 -0
  3. noesium/core/agent/base.py +227 -0
  4. noesium/core/consts.py +6 -0
  5. noesium/core/goalith/conflict/conflict.py +104 -0
  6. noesium/core/goalith/conflict/detector.py +53 -0
  7. noesium/core/goalith/decomposer/__init__.py +6 -0
  8. noesium/core/goalith/decomposer/base.py +46 -0
  9. noesium/core/goalith/decomposer/callable_decomposer.py +65 -0
  10. noesium/core/goalith/decomposer/llm_decomposer.py +326 -0
  11. noesium/core/goalith/decomposer/prompts.py +140 -0
  12. noesium/core/goalith/decomposer/simple_decomposer.py +61 -0
  13. noesium/core/goalith/errors.py +22 -0
  14. noesium/core/goalith/goalgraph/graph.py +526 -0
  15. noesium/core/goalith/goalgraph/node.py +179 -0
  16. noesium/core/goalith/replanner/base.py +31 -0
  17. noesium/core/goalith/replanner/replanner.py +36 -0
  18. noesium/core/goalith/service.py +26 -0
  19. noesium/core/llm/__init__.py +154 -0
  20. noesium/core/llm/base.py +152 -0
  21. noesium/core/llm/litellm.py +528 -0
  22. noesium/core/llm/llamacpp.py +487 -0
  23. noesium/core/llm/message.py +184 -0
  24. noesium/core/llm/ollama.py +459 -0
  25. noesium/core/llm/openai.py +520 -0
  26. noesium/core/llm/openrouter.py +89 -0
  27. noesium/core/llm/prompt.py +551 -0
  28. noesium/core/memory/__init__.py +11 -0
  29. noesium/core/memory/base.py +464 -0
  30. noesium/core/memory/memu/__init__.py +24 -0
  31. noesium/core/memory/memu/config/__init__.py +26 -0
  32. noesium/core/memory/memu/config/activity/config.py +46 -0
  33. noesium/core/memory/memu/config/event/config.py +46 -0
  34. noesium/core/memory/memu/config/markdown_config.py +241 -0
  35. noesium/core/memory/memu/config/profile/config.py +48 -0
  36. noesium/core/memory/memu/llm_adapter.py +129 -0
  37. noesium/core/memory/memu/memory/__init__.py +31 -0
  38. noesium/core/memory/memu/memory/actions/__init__.py +40 -0
  39. noesium/core/memory/memu/memory/actions/add_activity_memory.py +299 -0
  40. noesium/core/memory/memu/memory/actions/base_action.py +342 -0
  41. noesium/core/memory/memu/memory/actions/cluster_memories.py +262 -0
  42. noesium/core/memory/memu/memory/actions/generate_suggestions.py +198 -0
  43. noesium/core/memory/memu/memory/actions/get_available_categories.py +66 -0
  44. noesium/core/memory/memu/memory/actions/link_related_memories.py +515 -0
  45. noesium/core/memory/memu/memory/actions/run_theory_of_mind.py +254 -0
  46. noesium/core/memory/memu/memory/actions/update_memory_with_suggestions.py +514 -0
  47. noesium/core/memory/memu/memory/embeddings.py +130 -0
  48. noesium/core/memory/memu/memory/file_manager.py +306 -0
  49. noesium/core/memory/memu/memory/memory_agent.py +578 -0
  50. noesium/core/memory/memu/memory/recall_agent.py +376 -0
  51. noesium/core/memory/memu/memory_store.py +628 -0
  52. noesium/core/memory/models.py +149 -0
  53. noesium/core/msgbus/__init__.py +12 -0
  54. noesium/core/msgbus/base.py +395 -0
  55. noesium/core/orchestrix/__init__.py +0 -0
  56. noesium/core/py.typed +0 -0
  57. noesium/core/routing/__init__.py +20 -0
  58. noesium/core/routing/base.py +66 -0
  59. noesium/core/routing/router.py +241 -0
  60. noesium/core/routing/strategies/__init__.py +9 -0
  61. noesium/core/routing/strategies/dynamic_complexity.py +361 -0
  62. noesium/core/routing/strategies/self_assessment.py +147 -0
  63. noesium/core/routing/types.py +38 -0
  64. noesium/core/toolify/__init__.py +39 -0
  65. noesium/core/toolify/base.py +360 -0
  66. noesium/core/toolify/config.py +138 -0
  67. noesium/core/toolify/mcp_integration.py +275 -0
  68. noesium/core/toolify/registry.py +214 -0
  69. noesium/core/toolify/toolkits/__init__.py +1 -0
  70. noesium/core/tracing/__init__.py +37 -0
  71. noesium/core/tracing/langgraph_hooks.py +308 -0
  72. noesium/core/tracing/opik_tracing.py +144 -0
  73. noesium/core/tracing/token_tracker.py +166 -0
  74. noesium/core/utils/__init__.py +10 -0
  75. noesium/core/utils/logging.py +172 -0
  76. noesium/core/utils/statistics.py +12 -0
  77. noesium/core/utils/typing.py +17 -0
  78. noesium/core/vector_store/__init__.py +79 -0
  79. noesium/core/vector_store/base.py +94 -0
  80. noesium/core/vector_store/pgvector.py +304 -0
  81. noesium/core/vector_store/weaviate.py +383 -0
  82. noesium-0.1.0.dist-info/METADATA +525 -0
  83. noesium-0.1.0.dist-info/RECORD +86 -0
  84. noesium-0.1.0.dist-info/WHEEL +5 -0
  85. noesium-0.1.0.dist-info/licenses/LICENSE +21 -0
  86. noesium-0.1.0.dist-info/top_level.txt +1 -0
noesium/core/llm/openai.py
@@ -0,0 +1,520 @@
+ """
+ LLM utilities for Noesium using OpenAI-compatible APIs.
+
+ This module provides:
+ - Chat completion using various models via OpenAI-compatible endpoints
+ - Image understanding using vision models
+ - Instructor integration for structured output
+ - Configurable base URL and API key for OpenAI-compatible services
+ """
+
+ import base64
+ import mimetypes
+ import os
+ import time
+ from pathlib import Path
+ from typing import Any, Dict, List, Optional, Tuple, Type, TypeVar, Union
+
+ # Import instructor for structured output
+ from instructor import Instructor, Mode, patch
+ from openai import OpenAI
+
+ from noesium.core.llm.base import BaseLLMClient
+ from noesium.core.tracing import (
+     configure_opik,
+     estimate_token_usage,
+     extract_token_usage_from_openai_response,
+     get_token_tracker,
+     is_opik_enabled,
+ )
+ from noesium.core.utils.logging import get_logger
+
+ # Only import Opik if tracing is enabled
+ OPIK_AVAILABLE = False
+ track = lambda func: func  # Default no-op decorator
+ track_openai = lambda client: client  # Default no-op function
+ if os.getenv("COGENTS_OPIK_TRACING", "false").lower() == "true":
+     try:
+         from opik import track
+         from opik.integrations.openai import track_openai
+
+         OPIK_AVAILABLE = True
+     except ImportError:
+         pass
+
+
+ T = TypeVar("T")
+
+ logger = get_logger(__name__)
+
+
+ class LLMClient(BaseLLMClient):
+     """Client for interacting with OpenAI-compatible LLM services."""
+
+     def __init__(
+         self,
+         base_url: Optional[str] = None,
+         api_key: Optional[str] = None,
+         instructor: bool = False,
+         chat_model: Optional[str] = None,
+         vision_model: Optional[str] = None,
+         embed_model: Optional[str] = None,
+         **kwargs,
+     ):
+         """
+         Initialize the LLM client.
+
+         Args:
+             base_url: Base URL for the OpenAI-compatible API (defaults to OpenAI's URL)
+             api_key: API key for authentication (defaults to OPENAI_API_KEY env var)
+             instructor: Whether to enable instructor for structured output
+             chat_model: Model to use for chat completions (defaults to gpt-3.5-turbo)
+             vision_model: Model to use for vision tasks (defaults to gpt-4-vision-preview)
+             embed_model: Model to use for embeddings (defaults to text-embedding-3-small)
+             **kwargs: Additional arguments to pass to the LLM client
+         """
+         super().__init__(**kwargs)
+         # Configure Opik tracing for observability only if enabled
+         if OPIK_AVAILABLE:
+             configure_opik()
+             self._opik_provider = "openai"
+         else:
+             self._opik_provider = None
+
+         # Set API key from parameter or environment
+         self.api_key = api_key or os.getenv("OPENAI_API_KEY")
+         if not self.api_key:
+             raise ValueError(
+                 "OpenAI API key is required. Provide api_key parameter or set OPENAI_API_KEY environment variable."
+             )
+
+         # Set base URL (defaults to OpenAI if not provided)
+         self.base_url = base_url or os.getenv("OPENAI_BASE_URL", "https://api.openai.com/v1")
+
+         # Initialize OpenAI client
+         client_kwargs = {"api_key": self.api_key, **kwargs}
+         if self.base_url:
+             client_kwargs["base_url"] = self.base_url
+
+         base_client = OpenAI(**client_kwargs)
+
+         # Wrap with Opik tracking if available
+         self.client = track_openai(base_client) if OPIK_AVAILABLE and is_opik_enabled() else base_client
+
+         # Model configurations
+         self.chat_model = chat_model or os.getenv("OPENAI_CHAT_MODEL", "gpt-3.5-turbo")
+         self.vision_model = vision_model or os.getenv("OPENAI_VISION_MODEL", "gpt-4-vision-preview")
+         self.embed_model = embed_model or os.getenv("OPENAI_EMBED_MODEL", "text-embedding-3-small")
+
+         # Initialize instructor if requested
+         self.instructor = None
+         if instructor:
+             # Create instructor instance for structured output
+             patched_client = patch(self.client, mode=Mode.JSON)
+             self.instructor = Instructor(
+                 client=patched_client,
+                 create=patched_client.chat.completions.create,
+                 mode=Mode.JSON,
+             )
+
+     @track
+     def completion(
+         self,
+         messages: List[Dict[str, str]],
+         temperature: float = 0.7,
+         max_tokens: Optional[int] = None,
+         stream: bool = False,
+         **kwargs,
+     ) -> Union[str, Dict[str, Any]]:
+         """
+         Generate chat completion using the configured model.
+
+         Args:
+             messages: List of message dictionaries with 'role' and 'content' keys
+             temperature: Sampling temperature (0.0 to 2.0)
+             max_tokens: Maximum tokens to generate
+             stream: Whether to stream the response
+             **kwargs: Additional arguments to pass to OpenAI API
+
+         Returns:
+             Generated text response or streaming response
+         """
+
+         try:
+             if self.debug:
+                 logger.debug(f"Chat completion: {messages}")
+             response = self.client.chat.completions.create(
+                 model=self.chat_model,
+                 messages=messages,
+                 temperature=temperature,
+                 max_tokens=max_tokens,
+                 stream=stream,
+                 **kwargs,
+             )
+             if stream:
+                 return response
+             else:
+                 # Log token usage if available
+                 self._log_token_usage_if_available(response)
+                 return response.choices[0].message.content
+         except Exception as e:
+             logger.error(f"Error in chat completion: {e}")
+             raise
+
+     @track
+     def structured_completion(
+         self,
+         messages: List[Dict[str, str]],
+         response_model: Type[T],
+         temperature: float = 0.7,
+         max_tokens: Optional[int] = None,
+         attempts: int = 2,
+         backoff: float = 0.5,
+         **kwargs,
+     ) -> T:
+         """
+         Generate structured completion using instructor.
+
+         Args:
+             messages: List of message dictionaries with 'role' and 'content' keys
+             response_model: Pydantic model class for structured output
+             temperature: Sampling temperature (0.0 to 2.0)
+             max_tokens: Maximum tokens to generate
+             attempts: Number of attempts to make
+             backoff: Backoff factor for exponential backoff
+             **kwargs: Additional arguments to pass to instructor
+
+         Returns:
+             Structured response as the specified model type
+         """
+         if not self.instructor:
+             raise ValueError("Instructor is not enabled. Initialize LLMClient with instructor=True")
+
+         if self.debug:
+             logger.debug(f"Structured completion: {messages}")
+
+         last_err = None
+         for i in range(attempts):
+             try:
+                 # Capture token usage by enabling detailed response
+                 kwargs_with_usage = kwargs.copy()
+                 kwargs_with_usage.setdefault("stream", False)
+
+                 result = self.instructor.create(
+                     model=self.chat_model,
+                     messages=messages,
+                     response_model=response_model,
+                     temperature=temperature,
+                     max_tokens=max_tokens,
+                     **kwargs_with_usage,
+                 )
+
+                 # Try to capture token usage from instructor's underlying response
+                 # The instructor library usually stores the raw response
+                 if hasattr(result, "_raw_response"):
+                     self._log_token_usage_if_available(result._raw_response, "structured")
+                 else:
+                     # If no raw response, try to estimate usage
+                     try:
+                         prompt_text = "\n".join([msg.get("content", "") for msg in messages])
+                         completion_text = str(result)
+                         if hasattr(result, "model_dump_json"):
+                             completion_text = result.model_dump_json()
+
+                         usage = estimate_token_usage(prompt_text, completion_text, self.chat_model, "structured")
+                         get_token_tracker().record_usage(usage)
+                         logger.debug(f"Estimated token usage for structured completion: {usage.total_tokens} tokens")
+                     except Exception as e:
+                         logger.debug(f"Could not estimate token usage: {e}")
+
+                 return result
+             except Exception as e:
+                 last_err = e
+                 if i < attempts - 1:
+                     time.sleep(backoff * (2**i))
+                 else:
+                     logger.error(f"Error in structured completion: {e}")
+                     raise
+         raise last_err
+
+     @track
+     def understand_image(
+         self,
+         image_path: Union[str, Path],
+         prompt: str,
+         temperature: float = 0.7,
+         max_tokens: Optional[int] = None,
+         **kwargs,
+     ) -> str:
+         """
+         Analyze an image using the configured vision model.
+
+         Args:
+             image_path: Path to the image file
+             prompt: Text prompt describing what to analyze in the image
+             temperature: Sampling temperature
+             max_tokens: Maximum tokens to generate
+             **kwargs: Additional arguments
+
+         Returns:
+             Analysis of the image
+         """
+
+         try:
+             # Read and encode the image
+             image_path = Path(image_path)
+             if not image_path.exists():
+                 raise FileNotFoundError(f"Image file not found: {image_path}")
+
+             with open(image_path, "rb") as image_file:
+                 image_data = image_file.read()
+             image_base64 = base64.b64encode(image_data).decode("utf-8")
+
+             # Create message with image (MIME type guessed from the file name)
+             messages = [
+                 {
+                     "role": "user",
+                     "content": [
+                         {"type": "text", "text": prompt},
+                         {
+                             "type": "image_url",
+                             "image_url": {"url": f"data:{mimetypes.guess_type(image_path.name)[0] or 'image/jpeg'};base64,{image_base64}"},
+                         },
+                     ],
+                 }
+             ]
+
+             if self.debug:
+                 logger.debug(f"Understand image: {messages}")
+
+             response = self.client.chat.completions.create(
+                 model=self.vision_model,
+                 messages=messages,
+                 temperature=temperature,
+                 max_tokens=max_tokens,
+                 **kwargs,
+             )
+
+             # Record token usage for vision call
+             self._log_token_usage_if_available(response, "vision")
+             return response.choices[0].message.content
+
+         except Exception as e:
+             logger.error(f"Error analyzing image: {e}")
+             raise
+
+     @track
+     def understand_image_from_url(
+         self,
+         image_url: str,
+         prompt: str,
+         temperature: float = 0.7,
+         max_tokens: Optional[int] = None,
+         **kwargs,
+     ) -> str:
+         """
+         Analyze an image from URL using the configured vision model.
+
+         Args:
+             image_url: URL of the image
+             prompt: Text prompt describing what to analyze in the image
+             temperature: Sampling temperature
+             max_tokens: Maximum tokens to generate
+             **kwargs: Additional arguments
+
+         Returns:
+             Analysis of the image
+         """
+
+         try:
+             messages = [
+                 {
+                     "role": "user",
+                     "content": [
+                         {"type": "text", "text": prompt},
+                         {"type": "image_url", "image_url": {"url": image_url}},
+                     ],
+                 }
+             ]
+
+             if self.debug:
+                 logger.debug(f"Understand image from url: {messages}")
+
+             response = self.client.chat.completions.create(
+                 model=self.vision_model,
+                 messages=messages,
+                 temperature=temperature,
+                 max_tokens=max_tokens,
+                 **kwargs,
+             )
+
+             # Record token usage for vision URL call
+             self._log_token_usage_if_available(response, "vision")
+             return response.choices[0].message.content
+
+         except Exception as e:
+             logger.error(f"Error analyzing image from URL: {e}")
+             raise
+
+     def _log_token_usage_if_available(self, response, call_type: str = "completion"):
+         """Extract and record token usage from OpenAI response if available."""
+         try:
+             usage = extract_token_usage_from_openai_response(response, self.chat_model, call_type)
+             if usage:
+                 get_token_tracker().record_usage(usage)
+                 logger.debug(
+                     f"Token usage - Prompt: {usage.prompt_tokens}, "
+                     f"Completion: {usage.completion_tokens}, "
+                     f"Total: {usage.total_tokens} (model: {usage.model_name})"
+                 )
+         except Exception as e:
+             logger.debug(f"Could not extract token usage: {e}")
+
+     def embed(self, text: str) -> List[float]:
+         """
+         Generate embeddings using OpenAI's embedding model.
+
+         Args:
+             text: Text to embed
+
+         Returns:
+             List of embedding values
+         """
+         try:
+             response = self.client.embeddings.create(
+                 model=self.embed_model,
+                 input=text,
+                 dimensions=self.get_embedding_dimensions(),
+             )
+
+             # Record token usage if available
+             try:
+                 if hasattr(response, "usage") and response.usage:
+                     usage_data = {
+                         "prompt_tokens": response.usage.prompt_tokens,
+                         "completion_tokens": 0,  # Embeddings don't have completion tokens
+                         "total_tokens": response.usage.total_tokens,
+                         "model_name": self.embed_model,
+                         "call_type": "embedding",
+                     }
+                     from noesium.core.tracing import TokenUsage
+
+                     usage = TokenUsage(**usage_data)
+                     get_token_tracker().record_usage(usage)
+                     logger.debug(f"Token usage for embedding: {usage.total_tokens} tokens")
+             except Exception as e:
+                 logger.debug(f"Could not track embedding token usage: {e}")
+
+             embedding = response.data[0].embedding
+
+             # Validate embedding dimensions
+             expected_dims = self.get_embedding_dimensions()
+             if len(embedding) != expected_dims:
+                 logger.warning(
+                     f"Embedding has {len(embedding)} dimensions, expected {expected_dims}. "
+                     f"Consider setting COGENTS_EMBEDDING_DIMS={len(embedding)} or "
+                     f"using a different embedding model."
+                 )
+
+             return embedding
+
+         except Exception as e:
+             logger.error(f"Error generating embedding with OpenAI: {e}")
+             raise
+
+     def embed_batch(self, chunks: List[str]) -> List[List[float]]:
+         """
+         Generate embeddings for multiple texts using OpenAI.
+
+         Args:
+             chunks: List of texts to embed
+
+         Returns:
+             List of embedding lists
+         """
+         try:
+             response = self.client.embeddings.create(
+                 model=self.embed_model,
+                 input=chunks,
+                 dimensions=self.get_embedding_dimensions(),
+             )
+
+             # Record token usage if available
+             try:
+                 if hasattr(response, "usage") and response.usage:
+                     usage_data = {
+                         "prompt_tokens": response.usage.prompt_tokens,
+                         "completion_tokens": 0,
+                         "total_tokens": response.usage.total_tokens,
+                         "model_name": self.embed_model,
+                         "call_type": "embedding",
+                     }
+                     from noesium.core.tracing import TokenUsage
+
+                     usage = TokenUsage(**usage_data)
+                     get_token_tracker().record_usage(usage)
+                     logger.debug(f"Token usage for batch embedding: {usage.total_tokens} tokens")
+             except Exception as e:
+                 logger.debug(f"Could not track batch embedding token usage: {e}")
+
+             embeddings = [item.embedding for item in response.data]
+
+             # Validate embedding dimensions
+             expected_dims = self.get_embedding_dimensions()
+             for i, embedding in enumerate(embeddings):
+                 if len(embedding) != expected_dims:
+                     logger.warning(
+                         f"Embedding at index {i} has {len(embedding)} dimensions, expected {expected_dims}. "
+                         f"Consider setting COGENTS_EMBEDDING_DIMS={len(embedding)} or "
+                         f"using a different embedding model."
+                     )
+
+             return embeddings
+
+         except Exception as e:
+             logger.error(f"Error generating batch embeddings with OpenAI: {e}")
+             # Fallback to individual calls
+             embeddings = []
+             for chunk in chunks:
+                 embedding = self.embed(chunk)
+                 embeddings.append(embedding)
+             return embeddings
+
+     def rerank(self, query: str, chunks: List[str]) -> List[Tuple[float, int, str]]:
+         """
+         Rerank chunks based on their relevance to the query using embeddings.
+
+         Note: OpenAI doesn't have a native reranking API, so this implementation
+         uses a similarity-based approach with embeddings.
+
+         Args:
+             query: The query to rank against
+             chunks: List of text chunks to rerank
+
+         Returns:
+             List of tuples (similarity_score, original_index, chunk_text)
+             sorted by similarity score in descending order
+         """
+         try:
+             # Get embeddings for query and chunks
+             query_embedding = self.embed(query)
+             chunk_embeddings = self.embed_batch(chunks)
+
+             from noesium.core.utils.statistics import cosine_similarity
+
+             # Calculate similarities and sort
+             similarities = []
+             for i, chunk_embedding in enumerate(chunk_embeddings):
+                 similarity = cosine_similarity(query_embedding, chunk_embedding)
+                 similarities.append((similarity, i, chunks[i]))
+
+             # Sort by similarity (descending)
+             similarities.sort(key=lambda x: x[0], reverse=True)
+
+             # Return sorted tuples
+             return similarities
+
+         except Exception as e:
+             logger.error(f"Error reranking with OpenAI: {e}")
+             # Fallback: return original order with zero similarities
+             return [(0.0, i, chunk) for i, chunk in enumerate(chunks)]
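
For orientation, a minimal usage sketch of the client above (not part of the package diff): it assumes noesium is installed with OPENAI_API_KEY set, and the Movie model plus the example strings are hypothetical. As the structured_completion docstring notes, structured output requires constructing the client with instructor=True.

from pydantic import BaseModel

from noesium.core.llm.openai import LLMClient


class Movie(BaseModel):  # hypothetical response model for illustration
    title: str
    year: int


# Reads OPENAI_API_KEY from the environment; instructor=True enables structured output
client = LLMClient(instructor=True)

# Plain chat completion returns the generated message text
answer = client.completion([{"role": "user", "content": "Name a classic sci-fi film."}])

# Structured completion parses the reply into the Pydantic model via instructor
movie = client.structured_completion(
    [{"role": "user", "content": "Name a classic sci-fi film as JSON."}],
    response_model=Movie,
)

# Embedding-based rerank returns (score, original_index, chunk) tuples, best match first
ranked = client.rerank("space travel", ["a film about space", "a cookbook", "rocket design"])
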
noesium/core/llm/openrouter.py
@@ -0,0 +1,89 @@
+ """
+ LLM utilities for Noesium using OpenRouter via OpenAI SDK.
+
+ This module provides:
+ - Chat completion using various models via OpenRouter
+ - Text embeddings using OpenAI text-embedding-3-small
+ - Image understanding using vision models
+ - Instructor integration for structured output
+ """
+
+ import os
+ from typing import Optional, TypeVar
+
+ from noesium.core.consts import GEMINI_FLASH
+ from noesium.core.llm.openai import LLMClient as OpenAILLMClient
+ from noesium.core.tracing.opik_tracing import configure_opik
+ from noesium.core.utils.logging import get_logger
+
+ # Only import Opik if tracing is enabled
+ OPIK_AVAILABLE = False
+ track = lambda func: func  # Default no-op decorator
+ if os.getenv("COGENTS_OPIK_TRACING", "false").lower() == "true":
+     try:
+         import opik  # noqa: F401  (availability check only)
+
+         OPIK_AVAILABLE = True
+     except ImportError:
+         pass
+
+
+ T = TypeVar("T")
+
+ logger = get_logger(__name__)
+
+
+ class LLMClient(OpenAILLMClient):
+     """Client for interacting with LLMs via OpenRouter using OpenAI SDK."""
+
+     def __init__(
+         self,
+         base_url: Optional[str] = None,
+         api_key: Optional[str] = None,
+         instructor: bool = False,
+         chat_model: Optional[str] = None,
+         vision_model: Optional[str] = None,
+         embed_model: Optional[str] = None,
+         **kwargs,
+     ):
+         """
+         Initialize the LLM client.
+
+         Args:
+             base_url: Base URL for the OpenRouter API (defaults to OpenRouter's URL)
+             api_key: API key for authentication (defaults to OPENROUTER_API_KEY env var)
+             instructor: Whether to enable instructor for structured output
+             chat_model: Model to use for chat completions (defaults to gemini-flash)
+             vision_model: Model to use for vision tasks (defaults to gemini-flash)
+             embed_model: Model to use for embeddings (defaults to text-embedding-3-small)
+             **kwargs: Additional arguments to pass to OpenAILLMClient
+         """
+         self.openrouter_api_key = api_key or os.getenv("OPENROUTER_API_KEY")
+         if not self.openrouter_api_key:
+             raise ValueError(
+                 "OpenRouter API key is required. Provide api_key parameter or set OPENROUTER_API_KEY environment variable."
+             )
+
+         self.base_url = base_url or os.getenv("OPENROUTER_BASE_URL", "https://openrouter.ai/api/v1")
+
+         # Model configurations (can be overridden by environment variables)
+         self.chat_model = chat_model or os.getenv("OPENROUTER_CHAT_MODEL", GEMINI_FLASH)
+         self.vision_model = vision_model or os.getenv("OPENROUTER_VISION_MODEL", GEMINI_FLASH)
+         self.embed_model = embed_model or os.getenv("OPENROUTER_EMBED_MODEL", "text-embedding-3-small")
+
+         super().__init__(
+             base_url=self.base_url,
+             api_key=self.openrouter_api_key,
+             instructor=instructor,
+             chat_model=self.chat_model,
+             vision_model=self.vision_model,
+             embed_model=self.embed_model,
+             **kwargs,
+         )
+
+         # Configure Opik tracing for observability only if enabled
+         if OPIK_AVAILABLE:
+             configure_opik()
+             self._opik_provider = "openrouter"
+         else:
+             self._opik_provider = None
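
And a corresponding sketch for the OpenRouter subclass, again hypothetical: it assumes OPENROUTER_API_KEY is set, and the model id shown is illustrative (by default the client falls back to the GEMINI_FLASH constant from noesium.core.consts).

from noesium.core.llm.openrouter import LLMClient

# Reads OPENROUTER_API_KEY and targets https://openrouter.ai/api/v1 by default
client = LLMClient(chat_model="google/gemini-2.0-flash-001")  # illustrative OpenRouter model id

# Inherits completion(), structured_completion(), embed(), and rerank() from the OpenAI client
reply = client.completion([{"role": "user", "content": "Summarize the Noesium LLM layer in one line."}])
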