noesium-0.1.0-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- noesium/core/__init__.py +4 -0
- noesium/core/agent/__init__.py +14 -0
- noesium/core/agent/base.py +227 -0
- noesium/core/consts.py +6 -0
- noesium/core/goalith/conflict/conflict.py +104 -0
- noesium/core/goalith/conflict/detector.py +53 -0
- noesium/core/goalith/decomposer/__init__.py +6 -0
- noesium/core/goalith/decomposer/base.py +46 -0
- noesium/core/goalith/decomposer/callable_decomposer.py +65 -0
- noesium/core/goalith/decomposer/llm_decomposer.py +326 -0
- noesium/core/goalith/decomposer/prompts.py +140 -0
- noesium/core/goalith/decomposer/simple_decomposer.py +61 -0
- noesium/core/goalith/errors.py +22 -0
- noesium/core/goalith/goalgraph/graph.py +526 -0
- noesium/core/goalith/goalgraph/node.py +179 -0
- noesium/core/goalith/replanner/base.py +31 -0
- noesium/core/goalith/replanner/replanner.py +36 -0
- noesium/core/goalith/service.py +26 -0
- noesium/core/llm/__init__.py +154 -0
- noesium/core/llm/base.py +152 -0
- noesium/core/llm/litellm.py +528 -0
- noesium/core/llm/llamacpp.py +487 -0
- noesium/core/llm/message.py +184 -0
- noesium/core/llm/ollama.py +459 -0
- noesium/core/llm/openai.py +520 -0
- noesium/core/llm/openrouter.py +89 -0
- noesium/core/llm/prompt.py +551 -0
- noesium/core/memory/__init__.py +11 -0
- noesium/core/memory/base.py +464 -0
- noesium/core/memory/memu/__init__.py +24 -0
- noesium/core/memory/memu/config/__init__.py +26 -0
- noesium/core/memory/memu/config/activity/config.py +46 -0
- noesium/core/memory/memu/config/event/config.py +46 -0
- noesium/core/memory/memu/config/markdown_config.py +241 -0
- noesium/core/memory/memu/config/profile/config.py +48 -0
- noesium/core/memory/memu/llm_adapter.py +129 -0
- noesium/core/memory/memu/memory/__init__.py +31 -0
- noesium/core/memory/memu/memory/actions/__init__.py +40 -0
- noesium/core/memory/memu/memory/actions/add_activity_memory.py +299 -0
- noesium/core/memory/memu/memory/actions/base_action.py +342 -0
- noesium/core/memory/memu/memory/actions/cluster_memories.py +262 -0
- noesium/core/memory/memu/memory/actions/generate_suggestions.py +198 -0
- noesium/core/memory/memu/memory/actions/get_available_categories.py +66 -0
- noesium/core/memory/memu/memory/actions/link_related_memories.py +515 -0
- noesium/core/memory/memu/memory/actions/run_theory_of_mind.py +254 -0
- noesium/core/memory/memu/memory/actions/update_memory_with_suggestions.py +514 -0
- noesium/core/memory/memu/memory/embeddings.py +130 -0
- noesium/core/memory/memu/memory/file_manager.py +306 -0
- noesium/core/memory/memu/memory/memory_agent.py +578 -0
- noesium/core/memory/memu/memory/recall_agent.py +376 -0
- noesium/core/memory/memu/memory_store.py +628 -0
- noesium/core/memory/models.py +149 -0
- noesium/core/msgbus/__init__.py +12 -0
- noesium/core/msgbus/base.py +395 -0
- noesium/core/orchestrix/__init__.py +0 -0
- noesium/core/py.typed +0 -0
- noesium/core/routing/__init__.py +20 -0
- noesium/core/routing/base.py +66 -0
- noesium/core/routing/router.py +241 -0
- noesium/core/routing/strategies/__init__.py +9 -0
- noesium/core/routing/strategies/dynamic_complexity.py +361 -0
- noesium/core/routing/strategies/self_assessment.py +147 -0
- noesium/core/routing/types.py +38 -0
- noesium/core/toolify/__init__.py +39 -0
- noesium/core/toolify/base.py +360 -0
- noesium/core/toolify/config.py +138 -0
- noesium/core/toolify/mcp_integration.py +275 -0
- noesium/core/toolify/registry.py +214 -0
- noesium/core/toolify/toolkits/__init__.py +1 -0
- noesium/core/tracing/__init__.py +37 -0
- noesium/core/tracing/langgraph_hooks.py +308 -0
- noesium/core/tracing/opik_tracing.py +144 -0
- noesium/core/tracing/token_tracker.py +166 -0
- noesium/core/utils/__init__.py +10 -0
- noesium/core/utils/logging.py +172 -0
- noesium/core/utils/statistics.py +12 -0
- noesium/core/utils/typing.py +17 -0
- noesium/core/vector_store/__init__.py +79 -0
- noesium/core/vector_store/base.py +94 -0
- noesium/core/vector_store/pgvector.py +304 -0
- noesium/core/vector_store/weaviate.py +383 -0
- noesium-0.1.0.dist-info/METADATA +525 -0
- noesium-0.1.0.dist-info/RECORD +86 -0
- noesium-0.1.0.dist-info/WHEEL +5 -0
- noesium-0.1.0.dist-info/licenses/LICENSE +21 -0
- noesium-0.1.0.dist-info/top_level.txt +1 -0
noesium/core/llm/openai.py
@@ -0,0 +1,520 @@
"""
LLM utilities for Noesium using OpenAI-compatible APIs.

This module provides:
- Chat completion using various models via OpenAI-compatible endpoints
- Image understanding using vision models
- Instructor integration for structured output

- Configurable base URL and API key for OpenAI-compatible services
"""

import base64
import os
import time
from pathlib import Path
from typing import Any, Dict, List, Optional, Tuple, Type, TypeVar, Union

# Import instructor for structured output
from instructor import Instructor, Mode, patch
from openai import OpenAI

from noesium.core.llm.base import BaseLLMClient
from noesium.core.tracing import (
    configure_opik,
    estimate_token_usage,
    extract_token_usage_from_openai_response,
    get_token_tracker,
    is_opik_enabled,
)
from noesium.core.utils.logging import get_logger

# Only import OPIK if tracing is enabled
OPIK_AVAILABLE = False
track = lambda func: func  # Default no-op decorator
track_openai = lambda client: client  # Default no-op function
if os.getenv("COGENTS_OPIK_TRACING", "false").lower() == "true":
    try:
        from opik import track
        from opik.integrations.openai import track_openai

        OPIK_AVAILABLE = True
    except ImportError:
        pass


T = TypeVar("T")

logger = get_logger(__name__)


class LLMClient(BaseLLMClient):
    """Client for interacting with OpenAI-compatible LLM services."""

    def __init__(
        self,
        base_url: Optional[str] = None,
        api_key: Optional[str] = None,
        instructor: bool = False,
        chat_model: Optional[str] = None,
        vision_model: Optional[str] = None,
        embed_model: Optional[str] = None,
        **kwargs,
    ):
        """
        Initialize the LLM client.

        Args:
            base_url: Base URL for the OpenAI-compatible API (defaults to OpenAI's URL)
            api_key: API key for authentication (defaults to OPENAI_API_KEY env var)
            instructor: Whether to enable instructor for structured output
            chat_model: Model to use for chat completions (defaults to gpt-3.5-turbo)
            vision_model: Model to use for vision tasks (defaults to gpt-4-vision-preview)
            embed_model: Model to use for embeddings (defaults to text-embedding-3-small)
            **kwargs: Additional arguments to pass to the LLM client
        """
        super().__init__(**kwargs)
        # Configure Opik tracing for observability only if enabled
        if OPIK_AVAILABLE:
            configure_opik()
            self._opik_provider = "openai"
        else:
            self._opik_provider = None

        # Set API key from parameter or environment
        self.api_key = api_key or os.getenv("OPENAI_API_KEY")
        if not self.api_key:
            raise ValueError(
                "OpenAI API key is required. Provide api_key parameter or set OPENAI_API_KEY environment variable."
            )

        # Set base URL (defaults to OpenAI if not provided)
        self.base_url = base_url or os.getenv("OPENAI_BASE_URL", "https://api.openai.com/v1")

        # Initialize OpenAI client
        client_kwargs = {"api_key": self.api_key, **kwargs}
        if self.base_url:
            client_kwargs["base_url"] = self.base_url

        base_client = OpenAI(**client_kwargs)

        # Wrap with Opik tracking if available
        self.client = track_openai(base_client) if OPIK_AVAILABLE and is_opik_enabled() else base_client

        # Model configurations
        self.chat_model = chat_model or os.getenv("OPENAI_CHAT_MODEL", "gpt-3.5-turbo")
        self.vision_model = vision_model or os.getenv("OPENAI_VISION_MODEL", "gpt-4-vision-preview")
        self.embed_model = embed_model or os.getenv("OPENAI_EMBED_MODEL", "text-embedding-3-small")

        # Initialize instructor if requested
        self.instructor = None
        if instructor:
            # Create instructor instance for structured output
            patched_client = patch(self.client, mode=Mode.JSON)
            self.instructor = Instructor(
                client=patched_client,
                create=patched_client.chat.completions.create,
                mode=Mode.JSON,
            )

    @track
    def completion(
        self,
        messages: List[Dict[str, str]],
        temperature: float = 0.7,
        max_tokens: Optional[int] = None,
        stream: bool = False,
        **kwargs,
    ) -> Union[str, Dict[str, Any]]:
        """
        Generate chat completion using the configured model.

        Args:
            messages: List of message dictionaries with 'role' and 'content' keys
            temperature: Sampling temperature (0.0 to 2.0)
            max_tokens: Maximum tokens to generate
            stream: Whether to stream the response
            **kwargs: Additional arguments to pass to OpenAI API

        Returns:
            Generated text response or streaming response
        """

        try:
            if self.debug:
                logger.debug(f"Chat completion: {messages}")
            response = self.client.chat.completions.create(
                model=self.chat_model,
                messages=messages,
                temperature=temperature,
                max_tokens=max_tokens,
                stream=stream,
                **kwargs,
            )
            if stream:
                return response
            else:
                # Log token usage if available
                self._log_token_usage_if_available(response)
                return response.choices[0].message.content
        except Exception as e:
            logger.error(f"Error in chat completion: {e}")
            raise

    @track
    def structured_completion(
        self,
        messages: List[Dict[str, str]],
        response_model: Type[T],
        temperature: float = 0.7,
        max_tokens: Optional[int] = None,
        attempts: int = 2,
        backoff: float = 0.5,
        **kwargs,
    ) -> T:
        """
        Generate structured completion using instructor.

        Args:
            messages: List of message dictionaries with 'role' and 'content' keys
            response_model: Pydantic model class for structured output
            temperature: Sampling temperature (0.0 to 2.0)
            max_tokens: Maximum tokens to generate
            attempts: Number of attempts to make
            backoff: Backoff factor for exponential backoff
            **kwargs: Additional arguments to pass to instructor

        Returns:
            Structured response as the specified model type
        """
        if not self.instructor:
            raise ValueError("Instructor is not enabled. Initialize LLMClient with instructor=True")

        if self.debug:
            logger.debug(f"Structured completion: {messages}")

        last_err = None
        for i in range(attempts):
            try:
                # Capture token usage by enabling detailed response
                kwargs_with_usage = kwargs.copy()
                kwargs_with_usage.setdefault("stream", False)

                result = self.instructor.create(
                    model=self.chat_model,
                    messages=messages,
                    response_model=response_model,
                    temperature=temperature,
                    max_tokens=max_tokens,
                    **kwargs_with_usage,
                )

                # Try to capture token usage from instructor's underlying response
                # The instructor library usually stores the raw response
                if hasattr(result, "_raw_response"):
                    self._log_token_usage_if_available(result._raw_response, "structured")
                else:
                    # If no raw response, try to estimate usage
                    try:
                        prompt_text = "\n".join([msg.get("content", "") for msg in messages])
                        completion_text = str(result)
                        if hasattr(result, "model_dump_json"):
                            completion_text = result.model_dump_json()

                        usage = estimate_token_usage(prompt_text, completion_text, self.chat_model, "structured")
                        get_token_tracker().record_usage(usage)
                        logger.debug(f"Estimated token usage for structured completion: {usage.total_tokens} tokens")
                    except Exception as e:
                        logger.debug(f"Could not estimate token usage: {e}")

                return result
            except Exception as e:
                last_err = e
                if i < attempts - 1:
                    time.sleep(backoff * (2**i))
                else:
                    logger.error(f"Error in structured completion: {e}")
                    raise
        raise last_err

    @track
    def understand_image(
        self,
        image_path: Union[str, Path],
        prompt: str,
        temperature: float = 0.7,
        max_tokens: Optional[int] = None,
        **kwargs,
    ) -> str:
        """
        Analyze an image using the configured vision model.

        Args:
            image_path: Path to the image file
            prompt: Text prompt describing what to analyze in the image
            temperature: Sampling temperature
            max_tokens: Maximum tokens to generate
            **kwargs: Additional arguments

        Returns:
            Analysis of the image
        """

        try:
            # Read and encode the image
            image_path = Path(image_path)
            if not image_path.exists():
                raise FileNotFoundError(f"Image file not found: {image_path}")

            with open(image_path, "rb") as image_file:
                image_data = image_file.read()
                image_base64 = base64.b64encode(image_data).decode("utf-8")

            # Create message with image
            messages = [
                {
                    "role": "user",
                    "content": [
                        {"type": "text", "text": prompt},
                        {
                            "type": "image_url",
                            "image_url": {"url": f"data:image/jpeg;base64,{image_base64}"},
                        },
                    ],
                }
            ]

            if self.debug:
                logger.debug(f"Understand image: {messages}")

            response = self.client.chat.completions.create(
                model=self.vision_model,
                messages=messages,
                temperature=temperature,
                max_tokens=max_tokens,
                **kwargs,
            )

            # Record token usage for vision call
            self._log_token_usage_if_available(response, "vision")
            return response.choices[0].message.content

        except Exception as e:
            logger.error(f"Error analyzing image: {e}")
            raise

    @track
    def understand_image_from_url(
        self,
        image_url: str,
        prompt: str,
        temperature: float = 0.7,
        max_tokens: Optional[int] = None,
        **kwargs,
    ) -> str:
        """
        Analyze an image from URL using the configured vision model.

        Args:
            image_url: URL of the image
            prompt: Text prompt describing what to analyze in the image
            temperature: Sampling temperature
            max_tokens: Maximum tokens to generate
            **kwargs: Additional arguments

        Returns:
            Analysis of the image
        """

        try:
            messages = [
                {
                    "role": "user",
                    "content": [
                        {"type": "text", "text": prompt},
                        {"type": "image_url", "image_url": {"url": image_url}},
                    ],
                }
            ]

            if self.debug:
                logger.debug(f"Understand image from url: {messages}")

            response = self.client.chat.completions.create(
                model=self.vision_model,
                messages=messages,
                temperature=temperature,
                max_tokens=max_tokens,
                **kwargs,
            )

            # Record token usage for vision URL call
            self._log_token_usage_if_available(response, "vision")
            return response.choices[0].message.content

        except Exception as e:
            logger.error(f"Error analyzing image from URL: {e}")
            raise

    def _log_token_usage_if_available(self, response, call_type: str = "completion"):
        """Extract and record token usage from OpenAI response if available."""
        try:
            usage = extract_token_usage_from_openai_response(response, self.chat_model, call_type)
            if usage:
                get_token_tracker().record_usage(usage)
                logger.debug(
                    f"Token usage - Prompt: {usage.prompt_tokens}, "
                    f"Completion: {usage.completion_tokens}, "
                    f"Total: {usage.total_tokens} (model: {usage.model_name})"
                )
        except Exception as e:
            logger.debug(f"Could not extract token usage: {e}")

    def embed(self, text: str) -> List[float]:
        """
        Generate embeddings using OpenAI's embedding model.

        Args:
            text: Text to embed

        Returns:
            List of embedding values
        """
        try:
            response = self.client.embeddings.create(
                model=self.embed_model,
                input=text,
                dimensions=self.get_embedding_dimensions(),
            )

            # Record token usage if available
            try:
                if hasattr(response, "usage") and response.usage:
                    usage_data = {
                        "prompt_tokens": response.usage.prompt_tokens,
                        "completion_tokens": 0,  # Embeddings don't have completion tokens
                        "total_tokens": response.usage.total_tokens,
                        "model_name": self.embed_model,
                        "call_type": "embedding",
                    }
                    from noesium.core.tracing import TokenUsage

                    usage = TokenUsage(**usage_data)
                    get_token_tracker().record_usage(usage)
                    logger.debug(f"Token usage for embedding: {usage.total_tokens} tokens")
            except Exception as e:
                logger.debug(f"Could not track embedding token usage: {e}")

            embedding = response.data[0].embedding

            # Validate embedding dimensions
            expected_dims = self.get_embedding_dimensions()
            if len(embedding) != expected_dims:
                logger.warning(
                    f"Embedding has {len(embedding)} dimensions, expected {expected_dims}. "
                    f"Consider setting COGENTS_EMBEDDING_DIMS={len(embedding)} or "
                    f"using a different embedding model."
                )

            return embedding

        except Exception as e:
            logger.error(f"Error generating embedding with OpenAI: {e}")
            raise

    def embed_batch(self, chunks: List[str]) -> List[List[float]]:
        """
        Generate embeddings for multiple texts using OpenAI.

        Args:
            chunks: List of texts to embed

        Returns:
            List of embedding lists
        """
        try:
            response = self.client.embeddings.create(
                model=self.embed_model,
                input=chunks,
                dimensions=self.get_embedding_dimensions(),
            )

            # Record token usage if available
            try:
                if hasattr(response, "usage") and response.usage:
                    usage_data = {
                        "prompt_tokens": response.usage.prompt_tokens,
                        "completion_tokens": 0,
                        "total_tokens": response.usage.total_tokens,
                        "model_name": self.embed_model,
                        "call_type": "embedding",
                    }
                    from noesium.core.tracing import TokenUsage

                    usage = TokenUsage(**usage_data)
                    get_token_tracker().record_usage(usage)
                    logger.debug(f"Token usage for batch embedding: {usage.total_tokens} tokens")
            except Exception as e:
                logger.debug(f"Could not track batch embedding token usage: {e}")

            embeddings = [item.embedding for item in response.data]

            # Validate embedding dimensions
            expected_dims = self.get_embedding_dimensions()
            for i, embedding in enumerate(embeddings):
                if len(embedding) != expected_dims:
                    logger.warning(
                        f"Embedding at index {i} has {len(embedding)} dimensions, expected {expected_dims}. "
                        f"Consider setting COGENTS_EMBEDDING_DIMS={len(embedding)} or "
                        f"using a different embedding model."
                    )

            return embeddings

        except Exception as e:
            logger.error(f"Error generating batch embeddings with OpenAI: {e}")
            # Fallback to individual calls
            embeddings = []
            for chunk in chunks:
                embedding = self.embed(chunk)
                embeddings.append(embedding)
            return embeddings

    def rerank(self, query: str, chunks: List[str]) -> List[Tuple[float, int, str]]:
        """
        Rerank chunks based on their relevance to the query using embeddings.

        Note: OpenAI doesn't have a native reranking API, so this implementation
        uses a similarity-based approach with embeddings.

        Args:
            query: The query to rank against
            chunks: List of text chunks to rerank

        Returns:
            List of tuples (similarity_score, original_index, chunk_text)
            sorted by similarity score in descending order
        """
        try:
            # Get embeddings for query and chunks
            query_embedding = self.embed(query)
            chunk_embeddings = self.embed_batch(chunks)

            from noesium.core.utils.statistics import cosine_similarity

            # Calculate similarities and sort
            similarities = []
            for i, chunk_embedding in enumerate(chunk_embeddings):
                similarity = cosine_similarity(query_embedding, chunk_embedding)
                similarities.append((similarity, i, chunks[i]))

            # Sort by similarity (descending)
            similarities.sort(key=lambda x: x[0], reverse=True)

            # Return sorted tuples
            return similarities

        except Exception as e:
            logger.error(f"Error reranking with OpenAI: {e}")
            # Fallback: return original order with zero similarities
            return [(0.0, i, chunk) for i, chunk in enumerate(chunks)]
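For orientation only, and not part of the packaged file above: a minimal usage sketch of this OpenAI-compatible client. It assumes OPENAI_API_KEY is set, that instructor and pydantic are installed, and uses a hypothetical CityInfo response model plus an example model name; the package defaults are gpt-3.5-turbo, gpt-4-vision-preview, and text-embedding-3-small as shown in __init__.

```python
# Illustrative sketch only; not part of the packaged openai.py above.
from pydantic import BaseModel

from noesium.core.llm.openai import LLMClient


class CityInfo(BaseModel):
    # Hypothetical response model used only for this example.
    name: str
    country: str


# Assumes OPENAI_API_KEY is set; chat_model here is an example override.
client = LLMClient(instructor=True, chat_model="gpt-4o-mini")

# Plain chat completion returns the assistant message text.
text = client.completion([{"role": "user", "content": "Name one coastal city."}])

# Structured completion parses the reply into the Pydantic model via instructor.
city = client.structured_completion(
    messages=[{"role": "user", "content": "Return a coastal city as JSON."}],
    response_model=CityInfo,
)
print(text, city.name, city.country)
```

Note that rerank() is built on embed()/embed_batch() plus cosine similarity rather than a dedicated reranking endpoint, so it needs no setup beyond the embedding model.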
noesium/core/llm/openrouter.py
@@ -0,0 +1,89 @@
"""
LLM utilities for Noesium using OpenRouter via OpenAI SDK.

This module provides:
- Chat completion using various models via OpenRouter
- Text embeddings using OpenAI text-embedding-3-small
- Image understanding using vision models
- Instructor integration for structured output

"""

import os
from typing import Optional, TypeVar

from noesium.core.consts import GEMINI_FLASH
from noesium.core.llm.openai import LLMClient as OpenAILLMClient
from noesium.core.tracing.opik_tracing import configure_opik
from noesium.core.utils.logging import get_logger

# Only import OPIK if tracing is enabled
OPIK_AVAILABLE = False
track = lambda func: func  # Default no-op decorator
if os.getenv("COGENTS_OPIK_TRACING", "false").lower() == "true":
    try:
        pass

        OPIK_AVAILABLE = True
    except ImportError:
        pass


T = TypeVar("T")

logger = get_logger(__name__)


class LLMClient(OpenAILLMClient):
    """Client for interacting with LLMs via OpenRouter using OpenAI SDK."""

    def __init__(
        self,
        base_url: Optional[str] = None,
        api_key: Optional[str] = None,
        instructor: bool = False,
        chat_model: Optional[str] = None,
        vision_model: Optional[str] = None,
        embed_model: Optional[str] = None,
        **kwargs,
    ):
        """
        Initialize the LLM client.

        Args:
            base_url: Base URL for the OpenRouter API (defaults to OpenRouter's URL)
            api_key: API key for authentication (defaults to OPENROUTER_API_KEY env var)
            instructor: Whether to enable instructor for structured output
            chat_model: Model to use for chat completions (defaults to gemini-flash)
            vision_model: Model to use for vision tasks (defaults to gemini-flash)
            **kwargs: Additional arguments to pass to OpenAILLMClient
        """
        self.openrouter_api_key = api_key or os.getenv("OPENROUTER_API_KEY")
        if not self.openrouter_api_key:
            raise ValueError(
                "OpenRouter API key is required. Provide api_key parameter or set OPENROUTER_API_KEY environment variable."
            )

        self.base_url = base_url or os.getenv("OPENROUTER_BASE_URL", "https://openrouter.ai/api/v1")

        # Model configurations (can be overridden by environment variables)
        self.chat_model = chat_model or os.getenv("OPENROUTER_CHAT_MODEL", GEMINI_FLASH)
        self.vision_model = vision_model or os.getenv("OPENROUTER_VISION_MODEL", GEMINI_FLASH)
        self.embed_model = embed_model or os.getenv("OPENROUTER_EMBED_MODEL", "text-embedding-3-small")

        super().__init__(
            base_url=self.base_url,
            api_key=self.openrouter_api_key,
            instructor=instructor,
            chat_model=self.chat_model,
            vision_model=self.vision_model,
            embed_model=self.embed_model,
            **kwargs,
        )

        # Configure Opik tracing for observability only if enabled
        if OPIK_AVAILABLE:
            configure_opik()
            self._opik_provider = "openrouter"
        else:
            self._opik_provider = None