noesium-0.1.0-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- noesium/core/__init__.py +4 -0
- noesium/core/agent/__init__.py +14 -0
- noesium/core/agent/base.py +227 -0
- noesium/core/consts.py +6 -0
- noesium/core/goalith/conflict/conflict.py +104 -0
- noesium/core/goalith/conflict/detector.py +53 -0
- noesium/core/goalith/decomposer/__init__.py +6 -0
- noesium/core/goalith/decomposer/base.py +46 -0
- noesium/core/goalith/decomposer/callable_decomposer.py +65 -0
- noesium/core/goalith/decomposer/llm_decomposer.py +326 -0
- noesium/core/goalith/decomposer/prompts.py +140 -0
- noesium/core/goalith/decomposer/simple_decomposer.py +61 -0
- noesium/core/goalith/errors.py +22 -0
- noesium/core/goalith/goalgraph/graph.py +526 -0
- noesium/core/goalith/goalgraph/node.py +179 -0
- noesium/core/goalith/replanner/base.py +31 -0
- noesium/core/goalith/replanner/replanner.py +36 -0
- noesium/core/goalith/service.py +26 -0
- noesium/core/llm/__init__.py +154 -0
- noesium/core/llm/base.py +152 -0
- noesium/core/llm/litellm.py +528 -0
- noesium/core/llm/llamacpp.py +487 -0
- noesium/core/llm/message.py +184 -0
- noesium/core/llm/ollama.py +459 -0
- noesium/core/llm/openai.py +520 -0
- noesium/core/llm/openrouter.py +89 -0
- noesium/core/llm/prompt.py +551 -0
- noesium/core/memory/__init__.py +11 -0
- noesium/core/memory/base.py +464 -0
- noesium/core/memory/memu/__init__.py +24 -0
- noesium/core/memory/memu/config/__init__.py +26 -0
- noesium/core/memory/memu/config/activity/config.py +46 -0
- noesium/core/memory/memu/config/event/config.py +46 -0
- noesium/core/memory/memu/config/markdown_config.py +241 -0
- noesium/core/memory/memu/config/profile/config.py +48 -0
- noesium/core/memory/memu/llm_adapter.py +129 -0
- noesium/core/memory/memu/memory/__init__.py +31 -0
- noesium/core/memory/memu/memory/actions/__init__.py +40 -0
- noesium/core/memory/memu/memory/actions/add_activity_memory.py +299 -0
- noesium/core/memory/memu/memory/actions/base_action.py +342 -0
- noesium/core/memory/memu/memory/actions/cluster_memories.py +262 -0
- noesium/core/memory/memu/memory/actions/generate_suggestions.py +198 -0
- noesium/core/memory/memu/memory/actions/get_available_categories.py +66 -0
- noesium/core/memory/memu/memory/actions/link_related_memories.py +515 -0
- noesium/core/memory/memu/memory/actions/run_theory_of_mind.py +254 -0
- noesium/core/memory/memu/memory/actions/update_memory_with_suggestions.py +514 -0
- noesium/core/memory/memu/memory/embeddings.py +130 -0
- noesium/core/memory/memu/memory/file_manager.py +306 -0
- noesium/core/memory/memu/memory/memory_agent.py +578 -0
- noesium/core/memory/memu/memory/recall_agent.py +376 -0
- noesium/core/memory/memu/memory_store.py +628 -0
- noesium/core/memory/models.py +149 -0
- noesium/core/msgbus/__init__.py +12 -0
- noesium/core/msgbus/base.py +395 -0
- noesium/core/orchestrix/__init__.py +0 -0
- noesium/core/py.typed +0 -0
- noesium/core/routing/__init__.py +20 -0
- noesium/core/routing/base.py +66 -0
- noesium/core/routing/router.py +241 -0
- noesium/core/routing/strategies/__init__.py +9 -0
- noesium/core/routing/strategies/dynamic_complexity.py +361 -0
- noesium/core/routing/strategies/self_assessment.py +147 -0
- noesium/core/routing/types.py +38 -0
- noesium/core/toolify/__init__.py +39 -0
- noesium/core/toolify/base.py +360 -0
- noesium/core/toolify/config.py +138 -0
- noesium/core/toolify/mcp_integration.py +275 -0
- noesium/core/toolify/registry.py +214 -0
- noesium/core/toolify/toolkits/__init__.py +1 -0
- noesium/core/tracing/__init__.py +37 -0
- noesium/core/tracing/langgraph_hooks.py +308 -0
- noesium/core/tracing/opik_tracing.py +144 -0
- noesium/core/tracing/token_tracker.py +166 -0
- noesium/core/utils/__init__.py +10 -0
- noesium/core/utils/logging.py +172 -0
- noesium/core/utils/statistics.py +12 -0
- noesium/core/utils/typing.py +17 -0
- noesium/core/vector_store/__init__.py +79 -0
- noesium/core/vector_store/base.py +94 -0
- noesium/core/vector_store/pgvector.py +304 -0
- noesium/core/vector_store/weaviate.py +383 -0
- noesium-0.1.0.dist-info/METADATA +525 -0
- noesium-0.1.0.dist-info/RECORD +86 -0
- noesium-0.1.0.dist-info/WHEEL +5 -0
- noesium-0.1.0.dist-info/licenses/LICENSE +21 -0
- noesium-0.1.0.dist-info/top_level.txt +1 -0
noesium/core/llm/litellm.py

@@ -0,0 +1,528 @@

"""
LiteLLM provider for Cogents.

This module provides:
- Unified interface to multiple LLM providers via LiteLLM
- Chat completion using various models through LiteLLM
- Image understanding using vision models
- Instructor integration for structured output

- Support for OpenAI, Anthropic, Cohere, Ollama, and many other providers
"""

import base64
import os
import time
from pathlib import Path
from typing import Any, Dict, List, Optional, Tuple, Type, TypeVar, Union

import litellm
from instructor import Instructor, Mode, patch

from noesium.core.llm.base import BaseLLMClient
from noesium.core.tracing import configure_opik, estimate_token_usage, get_token_tracker, is_opik_enabled
from noesium.core.utils.logging import get_logger

# Only import OPIK if tracing is enabled
OPIK_AVAILABLE = False
track = lambda func: func  # Default no-op decorator
if os.getenv("COGENTS_OPIK_TRACING", "false").lower() == "true":
    try:
        from opik import track

        OPIK_AVAILABLE = True
    except ImportError:
        pass


T = TypeVar("T")

logger = get_logger(__name__)


class LLMClient(BaseLLMClient):
    """Client for interacting with multiple LLM services via LiteLLM."""

    def __init__(
        self,
        base_url: Optional[str] = None,
        api_key: Optional[str] = None,
        instructor: bool = False,
        chat_model: Optional[str] = None,
        vision_model: Optional[str] = None,
        embed_model: Optional[str] = None,
        **kwargs,
    ):
        """
        Initialize the LiteLLM client.

        Args:
            base_url: Base URL for custom API endpoints (optional)
            api_key: API key for the provider (can be set via environment variables)
            instructor: Whether to enable instructor for structured output
            chat_model: Model to use for chat completions (e.g., "gpt-3.5-turbo", "claude-3-sonnet")
            vision_model: Model to use for vision tasks (e.g., "gpt-4-vision-preview", "claude-3-sonnet")
            **kwargs: Additional arguments
        """
        super().__init__(**kwargs)
        # Configure Opik tracing for observability only if enabled
        if OPIK_AVAILABLE:
            configure_opik()
            self._opik_provider = "litellm"
        else:
            self._opik_provider = None

        # Set base URL if provided
        self.base_url = base_url
        if self.base_url:
            litellm.api_base = self.base_url

        # Set API key if provided
        self.api_key = api_key
        if self.api_key:
            litellm.api_key = self.api_key

        # Model configurations
        self.chat_model = chat_model or os.getenv("LITELLM_CHAT_MODEL", "gpt-3.5-turbo")
        self.vision_model = vision_model or os.getenv("LITELLM_VISION_MODEL", "gpt-4-vision-preview")
        self.embed_model = embed_model or os.getenv("LITELLM_EMBED_MODEL", "text-embedding-ada-002")

        # Initialize instructor if requested
        self.instructor = None
        if instructor:
            try:
                from openai import OpenAI

                # Create a mock client for instructor
                mock_client = OpenAI(
                    api_key="litellm",
                    base_url="http://localhost:8000",  # LiteLLM proxy default
                )
                patched_client = patch(mock_client, mode=Mode.JSON)
                self.instructor = Instructor(
                    client=patched_client,
                    create=patched_client.chat.completions.create,
                    mode=Mode.JSON,
                )
            except ImportError:
                logger.warning("OpenAI package not available, structured completion will not work")

        # Configure LiteLLM settings
        litellm.drop_params = True  # Drop unsupported parameters
        litellm.set_verbose = False  # Reduce verbosity

    @track
    def completion(
        self,
        messages: List[Dict[str, str]],
        temperature: float = 0.7,
        max_tokens: Optional[int] = None,
        stream: bool = False,
        **kwargs,
    ) -> Union[str, Dict[str, Any]]:
        """
        Generate chat completion using LiteLLM.

        Args:
            messages: List of message dictionaries with 'role' and 'content' keys
            temperature: Sampling temperature (0.0 to 2.0)
            max_tokens: Maximum tokens to generate
            stream: Whether to stream the response
            **kwargs: Additional arguments

        Returns:
            Generated text response or streaming response
        """
        # Add Opik tracing metadata
        opik_metadata = {}
        if is_opik_enabled():
            opik_metadata = {
                "provider": self._opik_provider,
                "model": self.chat_model,
                "temperature": temperature,
                "max_tokens": max_tokens,
                "stream": stream,
                "call_type": "completion",
            }

        try:
            if self.debug:
                logger.debug(f"Chat completion: {messages}")

            response = litellm.completion(
                model=self.chat_model,
                messages=messages,
                temperature=temperature,
                max_tokens=max_tokens,
                stream=stream,
                **kwargs,
            )

            if stream:
                return response
            else:
                # Extract token usage if available
                try:
                    if hasattr(response, "usage") and response.usage:
                        usage_data = {
                            "prompt_tokens": response.usage.prompt_tokens,
                            "completion_tokens": response.usage.completion_tokens,
                            "total_tokens": response.usage.total_tokens,
                            "model_name": self.chat_model,
                            "call_type": "completion",
                        }
                        from noesium.core.tracing import TokenUsage

                        usage = TokenUsage(**usage_data)
                        get_token_tracker().record_usage(usage)
                        logger.debug(f"Token usage for completion: {usage.total_tokens} tokens")
                    else:
                        # Fallback to estimation
                        prompt_text = "\n".join([msg.get("content", "") for msg in messages])
                        completion_text = response.choices[0].message.content
                        usage = estimate_token_usage(prompt_text, completion_text, self.chat_model, "completion")
                        get_token_tracker().record_usage(usage)
                        logger.debug(f"Estimated token usage for completion: {usage.total_tokens} tokens")
                except Exception as e:
                    logger.debug(f"Could not track token usage: {e}")

                return response.choices[0].message.content

        except Exception as e:
            logger.error(f"Error in LiteLLM completion: {e}")
            raise

    @track
    def structured_completion(
        self,
        messages: List[Dict[str, str]],
        response_model: Type[T],
        temperature: float = 0.7,
        max_tokens: Optional[int] = None,
        attempts: int = 2,
        backoff: float = 0.5,
        **kwargs,
    ) -> T:
        """
        Generate structured completion using instructor with LiteLLM.

        Args:
            messages: List of message dictionaries with 'role' and 'content' keys
            response_model: Pydantic model class for structured output
            temperature: Sampling temperature (0.0 to 2.0)
            max_tokens: Maximum tokens to generate
            attempts: Number of attempts to make
            backoff: Backoff factor for exponential backoff
            **kwargs: Additional arguments to pass to instructor

        Returns:
            Structured response as the specified model type
        """
        if not self.instructor:
            raise ValueError("Instructor is not enabled. Initialize LLMClient with instructor=True")

        last_err = None
        for i in range(attempts):
            try:
                # Use LiteLLM directly with JSON mode for structured output
                # Add system message to enforce JSON structure
                structured_messages = messages.copy()
                if response_model.__doc__:
                    schema_prompt = f"Respond with JSON matching this schema: {response_model.model_json_schema()}"
                else:
                    schema_prompt = f"Respond with JSON matching this Pydantic model: {response_model.__name__}"

                # Add schema instruction to the last user message or create a new one
                if structured_messages and structured_messages[-1]["role"] == "user":
                    structured_messages[-1]["content"] += f"\n\n{schema_prompt}"
                else:
                    structured_messages.append({"role": "user", "content": schema_prompt})

                if self.debug:
                    logger.debug(f"Structured completion: {structured_messages}")

                response = litellm.completion(
                    model=self.chat_model,
                    messages=structured_messages,
                    temperature=temperature,
                    max_tokens=max_tokens,
                    response_format={"type": "json_object"} if "gpt" in self.chat_model.lower() else None,
                    **kwargs,
                )

                # Parse the JSON response into the Pydantic model
                import json

                response_text = response.choices[0].message.content
                response_json = json.loads(response_text)
                result = response_model.model_validate(response_json)

                # Estimate token usage for logging
                try:
                    prompt_text = "\n".join([msg.get("content", "") for msg in structured_messages])
                    completion_text = response_text
                    usage = estimate_token_usage(prompt_text, completion_text, self.chat_model, "structured")
                    get_token_tracker().record_usage(usage)
                    logger.debug(f"Estimated token usage for structured completion: {usage.total_tokens} tokens")
                except Exception as e:
                    logger.debug(f"Could not estimate token usage: {e}")

                return result

            except Exception as e:
                last_err = e
                if i < attempts - 1:
                    time.sleep(backoff * (2**i))
                else:
                    logger.error(f"Error in structured completion: {e}")
                    raise
        raise last_err

    @track
    def understand_image(
        self,
        image_path: Union[str, Path],
        prompt: str,
        temperature: float = 0.7,
        max_tokens: Optional[int] = None,
        **kwargs,
    ) -> str:
        """
        Analyze an image using LiteLLM vision model.

        Args:
            image_path: Path to the image file
            prompt: Text prompt describing what to analyze in the image
            temperature: Sampling temperature
            max_tokens: Maximum tokens to generate
            **kwargs: Additional arguments

        Returns:
            Analysis of the image
        """

        try:
            # Read and encode the image
            image_path = Path(image_path)
            if not image_path.exists():
                raise FileNotFoundError(f"Image file not found: {image_path}")

            with open(image_path, "rb") as image_file:
                image_data = image_file.read()
                image_base64 = base64.b64encode(image_data).decode("utf-8")

            # Determine the image format
            image_format = image_path.suffix.lower().lstrip(".")
            if image_format == "jpg":
                image_format = "jpeg"

            # Prepare the message with image
            messages = [
                {
                    "role": "user",
                    "content": [
                        {"type": "text", "text": prompt},
                        {"type": "image_url", "image_url": {"url": f"data:image/{image_format};base64,{image_base64}"}},
                    ],
                }
            ]

            if self.debug:
                logger.debug(f"Understand image: {messages}")

            response = litellm.completion(
                model=self.vision_model,
                messages=messages,
                temperature=temperature,
                max_tokens=max_tokens,
                **kwargs,
            )

            # Estimate token usage for logging
            try:
                completion_text = response.choices[0].message.content
                usage = estimate_token_usage(prompt, completion_text, self.vision_model, "vision")
                get_token_tracker().record_usage(usage)
                logger.debug(f"Estimated token usage for vision: {usage.total_tokens} tokens")
            except Exception as e:
                logger.debug(f"Could not estimate token usage: {e}")

            return response.choices[0].message.content

        except Exception as e:
            logger.error(f"Error analyzing image with LiteLLM: {e}")
            raise

    @track
    def understand_image_from_url(
        self,
        image_url: str,
        prompt: str,
        temperature: float = 0.7,
        max_tokens: Optional[int] = None,
        **kwargs,
    ) -> str:
        """
        Analyze an image from URL using LiteLLM vision model.

        Args:
            image_url: URL of the image
            prompt: Text prompt describing what to analyze in the image
            temperature: Sampling temperature
            max_tokens: Maximum tokens to generate
            **kwargs: Additional arguments

        Returns:
            Analysis of the image
        """

        try:
            # Prepare the message with image URL
            messages = [
                {
                    "role": "user",
                    "content": [
                        {"type": "text", "text": prompt},
                        {"type": "image_url", "image_url": {"url": image_url}},
                    ],
                }
            ]

            if self.debug:
                logger.debug(f"Understand image from url: {messages}")

            response = litellm.completion(
                model=self.vision_model,
                messages=messages,
                temperature=temperature,
                max_tokens=max_tokens,
                **kwargs,
            )

            # Estimate token usage for logging
            try:
                completion_text = response.choices[0].message.content
                usage = estimate_token_usage(prompt, completion_text, self.vision_model, "vision")
                get_token_tracker().record_usage(usage)
                logger.debug(f"Estimated token usage for vision: {usage.total_tokens} tokens")
            except Exception as e:
                logger.debug(f"Could not estimate token usage: {e}")

            return response.choices[0].message.content

        except Exception as e:
            logger.error(f"Error analyzing image from URL with LiteLLM: {e}")
            raise

    def embed(self, text: str) -> List[float]:
        """
        Generate embeddings using LiteLLM.

        Args:
            text: Text to embed

        Returns:
            List of embedding values
        """
        try:
            response = litellm.embedding(
                model=self.embed_model,
                input=[text],
                dimensions=self.get_embedding_dimensions(),
            )

            embedding = response.data[0].embedding

            # Validate embedding dimensions
            expected_dims = self.get_embedding_dimensions()
            if len(embedding) != expected_dims:
                logger.warning(
                    f"Embedding has {len(embedding)} dimensions, expected {expected_dims}. "
                    f"Consider setting COGENTS_EMBEDDING_DIMS={len(embedding)} or "
                    f"using a different embedding model."
                )

            return embedding

        except Exception as e:
            logger.error(f"Error generating embedding with LiteLLM: {e}")
            raise

    def embed_batch(self, chunks: List[str]) -> List[List[float]]:
        """
        Generate embeddings for multiple texts using LiteLLM.

        Args:
            chunks: List of texts to embed

        Returns:
            List of embedding lists
        """
        try:
            response = litellm.embedding(
                model=self.embed_model,
                input=chunks,
                dimensions=self.get_embedding_dimensions(),
            )

            embeddings = [item.embedding for item in response.data]

            # Validate embedding dimensions
            expected_dims = self.get_embedding_dimensions()
            for i, embedding in enumerate(embeddings):
                if len(embedding) != expected_dims:
                    logger.warning(
                        f"Embedding at index {i} has {len(embedding)} dimensions, expected {expected_dims}. "
                        f"Consider setting COGENTS_EMBEDDING_DIMS={len(embedding)} or "
                        f"using a different embedding model."
                    )

            return embeddings

        except Exception as e:
            logger.error(f"Error generating batch embeddings with LiteLLM: {e}")
            # Fallback to individual calls
            embeddings = []
            for chunk in chunks:
                embedding = self.embed(chunk)
                embeddings.append(embedding)
            return embeddings

    def rerank(self, query: str, chunks: List[str]) -> List[Tuple[float, int, str]]:
        """
        Rerank chunks based on their relevance to the query.

        Note: LiteLLM doesn't have a native reranking API, so this implementation
        uses a simple similarity-based approach with embeddings.

        Args:
            query: The query to rank against
            chunks: List of text chunks to rerank

        Returns:
            List of tuples (similarity_score, original_index, chunk_text)
            sorted by similarity score in descending order
        """
        try:
            # Get embeddings for query and chunks
            query_embedding = self.embed(query)
            chunk_embeddings = self.embed_batch(chunks)

            from noesium.core.utils.statistics import cosine_similarity

            # Calculate similarities and sort
            similarities = []
            for i, chunk_embedding in enumerate(chunk_embeddings):
                similarity = cosine_similarity(query_embedding, chunk_embedding)
                similarities.append((similarity, i, chunks[i]))

            # Sort by similarity (descending)
            similarities.sort(key=lambda x: x[0], reverse=True)

            # Return sorted tuples
            return similarities

        except Exception as e:
            logger.error(f"Error reranking with LiteLLM: {e}")
            # Fallback: return original order with zero similarities
            return [(0.0, i, chunk) for i, chunk in enumerate(chunks)]