noesium 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions released to one of the supported registries. It is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- noesium/core/__init__.py +4 -0
- noesium/core/agent/__init__.py +14 -0
- noesium/core/agent/base.py +227 -0
- noesium/core/consts.py +6 -0
- noesium/core/goalith/conflict/conflict.py +104 -0
- noesium/core/goalith/conflict/detector.py +53 -0
- noesium/core/goalith/decomposer/__init__.py +6 -0
- noesium/core/goalith/decomposer/base.py +46 -0
- noesium/core/goalith/decomposer/callable_decomposer.py +65 -0
- noesium/core/goalith/decomposer/llm_decomposer.py +326 -0
- noesium/core/goalith/decomposer/prompts.py +140 -0
- noesium/core/goalith/decomposer/simple_decomposer.py +61 -0
- noesium/core/goalith/errors.py +22 -0
- noesium/core/goalith/goalgraph/graph.py +526 -0
- noesium/core/goalith/goalgraph/node.py +179 -0
- noesium/core/goalith/replanner/base.py +31 -0
- noesium/core/goalith/replanner/replanner.py +36 -0
- noesium/core/goalith/service.py +26 -0
- noesium/core/llm/__init__.py +154 -0
- noesium/core/llm/base.py +152 -0
- noesium/core/llm/litellm.py +528 -0
- noesium/core/llm/llamacpp.py +487 -0
- noesium/core/llm/message.py +184 -0
- noesium/core/llm/ollama.py +459 -0
- noesium/core/llm/openai.py +520 -0
- noesium/core/llm/openrouter.py +89 -0
- noesium/core/llm/prompt.py +551 -0
- noesium/core/memory/__init__.py +11 -0
- noesium/core/memory/base.py +464 -0
- noesium/core/memory/memu/__init__.py +24 -0
- noesium/core/memory/memu/config/__init__.py +26 -0
- noesium/core/memory/memu/config/activity/config.py +46 -0
- noesium/core/memory/memu/config/event/config.py +46 -0
- noesium/core/memory/memu/config/markdown_config.py +241 -0
- noesium/core/memory/memu/config/profile/config.py +48 -0
- noesium/core/memory/memu/llm_adapter.py +129 -0
- noesium/core/memory/memu/memory/__init__.py +31 -0
- noesium/core/memory/memu/memory/actions/__init__.py +40 -0
- noesium/core/memory/memu/memory/actions/add_activity_memory.py +299 -0
- noesium/core/memory/memu/memory/actions/base_action.py +342 -0
- noesium/core/memory/memu/memory/actions/cluster_memories.py +262 -0
- noesium/core/memory/memu/memory/actions/generate_suggestions.py +198 -0
- noesium/core/memory/memu/memory/actions/get_available_categories.py +66 -0
- noesium/core/memory/memu/memory/actions/link_related_memories.py +515 -0
- noesium/core/memory/memu/memory/actions/run_theory_of_mind.py +254 -0
- noesium/core/memory/memu/memory/actions/update_memory_with_suggestions.py +514 -0
- noesium/core/memory/memu/memory/embeddings.py +130 -0
- noesium/core/memory/memu/memory/file_manager.py +306 -0
- noesium/core/memory/memu/memory/memory_agent.py +578 -0
- noesium/core/memory/memu/memory/recall_agent.py +376 -0
- noesium/core/memory/memu/memory_store.py +628 -0
- noesium/core/memory/models.py +149 -0
- noesium/core/msgbus/__init__.py +12 -0
- noesium/core/msgbus/base.py +395 -0
- noesium/core/orchestrix/__init__.py +0 -0
- noesium/core/py.typed +0 -0
- noesium/core/routing/__init__.py +20 -0
- noesium/core/routing/base.py +66 -0
- noesium/core/routing/router.py +241 -0
- noesium/core/routing/strategies/__init__.py +9 -0
- noesium/core/routing/strategies/dynamic_complexity.py +361 -0
- noesium/core/routing/strategies/self_assessment.py +147 -0
- noesium/core/routing/types.py +38 -0
- noesium/core/toolify/__init__.py +39 -0
- noesium/core/toolify/base.py +360 -0
- noesium/core/toolify/config.py +138 -0
- noesium/core/toolify/mcp_integration.py +275 -0
- noesium/core/toolify/registry.py +214 -0
- noesium/core/toolify/toolkits/__init__.py +1 -0
- noesium/core/tracing/__init__.py +37 -0
- noesium/core/tracing/langgraph_hooks.py +308 -0
- noesium/core/tracing/opik_tracing.py +144 -0
- noesium/core/tracing/token_tracker.py +166 -0
- noesium/core/utils/__init__.py +10 -0
- noesium/core/utils/logging.py +172 -0
- noesium/core/utils/statistics.py +12 -0
- noesium/core/utils/typing.py +17 -0
- noesium/core/vector_store/__init__.py +79 -0
- noesium/core/vector_store/base.py +94 -0
- noesium/core/vector_store/pgvector.py +304 -0
- noesium/core/vector_store/weaviate.py +383 -0
- noesium-0.1.0.dist-info/METADATA +525 -0
- noesium-0.1.0.dist-info/RECORD +86 -0
- noesium-0.1.0.dist-info/WHEEL +5 -0
- noesium-0.1.0.dist-info/licenses/LICENSE +21 -0
- noesium-0.1.0.dist-info/top_level.txt +1 -0
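For orientation, the wheel's file paths map directly onto import paths once the package is installed. The snippet below is a minimal, hypothetical sketch of that mapping; it only uses names actually defined in this release (LLMClient in noesium/core/llm/ollama.py, whose diff follows, and get_logger in noesium/core/utils/logging.py), and the sibling provider modules (openai.py, litellm.py, llamacpp.py, openrouter.py) are assumed to follow the same layout.

# Hypothetical import sketch: wheel path noesium/core/llm/ollama.py -> module noesium.core.llm.ollama
from noesium.core.llm.ollama import LLMClient
from noesium.core.utils.logging import get_logger

logger = get_logger(__name__)
client = LLMClient()  # Ollama-backed client added in this release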
noesium/core/llm/ollama.py

@@ -0,0 +1,459 @@
+"""
+Ollama LLM provider for Cogents.
+
+This module provides:
+- Chat completion using Ollama models
+- Image understanding using Ollama vision models
+- Instructor integration for structured output
+
+"""
+
+import os
+import time
+from pathlib import Path
+from typing import Any, Dict, List, Optional, Tuple, Type, TypeVar, Union
+
+import ollama
+from instructor import Instructor, Mode, patch
+
+from noesium.core.llm.base import BaseLLMClient
+from noesium.core.tracing import estimate_token_usage, get_token_tracker
+from noesium.core.tracing.opik_tracing import configure_opik
+from noesium.core.utils.logging import get_logger
+
+# Only import OPIK if tracing is enabled
+OPIK_AVAILABLE = False
+track = lambda func: func # Default no-op decorator
+if os.getenv("COGENTS_OPIK_TRACING", "false").lower() == "true":
+    try:
+        from opik import track
+
+        OPIK_AVAILABLE = True
+    except ImportError:
+        pass
+
+
+T = TypeVar("T")
+
+logger = get_logger(__name__)
+
+
+class LLMClient(BaseLLMClient):
+    """Client for interacting with Ollama LLM services."""
+
+    def __init__(
+        self,
+        base_url: Optional[str] = None,
+        api_key: Optional[str] = None,
+        instructor: bool = False,
+        chat_model: Optional[str] = None,
+        vision_model: Optional[str] = None,
+        embed_model: Optional[str] = None,
+        **kwargs,
+    ):
+        """
+        Initialize the Ollama LLM client.
+
+        Args:
+            base_url: Base URL for the Ollama API (defaults to http://localhost:11434)
+            api_key: Not used for Ollama but kept for compatibility
+            instructor: Whether to enable instructor for structured output
+            chat_model: Model to use for chat completions (defaults to gemma3:4b)
+            vision_model: Model to use for vision tasks (defaults to gemma3:4b)
+            **kwargs: Additional arguments
+        """
+        super().__init__(**kwargs)
+        # Configure Opik tracing for observability only if enabled
+        if OPIK_AVAILABLE:
+            configure_opik()
+            self._opik_provider = "ollama"
+        else:
+            self._opik_provider = None
+
+        # Set base URL (defaults to Ollama default)
+        self.base_url = base_url or os.getenv("OLLAMA_BASE_URL", "http://localhost:11434")
+
+        # Initialize Ollama client
+        self.client = ollama.Client(host=self.base_url)
+
+        # Model configurations
+        self.chat_model = chat_model or os.getenv("OLLAMA_CHAT_MODEL", "gemma3:4b")
+        self.vision_model = vision_model or os.getenv("OLLAMA_VISION_MODEL", "gemma3:4b")
+        self.embed_model = embed_model or os.getenv("OLLAMA_EMBED_MODEL", "nomic-embed-text:latest")
+
+        # Initialize instructor if requested
+        self.instructor = None
+        if instructor:
+            # Create a mock OpenAI-compatible client for instructor
+            try:
+                from openai import OpenAI
+
+                # Create a mock client that uses Ollama through OpenAI-compatible API
+                mock_client = OpenAI(
+                    base_url=f"{self.base_url}/v1",
+                    api_key="ollama", # Ollama doesn't require real API key
+                )
+                patched_client = patch(mock_client, mode=Mode.JSON)
+                self.instructor = Instructor(
+                    client=patched_client,
+                    create=patched_client.chat.completions.create,
+                    mode=Mode.JSON,
+                )
+            except ImportError:
+                logger.warning("OpenAI package not available, structured completion will not work")
+
+    @track
+    def completion(
+        self,
+        messages: List[Dict[str, str]],
+        temperature: float = 0.7,
+        max_tokens: Optional[int] = None,
+        stream: bool = False,
+        **kwargs,
+    ) -> Union[str, Dict[str, Any]]:
+        """
+        Generate chat completion using Ollama.
+
+        Args:
+            messages: List of message dictionaries with 'role' and 'content' keys
+            temperature: Sampling temperature (0.0 to 2.0)
+            max_tokens: Maximum tokens to generate
+            stream: Whether to stream the response
+            **kwargs: Additional arguments
+
+        Returns:
+            Generated text response or streaming response
+        """
+
+        try:
+            options = {
+                "temperature": temperature,
+            }
+            if max_tokens:
+                options["num_predict"] = max_tokens
+
+            if self.debug:
+                logger.debug(f"Chat completion: {messages}")
+
+            response = self.client.chat(
+                model=self.chat_model,
+                messages=messages,
+                stream=stream,
+                options=options,
+                **kwargs,
+            )
+
+            if stream:
+                return response
+            else:
+                # Estimate token usage for logging
+                try:
+                    prompt_text = "\n".join([msg.get("content", "") for msg in messages])
+                    completion_text = response["message"]["content"]
+                    usage = estimate_token_usage(prompt_text, completion_text, self.chat_model, "completion")
+                    get_token_tracker().record_usage(usage)
+                    logger.debug(f"Estimated token usage for completion: {usage.total_tokens} tokens")
+                except Exception as e:
+                    logger.debug(f"Could not estimate token usage: {e}")
+
+                return response["message"]["content"]
+
+        except Exception as e:
+            logger.error(f"Error in Ollama completion: {e}")
+            raise
+
+    @track
+    def structured_completion(
+        self,
+        messages: List[Dict[str, str]],
+        response_model: Type[T],
+        temperature: float = 0.7,
+        max_tokens: Optional[int] = None,
+        attempts: int = 2,
+        backoff: float = 0.5,
+        **kwargs,
+    ) -> T:
+        """
+        Generate structured completion using instructor with Ollama.
+
+        Args:
+            messages: List of message dictionaries with 'role' and 'content' keys
+            response_model: Pydantic model class for structured output
+            temperature: Sampling temperature (0.0 to 2.0)
+            max_tokens: Maximum tokens to generate
+            attempts: Number of attempts to make
+            backoff: Backoff factor for exponential backoff
+            **kwargs: Additional arguments to pass to instructor
+
+        Returns:
+            Structured response as the specified model type
+        """
+        if not self.instructor:
+            raise ValueError("Instructor is not enabled. Initialize LLMClient with instructor=True")
+
+        if self.debug:
+            logger.debug(f"Structured completion: {messages}")
+
+        last_err = None
+        for i in range(attempts):
+            try:
+                result = self.instructor.create(
+                    model=self.chat_model,
+                    messages=messages,
+                    response_model=response_model,
+                    temperature=temperature,
+                    max_tokens=max_tokens,
+                    **kwargs,
+                )
+
+                # Estimate token usage for logging
+                try:
+                    prompt_text = "\n".join([msg.get("content", "") for msg in messages])
+                    completion_text = str(result)
+                    if hasattr(result, "model_dump_json"):
+                        completion_text = result.model_dump_json()
+
+                    usage = estimate_token_usage(prompt_text, completion_text, self.chat_model, "structured")
+                    get_token_tracker().record_usage(usage)
+                    logger.debug(f"Estimated token usage for structured completion: {usage.total_tokens} tokens")
+                except Exception as e:
+                    logger.debug(f"Could not estimate token usage: {e}")
+
+                return result
+            except Exception as e:
+                last_err = e
+                if i < attempts - 1:
+                    time.sleep(backoff * (2**i))
+                else:
+                    logger.error(f"Error in structured completion: {e}")
+                    raise
+        raise last_err
+
+    @track
+    def understand_image(
+        self,
+        image_path: Union[str, Path],
+        prompt: str,
+        temperature: float = 0.7,
+        max_tokens: Optional[int] = None,
+        **kwargs,
+    ) -> str:
+        """
+        Analyze an image using Ollama vision model.
+
+        Args:
+            image_path: Path to the image file
+            prompt: Text prompt describing what to analyze in the image
+            temperature: Sampling temperature
+            max_tokens: Maximum tokens to generate
+            **kwargs: Additional arguments
+
+        Returns:
+            Analysis of the image
+        """
+
+        try:
+            # Read and encode the image
+            image_path = Path(image_path)
+            if not image_path.exists():
+                raise FileNotFoundError(f"Image file not found: {image_path}")
+
+            with open(image_path, "rb") as image_file:
+                image_data = image_file.read()
+
+            # Prepare the message with image
+            messages = [
+                {
+                    "role": "user",
+                    "content": prompt,
+                    "images": [image_data],
+                }
+            ]
+
+            options = {
+                "temperature": temperature,
+            }
+            if max_tokens:
+                options["num_predict"] = max_tokens
+
+            if self.debug:
+                logger.debug(f"Understand image: {messages}")
+
+            response = self.client.chat(
+                model=self.vision_model,
+                messages=messages,
+                options=options,
+                **kwargs,
+            )
+
+            # Estimate token usage for logging
+            try:
+                completion_text = response["message"]["content"]
+                usage = estimate_token_usage(prompt, completion_text, self.vision_model, "vision")
+                get_token_tracker().record_usage(usage)
+                logger.debug(f"Estimated token usage for vision: {usage.total_tokens} tokens")
+            except Exception as e:
+                logger.debug(f"Could not estimate token usage: {e}")
+
+            return response["message"]["content"]
+
+        except Exception as e:
+            logger.error(f"Error analyzing image with Ollama: {e}")
+            raise
+
+    @track
+    def understand_image_from_url(
+        self,
+        image_url: str,
+        prompt: str,
+        temperature: float = 0.7,
+        max_tokens: Optional[int] = None,
+        **kwargs,
+    ) -> str:
+        """
+        Analyze an image from URL using Ollama vision model.
+
+        Args:
+            image_url: URL of the image
+            prompt: Text prompt describing what to analyze in the image
+            temperature: Sampling temperature
+            max_tokens: Maximum tokens to generate
+            **kwargs: Additional arguments
+
+        Returns:
+            Analysis of the image
+        """
+
+        try:
+            import requests
+
+            # Download the image
+            response = requests.get(image_url)
+            response.raise_for_status()
+            image_data = response.content
+
+            # Prepare the message with image
+            messages = [
+                {
+                    "role": "user",
+                    "content": prompt,
+                    "images": [image_data],
+                }
+            ]
+
+            options = {
+                "temperature": temperature,
+            }
+            if max_tokens:
+                options["num_predict"] = max_tokens
+
+            if self.debug:
+                logger.debug(f"Understand image from url: {messages}")
+
+            response = self.client.chat(
+                model=self.vision_model,
+                messages=messages,
+                options=options,
+                **kwargs,
+            )
+
+            # Estimate token usage for logging
+            try:
+                completion_text = response["message"]["content"]
+                usage = estimate_token_usage(prompt, completion_text, self.vision_model, "vision")
+                get_token_tracker().record_usage(usage)
+                logger.debug(f"Estimated token usage for vision: {usage.total_tokens} tokens")
+            except Exception as e:
+                logger.debug(f"Could not estimate token usage: {e}")
+
+            return response["message"]["content"]
+
+        except Exception as e:
+            logger.error(f"Error analyzing image from URL with Ollama: {e}")
+            raise
+
+    def embed(self, text: str) -> List[float]:
+        """
+        Generate embeddings using Ollama.
+
+        Args:
+            text: Text to embed
+
+        Returns:
+            List of embedding values
+        """
+        try:
+            response = self.client.embeddings(
+                model=self.embed_model,
+                prompt=text,
+            )
+            embedding = response["embedding"]
+
+            # Validate embedding dimensions
+            expected_dims = self.get_embedding_dimensions()
+            if len(embedding) != expected_dims:
+                logger.warning(
+                    f"Embedding has {len(embedding)} dimensions, expected {expected_dims}. "
+                    f"Consider setting COGENTS_EMBEDDING_DIMS={len(embedding)} or "
+                    f"using a different embedding model."
+                )
+
+            return embedding
+        except Exception as e:
+            logger.error(f"Error generating embedding with Ollama: {e}")
+            raise
+
+    def embed_batch(self, chunks: List[str]) -> List[List[float]]:
+        """
+        Generate embeddings for multiple texts using Ollama.
+
+        Args:
+            chunks: List of texts to embed
+
+        Returns:
+            List of embedding lists
+        """
+        embeddings = []
+        for chunk in chunks:
+            embedding = self.embed(chunk)
+            embeddings.append(embedding)
+        return embeddings

+    def rerank(self, query: str, chunks: List[str]) -> List[Tuple[float, int, str]]:
+        """
+        Rerank chunks based on their relevance to the query.
+
+        Note: Ollama doesn't have a native reranking API, so this implementation
+        uses a simple similarity-based approach with embeddings.
+
+        Args:
+            query: The query to rank against
+            chunks: List of text chunks to rerank
+
+        Returns:
+            List of tuples (similarity_score, original_index, chunk_text)
+            sorted by similarity score in descending order
+        """
+        try:
+            # Get embeddings for query and chunks
+            query_embedding = self.embed(query)
+            chunk_embeddings = self.embed_batch(chunks)
+
+            from noesium.core.utils.statistics import cosine_similarity
+
+            # Calculate similarities and sort
+            similarities = []
+            for i, chunk_embedding in enumerate(chunk_embeddings):
+                similarity = cosine_similarity(query_embedding, chunk_embedding)
+                similarities.append((similarity, i, chunks[i]))
+
+            # Sort by similarity (descending)
+            similarities.sort(key=lambda x: x[0], reverse=True)
+
+            # Return sorted tuples
+            return similarities
+
+        except Exception as e:
+            logger.error(f"Error reranking with Ollama: {e}")
+            # Fallback: return original order with zero similarities
+            return [(0.0, i, chunk) for i, chunk in enumerate(chunks)]
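To make the new provider concrete, the following is a minimal usage sketch written against the constructor and method signatures added above. It assumes a local Ollama server at the default http://localhost:11434 with the default gemma3:4b chat model and nomic-embed-text:latest embedding model already pulled, and that the optional openai and pydantic packages are installed for the structured-output path. The Movie model, prompts, and chunk strings are illustrative only, not part of the package.

from pydantic import BaseModel

from noesium.core.llm.ollama import LLMClient


class Movie(BaseModel):
    """Illustrative response model for structured output."""

    title: str
    year: int


# Plain chat completion against the default chat model (gemma3:4b).
client = LLMClient(instructor=True)
reply = client.completion(
    messages=[{"role": "user", "content": "Name one classic sci-fi film."}],
    temperature=0.2,
)
print(reply)

# Structured completion: instructor validates the JSON output into the Movie
# model, retrying up to `attempts` times with exponential backoff on failure.
movie = client.structured_completion(
    messages=[{"role": "user", "content": "Return a classic sci-fi film as JSON."}],
    response_model=Movie,
    attempts=2,
)
print(movie.title, movie.year)

# Embedding-based reranking: rerank() embeds the query and each chunk with the
# configured embedding model and sorts by cosine similarity, since Ollama has
# no native rerank API (per the docstring above).
ranked = client.rerank(
    query="space travel",
    chunks=["A film about interstellar flight.", "A recipe for sourdough bread."],
)
print(ranked[0])  # (similarity_score, original_index, chunk_text)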