noesium-0.1.0-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- noesium/core/__init__.py +4 -0
- noesium/core/agent/__init__.py +14 -0
- noesium/core/agent/base.py +227 -0
- noesium/core/consts.py +6 -0
- noesium/core/goalith/conflict/conflict.py +104 -0
- noesium/core/goalith/conflict/detector.py +53 -0
- noesium/core/goalith/decomposer/__init__.py +6 -0
- noesium/core/goalith/decomposer/base.py +46 -0
- noesium/core/goalith/decomposer/callable_decomposer.py +65 -0
- noesium/core/goalith/decomposer/llm_decomposer.py +326 -0
- noesium/core/goalith/decomposer/prompts.py +140 -0
- noesium/core/goalith/decomposer/simple_decomposer.py +61 -0
- noesium/core/goalith/errors.py +22 -0
- noesium/core/goalith/goalgraph/graph.py +526 -0
- noesium/core/goalith/goalgraph/node.py +179 -0
- noesium/core/goalith/replanner/base.py +31 -0
- noesium/core/goalith/replanner/replanner.py +36 -0
- noesium/core/goalith/service.py +26 -0
- noesium/core/llm/__init__.py +154 -0
- noesium/core/llm/base.py +152 -0
- noesium/core/llm/litellm.py +528 -0
- noesium/core/llm/llamacpp.py +487 -0
- noesium/core/llm/message.py +184 -0
- noesium/core/llm/ollama.py +459 -0
- noesium/core/llm/openai.py +520 -0
- noesium/core/llm/openrouter.py +89 -0
- noesium/core/llm/prompt.py +551 -0
- noesium/core/memory/__init__.py +11 -0
- noesium/core/memory/base.py +464 -0
- noesium/core/memory/memu/__init__.py +24 -0
- noesium/core/memory/memu/config/__init__.py +26 -0
- noesium/core/memory/memu/config/activity/config.py +46 -0
- noesium/core/memory/memu/config/event/config.py +46 -0
- noesium/core/memory/memu/config/markdown_config.py +241 -0
- noesium/core/memory/memu/config/profile/config.py +48 -0
- noesium/core/memory/memu/llm_adapter.py +129 -0
- noesium/core/memory/memu/memory/__init__.py +31 -0
- noesium/core/memory/memu/memory/actions/__init__.py +40 -0
- noesium/core/memory/memu/memory/actions/add_activity_memory.py +299 -0
- noesium/core/memory/memu/memory/actions/base_action.py +342 -0
- noesium/core/memory/memu/memory/actions/cluster_memories.py +262 -0
- noesium/core/memory/memu/memory/actions/generate_suggestions.py +198 -0
- noesium/core/memory/memu/memory/actions/get_available_categories.py +66 -0
- noesium/core/memory/memu/memory/actions/link_related_memories.py +515 -0
- noesium/core/memory/memu/memory/actions/run_theory_of_mind.py +254 -0
- noesium/core/memory/memu/memory/actions/update_memory_with_suggestions.py +514 -0
- noesium/core/memory/memu/memory/embeddings.py +130 -0
- noesium/core/memory/memu/memory/file_manager.py +306 -0
- noesium/core/memory/memu/memory/memory_agent.py +578 -0
- noesium/core/memory/memu/memory/recall_agent.py +376 -0
- noesium/core/memory/memu/memory_store.py +628 -0
- noesium/core/memory/models.py +149 -0
- noesium/core/msgbus/__init__.py +12 -0
- noesium/core/msgbus/base.py +395 -0
- noesium/core/orchestrix/__init__.py +0 -0
- noesium/core/py.typed +0 -0
- noesium/core/routing/__init__.py +20 -0
- noesium/core/routing/base.py +66 -0
- noesium/core/routing/router.py +241 -0
- noesium/core/routing/strategies/__init__.py +9 -0
- noesium/core/routing/strategies/dynamic_complexity.py +361 -0
- noesium/core/routing/strategies/self_assessment.py +147 -0
- noesium/core/routing/types.py +38 -0
- noesium/core/toolify/__init__.py +39 -0
- noesium/core/toolify/base.py +360 -0
- noesium/core/toolify/config.py +138 -0
- noesium/core/toolify/mcp_integration.py +275 -0
- noesium/core/toolify/registry.py +214 -0
- noesium/core/toolify/toolkits/__init__.py +1 -0
- noesium/core/tracing/__init__.py +37 -0
- noesium/core/tracing/langgraph_hooks.py +308 -0
- noesium/core/tracing/opik_tracing.py +144 -0
- noesium/core/tracing/token_tracker.py +166 -0
- noesium/core/utils/__init__.py +10 -0
- noesium/core/utils/logging.py +172 -0
- noesium/core/utils/statistics.py +12 -0
- noesium/core/utils/typing.py +17 -0
- noesium/core/vector_store/__init__.py +79 -0
- noesium/core/vector_store/base.py +94 -0
- noesium/core/vector_store/pgvector.py +304 -0
- noesium/core/vector_store/weaviate.py +383 -0
- noesium-0.1.0.dist-info/METADATA +525 -0
- noesium-0.1.0.dist-info/RECORD +86 -0
- noesium-0.1.0.dist-info/WHEEL +5 -0
- noesium-0.1.0.dist-info/licenses/LICENSE +21 -0
- noesium-0.1.0.dist-info/top_level.txt +1 -0
noesium/core/llm/llamacpp.py
@@ -0,0 +1,487 @@
+import json
+import os
+from pathlib import Path
+from typing import Any, Dict, List, Optional, Tuple, Type, TypeVar, Union
+
+from huggingface_hub import snapshot_download
+from llama_cpp import Llama
+
+from noesium.core.llm.base import BaseLLMClient
+from noesium.core.tracing import estimate_token_usage, get_token_tracker
+from noesium.core.tracing.opik_tracing import configure_opik
+from noesium.core.utils.logging import get_logger
+
+# Only import OPIK if tracing is enabled
+OPIK_AVAILABLE = False
+track = lambda func: func  # Default no-op decorator
+if os.getenv("COGENTS_OPIK_TRACING", "false").lower() == "true":
+    try:
+        from opik import track
+
+        OPIK_AVAILABLE = True
+    except ImportError:
+        pass
+
+
+T = TypeVar("T")
+
+logger = get_logger(__name__)
+
+# Default model configuration
+DEFAULT_MODEL_REPO = "ggml-org/gemma-3-270m-it-GGUF"
+DEFAULT_MODEL_FILENAME = "gemma-3-270m-it-Q8_0.gguf"
+
+
+def _download_default_model() -> str:
+    """
+    Download the default model from Hugging Face Hub if not already cached.
+
+    Returns:
+        Path to the downloaded model file
+    """
+    try:
+        logger.info(f"No model path provided, downloading default model: {DEFAULT_MODEL_REPO}")
+
+        # Download the model repository to local cache
+        local_dir = snapshot_download(DEFAULT_MODEL_REPO)
+
+        # Construct path to the specific model file
+        model_path = os.path.join(local_dir, DEFAULT_MODEL_FILENAME)
+
+        if not os.path.exists(model_path):
+            # If the expected file doesn't exist, try to find any .gguf file
+            gguf_files = [f for f in os.listdir(local_dir) if f.endswith(".gguf")]
+            if gguf_files:
+                model_path = os.path.join(local_dir, gguf_files[0])
+                logger.info(f"Using found model file: {gguf_files[0]}")
+            else:
+                raise FileNotFoundError(f"No .gguf files found in downloaded model directory: {local_dir}")
+
+        logger.info(f"Model downloaded successfully to: {model_path}")
+        return model_path
+
+    except Exception as e:
+        logger.error(f"Failed to download default model: {e}")
+        raise ValueError(
+            f"Failed to download default model {DEFAULT_MODEL_REPO}. "
+            "Please provide a model_path parameter or set LLAMACPP_MODEL_PATH environment variable."
+        ) from e
+
+
+class LLMClient(BaseLLMClient):
+    """
+    Client for interacting with local LLMs using llama-cpp-python.
+
+    Automatically downloads a default model (ggml-org/gemma-3-270m-it-GGUF) from
+    Hugging Face Hub if no model path is provided via parameter or environment variable.
+    """
+
+    def __init__(
+        self,
+        model_path: Optional[str] = None,
+        instructor: bool = False,
+        chat_model: Optional[str] = None,
+        vision_model: Optional[str] = None,
+        embed_model: Optional[str] = None,
+        n_ctx: int = 2048,
+        n_gpu_layers: int = -1,
+        **kwargs,
+    ):
+        """
+        Initialize the LLM client.
+
+        Args:
+            model_path: Path to the GGUF model file. If not provided and LLAMACPP_MODEL_PATH
+                environment variable is not set, automatically downloads the default model
+                (ggml-org/gemma-3-270m-it-GGUF) from Hugging Face Hub.
+            instructor: Whether to enable instructor for structured output
+            chat_model: Model name (used for logging, defaults to model filename)
+            vision_model: Vision model name (llamacpp doesn't support vision yet)
+            n_ctx: Context window size
+            n_gpu_layers: Number of layers to offload to GPU (-1 for all)
+            **kwargs: Additional arguments to pass to Llama constructor
+        """
+        super().__init__(**kwargs)
+        # Configure Opik tracing for observability only if enabled
+        if OPIK_AVAILABLE:
+            configure_opik()
+            self._opik_provider = "llamacpp"
+        else:
+            self._opik_provider = None
+
+        # Get model path from parameter or environment, or download default model
+        self.model_path = model_path or os.getenv("LLAMACPP_MODEL_PATH")
+        if not self.model_path:
+            logger.info("No model path provided, attempting to download default model...")
+            self.model_path = _download_default_model()
+
+        if not os.path.exists(self.model_path):
+            raise FileNotFoundError(f"Model file not found: {self.model_path}")
+
+        # Initialize Llama model
+        llama_kwargs = {
+            "model_path": self.model_path,
+            "n_ctx": n_ctx,
+            "n_gpu_layers": n_gpu_layers,
+            "verbose": kwargs.get("verbose", False),
+            **kwargs,
+        }
+
+        try:
+            self.llama = Llama(**llama_kwargs)
+        except Exception as e:
+            logger.error(f"Failed to load model from {self.model_path}: {e}")
+            raise
+
+        # Model configurations
+        model_filename = Path(self.model_path).stem
+        self.chat_model = chat_model or os.getenv("LLAMACPP_CHAT_MODEL", model_filename)
+        self.vision_model = vision_model or os.getenv("LLAMACPP_VISION_MODEL", model_filename)
+        self.embed_model = embed_model or os.getenv("LLAMACPP_EMBED_MODEL", model_filename)
+
+        # Set instructor flag
+        self.instructor_enabled = instructor
+
+        logger.info(f"Initialized LlamaCpp client with model: {self.model_path}")
+
+    @track
+    def completion(
+        self,
+        messages: List[Dict[str, str]],
+        temperature: float = 0.7,
+        max_tokens: Optional[int] = None,
+        stream: bool = False,
+        **kwargs,
+    ) -> Union[str, Dict[str, Any]]:
+        """
+        Generate chat completion using the loaded model.
+
+        Args:
+            messages: List of message dictionaries with 'role' and 'content' keys
+            temperature: Sampling temperature (0.0 to 2.0)
+            max_tokens: Maximum tokens to generate
+            stream: Whether to stream the response (not supported in llamacpp)
+            **kwargs: Additional arguments
+
+        Returns:
+            Generated text response
+        """
+        if stream:
+            logger.warning("Streaming is not supported in llamacpp provider, falling back to non-streaming")
+
+        try:
+            # Convert messages to prompt format
+            prompt = self._format_messages_as_prompt(messages)
+
+            # Set default max_tokens if not provided
+            if max_tokens is None:
+                max_tokens = kwargs.get("max_tokens", 512)
+
+            if self.debug:
+                logger.debug(f"Chat completion: {prompt}")
+
+            # Generate response
+            response = self.llama(
+                prompt,
+                max_tokens=max_tokens,
+                temperature=temperature,
+                echo=False,
+                **kwargs,
+            )
+
+            # Extract the generated text
+            output_text = response["choices"][0]["text"]
+
+            # Log token usage
+            self._log_token_usage(prompt, output_text, "completion")
+
+            return output_text.strip()
+
+        except Exception as e:
+            logger.error(f"Error in chat completion: {e}")
+            raise
+
+    @track
+    def structured_completion(
+        self,
+        messages: List[Dict[str, str]],
+        response_model: Type[T],
+        temperature: float = 0.7,
+        max_tokens: Optional[int] = None,
+        attempts: int = 2,
+        backoff: float = 0.5,
+        **kwargs,
+    ) -> T:
+        """
+        Generate structured completion by prompting for JSON output.
+
+        Args:
+            messages: List of message dictionaries with 'role' and 'content' keys
+            response_model: Pydantic model class for structured output
+            temperature: Sampling temperature (0.0 to 2.0)
+            max_tokens: Maximum tokens to generate
+            attempts: Number of attempts to make
+            backoff: Backoff factor for exponential backoff
+            **kwargs: Additional arguments
+
+        Returns:
+            Structured response as the specified model type
+        """
+        if not self.instructor_enabled:
+            raise ValueError("Instructor is not enabled. Initialize LLMClient with instructor=True")
+
+        # Add JSON schema instruction to the last message
+        schema = response_model.model_json_schema()
+        json_instruction = f"\n\nPlease respond with a valid JSON object that matches this schema:\n{json.dumps(schema, indent=2)}\n\nRespond with only the JSON object, no additional text."
+
+        # Modify the last message to include JSON instruction
+        modified_messages = messages.copy()
+        if modified_messages:
+            modified_messages[-1]["content"] += json_instruction
+        else:
+            modified_messages = [{"role": "user", "content": json_instruction}]
+
+        if self.debug:
+            logger.debug(f"Structured completion: {modified_messages}")
+
+        import time
+
+        last_err = None
+        for i in range(attempts):
+            try:
+                # Get raw text response
+                raw_response = self.completion(
+                    modified_messages, temperature=temperature, max_tokens=max_tokens, **kwargs
+                )
+
+                # Try to parse as JSON
+                try:
+                    # Clean the response (remove any markdown formatting)
+                    clean_response = raw_response.strip()
+                    if clean_response.startswith("```json"):
+                        clean_response = clean_response[7:]
+                    if clean_response.endswith("```"):
+                        clean_response = clean_response[:-3]
+                    clean_response = clean_response.strip()
+
+                    # Parse JSON
+                    parsed_json = json.loads(clean_response)
+                    result = response_model(**parsed_json)
+
+                    # Log token usage for structured completion
+                    prompt_text = "\n".join([msg.get("content", "") for msg in modified_messages])
+                    self._log_token_usage(prompt_text, str(result), "structured")
+
+                    return result
+
+                except (json.JSONDecodeError, ValueError) as e:
+                    logger.warning(f"Failed to parse JSON response (attempt {i+1}): {e}")
+                    last_err = e
+                    if i < attempts - 1:
+                        time.sleep(backoff * (2**i))
+                        continue
+                    else:
+                        raise ValueError(f"Failed to get valid JSON after {attempts} attempts: {last_err}")
+
+            except Exception as e:
+                logger.error(f"Error in structured completion attempt {i+1}: {e}")
+                last_err = e
+                if i < attempts - 1:
+                    time.sleep(backoff * (2**i))
+                else:
+                    raise
+
+        raise ValueError(f"Failed to complete structured generation after {attempts} attempts: {last_err}")
+
+    def understand_image(
+        self,
+        image_path: Union[str, Path],
+        prompt: str,
+        temperature: float = 0.7,
+        max_tokens: Optional[int] = None,
+        **kwargs,
+    ) -> str:
+        """
+        Analyze an image (not supported by llamacpp).
+
+        Args:
+            image_path: Path to the image file
+            prompt: Text prompt describing what to analyze in the image
+            temperature: Sampling temperature
+            max_tokens: Maximum tokens to generate
+            **kwargs: Additional arguments
+
+        Returns:
+            Analysis of the image
+
+        Raises:
+            NotImplementedError: Vision capabilities are not supported by llamacpp
+        """
+        raise NotImplementedError("Vision capabilities are not supported by the llamacpp provider")
+
+    def understand_image_from_url(
+        self,
+        image_url: str,
+        prompt: str,
+        temperature: float = 0.7,
+        max_tokens: Optional[int] = None,
+        **kwargs,
+    ) -> str:
+        """
+        Analyze an image from URL (not supported by llamacpp).
+
+        Args:
+            image_url: URL of the image
+            prompt: Text prompt describing what to analyze in the image
+            temperature: Sampling temperature
+            max_tokens: Maximum tokens to generate
+            **kwargs: Additional arguments
+
+        Returns:
+            Analysis of the image
+
+        Raises:
+            NotImplementedError: Vision capabilities are not supported by llamacpp
+        """
+        raise NotImplementedError("Vision capabilities are not supported by the llamacpp provider")
+
+    def _format_messages_as_prompt(self, messages: List[Dict[str, str]]) -> str:
+        """
+        Convert OpenAI-style messages to a single prompt string.
+
+        Args:
+            messages: List of message dictionaries
+
+        Returns:
+            Formatted prompt string
+        """
+        prompt_parts = []
+        for message in messages:
+            role = message.get("role", "user")
+            content = message.get("content", "")
+
+            if role == "system":
+                prompt_parts.append(f"System: {content}")
+            elif role == "user":
+                prompt_parts.append(f"User: {content}")
+            elif role == "assistant":
+                prompt_parts.append(f"Assistant: {content}")
+            else:
+                prompt_parts.append(f"{role}: {content}")
+
+        prompt_parts.append("Assistant:")
+        return "\n\n".join(prompt_parts)
+
+    def _log_token_usage(self, prompt: str, completion: str, call_type: str = "completion"):
+        """Estimate and record token usage."""
+        try:
+            usage = estimate_token_usage(prompt, completion, self.chat_model, call_type)
+            if usage:
+                get_token_tracker().record_usage(usage)
+                logger.debug(
+                    f"Token usage (estimated) - Prompt: {usage.prompt_tokens}, "
+                    f"Completion: {usage.completion_tokens}, "
+                    f"Total: {usage.total_tokens} (model: {usage.model_name})"
+                )
+        except Exception as e:
+            logger.debug(f"Could not estimate token usage: {e}")
+
+    def embed(self, text: str) -> List[float]:
+        """
+        Generate embeddings using llama.cpp.
+
+        Note: This requires the model to support embeddings. Many GGUF models
+        can generate embeddings through llama.cpp.
+
+        Args:
+            text: Text to embed
+
+        Returns:
+            List of embedding values
+        """
+        try:
+            # Use llama.cpp's embedding functionality
+            embedding = self.llama.create_embedding(text)
+
+            if "data" in embedding and len(embedding["data"]) > 0:
+                embedding_vector = embedding["data"][0]["embedding"]
+
+                # Validate embedding dimensions
+                expected_dims = self.get_embedding_dimensions()
+                if len(embedding_vector) != expected_dims:
+                    logger.warning(
+                        f"Embedding has {len(embedding_vector)} dimensions, expected {expected_dims}. "
+                        f"Consider setting COGENTS_EMBEDDING_DIMS={len(embedding_vector)} or "
+                        f"using a different embedding model."
+                    )
+
+                return embedding_vector
+            else:
+                raise ValueError("No embedding data returned from llama.cpp")
+
+        except Exception as e:
+            logger.error(f"Error generating embedding with llama.cpp: {e}")
+            logger.warning(
+                "Make sure your model supports embeddings. Consider using a different provider for embeddings."
+            )
+            raise
+
+    def embed_batch(self, chunks: List[str]) -> List[List[float]]:
+        """
+        Generate embeddings for multiple texts using llama.cpp.
+
+        Args:
+            chunks: List of texts to embed
+
+        Returns:
+            List of embedding lists
+        """
+        try:
+            embeddings = []
+            for chunk in chunks:
+                embedding = self.embed(chunk)
+                embeddings.append(embedding)
+            return embeddings
+
+        except Exception as e:
+            logger.error(f"Error generating batch embeddings with llama.cpp: {e}")
+            raise
+
+    def rerank(self, query: str, chunks: List[str]) -> List[Tuple[float, int, str]]:
+        """
+        Rerank chunks based on their relevance to the query.
+
+        This implementation uses embeddings to calculate similarity scores.
+        If embeddings are not available, it falls back to a simple text-based approach.
+
+        Args:
+            query: The query to rank against
+            chunks: List of text chunks to rerank
+
+        Returns:
+            List of tuples (similarity_score, original_index, chunk_text)
+            sorted by similarity score in descending order
+        """
+        try:
+            # Try to use embeddings for reranking
+            query_embedding = self.embed(query)
+            chunk_embeddings = self.embed_batch(chunks)
+
+            from noesium.core.utils.statistics import cosine_similarity
+
+            # Calculate similarities and sort
+            similarities = []
+            for i, chunk_embedding in enumerate(chunk_embeddings):
+                similarity = cosine_similarity(query_embedding, chunk_embedding)
+                similarities.append((similarity, i, chunks[i]))
+
+            # Sort by similarity (descending)
+            similarities.sort(key=lambda x: x[0], reverse=True)
+
+            # Return sorted tuples
+            return similarities
+
+        except Exception as e:
+            logger.error(f"Fallback reranking also failed: {e}")
+            # Last resort: return original order with zero similarities
+            return [(0.0, i, chunk) for i, chunk in enumerate(chunks)]
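The hunk above adds the llama.cpp-backed LLMClient: it resolves a GGUF model path (argument, LLAMACPP_MODEL_PATH, or a Hugging Face Hub download), flattens OpenAI-style message dicts into a plain prompt, and layers JSON-schema prompting with retry/backoff on top of completion() for structured output. A minimal usage sketch follows; it assumes a working llama-cpp-python install, and the CityInfo schema and prompts are hypothetical examples rather than part of the package:

from pydantic import BaseModel

from noesium.core.llm.llamacpp import LLMClient


class CityInfo(BaseModel):  # hypothetical schema for the structured call
    name: str
    country: str


# With no model_path argument, the client reads LLAMACPP_MODEL_PATH or downloads
# the default gemma-3-270m-it GGUF model from Hugging Face Hub.
client = LLMClient(instructor=True, n_ctx=2048)

# Plain completion over OpenAI-style message dicts.
reply = client.completion(
    [{"role": "user", "content": "Name one city in France."}],
    temperature=0.2,
    max_tokens=64,
)

# Structured completion: the client appends a JSON-schema instruction to the last
# message and retries with exponential backoff until the output parses as CityInfo.
city = client.structured_completion(
    [{"role": "user", "content": "Name one city in France and its country."}],
    response_model=CityInfo,
)
print(reply, city)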
noesium/core/llm/message.py
@@ -0,0 +1,184 @@
+from typing import List, Literal, Optional, Union
+
+from pydantic import BaseModel, Field
+
+# ============================================================================
+# Message System Definitions
+# ============================================================================
+
+
+def _truncate(text: str, max_length: int = 50) -> str:
+    """Truncate text to max_length characters, adding ellipsis if truncated."""
+    if len(text) <= max_length:
+        return text
+    return text[: max_length - 3] + "..."
+
+
+def _format_image_url(url: str, max_length: int = 50) -> str:
+    """Format image URL for display, truncating if necessary."""
+    if url.startswith("data:"):
+        # Base64 image
+        media_type = url.split(";")[0].split(":")[1] if ";" in url else "image"
+        return f"<base64 {media_type}>"
+    else:
+        # Regular URL
+        return _truncate(url, max_length)
+
+
+SupportedImageMediaType = Literal["image/jpeg", "image/png", "image/gif", "image/webp"]
+
+
+class ContentPartTextParam(BaseModel):
+    text: str
+    type: Literal["text"] = "text"
+
+    def __str__(self) -> str:
+        return f"Text: {_truncate(self.text)}"
+
+    def __repr__(self) -> str:
+        return f"ContentPartTextParam(text={_truncate(self.text)})"
+
+
+class ContentPartRefusalParam(BaseModel):
+    refusal: str
+    type: Literal["refusal"] = "refusal"
+
+    def __str__(self) -> str:
+        return f"Refusal: {_truncate(self.refusal)}"
+
+    def __repr__(self) -> str:
+        return f"ContentPartRefusalParam(refusal={_truncate(repr(self.refusal), 50)})"
+
+
+class ImageURL(BaseModel):
+    url: str
+    detail: Literal["auto", "low", "high"] = "auto"
+    media_type: SupportedImageMediaType = "image/png"
+
+    def __str__(self) -> str:
+        url_display = _format_image_url(self.url)
+        return f"Image[{self.media_type}, detail={self.detail}]: {url_display}"
+
+    def __repr__(self) -> str:
+        url_repr = _format_image_url(self.url, 30)
+        return f"ImageURL(url={repr(url_repr)}, detail={repr(self.detail)}, media_type={repr(self.media_type)})"
+
+
+class ContentPartImageParam(BaseModel):
+    image_url: ImageURL
+    type: Literal["image_url"] = "image_url"
+
+    def __str__(self) -> str:
+        return str(self.image_url)
+
+    def __repr__(self) -> str:
+        return f"ContentPartImageParam(image_url={repr(self.image_url)})"
+
+
+class Function(BaseModel):
+    arguments: str
+    name: str
+
+    def __str__(self) -> str:
+        args_preview = _truncate(self.arguments, 80)
+        return f"{self.name}({args_preview})"
+
+    def __repr__(self) -> str:
+        args_repr = _truncate(repr(self.arguments), 50)
+        return f"Function(name={repr(self.name)}, arguments={args_repr})"
+
+
+class ToolCall(BaseModel):
+    id: str
+    function: Function
+    type: Literal["function"] = "function"
+
+    def __str__(self) -> str:
+        return f"ToolCall[{self.id}]: {self.function}"
+
+    def __repr__(self) -> str:
+        return f"ToolCall(id={repr(self.id)}, function={repr(self.function)})"
+
+
+class _MessageBase(BaseModel):
+    """Base class for all message types"""
+
+    role: Literal["user", "system", "assistant"]
+    cache: bool = False
+
+
+class UserMessage(_MessageBase):
+    role: Literal["user"] = "user"
+    content: Union[str, List[Union[ContentPartTextParam, ContentPartImageParam]]]
+    name: Optional[str] = None
+
+    @property
+    def text(self) -> str:
+        """Automatically parse the text inside content"""
+        if isinstance(self.content, str):
+            return self.content
+        elif isinstance(self.content, list):
+            return "\n".join([part.text for part in self.content if hasattr(part, "text") and part.type == "text"])
+        else:
+            return ""
+
+    def __str__(self) -> str:
+        return f"UserMessage(content={_truncate(self.text)})"
+
+    def __repr__(self) -> str:
+        return f"UserMessage(content={repr(_truncate(self.text))})"
+
+
+class SystemMessage(_MessageBase):
+    role: Literal["system"] = "system"
+    content: Union[str, List[ContentPartTextParam]]
+    name: Optional[str] = None
+
+    @property
+    def text(self) -> str:
+        """Automatically parse the text inside content"""
+        if isinstance(self.content, str):
+            return self.content
+        elif isinstance(self.content, list):
+            return "\n".join([part.text for part in self.content if hasattr(part, "text") and part.type == "text"])
+        else:
+            return ""
+
+    def __str__(self) -> str:
+        return f"SystemMessage(content={_truncate(self.text)})"
+
+    def __repr__(self) -> str:
+        return f"SystemMessage(content={repr(_truncate(self.text))})"
+
+
+class AssistantMessage(_MessageBase):
+    role: Literal["assistant"] = "assistant"
+    content: Optional[Union[str, List[Union[ContentPartTextParam, ContentPartRefusalParam]]]] = None
+    name: Optional[str] = None
+    refusal: Optional[str] = None
+    tool_calls: List[ToolCall] = Field(default_factory=list)
+
+    @property
+    def text(self) -> str:
+        """Automatically parse the text inside content"""
+        if isinstance(self.content, str):
+            return self.content
+        elif isinstance(self.content, list):
+            text = ""
+            for part in self.content:
+                if hasattr(part, "text") and part.type == "text":
+                    text += part.text
+                elif hasattr(part, "refusal") and part.type == "refusal":
+                    text += f"[Refusal] {part.refusal}"
+            return text
+        else:
+            return ""
+
+    def __str__(self) -> str:
+        return f"AssistantMessage(content={_truncate(self.text)})"
+
+    def __repr__(self) -> str:
+        return f"AssistantMessage(content={repr(_truncate(self.text))})"
+
+
+BaseMessage = Union[UserMessage, SystemMessage, AssistantMessage]
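A minimal sketch of how these message models compose (import path taken from the noesium/core/llm/message.py entry in the file listing; the image URL and strings are illustrative):

from noesium.core.llm.message import (
    AssistantMessage,
    ContentPartImageParam,
    ContentPartTextParam,
    ImageURL,
    UserMessage,
)

# Mixed text + image user turn; the .text property joins only the text parts.
user = UserMessage(
    content=[
        ContentPartTextParam(text="What is in this picture?"),
        ContentPartImageParam(image_url=ImageURL(url="https://example.com/cat.png")),
    ]
)
assert user.text == "What is in this picture?"

assistant = AssistantMessage(content="It looks like a cat.")
print(user)       # UserMessage(content=What is in this picture?)
print(assistant)  # AssistantMessage(content=It looks like a cat.)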