noesium-0.1.0-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- noesium/core/__init__.py +4 -0
- noesium/core/agent/__init__.py +14 -0
- noesium/core/agent/base.py +227 -0
- noesium/core/consts.py +6 -0
- noesium/core/goalith/conflict/conflict.py +104 -0
- noesium/core/goalith/conflict/detector.py +53 -0
- noesium/core/goalith/decomposer/__init__.py +6 -0
- noesium/core/goalith/decomposer/base.py +46 -0
- noesium/core/goalith/decomposer/callable_decomposer.py +65 -0
- noesium/core/goalith/decomposer/llm_decomposer.py +326 -0
- noesium/core/goalith/decomposer/prompts.py +140 -0
- noesium/core/goalith/decomposer/simple_decomposer.py +61 -0
- noesium/core/goalith/errors.py +22 -0
- noesium/core/goalith/goalgraph/graph.py +526 -0
- noesium/core/goalith/goalgraph/node.py +179 -0
- noesium/core/goalith/replanner/base.py +31 -0
- noesium/core/goalith/replanner/replanner.py +36 -0
- noesium/core/goalith/service.py +26 -0
- noesium/core/llm/__init__.py +154 -0
- noesium/core/llm/base.py +152 -0
- noesium/core/llm/litellm.py +528 -0
- noesium/core/llm/llamacpp.py +487 -0
- noesium/core/llm/message.py +184 -0
- noesium/core/llm/ollama.py +459 -0
- noesium/core/llm/openai.py +520 -0
- noesium/core/llm/openrouter.py +89 -0
- noesium/core/llm/prompt.py +551 -0
- noesium/core/memory/__init__.py +11 -0
- noesium/core/memory/base.py +464 -0
- noesium/core/memory/memu/__init__.py +24 -0
- noesium/core/memory/memu/config/__init__.py +26 -0
- noesium/core/memory/memu/config/activity/config.py +46 -0
- noesium/core/memory/memu/config/event/config.py +46 -0
- noesium/core/memory/memu/config/markdown_config.py +241 -0
- noesium/core/memory/memu/config/profile/config.py +48 -0
- noesium/core/memory/memu/llm_adapter.py +129 -0
- noesium/core/memory/memu/memory/__init__.py +31 -0
- noesium/core/memory/memu/memory/actions/__init__.py +40 -0
- noesium/core/memory/memu/memory/actions/add_activity_memory.py +299 -0
- noesium/core/memory/memu/memory/actions/base_action.py +342 -0
- noesium/core/memory/memu/memory/actions/cluster_memories.py +262 -0
- noesium/core/memory/memu/memory/actions/generate_suggestions.py +198 -0
- noesium/core/memory/memu/memory/actions/get_available_categories.py +66 -0
- noesium/core/memory/memu/memory/actions/link_related_memories.py +515 -0
- noesium/core/memory/memu/memory/actions/run_theory_of_mind.py +254 -0
- noesium/core/memory/memu/memory/actions/update_memory_with_suggestions.py +514 -0
- noesium/core/memory/memu/memory/embeddings.py +130 -0
- noesium/core/memory/memu/memory/file_manager.py +306 -0
- noesium/core/memory/memu/memory/memory_agent.py +578 -0
- noesium/core/memory/memu/memory/recall_agent.py +376 -0
- noesium/core/memory/memu/memory_store.py +628 -0
- noesium/core/memory/models.py +149 -0
- noesium/core/msgbus/__init__.py +12 -0
- noesium/core/msgbus/base.py +395 -0
- noesium/core/orchestrix/__init__.py +0 -0
- noesium/core/py.typed +0 -0
- noesium/core/routing/__init__.py +20 -0
- noesium/core/routing/base.py +66 -0
- noesium/core/routing/router.py +241 -0
- noesium/core/routing/strategies/__init__.py +9 -0
- noesium/core/routing/strategies/dynamic_complexity.py +361 -0
- noesium/core/routing/strategies/self_assessment.py +147 -0
- noesium/core/routing/types.py +38 -0
- noesium/core/toolify/__init__.py +39 -0
- noesium/core/toolify/base.py +360 -0
- noesium/core/toolify/config.py +138 -0
- noesium/core/toolify/mcp_integration.py +275 -0
- noesium/core/toolify/registry.py +214 -0
- noesium/core/toolify/toolkits/__init__.py +1 -0
- noesium/core/tracing/__init__.py +37 -0
- noesium/core/tracing/langgraph_hooks.py +308 -0
- noesium/core/tracing/opik_tracing.py +144 -0
- noesium/core/tracing/token_tracker.py +166 -0
- noesium/core/utils/__init__.py +10 -0
- noesium/core/utils/logging.py +172 -0
- noesium/core/utils/statistics.py +12 -0
- noesium/core/utils/typing.py +17 -0
- noesium/core/vector_store/__init__.py +79 -0
- noesium/core/vector_store/base.py +94 -0
- noesium/core/vector_store/pgvector.py +304 -0
- noesium/core/vector_store/weaviate.py +383 -0
- noesium-0.1.0.dist-info/METADATA +525 -0
- noesium-0.1.0.dist-info/RECORD +86 -0
- noesium-0.1.0.dist-info/WHEEL +5 -0
- noesium-0.1.0.dist-info/licenses/LICENSE +21 -0
- noesium-0.1.0.dist-info/top_level.txt +1 -0
noesium/core/tracing/langgraph_hooks.py
@@ -0,0 +1,308 @@

import time
from datetime import datetime
from typing import Any, Dict, List, Optional

from langchain_core.callbacks import BaseCallbackHandler
from langchain_core.outputs import LLMResult

from noesium.core.tracing.token_tracker import get_token_tracker
from noesium.core.utils.logging import color_text, get_logger

logger = get_logger(__name__)


class NodeLoggingCallback(BaseCallbackHandler):
    def __init__(self, node_id: Optional[str] = None):
        self.node_id = node_id

    def _prefix(self) -> str:
        return f"[{self.node_id}] " if self.node_id else ""

    def on_tool_end(self, output, run_id, parent_run_id, **kwargs):
        logger.info(color_text(f"{self._prefix()}[TOOL END] output={output}", "cyan", ["dim"]))

    def on_chain_end(self, output, run_id, parent_run_id, **kwargs):
        logger.info(color_text(f"{self._prefix()}[CHAIN END] output={output}", "blue", ["dim"]))

    def on_llm_end(self, response, run_id, parent_run_id, **kwargs):
        logger.info(color_text(f"{self._prefix()}[LLM END] response={response}", "magenta", ["dim"]))

    def on_custom_event(self, event_name, payload, **kwargs):
        logger.info(color_text(f"{self._prefix()}[EVENT] {event_name}: {payload}", "yellow", ["dim"]))


class TokenUsageCallback(BaseCallbackHandler):
    """Enhanced token usage callback that works with LangGraph and custom LLM clients."""

    def __init__(self, model_name: Optional[str] = None, verbose: bool = True):
        super().__init__()
        self.total_prompt_tokens = 0
        self.total_completion_tokens = 0
        self.model_name = model_name
        self.verbose = verbose
        self.session_start = time.time()
        self.llm_calls = 0
        self.token_usage_history: List[Dict] = []

        # Track custom events from our LLM clients
        self._pending_calls: Dict[str, Dict] = {}

    def on_llm_start(
        self, serialized: Dict, prompts: List[str], run_id: str, parent_run_id: Optional[str] = None, **kwargs
    ):
        """Track when LLM calls start"""
        self.llm_calls += 1

        # Store prompt info for token counting
        self._pending_calls[run_id] = {"prompts": prompts, "start_time": time.time(), "call_number": self.llm_calls}

        if self.verbose:
            logger.info(
                color_text(
                    f"[TOKEN CALLBACK] LLM call #{self.llm_calls} started (run_id: {run_id[:8]}...)", "magenta", ["dim"]
                )
            )

    def on_llm_end(self, response: LLMResult, run_id: str, parent_run_id: Optional[str] = None, **kwargs):
        """Enhanced token usage tracking with multiple extraction methods"""
        usage_data = self._extract_token_usage_from_response(response)

        # Fallback to prompt estimation if no usage data available
        if not usage_data and run_id in self._pending_calls:
            usage_data = self._estimate_token_usage(self._pending_calls[run_id], response)

        if usage_data:
            prompt_tokens = usage_data.get("prompt_tokens", 0)
            completion_tokens = usage_data.get("completion_tokens", 0)
            total_tokens = usage_data.get("total_tokens", prompt_tokens + completion_tokens)

            # Update totals
            self.total_prompt_tokens += prompt_tokens
            self.total_completion_tokens += completion_tokens

            # Store in history
            call_data = {
                "run_id": run_id,
                "timestamp": datetime.now().isoformat(),
                "prompt_tokens": prompt_tokens,
                "completion_tokens": completion_tokens,
                "total_tokens": total_tokens,
                "model_name": self.model_name or "unknown",
            }
            self.token_usage_history.append(call_data)

            if self.verbose:
                self._log_token_usage(prompt_tokens, completion_tokens, total_tokens, run_id)
            else:
                # Even when not verbose, show basic token usage at info level
                token_log = f"TOKENS: {total_tokens} | langchain | unknown | P:{prompt_tokens} C:{completion_tokens}"
                logger.info(color_text(token_log, "magenta"))

        # Clean up pending call
        self._pending_calls.pop(run_id, None)

    def on_custom_event(self, name: str, data: Any, run_id: str, **kwargs):
        """Handle custom events from our LLM clients for token usage"""
        if name == "token_usage":
            self._handle_custom_token_usage(data, run_id)

    def _handle_custom_token_usage(self, data: Dict, run_id: str):
        """Handle custom token usage events from our LLM clients"""
        if isinstance(data, dict) and "usage" in data:
            usage = data["usage"]
            prompt_tokens = usage.get("prompt_tokens", 0)
            completion_tokens = usage.get("completion_tokens", 0)
            total_tokens = usage.get("total_tokens", prompt_tokens + completion_tokens)

            # Update totals
            self.total_prompt_tokens += prompt_tokens
            self.total_completion_tokens += completion_tokens

            # Store in history
            call_data = {
                "run_id": run_id,
                "timestamp": datetime.now().isoformat(),
                "prompt_tokens": prompt_tokens,
                "completion_tokens": completion_tokens,
                "total_tokens": total_tokens,
                "model_name": self.model_name or data.get("model", "unknown"),
                "source": "custom_event",
            }
            self.token_usage_history.append(call_data)

            if self.verbose:
                self._log_token_usage(prompt_tokens, completion_tokens, total_tokens, run_id)
            else:
                # Even when not verbose, show basic token usage at info level
                model_name = (
                    data.get("model", "unknown").split("/")[-1]
                    if "/" in data.get("model", "unknown")
                    else data.get("model", "unknown")
                )
                token_log = (
                    f"TOKENS: {total_tokens} | custom_event | {model_name} | P:{prompt_tokens} C:{completion_tokens}"
                )
                logger.info(color_text(token_log, "magenta"))

    def _extract_token_usage_from_response(self, response: LLMResult) -> Optional[Dict]:
        """Extract token usage from LangChain LLMResult"""
        usage = None

        # Method 1: LLMResult llm_output
        if hasattr(response, "llm_output") and response.llm_output:
            usage = response.llm_output.get("token_usage") or response.llm_output.get("usage")

        # Method 2: LLMResult response_metadata
        if not usage and hasattr(response, "response_metadata") and response.response_metadata:
            usage = response.response_metadata.get("token_usage") or response.response_metadata.get("usage")

        # Method 3: Check generations for usage info
        if not usage and hasattr(response, "generations") and response.generations:
            for generation_list in response.generations:
                for generation in generation_list:
                    if hasattr(generation, "generation_info") and generation.generation_info:
                        gen_usage = generation.generation_info.get("token_usage") or generation.generation_info.get(
                            "usage"
                        )
                        if gen_usage:
                            usage = gen_usage
                            break
                if usage:
                    break

        return usage

    def _estimate_token_usage(self, call_info: Dict, response: LLMResult) -> Dict:
        """Estimate token usage when actual usage is not available"""
        try:
            # Simple estimation: ~4 characters per token for English text
            prompts = call_info.get("prompts", [])
            prompt_chars = sum(len(prompt) for prompt in prompts)
            prompt_tokens = max(1, prompt_chars // 4)

            # Estimate completion tokens from response
            completion_chars = 0
            if hasattr(response, "generations"):
                for generation_list in response.generations:
                    for generation in generation_list:
                        if hasattr(generation, "text"):
                            completion_chars += len(generation.text)

            completion_tokens = max(1, completion_chars // 4)

            return {
                "prompt_tokens": prompt_tokens,
                "completion_tokens": completion_tokens,
                "total_tokens": prompt_tokens + completion_tokens,
                "estimated": True,
            }
        except Exception as e:
            logger.warning(f"Failed to estimate token usage: {e}")
            return {"prompt_tokens": 0, "completion_tokens": 0, "total_tokens": 0, "estimated": True, "error": str(e)}

    def _log_token_usage(
        self, prompt_tokens: int, completion_tokens: int, total_tokens: int, run_id: Optional[str] = None
    ):
        """Log token usage with detailed information"""
        run_info = f" (run_id: {run_id[:8]}...)" if run_id else ""
        model_info = f" [{self.model_name}]" if self.model_name else ""

        logger.info(color_text(f"[TOKEN USAGE]{model_info}{run_info}", "magenta", ["dim"]))
        logger.info(color_text(f" Prompt: {prompt_tokens:,} tokens", None, ["dim"]))
        logger.info(color_text(f" Completion: {completion_tokens:,} tokens", None, ["dim"]))
        logger.info(color_text(f" Total: {total_tokens:,} tokens", None, ["dim"]))

        # Show session totals
        session_total = self.total_tokens()
        logger.info(color_text(f" Session Total: {session_total:,} tokens", None, ["dim"]))

    def total_tokens(self) -> int:
        """Get total tokens used in this session"""
        return self.total_prompt_tokens + self.total_completion_tokens

    def get_session_summary(self) -> Dict:
        """Get comprehensive session summary including custom LLM client usage"""
        session_duration = time.time() - self.session_start

        # Get data from global token tracker (custom LLM clients)
        tracker_stats = get_token_tracker().get_stats()

        # Combine callback and tracker statistics
        combined_prompt_tokens = self.total_prompt_tokens + tracker_stats.get("total_prompt_tokens", 0)
        combined_completion_tokens = self.total_completion_tokens + tracker_stats.get("total_completion_tokens", 0)
        combined_total_tokens = combined_prompt_tokens + combined_completion_tokens
        combined_calls = self.llm_calls + tracker_stats.get("total_calls", 0)

        return {
            "session_duration_seconds": session_duration,
            # Combined stats from both callback and tracker
            "total_llm_calls": combined_calls,
            "total_prompt_tokens": combined_prompt_tokens,
            "total_completion_tokens": combined_completion_tokens,
            "total_tokens": combined_total_tokens,
            # Separate stats for debugging
            "callback_stats": {
                "llm_calls": self.llm_calls,
                "prompt_tokens": self.total_prompt_tokens,
                "completion_tokens": self.total_completion_tokens,
                "total_tokens": self.total_tokens(),
            },
            "tracker_stats": tracker_stats,
            "model_name": self.model_name,
            "token_usage_history": self.token_usage_history,
        }

    def print_session_summary(self):
        """Print a formatted session summary"""
        summary = self.get_session_summary()

        logger.info(color_text("\n" + "=" * 50, "blue", ["dim"]))
        logger.info(color_text("TOKEN USAGE SESSION SUMMARY", "blue", ["dim"]))
        logger.info(color_text("=" * 50, "blue", ["dim"]))
        logger.info(color_text(f"Session Duration: {summary['session_duration_seconds']:.2f} seconds", None, ["dim"]))
        logger.info(color_text(f"Total LLM Calls: {summary['total_llm_calls']}", None, ["dim"]))
        logger.info(color_text(f"Total Prompt Tokens: {summary['total_prompt_tokens']:,}", None, ["dim"]))
        logger.info(color_text(f"Total Completion Tokens: {summary['total_completion_tokens']:,}", None, ["dim"]))
        logger.info(color_text(f"Total Tokens: {summary['total_tokens']:,}", None, ["dim"]))

        # Show breakdown by source
        callback_stats = summary["callback_stats"]
        tracker_stats = summary["tracker_stats"]

        if callback_stats["llm_calls"] > 0:
            logger.info(
                color_text(
                    f" LangChain Calls: {callback_stats['llm_calls']} ({callback_stats['total_tokens']:,} tokens)",
                    None,
                    ["dim"],
                )
            )

        if tracker_stats.get("total_calls", 0) > 0:
            logger.info(
                color_text(
                    f" Custom Client Calls: {tracker_stats['total_calls']} ({tracker_stats['total_tokens']:,} tokens)",
                    None,
                    ["dim"],
                )
            )

        if self.model_name:
            logger.info(color_text(f"Model: {self.model_name}", None, ["dim"]))

        logger.info(color_text("=" * 50, "blue", ["dim"]))

    def reset_session(self):
        """Reset all counters for a new session"""
        self.total_prompt_tokens = 0
        self.total_completion_tokens = 0
        self.session_start = time.time()
        self.llm_calls = 0
        self.token_usage_history = []
        self._pending_calls.clear()

        # Also reset the global token tracker
        get_token_tracker().reset()

        if self.verbose:
            logger.info(color_text("[TOKEN CALLBACK] Session reset (including custom clients)", "magenta", ["dim"]))
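For orientation, a minimal usage sketch (not part of the package) of how these handlers can be attached through LangChain's standard `callbacks` config key. The `RunnableLambda` below is only a stand-in for a real LLM chain or compiled LangGraph, so it produces no token usage, but the wiring and the summary call are the same.

from langchain_core.runnables import RunnableLambda

from noesium.core.tracing.langgraph_hooks import NodeLoggingCallback, TokenUsageCallback

token_cb = TokenUsageCallback(model_name="demo-model", verbose=False)
node_cb = NodeLoggingCallback(node_id="demo-node")

# Placeholder runnable; in practice this would be an LLM chain or compiled LangGraph
# whose LLM calls feed on_llm_start/on_llm_end with real token usage.
chain = RunnableLambda(lambda text: text.upper())

result = chain.invoke("hello", config={"callbacks": [token_cb, node_cb]})

# Combines the callback's LangChain counts with the global custom-client tracker.
token_cb.print_session_summary()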
noesium/core/tracing/opik_tracing.py
@@ -0,0 +1,144 @@

"""
Opik tracing configuration for observability of LLM communications.

This module configures Opik tracing to monitor and analyze
all LLM interactions in the Noesium application.
"""

import logging
import os
from typing import Optional

logger = logging.getLogger(__name__)


def configure_opik() -> bool:
    """
    Configure Opik tracing based on environment variables.

    Environment variables:
        COGENTS_OPIK_TRACING: Global toggle for Opik tracing (default: false)
        OPIK_USE_LOCAL: Use local Opik deployment (default: true)
        OPIK_LOCAL_URL: Local Opik URL (default: http://localhost:5173)
        OPIK_API_KEY: API key for Comet ML/Opik (only needed for cloud)
        OPIK_WORKSPACE: Workspace name (optional)
        OPIK_PROJECT_NAME: Project name for organizing traces
        OPIK_URL: Custom Opik URL (for cloud deployment)
        OPIK_TRACING: Enable/disable tracing (default: true if enabled globally)

    Returns:
        bool: True if Opik was successfully configured, False otherwise
    """
    try:
        # Check global Noesium Opik tracing toggle first
        cogents_opik_enabled = os.getenv("COGENTS_OPIK_TRACING", "false").lower() == "true"

        if not cogents_opik_enabled:
            logger.debug("Opik tracing disabled via COGENTS_OPIK_TRACING=false")
            return False

        # Check if using local deployment
        use_local = os.getenv("OPIK_USE_LOCAL", "true").lower() == "true"

        # Configuration variables
        opik_api_key = os.getenv("OPIK_API_KEY")
        os.getenv("OPIK_WORKSPACE")
        opik_project = os.getenv("OPIK_PROJECT_NAME", "cogents-llm")
        opik_tracing = os.getenv("OPIK_TRACING", "true").lower() == "true"

        if not opik_tracing:
            logger.debug("Opik tracing disabled via OPIK_TRACING=false")
            return False

        # For cloud deployment, API key is required
        if not use_local and not opik_api_key:
            logger.debug("No OPIK_API_KEY found for cloud deployment, Opik tracing disabled")
            return False

        # Import opik here to avoid import errors if not installed
        import opik

        # Configure Opik
        if use_local:
            # Local deployment configuration
            local_url = os.getenv("OPIK_LOCAL_URL", "http://localhost:5173")
            opik.configure(use_local=True)
            logger.info(f"Opik tracing configured for local deployment at {local_url}, project: {opik_project}")
        else:
            # Cloud deployment configuration
            opik.configure(api_key=opik_api_key, use_local=False)
            logger.info(f"Opik tracing configured for cloud deployment, project: {opik_project}")

        return True

    except ImportError:
        logger.warning("Opik package not installed, tracing disabled")
        return False
    except Exception as e:
        logger.error(f"Failed to configure Opik: {e}")
        return False


def is_opik_enabled() -> bool:
    """
    Check if Opik tracing is currently enabled.

    Returns:
        bool: True if Opik tracing is enabled, False otherwise
    """
    try:
        # Check global Noesium Opik tracing toggle first
        cogents_opik_enabled = os.getenv("COGENTS_OPIK_TRACING", "false").lower() == "true"

        if not cogents_opik_enabled:
            return False

        import opik

        # Check if Opik is properly configured
        return opik.get_current_project_name() is not None
    except (ImportError, AttributeError):
        return False
    except Exception:
        return False


def get_opik_project() -> Optional[str]:
    """
    Get the current Opik project name.

    Returns:
        Optional[str]: Project name if configured, None otherwise
    """
    try:
        import opik

        return opik.get_current_project_name()
    except (ImportError, AttributeError):
        return None
    except Exception:
        return None


def create_opik_trace(name: str, input_data: dict = None, metadata: dict = None):
    """
    Create a new Opik trace.

    Args:
        name: Name of the trace
        input_data: Input data for the trace
        metadata: Additional metadata

    Returns:
        Opik trace object or None if tracing is disabled
    """
    if not is_opik_enabled():
        return None

    try:
        import opik

        return opik.trace(name=name, input=input_data or {}, metadata=metadata or {})
    except Exception as e:
        logger.debug(f"Failed to create Opik trace: {e}")
        return None
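A minimal configuration sketch (not from the package) showing the intended call pattern; the environment values set here are illustrative and would normally come from the deployment environment rather than from code.

import os

from noesium.core.tracing.opik_tracing import configure_opik, create_opik_trace, is_opik_enabled

# Illustrative settings; real deployments export these in the environment instead.
os.environ.setdefault("COGENTS_OPIK_TRACING", "true")  # global toggle, off by default
os.environ.setdefault("OPIK_USE_LOCAL", "true")        # talk to a local Opik instance

if configure_opik():
    trace = create_opik_trace(
        name="demo-run",
        input_data={"query": "hello"},
        metadata={"component": "example"},
    )
    print("Opik enabled:", is_opik_enabled(), "trace:", trace)
else:
    # With the toggle off or opik not installed, the helpers above degrade to no-ops.
    print("Opik tracing not configured")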
noesium/core/tracing/token_tracker.py
@@ -0,0 +1,166 @@

"""
Token usage tracking system for custom LLM clients.

This module provides a thread-safe token usage tracker that can be used
by custom LLM clients to report usage and by callbacks to access statistics.
"""

import threading
import time
from dataclasses import dataclass
from datetime import datetime
from typing import Any, Dict, List, Optional

from noesium.core.utils.logging import color_text, get_logger

logger = get_logger(__name__)


@dataclass
class TokenUsage:
    """Token usage data for a single LLM call."""

    prompt_tokens: int
    completion_tokens: int
    total_tokens: int
    model_name: str
    timestamp: str
    run_id: Optional[str] = None
    call_type: str = "completion"  # completion, structured, vision
    estimated: bool = False


class TokenUsageTracker:
    """Thread-safe token usage tracker."""

    def __init__(self):
        self._lock = threading.RLock()  # Use reentrant lock to avoid deadlock
        self.total_prompt_tokens = 0
        self.total_completion_tokens = 0
        self.session_start = time.time()
        self.call_count = 0
        self.usage_history: List[TokenUsage] = []

    def record_usage(self, usage: TokenUsage) -> None:
        """Record token usage from an LLM call."""
        with self._lock:
            self.total_prompt_tokens += usage.prompt_tokens
            self.total_completion_tokens += usage.completion_tokens
            self.call_count += 1
            self.usage_history.append(usage)

            # Log token usage in structured format for analysis with color
            model_short = usage.model_name.split("/")[-1] if "/" in usage.model_name else usage.model_name
            token_log = f"TOKENS: {usage.total_tokens} | {usage.call_type} | {model_short} | P:{usage.prompt_tokens} C:{usage.completion_tokens}{'*' if usage.estimated else ''}"
            logger.info(color_text(token_log, "magenta"))

    def get_total_tokens(self) -> int:
        """Get total tokens used."""
        with self._lock:
            return self.total_prompt_tokens + self.total_completion_tokens

    def get_stats(self) -> Dict[str, Any]:
        """Get comprehensive usage statistics."""
        with self._lock:
            session_duration = time.time() - self.session_start
            return {
                "session_duration_seconds": session_duration,
                "total_calls": self.call_count,
                "total_prompt_tokens": self.total_prompt_tokens,
                "total_completion_tokens": self.total_completion_tokens,
                "total_tokens": self.get_total_tokens(),
                "usage_history": [
                    {
                        "prompt_tokens": usage.prompt_tokens,
                        "completion_tokens": usage.completion_tokens,
                        "total_tokens": usage.total_tokens,
                        "model_name": usage.model_name,
                        "timestamp": usage.timestamp,
                        "call_type": usage.call_type,
                        "estimated": usage.estimated,
                    }
                    for usage in self.usage_history
                ],
            }

    def reset(self) -> None:
        """Reset all usage statistics."""
        with self._lock:
            self.total_prompt_tokens = 0
            self.total_completion_tokens = 0
            self.session_start = time.time()
            self.call_count = 0
            self.usage_history.clear()
            logger.info("Token usage tracker reset")


# Global token tracker instance
_global_tracker = TokenUsageTracker()


def get_token_tracker() -> TokenUsageTracker:
    """Get the global token usage tracker."""
    return _global_tracker


def record_token_usage(
    prompt_tokens: int,
    completion_tokens: int,
    model_name: str,
    call_type: str = "completion",
    run_id: Optional[str] = None,
    estimated: bool = False,
) -> None:
    """Record token usage in the global tracker."""
    usage = TokenUsage(
        prompt_tokens=prompt_tokens,
        completion_tokens=completion_tokens,
        total_tokens=prompt_tokens + completion_tokens,
        model_name=model_name,
        timestamp=datetime.now().isoformat(),
        run_id=run_id,
        call_type=call_type,
        estimated=estimated,
    )
    _global_tracker.record_usage(usage)


def extract_token_usage_from_openai_response(
    response, model_name: str, call_type: str = "completion"
) -> Optional[TokenUsage]:
    """Extract token usage from OpenAI API response."""
    try:
        if hasattr(response, "usage") and response.usage:
            usage = response.usage
            return TokenUsage(
                prompt_tokens=usage.prompt_tokens,
                completion_tokens=usage.completion_tokens,
                total_tokens=usage.total_tokens,
                model_name=model_name,
                timestamp=datetime.now().isoformat(),
                call_type=call_type,
                estimated=False,
            )
    except Exception as e:
        logger.debug(f"Could not extract token usage from response: {e}")

    return None


def estimate_token_usage(
    prompt_text: str, completion_text: str, model_name: str, call_type: str = "completion"
) -> TokenUsage:
    """Estimate token usage when actual usage is not available."""
    # Simple estimation: ~4 characters per token for English text
    prompt_tokens = max(1, len(prompt_text) // 4)
    completion_tokens = max(1, len(completion_text) // 4)

    return TokenUsage(
        prompt_tokens=prompt_tokens,
        completion_tokens=completion_tokens,
        total_tokens=prompt_tokens + completion_tokens,
        model_name=model_name,
        timestamp=datetime.now().isoformat(),
        call_type=call_type,
        estimated=True,
    )
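Finally, a usage sketch for the tracker itself (not shipped with the package; the model names below are placeholders): custom clients report exact counts when the provider returns a usage block and fall back to the character-based estimate otherwise.

from noesium.core.tracing.token_tracker import (
    estimate_token_usage,
    get_token_tracker,
    record_token_usage,
)

# Exact counts, e.g. copied from a provider's usage block.
record_token_usage(prompt_tokens=120, completion_tokens=45, model_name="openai/gpt-4o-mini")

# Estimated counts (~4 characters per token) when no usage block is available.
usage = estimate_token_usage("What is 2 + 2?", "2 + 2 equals 4.", model_name="local/llama")
get_token_tracker().record_usage(usage)

# Aggregated statistics include the two calls above plus any earlier usage.
stats = get_token_tracker().get_stats()
print(stats["total_calls"], stats["total_tokens"])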