llumo 0.2.23__py3-none-any.whl → 0.2.25__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- llumo/__init__.py +5 -2
- llumo/callback.py +480 -0
- llumo/callbacks-0.py +258 -0
- llumo/client.py +88 -30
- llumo/exceptions.py +2 -2
- llumo/helpingFuntions.py +59 -8
- llumo/llumoLogger.py +57 -0
- llumo/llumoSessionContext.py +364 -0
- llumo/openai.py +196 -50
- {llumo-0.2.23.dist-info → llumo-0.2.25.dist-info}/METADATA +1 -1
- llumo-0.2.25.dist-info/RECORD +20 -0
- llumo-0.2.23.dist-info/RECORD +0 -16
- {llumo-0.2.23.dist-info → llumo-0.2.25.dist-info}/WHEEL +0 -0
- {llumo-0.2.23.dist-info → llumo-0.2.25.dist-info}/licenses/LICENSE +0 -0
- {llumo-0.2.23.dist-info → llumo-0.2.25.dist-info}/top_level.txt +0 -0
llumo/callbacks-0.py
ADDED
@@ -0,0 +1,258 @@
+from langchain_core.callbacks.base import BaseCallbackHandler
+from typing import Any, Dict, List, Optional, Union
+import time
+import json
+from llumo.llumoSessionContext import getSessionID, getLlumoRun
+from llumo.llumoSessionContext import LlumoSessionContext
+
+
+class LlumoCallbackHandler(BaseCallbackHandler):
+    """
+    LangChain callback handler that integrates with Llumo logging system.
+    Tracks LLM calls, tool usage, agent actions, and chains.
+    """
+
+    def __init__(self, logger):
+        self.logger = logger
+        self.start_times = {}  # Track start times for latency calculation
+        self.step_counters = {}  # Track step counts for agents
+
+    def _get_session_context(self) -> Optional[LlumoSessionContext]:
+        """Get the current Llumo session context from context variables."""
+        try:
+            session_id = getSessionID()
+            run = getLlumoRun()
+            if session_id and run:
+                # Create a temporary context object to access logging methods
+                ctx = LlumoSessionContext(self.logger, session_id)
+                return ctx
+        except Exception:
+            pass
+        return None
+
+    def _safe_serialize(self, obj: Any) -> str:
+        """Safely serialize objects to JSON string."""
+        try:
+            return json.dumps(obj, default=str, ensure_ascii=False)
+        except Exception:
+            return str(obj)
+
+    # LLM Callbacks
+    def on_llm_start(
+        self, serialized: Dict[str, Any], prompts: List[str], **kwargs: Any
+    ) -> None:
+        """Called when LLM starts generating."""
+        run_id = kwargs.get('run_id')
+        if run_id:
+            self.start_times[run_id] = time.time()
+
+        print("LLM started - prompts:", len(prompts))
+
+    def on_llm_new_token(self, token: str, **kwargs: Any) -> None:
+        """Called when LLM generates a new token."""
+        # Optional: Could be used for streaming token tracking
+        pass
+
+    def on_llm_end(self, response: Any, **kwargs: Any) -> None:
+        """Called when LLM finishes generating."""
+        ctx = self._get_session_context()
+        if not ctx:
+            print("No Llumo session context available")
+            return
+
+        run_id = kwargs.get('run_id')
+        start_time = self.start_times.pop(run_id, time.time())
+        latency_ms = int((time.time() - start_time) * 1000)
+
+        # Extract LLM response details
+        model = getattr(response, 'model_name', 'unknown')
+
+        # Get token usage if available
+        token_usage = getattr(response, 'llm_output', {}).get('token_usage', {})
+        input_tokens = token_usage.get('prompt_tokens', 0)
+        output_tokens = token_usage.get('completion_tokens', 0)
+
+        # Get the generated text
+        if hasattr(response, 'generations') and response.generations:
+            output_text = response.generations[0][0].text if response.generations[0] else ""
+        else:
+            output_text = str(response)
+
+        # Get the original prompt
+        prompts = kwargs.get('prompts', [''])
+        query = prompts[0] if prompts else ""
+
+        try:
+            ctx.logLlmStep(
+                stepName=f"LLM Call - {model}",
+                model=model,
+                provider="langchain",
+                inputTokens=input_tokens,
+                outputTokens=output_tokens,
+                temperature=kwargs.get('temperature', 0.7),
+                promptTruncated=False,
+                latencyMs=latency_ms,
+                query=query,
+                output=output_text,
+                status="SUCCESS",
+                message=""
+            )
+            print(f"Logged LLM step: {model} ({latency_ms}ms)")
+        except Exception as e:
+            print(f"Failed to log LLM step: {e}")
+
+    def on_llm_error(self, error: Exception, **kwargs: Any) -> None:
+        """Called when LLM encounters an error."""
+        ctx = self._get_session_context()
+        if not ctx:
+            return
+
+        run_id = kwargs.get('run_id')
+        start_time = self.start_times.pop(run_id, time.time())
+        latency_ms = int((time.time() - start_time) * 1000)
+
+        prompts = kwargs.get('prompts', [''])
+        query = prompts[0] if prompts else ""
+
+        try:
+            ctx.logLlmStep(
+                stepName="LLM Call - Error",
+                model="unknown",
+                provider="langchain",
+                inputTokens=0,
+                outputTokens=0,
+                temperature=0.7,
+                promptTruncated=False,
+                latencyMs=latency_ms,
+                query=query,
+                output="",
+                status="FAILURE",
+                message=str(error)
+            )
+            print(f"Logged LLM error: {error}")
+        except Exception as e:
+            print(f"Failed to log LLM error: {e}")
+
+    # Chain Callbacks
+    def on_chain_start(
+        self, serialized: Dict[str, Any], inputs: Dict[str, Any], **kwargs: Any
+    ) -> None:
+        pass
+
+    def on_chain_end(self, outputs: Dict[str, Any], **kwargs: Any) -> None:
+        """Called when a chain ends."""
+        print("Chain execution completed")
+
+    def on_chain_error(self, error: Exception, **kwargs: Any) -> None:
+        """Called when a chain encounters an error."""
+        print(f"Chain error: {error}")
+
+    # Tool Callbacks
+    def on_tool_start(
+        self, serialized: Dict[str, Any], input_str: str, **kwargs: Any
+    ) -> None:
+        """Called when a tool starts."""
+        run_id = kwargs.get('run_id')
+        if run_id:
+            self.start_times[run_id] = time.time()
+
+        tool_name = serialized.get('name', 'Unknown Tool')
+        print(f"Tool started: {tool_name}")
+
+    def on_tool_end(self, output: str, **kwargs: Any) -> None:
+        """Called when a tool ends."""
+        ctx = self._get_session_context()
+        if not ctx:
+            return
+
+        run_id = kwargs.get('run_id')
+        start_time = self.start_times.pop(run_id, time.time())
+        latency_ms = int((time.time() - start_time) * 1000)
+
+        # Extract tool info from kwargs
+        serialized = kwargs.get('serialized', {})
+        tool_name = serialized.get('name', 'Unknown Tool')
+        input_str = kwargs.get('input_str', '')
+
+        try:
+            ctx.logToolStep(
+                stepName=f"Tool - {tool_name}",
+                toolName=tool_name,
+                input={"input": input_str},
+                output=output,
+                latencyMs=latency_ms,
+                status="SUCCESS",
+                message=""
+            )
+            print(f"Logged tool step: {tool_name} ({latency_ms}ms)")
+        except Exception as e:
+            print(f"Failed to log tool step: {e}")
+
+    def on_tool_error(self, error: Exception, **kwargs: Any) -> None:
+        """Called when a tool encounters an error."""
+        ctx = self._get_session_context()
+        if not ctx:
+            return
+
+        run_id = kwargs.get('run_id')
+        start_time = self.start_times.pop(run_id, time.time())
+        latency_ms = int((time.time() - start_time) * 1000)
+
+        serialized = kwargs.get('serialized', {})
+        tool_name = serialized.get('name', 'Unknown Tool')
+        input_str = kwargs.get('input_str', '')
+
+        try:
+            ctx.logToolStep(
+                stepName=f"Tool - {tool_name} - Error",
+                toolName=tool_name,
+                input={"input": input_str},
+                output="",
+                latencyMs=latency_ms,
+                status="FAILURE",
+                message=str(error)
+            )
+            print(f"Logged tool error: {tool_name} - {error}")
+        except Exception as e:
+            print(f"Failed to log tool error: {e}")
+
+    # Agent Callbacks
+    def on_agent_action(self, action: Any, **kwargs: Any) -> None:
+        """Called when an agent takes an action."""
+        run_id = kwargs.get('run_id')
+
+        # Track agent step count
+        if run_id not in self.step_counters:
+            self.step_counters[run_id] = 0
+        self.step_counters[run_id] += 1
+
+        print(f"Agent action: {getattr(action, 'tool', 'unknown')}")
+
+    def on_agent_finish(self, finish: Any, **kwargs: Any) -> None:
+        """Called when an agent finishes."""
+        ctx = self._get_session_context()
+        if not ctx:
+            return
+
+        run_id = kwargs.get('run_id')
+        num_steps = self.step_counters.pop(run_id, 0)
+
+        try:
+            ctx.logAgentStep(
+                stepName="Agent Execution",
+                agentType="langchain_agent",
+                agentName="LangChain Agent",
+                numStepsTaken=num_steps,
+                tools=[],  # Could be populated if tool info is available
+                query=getattr(finish, 'log', ''),
+                status="SUCCESS",
+                message=""
+            )
+            print(f"Logged agent finish: {num_steps} steps")
+        except Exception as e:
+            print(f"Failed to log agent step: {e}")
+
+
+
+
+
llumo/client.py
CHANGED
@@ -99,11 +99,23 @@ class LlumoClient:
             )
             self.email = data["data"]["data"].get("email", None)

-            self.definationMapping[evalName] = data.get("data", {}).get("data", {}).get("analyticsMapping", {}).get(evalName,
-
+            self.definationMapping[evalName] = data.get("data", {}).get("data", {}).get("analyticsMapping", {}).get(evalName, "")
+            self.categories = data.get("data", {}).get("data", {}).get("analyticsMapping", {}).get(evalName, "").get("categories", {})
+            self.evaluationStrictness=data.get("data", {}).get("data", {}).get("analyticsMapping", {}).get(evalName, "").get("evaluationStrictness", {})
+            self.grammarCheckOutput=data.get("data", {}).get("data", {}).get("analyticsMapping", {}).get(evalName, "").get("grammarCheckOutput", {})
+            self.insightsLength=data.get("data", {}).get("data", {}).get("analyticsMapping", {}).get(evalName, "").get("insightsLength", {})
+            self.insightsLevel=data.get("data", {}).get("data", {}).get("analyticsMapping", {}).get(evalName, "").get("insightsLevel", {})
+            self.executionDependency=data.get("data", {}).get("data", {}).get("analyticsMapping", {}).get(evalName, "").get("executionDependency", {})
+            self.sampleData=data.get("data", {}).get("data", {}).get("analyticsMapping", {}).get(evalName, "").get("sampleData", {})
+            self.numJudges=data.get("data", {}).get("data", {}).get("analyticsMapping", {}).get(evalName, "").get("numJudges", {})
+            self.penaltyBonusInstructions=data.get("data", {}).get("data", {}).get("analyticsMapping", {}).get(evalName, "").get("penaltyBonusInstructions", [])
+            self.probableEdgeCases= data.get("data", {}).get("data", {}).get("analyticsMapping", {}).get(evalName, "").get("probableEdgeCases", [])
+            self.fieldMapping= data.get("data", {}).get("data", {}).get("analyticsMapping", {}).get(evalName, "").get("fieldMapping", [])
+
+
         except Exception as e:
             # print(f"Error extracting data from response: {str(e)}")
-            raise LlumoAIError.UnexpectedError(detail=
+            raise LlumoAIError.UnexpectedError(detail=evalName)

     def postBatch(self, batch, workspaceID):
         payload = {
@@ -617,10 +629,12 @@ class LlumoClient:
        self,
        data,
        evals: list,  # list of eval metric names
+       session,  # Add session parameter
        prompt_template="Give answer to the given query: {{query}} using the given context: {{context}}.",
        outputColName="output",
        createExperiment: bool = False,
        getDataFrame:bool =False,
+       playgroundID: str = None,
        _tocheck=True,
    ):
        if isinstance(data, dict):
@@ -668,19 +682,20 @@ class LlumoClient:
        listener_thread.start()
        self.validateApiKey(evalName=evals[0])
        if createExperiment:
-
-
-
-
-
-
+           if playgroundID:
+               activePlayground = playgroundID
+           else:
+               activePlayground = str(createEvalPlayground(email=self.email, workspaceID=self.workspaceID))
+       else:
+           activePlayground = f"{int(time.time() * 1000)}{uuid.uuid4()}".replace("-", "")
        for evalName in evals:
            # print(f"\n======= Running evaluation for: {evalName} =======")

            # Validate API and dependencies
            self.validateApiKey(evalName=evalName)
+           customAnalytics=getCustomAnalytics(self.workspaceID)
            metricDependencies = checkDependency(
-               evalName, list(dataframe.columns), tocheck=_tocheck
+               evalName, list(dataframe.columns), tocheck=_tocheck,customevals=customAnalytics
            )
            if not metricDependencies["status"]:
                raise LlumoAIError.dependencyError(metricDependencies["message"])
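The new `createExperiment` branch is easier to read pulled out on its own. A standalone paraphrase, assuming only what the hunk shows (`createEvalPlayground` is the existing SDK helper it calls):

import time
import uuid

def resolvePlayground(createExperiment, playgroundID, email, workspaceID, createEvalPlayground):
    if createExperiment:
        # New in 0.2.25: a caller-supplied playground is reused instead of
        # always creating a fresh eval playground.
        if playgroundID:
            return playgroundID
        return str(createEvalPlayground(email=email, workspaceID=workspaceID))
    # Unchanged: ad-hoc runs get a throwaway millisecond-timestamp + UUID id.
    return f"{int(time.time() * 1000)}{uuid.uuid4()}".replace("-", "")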
@@ -691,6 +706,15 @@ class LlumoClient:
            evalType = "LLM"
            workspaceID = self.workspaceID
            email = self.email
+           categories=self.categories
+           evaluationStrictness=self.evaluationStrictness
+           grammarCheckOutput=self.grammarCheckOutput
+           insightLength=self.insightsLength
+           numJudges=self.numJudges
+           penaltyBonusInstructions=self.penaltyBonusInstructions
+           probableEdgeCases=self.probableEdgeCases
+           fieldMapping=self.fieldMapping
+

            userHits = checkUserHits(
                self.workspaceID,
@@ -732,31 +756,39 @@ class LlumoClient:
            templateData = {
                "processID": getProcessID(),
                "socketID": socketID,
+               "rowID": rowID,
+               "columnID": columnID,
+               "processType": "EVAL",
+               "evalType": evalType,
+               "workspaceID": workspaceID,
+               "email": email,
+               "playgroundID": activePlayground,
                "source": "SDK",
                "processData": {
+                   "analyticsName": evalName,
+                   "definition": evalDefinition,
                    "executionDependency": {
-                       "
-                       "
-                       "
-                       "
-                       "
-                       "
-                       "
+                       "Query": "",
+                       "Context": "",
+                       "Output": output,
+                       "Tools": tools,
+                       "GroundTruth": groundTruth,
+                       "MessageHistory": messageHistory,
+                       "IntermediateSteps": intermediateSteps,
                    },
-                   "
+                   "categories":categories,
+                   "evaluationStrictness": evaluationStrictness,
+                   "grammarCheckOutput": grammarCheckOutput,
+                   "insightLength": insightLength,
+                   "numJudges": numJudges,
+                   "penaltyBonusInstructions": penaltyBonusInstructions,
+                   "probableEdgeCases": probableEdgeCases,
                    "model": model,
                    "provider": provider,
-                   "analytics": evalName,
                },
-               "workspaceID": workspaceID,
                "type": "EVAL",
-               "evalType": evalType,
                "kpi": evalName,
-               "
-               "rowID": rowID,
-               "playgroundID": activePlayground,
-               "processType": "EVAL",
-               "email": email,
+               "fieldMappig":fieldMapping,
            }

            query = ""
@@ -828,7 +860,6 @@ class LlumoClient:
            # print("All received keys:", received_rowIDs)
            # print("Missing keys:", len(missingRowIDs))
            missingRowIDs=list(missingRowIDs)
-
            if len(missingRowIDs) > 0:
                dataFromDb=fetchData(workspaceID,activePlayground,missingRowIDs)
                rawResults.extend(dataFromDb)
@@ -850,6 +881,30 @@ class LlumoClient:
                    dataframe.at[index, evalName] = value.get("value")
                    dataframe.at[index, f"{evalName} Reason"] = value.get("reasoning")

+                   # Log the evaluation step
+                   if session:
+                       try:
+                           start_time = time.time()
+                           session.logEvalStep(
+                               stepName=f"EVAL-{evalName}",
+                               output=value.get("value"),
+                               context=row.get("context", ""),
+                               query=row.get("query", ""),
+                               messageHistory=row.get("messageHistory", ""),
+                               tools=row.get("tools", ""),
+                               intermediateSteps=row.get("intermediateSteps", ""),
+                               groundTruth=row.get("groundTruth", ""),
+                               analyticsScore=value.get("analyticsScore", {}),
+                               reasoning=value.get("reasoning", {}),
+                               classification=value.get("classification", {}),
+                               evalLabel=value.get("evalLabel", {}),
+                               latencyMs=int((time.time() - start_time) * 1000),
+                               status="SUCCESS",
+                               message="",
+                           )
+                       except Exception as e:
+                           print(f"Error logging eval step: {e}")
+
        self.socket.disconnect()

@@ -868,7 +923,8 @@ class LlumoClient:
                promptText=prompt_template,
                definationMapping=self.definationMapping,
                outputColName=outputColName,
-               activePlayground= activePlayground
+               activePlayground= activePlayground,
+               customAnalytics=customAnalytics
            ):
                print(
                    "LLUMO’s intuitive UI is ready—start exploring and experimenting with your logs now. Visit https://app.llumo.ai/evallm to see the results."
@@ -1607,8 +1663,10 @@ class LlumoClient:
        rowIdMapping = {}  # (rowID-columnID-columnID -> (index, evalName))
        self.validateApiKey(evalName=evals[0])
        if createExperiment:
-
-
+           if playgroundID:
+               activePlayground = playgroundID
+           else:
+               activePlayground = str(createEvalPlayground(email=self.email, workspaceID=self.workspaceID))
        else:
            activePlayground = f"{int(time.time() * 1000)}{uuid.uuid4()}".replace(
                "-", ""
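Taken together, the client.py hunks add two caller-facing knobs: `session` (enables the per-row `logEvalStep` calls above) and `playgroundID` (reuses an existing eval playground when `createExperiment=True`). A hedged call sketch; the method name sits outside the shown hunks, so `evaluateMultiple` is an assumption, as are the client constructor argument and the sample row:

from llumo import LlumoClient

client = LlumoClient(api_key="llumo-api-key")   # assumed constructor argument
result = client.evaluateMultiple(               # assumed method name
    data=[{"query": "Q1", "context": "C1", "output": "A1"}],  # made-up row
    evals=["Response Completeness"],
    session=None,                     # None skips the new logEvalStep logging
    createExperiment=True,
    playgroundID="existing-playground-id",  # hypothetical; omit to create one
)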
llumo/exceptions.py
CHANGED
@@ -22,8 +22,8 @@ class LlumoAIError(Exception):
        return LlumoAIError("The API response is not in valid JSON format")

    @staticmethod
-   def UnexpectedError(detail="
-       return LlumoAIError(f"
+   def UnexpectedError(detail="Metric"):
+       return LlumoAIError(f"Can you please check if {detail} is written correctly. If you want to run {detail} please create a custom eval with same name of app.llumo.ai/evallm ")

    @staticmethod
    def EvalError(detail="Some error occured while processing"):
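Because `UnexpectedError` now interpolates the metric name (client.py above passes `detail=evalName`), a typo in an eval name yields an actionable message. A quick illustration; the misspelled metric is deliberate:

from llumo.exceptions import LlumoAIError

try:
    raise LlumoAIError.UnexpectedError(detail="Response Completness")  # deliberate typo
except LlumoAIError as e:
    print(e)  # "Can you please check if Response Completness is written correctly. ..."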
llumo/helpingFuntions.py
CHANGED
@@ -26,6 +26,7 @@ uploadRowList = (
 )
 createInsightUrl="https://app.llumo.ai/api/external/generate-insight-from-eval-for-sdk"

+getCustomAnalyticsUrl="https://app.llumo.ai/api/workspace/get-all-analytics"

 def getProcessID():
     return f"{int(time.time() * 1000)}{uuid.uuid4()}"
@@ -217,7 +218,7 @@ def deleteColumnListInPlayground(workspaceID: str, playgroundID: str):
     return None

 def createColumn(workspaceID, dataframe, playgroundID, promptText=None,queryColName=None,
-                 outputColName= "output",dataStreamName=None,definationMapping=None,evalOutputMap = None):
+                 outputColName= "output",dataStreamName=None,definationMapping=None,evalOutputMap = None,customAnalytics=[]):
     if len(dataframe) > 100:
         dataframe = dataframe.head(100)
         print("⚠️ Dataframe truncated to 100 rows for upload.")
@@ -228,9 +229,12 @@ def createColumn(workspaceID, dataframe, playgroundID, promptText=None,queryColN
         "columnListToUpload": [],
     }
     allEvals = ['Response Completeness', 'Response Bias', 'Response Harmfulness', 'Input Toxicity', 'Input Harmfulness', 'Context Utilization', 'Relevance Retention', 'Semantic Cohesion', 'Final Task Alignment', 'Tool Reliability', 'Response Correctness', 'Response Toxicity', 'Input Bias', 'Input Relevancy', 'Redundancy Reduction', 'Response Sentiment', 'Tool Selection Accuracy', 'Stepwise Progression', 'Hallucination', 'Faithfulness', 'Answer Relevancy', 'Context Precision', 'Answer Similarity', 'Harmfulness', 'Maliciousness', 'Coherence', 'Answer Correctness', 'Context Recall', 'Context Entity Recall', 'Conciseness', 'customEvalColumn', 'Groundedness', 'Memory Utilization', 'Input Relevancy (Multi-turn)','PII Check','Prompt Injection']
-
-
-
+    try:
+        allEvals.extend(list(customAnalytics.keys()))
+    except Exception as e:
+        pass
+    evalDependencies = checkDependency(_returnDepMapping=True,customevals=customAnalytics)
+    print(allEvals)
     # Create a mapping of column names to unique column IDs
     columnIDMapping = {}

@@ -296,7 +300,7 @@ def createColumn(workspaceID, dataframe, playgroundID, promptText=None,queryColN
                      "order": indx}


-        elif any(col.startswith(eval + "_") or col == eval for eval in allEvals) and not " Reason" in col and promptText is not None:
+        elif any(col.startswith(eval + "_") or col == eval for eval in allEvals) and not " Reason" in col and promptText is not None :
            if evalOutputMap != None:
                outputColName = evalOutputMap[col]
            else:
@@ -455,14 +459,14 @@ def uploadRowsInDBPlayground(payload):
     return None


-def createPlayground(email, workspaceID, df, promptText=None,queryColName=None,dataStreamName=None,definationMapping=None,outputColName="output",evalOutputMap = None,activePlayground=None):
+def createPlayground(email, workspaceID, df, promptText=None,queryColName=None,dataStreamName=None,definationMapping=None,outputColName="output",evalOutputMap = None,activePlayground=None,customAnalytics=[]):

     if activePlayground != None:
         playgroundId=activePlayground
     else:
         playgroundId = str(createEvalPlayground(email=email, workspaceID=workspaceID))
     payload1, payload2 = createColumn(
-        workspaceID=workspaceID, dataframe=df, playgroundID=playgroundId, promptText=promptText,queryColName=queryColName,dataStreamName=dataStreamName,definationMapping=definationMapping,outputColName=outputColName,evalOutputMap=evalOutputMap
+        workspaceID=workspaceID, dataframe=df, playgroundID=playgroundId, promptText=promptText,queryColName=queryColName,dataStreamName=dataStreamName,definationMapping=definationMapping,outputColName=outputColName,evalOutputMap=evalOutputMap,customAnalytics=customAnalytics
     )

     # Debugging line to check the payload2 structure
@@ -515,7 +519,7 @@ def getPlaygroundInsights(defination:str,uniqueClassesString: str, reasonList: l
     else:
         print(f"Error generating insight: {responseGenerate.status_code} - {responseGenerate.text}")
         return None
-def checkDependency(selectedEval:list = [], columns:list = [],tocheck=True,_returnDepMapping = False):
+def checkDependency(selectedEval:list = [], columns:list = [],tocheck=True,_returnDepMapping = False,customevals={}):
     """
     Checks if all the required input columns for the selected evaluation metric are present.

@@ -527,6 +531,7 @@ def checkDependency(selectedEval:list = [], columns:list = [],tocheck=True,_retu
     - LlumoAIError.dependencyError: If any required column is missing.
     """
     # Define required dependencies for each evaluation metric
+
     metricDependencies = {
         'Response Completeness': ['context', 'query', 'output'],
         'Response Bias': ['output'],
@@ -553,6 +558,8 @@ def checkDependency(selectedEval:list = [], columns:list = [],tocheck=True,_retu
         'PII Check':["query","output"],
         'Prompt Injection':["query"]
     }
+
+    metricDependencies.update(customevals)
     if _returnDepMapping == True:
         return metricDependencies

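The net effect of the new `customevals` parameter: `metricDependencies.update(customevals)` lets workspace-defined evals flow through the same dependency check as the built-ins. A sketch with a made-up custom eval; the return shape ("status"/"message") is inferred from the callers in client.py:

customAnalytics = {"My Custom Eval": ["query", "output", "groundTruth"]}

result = checkDependency(
    selectedEval=["My Custom Eval"],
    columns=["query", "output"],   # groundTruth is missing from the dataframe
    customevals=customAnalytics,
)
# Expected: result["status"] is falsy and result["message"] names the missing
# column, exactly as a built-in metric with an unmet dependency would behave.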
@@ -652,6 +659,7 @@ def validateGoogleKey(api_key):

 def groupLogsByClass(logs, max_logs=2):
     # Initialize the final result structures (no defaultdict)
+
     groupedLogs = {}
     uniqueEdgeCases = {}  # This will store unique edge cases for each eval_name

@@ -685,3 +693,46 @@ def groupLogsByClass(logs, max_logs=2):
         uniqueEdgeCases[eval_name] = list(uniqueEdgeCases[eval_name])

     return groupedLogs, uniqueEdgeCases
+
+
+def getCustomAnalytics(workspaceID):
+    try:
+        url = getCustomAnalyticsUrl
+        payload = {
+            "workspaceID": workspaceID
+        }
+
+        headers = {
+            "Content-Type": "application/json"
+        }
+
+        response = requests.post(url, json=payload, headers=headers)
+        data=response.json()
+        customData=data.get("data","").get("analyticsCustom","")
+        customMapping = {
+            "QUERY": "query",
+            "CONTEXT": "context",
+            "OUTPUT": "output",
+            "MESSAGEHISTORY": "messageHistory",
+            "TOOLS": "tools",
+            "INTERMEDIATESTEPS": "intermediateSteps",
+            "GROUNDTRUTH": "groundTruth",
+        }
+
+        metricDependencies = {}
+
+
+
+        for eval in customData:
+            evalName = eval.get("analyticsName")
+            evalDependencyRaw = list(eval.get("variableMappings").values())
+
+            # Replace each value using the custom mapping
+            evalDependency = [customMapping.get(val.upper(), val.lower()) for val in evalDependencyRaw]
+
+            # Build the dict
+            metricDependencies[evalName] = evalDependency
+        return metricDependencies
+
+    except Exception as e:
+        return {}
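The normalization step inside `getCustomAnalytics` is worth a worked example: values from `variableMappings` are matched case-insensitively against the seven known column names, and anything unrecognized falls back to plain lowercase. The sample values here are made up:

customMapping = {
    "QUERY": "query", "CONTEXT": "context", "OUTPUT": "output",
    "MESSAGEHISTORY": "messageHistory", "TOOLS": "tools",
    "INTERMEDIATESTEPS": "intermediateSteps", "GROUNDTRUTH": "groundTruth",
}
raw = ["Query", "OUTPUT", "CustomField"]          # hypothetical API values
print([customMapping.get(v.upper(), v.lower()) for v in raw])
# -> ['query', 'output', 'customfield']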
llumo/llumoLogger.py
ADDED
@@ -0,0 +1,57 @@
+import requests
+
+
+class LLUMOLogger:
+    def __init__(self, apiKey: str, playground: str):
+        self.apiKey = apiKey
+        self.playground = playground
+        self.workspaceID = None
+        self.playgroundID = None
+        self.userEmailID = None
+        self._authenticate()
+
+    def _authenticate(self):
+        url = "https://app.llumo.ai/api/get-playground-name"
+        try:
+            response = requests.post(
+                url,
+                headers={
+                    "Authorization": f"Bearer {self.apiKey}",
+                    "Content-Type": "application/json",
+                },
+                json={"playgroundName": self.playground},
+                timeout=10,
+            )
+
+            response.raise_for_status()
+            res_json = response.json()
+
+            # Navigate into the nested "data" structure
+            inner_data = res_json.get("data", {}).get("data", {})
+
+            self.workspaceID = inner_data.get("workspaceID")
+            self.playgroundID = inner_data.get("playgroundID")
+            self.userEmailID = inner_data.get("createdBy")
+
+            if not self.workspaceID or not self.playgroundID:
+                raise RuntimeError(
+                    f"Invalid response: workspaceID or playgroundID missing. Full response: {res_json}"
+                )
+
+        except requests.exceptions.RequestException as req_err:
+            raise RuntimeError(
+                f"Network or HTTP error during authentication: {req_err}"
+            )
+        except ValueError as json_err:
+            raise RuntimeError(f"Invalid JSON in authentication response: {json_err}")
+        except Exception as e:
+            raise RuntimeError(f"Authentication failed: {e}")
+
+    def getWorkspaceID(self):
+        return self.workspaceID
+
+    def getUserEmailID(self):
+        return self.userEmailID
+
+    def getPlaygroundID(self):
+        return self.playgroundID
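Authentication runs eagerly in `__init__`, so constructing the logger is the entire handshake; network failures, invalid JSON, and responses missing workspaceID/playgroundID all surface as RuntimeError. A minimal sketch with placeholder credentials:

from llumo.llumoLogger import LLUMOLogger

logger = LLUMOLogger(apiKey="llumo-api-key", playground="my-playground")  # placeholders
print(logger.getWorkspaceID(), logger.getPlaygroundID(), logger.getUserEmailID())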