llumo 0.1.8__py3-none-any.whl → 0.1.9__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- llumo/.env +6 -0
- llumo/__init__.py +7 -7
- llumo/client.py +561 -561
- llumo/exceptions.py +44 -44
- llumo/execution.py +38 -38
- llumo/functionCalling.py +190 -190
- llumo/helpingFuntions.py +50 -50
- llumo/models.py +42 -42
- llumo/sockets.py +148 -148
- {llumo-0.1.8.dist-info → llumo-0.1.9.dist-info}/METADATA +26 -26
- llumo-0.1.9.dist-info/RECORD +14 -0
- {llumo-0.1.8.dist-info → llumo-0.1.9.dist-info}/licenses/LICENSE +4 -4
- llumo-0.1.8.dist-info/RECORD +0 -13
- {llumo-0.1.8.dist-info → llumo-0.1.9.dist-info}/WHEEL +0 -0
- {llumo-0.1.8.dist-info → llumo-0.1.9.dist-info}/top_level.txt +0 -0
llumo/client.py
CHANGED
@@ -1,561 +1,561 @@
|
|
1
|
-
import requests
|
2
|
-
from .exceptions import LlumoAIError
|
3
|
-
import time
|
4
|
-
import re
|
5
|
-
import json
|
6
|
-
import uuid
|
7
|
-
import threading
|
8
|
-
from .helpingFuntions import *
|
9
|
-
from dotenv import load_dotenv
|
10
|
-
import os
|
11
|
-
import itertools
|
12
|
-
import pandas as pd
|
13
|
-
from typing import List, Dict
|
14
|
-
from .models import AVAILABLEMODELS,getProviderFromModel
|
15
|
-
from .execution import ModelExecutor
|
16
|
-
from .sockets import LlumoSocketClient
|
17
|
-
from .functionCalling import LlumoAgentExecutor
|
18
|
-
|
19
|
-
|
20
|
-
# 👇 NEW: Explicitly load .env from the package folder
|
21
|
-
envPath = os.path.join(os.path.dirname(__file__), '.env')
|
22
|
-
load_dotenv(dotenv_path=envPath, override=False)# Automatically looks for .env in current directory
|
23
|
-
|
24
|
-
postUrl = os.getenv("postUrl")
|
25
|
-
fetchUrl = os.getenv("fetchUrl")
|
26
|
-
validateUrl = os.getenv("validateUrl")
|
27
|
-
socketUrl = os.getenv("SOCKET_URL")
|
28
|
-
|
29
|
-
|
30
|
-
class LlumoClient:
|
31
|
-
|
32
|
-
def __init__(self, api_key):
|
33
|
-
self.apiKey = api_key
|
34
|
-
self.socket = LlumoSocketClient(socketUrl)
|
35
|
-
self.processMapping = {}
|
36
|
-
|
37
|
-
|
38
|
-
def validateApiKey(self, evalName = ""):
|
39
|
-
headers = {
|
40
|
-
"Authorization": f"Bearer {self.apiKey}",
|
41
|
-
"Content-Type": "application/json",
|
42
|
-
}
|
43
|
-
reqBody = {"analytics": [evalName]}
|
44
|
-
|
45
|
-
# print(f"Making API key validation request to: {validateUrl}")
|
46
|
-
# print(f"Request body: {reqBody}")
|
47
|
-
|
48
|
-
try:
|
49
|
-
response = requests.post(url=validateUrl, json=reqBody, headers=headers)
|
50
|
-
# print(response.text)
|
51
|
-
# Print response info for debugging
|
52
|
-
# print(f"Response status code: {response.status_code}")
|
53
|
-
# print(f"Response headers: {response.headers}")
|
54
|
-
|
55
|
-
# Try to get at least some of the response content
|
56
|
-
try:
|
57
|
-
response_preview = response.text[:500] # First 500 chars
|
58
|
-
# print(f"Response preview: {response_preview}")
|
59
|
-
except Exception as e:
|
60
|
-
print(f"Could not get response preview: {e}")
|
61
|
-
|
62
|
-
except requests.exceptions.RequestException as e:
|
63
|
-
print(f"Request exception: {str(e)}")
|
64
|
-
raise LlumoAIError.RequestFailed(detail=str(e))
|
65
|
-
|
66
|
-
if response.status_code == 401:
|
67
|
-
raise LlumoAIError.InvalidApiKey()
|
68
|
-
|
69
|
-
# Handle other common status codes
|
70
|
-
if response.status_code == 404:
|
71
|
-
raise LlumoAIError.RequestFailed(
|
72
|
-
detail=f"Endpoint not found (404): {validateUrl}"
|
73
|
-
)
|
74
|
-
|
75
|
-
# if response.status_code >= 500:
|
76
|
-
# raise LlumoAIError.ServerError(
|
77
|
-
# detail=f"Server error ({response.status_code})"
|
78
|
-
# )
|
79
|
-
|
80
|
-
if response.status_code != 200:
|
81
|
-
raise LlumoAIError.RequestFailed(
|
82
|
-
detail=f"Unexpected status code: {response.status_code}"
|
83
|
-
)
|
84
|
-
|
85
|
-
# Try to parse JSON
|
86
|
-
try:
|
87
|
-
data = response.json()
|
88
|
-
except ValueError as e:
|
89
|
-
print(f"JSON parsing error: {str(e)}")
|
90
|
-
# print(f"Response content that could not be parsed: {response.text[:1000]}...")
|
91
|
-
raise LlumoAIError.InvalidJsonResponse()
|
92
|
-
|
93
|
-
if "data" not in data or not data["data"]:
|
94
|
-
# print(f"Invalid API response structure: {data}")
|
95
|
-
raise LlumoAIError.InvalidApiResponse()
|
96
|
-
|
97
|
-
try:
|
98
|
-
self.hitsAvailable = data["data"].get("remainingHits", 0)
|
99
|
-
self.workspaceID = data["data"].get("workspaceID")
|
100
|
-
self.evalDefinition = data["data"].get("analyticsMapping")
|
101
|
-
self.socketToken = data["data"].get("token")
|
102
|
-
|
103
|
-
# print(f"API key validation successful:")
|
104
|
-
# print(f"- Remaining hits: {self.hitsAvailable}")
|
105
|
-
# print(f"- Workspace ID: {self.workspaceID}")
|
106
|
-
# print(f"- Token received: {'Yes' if self.socketToken else 'No'}")
|
107
|
-
|
108
|
-
except Exception as e:
|
109
|
-
# print(f"Error extracting data from response: {str(e)}")
|
110
|
-
raise LlumoAIError.UnexpectedError(detail=str(e))
|
111
|
-
|
112
|
-
def postBatch(self, batch, workspaceID):
|
113
|
-
payload = {
|
114
|
-
"batch": json.dumps(batch),
|
115
|
-
"runType": "EVAL",
|
116
|
-
"workspaceID": workspaceID,
|
117
|
-
}
|
118
|
-
headers = {
|
119
|
-
"Authorization": f"Bearer {self.socketToken}",
|
120
|
-
"Content-Type": "application/json",
|
121
|
-
}
|
122
|
-
try:
|
123
|
-
# print(postUrl)
|
124
|
-
response = requests.post(postUrl, json=payload, headers=headers)
|
125
|
-
# print(f"Post API Status Code: {response.status_code}")
|
126
|
-
# print(response.text)
|
127
|
-
|
128
|
-
except Exception as e:
|
129
|
-
print(f"Error in posting batch: {e}")
|
130
|
-
|
131
|
-
def AllProcessMapping(self):
|
132
|
-
for batch in self.allBatches:
|
133
|
-
for record in batch:
|
134
|
-
rowId = record['rowID']
|
135
|
-
colId = record['columnID']
|
136
|
-
pid = f'{rowId}-{colId}-{colId}'
|
137
|
-
self.processMapping[pid] = record
|
138
|
-
|
139
|
-
|
140
|
-
def finalResp(self,results):
|
141
|
-
seen = set()
|
142
|
-
uniqueResults = []
|
143
|
-
|
144
|
-
for item in results:
|
145
|
-
for rowID in item: # Each item has only one key
|
146
|
-
if rowID not in seen:
|
147
|
-
seen.add(rowID)
|
148
|
-
uniqueResults.append(item)
|
149
|
-
|
150
|
-
return uniqueResults
|
151
|
-
|
152
|
-
def evaluate(self, dataframe, eval ="Response Completeness", prompt_template="", outputColName="output"):
|
153
|
-
|
154
|
-
results = {}
|
155
|
-
try:
|
156
|
-
socketID = self.socket.connect(timeout=150)
|
157
|
-
|
158
|
-
# Ensure full connection before proceeding
|
159
|
-
max_wait_secs = 20
|
160
|
-
waited_secs = 0
|
161
|
-
while not self.socket._connection_established.is_set():
|
162
|
-
time.sleep(0.1)
|
163
|
-
waited_secs += 0.1
|
164
|
-
if waited_secs >= max_wait_secs:
|
165
|
-
raise RuntimeError("Timeout waiting for server 'connection-established' event.")
|
166
|
-
|
167
|
-
rowIdMapping = {}
|
168
|
-
|
169
|
-
|
170
|
-
print(f"\n======= Running evaluation for: {eval} =======")
|
171
|
-
|
172
|
-
try:
|
173
|
-
self.validateApiKey(evalName=eval)
|
174
|
-
except Exception as e:
|
175
|
-
if hasattr(e, "response") and getattr(e, "response", None) is not None:
|
176
|
-
pass
|
177
|
-
raise
|
178
|
-
|
179
|
-
if self.hitsAvailable == 0 or len(dataframe) > self.hitsAvailable:
|
180
|
-
raise LlumoAIError.InsufficientCredits()
|
181
|
-
|
182
|
-
evalDefinition = self.evalDefinition[eval]
|
183
|
-
model = "GPT_4"
|
184
|
-
provider = "OPENAI"
|
185
|
-
evalType = "LLM"
|
186
|
-
workspaceID = self.workspaceID
|
187
|
-
|
188
|
-
self.allBatches = []
|
189
|
-
currentBatch = []
|
190
|
-
|
191
|
-
for index, row in dataframe.iterrows():
|
192
|
-
tools = [row["tools"]] if "tools" in dataframe.columns else []
|
193
|
-
groundTruth = row["groundTruth"] if "groundTruth" in dataframe.columns else ""
|
194
|
-
messageHistory = [row["messageHistory"]] if "messageHistory" in dataframe.columns else []
|
195
|
-
promptTemplate = prompt_template
|
196
|
-
|
197
|
-
keys = re.findall(r"{{(.*?)}}", promptTemplate)
|
198
|
-
|
199
|
-
if not all([ky in dataframe.columns for ky in keys]):
|
200
|
-
raise LlumoAIError.InvalidPromptTemplate()
|
201
|
-
|
202
|
-
inputDict = {key: row[key] for key in keys if key in row}
|
203
|
-
output = row[outputColName] if outputColName in dataframe.columns else ""
|
204
|
-
|
205
|
-
activePlayground = f"{int(time.time() * 1000)}{uuid.uuid4()}".replace("-", "")
|
206
|
-
rowID = f"{int(time.time() * 1000)}{uuid.uuid4()}".replace("-", "")
|
207
|
-
columnID = f"{int(time.time() * 1000)}{uuid.uuid4()}".replace("-", "")
|
208
|
-
|
209
|
-
rowIdMapping[rowID] = index
|
210
|
-
|
211
|
-
templateData = {
|
212
|
-
"processID": getProcessID(),
|
213
|
-
"socketID": socketID,
|
214
|
-
"source": "SDK",
|
215
|
-
"processData": {
|
216
|
-
"executionDependency": {
|
217
|
-
"query": "",
|
218
|
-
"context": "",
|
219
|
-
"output": output,
|
220
|
-
"tools": tools,
|
221
|
-
"groundTruth": groundTruth,
|
222
|
-
"messageHistory": messageHistory,
|
223
|
-
},
|
224
|
-
"definition": evalDefinition,
|
225
|
-
"model": model,
|
226
|
-
"provider": provider,
|
227
|
-
"analytics": eval,
|
228
|
-
},
|
229
|
-
"workspaceID": workspaceID,
|
230
|
-
"type": "EVAL",
|
231
|
-
"evalType": evalType,
|
232
|
-
"kpi": eval,
|
233
|
-
"columnID": columnID,
|
234
|
-
"rowID": rowID,
|
235
|
-
"playgroundID": activePlayground,
|
236
|
-
"processType": "EVAL",
|
237
|
-
}
|
238
|
-
|
239
|
-
query = ""
|
240
|
-
context = ""
|
241
|
-
for key, value in inputDict.items():
|
242
|
-
if isinstance(value, str):
|
243
|
-
length = len(value.split()) * 1.5
|
244
|
-
if length > 50:
|
245
|
-
context += f" {key}: {value}, "
|
246
|
-
else:
|
247
|
-
if promptTemplate:
|
248
|
-
tempObj = {key: value}
|
249
|
-
promptTemplate = getInputPopulatedPrompt(promptTemplate, tempObj)
|
250
|
-
else:
|
251
|
-
query += f" {key}: {value}, "
|
252
|
-
|
253
|
-
if not context.strip():
|
254
|
-
for key, value in inputDict.items():
|
255
|
-
context += f" {key}: {value}, "
|
256
|
-
|
257
|
-
templateData["processData"]["executionDependency"]["context"] = context.strip()
|
258
|
-
templateData["processData"]["executionDependency"]["query"] = query.strip()
|
259
|
-
|
260
|
-
if promptTemplate and not query.strip():
|
261
|
-
templateData["processData"]["executionDependency"]["query"] = promptTemplate
|
262
|
-
|
263
|
-
currentBatch.append(templateData)
|
264
|
-
|
265
|
-
if len(currentBatch) == 10 or index == len(dataframe) - 1:
|
266
|
-
self.allBatches.append(currentBatch)
|
267
|
-
currentBatch = []
|
268
|
-
|
269
|
-
totalItems = sum(len(batch) for batch in self.allBatches)
|
270
|
-
|
271
|
-
for cnt, batch in enumerate(self.allBatches):
|
272
|
-
try:
|
273
|
-
self.postBatch(batch=batch, workspaceID=workspaceID)
|
274
|
-
# print("Betch Posted with item len: ", len(batch))
|
275
|
-
except Exception as e:
|
276
|
-
continue
|
277
|
-
|
278
|
-
time.sleep(1)
|
279
|
-
|
280
|
-
timeout = max(50, min(600, totalItems * 10))
|
281
|
-
|
282
|
-
self.socket.listenForResults(
|
283
|
-
min_wait=40, max_wait=timeout, inactivity_timeout=150, expected_results=totalItems
|
284
|
-
)
|
285
|
-
|
286
|
-
eval_results = self.socket.getReceivedData()
|
287
|
-
results[eval] = self.finalResp(eval_results)
|
288
|
-
|
289
|
-
except Exception as e:
|
290
|
-
raise
|
291
|
-
finally:
|
292
|
-
try:
|
293
|
-
self.socket.disconnect()
|
294
|
-
except Exception as e:
|
295
|
-
pass
|
296
|
-
|
297
|
-
for evalName, records in results.items():
|
298
|
-
dataframe[evalName] = None
|
299
|
-
for item in records:
|
300
|
-
for compound_key, value in item.items():
|
301
|
-
rowID = compound_key.split('-')[0]
|
302
|
-
if rowID in rowIdMapping:
|
303
|
-
index = rowIdMapping[rowID]
|
304
|
-
dataframe.at[index, evalName] = value
|
305
|
-
else:
|
306
|
-
pass
|
307
|
-
# print(f"⚠️ Warning: Could not find rowID {rowID} in mapping")
|
308
|
-
|
309
|
-
return dataframe
|
310
|
-
|
311
|
-
def evaluateCompressor(self, dataframe, prompt_template):
|
312
|
-
results = []
|
313
|
-
|
314
|
-
try:
|
315
|
-
# Connect to socket first
|
316
|
-
# print("Connecting to socket server...")
|
317
|
-
socketID = self.socket.connect(timeout=150)
|
318
|
-
|
319
|
-
# Ensure full connection before proceeding
|
320
|
-
max_wait_secs = 20
|
321
|
-
waited_secs = 0
|
322
|
-
while not self.socket._connection_established.is_set():
|
323
|
-
time.sleep(0.1)
|
324
|
-
waited_secs += 0.1
|
325
|
-
if waited_secs >= max_wait_secs:
|
326
|
-
raise RuntimeError("Timeout waiting for server 'connection-established' event.")
|
327
|
-
|
328
|
-
# print(f"Connected with socket ID: {socketID}")
|
329
|
-
|
330
|
-
try:
|
331
|
-
# print(f"Validating API key...")
|
332
|
-
self.validateApiKey()
|
333
|
-
# print(f"API key validation successful. Hits available: {self.hitsAvailable}")
|
334
|
-
except Exception as e:
|
335
|
-
print(f"Error during API key validation: {str(e)}")
|
336
|
-
if hasattr(e, "response") and getattr(e, "response", None) is not None:
|
337
|
-
print(f"Status code: {e.response.status_code}")
|
338
|
-
print(f"Response content: {e.response.text[:500]}...")
|
339
|
-
raise
|
340
|
-
|
341
|
-
if self.hitsAvailable == 0 or len(dataframe) > self.hitsAvailable:
|
342
|
-
raise LlumoAIError.InsufficientCredits()
|
343
|
-
|
344
|
-
model = "GPT_4"
|
345
|
-
provider = "OPENAI"
|
346
|
-
evalType = "LLUMO"
|
347
|
-
workspaceID = self.workspaceID
|
348
|
-
|
349
|
-
# Prepare all batches before sending
|
350
|
-
# print("Preparing batches...")
|
351
|
-
self.allBatches = []
|
352
|
-
currentBatch = []
|
353
|
-
|
354
|
-
for index, row in dataframe.iterrows():
|
355
|
-
promptTemplate = prompt_template
|
356
|
-
|
357
|
-
# extracting the placeholders from the prompt template
|
358
|
-
keys = re.findall(r"{{(.*?)}}", promptTemplate)
|
359
|
-
inputDict = {key: row[key] for key in keys if key in row}
|
360
|
-
|
361
|
-
if not all([ky in dataframe.columns for ky in keys]):
|
362
|
-
raise LlumoAIError.InvalidPromptTemplate()
|
363
|
-
|
364
|
-
activePlayground = f"{int(time.time() * 1000)}{uuid.uuid4()}".replace("-", "")
|
365
|
-
rowID = f"{int(time.time() * 1000)}{uuid.uuid4()}".replace("-", "")
|
366
|
-
columnID = f"{int(time.time() * 1000)}{uuid.uuid4()}".replace("-", "")
|
367
|
-
|
368
|
-
compressed_prompt_id = f"{int(time.time() * 1000)}{uuid.uuid4()}".replace("-", "")
|
369
|
-
compressed_prompt_output_id = f"{int(time.time() * 1000)}{uuid.uuid4()}".replace("-", "")
|
370
|
-
cost_id = f"{int(time.time() * 1000)}{uuid.uuid4()}".replace("-", "")
|
371
|
-
cost_saving_id = f"{int(time.time() * 1000)}{uuid.uuid4()}".replace("-", "")
|
372
|
-
|
373
|
-
# Use the server-provided socket ID here
|
374
|
-
templateData = {
|
375
|
-
"processID": getProcessID(),
|
376
|
-
"socketID": socketID,
|
377
|
-
"source": "SDK",
|
378
|
-
"rowID": rowID,
|
379
|
-
"columnID": columnID,
|
380
|
-
"processType": "COST_SAVING",
|
381
|
-
"evalType": evalType,
|
382
|
-
"dependency": list(inputDict.keys()),
|
383
|
-
"costColumnMapping": {
|
384
|
-
"compressed_prompt": compressed_prompt_id,
|
385
|
-
"compressed_prompt_output": compressed_prompt_output_id,
|
386
|
-
"cost": cost_id,
|
387
|
-
"cost_saving": cost_saving_id
|
388
|
-
},
|
389
|
-
"processData": {
|
390
|
-
"rowData": {
|
391
|
-
"query": {"type": "VARIABLE", "value": ""},
|
392
|
-
"context": {"type": "VARIABLE", "value": ""},
|
393
|
-
},
|
394
|
-
"dependency": list(inputDict.keys()),
|
395
|
-
"dependencyMapping": {ky: ky for ky in list(inputDict.keys())},
|
396
|
-
"provider": provider,
|
397
|
-
"model": model,
|
398
|
-
"promptText": promptTemplate,
|
399
|
-
"costColumnMapping": {
|
400
|
-
"compressed_prompt": compressed_prompt_id,
|
401
|
-
"compressed_prompt_output": compressed_prompt_output_id,
|
402
|
-
"cost": cost_id,
|
403
|
-
"cost_saving": cost_saving_id
|
404
|
-
}
|
405
|
-
},
|
406
|
-
"workspaceID": workspaceID,
|
407
|
-
"email": "",
|
408
|
-
"playgroundID": activePlayground
|
409
|
-
}
|
410
|
-
|
411
|
-
|
412
|
-
# Build query/context from input
|
413
|
-
query = ""
|
414
|
-
context = ""
|
415
|
-
|
416
|
-
for key, value in inputDict.items():
|
417
|
-
if isinstance(value, str):
|
418
|
-
length = len(value.split()) * 1.5
|
419
|
-
if length > 50:
|
420
|
-
context += f" {key}: {value}, "
|
421
|
-
else:
|
422
|
-
if promptTemplate:
|
423
|
-
populatedPrompt = getInputPopulatedPrompt(promptTemplate, {key: value})
|
424
|
-
query += f"{populatedPrompt} "
|
425
|
-
else:
|
426
|
-
query += f" {key}: {value}, "
|
427
|
-
|
428
|
-
if not context.strip():
|
429
|
-
for key, value in inputDict.items():
|
430
|
-
context += f" {key}: {value}, "
|
431
|
-
|
432
|
-
templateData["processData"]["rowData"]["context"]["value"] = context.strip()
|
433
|
-
templateData["processData"]["rowData"]["query"]["value"] = query.strip()
|
434
|
-
|
435
|
-
if promptTemplate and not query.strip():
|
436
|
-
templateData["processData"]["rowData"]["query"]["value"] = promptTemplate
|
437
|
-
|
438
|
-
# print(templateData)
|
439
|
-
currentBatch.append(templateData)
|
440
|
-
|
441
|
-
if len(currentBatch) == 10 or index == len(dataframe) - 1:
|
442
|
-
self.allBatches.append(currentBatch)
|
443
|
-
currentBatch = []
|
444
|
-
|
445
|
-
# Post all batches
|
446
|
-
total_items = sum(len(batch) for batch in self.allBatches)
|
447
|
-
# print(f"Posting {len(self.allBatches)} batches ({total_items} items total)")
|
448
|
-
|
449
|
-
for cnt, batch in enumerate(self.allBatches):
|
450
|
-
# print(f"Posting batch {cnt + 1}/{len(self.allBatches)} for eval '{eval}'")
|
451
|
-
try:
|
452
|
-
self.postBatch(batch=batch, workspaceID=workspaceID)
|
453
|
-
# print(f"Batch {cnt + 1} posted successfully")
|
454
|
-
except Exception as e:
|
455
|
-
print(f"Error posting batch {cnt + 1}: {str(e)}")
|
456
|
-
continue
|
457
|
-
|
458
|
-
# Small delay between batches to prevent overwhelming the server
|
459
|
-
time.sleep(1)
|
460
|
-
|
461
|
-
# updating the dict for row column mapping
|
462
|
-
self.AllProcessMapping()
|
463
|
-
# Calculate a reasonable timeout based on the data size
|
464
|
-
timeout = max(60, min(600, total_items * 10))
|
465
|
-
# print(f"All batches posted. Waiting up to {timeout} seconds for results...")
|
466
|
-
|
467
|
-
# Listen for results
|
468
|
-
self.socket.listenForResults(min_wait=20, max_wait=timeout, inactivity_timeout=30,expected_results=None)
|
469
|
-
|
470
|
-
# Get results for this evaluation
|
471
|
-
eval_results = self.socket.getReceivedData()
|
472
|
-
# print(f"Received {len(eval_results)} results for evaluation '{eval}'")
|
473
|
-
|
474
|
-
# Add these results to our overall results
|
475
|
-
results = self.finalResp(eval_results)
|
476
|
-
print(f"======= Completed evaluation: {eval} =======\n")
|
477
|
-
|
478
|
-
# print("All evaluations completed successfully")
|
479
|
-
|
480
|
-
except Exception as e:
|
481
|
-
print(f"Error during evaluation: {e}")
|
482
|
-
raise
|
483
|
-
finally:
|
484
|
-
# Always disconnect the socket when done
|
485
|
-
try:
|
486
|
-
self.socket.disconnect()
|
487
|
-
# print("Socket disconnected")
|
488
|
-
except Exception as e:
|
489
|
-
print(f"Error disconnecting socket: {e}")
|
490
|
-
|
491
|
-
compressed_prompt , compressed_prompt_output , cost , cost_saving = costColumnMapping(results,self.processMapping)
|
492
|
-
dataframe["compressed_prompt"] = compressed_prompt
|
493
|
-
dataframe["compressed_prompt_output"] = compressed_prompt_output
|
494
|
-
dataframe["cost"] = cost
|
495
|
-
dataframe["cost_saving"] = cost_saving
|
496
|
-
return dataframe
|
497
|
-
|
498
|
-
|
499
|
-
def run_sweep(self,templates: List[str], dataset: Dict[str, List[str]], model_aliases: List[AVAILABLEMODELS], apiKey: str, eval = ["Response Correctness"],toEvaluate:bool =False ) -> pd.DataFrame:
|
500
|
-
executor = ModelExecutor(apiKey)
|
501
|
-
|
502
|
-
keys = list(dataset.keys())
|
503
|
-
value_combinations = list(itertools.product(*dataset.values()))
|
504
|
-
combinations = [dict(zip(keys, values)) for values in value_combinations]
|
505
|
-
|
506
|
-
results = []
|
507
|
-
|
508
|
-
# Iterate through combinations
|
509
|
-
for combo in combinations:
|
510
|
-
for template in templates:
|
511
|
-
prompt = template
|
512
|
-
for k, v in combo.items():
|
513
|
-
prompt = prompt.replace(f"{{{{{k}}}}}", v)
|
514
|
-
# Add a row for each model
|
515
|
-
for model in model_aliases:
|
516
|
-
row = {
|
517
|
-
"template": template,
|
518
|
-
"prompt": prompt,
|
519
|
-
**combo,
|
520
|
-
"model": model.value
|
521
|
-
}
|
522
|
-
|
523
|
-
|
524
|
-
try:
|
525
|
-
provider = getProviderFromModel(model)
|
526
|
-
response = executor.execute(provider, model.value, prompt, apiKey)
|
527
|
-
row["output"] = response
|
528
|
-
except Exception as e:
|
529
|
-
row["output"] = f"Error: {str(e)}"
|
530
|
-
|
531
|
-
results.append(row)
|
532
|
-
df=pd.DataFrame(results)
|
533
|
-
if toEvaluate:
|
534
|
-
|
535
|
-
res = self.evaluate(df,eval =eval ,prompt_template=str(templates[0]))
|
536
|
-
return res
|
537
|
-
|
538
|
-
return df
|
539
|
-
|
540
|
-
def evaluateAgents(self, dataframe, model, agents, model_api_key=None,
|
541
|
-
prompt_template="Give answer for the given query: {{query}}"):
|
542
|
-
if model.lower() not in ["openai", "google"]:
|
543
|
-
raise ValueError("Model must be 'openai' or 'google'")
|
544
|
-
|
545
|
-
# Run unified agent execution
|
546
|
-
toolResponseDf = LlumoAgentExecutor.run(dataframe, agents, model=model, model_api_key=model_api_key)
|
547
|
-
evals = ["Tool Reliability", "Stepwise Progression", "Tool Selection Accuracy", "Final Task Alignment"]
|
548
|
-
|
549
|
-
for eval in evals:
|
550
|
-
# Perform evaluation
|
551
|
-
toolResponseDf = self.evaluate(
|
552
|
-
toolResponseDf,
|
553
|
-
eval = eval,
|
554
|
-
prompt_template=prompt_template
|
555
|
-
)
|
556
|
-
return toolResponseDf
|
557
|
-
|
558
|
-
|
559
|
-
class SafeDict(dict):
|
560
|
-
def __missing__(self, key):
|
561
|
-
return ""
|
1
|
+
import requests
|
2
|
+
from .exceptions import LlumoAIError
|
3
|
+
import time
|
4
|
+
import re
|
5
|
+
import json
|
6
|
+
import uuid
|
7
|
+
import threading
|
8
|
+
from .helpingFuntions import *
|
9
|
+
from dotenv import load_dotenv
|
10
|
+
import os
|
11
|
+
import itertools
|
12
|
+
import pandas as pd
|
13
|
+
from typing import List, Dict
|
14
|
+
from .models import AVAILABLEMODELS,getProviderFromModel
|
15
|
+
from .execution import ModelExecutor
|
16
|
+
from .sockets import LlumoSocketClient
|
17
|
+
from .functionCalling import LlumoAgentExecutor
|
18
|
+
|
19
|
+
|
20
|
+
# 👇 NEW: Explicitly load .env from the package folder
|
21
|
+
envPath = os.path.join(os.path.dirname(__file__), '.env')
|
22
|
+
load_dotenv(dotenv_path=envPath, override=False)# Automatically looks for .env in current directory
|
23
|
+
|
24
|
+
postUrl = os.getenv("postUrl")
|
25
|
+
fetchUrl = os.getenv("fetchUrl")
|
26
|
+
validateUrl = os.getenv("validateUrl")
|
27
|
+
socketUrl = os.getenv("SOCKET_URL")
|
28
|
+
|
29
|
+
|
30
|
+
class LlumoClient:
|
31
|
+
|
32
|
+
def __init__(self, api_key):
|
33
|
+
self.apiKey = api_key
|
34
|
+
self.socket = LlumoSocketClient(socketUrl)
|
35
|
+
self.processMapping = {}
|
36
|
+
|
37
|
+
|
38
|
+
def validateApiKey(self, evalName = ""):
|
39
|
+
headers = {
|
40
|
+
"Authorization": f"Bearer {self.apiKey}",
|
41
|
+
"Content-Type": "application/json",
|
42
|
+
}
|
43
|
+
reqBody = {"analytics": [evalName]}
|
44
|
+
|
45
|
+
# print(f"Making API key validation request to: {validateUrl}")
|
46
|
+
# print(f"Request body: {reqBody}")
|
47
|
+
|
48
|
+
try:
|
49
|
+
response = requests.post(url=validateUrl, json=reqBody, headers=headers)
|
50
|
+
# print(response.text)
|
51
|
+
# Print response info for debugging
|
52
|
+
# print(f"Response status code: {response.status_code}")
|
53
|
+
# print(f"Response headers: {response.headers}")
|
54
|
+
|
55
|
+
# Try to get at least some of the response content
|
56
|
+
try:
|
57
|
+
response_preview = response.text[:500] # First 500 chars
|
58
|
+
# print(f"Response preview: {response_preview}")
|
59
|
+
except Exception as e:
|
60
|
+
print(f"Could not get response preview: {e}")
|
61
|
+
|
62
|
+
except requests.exceptions.RequestException as e:
|
63
|
+
print(f"Request exception: {str(e)}")
|
64
|
+
raise LlumoAIError.RequestFailed(detail=str(e))
|
65
|
+
|
66
|
+
if response.status_code == 401:
|
67
|
+
raise LlumoAIError.InvalidApiKey()
|
68
|
+
|
69
|
+
# Handle other common status codes
|
70
|
+
if response.status_code == 404:
|
71
|
+
raise LlumoAIError.RequestFailed(
|
72
|
+
detail=f"Endpoint not found (404): {validateUrl}"
|
73
|
+
)
|
74
|
+
|
75
|
+
# if response.status_code >= 500:
|
76
|
+
# raise LlumoAIError.ServerError(
|
77
|
+
# detail=f"Server error ({response.status_code})"
|
78
|
+
# )
|
79
|
+
|
80
|
+
if response.status_code != 200:
|
81
|
+
raise LlumoAIError.RequestFailed(
|
82
|
+
detail=f"Unexpected status code: {response.status_code}"
|
83
|
+
)
|
84
|
+
|
85
|
+
# Try to parse JSON
|
86
|
+
try:
|
87
|
+
data = response.json()
|
88
|
+
except ValueError as e:
|
89
|
+
print(f"JSON parsing error: {str(e)}")
|
90
|
+
# print(f"Response content that could not be parsed: {response.text[:1000]}...")
|
91
|
+
raise LlumoAIError.InvalidJsonResponse()
|
92
|
+
|
93
|
+
if "data" not in data or not data["data"]:
|
94
|
+
# print(f"Invalid API response structure: {data}")
|
95
|
+
raise LlumoAIError.InvalidApiResponse()
|
96
|
+
|
97
|
+
try:
|
98
|
+
self.hitsAvailable = data["data"].get("remainingHits", 0)
|
99
|
+
self.workspaceID = data["data"].get("workspaceID")
|
100
|
+
self.evalDefinition = data["data"].get("analyticsMapping")
|
101
|
+
self.socketToken = data["data"].get("token")
|
102
|
+
|
103
|
+
# print(f"API key validation successful:")
|
104
|
+
# print(f"- Remaining hits: {self.hitsAvailable}")
|
105
|
+
# print(f"- Workspace ID: {self.workspaceID}")
|
106
|
+
# print(f"- Token received: {'Yes' if self.socketToken else 'No'}")
|
107
|
+
|
108
|
+
except Exception as e:
|
109
|
+
# print(f"Error extracting data from response: {str(e)}")
|
110
|
+
raise LlumoAIError.UnexpectedError(detail=str(e))
|
111
|
+
|
112
|
+
def postBatch(self, batch, workspaceID):
|
113
|
+
payload = {
|
114
|
+
"batch": json.dumps(batch),
|
115
|
+
"runType": "EVAL",
|
116
|
+
"workspaceID": workspaceID,
|
117
|
+
}
|
118
|
+
headers = {
|
119
|
+
"Authorization": f"Bearer {self.socketToken}",
|
120
|
+
"Content-Type": "application/json",
|
121
|
+
}
|
122
|
+
try:
|
123
|
+
# print(postUrl)
|
124
|
+
response = requests.post(postUrl, json=payload, headers=headers)
|
125
|
+
# print(f"Post API Status Code: {response.status_code}")
|
126
|
+
# print(response.text)
|
127
|
+
|
128
|
+
except Exception as e:
|
129
|
+
print(f"Error in posting batch: {e}")
|
130
|
+
|
131
|
+
def AllProcessMapping(self):
|
132
|
+
for batch in self.allBatches:
|
133
|
+
for record in batch:
|
134
|
+
rowId = record['rowID']
|
135
|
+
colId = record['columnID']
|
136
|
+
pid = f'{rowId}-{colId}-{colId}'
|
137
|
+
self.processMapping[pid] = record
|
138
|
+
|
139
|
+
|
140
|
+
def finalResp(self,results):
|
141
|
+
seen = set()
|
142
|
+
uniqueResults = []
|
143
|
+
|
144
|
+
for item in results:
|
145
|
+
for rowID in item: # Each item has only one key
|
146
|
+
if rowID not in seen:
|
147
|
+
seen.add(rowID)
|
148
|
+
uniqueResults.append(item)
|
149
|
+
|
150
|
+
return uniqueResults
|
151
|
+
|
152
|
+
def evaluate(self, dataframe, eval ="Response Completeness", prompt_template="", outputColName="output"):
|
153
|
+
|
154
|
+
results = {}
|
155
|
+
try:
|
156
|
+
socketID = self.socket.connect(timeout=150)
|
157
|
+
|
158
|
+
# Ensure full connection before proceeding
|
159
|
+
max_wait_secs = 20
|
160
|
+
waited_secs = 0
|
161
|
+
while not self.socket._connection_established.is_set():
|
162
|
+
time.sleep(0.1)
|
163
|
+
waited_secs += 0.1
|
164
|
+
if waited_secs >= max_wait_secs:
|
165
|
+
raise RuntimeError("Timeout waiting for server 'connection-established' event.")
|
166
|
+
|
167
|
+
rowIdMapping = {}
|
168
|
+
|
169
|
+
|
170
|
+
print(f"\n======= Running evaluation for: {eval} =======")
|
171
|
+
|
172
|
+
try:
|
173
|
+
self.validateApiKey(evalName=eval)
|
174
|
+
except Exception as e:
|
175
|
+
if hasattr(e, "response") and getattr(e, "response", None) is not None:
|
176
|
+
pass
|
177
|
+
raise
|
178
|
+
|
179
|
+
if self.hitsAvailable == 0 or len(dataframe) > self.hitsAvailable:
|
180
|
+
raise LlumoAIError.InsufficientCredits()
|
181
|
+
|
182
|
+
evalDefinition = self.evalDefinition[eval]
|
183
|
+
model = "GPT_4"
|
184
|
+
provider = "OPENAI"
|
185
|
+
evalType = "LLM"
|
186
|
+
workspaceID = self.workspaceID
|
187
|
+
|
188
|
+
self.allBatches = []
|
189
|
+
currentBatch = []
|
190
|
+
|
191
|
+
for index, row in dataframe.iterrows():
|
192
|
+
tools = [row["tools"]] if "tools" in dataframe.columns else []
|
193
|
+
groundTruth = row["groundTruth"] if "groundTruth" in dataframe.columns else ""
|
194
|
+
messageHistory = [row["messageHistory"]] if "messageHistory" in dataframe.columns else []
|
195
|
+
promptTemplate = prompt_template
|
196
|
+
|
197
|
+
keys = re.findall(r"{{(.*?)}}", promptTemplate)
|
198
|
+
|
199
|
+
if not all([ky in dataframe.columns for ky in keys]):
|
200
|
+
raise LlumoAIError.InvalidPromptTemplate()
|
201
|
+
|
202
|
+
inputDict = {key: row[key] for key in keys if key in row}
|
203
|
+
output = row[outputColName] if outputColName in dataframe.columns else ""
|
204
|
+
|
205
|
+
activePlayground = f"{int(time.time() * 1000)}{uuid.uuid4()}".replace("-", "")
|
206
|
+
rowID = f"{int(time.time() * 1000)}{uuid.uuid4()}".replace("-", "")
|
207
|
+
columnID = f"{int(time.time() * 1000)}{uuid.uuid4()}".replace("-", "")
|
208
|
+
|
209
|
+
rowIdMapping[rowID] = index
|
210
|
+
|
211
|
+
templateData = {
|
212
|
+
"processID": getProcessID(),
|
213
|
+
"socketID": socketID,
|
214
|
+
"source": "SDK",
|
215
|
+
"processData": {
|
216
|
+
"executionDependency": {
|
217
|
+
"query": "",
|
218
|
+
"context": "",
|
219
|
+
"output": output,
|
220
|
+
"tools": tools,
|
221
|
+
"groundTruth": groundTruth,
|
222
|
+
"messageHistory": messageHistory,
|
223
|
+
},
|
224
|
+
"definition": evalDefinition,
|
225
|
+
"model": model,
|
226
|
+
"provider": provider,
|
227
|
+
"analytics": eval,
|
228
|
+
},
|
229
|
+
"workspaceID": workspaceID,
|
230
|
+
"type": "EVAL",
|
231
|
+
"evalType": evalType,
|
232
|
+
"kpi": eval,
|
233
|
+
"columnID": columnID,
|
234
|
+
"rowID": rowID,
|
235
|
+
"playgroundID": activePlayground,
|
236
|
+
"processType": "EVAL",
|
237
|
+
}
|
238
|
+
|
239
|
+
query = ""
|
240
|
+
context = ""
|
241
|
+
for key, value in inputDict.items():
|
242
|
+
if isinstance(value, str):
|
243
|
+
length = len(value.split()) * 1.5
|
244
|
+
if length > 50:
|
245
|
+
context += f" {key}: {value}, "
|
246
|
+
else:
|
247
|
+
if promptTemplate:
|
248
|
+
tempObj = {key: value}
|
249
|
+
promptTemplate = getInputPopulatedPrompt(promptTemplate, tempObj)
|
250
|
+
else:
|
251
|
+
query += f" {key}: {value}, "
|
252
|
+
|
253
|
+
if not context.strip():
|
254
|
+
for key, value in inputDict.items():
|
255
|
+
context += f" {key}: {value}, "
|
256
|
+
|
257
|
+
templateData["processData"]["executionDependency"]["context"] = context.strip()
|
258
|
+
templateData["processData"]["executionDependency"]["query"] = query.strip()
|
259
|
+
|
260
|
+
if promptTemplate and not query.strip():
|
261
|
+
templateData["processData"]["executionDependency"]["query"] = promptTemplate
|
262
|
+
|
263
|
+
currentBatch.append(templateData)
|
264
|
+
|
265
|
+
if len(currentBatch) == 10 or index == len(dataframe) - 1:
|
266
|
+
self.allBatches.append(currentBatch)
|
267
|
+
currentBatch = []
|
268
|
+
|
269
|
+
totalItems = sum(len(batch) for batch in self.allBatches)
|
270
|
+
|
271
|
+
for cnt, batch in enumerate(self.allBatches):
|
272
|
+
try:
|
273
|
+
self.postBatch(batch=batch, workspaceID=workspaceID)
|
274
|
+
# print("Betch Posted with item len: ", len(batch))
|
275
|
+
except Exception as e:
|
276
|
+
continue
|
277
|
+
|
278
|
+
time.sleep(1)
|
279
|
+
|
280
|
+
timeout = max(50, min(600, totalItems * 10))
|
281
|
+
|
282
|
+
self.socket.listenForResults(
|
283
|
+
min_wait=40, max_wait=timeout, inactivity_timeout=150, expected_results=totalItems
|
284
|
+
)
|
285
|
+
|
286
|
+
eval_results = self.socket.getReceivedData()
|
287
|
+
results[eval] = self.finalResp(eval_results)
|
288
|
+
|
289
|
+
except Exception as e:
|
290
|
+
raise
|
291
|
+
finally:
|
292
|
+
try:
|
293
|
+
self.socket.disconnect()
|
294
|
+
except Exception as e:
|
295
|
+
pass
|
296
|
+
|
297
|
+
for evalName, records in results.items():
|
298
|
+
dataframe[evalName] = None
|
299
|
+
for item in records:
|
300
|
+
for compound_key, value in item.items():
|
301
|
+
rowID = compound_key.split('-')[0]
|
302
|
+
if rowID in rowIdMapping:
|
303
|
+
index = rowIdMapping[rowID]
|
304
|
+
dataframe.at[index, evalName] = value
|
305
|
+
else:
|
306
|
+
pass
|
307
|
+
# print(f"⚠️ Warning: Could not find rowID {rowID} in mapping")
|
308
|
+
|
309
|
+
return dataframe
|
310
|
+
|
311
|
+
def evaluateCompressor(self, dataframe, prompt_template):
    """Compress prompts for every row of *dataframe* via the LLUMO cost-saving service.

    For each row, the ``{{placeholder}}`` markers in *prompt_template* are
    resolved from the row's columns, a COST_SAVING work item is built, and the
    items are posted to the backend in batches of 10.  Results stream back over
    the socket connection and are written into four new dataframe columns.

    Args:
        dataframe: pandas DataFrame whose columns supply the template inputs.
        prompt_template: prompt text containing ``{{placeholder}}`` markers;
            every placeholder must correspond to a dataframe column.

    Returns:
        The same dataframe with ``compressed_prompt``,
        ``compressed_prompt_output``, ``cost`` and ``cost_saving`` columns added.

    Raises:
        RuntimeError: if the socket handshake does not complete in time.
        LlumoAIError.InsufficientCredits: if the workspace has no hits left
            or fewer hits than rows.
        LlumoAIError.InvalidPromptTemplate: if a placeholder has no matching
            dataframe column.
    """
    results = []

    def _newId():
        # Millisecond timestamp + dash-less UUID: unique ID for
        # playground/row/column/cost records.
        return f"{int(time.time() * 1000)}{uuid.uuid4()}".replace("-", "")

    try:
        # Connect to the socket server and wait for the server-side
        # 'connection-established' event before posting any work,
        # otherwise results could be routed to a dead socket ID.
        socketID = self.socket.connect(timeout=150)

        max_wait_secs = 20
        waited_secs = 0
        while not self.socket._connection_established.is_set():
            time.sleep(0.1)
            waited_secs += 0.1
            if waited_secs >= max_wait_secs:
                raise RuntimeError("Timeout waiting for server 'connection-established' event.")

        try:
            self.validateApiKey()
        except Exception as e:
            print(f"Error during API key validation: {str(e)}")
            if hasattr(e, "response") and getattr(e, "response", None) is not None:
                print(f"Status code: {e.response.status_code}")
                print(f"Response content: {e.response.text[:500]}...")
            raise

        # Refuse to start a run the workspace cannot pay for.
        if self.hitsAvailable == 0 or len(dataframe) > self.hitsAvailable:
            raise LlumoAIError.InsufficientCredits()

        model = "GPT_4"
        provider = "OPENAI"
        evalType = "LLUMO"
        workspaceID = self.workspaceID

        # Prepare all batches up-front so posting is a simple loop.
        self.allBatches = []
        currentBatch = []

        for index, row in dataframe.iterrows():
            promptTemplate = prompt_template

            # Placeholders ({{key}}) decide which columns feed this row's payload.
            keys = re.findall(r"{{(.*?)}}", promptTemplate)
            inputDict = {key: row[key] for key in keys if key in row}

            if not all(ky in dataframe.columns for ky in keys):
                raise LlumoAIError.InvalidPromptTemplate()

            activePlayground = _newId()
            rowID = _newId()
            columnID = _newId()

            compressed_prompt_id = _newId()
            compressed_prompt_output_id = _newId()
            cost_id = _newId()
            cost_saving_id = _newId()

            # The backend maps each result column back to these generated IDs.
            # (Named costIds to avoid shadowing the costColumnMapping helper
            # called at the end of this method.)
            costIds = {
                "compressed_prompt": compressed_prompt_id,
                "compressed_prompt_output": compressed_prompt_output_id,
                "cost": cost_id,
                "cost_saving": cost_saving_id,
            }

            # Use the server-provided socket ID so results route back here.
            templateData = {
                "processID": getProcessID(),
                "socketID": socketID,
                "source": "SDK",
                "rowID": rowID,
                "columnID": columnID,
                "processType": "COST_SAVING",
                "evalType": evalType,
                "dependency": list(inputDict.keys()),
                "costColumnMapping": costIds,
                "processData": {
                    "rowData": {
                        "query": {"type": "VARIABLE", "value": ""},
                        "context": {"type": "VARIABLE", "value": ""},
                    },
                    "dependency": list(inputDict.keys()),
                    "dependencyMapping": {ky: ky for ky in inputDict},
                    "provider": provider,
                    "model": model,
                    "promptText": promptTemplate,
                    "costColumnMapping": dict(costIds),
                },
                "workspaceID": workspaceID,
                "email": "",
                "playgroundID": activePlayground,
            }

            # Build query/context from the input values: long string values
            # (rough word-count heuristic) become context, short ones are
            # folded into the populated prompt / query.
            query = ""
            context = ""

            for key, value in inputDict.items():
                if isinstance(value, str):
                    length = len(value.split()) * 1.5
                    if length > 50:
                        context += f" {key}: {value}, "
                    else:
                        if promptTemplate:
                            populatedPrompt = getInputPopulatedPrompt(promptTemplate, {key: value})
                            query += f"{populatedPrompt} "
                        else:
                            query += f" {key}: {value}, "

            # Guarantee a non-empty context by falling back to all inputs.
            if not context.strip():
                for key, value in inputDict.items():
                    context += f" {key}: {value}, "

            templateData["processData"]["rowData"]["context"]["value"] = context.strip()
            templateData["processData"]["rowData"]["query"]["value"] = query.strip()

            if promptTemplate and not query.strip():
                templateData["processData"]["rowData"]["query"]["value"] = promptTemplate

            currentBatch.append(templateData)

            # Flush in batches of 10, and always flush on the final row.
            if len(currentBatch) == 10 or index == len(dataframe) - 1:
                self.allBatches.append(currentBatch)
                currentBatch = []

        # Post all batches.
        total_items = sum(len(batch) for batch in self.allBatches)

        for cnt, batch in enumerate(self.allBatches):
            try:
                self.postBatch(batch=batch, workspaceID=workspaceID)
            except Exception as e:
                print(f"Error posting batch {cnt + 1}: {str(e)}")
                continue

            # Small delay between batches to prevent overwhelming the server.
            time.sleep(1)

        # Record the process -> row/column mapping needed to reassemble results.
        self.AllProcessMapping()

        # Scale the listening window with the amount of submitted work
        # (bounded to 60..600 seconds).
        timeout = max(60, min(600, total_items * 10))

        self.socket.listenForResults(min_wait=20, max_wait=timeout, inactivity_timeout=30, expected_results=None)

        eval_results = self.socket.getReceivedData()
        results = self.finalResp(eval_results)
        # BUG FIX: the original printed f"...{eval}..." which interpolated the
        # *builtin* eval function (no local `eval` exists in this method),
        # emitting "<built-in function eval>" to the user.
        print("======= Completed compression =======\n")

    except Exception as e:
        print(f"Error during evaluation: {e}")
        raise
    finally:
        # Always release the socket, even on failure.
        try:
            self.socket.disconnect()
        except Exception as e:
            print(f"Error disconnecting socket: {e}")

    # Map the raw results back onto the dataframe columns.
    compressed_prompt, compressed_prompt_output, cost, cost_saving = costColumnMapping(results, self.processMapping)
    dataframe["compressed_prompt"] = compressed_prompt
    dataframe["compressed_prompt_output"] = compressed_prompt_output
    dataframe["cost"] = cost
    dataframe["cost_saving"] = cost_saving
    return dataframe
|
497
|
+
|
498
|
+
|
499
|
+
def run_sweep(self, templates: List[str], dataset: Dict[str, List[str]], model_aliases: List[AVAILABLEMODELS], apiKey: str, eval=None, toEvaluate: bool = False) -> pd.DataFrame:
    """Execute every (template x dataset-combination x model) cell of a sweep.

    Builds the Cartesian product of *dataset* values, substitutes each
    combination into each template, runs the populated prompt against every
    model alias, and collects one result row per cell.

    Args:
        templates: prompt templates containing ``{{placeholder}}`` markers.
        dataset: mapping of placeholder name -> list of candidate values.
        model_aliases: models to run each populated prompt against.
        apiKey: provider API key forwarded to the model executor.
        eval: evaluation names to run when *toEvaluate* is true; defaults to
            ``["Response Correctness"]``.
        toEvaluate: when true, the sweep output is also scored via
            ``self.evaluate`` and that result is returned instead.

    Returns:
        A DataFrame with one row per cell (template, prompt, combination
        values, model, output), optionally augmented with evaluation columns.
    """
    # BUG FIX: the default was the mutable list literal ["Response Correctness"],
    # shared across all calls (classic mutable-default pitfall). Use a None
    # sentinel instead; behavior for callers is unchanged.
    if eval is None:
        eval = ["Response Correctness"]

    executor = ModelExecutor(apiKey)

    # Cartesian product of all dataset values -> one dict per combination.
    keys = list(dataset.keys())
    value_combinations = list(itertools.product(*dataset.values()))
    combinations = [dict(zip(keys, values)) for values in value_combinations]

    results = []

    # Iterate through combinations.
    for combo in combinations:
        for template in templates:
            # Substitute each {{placeholder}} with the combination's value.
            prompt = template
            for k, v in combo.items():
                prompt = prompt.replace(f"{{{{{k}}}}}", v)

            # Add a row for each model.
            for model in model_aliases:
                row = {
                    "template": template,
                    "prompt": prompt,
                    **combo,
                    "model": model.value,
                }

                try:
                    provider = getProviderFromModel(model)
                    response = executor.execute(provider, model.value, prompt, apiKey)
                    row["output"] = response
                except Exception as e:
                    # Record the failure inline rather than aborting the sweep.
                    row["output"] = f"Error: {str(e)}"

                results.append(row)

    df = pd.DataFrame(results)
    if toEvaluate:
        # NOTE(review): only the first template is passed as the evaluation
        # prompt context — confirm this is intentional for multi-template sweeps.
        res = self.evaluate(df, eval=eval, prompt_template=str(templates[0]))
        return res

    return df
|
539
|
+
|
540
|
+
def evaluateAgents(self, dataframe, model, agents, model_api_key=None,
                   prompt_template="Give answer for the given query: {{query}}"):
    """Run the given agents over *dataframe*, then score the tool responses.

    The agents are executed first via ``LlumoAgentExecutor.run``; the
    resulting dataframe is then passed through four agent-focused
    evaluations, each one adding its own column.

    Args:
        dataframe: input rows for the agents.
        model: provider name, either 'openai' or 'google' (case-insensitive).
        agents: agent definitions forwarded to the executor.
        model_api_key: optional provider API key for the executor.
        prompt_template: template used as evaluation context.

    Returns:
        The agent-execution dataframe augmented with one column per evaluation.

    Raises:
        ValueError: if *model* is not 'openai' or 'google'.
    """
    if model.lower() not in ("openai", "google"):
        raise ValueError("Model must be 'openai' or 'google'")

    # Unified agent execution produces the tool-response dataframe.
    resultDf = LlumoAgentExecutor.run(dataframe, agents, model=model, model_api_key=model_api_key)

    # Score the agent run on each agent-quality metric in turn; every pass
    # returns the dataframe with that metric's column appended.
    agentEvals = (
        "Tool Reliability",
        "Stepwise Progression",
        "Tool Selection Accuracy",
        "Final Task Alignment",
    )
    for evalName in agentEvals:
        resultDf = self.evaluate(resultDf, eval=evalName, prompt_template=prompt_template)

    return resultDf
|
557
|
+
|
558
|
+
|
559
|
+
class SafeDict(dict):
    """A dict whose lookups never raise ``KeyError``.

    Any missing key resolves to the empty string, which makes instances safe
    to use with ``str.format_map`` when a template references placeholders
    that are absent from the mapping.
    """

    def __missing__(self, absent_key):
        # Invoked by dict.__getitem__ for keys not present; substitute "".
        return ""
|