llumo 0.1.8__py3-none-any.whl → 0.1.9__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
llumo/client.py CHANGED
@@ -1,561 +1,561 @@
1
- import requests
2
- from .exceptions import LlumoAIError
3
- import time
4
- import re
5
- import json
6
- import uuid
7
- import threading
8
- from .helpingFuntions import *
9
- from dotenv import load_dotenv
10
- import os
11
- import itertools
12
- import pandas as pd
13
- from typing import List, Dict
14
- from .models import AVAILABLEMODELS,getProviderFromModel
15
- from .execution import ModelExecutor
16
- from .sockets import LlumoSocketClient
17
- from .functionCalling import LlumoAgentExecutor
18
-
19
-
20
- # 👇 NEW: Explicitly load .env from the package folder
21
- envPath = os.path.join(os.path.dirname(__file__), '.env')
22
- load_dotenv(dotenv_path=envPath, override=False)# Automatically looks for .env in current directory
23
-
24
- postUrl = os.getenv("postUrl")
25
- fetchUrl = os.getenv("fetchUrl")
26
- validateUrl = os.getenv("validateUrl")
27
- socketUrl = os.getenv("SOCKET_URL")
28
-
29
-
30
- class LlumoClient:
31
-
32
- def __init__(self, api_key):
33
- self.apiKey = api_key
34
- self.socket = LlumoSocketClient(socketUrl)
35
- self.processMapping = {}
36
-
37
-
38
- def validateApiKey(self, evalName = ""):
39
- headers = {
40
- "Authorization": f"Bearer {self.apiKey}",
41
- "Content-Type": "application/json",
42
- }
43
- reqBody = {"analytics": [evalName]}
44
-
45
- # print(f"Making API key validation request to: {validateUrl}")
46
- # print(f"Request body: {reqBody}")
47
-
48
- try:
49
- response = requests.post(url=validateUrl, json=reqBody, headers=headers)
50
- # print(response.text)
51
- # Print response info for debugging
52
- # print(f"Response status code: {response.status_code}")
53
- # print(f"Response headers: {response.headers}")
54
-
55
- # Try to get at least some of the response content
56
- try:
57
- response_preview = response.text[:500] # First 500 chars
58
- # print(f"Response preview: {response_preview}")
59
- except Exception as e:
60
- print(f"Could not get response preview: {e}")
61
-
62
- except requests.exceptions.RequestException as e:
63
- print(f"Request exception: {str(e)}")
64
- raise LlumoAIError.RequestFailed(detail=str(e))
65
-
66
- if response.status_code == 401:
67
- raise LlumoAIError.InvalidApiKey()
68
-
69
- # Handle other common status codes
70
- if response.status_code == 404:
71
- raise LlumoAIError.RequestFailed(
72
- detail=f"Endpoint not found (404): {validateUrl}"
73
- )
74
-
75
- # if response.status_code >= 500:
76
- # raise LlumoAIError.ServerError(
77
- # detail=f"Server error ({response.status_code})"
78
- # )
79
-
80
- if response.status_code != 200:
81
- raise LlumoAIError.RequestFailed(
82
- detail=f"Unexpected status code: {response.status_code}"
83
- )
84
-
85
- # Try to parse JSON
86
- try:
87
- data = response.json()
88
- except ValueError as e:
89
- print(f"JSON parsing error: {str(e)}")
90
- # print(f"Response content that could not be parsed: {response.text[:1000]}...")
91
- raise LlumoAIError.InvalidJsonResponse()
92
-
93
- if "data" not in data or not data["data"]:
94
- # print(f"Invalid API response structure: {data}")
95
- raise LlumoAIError.InvalidApiResponse()
96
-
97
- try:
98
- self.hitsAvailable = data["data"].get("remainingHits", 0)
99
- self.workspaceID = data["data"].get("workspaceID")
100
- self.evalDefinition = data["data"].get("analyticsMapping")
101
- self.socketToken = data["data"].get("token")
102
-
103
- # print(f"API key validation successful:")
104
- # print(f"- Remaining hits: {self.hitsAvailable}")
105
- # print(f"- Workspace ID: {self.workspaceID}")
106
- # print(f"- Token received: {'Yes' if self.socketToken else 'No'}")
107
-
108
- except Exception as e:
109
- # print(f"Error extracting data from response: {str(e)}")
110
- raise LlumoAIError.UnexpectedError(detail=str(e))
111
-
112
- def postBatch(self, batch, workspaceID):
113
- payload = {
114
- "batch": json.dumps(batch),
115
- "runType": "EVAL",
116
- "workspaceID": workspaceID,
117
- }
118
- headers = {
119
- "Authorization": f"Bearer {self.socketToken}",
120
- "Content-Type": "application/json",
121
- }
122
- try:
123
- # print(postUrl)
124
- response = requests.post(postUrl, json=payload, headers=headers)
125
- # print(f"Post API Status Code: {response.status_code}")
126
- # print(response.text)
127
-
128
- except Exception as e:
129
- print(f"Error in posting batch: {e}")
130
-
131
- def AllProcessMapping(self):
132
- for batch in self.allBatches:
133
- for record in batch:
134
- rowId = record['rowID']
135
- colId = record['columnID']
136
- pid = f'{rowId}-{colId}-{colId}'
137
- self.processMapping[pid] = record
138
-
139
-
140
- def finalResp(self,results):
141
- seen = set()
142
- uniqueResults = []
143
-
144
- for item in results:
145
- for rowID in item: # Each item has only one key
146
- if rowID not in seen:
147
- seen.add(rowID)
148
- uniqueResults.append(item)
149
-
150
- return uniqueResults
151
-
152
- def evaluate(self, dataframe, eval ="Response Completeness", prompt_template="", outputColName="output"):
153
-
154
- results = {}
155
- try:
156
- socketID = self.socket.connect(timeout=150)
157
-
158
- # Ensure full connection before proceeding
159
- max_wait_secs = 20
160
- waited_secs = 0
161
- while not self.socket._connection_established.is_set():
162
- time.sleep(0.1)
163
- waited_secs += 0.1
164
- if waited_secs >= max_wait_secs:
165
- raise RuntimeError("Timeout waiting for server 'connection-established' event.")
166
-
167
- rowIdMapping = {}
168
-
169
-
170
- print(f"\n======= Running evaluation for: {eval} =======")
171
-
172
- try:
173
- self.validateApiKey(evalName=eval)
174
- except Exception as e:
175
- if hasattr(e, "response") and getattr(e, "response", None) is not None:
176
- pass
177
- raise
178
-
179
- if self.hitsAvailable == 0 or len(dataframe) > self.hitsAvailable:
180
- raise LlumoAIError.InsufficientCredits()
181
-
182
- evalDefinition = self.evalDefinition[eval]
183
- model = "GPT_4"
184
- provider = "OPENAI"
185
- evalType = "LLM"
186
- workspaceID = self.workspaceID
187
-
188
- self.allBatches = []
189
- currentBatch = []
190
-
191
- for index, row in dataframe.iterrows():
192
- tools = [row["tools"]] if "tools" in dataframe.columns else []
193
- groundTruth = row["groundTruth"] if "groundTruth" in dataframe.columns else ""
194
- messageHistory = [row["messageHistory"]] if "messageHistory" in dataframe.columns else []
195
- promptTemplate = prompt_template
196
-
197
- keys = re.findall(r"{{(.*?)}}", promptTemplate)
198
-
199
- if not all([ky in dataframe.columns for ky in keys]):
200
- raise LlumoAIError.InvalidPromptTemplate()
201
-
202
- inputDict = {key: row[key] for key in keys if key in row}
203
- output = row[outputColName] if outputColName in dataframe.columns else ""
204
-
205
- activePlayground = f"{int(time.time() * 1000)}{uuid.uuid4()}".replace("-", "")
206
- rowID = f"{int(time.time() * 1000)}{uuid.uuid4()}".replace("-", "")
207
- columnID = f"{int(time.time() * 1000)}{uuid.uuid4()}".replace("-", "")
208
-
209
- rowIdMapping[rowID] = index
210
-
211
- templateData = {
212
- "processID": getProcessID(),
213
- "socketID": socketID,
214
- "source": "SDK",
215
- "processData": {
216
- "executionDependency": {
217
- "query": "",
218
- "context": "",
219
- "output": output,
220
- "tools": tools,
221
- "groundTruth": groundTruth,
222
- "messageHistory": messageHistory,
223
- },
224
- "definition": evalDefinition,
225
- "model": model,
226
- "provider": provider,
227
- "analytics": eval,
228
- },
229
- "workspaceID": workspaceID,
230
- "type": "EVAL",
231
- "evalType": evalType,
232
- "kpi": eval,
233
- "columnID": columnID,
234
- "rowID": rowID,
235
- "playgroundID": activePlayground,
236
- "processType": "EVAL",
237
- }
238
-
239
- query = ""
240
- context = ""
241
- for key, value in inputDict.items():
242
- if isinstance(value, str):
243
- length = len(value.split()) * 1.5
244
- if length > 50:
245
- context += f" {key}: {value}, "
246
- else:
247
- if promptTemplate:
248
- tempObj = {key: value}
249
- promptTemplate = getInputPopulatedPrompt(promptTemplate, tempObj)
250
- else:
251
- query += f" {key}: {value}, "
252
-
253
- if not context.strip():
254
- for key, value in inputDict.items():
255
- context += f" {key}: {value}, "
256
-
257
- templateData["processData"]["executionDependency"]["context"] = context.strip()
258
- templateData["processData"]["executionDependency"]["query"] = query.strip()
259
-
260
- if promptTemplate and not query.strip():
261
- templateData["processData"]["executionDependency"]["query"] = promptTemplate
262
-
263
- currentBatch.append(templateData)
264
-
265
- if len(currentBatch) == 10 or index == len(dataframe) - 1:
266
- self.allBatches.append(currentBatch)
267
- currentBatch = []
268
-
269
- totalItems = sum(len(batch) for batch in self.allBatches)
270
-
271
- for cnt, batch in enumerate(self.allBatches):
272
- try:
273
- self.postBatch(batch=batch, workspaceID=workspaceID)
274
- # print("Betch Posted with item len: ", len(batch))
275
- except Exception as e:
276
- continue
277
-
278
- time.sleep(1)
279
-
280
- timeout = max(50, min(600, totalItems * 10))
281
-
282
- self.socket.listenForResults(
283
- min_wait=40, max_wait=timeout, inactivity_timeout=150, expected_results=totalItems
284
- )
285
-
286
- eval_results = self.socket.getReceivedData()
287
- results[eval] = self.finalResp(eval_results)
288
-
289
- except Exception as e:
290
- raise
291
- finally:
292
- try:
293
- self.socket.disconnect()
294
- except Exception as e:
295
- pass
296
-
297
- for evalName, records in results.items():
298
- dataframe[evalName] = None
299
- for item in records:
300
- for compound_key, value in item.items():
301
- rowID = compound_key.split('-')[0]
302
- if rowID in rowIdMapping:
303
- index = rowIdMapping[rowID]
304
- dataframe.at[index, evalName] = value
305
- else:
306
- pass
307
- # print(f"⚠️ Warning: Could not find rowID {rowID} in mapping")
308
-
309
- return dataframe
310
-
311
- def evaluateCompressor(self, dataframe, prompt_template):
312
- results = []
313
-
314
- try:
315
- # Connect to socket first
316
- # print("Connecting to socket server...")
317
- socketID = self.socket.connect(timeout=150)
318
-
319
- # Ensure full connection before proceeding
320
- max_wait_secs = 20
321
- waited_secs = 0
322
- while not self.socket._connection_established.is_set():
323
- time.sleep(0.1)
324
- waited_secs += 0.1
325
- if waited_secs >= max_wait_secs:
326
- raise RuntimeError("Timeout waiting for server 'connection-established' event.")
327
-
328
- # print(f"Connected with socket ID: {socketID}")
329
-
330
- try:
331
- # print(f"Validating API key...")
332
- self.validateApiKey()
333
- # print(f"API key validation successful. Hits available: {self.hitsAvailable}")
334
- except Exception as e:
335
- print(f"Error during API key validation: {str(e)}")
336
- if hasattr(e, "response") and getattr(e, "response", None) is not None:
337
- print(f"Status code: {e.response.status_code}")
338
- print(f"Response content: {e.response.text[:500]}...")
339
- raise
340
-
341
- if self.hitsAvailable == 0 or len(dataframe) > self.hitsAvailable:
342
- raise LlumoAIError.InsufficientCredits()
343
-
344
- model = "GPT_4"
345
- provider = "OPENAI"
346
- evalType = "LLUMO"
347
- workspaceID = self.workspaceID
348
-
349
- # Prepare all batches before sending
350
- # print("Preparing batches...")
351
- self.allBatches = []
352
- currentBatch = []
353
-
354
- for index, row in dataframe.iterrows():
355
- promptTemplate = prompt_template
356
-
357
- # extracting the placeholders from the prompt template
358
- keys = re.findall(r"{{(.*?)}}", promptTemplate)
359
- inputDict = {key: row[key] for key in keys if key in row}
360
-
361
- if not all([ky in dataframe.columns for ky in keys]):
362
- raise LlumoAIError.InvalidPromptTemplate()
363
-
364
- activePlayground = f"{int(time.time() * 1000)}{uuid.uuid4()}".replace("-", "")
365
- rowID = f"{int(time.time() * 1000)}{uuid.uuid4()}".replace("-", "")
366
- columnID = f"{int(time.time() * 1000)}{uuid.uuid4()}".replace("-", "")
367
-
368
- compressed_prompt_id = f"{int(time.time() * 1000)}{uuid.uuid4()}".replace("-", "")
369
- compressed_prompt_output_id = f"{int(time.time() * 1000)}{uuid.uuid4()}".replace("-", "")
370
- cost_id = f"{int(time.time() * 1000)}{uuid.uuid4()}".replace("-", "")
371
- cost_saving_id = f"{int(time.time() * 1000)}{uuid.uuid4()}".replace("-", "")
372
-
373
- # Use the server-provided socket ID here
374
- templateData = {
375
- "processID": getProcessID(),
376
- "socketID": socketID,
377
- "source": "SDK",
378
- "rowID": rowID,
379
- "columnID": columnID,
380
- "processType": "COST_SAVING",
381
- "evalType": evalType,
382
- "dependency": list(inputDict.keys()),
383
- "costColumnMapping": {
384
- "compressed_prompt": compressed_prompt_id,
385
- "compressed_prompt_output": compressed_prompt_output_id,
386
- "cost": cost_id,
387
- "cost_saving": cost_saving_id
388
- },
389
- "processData": {
390
- "rowData": {
391
- "query": {"type": "VARIABLE", "value": ""},
392
- "context": {"type": "VARIABLE", "value": ""},
393
- },
394
- "dependency": list(inputDict.keys()),
395
- "dependencyMapping": {ky: ky for ky in list(inputDict.keys())},
396
- "provider": provider,
397
- "model": model,
398
- "promptText": promptTemplate,
399
- "costColumnMapping": {
400
- "compressed_prompt": compressed_prompt_id,
401
- "compressed_prompt_output": compressed_prompt_output_id,
402
- "cost": cost_id,
403
- "cost_saving": cost_saving_id
404
- }
405
- },
406
- "workspaceID": workspaceID,
407
- "email": "",
408
- "playgroundID": activePlayground
409
- }
410
-
411
-
412
- # Build query/context from input
413
- query = ""
414
- context = ""
415
-
416
- for key, value in inputDict.items():
417
- if isinstance(value, str):
418
- length = len(value.split()) * 1.5
419
- if length > 50:
420
- context += f" {key}: {value}, "
421
- else:
422
- if promptTemplate:
423
- populatedPrompt = getInputPopulatedPrompt(promptTemplate, {key: value})
424
- query += f"{populatedPrompt} "
425
- else:
426
- query += f" {key}: {value}, "
427
-
428
- if not context.strip():
429
- for key, value in inputDict.items():
430
- context += f" {key}: {value}, "
431
-
432
- templateData["processData"]["rowData"]["context"]["value"] = context.strip()
433
- templateData["processData"]["rowData"]["query"]["value"] = query.strip()
434
-
435
- if promptTemplate and not query.strip():
436
- templateData["processData"]["rowData"]["query"]["value"] = promptTemplate
437
-
438
- # print(templateData)
439
- currentBatch.append(templateData)
440
-
441
- if len(currentBatch) == 10 or index == len(dataframe) - 1:
442
- self.allBatches.append(currentBatch)
443
- currentBatch = []
444
-
445
- # Post all batches
446
- total_items = sum(len(batch) for batch in self.allBatches)
447
- # print(f"Posting {len(self.allBatches)} batches ({total_items} items total)")
448
-
449
- for cnt, batch in enumerate(self.allBatches):
450
- # print(f"Posting batch {cnt + 1}/{len(self.allBatches)} for eval '{eval}'")
451
- try:
452
- self.postBatch(batch=batch, workspaceID=workspaceID)
453
- # print(f"Batch {cnt + 1} posted successfully")
454
- except Exception as e:
455
- print(f"Error posting batch {cnt + 1}: {str(e)}")
456
- continue
457
-
458
- # Small delay between batches to prevent overwhelming the server
459
- time.sleep(1)
460
-
461
- # updating the dict for row column mapping
462
- self.AllProcessMapping()
463
- # Calculate a reasonable timeout based on the data size
464
- timeout = max(60, min(600, total_items * 10))
465
- # print(f"All batches posted. Waiting up to {timeout} seconds for results...")
466
-
467
- # Listen for results
468
- self.socket.listenForResults(min_wait=20, max_wait=timeout, inactivity_timeout=30,expected_results=None)
469
-
470
- # Get results for this evaluation
471
- eval_results = self.socket.getReceivedData()
472
- # print(f"Received {len(eval_results)} results for evaluation '{eval}'")
473
-
474
- # Add these results to our overall results
475
- results = self.finalResp(eval_results)
476
- print(f"======= Completed evaluation: {eval} =======\n")
477
-
478
- # print("All evaluations completed successfully")
479
-
480
- except Exception as e:
481
- print(f"Error during evaluation: {e}")
482
- raise
483
- finally:
484
- # Always disconnect the socket when done
485
- try:
486
- self.socket.disconnect()
487
- # print("Socket disconnected")
488
- except Exception as e:
489
- print(f"Error disconnecting socket: {e}")
490
-
491
- compressed_prompt , compressed_prompt_output , cost , cost_saving = costColumnMapping(results,self.processMapping)
492
- dataframe["compressed_prompt"] = compressed_prompt
493
- dataframe["compressed_prompt_output"] = compressed_prompt_output
494
- dataframe["cost"] = cost
495
- dataframe["cost_saving"] = cost_saving
496
- return dataframe
497
-
498
-
499
- def run_sweep(self,templates: List[str], dataset: Dict[str, List[str]], model_aliases: List[AVAILABLEMODELS], apiKey: str, eval = ["Response Correctness"],toEvaluate:bool =False ) -> pd.DataFrame:
500
- executor = ModelExecutor(apiKey)
501
-
502
- keys = list(dataset.keys())
503
- value_combinations = list(itertools.product(*dataset.values()))
504
- combinations = [dict(zip(keys, values)) for values in value_combinations]
505
-
506
- results = []
507
-
508
- # Iterate through combinations
509
- for combo in combinations:
510
- for template in templates:
511
- prompt = template
512
- for k, v in combo.items():
513
- prompt = prompt.replace(f"{{{{{k}}}}}", v)
514
- # Add a row for each model
515
- for model in model_aliases:
516
- row = {
517
- "template": template,
518
- "prompt": prompt,
519
- **combo,
520
- "model": model.value
521
- }
522
-
523
-
524
- try:
525
- provider = getProviderFromModel(model)
526
- response = executor.execute(provider, model.value, prompt, apiKey)
527
- row["output"] = response
528
- except Exception as e:
529
- row["output"] = f"Error: {str(e)}"
530
-
531
- results.append(row)
532
- df=pd.DataFrame(results)
533
- if toEvaluate:
534
-
535
- res = self.evaluate(df,eval =eval ,prompt_template=str(templates[0]))
536
- return res
537
-
538
- return df
539
-
540
- def evaluateAgents(self, dataframe, model, agents, model_api_key=None,
541
- prompt_template="Give answer for the given query: {{query}}"):
542
- if model.lower() not in ["openai", "google"]:
543
- raise ValueError("Model must be 'openai' or 'google'")
544
-
545
- # Run unified agent execution
546
- toolResponseDf = LlumoAgentExecutor.run(dataframe, agents, model=model, model_api_key=model_api_key)
547
- evals = ["Tool Reliability", "Stepwise Progression", "Tool Selection Accuracy", "Final Task Alignment"]
548
-
549
- for eval in evals:
550
- # Perform evaluation
551
- toolResponseDf = self.evaluate(
552
- toolResponseDf,
553
- eval = eval,
554
- prompt_template=prompt_template
555
- )
556
- return toolResponseDf
557
-
558
-
559
- class SafeDict(dict):
560
- def __missing__(self, key):
561
- return ""
1
+ import requests
2
+ from .exceptions import LlumoAIError
3
+ import time
4
+ import re
5
+ import json
6
+ import uuid
7
+ import threading
8
+ from .helpingFuntions import *
9
+ from dotenv import load_dotenv
10
+ import os
11
+ import itertools
12
+ import pandas as pd
13
+ from typing import List, Dict
14
+ from .models import AVAILABLEMODELS,getProviderFromModel
15
+ from .execution import ModelExecutor
16
+ from .sockets import LlumoSocketClient
17
+ from .functionCalling import LlumoAgentExecutor
18
+
19
+
20
+ # 👇 NEW: Explicitly load .env from the package folder
21
+ envPath = os.path.join(os.path.dirname(__file__), '.env')
22
+ load_dotenv(dotenv_path=envPath, override=False)# Automatically looks for .env in current directory
23
+
24
+ postUrl = os.getenv("postUrl")
25
+ fetchUrl = os.getenv("fetchUrl")
26
+ validateUrl = os.getenv("validateUrl")
27
+ socketUrl = os.getenv("SOCKET_URL")
28
+
29
+
30
+ class LlumoClient:
31
+
32
+ def __init__(self, api_key):
33
+ self.apiKey = api_key
34
+ self.socket = LlumoSocketClient(socketUrl)
35
+ self.processMapping = {}
36
+
37
+
38
+ def validateApiKey(self, evalName = ""):
39
+ headers = {
40
+ "Authorization": f"Bearer {self.apiKey}",
41
+ "Content-Type": "application/json",
42
+ }
43
+ reqBody = {"analytics": [evalName]}
44
+
45
+ # print(f"Making API key validation request to: {validateUrl}")
46
+ # print(f"Request body: {reqBody}")
47
+
48
+ try:
49
+ response = requests.post(url=validateUrl, json=reqBody, headers=headers)
50
+ # print(response.text)
51
+ # Print response info for debugging
52
+ # print(f"Response status code: {response.status_code}")
53
+ # print(f"Response headers: {response.headers}")
54
+
55
+ # Try to get at least some of the response content
56
+ try:
57
+ response_preview = response.text[:500] # First 500 chars
58
+ # print(f"Response preview: {response_preview}")
59
+ except Exception as e:
60
+ print(f"Could not get response preview: {e}")
61
+
62
+ except requests.exceptions.RequestException as e:
63
+ print(f"Request exception: {str(e)}")
64
+ raise LlumoAIError.RequestFailed(detail=str(e))
65
+
66
+ if response.status_code == 401:
67
+ raise LlumoAIError.InvalidApiKey()
68
+
69
+ # Handle other common status codes
70
+ if response.status_code == 404:
71
+ raise LlumoAIError.RequestFailed(
72
+ detail=f"Endpoint not found (404): {validateUrl}"
73
+ )
74
+
75
+ # if response.status_code >= 500:
76
+ # raise LlumoAIError.ServerError(
77
+ # detail=f"Server error ({response.status_code})"
78
+ # )
79
+
80
+ if response.status_code != 200:
81
+ raise LlumoAIError.RequestFailed(
82
+ detail=f"Unexpected status code: {response.status_code}"
83
+ )
84
+
85
+ # Try to parse JSON
86
+ try:
87
+ data = response.json()
88
+ except ValueError as e:
89
+ print(f"JSON parsing error: {str(e)}")
90
+ # print(f"Response content that could not be parsed: {response.text[:1000]}...")
91
+ raise LlumoAIError.InvalidJsonResponse()
92
+
93
+ if "data" not in data or not data["data"]:
94
+ # print(f"Invalid API response structure: {data}")
95
+ raise LlumoAIError.InvalidApiResponse()
96
+
97
+ try:
98
+ self.hitsAvailable = data["data"].get("remainingHits", 0)
99
+ self.workspaceID = data["data"].get("workspaceID")
100
+ self.evalDefinition = data["data"].get("analyticsMapping")
101
+ self.socketToken = data["data"].get("token")
102
+
103
+ # print(f"API key validation successful:")
104
+ # print(f"- Remaining hits: {self.hitsAvailable}")
105
+ # print(f"- Workspace ID: {self.workspaceID}")
106
+ # print(f"- Token received: {'Yes' if self.socketToken else 'No'}")
107
+
108
+ except Exception as e:
109
+ # print(f"Error extracting data from response: {str(e)}")
110
+ raise LlumoAIError.UnexpectedError(detail=str(e))
111
+
112
+ def postBatch(self, batch, workspaceID):
113
+ payload = {
114
+ "batch": json.dumps(batch),
115
+ "runType": "EVAL",
116
+ "workspaceID": workspaceID,
117
+ }
118
+ headers = {
119
+ "Authorization": f"Bearer {self.socketToken}",
120
+ "Content-Type": "application/json",
121
+ }
122
+ try:
123
+ # print(postUrl)
124
+ response = requests.post(postUrl, json=payload, headers=headers)
125
+ # print(f"Post API Status Code: {response.status_code}")
126
+ # print(response.text)
127
+
128
+ except Exception as e:
129
+ print(f"Error in posting batch: {e}")
130
+
131
+ def AllProcessMapping(self):
132
+ for batch in self.allBatches:
133
+ for record in batch:
134
+ rowId = record['rowID']
135
+ colId = record['columnID']
136
+ pid = f'{rowId}-{colId}-{colId}'
137
+ self.processMapping[pid] = record
138
+
139
+
140
+ def finalResp(self,results):
141
+ seen = set()
142
+ uniqueResults = []
143
+
144
+ for item in results:
145
+ for rowID in item: # Each item has only one key
146
+ if rowID not in seen:
147
+ seen.add(rowID)
148
+ uniqueResults.append(item)
149
+
150
+ return uniqueResults
151
+
152
+ def evaluate(self, dataframe, eval ="Response Completeness", prompt_template="", outputColName="output"):
153
+
154
+ results = {}
155
+ try:
156
+ socketID = self.socket.connect(timeout=150)
157
+
158
+ # Ensure full connection before proceeding
159
+ max_wait_secs = 20
160
+ waited_secs = 0
161
+ while not self.socket._connection_established.is_set():
162
+ time.sleep(0.1)
163
+ waited_secs += 0.1
164
+ if waited_secs >= max_wait_secs:
165
+ raise RuntimeError("Timeout waiting for server 'connection-established' event.")
166
+
167
+ rowIdMapping = {}
168
+
169
+
170
+ print(f"\n======= Running evaluation for: {eval} =======")
171
+
172
+ try:
173
+ self.validateApiKey(evalName=eval)
174
+ except Exception as e:
175
+ if hasattr(e, "response") and getattr(e, "response", None) is not None:
176
+ pass
177
+ raise
178
+
179
+ if self.hitsAvailable == 0 or len(dataframe) > self.hitsAvailable:
180
+ raise LlumoAIError.InsufficientCredits()
181
+
182
+ evalDefinition = self.evalDefinition[eval]
183
+ model = "GPT_4"
184
+ provider = "OPENAI"
185
+ evalType = "LLM"
186
+ workspaceID = self.workspaceID
187
+
188
+ self.allBatches = []
189
+ currentBatch = []
190
+
191
+ for index, row in dataframe.iterrows():
192
+ tools = [row["tools"]] if "tools" in dataframe.columns else []
193
+ groundTruth = row["groundTruth"] if "groundTruth" in dataframe.columns else ""
194
+ messageHistory = [row["messageHistory"]] if "messageHistory" in dataframe.columns else []
195
+ promptTemplate = prompt_template
196
+
197
+ keys = re.findall(r"{{(.*?)}}", promptTemplate)
198
+
199
+ if not all([ky in dataframe.columns for ky in keys]):
200
+ raise LlumoAIError.InvalidPromptTemplate()
201
+
202
+ inputDict = {key: row[key] for key in keys if key in row}
203
+ output = row[outputColName] if outputColName in dataframe.columns else ""
204
+
205
+ activePlayground = f"{int(time.time() * 1000)}{uuid.uuid4()}".replace("-", "")
206
+ rowID = f"{int(time.time() * 1000)}{uuid.uuid4()}".replace("-", "")
207
+ columnID = f"{int(time.time() * 1000)}{uuid.uuid4()}".replace("-", "")
208
+
209
+ rowIdMapping[rowID] = index
210
+
211
+ templateData = {
212
+ "processID": getProcessID(),
213
+ "socketID": socketID,
214
+ "source": "SDK",
215
+ "processData": {
216
+ "executionDependency": {
217
+ "query": "",
218
+ "context": "",
219
+ "output": output,
220
+ "tools": tools,
221
+ "groundTruth": groundTruth,
222
+ "messageHistory": messageHistory,
223
+ },
224
+ "definition": evalDefinition,
225
+ "model": model,
226
+ "provider": provider,
227
+ "analytics": eval,
228
+ },
229
+ "workspaceID": workspaceID,
230
+ "type": "EVAL",
231
+ "evalType": evalType,
232
+ "kpi": eval,
233
+ "columnID": columnID,
234
+ "rowID": rowID,
235
+ "playgroundID": activePlayground,
236
+ "processType": "EVAL",
237
+ }
238
+
239
+ query = ""
240
+ context = ""
241
+ for key, value in inputDict.items():
242
+ if isinstance(value, str):
243
+ length = len(value.split()) * 1.5
244
+ if length > 50:
245
+ context += f" {key}: {value}, "
246
+ else:
247
+ if promptTemplate:
248
+ tempObj = {key: value}
249
+ promptTemplate = getInputPopulatedPrompt(promptTemplate, tempObj)
250
+ else:
251
+ query += f" {key}: {value}, "
252
+
253
+ if not context.strip():
254
+ for key, value in inputDict.items():
255
+ context += f" {key}: {value}, "
256
+
257
+ templateData["processData"]["executionDependency"]["context"] = context.strip()
258
+ templateData["processData"]["executionDependency"]["query"] = query.strip()
259
+
260
+ if promptTemplate and not query.strip():
261
+ templateData["processData"]["executionDependency"]["query"] = promptTemplate
262
+
263
+ currentBatch.append(templateData)
264
+
265
+ if len(currentBatch) == 10 or index == len(dataframe) - 1:
266
+ self.allBatches.append(currentBatch)
267
+ currentBatch = []
268
+
269
+ totalItems = sum(len(batch) for batch in self.allBatches)
270
+
271
+ for cnt, batch in enumerate(self.allBatches):
272
+ try:
273
+ self.postBatch(batch=batch, workspaceID=workspaceID)
274
+ # print("Betch Posted with item len: ", len(batch))
275
+ except Exception as e:
276
+ continue
277
+
278
+ time.sleep(1)
279
+
280
+ timeout = max(50, min(600, totalItems * 10))
281
+
282
+ self.socket.listenForResults(
283
+ min_wait=40, max_wait=timeout, inactivity_timeout=150, expected_results=totalItems
284
+ )
285
+
286
+ eval_results = self.socket.getReceivedData()
287
+ results[eval] = self.finalResp(eval_results)
288
+
289
+ except Exception as e:
290
+ raise
291
+ finally:
292
+ try:
293
+ self.socket.disconnect()
294
+ except Exception as e:
295
+ pass
296
+
297
+ for evalName, records in results.items():
298
+ dataframe[evalName] = None
299
+ for item in records:
300
+ for compound_key, value in item.items():
301
+ rowID = compound_key.split('-')[0]
302
+ if rowID in rowIdMapping:
303
+ index = rowIdMapping[rowID]
304
+ dataframe.at[index, evalName] = value
305
+ else:
306
+ pass
307
+ # print(f"⚠️ Warning: Could not find rowID {rowID} in mapping")
308
+
309
+ return dataframe
310
+
311
+ def evaluateCompressor(self, dataframe, prompt_template):
312
+ results = []
313
+
314
+ try:
315
+ # Connect to socket first
316
+ # print("Connecting to socket server...")
317
+ socketID = self.socket.connect(timeout=150)
318
+
319
+ # Ensure full connection before proceeding
320
+ max_wait_secs = 20
321
+ waited_secs = 0
322
+ while not self.socket._connection_established.is_set():
323
+ time.sleep(0.1)
324
+ waited_secs += 0.1
325
+ if waited_secs >= max_wait_secs:
326
+ raise RuntimeError("Timeout waiting for server 'connection-established' event.")
327
+
328
+ # print(f"Connected with socket ID: {socketID}")
329
+
330
+ try:
331
+ # print(f"Validating API key...")
332
+ self.validateApiKey()
333
+ # print(f"API key validation successful. Hits available: {self.hitsAvailable}")
334
+ except Exception as e:
335
+ print(f"Error during API key validation: {str(e)}")
336
+ if hasattr(e, "response") and getattr(e, "response", None) is not None:
337
+ print(f"Status code: {e.response.status_code}")
338
+ print(f"Response content: {e.response.text[:500]}...")
339
+ raise
340
+
341
+ if self.hitsAvailable == 0 or len(dataframe) > self.hitsAvailable:
342
+ raise LlumoAIError.InsufficientCredits()
343
+
344
+ model = "GPT_4"
345
+ provider = "OPENAI"
346
+ evalType = "LLUMO"
347
+ workspaceID = self.workspaceID
348
+
349
+ # Prepare all batches before sending
350
+ # print("Preparing batches...")
351
+ self.allBatches = []
352
+ currentBatch = []
353
+
354
+ for index, row in dataframe.iterrows():
355
+ promptTemplate = prompt_template
356
+
357
+ # extracting the placeholders from the prompt template
358
+ keys = re.findall(r"{{(.*?)}}", promptTemplate)
359
+ inputDict = {key: row[key] for key in keys if key in row}
360
+
361
+ if not all([ky in dataframe.columns for ky in keys]):
362
+ raise LlumoAIError.InvalidPromptTemplate()
363
+
364
+ activePlayground = f"{int(time.time() * 1000)}{uuid.uuid4()}".replace("-", "")
365
+ rowID = f"{int(time.time() * 1000)}{uuid.uuid4()}".replace("-", "")
366
+ columnID = f"{int(time.time() * 1000)}{uuid.uuid4()}".replace("-", "")
367
+
368
+ compressed_prompt_id = f"{int(time.time() * 1000)}{uuid.uuid4()}".replace("-", "")
369
+ compressed_prompt_output_id = f"{int(time.time() * 1000)}{uuid.uuid4()}".replace("-", "")
370
+ cost_id = f"{int(time.time() * 1000)}{uuid.uuid4()}".replace("-", "")
371
+ cost_saving_id = f"{int(time.time() * 1000)}{uuid.uuid4()}".replace("-", "")
372
+
373
+ # Use the server-provided socket ID here
374
+ templateData = {
375
+ "processID": getProcessID(),
376
+ "socketID": socketID,
377
+ "source": "SDK",
378
+ "rowID": rowID,
379
+ "columnID": columnID,
380
+ "processType": "COST_SAVING",
381
+ "evalType": evalType,
382
+ "dependency": list(inputDict.keys()),
383
+ "costColumnMapping": {
384
+ "compressed_prompt": compressed_prompt_id,
385
+ "compressed_prompt_output": compressed_prompt_output_id,
386
+ "cost": cost_id,
387
+ "cost_saving": cost_saving_id
388
+ },
389
+ "processData": {
390
+ "rowData": {
391
+ "query": {"type": "VARIABLE", "value": ""},
392
+ "context": {"type": "VARIABLE", "value": ""},
393
+ },
394
+ "dependency": list(inputDict.keys()),
395
+ "dependencyMapping": {ky: ky for ky in list(inputDict.keys())},
396
+ "provider": provider,
397
+ "model": model,
398
+ "promptText": promptTemplate,
399
+ "costColumnMapping": {
400
+ "compressed_prompt": compressed_prompt_id,
401
+ "compressed_prompt_output": compressed_prompt_output_id,
402
+ "cost": cost_id,
403
+ "cost_saving": cost_saving_id
404
+ }
405
+ },
406
+ "workspaceID": workspaceID,
407
+ "email": "",
408
+ "playgroundID": activePlayground
409
+ }
410
+
411
+
412
+ # Build query/context from input
413
+ query = ""
414
+ context = ""
415
+
416
+ for key, value in inputDict.items():
417
+ if isinstance(value, str):
418
+ length = len(value.split()) * 1.5
419
+ if length > 50:
420
+ context += f" {key}: {value}, "
421
+ else:
422
+ if promptTemplate:
423
+ populatedPrompt = getInputPopulatedPrompt(promptTemplate, {key: value})
424
+ query += f"{populatedPrompt} "
425
+ else:
426
+ query += f" {key}: {value}, "
427
+
428
+ if not context.strip():
429
+ for key, value in inputDict.items():
430
+ context += f" {key}: {value}, "
431
+
432
+ templateData["processData"]["rowData"]["context"]["value"] = context.strip()
433
+ templateData["processData"]["rowData"]["query"]["value"] = query.strip()
434
+
435
+ if promptTemplate and not query.strip():
436
+ templateData["processData"]["rowData"]["query"]["value"] = promptTemplate
437
+
438
+ # print(templateData)
439
+ currentBatch.append(templateData)
440
+
441
+ if len(currentBatch) == 10 or index == len(dataframe) - 1:
442
+ self.allBatches.append(currentBatch)
443
+ currentBatch = []
444
+
445
+ # Post all batches
446
+ total_items = sum(len(batch) for batch in self.allBatches)
447
+ # print(f"Posting {len(self.allBatches)} batches ({total_items} items total)")
448
+
449
+ for cnt, batch in enumerate(self.allBatches):
450
+ # print(f"Posting batch {cnt + 1}/{len(self.allBatches)} for eval '{eval}'")
451
+ try:
452
+ self.postBatch(batch=batch, workspaceID=workspaceID)
453
+ # print(f"Batch {cnt + 1} posted successfully")
454
+ except Exception as e:
455
+ print(f"Error posting batch {cnt + 1}: {str(e)}")
456
+ continue
457
+
458
+ # Small delay between batches to prevent overwhelming the server
459
+ time.sleep(1)
460
+
461
+ # updating the dict for row column mapping
462
+ self.AllProcessMapping()
463
+ # Calculate a reasonable timeout based on the data size
464
+ timeout = max(60, min(600, total_items * 10))
465
+ # print(f"All batches posted. Waiting up to {timeout} seconds for results...")
466
+
467
+ # Listen for results
468
+ self.socket.listenForResults(min_wait=20, max_wait=timeout, inactivity_timeout=30,expected_results=None)
469
+
470
+ # Get results for this evaluation
471
+ eval_results = self.socket.getReceivedData()
472
+ # print(f"Received {len(eval_results)} results for evaluation '{eval}'")
473
+
474
+ # Add these results to our overall results
475
+ results = self.finalResp(eval_results)
476
+ print(f"======= Completed evaluation: {eval} =======\n")
477
+
478
+ # print("All evaluations completed successfully")
479
+
480
+ except Exception as e:
481
+ print(f"Error during evaluation: {e}")
482
+ raise
483
+ finally:
484
+ # Always disconnect the socket when done
485
+ try:
486
+ self.socket.disconnect()
487
+ # print("Socket disconnected")
488
+ except Exception as e:
489
+ print(f"Error disconnecting socket: {e}")
490
+
491
+ compressed_prompt , compressed_prompt_output , cost , cost_saving = costColumnMapping(results,self.processMapping)
492
+ dataframe["compressed_prompt"] = compressed_prompt
493
+ dataframe["compressed_prompt_output"] = compressed_prompt_output
494
+ dataframe["cost"] = cost
495
+ dataframe["cost_saving"] = cost_saving
496
+ return dataframe
497
+
498
+
499
+ def run_sweep(self,templates: List[str], dataset: Dict[str, List[str]], model_aliases: List[AVAILABLEMODELS], apiKey: str, eval = ["Response Correctness"],toEvaluate:bool =False ) -> pd.DataFrame:
500
+ executor = ModelExecutor(apiKey)
501
+
502
+ keys = list(dataset.keys())
503
+ value_combinations = list(itertools.product(*dataset.values()))
504
+ combinations = [dict(zip(keys, values)) for values in value_combinations]
505
+
506
+ results = []
507
+
508
+ # Iterate through combinations
509
+ for combo in combinations:
510
+ for template in templates:
511
+ prompt = template
512
+ for k, v in combo.items():
513
+ prompt = prompt.replace(f"{{{{{k}}}}}", v)
514
+ # Add a row for each model
515
+ for model in model_aliases:
516
+ row = {
517
+ "template": template,
518
+ "prompt": prompt,
519
+ **combo,
520
+ "model": model.value
521
+ }
522
+
523
+
524
+ try:
525
+ provider = getProviderFromModel(model)
526
+ response = executor.execute(provider, model.value, prompt, apiKey)
527
+ row["output"] = response
528
+ except Exception as e:
529
+ row["output"] = f"Error: {str(e)}"
530
+
531
+ results.append(row)
532
+ df=pd.DataFrame(results)
533
+ if toEvaluate:
534
+
535
+ res = self.evaluate(df,eval =eval ,prompt_template=str(templates[0]))
536
+ return res
537
+
538
+ return df
539
+
540
+ def evaluateAgents(self, dataframe, model, agents, model_api_key=None,
541
+ prompt_template="Give answer for the given query: {{query}}"):
542
+ if model.lower() not in ["openai", "google"]:
543
+ raise ValueError("Model must be 'openai' or 'google'")
544
+
545
+ # Run unified agent execution
546
+ toolResponseDf = LlumoAgentExecutor.run(dataframe, agents, model=model, model_api_key=model_api_key)
547
+ evals = ["Tool Reliability", "Stepwise Progression", "Tool Selection Accuracy", "Final Task Alignment"]
548
+
549
+ for eval in evals:
550
+ # Perform evaluation
551
+ toolResponseDf = self.evaluate(
552
+ toolResponseDf,
553
+ eval = eval,
554
+ prompt_template=prompt_template
555
+ )
556
+ return toolResponseDf
557
+
558
+
559
class SafeDict(dict):
    """Dict that yields an empty string for missing keys instead of raising.

    Useful for lenient template substitution, e.g. with ``str.format_map``.
    """

    def __missing__(self, key):
        # Invoked by dict.__getitem__ when *key* is absent; substitute "".
        return ""