llumo 0.1.4__py3-none-any.whl → 0.1.6__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
llumo/client.py CHANGED
@@ -1,554 +1,565 @@
- import requests
- from .exceptions import LlumoAPIError
- import time
- import re
- import json
- import uuid
- import threading
- from .helpingFuntions import *
- from dotenv import load_dotenv
- import os
- import itertools
- import pandas as pd
- from typing import List, Dict
- from .models import AVAILABLEMODELS,getProviderFromModel
- from .execution import ModelExecutor
- from .sockets import LlumoSocketClient
-
-
- # 👇 NEW: Explicitly load .env from the package folder
- envPath = os.path.join(os.path.dirname(__file__), '.env')
- load_dotenv(dotenv_path=envPath, override=False)# Automatically looks for .env in current directory
-
- postUrl = os.getenv("postUrl")
- fetchUrl = os.getenv("fetchUrl")
- validateUrl = os.getenv("validateUrl")
- socketUrl = os.getenv("SOCKET_URL")
-
-
- class LlumoClient:
-
-     def __init__(self, api_key):
-         self.apiKey = api_key
-         self.socket = LlumoSocketClient(socketUrl)
-         self.processMapping = {}
-
-
-     def validateApiKey(self, evalName = ""):
-         headers = {
-             "Authorization": f"Bearer {self.apiKey}",
-             "Content-Type": "application/json",
-         }
-         reqBody = {"analytics": [evalName]}
-
-         print(f"Making API key validation request to: {validateUrl}")
-         print(f"Request body: {reqBody}")
-
-         try:
-             response = requests.post(url=validateUrl, json=reqBody, headers=headers)
-             print(response.text)
-             # Print response info for debugging
-             print(f"Response status code: {response.status_code}")
-             print(f"Response headers: {response.headers}")
-
-             # Try to get at least some of the response content
-             try:
-                 response_preview = response.text[:500] # First 500 chars
-                 print(f"Response preview: {response_preview}")
-             except Exception as e:
-                 print(f"Could not get response preview: {e}")
-
-         except requests.exceptions.RequestException as e:
-             print(f"Request exception: {str(e)}")
-             raise LlumoAPIError.RequestFailed(detail=str(e))
-
-         if response.status_code == 401:
-             raise LlumoAPIError.InvalidApiKey()
-
-         # Handle other common status codes
-         if response.status_code == 404:
-             raise LlumoAPIError.RequestFailed(
-                 detail=f"Endpoint not found (404): {validateUrl}"
-             )
-
-         if response.status_code >= 500:
-             raise LlumoAPIError.ServerError(
-                 detail=f"Server error ({response.status_code})"
-             )
-
-         if response.status_code != 200:
-             raise LlumoAPIError.RequestFailed(
-                 detail=f"Unexpected status code: {response.status_code}"
-             )
-
-         # Try to parse JSON
-         try:
-             data = response.json()
-         except ValueError as e:
-             print(f"JSON parsing error: {str(e)}")
-             print(
-                 f"Response content that could not be parsed: {response.text[:1000]}..."
-             )
-             raise LlumoAPIError.InvalidJsonResponse()
-
-         if "data" not in data or not data["data"]:
-             print(f"Invalid API response structure: {data}")
-             raise LlumoAPIError.InvalidApiResponse()
-
-         try:
-             self.hitsAvailable = data["data"].get("remainingHits", 0)
-             self.workspaceID = data["data"].get("workspaceID")
-             self.evalDefinition = data["data"].get("analyticsMapping")
-             self.token = data["data"].get("token")
-
-             print(f"API key validation successful:")
-             # print(f"- Remaining hits: {self.hitsAvailable}")
-             # print(f"- Workspace ID: {self.workspaceID}")
-             # print(f"- Token received: {'Yes' if self.token else 'No'}")
-
-         except Exception as e:
-             print(f"Error extracting data from response: {str(e)}")
-             raise LlumoAPIError.UnexpectedError(detail=str(e))
-
-     def postBatch(self, batch, workspaceID):
-         payload = {
-             "batch": json.dumps(batch),
-             "runType": "EVAL",
-             "workspaceID": workspaceID,
-         }
-         headers = {
-             "Authorization": f"Bearer {self.token}",
-             "Content-Type": "application/json",
-         }
-         try:
-             print(postUrl)
-             response = requests.post(postUrl, json=payload, headers=headers)
-             # print(f"Post API Status Code: {response.status_code}")
-             # print(response.text)
-
-         except Exception as e:
-             print(f"Error in posting batch: {e}")
-
-     def AllProcessMapping(self):
-         for batch in self.allBatches:
-             for record in batch:
-                 rowId = record['rowID']
-                 colId = record['columnID']
-                 pid = f'{rowId}-{colId}-{colId}'
-                 self.processMapping[pid] = record
-
-
-     def finalResp(self,results):
-         seen = set()
-         uniqueResults = []
-
-         for item in results:
-             for rowID in item: # Each item has only one key
-                 if rowID not in seen:
-                     seen.add(rowID)
-                     uniqueResults.append(item)
-
-         return uniqueResults
-
-     def evaluate(self, dataframe, evals=["Response Completeness"],prompt_template = ""):
-         results = {}
-         try:
-             # Connect to socket first
-             print("Connecting to socket server...")
-             socketID = self.socket.connect(timeout=20)
-             print(f"Connected with socket ID: {socketID}")
-
-             # Process each evaluation
-             for eval in evals:
-                 print(f"\n======= Running evaluation for: {eval} =======")
-
-                 try:
-                     print(f"Validating API key for {eval}...")
-                     self.validateApiKey(evalName=eval)
-                     print(
-                         f"API key validation successful. Hits available: {self.hitsAvailable}"
-                     )
-                 except Exception as e:
-                     print(f"Error during API key validation: {str(e)}")
-                     if (
-                         hasattr(e, "response")
-                         and getattr(e, "response", None) is not None
-                     ):
-                         print(f"Status code: {e.response.status_code}")
-                         print(f"Response content: {e.response.text[:500]}...")
-                     raise
-
-                 if self.hitsAvailable == 0 or len(dataframe) > self.hitsAvailable:
-                     raise LlumoAPIError.InsufficientCredits()
-
-                 evalDefinition = self.evalDefinition[eval]
-                 model = "GPT_4"
-                 provider = "OPENAI"
-                 evalType = "LLM"
-                 workspaceID = self.workspaceID
-
-                 # Prepare all batches before sending
-                 print("Preparing batches...")
-                 self.allBatches = []
-                 currentBatch = []
-
-                 for index, row in dataframe.iterrows():
-
-                     tools = row["tools"] if "tools" in dataframe.columns else []
-                     groundTruth = row["groundTruth"] if "groundTruth" in dataframe.columns else ""
-                     messageHistory = row["messageHistory"] if "messageHistory" in dataframe.columns else []
-                     promptTemplate = prompt_template
-
-                     keys = re.findall(r"{{(.*?)}}", promptTemplate)
-
-                     # extracting the required values for the the columns based on the prompt template
-                     inputDict = {key: row[key] for key in keys if key in row}
-                     output = row["output"]
-
-                     activePlayground = (
-                         f"{int(time.time() * 1000)}{uuid.uuid4()}".replace("-", "")
-                     )
-                     rowID = f"{int(time.time() * 1000)}{uuid.uuid4()}".replace("-", "")
-                     columnID = f"{int(time.time() * 1000)}{uuid.uuid4()}".replace(
-                         "-", ""
-                     )
-
-                     # Use the server-provided socket ID here
-                     templateData = {
-                         "processID": getProcessID(),
-                         "socketID": socketID, # Using the server-assigned socket ID
-                         "processData": {
-                             "executionDependency": {
-                                 "query": "",
-                                 "context": "",
-                                 "output": output,
-                                 "tools": tools,
-                                 "groundTruth": groundTruth,
-                                 "messageHistory": messageHistory,
-                             },
-                             "definition": evalDefinition,
-                             "model": model,
-                             "provider": provider,
-                             "analytics": eval,
-                         },
-                         "workspaceID": workspaceID,
-                         "type": "EVAL",
-                         "evalType": evalType,
-                         "kpi": eval,
-                         "columnID": columnID,
-                         "rowID": rowID,
-                         "playgroundID": activePlayground,
-                         "processType": "EVAL",
-                     }
-
-                     # Build query/context from input
-                     query = ""
-                     context = ""
-                     for key, value in inputDict.items():
-                         if isinstance(value, str):
-                             length = len(value.split()) * 1.5
-                             if length > 50:
-                                 context += f" {key}: {value}, "
-                             else:
-                                 if promptTemplate:
-                                     tempObj = {key: value}
-                                     promptTemplate = getInputPopulatedPrompt(promptTemplate, tempObj)
-                                 else:
-                                     query += f" {key}: {value}, "
-
-                     if not context.strip():
-                         for key, value in inputDict.items():
-                             context += f" {key}: {value}, "
-
-                     templateData["processData"]["executionDependency"]["context"] = context.strip()
-                     templateData["processData"]["executionDependency"]["query"] = query.strip()
-
-                     if promptTemplate and not query.strip():
-                         templateData["processData"]["executionDependency"]["query"] = promptTemplate
-
-                     currentBatch.append(templateData)
-
-                     if len(currentBatch) == 10 or index == len(dataframe) - 1:
-                         self.allBatches.append(currentBatch)
-                         currentBatch = []
-
-                 # Post all batches
-                 total_items = sum(len(batch) for batch in self.allBatches)
-                 print(f"Posting {len(self.allBatches)} batches ({total_items} items total)")
-
-                 for cnt, batch in enumerate(self.allBatches):
-                     print(
-                         f"Posting batch {cnt + 1}/{len(self.allBatches)} for eval '{eval}'"
-                     )
-                     try:
-                         self.postBatch(batch=batch, workspaceID=workspaceID)
-                         print(f"Batch {cnt + 1} posted successfully")
-                     except Exception as e:
-                         print(f"Error posting batch {cnt + 1}: {str(e)}")
-                         continue
-
-
-
-                     # Small delay between batches to prevent overwhelming the server
-                     time.sleep(1)
-
-                 # updating the dict for row column mapping
-                 self.AllProcessMapping()
-                 # Calculate a reasonable timeout based on the data size
-                 timeout = max(60, min(600, total_items * 10))
-                 print(
-                     f"All batches posted. Waiting up to {timeout} seconds for results..."
-                 )
-
-                 # Listen for results
-                 self.socket.listen_for_results(
-                     min_wait=10, max_wait=timeout, inactivity_timeout=30
-                 )
-
-                 # Get results for this evaluation
-                 eval_results = self.socket.get_received_data()
-                 print(f"Received {len(eval_results)} results for evaluation '{eval}'")
-
-                 # Add these results to our overall results
-                 results[eval] = self.finalResp(eval_results)
-                 print(f"======= Completed evaluation: {eval} =======\n")
-
-             print("All evaluations completed successfully")
-
-         except Exception as e:
-             print(f"Error during evaluation: {e}")
-             raise
-         finally:
-             # Always disconnect the socket when done
-             try:
-                 self.socket.disconnect()
-                 print("Socket disconnected")
-             except Exception as e:
-                 print(f"Error disconnecting socket: {e}")
-
-         for evalName, records in results.items():
-             for item in records:
-                 self.processMapping[list(item.keys())[0]] = list(item.values())[0]
-
-
-
-             dataframe[evalName] = self.processMapping.values()
-
-         return dataframe
-
-     def evaluateCompressor(self, dataframe, prompt_template):
-         results = []
-         try:
-             # Connect to socket first
-             print("Connecting to socket server...")
-             socketID = self.socket.connect(timeout=20)
-             print(f"Connected with socket ID: {socketID}")
-
-             try:
-                 print(f"Validating API key...")
-                 self.validateApiKey()
-                 print(f"API key validation successful. Hits available: {self.hitsAvailable}")
-             except Exception as e:
-                 print(f"Error during API key validation: {str(e)}")
-                 if hasattr(e, "response") and getattr(e, "response", None) is not None:
-                     print(f"Status code: {e.response.status_code}")
-                     print(f"Response content: {e.response.text[:500]}...")
-                 raise
-
-             if self.hitsAvailable == 0 or len(dataframe) > self.hitsAvailable:
-                 raise LlumoAPIError.InsufficientCredits()
-
-             model = "GPT_4"
-             provider = "OPENAI"
-             evalType = "LLUMO"
-             workspaceID = self.workspaceID
-
-             # Prepare all batches before sending
-             print("Preparing batches...")
-             self.allBatches = []
-             currentBatch = []
-
-             for index, row in dataframe.iterrows():
-                 promptTemplate = prompt_template
-
-                 # extracting the placeholders from the prompt template
-                 keys = re.findall(r"{{(.*?)}}", promptTemplate)
-                 inputDict = {key: row[key] for key in keys if key in row}
-
-                 activePlayground = f"{int(time.time() * 1000)}{uuid.uuid4()}".replace("-", "")
-                 rowID = f"{int(time.time() * 1000)}{uuid.uuid4()}".replace("-", "")
-                 columnID = f"{int(time.time() * 1000)}{uuid.uuid4()}".replace("-", "")
-
-                 compressed_prompt_id = f"{int(time.time() * 1000)}{uuid.uuid4()}".replace("-", "")
-                 compressed_prompt_output_id = f"{int(time.time() * 1000)}{uuid.uuid4()}".replace("-", "")
-                 cost_id = f"{int(time.time() * 1000)}{uuid.uuid4()}".replace("-", "")
-                 cost_saving_id = f"{int(time.time() * 1000)}{uuid.uuid4()}".replace("-", "")
-
-                 # Use the server-provided socket ID here
-                 templateData = {
-                     "processID": getProcessID(),
-                     "socketID": socketID,
-                     "rowID": rowID,
-                     "columnID": columnID,
-                     "processType": "COST_SAVING",
-                     "evalType": evalType,
-                     "dependency": list(inputDict.keys()),
-                     "costColumnMapping": {
-                         "compressed_prompt": compressed_prompt_id,
-                         "compressed_prompt_output": compressed_prompt_output_id,
-                         "cost": cost_id,
-                         "cost_saving": cost_saving_id
-                     },
-                     "processData": {
-                         "rowData": {
-                             "query": {"type": "VARIABLE", "value": ""},
-                             "context": {"type": "VARIABLE", "value": ""},
-                         },
-                         "dependency": list(inputDict.keys()),
-                         "dependencyMapping": {ky: ky for ky in list(inputDict.keys())},
-                         "provider": provider,
-                         "model": model,
-                         "promptText": promptTemplate,
-                         "costColumnMapping": {
-                             "compressed_prompt": compressed_prompt_id,
-                             "compressed_prompt_output": compressed_prompt_output_id,
-                             "cost": cost_id,
-                             "cost_saving": cost_saving_id
-                         }
-                     },
-                     "workspaceID": workspaceID,
-                     "email": "",
-                     "playgroundID": activePlayground
-                 }
-
-
-                 # Build query/context from input
-                 query = ""
-                 context = ""
-
-                 for key, value in inputDict.items():
-                     if isinstance(value, str):
-                         length = len(value.split()) * 1.5
-                         if length > 50:
-                             context += f" {key}: {value}, "
-                         else:
-                             if promptTemplate:
-                                 populatedPrompt = getInputPopulatedPrompt(promptTemplate, {key: value})
-                                 query += f"{populatedPrompt} "
-                             else:
-                                 query += f" {key}: {value}, "
-
-                 if not context.strip():
-                     for key, value in inputDict.items():
-                         context += f" {key}: {value}, "
-
-                 templateData["processData"]["rowData"]["context"]["value"] = context.strip()
-                 templateData["processData"]["rowData"]["query"]["value"] = query.strip()
-
-                 if promptTemplate and not query.strip():
-                     templateData["processData"]["rowData"]["query"]["value"] = promptTemplate
-
-                 print(templateData)
-                 currentBatch.append(templateData)
-
-                 if len(currentBatch) == 10 or index == len(dataframe) - 1:
-                     self.allBatches.append(currentBatch)
-                     currentBatch = []
-
-             # Post all batches
-             total_items = sum(len(batch) for batch in self.allBatches)
-             print(f"Posting {len(self.allBatches)} batches ({total_items} items total)")
-
-             for cnt, batch in enumerate(self.allBatches):
-                 print(f"Posting batch {cnt + 1}/{len(self.allBatches)} for eval '{eval}'")
-                 try:
-                     self.postBatch(batch=batch, workspaceID=workspaceID)
-                     print(f"Batch {cnt + 1} posted successfully")
-                 except Exception as e:
-                     print(f"Error posting batch {cnt + 1}: {str(e)}")
-                     continue
-
-                 # Small delay between batches to prevent overwhelming the server
-                 time.sleep(1)
-
-             # updating the dict for row column mapping
-             self.AllProcessMapping()
-             # Calculate a reasonable timeout based on the data size
-             timeout = max(60, min(600, total_items * 10))
-             print(f"All batches posted. Waiting up to {timeout} seconds for results...")
-
-             # Listen for results
-             self.socket.listen_for_results(min_wait=10, max_wait=timeout, inactivity_timeout=30)
-
-             # Get results for this evaluation
-             eval_results = self.socket.get_received_data()
-             print(f"Received {len(eval_results)} results for evaluation '{eval}'")
-
-             # Add these results to our overall results
-             results = self.finalResp(eval_results)
-             print(f"======= Completed evaluation: {eval} =======\n")
-
-             print("All evaluations completed successfully")
-
-         except Exception as e:
-             print(f"Error during evaluation: {e}")
-             raise
-         finally:
-             # Always disconnect the socket when done
-             try:
-                 self.socket.disconnect()
-                 print("Socket disconnected")
-             except Exception as e:
-                 print(f"Error disconnecting socket: {e}")
-
-         compressed_prompt , compressed_prompt_output , cost , cost_saving = costColumnMapping(results,self.processMapping)
-         dataframe["compressed_prompt"] = compressed_prompt
-         dataframe["compressed_prompt_output"] = compressed_prompt_output
-         dataframe["cost"] = cost
-         dataframe["cost_saving"] = cost_saving
-         return dataframe
-
-
-     def run_sweep(self,templates: List[str], dataset: Dict[str, List[str]], model_aliases: List[AVAILABLEMODELS], apiKey: str, evals = ["Response Correctness"]) -> pd.DataFrame:
-         executor = ModelExecutor(apiKey)
-
-         keys = list(dataset.keys())
-         value_combinations = list(itertools.product(*dataset.values()))
-         combinations = [dict(zip(keys, values)) for values in value_combinations]
-
-         results = []
-
-         # Iterate through combinations
-         for combo in combinations:
-             for template in templates:
-                 prompt = template
-                 for k, v in combo.items():
-                     prompt = prompt.replace(f"{{{{{k}}}}}", v)
-                 # Add a row for each model
-                 for model in model_aliases:
-                     row = {
-                         "template": template,
-                         "prompt": prompt,
-                         **combo,
-                         "model": model.value
-                     }
-
-
-                     try:
-                         provider = getProviderFromModel(model)
-                         response = executor.execute(provider, model.value, prompt, apiKey)
-                         row["output"] = response
-                     except Exception as e:
-                         row["output"] = f"Error: {str(e)}"
-
-                     results.append(row)
-         df=pd.DataFrame(results)
-         df.to_csv("sweep_results.csv", index=False)
-         print(str(templates[0]))
-         res = self.evaluate(df,evals =evals,prompt_template=str(templates[0]))
-         return res
-
-
- class SafeDict(dict):
-     def __missing__(self, key):
-         return ""
+ import requests
+ from .exceptions import LlumoAIError
+ import time
+ import re
+ import json
+ import uuid
+ import threading
+ from .helpingFuntions import *
+ from dotenv import load_dotenv
+ import os
+ import itertools
+ import pandas as pd
+ from typing import List, Dict
+ from .models import AVAILABLEMODELS,getProviderFromModel
+ from .execution import ModelExecutor
+ from .sockets import LlumoSocketClient
+ from .functionCalling import LlumoAgentExecutor
+
+
+ # 👇 NEW: Explicitly load .env from the package folder
+ envPath = os.path.join(os.path.dirname(__file__), '.env')
+ load_dotenv(dotenv_path=envPath, override=False)# Automatically looks for .env in current directory
+
+ postUrl = os.getenv("postUrl")
+ fetchUrl = os.getenv("fetchUrl")
+ validateUrl = os.getenv("validateUrl")
+ socketUrl = os.getenv("SOCKET_URL")
+
+
+ class LlumoClient:
+
+     def __init__(self, api_key):
+         self.apiKey = api_key
+         self.socket = LlumoSocketClient(socketUrl)
+         self.processMapping = {}
+
+
+     def validateApiKey(self, evalName = ""):
+         headers = {
+             "Authorization": f"Bearer {self.apiKey}",
+             "Content-Type": "application/json",
+         }
+         reqBody = {"analytics": [evalName]}
+
+         # print(f"Making API key validation request to: {validateUrl}")
+         # print(f"Request body: {reqBody}")
+
+         try:
+             response = requests.post(url=validateUrl, json=reqBody, headers=headers)
+             # print(response.text)
+             # Print response info for debugging
+             # print(f"Response status code: {response.status_code}")
+             # print(f"Response headers: {response.headers}")
+
+             # Try to get at least some of the response content
+             try:
+                 response_preview = response.text[:500] # First 500 chars
+                 # print(f"Response preview: {response_preview}")
+             except Exception as e:
+                 print(f"Could not get response preview: {e}")
+
+         except requests.exceptions.RequestException as e:
+             print(f"Request exception: {str(e)}")
+             raise LlumoAIError.RequestFailed(detail=str(e))
+
+         if response.status_code == 401:
+             raise LlumoAIError.InvalidApiKey()
+
+         # Handle other common status codes
+         if response.status_code == 404:
+             raise LlumoAIError.RequestFailed(
+                 detail=f"Endpoint not found (404): {validateUrl}"
+             )
+
+         # if response.status_code >= 500:
+         #     raise LlumoAIError.ServerError(
+         #         detail=f"Server error ({response.status_code})"
+         #     )
+
+         if response.status_code != 200:
+             raise LlumoAIError.RequestFailed(
+                 detail=f"Unexpected status code: {response.status_code}"
+             )
+
+         # Try to parse JSON
+         try:
+             data = response.json()
+         except ValueError as e:
+             print(f"JSON parsing error: {str(e)}")
+             # print(f"Response content that could not be parsed: {response.text[:1000]}...")
+             raise LlumoAIError.InvalidJsonResponse()
+
+         if "data" not in data or not data["data"]:
+             # print(f"Invalid API response structure: {data}")
+             raise LlumoAIError.InvalidApiResponse()
+
+         try:
+             self.hitsAvailable = data["data"].get("remainingHits", 0)
+             self.workspaceID = data["data"].get("workspaceID")
+             self.evalDefinition = data["data"].get("analyticsMapping")
+             self.socketToken = data["data"].get("token")
+
+             # print(f"API key validation successful:")
+             # print(f"- Remaining hits: {self.hitsAvailable}")
+             # print(f"- Workspace ID: {self.workspaceID}")
+             # print(f"- Token received: {'Yes' if self.socketToken else 'No'}")
+
+         except Exception as e:
+             # print(f"Error extracting data from response: {str(e)}")
+             raise LlumoAIError.UnexpectedError(detail=str(e))
+
+     def postBatch(self, batch, workspaceID):
+         payload = {
+             "batch": json.dumps(batch),
+             "runType": "EVAL",
+             "workspaceID": workspaceID,
+         }
+         headers = {
+             "Authorization": f"Bearer {self.socketToken}",
+             "Content-Type": "application/json",
+         }
+         try:
+             # print(postUrl)
+             response = requests.post(postUrl, json=payload, headers=headers)
+             # print(f"Post API Status Code: {response.status_code}")
+             # print(response.text)
+
+         except Exception as e:
+             print(f"Error in posting batch: {e}")
+
+     def AllProcessMapping(self):
+         for batch in self.allBatches:
+             for record in batch:
+                 rowId = record['rowID']
+                 colId = record['columnID']
+                 pid = f'{rowId}-{colId}-{colId}'
+                 self.processMapping[pid] = record
+
+
+     def finalResp(self,results):
+         seen = set()
+         uniqueResults = []
+
+         for item in results:
+             for rowID in item: # Each item has only one key
+                 if rowID not in seen:
+                     seen.add(rowID)
+                     uniqueResults.append(item)
+
+         return uniqueResults
+
+     def evaluate(self, dataframe, evals=["Response Completeness"], prompt_template="",outputColName = "output"):
+
+
+         results = {}
+         try:
+             # Connect to socket first
+             # print("Connecting to socket server...")
+             socketID = self.socket.connect(timeout=30)
+             # print(f"Connected with socket ID: {socketID}")
+
+             # Store the mapping of row IDs to dataframe indices
+             rowIdMapping = {}
+
+             # Process each evaluation
+             for eval in evals:
+                 print(f"\n======= Running evaluation for: {eval} =======")
+
+                 try:
+                     # print(f"Validating API key for {eval}...")
+                     self.validateApiKey(evalName=eval)
+                     # print(f"API key validation successful. Hits available: {self.hitsAvailable}")
+                 except Exception as e:
+                     # print(f"Error during API key validation: {str(e)}")
+                     if hasattr(e, "response") and getattr(e, "response", None) is not None:
+                         # print(f"Status code: {e.response.status_code}")
+                         # print(f"Response content: {e.response.text[:500]}...")
+                         pass
+                     raise
+
+                 if self.hitsAvailable == 0 or len(dataframe) > self.hitsAvailable:
+                     raise LlumoAIError.InsufficientCredits()
+
+                 evalDefinition = self.evalDefinition[eval]
+                 model = "GPT_4"
+                 provider = "OPENAI"
+                 evalType = "LLM"
+                 workspaceID = self.workspaceID
+
+                 # Prepare all batches before sending
+                 # print("Preparing batches...")
+                 self.allBatches = []
+                 currentBatch = []
+
+                 for index, row in dataframe.iterrows():
+                     tools = [row["tools"]] if "tools" in dataframe.columns else []
+                     groundTruth = row["groundTruth"] if "groundTruth" in dataframe.columns else ""
+                     messageHistory = [row["messageHistory"]] if "messageHistory" in dataframe.columns else []
+                     promptTemplate = prompt_template
+
+                     keys = re.findall(r"{{(.*?)}}", promptTemplate)
+
+                     # raise error if the prompt template is not in expected format
+                     if not all([ky in dataframe.columns for ky in keys]):
+                         raise LlumoAIError.InvalidPromptTemplate()
+
+                     inputDict = {key: row[key] for key in keys if key in row}
+
+
+                     output = row[outputColName] if outputColName in dataframe.columns else ""
+
+                     activePlayground = f"{int(time.time() * 1000)}{uuid.uuid4()}".replace("-", "")
+                     rowID = f"{int(time.time() * 1000)}{uuid.uuid4()}".replace("-", "")
+                     columnID = f"{int(time.time() * 1000)}{uuid.uuid4()}".replace("-", "")
+
+                     rowIdMapping[rowID] = index
+
+                     templateData = {
+                         "processID": getProcessID(),
+                         "socketID": socketID,
+                         "source": "SDK",
+                         "processData": {
+                             "executionDependency": {
+                                 "query": "",
+                                 "context": "",
+                                 "output": output,
+                                 "tools": tools,
+                                 "groundTruth": groundTruth,
+                                 "messageHistory": messageHistory,
+                             },
+                             "definition": evalDefinition,
+                             "model": model,
+                             "provider": provider,
+                             "analytics": eval,
+                         },
+                         "workspaceID": workspaceID,
+                         "type": "EVAL",
+                         "evalType": evalType,
+                         "kpi": eval,
+                         "columnID": columnID,
+                         "rowID": rowID,
+                         "playgroundID": activePlayground,
+                         "processType": "EVAL",
+                     }
+
+                     query = ""
+                     context = ""
+                     for key, value in inputDict.items():
+                         if isinstance(value, str):
+                             length = len(value.split()) * 1.5
+                             if length > 50:
+                                 context += f" {key}: {value}, "
+                             else:
+                                 if promptTemplate:
+                                     tempObj = {key: value}
+                                     promptTemplate = getInputPopulatedPrompt(promptTemplate, tempObj)
+                                 else:
+                                     query += f" {key}: {value}, "
+
+                     if not context.strip():
+                         for key, value in inputDict.items():
+                             context += f" {key}: {value}, "
+
+                     templateData["processData"]["executionDependency"]["context"] = context.strip()
+                     templateData["processData"]["executionDependency"]["query"] = query.strip()
+
+                     if promptTemplate and not query.strip():
+                         templateData["processData"]["executionDependency"]["query"] = promptTemplate
+
+                     currentBatch.append(templateData)
+
+                     if len(currentBatch) == 10 or index == len(dataframe) - 1:
+                         self.allBatches.append(currentBatch)
+                         currentBatch = []
+
+                 totalItems = sum(len(batch) for batch in self.allBatches)
+                 # print(f"Posting {len(self.allBatches)} batches ({totalItems } items total)")
+
+                 for cnt, batch in enumerate(self.allBatches):
+                     # print(f"Posting batch {cnt + 1}/{len(self.allBatches)} for eval '{eval}'")
+                     try:
+                         self.postBatch(batch=batch, workspaceID=workspaceID)
+                         # print(f"Batch {cnt + 1} posted successfully")
+                     except Exception as e:
+                         # print(f"Error posting batch {cnt + 1}: {str(e)}")
+                         continue
+
+                     time.sleep(1)
+
+                 timeout = max(60, min(600, totalItems * 10))
+                 # print(f"All batches posted. Waiting up to {timeout} seconds for results...")
+
+                 self.socket.listenForResults(
+                     min_wait=40, max_wait=timeout, inactivity_timeout=40
+                 )
+
+                 eval_results = self.socket.getReceivedData()
+                 # print(f"Received {len(eval_results)} results for evaluation '{eval}'")
+
+                 results[eval] = self.finalResp(eval_results)
+                 # print(f"======= Completed evaluation: {eval} =======\n")
+
+             # print("All evaluations completed successfully")
+
+         except Exception as e:
+             # print(f"Error during evaluation: {e}")
+             raise
+         finally:
+             try:
+                 self.socket.disconnect()
+                 # print("Socket disconnected")
+             except Exception as e:
+                 # print(f"Error disconnecting socket: {e}")
+                 pass
+
+         for evalName, records in results.items():
+             dataframe[evalName] = None
+             for item in records:
+                 for compound_key, value in item.items():
+                     rowID = compound_key.split('-')[0]
+                     if rowID in rowIdMapping:
+                         index = rowIdMapping[rowID]
+                         dataframe.at[index, evalName] = value
+                     else:
+                         print(f"Warning: Could not find rowID {rowID} in mapping")
+                         pass
+
+         return dataframe
+
+     def evaluateCompressor(self, dataframe, prompt_template):
+         results = []
+         try:
+             # Connect to socket first
+             # print("Connecting to socket server...")
+             socketID = self.socket.connect(timeout=30)
+             # print(f"Connected with socket ID: {socketID}")
+
+             try:
+                 # print(f"Validating API key...")
+                 self.validateApiKey()
+                 # print(f"API key validation successful. Hits available: {self.hitsAvailable}")
+             except Exception as e:
+                 print(f"Error during API key validation: {str(e)}")
+                 if hasattr(e, "response") and getattr(e, "response", None) is not None:
+                     print(f"Status code: {e.response.status_code}")
+                     print(f"Response content: {e.response.text[:500]}...")
+                 raise
+
+             if self.hitsAvailable == 0 or len(dataframe) > self.hitsAvailable:
+                 raise LlumoAIError.InsufficientCredits()
+
+             model = "GPT_4"
+             provider = "OPENAI"
+             evalType = "LLUMO"
+             workspaceID = self.workspaceID
+
+             # Prepare all batches before sending
+             # print("Preparing batches...")
+             self.allBatches = []
+             currentBatch = []
+
+             for index, row in dataframe.iterrows():
+                 promptTemplate = prompt_template
+
+                 # extracting the placeholders from the prompt template
+                 keys = re.findall(r"{{(.*?)}}", promptTemplate)
+                 inputDict = {key: row[key] for key in keys if key in row}
+
+                 if not all([ky in dataframe.columns for ky in keys]):
+                     raise LlumoAIError.InvalidPromptTemplate()
+
+                 activePlayground = f"{int(time.time() * 1000)}{uuid.uuid4()}".replace("-", "")
+                 rowID = f"{int(time.time() * 1000)}{uuid.uuid4()}".replace("-", "")
+                 columnID = f"{int(time.time() * 1000)}{uuid.uuid4()}".replace("-", "")
+
+                 compressed_prompt_id = f"{int(time.time() * 1000)}{uuid.uuid4()}".replace("-", "")
+                 compressed_prompt_output_id = f"{int(time.time() * 1000)}{uuid.uuid4()}".replace("-", "")
+                 cost_id = f"{int(time.time() * 1000)}{uuid.uuid4()}".replace("-", "")
+                 cost_saving_id = f"{int(time.time() * 1000)}{uuid.uuid4()}".replace("-", "")
+
+                 # Use the server-provided socket ID here
+                 templateData = {
+                     "processID": getProcessID(),
+                     "socketID": socketID,
+                     "source": "SDK",
+                     "rowID": rowID,
+                     "columnID": columnID,
+                     "processType": "COST_SAVING",
+                     "evalType": evalType,
+                     "dependency": list(inputDict.keys()),
+                     "costColumnMapping": {
+                         "compressed_prompt": compressed_prompt_id,
+                         "compressed_prompt_output": compressed_prompt_output_id,
+                         "cost": cost_id,
+                         "cost_saving": cost_saving_id
+                     },
+                     "processData": {
+                         "rowData": {
+                             "query": {"type": "VARIABLE", "value": ""},
+                             "context": {"type": "VARIABLE", "value": ""},
+                         },
+                         "dependency": list(inputDict.keys()),
+                         "dependencyMapping": {ky: ky for ky in list(inputDict.keys())},
+                         "provider": provider,
+                         "model": model,
+                         "promptText": promptTemplate,
+                         "costColumnMapping": {
+                             "compressed_prompt": compressed_prompt_id,
+                             "compressed_prompt_output": compressed_prompt_output_id,
+                             "cost": cost_id,
+                             "cost_saving": cost_saving_id
+                         }
+                     },
+                     "workspaceID": workspaceID,
+                     "email": "",
+                     "playgroundID": activePlayground
+                 }
+
+
+                 # Build query/context from input
+                 query = ""
+                 context = ""
+
+                 for key, value in inputDict.items():
+                     if isinstance(value, str):
+                         length = len(value.split()) * 1.5
+                         if length > 50:
+                             context += f" {key}: {value}, "
+                         else:
+                             if promptTemplate:
+                                 populatedPrompt = getInputPopulatedPrompt(promptTemplate, {key: value})
+                                 query += f"{populatedPrompt} "
+                             else:
+                                 query += f" {key}: {value}, "
+
+                 if not context.strip():
+                     for key, value in inputDict.items():
+                         context += f" {key}: {value}, "
+
+                 templateData["processData"]["rowData"]["context"]["value"] = context.strip()
+                 templateData["processData"]["rowData"]["query"]["value"] = query.strip()
+
+                 if promptTemplate and not query.strip():
+                     templateData["processData"]["rowData"]["query"]["value"] = promptTemplate
+
+                 print(templateData)
+                 currentBatch.append(templateData)
+
+                 if len(currentBatch) == 10 or index == len(dataframe) - 1:
+                     self.allBatches.append(currentBatch)
+                     currentBatch = []
+
+             # Post all batches
+             total_items = sum(len(batch) for batch in self.allBatches)
+             print(f"Posting {len(self.allBatches)} batches ({total_items} items total)")
+
+             for cnt, batch in enumerate(self.allBatches):
+                 print(f"Posting batch {cnt + 1}/{len(self.allBatches)} for eval '{eval}'")
+                 try:
+                     self.postBatch(batch=batch, workspaceID=workspaceID)
+                     print(f"Batch {cnt + 1} posted successfully")
+                 except Exception as e:
+                     print(f"Error posting batch {cnt + 1}: {str(e)}")
+                     continue
+
+                 # Small delay between batches to prevent overwhelming the server
+                 time.sleep(1)
+
+             # updating the dict for row column mapping
+             self.AllProcessMapping()
+             # Calculate a reasonable timeout based on the data size
+             timeout = max(60, min(600, total_items * 10))
+             # print(f"All batches posted. Waiting up to {timeout} seconds for results...")
+
+             # Listen for results
+             self.socket.listenForResults(min_wait=20, max_wait=timeout, inactivity_timeout=30)
+
+             # Get results for this evaluation
+             eval_results = self.socket.getReceivedData()
+             # print(f"Received {len(eval_results)} results for evaluation '{eval}'")
+
+             # Add these results to our overall results
+             results = self.finalResp(eval_results)
+             print(f"======= Completed evaluation: {eval} =======\n")
+
+             # print("All evaluations completed successfully")
+
+         except Exception as e:
+             print(f"Error during evaluation: {e}")
+             raise
+         finally:
+             # Always disconnect the socket when done
+             try:
+                 self.socket.disconnect()
+                 # print("Socket disconnected")
+             except Exception as e:
+                 print(f"Error disconnecting socket: {e}")
+
+         compressed_prompt , compressed_prompt_output , cost , cost_saving = costColumnMapping(results,self.processMapping)
+         dataframe["compressed_prompt"] = compressed_prompt
+         dataframe["compressed_prompt_output"] = compressed_prompt_output
+         dataframe["cost"] = cost
+         dataframe["cost_saving"] = cost_saving
+         return dataframe
+
+
+     def run_sweep(self,templates: List[str], dataset: Dict[str, List[str]], model_aliases: List[AVAILABLEMODELS], apiKey: str, evals = ["Response Correctness"]) -> pd.DataFrame:
+         executor = ModelExecutor(apiKey)
+
+         keys = list(dataset.keys())
+         value_combinations = list(itertools.product(*dataset.values()))
+         combinations = [dict(zip(keys, values)) for values in value_combinations]
+
+         results = []
+
+         # Iterate through combinations
+         for combo in combinations:
+             for template in templates:
+                 prompt = template
+                 for k, v in combo.items():
+                     prompt = prompt.replace(f"{{{{{k}}}}}", v)
+                 # Add a row for each model
+                 for model in model_aliases:
+                     row = {
+                         "template": template,
+                         "prompt": prompt,
+                         **combo,
+                         "model": model.value
+                     }
+
+
+                     try:
+                         provider = getProviderFromModel(model)
+                         response = executor.execute(provider, model.value, prompt, apiKey)
+                         row["output"] = response
+                     except Exception as e:
+                         row["output"] = f"Error: {str(e)}"
+
+                     results.append(row)
+         df=pd.DataFrame(results)
+
+         print(str(templates[0]))
+         res = self.evaluate(df,evals =evals,prompt_template=str(templates[0]))
+         return res
+
+     def evaluateAgents(self, dataframe, model, agents, model_api_key=None,
+                        prompt_template="Give answer for the given query: {{query}}"):
+         if model.lower() not in ["openai", "google"]:
+             raise ValueError("Model must be 'openai' or 'google'")
+
+         # Run unified agent execution
+         toolResponseDf = LlumoAgentExecutor.run(dataframe, agents, model=model, model_api_key=model_api_key)
+
+         # Perform evaluation
+         res = self.evaluate(
+             toolResponseDf,
+             evals=["Tool Reliability", "Stepwise Progression", "Tool Selection Accuracy", "Final Task Alignment"],
+             prompt_template=prompt_template
+         )
+         return toolResponseDf
+
+
+ class SafeDict(dict):
+     def __missing__(self, key):
+         return ""