llumo 0.1.4__py3-none-any.whl → 0.1.6__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- llumo/__init__.py +7 -7
- llumo/client.py +565 -554
- llumo/exceptions.py +45 -31
- llumo/execution.py +38 -38
- llumo/functionCalling.py +189 -0
- llumo/helpingFuntions.py +50 -60
- llumo/models.py +42 -42
- llumo/sockets.py +146 -154
- {llumo-0.1.4.dist-info → llumo-0.1.6.dist-info}/METADATA +26 -26
- llumo-0.1.6.dist-info/RECORD +13 -0
- {llumo-0.1.4.dist-info → llumo-0.1.6.dist-info}/WHEEL +1 -1
- {llumo-0.1.4.dist-info → llumo-0.1.6.dist-info}/licenses/LICENSE +4 -4
- llumo/.env +0 -6
- llumo-0.1.4.dist-info/RECORD +0 -13
- {llumo-0.1.4.dist-info → llumo-0.1.6.dist-info}/top_level.txt +0 -0
llumo/client.py
CHANGED
@@ -1,554 +1,565 @@
|
|
1
|
-
import requests
|
2
|
-
from .exceptions import
|
3
|
-
import time
|
4
|
-
import re
|
5
|
-
import json
|
6
|
-
import uuid
|
7
|
-
import threading
|
8
|
-
from .helpingFuntions import *
|
9
|
-
from dotenv import load_dotenv
|
10
|
-
import os
|
11
|
-
import itertools
|
12
|
-
import pandas as pd
|
13
|
-
from typing import List, Dict
|
14
|
-
from .models import AVAILABLEMODELS,getProviderFromModel
|
15
|
-
from .execution import ModelExecutor
|
16
|
-
from .sockets import LlumoSocketClient
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
self.
|
34
|
-
self.
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
"
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
print(f"
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
#
|
51
|
-
|
52
|
-
print(f"Response
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
|
69
|
-
|
70
|
-
|
71
|
-
|
72
|
-
|
73
|
-
|
74
|
-
|
75
|
-
|
76
|
-
|
77
|
-
|
78
|
-
|
79
|
-
|
80
|
-
|
81
|
-
|
82
|
-
|
83
|
-
|
84
|
-
|
85
|
-
|
86
|
-
|
87
|
-
|
88
|
-
|
89
|
-
print(
|
90
|
-
|
91
|
-
)
|
92
|
-
|
93
|
-
|
94
|
-
|
95
|
-
|
96
|
-
|
97
|
-
|
98
|
-
|
99
|
-
self.
|
100
|
-
self.
|
101
|
-
self.
|
102
|
-
|
103
|
-
|
104
|
-
print(f"
|
105
|
-
# print(f"-
|
106
|
-
# print(f"-
|
107
|
-
|
108
|
-
|
109
|
-
|
110
|
-
|
111
|
-
|
112
|
-
|
113
|
-
|
114
|
-
|
115
|
-
"
|
116
|
-
"
|
117
|
-
|
118
|
-
|
119
|
-
|
120
|
-
"
|
121
|
-
|
122
|
-
|
123
|
-
|
124
|
-
|
125
|
-
|
126
|
-
# print(
|
127
|
-
|
128
|
-
|
129
|
-
|
130
|
-
|
131
|
-
|
132
|
-
|
133
|
-
|
134
|
-
|
135
|
-
|
136
|
-
|
137
|
-
pid =
|
138
|
-
|
139
|
-
|
140
|
-
|
141
|
-
|
142
|
-
|
143
|
-
|
144
|
-
|
145
|
-
|
146
|
-
|
147
|
-
|
148
|
-
|
149
|
-
|
150
|
-
|
151
|
-
|
152
|
-
|
153
|
-
|
154
|
-
|
155
|
-
|
156
|
-
|
157
|
-
|
158
|
-
|
159
|
-
|
160
|
-
|
161
|
-
|
162
|
-
|
163
|
-
|
164
|
-
|
165
|
-
|
166
|
-
|
167
|
-
|
168
|
-
|
169
|
-
|
170
|
-
)
|
171
|
-
|
172
|
-
print(f"
|
173
|
-
|
174
|
-
|
175
|
-
|
176
|
-
|
177
|
-
print(f"
|
178
|
-
|
179
|
-
raise
|
180
|
-
|
181
|
-
if self.hitsAvailable == 0 or len(dataframe) > self.hitsAvailable:
|
182
|
-
raise
|
183
|
-
|
184
|
-
evalDefinition = self.evalDefinition[eval]
|
185
|
-
model = "GPT_4"
|
186
|
-
provider = "OPENAI"
|
187
|
-
evalType = "LLM"
|
188
|
-
workspaceID = self.workspaceID
|
189
|
-
|
190
|
-
# Prepare all batches before sending
|
191
|
-
print("Preparing batches...")
|
192
|
-
self.allBatches = []
|
193
|
-
currentBatch = []
|
194
|
-
|
195
|
-
for index, row in dataframe.iterrows():
|
196
|
-
|
197
|
-
|
198
|
-
|
199
|
-
|
200
|
-
|
201
|
-
|
202
|
-
|
203
|
-
|
204
|
-
|
205
|
-
|
206
|
-
|
207
|
-
|
208
|
-
|
209
|
-
|
210
|
-
|
211
|
-
|
212
|
-
|
213
|
-
|
214
|
-
)
|
215
|
-
|
216
|
-
|
217
|
-
|
218
|
-
|
219
|
-
"
|
220
|
-
"
|
221
|
-
|
222
|
-
|
223
|
-
|
224
|
-
"
|
225
|
-
"
|
226
|
-
"
|
227
|
-
"
|
228
|
-
|
229
|
-
|
230
|
-
|
231
|
-
"
|
232
|
-
"
|
233
|
-
|
234
|
-
|
235
|
-
|
236
|
-
"
|
237
|
-
"
|
238
|
-
"
|
239
|
-
"
|
240
|
-
"
|
241
|
-
"
|
242
|
-
|
243
|
-
|
244
|
-
|
245
|
-
|
246
|
-
|
247
|
-
|
248
|
-
|
249
|
-
|
250
|
-
|
251
|
-
|
252
|
-
|
253
|
-
|
254
|
-
|
255
|
-
|
256
|
-
|
257
|
-
|
258
|
-
|
259
|
-
|
260
|
-
|
261
|
-
|
262
|
-
|
263
|
-
|
264
|
-
templateData["processData"]["executionDependency"]["
|
265
|
-
|
266
|
-
|
267
|
-
|
268
|
-
|
269
|
-
|
270
|
-
|
271
|
-
|
272
|
-
|
273
|
-
currentBatch
|
274
|
-
|
275
|
-
|
276
|
-
|
277
|
-
print(f"Posting {len(self.allBatches)} batches ({
|
278
|
-
|
279
|
-
for cnt, batch in enumerate(self.allBatches):
|
280
|
-
print(
|
281
|
-
|
282
|
-
|
283
|
-
|
284
|
-
|
285
|
-
print(f"
|
286
|
-
|
287
|
-
|
288
|
-
|
289
|
-
|
290
|
-
|
291
|
-
|
292
|
-
|
293
|
-
|
294
|
-
|
295
|
-
|
296
|
-
|
297
|
-
|
298
|
-
|
299
|
-
|
300
|
-
|
301
|
-
)
|
302
|
-
|
303
|
-
|
304
|
-
|
305
|
-
|
306
|
-
|
307
|
-
|
308
|
-
|
309
|
-
|
310
|
-
|
311
|
-
|
312
|
-
|
313
|
-
|
314
|
-
|
315
|
-
|
316
|
-
|
317
|
-
|
318
|
-
|
319
|
-
|
320
|
-
|
321
|
-
|
322
|
-
|
323
|
-
|
324
|
-
|
325
|
-
|
326
|
-
|
327
|
-
|
328
|
-
|
329
|
-
|
330
|
-
|
331
|
-
|
332
|
-
|
333
|
-
|
334
|
-
|
335
|
-
|
336
|
-
|
337
|
-
|
338
|
-
|
339
|
-
|
340
|
-
|
341
|
-
|
342
|
-
|
343
|
-
|
344
|
-
|
345
|
-
|
346
|
-
|
347
|
-
|
348
|
-
|
349
|
-
|
350
|
-
|
351
|
-
|
352
|
-
|
353
|
-
|
354
|
-
|
355
|
-
|
356
|
-
|
357
|
-
|
358
|
-
|
359
|
-
|
360
|
-
|
361
|
-
|
362
|
-
|
363
|
-
|
364
|
-
|
365
|
-
|
366
|
-
|
367
|
-
|
368
|
-
|
369
|
-
|
370
|
-
|
371
|
-
|
372
|
-
|
373
|
-
|
374
|
-
|
375
|
-
|
376
|
-
|
377
|
-
|
378
|
-
|
379
|
-
|
380
|
-
|
381
|
-
|
382
|
-
|
383
|
-
|
384
|
-
|
385
|
-
|
386
|
-
|
387
|
-
|
388
|
-
|
389
|
-
"
|
390
|
-
"
|
391
|
-
"
|
392
|
-
|
393
|
-
|
394
|
-
|
395
|
-
|
396
|
-
|
397
|
-
|
398
|
-
"
|
399
|
-
|
400
|
-
|
401
|
-
|
402
|
-
|
403
|
-
"
|
404
|
-
|
405
|
-
|
406
|
-
|
407
|
-
"
|
408
|
-
|
409
|
-
|
410
|
-
|
411
|
-
|
412
|
-
|
413
|
-
|
414
|
-
|
415
|
-
|
416
|
-
|
417
|
-
|
418
|
-
|
419
|
-
|
420
|
-
|
421
|
-
|
422
|
-
|
423
|
-
|
424
|
-
|
425
|
-
|
426
|
-
|
427
|
-
|
428
|
-
|
429
|
-
|
430
|
-
|
431
|
-
|
432
|
-
|
433
|
-
|
434
|
-
|
435
|
-
|
436
|
-
|
437
|
-
|
438
|
-
|
439
|
-
|
440
|
-
|
441
|
-
|
442
|
-
|
443
|
-
|
444
|
-
|
445
|
-
|
446
|
-
templateData
|
447
|
-
|
448
|
-
|
449
|
-
|
450
|
-
|
451
|
-
|
452
|
-
|
453
|
-
|
454
|
-
|
455
|
-
|
456
|
-
|
457
|
-
|
458
|
-
|
459
|
-
|
460
|
-
|
461
|
-
|
462
|
-
|
463
|
-
|
464
|
-
|
465
|
-
|
466
|
-
|
467
|
-
|
468
|
-
|
469
|
-
|
470
|
-
|
471
|
-
|
472
|
-
|
473
|
-
|
474
|
-
|
475
|
-
|
476
|
-
|
477
|
-
|
478
|
-
|
479
|
-
|
480
|
-
#
|
481
|
-
|
482
|
-
|
483
|
-
|
484
|
-
|
485
|
-
|
486
|
-
|
487
|
-
|
488
|
-
|
489
|
-
print(f"
|
490
|
-
|
491
|
-
|
492
|
-
|
493
|
-
|
494
|
-
|
495
|
-
|
496
|
-
|
497
|
-
|
498
|
-
|
499
|
-
|
500
|
-
|
501
|
-
|
502
|
-
|
503
|
-
|
504
|
-
|
505
|
-
|
506
|
-
|
507
|
-
|
508
|
-
|
509
|
-
|
510
|
-
|
511
|
-
|
512
|
-
|
513
|
-
|
514
|
-
|
515
|
-
|
516
|
-
|
517
|
-
|
518
|
-
|
519
|
-
|
520
|
-
|
521
|
-
|
522
|
-
|
523
|
-
|
524
|
-
|
525
|
-
|
526
|
-
|
527
|
-
|
528
|
-
|
529
|
-
|
530
|
-
|
531
|
-
|
532
|
-
|
533
|
-
|
534
|
-
|
535
|
-
|
536
|
-
|
537
|
-
|
538
|
-
|
539
|
-
|
540
|
-
|
541
|
-
|
542
|
-
|
543
|
-
|
544
|
-
|
545
|
-
|
546
|
-
|
547
|
-
|
548
|
-
|
549
|
-
|
550
|
-
|
551
|
-
|
552
|
-
|
553
|
-
|
554
|
-
|
1
|
+
import requests
|
2
|
+
from .exceptions import LlumoAIError
|
3
|
+
import time
|
4
|
+
import re
|
5
|
+
import json
|
6
|
+
import uuid
|
7
|
+
import threading
|
8
|
+
from .helpingFuntions import *
|
9
|
+
from dotenv import load_dotenv
|
10
|
+
import os
|
11
|
+
import itertools
|
12
|
+
import pandas as pd
|
13
|
+
from typing import List, Dict
|
14
|
+
from .models import AVAILABLEMODELS,getProviderFromModel
|
15
|
+
from .execution import ModelExecutor
|
16
|
+
from .sockets import LlumoSocketClient
|
17
|
+
from .functionCalling import LlumoAgentExecutor
|
18
|
+
|
19
|
+
|
20
|
+
# 👇 NEW: Explicitly load .env from the package folder
|
21
|
+
# Load the .env file that sits beside this module (not the current working
# directory). With override=False, variables already present in the process
# environment are left untouched.
envPath = os.path.join(os.path.dirname(__file__), ".env")
load_dotenv(dotenv_path=envPath, override=False)

# Backend endpoints taken from the environment; each is None when unset.
postUrl = os.environ.get("postUrl")
fetchUrl = os.environ.get("fetchUrl")
validateUrl = os.environ.get("validateUrl")
socketUrl = os.environ.get("SOCKET_URL")
|
28
|
+
|
29
|
+
|
30
|
+
class LlumoClient:
|
31
|
+
|
32
|
+
def __init__(self, api_key):
    """Create a client bound to a single API key.

    Args:
        api_key: Key that ``validateApiKey`` sends as the bearer token.
    """
    # Filled by AllProcessMapping with "<rowID>-<columnID>-<columnID>" keys.
    self.processMapping = dict()
    self.apiKey = api_key
    # Socket client pointed at the module-level ``socketUrl`` setting.
    self.socket = LlumoSocketClient(socketUrl)
|
36
|
+
|
37
|
+
|
38
|
+
def validateApiKey(self, evalName="", timeout=30):
    """Validate the API key and cache the workspace details it unlocks.

    Posts ``{"analytics": [evalName]}`` to ``validateUrl`` with the API key
    as bearer token. On success the instance attributes ``hitsAvailable``,
    ``workspaceID``, ``evalDefinition`` and ``socketToken`` are set from the
    ``data`` object of the JSON response.

    Args:
        evalName: Evaluation name sent in the ``analytics`` list.
        timeout: Seconds to wait for the endpoint before giving up. Without
            a timeout ``requests`` may block indefinitely.

    Raises:
        The errors built by ``LlumoAIError``: ``RequestFailed`` when the
        request cannot be completed or returns a status other than 200,
        ``InvalidApiKey`` on 401, ``InvalidJsonResponse`` when the body is
        not JSON, ``InvalidApiResponse`` when ``data`` is missing or empty,
        and ``UnexpectedError`` when the fields cannot be extracted.
    """
    headers = {
        "Authorization": f"Bearer {self.apiKey}",
        "Content-Type": "application/json",
    }
    reqBody = {"analytics": [evalName]}

    try:
        response = requests.post(
            url=validateUrl, json=reqBody, headers=headers, timeout=timeout
        )
    except requests.exceptions.RequestException as e:
        print(f"Request exception: {str(e)}")
        raise LlumoAIError.RequestFailed(detail=str(e)) from e

    status = response.status_code
    if status == 401:
        raise LlumoAIError.InvalidApiKey()
    if status == 404:
        raise LlumoAIError.RequestFailed(
            detail=f"Endpoint not found (404): {validateUrl}"
        )
    if status != 200:
        raise LlumoAIError.RequestFailed(
            detail=f"Unexpected status code: {status}"
        )

    try:
        data = response.json()
    except ValueError as e:
        print(f"JSON parsing error: {str(e)}")
        raise LlumoAIError.InvalidJsonResponse() from e

    # A JSON body that is not an object (e.g. null or a list) is just as
    # unusable as one without a populated "data" entry.
    if not isinstance(data, dict) or not data.get("data"):
        raise LlumoAIError.InvalidApiResponse()

    details = data["data"]
    try:
        self.hitsAvailable = details.get("remainingHits", 0)
        self.workspaceID = details.get("workspaceID")
        self.evalDefinition = details.get("analyticsMapping")
        self.socketToken = details.get("token")
    except Exception as e:
        raise LlumoAIError.UnexpectedError(detail=str(e)) from e
|
111
|
+
|
112
|
+
def postBatch(self, batch, workspaceID, timeout=60):
    """Send one batch of payloads to ``postUrl`` (best effort).

    The batch is JSON-encoded into the ``batch`` field and posted with the
    socket token obtained by ``validateApiKey`` as bearer token. Failures
    are reported on stdout and never raised, so one bad batch does not stop
    the caller from posting the remaining ones.

    Args:
        batch: List of JSON-serialisable payload dicts.
        workspaceID: Workspace identifier forwarded in the request body.
        timeout: Seconds to wait for the endpoint before giving up.
    """
    payload = {
        "batch": json.dumps(batch),
        "runType": "EVAL",
        "workspaceID": workspaceID,
    }
    headers = {
        "Authorization": f"Bearer {self.socketToken}",
        "Content-Type": "application/json",
    }
    try:
        response = requests.post(
            postUrl, json=payload, headers=headers, timeout=timeout
        )
    except Exception as e:
        print(f"Error in posting batch: {e}")
        return

    # The server can reject a batch without any exception being raised;
    # report that instead of dropping it silently.
    if response.status_code >= 400:
        print(f"Error in posting batch: HTTP {response.status_code}")
|
130
|
+
|
131
|
+
def AllProcessMapping(self):
    """Index every record of ``self.allBatches`` in ``self.processMapping``.

    Each record is stored under the key ``"<rowID>-<columnID>-<columnID>"``
    (the column id appears twice). Existing entries with the same key are
    overwritten.
    """
    for group in self.allBatches:
        for entry in group:
            row_part = entry["rowID"]
            col_part = entry["columnID"]
            self.processMapping[f"{row_part}-{col_part}-{col_part}"] = entry
|
138
|
+
|
139
|
+
|
140
|
+
def finalResp(self, results):
    """Drop results whose key has already been seen, keeping first arrivals.

    Args:
        results: Iterable of dicts; each is expected to hold a single key.

    Returns:
        A new list with the items in their original order, where an item is
        kept only for keys not encountered in an earlier item.
    """
    emitted_keys = set()
    deduped = []

    for entry in results:
        for key in entry:
            if key in emitted_keys:
                continue
            emitted_keys.add(key)
            deduped.append(entry)

    return deduped
|
151
|
+
|
152
|
+
def evaluate(self, dataframe, evals=None, prompt_template="", outputColName="output"):
    """Run the requested evaluations on every row and add the results as columns.

    For each evaluation name the API key is re-validated, one payload per
    dataframe row is built and posted in batches of 10, and the results are
    then collected from the socket. ``dataframe`` is modified in place: one
    column per evaluation name is added, holding the value received for each
    row (``None`` where nothing was received).

    Args:
        dataframe: pandas DataFrame with one record per row. The optional
            columns ``tools``, ``groundTruth`` and ``messageHistory`` are
            forwarded when present.
        evals: Evaluation names to run; defaults to
            ``["Response Completeness"]``.
        prompt_template: Template whose ``{{name}}`` placeholders must all
            be dataframe column names.
        outputColName: Column holding the output to evaluate.

    Returns:
        The same ``dataframe`` object with the evaluation columns added.

    Raises:
        The errors built by ``LlumoAIError.InsufficientCredits`` and
        ``LlumoAIError.InvalidPromptTemplate``, plus anything propagated
        from ``validateApiKey`` or the socket client.
    """
    if evals is None:
        evals = ["Response Completeness"]
    batchSize = 10

    results = {}
    rowIdMapping = {}
    try:
        socketID = self.socket.connect(timeout=30)

        # The dataframe's columns do not change while payloads are built, so
        # the template and optional-column checks are done once.
        keys = re.findall(r"{{(.*?)}}", prompt_template)
        templateIsValid = all(ky in dataframe.columns for ky in keys)
        hasTools = "tools" in dataframe.columns
        hasGroundTruth = "groundTruth" in dataframe.columns
        hasHistory = "messageHistory" in dataframe.columns
        hasOutput = outputColName in dataframe.columns

        for evalName in evals:
            print(f"\n======= Running evaluation for: {evalName} =======")

            self.validateApiKey(evalName=evalName)

            if self.hitsAvailable == 0 or len(dataframe) > self.hitsAvailable:
                raise LlumoAIError.InsufficientCredits()

            evalDefinition = self.evalDefinition[evalName]
            model = "GPT_4"
            provider = "OPENAI"
            evalType = "LLM"
            workspaceID = self.workspaceID

            # Prepare all batches before sending.
            self.allBatches = []
            currentBatch = []

            for index, row in dataframe.iterrows():
                if not templateIsValid:
                    raise LlumoAIError.InvalidPromptTemplate()

                tools = [row["tools"]] if hasTools else []
                groundTruth = row["groundTruth"] if hasGroundTruth else ""
                messageHistory = [row["messageHistory"]] if hasHistory else []
                output = row[outputColName] if hasOutput else ""
                promptTemplate = prompt_template
                inputDict = {key: row[key] for key in keys if key in row}

                activePlayground = f"{int(time.time() * 1000)}{uuid.uuid4()}".replace("-", "")
                rowID = f"{int(time.time() * 1000)}{uuid.uuid4()}".replace("-", "")
                columnID = f"{int(time.time() * 1000)}{uuid.uuid4()}".replace("-", "")

                # Results come back keyed by rowID; remember which dataframe
                # row each one belongs to.
                rowIdMapping[rowID] = index

                templateData = {
                    "processID": getProcessID(),
                    "socketID": socketID,
                    "source": "SDK",
                    "processData": {
                        "executionDependency": {
                            "query": "",
                            "context": "",
                            "output": output,
                            "tools": tools,
                            "groundTruth": groundTruth,
                            "messageHistory": messageHistory,
                        },
                        "definition": evalDefinition,
                        "model": model,
                        "provider": provider,
                        "analytics": evalName,
                    },
                    "workspaceID": workspaceID,
                    "type": "EVAL",
                    "evalType": evalType,
                    "kpi": evalName,
                    "columnID": columnID,
                    "rowID": rowID,
                    "playgroundID": activePlayground,
                    "processType": "EVAL",
                }

                query = ""
                context = ""
                for key, value in inputDict.items():
                    if isinstance(value, str):
                        # 1.5 units per word; presumably a rough token
                        # estimate. Long values are treated as context.
                        length = len(value.split()) * 1.5
                        if length > 50:
                            context += f" {key}: {value}, "
                        elif promptTemplate:
                            promptTemplate = getInputPopulatedPrompt(promptTemplate, {key: value})
                        else:
                            query += f" {key}: {value}, "

                # No long value found: fall back to using every input as context.
                if not context.strip():
                    for key, value in inputDict.items():
                        context += f" {key}: {value}, "

                dependency = templateData["processData"]["executionDependency"]
                dependency["context"] = context.strip()
                dependency["query"] = query.strip()
                if promptTemplate and not query.strip():
                    dependency["query"] = promptTemplate

                currentBatch.append(templateData)
                if len(currentBatch) == batchSize:
                    self.allBatches.append(currentBatch)
                    currentBatch = []

            # Flush the final partial batch. Checked after the loop rather
            # than by comparing the index label with len(dataframe) - 1,
            # which only works for a default RangeIndex.
            if currentBatch:
                self.allBatches.append(currentBatch)

            totalItems = sum(len(batch) for batch in self.allBatches)

            for batch in self.allBatches:
                try:
                    self.postBatch(batch=batch, workspaceID=workspaceID)
                except Exception:
                    # Best effort: a failed batch must not abort the others.
                    continue
                # Short pause between batches.
                time.sleep(1)

            # 10 seconds per item, clamped to the range 60-600 seconds.
            timeout = max(60, min(600, totalItems * 10))
            self.socket.listenForResults(
                min_wait=40, max_wait=timeout, inactivity_timeout=40
            )

            eval_results = self.socket.getReceivedData()
            results[evalName] = self.finalResp(eval_results)
    finally:
        # Always release the socket; a failure here must not mask the
        # outcome of the evaluation itself.
        try:
            self.socket.disconnect()
        except Exception:
            pass

    for evalName, records in results.items():
        dataframe[evalName] = None
        for item in records:
            for compound_key, value in item.items():
                # The text before the first "-" of a result key is the rowID.
                rowID = compound_key.split('-')[0]
                if rowID in rowIdMapping:
                    dataframe.at[rowIdMapping[rowID], evalName] = value
                else:
                    print(f"Warning: Could not find rowID {rowID} in mapping")

    return dataframe
|
329
|
+
|
330
|
+
def evaluateCompressor(self, dataframe, prompt_template):
    """Run the prompt-compression ("COST_SAVING") process on every row.

    One payload per dataframe row is built and posted in batches of 10;
    results are collected from the socket and handed, together with
    ``self.processMapping``, to ``costColumnMapping``. ``dataframe`` is
    modified in place with the four columns ``compressed_prompt``,
    ``compressed_prompt_output``, ``cost`` and ``cost_saving``.

    Args:
        dataframe: pandas DataFrame with one record per row.
        prompt_template: Template whose ``{{name}}`` placeholders must all
            be dataframe column names.

    Returns:
        The same ``dataframe`` object with the four columns added.

    Raises:
        The errors built by ``LlumoAIError.InsufficientCredits`` and
        ``LlumoAIError.InvalidPromptTemplate``, plus anything propagated
        from ``validateApiKey`` or the socket client.
    """
    results = []
    batchSize = 10
    try:
        socketID = self.socket.connect(timeout=30)

        try:
            self.validateApiKey()
        except Exception as e:
            print(f"Error during API key validation: {str(e)}")
            failedResponse = getattr(e, "response", None)
            if failedResponse is not None:
                print(f"Status code: {failedResponse.status_code}")
                print(f"Response content: {failedResponse.text[:500]}...")
            raise

        if self.hitsAvailable == 0 or len(dataframe) > self.hitsAvailable:
            raise LlumoAIError.InsufficientCredits()

        model = "GPT_4"
        provider = "OPENAI"
        evalType = "LLUMO"
        workspaceID = self.workspaceID

        # The template is the same for every row, so parse and check it once.
        keys = re.findall(r"{{(.*?)}}", prompt_template)
        templateIsValid = all(ky in dataframe.columns for ky in keys)

        # Prepare all batches before sending.
        self.allBatches = []
        currentBatch = []

        for index, row in dataframe.iterrows():
            if not templateIsValid:
                raise LlumoAIError.InvalidPromptTemplate()

            promptTemplate = prompt_template
            inputDict = {key: row[key] for key in keys if key in row}

            activePlayground = f"{int(time.time() * 1000)}{uuid.uuid4()}".replace("-", "")
            rowID = f"{int(time.time() * 1000)}{uuid.uuid4()}".replace("-", "")
            columnID = f"{int(time.time() * 1000)}{uuid.uuid4()}".replace("-", "")

            compressed_prompt_id = f"{int(time.time() * 1000)}{uuid.uuid4()}".replace("-", "")
            compressed_prompt_output_id = f"{int(time.time() * 1000)}{uuid.uuid4()}".replace("-", "")
            cost_id = f"{int(time.time() * 1000)}{uuid.uuid4()}".replace("-", "")
            cost_saving_id = f"{int(time.time() * 1000)}{uuid.uuid4()}".replace("-", "")

            # Use the server-provided socket ID here.
            templateData = {
                "processID": getProcessID(),
                "socketID": socketID,
                "source": "SDK",
                "rowID": rowID,
                "columnID": columnID,
                "processType": "COST_SAVING",
                "evalType": evalType,
                "dependency": list(inputDict.keys()),
                "costColumnMapping": {
                    "compressed_prompt": compressed_prompt_id,
                    "compressed_prompt_output": compressed_prompt_output_id,
                    "cost": cost_id,
                    "cost_saving": cost_saving_id
                },
                "processData": {
                    "rowData": {
                        "query": {"type": "VARIABLE", "value": ""},
                        "context": {"type": "VARIABLE", "value": ""},
                    },
                    "dependency": list(inputDict.keys()),
                    "dependencyMapping": {ky: ky for ky in list(inputDict.keys())},
                    "provider": provider,
                    "model": model,
                    "promptText": promptTemplate,
                    "costColumnMapping": {
                        "compressed_prompt": compressed_prompt_id,
                        "compressed_prompt_output": compressed_prompt_output_id,
                        "cost": cost_id,
                        "cost_saving": cost_saving_id
                    }
                },
                "workspaceID": workspaceID,
                "email": "",
                "playgroundID": activePlayground
            }

            # Build query/context from input.
            query = ""
            context = ""

            for key, value in inputDict.items():
                if isinstance(value, str):
                    # 1.5 units per word; presumably a rough token estimate.
                    # Long values are treated as context.
                    length = len(value.split()) * 1.5
                    if length > 50:
                        context += f" {key}: {value}, "
                    elif promptTemplate:
                        populatedPrompt = getInputPopulatedPrompt(promptTemplate, {key: value})
                        query += f"{populatedPrompt} "
                    else:
                        query += f" {key}: {value}, "

            # No long value found: fall back to using every input as context.
            if not context.strip():
                for key, value in inputDict.items():
                    context += f" {key}: {value}, "

            rowData = templateData["processData"]["rowData"]
            rowData["context"]["value"] = context.strip()
            rowData["query"]["value"] = query.strip()
            if promptTemplate and not query.strip():
                rowData["query"]["value"] = promptTemplate

            currentBatch.append(templateData)
            if len(currentBatch) == batchSize:
                self.allBatches.append(currentBatch)
                currentBatch = []

        # Flush the final partial batch. Checked after the loop rather than
        # by comparing the index label with len(dataframe) - 1, which only
        # works for a default RangeIndex.
        if currentBatch:
            self.allBatches.append(currentBatch)

        # Post all batches.
        total_items = sum(len(batch) for batch in self.allBatches)
        print(f"Posting {len(self.allBatches)} batches ({total_items} items total)")

        for cnt, batch in enumerate(self.allBatches):
            print(f"Posting batch {cnt + 1}/{len(self.allBatches)}")
            try:
                self.postBatch(batch=batch, workspaceID=workspaceID)
                print(f"Batch {cnt + 1} posted successfully")
            except Exception as e:
                print(f"Error posting batch {cnt + 1}: {str(e)}")
                continue

            # Small delay between batches to prevent overwhelming the server.
            time.sleep(1)

        # Update the row/column lookup used when mapping results back.
        self.AllProcessMapping()

        # 10 seconds per item, clamped to the range 60-600 seconds.
        timeout = max(60, min(600, total_items * 10))

        self.socket.listenForResults(min_wait=20, max_wait=timeout, inactivity_timeout=30)

        eval_results = self.socket.getReceivedData()
        results = self.finalResp(eval_results)
        print("======= Completed prompt compression =======\n")

    except Exception as e:
        print(f"Error during evaluation: {e}")
        raise
    finally:
        # Always disconnect the socket when done.
        try:
            self.socket.disconnect()
        except Exception as e:
            print(f"Error disconnecting socket: {e}")

    compressed_prompt, compressed_prompt_output, cost, cost_saving = costColumnMapping(results, self.processMapping)
    dataframe["compressed_prompt"] = compressed_prompt
    dataframe["compressed_prompt_output"] = compressed_prompt_output
    dataframe["cost"] = cost
    dataframe["cost_saving"] = cost_saving
    return dataframe
|
505
|
+
|
506
|
+
|
507
|
+
def run_sweep(self,templates: List[str], dataset: Dict[str, List[str]], model_aliases: List[AVAILABLEMODELS], apiKey: str, evals = ["Response Correctness"]) -> pd.DataFrame:
    """Execute every template/value/model combination, then evaluate the outputs.

    The cartesian product of the value lists in ``dataset`` is taken; for
    each combination every template has its ``{{name}}`` placeholders
    replaced and is executed once per model. A failed execution is recorded
    as ``"Error: <message>"`` in the ``output`` column instead of raising.
    The collected rows are then passed to ``evaluate`` with the first
    template as ``prompt_template``.

    Returns:
        The dataframe returned by ``evaluate``.
    """
    executor = ModelExecutor(apiKey)

    fieldNames = list(dataset.keys())
    rows = []

    for values in itertools.product(*dataset.values()):
        combo = dict(zip(fieldNames, values))
        for template in templates:
            # Fill in each placeholder of this template for the combination.
            prompt = template
            for name, replacement in combo.items():
                prompt = prompt.replace(f"{{{{{name}}}}}", replacement)

            # One row per model.
            for model in model_aliases:
                modelName = model.value
                row = {
                    "template": template,
                    "prompt": prompt,
                    **combo,
                    "model": modelName,
                }
                try:
                    row["output"] = executor.execute(
                        getProviderFromModel(model), modelName, prompt, apiKey
                    )
                except Exception as exc:
                    row["output"] = f"Error: {str(exc)}"
                rows.append(row)

    df = pd.DataFrame(rows)
    firstTemplate = str(templates[0])
    print(firstTemplate)
    return self.evaluate(df, evals=evals, prompt_template=firstTemplate)
|
545
|
+
|
546
|
+
def evaluateAgents(self, dataframe, model, agents, model_api_key=None,
                   prompt_template="Give answer for the given query: {{query}}"):
    """Run the agents over ``dataframe`` and evaluate their tool usage.

    Args:
        dataframe: Input passed to ``LlumoAgentExecutor.run``.
        model: ``"openai"`` or ``"google"`` (case-insensitive).
        agents: Agents passed to ``LlumoAgentExecutor.run``.
        model_api_key: Optional key forwarded to the executor.
        prompt_template: Template forwarded to ``evaluate``.

    Returns:
        The dataframe produced by ``LlumoAgentExecutor.run``; the return
        value of ``evaluate`` itself is not used.

    Raises:
        ValueError: If ``model`` is neither ``"openai"`` nor ``"google"``.
    """
    supported = ("openai", "google")
    if model.lower() not in supported:
        raise ValueError("Model must be 'openai' or 'google'")

    # Run unified agent execution.
    toolResponseDf = LlumoAgentExecutor.run(
        dataframe, agents, model=model, model_api_key=model_api_key
    )

    # Perform evaluation on the executor's output.
    agentEvals = [
        "Tool Reliability",
        "Stepwise Progression",
        "Tool Selection Accuracy",
        "Final Task Alignment",
    ]
    self.evaluate(toolResponseDf, evals=agentEvals, prompt_template=prompt_template)
    return toolResponseDf
|
561
|
+
|
562
|
+
|
563
|
+
class SafeDict(dict):
    """A ``dict`` whose lookups of absent keys yield an empty string.

    Only ``d[key]``-style access (and helpers built on it, such as
    ``str.format_map``) is affected; the missing key is not inserted.
    """

    def __missing__(self, key):
        # Called by dict.__getitem__ when ``key`` is not present.
        fallback = ""
        return fallback
|