llumo 0.2.11__py3-none-any.whl → 0.2.13__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- llumo/client.py +166 -174
- llumo/helpingFuntions.py +201 -15
- llumo/sockets.py +1 -1
- {llumo-0.2.11.dist-info → llumo-0.2.13.dist-info}/METADATA +1 -1
- llumo-0.2.13.dist-info/RECORD +13 -0
- {llumo-0.2.11.dist-info → llumo-0.2.13.dist-info}/WHEEL +1 -1
- llumo-0.2.11.dist-info/RECORD +0 -13
- {llumo-0.2.11.dist-info → llumo-0.2.13.dist-info}/licenses/LICENSE +0 -0
- {llumo-0.2.11.dist-info → llumo-0.2.13.dist-info}/top_level.txt +0 -0
llumo/client.py
CHANGED
@@ -30,6 +30,7 @@ class LlumoClient:
|
|
30
30
|
self.apiKey = api_key
|
31
31
|
self.socket = LlumoSocketClient(socketUrl)
|
32
32
|
self.processMapping = {}
|
33
|
+
self.definationMapping = {}
|
33
34
|
|
34
35
|
def validateApiKey(self, evalName=" "):
|
35
36
|
headers = {
|
@@ -87,6 +88,8 @@ class LlumoClient:
|
|
87
88
|
self.trialEndDate = data["data"]["data"].get("trialEndDate", None)
|
88
89
|
self.subscriptionEndDate = data["data"]["data"].get("subscriptionEndDate", None)
|
89
90
|
self.email = data["data"]["data"].get("email", None)
|
91
|
+
|
92
|
+
self.definationMapping[evalName] = self.evalDefinition
|
90
93
|
|
91
94
|
except Exception as e:
|
92
95
|
# print(f"Error extracting data from response: {str(e)}")
|
@@ -153,6 +156,7 @@ class LlumoClient:
|
|
153
156
|
|
154
157
|
return uniqueResults
|
155
158
|
|
159
|
+
# this function allows the users to run exactly one eval at a time
|
156
160
|
def evaluate(
|
157
161
|
self,
|
158
162
|
dataframe,
|
@@ -314,6 +318,7 @@ class LlumoClient:
|
|
314
318
|
|
315
319
|
for cnt, batch in enumerate(self.allBatches):
|
316
320
|
try:
|
321
|
+
|
317
322
|
self.postBatch(batch=batch, workspaceID=workspaceID)
|
318
323
|
# print("Betch Posted with item len: ", len(batch))
|
319
324
|
except Exception as e:
|
@@ -362,38 +367,53 @@ class LlumoClient:
|
|
362
367
|
pd.set_option("future.no_silent_downcasting", True)
|
363
368
|
df = dataframe.fillna("Some error occured").astype(object)
|
364
369
|
|
365
|
-
if createPlayground(email, workspaceID, df):
|
370
|
+
if createPlayground(email, workspaceID, df,promptText=prompt_template,definationMapping=self.definationMapping,outputColName=outputColName):
|
366
371
|
print(
|
367
|
-
"Your data has been saved in the Llumo Experiment. Visit https://app.llumo.ai/evallm to see the results."
|
372
|
+
"Your data has been saved in the Llumo Experiment. Visit https://app.llumo.ai/evallm to see the results.Please rerun the experiment to see the results on playground."
|
368
373
|
)
|
369
374
|
else:
|
370
375
|
return dataframe
|
371
376
|
|
377
|
+
# this function allows the users to run multiple evals at once
|
378
|
+
def evaluateMultiple(
|
379
|
+
self,
|
380
|
+
dataframe,
|
381
|
+
eval=["Response Completeness"],
|
382
|
+
prompt_template="",
|
383
|
+
outputColName="output",
|
384
|
+
createExperiment: bool = False,
|
385
|
+
):
|
386
|
+
resultdf = dataframe.copy()
|
387
|
+
for evalName in eval:
|
388
|
+
resultdf = self.evaluate(dataframe = resultdf,eval=evalName,prompt_template=prompt_template,outputColName=outputColName,createExperiment = False)
|
389
|
+
|
390
|
+
if createExperiment:
|
391
|
+
pd.set_option("future.no_silent_downcasting", True)
|
392
|
+
df = resultdf.fillna("Some error occured").astype(object)
|
393
|
+
|
394
|
+
if createPlayground(self.email, self.workspaceID, df,definationMapping=self.definationMapping,outputColName=outputColName,promptText=prompt_template):
|
395
|
+
print(
|
396
|
+
"Your data has been saved in the Llumo Experiment. Visit https://app.llumo.ai/evallm to see the results.Please rerun the experiment to see the results on playground."
|
397
|
+
)
|
398
|
+
else:
|
399
|
+
return resultdf
|
400
|
+
|
401
|
+
|
372
402
|
def evaluateCompressor(self, dataframe, prompt_template):
|
373
403
|
results = []
|
374
404
|
|
375
405
|
try:
|
376
|
-
# Connect to socket first
|
377
|
-
# print("Connecting to socket server...")
|
378
406
|
socketID = self.socket.connect(timeout=150)
|
379
|
-
|
380
|
-
# Ensure full connection before proceeding
|
381
407
|
max_wait_secs = 20
|
382
408
|
waited_secs = 0
|
383
409
|
while not self.socket._connection_established.is_set():
|
384
410
|
time.sleep(0.1)
|
385
411
|
waited_secs += 0.1
|
386
412
|
if waited_secs >= max_wait_secs:
|
387
|
-
raise RuntimeError(
|
388
|
-
"Timeout waiting for server 'connection-established' event."
|
389
|
-
)
|
390
|
-
|
391
|
-
# print(f"Connected with socket ID: {socketID}")
|
413
|
+
raise RuntimeError("Timeout waiting for server 'connection-established' event.")
|
392
414
|
|
393
415
|
try:
|
394
|
-
# print(f"Validating API key...")
|
395
416
|
self.validateApiKey()
|
396
|
-
# print(f"API key validation successful. Hits available: {self.hitsAvailable}")
|
397
417
|
except Exception as e:
|
398
418
|
print(f"Error during API key validation: {str(e)}")
|
399
419
|
if hasattr(e, "response") and getattr(e, "response", None) is not None:
|
@@ -401,61 +421,42 @@ class LlumoClient:
|
|
401
421
|
print(f"Response content: {e.response.text[:500]}...")
|
402
422
|
raise
|
403
423
|
|
404
|
-
|
405
|
-
|
406
|
-
self.workspaceID,
|
407
|
-
self.hasSubscribed,
|
408
|
-
self.trialEndDate,
|
409
|
-
self.subscriptionEndDate,
|
410
|
-
self.hitsAvailable,
|
411
|
-
len(dataframe),
|
412
|
-
)
|
424
|
+
userHits = checkUserHits(self.workspaceID, self.hasSubscribed, self.trialEndDate, self.subscriptionEndDate,
|
425
|
+
self.hitsAvailable, len(dataframe))
|
413
426
|
|
414
|
-
# do not proceed if subscription or trial limit has exhausted
|
415
427
|
if not userHits["success"]:
|
416
428
|
raise LlumoAIError.InsufficientCredits(userHits["message"])
|
417
429
|
|
418
|
-
# if self.hitsAvailable == 0 or len(dataframe) > self.hitsAvailable:
|
419
|
-
# raise LlumoAIError.InsufficientCredits()
|
420
|
-
|
421
430
|
model = "GPT_4"
|
422
431
|
provider = "OPENAI"
|
423
432
|
evalType = "LLUMO"
|
424
433
|
workspaceID = self.workspaceID
|
425
434
|
email = self.email
|
426
|
-
# Prepare all batches before sending
|
427
|
-
# print("Preparing batches...")
|
428
435
|
self.allBatches = []
|
429
436
|
currentBatch = []
|
430
|
-
|
437
|
+
rowIdMapping = {}
|
431
438
|
for index, row in dataframe.iterrows():
|
432
439
|
promptTemplate = prompt_template
|
433
|
-
|
434
|
-
# extracting the placeholders from the prompt template
|
435
440
|
keys = re.findall(r"{{(.*?)}}", promptTemplate)
|
436
441
|
inputDict = {key: row[key] for key in keys if key in row}
|
437
442
|
|
438
443
|
if not all([ky in dataframe.columns for ky in keys]):
|
439
444
|
raise LlumoAIError.InvalidPromptTemplate()
|
440
445
|
|
441
|
-
activePlayground = f"{int(time.time() * 1000)}{uuid.uuid4()}".replace(
|
442
|
-
"-", ""
|
443
|
-
)
|
446
|
+
activePlayground = f"{int(time.time() * 1000)}{uuid.uuid4()}".replace("-", "")
|
444
447
|
rowID = f"{int(time.time() * 1000)}{uuid.uuid4()}".replace("-", "")
|
445
448
|
columnID = f"{int(time.time() * 1000)}{uuid.uuid4()}".replace("-", "")
|
446
449
|
|
447
|
-
compressed_prompt_id = (
|
448
|
-
|
449
|
-
)
|
450
|
-
compressed_prompt_output_id = (
|
451
|
-
f"{int(time.time() * 1000)}{uuid.uuid4()}".replace("-", "")
|
452
|
-
)
|
450
|
+
compressed_prompt_id = f"{int(time.time() * 1000)}{uuid.uuid4()}".replace("-", "")
|
451
|
+
compressed_prompt_output_id = f"{int(time.time() * 1000)}{uuid.uuid4()}".replace("-", "")
|
453
452
|
cost_id = f"{int(time.time() * 1000)}{uuid.uuid4()}".replace("-", "")
|
454
|
-
cost_saving_id = f"{int(time.time() * 1000)}{uuid.uuid4()}".replace(
|
455
|
-
|
456
|
-
|
453
|
+
cost_saving_id = f"{int(time.time() * 1000)}{uuid.uuid4()}".replace("-", "")
|
454
|
+
|
455
|
+
rowDataDict = {}
|
456
|
+
for col in dataframe.columns:
|
457
|
+
val = row[col]
|
458
|
+
rowDataDict[col] = {"type": "VARIABLE", "value": str(val)}
|
457
459
|
|
458
|
-
# Use the server-provided socket ID here
|
459
460
|
templateData = {
|
460
461
|
"processID": getProcessID(),
|
461
462
|
"socketID": socketID,
|
@@ -469,13 +470,10 @@ class LlumoClient:
|
|
469
470
|
"compressed_prompt": compressed_prompt_id,
|
470
471
|
"compressed_prompt_output": compressed_prompt_output_id,
|
471
472
|
"cost": cost_id,
|
472
|
-
"cost_saving": cost_saving_id
|
473
|
+
"cost_saving": cost_saving_id
|
473
474
|
},
|
474
475
|
"processData": {
|
475
|
-
"rowData":
|
476
|
-
"query": {"type": "VARIABLE", "value": ""},
|
477
|
-
"context": {"type": "VARIABLE", "value": ""},
|
478
|
-
},
|
476
|
+
"rowData": rowDataDict,
|
479
477
|
"dependency": list(inputDict.keys()),
|
480
478
|
"dependencyMapping": {ky: ky for ky in list(inputDict.keys())},
|
481
479
|
"provider": provider,
|
@@ -485,128 +483,89 @@ class LlumoClient:
|
|
485
483
|
"compressed_prompt": compressed_prompt_id,
|
486
484
|
"compressed_prompt_output": compressed_prompt_output_id,
|
487
485
|
"cost": cost_id,
|
488
|
-
"cost_saving": cost_saving_id
|
489
|
-
}
|
486
|
+
"cost_saving": cost_saving_id
|
487
|
+
}
|
490
488
|
},
|
491
489
|
"workspaceID": workspaceID,
|
492
490
|
"email": email,
|
493
|
-
"playgroundID": activePlayground
|
491
|
+
"playgroundID": activePlayground
|
494
492
|
}
|
495
493
|
|
496
|
-
|
497
|
-
|
498
|
-
context = ""
|
499
|
-
|
500
|
-
for key, value in inputDict.items():
|
501
|
-
if isinstance(value, str):
|
502
|
-
length = len(value.split()) * 1.5
|
503
|
-
if length > 50:
|
504
|
-
context += f" {key}: {value}, "
|
505
|
-
else:
|
506
|
-
if promptTemplate:
|
507
|
-
populatedPrompt = getInputPopulatedPrompt(
|
508
|
-
promptTemplate, {key: value}
|
509
|
-
)
|
510
|
-
query += f"{populatedPrompt} "
|
511
|
-
else:
|
512
|
-
query += f" {key}: {value}, "
|
513
|
-
|
514
|
-
if not context.strip():
|
515
|
-
for key, value in inputDict.items():
|
516
|
-
context += f" {key}: {value}, "
|
517
|
-
|
518
|
-
templateData["processData"]["rowData"]["context"][
|
519
|
-
"value"
|
520
|
-
] = context.strip()
|
521
|
-
templateData["processData"]["rowData"]["query"]["value"] = query.strip()
|
522
|
-
|
523
|
-
if promptTemplate and not query.strip():
|
524
|
-
templateData["processData"]["rowData"]["query"][
|
525
|
-
"value"
|
526
|
-
] = promptTemplate
|
527
|
-
|
494
|
+
rowIdMapping[rowID] = index
|
495
|
+
# print("__________________________TEMPLATE__________________________________")
|
528
496
|
# print(templateData)
|
497
|
+
|
529
498
|
currentBatch.append(templateData)
|
530
499
|
|
531
|
-
|
532
|
-
|
533
|
-
|
500
|
+
if len(currentBatch) == 10 or index == len(dataframe) - 1:
|
501
|
+
self.allBatches.append(currentBatch)
|
502
|
+
currentBatch = []
|
534
503
|
|
535
|
-
# Post all batches
|
536
504
|
total_items = sum(len(batch) for batch in self.allBatches)
|
537
|
-
# print(f"Posting {len(self.allBatches)} batches ({total_items} items total)")
|
538
505
|
|
539
506
|
for cnt, batch in enumerate(self.allBatches):
|
540
|
-
# print(f"Posting batch {cnt + 1}/{len(self.allBatches)} for eval '{eval}'")
|
541
507
|
try:
|
542
508
|
self.postBatch(batch=batch, workspaceID=workspaceID)
|
543
|
-
# print(f"Batch {cnt + 1} posted successfully")
|
544
509
|
except Exception as e:
|
545
510
|
print(f"Error posting batch {cnt + 1}: {str(e)}")
|
546
511
|
continue
|
547
|
-
|
548
|
-
# Small delay between batches to prevent overwhelming the server
|
549
512
|
time.sleep(1)
|
550
513
|
|
551
|
-
# updating the dict for row column mapping
|
552
514
|
self.AllProcessMapping()
|
553
|
-
# Calculate a reasonable timeout based on the data size
|
554
515
|
timeout = max(60, min(600, total_items * 10))
|
555
|
-
|
556
|
-
|
557
|
-
# Listen for results
|
558
|
-
self.socket.listenForResults(
|
559
|
-
min_wait=20,
|
560
|
-
max_wait=timeout,
|
561
|
-
inactivity_timeout=30,
|
562
|
-
expected_results=None,
|
563
|
-
)
|
564
|
-
|
565
|
-
# Get results for this evaluation
|
566
|
-
eval_results = self.socket.getReceivedData()
|
567
|
-
# print(f"Received {len(eval_results)} results for evaluation '{eval}'")
|
516
|
+
self.socket.listenForResults(min_wait=20, max_wait=timeout, inactivity_timeout=30, expected_results=None)
|
568
517
|
|
569
|
-
|
570
|
-
results = self.finalResp(eval_results)
|
571
|
-
print(f"======= Completed evaluation: {eval} =======\n")
|
572
|
-
|
573
|
-
# print("All evaluations completed successfully")
|
518
|
+
results = self.socket.getReceivedData()
|
519
|
+
# results = self.finalResp(eval_results)
|
520
|
+
# print(f"======= Completed evaluation: {eval} =======\n")
|
574
521
|
|
575
522
|
except Exception as e:
|
576
523
|
print(f"Error during evaluation: {e}")
|
577
524
|
raise
|
578
525
|
finally:
|
579
|
-
# Always disconnect the socket when done
|
580
526
|
try:
|
581
527
|
self.socket.disconnect()
|
582
|
-
# print("Socket disconnected")
|
583
528
|
except Exception as e:
|
584
529
|
print(f"Error disconnecting socket: {e}")
|
585
530
|
|
586
|
-
|
587
|
-
|
588
|
-
|
589
|
-
|
590
|
-
|
591
|
-
|
592
|
-
|
531
|
+
dataframe["Compressed Input"] = None
|
532
|
+
for records in results:
|
533
|
+
for compound_key, value in records.items():
|
534
|
+
# for compound_key, value in item['data'].items():
|
535
|
+
rowID = compound_key.split('-')[0]
|
536
|
+
# looking for the index of each rowID , in the original dataframe
|
537
|
+
if rowID in rowIdMapping:
|
538
|
+
index = rowIdMapping[rowID]
|
593
539
|
|
594
|
-
|
540
|
+
dataframe.at[index, "Compressed Input"] = value["value"]
|
541
|
+
|
542
|
+
else:
|
543
|
+
pass
|
544
|
+
# print(f"⚠️ Warning: Could not find rowID {rowID} in mapping")
|
595
545
|
|
546
|
+
# compressed_prompt, compressed_prompt_output, cost, cost_saving = costColumnMapping(results, self.processMapping)
|
547
|
+
# dataframe["compressed_prompt"] = compressed_prompt
|
548
|
+
# dataframe["compressed_prompt_output"] = compressed_prompt_output
|
549
|
+
# dataframe["cost"] = cost
|
550
|
+
# dataframe["cost_saving"] = cost_saving
|
551
|
+
|
552
|
+
return dataframe
|
596
553
|
def run_sweep(
|
597
|
-
|
598
|
-
|
599
|
-
|
600
|
-
|
601
|
-
|
602
|
-
|
603
|
-
|
604
|
-
|
605
|
-
|
554
|
+
self,
|
555
|
+
templates: List[str],
|
556
|
+
dataset: Dict[str, List[str]],
|
557
|
+
model_aliases: List[AVAILABLEMODELS],
|
558
|
+
apiKey: str,
|
559
|
+
eval=["Response Correctness"],
|
560
|
+
toEvaluate: bool = False,
|
561
|
+
createExperiment: bool = False,
|
562
|
+
) -> pd.DataFrame:
|
563
|
+
|
606
564
|
try:
|
607
565
|
self.validateApiKey()
|
608
566
|
except Exception as e:
|
609
|
-
raise "Some error
|
567
|
+
raise Exception("Some error occurred, please check your API key")
|
568
|
+
|
610
569
|
workspaceID = self.workspaceID
|
611
570
|
email = self.email
|
612
571
|
executor = ModelExecutor(apiKey)
|
@@ -616,57 +575,81 @@ class LlumoClient:
|
|
616
575
|
|
617
576
|
results = []
|
618
577
|
|
619
|
-
# Iterate through combinations
|
620
578
|
for combo in combinations:
|
621
579
|
for template in templates:
|
622
580
|
prompt = template
|
623
581
|
for k, v in combo.items():
|
624
582
|
prompt = prompt.replace(f"{{{{{k}}}}}", v)
|
625
|
-
# Add a row for each model
|
626
|
-
for model in model_aliases:
|
627
|
-
row = {
|
628
|
-
"template": template,
|
629
|
-
"prompt": prompt,
|
630
|
-
**combo,
|
631
|
-
"model": model.value,
|
632
|
-
}
|
633
583
|
|
584
|
+
row = {
|
585
|
+
"prompt": prompt,
|
586
|
+
**combo,
|
587
|
+
}
|
588
|
+
|
589
|
+
for i, model in enumerate(model_aliases, 1):
|
634
590
|
try:
|
635
591
|
provider = getProviderFromModel(model)
|
636
|
-
response = executor.execute(
|
637
|
-
|
638
|
-
|
639
|
-
row["output"] = response
|
592
|
+
response = executor.execute(provider, model.value, prompt, apiKey)
|
593
|
+
outputKey = f"output_{i}"
|
594
|
+
row[outputKey] = response
|
640
595
|
except Exception as e:
|
641
|
-
row["
|
596
|
+
row[f"output_{i}"] = str(e)
|
642
597
|
|
643
|
-
|
598
|
+
results.append(row)
|
599
|
+
|
600
|
+
|
601
|
+
|
644
602
|
df = pd.DataFrame(results)
|
645
|
-
if toEvaluate:
|
646
603
|
|
647
|
-
|
604
|
+
|
605
|
+
if toEvaluate==True:
|
606
|
+
dfWithEvals = df.copy()
|
607
|
+
for i, model in enumerate(model_aliases,1):
|
608
|
+
outputColName = f"output_{i}"
|
609
|
+
try:
|
610
|
+
res = self.evaluateMultiple(
|
611
|
+
df,
|
612
|
+
eval=eval,
|
613
|
+
prompt_template=str(templates[0]),
|
614
|
+
outputColName=outputColName,
|
615
|
+
)
|
616
|
+
|
617
|
+
# Suffix the new eval columns with the 1-based model index _i (e.g., _1, _2)
|
618
|
+
for evalMetric in eval:
|
619
|
+
scoreCol = f"{evalMetric}"
|
620
|
+
reasonCol = f"{evalMetric} Reason"
|
621
|
+
if scoreCol in res.columns:
|
622
|
+
res = res.rename(columns={scoreCol: f"{scoreCol}_{i}"})
|
623
|
+
if reasonCol in res.columns:
|
624
|
+
res = res.rename(columns={reasonCol: f"{reasonCol}_{i}"})
|
625
|
+
|
626
|
+
# Drop duplicated columns from df (like prompt, variables, etc.)
|
627
|
+
newCols = [col for col in res.columns if col not in dfWithEvals.columns]
|
628
|
+
dfWithEvals = pd.concat([dfWithEvals, res[newCols]], axis=1)
|
629
|
+
|
630
|
+
except Exception as e:
|
631
|
+
print(f"Evaluation failed for model {model.value}: {str(e)}")
|
648
632
|
|
649
633
|
if createExperiment:
|
650
634
|
pd.set_option("future.no_silent_downcasting", True)
|
651
|
-
|
652
|
-
if createPlayground(email, workspaceID,
|
653
|
-
|
654
|
-
|
655
|
-
)
|
635
|
+
dfWithEvals = dfWithEvals.fillna("Some error occurred")
|
636
|
+
if createPlayground(email, workspaceID, dfWithEvals, promptText=templates[0]):
|
637
|
+
|
638
|
+
print("Your data has been saved in the Llumo Experiment. Visit https://app.llumo.ai/evallm to see the results.")
|
656
639
|
else:
|
657
|
-
return
|
658
|
-
|
640
|
+
return dfWithEvals
|
659
641
|
else:
|
660
|
-
if createExperiment:
|
642
|
+
if createExperiment==True:
|
661
643
|
pd.set_option("future.no_silent_downcasting", True)
|
662
|
-
df = df.fillna("Some error
|
663
|
-
|
664
|
-
|
665
|
-
|
666
|
-
|
667
|
-
else:
|
644
|
+
df = df.fillna("Some error occurred")
|
645
|
+
|
646
|
+
if createPlayground(email, workspaceID, df, promptText=templates[0]):
|
647
|
+
print("Your data has been saved in the Llumo Experiment. Visit https://app.llumo.ai/evallm to see the results.")
|
648
|
+
else :
|
668
649
|
return df
|
669
650
|
|
651
|
+
|
652
|
+
# this function generates an output using an LLM and tools, and then evaluates that output
|
670
653
|
def evaluateAgents(
|
671
654
|
self,
|
672
655
|
dataframe,
|
@@ -683,6 +666,8 @@ class LlumoClient:
|
|
683
666
|
toolResponseDf = LlumoAgentExecutor.run(
|
684
667
|
dataframe, agents, model=model, model_api_key=model_api_key
|
685
668
|
)
|
669
|
+
|
670
|
+
|
686
671
|
evals = [
|
687
672
|
"Tool Reliability",
|
688
673
|
"Stepwise Progression",
|
@@ -708,34 +693,41 @@ class LlumoClient:
|
|
708
693
|
else:
|
709
694
|
return toolResponseDf
|
710
695
|
|
696
|
+
# this function evaluates the tool outputs provided by the user
|
711
697
|
def evaluateAgentResponses(
|
712
698
|
self,
|
713
699
|
dataframe,
|
714
|
-
|
700
|
+
evals=["Final Task Alignment"],
|
701
|
+
outputColName="output",
|
715
702
|
createExperiment: bool = False,
|
716
703
|
):
|
717
704
|
try:
|
718
705
|
if "query" and "messageHistory" and "tools" not in dataframe.columns:
|
719
706
|
raise ValueError(
|
720
|
-
"DataFrame must contain 'query', 'messageHistory', and 'tools' columns"
|
707
|
+
"DataFrame must contain 'query', 'messageHistory','output' ,and 'tools' columns. Make sure the columns names are same as mentioned here."
|
721
708
|
)
|
722
|
-
|
723
|
-
|
724
|
-
|
725
|
-
|
726
|
-
|
727
|
-
|
709
|
+
prompt_template="Give answer for the given query: {{query}}"
|
710
|
+
|
711
|
+
# evals = [
|
712
|
+
# "Tool Reliability",
|
713
|
+
# "Stepwise Progression",
|
714
|
+
# "Tool Selection Accuracy",
|
715
|
+
# "Final Task Alignment",
|
716
|
+
# ]
|
728
717
|
toolResponseDf = dataframe.copy()
|
729
718
|
for eval in evals:
|
730
719
|
# Perform evaluation
|
731
720
|
toolResponseDf = self.evaluate(
|
732
|
-
toolResponseDf, eval=eval, prompt_template=prompt_template
|
721
|
+
toolResponseDf, eval=eval, prompt_template=prompt_template,outputColName=outputColName
|
733
722
|
)
|
723
|
+
|
724
|
+
|
734
725
|
return toolResponseDf
|
735
726
|
|
736
727
|
except Exception as e:
|
737
728
|
raise e
|
738
729
|
|
730
|
+
|
739
731
|
def runDataStream(
|
740
732
|
self,
|
741
733
|
dataframe,
|
@@ -896,7 +888,7 @@ class LlumoClient:
|
|
896
888
|
pd.set_option("future.no_silent_downcasting", True)
|
897
889
|
df = dataframe.fillna("Some error occured").astype(object)
|
898
890
|
|
899
|
-
if createPlayground(email, workspaceID, df):
|
891
|
+
if createPlayground(email, workspaceID, df,queryColName=queryColName, dataStreamName=streamId):
|
900
892
|
print(
|
901
893
|
"Your data has been saved in the Llumo Experiment. Visit https://app.llumo.ai/evallm to see the results."
|
902
894
|
)
|
llumo/helpingFuntions.py
CHANGED
@@ -7,6 +7,7 @@ import requests
|
|
7
7
|
import json
|
8
8
|
import base64
|
9
9
|
import os
|
10
|
+
import re
|
10
11
|
|
11
12
|
subscriptionUrl = "https://app.llumo.ai/api/workspace/record-extra-usage"
|
12
13
|
getStreamdataUrl = "https://app.llumo.ai/api/data-stream/all"
|
@@ -18,6 +19,8 @@ uploadColList = (
|
|
18
19
|
uploadRowList = (
|
19
20
|
"https://app.llumo.ai/api/New-Eval-API/new-upload-flow/uploadRowsInDBPlayground"
|
20
21
|
)
|
22
|
+
createInsightUrl = "https://app.llumo.ai/api/New-Eval-API/insights-api/generate-playground-insights"
|
23
|
+
getPlaygroundInsightsUrl="https://app.llumo.ai/api/New-Eval-API/insights-api/get-all-playground-insights"
|
21
24
|
|
22
25
|
|
23
26
|
def getProcessID():
|
@@ -210,30 +213,151 @@ def deleteColumnListInPlayground(workspaceID: str, playgroundID: str):
|
|
210
213
|
return None
|
211
214
|
|
212
215
|
|
213
|
-
def createColumn(workspaceID, dataframe, playgroundID):
|
214
|
-
|
216
|
+
def createColumn(workspaceID, dataframe, playgroundID, promptText=None,queryColName=None,outputColName= "output",dataStreamName=None,definationMapping=None):
|
215
217
|
if len(dataframe) > 100:
|
216
218
|
dataframe = dataframe.head(100)
|
217
219
|
print("⚠️ Dataframe truncated to 100 rows for upload.")
|
218
220
|
|
219
|
-
playgroundID = playgroundID
|
220
|
-
|
221
221
|
coltemplate = {
|
222
222
|
"workspaceID": workspaceID,
|
223
223
|
"playgroundID": playgroundID,
|
224
224
|
"columnListToUpload": [],
|
225
225
|
}
|
226
|
+
allEvals = ['Response Completeness', 'Response Bias', 'Response Harmfulness', 'Input Toxicity', 'Input Harmfulness', 'Context Utilization', 'Relevance Retention', 'Semantic Cohesion', 'Final Task Alignment', 'Tool Reliability', 'Response Correctness', 'Response Toxicity', 'Input Bias', 'Input Relevancy', 'Redundancy Reduction', 'Response Sentiment', 'Tool Selection Accuracy', 'Stepwise Progression', 'Hallucination', 'Faithfulness', 'Answer Relevancy', 'Context Precision', 'Answer Similarity', 'Harmfulness', 'Maliciousness', 'Coherence', 'Answer Correctness', 'Context Recall', 'Context Entity Recall', 'Conciseness', 'customEvalColumn', 'Groundedness', 'Memory Utilization', 'Input Relevancy (Multi-turn)']
|
227
|
+
|
228
|
+
|
229
|
+
|
230
|
+
# Create a mapping of column names to unique column IDs
|
231
|
+
columnIDMapping = {}
|
226
232
|
|
233
|
+
# Iterate over each column in the dataframe
|
227
234
|
for indx, col in enumerate(dataframe.columns):
|
228
|
-
|
229
|
-
|
230
|
-
|
231
|
-
|
232
|
-
|
233
|
-
|
234
|
-
|
235
|
+
# Generate a unique column ID using uuid
|
236
|
+
columnID = str(uuid.uuid4().hex[:8])
|
237
|
+
|
238
|
+
columnIDMapping[col] = columnID
|
239
|
+
|
240
|
+
|
241
|
+
if col.startswith('output'):
|
242
|
+
# For output columns, create the prompt template with promptText
|
243
|
+
if promptText:
|
244
|
+
# Extract variables from promptText and set them as dependencies
|
245
|
+
dependencies = []
|
246
|
+
|
247
|
+
# Find variables inside {{variable}}
|
248
|
+
variables = re.findall(r'{{(.*?)}}', promptText)
|
249
|
+
|
250
|
+
# Loop through each variable and check if it exists as a column name
|
251
|
+
for var in variables:
|
252
|
+
varName = var.strip()
|
253
|
+
if varName in columnIDMapping: # Check if the variable is a column name
|
254
|
+
dependencies.append(columnIDMapping[varName]) # Add its columnID
|
255
|
+
|
256
|
+
# Now update the template for the output column
|
257
|
+
|
258
|
+
template={
|
259
|
+
"provider": "OPENAI",
|
260
|
+
"model": "GPT_4o",
|
261
|
+
"promptText": promptText,
|
262
|
+
"modelOptions": {
|
263
|
+
"temperature": 0,
|
264
|
+
"frequencyPenalty": 0,
|
265
|
+
"presencePenalty": 0,
|
266
|
+
"maxToken": 8192
|
267
|
+
},
|
268
|
+
"toolConfig": "none",
|
269
|
+
"concurrency": "",
|
270
|
+
"outputType": "STRING",
|
271
|
+
"isPromptSelected": True,
|
272
|
+
"isSmartPromptSelected": False,
|
273
|
+
"dependency": dependencies, # Use the dependencies extracted from promptText
|
274
|
+
"columnID": columnID, # Use the generated column ID
|
275
|
+
"label": col,
|
276
|
+
"type": "PROMPT",
|
277
|
+
"order": indx,
|
278
|
+
}
|
279
|
+
|
280
|
+
elif col.startswith('Data '):
|
281
|
+
if queryColName and dataStreamName:
|
282
|
+
dependencies = []
|
283
|
+
dependencies.append(columnIDMapping[queryColName])
|
284
|
+
template = {
|
285
|
+
"variableType": "STRING",
|
286
|
+
"dependency": dependencies,
|
287
|
+
"dataStreamName": dataStreamName,
|
288
|
+
"query": columnIDMapping[queryColName],
|
289
|
+
"columnID": columnID, # Use the generated column ID
|
290
|
+
"label": "Data stream",
|
291
|
+
"type": "DATA_STREAM",
|
292
|
+
"order": indx}
|
293
|
+
|
294
|
+
elif col in allEvals:
|
295
|
+
|
296
|
+
dependencies = []
|
297
|
+
variables = re.findall(r'{{(.*?)}}', promptText)
|
298
|
+
|
299
|
+
# Loop through each variable and check if it exists as a column name
|
300
|
+
for var in variables:
|
301
|
+
varName = var.strip()
|
302
|
+
if varName in columnIDMapping: # Check if the variable is a column name
|
303
|
+
dependencies.append(columnIDMapping[varName])
|
304
|
+
|
305
|
+
dependencies.append(columnIDMapping[outputColName]) # Add the output column ID
|
306
|
+
longDef = definationMapping.get(col, {}).get(col, "")
|
307
|
+
shortDef ="You have run this from SDK"
|
308
|
+
enum = col.upper().replace(" ","_")
|
309
|
+
|
310
|
+
template = {
|
311
|
+
"analytics": [
|
312
|
+
col.lower().replace(" ","_")
|
313
|
+
],
|
314
|
+
"evaluationMetric": "ALL",
|
315
|
+
"evaluationModel": "GEMINI_PRO",
|
316
|
+
"selectPrompt": columnIDMapping[outputColName],
|
317
|
+
"scoreCondition": "GREATER_THAN",
|
318
|
+
"scoreValue": "50",
|
319
|
+
"scoreResult": "PASS",
|
320
|
+
"llmKpi": col,
|
321
|
+
"setRules": True,
|
322
|
+
"type": "EVAL",
|
323
|
+
"evalType": "LLM",
|
324
|
+
"similarityMetric": None,
|
325
|
+
"embeddingModel": None,
|
326
|
+
"groundTruth": None,
|
327
|
+
"dataStream": None,
|
328
|
+
"context": None,
|
329
|
+
"dependency": [columnIDMapping[outputColName]],
|
330
|
+
"hallucinationFields": {
|
331
|
+
"query": None,
|
332
|
+
"context": None,
|
333
|
+
"output": None
|
334
|
+
},
|
335
|
+
"definition": longDef,
|
336
|
+
"analyticsENUM": enum,
|
337
|
+
"prompt": shortDef,
|
338
|
+
"analyticsName": col,
|
339
|
+
"columnID": str(uuid.uuid4().hex[:8]),
|
340
|
+
"label": col,
|
341
|
+
"order": indx
|
342
|
+
}
|
343
|
+
|
344
|
+
|
345
|
+
|
346
|
+
|
347
|
+
else:
|
348
|
+
|
349
|
+
template = {
|
350
|
+
"label": col, # Label is the column name
|
351
|
+
"type": "VARIABLE", # Default type for non-output columns
|
352
|
+
"variableType": "STRING",
|
353
|
+
"order": indx,
|
354
|
+
"columnID": columnID, # Use the generated column ID
|
355
|
+
}
|
356
|
+
|
357
|
+
# Append the template to the column list
|
235
358
|
coltemplate["columnListToUpload"].append(template)
|
236
359
|
|
360
|
+
# Prepare the row template structure
|
237
361
|
rowTemplate = {
|
238
362
|
"workspaceID": workspaceID,
|
239
363
|
"playgroundID": playgroundID,
|
@@ -241,14 +365,22 @@ def createColumn(workspaceID, dataframe, playgroundID):
|
|
241
365
|
"columnList": coltemplate["columnListToUpload"],
|
242
366
|
}
|
243
367
|
|
368
|
+
# Populate dataToUploadList with rows from the dataframe
|
244
369
|
for indx, row in dataframe.iterrows():
|
245
|
-
row_dict =
|
370
|
+
row_dict = {}
|
371
|
+
|
372
|
+
# For each column, we need to map the column ID to the corresponding value in the row
|
373
|
+
for col in dataframe.columns:
|
374
|
+
columnID = columnIDMapping[col] # Get the columnID from the mapping
|
375
|
+
row_dict[columnID] = row[col] # Map the columnID to the value in the row
|
376
|
+
|
377
|
+
# Add the row index (if necessary)
|
246
378
|
row_dict["pIndex"] = indx
|
247
379
|
rowTemplate["dataToUploadList"].append(row_dict)
|
248
380
|
|
381
|
+
# Return the column template and the row template
|
249
382
|
return coltemplate, rowTemplate
|
250
383
|
|
251
|
-
|
252
384
|
def uploadColumnListInPlayground(payload):
|
253
385
|
url = uploadColList
|
254
386
|
headers = {
|
@@ -292,10 +424,11 @@ def uploadRowsInDBPlayground(payload):
|
|
292
424
|
return None
|
293
425
|
|
294
426
|
|
295
|
-
def createPlayground(email, workspaceID, df):
|
427
|
+
def createPlayground(email, workspaceID, df, promptText=None,queryColName=None,dataStreamName=None,definationMapping=None,outputColName="output"):
|
428
|
+
|
296
429
|
playgroundId = str(createEvalPlayground(email=email, workspaceID=workspaceID))
|
297
430
|
payload1, payload2 = createColumn(
|
298
|
-
workspaceID=workspaceID, dataframe=df, playgroundID=playgroundId
|
431
|
+
workspaceID=workspaceID, dataframe=df, playgroundID=playgroundId, promptText=promptText,queryColName=queryColName,dataStreamName=dataStreamName,definationMapping=definationMapping,outputColName=outputColName
|
299
432
|
)
|
300
433
|
deleteExistingRows = deleteColumnListInPlayground(
|
301
434
|
workspaceID=workspaceID, playgroundID=playgroundId
|
@@ -305,3 +438,56 @@ def createPlayground(email, workspaceID, df):
|
|
305
438
|
|
306
439
|
if rowListUpload:
|
307
440
|
return True
|
441
|
+
|
442
|
+
|
443
|
+
|
444
|
+
def getPlaygroundInsights(workspaceID: str, activePlayground: str):
|
445
|
+
headers = {
|
446
|
+
|
447
|
+
"Content-Type": "application/json",
|
448
|
+
}
|
449
|
+
|
450
|
+
# Initial request to generate playground insights
|
451
|
+
payload = {
|
452
|
+
"activePlayground": activePlayground,
|
453
|
+
"workspaceID": workspaceID,
|
454
|
+
}
|
455
|
+
|
456
|
+
urlGenerate = createInsightUrl
|
457
|
+
|
458
|
+
responseGenerate = requests.post(urlGenerate, json=payload, headers=headers)
|
459
|
+
|
460
|
+
if responseGenerate.status_code == 200:
|
461
|
+
responseJson = responseGenerate.json()
|
462
|
+
|
463
|
+
insightStatus = responseJson.get("data", {}).get("insight", False)
|
464
|
+
|
465
|
+
if insightStatus:
|
466
|
+
# If insight is true, request to get all playground insights
|
467
|
+
urlGetAll = getPlaygroundInsightsUrl
|
468
|
+
|
469
|
+
responseGetAll = requests.post(urlGetAll, json=payload, headers=headers)
|
470
|
+
|
471
|
+
if responseGetAll.status_code == 200:
|
472
|
+
responseAllJson = responseGetAll.json()
|
473
|
+
|
474
|
+
data = responseAllJson.get("data", {}).get("data", [])
|
475
|
+
|
476
|
+
# Extract insight and solution
|
477
|
+
insights = []
|
478
|
+
for item in data:
|
479
|
+
insight = item.get("insight", "")
|
480
|
+
solution = item.get("solution", "")
|
481
|
+
insights.append({"insight": insight, "solution": solution})
|
482
|
+
|
483
|
+
return insights
|
484
|
+
else:
|
485
|
+
print(f"Error fetching all insights: {responseGetAll.status_code} - {responseGetAll.text}")
|
486
|
+
return None
|
487
|
+
else:
|
488
|
+
print("No insight generated.")
|
489
|
+
return None
|
490
|
+
else:
|
491
|
+
print(f"Error generating insight: {responseGenerate.status_code} - {responseGenerate.text}")
|
492
|
+
return None
|
493
|
+
|
llumo/sockets.py
CHANGED
@@ -0,0 +1,13 @@
|
|
1
|
+
llumo/__init__.py,sha256=O04b4yW1BnOvcHzxWFddAKhtdBEhBNhLdb6xgnpHH_Q,205
|
2
|
+
llumo/client.py,sha256=mVqxNyYMbHlL9lnkpw_H6_n98WYO38ZcLZNnQX1TjUE,35948
|
3
|
+
llumo/exceptions.py,sha256=iCj7HhtO_ckC2EaVBdXbAudNpuMDsYmmMEV5lwynZ-E,1854
|
4
|
+
llumo/execution.py,sha256=x88wQV8eL99wNN5YtjFaAMCIfN1PdfQVlAZQb4vzgQ0,1413
|
5
|
+
llumo/functionCalling.py,sha256=D5jYapu1rIvdIJNUYPYMTyhQ1H-6nkwoOLMi6eekfUE,7241
|
6
|
+
llumo/helpingFuntions.py,sha256=ah0FUQcRV3gfguvjQQ_aZzq59hpJttqAPJdjJVNYdFc,17110
|
7
|
+
llumo/models.py,sha256=YH-qAMnShmUpmKE2LQAzQdpRsaXkFSlOqMxHwU4zBUI,1560
|
8
|
+
llumo/sockets.py,sha256=i90l2rr08paa-ifKy2E5YMIS8r3yRBmu2CUOjhFKork,5579
|
9
|
+
llumo-0.2.13.dist-info/licenses/LICENSE,sha256=tF9yAcfPV9xGT3ViWmC8hPvOo8BEk4ZICbUfcEo8Dlk,182
|
10
|
+
llumo-0.2.13.dist-info/METADATA,sha256=W-ZVsTKhiPw6strlNQEqyufgOLfV9ZCjCpY6Dj3Qc94,1491
|
11
|
+
llumo-0.2.13.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
12
|
+
llumo-0.2.13.dist-info/top_level.txt,sha256=d5zUTMI99llPtLRB8rtSrqELm_bOqX-bNC5IcwlDk88,6
|
13
|
+
llumo-0.2.13.dist-info/RECORD,,
|
llumo-0.2.11.dist-info/RECORD
DELETED
@@ -1,13 +0,0 @@
|
|
1
|
-
llumo/__init__.py,sha256=O04b4yW1BnOvcHzxWFddAKhtdBEhBNhLdb6xgnpHH_Q,205
|
2
|
-
llumo/client.py,sha256=_TpTvzt5ox5MUCZ5O07AY6DYrtIhtezEDAE6nG7nUjs,35632
|
3
|
-
llumo/exceptions.py,sha256=iCj7HhtO_ckC2EaVBdXbAudNpuMDsYmmMEV5lwynZ-E,1854
|
4
|
-
llumo/execution.py,sha256=x88wQV8eL99wNN5YtjFaAMCIfN1PdfQVlAZQb4vzgQ0,1413
|
5
|
-
llumo/functionCalling.py,sha256=D5jYapu1rIvdIJNUYPYMTyhQ1H-6nkwoOLMi6eekfUE,7241
|
6
|
-
llumo/helpingFuntions.py,sha256=lG_d3lQgJj6pI7v1YdLqdPojrLCNwybKz29zXrGaL5k,9090
|
7
|
-
llumo/models.py,sha256=YH-qAMnShmUpmKE2LQAzQdpRsaXkFSlOqMxHwU4zBUI,1560
|
8
|
-
llumo/sockets.py,sha256=Qxxqtx3Hg07HLhA4QfcipK1ChiOYhHZBu02iA6MfYlQ,5579
|
9
|
-
llumo-0.2.11.dist-info/licenses/LICENSE,sha256=tF9yAcfPV9xGT3ViWmC8hPvOo8BEk4ZICbUfcEo8Dlk,182
|
10
|
-
llumo-0.2.11.dist-info/METADATA,sha256=ymWr0L9PnYitu0FnBAZ2NsJnlOxhcsUoUVA8Yn-DXoQ,1491
|
11
|
-
llumo-0.2.11.dist-info/WHEEL,sha256=zaaOINJESkSfm_4HQVc5ssNzHCPXhJm0kEUakpsEHaU,91
|
12
|
-
llumo-0.2.11.dist-info/top_level.txt,sha256=d5zUTMI99llPtLRB8rtSrqELm_bOqX-bNC5IcwlDk88,6
|
13
|
-
llumo-0.2.11.dist-info/RECORD,,
|
File without changes
|
File without changes
|