llumo-0.2.15b1-py3-none-any.whl → llumo-0.2.16-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
llumo/__init__.py CHANGED
@@ -4,4 +4,5 @@ from .helpingFuntions import *
4
4
  from .models import AVAILABLEMODELS
5
5
  from .execution import ModelExecutor
6
6
  from .functionCalling import *
7
-
7
+ from .openai import openai
8
+ from .google import genai
llumo/chains.py ADDED
@@ -0,0 +1,73 @@
1
+ import pandas as pd
2
+ from .helpingFuntions import *
3
+
4
+ class LlumoDataFrameResults(pd.DataFrame):
5
+ _metadata=["evals","evalData","definationMapping"]
6
+
7
+ def __init__(self, *args,evals=None,evalData=None,definationMapping=None,**kwargs):
8
+ self.evals = evals or []
9
+ self.evalData= evalData or []
10
+ self.definationMapping= definationMapping or {}
11
+ super().__init__(*args, **kwargs)
12
+
13
+ @property
14
+ def _constructor(self):
15
+ # Needed so slicing operations return the same type
16
+ return LlumoDataFrameResults
17
+
18
+ def insights(self):
19
+
20
+ if not self.evalData:
21
+ print("No raw data available. Please run evaluateMultiple() first.")
22
+ return None
23
+ try:
24
+ insights=[]
25
+ reasonData,uniqueEdgecase=groupLogsByClass(self.evalData) # print(rawResults)
26
+
27
+ for evalname in self.evals:
28
+ uniqueclassesstring = ",".join(uniqueEdgecase.get(evalname, []))
29
+ allReasons = []
30
+ for edgeCase in reasonData[evalname]:
31
+ allReasons.extend(reasonData[evalname][edgeCase])
32
+
33
+ evalDefinition = self.definationMapping.get(evalname, {}).get("definition", "")
34
+
35
+ insights.append(getPlaygroundInsights(evalDefinition,uniqueclassesstring,allReasons))
36
+ return insights
37
+ except Exception as e:
38
+
39
+ print("Can not genrate insights for this eval, please try again later.")
40
+
41
+
42
+ class LlumoDictResults(list):
43
+ _metadata=["evals","evalData","definationMapping"]
44
+
45
+ def __init__(self, *args,evals=None,evalData=None,definationMapping=None,**kwargs):
46
+ self.evals = evals or []
47
+ self.evalData= evalData or []
48
+ self.definationMapping= definationMapping or {}
49
+ super().__init__(*args, **kwargs) # This will handle list[dict]
50
+
51
+ def insights(self):
52
+
53
+ if not self.evalData:
54
+ print("No raw data available. Please run evaluateMultiple() first.")
55
+ return None
56
+ try:
57
+ insights=[]
58
+ reasonData,uniqueEdgecase=groupLogsByClass(self.evalData) # print(rawResults)
59
+ for evalname in self.evals:
60
+ uniqueclassesstring = ",".join(uniqueEdgecase.get(evalname, []))
61
+ allReasons = []
62
+ for edgeCase in reasonData[evalname]:
63
+ allReasons.extend(reasonData[evalname][edgeCase])
64
+ evalDefinition = self.definationMapping.get(evalname, {}).get("definition", "")
65
+ insights.append(getPlaygroundInsights(evalDefinition,uniqueclassesstring,allReasons))
66
+ return insights
67
+ except Exception as e:
68
+ print("Can not genrate insights for this eval, please try again later.")
69
+
70
+
71
+ for _cls in (LlumoDataFrameResults, LlumoDictResults):
72
+ _cls.__name__ = "LlumoResults"
73
+ _cls.__qualname__ = "LlumoResults"
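
The two result wrappers above carry the raw evaluation logs (evalData), the list of evals and the metric definitions alongside the returned rows, so insights() can be called after the fact. A minimal usage sketch — the API key and the sample record are placeholders, not part of this diff:

    from llumo import LlumoClient

    client = LlumoClient(api_key="LLUMO_API_KEY")  # placeholder key
    rows = [{
        "query": "What is retrieval-augmented generation?",
        "context": "RAG retrieves documents and passes them to the model.",
        "output": "RAG augments an LLM with retrieved context.",
    }]

    # getDataFrame=True  -> LlumoDataFrameResults (a pandas DataFrame subclass)
    # getDataFrame=False -> LlumoDictResults (a list of per-row dicts)
    results = client.evaluateMultiple(rows, evals=["Response Correctness"], getDataFrame=True)

    # insights() groups the cached eval logs by edge case and asks the Llumo API for an
    # analysis; it prints a notice and returns None if no raw eval data is cached.
    print(results.insights())
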
llumo/client.py CHANGED
@@ -5,20 +5,23 @@ import time
5
5
  import re
6
6
  import json
7
7
  import uuid
8
-
8
+ import warnings
9
9
  import os
10
10
  import itertools
11
11
  import pandas as pd
12
12
  from typing import List, Dict
13
- from .models import AVAILABLEMODELS, getProviderFromModel
13
+ from .models import AVAILABLEMODELS, getProviderFromModel, Provider
14
14
  from .execution import ModelExecutor
15
15
  from .exceptions import LlumoAIError
16
16
  from .helpingFuntions import *
17
17
  from .sockets import LlumoSocketClient
18
18
  from .functionCalling import LlumoAgentExecutor
19
+ from .chains import LlumoDataFrameResults,LlumoDictResults
19
20
  import threading
20
21
  from tqdm import tqdm
21
22
 
23
+ pd.set_option('future.no_silent_downcasting', True)
24
+
22
25
  postUrl = (
23
26
  "https://red-skull-service-392377961931.us-central1.run.app/api/process-playground"
24
27
  )
@@ -38,7 +41,8 @@ class LlumoClient:
38
41
 
39
42
  def __init__(self, api_key):
40
43
  self.apiKey = api_key
41
-
44
+ self.evalData=[]
45
+ self.evals=[]
42
46
  self.processMapping = {}
43
47
  self.definationMapping = {}
44
48
 
@@ -50,7 +54,7 @@ class LlumoClient:
50
54
  reqBody = {"analytics": [evalName]}
51
55
 
52
56
  try:
53
- print(reqBody)
57
+
54
58
  response = requests.post(url=validateUrl, json=reqBody, headers=headers)
55
59
 
56
60
  except requests.exceptions.RequestException as e:
@@ -393,27 +397,51 @@ class LlumoClient:
393
397
  outputColName=outputColName,
394
398
  ):
395
399
  print(
396
- "Your data has been saved in the Llumo Experiment. Visit https://app.llumo.ai/evallm to see the results.Please rerun the experiment to see the results on playground."
400
+ "LLUMO’s intuitive UI is ready—start exploring and experimenting with your logs now. Visit https://llumo.ai/evallm to see the results.Please rerun the experiment to see the results on playground."
397
401
  )
398
402
  else:
399
403
  return dataframe
400
404
 
401
405
  # this function allows the users to run multiple evals at once
402
406
 
403
- def evaluateCompressor(self, data, prompt_template):
407
+ def compressor(self, data, prompt_template):
404
408
  results = []
409
+ if isinstance(data, dict):
410
+ data = [data]
411
+ elif not isinstance(data, list):
412
+ raise ValueError("Data should be a dict or a list of dicts.")
413
+
405
414
  dataframe = pd.DataFrame(data)
415
+
406
416
  try:
407
- socketID = self.socket.connect(timeout=150)
417
+ self.socket = LlumoSocketClient(socketUrl)
418
+ dataframe = pd.DataFrame(data).astype(str)
419
+ socketID = self.socket.connect(timeout=250)
420
+
421
+ # Wait for socket connection
408
422
  max_wait_secs = 20
409
423
  waited_secs = 0
410
424
  while not self.socket._connection_established.is_set():
411
425
  time.sleep(0.1)
412
426
  waited_secs += 0.1
413
427
  if waited_secs >= max_wait_secs:
414
- raise RuntimeError(
415
- "Timeout waiting for server 'connection-established' event."
416
- )
428
+ raise RuntimeError("Timeout waiting for server connection")
429
+
430
+ # Start listener thread
431
+ expectedResults = len(dataframe)
432
+ # print("expected result" ,expectedResults)
433
+ timeout = max(100, min(150, expectedResults * 10))
434
+ listener_thread = threading.Thread(
435
+ target=self.socket.listenForResults,
436
+ kwargs={
437
+ "min_wait": 40,
438
+ "max_wait": timeout,
439
+ "inactivity_timeout": 10,
440
+ "expected_results": expectedResults,
441
+ },
442
+ daemon=True,
443
+ )
444
+ listener_thread.start()
417
445
 
418
446
  try:
419
447
  self.validateApiKey()
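
evaluateCompressor is renamed to compressor in this hunk and now accepts either a single dict or a list of dicts, spinning up its own socket client and listener thread. A hedged sketch of a call — the key and data are placeholders:

    from llumo import LlumoClient

    client = LlumoClient(api_key="LLUMO_API_KEY")  # placeholder key

    # A single dict is wrapped into a one-element list; any other type raises ValueError.
    row = {"query": "Summarise the ticket", "context": "Very long support transcript ..."}
    result = client.compressor(row, prompt_template="Answer {{query}} using {{context}}")

    # Based on the later hunks, each row gains a "Compressed Input" column in the result.
    print(result)
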
@@ -508,7 +536,7 @@ class LlumoClient:
508
536
  "playgroundID": activePlayground,
509
537
  }
510
538
 
511
- rowIdMapping[rowID] = index
539
+ rowIdMapping[f'{rowID}-{columnID}-{columnID}'] = index
512
540
  # print("__________________________TEMPLATE__________________________________")
513
541
  # print(templateData)
514
542
 
@@ -537,7 +565,19 @@ class LlumoClient:
537
565
  expected_results=None,
538
566
  )
539
567
 
540
- results = self.socket.getReceivedData()
568
+ rawResults = self.socket.getReceivedData()
569
+ receivedRowIDs = {key for item in rawResults for key in item.keys()}
570
+ expectedRowIDs = set(rowIdMapping.keys())
571
+ missingRowIDs = expectedRowIDs - receivedRowIDs
572
+ # print("All expected keys:", expected_rowIDs)
573
+ # print("All received keys:", received_rowIDs)
574
+ # print("Missing keys:", len(missingRowIDs))
575
+ missingRowIDs = list(missingRowIDs)
576
+
577
+ if len(missingRowIDs) > 0:
578
+ dataFromDb = fetchData(workspaceID, activePlayground, missingRowIDs)
579
+ rawResults.extend(dataFromDb)
580
+
541
581
  # results = self.finalResp(eval_results)
542
582
  # print(f"======= Completed evaluation: {eval} =======\n")
543
583
 
@@ -551,10 +591,10 @@ class LlumoClient:
551
591
  print(f"Error disconnecting socket: {e}")
552
592
 
553
593
  dataframe["Compressed Input"] = None
554
- for records in results:
594
+ for records in rawResults:
555
595
  for compound_key, value in records.items():
556
596
  # for compound_key, value in item['data'].items():
557
- rowID = compound_key.split("-")[0]
597
+ rowID = compound_key
558
598
  # looking for the index of each rowID , in the original dataframe
559
599
  if rowID in rowIdMapping:
560
600
  index = rowIdMapping[rowID]
@@ -577,16 +617,24 @@ class LlumoClient:
577
617
  self,
578
618
  data,
579
619
  evals: list, # list of eval metric names
580
- prompt_template="",
620
+ prompt_template="Give answer to the given query: {{query}} using the given context: {{context}}.",
581
621
  outputColName="output",
582
622
  createExperiment: bool = False,
623
+ getDataFrame:bool =False,
583
624
  _tocheck=True,
584
625
  ):
626
+ if isinstance(data, dict):
627
+ data = [data]
628
+ elif not isinstance(data, list):
629
+ raise ValueError("Data should be a dict or a list of dicts.")
630
+
585
631
  self.socket = LlumoSocketClient(socketUrl)
586
632
  dataframe = pd.DataFrame(data).astype(str)
587
633
  workspaceID = None
588
634
  email = None
589
635
  socketID = self.socket.connect(timeout=250)
636
+ self.evalData=[]
637
+ self.evals=evals
590
638
  self.allBatches = []
591
639
  rowIdMapping = {} # (rowID-columnID-columnID -> (index, evalName))
592
640
 
@@ -614,10 +662,14 @@ class LlumoClient:
614
662
  daemon=True,
615
663
  )
616
664
  listener_thread.start()
617
-
618
- activePlayground = f"{int(time.time() * 1000)}{uuid.uuid4()}".replace(
619
- "-", ""
620
- )
665
+ self.validateApiKey(evalName=evals[0])
666
+ if createExperiment:
667
+ activePlayground = str(createEvalPlayground(email=self.email, workspaceID=self.workspaceID))
668
+
669
+ else:
670
+ activePlayground = f"{int(time.time() * 1000)}{uuid.uuid4()}".replace(
671
+ "-", ""
672
+ )
621
673
  for evalName in evals:
622
674
  # print(f"\n======= Running evaluation for: {evalName} =======")
623
675
 
@@ -776,10 +828,8 @@ class LlumoClient:
776
828
  rawResults.extend(dataFromDb)
777
829
 
778
830
 
831
+ self.evalData = rawResults
779
832
 
780
-
781
-
782
-
783
833
  # Initialize dataframe columns for each eval
784
834
  for eval in evals:
785
835
  dataframe[eval] = None
@@ -799,7 +849,12 @@ class LlumoClient:
799
849
 
800
850
  if createExperiment:
801
851
  pd.set_option("future.no_silent_downcasting", True)
802
- df = dataframe.fillna("Some error occured").astype(object)
852
+ # df = dataframe.fillna("Some error occured").astype(object)
853
+ with warnings.catch_warnings():
854
+ warnings.simplefilter(action='ignore', category=FutureWarning)
855
+ df = dataframe.fillna("Some error occurred").astype(str)
856
+
857
+ df = dataframe.fillna("Some error occured").infer_objects(copy=False)
803
858
  if createPlayground(
804
859
  email,
805
860
  workspaceID,
@@ -807,13 +862,19 @@ class LlumoClient:
807
862
  promptText=prompt_template,
808
863
  definationMapping=self.definationMapping,
809
864
  outputColName=outputColName,
865
+ activePlayground= activePlayground
810
866
  ):
811
867
  print(
812
- "Your data has been saved in the Llumo Experiment. Visit https://app.llumo.ai/evallm to see the results."
868
+ "LLUMO’s intuitive UI is ready—start exploring and experimenting with your logs now. Visit https://llumo.ai/evallm to see the results."
813
869
  )
814
870
  else:
815
- return dataframe
871
+ if getDataFrame:
872
+ return LlumoDataFrameResults(dataframe,evals=self.evals,evalData=self.evalData,definationMapping=self.definationMapping)
873
+ else:
874
+ data=dataframe.to_dict(orient="records")
875
+ return LlumoDictResults(data,evals=self.evals,evalData=self.evalData,definationMapping=self.definationMapping)
816
876
 
877
+
817
878
  def promptSweep(
818
879
  self,
819
880
  templates: List[str],
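
With getDataFrame left at its default of False, evaluateMultiple now returns a LlumoDictResults, i.e. a list of per-row dicts; scores land in a column named after the eval and reasons in an "<eval> Reason" column (column names inferred from this diff). A short sketch with placeholder values:

    from llumo import LlumoClient

    client = LlumoClient(api_key="LLUMO_API_KEY")  # placeholder key
    rows = [{"query": "Define RAG", "context": "Retrieval-augmented generation ...", "output": "RAG is ..."}]

    # prompt_template now defaults to a query/context template when omitted.
    results = client.evaluateMultiple(rows, evals=["Response Completeness"])
    for row in results:
        print(row.get("Response Completeness"), row.get("Response Completeness Reason"))
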
@@ -823,9 +884,14 @@ class LlumoClient:
823
884
  evals=["Response Correctness"],
824
885
  toEvaluate: bool = False,
825
886
  createExperiment: bool = False,
887
+ getDataFrame = False
826
888
 
827
889
 
828
890
  ) -> pd.DataFrame:
891
+ if isinstance(dataset, dict):
892
+ dataset = [dataset]
893
+ elif not isinstance(dataset, list):
894
+ raise ValueError("Data should be a dict or a list of dicts.")
829
895
 
830
896
  modelStatus = validateModels(model_aliases=model_aliases)
831
897
  if modelStatus["status"]== False:
@@ -878,6 +944,8 @@ class LlumoClient:
878
944
  prompt_template=str(templates[0]),
879
945
  outputColName=outputColName,
880
946
  _tocheck=False,
947
+ getDataFrame=True,
948
+ createExperiment = False
881
949
  )
882
950
 
883
951
  # Rename all new columns with _i+1 (e.g., _1, _2)
@@ -910,10 +978,17 @@ class LlumoClient:
910
978
  ):
911
979
 
912
980
  print(
913
- "Your data has been saved in the Llumo Experiment. Visit https://app.llumo.ai/evallm to see the results."
981
+ "LLUMO’s intuitive UI is ready—start exploring and experimenting with your logs now. Visit https://llumo.ai/evallm to see the results."
914
982
  )
915
983
  else:
916
- return dfWithEvals
984
+ if getDataFrame:
985
+ return LlumoDataFrameResults(dfWithEvals, evals=self.evals, evalData=self.evalData,
986
+ definationMapping=self.definationMapping)
987
+ else:
988
+ data = dfWithEvals.to_dict(orient="records")
989
+ return LlumoDictResults(data, evals=self.evals, evalData=self.evalData,definationMapping=self.definationMapping)
990
+
991
+
917
992
  else:
918
993
  if createExperiment == True:
919
994
  pd.set_option("future.no_silent_downcasting", True)
@@ -921,10 +996,18 @@ class LlumoClient:
921
996
 
922
997
  if createPlayground(email, workspaceID, df, promptText=templates[0]):
923
998
  print(
924
- "Your data has been saved in the Llumo Experiment. Visit https://app.llumo.ai/evallm to see the results."
999
+ "LLUMO’s intuitive UI is ready—start exploring and experimenting with your logs now. Visit https://llumo.ai/evallm to see the results."
925
1000
  )
926
1001
  else:
927
- return df
1002
+ if getDataFrame:
1003
+ return LlumoDataFrameResults(df, evals=self.evals, evalData=self.evalData,
1004
+ definationMapping=self.definationMapping)
1005
+ else:
1006
+ data = df.to_dict(orient="records")
1007
+ return LlumoDictResults(data, evals=self.evals, evalData=self.evalData,
1008
+ definationMapping=self.definationMapping)
1009
+
1010
+
928
1011
 
929
1012
  # this function generates an output using llm and tools and evaluate that output
930
1013
  def evaluateAgents(
@@ -936,8 +1019,14 @@ class LlumoClient:
936
1019
  evals=["Final Task Alignment"],
937
1020
  prompt_template="Give answer for the given query: {{query}}",
938
1021
  createExperiment: bool = False,
1022
+ getDataFrame:bool = False
939
1023
 
940
1024
  ):
1025
+ if isinstance(data, dict):
1026
+ data = [data]
1027
+ elif not isinstance(data, list):
1028
+ raise ValueError("Data should be a dict or a list of dicts.")
1029
+
941
1030
  if model.lower() not in ["openai", "google"]:
942
1031
  raise ValueError("Model must be 'openai' or 'google'")
943
1032
 
@@ -961,27 +1050,37 @@ class LlumoClient:
961
1050
  toolResponseDf.to_dict(orient="records"),
962
1051
  evals=evals,
963
1052
  prompt_template=prompt_template,
964
- createExperiment=False,
1053
+ createExperiment=createExperiment,
1054
+ getDataFrame=getDataFrame
1055
+
965
1056
  )
966
1057
 
967
- if createExperiment:
968
- pd.set_option("future.no_silent_downcasting", True)
969
- df = toolResponseDf.fillna("Some error occured")
970
- if createPlayground(self.email, self.workspaceID, df,promptText=prompt_template,definationMapping=self.definationMapping):
971
- print(
972
- "Your data has been saved in the Llumo Experiment. Visit https://app.llumo.ai/evallm to see the results."
973
- )
974
- else:
975
- return toolResponseDf
1058
+ return toolResponseDf
1059
+ # if createExperiment:
1060
+ # pd.set_option("future.no_silent_downcasting", True)
1061
+ # df = toolResponseDf.fillna("Some error occured")
1062
+ # if createPlayground(self.email, self.workspaceID, df,promptText=prompt_template,definationMapping=self.definationMapping):
1063
+ # print(
1064
+ # "LLUMO’s intuitive UI is ready—start exploring and experimenting with your logs now. Visit https://llumo.ai/evallm to see the results."
1065
+ # )
1066
+ # else:
1067
+ # return toolResponseDf
976
1068
 
977
1069
  # this function evaluate that tools output given by the user
978
1070
  def evaluateAgentResponses(
979
1071
  self,
980
1072
  data,
981
1073
  evals=["Final Task Alignment"],
982
- outputColName="output",
983
1074
  createExperiment: bool = False,
1075
+ getDataFrame = False,
1076
+ outputColName="output"
1077
+
984
1078
  ):
1079
+ if isinstance(data, dict):
1080
+ data = [data]
1081
+ elif not isinstance(data, list):
1082
+ raise ValueError("Data should be a dict or a list of dicts.")
1083
+
985
1084
  dataframe = pd.DataFrame(data)
986
1085
 
987
1086
  try:
@@ -1001,7 +1100,9 @@ class LlumoClient:
1001
1100
  evals=evals,
1002
1101
  prompt_template="Give answer for the given query: {{query}}",
1003
1102
  outputColName=outputColName,
1004
- createExperiment=createExperiment
1103
+ createExperiment=createExperiment,
1104
+ getDataFrame = getDataFrame
1105
+
1005
1106
  )
1006
1107
  if createExperiment:
1007
1108
  pass
@@ -1022,8 +1123,14 @@ class LlumoClient:
1022
1123
  prompt_template="Give answer to the given: {{query}} using the context:{{context}}",
1023
1124
  evals=["Context Utilization"],
1024
1125
  toEvaluate=False,
1025
- generateOutput=True
1126
+ generateOutput=True,
1127
+ getDataFrame = False
1026
1128
  ):
1129
+ if isinstance(data, dict):
1130
+ data = [data]
1131
+ elif not isinstance(data, list):
1132
+ raise ValueError("Data should be a dict or a list of dicts.")
1133
+
1027
1134
  # Validate required parameters
1028
1135
  if generateOutput:
1029
1136
  if not modelAliases:
@@ -1147,23 +1254,25 @@ class LlumoClient:
1147
1254
  outputEvalMapping = None
1148
1255
  if toEvaluate:
1149
1256
  for evalName in evals:
1150
-
1151
1257
  # Validate API and dependencies
1152
1258
  self.validateApiKey(evalName=evalName)
1153
1259
  metricDependencies = checkDependency(
1154
- evalName, list(working_df.columns), tocheck=True
1260
+ evalName, list(working_df.columns), tocheck=False
1155
1261
  )
1156
1262
  if not metricDependencies["status"]:
1157
1263
  raise LlumoAIError.dependencyError(metricDependencies["message"])
1158
1264
 
1159
- working_df, outputEvalMapping = self._evaluateForStream(working_df, evals, modelAliases, prompt_template)
1160
-
1265
+ working_df, outputEvalMapping = self._evaluateForStream(working_df, evals, modelAliases, prompt_template,generateOutput)
1161
1266
 
1267
+
1162
1268
  self.socket.disconnect()
1163
1269
 
1164
1270
  # Create experiment if required
1165
1271
  if createExperiment:
1166
- df = working_df.fillna("Some error occured").astype(object)
1272
+ # df = working_df.fillna("Some error occured").astype(object)
1273
+ with warnings.catch_warnings():
1274
+ warnings.simplefilter(action='ignore', category=FutureWarning)
1275
+ df = working_df.fillna("Some error occurred").astype(str)
1167
1276
  if createPlayground(
1168
1277
  email, workspaceID, df,
1169
1278
  queryColName=queryColName,
@@ -1173,10 +1282,22 @@ class LlumoClient:
1173
1282
  evalOutputMap=outputEvalMapping
1174
1283
  ):
1175
1284
  print(
1176
- "Your data has been saved in the Llumo Experiment. Visit https://app.llumo.ai/evallm to see the results.")
1285
+ "LLUMO’s intuitive UI is ready—start exploring and experimenting with your logs now. Visit https://llumo.ai/evallm to see the results.")
1177
1286
  else:
1178
- self.latestDataframe = working_df
1179
- return working_df
1287
+ if getDataFrame == True and toEvaluate == True:
1288
+ return LlumoDataFrameResults(working_df, evals=self.evals, evalData=self.evalData,
1289
+ definationMapping=self.definationMapping)
1290
+
1291
+ elif getDataFrame == False and toEvaluate == True:
1292
+ data = working_df.to_dict(orient="records")
1293
+ return LlumoDictResults(data, evals=self.evals, evalData=self.evalData,
1294
+ definationMapping=self.definationMapping)
1295
+
1296
+ elif getDataFrame== True and toEvaluate == False:
1297
+ return working_df
1298
+
1299
+ elif getDataFrame == False and toEvaluate == False :
1300
+ return working_df.to_dict(orient = "records")
1180
1301
 
1181
1302
  def _outputForStream(self, df, modelAliases, prompt_template, apiKey):
1182
1303
  executor = ModelExecutor(apiKey)
@@ -1189,45 +1310,81 @@ class LlumoClient:
1189
1310
  inputDict = {key: row[key] for key in inputVariables}
1190
1311
  for i, model in enumerate(modelAliases, 1):
1191
1312
  try:
1313
+
1192
1314
  provider = getProviderFromModel(model)
1315
+ if provider == Provider.OPENAI:
1316
+ validateOpenaiKey(apiKey)
1317
+ elif provider == Provider.GOOGLE:
1318
+ validateGoogleKey(apiKey)
1319
+
1193
1320
  filled_template = getInputPopulatedPrompt(prompt_template, inputDict)
1194
1321
  response = executor.execute(provider, model.value, filled_template, apiKey)
1195
1322
  df.at[indx, f"output_{i}"] = response
1323
+
1196
1324
  except Exception as e:
1197
- df.at[indx, f"output_{i}"] = str(e)
1325
+ # df.at[indx, f"output_{i}"] = str(e)
1326
+ raise e
1327
+
1198
1328
  return df
1199
1329
 
1200
- def _evaluateForStream(self, df, evals, modelAliases, prompt_template):
1330
+ def _evaluateForStream(self, df, evals, modelAliases, prompt_template, generateOutput):
1201
1331
  dfWithEvals = df.copy()
1202
-
1203
1332
  outputColMapping = {}
1204
1333
 
1205
- for i, model in enumerate(modelAliases, 1):
1206
- outputColName = f"output_{i}"
1207
- try:
1334
+ if generateOutput:
1335
+ # Evaluate per model output
1336
+ for i, model in enumerate(modelAliases, 1):
1337
+ outputColName = f"output_{i}"
1338
+ try:
1339
+ res = self.evaluateMultiple(
1340
+ dfWithEvals.to_dict("records"),
1341
+ evals=evals,
1342
+ prompt_template=prompt_template,
1343
+ outputColName=outputColName,
1344
+ _tocheck=False,
1345
+ getDataFrame=True,
1346
+ createExperiment=False
1347
+ )
1348
+
1349
+ for evalMetric in evals:
1350
+ scoreCol = f"{evalMetric}"
1351
+ reasonCol = f"{evalMetric} Reason"
1352
+ if scoreCol in res.columns:
1353
+ res = res.rename(columns={scoreCol: f"{scoreCol}_{i}"})
1354
+ if reasonCol in res.columns:
1355
+ res = res.rename(columns={reasonCol: f"{evalMetric}_{i} Reason"})
1208
1356
 
1357
+ outputColMapping[f"{scoreCol}_{i}"] = outputColName
1358
+
1359
+ newCols = [col for col in res.columns if col not in dfWithEvals.columns]
1360
+ dfWithEvals = pd.concat([dfWithEvals, res[newCols]], axis=1)
1361
+
1362
+ except Exception as e:
1363
+ print(f"Evaluation failed for model {model.value}: {str(e)}")
1364
+
1365
+ else:
1366
+ # Evaluate only once on "output" column
1367
+ try:
1368
+ outputColName = "output"
1209
1369
  res = self.evaluateMultiple(
1210
1370
  dfWithEvals.to_dict("records"),
1211
1371
  evals=evals,
1212
1372
  prompt_template=prompt_template,
1213
1373
  outputColName=outputColName,
1214
1374
  _tocheck=False,
1375
+ getDataFrame=True,
1376
+ createExperiment=False
1215
1377
  )
1216
1378
  for evalMetric in evals:
1217
1379
  scoreCol = f"{evalMetric}"
1218
1380
  reasonCol = f"{evalMetric} Reason"
1219
-
1220
- if scoreCol in res.columns:
1221
- res = res.rename(columns={scoreCol: f"{scoreCol}_{i}"})
1222
- if reasonCol in res.columns:
1223
- res = res.rename(columns={reasonCol: f"{evalMetric}_{i} Reason"})
1224
-
1225
- outputColMapping[f"{scoreCol}_{i}"] = outputColName
1381
+ outputColMapping[scoreCol] = "output"
1226
1382
 
1227
1383
  newCols = [col for col in res.columns if col not in dfWithEvals.columns]
1228
1384
  dfWithEvals = pd.concat([dfWithEvals, res[newCols]], axis=1)
1229
1385
  except Exception as e:
1230
- print(f"Evaluation failed for model {model.value}: {str(e)}")
1386
+ print(f"Evaluation failed: {str(e)}")
1387
+
1231
1388
  return dfWithEvals, outputColMapping
1232
1389
 
1233
1390
  def runDataStream(
@@ -1236,8 +1393,13 @@ class LlumoClient:
1236
1393
  streamName: str,
1237
1394
  queryColName: str = "query",
1238
1395
  createExperiment: bool = False,
1396
+ getDataFrame = False
1239
1397
  ):
1240
1398
 
1399
+ if isinstance(data, dict):
1400
+ data = [data]
1401
+ elif not isinstance(data, list):
1402
+ raise ValueError("Data should be a dict or a list of dicts.")
1241
1403
 
1242
1404
  # Copy the original dataframe
1243
1405
  original_df = pd.DataFrame(data)
@@ -1354,10 +1516,16 @@ class LlumoClient:
1354
1516
  definationMapping=self.definationMapping,
1355
1517
  ):
1356
1518
  print(
1357
- "Your data has been saved in the Llumo Experiment. Visit https://app.llumo.ai/evallm to see the results.")
1519
+ "LLUMO’s intuitive UI is ready—start exploring and experimenting with your logs now. Visit https://llumo.ai/evallm to see the results.")
1358
1520
  else:
1359
- self.latestDataframe = working_df
1360
- return working_df
1521
+ if getDataFrame:
1522
+ return working_df
1523
+
1524
+ else:
1525
+ data = working_df.to_dict(orient="records")
1526
+ return data
1527
+ # self.latestDataframe = working_df
1528
+ # return working_df
1361
1529
 
1362
1530
 
1363
1531
  def createExperiment(self, dataframe):
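
runDataStream gains the same getDataFrame switch and by default now returns the processed rows as a list of dicts instead of stashing the DataFrame on the client. A hedged sketch — the stream name, key and data are placeholders:

    from llumo import LlumoClient

    client = LlumoClient(api_key="LLUMO_API_KEY")  # placeholder key
    rows = [{"query": "What is the refund policy?"}]

    # getDataFrame=False (default) -> list of dicts; getDataFrame=True -> DataFrame.
    records = client.runDataStream(rows, streamName="my-knowledge-base", queryColName="query")
    print(records[0])
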
@@ -1367,12 +1535,12 @@ class LlumoClient:
1367
1535
  flag = createPlayground(self.email, self.workspaceID, dataframe)
1368
1536
  if flag:
1369
1537
  print(
1370
- "Your data has been saved in the Llumo Experiment. Visit https://app.llumo.ai/evallm to see the results."
1538
+ "LLUMO’s intuitive UI is ready—start exploring and experimenting with your logs now. Visit https://llumo.ai/evallm to see the results."
1371
1539
  )
1372
1540
  except Exception as e:
1373
1541
  raise "Some error ocuured please check your API key"
1374
1542
 
1375
- def upload(self, file_path):
1543
+ def uploadfile(self, file_path):
1376
1544
 
1377
1545
  workspaceID = None
1378
1546
  email = None
@@ -1407,13 +1575,34 @@ class LlumoClient:
1407
1575
  if createPlayground(self.email, self.workspaceID, df):
1408
1576
 
1409
1577
  print(
1410
- "Your data has been saved in the Llumo Experiment. Visit https://app.llumo.ai/evallm to see the results."
1578
+ "LLUMO’s intuitive UI is ready—start exploring and experimenting with your logs now. Visit https://llumo.ai/evallm to see the results."
1411
1579
  )
1412
1580
 
1413
1581
  return True
1414
1582
 
1415
1583
  except Exception as e:
1416
1584
  print(f"Error: {e}")
1585
+
1586
+ def upload(self,data):
1587
+ try:
1588
+ if isinstance(data, dict):
1589
+ data = [data]
1590
+ # Check if data is now a list of dictionaries
1591
+ if isinstance(data, list) and all(isinstance(item, dict) for item in data):
1592
+ dataframe = pd.DataFrame(data).astype(str)
1593
+ else:
1594
+ raise ValueError("Data must be a dictionary or a list of dictionaries.")
1595
+ self.validateApiKey()
1596
+ if createPlayground(self.email, self.workspaceID, dataframe):
1597
+ print(
1598
+ "LLUMO’s intuitive UI is ready—start exploring and experimenting with your logs now. Visit https://llumo.ai/evallm to see the results."
1599
+ )
1600
+ return True
1601
+
1602
+ except Exception as e:
1603
+ print(f"Error: {e}")
1604
+ return False
1605
+
1417
1606
 
1418
1607
 
1419
1608
  class SafeDict(dict):
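
The old file-based upload(file_path) is renamed uploadfile, and a new upload(data) takes a dict or a list of dicts and pushes it straight into a playground, returning True on success and False on failure. A sketch with placeholder values:

    from llumo import LlumoClient

    client = LlumoClient(api_key="LLUMO_API_KEY")  # placeholder key

    ok = client.upload({"query": "hello", "output": "hi there"})  # new dict-based upload; True on success
    client.uploadfile("experiment_logs.csv")                      # renamed file-based flow; hypothetical path
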
llumo/execution.py CHANGED
@@ -25,15 +25,14 @@ class ModelExecutor:
25
25
  return response.choices[0].message.content
26
26
 
27
27
  def _executeGoogle(self, modelName: str, prompt: str,api_key) -> str:
28
-
28
+
29
29
  # Configure GenAI with API Key
30
30
  genai.configure(api_key=api_key)
31
-
31
+
32
32
  # Select Generative Model
33
33
  model = genai.GenerativeModel("gemini-2.0-flash-lite")
34
34
  # Generate Response
35
35
  response = model.generate_content(prompt)
36
36
  return response.text
37
-
38
-
39
-
37
+
38
+
llumo/google.py ADDED
@@ -0,0 +1,66 @@
1
+ from google import generativeai as _genai
2
+ from .client import LlumoClient
3
+
4
+
5
+ def evaluate_multiple(data, api_key=None, evals=["Response Correctness"]):
6
+ client = LlumoClient(api_key=api_key)
7
+ results = client.evaluateMultiple(
8
+ data,
9
+ evals=evals,
10
+ createExperiment=False,
11
+ prompt_template="Give answer to the query: {{query}}, using context: {{context}}",
12
+ getDataFrame=False
13
+ )
14
+ return results
15
+
16
+
17
+ class ChatCompletionWithEval:
18
+ def __init__(self, response, evaluation):
19
+ self._response = response
20
+ self.evaluation = evaluation
21
+
22
+ def __getattr__(self, name):
23
+ return getattr(self._response, name)
24
+
25
+ def __getitem__(self, key):
26
+ return self._response[key]
27
+
28
+ def __repr__(self):
29
+ return repr(self._response)
30
+
31
+
32
+ class genai:
33
+ class GenerativeModel:
34
+ def __init__(self, api_key: str, model: str = "gemini-2.5-flash"):
35
+ _genai.configure(api_key=api_key)
36
+ self._api_key = api_key
37
+ self._model_name = model
38
+ self._model_instance = _genai.GenerativeModel(model_name=model)
39
+
40
+ def generate_content(self, contents: str | list[str], **kwargs):
41
+ context = kwargs.pop("context", None)
42
+ evals = kwargs.pop("evals", [])
43
+ llumo_key = kwargs.pop("llumo_key", None)
44
+
45
+ # Run Gemini generation
46
+ response = self._model_instance.generate_content(contents=contents, **kwargs)
47
+ output = response.text
48
+
49
+ eval_input = [{
50
+ "query": contents,
51
+ "context": context or contents,
52
+ "output": output,
53
+ }]
54
+
55
+ evaluation = None
56
+ try:
57
+ evaluation = evaluate_multiple(data=eval_input, evals=evals, api_key=llumo_key)
58
+ except Exception as e:
59
+ evaluation = None
60
+
61
+ if evaluation is None:
62
+ print("Cannot process your request for evaluation, please check your api and try again later.")
63
+ return response
64
+
65
+
66
+ return ChatCompletionWithEval(response, evaluation)
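
The new llumo.google module wraps google.generativeai: generate_content() produces the Gemini response as usual, then runs the requested evals through LlumoClient and attaches them as .evaluation (or returns the plain response if evaluation fails). A sketch with placeholder keys:

    from llumo import genai

    model = genai.GenerativeModel(api_key="GOOGLE_API_KEY", model="gemini-2.5-flash")  # placeholder key
    response = model.generate_content(
        "What is retrieval-augmented generation?",
        context="RAG retrieves documents and feeds them to the model.",
        evals=["Response Correctness"],
        llumo_key="LLUMO_API_KEY",  # placeholder key
    )
    print(response.text)        # plain Gemini output
    print(response.evaluation)  # Llumo eval results; absent if evaluation failed
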
llumo/helpingFuntions.py CHANGED
@@ -8,6 +8,9 @@ import json
8
8
  import base64
9
9
  import os
10
10
  import re
11
+ import openai
12
+ import google.generativeai as genai
13
+ from collections import defaultdict
11
14
 
12
15
 
13
16
  from .models import _MODEL_METADATA, AVAILABLEMODELS
@@ -21,8 +24,7 @@ uploadColList = (
21
24
  uploadRowList = (
22
25
  "https://app.llumo.ai/api/New-Eval-API/new-upload-flow/uploadRowsInDBPlayground"
23
26
  )
24
- createInsightUrl = "https://app.llumo.ai/api/New-Eval-API/insights-api/generate-playground-insights"
25
- getPlaygroundInsightsUrl="https://app.llumo.ai/api/New-Eval-API/insights-api/get-all-playground-insights"
27
+ createInsightUrl="https://app.llumo.ai/api/external/generate-insight-from-eval-for-sdk"
26
28
 
27
29
 
28
30
  def getProcessID():
@@ -227,7 +229,7 @@ def createColumn(workspaceID, dataframe, playgroundID, promptText=None,queryColN
227
229
  }
228
230
  allEvals = ['Response Completeness', 'Response Bias', 'Response Harmfulness', 'Input Toxicity', 'Input Harmfulness', 'Context Utilization', 'Relevance Retention', 'Semantic Cohesion', 'Final Task Alignment', 'Tool Reliability', 'Response Correctness', 'Response Toxicity', 'Input Bias', 'Input Relevancy', 'Redundancy Reduction', 'Response Sentiment', 'Tool Selection Accuracy', 'Stepwise Progression', 'Hallucination', 'Faithfulness', 'Answer Relevancy', 'Context Precision', 'Answer Similarity', 'Harmfulness', 'Maliciousness', 'Coherence', 'Answer Correctness', 'Context Recall', 'Context Entity Recall', 'Conciseness', 'customEvalColumn', 'Groundedness', 'Memory Utilization', 'Input Relevancy (Multi-turn)']
229
231
 
230
-
232
+ evalDependencies = checkDependency(_returnDepMapping=True)
231
233
 
232
234
  # Create a mapping of column names to unique column IDs
233
235
  columnIDMapping = {}
@@ -240,46 +242,46 @@ def createColumn(workspaceID, dataframe, playgroundID, promptText=None,queryColN
240
242
  columnIDMapping[col] = columnID
241
243
 
242
244
 
243
- if col.startswith('output') and promptText!=None:
244
- # For output columns, create the prompt template with promptText
245
- if promptText:
246
- # Extract variables from promptText and set them as dependencies
247
- dependencies = []
248
-
249
- # Find variables inside {{variable}}
250
- variables = re.findall(r'{{(.*?)}}', promptText)
251
-
252
- # Loop through each variable and check if it exists as a column name
253
- for var in variables:
254
- varName = var.strip()
255
- if varName in columnIDMapping: # Check if the variable is a column name
256
- dependencies.append(columnIDMapping[varName]) # Add its columnID
257
-
258
- # Now update the template for the output column
259
-
260
- template={
261
- "provider": "OPENAI",
262
- "model": "GPT_4o",
263
- "promptText": promptText,
264
- "modelOptions": {
265
- "temperature": 0,
266
- "frequencyPenalty": 0,
267
- "presencePenalty": 0,
268
- "maxToken": 8192
269
- },
270
- "toolConfig": "none",
271
- "concurrency": "",
272
- "outputType": "STRING",
273
- "isPromptSelected": True,
274
- "isSmartPromptSelected": False,
275
- "dependency": dependencies, # Use the dependencies extracted from promptText
276
- "columnID": columnID, # Use the generated column ID
277
- "label": col,
278
- "type": "PROMPT",
279
- "order": indx,
280
- }
281
-
282
- elif col.startswith('context') and dataStreamName != None :
245
+ # if col.startswith('output') and promptText!=None:
246
+ # # For output columns, create the prompt template with promptText
247
+ # if promptText:
248
+ # # Extract variables from promptText and set them as dependencies
249
+ # dependencies = []
250
+ #
251
+ # # Find variables inside {{variable}}
252
+ # variables = re.findall(r'{{(.*?)}}', promptText)
253
+ #
254
+ # # Loop through each variable and check if it exists as a column name
255
+ # for var in variables:
256
+ # varName = var.strip()
257
+ # if varName in columnIDMapping: # Check if the variable is a column name
258
+ # dependencies.append(columnIDMapping[varName]) # Add its columnID
259
+ #
260
+ # # Now update the template for the output column
261
+ #
262
+ # template={
263
+ # "provider": "OPENAI",
264
+ # "model": "GPT_4o",
265
+ # "promptText": promptText,
266
+ # "modelOptions": {
267
+ # "temperature": 0,
268
+ # "frequencyPenalty": 0,
269
+ # "presencePenalty": 0,
270
+ # "maxToken": 8192
271
+ # },
272
+ # "toolConfig": "none",
273
+ # "concurrency": "",
274
+ # "outputType": "STRING",
275
+ # "isPromptSelected": True,
276
+ # "isSmartPromptSelected": False,
277
+ # "dependency": dependencies, # Use the dependencies extracted from promptText
278
+ # "columnID": columnID, # Use the generated column ID
279
+ # "label": col,
280
+ # "type": "PROMPT",
281
+ # "order": indx,
282
+ # }
283
+
284
+ if col.startswith('context') and dataStreamName != None :
283
285
  if queryColName and dataStreamName:
284
286
  dependencies = []
285
287
  dependencies.append(columnIDMapping[queryColName])
@@ -310,9 +312,9 @@ def createColumn(workspaceID, dataframe, playgroundID, promptText=None,queryColN
310
312
 
311
313
  dependencies.append(columnIDMapping[outputColName]) # Add the output column ID
312
314
 
313
- longDef = definationMapping.get(col, {}).get('definition', "")
314
- shortDef =definationMapping.get(col, {}).get('briefDefinition', "")
315
- enum = col.upper().replace(" ","_")
315
+ longDef = definationMapping.get(col.rsplit("_",1)[0], {}).get('definition', "")
316
+ shortDef =definationMapping.get(col.rsplit("_",1)[0], {}).get('briefDefinition', "")
317
+ enum = col.rsplit("_",1)[0].upper().replace(" ","_")
316
318
 
317
319
  template = {
318
320
  "analytics": [
@@ -320,20 +322,23 @@ def createColumn(workspaceID, dataframe, playgroundID, promptText=None,queryColN
320
322
  ],
321
323
  "evaluationMetric": "ALL",
322
324
  "evaluationModel": "LLUMO_EVALLM",
323
- "selectPrompt": columnIDMapping[outputColName],
325
+ "selectPrompt": None if "output" not in columnIDMapping.keys() else columnIDMapping["output"],
324
326
  "scoreCondition": "GREATER_THAN",
325
327
  "scoreValue": "50",
326
328
  "scoreResult": "PASS",
327
- "llmKpi": col,
329
+ "llmKpi": col.rsplit("_",1)[0],
328
330
  "setRules": True,
329
331
  "type": "EVAL",
330
332
  "evalType": "LLM",
331
333
  "similarityMetric": None,
332
334
  "embeddingModel": None,
333
- "groundTruth": None,
335
+ "groundTruth": None if "groundTruth" not in columnIDMapping.keys() else columnIDMapping["groundTruth"],
334
336
  "dataStream": None,
335
- "context": None,
336
- "dependency": [columnIDMapping[outputColName]],
337
+ "context":None if "context" not in columnIDMapping.keys() else columnIDMapping["context"],
338
+ "dependency":[ columnIDMapping[dep] for dep in evalDependencies[ col.rsplit("_",1)[0]]],
339
+ "query": None if "query" not in columnIDMapping.keys() else columnIDMapping["query"],
340
+ "tools":None if "tools" not in columnIDMapping.keys() else columnIDMapping["tools"],
341
+ "messageHistory":None if "messageHistory" not in columnIDMapping.keys() else columnIDMapping["messageHistory"],
337
342
  "hallucinationFields": {
338
343
  "query": None,
339
344
  "context": None,
@@ -342,7 +347,7 @@ def createColumn(workspaceID, dataframe, playgroundID, promptText=None,queryColN
342
347
  "definition": longDef,
343
348
  "analyticsENUM": enum,
344
349
  "prompt": shortDef,
345
- "analyticsName": col,
350
+ "analyticsName": col.rsplit("_",1)[0],
346
351
  "columnID": columnID,
347
352
  "label": col,
348
353
  "order": indx
@@ -378,12 +383,12 @@ def createColumn(workspaceID, dataframe, playgroundID, promptText=None,queryColN
378
383
  row_dict = {}
379
384
 
380
385
  # For each column, we need to map the column ID to the corresponding value in the row
381
- print(dataframe.columns)
386
+
382
387
  for col in dataframe.columns:
383
388
  columnID = columnIDMapping[col]
384
389
 
385
390
  if any(col.startswith(eval + "_") or col == eval for eval in allEvals) and not " Reason" in col and promptText!=None:
386
- print(col)
391
+
387
392
  row_dict[columnID] = {
388
393
 
389
394
  "value": row[col],
@@ -450,9 +455,12 @@ def uploadRowsInDBPlayground(payload):
450
455
  return None
451
456
 
452
457
 
453
- def createPlayground(email, workspaceID, df, promptText=None,queryColName=None,dataStreamName=None,definationMapping=None,outputColName="output",evalOutputMap = None):
458
+ def createPlayground(email, workspaceID, df, promptText=None,queryColName=None,dataStreamName=None,definationMapping=None,outputColName="output",evalOutputMap = None,activePlayground=None):
454
459
 
455
- playgroundId = str(createEvalPlayground(email=email, workspaceID=workspaceID))
460
+ if activePlayground != None:
461
+ playgroundId=activePlayground
462
+ else:
463
+ playgroundId = str(createEvalPlayground(email=email, workspaceID=workspaceID))
456
464
  payload1, payload2 = createColumn(
457
465
  workspaceID=workspaceID, dataframe=df, playgroundID=playgroundId, promptText=promptText,queryColName=queryColName,dataStreamName=dataStreamName,definationMapping=definationMapping,outputColName=outputColName,evalOutputMap=evalOutputMap
458
466
  )
@@ -470,7 +478,7 @@ def createPlayground(email, workspaceID, df, promptText=None,queryColName=None,d
470
478
 
471
479
 
472
480
 
473
- def getPlaygroundInsights(workspaceID: str, activePlayground: str):
481
+ def getPlaygroundInsights(defination:str,uniqueClassesString: str, reasonList: list):
474
482
  headers = {
475
483
 
476
484
  "Content-Type": "application/json",
@@ -478,48 +486,29 @@ def getPlaygroundInsights(workspaceID: str, activePlayground: str):
478
486
 
479
487
  # Initial request to generate playground insights
480
488
  payload = {
481
- "activePlayground": activePlayground,
482
- "workspaceID": workspaceID,
489
+ "uniqueClassesString": uniqueClassesString,
490
+ "reasonList": reasonList,
491
+ "definition": defination,
483
492
  }
484
493
 
485
494
  urlGenerate = createInsightUrl
486
-
487
- responseGenerate = requests.post(urlGenerate, json=payload, headers=headers)
488
-
489
- if responseGenerate.status_code == 200:
490
- responseJson = responseGenerate.json()
491
-
492
- insightStatus = responseJson.get("data", {}).get("insight", False)
495
+ try:
496
+ responseGenerate = requests.post(urlGenerate, json=payload, headers=headers)
493
497
 
494
- if insightStatus:
495
- # If insight is true, request to get all playground insights
496
- urlGetAll = getPlaygroundInsightsUrl
498
+ if responseGenerate.status_code == 200:
499
+ responseJson = responseGenerate.json()
497
500
 
498
- responseGetAll = requests.post(urlGetAll, json=payload, headers=headers)
499
-
500
- if responseGetAll.status_code == 200:
501
- responseAllJson = responseGetAll.json()
502
-
503
- data = responseAllJson.get("data", {}).get("data", [])
504
-
505
- # Extract insight and solution
506
- insights = []
507
- for item in data:
508
- insight = item.get("insight", "")
509
- solution = item.get("solution", "")
510
- insights.append({"insight": insight, "solution": solution})
511
-
512
- return insights
513
- else:
514
- print(f"Error fetching all insights: {responseGetAll.status_code} - {responseGetAll.text}")
515
- return None
516
- else:
517
- print("No insight generated.")
518
- return None
501
+ filteredResponse = {key: value for key, value in responseJson.items() if key in ['analysis', 'nextStep']}
502
+
503
+ return filteredResponse
504
+ except Exception as e:
505
+ print(f"Exception occurred while generating insight: {e}")
506
+ return None
507
+
519
508
  else:
520
509
  print(f"Error generating insight: {responseGenerate.status_code} - {responseGenerate.text}")
521
510
  return None
522
- def checkDependency(selectedEval, columns,tocheck=True):
511
+ def checkDependency(selectedEval:list = [], columns:list = [],tocheck=True,_returnDepMapping = False):
523
512
  """
524
513
  Checks if all the required input columns for the selected evaluation metric are present.
525
514
 
@@ -530,33 +519,35 @@ def checkDependency(selectedEval, columns,tocheck=True):
530
519
  Raises:
531
520
  - LlumoAIError.dependencyError: If any required column is missing.
532
521
  """
533
- if tocheck:
534
522
  # Define required dependencies for each evaluation metric
535
- metricDependencies = {
536
- 'Response Completeness': ['context', 'query', 'output'],
537
- 'Response Bias': ['output'],
538
- 'Response Harmfulness': ['output'],
539
- 'Input Toxicity': ['query'],
540
- 'Input Harmfulness': ['query'],
541
- 'Context Utilization': ['output', 'context'],
542
- 'Relevance Retention': ['context', 'query'],
543
- 'Semantic Cohesion': ['context'],
544
- 'Final Task Alignment': ['messageHistory'],
545
- 'Tool Reliability': ['messageHistory'],
546
- 'Response Correctness': ['output', 'query', 'context'],
547
- 'Response Toxicity': ['output'],
548
- 'Input Bias': ['query'],
549
- 'Input Relevancy': ['context', 'query'],
550
- 'Redundancy Reduction': ['context'],
551
- 'Response Sentiment': ['output'],
552
- 'Tool Selection Accuracy': ['tools', 'messageHistory'],
553
- 'Stepwise Progression': ['tools', 'messageHistory'],
554
- 'Hallucination': ['query', 'context', 'output'],
555
- 'Groundedness': ['groundTruth', 'output'],
556
- 'Memory Utilization': ['context', 'messageHistory'],
557
- 'Input Relevancy (Multi-turn)': ['context', 'query']
558
- }
523
+ metricDependencies = {
524
+ 'Response Completeness': ['context', 'query', 'output'],
525
+ 'Response Bias': ['output'],
526
+ 'Response Harmfulness': ['output'],
527
+ 'Input Toxicity': ['query'],
528
+ 'Input Harmfulness': ['query'],
529
+ 'Context Utilization': ['output', 'context'],
530
+ 'Relevance Retention': ['context', 'query'],
531
+ 'Semantic Cohesion': ['context'],
532
+ 'Final Task Alignment': ['messageHistory'],
533
+ 'Tool Reliability': ['messageHistory'],
534
+ 'Response Correctness': ['output', 'query', 'context'],
535
+ 'Response Toxicity': ['output'],
536
+ 'Input Bias': ['query'],
537
+ 'Input Relevancy': ['context', 'query'],
538
+ 'Redundancy Reduction': ['context'],
539
+ 'Response Sentiment': ['output'],
540
+ 'Tool Selection Accuracy': ['tools', 'messageHistory'],
541
+ 'Stepwise Progression': ['tools', 'messageHistory'],
542
+ 'Hallucination': ['query', 'context', 'output'],
543
+ 'Groundedness': ['groundTruth', 'output'],
544
+ 'Memory Utilization': ['context', 'messageHistory'],
545
+ 'Input Relevancy (Multi-turn)': ['context', 'query']
546
+ }
547
+ if _returnDepMapping == True:
548
+ return metricDependencies
559
549
 
550
+ if tocheck == True:
560
551
  # Check if the selected evaluation metric is known
561
552
  if selectedEval not in metricDependencies:
562
553
  return {"status": False,"message":f"Unknown evaluation metric: {selectedEval}"}
@@ -632,3 +623,56 @@ def validateModels(model_aliases):
632
623
 
633
624
 
634
625
 
626
+ def validateOpenaiKey(api_key):
627
+ try:
628
+ client = openai.OpenAI(api_key=api_key)
629
+ _ = client.models.list() # Light call to list models
630
+ except openai.AuthenticationError:
631
+ raise ValueError("❌ Invalid OpenAI API key.")
632
+ except Exception as e:
633
+ raise RuntimeError(f"⚠️ Error validating OpenAI key: {e}")
634
+
635
+ def validateGoogleKey(api_key):
636
+ try:
637
+ genai.configure(api_key=api_key)
638
+ _ = genai.GenerativeModel("gemini-2.0-flash-lite").generate_content("test")
639
+ except Exception as e:
640
+ if "PERMISSION_DENIED" in str(e) or "API key not valid" in str(e):
641
+ raise ValueError("❌ Invalid Google API key.")
642
+ raise RuntimeError(f"⚠️ Error validating Gemini key: {e}")
643
+
644
+ def groupLogsByClass(logs, max_logs=2):
645
+ # Initialize the final result structures (no defaultdict)
646
+ groupedLogs = {}
647
+ uniqueEdgeCases = {} # This will store unique edge cases for each eval_name
648
+
649
+ # Iterate through the logs
650
+ for log in logs:
651
+ log_details = list(log.values())[0] # Get the details dictionary
652
+ eval_name = log_details.get("kpi", "unmarked")
653
+ edge_case = log_details.get("edgeCase", "unmarked")
654
+ reasoning = log_details.get("reasoning", "")
655
+
656
+ if eval_name != "unmarked" and edge_case != "unmarked":
657
+ # Ensure that the eval_name and edge_case exist in the dictionary
658
+ if eval_name not in groupedLogs:
659
+ groupedLogs[eval_name] = {}
660
+ uniqueEdgeCases[eval_name] = set() # Initialize the set for unique edge cases
661
+
662
+ if edge_case not in groupedLogs[eval_name]:
663
+ groupedLogs[eval_name][edge_case] = []
664
+
665
+ # Append the reasoning to the correct place
666
+ groupedLogs[eval_name][edge_case].append(reasoning)
667
+ uniqueEdgeCases[eval_name].add(edge_case) # Add the edge case to the set
668
+
669
+ # Limit the number of reasons to max_logs
670
+ for eval_name in groupedLogs:
671
+ for edge_case in groupedLogs[eval_name]:
672
+ groupedLogs[eval_name][edge_case] = groupedLogs[eval_name][edge_case][:max_logs]
673
+
674
+ # Convert the set of unique edge cases to a list for easier reading
675
+ for eval_name in uniqueEdgeCases:
676
+ uniqueEdgeCases[eval_name] = list(uniqueEdgeCases[eval_name])
677
+
678
+ return groupedLogs, uniqueEdgeCases
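
groupLogsByClass expects the raw eval logs in the shape the socket results use: a list of single-key dicts whose values carry kpi, edgeCase and reasoning. A minimal illustration with invented data:

    from llumo.helpingFuntions import groupLogsByClass

    logs = [
        {"row1-c1-c1": {"kpi": "Response Correctness", "edgeCase": "partial answer", "reasoning": "Missed one fact."}},
        {"row2-c1-c1": {"kpi": "Response Correctness", "edgeCase": "partial answer", "reasoning": "Dropped the date."}},
    ]

    grouped, uniqueEdgeCases = groupLogsByClass(logs, max_logs=2)
    # grouped         -> {"Response Correctness": {"partial answer": ["Missed one fact.", "Dropped the date."]}}
    # uniqueEdgeCases -> {"Response Correctness": ["partial answer"]}
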
llumo/openai.py ADDED
@@ -0,0 +1,78 @@
1
+ from openai import OpenAI as OpenAIClient
2
+ from .client import LlumoClient
3
+
4
+ # Dummy evaluation function that uses LlumoClient
5
+ def evaluate_multiple(data, api_key=None,evals=["Response Correctness"]):
6
+ client = LlumoClient(api_key=api_key)
7
+ results= client.evaluateMultiple(data, evals=evals,createExperiment=False,prompt_template="Give answer to the query: {{query}}, using context: {{context}}",getDataFrame=False)
8
+ return results
9
+
10
+ # Wrapper around ChatCompletion to allow custom fields like `.evaluation`
11
+ class ChatCompletionWithEval:
12
+ def __init__(self, response, evaluation):
13
+ self._response = response
14
+ self.evaluation = evaluation
15
+
16
+ def __getattr__(self, name):
17
+ return getattr(self._response, name)
18
+
19
+ def __getitem__(self, key):
20
+ return self._response[key]
21
+
22
+ def __repr__(self):
23
+ return repr(self._response)
24
+
25
+ class openai(OpenAIClient):
26
+ def __init__(self, api_key: str):
27
+ super().__init__(api_key=api_key)
28
+
29
+ original_create = self.chat.completions.create
30
+
31
+ class ChatCompletionsWrapper:
32
+ @staticmethod
33
+ def create(*args, **kwargs):
34
+ context = kwargs.pop("context", None)
35
+ evals = kwargs.pop("evals", [])
36
+ llumo_key = kwargs.pop("llumo_key", None)
37
+
38
+ messages = kwargs.get("messages", [])
39
+ user_message = next(
40
+ (m.get("content") for m in reversed(messages) if m.get("role") == "user"),
41
+ "",
42
+ )
43
+
44
+ # If context is None or empty or whitespace-only, set it to user_message
45
+ if not context or context.strip() == "":
46
+ context = user_message
47
+
48
+ response = original_create(*args, **kwargs)
49
+
50
+ try:
51
+ output_text = response.choices[0].message.content
52
+ except Exception:
53
+ output_text = ""
54
+
55
+ eval_input = [{
56
+ "query": user_message,
57
+ "context": context,
58
+ "output": output_text,
59
+ }]
60
+
61
+ # Safely call evaluate_multiple, if error return None
62
+ evaluation = None
63
+ try:
64
+ evaluation = evaluate_multiple(eval_input, api_key=llumo_key,evals=evals)
65
+ except Exception as e:
66
+ # You can optionally log the error here if you want
67
+ # print(f"Evaluation failed, skipping: {e}")
68
+ evaluation = None
69
+
70
+ # If evaluation is None, just return normal response
71
+ if evaluation is None:
72
+ print("Cannot process your request for evaluation, please check your api and try again later.")
73
+ return response
74
+
75
+ # Otherwise wrap with evaluation attached
76
+ return ChatCompletionWithEval(response, evaluation)
77
+
78
+ self.chat.completions.create = ChatCompletionsWrapper.create
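
The llumo.openai class subclasses the official OpenAI client and swaps in a chat.completions.create that accepts three extra keyword arguments (context, evals, llumo_key), evaluates the reply through Llumo and attaches the result as .evaluation. A sketch with placeholder keys; the model name is illustrative:

    from llumo import openai

    client = openai(api_key="OPENAI_API_KEY")  # placeholder key
    response = client.chat.completions.create(
        model="gpt-4o-mini",  # illustrative model choice
        messages=[{"role": "user", "content": "Explain vector databases in one line."}],
        evals=["Response Correctness"],
        llumo_key="LLUMO_API_KEY",  # placeholder key
    )
    print(response.choices[0].message.content)
    print(response.evaluation)  # attached via ChatCompletionWithEval; absent if evaluation failed
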
{llumo-0.2.15b1.dist-info → llumo-0.2.16.dist-info}/METADATA RENAMED
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: llumo
3
- Version: 0.2.15b1
3
+ Version: 0.2.16
4
4
  Summary: Python SDK for interacting with the Llumo ai API.
5
5
  Home-page: https://www.llumo.ai/
6
6
  Author: Llumo
llumo-0.2.16.dist-info/RECORD ADDED
@@ -0,0 +1,16 @@
1
+ llumo/__init__.py,sha256=YVBkF1fiXFBd_zzySi9BDWgX8MJuLBJ-oF8538MrnDU,256
2
+ llumo/chains.py,sha256=6lCgLseh04RUgc6SahhmvQj82quay2Mi1j8gPUlx8Es,2923
3
+ llumo/client.py,sha256=20xn-RVaWwEwEi2pFaf_ZWF6OJhctQoxc707NAzTcQk,62532
4
+ llumo/exceptions.py,sha256=Vp_MnanHbnd1Yjuoi6WLrKiwwZbJL3znCox2URMmGU4,2032
5
+ llumo/execution.py,sha256=nWbJ7AvWuUPcOb6i-JzKRna_PvF-ewZTiK8skS-5n3w,1380
6
+ llumo/functionCalling.py,sha256=D5jYapu1rIvdIJNUYPYMTyhQ1H-6nkwoOLMi6eekfUE,7241
7
+ llumo/google.py,sha256=3S_aRtbtlctCXPGR0u4baLlkyFrsjd02vlUCkoRPA5U,2147
8
+ llumo/helpingFuntions.py,sha256=fqwtTNuDYiERB1_ljme-Ldc7WduU6g4Zl54O2INW7dQ,25241
9
+ llumo/models.py,sha256=aVEZsOOoQx5LeNtwSyBxqvrINq0izH3QWu_YjsMPE6o,2910
10
+ llumo/openai.py,sha256=DGhEwQIJIIycGpw3hYQnyxdj6RFVpZ-gay-fZGqtkhU,3013
11
+ llumo/sockets.py,sha256=I2JO_eNEctRo_ikgvFVp5zDd-m0VDu04IEUhhsa1Tic,5950
12
+ llumo-0.2.16.dist-info/licenses/LICENSE,sha256=tF9yAcfPV9xGT3ViWmC8hPvOo8BEk4ZICbUfcEo8Dlk,182
13
+ llumo-0.2.16.dist-info/METADATA,sha256=GXu8pBXbYQ_EWRRPlAfAqaqqSl1Lb6raDCceRo8f8Qs,1519
14
+ llumo-0.2.16.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
15
+ llumo-0.2.16.dist-info/top_level.txt,sha256=d5zUTMI99llPtLRB8rtSrqELm_bOqX-bNC5IcwlDk88,6
16
+ llumo-0.2.16.dist-info/RECORD,,
llumo-0.2.15b1.dist-info/RECORD DELETED
@@ -1,13 +0,0 @@
1
- llumo/__init__.py,sha256=O04b4yW1BnOvcHzxWFddAKhtdBEhBNhLdb6xgnpHH_Q,205
2
- llumo/client.py,sha256=XljwD5mZxjyrXHhu8YhN0cGsd-O_LyKbPzrhS8zbqZo,53778
3
- llumo/exceptions.py,sha256=Vp_MnanHbnd1Yjuoi6WLrKiwwZbJL3znCox2URMmGU4,2032
4
- llumo/execution.py,sha256=x88wQV8eL99wNN5YtjFaAMCIfN1PdfQVlAZQb4vzgQ0,1413
5
- llumo/functionCalling.py,sha256=D5jYapu1rIvdIJNUYPYMTyhQ1H-6nkwoOLMi6eekfUE,7241
6
- llumo/helpingFuntions.py,sha256=0-ZwG0fnbfP4DP1JTMewM8LdXzz_-p1gRqhPsX0Zmpk,22785
7
- llumo/models.py,sha256=aVEZsOOoQx5LeNtwSyBxqvrINq0izH3QWu_YjsMPE6o,2910
8
- llumo/sockets.py,sha256=I2JO_eNEctRo_ikgvFVp5zDd-m0VDu04IEUhhsa1Tic,5950
9
- llumo-0.2.15b1.dist-info/licenses/LICENSE,sha256=tF9yAcfPV9xGT3ViWmC8hPvOo8BEk4ZICbUfcEo8Dlk,182
10
- llumo-0.2.15b1.dist-info/METADATA,sha256=yDLkiD46Qq44PA3ylKK2dzsXZmnuE23yxH0RmoqizOk,1521
11
- llumo-0.2.15b1.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
12
- llumo-0.2.15b1.dist-info/top_level.txt,sha256=d5zUTMI99llPtLRB8rtSrqELm_bOqX-bNC5IcwlDk88,6
13
- llumo-0.2.15b1.dist-info/RECORD,,