llumo 0.2.15b1__py3-none-any.whl → 0.2.16b1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
llumo/__init__.py CHANGED
@@ -4,4 +4,5 @@ from .helpingFuntions import *
  from .models import AVAILABLEMODELS
  from .execution import ModelExecutor
  from .functionCalling import *
-
+ from .openai import openai
+ from .google import genai
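Note (illustrative, not part of the diff): the two new imports expose the wrapped clients directly from the package. A minimal sketch with placeholder keys; both wrappers are defined in the new llumo/openai.py and llumo/google.py modules shown later in this diff:

    from llumo import openai, genai

    gptClient = openai(api_key="<OPENAI_API_KEY>")           # placeholder key
    geminiClient = genai.Client(api_key="<GOOGLE_API_KEY>")  # placeholder key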
llumo/chains.py ADDED
@@ -0,0 +1,73 @@
+ import pandas as pd
+ from .helpingFuntions import *
+
+ class LlumoDataFrameResults(pd.DataFrame):
+     _metadata=["evals","evalData","definationMapping"]
+
+     def __init__(self, *args,evals=None,evalData=None,definationMapping=None,**kwargs):
+         self.evals = evals or []
+         self.evalData= evalData or []
+         self.definationMapping= definationMapping or {}
+         super().__init__(*args, **kwargs)
+
+     @property
+     def _constructor(self):
+         # Needed so slicing operations return the same type
+         return LlumoDataFrameResults
+
+     def insights(self):
+
+         if not self.evalData:
+             print("No raw data available. Please run evaluateMultiple() first.")
+             return None
+         try:
+             insights=[]
+             reasonData,uniqueEdgecase=groupLogsByClass(self.evalData)  # print(rawResults)
+
+             for evalname in self.evals:
+                 uniqueclassesstring = ",".join(uniqueEdgecase.get(evalname, []))
+                 allReasons = []
+                 for edgeCase in reasonData[evalname]:
+                     allReasons.extend(reasonData[evalname][edgeCase])
+
+                 evalDefinition = self.definationMapping.get(evalname, {}).get("definition", "")
+
+                 insights.append(getPlaygroundInsights(evalDefinition,uniqueclassesstring,allReasons))
+             return insights
+         except Exception as e:
+
+             print("Can not genrate insights for this eval, please try again later.")
+
+
+ class LlumoDictResults(list):
+     _metadata=["evals","evalData","definationMapping"]
+
+     def __init__(self, *args,evals=None,evalData=None,definationMapping=None,**kwargs):
+         self.evals = evals or []
+         self.evalData= evalData or []
+         self.definationMapping= definationMapping or {}
+         super().__init__(*args, **kwargs)  # This will handle list[dict]
+
+     def insights(self):
+
+         if not self.evalData:
+             print("No raw data available. Please run evaluateMultiple() first.")
+             return None
+         try:
+             insights=[]
+             reasonData,uniqueEdgecase=groupLogsByClass(self.evalData)  # print(rawResults)
+             for evalname in self.evals:
+                 uniqueclassesstring = ",".join(uniqueEdgecase.get(evalname, []))
+                 allReasons = []
+                 for edgeCase in reasonData[evalname]:
+                     allReasons.extend(reasonData[evalname][edgeCase])
+                 evalDefinition = self.definationMapping.get(evalname, {}).get("definition", "")
+                 insights.append(getPlaygroundInsights(evalDefinition,uniqueclassesstring,allReasons))
+             return insights
+         except Exception as e:
+             print("Can not genrate insights for this eval, please try again later.")
+
+
+ for _cls in (LlumoDataFrameResults, LlumoDictResults):
+     _cls.__name__ = "LlumoResults"
+     _cls.__qualname__ = "LlumoResults"
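Note (illustrative, not part of the diff): these two wrappers are what LlumoClient.evaluateMultiple returns in this release when createExperiment is not used, and .insights() is the new entry point for aggregated feedback. A minimal sketch, assuming a valid LLUMO API key (placeholder below) and input rows that satisfy the chosen eval's dependencies:

    from llumo.client import LlumoClient

    client = LlumoClient(api_key="<LLUMO_API_KEY>")   # placeholder key
    rows = [{"query": "What is RAG?", "context": "<retrieved text>", "output": "<model answer>"}]

    # getDataFrame=True  -> LlumoDataFrameResults (a pandas DataFrame subclass)
    # getDataFrame=False -> LlumoDictResults (a list of dicts); both expose .insights()
    results = client.evaluateMultiple(
        rows,
        evals=["Response Correctness"],
        prompt_template="Give answer to the query: {{query}}, using context: {{context}}",
        getDataFrame=True,
    )
    print(results.insights())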
llumo/client.py CHANGED
@@ -5,20 +5,23 @@ import time
  import re
  import json
  import uuid
-
+ import warnings
  import os
  import itertools
  import pandas as pd
  from typing import List, Dict
- from .models import AVAILABLEMODELS, getProviderFromModel
+ from .models import AVAILABLEMODELS, getProviderFromModel, Provider
  from .execution import ModelExecutor
  from .exceptions import LlumoAIError
  from .helpingFuntions import *
  from .sockets import LlumoSocketClient
  from .functionCalling import LlumoAgentExecutor
+ from .chains import LlumoDataFrameResults,LlumoDictResults
  import threading
  from tqdm import tqdm

+ pd.set_option('future.no_silent_downcasting', True)
+
  postUrl = (
  "https://red-skull-service-392377961931.us-central1.run.app/api/process-playground"
  )
@@ -38,7 +41,8 @@ class LlumoClient:

  def __init__(self, api_key):
  self.apiKey = api_key
-
+ self.evalData=[]
+ self.evals=[]
  self.processMapping = {}
  self.definationMapping = {}

@@ -50,7 +54,7 @@ class LlumoClient:
  reqBody = {"analytics": [evalName]}

  try:
- print(reqBody)
+
  response = requests.post(url=validateUrl, json=reqBody, headers=headers)

  except requests.exceptions.RequestException as e:
@@ -393,27 +397,46 @@ class LlumoClient:
  outputColName=outputColName,
  ):
  print(
- "Your data has been saved in the Llumo Experiment. Visit https://app.llumo.ai/evallm to see the results.Please rerun the experiment to see the results on playground."
+ "LLUMO’s intuitive UI is ready—start exploring and experimenting with your logs now. Visit https://llumo.ai/evallm to see the results.Please rerun the experiment to see the results on playground."
  )
  else:
  return dataframe

  # this function allows the users to run multiple evals at once

- def evaluateCompressor(self, data, prompt_template):
+ def compressor(self, data, prompt_template):
  results = []
  dataframe = pd.DataFrame(data)
+
  try:
- socketID = self.socket.connect(timeout=150)
+ self.socket = LlumoSocketClient(socketUrl)
+ dataframe = pd.DataFrame(data).astype(str)
+ socketID = self.socket.connect(timeout=250)
+
+ # Wait for socket connection
  max_wait_secs = 20
  waited_secs = 0
  while not self.socket._connection_established.is_set():
  time.sleep(0.1)
  waited_secs += 0.1
  if waited_secs >= max_wait_secs:
- raise RuntimeError(
- "Timeout waiting for server 'connection-established' event."
- )
+ raise RuntimeError("Timeout waiting for server connection")
+
+ # Start listener thread
+ expectedResults = len(dataframe)
+ # print("expected result" ,expectedResults)
+ timeout = max(100, min(150, expectedResults * 10))
+ listener_thread = threading.Thread(
+ target=self.socket.listenForResults,
+ kwargs={
+ "min_wait": 40,
+ "max_wait": timeout,
+ "inactivity_timeout": 10,
+ "expected_results": expectedResults,
+ },
+ daemon=True,
+ )
+ listener_thread.start()

  try:
  self.validateApiKey()
@@ -508,7 +531,7 @@ class LlumoClient:
  "playgroundID": activePlayground,
  }

- rowIdMapping[rowID] = index
+ rowIdMapping[f'{rowID}-{columnID}-{columnID}'] = index
  # print("__________________________TEMPLATE__________________________________")
  # print(templateData)

@@ -537,7 +560,19 @@ class LlumoClient:
  expected_results=None,
  )

- results = self.socket.getReceivedData()
+ rawResults = self.socket.getReceivedData()
+ receivedRowIDs = {key for item in rawResults for key in item.keys()}
+ expectedRowIDs = set(rowIdMapping.keys())
+ missingRowIDs = expectedRowIDs - receivedRowIDs
+ # print("All expected keys:", expected_rowIDs)
+ # print("All received keys:", received_rowIDs)
+ # print("Missing keys:", len(missingRowIDs))
+ missingRowIDs = list(missingRowIDs)
+
+ if len(missingRowIDs) > 0:
+ dataFromDb = fetchData(workspaceID, activePlayground, missingRowIDs)
+ rawResults.extend(dataFromDb)
+
  # results = self.finalResp(eval_results)
  # print(f"======= Completed evaluation: {eval} =======\n")

@@ -551,10 +586,10 @@ class LlumoClient:
  print(f"Error disconnecting socket: {e}")

  dataframe["Compressed Input"] = None
- for records in results:
+ for records in rawResults:
  for compound_key, value in records.items():
  # for compound_key, value in item['data'].items():
- rowID = compound_key.split("-")[0]
+ rowID = compound_key
  # looking for the index of each rowID , in the original dataframe
  if rowID in rowIdMapping:
  index = rowIdMapping[rowID]
@@ -580,6 +615,7 @@ class LlumoClient:
  prompt_template="",
  outputColName="output",
  createExperiment: bool = False,
+ getDataFrame:bool =False,
  _tocheck=True,
  ):
  self.socket = LlumoSocketClient(socketUrl)
@@ -587,6 +623,8 @@ class LlumoClient:
  workspaceID = None
  email = None
  socketID = self.socket.connect(timeout=250)
+ self.evalData=[]
+ self.evals=evals
  self.allBatches = []
  rowIdMapping = {} # (rowID-columnID-columnID -> (index, evalName))

@@ -614,10 +652,14 @@ class LlumoClient:
  daemon=True,
  )
  listener_thread.start()
-
- activePlayground = f"{int(time.time() * 1000)}{uuid.uuid4()}".replace(
- "-", ""
- )
+ self.validateApiKey(evalName=evals[0])
+ if createExperiment:
+ activePlayground = str(createEvalPlayground(email=self.email, workspaceID=self.workspaceID))
+
+ else:
+ activePlayground = f"{int(time.time() * 1000)}{uuid.uuid4()}".replace(
+ "-", ""
+ )
  for evalName in evals:
  # print(f"\n======= Running evaluation for: {evalName} =======")

@@ -776,10 +818,8 @@ class LlumoClient:
  rawResults.extend(dataFromDb)


+ self.evalData = rawResults

-
-
-
  # Initialize dataframe columns for each eval
  for eval in evals:
  dataframe[eval] = None
@@ -799,7 +839,12 @@ class LlumoClient:

  if createExperiment:
  pd.set_option("future.no_silent_downcasting", True)
- df = dataframe.fillna("Some error occured").astype(object)
+ # df = dataframe.fillna("Some error occured").astype(object)
+ with warnings.catch_warnings():
+ warnings.simplefilter(action='ignore', category=FutureWarning)
+ df = dataframe.fillna("Some error occurred").astype(str)
+
+ df = dataframe.fillna("Some error occured").infer_objects(copy=False)
  if createPlayground(
  email,
  workspaceID,
@@ -807,13 +852,19 @@ class LlumoClient:
  promptText=prompt_template,
  definationMapping=self.definationMapping,
  outputColName=outputColName,
+ activePlayground= activePlayground
  ):
  print(
- "Your data has been saved in the Llumo Experiment. Visit https://app.llumo.ai/evallm to see the results."
+ "LLUMO’s intuitive UI is ready—start exploring and experimenting with your logs now. Visit https://llumo.ai/evallm to see the results."
  )
  else:
- return dataframe
+ if getDataFrame:
+ return LlumoDataFrameResults(dataframe,evals=self.evals,evalData=self.evalData,definationMapping=self.definationMapping)
+ else:
+ data=dataframe.to_dict(orient="records")
+ return LlumoDictResults(data,evals=self.evals,evalData=self.evalData,definationMapping=self.definationMapping)

+
  def promptSweep(
  self,
  templates: List[str],
@@ -823,6 +874,7 @@ class LlumoClient:
  evals=["Response Correctness"],
  toEvaluate: bool = False,
  createExperiment: bool = False,
+ getDataFrame = False


  ) -> pd.DataFrame:
@@ -878,6 +930,8 @@ class LlumoClient:
  prompt_template=str(templates[0]),
  outputColName=outputColName,
  _tocheck=False,
+ getDataFrame=True,
+ createExperiment = False
  )

  # Rename all new columns with _i+1 (e.g., _1, _2)
@@ -910,10 +964,17 @@ class LlumoClient:
  ):

  print(
- "Your data has been saved in the Llumo Experiment. Visit https://app.llumo.ai/evallm to see the results."
+ "LLUMO’s intuitive UI is ready—start exploring and experimenting with your logs now. Visit https://llumo.ai/evallm to see the results."
  )
  else:
- return dfWithEvals
+ if getDataFrame:
+ return LlumoDataFrameResults(dfWithEvals, evals=self.evals, evalData=self.evalData,
+ definationMapping=self.definationMapping)
+ else:
+ data = dfWithEvals.to_dict(orient="records")
+ return LlumoDictResults(data, evals=self.evals, evalData=self.evalData,definationMapping=self.definationMapping)
+
+
  else:
  if createExperiment == True:
  pd.set_option("future.no_silent_downcasting", True)
@@ -921,10 +982,18 @@ class LlumoClient:

  if createPlayground(email, workspaceID, df, promptText=templates[0]):
  print(
- "Your data has been saved in the Llumo Experiment. Visit https://app.llumo.ai/evallm to see the results."
+ "LLUMO’s intuitive UI is ready—start exploring and experimenting with your logs now. Visit https://llumo.ai/evallm to see the results."
  )
  else:
- return df
+ if getDataFrame:
+ return LlumoDataFrameResults(df, evals=self.evals, evalData=self.evalData,
+ definationMapping=self.definationMapping)
+ else:
+ data = df.to_dict(orient="records")
+ return LlumoDictResults(data, evals=self.evals, evalData=self.evalData,
+ definationMapping=self.definationMapping)
+
+

  # this function generates an output using llm and tools and evaluate that output
  def evaluateAgents(
@@ -936,6 +1005,7 @@ class LlumoClient:
  evals=["Final Task Alignment"],
  prompt_template="Give answer for the given query: {{query}}",
  createExperiment: bool = False,
+ getDataFrame:bool = False

  ):
  if model.lower() not in ["openai", "google"]:
@@ -961,27 +1031,33 @@ class LlumoClient:
  toolResponseDf.to_dict(orient="records"),
  evals=evals,
  prompt_template=prompt_template,
- createExperiment=False,
+ createExperiment=createExperiment,
+ getDataFrame=getDataFrame
+
  )

- if createExperiment:
- pd.set_option("future.no_silent_downcasting", True)
- df = toolResponseDf.fillna("Some error occured")
- if createPlayground(self.email, self.workspaceID, df,promptText=prompt_template,definationMapping=self.definationMapping):
- print(
- "Your data has been saved in the Llumo Experiment. Visit https://app.llumo.ai/evallm to see the results."
- )
- else:
- return toolResponseDf
+ return toolResponseDf
+ # if createExperiment:
+ # pd.set_option("future.no_silent_downcasting", True)
+ # df = toolResponseDf.fillna("Some error occured")
+ # if createPlayground(self.email, self.workspaceID, df,promptText=prompt_template,definationMapping=self.definationMapping):
+ # print(
+ # "LLUMO’s intuitive UI is ready—start exploring and experimenting with your logs now. Visit https://llumo.ai/evallm to see the results."
+ # )
+ # else:
+ # return toolResponseDf

  # this function evaluate that tools output given by the user
  def evaluateAgentResponses(
  self,
  data,
  evals=["Final Task Alignment"],
- outputColName="output",
  createExperiment: bool = False,
+ getDataFrame = False,
+ outputColName="output"
+
  ):
+
  dataframe = pd.DataFrame(data)

  try:
@@ -1001,7 +1077,9 @@ class LlumoClient:
  evals=evals,
  prompt_template="Give answer for the given query: {{query}}",
  outputColName=outputColName,
- createExperiment=createExperiment
+ createExperiment=createExperiment,
+ getDataFrame = getDataFrame
+
  )
  if createExperiment:
  pass
@@ -1022,7 +1100,8 @@ class LlumoClient:
  prompt_template="Give answer to the given: {{query}} using the context:{{context}}",
  evals=["Context Utilization"],
  toEvaluate=False,
- generateOutput=True
+ generateOutput=True,
+ getDataFrame = False
  ):
  # Validate required parameters
  if generateOutput:
@@ -1147,23 +1226,25 @@ class LlumoClient:
  outputEvalMapping = None
  if toEvaluate:
  for evalName in evals:
-
  # Validate API and dependencies
  self.validateApiKey(evalName=evalName)
  metricDependencies = checkDependency(
- evalName, list(working_df.columns), tocheck=True
+ evalName, list(working_df.columns), tocheck=False
  )
  if not metricDependencies["status"]:
  raise LlumoAIError.dependencyError(metricDependencies["message"])

- working_df, outputEvalMapping = self._evaluateForStream(working_df, evals, modelAliases, prompt_template)
-
+ working_df, outputEvalMapping = self._evaluateForStream(working_df, evals, modelAliases, prompt_template,generateOutput)

+
  self.socket.disconnect()

  # Create experiment if required
  if createExperiment:
- df = working_df.fillna("Some error occured").astype(object)
+ # df = working_df.fillna("Some error occured").astype(object)
+ with warnings.catch_warnings():
+ warnings.simplefilter(action='ignore', category=FutureWarning)
+ df = working_df.fillna("Some error occurred").astype(str)
  if createPlayground(
  email, workspaceID, df,
  queryColName=queryColName,
@@ -1173,10 +1254,22 @@ class LlumoClient:
  evalOutputMap=outputEvalMapping
  ):
  print(
- "Your data has been saved in the Llumo Experiment. Visit https://app.llumo.ai/evallm to see the results.")
+ "LLUMO’s intuitive UI is ready—start exploring and experimenting with your logs now. Visit https://llumo.ai/evallm to see the results.")
  else:
- self.latestDataframe = working_df
- return working_df
+ if getDataFrame == True and toEvaluate == True:
+ return LlumoDataFrameResults(working_df, evals=self.evals, evalData=self.evalData,
+ definationMapping=self.definationMapping)
+
+ elif getDataFrame == False and toEvaluate == True:
+ data = working_df.to_dict(orient="records")
+ return LlumoDictResults(data, evals=self.evals, evalData=self.evalData,
+ definationMapping=self.definationMapping)
+
+ elif getDataFrame== True and toEvaluate == False:
+ return working_df
+
+ elif getDataFrame == False and toEvaluate == False :
+ return working_df.to_dict(orient = "records")

  def _outputForStream(self, df, modelAliases, prompt_template, apiKey):
  executor = ModelExecutor(apiKey)
@@ -1189,45 +1282,81 @@ class LlumoClient:
  inputDict = {key: row[key] for key in inputVariables}
  for i, model in enumerate(modelAliases, 1):
  try:
+
  provider = getProviderFromModel(model)
+ if provider == Provider.OPENAI:
+ validateOpenaiKey(apiKey)
+ elif provider == Provider.GOOGLE:
+ validateGoogleKey(apiKey)
+
  filled_template = getInputPopulatedPrompt(prompt_template, inputDict)
  response = executor.execute(provider, model.value, filled_template, apiKey)
  df.at[indx, f"output_{i}"] = response
+
  except Exception as e:
- df.at[indx, f"output_{i}"] = str(e)
+ # df.at[indx, f"output_{i}"] = str(e)
+ raise e
+
  return df

- def _evaluateForStream(self, df, evals, modelAliases, prompt_template):
+ def _evaluateForStream(self, df, evals, modelAliases, prompt_template, generateOutput):
  dfWithEvals = df.copy()
-
  outputColMapping = {}

- for i, model in enumerate(modelAliases, 1):
- outputColName = f"output_{i}"
- try:
+ if generateOutput:
+ # Evaluate per model output
+ for i, model in enumerate(modelAliases, 1):
+ outputColName = f"output_{i}"
+ try:
+ res = self.evaluateMultiple(
+ dfWithEvals.to_dict("records"),
+ evals=evals,
+ prompt_template=prompt_template,
+ outputColName=outputColName,
+ _tocheck=False,
+ getDataFrame=True,
+ createExperiment=False
+ )

+ for evalMetric in evals:
+ scoreCol = f"{evalMetric}"
+ reasonCol = f"{evalMetric} Reason"
+ if scoreCol in res.columns:
+ res = res.rename(columns={scoreCol: f"{scoreCol}_{i}"})
+ if reasonCol in res.columns:
+ res = res.rename(columns={reasonCol: f"{evalMetric}_{i} Reason"})
+
+ outputColMapping[f"{scoreCol}_{i}"] = outputColName
+
+ newCols = [col for col in res.columns if col not in dfWithEvals.columns]
+ dfWithEvals = pd.concat([dfWithEvals, res[newCols]], axis=1)
+
+ except Exception as e:
+ print(f"Evaluation failed for model {model.value}: {str(e)}")
+
+ else:
+ # Evaluate only once on "output" column
+ try:
+ outputColName = "output"
  res = self.evaluateMultiple(
  dfWithEvals.to_dict("records"),
  evals=evals,
  prompt_template=prompt_template,
  outputColName=outputColName,
  _tocheck=False,
+ getDataFrame=True,
+ createExperiment=False
  )
  for evalMetric in evals:
  scoreCol = f"{evalMetric}"
  reasonCol = f"{evalMetric} Reason"
-
- if scoreCol in res.columns:
- res = res.rename(columns={scoreCol: f"{scoreCol}_{i}"})
- if reasonCol in res.columns:
- res = res.rename(columns={reasonCol: f"{evalMetric}_{i} Reason"})
-
- outputColMapping[f"{scoreCol}_{i}"] = outputColName
+ outputColMapping[scoreCol] = "output"

  newCols = [col for col in res.columns if col not in dfWithEvals.columns]
  dfWithEvals = pd.concat([dfWithEvals, res[newCols]], axis=1)
  except Exception as e:
- print(f"Evaluation failed for model {model.value}: {str(e)}")
+ print(f"Evaluation failed: {str(e)}")
+
  return dfWithEvals, outputColMapping

  def runDataStream(
@@ -1236,6 +1365,7 @@ class LlumoClient:
  streamName: str,
  queryColName: str = "query",
  createExperiment: bool = False,
+ getDataFrame = False
  ):


@@ -1354,10 +1484,16 @@ class LlumoClient:
  definationMapping=self.definationMapping,
  ):
  print(
- "Your data has been saved in the Llumo Experiment. Visit https://app.llumo.ai/evallm to see the results.")
+ "LLUMO’s intuitive UI is ready—start exploring and experimenting with your logs now. Visit https://llumo.ai/evallm to see the results.")
  else:
- self.latestDataframe = working_df
- return working_df
+ if getDataFrame:
+ return working_df
+
+ else:
+ data = working_df.to_dict(orient="records")
+ return data
+ # self.latestDataframe = working_df
+ # return working_df


  def createExperiment(self, dataframe):
@@ -1367,7 +1503,7 @@ class LlumoClient:
  flag = createPlayground(self.email, self.workspaceID, dataframe)
  if flag:
  print(
- "Your data has been saved in the Llumo Experiment. Visit https://app.llumo.ai/evallm to see the results."
+ "LLUMO’s intuitive UI is ready—start exploring and experimenting with your logs now. Visit https://llumo.ai/evallm to see the results."
  )
  except Exception as e:
  raise "Some error ocuured please check your API key"
@@ -1407,7 +1543,7 @@ class LlumoClient:
  if createPlayground(self.email, self.workspaceID, df):

  print(
- "Your data has been saved in the Llumo Experiment. Visit https://app.llumo.ai/evallm to see the results."
+ "LLUMO’s intuitive UI is ready—start exploring and experimenting with your logs now. Visit https://llumo.ai/evallm to see the results."
  )

  return True
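Note (illustrative, not part of the diff): this release renames evaluateCompressor to compressor, so existing callers need the new name. A minimal sketch with placeholder key, data, and prompt; the hunks above show the result being written into a "Compressed Input" column on the working dataframe:

    from llumo.client import LlumoClient

    client = LlumoClient(api_key="<LLUMO_API_KEY>")   # placeholder key
    rows = [{"query": "Summarise the incident", "context": "<long retrieved context>"}]
    compressed = client.compressor(rows, prompt_template="Answer {{query}} using {{context}}")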
llumo/execution.py CHANGED
@@ -25,15 +25,14 @@ class ModelExecutor:
  return response.choices[0].message.content

  def _executeGoogle(self, modelName: str, prompt: str,api_key) -> str:
-
+
  # Configure GenAI with API Key
  genai.configure(api_key=api_key)
-
+
  # Select Generative Model
  model = genai.GenerativeModel("gemini-2.0-flash-lite")
  # Generate Response
  response = model.generate_content(prompt)
  return response.text
-
-
-
+
+
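Note (illustrative, not part of the diff): _executeGoogle is reached through ModelExecutor.execute, which client.py invokes as executor.execute(provider, model.value, filled_template, apiKey). A minimal sketch under the assumption that AVAILABLEMODELS is an enum of model aliases (model.value is used that way elsewhere in this diff); the key is a placeholder:

    from llumo.execution import ModelExecutor
    from llumo.models import AVAILABLEMODELS, getProviderFromModel

    model = list(AVAILABLEMODELS)[0]                 # placeholder choice of alias
    executor = ModelExecutor("<PROVIDER_API_KEY>")   # placeholder key, passed positionally as in client.py
    text = executor.execute(getProviderFromModel(model), model.value, "Say hello", "<PROVIDER_API_KEY>")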
llumo/google.py ADDED
@@ -0,0 +1,34 @@
+ from google import generativeai as _genai
+
+ class genai:
+     """
+     Top-level wrapper module to mimic:
+     >>> from google import genai
+     >>> client = genai.Client(api_key=...)
+     """
+
+     class Client:
+         def __init__(self, api_key: str, default_model: str = "gemini-2.5-flash"):
+             _genai.configure(api_key=api_key)
+             self._defaultModel = default_model
+             self._defaultModelInstance = _genai.GenerativeModel(model_name=default_model)
+
+             class Models:
+                 def __init__(self, outer):
+                     self._outer = outer
+
+                 def generate_content(self, contents: str | list[str], model: str = None, **kwargs):
+                     model_name = model or self._outer._defaultModel
+                     model_instance = _genai.GenerativeModel(model_name=model_name)
+                     return model_instance.generate_content(contents=contents, **kwargs)
+
+             self.models = Models(self)
+
+         def generate(self, prompt: str | list[str], **kwargs):
+             """Convenience shortcut for single-line generation."""
+             return self._defaultModelInstance.generate_content(prompt, **kwargs)
+
+         def setDefaultModel(self, model_name: str):
+             """Change the default model at runtime."""
+             self._defaultModel = model_name
+             self._defaultModelInstance = _genai.GenerativeModel(model_name=model_name)
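Note (illustrative, not part of the diff): a usage sketch of the wrapper above, mirroring its docstring. The key is a placeholder, and the .text attribute follows the google-generativeai response object:

    from llumo import genai

    client = genai.Client(api_key="<GOOGLE_API_KEY>")   # placeholder key
    print(client.generate("Say hello").text)            # uses the default "gemini-2.5-flash"
    print(client.models.generate_content("Say hello", model="gemini-2.0-flash-lite").text)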
llumo/helpingFuntions.py CHANGED
@@ -8,6 +8,9 @@ import json
  import base64
  import os
  import re
+ import openai
+ import google.generativeai as genai
+ from collections import defaultdict


  from .models import _MODEL_METADATA, AVAILABLEMODELS
@@ -21,8 +24,7 @@ uploadColList = (
  uploadRowList = (
  "https://app.llumo.ai/api/New-Eval-API/new-upload-flow/uploadRowsInDBPlayground"
  )
- createInsightUrl = "https://app.llumo.ai/api/New-Eval-API/insights-api/generate-playground-insights"
- getPlaygroundInsightsUrl="https://app.llumo.ai/api/New-Eval-API/insights-api/get-all-playground-insights"
+ createInsightUrl="https://app.llumo.ai/api/external/generate-insight-from-eval-for-sdk"


  def getProcessID():
@@ -227,11 +229,12 @@ def createColumn(workspaceID, dataframe, playgroundID, promptText=None,queryColN
  }
  allEvals = ['Response Completeness', 'Response Bias', 'Response Harmfulness', 'Input Toxicity', 'Input Harmfulness', 'Context Utilization', 'Relevance Retention', 'Semantic Cohesion', 'Final Task Alignment', 'Tool Reliability', 'Response Correctness', 'Response Toxicity', 'Input Bias', 'Input Relevancy', 'Redundancy Reduction', 'Response Sentiment', 'Tool Selection Accuracy', 'Stepwise Progression', 'Hallucination', 'Faithfulness', 'Answer Relevancy', 'Context Precision', 'Answer Similarity', 'Harmfulness', 'Maliciousness', 'Coherence', 'Answer Correctness', 'Context Recall', 'Context Entity Recall', 'Conciseness', 'customEvalColumn', 'Groundedness', 'Memory Utilization', 'Input Relevancy (Multi-turn)']

-
+ evalDependencies = checkDependency(_returnDepMapping=True)

  # Create a mapping of column names to unique column IDs
  columnIDMapping = {}
-
+ print("Def Mapping: ")
+ print(definationMapping)
  # Iterate over each column in the dataframe
  for indx, col in enumerate(dataframe.columns):
  # Generate a unique column ID using uuid
@@ -240,46 +243,46 @@ def createColumn(workspaceID, dataframe, playgroundID, promptText=None,queryColN
  columnIDMapping[col] = columnID


- if col.startswith('output') and promptText!=None:
- # For output columns, create the prompt template with promptText
- if promptText:
- # Extract variables from promptText and set them as dependencies
- dependencies = []
-
- # Find variables inside {{variable}}
- variables = re.findall(r'{{(.*?)}}', promptText)
-
- # Loop through each variable and check if it exists as a column name
- for var in variables:
- varName = var.strip()
- if varName in columnIDMapping: # Check if the variable is a column name
- dependencies.append(columnIDMapping[varName]) # Add its columnID
-
- # Now update the template for the output column
-
- template={
- "provider": "OPENAI",
- "model": "GPT_4o",
- "promptText": promptText,
- "modelOptions": {
- "temperature": 0,
- "frequencyPenalty": 0,
- "presencePenalty": 0,
- "maxToken": 8192
- },
- "toolConfig": "none",
- "concurrency": "",
- "outputType": "STRING",
- "isPromptSelected": True,
- "isSmartPromptSelected": False,
- "dependency": dependencies, # Use the dependencies extracted from promptText
- "columnID": columnID, # Use the generated column ID
- "label": col,
- "type": "PROMPT",
- "order": indx,
- }
-
- elif col.startswith('context') and dataStreamName != None :
+ # if col.startswith('output') and promptText!=None:
+ # # For output columns, create the prompt template with promptText
+ # if promptText:
+ # # Extract variables from promptText and set them as dependencies
+ # dependencies = []
+ #
+ # # Find variables inside {{variable}}
+ # variables = re.findall(r'{{(.*?)}}', promptText)
+ #
+ # # Loop through each variable and check if it exists as a column name
+ # for var in variables:
+ # varName = var.strip()
+ # if varName in columnIDMapping: # Check if the variable is a column name
+ # dependencies.append(columnIDMapping[varName]) # Add its columnID
+ #
+ # # Now update the template for the output column
+ #
+ # template={
+ # "provider": "OPENAI",
+ # "model": "GPT_4o",
+ # "promptText": promptText,
+ # "modelOptions": {
+ # "temperature": 0,
+ # "frequencyPenalty": 0,
+ # "presencePenalty": 0,
+ # "maxToken": 8192
+ # },
+ # "toolConfig": "none",
+ # "concurrency": "",
+ # "outputType": "STRING",
+ # "isPromptSelected": True,
+ # "isSmartPromptSelected": False,
+ # "dependency": dependencies, # Use the dependencies extracted from promptText
+ # "columnID": columnID, # Use the generated column ID
+ # "label": col,
+ # "type": "PROMPT",
+ # "order": indx,
+ # }
+
+ if col.startswith('context') and dataStreamName != None :
  if queryColName and dataStreamName:
  dependencies = []
  dependencies.append(columnIDMapping[queryColName])
@@ -310,9 +313,9 @@ def createColumn(workspaceID, dataframe, playgroundID, promptText=None,queryColN

  dependencies.append(columnIDMapping[outputColName]) # Add the output column ID

- longDef = definationMapping.get(col, {}).get('definition', "")
- shortDef =definationMapping.get(col, {}).get('briefDefinition', "")
- enum = col.upper().replace(" ","_")
+ longDef = definationMapping.get(col.rsplit("_",1)[0], {}).get('definition', "")
+ shortDef =definationMapping.get(col.rsplit("_",1)[0], {}).get('briefDefinition', "")
+ enum = col.rsplit("_",1)[0].upper().replace(" ","_")

  template = {
  "analytics": [
@@ -320,20 +323,23 @@ def createColumn(workspaceID, dataframe, playgroundID, promptText=None,queryColN
  ],
  "evaluationMetric": "ALL",
  "evaluationModel": "LLUMO_EVALLM",
- "selectPrompt": columnIDMapping[outputColName],
+ "selectPrompt": None if "output" not in columnIDMapping.keys() else columnIDMapping["output"],
  "scoreCondition": "GREATER_THAN",
  "scoreValue": "50",
  "scoreResult": "PASS",
- "llmKpi": col,
+ "llmKpi": col.rsplit("_",1)[0],
  "setRules": True,
  "type": "EVAL",
  "evalType": "LLM",
  "similarityMetric": None,
  "embeddingModel": None,
- "groundTruth": None,
+ "groundTruth": None if "groundTruth" not in columnIDMapping.keys() else columnIDMapping["groundTruth"],
  "dataStream": None,
- "context": None,
- "dependency": [columnIDMapping[outputColName]],
+ "context":None if "context" not in columnIDMapping.keys() else columnIDMapping["context"],
+ "dependency":[ columnIDMapping[dep] for dep in evalDependencies[ col.rsplit("_",1)[0]]],
+ "query": None if "query" not in columnIDMapping.keys() else columnIDMapping["query"],
+ "tools":None if "tools" not in columnIDMapping.keys() else columnIDMapping["tools"],
+ "messageHistory":None if "messageHistory" not in columnIDMapping.keys() else columnIDMapping["messageHistory"],
  "hallucinationFields": {
  "query": None,
  "context": None,
@@ -342,7 +348,7 @@ def createColumn(workspaceID, dataframe, playgroundID, promptText=None,queryColN
  "definition": longDef,
  "analyticsENUM": enum,
  "prompt": shortDef,
- "analyticsName": col,
+ "analyticsName": col.rsplit("_",1)[0],
  "columnID": columnID,
  "label": col,
  "order": indx
@@ -378,12 +384,12 @@ def createColumn(workspaceID, dataframe, playgroundID, promptText=None,queryColN
  row_dict = {}

  # For each column, we need to map the column ID to the corresponding value in the row
- print(dataframe.columns)
+
  for col in dataframe.columns:
  columnID = columnIDMapping[col]

  if any(col.startswith(eval + "_") or col == eval for eval in allEvals) and not " Reason" in col and promptText!=None:
- print(col)
+
  row_dict[columnID] = {

  "value": row[col],
@@ -450,9 +456,12 @@ def uploadRowsInDBPlayground(payload):
  return None


- def createPlayground(email, workspaceID, df, promptText=None,queryColName=None,dataStreamName=None,definationMapping=None,outputColName="output",evalOutputMap = None):
+ def createPlayground(email, workspaceID, df, promptText=None,queryColName=None,dataStreamName=None,definationMapping=None,outputColName="output",evalOutputMap = None,activePlayground=None):

- playgroundId = str(createEvalPlayground(email=email, workspaceID=workspaceID))
+ if activePlayground != None:
+ playgroundId=activePlayground
+ else:
+ playgroundId = str(createEvalPlayground(email=email, workspaceID=workspaceID))
  payload1, payload2 = createColumn(
  workspaceID=workspaceID, dataframe=df, playgroundID=playgroundId, promptText=promptText,queryColName=queryColName,dataStreamName=dataStreamName,definationMapping=definationMapping,outputColName=outputColName,evalOutputMap=evalOutputMap
  )
@@ -470,7 +479,7 @@ def createPlayground(email, workspaceID, df, promptText=None,queryColName=None,d



- def getPlaygroundInsights(workspaceID: str, activePlayground: str):
+ def getPlaygroundInsights(defination:str,uniqueClassesString: str, reasonList: list):
  headers = {

  "Content-Type": "application/json",
@@ -478,48 +487,29 @@ def getPlaygroundInsights(workspaceID: str, activePlayground: str):

  # Initial request to generate playground insights
  payload = {
- "activePlayground": activePlayground,
- "workspaceID": workspaceID,
+ "uniqueClassesString": uniqueClassesString,
+ "reasonList": reasonList,
+ "definition": defination,
  }

  urlGenerate = createInsightUrl
-
- responseGenerate = requests.post(urlGenerate, json=payload, headers=headers)
-
- if responseGenerate.status_code == 200:
- responseJson = responseGenerate.json()
-
- insightStatus = responseJson.get("data", {}).get("insight", False)
+ try:
+ responseGenerate = requests.post(urlGenerate, json=payload, headers=headers)

- if insightStatus:
- # If insight is true, request to get all playground insights
- urlGetAll = getPlaygroundInsightsUrl
+ if responseGenerate.status_code == 200:
+ responseJson = responseGenerate.json()

- responseGetAll = requests.post(urlGetAll, json=payload, headers=headers)
-
- if responseGetAll.status_code == 200:
- responseAllJson = responseGetAll.json()
-
- data = responseAllJson.get("data", {}).get("data", [])
-
- # Extract insight and solution
- insights = []
- for item in data:
- insight = item.get("insight", "")
- solution = item.get("solution", "")
- insights.append({"insight": insight, "solution": solution})
-
- return insights
- else:
- print(f"Error fetching all insights: {responseGetAll.status_code} - {responseGetAll.text}")
- return None
- else:
- print("No insight generated.")
- return None
+ filteredResponse = {key: value for key, value in responseJson.items() if key in ['analysis', 'nextStep']}
+
+ return filteredResponse
+ except Exception as e:
+ print(f"Exception occurred while generating insight: {e}")
+ return None
+
  else:
  print(f"Error generating insight: {responseGenerate.status_code} - {responseGenerate.text}")
  return None
- def checkDependency(selectedEval, columns,tocheck=True):
+ def checkDependency(selectedEval:list = [], columns:list = [],tocheck=True,_returnDepMapping = False):
  """
  Checks if all the required input columns for the selected evaluation metric are present.

@@ -530,33 +520,35 @@ def checkDependency(selectedEval, columns,tocheck=True):
  Raises:
  - LlumoAIError.dependencyError: If any required column is missing.
  """
- if tocheck:
  # Define required dependencies for each evaluation metric
- metricDependencies = {
- 'Response Completeness': ['context', 'query', 'output'],
- 'Response Bias': ['output'],
- 'Response Harmfulness': ['output'],
- 'Input Toxicity': ['query'],
- 'Input Harmfulness': ['query'],
- 'Context Utilization': ['output', 'context'],
- 'Relevance Retention': ['context', 'query'],
- 'Semantic Cohesion': ['context'],
- 'Final Task Alignment': ['messageHistory'],
- 'Tool Reliability': ['messageHistory'],
- 'Response Correctness': ['output', 'query', 'context'],
- 'Response Toxicity': ['output'],
- 'Input Bias': ['query'],
- 'Input Relevancy': ['context', 'query'],
- 'Redundancy Reduction': ['context'],
- 'Response Sentiment': ['output'],
- 'Tool Selection Accuracy': ['tools', 'messageHistory'],
- 'Stepwise Progression': ['tools', 'messageHistory'],
- 'Hallucination': ['query', 'context', 'output'],
- 'Groundedness': ['groundTruth', 'output'],
- 'Memory Utilization': ['context', 'messageHistory'],
- 'Input Relevancy (Multi-turn)': ['context', 'query']
- }
+ metricDependencies = {
+ 'Response Completeness': ['context', 'query', 'output'],
+ 'Response Bias': ['output'],
+ 'Response Harmfulness': ['output'],
+ 'Input Toxicity': ['query'],
+ 'Input Harmfulness': ['query'],
+ 'Context Utilization': ['output', 'context'],
+ 'Relevance Retention': ['context', 'query'],
+ 'Semantic Cohesion': ['context'],
+ 'Final Task Alignment': ['messageHistory'],
+ 'Tool Reliability': ['messageHistory'],
+ 'Response Correctness': ['output', 'query', 'context'],
+ 'Response Toxicity': ['output'],
+ 'Input Bias': ['query'],
+ 'Input Relevancy': ['context', 'query'],
+ 'Redundancy Reduction': ['context'],
+ 'Response Sentiment': ['output'],
+ 'Tool Selection Accuracy': ['tools', 'messageHistory'],
+ 'Stepwise Progression': ['tools', 'messageHistory'],
+ 'Hallucination': ['query', 'context', 'output'],
+ 'Groundedness': ['groundTruth', 'output'],
+ 'Memory Utilization': ['context', 'messageHistory'],
+ 'Input Relevancy (Multi-turn)': ['context', 'query']
+ }
+ if _returnDepMapping == True:
+ return metricDependencies

+ if tocheck == True:
  # Check if the selected evaluation metric is known
  if selectedEval not in metricDependencies:
  return {"status": False,"message":f"Unknown evaluation metric: {selectedEval}"}
@@ -632,3 +624,56 @@



+ def validateOpenaiKey(api_key):
+ try:
+ client = openai.OpenAI(api_key=api_key)
+ _ = client.models.list() # Light call to list models
+ except openai.AuthenticationError:
+ raise ValueError("❌ Invalid OpenAI API key.")
+ except Exception as e:
+ raise RuntimeError(f"⚠️ Error validating OpenAI key: {e}")
+
+ def validateGoogleKey(api_key):
+ try:
+ genai.configure(api_key=api_key)
+ _ = genai.GenerativeModel("gemini-2.0-flash-lite").generate_content("test")
+ except Exception as e:
+ if "PERMISSION_DENIED" in str(e) or "API key not valid" in str(e):
+ raise ValueError("❌ Invalid Google API key.")
+ raise RuntimeError(f"⚠️ Error validating Gemini key: {e}")
+
+ def groupLogsByClass(logs, max_logs=2):
+ # Initialize the final result structures (no defaultdict)
+ groupedLogs = {}
+ uniqueEdgeCases = {} # This will store unique edge cases for each eval_name
+
+ # Iterate through the logs
+ for log in logs:
+ log_details = list(log.values())[0] # Get the details dictionary
+ eval_name = log_details.get("kpi", "unmarked")
+ edge_case = log_details.get("edgeCase", "unmarked")
+ reasoning = log_details.get("reasoning", "")
+
+ if eval_name != "unmarked" and edge_case != "unmarked":
+ # Ensure that the eval_name and edge_case exist in the dictionary
+ if eval_name not in groupedLogs:
+ groupedLogs[eval_name] = {}
+ uniqueEdgeCases[eval_name] = set() # Initialize the set for unique edge cases
+
+ if edge_case not in groupedLogs[eval_name]:
+ groupedLogs[eval_name][edge_case] = []
+
+ # Append the reasoning to the correct place
+ groupedLogs[eval_name][edge_case].append(reasoning)
+ uniqueEdgeCases[eval_name].add(edge_case) # Add the edge case to the set
+
+ # Limit the number of reasons to max_logs
+ for eval_name in groupedLogs:
+ for edge_case in groupedLogs[eval_name]:
+ groupedLogs[eval_name][edge_case] = groupedLogs[eval_name][edge_case][:max_logs]
+
+ # Convert the set of unique edge cases to a list for easier reading
+ for eval_name in uniqueEdgeCases:
+ uniqueEdgeCases[eval_name] = list(uniqueEdgeCases[eval_name])
+
+ return groupedLogs, uniqueEdgeCases
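Note (illustrative, not part of the diff): a small sketch of the new groupLogsByClass helper, which feeds the insights() methods added in chains.py. The log shape follows what the function reads (a single-key dict whose value carries kpi, edgeCase and reasoning); the keys and values here are invented:

    from llumo.helpingFuntions import groupLogsByClass

    logs = [
        {"row1-col1-col1": {"kpi": "Response Correctness", "edgeCase": "Partial Answer", "reasoning": "Missed one fact."}},
        {"row2-col1-col1": {"kpi": "Response Correctness", "edgeCase": "Partial Answer", "reasoning": "Ignored the context."}},
    ]
    grouped, uniqueEdgeCases = groupLogsByClass(logs)
    # grouped         -> {"Response Correctness": {"Partial Answer": ["Missed one fact.", "Ignored the context."]}}
    # uniqueEdgeCases -> {"Response Correctness": ["Partial Answer"]}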
llumo/openai.py ADDED
@@ -0,0 +1,79 @@
+ from openai import OpenAI as OpenAIClient
+ from .client import LlumoClient
+
+ # Dummy evaluation function that uses LlumoClient
+ def evaluate_multiple(data, api_key=None,evals=["Response Correctness"]):
+     client = LlumoClient(api_key=api_key)
+     results= client.evaluateMultiple(data, evals=evals,createExperiment=False,prompt_template="Give answer to the query: {{query}}, using context: {{context}}",getDataFrame=False)
+     print(results)
+     return results
+
+ # Wrapper around ChatCompletion to allow custom fields like `.evaluation`
+ class ChatCompletionWithEval:
+     def __init__(self, response, evaluation):
+         self._response = response
+         self.evaluation = evaluation
+
+     def __getattr__(self, name):
+         return getattr(self._response, name)
+
+     def __getitem__(self, key):
+         return self._response[key]
+
+     def __repr__(self):
+         return repr(self._response)
+
+ class openai(OpenAIClient):
+     def __init__(self, api_key: str):
+         super().__init__(api_key=api_key)
+
+         original_create = self.chat.completions.create
+
+         class ChatCompletionsWrapper:
+             @staticmethod
+             def create(*args, **kwargs):
+                 context = kwargs.pop("context", None)
+                 evals = kwargs.pop("evals", [])
+                 llumo_key = kwargs.pop("llumo_key", None)
+
+                 messages = kwargs.get("messages", [])
+                 user_message = next(
+                     (m.get("content") for m in reversed(messages) if m.get("role") == "user"),
+                     "",
+                 )
+
+                 # If context is None or empty or whitespace-only, set it to user_message
+                 if not context or context.strip() == "":
+                     context = user_message
+
+                 response = original_create(*args, **kwargs)
+
+                 try:
+                     output_text = response.choices[0].message.content
+                 except Exception:
+                     output_text = ""
+
+                 eval_input = [{
+                     "query": user_message,
+                     "context": context,
+                     "output": output_text,
+                 }]
+
+                 # Safely call evaluate_multiple, if error return None
+                 evaluation = None
+                 try:
+                     evaluation = evaluate_multiple(eval_input, api_key=llumo_key,evals=evals)
+                 except Exception as e:
+                     # You can optionally log the error here if you want
+                     # print(f"Evaluation failed, skipping: {e}")
+                     evaluation = None
+
+                 # If evaluation is None, just return normal response
+                 if evaluation is None:
+                     print("All server are busy for evaluation ")
+                     return response
+
+                 # Otherwise wrap with evaluation attached
+                 return ChatCompletionWithEval(response, evaluation)
+
+         self.chat.completions.create = ChatCompletionsWrapper.create
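Note (illustrative, not part of the diff): a usage sketch of the drop-in wrapper above. The model name, messages and both keys are placeholders; evals and llumo_key are the extra kwargs the wrapper pops before delegating to the real OpenAI client:

    from llumo import openai

    client = openai(api_key="<OPENAI_API_KEY>")            # placeholder key
    resp = client.chat.completions.create(
        model="gpt-4o-mini",                               # placeholder model name
        messages=[{"role": "user", "content": "What is LLUMO?"}],
        evals=["Response Correctness"],
        llumo_key="<LLUMO_API_KEY>",                       # placeholder key
    )
    print(resp.choices[0].message.content)
    print(getattr(resp, "evaluation", None))               # set only when the LLUMO evaluation succeeded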
llumo-0.2.16b1.dist-info/METADATA CHANGED
@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: llumo
- Version: 0.2.15b1
+ Version: 0.2.16b1
  Summary: Python SDK for interacting with the Llumo ai API.
  Home-page: https://www.llumo.ai/
  Author: Llumo
llumo-0.2.16b1.dist-info/RECORD ADDED
@@ -0,0 +1,16 @@
+ llumo/__init__.py,sha256=YVBkF1fiXFBd_zzySi9BDWgX8MJuLBJ-oF8538MrnDU,256
+ llumo/chains.py,sha256=6lCgLseh04RUgc6SahhmvQj82quay2Mi1j8gPUlx8Es,2923
+ llumo/client.py,sha256=rOTbw8QGi5CnQ77QKS4rKh-dSBSVoyVAORrK1i_b5EQ,60339
+ llumo/exceptions.py,sha256=Vp_MnanHbnd1Yjuoi6WLrKiwwZbJL3znCox2URMmGU4,2032
+ llumo/execution.py,sha256=nWbJ7AvWuUPcOb6i-JzKRna_PvF-ewZTiK8skS-5n3w,1380
+ llumo/functionCalling.py,sha256=D5jYapu1rIvdIJNUYPYMTyhQ1H-6nkwoOLMi6eekfUE,7241
+ llumo/google.py,sha256=5AVAqxPN20UuHIqi4yuHHSTf49LI96krtbztJ5qt8L0,1413
+ llumo/helpingFuntions.py,sha256=0W2JNdLyOV92lgESgB_JyJmOUvW5ooRdZyjN5LKDSX0,25296
+ llumo/models.py,sha256=aVEZsOOoQx5LeNtwSyBxqvrINq0izH3QWu_YjsMPE6o,2910
+ llumo/openai.py,sha256=BEmsOdHiQzDpKv6b4L62JaUMq7DbpICNPqyfMNRWi2I,2981
+ llumo/sockets.py,sha256=I2JO_eNEctRo_ikgvFVp5zDd-m0VDu04IEUhhsa1Tic,5950
+ llumo-0.2.16b1.dist-info/licenses/LICENSE,sha256=tF9yAcfPV9xGT3ViWmC8hPvOo8BEk4ZICbUfcEo8Dlk,182
+ llumo-0.2.16b1.dist-info/METADATA,sha256=_e94VIPrn02CP0X9gdkICA210Te_inzaSPcfH0p-Hlk,1521
+ llumo-0.2.16b1.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+ llumo-0.2.16b1.dist-info/top_level.txt,sha256=d5zUTMI99llPtLRB8rtSrqELm_bOqX-bNC5IcwlDk88,6
+ llumo-0.2.16b1.dist-info/RECORD,,
llumo-0.2.15b1.dist-info/RECORD DELETED
@@ -1,13 +0,0 @@
- llumo/__init__.py,sha256=O04b4yW1BnOvcHzxWFddAKhtdBEhBNhLdb6xgnpHH_Q,205
- llumo/client.py,sha256=XljwD5mZxjyrXHhu8YhN0cGsd-O_LyKbPzrhS8zbqZo,53778
- llumo/exceptions.py,sha256=Vp_MnanHbnd1Yjuoi6WLrKiwwZbJL3znCox2URMmGU4,2032
- llumo/execution.py,sha256=x88wQV8eL99wNN5YtjFaAMCIfN1PdfQVlAZQb4vzgQ0,1413
- llumo/functionCalling.py,sha256=D5jYapu1rIvdIJNUYPYMTyhQ1H-6nkwoOLMi6eekfUE,7241
- llumo/helpingFuntions.py,sha256=0-ZwG0fnbfP4DP1JTMewM8LdXzz_-p1gRqhPsX0Zmpk,22785
- llumo/models.py,sha256=aVEZsOOoQx5LeNtwSyBxqvrINq0izH3QWu_YjsMPE6o,2910
- llumo/sockets.py,sha256=I2JO_eNEctRo_ikgvFVp5zDd-m0VDu04IEUhhsa1Tic,5950
- llumo-0.2.15b1.dist-info/licenses/LICENSE,sha256=tF9yAcfPV9xGT3ViWmC8hPvOo8BEk4ZICbUfcEo8Dlk,182
- llumo-0.2.15b1.dist-info/METADATA,sha256=yDLkiD46Qq44PA3ylKK2dzsXZmnuE23yxH0RmoqizOk,1521
- llumo-0.2.15b1.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
- llumo-0.2.15b1.dist-info/top_level.txt,sha256=d5zUTMI99llPtLRB8rtSrqELm_bOqX-bNC5IcwlDk88,6
- llumo-0.2.15b1.dist-info/RECORD,,