llumo 0.2.28__py3-none-any.whl → 0.2.30__py3-none-any.whl
This diff shows the changes between publicly released versions of the package as published to a supported registry. It is provided for informational purposes only and reflects the package contents exactly as they appear in the respective public registries.
- llumo/callback.py +123 -31
- llumo/client.py +213 -228
- llumo/google.py +16 -16
- llumo/helpingFuntions.py +2 -2
- llumo/llumoSessionContext.py +91 -29
- llumo/openai.py +148 -136
- {llumo-0.2.28.dist-info → llumo-0.2.30.dist-info}/METADATA +1 -1
- llumo-0.2.30.dist-info/RECORD +20 -0
- llumo-0.2.28.dist-info/RECORD +0 -20
- {llumo-0.2.28.dist-info → llumo-0.2.30.dist-info}/WHEEL +0 -0
- {llumo-0.2.28.dist-info → llumo-0.2.30.dist-info}/licenses/LICENSE +0 -0
- {llumo-0.2.28.dist-info → llumo-0.2.30.dist-info}/top_level.txt +0 -0
llumo/client.py
CHANGED
@@ -29,10 +29,7 @@ fetchUrl = (
     "https://red-skull-service-392377961931.us-central1.run.app/api/get-cells-data"
 )
 socketDataUrl = "https://app.llumo.ai/api/eval/get-awaited"
-
-# "workspaceID":"c9191fdf33bdd7838328c1a0",
-# "playgroundID":"17496117244856b7815ac94004347b1c2e2f7e01600ec"
-# }
+
 validateUrl = "https://app.llumo.ai/api/workspace-details"
 socketUrl = "https://red-skull-service-392377961931.us-central1.run.app/"
 
@@ -79,6 +76,7 @@ class LlumoClient:
         # Try to parse JSON
         try:
             data = response.json()
+            # print(data)
         except ValueError as e:
             print(f"JSON parsing error: {str(e)}")
             # print(f"Response content that could not be parsed: {response.text[:1000]}...")
@@ -93,7 +91,9 @@ class LlumoClient:
         self.workspaceID = data["data"]["data"].get("workspaceID")
         self.evalDefinition = data["data"]["data"]["analyticsMapping"]
         self.socketToken = data["data"]["data"].get("token")
-        self.
+        # print(self.socketToken)
+        self.hasSubscribed = data["data"]["data"].get("hasSubscr"
+                                                      "ibed", False)
         self.trialEndDate = data["data"]["data"].get("trialEndDate", None)
         self.subscriptionEndDate = data["data"]["data"].get(
             "subscriptionEndDate", None
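A note on the hunk above: the new `hasSubscribed` assignment splits its key across two physical lines, and Python concatenates adjacent string literals at parse time, so the two fragments still resolve to the single key `"hasSubscribed"`. A minimal standalone check:

# Adjacent string literals are joined by the parser, so the split key in
# the hunk above is equivalent to the unbroken one.
key = "hasSubscr" "ibed"
assert key == "hasSubscribed"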
@@ -191,7 +191,7 @@ class LlumoClient:
     def postBatch(self, batch, workspaceID):
         payload = {
             "batch": json.dumps(batch),
-            "runType": "
+            "runType": "FULL_EVAL_RUN",
             "workspaceID": workspaceID,
         }
         # socketToken here if the "JWD" token
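For context, the body `postBatch` now posts is a flat dict whose `batch` field is a JSON-encoded string, with the run type pinned to `FULL_EVAL_RUN` in 0.2.30. A minimal sketch of the resulting payload (the batch entries and workspace ID below are invented for illustration):

import json

batch = [{"rowID": "r1", "columnID": "c1"}]   # illustrative batch entries
payload = {
    "batch": json.dumps(batch),    # the batch travels as a JSON string
    "runType": "FULL_EVAL_RUN",    # pinned in 0.2.30
    "workspaceID": "ws_123",       # hypothetical workspace ID
}
print(payload)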
@@ -204,6 +204,7 @@ class LlumoClient:
             response = requests.post(postUrl, json=payload, headers=headers)
             # print(f"Post API Status Code: {response.status_code}")
             # print(response.text)
+            # print(response.status_code)
 
         except Exception as e:
             print(f"Error in posting batch: {e}")
@@ -644,7 +645,7 @@ class LlumoClient:
         self.socket.listenForResults(
             min_wait=20,
             max_wait=timeout,
-            inactivity_timeout=
+            inactivity_timeout=50,
             expected_results=None,
         )
 
@@ -701,13 +702,11 @@ class LlumoClient:
         data,
         evals: list,
         prompt_template="Give answer to the given query: {{query}} using the given context: {{context}}.",
-        outputColName="output",
-        createExperiment: bool = False,
         getDataFrame: bool = False,
         _tocheck=True,
     ):
-        if hasattr(self, "startLlumoRun"):
-
+        # if hasattr(self, "startLlumoRun"):
+        #     self.startLlumoRun(runName="evaluateMultiple")
         if isinstance(data, dict):
             data = [data]
         elif not isinstance(data, list):
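The hunk above changes the public signature of `evaluateMultiple`: the `outputColName` and `createExperiment` keywords are removed in 0.2.30, and the method always reads the `output` column. A hedged usage sketch, assuming `LlumoClient` is exported from the package root and takes an API key; the key and data are invented:

from llumo import LlumoClient

client = LlumoClient(api_key="YOUR_API_KEY")   # hypothetical key
data = [{"query": "What is RAG?", "context": "Docs...", "output": "..."}]

# 0.2.28 accepted extra keywords:
#   client.evaluateMultiple(data, evals=["Response Correctness"],
#                           outputColName="output", createExperiment=False)
# 0.2.30 drops both, so the same call reduces to:
results = client.evaluateMultiple(data, evals=["Response Correctness"])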
@@ -743,9 +742,9 @@ class LlumoClient:
         listener_thread = threading.Thread(
             target=self.socket.listenForResults,
             kwargs={
-                "min_wait":
+                "min_wait": 20,
                 "max_wait": timeout,
-                "inactivity_timeout":
+                "inactivity_timeout": 35,
                 "expected_results": expectedResults,
             },
             daemon=True,
@@ -753,148 +752,152 @@ class LlumoClient:
         listener_thread.start()
         self.validateApiKey(evalName=evals[0])
         activePlayground = self.playgroundID
-        for evalName
-
+        # print(f"\n======= Running evaluation for: {evalName} =======")
+
+        # Validate API and dependencies
+        # self.validateApiKey(evalName=evals[0])
+        customAnalytics = getCustomAnalytics(self.workspaceID)
+        # metricDependencies = checkDependency(
+        #     evalName,
+        #     list(dataframe.columns),
+        #     tocheck=_tocheck,
+        #     customevals=customAnalytics,
+        # )
+        # if not metricDependencies["status"]:
+        #     raise LlumoAIError.dependencyError(metricDependencies["message"])
 
-
-
-
-
-
-
-
-
-
-
-
+        # evalDefinition = self.evalDefinition[evalName]["definition"]
+        model = "GPT_4"
+        provider = "OPENAI"
+        evalType = "LLM"
+        workspaceID = self.workspaceID
+        email = self.email
+        # categories = self.categories
+        # evaluationStrictness = self.evaluationStrictness
+        # grammarCheckOutput = self.grammarCheckOutput
+        # insightLength = self.insightsLength
+        # numJudges = self.numJudges
+        # penaltyBonusInstructions = self.penaltyBonusInstructions
+        # probableEdgeCases = self.probableEdgeCases
+        # fieldMapping = self.fieldMapping
 
-
-
-
-
-
-
-
-
-
-
-        numJudges = self.numJudges
-        penaltyBonusInstructions = self.penaltyBonusInstructions
-        probableEdgeCases = self.probableEdgeCases
-        fieldMapping = self.fieldMapping
+        userHits = checkUserHits(
+            self.workspaceID,
+            self.hasSubscribed,
+            self.trialEndDate,
+            self.subscriptionEndDate,
+            self.hitsAvailable,
+            len(dataframe),
+        )
+        if not userHits["success"]:
+            raise LlumoAIError.InsufficientCredits(userHits["message"])
 
-
-
-
-
-
-
-
+        currentBatch = []
+        for index, row in dataframe.iterrows():
+            tools = [row["tools"]] if "tools" in dataframe.columns else []
+            groundTruth = row.get("groundTruth", "")
+            messageHistory = (
+                [row["messageHistory"]]
+                if "messageHistory" in dataframe.columns
+                else []
             )
-
-
+            promptTemplate = prompt_template
+            keys = re.findall(r"{{(.*?)}}", promptTemplate)
 
-
-
-            tools = [row["tools"]] if "tools" in dataframe.columns else []
-            groundTruth = row.get("groundTruth", "")
-            messageHistory = (
-                [row["messageHistory"]]
-                if "messageHistory" in dataframe.columns
-                else []
-            )
-            promptTemplate = prompt_template
-            keys = re.findall(r"{{(.*?)}}", promptTemplate)
+            if not all([ky in dataframe.columns for ky in keys]):
+                raise LlumoAIError.InvalidPromptTemplate()
 
-
-
+            inputDict = {key: row[key] for key in keys if key in row}
+            # output = row.get(outputColName, "")
+            output = row.get("output","")
+            intermediateSteps = row.get("intermediateSteps", "")
 
-
-
-            intermediateSteps = row.get("intermediateSteps", "")
+            rowID = f"{int(time.time() * 1000)}{uuid.uuid4()}".replace("-", "")
+            columnID = f"{int(time.time() * 1000)}{uuid.uuid4()}".replace("-", "")
 
-
-
+            compoundKey = f"{rowID}-{columnID}-{columnID}"
+            # rowIdMapping[compoundKey] = {"index": index, "eval": evalName}
+            rowIdMapping[compoundKey] = {"index": index}
 
-            compoundKey = f"{rowID}-{columnID}-{columnID}"
-            rowIdMapping[compoundKey] = {"index": index, "eval": evalName}
 
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-                },
-                "categories": categories,
-                "evaluationStrictness": evaluationStrictness,
-                "grammarCheckOutput": grammarCheckOutput,
-                "insightLength": insightLength,
-                "numJudges": numJudges,
-                "penaltyBonusInstructions": penaltyBonusInstructions,
-                "probableEdgeCases": probableEdgeCases,
-                "model": model,
-                "provider": provider,
+            templateData = {
+                "processID": getProcessID(),
+                "socketID": socketID,
+                "rowID": rowID,
+                "columnID": columnID,
+                "processType": "FULL_EVAL_RUN",
+                "evalType": "LLM",
+                "workspaceID": workspaceID,
+                "email": email,
+                "playgroundID": activePlayground,
+                "source": "SDK",
+                "processData": {
+                    # "analyticsName": evalName,
+                    # "definition": evalDefinition,
+                    "executionDependency": {
+                        "query": "",
+                        "context": "",
+                        "output": output,
+                        "tools": tools,
+                        "groundTruth": groundTruth,
+                        "messageHistory": messageHistory,
+                        "intermediateSteps": intermediateSteps,
                     },
-                "
-                "
-                "
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-                    promptTemplate, tempObj
-                )
-            else:
-                query += f" {key}: {value}, "
+                    "evallist":evals,
+                    # "model": model,
+                    # "provider": provider,
+                    "sessionID":self.sessionID
+                    # "categories": categories,
+                    # "evaluationStrictness": evaluationStrictness,
+                    # "grammarCheckOutput": grammarCheckOutput,
+                    # "insightLength": insightLength,
+                    # "numJudges": numJudges,
+                    # "penaltyBonusInstructions": penaltyBonusInstructions,
+                    # "probableEdgeCases": probableEdgeCases,
+                },
+                "type": "FULL_EVAL_RUN",
+                # "kpi": evalName,
+                # "fieldMappig": fieldMapping,
+            }
 
-
-
+            query = ""
+            context = ""
+            for key, value in inputDict.items():
+                if isinstance(value, str):
+                    length = len(value.split()) * 1.5
+                    if length > 50:
                         context += f" {key}: {value}, "
+                    else:
+                        if promptTemplate:
+                            tempObj = {key: value}
+                            promptTemplate = getInputPopulatedPrompt(
+                                promptTemplate, tempObj
+                            )
+                        else:
+                            query += f" {key}: {value}, "
 
-
-
-
+            if not context.strip():
+                for key, value in inputDict.items():
+                    context += f" {key}: {value}, "
+
+            templateData["processData"]["executionDependency"][
+                "context"
+            ] = context.strip()
+            templateData["processData"]["executionDependency"][
+                "query"
+            ] = query.strip()
+            if promptTemplate and not query.strip():
                 templateData["processData"]["executionDependency"][
                     "query"
-                ] =
-            if promptTemplate and not query.strip():
-                templateData["processData"]["executionDependency"][
-                    "query"
-                ] = promptTemplate
-
-            currentBatch.append(templateData)
-            if len(currentBatch) == 10:
-                self.allBatches.append(currentBatch)
-                currentBatch = []
+                ] = promptTemplate
 
-
+            currentBatch.append(templateData)
+            if len(currentBatch) == 10:
                 self.allBatches.append(currentBatch)
+                currentBatch = []
+
+        if currentBatch:
+            self.allBatches.append(currentBatch)
 
         for batch in tqdm(
             self.allBatches,
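To make the new loop above easier to follow: every dataframe row gets a time-plus-UUID rowID/columnID pair, the rowID-columnID-columnID compound string is remembered in rowIdMapping so socket results can later be matched back to a row index, and batches are flushed every ten cells. A standalone sketch of just that bookkeeping (the sample rows are invented):

import time
import uuid

rows = [{"output": "a"}, {"output": "b"}, {"output": "c"}]  # stand-in rows
rowIdMapping = {}
allBatches, currentBatch = [], []

for index, row in enumerate(rows):
    rowID = f"{int(time.time() * 1000)}{uuid.uuid4()}".replace("-", "")
    columnID = f"{int(time.time() * 1000)}{uuid.uuid4()}".replace("-", "")
    # The compound key later identifies which row a socket result belongs to.
    compoundKey = f"{rowID}-{columnID}-{columnID}"
    rowIdMapping[compoundKey] = {"index": index}

    currentBatch.append({"rowID": rowID, "columnID": columnID})
    if len(currentBatch) == 10:   # flush a full batch of 10 cells
        allBatches.append(currentBatch)
        currentBatch = []

if currentBatch:                  # flush the remainder
    allBatches.append(currentBatch)

print(len(allBatches), "batch(es),", len(rowIdMapping), "tracked cells")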
@@ -905,7 +908,8 @@ class LlumoClient:
         ):
             try:
                 self.postBatch(batch=batch, workspaceID=workspaceID)
-                time.sleep(
+                time.sleep(2)
+                # print(batch)
             except Exception as e:
                 print(f"Error posting batch: {e}")
                 raise
@@ -921,8 +925,8 @@ class LlumoClient:
         receivedRowIDs = {key for item in rawResults for key in item.keys()}
         expectedRowIDs = set(rowIdMapping.keys())
         missingRowIDs = expectedRowIDs - receivedRowIDs
-        # print("All expected keys:",
-        # print("All received keys:",
+        # print("All expected keys:", expectedRowIDs)
+        # print("All received keys:", receivedRowIDs)
         # print("Missing keys:", len(missingRowIDs))
         missingRowIDs = list(missingRowIDs)
         if len(missingRowIDs) > 0:
@@ -930,102 +934,83 @@ class LlumoClient:
             rawResults.extend(dataFromDb)
 
         self.evalData = rawResults
+        # print("RAW RESULTS: ", self.evalData)
 
         # Initialize dataframe columns for each eval
-        for
-        dataframe[
-        dataframe[f"{
+        for ev_name in evals:
+            dataframe[ev_name] = ""
+            dataframe[f"{ev_name} Reason"] = ""
+            # dataframe[f"{ev_name} EdgeCase"] = None
 
         # Map results to dataframe rows
         for item in rawResults:
             for compound_key, value in item.items():
-                if compound_key in rowIdMapping:
-
-
-
-                    dataframe.at[index, f"{evalName} Reason"] = value.get("reasoning")
-
-                    # Log the evaluation step
-                    if hasattr(self, "logEvalStep"):
-                        try:
-                            start_time = time.time()
-                            self.logEvalStep(
-                                stepName=f"EVAL-{evalName}",
-                                output=value.get("value"),
-                                context=row.get("context", ""),
-                                query=row.get("query", ""),
-                                messageHistory=row.get("messageHistory", ""),
-                                tools=row.get("tools", ""),
-                                intermediateSteps=row.get("intermediateSteps", ""),
-                                groundTruth=row.get("groundTruth", ""),
-                                analyticsScore=value.get("analyticsScore", {}),
-                                reasoning=value.get("reasoning", {}),
-                                classification=value.get("classification", {}),
-                                evalLabel=value.get("evalLabel", {}),
-                                latencyMs=int((time.time() - start_time) * 1000),
-                                status="SUCCESS",
-                                message="",
-                            )
-                        except Exception as e:
-                            print(f"Error logging eval step: {e}")
+                if compound_key not in rowIdMapping:
+                    continue
+                index = rowIdMapping[compound_key]["index"]
+                rowID, columnID, _ = compound_key.split("-", 2)
 
-
-
-        self.endLlumoRun()
+                if hasattr(self, "startLlumoRun"):
+                    self.startLlumoRun(runName="evaluateMultiple",rowID = rowID, columnID = columnID)
 
-
-
-        # df = dataframe.fillna("Some error occured").astype(object)
-        with warnings.catch_warnings():
-            warnings.simplefilter(action="ignore", category=FutureWarning)
-            df = dataframe.fillna("Some error occurred").astype(str)
+                # get the dataframe row at this index
+                row = dataframe.iloc[index].to_dict()
 
-
-
-            email,
-            workspaceID,
-            df,
-            promptText=prompt_template,
-            definationMapping=self.definationMapping,
-            outputColName=outputColName,
-            activePlayground=activePlayground,
-            customAnalytics=customAnalytics,
-        ):
-            print(
-                "LLUMO’s intuitive UI is ready—start exploring and experimenting with your logs now. Visit https://app.llumo.ai/evallm to see the results."
-            )
-        if getDataFrame:
-            return LlumoDataFrameResults(
-                dataframe,
-                evals=self.evals,
-                evalData=self.evalData,
-                definationMapping=self.definationMapping,
-            )
-        else:
-            data = dataframe.to_dict(orient="records")
-            return LlumoDictResults(
-                data,
-                evals=self.evals,
-                evalData=self.evalData,
-                definationMapping=self.definationMapping,
-            )
+                if not value:
+                    continue
 
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+
+                # Handle fullEval block
+                fullEval = value.get("fullEval") if isinstance(value, dict) else None
+                if fullEval:
+                    if "evalMetrics" in fullEval and isinstance(fullEval["evalMetrics"], list):
+                        for eval_item in fullEval["evalMetrics"]:
+                            evalName = eval_item.get("evalName") or eval_item.get("kpiName")
+                            score = str(eval_item.get("score")) or eval_item.get("value")
+                            reasoning = eval_item.get("reasoning")
+                            # edgeCase = eval_item.get("edgeCase")
+
+                            if evalName:
+                                dataframe.at[index, evalName] = score
+                                dataframe.at[index, f"{evalName} Reason"] = reasoning
+                                # dataframe.at[index, f"{evalName} EdgeCase"] = edgeCase
+
+                            # logEvalStep if available
+                            if hasattr(self, "logEvalStep"):
+                                try:
+                                    start_time = time.time()
+                                    self.logEvalStep(
+                                        stepName=f"EVAL-{evalName}",
+                                        output=row.get("output", ""),
+                                        context=row.get("context", ""),
+                                        query=row.get("query", ""),
+                                        messageHistory=row.get("messageHistory", ""),
+                                        tools=row.get("tools", ""),
+                                        intermediateSteps=row.get("intermediateSteps", ""),
+                                        groundTruth=row.get("groundTruth", ""),
+                                        analyticsScore=score,
+                                        reasoning=reasoning,
+                                        classification=eval_item.get("classification", {}),
+                                        evalLabel=eval_item.get("evalLabel", {}),
+                                        latencyMs=int((time.time() - start_time) * 1000),
+                                        status="SUCCESS",
+                                        message="",
+                                    )
+                                except Exception as e:
+                                    print(f"⚠️ logEvalStep failed: {e}")
+                            if hasattr(self, "endLlumoRun"):
+                                self.endEvalRun()
+
+        # Clean up and finish
+        try:
+            self.socket.disconnect()
+        except Exception:
+            pass
+
+        # if hasattr(self, "endLlumoRun"):
+        #     self.endEvalRun()
+        #
+        return dataframe
 
     def promptSweep(
         self,
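The rewritten mapping loop above expects each socket result to carry its scores inside a fullEval.evalMetrics list rather than a single value per eval. A standalone sketch of how that shape is unpacked into the dataframe columns (the payload below is invented to mirror the structure the code handles):

import pandas as pd

dataframe = pd.DataFrame([{"output": "Paris"}])
rowIdMapping = {"r1-c1-c1": {"index": 0}}
for ev_name in ["Response Correctness"]:
    dataframe[ev_name] = ""
    dataframe[f"{ev_name} Reason"] = ""

# Invented result payload mirroring the fullEval/evalMetrics shape.
item = {"r1-c1-c1": {"fullEval": {"evalMetrics": [
    {"evalName": "Response Correctness", "score": 95,
     "reasoning": "Matches the ground truth."},
]}}}

for compound_key, value in item.items():
    if compound_key not in rowIdMapping:
        continue
    index = rowIdMapping[compound_key]["index"]
    fullEval = value.get("fullEval") if isinstance(value, dict) else None
    if fullEval and isinstance(fullEval.get("evalMetrics"), list):
        for eval_item in fullEval["evalMetrics"]:
            evalName = eval_item.get("evalName") or eval_item.get("kpiName")
            if evalName:
                dataframe.at[index, evalName] = str(eval_item.get("score"))
                dataframe.at[index, f"{evalName} Reason"] = eval_item.get("reasoning")

print(dataframe)

One detail worth noting in the hunk itself: `str(eval_item.get("score")) or eval_item.get("value")` always yields the left operand, since even `str(None)` is the truthy string "None", so the `value` fallback never triggers.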
@@ -1806,8 +1791,8 @@ class LlumoClient:
         rowIdMapping = {}  # (rowID-columnID-columnID -> (index, evalName))
         self.validateApiKey(evalName=evals[0])
         if createExperiment:
-            if playgroundID:
-                activePlayground = playgroundID
+            if self.playgroundID:
+                activePlayground = self.playgroundID
             else:
                 activePlayground = str(
                     createEvalPlayground(email=self.email, workspaceID=self.workspaceID)
llumo/google.py
CHANGED
@@ -15,9 +15,9 @@ def evaluate_multiple(data, api_key=None, evals=["Response Correctness"]):
 
 
 class ChatCompletionWithEval:
-    def __init__(self, response, evaluation):
+    def __init__(self, response, evaluation=None):
         self._response = response
-        self.evaluation = evaluation
+        # self.evaluation = evaluation
 
     def __getattr__(self, name):
         return getattr(self._response, name)
@@ -46,21 +46,21 @@ class genai:
             response = self._model_instance.generate_content(contents=contents, **kwargs)
             output = response.text
 
-            eval_input = [{
-
-
-
-            }]
+            # eval_input = [{
+            #     "query": contents,
+            #     "context": context or contents,
+            #     "output": output,
+            # }]
 
-            evaluation = None
-            try:
-
-            except Exception as e:
-
+            # evaluation = None
+            # try:
+            #     evaluation = evaluate_multiple(data=eval_input, evals=evals, api_key=llumo_key)
+            # except Exception as e:
+            #     evaluation = None
 
-            if evaluation is None:
-
-
+            # if evaluation is None:
+            #     print("Cannot process your request for evaluation, please check your api and try again later.")
+            #     return response
 
 
-            return ChatCompletionWithEval(response, evaluation)
+            return ChatCompletionWithEval(response, evaluation=None)
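With the evaluation path commented out above, ChatCompletionWithEval is reduced to a transparent wrapper: __getattr__ only fires when normal attribute lookup fails, so every attribute of the underlying genai response passes straight through. A minimal standalone illustration of that delegation pattern (the response class is a stand-in, not the google-generativeai type):

class ChatCompletionWithEval:
    def __init__(self, response, evaluation=None):
        self._response = response

    def __getattr__(self, name):
        # Invoked only when normal lookup fails, so every attribute of the
        # wrapped response is forwarded unchanged.
        return getattr(self._response, name)


class FakeResponse:  # stand-in for a genai response object
    text = "Hello from Gemini"


wrapped = ChatCompletionWithEval(FakeResponse())
print(wrapped.text)  # -> "Hello from Gemini"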
llumo/helpingFuntions.py
CHANGED
@@ -130,7 +130,7 @@ def checkUserHits(
     response = json.loads(responseBody.text)
 
     proceed = response.get("execution", "")
-    print(proceed)
+    # print(proceed)
 
     if proceed:
         return {"success": True, "message": "Hits added and access granted."}
@@ -234,7 +234,7 @@ def createColumn(workspaceID, dataframe, playgroundID, promptText=None,queryColN
     except Exception as e:
         pass
     evalDependencies = checkDependency(_returnDepMapping=True,customevals=customAnalytics)
-    print(allEvals)
+    # print(allEvals)
     # Create a mapping of column names to unique column IDs
     columnIDMapping = {}
 