PyPI - llumo - Versions diffs - 0.2.13b1__py3-none-any.whl → 0.2.14__py3-none-any.whl - Mend

llumo 0.2.13b1-py3-none-any.whl → 0.2.14-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (10) hide show

llumo/client.py +457 -141
llumo/exceptions.py +4 -0
llumo/helpingFuntions.py +127 -12
llumo/sockets.py +23 -10
{llumo-0.2.13b1.dist-info → llumo-0.2.14.dist-info}/METADATA +2 -1
llumo-0.2.14.dist-info/RECORD +13 -0
llumo-0.2.13b1.dist-info/RECORD +0 -13
{llumo-0.2.13b1.dist-info → llumo-0.2.14.dist-info}/WHEEL +0 -0
{llumo-0.2.13b1.dist-info → llumo-0.2.14.dist-info}/licenses/LICENSE +0 -0
{llumo-0.2.13b1.dist-info → llumo-0.2.14.dist-info}/top_level.txt +0 -0

llumo/client.py CHANGED Viewed

@@ -16,10 +16,20 @@ from .exceptions import LlumoAIError
 from .helpingFuntions import *
 from .sockets import LlumoSocketClient
 from .functionCalling import LlumoAgentExecutor
-postUrl = "https://app.llumo.ai/api/eval/run-multiple-column"
-fetchUrl = "https://app.llumo.ai/api/eval/fetch-rows-data-by-column"
+import threading
+from tqdm import tqdm
+postUrl = (
+    "https://red-skull-service-392377961931.us-central1.run.app/api/process-playground"
+)
+fetchUrl = (
+    "https://red-skull-service-392377961931.us-central1.run.app/api/get-cells-data"
+)
+socketDataUrl = "https://app.llumo.ai/api/eval/get-awaited"
+# {
+#     "workspaceID":"c9191fdf33bdd7838328c1a0",
+#     "playgroundID":"17496117244856b7815ac94004347b1c2e2f7e01600ec"
+# }
 validateUrl = "https://app.llumo.ai/api/workspace-details"
 socketUrl = "https://red-skull-service-392377961931.us-central1.run.app/"
@@ -32,7 +42,7 @@ class LlumoClient:
         self.processMapping = {}
         self.definationMapping = {}
-    def validateApiKey(self, evalName=" "):
+    def validateApiKey(self, evalName="Input Bias"):
         headers = {
             "Authorization": f"Bearer {self.apiKey}",
             "Content-Type": "application/json",
@@ -41,13 +51,6 @@ class LlumoClient:
         try:
             response = requests.post(url=validateUrl, json=reqBody, headers=headers)
-            try:
-                response_preview = response.text[:500]  # First 500 chars
-                # print(f"Response preview: {response_preview}")
-            except Exception as e:
-                print(f"Could not get response preview: {e}")
         except requests.exceptions.RequestException as e:
             print(f"Request exception: {str(e)}")
@@ -82,14 +85,16 @@ class LlumoClient:
         try:
             self.hitsAvailable = data["data"]["data"].get("remainingHits", 0)
             self.workspaceID = data["data"]["data"].get("workspaceID")
-            self.evalDefinition = data["data"]["data"].get("analyticsMapping")
+            self.evalDefinition = data["data"]["data"]["analyticsMapping"]
             self.socketToken = data["data"]["data"].get("token")
             self.hasSubscribed = data["data"]["data"].get("hasSubscribed", False)
             self.trialEndDate = data["data"]["data"].get("trialEndDate", None)
-            self.subscriptionEndDate = data["data"]["data"].get("subscriptionEndDate", None)
+            self.subscriptionEndDate = data["data"]["data"].get(
+                "subscriptionEndDate", None
+            )
             self.email = data["data"]["data"].get("email", None)
-            self.definationMapping[evalName] = self.evalDefinition
+            self.definationMapping[evalName] = data.get("data", {}).get("data", {}).get("analyticsMapping", {}).get(evalName, None)
         except Exception as e:
             # print(f"Error extracting data from response: {str(e)}")
@@ -159,13 +164,24 @@ class LlumoClient:
     # this function allows the users to run exactl one eval at a time
     def evaluate(
         self,
-        dataframe,
+        data,
         eval="Response Completeness",
         prompt_template="",
         outputColName="output",
         createExperiment: bool = False,
+        _tocheck=True,
     ):
+        # converting it into a pandas dataframe object
+        dataframe = pd.DataFrame(data)
+        # check for dependencies for the selected eval metric
+        metricDependencies = checkDependency(
+            eval, columns=list(dataframe.columns), tocheck=_tocheck
+        )
+        if metricDependencies["status"] == False:
+            raise LlumoAIError.dependencyError(metricDependencies["message"])
         results = {}
         try:
             socketID = self.socket.connect(timeout=150)
@@ -206,7 +222,7 @@ class LlumoClient:
             # if self.hitsAvailable == 0 or len(dataframe) > self.hitsAvailable:
             #     raise LlumoAIError.InsufficientCredits()
-            evalDefinition = self.evalDefinition[eval]
+            evalDefinition = self.evalDefinition[eval].get("definition")
             model = "GPT_4"
             provider = "OPENAI"
             evalType = "LLM"
@@ -318,13 +334,13 @@ class LlumoClient:
             for cnt, batch in enumerate(self.allBatches):
                 try:
                     self.postBatch(batch=batch, workspaceID=workspaceID)
-                    # print("Betch Posted with item len: ", len(batch))
+                    print("Betch Posted with item len: ", len(batch))
                 except Exception as e:
                     continue
-                time.sleep(3)
+                # time.sleep(3)
             timeout = max(50, min(600, totalItems * 10))
@@ -367,7 +383,14 @@ class LlumoClient:
             pd.set_option("future.no_silent_downcasting", True)
             df = dataframe.fillna("Some error occured").astype(object)
-            if createPlayground(email, workspaceID, df,promptText=prompt_template,definationMapping=self.definationMapping,outputColName=outputColName):
+            if createPlayground(
+                email,
+                workspaceID,
+                df,
+                promptText=prompt_template,
+                definationMapping=self.definationMapping,
+                outputColName=outputColName,
+            ):
                 print(
                     "Your data has been saved in the Llumo Experiment. Visit https://app.llumo.ai/evallm to see the results.Please rerun the experiment to see the results on playground."
                 )
@@ -375,34 +398,10 @@ class LlumoClient:
             return dataframe
     # this function allows the users to run multiple evals at once
-    def evaluateMultiple(
-            self,
-            dataframe,
-            eval=["Response Completeness"],
-            prompt_template="",
-            outputColName="output",
-            createExperiment: bool = False,
-    ):
-        resultdf  = dataframe.copy()
-        for evalName in eval:
-            time.sleep(2)
-            resultdf = self.evaluate(dataframe = resultdf,eval=evalName,prompt_template=prompt_template,outputColName=outputColName,createExperiment = False)
-        if createExperiment:
-            pd.set_option("future.no_silent_downcasting", True)
-            df = resultdf.fillna("Some error occured").astype(object)
-            if createPlayground(self.email, self.workspaceID, df,definationMapping=self.definationMapping,outputColName=outputColName,promptText=prompt_template):
-                print(
-                    "Your data has been saved in the Llumo Experiment. Visit https://app.llumo.ai/evallm to see the results.Please rerun the experiment to see the results on playground."
-                )
-        else:
-            return resultdf
-    def evaluateCompressor(self, dataframe, prompt_template):
+    def evaluateCompressor(self, data, prompt_template):
         results = []
+        dataframe = pd.DataFrame(data)
         try:
             socketID = self.socket.connect(timeout=150)
             max_wait_secs = 20
@@ -411,7 +410,9 @@ class LlumoClient:
                 time.sleep(0.1)
                 waited_secs += 0.1
                 if waited_secs >= max_wait_secs:
-                    raise RuntimeError("Timeout waiting for server 'connection-established' event.")
+                    raise RuntimeError(
+                        "Timeout waiting for server 'connection-established' event."
+                    )
             try:
                 self.validateApiKey()
@@ -422,8 +423,14 @@ class LlumoClient:
                     print(f"Response content: {e.response.text[:500]}...")
                 raise
-            userHits = checkUserHits(self.workspaceID, self.hasSubscribed, self.trialEndDate, self.subscriptionEndDate,
-                                     self.hitsAvailable, len(dataframe))
+            userHits = checkUserHits(
+                self.workspaceID,
+                self.hasSubscribed,
+                self.trialEndDate,
+                self.subscriptionEndDate,
+                self.hitsAvailable,
+                len(dataframe),
+            )
             if not userHits["success"]:
                 raise LlumoAIError.InsufficientCredits(userHits["message"])
@@ -444,14 +451,22 @@ class LlumoClient:
                 if not all([ky in dataframe.columns for ky in keys]):
                     raise LlumoAIError.InvalidPromptTemplate()
-                activePlayground = f"{int(time.time() * 1000)}{uuid.uuid4()}".replace("-", "")
+                activePlayground = f"{int(time.time() * 1000)}{uuid.uuid4()}".replace(
+                    "-", ""
+                )
                 rowID = f"{int(time.time() * 1000)}{uuid.uuid4()}".replace("-", "")
                 columnID = f"{int(time.time() * 1000)}{uuid.uuid4()}".replace("-", "")
-                compressed_prompt_id = f"{int(time.time() * 1000)}{uuid.uuid4()}".replace("-", "")
-                compressed_prompt_output_id = f"{int(time.time() * 1000)}{uuid.uuid4()}".replace("-", "")
+                compressed_prompt_id = (
+                    f"{int(time.time() * 1000)}{uuid.uuid4()}".replace("-", "")
+                )
+                compressed_prompt_output_id = (
+                    f"{int(time.time() * 1000)}{uuid.uuid4()}".replace("-", "")
+                )
                 cost_id = f"{int(time.time() * 1000)}{uuid.uuid4()}".replace("-", "")
-                cost_saving_id = f"{int(time.time() * 1000)}{uuid.uuid4()}".replace("-", "")
+                cost_saving_id = f"{int(time.time() * 1000)}{uuid.uuid4()}".replace(
+                    "-", ""
+                )
                 rowDataDict = {}
                 for col in dataframe.columns:
@@ -471,7 +486,7 @@ class LlumoClient:
                         "compressed_prompt": compressed_prompt_id,
                         "compressed_prompt_output": compressed_prompt_output_id,
                         "cost": cost_id,
-                        "cost_saving": cost_saving_id
+                        "cost_saving": cost_saving_id,
                     },
                     "processData": {
                         "rowData": rowDataDict,
@@ -484,12 +499,12 @@ class LlumoClient:
                             "compressed_prompt": compressed_prompt_id,
                             "compressed_prompt_output": compressed_prompt_output_id,
                             "cost": cost_id,
-                            "cost_saving": cost_saving_id
-                        }
+                            "cost_saving": cost_saving_id,
+                        },
                     },
                     "workspaceID": workspaceID,
                     "email": email,
-                    "playgroundID": activePlayground
+                    "playgroundID": activePlayground,
                 }
                 rowIdMapping[rowID] = index
@@ -514,7 +529,12 @@ class LlumoClient:
             self.AllProcessMapping()
             timeout = max(60, min(600, total_items * 10))
-            self.socket.listenForResults(min_wait=20, max_wait=timeout, inactivity_timeout=30, expected_results=None)
+            self.socket.listenForResults(
+                min_wait=20,
+                max_wait=timeout,
+                inactivity_timeout=30,
+                expected_results=None,
+            )
             results = self.socket.getReceivedData()
             # results = self.finalResp(eval_results)
@@ -533,7 +553,7 @@ class LlumoClient:
         for records in results:
             for compound_key, value in records.items():
                 # for compound_key, value in item['data'].items():
-                rowID = compound_key.split('-')[0]
+                rowID = compound_key.split("-")[0]
                 # looking for the index of each rowID , in the original dataframe
                 if rowID in rowIdMapping:
                     index = rowIdMapping[rowID]
@@ -551,22 +571,259 @@ class LlumoClient:
         # dataframe["cost_saving"] = cost_saving
         return dataframe
-    def run_sweep(
-    self,
-    templates: List[str],
-    dataset: Dict[str, List[str]],
-    model_aliases: List[AVAILABLEMODELS],
-    apiKey: str,
-    eval=["Response Correctness"],
-    toEvaluate: bool = False,
-    createExperiment: bool = False,
-) -> pd.DataFrame:
+    def evaluateMultiple(
+        self,
+        data,
+        evals: list,  # list of eval metric names
+        prompt_template="",
+        outputColName="output",
+        createExperiment: bool = False,
+        _tocheck=True,
+    ):
+        dataframe = pd.DataFrame(data)
+        workspaceID = None
+        email = None
+        socketID = self.socket.connect(timeout=250)
+        self.allBatches = []
+        rowIdMapping = {}  # (rowID-columnID-columnID -> (index, evalName))
+        # Wait for socket connection
+        max_wait_secs = 20
+        waited_secs = 0
+        while not self.socket._connection_established.is_set():
+            time.sleep(0.1)
+            waited_secs += 0.1
+            if waited_secs >= max_wait_secs:
+                raise RuntimeError("Timeout waiting for server connection")
+        # Start listener thread
+        expectedResults = len(dataframe) * len(evals)
+        # print("expected result" ,expectedResults)
+        timeout = max(100, min(150, expectedResults * 10))
+        listener_thread = threading.Thread(
+            target=self.socket.listenForResults,
+            kwargs={
+                "min_wait": 40,
+                "max_wait": timeout,
+                "inactivity_timeout": 10,
+                "expected_results": expectedResults,
+            },
+            daemon=True,
+        )
+        listener_thread.start()
-        try:
-            self.validateApiKey()
-        except Exception as e:
-            raise Exception("Some error occurred, please check your API key")
+        activePlayground = f"{int(time.time() * 1000)}{uuid.uuid4()}".replace(
+                    "-", ""
+                )
+        for evalName in evals:
+            # print(f"\n======= Running evaluation for: {evalName} =======")
+            # Validate API and dependencies
+            self.validateApiKey(evalName=evalName)
+            metricDependencies = checkDependency(
+                evalName, list(dataframe.columns), tocheck=_tocheck
+            )
+            if not metricDependencies["status"]:
+                raise LlumoAIError.dependencyError(metricDependencies["message"])
+            evalDefinition = self.evalDefinition[evalName]["definition"]
+            model = "GPT_4"
+            provider = "OPENAI"
+            evalType = "LLM"
+            workspaceID = self.workspaceID
+            email = self.email
+            userHits = checkUserHits(
+                self.workspaceID,
+                self.hasSubscribed,
+                self.trialEndDate,
+                self.subscriptionEndDate,
+                self.hitsAvailable,
+                len(dataframe),
+            )
+            if not userHits["success"]:
+                raise LlumoAIError.InsufficientCredits(userHits["message"])
+            currentBatch = []
+            for index, row in dataframe.iterrows():
+                tools = [row["tools"]] if "tools" in dataframe.columns else []
+                groundTruth = row.get("groundTruth", "")
+                messageHistory = (
+                    [row["messageHistory"]]
+                    if "messageHistory" in dataframe.columns
+                    else []
+                )
+                promptTemplate = prompt_template
+                keys = re.findall(r"{{(.*?)}}", promptTemplate)
+                if not all([ky in dataframe.columns for ky in keys]):
+                    raise LlumoAIError.InvalidPromptTemplate()
+                inputDict = {key: row[key] for key in keys if key in row}
+                output = row.get(outputColName, "")
+                rowID = f"{int(time.time() * 1000)}{uuid.uuid4()}".replace("-", "")
+                columnID = f"{int(time.time() * 1000)}{uuid.uuid4()}".replace("-", "")
+                compoundKey = f"{rowID}-{columnID}-{columnID}"
+                rowIdMapping[compoundKey] = {"index": index, "eval": evalName}
+                templateData = {
+                    "processID": getProcessID(),
+                    "socketID": socketID,
+                    "source": "SDK",
+                    "processData": {
+                        "executionDependency": {
+                            "query": "",
+                            "context": "",
+                            "output": output,
+                            "tools": tools,
+                            "groundTruth": groundTruth,
+                            "messageHistory": messageHistory,
+                        },
+                        "definition": evalDefinition,
+                        "model": model,
+                        "provider": provider,
+                        "analytics": evalName,
+                    },
+                    "workspaceID": workspaceID,
+                    "type": "EVAL",
+                    "evalType": evalType,
+                    "kpi": evalName,
+                    "columnID": columnID,
+                    "rowID": rowID,
+                    "playgroundID": activePlayground,
+                    "processType": "EVAL",
+                    "email": email,
+                }
+                query = ""
+                context = ""
+                for key, value in inputDict.items():
+                    if isinstance(value, str):
+                        length = len(value.split()) * 1.5
+                        if length > 50:
+                            context += f" {key}: {value}, "
+                        else:
+                            if promptTemplate:
+                                tempObj = {key: value}
+                                promptTemplate = getInputPopulatedPrompt(
+                                    promptTemplate, tempObj
+                                )
+                            else:
+                                query += f" {key}: {value}, "
+                if not context.strip():
+                    for key, value in inputDict.items():
+                        context += f" {key}: {value}, "
+                templateData["processData"]["executionDependency"][
+                    "context"
+                ] = context.strip()
+                templateData["processData"]["executionDependency"][
+                    "query"
+                ] = query.strip()
+                if promptTemplate and not query.strip():
+                    templateData["processData"]["executionDependency"][
+                        "query"
+                    ] = promptTemplate
+                currentBatch.append(templateData)
+                if len(currentBatch) == 10:
+                    self.allBatches.append(currentBatch)
+                    currentBatch = []
+            if currentBatch:
+                self.allBatches.append(currentBatch)
+        for batch in tqdm(
+            self.allBatches,
+            desc="Processing Batches",
+            unit="batch",
+            colour="magenta",
+            ascii=False,
+        ):
+            try:
+                self.postBatch(batch=batch, workspaceID=workspaceID)
+                time.sleep(3)
+            except Exception as e:
+                print(f"Error posting batch: {e}")
+                raise
+        # Wait for results
+        time.sleep(3)
+        listener_thread.join()
+        rawResults = self.socket.getReceivedData()
+        # print("data from db #####################",dataFromDb)
+        # Fix here: keep full keys, do not split keys
+        receivedRowIDs = {key for item in rawResults for key in item.keys()}
+        expectedRowIDs = set(rowIdMapping.keys())
+        missingRowIDs = expectedRowIDs - receivedRowIDs
+        # print("All expected keys:", expected_rowIDs)
+        # print("All received keys:", received_rowIDs)
+        # print("Missing keys:", len(missingRowIDs))
+        missingRowIDs=list(missingRowIDs)
+        if len(missingRowIDs) > 0:
+            dataFromDb=fetchData(workspaceID,activePlayground,missingRowIDs)
+            rawResults.extend(dataFromDb)
+        # Initialize dataframe columns for each eval
+        for eval in evals:
+            dataframe[eval] = None
+            dataframe[f"{eval} Reason"] = None
+        # Map results to dataframe rows
+        for item in rawResults:
+            for compound_key, value in item.items():
+                if compound_key in rowIdMapping:
+                    index = rowIdMapping[compound_key]["index"]
+                    evalName = rowIdMapping[compound_key]["eval"]
+                    dataframe.at[index, evalName] = value.get("value")
+                    dataframe.at[index, f"{evalName} Reason"] = value.get("reasoning")
+        self.socket.disconnect()
+        if createExperiment:
+            pd.set_option("future.no_silent_downcasting", True)
+            df = dataframe.fillna("Some error occured").astype(object)
+            if createPlayground(
+                email,
+                workspaceID,
+                df,
+                promptText=prompt_template,
+                definationMapping=self.definationMapping,
+                outputColName=outputColName,
+            ):
+                print(
+                    "Your data has been saved in the Llumo Experiment. Visit https://app.llumo.ai/evallm to see the results."
+                )
+        else:
+            return dataframe
+    def run_sweep(
+        self,
+        templates: List[str],
+        dataset: Dict[str, List[str]],
+        model_aliases: List[AVAILABLEMODELS],
+        apiKey: str,
+        evals=["Response Correctness"],
+        toEvaluate: bool = False,
+        createExperiment: bool = False,
+    ) -> pd.DataFrame:
+        self.validateApiKey(evalName=" ")
         workspaceID = self.workspaceID
         email = self.email
         executor = ModelExecutor(apiKey)
@@ -590,7 +847,9 @@ class LlumoClient:
                 for i, model in enumerate(model_aliases, 1):
                     try:
                         provider = getProviderFromModel(model)
-                        response = executor.execute(provider, model.value, prompt, apiKey)
+                        response = executor.execute(
+                            provider, model.value, prompt, apiKey
+                        )
                         outputKey = f"output_{i}"
                         row[outputKey] = response
                     except Exception as e:
@@ -598,25 +857,23 @@ class LlumoClient:
                 results.append(row)
         df = pd.DataFrame(results)
-        if toEvaluate==True:
+        if toEvaluate == True:
             dfWithEvals = df.copy()
-            for i, model in enumerate(model_aliases,1):
+            for i, model in enumerate(model_aliases, 1):
                 outputColName = f"output_{i}"
                 try:
                     res = self.evaluateMultiple(
-                        df,
-                        eval=eval,
+                        df.to_dict("records"),
+                        evals=evals,
                         prompt_template=str(templates[0]),
                         outputColName=outputColName,
+                        _tocheck=False,
                     )
                     # Rename all new columns with _i+1 (e.g., _1, _2)
-                    for evalMetric in eval:
+                    for evalMetric in evals:
                         scoreCol = f"{evalMetric}"
                         reasonCol = f"{evalMetric} Reason"
                         if scoreCol in res.columns:
@@ -625,7 +882,9 @@ class LlumoClient:
                             res = res.rename(columns={reasonCol: f"{reasonCol}_{i}"})
                     # Drop duplicated columns from df (like prompt, variables, etc.)
-                    newCols = [col for col in res.columns if col not in dfWithEvals.columns]
+                    newCols = [
+                        col for col in res.columns if col not in dfWithEvals.columns
+                    ]
                     dfWithEvals = pd.concat([dfWithEvals, res[newCols]], axis=1)
                 except Exception as e:
@@ -634,26 +893,35 @@ class LlumoClient:
             if createExperiment:
                 pd.set_option("future.no_silent_downcasting", True)
                 dfWithEvals = dfWithEvals.fillna("Some error occurred")
-                if createPlayground(email, workspaceID, dfWithEvals, promptText=templates[0]):
-                    print("Your data has been saved in the Llumo Experiment. Visit https://app.llumo.ai/evallm to see the results.")
+                if createPlayground(
+                    email,
+                    workspaceID,
+                    dfWithEvals,
+                    promptText=templates[0],
+                    definationMapping=self.definationMapping,
+                ):
+                    print(
+                        "Your data has been saved in the Llumo Experiment. Visit https://app.llumo.ai/evallm to see the results."
+                    )
             else:
                 return dfWithEvals
         else:
-            if createExperiment==True:
+            if createExperiment == True:
                 pd.set_option("future.no_silent_downcasting", True)
                 df = df.fillna("Some error occurred")
                 if createPlayground(email, workspaceID, df, promptText=templates[0]):
-                    print("Your data has been saved in the Llumo Experiment. Visit https://app.llumo.ai/evallm to see the results.")
-            else :
+                    print(
+                        "Your data has been saved in the Llumo Experiment. Visit https://app.llumo.ai/evallm to see the results."
+                    )
+            else:
                 return df
     # this function generates an output using llm and tools and evaluate that output
     def evaluateAgents(
         self,
-        dataframe,
+        data,
         model,
         agents,
         model_api_key=None,
@@ -664,31 +932,33 @@ class LlumoClient:
         if model.lower() not in ["openai", "google"]:
             raise ValueError("Model must be 'openai' or 'google'")
+        # converting into pandas dataframe object
+        dataframe = pd.DataFrame(data)
         # Run unified agent execution
         toolResponseDf = LlumoAgentExecutor.run(
             dataframe, agents, model=model, model_api_key=model_api_key
         )
-        # evals = [
-        #     "Tool Reliability",
-        #     "Stepwise Progression",
-        #     "Tool Selection Accuracy",
-        #     "Final Task Alignment",
-        # ]
-        for eval in evals:
-            # Perform evaluation
-            toolResponseDf = self.evaluate(
-                toolResponseDf,
-                eval=eval,
-                prompt_template=prompt_template,
-                createExperiment=False,
-            )
+        # for eval in evals:
+        # Perform evaluation
+        # toolResponseDf = self.evaluate(
+        #     toolResponseDf.to_dict(orient = "records"),
+        #     eval=eval,
+        #     prompt_template=prompt_template,
+        #     createExperiment=False,
+        # )
+        toolResponseDf = self.evaluateMultiple(
+            toolResponseDf.to_dict(orient="records"),
+            evals=evals,
+            prompt_template=prompt_template,
+            createExperiment=False,
+        )
         if createExperiment:
             pd.set_option("future.no_silent_downcasting", True)
             df = toolResponseDf.fillna("Some error occured")
-            if createPlayground(self.email, self.workspaceID, df):
+            if createPlayground(self.email, self.workspaceID, df,promptText=prompt_template,definationMapping=self.definationMapping):
                 print(
                     "Your data has been saved in the Llumo Experiment. Visit https://app.llumo.ai/evallm to see the results."
                 )
@@ -698,47 +968,49 @@ class LlumoClient:
     # this function evaluate that tools output given by the user
     def evaluateAgentResponses(
         self,
-        dataframe,
+        data,
         evals=["Final Task Alignment"],
         outputColName="output",
         createExperiment: bool = False,
     ):
+        dataframe = pd.DataFrame(data)
         try:
             if "query" and "messageHistory" and "tools" not in dataframe.columns:
                 raise ValueError(
                     "DataFrame must contain 'query', 'messageHistory','output' ,and 'tools' columns. Make sure the columns names are same as mentioned here."
                 )
-            prompt_template="Give answer for the given query: {{query}}"
-            # evals = [
-            #     "Tool Reliability",
-            #     "Stepwise Progression",
-            #     "Tool Selection Accuracy",
-            #     "Final Task Alignment",
-            # ]
             toolResponseDf = dataframe.copy()
-            for eval in evals:
-                # Perform evaluation
-                toolResponseDf = self.evaluate(
-                    toolResponseDf, eval=eval, prompt_template=prompt_template,outputColName=outputColName
-                )
-            return toolResponseDf
+            # for eval in evals:
+            #     # Perform evaluation
+            #     toolResponseDf = self.evaluate(
+            #         toolResponseDf.to_dict(orient = "records"), eval=eval, prompt_template="Give answer for the given query: {{query}}",outputColName=outputColName
+            #     )
+            toolResponseDf = self.evaluateMultiple(
+                toolResponseDf.to_dict(orient="records"),
+                evals=evals,
+                prompt_template="Give answer for the given query: {{query}}",
+                outputColName=outputColName,
+                createExperiment=createExperiment
+            )
+            if createExperiment:
+                pass
+            else:
+                return toolResponseDf
         except Exception as e:
             raise e
     def runDataStream(
         self,
-        dataframe,
+        data,
         streamName: str,
         queryColName: str = "query",
         createExperiment: bool = False,
     ):
         results = {}
+        dataframe = pd.DataFrame(data)
         try:
             socketID = self.socket.connect(timeout=150)
             # Ensure full connection before proceeding
@@ -753,16 +1025,11 @@ class LlumoClient:
                     )
             # print(f"Connected with socket ID: {socketID}")
             rowIdMapping = {}
-            try:
-                # print(f"Validating API key...")
-                self.validateApiKey()
-                # print(f"API key validation successful. Hits available: {self.hitsAvailable}")
-            except Exception as e:
-                print(f"Error during API key validation: {str(e)}")
-                if hasattr(e, "response") and getattr(e, "response", None) is not None:
-                    print(f"Status code: {e.response.status_code}")
-                    print(f"Response content: {e.response.text[:500]}...")
-                raise
+            # print(f"Validating API key...")
+            self.validateApiKey()
+            # print(f"API key validation successful. Hits available: {self.hitsAvailable}")
             # check for available hits and trial limit
             userHits = checkUserHits(
                 self.workspaceID,
@@ -890,7 +1157,13 @@ class LlumoClient:
             pd.set_option("future.no_silent_downcasting", True)
             df = dataframe.fillna("Some error occured").astype(object)
-            if createPlayground(email, workspaceID, df,queryColName=queryColName, dataStreamName=streamId):
+            if createPlayground(
+                email,
+                workspaceID,
+                df,
+                queryColName=queryColName,
+                dataStreamName=streamId,
+            ):
                 print(
                     "Your data has been saved in the Llumo Experiment. Visit https://app.llumo.ai/evallm to see the results."
                 )
@@ -910,6 +1183,49 @@ class LlumoClient:
         except Exception as e:
             raise "Some error ocuured please check your API key"
+    def upload(self, file_path):
+        """Load a tabular file and save it as a Llumo playground.
+
+        The API key is validated first; a validation error propagates to the
+        caller unchanged. The file is then read with pandas according to its
+        extension (.csv, .xlsx/.xls, .json, .parquet), every column is cast to
+        ``str`` and the frame is handed to ``createPlayground``.
+
+        Args:
+            file_path: Path of the file to upload.
+
+        Returns:
+            True when ``createPlayground`` reports success. False when it does
+            not, or when reading/uploading fails (the error is printed, not
+            raised).
+        """
+        # Validation failures are deliberately not handled here.
+        self.validateApiKey()
+        # Only the (case-insensitive) extension decides which reader is used.
+        ext = os.path.splitext(file_path)[1].lower()
+        try:
+            if ext == ".csv":
+                df = pd.read_csv(file_path)
+            elif ext in [".xlsx", ".xls"]:
+                df = pd.read_excel(file_path)
+            elif ext == ".json":
+                df = pd.read_json(file_path, orient="records")
+            elif ext == ".parquet":
+                df = pd.read_parquet(file_path)
+            else:
+                raise ValueError(f"Unsupported file format: {ext}")
+            # Values are uploaded as strings regardless of their parsed dtype.
+            df = df.astype(str)
+            if createPlayground(self.email, self.workspaceID, df):
+                print(
+                    "Your data has been saved in the Llumo Experiment. Visit https://app.llumo.ai/evallm to see the results."
+                )
+                return True
+        except Exception as e:
+            # Best-effort by design: report the problem instead of raising.
+            print(f"Error: {e}")
+        # Every non-success path ends here with an explicit falsy result.
+        return False
 class SafeDict(dict):
     def __missing__(self, key):

llumo/exceptions.py CHANGED Viewed

@@ -46,6 +46,10 @@ class LlumoAIError(Exception):
     def modelHitsExhausted(details = "Your credits for the selected model exhausted."):
         return LlumoAIError(details)
+    @staticmethod
+    def dependencyError(details):
+        # Factory for the error raised on a failed dependency check: returns a
+        # LlumoAIError built from `details`; it does not raise by itself.
+        return LlumoAIError(details)
     # @staticmethod
     # def dateNotFound():
     #     return LlumoAIError("Trial end date or subscription end date not found for the given user.")

llumo/helpingFuntions.py CHANGED Viewed

@@ -212,7 +212,6 @@ def deleteColumnListInPlayground(workspaceID: str, playgroundID: str):
         print("❌ Error:", response.status_code, response.text)
         return None
 def createColumn(workspaceID, dataframe, playgroundID, promptText=None,queryColName=None,outputColName= "output",dataStreamName=None,definationMapping=None):
     if len(dataframe) > 100:
         dataframe = dataframe.head(100)
@@ -238,7 +237,7 @@ def createColumn(workspaceID, dataframe, playgroundID, promptText=None,queryColN
         columnIDMapping[col] = columnID
-        if col.startswith('output'):
+        if col.startswith('output') and promptText!=None:
             # For output columns, create the prompt template with promptText
             if promptText:
                 # Extract variables from promptText and set them as dependencies
@@ -277,7 +276,7 @@ def createColumn(workspaceID, dataframe, playgroundID, promptText=None,queryColN
                     "order": indx,
                 }
-        elif col.startswith('Data '):
+        elif col.startswith('Data ') :
             if queryColName and dataStreamName:
                 dependencies = []
                 dependencies.append(columnIDMapping[queryColName])
@@ -291,7 +290,7 @@ def createColumn(workspaceID, dataframe, playgroundID, promptText=None,queryColN
                     "type": "DATA_STREAM",
                     "order": indx}
-        elif col in allEvals:
+        elif col in allEvals and promptText!=None:
             dependencies = []
             variables = re.findall(r'{{(.*?)}}', promptText)
@@ -303,8 +302,8 @@ def createColumn(workspaceID, dataframe, playgroundID, promptText=None,queryColN
                     dependencies.append(columnIDMapping[varName])
             dependencies.append(columnIDMapping[outputColName])  # Add the output column ID
-            longDef = definationMapping.get(col, {}).get(col, "")
-            shortDef ="You have run this from SDK"
+            longDef = definationMapping.get(col, {}).get('definition', "")
+            shortDef =definationMapping.get(col, {}).get('briefDefinition', "")
             enum =  col.upper().replace(" ","_")
             template = {
@@ -312,7 +311,7 @@ def createColumn(workspaceID, dataframe, playgroundID, promptText=None,queryColN
         col.lower().replace(" ","_")
       ],
       "evaluationMetric": "ALL",
-      "evaluationModel": "GEMINI_PRO",
+      "evaluationModel": "LLUMO_EVALLM",
       "selectPrompt": columnIDMapping[outputColName],
       "scoreCondition": "GREATER_THAN",
       "scoreValue": "50",
@@ -336,12 +335,13 @@ def createColumn(workspaceID, dataframe, playgroundID, promptText=None,queryColN
       "analyticsENUM": enum,
       "prompt": shortDef,
       "analyticsName": col,
-      "columnID": str(uuid.uuid4().hex[:8]),
+      "columnID": columnID,
       "label": col,
       "order": indx
     }
+        elif col.endswith(' Reason') and promptText!=None:
+            continue
         else:
@@ -371,9 +371,25 @@ def createColumn(workspaceID, dataframe, playgroundID, promptText=None,queryColN
         # For each column, we need to map the column ID to the corresponding value in the row
         for col in dataframe.columns:
-            columnID = columnIDMapping[col]  # Get the columnID from the mapping
-            row_dict[columnID] = row[col]  # Map the columnID to the value in the row
+            columnID = columnIDMapping[col]
+            if col in allEvals and promptText!=None:
+                row_dict[columnID] = {
+                    "value": row[col],
+                    "type": "EVAL",
+                    "isValid": True,
+                    "reasoning": row[col+" Reason"],
+                    "edgeCase": "minorHallucinationDetailNotInContext",
+                    "kpi": col
+                    }
+            elif col.endswith(' Reason') and promptText!=None:
+                continue
+            else:# Get the columnID from the mapping
+                row_dict[columnID] = row[col]
+            # row_dict[columnID] = row[col]  # Directly map the column ID to the row value
         # Add the row index (if necessary)
         row_dict["pIndex"] = indx
         rowTemplate["dataToUploadList"].append(row_dict)
@@ -430,6 +446,8 @@ def createPlayground(email, workspaceID, df, promptText=None,queryColName=None,d
     payload1, payload2 = createColumn(
         workspaceID=workspaceID, dataframe=df, playgroundID=playgroundId, promptText=promptText,queryColName=queryColName,dataStreamName=dataStreamName,definationMapping=definationMapping,outputColName=outputColName
     )
+ # Debugging line to check the payload2 structure
     deleteExistingRows = deleteColumnListInPlayground(
         workspaceID=workspaceID, playgroundID=playgroundId
     )
@@ -441,6 +459,7 @@ def createPlayground(email, workspaceID, df, promptText=None,queryColName=None,d
 def getPlaygroundInsights(workspaceID: str, activePlayground: str):
     headers = {
@@ -490,4 +509,100 @@ def getPlaygroundInsights(workspaceID: str, activePlayground: str):
     else:
         print(f"Error generating insight: {responseGenerate.status_code} - {responseGenerate.text}")
         return None
+def checkDependency(selectedEval, columns,tocheck=True):
+    """
+    Report whether the input columns needed by an evaluation metric are present.
+    Parameters:
+    - selectedEval (str): The name of the selected evaluation metric.
+    - columns (list): List of column names present in the dataset.
+    - tocheck (bool): When False the check is skipped and success is returned.
+    Returns:
+    - dict: {"status": bool, "message": str}. "status" is False for an unknown
+      metric or when a required column is absent; nothing is raised here.
+    """
+    passed = {"status":True,"message":"success"}
+    if not tocheck:
+        return passed
+    # Input columns each evaluation metric depends on
+    metricDependencies = {
+        'Response Completeness': ['context', 'query', 'output'],
+        'Response Bias': ['output'],
+        'Response Harmfulness': ['output'],
+        'Input Toxicity': ['query'],
+        'Input Harmfulness': ['query'],
+        'Context Utilization': ['output', 'context'],
+        'Relevance Retention': ['context', 'query'],
+        'Semantic Cohesion': ['context'],
+        'Final Task Alignment': ['messageHistory'],
+        'Tool Reliability': ['messageHistory'],
+        'Response Correctness': ['output', 'query', 'context'],
+        'Response Toxicity': ['output'],
+        'Input Bias': ['query'],
+        'Input Relevancy': ['context', 'query'],
+        'Redundancy Reduction': ['context'],
+        'Response Sentiment': ['output'],
+        'Tool Selection Accuracy': ['tools', 'messageHistory'],
+        'Stepwise Progression': ['tools', 'messageHistory'],
+        'Hallucination': ['query', 'context', 'output'],
+        'Groundedness': ['groundTruth', 'output'],
+        'Memory Utilization': ['context', 'messageHistory'],
+        'Input Relevancy (Multi-turn)': ['context', 'query']
+    }
+    columnsRequired = metricDependencies.get(selectedEval)
+    if columnsRequired is None:
+        return {"status": False,"message":f"Unknown evaluation metric: {selectedEval}"}
+    # Only the first absent column, in declaration order, is named in the message
+    absent = next((name for name in columnsRequired if name not in columns), None)
+    if absent is None:
+        return passed
+    return {"status":False,
+        "message":f"'{selectedEval}' requires columns: {columnsRequired}. "
+        f"Missing: '{absent}'. Please ensure your data includes all required columns."
+        }
+def fetchData(workspaceID, playgroundID, missingList: list):
+    """
+    POST workspaceID, playgroundID and missingList to the get-awaited endpoint
+    and reshape the reply.
+    Returns:
+    - list: one single-key dict per entry of the response's "data" object,
+      mapping that key to its "value", "reasoning", "edgeCase" and "kpi"
+      fields (None where a field is absent). An empty list is returned on a
+      non-200 status or on any exception; both cases are printed.
+    """
+    socket_data_url = "https://app.llumo.ai/api/eval/get-awaited"
+    keptFields = ("value", "reasoning", "edgeCase", "kpi")
+    try:
+        response = requests.post(
+            socket_data_url,
+            json={
+                "workspaceID": workspaceID,
+                "playgroundID": playgroundID,
+                "missingList": missingList,
+            },
+        )
+        if response.status_code != 200:
+            print(f"Failed to fetch data. Status Code: {response.status_code}")
+            return []
+        cells = response.json().get("data", {})
+        # Keep only the four known fields of every returned entry
+        return [
+            {cellKey: {field: cell.get(field) for field in keptFields}}
+            for cellKey, cell in cells.items()
+        ]
+    except Exception as e:
+        print(f"An error occurred: {e}")
+        return []

llumo/sockets.py CHANGED Viewed

@@ -17,15 +17,16 @@ class LlumoSocketClient:
         # Initialize client
         self.sio = socketio.Client(
-            # logger=True,
-            # engineio_logger=True,
+            logger=False,
+            engineio_logger=False,
             reconnection=True,
-            reconnection_attempts=10,
+            reconnection_attempts=1,
             reconnection_delay=1,
         )
         @self.sio.on("connect")
         def on_connect():
+            self.sio.emit("ready")
             # print("Socket connection established")
             self._connected = True
             # Don't set connection_established yet - wait for server confirmation
@@ -37,21 +38,27 @@ class LlumoSocketClient:
             #     f"Server acknowledged connection with 'connection-established' event: {data}"
             # )
             if isinstance(data, dict) and "socketId" in data:
+                self.sio.emit("ready")
                 self.server_socket_id = data["socketId"]
                 # print(f"Received server socket ID: {self.server_socket_id}")
             self._connection_established.set()
         @self.sio.on("result-update")
-        def on_result_update(data):
+        def on_result_update(data, callback=None):
             with self._lock:
                 # print(f"Received result-update event: {data}")
                 self._received_data.append(data)
                 self._last_update_time = time.time()
                 # ✅ Stop if all expected results are received
-                if self._expected_results and len(self._received_data) >= self._expected_results:
+                if (
+                    self._expected_results
+                    and len(self._received_data) >= self._expected_results
+                ):
                     # print("✅ All expected results received.")
                     self._listening_done.set()
+            if callback:
+                callback(True)
         @self.sio.on("disconnect")
         def on_disconnect():
@@ -81,13 +88,17 @@ class LlumoSocketClient:
             start = time.time()
             while not self.sio.connected:
                 if time.time() - start > timeout:
-                    raise RuntimeError("Timed out waiting for low-level socket connection.")
+                    raise RuntimeError(
+                        "Timed out waiting for low-level socket connection."
+                    )
                 time.sleep(0.1)
             # print("[DEBUG] SocketIO low-level connection established.")
             # Wait for server "connection-established" event
             if not self._connection_established.wait(timeout):
-                raise RuntimeError("Timed out waiting for connection-established event.")
+                raise RuntimeError(
+                    "Timed out waiting for connection-established event."
+                )
             self._connected = True
             self._last_update_time = time.time()
@@ -100,10 +111,13 @@ class LlumoSocketClient:
             self._connected = False
             raise RuntimeError(f"WebSocket connection failed: {e}")
-    def listenForResults(self, min_wait=30, max_wait=300, inactivity_timeout=50, expected_results=None):
+    def listenForResults(
+        self, min_wait=30, max_wait=300, inactivity_timeout=50, expected_results=None
+    ):
         # if not self._connected:
         #     raise RuntimeError("WebSocket is not connected. Call connect() first.")
+        # total records
         self._expected_results = expected_results  # NEW
         start_time = time.time()
         self._last_update_time = time.time()
@@ -128,14 +142,13 @@ class LlumoSocketClient:
                     self._listening_done.set()
                     break
-                time.sleep(3)
         timeout_thread = threading.Thread(target=timeout_watcher, daemon=True)
         timeout_thread.start()
         self._listening_done.wait()
     def getReceivedData(self):
+        # Returns a copy of the results accumulated so far; the copy is taken
+        # while holding self._lock, the same lock the result-update handler
+        # holds when appending.
         with self._lock:
+            # print("Total received:", len(self._received_data))  # DEBUG
             return self._received_data.copy()
     def disconnect(self):

{llumo-0.2.13b1.dist-info → llumo-0.2.14.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: llumo
-Version: 0.2.13b1
+Version: 0.2.14
 Summary: Python SDK for interacting with the Llumo ai API.
 Home-page: https://www.llumo.ai/
 Author: Llumo
@@ -21,6 +21,7 @@ Requires-Dist: requests>=2.0.0
 Requires-Dist: python-socketio
 Requires-Dist: python-dotenv
 Requires-Dist: openai==1.75.0
+Requires-Dist: tqdm==4.67.1
 Requires-Dist: google-generativeai==0.8.5
 Dynamic: author
 Dynamic: author-email

llumo-0.2.14.dist-info/RECORD ADDED Viewed

@@ -0,0 +1,13 @@
+llumo/__init__.py,sha256=O04b4yW1BnOvcHzxWFddAKhtdBEhBNhLdb6xgnpHH_Q,205
+llumo/client.py,sha256=HpvUyucrGPbcPQMz_cTRDcEsBFpmNt8jfW1zJU4Nyss,46781
+llumo/exceptions.py,sha256=i3Qv4_g7XjRuho7-b7ybjw2bwSh_NhvICR6ZAgiLQX8,1944
+llumo/execution.py,sha256=x88wQV8eL99wNN5YtjFaAMCIfN1PdfQVlAZQb4vzgQ0,1413
+llumo/functionCalling.py,sha256=D5jYapu1rIvdIJNUYPYMTyhQ1H-6nkwoOLMi6eekfUE,7241
+llumo/helpingFuntions.py,sha256=RgWok8DoE1R-Tc0kJ9B5En6LEUEk5EvQU8iJiGPbUsw,21911
+llumo/models.py,sha256=YH-qAMnShmUpmKE2LQAzQdpRsaXkFSlOqMxHwU4zBUI,1560
+llumo/sockets.py,sha256=I2JO_eNEctRo_ikgvFVp5zDd-m0VDu04IEUhhsa1Tic,5950
+llumo-0.2.14.dist-info/licenses/LICENSE,sha256=tF9yAcfPV9xGT3ViWmC8hPvOo8BEk4ZICbUfcEo8Dlk,182
+llumo-0.2.14.dist-info/METADATA,sha256=B7NLMFRj8018jkWxEzKSSGlb2CS3d45rDtnywDh_4kc,1519
+llumo-0.2.14.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+llumo-0.2.14.dist-info/top_level.txt,sha256=d5zUTMI99llPtLRB8rtSrqELm_bOqX-bNC5IcwlDk88,6
+llumo-0.2.14.dist-info/RECORD,,

llumo-0.2.13b1.dist-info/RECORD DELETED Viewed

@@ -1,13 +0,0 @@
-llumo/__init__.py,sha256=O04b4yW1BnOvcHzxWFddAKhtdBEhBNhLdb6xgnpHH_Q,205
-llumo/client.py,sha256=pzmJkz5LRF3h1WgjmezNnJEUAZ9_5nF47eW489F9-y4,36026
-llumo/exceptions.py,sha256=iCj7HhtO_ckC2EaVBdXbAudNpuMDsYmmMEV5lwynZ-E,1854
-llumo/execution.py,sha256=x88wQV8eL99wNN5YtjFaAMCIfN1PdfQVlAZQb4vzgQ0,1413
-llumo/functionCalling.py,sha256=D5jYapu1rIvdIJNUYPYMTyhQ1H-6nkwoOLMi6eekfUE,7241
-llumo/helpingFuntions.py,sha256=ah0FUQcRV3gfguvjQQ_aZzq59hpJttqAPJdjJVNYdFc,17110
-llumo/models.py,sha256=YH-qAMnShmUpmKE2LQAzQdpRsaXkFSlOqMxHwU4zBUI,1560
-llumo/sockets.py,sha256=0BCcdCaiXDR7LO_9NIYA6urtpgdmyWW2M1US67G9Eus,5583
-llumo-0.2.13b1.dist-info/licenses/LICENSE,sha256=tF9yAcfPV9xGT3ViWmC8hPvOo8BEk4ZICbUfcEo8Dlk,182
-llumo-0.2.13b1.dist-info/METADATA,sha256=Kyb0OFYTsOosmZ6Rcok4LNgWqVsUldzjeXmnw2vOnGA,1493
-llumo-0.2.13b1.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
-llumo-0.2.13b1.dist-info/top_level.txt,sha256=d5zUTMI99llPtLRB8rtSrqELm_bOqX-bNC5IcwlDk88,6
-llumo-0.2.13b1.dist-info/RECORD,,

{llumo-0.2.13b1.dist-info → llumo-0.2.14.dist-info}/WHEEL RENAMED Viewed

File without changes

{llumo-0.2.13b1.dist-info → llumo-0.2.14.dist-info}/licenses/LICENSE RENAMED Viewed

File without changes

{llumo-0.2.13b1.dist-info → llumo-0.2.14.dist-info}/top_level.txt RENAMED Viewed

File without changes

llumo 0.2.13b1__py3-none-any.whl → 0.2.14__py3-none-any.whl

llumo 0.2.13b1py3-none-any.whl → 0.2.14py3-none-any.whl