llumo 0.2.29__tar.gz → 0.2.31__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {llumo-0.2.29/llumo.egg-info → llumo-0.2.31}/PKG-INFO +1 -1
- {llumo-0.2.29 → llumo-0.2.31}/llumo/callback.py +123 -31
- {llumo-0.2.29 → llumo-0.2.31}/llumo/client.py +213 -228
- {llumo-0.2.29 → llumo-0.2.31}/llumo/helpingFuntions.py +2 -2
- {llumo-0.2.29 → llumo-0.2.31}/llumo/llumoSessionContext.py +99 -30
- {llumo-0.2.29 → llumo-0.2.31}/llumo/openai.py +11 -6
- {llumo-0.2.29 → llumo-0.2.31/llumo.egg-info}/PKG-INFO +1 -1
- {llumo-0.2.29 → llumo-0.2.31}/LICENSE +0 -0
- {llumo-0.2.29 → llumo-0.2.31}/MANIFEST.in +0 -0
- {llumo-0.2.29 → llumo-0.2.31}/README.md +0 -0
- {llumo-0.2.29 → llumo-0.2.31}/llumo/__init__.py +0 -0
- {llumo-0.2.29 → llumo-0.2.31}/llumo/callbacks-0.py +0 -0
- {llumo-0.2.29 → llumo-0.2.31}/llumo/chains.py +0 -0
- {llumo-0.2.29 → llumo-0.2.31}/llumo/exceptions.py +0 -0
- {llumo-0.2.29 → llumo-0.2.31}/llumo/execution.py +0 -0
- {llumo-0.2.29 → llumo-0.2.31}/llumo/functionCalling.py +0 -0
- {llumo-0.2.29 → llumo-0.2.31}/llumo/google.py +0 -0
- {llumo-0.2.29 → llumo-0.2.31}/llumo/llumoLogger.py +0 -0
- {llumo-0.2.29 → llumo-0.2.31}/llumo/models.py +0 -0
- {llumo-0.2.29 → llumo-0.2.31}/llumo/sockets.py +0 -0
- {llumo-0.2.29 → llumo-0.2.31}/llumo.egg-info/SOURCES.txt +0 -0
- {llumo-0.2.29 → llumo-0.2.31}/llumo.egg-info/dependency_links.txt +0 -0
- {llumo-0.2.29 → llumo-0.2.31}/llumo.egg-info/requires.txt +0 -0
- {llumo-0.2.29 → llumo-0.2.31}/llumo.egg-info/top_level.txt +0 -0
- {llumo-0.2.29 → llumo-0.2.31}/setup.cfg +0 -0
- {llumo-0.2.29 → llumo-0.2.31}/setup.py +0 -0
{llumo-0.2.29 → llumo-0.2.31}/llumo/callback.py

@@ -11,11 +11,12 @@ import re
 
 
 class LlumoCallbackHandler(BaseCallbackHandler):
-    def __init__(self, session: LlumoSessionContext = None):
+    def __init__(self, session: LlumoSessionContext = None,agentType = "react_agent"):
         if session is None:
             raise ValueError("LlumoSessionContext is required")
 
         self.sessionLogger = session
+        self.agentType = agentType
 
         # Initialize timing and state variables
         self.llmStartTime = None
@@ -26,6 +27,7 @@ class LlumoCallbackHandler(BaseCallbackHandler):
 
         # Initialize tracking variables
         self.prompt = ""
+        self.searchQuery = ""
         self.currentToolName = None
         self.currentToolInput = None
         self.currentAgentName = None
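The new `agentType` argument defaults to `"react_agent"` and is echoed into every agent step the handler logs. A minimal construction sketch; the `LlumoSessionContext` setup shown here is an assumption, not a documented call:

```python
# Hypothetical wiring: only LlumoCallbackHandler's signature is taken from the
# diff; the session constructor arguments are illustrative placeholders.
from llumo.callback import LlumoCallbackHandler
from llumo.llumoSessionContext import LlumoSessionContext

session = LlumoSessionContext()  # assumed setup; consult the package docs
handler = LlumoCallbackHandler(session=session, agentType="react_agent")

# Omitting the session fails fast, per the guard in __init__:
# LlumoCallbackHandler()  ->  ValueError: LlumoSessionContext is required
```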
@@ -45,8 +47,14 @@ class LlumoCallbackHandler(BaseCallbackHandler):
         self.currentObservation = ""
         self.isAgentExecution = False
 
+
+
     def on_chain_start(self, serialized: Dict[str, Any], inputs: Dict[str, Any], **kwargs: Any) -> None:
         """Called when a chain starts - this includes agent execution"""
+        # print("ON CHAIN START: ",inputs)
+        # print("ON CHAIN START: serialized",serialized)
+        # print("ON CHAIN START: kwargs",kwargs)
+
         try:
             self.prompt = inputs.get("input", "")
             self.chainStartTime = time.time()
@@ -62,6 +70,7 @@ class LlumoCallbackHandler(BaseCallbackHandler):
             self.currentToolInput = None
             self.hasErrors = False
             self.lastError = None
+            self.toolDescription = {}
 
             # Dynamically detect agent name from serialized data
             if serialized is not None:
@@ -91,6 +100,26 @@ class LlumoCallbackHandler(BaseCallbackHandler):
         except Exception as e:
             print(f"[ERROR] in on_chain_start: {e}")
 
+        try:
+            self.sessionLogger.logQueryStep(
+                stepName = "Query Invocation",
+                model = "unknown",
+                provider = "unknown",
+                inputTokens = round(len(self.prompt.split()) * 1.5),
+                query = self.prompt,
+                status = "SUCCESS"
+            )
+        except Exception as e:
+            self.sessionLogger.logQueryStep(
+                stepName="Query Invocation",
+                model="unknown",
+                provider="unknown",
+                inputTokens=0,
+                query="",
+                status="FAILURE"
+            )
+            print(f"[ERROR] Failed to log user input: {e}")
+
     def on_chain_end(self, outputs: Dict[str, Any], **kwargs: Any) -> None:
         """Called when a chain ends"""
         try:
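Note that `inputTokens` here is not a tokenizer count but a rough heuristic of about 1.5 tokens per whitespace-separated word. The estimate in isolation:

```python
# The handler's token heuristic: ~1.5 tokens per word, rounded.
def estimate_tokens(prompt: str) -> int:
    return round(len(prompt.split()) * 1.5)

print(estimate_tokens("What is LangChain?"))  # 4 (3 words * 1.5 = 4.5, banker's rounding)
print(estimate_tokens(""))                    # 0
```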
@@ -98,15 +127,16 @@ class LlumoCallbackHandler(BaseCallbackHandler):
             # Use logAgentStep for final completion
             self.sessionLogger.logAgentStep(
                 stepName="Agent Execution Completed",
-                agentType=
+                agentType=self.agentType,
                 agentName=self.currentAgentName or "unknown",
                 numStepsTaken=self.agentsSteps,
                 tools=self.toolsUsed,
                 query=self.prompt,
                 status="SUCCESS",
-                message=f"Final output: {outputs['output']}. ReAct steps: {json.dumps(self.reactSteps)}",
+                # message=f"Final output: {outputs['output']}. ReAct steps: {json.dumps(self.reactSteps)}",
             )
 
+
             # Reset execution state after chain ends
             self.isAgentExecution = False
 
@@ -118,6 +148,14 @@ class LlumoCallbackHandler(BaseCallbackHandler):
         self.llmStartTime = time.time()
         self.stepTime = time.time()
 
+        if self.prompt == "":
+            match = re.search(r"Human:\s*(.*)",prompts[0], re.DOTALL)
+            if match:
+                user_question = match.group(1).strip()
+                self.prompt = user_question  # 👉 What is LangChain?
+            else:
+                self.prompt = ""
+
         # Dynamically get model info
         model = "unknown"
         if serialized and "kwargs" in serialized:
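When `on_llm_start` fires without a captured prompt, the handler recovers the user question from the rendered chat prompt via the `Human:` marker. The same regex, standalone:

```python
import re

# Typical rendered prompt passed to on_llm_start as prompts[0].
rendered = "System: You are a helpful assistant.\nHuman: What is LangChain?"

match = re.search(r"Human:\s*(.*)", rendered, re.DOTALL)
print(match.group(1).strip() if match else "")  # -> What is LangChain?
```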
@@ -204,7 +242,6 @@ class LlumoCallbackHandler(BaseCallbackHandler):
         # Parse ReAct reasoning from LLM output if we're in agent execution
         if self.isAgentExecution and output:
             self._parse_react_reasoning(output)
-
         try:
             self.sessionLogger.logLlmStep(
                 stepName="LLM Call Completed",
@@ -212,17 +249,62 @@ class LlumoCallbackHandler(BaseCallbackHandler):
                 provider=self.llmProvider,
                 inputTokens=int(input_tokens),
                 outputTokens=int(output_tokens),
-                temperature=float(kwargs.get("temperature", 0.7)),
-                promptTruncated=False,
+                # temperature=float(kwargs.get("temperature", 0.7)),
+                # promptTruncated=False,
                 latencyMs=duration_ms,
-
+                prompt=str(self.prompt),
                 output=output,
                 status=status,
-                message=error_message if status == "ERROR" else "",
+                # message=error_message if status == "ERROR" else "",
             )
+
         except Exception as e:
             print(f"[ERROR] Failed to log LLM end: {e}")
 
+    def on_retriever_start(self, serialized, query, run_id, parent_run_id=None, **kwargs):
+        self.prompt = query
+        self.searchQuery = query
+
+    def on_retriever_end(self, documents, run_id, parent_run_id=None, **kwargs):
+
+        try:
+            chunkSize = len(documents[0].page_content) if documents and documents[0].page_content else 0
+        except Exception:
+            chunkSize = 0
+
+        source = ( kwargs.get("metadata", {}).get("source") or kwargs.get("tags") or "unknown")
+
+        try:
+            self.sessionLogger.logRetrieverStep(
+                stepName="Context Retrieval Complete",
+                retrieverSource = str(source),
+                topK = len(documents),
+                chunkSize = chunkSize,
+                context = [doc.page_content for doc in documents],
+                searchQuery = self.prompt if self.prompt != "" else self.searchQuery,
+                latencyMs = 120,  # mock latency, replace with real timing if needed
+                status = "SUCCESS"
+            )
+        except Exception as e:
+            print(f"[ERROR] Failed to log chain output: {e}")
+
+    def on_retriever_error(self, error, run_id, parent_run_id=None, **kwargs):
+
+        try:
+            self.sessionLogger.logRetrieverStep(
+                stepName="Context Retrieval Error",
+                retrieverSource = kwargs.get("metadata", {}).get("source", "unknown"),
+                topK = 0,
+                chunkSize = 0,
+                context = [],
+                searchQuery = self.prompt if self.prompt != "" else self.searchQuery,
+                latencyMs = 0,  # mock latency, replace with real timing if needed
+                status = "FAILURE"
+            )
+        except Exception as e:
+            print(f"[ERROR] Failed to log chain output: {e}")
+
+
     def _parse_react_reasoning(self, llm_output: str):
         """Parse ReAct reasoning pattern from LLM output"""
         try:
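The new retriever hooks take `chunkSize` from the first document and fall back to the last captured query. The same probe against stub documents (`Document` here is a stand-in for LangChain's class):

```python
# Stand-in for langchain's Document; only page_content matters here.
class Document:
    def __init__(self, page_content: str):
        self.page_content = page_content

documents = [Document("LangChain is a framework..."), Document("It chains LLM calls...")]

# Mirrors on_retriever_end's chunk-size probe and logged fields.
try:
    chunkSize = len(documents[0].page_content) if documents and documents[0].page_content else 0
except Exception:
    chunkSize = 0

print(chunkSize)                                 # length of the first chunk
print(len(documents))                            # logged as topK
print([doc.page_content for doc in documents])   # logged as context
```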
@@ -265,6 +347,9 @@ class LlumoCallbackHandler(BaseCallbackHandler):
 
     def on_tool_start(self, serialized: Dict[str, Any], input_str: str, **kwargs: Any) -> None:
         """Called when a tool starts executing"""
+        # print("ON TOOL START: ",serialized)
+        # print("ON TOOL START: ",kwargs)
+
         self.toolStartTime = time.time()
         self.stepTime = time.time()
 
@@ -272,6 +357,7 @@ class LlumoCallbackHandler(BaseCallbackHandler):
         self.currentToolName = (serialized.get("name") or
                                 serialized.get("_type") or
                                 "unknown")
+        self.currentToolDescription = serialized.get("description","No description found")
 
         # Handle the case where input_str is "None" or None
         if input_str == "None" or input_str is None:
@@ -316,13 +402,13 @@ class LlumoCallbackHandler(BaseCallbackHandler):
             self.sessionLogger.logToolStep(
                 stepName="Tool Execution Completed",
                 toolName=self.currentToolName or "unknown",
+                description = self.currentToolDescription,
                 input=self.currentToolInput or {"input": ""},
                 output=output_str,
                 latencyMs=duration_ms,
                 status="SUCCESS",
-                message="",
+                # message="",
             )
-
             print(f"[DEBUG] Tool completed: {self.currentToolName} -> {output_str}")
 
         except Exception as e:
@@ -331,7 +417,7 @@ class LlumoCallbackHandler(BaseCallbackHandler):
     def on_agent_action(self, action: AgentAction, **kwargs: Any) -> None:
         """Called when an agent takes an action"""
         self.agentsSteps += 1
-        print("ON AGENT ACTION: ", action)
+        # print("ON AGENT ACTION: ", action)
 
         try:
             # Dynamically extract information from action
@@ -352,52 +438,54 @@ class LlumoCallbackHandler(BaseCallbackHandler):
             })
 
             # Log the agent action step using logAgentStep
-            current_status = "
+            current_status = "FAILURE" if self.hasErrors else "SUCCESS"
             reasoning_text = self.currentThought if self.currentThought else "No reasoning captured"
 
             self.sessionLogger.logAgentStep(
                 stepName=f"Agent Action Step {self.agentsSteps}",
-                agentType=
+                agentType=self.agentType,
                 agentName=self.currentAgentName or "unknown",
                 numStepsTaken=self.agentsSteps,
                 tools=[tool_name],
                 query=self.prompt,
                 status=current_status,
-                message=f"Executing {tool_name} with input: {tool_input}. Reasoning: {reasoning_text}",
+                # message=f"Executing {tool_name} with input: {tool_input}. Reasoning: {reasoning_text}",
             )
 
+
+
         except Exception as e:
             print(f"[ERROR] Failed to log agent action: {e}")
 
     def on_agent_finish(self, finish: AgentFinish, **kwargs: Any) -> None:
         """Called when an agent completes execution"""
-        print("ON AGENT FINISH:", finish)
+        # print("ON AGENT FINISH:", finish)
         # We don't need to log anything here since the final result is already logged in on_chain_end
         pass
 
     def on_agent_error(self, error: Exception, **kwargs: Any) -> None:
         """Called when an agent encounters an error"""
-        print("ITS A AGENT ERROR:", error)
+        # print("ITS A AGENT ERROR:", error)
         self.hasErrors = True
         self.lastError = str(error)
 
         try:
             self.sessionLogger.logAgentStep(
                 stepName="Agent Execution Error",
-                agentType=
+                agentType=self.agentType,
                 agentName=self.currentAgentName or "unknown",
                 numStepsTaken=self.agentsSteps,
                 tools=self.toolsUsed,
                 query=self.prompt,
-                status="
-                message=str(error),
+                status="FAILURE",
+                # message=str(error),
             )
         except Exception as e:
             print(f"[ERROR] Failed to log agent error: {e}")
 
     def on_tool_error(self, error: Exception, **kwargs: Any) -> None:
         """Called when a tool encounters an error"""
-
+
         self.hasErrors = True
         self.lastError = str(error)
 
@@ -410,18 +498,19 @@ class LlumoCallbackHandler(BaseCallbackHandler):
             self.sessionLogger.logToolStep(
                 stepName="Tool Execution Failed",
                 toolName=self.currentToolName or "unknown",
+                description=self.currentToolDescription,
                 input=self.currentToolInput or {"input": ""},
                 output="",
                 latencyMs=0,
-                status="
-                message=str(error),
+                status="FAILURE",
+                # message=str(error),
             )
         except Exception as e:
             print(f"[ERROR] Failed to log tool error: {e}")
 
     def on_chain_error(self, error: Exception, **kwargs: Any) -> None:
         """Called when a chain encounters an error"""
-        print("ITS A CHAIN ERROR:", error)
+        # print("ITS A CHAIN ERROR:", error)
         self.hasErrors = True
         self.lastError = str(error)
 
@@ -430,14 +519,17 @@ class LlumoCallbackHandler(BaseCallbackHandler):
             # Use logAgentStep for agent-related chain errors
             self.sessionLogger.logAgentStep(
                 stepName="Agent Chain Error",
-                agentType=
+                agentType=self.agentType,
                 agentName=self.currentAgentName or "unknown",
                 numStepsTaken=self.agentsSteps,
                 tools=self.toolsUsed,
                 query=self.prompt,
-                status="
-                message=str(error),
+                status="FAILURE",
+                # message=str(error),
             )
+
+
+
         else:
             # Use logLlmStep for general chain errors
             self.sessionLogger.logLlmStep(
@@ -446,13 +538,13 @@ class LlumoCallbackHandler(BaseCallbackHandler):
                 provider=self.llmProvider,
                 inputTokens=0,
                 outputTokens=0,
-                temperature=0.0,
-                promptTruncated=False,
+                # temperature=0.0,
+                # promptTruncated=False,
                 latencyMs=0,
-
+                prompt=self.prompt,
                 output="",
-                status="
-                message=str(error),
+                status="FAILURE",
+                # message=str(error),
             )
         except Exception as e:
             print(f"[ERROR] Failed to log chain error: {e}")
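Together these hooks trace chain, LLM, tool, retriever, and agent events into one Llumo session. A hedged end-to-end sketch; `config={"callbacks": [...]}` is standard LangChain callback plumbing, while the agent itself is a placeholder:

```python
# Sketch: reuse the session/handler from the construction example above.
# agent_executor is whatever LangChain agent you already run; any runnable
# or AgentExecutor accepts the handler through its invoke config.
result = agent_executor.invoke(
    {"input": "What is LangChain?"},
    config={"callbacks": [handler]},   # handler: LlumoCallbackHandler
)
# Each run then logs: Query Invocation -> LLM/tool/retriever steps ->
# "Agent Execution Completed" (or FAILURE steps on error).
```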
{llumo-0.2.29 → llumo-0.2.31}/llumo/client.py

@@ -29,10 +29,7 @@ fetchUrl = (
     "https://red-skull-service-392377961931.us-central1.run.app/api/get-cells-data"
 )
 socketDataUrl = "https://app.llumo.ai/api/eval/get-awaited"
-
-# "workspaceID":"c9191fdf33bdd7838328c1a0",
-# "playgroundID":"17496117244856b7815ac94004347b1c2e2f7e01600ec"
-# }
+
 validateUrl = "https://app.llumo.ai/api/workspace-details"
 socketUrl = "https://red-skull-service-392377961931.us-central1.run.app/"
 
@@ -79,6 +76,7 @@ class LlumoClient:
         # Try to parse JSON
         try:
             data = response.json()
+            # print(data)
         except ValueError as e:
             print(f"JSON parsing error: {str(e)}")
             # print(f"Response content that could not be parsed: {response.text[:1000]}...")
@@ -93,7 +91,9 @@ class LlumoClient:
         self.workspaceID = data["data"]["data"].get("workspaceID")
         self.evalDefinition = data["data"]["data"]["analyticsMapping"]
         self.socketToken = data["data"]["data"].get("token")
-        self.
+        # print(self.socketToken)
+        self.hasSubscribed = data["data"]["data"].get("hasSubscr"
+                                                      "ibed", False)
         self.trialEndDate = data["data"]["data"].get("trialEndDate", None)
         self.subscriptionEndDate = data["data"]["data"].get(
             "subscriptionEndDate", None
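The oddly split `get("hasSubscr" "ibed", False)` is valid Python: adjacent string literals are concatenated at compile time, so the key is just `"hasSubscribed"`. A quick check:

```python
# Adjacent string literals fuse into one at compile time.
key = "hasSubscr" "ibed"
assert key == "hasSubscribed"

data = {"hasSubscribed": True}
print(data.get("hasSubscr" "ibed", False))  # True
```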
@@ -191,7 +191,7 @@ class LlumoClient:
     def postBatch(self, batch, workspaceID):
         payload = {
             "batch": json.dumps(batch),
-            "runType": "
+            "runType": "FULL_EVAL_RUN",
             "workspaceID": workspaceID,
         }
         # socketToken here if the "JWD" token
@@ -204,6 +204,7 @@ class LlumoClient:
             response = requests.post(postUrl, json=payload, headers=headers)
             # print(f"Post API Status Code: {response.status_code}")
             # print(response.text)
+            # print(response.status_code)
 
         except Exception as e:
             print(f"Error in posting batch: {e}")
@@ -644,7 +645,7 @@ class LlumoClient:
         self.socket.listenForResults(
             min_wait=20,
             max_wait=timeout,
-            inactivity_timeout=
+            inactivity_timeout=50,
             expected_results=None,
         )
 
@@ -701,13 +702,11 @@ class LlumoClient:
         data,
         evals: list,
         prompt_template="Give answer to the given query: {{query}} using the given context: {{context}}.",
-        outputColName="output",
-        createExperiment: bool = False,
         getDataFrame: bool = False,
         _tocheck=True,
     ):
-        if hasattr(self, "startLlumoRun"):
-
+        # if hasattr(self, "startLlumoRun"):
+        #     self.startLlumoRun(runName="evaluateMultiple")
         if isinstance(data, dict):
             data = [data]
         elif not isinstance(data, list):
@@ -743,9 +742,9 @@ class LlumoClient:
         listener_thread = threading.Thread(
             target=self.socket.listenForResults,
             kwargs={
-                "min_wait":
+                "min_wait": 20,
                 "max_wait": timeout,
-                "inactivity_timeout":
+                "inactivity_timeout": 35,
                 "expected_results": expectedResults,
             },
             daemon=True,
@@ -753,148 +752,152 @@ class LlumoClient:
         listener_thread.start()
         self.validateApiKey(evalName=evals[0])
         activePlayground = self.playgroundID
-        for evalName
-
+        # print(f"\n======= Running evaluation for: {evalName} =======")
+
+        # Validate API and dependencies
+        # self.validateApiKey(evalName=evals[0])
+        customAnalytics = getCustomAnalytics(self.workspaceID)
+        # metricDependencies = checkDependency(
+        #     evalName,
+        #     list(dataframe.columns),
+        #     tocheck=_tocheck,
+        #     customevals=customAnalytics,
+        # )
+        # if not metricDependencies["status"]:
+        #     raise LlumoAIError.dependencyError(metricDependencies["message"])
 
-
-
-
-
-
-
-
-
-
-
-
+        # evalDefinition = self.evalDefinition[evalName]["definition"]
+        model = "GPT_4"
+        provider = "OPENAI"
+        evalType = "LLM"
+        workspaceID = self.workspaceID
+        email = self.email
+        # categories = self.categories
+        # evaluationStrictness = self.evaluationStrictness
+        # grammarCheckOutput = self.grammarCheckOutput
+        # insightLength = self.insightsLength
+        # numJudges = self.numJudges
+        # penaltyBonusInstructions = self.penaltyBonusInstructions
+        # probableEdgeCases = self.probableEdgeCases
+        # fieldMapping = self.fieldMapping
 
-
-
-
-
-
-
-
-
-
-
-        numJudges = self.numJudges
-        penaltyBonusInstructions = self.penaltyBonusInstructions
-        probableEdgeCases = self.probableEdgeCases
-        fieldMapping = self.fieldMapping
+        userHits = checkUserHits(
+            self.workspaceID,
+            self.hasSubscribed,
+            self.trialEndDate,
+            self.subscriptionEndDate,
+            self.hitsAvailable,
+            len(dataframe),
+        )
+        if not userHits["success"]:
+            raise LlumoAIError.InsufficientCredits(userHits["message"])
 
-
-
-
-
-
-
-
+        currentBatch = []
+        for index, row in dataframe.iterrows():
+            tools = [row["tools"]] if "tools" in dataframe.columns else []
+            groundTruth = row.get("groundTruth", "")
+            messageHistory = (
+                [row["messageHistory"]]
+                if "messageHistory" in dataframe.columns
+                else []
             )
-
-
+            promptTemplate = prompt_template
+            keys = re.findall(r"{{(.*?)}}", promptTemplate)
 
-
-
-            tools = [row["tools"]] if "tools" in dataframe.columns else []
-            groundTruth = row.get("groundTruth", "")
-            messageHistory = (
-                [row["messageHistory"]]
-                if "messageHistory" in dataframe.columns
-                else []
-            )
-            promptTemplate = prompt_template
-            keys = re.findall(r"{{(.*?)}}", promptTemplate)
+            if not all([ky in dataframe.columns for ky in keys]):
+                raise LlumoAIError.InvalidPromptTemplate()
 
-
-
+            inputDict = {key: row[key] for key in keys if key in row}
+            # output = row.get(outputColName, "")
+            output = row.get("output","")
+            intermediateSteps = row.get("intermediateSteps", "")
 
-
-
-            intermediateSteps = row.get("intermediateSteps", "")
+            rowID = f"{int(time.time() * 1000)}{uuid.uuid4()}".replace("-", "")
+            columnID = f"{int(time.time() * 1000)}{uuid.uuid4()}".replace("-", "")
 
-
-
+            compoundKey = f"{rowID}-{columnID}-{columnID}"
+            # rowIdMapping[compoundKey] = {"index": index, "eval": evalName}
+            rowIdMapping[compoundKey] = {"index": index}
 
-            compoundKey = f"{rowID}-{columnID}-{columnID}"
-            rowIdMapping[compoundKey] = {"index": index, "eval": evalName}
 
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-                },
-                "categories": categories,
-                "evaluationStrictness": evaluationStrictness,
-                "grammarCheckOutput": grammarCheckOutput,
-                "insightLength": insightLength,
-                "numJudges": numJudges,
-                "penaltyBonusInstructions": penaltyBonusInstructions,
-                "probableEdgeCases": probableEdgeCases,
-                "model": model,
-                "provider": provider,
+            templateData = {
+                "processID": getProcessID(),
+                "socketID": socketID,
+                "rowID": rowID,
+                "columnID": columnID,
+                "processType": "FULL_EVAL_RUN",
+                "evalType": "LLM",
+                "workspaceID": workspaceID,
+                "email": email,
+                "playgroundID": activePlayground,
+                "source": "SDK",
+                "processData": {
+                    # "analyticsName": evalName,
+                    # "definition": evalDefinition,
+                    "executionDependency": {
+                        "query": "",
+                        "context": "",
+                        "output": output,
+                        "tools": tools,
+                        "groundTruth": groundTruth,
+                        "messageHistory": messageHistory,
+                        "intermediateSteps": intermediateSteps,
                     },
-                "
-                "
-                "
-
-
-
-
-
-
-
-
-
-
-
-
-
-                    promptTemplate, tempObj
-                )
-            else:
-                query += f" {key}: {value}, "
+                    "evallist":evals,
+                    # "model": model,
+                    # "provider": provider,
+                    "sessionID":self.sessionID
+                    # "categories": categories,
+                    # "evaluationStrictness": evaluationStrictness,
+                    # "grammarCheckOutput": grammarCheckOutput,
+                    # "insightLength": insightLength,
+                    # "numJudges": numJudges,
+                    # "penaltyBonusInstructions": penaltyBonusInstructions,
+                    # "probableEdgeCases": probableEdgeCases,
+                },
+                "type": "FULL_EVAL_RUN",
+                # "kpi": evalName,
+                # "fieldMappig": fieldMapping,
+            }
 
-
-
+            query = ""
+            context = ""
+            for key, value in inputDict.items():
+                if isinstance(value, str):
+                    length = len(value.split()) * 1.5
+                    if length > 50:
                         context += f" {key}: {value}, "
+                    else:
+                        if promptTemplate:
+                            tempObj = {key: value}
+                            promptTemplate = getInputPopulatedPrompt(
+                                promptTemplate, tempObj
+                            )
+                        else:
+                            query += f" {key}: {value}, "
 
-
-
-
+            if not context.strip():
+                for key, value in inputDict.items():
+                    context += f" {key}: {value}, "
+
+            templateData["processData"]["executionDependency"][
+                "context"
+            ] = context.strip()
+            templateData["processData"]["executionDependency"][
+                "query"
+            ] = query.strip()
+            if promptTemplate and not query.strip():
                 templateData["processData"]["executionDependency"][
                     "query"
-            ] =
-            if promptTemplate and not query.strip():
-                templateData["processData"]["executionDependency"][
-                    "query"
-                ] = promptTemplate
-
-            currentBatch.append(templateData)
-            if len(currentBatch) == 10:
-                self.allBatches.append(currentBatch)
-                currentBatch = []
+                ] = promptTemplate
 
-
+            currentBatch.append(templateData)
+            if len(currentBatch) == 10:
                 self.allBatches.append(currentBatch)
+                currentBatch = []
+
+        if currentBatch:
+            self.allBatches.append(currentBatch)
 
         for batch in tqdm(
             self.allBatches,
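Rows are flushed to `postBatch` in groups of ten, with a final partial batch appended after the loop. The same chunking pattern in isolation:

```python
# Mirrors the batching loop: flush every 10 items, then the remainder.
items = list(range(23))
allBatches, currentBatch = [], []

for item in items:
    currentBatch.append(item)
    if len(currentBatch) == 10:
        allBatches.append(currentBatch)
        currentBatch = []

if currentBatch:
    allBatches.append(currentBatch)

print([len(b) for b in allBatches])  # [10, 10, 3]
```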
@@ -905,7 +908,8 @@ class LlumoClient:
     ):
         try:
             self.postBatch(batch=batch, workspaceID=workspaceID)
-            time.sleep(
+            time.sleep(2)
+            # print(batch)
         except Exception as e:
             print(f"Error posting batch: {e}")
             raise
@@ -921,8 +925,8 @@ class LlumoClient:
         receivedRowIDs = {key for item in rawResults for key in item.keys()}
         expectedRowIDs = set(rowIdMapping.keys())
         missingRowIDs = expectedRowIDs - receivedRowIDs
-        # print("All expected keys:",
-        # print("All received keys:",
+        # print("All expected keys:", expectedRowIDs)
+        # print("All received keys:", receivedRowIDs)
         # print("Missing keys:", len(missingRowIDs))
         missingRowIDs = list(missingRowIDs)
         if len(missingRowIDs) > 0:
@@ -930,102 +934,83 @@ class LlumoClient:
             rawResults.extend(dataFromDb)
 
         self.evalData = rawResults
+        # print("RAW RESULTS: ", self.evalData)
 
         # Initialize dataframe columns for each eval
-        for
-        dataframe[
-        dataframe[f"{
+        for ev_name in evals:
+            dataframe[ev_name] = ""
+            dataframe[f"{ev_name} Reason"] = ""
+            # dataframe[f"{ev_name} EdgeCase"] = None
 
         # Map results to dataframe rows
         for item in rawResults:
             for compound_key, value in item.items():
-                if compound_key in rowIdMapping:
-
-
-
-                    dataframe.at[index, f"{evalName} Reason"] = value.get("reasoning")
-
-                    # Log the evaluation step
-                    if hasattr(self, "logEvalStep"):
-                        try:
-                            start_time = time.time()
-                            self.logEvalStep(
-                                stepName=f"EVAL-{evalName}",
-                                output=value.get("value"),
-                                context=row.get("context", ""),
-                                query=row.get("query", ""),
-                                messageHistory=row.get("messageHistory", ""),
-                                tools=row.get("tools", ""),
-                                intermediateSteps=row.get("intermediateSteps", ""),
-                                groundTruth=row.get("groundTruth", ""),
-                                analyticsScore=value.get("analyticsScore", {}),
-                                reasoning=value.get("reasoning", {}),
-                                classification=value.get("classification", {}),
-                                evalLabel=value.get("evalLabel", {}),
-                                latencyMs=int((time.time() - start_time) * 1000),
-                                status="SUCCESS",
-                                message="",
-                            )
-                        except Exception as e:
-                            print(f"Error logging eval step: {e}")
+                if compound_key not in rowIdMapping:
+                    continue
+                index = rowIdMapping[compound_key]["index"]
+                rowID, columnID, _ = compound_key.split("-", 2)
 
-
-
-        self.endLlumoRun()
+                if hasattr(self, "startLlumoRun"):
+                    self.startLlumoRun(runName="evaluateMultiple",rowID = rowID, columnID = columnID)
 
-
-
-        # df = dataframe.fillna("Some error occured").astype(object)
-        with warnings.catch_warnings():
-            warnings.simplefilter(action="ignore", category=FutureWarning)
-            df = dataframe.fillna("Some error occurred").astype(str)
+                # get the dataframe row at this index
+                row = dataframe.iloc[index].to_dict()
 
-
-
-            email,
-            workspaceID,
-            df,
-            promptText=prompt_template,
-            definationMapping=self.definationMapping,
-            outputColName=outputColName,
-            activePlayground=activePlayground,
-            customAnalytics=customAnalytics,
-        ):
-            print(
-                "LLUMO’s intuitive UI is ready—start exploring and experimenting with your logs now. Visit https://app.llumo.ai/evallm to see the results."
-            )
-        if getDataFrame:
-            return LlumoDataFrameResults(
-                dataframe,
-                evals=self.evals,
-                evalData=self.evalData,
-                definationMapping=self.definationMapping,
-            )
-        else:
-            data = dataframe.to_dict(orient="records")
-            return LlumoDictResults(
-                data,
-                evals=self.evals,
-                evalData=self.evalData,
-                definationMapping=self.definationMapping,
-            )
+                if not value:
+                    continue
 
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+
+                # Handle fullEval block
+                fullEval = value.get("fullEval") if isinstance(value, dict) else None
+                if fullEval:
+                    if "evalMetrics" in fullEval and isinstance(fullEval["evalMetrics"], list):
+                        for eval_item in fullEval["evalMetrics"]:
+                            evalName = eval_item.get("evalName") or eval_item.get("kpiName")
+                            score = str(eval_item.get("score")) or eval_item.get("value")
+                            reasoning = eval_item.get("reasoning")
+                            # edgeCase = eval_item.get("edgeCase")
+
+                            if evalName:
+                                dataframe.at[index, evalName] = score
+                                dataframe.at[index, f"{evalName} Reason"] = reasoning
+                                # dataframe.at[index, f"{evalName} EdgeCase"] = edgeCase
+
+                            # logEvalStep if available
+                            if hasattr(self, "logEvalStep"):
+                                try:
+                                    start_time = time.time()
+                                    self.logEvalStep(
+                                        stepName=f"EVAL-{evalName}",
+                                        output=row.get("output", ""),
+                                        context=row.get("context", ""),
+                                        query=row.get("query", ""),
+                                        messageHistory=row.get("messageHistory", ""),
+                                        tools=row.get("tools", ""),
+                                        intermediateSteps=row.get("intermediateSteps", ""),
+                                        groundTruth=row.get("groundTruth", ""),
+                                        analyticsScore=score,
+                                        reasoning=reasoning,
+                                        classification=eval_item.get("classification", {}),
+                                        evalLabel=eval_item.get("evalLabel", {}),
+                                        latencyMs=int((time.time() - start_time) * 1000),
+                                        status="SUCCESS",
+                                        message="",
+                                    )
+                                except Exception as e:
+                                    print(f"⚠️ logEvalStep failed: {e}")
+                if hasattr(self, "endLlumoRun"):
+                    self.endEvalRun()
+
+        # Clean up and finish
+        try:
+            self.socket.disconnect()
+        except Exception:
+            pass
+
+        # if hasattr(self, "endLlumoRun"):
+        #     self.endEvalRun()
+        #
+        return dataframe
 
     def promptSweep(
         self,
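Result rows now arrive wrapped in a `fullEval` block whose `evalMetrics` list carries one entry per metric. A sketch of unpacking a payload of the shape the new loop expects; field values beyond those read in the diff are illustrative:

```python
# Shape inferred from the parsing loop above; values are made up.
value = {
    "fullEval": {
        "evalMetrics": [
            {"evalName": "Response Correctness", "score": 92, "reasoning": "Grounded in context."},
            {"kpiName": "Faithfulness", "score": 88, "reasoning": "No hallucinated claims."},
        ]
    }
}

fullEval = value.get("fullEval") if isinstance(value, dict) else None
if fullEval and isinstance(fullEval.get("evalMetrics"), list):
    for eval_item in fullEval["evalMetrics"]:
        evalName = eval_item.get("evalName") or eval_item.get("kpiName")
        score = str(eval_item.get("score")) or eval_item.get("value")
        print(evalName, score, eval_item.get("reasoning"))
```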
@@ -1806,8 +1791,8 @@ class LlumoClient:
         rowIdMapping = {}  # (rowID-columnID-columnID -> (index, evalName))
         self.validateApiKey(evalName=evals[0])
         if createExperiment:
-            if playgroundID:
-                activePlayground = playgroundID
+            if self.playgroundID:
+                activePlayground = self.playgroundID
             else:
                 activePlayground = str(
                     createEvalPlayground(email=self.email, workspaceID=self.workspaceID)
{llumo-0.2.29 → llumo-0.2.31}/llumo/helpingFuntions.py

@@ -130,7 +130,7 @@ def checkUserHits(
     response = json.loads(responseBody.text)
 
     proceed = response.get("execution", "")
-    print(proceed)
+    # print(proceed)
 
     if proceed:
         return {"success": True, "message": "Hits added and access granted."}
@@ -234,7 +234,7 @@ def createColumn(workspaceID, dataframe, playgroundID, promptText=None,queryColN
     except Exception as e:
         pass
     evalDependencies = checkDependency(_returnDepMapping=True,customevals=customAnalytics)
-    print(allEvals)
+    # print(allEvals)
     # Create a mapping of column names to unique column IDs
     columnIDMapping = {}
 
{llumo-0.2.29 → llumo-0.2.31}/llumo/llumoSessionContext.py

@@ -51,8 +51,21 @@ class LlumoSessionContext(LlumoClient):
     def __exit__(self, excType, excVal, excTb):
         self.end()
 
-
-
+
+    def startLlumoRun(self, runName: str, rowID: str = "", columnID: str = "", runID: str = None):
+
+        if runID is None:
+            LlumoRunID = str(uuid.uuid4().hex[:16])
+        else:
+            LlumoRunID = runID
+
+
+        # Proceed with using LlumoRunID, rowID, columnID...
+        # if rowID =="":
+        #     rowID = str(uuid.uuid4().hex[:16])
+        # if columnID == "":
+        #     columnID = str(uuid.uuid4().hex[:16])
+
         currentTime = datetime(2025, 8, 2, 10, 20, 15, tzinfo=timezone.utc)
         createdAt = currentTime.strftime("%Y-%m-%dT%H:%M:%S.000Z")
         llumoRun = {
@@ -62,8 +75,8 @@ class LlumoSessionContext(LlumoClient):
             "playgroundID": self.logger.getPlaygroundID(),
             "workspaceID": self.logger.getWorkspaceID(),
             "source": "SDK",
-            "rowID":
-            "columnID":
+            "rowID": rowID,
+            "columnID": columnID,
             "email": self.logger.getUserEmailID(),
             "createdAt": createdAt,
             "createdBy": self.logger.getUserEmailID(),
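`startLlumoRun` now threads `rowID` and `columnID` straight into the run payload, so a run can be pinned to a specific evaluation cell; when `runID` is omitted a fresh 16-hex-char ID is minted. A brief sketch; the session object is assumed to be an active `LlumoSessionContext`:

```python
import uuid

# How evaluateMultiple pins a run to a cell (session setup assumed):
# session.startLlumoRun(runName="evaluateMultiple", rowID=rowID, columnID=columnID)

# The default-runID branch in isolation:
runID = None
LlumoRunID = str(uuid.uuid4().hex[:16]) if runID is None else runID
print(len(LlumoRunID))  # 16
```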
@@ -96,12 +109,52 @@ class LlumoSessionContext(LlumoClient):
         # STEP 3: Send the payload
         url = "https://app.llumo.ai/api/create-debug-log"
         headers = {
-            "Authorization": f"Bearer {self.
+            "Authorization": f"Bearer {self.logger.getWorkspaceID()}",
             "Content-Type": "application/json",
         }
 
         try:
-
+            print(run)
+            response = requests.post(url, headers=headers, json=run, timeout=20)
+            response.raise_for_status()
+            # print(response.json())
+        except requests.exceptions.Timeout:
+            # print("Request timed out.")
+            pass
+        except requests.exceptions.RequestException as e:
+            pass
+
+        # Cleanup
+        if self.threadLlumoRun:
+            _ctxLlumoRun.reset(self.threadLlumoRun)
+            self.threadLlumoRun = None
+
+    def endEvalRun(self):
+        run = getLlumoRun()
+        if run is None:
+            return
+
+        # STEP 1: Sort steps by timestamp
+        steps = run.get("steps", [])
+        # sorted_steps = sorted(steps, key=lambda s: s.get("timestamp", 0))
+
+        # # STEP 2: Remove timestamp from each step before sending
+        # clean_steps = [
+        #     {k: v for k, v in step.items() if k != "timestamp"} for step in sorted_steps
+        # ]
+        # run["steps"] = clean_steps
+
+        # print(run["runName"])  # optional debug log
+
+        # STEP 3: Send the payload
+        url = "https://backend-api.llumo.ai/api/v1/create-debug-log-for-sdk"
+        headers = {
+            "Authorization": f"Bearer {self.logger.getWorkspaceID()}",
+            "Content-Type": "application/json",
+        }
+        # print(run)
+        try:
+            response = requests.post(url, headers=headers, json={"log":run}, timeout=20)
             response.raise_for_status()
             # print(response.json())
         except requests.exceptions.Timeout:
@@ -145,51 +198,52 @@ class LlumoSessionContext(LlumoClient):
         provider: str,
         inputTokens: int,
         outputTokens: int,
-        temperature: float,
-        promptTruncated: bool,
+        # temperature: float,
+        # promptTruncated: bool,
         latencyMs: int,
-
+        prompt: str,
         output: str,
         status: str,
-        message: str,
+        # message: str,
     ):
         metadata = {
             "model": model,
             "provider": provider,
             "inputTokens": inputTokens,
             "outputTokens": outputTokens,
-            "temperature": temperature,
-            "promptTruncated": promptTruncated,
+            # "temperature": temperature,
+            # "promptTruncated": promptTruncated,
             "latencyMs": latencyMs,
-            "
+            "prompt": prompt,
             "output": output,
             "status": status,
-            "message": message,
+            # "message": message,
         }
+
         self.logStep("LLM", stepName, metadata)
 
     def logRetrieverStep(
         self,
         stepName: str,
         retrieverSource: str,
-        queryVectorType: str,
         topK: int,
-
-
+        chunkSize,
+        context : str,
+        searchQuery: str,
         latencyMs: int,
-        status: str
-        message: str,
+        status: str
     ):
         metadata = {
             "retrieverSource": retrieverSource,
-            "queryVectorType": queryVectorType,
             "topK": topK,
-            "
-            "
+            "chunkSize":chunkSize,
+            "context": context,
+            "searchQuery": searchQuery,
             "latencyMs": latencyMs,
             "status": status,
-            "message": message,
+            # "message": message,
         }
+
         self.logStep("RETRIEVER", stepName, metadata)
 
     def logAgentStep(
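`logLlmStep` trades the old `temperature`, `promptTruncated`, and `message` parameters for a required `prompt` field, so 0.2.29-style call sites need updating. A sketch of the new call shape; `session` is an active `LlumoSessionContext` and the values are illustrative:

```python
# New 0.2.31 signature: prompt replaces temperature/promptTruncated/message.
session.logLlmStep(
    stepName="LLM Call Completed",
    model="gpt-4o",                      # illustrative
    provider="openai",
    inputTokens=120,
    outputTokens=48,
    latencyMs=850,
    prompt="What is LangChain?",
    output="LangChain is a framework...",
    status="SUCCESS",
)
```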
@@ -201,7 +255,7 @@ class LlumoSessionContext(LlumoClient):
         tools: List[str],
         query: str,
         status: str,
-        message: str,
+        # message: str,
     ):
         metadata = {
             "agentType": agentType,
@@ -210,8 +264,8 @@ class LlumoSessionContext(LlumoClient):
             "tools": tools,
             "query": query,
             "status": status,
-
-
+            # "message": message,
+        }
         self.logStep("AGENT", stepName, metadata)
 
     def logToolSelectorStep(
@@ -222,7 +276,7 @@ class LlumoSessionContext(LlumoClient):
         selectedTool: str,
         reasoning: str,
         status: str,
-        message: str,
+        # message: str,
     ):
         metadata = {
             "selectorType": selectorType,
@@ -230,7 +284,7 @@ class LlumoSessionContext(LlumoClient):
             "selectedTool": selectedTool,
             "reasoning": reasoning,
             "status": status,
-            "message": message,
+            # "message": message,
         }
         self.logStep("TOOL_SELECTOR", stepName, metadata)
 
@@ -238,19 +292,21 @@ class LlumoSessionContext(LlumoClient):
         self,
         stepName: str,
         toolName: str,
+        description: str,
         input: Dict[str, Any],
         output: str,
         latencyMs: int,
         status: str,
-        message: str,
+        # message: str,
     ):
         metadata = {
             "toolName": toolName,
+            "description":description,
             "input": input,
             "output": output,
             "latencyMs": latencyMs,
             "status": status,
-            "message": message,
+            # "message": message,
         }
         self.logStep("TOOL", stepName, metadata)
 
@@ -364,3 +420,16 @@ class LlumoSessionContext(LlumoClient):
             "message": message,
         }
         self.logStep("CUSTOM_SCRIPT", stepName, metadata)
+
+
+    def logQueryStep(self,stepName,model,provider,inputTokens,query,status):
+        metadata = {
+            "model": model,
+            "provider": provider,
+            "inputTokens": inputTokens,
+            "query": query,
+            "status":status
+        }
+        self.logStep("QUERY", stepName, metadata)
+
+
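The new `logQueryStep` records the incoming user question as its own QUERY step, which both the LangChain callback and the patched OpenAI client now emit. A minimal call sketch; `session` is again an assumed active `LlumoSessionContext`:

```python
# QUERY steps capture the raw user question before any LLM/tool work.
session.logQueryStep(
    stepName="Query Invocation",
    model="unknown",
    provider="unknown",
    inputTokens=5,                 # a heuristic estimate is acceptable here
    query="What is LangChain?",
    status="SUCCESS",
)
```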
{llumo-0.2.29 → llumo-0.2.31}/llumo/openai.py

@@ -12,8 +12,6 @@ def performEvaluation(data, api_key=None, evals=["Response Correctness"], **kwargs):
     results = client.evaluateMultiple(
         data,
         evals=evals,
-        createExperiment=kwargs.get("createExperiment", False),
-        playgroundID=kwargs.get("playgroundID"),
         prompt_template="Give answer to the query: {{query}}, using context: {{context}}",
         getDataFrame=False,
     )
@@ -138,6 +136,13 @@ class OpenAI(OpenAIClient):
             response = original_create(*args, **kwargs)
             latency = int((time.time() - start_time) * 1000)
             output_text = response.choices[0].message.content
+            self.session.logQueryStep(
+                stepName="Query Invocation",
+                model=model,
+                provider="openai",
+                inputTokens=response.usage.prompt_tokens,
+                query=user_message,
+                status = "SUCCESS")
 
             self.session.logLlmStep(
                 stepName=f"LLM-{user_message[:30]}",
@@ -145,13 +150,13 @@ class OpenAI(OpenAIClient):
                 provider="openai",
                 inputTokens=response.usage.prompt_tokens,
                 outputTokens=response.usage.completion_tokens,
-                temperature=kwargs.get("temperature", 0.0),
-                promptTruncated=False,
+                # temperature=kwargs.get("temperature", 0.0),
+                # promptTruncated=False,
                 latencyMs=latency,
-
+                prompt=user_message,
                 output=output_text,
                 status="SUCCESS",
-                message="",
+                # message="",
             )
 
             # Response Correctness Evaluation
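With these hooks, every `chat.completions.create` call through the wrapped client produces a QUERY step followed by an LLM step, using real token counts from `response.usage`. A hedged usage sketch; the wrapper's constructor arguments are an assumption, not a documented API:

```python
# Sketch: llumo.openai.OpenAI subclasses the official client; how it binds
# its Llumo session is not shown in this diff, so treat setup as assumed.
from llumo.openai import OpenAI

client = OpenAI(api_key="YOUR_OPENAI_KEY")  # assumed to accept the usual key
response = client.chat.completions.create(
    model="gpt-4o",
    messages=[{"role": "user", "content": "What is LangChain?"}],
)
# Behind the scenes: logQueryStep("Query Invocation", ...) then
# logLlmStep(..., prompt=user_message, output=output_text, status="SUCCESS").
```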
The remaining 19 files listed above are unchanged between 0.2.29 and 0.2.31.