PyPI - qtype - Versions diffs - 0.1.12__py3-none-any.whl → 0.1.13__py3-none-any.whl - Mend

qtype 0.1.12__py3-none-any.whl → 0.1.13__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (252) hide show

qtype/interpreter/converters.py CHANGED Viewed

@@ -2,37 +2,16 @@
 from __future__ import annotations
-from collections.abc import AsyncIterator
-from typing import Any, cast
+from pathlib import Path
+from typing import Any
+import fsspec
 import pandas as pd
 from pydantic import BaseModel
 from qtype.interpreter.types import FlowMessage, Session
-from qtype.semantic.model import Flow
-async def dataframe_to_flow_messages(
-    df: pd.DataFrame, session: Session
-) -> AsyncIterator[FlowMessage]:
-    """
-    Convert a DataFrame to an async generator of FlowMessages.
-    Each row in the DataFrame becomes a FlowMessage with the same session.
-    Args:
-        df: DataFrame where each row represents one set of inputs
-        session: Session object to use for all messages
-    Yields:
-        FlowMessages, one per DataFrame row
-    """
-    # Use to_dict with orient='records' - much faster than iterrows
-    # This returns a list of dicts directly without Series overhead
-    records = cast(list[dict[str, Any]], df.to_dict(orient="records"))
-    for record in records:
-        yield FlowMessage(session=session, variables=record)
+from qtype.interpreter.typing import convert_dict_to_typed_variables
+from qtype.semantic.model import Flow, Variable
 def flow_messages_to_dataframe(
@@ -77,3 +56,140 @@ def flow_messages_to_dataframe(
         results.append(row_data)
     return pd.DataFrame(results)
+def read_dataframe_from_file(
+    file_path: str,
+) -> pd.DataFrame:
+    """
+    Read a tabular file into a pandas DataFrame.
+    Local files are sniffed with libmagic to obtain a MIME type; remote
+    files are classified by file extension only. When libmagic is missing,
+    fails, or reports a type that has no dedicated reader here (for example
+    "text/plain" for a .json file), a recognised file extension decides
+    the format instead. The file is opened through fsspec, so both local
+    paths and remote URLs are accepted. Returns the raw DataFrame without
+    type conversion.
+    Args:
+        file_path: Path to the file (local or remote, e.g., s3://bucket/file)
+    Returns:
+        DataFrame with data from the file
+    Raises:
+        ValueError: If the file format cannot be determined or is not
+            supported
+        FileNotFoundError: If file does not exist
+    Supported formats:
+        - CSV (.csv)
+        - JSON (.json)
+        - JSONL (.jsonl, JSON Lines)
+        - Parquet (.parquet)
+        - Excel (.xlsx, .xls)
+    Examples:
+        >>> # Read CSV
+        >>> df = read_dataframe_from_file("data.csv")
+        >>>
+        >>> # Read from S3
+        >>> df = read_dataframe_from_file("s3://bucket/data.parquet")
+    """
+    ext_to_mime = {
+        ".csv": "text/csv",
+        ".json": "application/json",
+        ".jsonl": "application/jsonlines",
+        ".parquet": "application/vnd.parquet",
+        ".xlsx": "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
+        ".xls": "application/vnd.ms-excel",
+    }
+    # MIME types that mean "one JSON document per line"
+    jsonl_mimes = ("application/jsonlines", "application/x-ndjson")
+    # MIME types that select a specific reader below; anything else that
+    # libmagic reports is treated as too generic to trust over the extension
+    dedicated_mimes = set(ext_to_mime.values()) | set(jsonl_mimes)
+    extension = Path(file_path).suffix.lower()
+    extension_mime = ext_to_mime.get(extension)
+    if file_path.startswith(("http://", "https://", "s3://", "gs://")):
+        # Remote content is not sniffed; classify by extension and treat
+        # an unknown extension as octet-stream (read as Parquet below)
+        mime_type = extension_mime or "application/octet-stream"
+    else:
+        try:
+            # Imported lazily so that a missing python-magic/libmagic
+            # install degrades to extension-based detection instead of
+            # failing before any fallback can run
+            import magic
+            mime_type = magic.Magic(mime=True).from_file(file_path)
+        except Exception as e:
+            # Best-effort fallback: only give up when the extension is
+            # unknown as well
+            if extension_mime is None:
+                raise ValueError(
+                    f"Could not determine file type for {file_path}: {e}"
+                ) from e
+            mime_type = extension_mime
+        else:
+            # A generic answer such as "text/plain" or "application/zip"
+            # would pick the wrong reader (or none) for a file whose
+            # extension clearly names a supported format
+            if mime_type not in dedicated_mimes and extension_mime is not None:
+                mime_type = extension_mime
+    # Open file with fsspec (supports local and remote files)
+    with fsspec.open(file_path, "rb") as file_handle:
+        if mime_type in ("text/csv", "text/plain"):
+            df = pd.read_csv(file_handle)  # type: ignore[arg-type]
+        elif mime_type == "application/json" or mime_type in jsonl_mimes:
+            # JSON Lines is recognised by extension or by its MIME type
+            if extension == ".jsonl" or mime_type in jsonl_mimes:
+                df = pd.read_json(
+                    file_handle,  # type: ignore[arg-type]
+                    lines=True,
+                )
+            else:
+                df = pd.read_json(file_handle)  # type: ignore[arg-type]
+        elif mime_type in (
+            "application/vnd.parquet",
+            "application/octet-stream",
+        ):
+            # Parquet is often detected as octet-stream
+            df = pd.read_parquet(file_handle)  # type: ignore[arg-type]
+        elif mime_type in (
+            "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
+            "application/vnd.ms-excel",
+        ):
+            df = pd.read_excel(file_handle)  # type: ignore[arg-type]
+        else:
+            raise ValueError(
+                f"Unsupported MIME type for file {file_path}: {mime_type}"
+            )
+    return df
+def dataframe_to_flow_messages(
+    df: pd.DataFrame,
+    variables: list[Variable],
+    session: Session | None = None,
+) -> list[FlowMessage]:
+    """
+    Convert a DataFrame to FlowMessages with type conversion.
+    Each row in the DataFrame becomes a FlowMessage whose variables are
+    produced by convert_dict_to_typed_variables from the row's values and
+    the given Variable definitions. All messages share the same session.
+    Args:
+        df: DataFrame with raw data
+        variables: List of Variable definitions for type conversion
+        session: Session to use for all FlowMessages. When omitted, a new
+            Session(session_id="default") is created for this call.
+    Returns:
+        List of FlowMessages, one per row, with typed variables
+    Examples:
+        >>> from qtype.semantic.model import Variable
+        >>> from qtype.base.types import PrimitiveTypeEnum
+        >>> import pandas as pd
+        >>>
+        >>> df = pd.DataFrame({"age": ["30"], "score": ["95.5"]})
+        >>> vars = [
+        ...     Variable(id="age", type=PrimitiveTypeEnum.int),
+        ...     Variable(id="score", type=PrimitiveTypeEnum.float),
+        ... ]
+        >>> messages = dataframe_to_flow_messages(df, vars)
+    """
+    if session is None:
+        # Build the default per call: a default written in the signature is
+        # evaluated once at definition time, so every caller that omitted
+        # the argument would receive the very same Session object
+        session = Session(session_id="default")
+    return [
+        FlowMessage(
+            session=session,
+            variables=convert_dict_to_typed_variables(row_dict, variables),
+        )
+        for row_dict in df.to_dict(orient="records")
+    ]

qtype/interpreter/executors/agent_executor.py CHANGED Viewed

@@ -112,9 +112,8 @@ class AgentExecutor(StepExecutor, ToolExecutionMixin, FunctionToolHelper):
         # Convert input variables to chat messages
         inputs = []
         for input_var in self.step.inputs:
-            value = message.variables.get(input_var.id)
-            if value and isinstance(value, ChatMessage):
-                inputs.append(to_chat_message(value))
+            value = message.get_variable(input_var.id)
+            inputs.append(to_chat_message(value))
         # Get session ID for memory isolation
         session_id = message.session.session_id

qtype/interpreter/executors/aggregate_executor.py CHANGED Viewed

@@ -11,10 +11,9 @@ class AggregateExecutor(BatchedStepExecutor):
     """
     Executor for the Aggregate step.
-    This is a terminal, many-to-one operation that reduces an entire stream
-    to a single summary message containing counts of successful and failed
-    messages. It processes all messages without modification during the
-    processing phase, then emits a single aggregate summary during finalization.
+    A step that, after all messages have been processed,
+    returns a single message containing the counts of successful and failed
+    messages. Other messages are passed through unchanged.
     """
     def __init__(

qtype/interpreter/executors/construct_executor.py CHANGED Viewed

@@ -4,7 +4,7 @@ from qtype.dsl.model import ListType
 from qtype.interpreter.base.base_step_executor import StepExecutor
 from qtype.interpreter.base.executor_context import ExecutorContext
 from qtype.interpreter.types import FlowMessage
-from qtype.interpreter.typing import instantiate_variable
+from qtype.interpreter.typing import convert_dict_to_typed_variables
 from qtype.semantic.model import Construct
@@ -43,26 +43,26 @@ class ConstructExecutor(StepExecutor):
                 isinstance(output_var.type, ListType)
                 or len(self.step.inputs) == 1
             ):
-                inputs = message.variables[self.step.inputs[0].id]
+                # Single input: pass value directly
+                data = {
+                    output_var.id: message.variables[self.step.inputs[0].id]
+                }
             elif hasattr(output_var.type, "model_validate"):
                 # This is a custom type (Pydantic model)
-                # So input should be a dict
-                input_values = {
-                    input_var.id: message.variables[input_var.id]
-                    for input_var in self.step.inputs
-                }
-                # use the mapping to convert variable names to
-                inputs = {
-                    self.step.field_mapping.get(var_name, var_name): value  # type: ignore[attr-defined]
-                    for var_name, value in input_values.items()
+                # field_bindings maps type field names to Variables
+                data = {
+                    output_var.id: {
+                        field_name: message.variables[var.id]
+                        for field_name, var in self.step.field_bindings.items()
+                    }
                 }
             else:
                 raise ValueError(
                     "Construct step must have either a single input or output of a custom type."
                 )
-            constructed_value = instantiate_variable(output_var, inputs)
-            yield message.copy_with_variables(
-                {output_var.id: constructed_value}
-            )
+            # Use convert_dict_to_typed_variables to validate and convert
+            result = convert_dict_to_typed_variables(data, self.step.outputs)
+            yield message.copy_with_variables(result)
         except Exception as e:
             yield message.copy_with_error(self.step.id, e)

qtype/interpreter/executors/doc_to_text_executor.py CHANGED Viewed

@@ -49,9 +49,7 @@ class DocToTextConverterExecutor(StepExecutor):
         try:
             # Get the input document
-            if input_id not in message.variables:
-                raise ValueError(f"Input variable '{input_id}' is missing")
-            doc = message.variables.get(input_id)
+            doc = message.get_variable(input_id)
             if not isinstance(doc, RAGDocument):
                 raise ValueError(
                     f"Input variable '{input_id}' must be a RAGDocument"

qtype/interpreter/executors/field_extractor_executor.py CHANGED Viewed

@@ -111,15 +111,11 @@ class FieldExtractorExecutor(StepExecutor):
             Multiple messages may be yielded if JSONPath matches multiple values.
         """
         input_id = self.step.inputs[0].id
-        output_id = self.step.outputs[0].id
+        output_var = self.step.outputs[0]
         try:
             # Get the input value
-            input_value = message.variables.get(input_id)
-            if input_value is None:
-                raise ValueError(
-                    f"Input variable '{input_id}' is not set or is None"
-                )
+            input_value = message.get_variable(input_id)
             await self.stream_emitter.status(
                 f"Extracting fields using JSONPath: {self.step.json_path}"
@@ -132,17 +128,20 @@ class FieldExtractorExecutor(StepExecutor):
             matches = self.jsonpath_expr.find(input_dict)
             if not matches:
-                if self.step.fail_on_missing:
+                if output_var.optional:
+                    # Yield message with None output
+                    await self.stream_emitter.status(
+                        "JSONPath matched 0 value(s)"
+                    )
+                    yield message.copy_with_variables({output_var.id: None})
+                    return
+                else:
                     raise ValueError(
                         (
                             f"JSONPath expression '{self.step.json_path}' "
                             f"did not match any data in input"
                         )
                     )
-                else:
-                    # Yield message with None output
-                    yield message.copy_with_variables({output_id: None})
-                    return
             await self.stream_emitter.status(
                 f"JSONPath matched {len(matches)} value(s)"
@@ -156,7 +155,9 @@ class FieldExtractorExecutor(StepExecutor):
                 output_value = self._construct_output(extracted_data)
                 # Yield message with the constructed output
-                yield message.copy_with_variables({output_id: output_value})
+                yield message.copy_with_variables(
+                    {output_var.id: output_value}
+                )
         except Exception as e:
             # Emit error event to stream so frontend can display it

qtype/interpreter/executors/file_source_executor.py CHANGED Viewed

@@ -1,13 +1,13 @@
 from __future__ import annotations
-from pathlib import Path
 from typing import AsyncIterator
-import fsspec
-import pandas as pd
 from qtype.interpreter.base.base_step_executor import StepExecutor
 from qtype.interpreter.base.executor_context import ExecutorContext
+from qtype.interpreter.converters import (
+    dataframe_to_flow_messages,
+    read_dataframe_from_file,
+)
 from qtype.interpreter.types import FlowMessage
 from qtype.semantic.model import ConstantPath, FileSource
@@ -37,8 +37,6 @@ class FileSourceExecutor(StepExecutor):
         Yields:
             FlowMessages with the results of processing.
         """
-        output_columns = {output.id for output in self.step.outputs}
         # get the path
         if isinstance(self.step.path, ConstantPath):  # type: ignore[attr-defined]
             file_path = self.step.path  # type: ignore[attr-defined]
@@ -55,30 +53,16 @@ class FileSourceExecutor(StepExecutor):
                 f"Reading file from path: {file_path}"
             )
-        # Determine file format from extension
+        # Get file path as string
         file_path_str = (
             file_path.uri if isinstance(file_path, ConstantPath) else file_path
         )
-        extension = Path(file_path_str).suffix.lower()
-        # Use fsspec to open the file and read with pandas
-        with fsspec.open(file_path_str, "rb") as file_handle:
-            if extension == ".csv":
-                df = pd.read_csv(file_handle)  # type: ignore[arg-type]
-            elif extension == ".parquet":
-                df = pd.read_parquet(file_handle)  # type: ignore[arg-type]
-            elif extension == ".json":
-                df = pd.read_json(file_handle)  # type: ignore[arg-type]
-            elif extension == ".jsonl":
-                df = pd.read_json(
-                    file_handle,
-                    lines=True,  # type: ignore[arg-type]
-                )
-            else:
-                # Default to parquet if no extension or unknown
-                df = pd.read_parquet(file_handle)  # type: ignore[arg-type]
+        # Read file into DataFrame using helper function
+        df = read_dataframe_from_file(file_path_str)
-        # confirm the outputs exist in the dataframe
+        # Validate that expected output columns are present
+        output_columns = {output.id for output in self.step.outputs}
         columns = set(df.columns)
         missing_columns = output_columns - columns
         if missing_columns:
@@ -90,12 +74,15 @@ class FileSourceExecutor(StepExecutor):
                 )
             )
-        for row in df.to_dict(orient="records"):
-            # Filter to only the expected output columns if they exist
-            row = {
-                str(k): v for k, v in row.items() if str(k) in output_columns
-            }
-            yield message.copy_with_variables(new_variables=row)
+        # Convert DataFrame to FlowMessages with type conversion
+        flow_messages = dataframe_to_flow_messages(
+            df, self.step.outputs, session=message.session
+        )
+        # Yield each message
+        for flow_message in flow_messages:
+            yield flow_message
         await self.stream_emitter.status(
             f"Emitted {len(df)} rows from: {file_path_str}"
         )

qtype/interpreter/executors/invoke_embedding_executor.py CHANGED Viewed

@@ -52,10 +52,7 @@ class InvokeEmbeddingExecutor(StepExecutor):
         try:
             # Get the input value
-            input_value = message.variables.get(input_id)
-            if input_value is None:
-                raise ValueError(f"Input variable '{input_id}' is missing")
+            input_value = message.get_variable(input_id)
             def _call(input_value=input_value):
                 # Generate embedding based on input type

qtype/interpreter/executors/invoke_flow_executor.py CHANGED Viewed

@@ -34,7 +34,7 @@ class InvokeFlowExecutor(StepExecutor):
         initial = message.copy_with_variables(
             {
                 id: message.variables.get(var.id)
-                for var, id in self.step.input_bindings.items()
+                for id, var in self.step.input_bindings.items()
             }
         )
         # Pass through context (already available as self.context)
@@ -46,6 +46,6 @@ class InvokeFlowExecutor(StepExecutor):
             yield msg.copy_with_variables(
                 {
                     var.id: msg.variables.get(id)
-                    for var, id in self.step.output_bindings.items()
+                    for id, var in self.step.output_bindings.items()
                 }
             )

qtype/interpreter/executors/invoke_tool_executor.py CHANGED Viewed

@@ -247,28 +247,26 @@ class InvokeToolExecutor(StepExecutor, ToolExecutionMixin):
         """
         tool_inputs = {}
-        for tool_param_name, step_var_id in self.step.input_bindings.items():
+        for tool_param_name, step_variable in self.step.input_bindings.items():
             # Get tool parameter definition
-            tool_param = self.step.tool.inputs.get(tool_param_name)
+            tool_param = next(
+                (p for p in self.step.tool.inputs if p.id == tool_param_name),
+                None,
+            )
             if not tool_param:
                 raise ValueError(
                     f"Tool parameter '{tool_param_name}' not defined in tool"
                 )
             # Get value from message variables
-            value = message.variables.get(step_var_id)
-            # Handle missing values
-            if value is None:
-                if not tool_param.optional:
-                    raise ValueError(
-                        (
-                            f"Required input '{step_var_id}' for tool "
-                            f"parameter '{tool_param_name}' is missing"
-                        )
-                    )
-                # Skip optional parameters that are missing
-                continue
+            # Use default=None for optional params, let get_variable raise for required
+            if tool_param.optional:
+                value = message.get_variable(step_variable.id, default=None)
+                if value is None:
+                    # Skip optional parameters that are unset
+                    continue
+            else:
+                value = message.get_variable(step_variable.id)
             tool_inputs[tool_param_name] = value
@@ -288,9 +286,12 @@ class InvokeToolExecutor(StepExecutor, ToolExecutionMixin):
         """
         output_vars = {}
-        for tool_param_name, step_var_id in self.step.output_bindings.items():
+        for tool_param_name, step_var in self.step.output_bindings.items():
             # Get tool parameter definition
-            tool_param = self.step.tool.outputs.get(tool_param_name)
+            tool_param = next(
+                (p for p in self.step.tool.outputs if p.id == tool_param_name),
+                None,
+            )
             if not tool_param:
                 raise ValueError(
                     f"Tool parameter '{tool_param_name}' not defined in tool"
@@ -311,7 +312,7 @@ class InvokeToolExecutor(StepExecutor, ToolExecutionMixin):
                 value = result
             if value is not None:
-                output_vars[step_var_id] = value
+                output_vars[step_var.id] = value
         return output_vars

qtype/interpreter/executors/llm_inference_executor.py CHANGED Viewed

@@ -109,7 +109,7 @@ class LLMInferenceExecutor(StepExecutor):
         # Convert input variables to chat messages
         inputs = []
         for input_var in self.step.inputs:
-            value = message.variables.get(input_var.id)
+            value = message.get_variable(input_var.id)
             # Convert any value type to ChatMessage, then to LlamaChatMessage
             chat_msg = variable_to_chat_message(value, input_var)
             inputs.append(to_chat_message(chat_msg))
@@ -160,9 +160,14 @@ class LLMInferenceExecutor(StepExecutor):
         if self.context.on_stream_event:
             # Generate a unique stream ID for this inference
             stream_id = f"llm-{self.step.id}-{id(message)}"
-            async with self.stream_emitter.reasoning_stream(
-                f"llm-{self.step.id}-{id(message)}-reasoning"
-            ) as reasoning:
+            reasoning_stream_id = f"llm-{self.step.id}-{id(message)}-reasoning"
+            async with (
+                self.stream_emitter.reasoning_stream(
+                    reasoning_stream_id
+                ) as reasoning,
+                self.stream_emitter.text_stream(stream_id) as streamer,
+            ):
                 generator = await model.astream_chat(
                     messages=inputs,
                     **(
@@ -171,26 +176,19 @@ class LLMInferenceExecutor(StepExecutor):
                         else {}
                     ),
                 )
-                async for complete_response in generator:
+                async for chat_response in generator:
+                    # Extract and emit reasoning if present
                     reasoning_text = self.__extract_stream_reasoning_(
-                        complete_response
+                        chat_response
                     )
                     if reasoning_text:
                         await reasoning.delta(reasoning_text)
-            async with self.stream_emitter.text_stream(stream_id) as streamer:
-                generator = await model.astream_chat(
-                    messages=inputs,
-                    **(
-                        self.step.model.inference_params
-                        if self.step.model.inference_params
-                        else {}
-                    ),
-                )
-                async for chat_response in generator:
+                    # Emit text delta
                     chat_text = chat_response.delta
-                    if chat_text.strip() != "":
-                        await streamer.delta(chat_response.delta)
+                    if chat_text is not None and chat_text.strip() != "":
+                        await streamer.delta(chat_text)
             # Get the final result
             chat_result = chat_response
         else:

qtype/interpreter/executors/prompt_template_executor.py CHANGED Viewed

@@ -51,9 +51,7 @@ class PromptTemplateExecutor(StepExecutor):
             input_map = {}
             for var in self.step.inputs:
                 if var.id in format_args:
-                    value = message.variables.get(var.id)
-                    if value is not None:
-                        input_map[var.id] = value
+                    input_map[var.id] = message.get_variable(var.id)
             missing = format_args - input_map.keys()
             if missing:

qtype/interpreter/tools/function_tool_helper.py CHANGED Viewed

@@ -14,7 +14,7 @@ from pydantic import create_model
 from qtype.base.types import PrimitiveTypeEnum
 from qtype.dsl.model import ListType
 from qtype.dsl.types import PRIMITIVE_TO_PYTHON_TYPE
-from qtype.semantic.model import APITool, PythonFunctionTool, ToolParameter
+from qtype.semantic.model import APITool, PythonFunctionTool, Variable
 logger = logging.getLogger(__name__)
@@ -29,9 +29,9 @@ class FunctionToolHelper:
     @staticmethod
     def _qtype_type_to_python_type(
-        param: ToolParameter,
+        param: Variable,
     ) -> type:
-        """Convert QType ToolParameter type to Python type for Pydantic.
+        """Convert QType Variable type to Python type for Pydantic.
         The param.type has already been resolved during semantic model
         creation, so we just need to convert it to the appropriate Python
@@ -42,7 +42,7 @@ class FunctionToolHelper:
         - Unknown → str
         Args:
-            param: The QType ToolParameter to convert.
+            param: The QType Variable to convert.
         Returns:
             Python type suitable for Pydantic field annotation.
@@ -55,7 +55,8 @@ class FunctionToolHelper:
         if isinstance(param.type, ListType):
             # Create a mock parameter with the element type to recursively
             # resolve it
-            element_param = ToolParameter(
+            element_param = Variable(
+                id="temp",
                 type=param.type.element_type,
                 optional=False,
             )
@@ -74,13 +75,13 @@ class FunctionToolHelper:
     @staticmethod
     def _create_fn_schema(
         tool_name: str,
-        inputs: dict[str, ToolParameter],
+        inputs: list[Variable],
     ) -> type[BaseModel] | None:
         """Create a Pydantic model from QType tool input parameters.
         Args:
             tool_name: Name of the tool (used for model name).
-            inputs: Dictionary of input parameter names to ToolParameter.
+            inputs: List of input Variables.
         Returns:
             Pydantic BaseModel class representing the tool's input schema.
@@ -91,17 +92,17 @@ class FunctionToolHelper:
         # Each field is a tuple of (type_annotation, field_info)
         field_definitions: dict[str, Any] = {}
-        for param_name, param in inputs.items():
+        for param in inputs:
             python_type = FunctionToolHelper._qtype_type_to_python_type(param)
             # Create field with optional annotation
             if param.optional:
-                field_definitions[param_name] = (
+                field_definitions[param.id] = (
                     python_type | None,  # type: ignore[valid-type]
                     PydanticField(default=None),
                 )
             else:
-                field_definitions[param_name] = (
+                field_definitions[param.id] = (
                     python_type,
                     PydanticField(...),
                 )

qtype 0.1.12__py3-none-any.whl → 0.1.13__py3-none-any.whl

qtype 0.1.12__py3-none-any.whl → 0.1.13__py3-none-any.whl