qtype-0.0.11-py3-none-any.whl → qtype-0.0.13-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (49)
  1. qtype/application/converters/tools_from_api.py +476 -11
  2. qtype/application/converters/tools_from_module.py +37 -13
  3. qtype/application/converters/types.py +17 -3
  4. qtype/application/facade.py +17 -20
  5. qtype/commands/convert.py +36 -2
  6. qtype/commands/generate.py +48 -0
  7. qtype/commands/run.py +1 -0
  8. qtype/commands/serve.py +11 -1
  9. qtype/commands/validate.py +8 -11
  10. qtype/commands/visualize.py +0 -3
  11. qtype/dsl/model.py +190 -4
  12. qtype/dsl/validator.py +2 -1
  13. qtype/interpreter/api.py +5 -1
  14. qtype/interpreter/batch/file_sink_source.py +162 -0
  15. qtype/interpreter/batch/flow.py +1 -1
  16. qtype/interpreter/batch/sql_source.py +3 -6
  17. qtype/interpreter/batch/step.py +12 -1
  18. qtype/interpreter/batch/utils.py +8 -9
  19. qtype/interpreter/step.py +2 -2
  20. qtype/interpreter/steps/tool.py +194 -28
  21. qtype/interpreter/ui/404/index.html +1 -1
  22. qtype/interpreter/ui/404.html +1 -1
  23. qtype/interpreter/ui/_next/static/chunks/393-8fd474427f8e19ce.js +36 -0
  24. qtype/interpreter/ui/_next/static/chunks/{964-ed4ab073db645007.js → 964-2b041321a01cbf56.js} +1 -1
  25. qtype/interpreter/ui/_next/static/chunks/app/{layout-5ccbc44fd528d089.js → layout-a05273ead5de2c41.js} +1 -1
  26. qtype/interpreter/ui/_next/static/chunks/app/page-7e26b6156cfb55d3.js +1 -0
  27. qtype/interpreter/ui/_next/static/chunks/{main-6d261b6c5d6fb6c2.js → main-e26b9cb206da2cac.js} +1 -1
  28. qtype/interpreter/ui/_next/static/chunks/webpack-08642e441b39b6c2.js +1 -0
  29. qtype/interpreter/ui/_next/static/css/b40532b0db09cce3.css +3 -0
  30. qtype/interpreter/ui/_next/static/media/4cf2300e9c8272f7-s.p.woff2 +0 -0
  31. qtype/interpreter/ui/index.html +1 -1
  32. qtype/interpreter/ui/index.txt +4 -4
  33. qtype/loader.py +8 -2
  34. qtype/semantic/generate.py +6 -2
  35. qtype/semantic/model.py +132 -77
  36. qtype/semantic/visualize.py +24 -6
  37. {qtype-0.0.11.dist-info → qtype-0.0.13.dist-info}/METADATA +4 -2
  38. {qtype-0.0.11.dist-info → qtype-0.0.13.dist-info}/RECORD +44 -43
  39. qtype/interpreter/ui/_next/static/chunks/736-7fc606e244fedcb1.js +0 -36
  40. qtype/interpreter/ui/_next/static/chunks/app/page-c72e847e888e549d.js +0 -1
  41. qtype/interpreter/ui/_next/static/chunks/webpack-8289c17c67827f22.js +0 -1
  42. qtype/interpreter/ui/_next/static/css/a262c53826df929b.css +0 -3
  43. qtype/interpreter/ui/_next/static/media/569ce4b8f30dc480-s.p.woff2 +0 -0
  44. /qtype/interpreter/ui/_next/static/{OT8QJQW3J70VbDWWfrEMT → nUaw6_IwRwPqkzwe5s725}/_buildManifest.js +0 -0
  45. /qtype/interpreter/ui/_next/static/{OT8QJQW3J70VbDWWfrEMT → nUaw6_IwRwPqkzwe5s725}/_ssgManifest.js +0 -0
  46. {qtype-0.0.11.dist-info → qtype-0.0.13.dist-info}/WHEEL +0 -0
  47. {qtype-0.0.11.dist-info → qtype-0.0.13.dist-info}/entry_points.txt +0 -0
  48. {qtype-0.0.11.dist-info → qtype-0.0.13.dist-info}/licenses/LICENSE +0 -0
  49. {qtype-0.0.11.dist-info → qtype-0.0.13.dist-info}/top_level.txt +0 -0
qtype/commands/convert.py CHANGED
@@ -9,13 +9,48 @@ import logging
 from pathlib import Path
 
 from qtype.application.facade import QTypeFacade
+from qtype.dsl.model import Application, ToolList
 
 logger = logging.getLogger(__name__)
 
 
 def convert_api(args: argparse.Namespace) -> None:
     """Convert API specification to qtype format."""
-    raise NotImplementedError("API conversion is not implemented yet.")
+    from qtype.application.converters.tools_from_api import tools_from_api
+
+    try:
+        api_name, auths, tools, types = tools_from_api(args.api_spec)
+        if not tools:
+            raise ValueError(
+                f"No tools found from the API specification: {args.api_spec}"
+            )
+        if not auths and not types:
+            doc = ToolList(
+                root=list(tools),
+            )
+        else:
+            doc: Application | ToolList = Application(
+                id=api_name,
+                description=f"Tools created from API specification {args.api_spec}",
+                tools=list(tools),
+                types=types,
+                auths=auths,
+            )
+        # Use facade to convert to YAML format
+        facade = QTypeFacade()
+        content = facade.convert_document(doc)
+
+        # Write to file or stdout
+        if args.output:
+            output_path = Path(args.output)
+            output_path.write_text(content, encoding="utf-8")
+            logger.info(f"✅ Converted tools saved to {output_path}")
+        else:
+            print(content)
+
+    except Exception as e:
+        logger.error(f"❌ Conversion failed: {e}")
+        raise
 
 
 def convert_module(args: argparse.Namespace) -> None:
@@ -23,7 +58,6 @@ def convert_module(args: argparse.Namespace) -> None:
     from qtype.application.converters.tools_from_module import (
        tools_from_module,
    )
-    from qtype.dsl.model import Application, ToolList
 
     try:
        tools, types = tools_from_module(args.module_path)
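For orientation, a minimal sketch of driving the new convert_api entry point directly (the spec path and output name are placeholders, not from this diff; the attribute names mirror the args the command reads above):

    import argparse
    from qtype.commands.convert import convert_api

    # api_spec and output are the two attributes convert_api consumes.
    args = argparse.Namespace(api_spec="openapi.yaml", output="tools.qtype.yaml")
    convert_api(args)  # writes the converted YAML to tools.qtype.yaml

When the spec yields neither auth providers nor custom types, the command emits a bare ToolList document; otherwise it wraps the tools, types, and auths in a full Application keyed by the API's name.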
qtype/commands/generate.py CHANGED
@@ -84,8 +84,56 @@ def generate_schema(args: argparse.Namespace) -> None:
         'output' attribute specifying the output file path.
     """
     schema = Document.model_json_schema()
+
     # Add the $schema property to indicate JSON Schema version
     schema["$schema"] = "http://json-schema.org/draft-07/schema#"
+
+    # Add custom YAML tag definitions for QType loader features
+    if "$defs" not in schema:
+        schema["$defs"] = {}
+
+    # Define custom YAML tags used by QType loader
+    schema["$defs"]["qtype_include_tag"] = {
+        "type": "string",
+        "pattern": "^!include\\s+.+",
+        "description": "Include external YAML file using QType's !include tag",
+    }
+
+    schema["$defs"]["qtype_include_raw_tag"] = {
+        "type": "string",
+        "pattern": "^!include_raw\\s+.+",
+        "description": "Include raw text file using QType's !include_raw tag",
+    }
+
+    schema["$defs"]["qtype_env_var"] = {
+        "type": "string",
+        "pattern": "^.*\\$\\{[^}:]+(?::[^}]*)?\\}.*$",
+        "description": "String with environment variable substitution using ${VAR_NAME} or ${VAR_NAME:default} syntax",
+    }
+
+    # Add these custom patterns to string types throughout the schema
+    def add_custom_patterns(obj):
+        if isinstance(obj, dict):
+            if obj.get("type") == "string" and "anyOf" not in obj:
+                # Add anyOf to allow either regular strings or custom tag patterns
+                original_obj = obj.copy()
+                obj.clear()
+                obj["anyOf"] = [
+                    original_obj,
+                    {"$ref": "#/$defs/qtype_include_tag"},
+                    {"$ref": "#/$defs/qtype_include_raw_tag"},
+                    {"$ref": "#/$defs/qtype_env_var"},
+                ]
+            else:
+                for value in obj.values():
+                    add_custom_patterns(value)
+        elif isinstance(obj, list):
+            for item in obj:
+                add_custom_patterns(item)
+
+    # Apply custom patterns to the schema
+    add_custom_patterns(schema)
+
     output = json.dumps(schema, indent=2)
     output_path: Optional[str] = getattr(args, "output", None)
     if output_path:
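To see what add_custom_patterns does, consider a single string-typed property (illustrative input, not from the diff):

    schema = {"properties": {"path": {"type": "string"}}}
    add_custom_patterns(schema)
    # schema["properties"]["path"] is now roughly:
    # {"anyOf": [{"type": "string"},
    #            {"$ref": "#/$defs/qtype_include_tag"},
    #            {"$ref": "#/$defs/qtype_include_raw_tag"},
    #            {"$ref": "#/$defs/qtype_env_var"}]}

Every plain string slot in the generated schema thus also accepts the !include, !include_raw, and ${VAR}/${VAR:default} forms that the QType loader understands.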
qtype/commands/run.py CHANGED
@@ -112,6 +112,7 @@ def run_flow(args: Any) -> None:
         logger.error(f"❌ Execution failed: {e}")
     except Exception as e:
         logger.error(f"❌ Unexpected error: {e}", exc_info=True)
+        pass
 
 
 def parser(subparsers: argparse._SubParsersAction) -> None:
qtype/commands/serve.py CHANGED
@@ -31,6 +31,7 @@ def serve(args: Any) -> None:
     logger.info(f"Loading and validating spec: {spec_path}")
 
     semantic_model, type_registry = facade.load_semantic_model(spec_path)
+    facade.telemetry(semantic_model)
     logger.info(f"✅ Successfully loaded spec: {spec_path}")
 
     # Import APIExecutor and create the FastAPI app
@@ -43,8 +44,17 @@ def serve(args: Any) -> None:
 
     logger.info(f"Starting server for: {name}")
     api_executor = APIExecutor(semantic_model)
+
+    # Create server info for OpenAPI spec
+    servers = [
+        {
+            "url": f"http://{args.host}:{args.port}",
+            "description": "Development server",
+        }
+    ]
+
     fastapi_app = api_executor.create_app(
-        name=name, ui_enabled=not args.disable_ui
+        name=name, ui_enabled=not args.disable_ui, servers=servers
     )
 
     # Start the server
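The servers list flows through create_app into FastAPI's standard servers keyword (see the qtype/interpreter/api.py diff below), so the generated OpenAPI document advertises the serving URL. A minimal sketch of the effect, with a placeholder host and port:

    from fastapi import FastAPI

    # Equivalent of what serve() now passes through create_app()
    app = FastAPI(title="QType API", servers=[
        {"url": "http://127.0.0.1:8000", "description": "Development server"},
    ])
    # The document at /openapi.json will include a matching "servers" entry.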
qtype/commands/validate.py CHANGED
@@ -10,6 +10,7 @@ import sys
 from pathlib import Path
 from typing import Any
 
+from qtype import dsl
 from qtype.application.facade import QTypeFacade
 from qtype.base.exceptions import LoadError, SemanticError, ValidationError
 
@@ -31,16 +32,11 @@ def main(args: Any) -> None:
 
     try:
         # Use the facade for validation - it will raise exceptions on errors
-        loaded_data = facade.load_and_validate(spec_path)
+        loaded_data, custom_types = facade.load_dsl_document(spec_path)
+        if isinstance(loaded_data, dsl.Application):
+            loaded_data, custom_types = facade.load_semantic_model(spec_path)
         logger.info("✅ Validation successful - document is valid.")
 
-        # If printing is requested, load and print the document
-        if args.print:
-            try:
-                print(loaded_data.model_dump_json(indent=2, exclude_none=True))  # type: ignore
-            except Exception as e:
-                logger.warning(f"Could not print document: {e}")
-
     except LoadError as e:
         logger.error(f"❌ Failed to load document: {e}")
         sys.exit(1)
@@ -50,9 +46,10 @@ def main(args: Any) -> None:
     except SemanticError as e:
         logger.error(f"❌ Semantic validation failed: {e}")
         sys.exit(1)
-    except Exception as e:
-        logger.error(f"❌ Unexpected error during validation: {e}")
-        sys.exit(1)
+
+    # If printing is requested, load and print the document
+    if args.print:
+        logging.info(facade.convert_document(loaded_data))  # type: ignore
 
 
 def parser(subparsers: argparse._SubParsersAction) -> None:
qtype/commands/visualize.py CHANGED
@@ -68,9 +68,6 @@ def main(args: Any) -> None:
     except ValidationError as e:
         logger.error(f"❌ Visualization failed: {e}")
         exit(1)
-    except Exception as e:
-        logger.error(f"❌ Unexpected error: {e}")
-        exit(1)
 
 
 def parser(subparsers: argparse._SubParsersAction) -> None:
qtype/dsl/model.py CHANGED
@@ -45,6 +45,33 @@ def _resolve_variable_type(
         return parsed_type
 
     # --- Case 1: The type is a string ---
+    # Check if it's a list type (e.g., "list[text]")
+    if parsed_type.startswith("list[") and parsed_type.endswith("]"):
+        # Extract the element type from "list[element_type]"
+        element_type_str = parsed_type[5:-1]  # Remove "list[" and "]"
+
+        # Recursively resolve the element type
+        element_type = _resolve_variable_type(
+            element_type_str, custom_type_registry
+        )
+
+        # Allow both primitive types and custom types (but no nested lists)
+        if isinstance(element_type, PrimitiveTypeEnum):
+            return ListType(element_type=element_type)
+        elif isinstance(element_type, str):
+            # This is a custom type reference - store as string for later resolution
+            return ListType(element_type=element_type)
+        elif element_type in DOMAIN_CLASSES.values():
+            # Domain class - store its name as string reference
+            for name, cls in DOMAIN_CLASSES.items():
+                if cls == element_type:
+                    return ListType(element_type=name)
+            return ListType(element_type=str(element_type))
+        else:
+            raise ValueError(
+                f"List element type must be a primitive type or custom type reference, got: {element_type}"
+            )
+
     # Try to resolve it as a primitive type first.
     try:
         return PrimitiveTypeEnum(parsed_type)
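Expected behavior of the new branch, assuming a "text" member on PrimitiveTypeEnum (consistent with the type="text" usage elsewhere in this diff; "MyType" is a hypothetical custom type):

    _resolve_variable_type("list[text]", {})
    # -> ListType(element_type=PrimitiveTypeEnum.text)
    _resolve_variable_type("list[MyType]", {"MyType": ...})
    # -> ListType(element_type="MyType"), resolved later against the registry
    _resolve_variable_type("list[list[text]]", {})
    # -> raises ValueError: nested lists fall through every accepted case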
@@ -107,12 +134,56 @@ class CustomType(StrictBaseModel):
     properties: dict[str, str]
 
 
+class ToolParameter(BaseModel):
+    """Defines a tool input or output parameter with type and optional flag."""
+
+    type: VariableType | str
+    optional: bool = Field(
+        default=False, description="Whether this parameter is optional"
+    )
+
+    @model_validator(mode="before")
+    @classmethod
+    def resolve_type(cls, data: Any, info: ValidationInfo) -> Any:
+        """
+        This validator runs during the main validation pass. It uses the
+        context to resolve string-based type references.
+        """
+        if (
+            isinstance(data, dict)
+            and "type" in data
+            and isinstance(data["type"], str)
+        ):
+            # Get the registry of custom types from the validation context.
+            custom_types = (info.context or {}).get("custom_types", {})
+            resolved = _resolve_variable_type(data["type"], custom_types)
+            data["type"] = resolved
+        return data
+
+
+class ListType(BaseModel):
+    """Represents a list type with a specific element type."""
+
+    element_type: PrimitiveTypeEnum | str = Field(
+        ...,
+        description="Type of elements in the list (primitive type or custom type reference)",
+    )
+
+    def __str__(self) -> str:
+        """String representation for list type."""
+        if isinstance(self.element_type, PrimitiveTypeEnum):
+            return f"list[{self.element_type.value}]"
+        else:
+            return f"list[{self.element_type}]"
+
+
 VariableType = (
     PrimitiveTypeEnum
     | Type[Embedding]
     | Type[ChatMessage]
     | Type[ChatContent]
     | Type[BaseModel]
+    | ListType
 )
 
 
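A sketch of ToolParameter resolution via Pydantic's validation context (the "custom_types" key matches the validator above; the payload itself is invented):

    param = ToolParameter.model_validate(
        {"type": "list[text]", "optional": True},
        context={"custom_types": {}},
    )
    assert str(param.type) == "list[text]"  # ListType.__str__ round-trips the spelling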
@@ -238,15 +309,24 @@ class Condition(Step):
         return self
 
 
-class Tool(Step, ABC):
+class Tool(StrictBaseModel, ABC):
     """
     Base class for callable functions or external operations available to the model or as a step in a flow.
     """
 
+    id: str = Field(..., description="Unique ID of this component.")
     name: str = Field(..., description="Name of the tool function.")
     description: str = Field(
         ..., description="Description of what the tool does."
     )
+    inputs: dict[str, ToolParameter] | None = Field(
+        default=None,
+        description="Input parameters required by this tool.",
+    )
+    outputs: dict[str, ToolParameter] | None = Field(
+        default=None,
+        description="Output parameters produced by this tool.",
+    )
 
 
 class PythonFunctionTool(Tool):
@@ -277,6 +357,10 @@ class APITool(Tool):
         default=None,
         description="Optional HTTP headers to include in the request.",
     )
+    parameters: dict[str, ToolParameter] | None = Field(
+        default=None,
+        description="Output parameters produced by this tool.",
+    )
 
 
 class LLMInference(Step):
@@ -376,6 +460,23 @@ class Decoder(Step):
         return self
 
 
+class Invoke(Step):
+    """Invokes a tool with input and output bindings."""
+
+    tool: ToolType | str = Field(
+        ...,
+        description="Tool to invoke.",
+    )
+    input_bindings: dict[str, str] = Field(
+        ...,
+        description="Mapping from step input IDs to tool input parameter names.",
+    )
+    output_bindings: dict[str, str] = Field(
+        ...,
+        description="Mapping from tool output parameter names to step output IDs.",
+    )
+
+
 #
 # ---------------- Observability and Authentication Components ----------------
 #
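An illustrative Invoke step in dict form (field names come from the model above; the tool id and binding names are invented for the example):

    invoke_step = {
        "tool": "get_weather",                      # ToolType instance or string reference
        "input_bindings": {"city": "location"},     # step input id -> tool input parameter
        "output_bindings": {"forecast": "result"},  # tool output parameter -> step output id
    }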
@@ -400,6 +501,13 @@ class APIKeyAuthProvider(AuthorizationProvider):
     )
 
 
+class BearerTokenAuthProvider(AuthorizationProvider):
+    """Bearer token authentication provider."""
+
+    type: Literal["bearer_token"] = "bearer_token"
+    token: str = Field(..., description="Bearer token for authentication.")
+
+
 class OAuth2AuthProvider(AuthorizationProvider):
     """OAuth2 authentication provider."""
 
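Illustrative provider data (the "bearer_token" discriminator comes from the model above; the id and token values are placeholders, with the token pulled from an environment variable per the loader's ${VAR} syntax):

    bearer_auth = {
        "id": "my-api-auth",
        "type": "bearer_token",
        "token": "${API_TOKEN}",
    }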
@@ -594,6 +702,38 @@ class SQLSource(Source):
         return self
 
 
+class FileSource(Source):
+    """File source that reads data from a file using fsspec-compatible URIs."""
+
+    path: str | None = Field(
+        default=None,
+        description="fsspec-compatible URI to read from. If None, expects 'path' input variable.",
+    )
+
+    @model_validator(mode="after")
+    def validate_file_source(self) -> "FileSource":
+        """Validate that either path is specified or 'path' input variable exists."""
+        if self.path is None:
+            # Check if 'path' input variable exists
+            if self.inputs is None:
+                raise ValueError(
+                    "FileSource must either specify 'path' field or have a 'path' input variable."
+                )
+
+            path_input_exists = any(
+                (isinstance(inp, Variable) and inp.id == "path")
+                or (isinstance(inp, str) and inp == "path")
+                for inp in self.inputs
+            )
+
+            if not path_input_exists:
+                raise ValueError(
+                    "FileSource must either specify 'path' field or have a 'path' input variable."
+                )
+
+        return self
+
+
 class Sink(Step):
     """Base class for data sinks"""
 
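The validator admits exactly two shapes, sketched here in dict form (ids and URIs are invented):

    static_source = {"id": "read-data", "path": "s3://bucket/data.parquet"}
    dynamic_source = {"id": "read-data", "inputs": ["path"]}  # path bound at runtime
    # Omitting both the path field and a 'path' input raises the ValueError above.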
@@ -606,6 +746,47 @@ class Sink(Step):
     )
 
 
+class FileSink(Sink):
+    """File sink that writes data to a file using fsspec-compatible URIs."""
+
+    path: str | None = Field(
+        default=None,
+        description="fsspec-compatible URI to write to. If None, expects 'path' input variable.",
+    )
+
+    @model_validator(mode="after")
+    def validate_file_sink(self) -> "FileSink":
+        """Validate that either path is specified or 'path' input variable exists."""
+        # Ensure user does not set any output variables
+        if self.outputs is not None and len(self.outputs) > 0:
+            raise ValueError(
+                "FileSink outputs are automatically generated. Do not specify outputs."
+            )
+
+        # Automatically set the output variable
+        self.outputs = [Variable(id=f"{self.id}-file-uri", type="text")]
+
+        if self.path is None:
+            # Check if 'path' input variable exists
+            if self.inputs is None:
+                raise ValueError(
+                    "FileSink must either specify 'path' field or have a 'path' input variable."
+                )
+
+            path_input_exists = any(
+                (isinstance(inp, Variable) and inp.id == "path")
+                or (isinstance(inp, str) and inp == "path")
+                for inp in self.inputs
+            )
+
+            if not path_input_exists:
+                raise ValueError(
+                    "FileSink must either specify 'path' field or have a 'path' input variable."
+                )
+
+        return self
+
+
 #
 # ---------------- Retrieval Augmented Generation Components ----------------
 #
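FileSink generates its single output automatically: a sink with id "write-data" exposes a "write-data-file-uri" variable of type "text" downstream, and declaring outputs by hand raises the ValueError above. Sketch (id and URI invented):

    sink = {"id": "write-data", "path": "s3://bucket/out.parquet"}
    # After validation: sink.outputs == [Variable(id="write-data-file-uri", type="text")]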
@@ -704,11 +885,15 @@ ToolType = Union[
 ]
 
 # Create a union type for all source types
-SourceType = Union[SQLSource,]
+SourceType = Union[
+    FileSource,
+    SQLSource,
+]
 
 # Create a union type for all authorization provider types
 AuthProviderType = Union[
     APIKeyAuthProvider,
+    BearerTokenAuthProvider,
     AWSAuthProvider,
     OAuth2AuthProvider,
 ]
@@ -716,15 +901,16 @@ AuthProviderType = Union[
 # Create a union type for all step types
 StepType = Union[
     Agent,
-    APITool,
     Condition,
     Decoder,
     DocumentSearch,
+    FileSink,
+    FileSource,
     Flow,
     IndexUpsert,
+    Invoke,
     LLMInference,
     PromptTemplate,
-    PythonFunctionTool,
     SQLSource,
     Sink,
     VectorSearch,
qtype/dsl/validator.py CHANGED
@@ -21,7 +21,8 @@ class DuplicateComponentError(QTypeValidationError):
         existing_obj: qtype.dsl.domain_types.StrictBaseModel,
     ):
         super().__init__(
-            f"Duplicate component with ID '{obj_id}' found:\n{found_obj.model_dump_json()}\nAlready exists:\n{existing_obj.model_dump_json()}"
+            f'Duplicate component with ID "{obj_id}" found.'
+            # f"Duplicate component with ID \"{obj_id}\" found:\n{found_obj.model_dump_json()}\nAlready exists:\n{existing_obj.model_dump_json()}"
         )
 
 
qtype/interpreter/api.py CHANGED
@@ -37,6 +37,7 @@ class APIExecutor:
         name: str | None = None,
         ui_enabled: bool = True,
         fast_api_args: dict | None = None,
+        servers: list[dict] | None = None,
     ) -> FastAPI:
         """Create FastAPI app with dynamic endpoints."""
         if fast_api_args is None:
@@ -45,6 +46,10 @@ class APIExecutor:
             "redoc_url": "/redoc",
         }
 
+        # Add servers to FastAPI kwargs if provided
+        if servers is not None:
+            fast_api_args["servers"] = servers
+
         app = FastAPI(title=name or "QType API", **fast_api_args)
 
         # Serve static UI files if they exist
@@ -158,7 +163,6 @@ class APIExecutor:
                 status_code=400,
                 detail=f"Required input '{var.id}' not provided",
             )
-        return flow_copy
         # Execute the flow
         result_vars = execute_flow(flow_copy)
 
qtype/interpreter/batch/file_sink_source.py ADDED
@@ -0,0 +1,162 @@
+from typing import Any, Tuple
+
+import fsspec  # type: ignore[import-untyped]
+import pandas as pd
+
+from qtype.base.exceptions import InterpreterError
+from qtype.interpreter.batch.types import BatchConfig, ErrorMode
+from qtype.interpreter.batch.utils import reconcile_results_and_errors
+from qtype.semantic.model import FileSink, FileSource
+
+
+def execute_file_source(
+    step: FileSource,
+    inputs: pd.DataFrame,
+    batch_config: BatchConfig,
+    **kwargs: dict[Any, Any],
+) -> Tuple[pd.DataFrame, pd.DataFrame]:
+    """Executes a FileSource step to read data from a file using fsspec.
+
+    Args:
+        step: The FileSource step to execute.
+        inputs: Input DataFrame (may contain path variable).
+        batch_config: Configuration for batch processing.
+        **kwargs: Additional keyword arguments.
+
+    Returns:
+        A tuple containing two DataFrames:
+        - The first DataFrame contains the successfully read data.
+        - The second DataFrame contains rows that encountered errors with an 'error' column.
+    """
+    output_columns = {output.id for output in step.outputs}
+
+    results = []
+    errors = []
+
+    # FileSource has cardinality 'many', so it reads once and produces multiple output rows
+    # We process each input row (which might have different paths) separately
+    for _, row in inputs.iterrows():
+        try:
+            file_path = step.path if step.path else row.get("path")
+            if not file_path:
+                raise InterpreterError(
+                    f"No path specified for {type(step).__name__}. "
+                    "Either set the 'path' field or provide a 'path' input variable."
+                )
+
+            # Use fsspec to open the file and read with pandas
+            with fsspec.open(file_path, "rb") as file_handle:
+                df = pd.read_parquet(file_handle)  # type: ignore[arg-type]
+
+            # Filter to only the expected output columns if they exist
+            if output_columns and len(df) > 0:
+                available_columns = set(df.columns)
+                missing_columns = output_columns - available_columns
+                if missing_columns:
+                    raise InterpreterError(
+                        f"File {file_path} missing expected columns: {', '.join(missing_columns)}. "
+                        f"Available columns: {', '.join(available_columns)}"
+                    )
+                df = df[[col for col in df.columns if col in output_columns]]
+
+            results.append(df)
+
+        except Exception as e:
+            if batch_config.error_mode == ErrorMode.FAIL:
+                raise e
+
+            # If there's an error, add it to the errors list
+            error_df = pd.DataFrame([{"error": str(e)}])
+            errors.append(error_df)
+
+    return reconcile_results_and_errors(results, errors)
+
+
+def execute_file_sink(
+    step: FileSink,
+    inputs: pd.DataFrame,
+    batch_config: BatchConfig,
+    **kwargs: dict[Any, Any],
+) -> Tuple[pd.DataFrame, pd.DataFrame]:
+    """Executes a FileSink step to write data to a file using fsspec.
+
+    Args:
+        step: The FileSink step to execute.
+        inputs: Input DataFrame containing data to write.
+        batch_config: Configuration for batch processing.
+        **kwargs: Additional keyword arguments.
+
+    Returns:
+        A tuple containing two DataFrames:
+        - The first DataFrame contains success indicators.
+        - The second DataFrame contains rows that encountered errors with an 'error' column.
+    """
+    # this is enforced by the dsl, but we'll check here to confirm
+    if len(step.outputs) > 1:
+        raise InterpreterError(
+            f"There should only be one output variable for {type(step).__name__}."
+        )
+    output_column_name = step.outputs[0].id
+
+    # make a list of all file paths
+    try:
+        if step.path:
+            file_paths = [step.path] * len(inputs)
+        else:
+            if "path" not in inputs.columns:
+                raise InterpreterError(
+                    f"No path specified for {type(step).__name__}. "
+                    "Either set the 'path' field or provide a 'path' input variable."
+                )
+            file_paths = inputs["path"].tolist()
+    except Exception as e:
+        if batch_config.error_mode == ErrorMode.FAIL:
+            raise e
+        # If we can't get the path, we can't proceed
+        return pd.DataFrame(), pd.DataFrame([{"error": str(e)}])
+
+    # Check if all paths are the same
+    unique_paths = list(set(file_paths))
+
+    if len(unique_paths) == 1:
+        # All rows write to the same file - process as one batch
+        file_path = unique_paths[0]
+
+        try:
+            # Use fsspec to write the parquet file
+            input_columns = [i.id for i in step.inputs]
+            with fsspec.open(file_path, "wb") as file_handle:
+                inputs[input_columns].to_parquet(file_handle, index=False)  # type: ignore[arg-type]
+
+            inputs[output_column_name] = file_path
+            return inputs, pd.DataFrame()
+
+        except Exception as e:
+            if batch_config.error_mode == ErrorMode.FAIL:
+                raise e
+
+            # If there's an error, return error for all rows
+            error_df = pd.DataFrame([{"error": str(e)}])
+            return pd.DataFrame(), error_df
+
+    else:
+        # Multiple unique paths - split inputs and process recursively
+        all_results = []
+        all_errors = []
+
+        for unique_path in unique_paths:
+            # Create mask for rows with this path
+            path_mask = [p == unique_path for p in file_paths]
+            sliced_inputs = inputs[path_mask].copy()
+
+            # Recursively call execute_file_sink with the sliced DataFrame
+            results, errors = execute_file_sink(
+                step, sliced_inputs, batch_config, **kwargs
+            )
+
+            if len(results) > 0:
+                all_results.append(results)
+            if len(errors) > 0:
+                all_errors.append(errors)
+
+        return reconcile_results_and_errors(all_results, all_errors)
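A hypothetical driver for the new reader (the parquet path is a placeholder, source_step stands in for a semantic FileSource whose construction is outside this diff, and BatchConfig is assumed to accept error_mode as the code above suggests):

    import pandas as pd
    from qtype.interpreter.batch.file_sink_source import execute_file_source
    from qtype.interpreter.batch.types import BatchConfig, ErrorMode

    inputs = pd.DataFrame([{"path": "data/example.parquet"}])
    rows, errors = execute_file_source(
        source_step, inputs, BatchConfig(error_mode=ErrorMode.FAIL)
    )
    # rows: columns filtered to the step's declared outputs.
    # With ErrorMode.FAIL any read error re-raises; other modes (not shown in
    # this diff) would collect failures into the second DataFrame instead.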