PyPI - qtype - Versions diffs - 0.1.12__py3-none-any.whl → 0.1.13__py3-none-any.whl - Mend

qtype 0.1.12__py3-none-any.whl → 0.1.13__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (252) hide show

qtype/dsl/model.py CHANGED Viewed

@@ -79,6 +79,11 @@ def _resolve_list_type(
             if cls == element_type:
                 return ListType(element_type=name)
         return ListType(element_type=str(element_type))
+    elif isinstance(element_type, type) and issubclass(
+        element_type, BaseModel
+    ):
+        # Custom type class - store its name as string reference
+        return ListType(element_type=element_type.__name__)
     else:
         raise ValueError(
             (
@@ -140,6 +145,8 @@ def _resolve_variable_type(
     Resolve a type to its corresponding representation.
     Handles primitive types, list types, domain types, and custom types.
+    Unknown types are rejected: a ValueError listing the available primitive,
+    domain, and (when registered) custom types is raised.
     Args:
         parsed_type: The type to resolve (can be string or already resolved)
@@ -172,17 +179,26 @@ def _resolve_variable_type(
     if custom is not None:
         return custom
-    # If it's not any known type, return it as a string.
-    # This assumes it might be a forward reference to a custom type.
-    return parsed_type
+    # If it's not any known type, raise an error
+    available_types = (
+        f"primitive types ({', '.join([t.value for t in PrimitiveTypeEnum])}), "
+        f"domain types ({', '.join(DOMAIN_CLASSES.keys())})"
+    )
+    if custom_type_registry:
+        available_types += (
+            f", or custom types ({', '.join(custom_type_registry.keys())})"
+        )
+    raise ValueError(
+        f"Unknown type '{parsed_type}'. Must be one of: {available_types}"
+    )
 def _resolve_type_field_validator(data: Any, info: ValidationInfo) -> Any:
     """
     Shared validator for resolving 'type' fields in models.
-    This validator resolves string-based type references using the custom
-    type registry from the validation context.
+    This validator handles optional '?' syntax and resolves string-based
+    type references using the custom type registry from the validation context.
     Args:
         data: The data dict being validated
@@ -196,6 +212,13 @@ def _resolve_type_field_validator(data: Any, info: ValidationInfo) -> Any:
         and "type" in data
         and isinstance(data["type"], str)
     ):
+        # Handle '?' suffix for optional types BEFORE type resolution
+        type_value = data["type"]
+        if type_value.endswith("?"):
+            # Strip '?' and mark as optional
+            data["type"] = type_value[:-1]
+            data["optional"] = True
         # Get the registry of custom types from the validation context.
         custom_types = (info.context or {}).get("custom_types", {})
         resolved = _resolve_variable_type(data["type"], custom_types)
@@ -203,6 +226,54 @@ def _resolve_type_field_validator(data: Any, info: ValidationInfo) -> Any:
     return data
+def _merge_vars_from_bindings(
+    existing: list[Reference[Variable] | str],
+    bindings: dict[str, Reference[Variable] | str],
+) -> list[Reference[Variable] | str]:
+    """Combine a variable list with the values of a bindings dict, keeping one entry per ID.
+    Entries are visited in order: everything in ``existing`` first, then the
+    binding values. The first entry seen for a given ID wins and is kept in
+    the form it arrived in (Reference or plain string).
+    Args:
+        existing: Variable references or IDs already present
+        bindings: Dict whose values are variable references or IDs
+    Returns:
+        A new list holding the first occurrence of each distinct variable ID
+    """
+    # A dict keeps insertion order, so setdefault gives "first occurrence wins".
+    first_by_id: dict = {}
+    for candidate in [*existing, *bindings.values()]:
+        key = candidate.ref if isinstance(candidate, Reference) else candidate
+        first_by_id.setdefault(key, candidate)
+    return list(first_by_id.values())
+def _type_to_string(type_value: Any) -> str:
+    """Render a type value as a plain string.
+    Args:
+        type_value: A string, PrimitiveTypeEnum member, ListType, class, or any other object
+    Returns:
+        The string unchanged, the enum member's ``value``, the class's
+        ``__name__``, or ``str()`` of the value for ListType and everything else
+    """
+    if isinstance(type_value, str):
+        return type_value
+    if isinstance(type_value, PrimitiveTypeEnum):
+        return type_value.value
+    # ListType instances are tested before the class check, as in the original chain.
+    if not isinstance(type_value, ListType) and isinstance(type_value, type):
+        return type_value.__name__
+    return str(type_value)
 class Variable(StrictBaseModel):
     """Schema for a variable that can serve as input, output, or parameter within the DSL."""
@@ -216,15 +287,37 @@ class Variable(StrictBaseModel):
             "Type of data expected or produced. Either a CustomType or domain specific type."
         ),
     )
+    optional: bool = Field(
+        default=False,
+        description=(
+            "Whether this variable can be unset or None. "
+            "Use '?' suffix in type string as shorthand (e.g., 'text?')."
+        ),
+    )
-    ui: UIType | None = Field(None, description="Hints for the UI if needed.")
+    ui: UIType | None = Field(
+        default=None, description="Hints for the UI if needed."
+    )
     @model_validator(mode="before")
     @classmethod
     def resolve_type(cls, data: Any, info: ValidationInfo) -> Any:
-        """Resolve string-based type references using the shared validator."""
+        """Resolve string-based type references and handle optional '?' syntax."""
         return _resolve_type_field_validator(data, info)
+    @model_serializer
+    def serialize_model(self):
+        """Serialize to id, type (with a trailing '?' when optional) and, if set, ui."""
+        suffix = "?" if self.optional else ""
+        payload: dict[str, Any] = {
+            "id": self.id,
+            "type": f"{_type_to_string(self.type)}{suffix}",
+        }
+        # The ui key is omitted entirely when no UI hint is present.
+        if self.ui is not None:
+            payload["ui"] = self.ui.model_dump()
+        return payload
     @model_validator(mode="after")
     def validate_ui_type(self) -> Variable:
         """Ensure at least one credential source is provided."""
@@ -260,37 +353,6 @@ class CustomType(StrictBaseModel):
     properties: dict[str, str]
-class ToolParameter(BaseModel):
-    """Defines a tool input or output parameter with type and optional flag."""
-    type: VariableType | str
-    optional: bool = Field(
-        default=False, description="Whether this parameter is optional"
-    )
-    @model_validator(mode="before")
-    @classmethod
-    def resolve_type(cls, data: Any, info: ValidationInfo) -> Any:
-        """Resolve string-based type references using the shared validator."""
-        return _resolve_type_field_validator(data, info)
-    @staticmethod
-    def _serialize_type(value):
-        if isinstance(value, type):
-            return value.__name__
-        elif hasattr(value, "__name__"):
-            return value.__name__
-        return value
-    @model_serializer
-    def _model_serializer(self):
-        # Use the default serialization, but ensure 'type' is a string
-        return {
-            "type": self._serialize_type(self.type),
-            "optional": self.optional,
-        }
 class ListType(BaseModel):
     """Represents a list type with a specific element type."""
@@ -418,11 +480,19 @@ class Construct(Step):
     """A step that converts variables into an instance of a Custom or Domain Type"""
     type: Literal["Construct"] = "Construct"
-    field_mapping: dict[str, str] = Field(
+    field_bindings: dict[str, Reference[Variable] | str] = Field(
         ...,
-        description="Mapping of type inputs to variable names, if needed.",
+        description="Mapping from type field names to flow variable names.",
     )
+    @model_validator(mode="after")
+    def infer_inputs_from_bindings(self) -> "Construct":
+        """Extend inputs with the variables named as values in field_bindings.
+        Merging and de-duplication are delegated to _merge_vars_from_bindings.
+        """
+        merged_inputs = _merge_vars_from_bindings(self.inputs, self.field_bindings)
+        self.inputs = merged_inputs
+        return self
 class PromptTemplate(Step):
     """Defines a prompt template with a string format and variable bindings.
@@ -445,12 +515,12 @@ class Tool(StrictBaseModel, ABC):
     description: str = Field(
         ..., description="Description of what the tool does."
     )
-    inputs: dict[str, ToolParameter] = Field(
-        default_factory=dict,
+    inputs: list[Variable] = Field(
+        default_factory=list,
         description="Input parameters required by this tool.",
     )
-    outputs: dict[str, ToolParameter] = Field(
-        default_factory=dict,
+    outputs: list[Variable] = Field(
+        default_factory=list,
         description="Output parameters produced by this tool.",
     )
@@ -485,9 +555,9 @@ class APITool(Tool):
         default_factory=dict,
         description="Optional HTTP headers to include in the request.",
     )
-    parameters: dict[str, ToolParameter] = Field(
-        default_factory=dict,
-        description="Output parameters produced by this tool.",
+    parameters: list[Variable] = Field(
+        default_factory=list,
+        description="Path and query parameters for the API call.",
     )
@@ -622,6 +692,9 @@ class FieldExtractor(Step):
     The extracted data is used to construct the output variable by passing it
     as keyword arguments to the output type's constructor.
+    If there is no match and the output variable is optional, it is set to None.
+    If there is no match and the output variable is required, an error is raised.
     Example JSONPath expressions:
     - `$.field_name` - Extract a single field
     - `$.items[*]` - Extract all items from a list
@@ -633,10 +706,6 @@ class FieldExtractor(Step):
         ...,
         description="JSONPath expression to extract data from the input. Uses jsonpath-ng syntax.",
     )
-    fail_on_missing: bool = Field(
-        default=True,
-        description="Whether to raise an error if the JSONPath matches no data. If False, returns None.",
-    )
 class InvokeTool(Step, ConcurrentStepMixin):
@@ -648,33 +717,23 @@ class InvokeTool(Step, ConcurrentStepMixin):
         ...,
         description="Tool to invoke.",
     )
-    input_bindings: dict[str, str] = Field(
+    input_bindings: dict[str, Reference[Variable] | str] = Field(
         ...,
-        description="Mapping from variable references to tool input parameter names.",
+        description="Mapping from tool parameter names to flow variable names.",
     )
-    output_bindings: dict[str, str] = Field(
+    output_bindings: dict[str, Reference[Variable] | str] = Field(
         ...,
-        description="Mapping from variable references to tool output parameter names.",
+        description="Mapping from tool output names to flow variable names.",
     )
     @model_validator(mode="after")
     def infer_inputs_outputs_from_bindings(self) -> "InvokeTool":
-        def _merge_vars(
-            existing: list[Reference[Variable] | str],
-            bindings: dict[str, str],
-        ) -> list[Reference[Variable] | str]:
-            """Merge existing variables with bindings and deduplicate."""
-            # NOTE: doesn't handle references. You may duplicate inputs here..
-            existing_ids = [item for item in existing if isinstance(item, str)]
-            inferred_ids = list(bindings.values())
-            merged_ids: list[Reference[Variable] | str] = [
-                Reference[Variable].model_validate({"$ref": var_id})
-                for var_id in dict.fromkeys(existing_ids + inferred_ids)
-            ]
-            return merged_ids
-        self.inputs = _merge_vars(self.inputs, self.input_bindings)
-        self.outputs = _merge_vars(self.outputs, self.output_bindings)
+        self.inputs = _merge_vars_from_bindings(
+            self.inputs, self.input_bindings
+        )
+        self.outputs = _merge_vars_from_bindings(
+            self.outputs, self.output_bindings
+        )
         return self
@@ -687,15 +746,25 @@ class InvokeFlow(Step):
         ...,
         description="Flow to invoke.",
     )
-    input_bindings: dict[Reference[Variable], str] = Field(
+    input_bindings: dict[str, Reference[Variable] | str] = Field(
         ...,
-        description="Mapping from variable references to flow input variable IDs.",
+        description="Mapping from flow input variable IDs to step variable names.",
     )
-    output_bindings: dict[Reference[Variable], str] = Field(
+    output_bindings: dict[str, Reference[Variable] | str] = Field(
         ...,
-        description="Mapping from variable references to flow output variable IDs.",
+        description="Mapping from flow output variable IDs to step variable names.",
     )
+    @model_validator(mode="after")
+    def infer_inputs_outputs_from_bindings(self) -> "InvokeFlow":
+        """Extend inputs and outputs with the variables named in the bindings.
+        Each list is merged with the values of its bindings dict through
+        _merge_vars_from_bindings; inputs are handled first, then outputs.
+        """
+        for list_attr, bindings_attr in (
+            ("inputs", "input_bindings"),
+            ("outputs", "output_bindings"),
+        ):
+            merged = _merge_vars_from_bindings(
+                getattr(self, list_attr), getattr(self, bindings_attr)
+            )
+            setattr(self, list_attr, merged)
+        return self
 #
 # ---------------- Secret Manager Component ----------------
@@ -1000,6 +1069,21 @@ class FileSource(Source):
         description="Reference to a variable with an fsspec-compatible URI to read from, or the uri itself.",
     )
+    @model_validator(mode="after")
+    def infer_inputs_from_path(self) -> "FileSource":
+        """Register the path variable as an input when path refers to a variable.
+        A string path is wrapped in a Reference before being added; a Reference
+        path is added as-is. Any other kind of path leaves inputs untouched.
+        """
+        path = self.path
+        if isinstance(path, str):
+            # Path is a variable ID: skip it if either the ID or its Reference is already listed
+            candidate = Reference[Variable].model_validate({"$ref": path})
+            already_listed = candidate in self.inputs or path in self.inputs
+        elif isinstance(path, Reference):
+            candidate = path
+            already_listed = path in self.inputs
+        else:
+            # If path is ConstantPath, don't add to inputs
+            return self
+        if not already_listed:
+            self.inputs = [*self.inputs, candidate]
+        return self
 class Writer(Step, BatchableStepMixin):
     """Base class for things that write data in batches."""
@@ -1020,22 +1104,31 @@ class FileWriter(Writer, BatchableStepMixin):
         description="Configuration for processing the input stream in batches. If omitted, the step processes items one by one.",
     )
+    @model_validator(mode="after")
+    def infer_inputs_from_path(self) -> "FileWriter":
+        """Register the path variable as an input when path refers to a variable.
+        A string path is wrapped in a Reference before being added; a Reference
+        path is added as-is. Any other kind of path leaves inputs untouched.
+        """
+        path = self.path
+        if isinstance(path, str):
+            # Path is a variable ID: skip it if either the ID or its Reference is already listed
+            candidate = Reference[Variable].model_validate({"$ref": path})
+            already_listed = candidate in self.inputs or path in self.inputs
+        elif isinstance(path, Reference):
+            candidate = path
+            already_listed = path in self.inputs
+        else:
+            # If path is ConstantPath, don't add to inputs
+            return self
+        if not already_listed:
+            self.inputs = [*self.inputs, candidate]
+        return self
 class Aggregate(Step):
     """
-    A terminal step that consumes an entire input stream and produces a single
-    summary message with success/error counts.
+    A step that, after all messages have been processed,
+    returns a single message containing the counts of successful and failed
+    messages. Other messages are passed through unchanged.
     """
     type: Literal["Aggregate"] = "Aggregate"
-    # Outputs are now optional. The user can provide 0, 1, 2, or 3 names.
-    # The order will be: success_count, error_count, total_count
-    outputs: list[Reference[Variable] | str] = Field(
-        default_factory=list,
-        description="References to the variables for the output. There should be one and only one output with type AggregateStats",
-    )
 #
 # ---------------- Retrieval Augmented Generation Components ----------------

qtype/examples/data_processing/athena_query.qtype.yaml ADDED Viewed

@@ -0,0 +1,56 @@
+id: athena-query-example
+description: Query AWS Athena database and process results
+auths:
+  - type: aws
+    id: aws_auth
+    region: us-east-1
+    profile_name: default
+flows:
+  - type: Flow
+    id: query-athena
+    variables:
+      - id: min_sales
+        type: int
+      - id: product_id
+        type: text
+      - id: product_name
+        type: text
+      - id: total_sales
+        type: int
+      - id: region
+        type: text
+    inputs:
+      - min_sales
+    outputs:
+      - product_id
+      - product_name
+      - total_sales
+      - region
+    steps:
+      - type: SQLSource
+        id: load_sales
+        connection: "awsathena+rest://:@athena.us-east-1.amazonaws.com:443/sales_db?s3_staging_dir=s3://my-results-bucket/athena-results/&work_group=primary&catalog_name=some_catalog"
+        auth: aws_auth
+        query: |
+          SELECT
+            product_id,
+            product_name,
+            total_sales,
+            region
+          FROM product_sales
+          WHERE total_sales >= :min_sales
+          ORDER BY total_sales DESC
+          LIMIT 100
+        inputs:
+          - min_sales
+        outputs:
+          - product_id
+          - product_name
+          - total_sales
+          - region

qtype/examples/data_processing/batch_inputs.csv ADDED Viewed

@@ -0,0 +1,5 @@
+query,topic
+What is machine learning?,Technology
+Best pasta recipes,Cooking
+How to train for a marathon,Fitness
+Climate change solutions,Environment

qtype/examples/data_processing/create_sample_db.py ADDED Viewed

@@ -0,0 +1,129 @@
+"""Create a sample SQLite database with product reviews for the example."""
+import sqlite3
+from pathlib import Path
+# Sample product reviews data.
+# Each tuple is (review_id, product_name, rating, review_text), the column
+# order used by the INSERT statement in create_database; ratings are 1-5.
+SAMPLE_REVIEWS = [
+    (
+        1,
+        "Wireless Headphones",
+        5,
+        "Amazing sound quality! The noise cancellation is superb and "
+        "battery lasts all day. Highly recommend for music lovers.",
+    ),
+    (
+        2,
+        "Wireless Headphones",
+        2,
+        "Disappointed with the build quality. They broke after just 2 "
+        "weeks of normal use. Sound is okay but not worth the price.",
+    ),
+    (
+        3,
+        "Smart Watch",
+        4,
+        "Great fitness tracker with accurate heart rate monitoring. "
+        "Battery life could be better, but overall very satisfied.",
+    ),
+    (
+        4,
+        "Smart Watch",
+        5,
+        "Best smartwatch I've owned! Seamless integration with my phone, "
+        "tons of useful features, and looks professional.",
+    ),
+    (
+        5,
+        "Laptop Stand",
+        3,
+        "Does the job but feels flimsy. The adjustability is limited and "
+        "it wobbles a bit. Expected better quality for the price.",
+    ),
+    (
+        6,
+        "Laptop Stand",
+        5,
+        "Perfect for my home office setup! Sturdy construction, multiple "
+        "height options, and really helps with posture.",
+    ),
+    (
+        7,
+        "USB-C Hub",
+        4,
+        "Works well with all my devices. All ports function properly and "
+        "data transfer is fast. Gets a bit warm during heavy use.",
+    ),
+    (
+        8,
+        "USB-C Hub",
+        1,
+        "Stopped working after a week. One port was DOA and then the whole "
+        "hub died. Total waste of money.",
+    ),
+    (
+        9,
+        "Mechanical Keyboard",
+        5,
+        "Typing feels incredible! The switches are responsive and the build "
+        "quality is excellent. Worth every penny.",
+    ),
+    (
+        10,
+        "Mechanical Keyboard",
+        4,
+        "Great keyboard for coding. Switches are a bit loud for an office "
+        "environment but the tactile feedback is amazing.",
+    ),
+]
+def create_database(db_path: Path | str) -> None:
+    """Create SQLite database with sample product reviews.
+    Any existing file at ``db_path`` is deleted first, then a fresh
+    ``product_reviews`` table is created and filled from SAMPLE_REVIEWS.
+    The connection is always closed, even if a statement fails, and the
+    inserts are rolled back on error instead of being left uncommitted.
+    Args:
+        db_path: Path where the database file should be created
+    Raises:
+        sqlite3.Error: If the table cannot be created or the rows inserted
+        OSError: If an existing file at db_path cannot be removed
+    """
+    db_path = Path(db_path)
+    # Remove existing database if it exists (no separate exists() check,
+    # so there is no window between the check and the delete)
+    db_path.unlink(missing_ok=True)
+    create_table_sql = """
+        CREATE TABLE product_reviews (
+            review_id INTEGER PRIMARY KEY,
+            product_name TEXT NOT NULL,
+            rating INTEGER NOT NULL CHECK (rating >= 1 AND rating <= 5),
+            review_text TEXT NOT NULL,
+            created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
+        )
+    """
+    insert_sql = """
+        INSERT INTO product_reviews
+        (review_id, product_name, rating, review_text)
+        VALUES (?, ?, ?, ?)
+    """
+    conn = sqlite3.connect(db_path)
+    try:
+        # "with conn" commits on success and rolls back on an exception;
+        # it does not close the connection, hence the finally below.
+        with conn:
+            conn.execute(create_table_sql)
+            conn.executemany(insert_sql, SAMPLE_REVIEWS)
+    finally:
+        conn.close()
+    print(f"Created database at {db_path} with {len(SAMPLE_REVIEWS)} reviews")
+if __name__ == "__main__":
+    # Write reviews.db next to this script, in the data_processing examples directory
+    create_database(Path(__file__).with_name("reviews.db"))

qtype/examples/data_processing/invoke_other_flows.qtype.yaml ADDED Viewed

@@ -0,0 +1,98 @@
+id: text_analysis_pipeline
+description: Demonstrates invoking reusable flows for text analysis
+models:
+  - type: Model
+    id: nova_lite
+    provider: aws-bedrock
+    model_id: amazon.nova-lite-v1:0
+flows:
+  # Reusable flow for text summarization
+  - type: Flow
+    id: summarize_text
+    description: Summarizes input text
+    variables:
+      - id: input_text
+        type: text
+      - id: output_summary
+        type: text
+    inputs:
+      - input_text
+    outputs:
+      - output_summary
+    steps:
+      - type: LLMInference
+        id: summarize_step
+        model: nova_lite
+        system_message: "You provide concise summaries."
+        inputs: [input_text]
+        outputs: [output_summary]
+  # Reusable flow for sentiment analysis
+  - type: Flow
+    id: analyze_sentiment
+    description: Analyzes sentiment of text
+    variables:
+      - id: input_for_sentiment
+        type: text
+      - id: output_sentiment
+        type: text
+    inputs:
+      - input_for_sentiment
+    outputs:
+      - output_sentiment
+    steps:
+      - type: LLMInference
+        id: sentiment_step
+        model: nova_lite
+        system_message: "Analyze sentiment. Respond with only: positive, negative, or neutral."
+        inputs: [input_for_sentiment]
+        outputs: [output_sentiment]
+  # Main processing flow
+  - type: Flow
+    id: main
+    description: Orchestrates text analysis using multiple flows
+    variables:
+      - id: article_text
+        type: text
+      - id: summary
+        type: text
+      - id: sentiment
+        type: text
+      - id: report
+        type: text
+    inputs:
+      - article_text
+    outputs:
+      - report
+    steps:
+      # Invoke summarization flow
+      - type: InvokeFlow
+        id: get_summary
+        flow: summarize_text
+        input_bindings:
+          input_text: article_text
+        output_bindings:
+          output_summary: summary
+      # Invoke sentiment analysis flow
+      - type: InvokeFlow
+        id: get_sentiment
+        flow: analyze_sentiment
+        input_bindings:
+          input_for_sentiment: article_text
+        output_bindings:
+          output_sentiment: sentiment
+      # Combine results into a report
+      - type: PromptTemplate
+        id: create_report
+        template: |
+          Analysis Report
+          ---------------
+          Sentiment: {sentiment}
+          Summary: {summary}
+        inputs: [summary, sentiment]
+        outputs: [report]

qtype/examples/data_processing/reviews.db ADDED Viewed

Binary file

qtype 0.1.12__py3-none-any.whl → 0.1.13__py3-none-any.whl

qtype 0.1.12__py3-none-any.whl → 0.1.13__py3-none-any.whl