qtype 0.1.13__py3-none-any.whl → 0.1.14__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- qtype/base/__init__.py +8 -2
- qtype/base/logging.py +0 -17
- qtype/base/resources.py +193 -0
- qtype/cli.py +5 -9
- qtype/commands/generate.py +6 -1
- qtype/commands/run.py +37 -10
- qtype/docs/Gallery/dataflow_pipelines.md +15 -2
- qtype/docs/Gallery/recipe_chatbot.md +103 -0
- qtype/docs/Gallery/recipe_chatbot.mermaid +62 -0
- qtype/docs/Gallery/recipe_chatbot.png +0 -0
- qtype/docs/Gallery/research_assistant.md +1 -1
- qtype/docs/How To/Command Line Usage/pass_inputs_on_the_cli.md +4 -1
- qtype/docs/How To/Data Processing/load_documents.md +74 -0
- qtype/docs/How To/Data Processing/read_sql_databases.md +2 -0
- qtype/docs/Reference/cli.md +3 -2
- qtype/docs/Reference/plugins.md +0 -4
- qtype/docs/Reference/semantic-validation-rules.md +1 -6
- qtype/docs/Tutorials/01-first-qtype-application.md +1 -1
- qtype/docs/Tutorials/03-structured-data.md +1 -1
- qtype/docs/Tutorials/04-tools-and-function-calling.md +1 -1
- qtype/examples/conversational_ai/simple_chatbot_with_auth.qtype.yaml +48 -0
- qtype/examples/data_processing/load_documents.qtype.yaml +31 -0
- qtype/examples/invoke_models/invoke_embedding_aws.qtype.yaml +45 -0
- qtype/examples/rag/recipe_chatbot.qtype.yaml +216 -0
- qtype/interpreter/auth/aws.py +94 -17
- qtype/interpreter/auth/generic.py +11 -12
- qtype/interpreter/base/secrets.py +4 -2
- qtype/interpreter/conversions.py +15 -14
- qtype/interpreter/converters.py +1 -1
- qtype/interpreter/executors/bedrock_reranker_executor.py +17 -28
- qtype/interpreter/executors/document_embedder_executor.py +1 -12
- qtype/interpreter/executors/invoke_embedding_executor.py +23 -33
- qtype/interpreter/executors/llm_inference_executor.py +2 -0
- qtype/interpreter/executors/sql_source_executor.py +6 -2
- qtype/interpreter/flow.py +11 -1
- qtype/mcp/server.py +11 -158
- qtype/semantic/visualize.py +10 -3
- {qtype-0.1.13.dist-info → qtype-0.1.14.dist-info}/METADATA +2 -2
- {qtype-0.1.13.dist-info → qtype-0.1.14.dist-info}/RECORD +42 -33
- {qtype-0.1.13.dist-info → qtype-0.1.14.dist-info}/WHEEL +0 -0
- {qtype-0.1.13.dist-info → qtype-0.1.14.dist-info}/entry_points.txt +0 -0
- {qtype-0.1.13.dist-info → qtype-0.1.14.dist-info}/licenses/LICENSE +0 -0
qtype/base/__init__.py
CHANGED
@@ -3,12 +3,18 @@
 from __future__ import annotations
 
 from .exceptions import QTypeError, ValidationError
-from .
+from .resources import (
+    ResourceDirectory,
+    get_docs_resource,
+    get_examples_resource,
+)
 from .types import JSONValue
 
 __all__ = [
     "QTypeError",
     "ValidationError",
-    "get_logger",
     "JSONValue",
+    "ResourceDirectory",
+    "get_docs_resource",
+    "get_examples_resource",
 ]
qtype/base/logging.py
CHANGED
@@ -5,23 +5,6 @@ from __future__ import annotations
 import logging
 
 
-def get_logger(name: str) -> logging.Logger:
-    """Get a logger with the given name and consistent formatting."""
-    logger = logging.getLogger(f"qtype.{name}")
-
-    # Only configure if not already configured
-    if not logger.handlers:
-        handler = logging.StreamHandler()
-        formatter = logging.Formatter(
-            "%(asctime)s - %(name)s - %(levelname)s - %(message)s"
-        )
-        handler.setFormatter(formatter)
-        logger.addHandler(handler)
-        logger.setLevel(logging.INFO)
-
-    return logger
-
-
 def configure_logging(
     level: str = "INFO", format_string: str | None = None
 ) -> None:
qtype/base/resources.py
ADDED
@@ -0,0 +1,193 @@
+"""Resource directory access utilities for QType package resources."""
+
+from __future__ import annotations
+
+import re
+from functools import lru_cache
+from importlib.resources import files
+from pathlib import Path
+
+# Regex for pymdownx snippets: --8<-- "path/to/file"
+SNIPPET_REGEX = re.compile(r'--8<--\s+"([^"]+)"')
+
+
+class ResourceDirectory:
+    """Abstraction for accessing resource directories (docs, examples, etc.)."""
+
+    def __init__(
+        self, name: str, file_extension: str, resolve_snippets: bool = False
+    ):
+        """Initialize a resource directory.
+
+        Args:
+            name: Directory name (e.g., "docs", "examples")
+            file_extension: File extension to search for (e.g., ".md", ".yaml")
+            resolve_snippets: Whether to resolve MkDocs snippets in file content
+        """
+        self.name = name
+        self.file_extension = file_extension
+        self.resolve_snippets = resolve_snippets
+        self._path_cache: Path | None = None
+
+    def get_path(self) -> Path:
+        """Get the path to this resource directory.
+
+        Returns:
+            Path to the resource directory, trying installed package first,
+            then falling back to development path.
+        """
+        if self._path_cache is not None:
+            return self._path_cache
+
+        try:
+            # Try to get from installed package
+            resource_root = files("qtype") / self.name
+            # Check if it exists by trying to iterate
+            list(resource_root.iterdir())
+            self._path_cache = Path(str(resource_root))
+        except (FileNotFoundError, AttributeError, TypeError):
+            # Fall back to development path
+            self._path_cache = Path(__file__).parent.parent.parent / self.name
+
+        return self._path_cache
+
+    def get_file(self, file_path: str) -> str:
+        """Get the content of a specific file.
+
+        Args:
+            file_path: Relative path to the file from the resource root.
+
+        Returns:
+            The full content of the file.
+
+        Raises:
+            FileNotFoundError: If the specified file doesn't exist.
+            ValueError: If the path tries to access files outside the directory.
+        """
+        resource_path = self.get_path()
+
+        # Resolve the requested file path
+        requested_file = (resource_path / file_path).resolve()
+
+        # Security check: ensure the resolved path is within resource directory
+        try:
+            requested_file.relative_to(resource_path.resolve())
+        except ValueError as e:
+            raise ValueError(
+                f"Invalid path: '{file_path}' is outside {self.name} directory"
+            ) from e
+
+        if not requested_file.exists():
+            raise FileNotFoundError(
+                (
+                    f"{self.name.capitalize()} file not found: '{file_path}'. "
+                    f"Use list_{self.name} to see available files."
+                )
+            )
+
+        if not requested_file.is_file():
+            raise ValueError(f"Path is not a file: '{file_path}'")
+
+        content = requested_file.read_text(encoding="utf-8")
+
+        # Apply snippet resolution if enabled
+        if self.resolve_snippets:
+            content = _resolve_snippets(content, requested_file, self)
+
+        return content
+
+    def list_files(self) -> list[str]:
+        """List all files in this resource directory.
+
+        Returns:
+            Sorted list of relative paths to all files with the configured extension.
+
+        Raises:
+            FileNotFoundError: If the resource directory doesn't exist.
+        """
+        resource_path = self.get_path()
+
+        if not resource_path.exists():
+            raise FileNotFoundError(
+                (
+                    f"{self.name.capitalize()} directory not found: "
+                    f"{resource_path}"
+                )
+            )
+
+        # Find all files with the configured extension
+        pattern = f"*{self.file_extension}"
+        files_list = []
+        for file in resource_path.rglob(pattern):
+            # Get relative path from resource root
+            rel_path = file.relative_to(resource_path)
+            files_list.append(str(rel_path))
+
+        return sorted(files_list)
+
+
+def _resolve_snippets(
+    content: str, base_path: Path, docs_resource: ResourceDirectory
+) -> str:
+    """Recursively resolve MkDocs snippets in markdown content.
+
+    Mimics the behavior of pymdownx.snippets.
+
+    Args:
+        content: The markdown content to process
+        base_path: Path to the file being processed (for resolving relative paths)
+        docs_resource: The docs ResourceDirectory for resolving snippet paths
+
+    Returns:
+        Content with all snippets resolved
+    """
+    docs_root = docs_resource.get_path()
+    project_root = docs_root.parent
+
+    def replace_match(match: re.Match) -> str:
+        snippet_path = match.group(1)
+
+        # pymdownx logic: try relative to current file, then docs, then project
+        candidates = [
+            base_path.parent / snippet_path,  # Relative to the doc file
+            docs_root / snippet_path,  # Relative to docs root
+            project_root / snippet_path,  # Relative to project root
+        ]
+
+        for candidate in candidates:
+            if candidate.exists() and candidate.is_file():
+                # Recursively resolve snippets inside the included file
+                return _resolve_snippets(
+                    candidate.read_text(encoding="utf-8"),
+                    candidate,
+                    docs_resource,
+                )
+
+        return f"> [!WARNING] Could not resolve snippet: {snippet_path}"
+
+    return SNIPPET_REGEX.sub(replace_match, content)
+
+
+# Initialize singleton resource directories
+_docs_resource = ResourceDirectory("docs", ".md", resolve_snippets=True)
+_examples_resource = ResourceDirectory("examples", ".yaml")
+
+
+@lru_cache(maxsize=1)
+def get_docs_resource() -> ResourceDirectory:
+    """Get the singleton docs resource directory.
+
+    Returns:
+        ResourceDirectory instance for documentation files.
+    """
+    return _docs_resource
+
+
+@lru_cache(maxsize=1)
+def get_examples_resource() -> ResourceDirectory:
+    """Get the singleton examples resource directory.
+
+    Returns:
+        ResourceDirectory instance for example files.
+    """
+    return _examples_resource
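The new `ResourceDirectory` accessors are what `qtype generate` (see the `generate.py` change below) uses to read the docs and example YAML bundled in the wheel. A minimal usage sketch based on the API shown above; the `"Reference/cli.md"` path is only an illustration taken from the file list of this release:

```python
from qtype.base.resources import get_docs_resource, get_examples_resource

# Singleton accessors return preconfigured ResourceDirectory instances:
# docs resolve MkDocs snippets, examples are plain YAML.
docs = get_docs_resource()
examples = get_examples_resource()

# List packaged resource files (sorted relative paths).
print(docs.list_files()[:5])
print(examples.list_files()[:5])

# Read one docs file. get_file() raises ValueError for paths that escape
# the resource directory and FileNotFoundError for missing files.
content = docs.get_file("Reference/cli.md")  # hypothetical path for illustration
print(content[:200])
```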
qtype/cli.py
CHANGED
@@ -7,9 +7,9 @@ import importlib
 import logging
 from pathlib import Path
 
-from qtype.base.logging import
+from qtype.base.logging import configure_logging
 
-logger =
+logger = logging.getLogger(__name__)
 
 try:
     from importlib.metadata import entry_points
@@ -59,9 +59,8 @@ def _discover_local_commands(subparsers: argparse._SubParsersAction) -> None:
                 f"Built-in command module {module_name} does not have a 'parser' function"
             )
         except Exception as e:
-            logging.
-                f"Failed to load built-in command module {module_name}: {e}"
-                exc_info=True,
+            logging.debug(
+                f"Failed to load built-in command module {module_name}: {e} -- you may need the mcp or interpreter extras."
             )
 
 
@@ -133,10 +132,7 @@ def main() -> None:
     args = parser.parse_args()
 
     # Set logging level based on user input
-
-        level=getattr(logging, args.log_level),
-        format="%(asctime)s - %(levelname)s: %(message)s",
-    )
+    configure_logging(level=args.log_level)
 
     # Dispatch to the selected subcommand
     args.func(args)
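Taken together with the `logging.py` change above, the CLI now follows the standard-library pattern: modules create plain `logging.getLogger(__name__)` loggers and only the entry point attaches handlers. A small sketch of that pattern, assuming `configure_logging` keeps the signature shown in the `logging.py` diff:

```python
import logging

from qtype.base.logging import configure_logging

# Module-level logger: no handlers attached here, just a named logger.
logger = logging.getLogger(__name__)


def main() -> None:
    # One-time configuration at the entry point; in the real CLI the level
    # comes from the parsed --log-level argument.
    configure_logging(level="INFO")
    logger.info("logging configured")
```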
qtype/commands/generate.py
CHANGED
@@ -117,7 +117,10 @@ def run_generate_skill(args: argparse.Namespace) -> None:
     Args:
         args: Command-line arguments with 'output' path.
     """
-    from qtype.
+    from qtype.base.resources import get_docs_resource, get_examples_resource
+
+    _docs_resource = get_docs_resource()
+    _examples_resource = get_examples_resource()
 
     output_path = Path(args.output) / "qtype-architect"
 
@@ -156,6 +159,7 @@ def generate_schema(args: argparse.Namespace) -> None:
         args (argparse.Namespace): Command-line arguments with an optional
             'output' attribute specifying the output file path.
     """
+    logger.info("Generating QType DSL JSON schema...")
     schema = Document.model_json_schema()
 
     # Add the $schema property to indicate JSON Schema version
@@ -235,6 +239,7 @@ def parser(subparsers: argparse._SubParsersAction) -> None:
         "-o",
         "--output",
         type=str,
+        default=None,
         help="Output file for the schema (default: stdout)",
     )
     schema_parser.set_defaults(func=generate_schema)
qtype/commands/run.py
CHANGED
@@ -185,7 +185,7 @@ def run_flow(args: Any) -> None:
 
     # Display results
     if len(result_df) > 0:
-        logger.info(f"Processed {len(result_df)}
+        logger.info(f"Processed {len(result_df)} rows")
 
         # Remove 'row' and 'error' columns for display if all errors are None
        display_df = result_df.copy()
@@ -197,15 +197,37 @@
         if "row" in display_df.columns:
             display_df = display_df.drop(columns=["row"])
 
-
-
-
-
-
-
-
-
-
+        # Show summary for console display
+        logger.info(
+            f"\nResults summary: {len(display_df)} rows, "
+            f"{len(display_df.columns)} columns: {list(display_df.columns)}"
+        )
+
+        # Optionally show full output
+        if args.show_output:
+            # Truncate long strings for display
+            max_col_width = 100
+            for col in display_df.columns:
+                display_df[col] = display_df[col].apply(
+                    lambda x: (
+                        f"{str(x)[:max_col_width]}..."
+                        if isinstance(x, str)
+                        and len(str(x)) > max_col_width
+                        else x
+                    )
+                )
+
+            if len(display_df) > 1:
+                logger.info(
+                    f"\nResults:\n{display_df[0:10].to_string()}\n..."
+                )
+            else:
+                # Print the first row with column_name: value one per line
+                fmt_str = []
+                for col, val in display_df.iloc[0].items():
+                    fmt_str.append(f"{col}: {val}")
+                fmt_str = "\n".join(fmt_str)
+                logger.info(f"\nResults:\n{fmt_str}")
 
         # Save the output
         if args.output:
@@ -267,6 +289,11 @@ def parser(subparsers: argparse._SubParsersAction) -> None:
         action="store_true",
         help="Show progress bars during flow execution.",
     )
+    cmd_parser.add_argument(
+        "--show-output",
+        action="store_true",
+        help="Display full output data in console (default: summary only).",
+    )
 
     cmd_parser.add_argument(
         "spec", type=str, help="Path to the QType YAML spec file."
qtype/docs/Gallery/dataflow_pipelines.md
CHANGED
@@ -64,11 +64,24 @@
 
 You'll notice that the output shows 1 message for `write_results` and 10 for the others. That is because it is reporting the number of messages _emitted_ from each step, and `write_results` is a sink that collects all messages.
 
-
+By default, QType shows a summary of the results. The final message will show:
 
 ```
 2026-01-16 11:23:35,151 - INFO: ✅ Flow execution completed successfully
-2026-01-16 11:23:35,151 - INFO: Processed 1
+2026-01-16 11:23:35,151 - INFO: Processed 1 rows
+2026-01-16 11:23:35,152 - INFO:
+Results summary: 1 rows, 1 columns: ['result_file']
+```
+
+To see the full output data, add the `--show-output` flag:
+
+```bash
+qtype run -i '{"output_path":"results.parquet"}' --progress --show-output examples/data_processing/dataflow_pipelines.qtype.yaml
+```
+
+This will display:
+
+```
 2026-01-16 11:23:35,152 - INFO:
 Results:
 result_file: results.parquet
qtype/docs/Gallery/recipe_chatbot.md
ADDED
@@ -0,0 +1,103 @@
+# Retrieval Augmented Generation Chatbot
+
+## Overview
+
+A complete RAG (Retrieval Augmented Generation) chatbot that answers cooking questions using a recipe collection from GitHub. The system ingests markdown recipe files, splits them into chunks, generates embeddings, stores them in a vector database, and provides conversational search with context-aware responses using memory to maintain conversation history.
+
+## Architecture
+
+```mermaid
+--8<-- "Gallery/recipe_chatbot.mermaid"
+```
+
+## Complete Code
+
+```yaml
+--8<-- "../examples/rag/recipe_chatbot.qtype.yaml"
+```
+
+## Running the Example
+
+### Prerequisites
+
+Start Qdrant vector database locally:
+```bash
+docker run -p 6333:6333 qdrant/qdrant
+```
+
+Clone the recipe repository:
+```bash
+git clone https://github.com/clarklab/chowdown.git
+```
+
+### Ingest Recipe Documents
+
+Run the ingestion flow to populate the vector index:
+```bash
+AWS_PROFILE=my_profile qtype run examples/rag/recipe_chatbot.qtype.yaml --flow recipe_ingestion --progress
+```
+
+This will:
+1. Load all markdown files from `chowdown/_recipes/`
+2. Split them into 512-token chunks with 50-token overlap
+3. Generate embeddings using AWS Bedrock Titan
+4. Store vectors in Qdrant collection `chowdown_recipes`
+
+You should see the output similar to:
+```
+2026-02-04 06:38:06,222 - qtype.commands.run - INFO - Running flow from recipe_chatbot.qtype.yaml
+2026-02-04 06:38:06,315 - qtype.commands.run - INFO - Executing flow recipe_ingestion from recipe_chatbot.qtype.yaml
+/Users/lou.kratz/repos/qtype-cicd-fix/.venv/lib/python3.13/site-packages/llama_index/vector_stores/qdrant/base.py:238: UserWarning: Api key is used with an insecure connection.
+  self._client = qdrant_client.QdrantClient(
+/Users/lou.kratz/repos/qtype-cicd-fix/.venv/lib/python3.13/site-packages/llama_index/vector_stores/qdrant/base.py:241: UserWarning: Api key is used with an insecure connection.
+  self._aclient = qdrant_client.AsyncQdrantClient(
+╭─────────────────────────────────────────────── Flow Progress ────────────────────────────────────────────────╮
+│                                                                                                               │
+│ Step load_recipes 12.9 msg/s ▁▁▂▄▄▅▅▅▅▄▆▆▆▇▇█▇▇… ✔ 34 succeeded ✖ 0 errors ⟳ - hits ✗ - misses 0:00:02 │
+│ Step split_recipes 14.9 msg/s ▁▁▁▃▂▅▅▅▆▅▆▆▇▇▇█▇▇… ✔ 39 succeeded ✖ 0 errors ⟳ - hits ✗ - misses 0:00:02 │
+│ Step embed_chunks 18.7 msg/s ██▃▃▁▂▂▁▂▁▁▁▁▁▁▁▁▁… ✔ 39 succeeded ✖ 0 errors ⟳ - hits ✗ - misses 0:00:02 │
+│ Step index_recipes 47.0 msg/s ████████▁ ✔ 39 succeeded ✖ 0 errors ⟳ - hits ✗ - misses 0:00:00 │
+│                                                                                                               │
+╰──────────────────────────────────────────────────────────────────────────────────────────────────────────────╯
+2026-02-04 06:38:11,141 - qtype.commands.run - INFO - ✅ Flow execution completed successfully
+2026-02-04 06:38:11,141 - qtype.commands.run - INFO - Processed 39 rows
+2026-02-04 06:38:11,141 - qtype.commands.run - INFO -
+Results summary: 39 rows, 1 columns: ['embedded_chunk']
+```
+
+### Start the Chatbot
+
+Launch the conversational UI:
+```bash
+AWS_PROFILE=my_profile qtype serve examples/rag/recipe_chatbot.qtype.yaml --flow recipe_chat
+```
+
+Then open http://localhost:8000 and ask questions like:
+- "What dessert recipes do you have?"
+- "What can I make with chicken?"
+
+
+
+## Key Features
+
+- **Conversational Interface**: Flow interface type that accumulates messages in `conversation_history` for stateful multi-turn chat
+- **Memory**: Conversation buffer with `token_limit` (10,000) and `chat_history_token_ratio` (0.7) that auto-flushes oldest messages when limit exceeded
+- **DocumentSource**: Loads markdown files via LlamaIndex `SimpleDirectoryReader` with `required_exts` file filter
+- **DocumentSplitter**: Splits documents with `SentenceSplitter` using `chunk_size` (512) and `chunk_overlap` (50) parameters
+- **DocumentEmbedder**: Generates embeddings with AWS Bedrock Titan, processes chunks concurrently via `num_workers` (5)
+- **VectorIndex**: Qdrant vector store with `embedding_model` reference and dimensions (1024)
+- **IndexUpsert**: Writes to vector index in batches via `batch_size` (25)
+- **VectorSearch**: Semantic search with `default_top_k` (5) returns chunks by embedding distance
+- **FieldExtractor**: Extracts text from ChatMessage using JSONPath `$.blocks[?(@.type == 'text')].content`
+- **PromptTemplate**: Injects search results and query into template string for LLM context
+- **LLMInference**: Calls model with `system_message` and `memory` reference for conversation history
+- **RAGDocument**: Domain type with `content`, `file_id`, `file_name`, `metadata` fields
+- **RAGChunk**: Domain type with `content`, `chunk_id`, `document_id`, `vector` fields
+- **RAGSearchResult**: Domain type with `content` (RAGChunk), `doc_id`, `score` fields
+
+
+## Learn More
+
+- Tutorial: [Building a Stateful Chatbot](../../Tutorials/building-a-stateful-chatbot.md)
+- How-To: [Use Environment Variables](../../How-To%20Guides/Language%20Features/use-environment-variables.md)
+- How-To: [Configure AWS Authentication](../../How-To%20Guides/Authentication/configure-aws-authentication.md)
qtype/docs/Gallery/recipe_chatbot.mermaid
ADDED
@@ -0,0 +1,62 @@
+flowchart TD
+    subgraph APP ["📱 recipe_rag_chatbot"]
+        direction TB
+
+        subgraph FLOW_0 ["🔄 recipe_chat"]
+            direction LR
+            FLOW_0_START@{shape: circle, label: "▶️ Start"}
+            FLOW_0_S0@{shape: rect, label: "⚙️ extract_question"}
+            FLOW_0_S1@{shape: cyl, label: "🔎 search_recipes"}
+            FLOW_0_S2@{shape: doc, label: "📄 build_context_prompt"}
+            FLOW_0_S3@{shape: rounded, label: "✨ generate_response"}
+            FLOW_0_START -->|user_message| FLOW_0_S0
+            FLOW_0_S0 -->|user_question| FLOW_0_S1
+            FLOW_0_S1 -->|search_results| FLOW_0_S2
+            FLOW_0_S0 -->|user_question| FLOW_0_S2
+            FLOW_0_S2 -->|context_prompt| FLOW_0_S3
+        end
+
+        subgraph FLOW_1 ["🔄 recipe_ingestion"]
+            direction TB
+            FLOW_1_S0@{shape: rect, label: "⚙️ load_recipes"}
+            FLOW_1_S1@{shape: rect, label: "⚙️ split_recipes"}
+            FLOW_1_S2@{shape: rect, label: "⚙️ embed_chunks"}
+            FLOW_1_S3@{shape: rect, label: "💾 index_recipes"}
+            FLOW_1_S0 -->|recipe_document| FLOW_1_S1
+            FLOW_1_S1 -->|recipe_chunk| FLOW_1_S2
+            FLOW_1_S2 -->|embedded_chunk| FLOW_1_S3
+        end
+
+        subgraph RESOURCES ["🔧 Shared Resources"]
+            direction LR
+            AUTH_AWS_AUTH@{shape: hex, label: "🔐 aws_auth (AWS)"}
+            MODEL_CLAUDE_SONNET@{shape: rounded, label: "✨ claude_sonnet (aws-bedrock)" }
+            MODEL_CLAUDE_SONNET -.->|uses| AUTH_AWS_AUTH
+            MODEL_TITAN_EMBED@{shape: rounded, label: "✨ titan_embed (aws-bedrock)" }
+            MODEL_TITAN_EMBED -.->|uses| AUTH_AWS_AUTH
+            INDEX_RECIPE_INDEX@{shape: cyl, label: "🗄️ recipe_index"}
+            EMB_TITAN_EMBED@{shape: rounded, label: "🎯 titan_embed"}
+            INDEX_RECIPE_INDEX -.->|embeds| EMB_TITAN_EMBED
+            MEM_RECIPE_CHAT_MEMORY@{shape: win-pane, label: "🧠 recipe_chat_memory (10KT)"}
+        end
+
+    end
+
+    FLOW_0_S1 -.-> INDEX_RECIPE_INDEX
+    FLOW_0_S3 -.->|uses| MODEL_CLAUDE_SONNET
+    FLOW_0_S3 -.->|stores| MEM_RECIPE_CHAT_MEMORY
+    FLOW_1_S3 -.->|writes| INDEX_RECIPE_INDEX
+
+    %% Styling
+    classDef appBox fill:none,stroke:#495057,stroke-width:3px
+    classDef flowBox fill:#e1f5fe,stroke:#0277bd,stroke-width:2px
+    classDef llmNode fill:#f3e5f5,stroke:#7b1fa2,stroke-width:2px
+    classDef modelNode fill:#e8f5e8,stroke:#2e7d32,stroke-width:2px
+    classDef authNode fill:#fff3e0,stroke:#ef6c00,stroke-width:2px
+    classDef telemetryNode fill:#fce4ec,stroke:#c2185b,stroke-width:2px
+    classDef resourceBox fill:#f5f5f5,stroke:#616161,stroke-width:1px
+
+    class APP appBox
+    class FLOW_0 flowBox
+    class RESOURCES resourceBox
+    class TELEMETRY telemetryNode
qtype/docs/Gallery/recipe_chatbot.png
ADDED
Binary file
qtype/docs/Gallery/research_assistant.md
CHANGED
@@ -59,7 +59,7 @@ qtype validate examples/research_assistant/research_assistant.qtype.yaml
 
 # Run directly
 qtype run -i '{"topic":"Latest developments in retrieval augmented generation"}' \
-  examples/research_assistant/research_assistant.qtype.yaml
+  --show-output examples/research_assistant/research_assistant.qtype.yaml
 ```
 
 ### Example Output
qtype/docs/How To/Command Line Usage/pass_inputs_on_the_cli.md
CHANGED
@@ -2,6 +2,8 @@
 
 Provide input values to your QType flows directly from the command line using JSON-formatted input data, enabling dynamic parameterization of applications without modifying YAML files.
 
+**Note:** Inputs are optional. Flows with source steps (like `DocumentSource` or `SQLSource`) can run without any inputs, as these steps generate their own data.
+
 ### CLI Usage
 
 ```bash
@@ -20,10 +22,11 @@ qtype run -f analyze_data -i '{"threshold":0.85}' app.qtype.yaml
 
 ### Explanation
 
-- **`-i`, `--input`**: Accepts a JSON blob containing key-value pairs where keys match variable names declared in your flow's `inputs` field
+- **`-i`, `--input`**: Accepts a JSON blob containing key-value pairs where keys match variable names declared in your flow's `inputs` field (optional - omit for flows with source steps)
 - **JSON format**: Must be valid JSON with double quotes for strings, properly escaped special characters
 - **Flow inputs**: The variables must match those declared in the flow's `inputs` list or the application's `inputs` list
 - **`-f`, `--flow`**: Specifies which flow to run when your application contains multiple flows (defaults to first flow if omitted)
+- **Source steps**: Flows containing source steps like `DocumentSource`, `SQLSource`, or `FileSource` can run without inputs, as these steps generate data independently
 
 ## Complete Example
 
qtype/docs/How To/Data Processing/load_documents.md
ADDED
@@ -0,0 +1,74 @@
+# Load Documents
+
+Load documents from files, directories, or external systems using LlamaIndex readers with DocumentSource.
+
+**Note:** DocumentSource is a source step that generates data independently, so flows using it typically require no inputs.
+
+### QType YAML
+
+```yaml
+steps:
+  - type: DocumentSource
+    id: load_docs
+    reader_module: llama_index.core.SimpleDirectoryReader
+    args:
+      input_dir: ./data
+      required_exts: [".md", ".txt"]
+      recursive: true
+    loader_args:
+      num_workers: 4
+    outputs:
+      - document
+```
+
+### Explanation
+
+- **reader_module**: Python module path to a class that inherits from `llama_index.core.readers.base.BaseReader` (most common: `llama_index.core.SimpleDirectoryReader`)
+- **args**: Arguments passed to the reader class constructor (e.g., `input_dir`, `required_exts`, `recursive`, `file_extractor`)
+- **loader_args**: Arguments passed to the reader's `load_data()` method (e.g., `num_workers` for parallel processing)
+- **outputs**: Variable to store loaded documents (type: `RAGDocument`) - DocumentSource fans out, emitting one message per document
+- **Critical distinction**: Constructor args configure the reader instance; `load_data` args control how documents are loaded
+
+### Common Reader Modules
+
+**SimpleDirectoryReader** (`llama_index.core.SimpleDirectoryReader`):
+- Constructor args: `input_dir`, `input_files`, `required_exts`, `exclude`, `recursive`, `file_extractor`, `file_metadata`, `encoding`
+- Loader args: `num_workers` (parallel processing)
+- Supports 15+ file types including PDF, DOCX, CSV, Markdown, images, audio/video
+- [Full documentation](https://developers.llamaindex.ai/python/framework/module_guides/loading/simpledirectoryreader/)
+
+**JSONReader** (`llama_index.readers.json.JSONReader`):
+- Constructor args: `levels_back`, `collapse_length`, `ensure_ascii`, `is_jsonl`, `clean_json`
+- Loader args: `input_file`, `extra_info`
+- Supports both JSON and JSONL (JSON Lines) formats
+- [Full documentation](https://developers.llamaindex.ai/typescript/framework/modules/data/readers/json/)
+
+### Dynamic Arguments
+
+You can pass flow variables as constructor arguments by including them in `args`. At runtime, QType merges message variables with the configured args:
+
+```yaml
+variables:
+  - id: data_path
+    type: text
+
+steps:
+  - type: DocumentSource
+    id: load_docs
+    reader_module: llama_index.core.SimpleDirectoryReader
+    args:
+      input_dir: data_path  # References variable from message
+    inputs: [data_path]
+```
+
+## Complete Example
+
+```yaml
+--8<-- "../examples/data_processing/load_documents.qtype.yaml"
+```
+
+## See Also
+
+- [DocumentSource Reference](../../components/DocumentSource.md)
+- [DocumentSplitter How-To](chunk_documents.md)
+- [RAG Tutorial](../../Tutorials/rag_tutorial.md)
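The constructor vs. `load_data` distinction this new guide emphasizes maps directly onto plain LlamaIndex usage. A rough Python equivalent of the DocumentSource step configured above, assuming `llama-index` is installed and a `./data` directory exists (not a QType API, just the underlying reader calls):

```python
from llama_index.core import SimpleDirectoryReader

# Constructor args (DocumentSource `args`): configure which files the reader sees.
reader = SimpleDirectoryReader(
    input_dir="./data",
    required_exts=[".md", ".txt"],
    recursive=True,
)

# Loader args (DocumentSource `loader_args`): control how documents are loaded.
documents = reader.load_data(num_workers=4)

# QType fans these out as one message per document; here we just inspect them.
for doc in documents[:3]:
    print(doc.metadata.get("file_name"), len(doc.text))
```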