qtype 0.1.13__py3-none-any.whl → 0.1.14__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (42)
  1. qtype/base/__init__.py +8 -2
  2. qtype/base/logging.py +0 -17
  3. qtype/base/resources.py +193 -0
  4. qtype/cli.py +5 -9
  5. qtype/commands/generate.py +6 -1
  6. qtype/commands/run.py +37 -10
  7. qtype/docs/Gallery/dataflow_pipelines.md +15 -2
  8. qtype/docs/Gallery/recipe_chatbot.md +103 -0
  9. qtype/docs/Gallery/recipe_chatbot.mermaid +62 -0
  10. qtype/docs/Gallery/recipe_chatbot.png +0 -0
  11. qtype/docs/Gallery/research_assistant.md +1 -1
  12. qtype/docs/How To/Command Line Usage/pass_inputs_on_the_cli.md +4 -1
  13. qtype/docs/How To/Data Processing/load_documents.md +74 -0
  14. qtype/docs/How To/Data Processing/read_sql_databases.md +2 -0
  15. qtype/docs/Reference/cli.md +3 -2
  16. qtype/docs/Reference/plugins.md +0 -4
  17. qtype/docs/Reference/semantic-validation-rules.md +1 -6
  18. qtype/docs/Tutorials/01-first-qtype-application.md +1 -1
  19. qtype/docs/Tutorials/03-structured-data.md +1 -1
  20. qtype/docs/Tutorials/04-tools-and-function-calling.md +1 -1
  21. qtype/examples/conversational_ai/simple_chatbot_with_auth.qtype.yaml +48 -0
  22. qtype/examples/data_processing/load_documents.qtype.yaml +31 -0
  23. qtype/examples/invoke_models/invoke_embedding_aws.qtype.yaml +45 -0
  24. qtype/examples/rag/recipe_chatbot.qtype.yaml +216 -0
  25. qtype/interpreter/auth/aws.py +94 -17
  26. qtype/interpreter/auth/generic.py +11 -12
  27. qtype/interpreter/base/secrets.py +4 -2
  28. qtype/interpreter/conversions.py +15 -14
  29. qtype/interpreter/converters.py +1 -1
  30. qtype/interpreter/executors/bedrock_reranker_executor.py +17 -28
  31. qtype/interpreter/executors/document_embedder_executor.py +1 -12
  32. qtype/interpreter/executors/invoke_embedding_executor.py +23 -33
  33. qtype/interpreter/executors/llm_inference_executor.py +2 -0
  34. qtype/interpreter/executors/sql_source_executor.py +6 -2
  35. qtype/interpreter/flow.py +11 -1
  36. qtype/mcp/server.py +11 -158
  37. qtype/semantic/visualize.py +10 -3
  38. {qtype-0.1.13.dist-info → qtype-0.1.14.dist-info}/METADATA +2 -2
  39. {qtype-0.1.13.dist-info → qtype-0.1.14.dist-info}/RECORD +42 -33
  40. {qtype-0.1.13.dist-info → qtype-0.1.14.dist-info}/WHEEL +0 -0
  41. {qtype-0.1.13.dist-info → qtype-0.1.14.dist-info}/entry_points.txt +0 -0
  42. {qtype-0.1.13.dist-info → qtype-0.1.14.dist-info}/licenses/LICENSE +0 -0
qtype/base/__init__.py CHANGED
@@ -3,12 +3,18 @@
  from __future__ import annotations
 
  from .exceptions import QTypeError, ValidationError
- from .logging import get_logger
+ from .resources import (
+     ResourceDirectory,
+     get_docs_resource,
+     get_examples_resource,
+ )
  from .types import JSONValue
 
  __all__ = [
      "QTypeError",
      "ValidationError",
-     "get_logger",
      "JSONValue",
+     "ResourceDirectory",
+     "get_docs_resource",
+     "get_examples_resource",
  ]
qtype/base/logging.py CHANGED
@@ -5,23 +5,6 @@ from __future__ import annotations
  import logging
 
 
- def get_logger(name: str) -> logging.Logger:
-     """Get a logger with the given name and consistent formatting."""
-     logger = logging.getLogger(f"qtype.{name}")
-
-     # Only configure if not already configured
-     if not logger.handlers:
-         handler = logging.StreamHandler()
-         formatter = logging.Formatter(
-             "%(asctime)s - %(name)s - %(levelname)s - %(message)s"
-         )
-         handler.setFormatter(formatter)
-         logger.addHandler(handler)
-         logger.setLevel(logging.INFO)
-
-     return logger
-
-
  def configure_logging(
      level: str = "INFO", format_string: str | None = None
  ) -> None:
qtype/base/resources.py ADDED
@@ -0,0 +1,193 @@
+ """Resource directory access utilities for QType package resources."""
+
+ from __future__ import annotations
+
+ import re
+ from functools import lru_cache
+ from importlib.resources import files
+ from pathlib import Path
+
+ # Regex for pymdownx snippets: --8<-- "path/to/file"
+ SNIPPET_REGEX = re.compile(r'--8<--\s+"([^"]+)"')
+
+
+ class ResourceDirectory:
+     """Abstraction for accessing resource directories (docs, examples, etc.)."""
+
+     def __init__(
+         self, name: str, file_extension: str, resolve_snippets: bool = False
+     ):
+         """Initialize a resource directory.
+
+         Args:
+             name: Directory name (e.g., "docs", "examples")
+             file_extension: File extension to search for (e.g., ".md", ".yaml")
+             resolve_snippets: Whether to resolve MkDocs snippets in file content
+         """
+         self.name = name
+         self.file_extension = file_extension
+         self.resolve_snippets = resolve_snippets
+         self._path_cache: Path | None = None
+
+     def get_path(self) -> Path:
+         """Get the path to this resource directory.
+
+         Returns:
+             Path to the resource directory, trying installed package first,
+             then falling back to development path.
+         """
+         if self._path_cache is not None:
+             return self._path_cache
+
+         try:
+             # Try to get from installed package
+             resource_root = files("qtype") / self.name
+             # Check if it exists by trying to iterate
+             list(resource_root.iterdir())
+             self._path_cache = Path(str(resource_root))
+         except (FileNotFoundError, AttributeError, TypeError):
+             # Fall back to development path
+             self._path_cache = Path(__file__).parent.parent.parent / self.name
+
+         return self._path_cache
+
+     def get_file(self, file_path: str) -> str:
+         """Get the content of a specific file.
+
+         Args:
+             file_path: Relative path to the file from the resource root.
+
+         Returns:
+             The full content of the file.
+
+         Raises:
+             FileNotFoundError: If the specified file doesn't exist.
+             ValueError: If the path tries to access files outside the directory.
+         """
+         resource_path = self.get_path()
+
+         # Resolve the requested file path
+         requested_file = (resource_path / file_path).resolve()
+
+         # Security check: ensure the resolved path is within resource directory
+         try:
+             requested_file.relative_to(resource_path.resolve())
+         except ValueError as e:
+             raise ValueError(
+                 f"Invalid path: '{file_path}' is outside {self.name} directory"
+             ) from e
+
+         if not requested_file.exists():
+             raise FileNotFoundError(
+                 (
+                     f"{self.name.capitalize()} file not found: '{file_path}'. "
+                     f"Use list_{self.name} to see available files."
+                 )
+             )
+
+         if not requested_file.is_file():
+             raise ValueError(f"Path is not a file: '{file_path}'")
+
+         content = requested_file.read_text(encoding="utf-8")
+
+         # Apply snippet resolution if enabled
+         if self.resolve_snippets:
+             content = _resolve_snippets(content, requested_file, self)
+
+         return content
+
+     def list_files(self) -> list[str]:
+         """List all files in this resource directory.
+
+         Returns:
+             Sorted list of relative paths to all files with the configured extension.
+
+         Raises:
+             FileNotFoundError: If the resource directory doesn't exist.
+         """
+         resource_path = self.get_path()
+
+         if not resource_path.exists():
+             raise FileNotFoundError(
+                 (
+                     f"{self.name.capitalize()} directory not found: "
+                     f"{resource_path}"
+                 )
+             )
+
+         # Find all files with the configured extension
+         pattern = f"*{self.file_extension}"
+         files_list = []
+         for file in resource_path.rglob(pattern):
+             # Get relative path from resource root
+             rel_path = file.relative_to(resource_path)
+             files_list.append(str(rel_path))
+
+         return sorted(files_list)
+
+
+ def _resolve_snippets(
+     content: str, base_path: Path, docs_resource: ResourceDirectory
+ ) -> str:
+     """Recursively resolve MkDocs snippets in markdown content.
+
+     Mimics the behavior of pymdownx.snippets.
+
+     Args:
+         content: The markdown content to process
+         base_path: Path to the file being processed (for resolving relative paths)
+         docs_resource: The docs ResourceDirectory for resolving snippet paths
+
+     Returns:
+         Content with all snippets resolved
+     """
+     docs_root = docs_resource.get_path()
+     project_root = docs_root.parent
+
+     def replace_match(match: re.Match) -> str:
+         snippet_path = match.group(1)
+
+         # pymdownx logic: try relative to current file, then docs, then project
+         candidates = [
+             base_path.parent / snippet_path,  # Relative to the doc file
+             docs_root / snippet_path,  # Relative to docs root
+             project_root / snippet_path,  # Relative to project root
+         ]
+
+         for candidate in candidates:
+             if candidate.exists() and candidate.is_file():
+                 # Recursively resolve snippets inside the included file
+                 return _resolve_snippets(
+                     candidate.read_text(encoding="utf-8"),
+                     candidate,
+                     docs_resource,
+                 )
+
+         return f"> [!WARNING] Could not resolve snippet: {snippet_path}"
+
+     return SNIPPET_REGEX.sub(replace_match, content)
+
+
+ # Initialize singleton resource directories
+ _docs_resource = ResourceDirectory("docs", ".md", resolve_snippets=True)
+ _examples_resource = ResourceDirectory("examples", ".yaml")
+
+
+ @lru_cache(maxsize=1)
+ def get_docs_resource() -> ResourceDirectory:
+     """Get the singleton docs resource directory.
+
+     Returns:
+         ResourceDirectory instance for documentation files.
+     """
+     return _docs_resource
+
+
+ @lru_cache(maxsize=1)
+ def get_examples_resource() -> ResourceDirectory:
+     """Get the singleton examples resource directory.
+
+     Returns:
+         ResourceDirectory instance for example files.
+     """
+     return _examples_resource
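The new module centralizes access to the docs and examples shipped inside the wheel. A minimal usage sketch, assuming qtype 0.1.14 is installed with its packaged `docs/` and `examples/` directories (file paths taken from the file list above):

```python
from qtype.base.resources import get_docs_resource, get_examples_resource

# Enumerate packaged documentation pages (relative .md paths).
docs = get_docs_resource()
for page in docs.list_files():
    print(page)

# Read one page; MkDocs --8<-- snippets are inlined because the docs
# resource is constructed with resolve_snippets=True.
print(docs.get_file("Gallery/recipe_chatbot.md"))

# Example specs are returned verbatim (no snippet resolution).
examples = get_examples_resource()
print(examples.get_file("rag/recipe_chatbot.qtype.yaml"))
```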
qtype/cli.py CHANGED
@@ -7,9 +7,9 @@ import importlib
  import logging
  from pathlib import Path
 
- from qtype.base.logging import get_logger
+ from qtype.base.logging import configure_logging
 
- logger = get_logger("application.facade")
+ logger = logging.getLogger(__name__)
 
  try:
      from importlib.metadata import entry_points
@@ -59,9 +59,8 @@ def _discover_local_commands(subparsers: argparse._SubParsersAction) -> None:
                  f"Built-in command module {module_name} does not have a 'parser' function"
              )
      except Exception as e:
-         logging.error(
-             f"Failed to load built-in command module {module_name}: {e}",
-             exc_info=True,
+         logging.debug(
+             f"Failed to load built-in command module {module_name}: {e} -- you may need the mcp or interpreter extras."
          )
 
 
@@ -133,10 +132,7 @@ def main() -> None:
      args = parser.parse_args()
 
      # Set logging level based on user input
-     logging.basicConfig(
-         level=getattr(logging, args.log_level),
-         format="%(asctime)s - %(levelname)s: %(message)s",
-     )
+     configure_logging(level=args.log_level)
 
      # Dispatch to the selected subcommand
      args.func(args)
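The CLI now uses plain stdlib loggers at module level and defers handler setup to `configure_logging`. A rough sketch of the resulting pattern, based only on the signatures visible in the diffs above:

```python
import logging

from qtype.base.logging import configure_logging

# Module-level logger; no handlers attached here, so formatting and level
# are decided in one place instead of per-logger as get_logger() used to do.
logger = logging.getLogger(__name__)


def main() -> None:
    # One call at the entry point configures the whole process; the optional
    # format_string parameter can override the default format.
    configure_logging(level="DEBUG")
    logger.debug("command modules missing optional extras are now skipped quietly")
```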
qtype/commands/generate.py CHANGED
@@ -117,7 +117,10 @@ def run_generate_skill(args: argparse.Namespace) -> None:
      Args:
          args: Command-line arguments with 'output' path.
      """
-     from qtype.mcp.server import _docs_resource, _examples_resource
+     from qtype.base.resources import get_docs_resource, get_examples_resource
+
+     _docs_resource = get_docs_resource()
+     _examples_resource = get_examples_resource()
 
      output_path = Path(args.output) / "qtype-architect"
 
@@ -156,6 +159,7 @@ def generate_schema(args: argparse.Namespace) -> None:
          args (argparse.Namespace): Command-line arguments with an optional
              'output' attribute specifying the output file path.
      """
+     logger.info("Generating QType DSL JSON schema...")
      schema = Document.model_json_schema()
 
      # Add the $schema property to indicate JSON Schema version
@@ -235,6 +239,7 @@ def parser(subparsers: argparse._SubParsersAction) -> None:
          "-o",
          "--output",
          type=str,
+         default=None,
          help="Output file for the schema (default: stdout)",
      )
      schema_parser.set_defaults(func=generate_schema)
qtype/commands/run.py CHANGED
@@ -185,7 +185,7 @@ def run_flow(args: Any) -> None:
 
      # Display results
      if len(result_df) > 0:
-         logger.info(f"Processed {len(result_df)} em")
+         logger.info(f"Processed {len(result_df)} rows")
 
          # Remove 'row' and 'error' columns for display if all errors are None
          display_df = result_df.copy()
@@ -197,15 +197,37 @@ def run_flow(args: Any) -> None:
          if "row" in display_df.columns:
              display_df = display_df.drop(columns=["row"])
 
-         if len(display_df) > 1:
-             logger.info(f"\nResults:\n{display_df[0:10].to_string()}\n...")
-         else:
-             # Print the first row with column_name: value one per line
-             fmt_str = []
-             for col, val in display_df.iloc[0].items():
-                 fmt_str.append(f"{col}: {val}")
-             fmt_str = "\n".join(fmt_str)
-             logger.info(f"\nResults:\n{fmt_str}")
+         # Show summary for console display
+         logger.info(
+             f"\nResults summary: {len(display_df)} rows, "
+             f"{len(display_df.columns)} columns: {list(display_df.columns)}"
+         )
+
+         # Optionally show full output
+         if args.show_output:
+             # Truncate long strings for display
+             max_col_width = 100
+             for col in display_df.columns:
+                 display_df[col] = display_df[col].apply(
+                     lambda x: (
+                         f"{str(x)[:max_col_width]}..."
+                         if isinstance(x, str)
+                         and len(str(x)) > max_col_width
+                         else x
+                     )
+                 )
+
+             if len(display_df) > 1:
+                 logger.info(
+                     f"\nResults:\n{display_df[0:10].to_string()}\n..."
+                 )
+             else:
+                 # Print the first row with column_name: value one per line
+                 fmt_str = []
+                 for col, val in display_df.iloc[0].items():
+                     fmt_str.append(f"{col}: {val}")
+                 fmt_str = "\n".join(fmt_str)
+                 logger.info(f"\nResults:\n{fmt_str}")
 
      # Save the output
      if args.output:
@@ -267,6 +289,11 @@ def parser(subparsers: argparse._SubParsersAction) -> None:
          action="store_true",
          help="Show progress bars during flow execution.",
      )
+     cmd_parser.add_argument(
+         "--show-output",
+         action="store_true",
+         help="Display full output data in console (default: summary only).",
+     )
 
      cmd_parser.add_argument(
          "spec", type=str, help="Path to the QType YAML spec file."
qtype/docs/Gallery/dataflow_pipelines.md CHANGED
@@ -64,11 +64,24 @@ Example output:
 
  You'll notice that the output shows 1 message for `write_results` and 10 for the others. That is because it is reporting the number of messages _emitted_ from each step, and `write_results` is a sink that collects all messages.
 
- The final message of the output will be the result file where the data are written:
+ By default, QType shows a summary of the results. The final message will show:
 
  ```
  2026-01-16 11:23:35,151 - INFO: ✅ Flow execution completed successfully
- 2026-01-16 11:23:35,151 - INFO: Processed 1 em
+ 2026-01-16 11:23:35,151 - INFO: Processed 1 rows
+ 2026-01-16 11:23:35,152 - INFO:
+ Results summary: 1 rows, 1 columns: ['result_file']
+ ```
+
+ To see the full output data, add the `--show-output` flag:
+
+ ```bash
+ qtype run -i '{"output_path":"results.parquet"}' --progress --show-output examples/data_processing/dataflow_pipelines.qtype.yaml
+ ```
+
+ This will display:
+
+ ```
  2026-01-16 11:23:35,152 - INFO:
  Results:
  result_file: results.parquet
qtype/docs/Gallery/recipe_chatbot.md ADDED
@@ -0,0 +1,103 @@
+ # Retrieval Augmented Generation Chatbot
+
+ ## Overview
+
+ A complete RAG (Retrieval Augmented Generation) chatbot that answers cooking questions using a recipe collection from GitHub. The system ingests markdown recipe files, splits them into chunks, generates embeddings, stores them in a vector database, and provides conversational search with context-aware responses using memory to maintain conversation history.
+
+ ## Architecture
+
+ ```mermaid
+ --8<-- "Gallery/recipe_chatbot.mermaid"
+ ```
+
+ ## Complete Code
+
+ ```yaml
+ --8<-- "../examples/rag/recipe_chatbot.qtype.yaml"
+ ```
+
+ ## Running the Example
+
+ ### Prerequisites
+
+ Start Qdrant vector database locally:
+ ```bash
+ docker run -p 6333:6333 qdrant/qdrant
+ ```
+
+ Clone the recipe repository:
+ ```bash
+ git clone https://github.com/clarklab/chowdown.git
+ ```
+
+ ### Ingest Recipe Documents
+
+ Run the ingestion flow to populate the vector index:
+ ```bash
+ AWS_PROFILE=my_profile qtype run examples/rag/recipe_chatbot.qtype.yaml --flow recipe_ingestion --progress
+ ```
+
+ This will:
+ 1. Load all markdown files from `chowdown/_recipes/`
+ 2. Split them into 512-token chunks with 50-token overlap
+ 3. Generate embeddings using AWS Bedrock Titan
+ 4. Store vectors in Qdrant collection `chowdown_recipes`
+
+ You should see output similar to:
+ ```
+ 2026-02-04 06:38:06,222 - qtype.commands.run - INFO - Running flow from recipe_chatbot.qtype.yaml
+ 2026-02-04 06:38:06,315 - qtype.commands.run - INFO - Executing flow recipe_ingestion from recipe_chatbot.qtype.yaml
+ /Users/lou.kratz/repos/qtype-cicd-fix/.venv/lib/python3.13/site-packages/llama_index/vector_stores/qdrant/base.py:238: UserWarning: Api key is used with an insecure connection.
+ self._client = qdrant_client.QdrantClient(
+ /Users/lou.kratz/repos/qtype-cicd-fix/.venv/lib/python3.13/site-packages/llama_index/vector_stores/qdrant/base.py:241: UserWarning: Api key is used with an insecure connection.
+ self._aclient = qdrant_client.AsyncQdrantClient(
+ ╭─────────────────────────────────────────────── Flow Progress ────────────────────────────────────────────────╮
+ │                                                                                                               │
+ │ Step load_recipes 12.9 msg/s ▁▁▂▄▄▅▅▅▅▄▆▆▆▇▇█▇▇… ✔ 34 succeeded ✖ 0 errors ⟳ - hits ✗ - misses 0:00:02 │
+ │ Step split_recipes 14.9 msg/s ▁▁▁▃▂▅▅▅▆▅▆▆▇▇▇█▇▇… ✔ 39 succeeded ✖ 0 errors ⟳ - hits ✗ - misses 0:00:02 │
+ │ Step embed_chunks 18.7 msg/s ██▃▃▁▂▂▁▂▁▁▁▁▁▁▁▁▁… ✔ 39 succeeded ✖ 0 errors ⟳ - hits ✗ - misses 0:00:02 │
+ │ Step index_recipes 47.0 msg/s ████████▁ ✔ 39 succeeded ✖ 0 errors ⟳ - hits ✗ - misses 0:00:00 │
+ │                                                                                                               │
+ ╰──────────────────────────────────────────────────────────────────────────────────────────────────────────────╯
+ 2026-02-04 06:38:11,141 - qtype.commands.run - INFO - ✅ Flow execution completed successfully
+ 2026-02-04 06:38:11,141 - qtype.commands.run - INFO - Processed 39 rows
+ 2026-02-04 06:38:11,141 - qtype.commands.run - INFO -
+ Results summary: 39 rows, 1 columns: ['embedded_chunk']
+ ```
+
+ ### Start the Chatbot
+
+ Launch the conversational UI:
+ ```bash
+ AWS_PROFILE=my_profile qtype serve examples/rag/recipe_chatbot.qtype.yaml --flow recipe_chat
+ ```
+
+ Then open http://localhost:8000 and ask questions like:
+ - "What dessert recipes do you have?"
+ - "What can I make with chicken?"
+
+ ![A screenshot of the UI showing a user asking for a healthy recipe and the AI responding with bean sprout stir fry](recipe_chatbot.png)
+
+ ## Key Features
+
+ - **Conversational Interface**: Flow interface type that accumulates messages in `conversation_history` for stateful multi-turn chat
+ - **Memory**: Conversation buffer with `token_limit` (10,000) and `chat_history_token_ratio` (0.7) that auto-flushes oldest messages when limit exceeded
+ - **DocumentSource**: Loads markdown files via LlamaIndex `SimpleDirectoryReader` with `required_exts` file filter
+ - **DocumentSplitter**: Splits documents with `SentenceSplitter` using `chunk_size` (512) and `chunk_overlap` (50) parameters
+ - **DocumentEmbedder**: Generates embeddings with AWS Bedrock Titan, processes chunks concurrently via `num_workers` (5)
+ - **VectorIndex**: Qdrant vector store with `embedding_model` reference and dimensions (1024)
+ - **IndexUpsert**: Writes to vector index in batches via `batch_size` (25)
+ - **VectorSearch**: Semantic search with `default_top_k` (5) returns chunks by embedding distance
+ - **FieldExtractor**: Extracts text from ChatMessage using JSONPath `$.blocks[?(@.type == 'text')].content`
+ - **PromptTemplate**: Injects search results and query into template string for LLM context
+ - **LLMInference**: Calls model with `system_message` and `memory` reference for conversation history
+ - **RAGDocument**: Domain type with `content`, `file_id`, `file_name`, `metadata` fields
+ - **RAGChunk**: Domain type with `content`, `chunk_id`, `document_id`, `vector` fields
+ - **RAGSearchResult**: Domain type with `content` (RAGChunk), `doc_id`, `score` fields
+
+
+ ## Learn More
+
+ - Tutorial: [Building a Stateful Chatbot](../../Tutorials/building-a-stateful-chatbot.md)
+ - How-To: [Use Environment Variables](../../How-To%20Guides/Language%20Features/use-environment-variables.md)
+ - How-To: [Configure AWS Authentication](../../How-To%20Guides/Authentication/configure-aws-authentication.md)
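The `FieldExtractor` bullet above references the JSONPath `$.blocks[?(@.type == 'text')].content`. A plain-Python sketch of what that filter selects, using a hypothetical chat-message dict (the real ChatMessage structure may differ):

```python
# Hypothetical message shape: a list of typed blocks, as implied by the JSONPath.
chat_message = {
    "role": "user",
    "blocks": [
        {"type": "image", "url": "pancakes.png"},
        {"type": "text", "content": "What dessert recipes do you have?"},
    ],
}

# Equivalent of $.blocks[?(@.type == 'text')].content: keep only text blocks
# and pull out their content fields.
user_question = [
    block["content"]
    for block in chat_message["blocks"]
    if block.get("type") == "text"
]
print(user_question)  # ["What dessert recipes do you have?"]
```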
qtype/docs/Gallery/recipe_chatbot.mermaid ADDED
@@ -0,0 +1,62 @@
+ flowchart TD
+     subgraph APP ["📱 recipe_rag_chatbot"]
+         direction TB
+
+         subgraph FLOW_0 ["🔄 recipe_chat"]
+             direction LR
+             FLOW_0_START@{shape: circle, label: "▶️ Start"}
+             FLOW_0_S0@{shape: rect, label: "⚙️ extract_question"}
+             FLOW_0_S1@{shape: cyl, label: "🔎 search_recipes"}
+             FLOW_0_S2@{shape: doc, label: "📄 build_context_prompt"}
+             FLOW_0_S3@{shape: rounded, label: "✨ generate_response"}
+             FLOW_0_START -->|user_message| FLOW_0_S0
+             FLOW_0_S0 -->|user_question| FLOW_0_S1
+             FLOW_0_S1 -->|search_results| FLOW_0_S2
+             FLOW_0_S0 -->|user_question| FLOW_0_S2
+             FLOW_0_S2 -->|context_prompt| FLOW_0_S3
+         end
+
+         subgraph FLOW_1 ["🔄 recipe_ingestion"]
+             direction TB
+             FLOW_1_S0@{shape: rect, label: "⚙️ load_recipes"}
+             FLOW_1_S1@{shape: rect, label: "⚙️ split_recipes"}
+             FLOW_1_S2@{shape: rect, label: "⚙️ embed_chunks"}
+             FLOW_1_S3@{shape: rect, label: "💾 index_recipes"}
+             FLOW_1_S0 -->|recipe_document| FLOW_1_S1
+             FLOW_1_S1 -->|recipe_chunk| FLOW_1_S2
+             FLOW_1_S2 -->|embedded_chunk| FLOW_1_S3
+         end
+
+         subgraph RESOURCES ["🔧 Shared Resources"]
+             direction LR
+             AUTH_AWS_AUTH@{shape: hex, label: "🔐 aws_auth (AWS)"}
+             MODEL_CLAUDE_SONNET@{shape: rounded, label: "✨ claude_sonnet (aws-bedrock)" }
+             MODEL_CLAUDE_SONNET -.->|uses| AUTH_AWS_AUTH
+             MODEL_TITAN_EMBED@{shape: rounded, label: "✨ titan_embed (aws-bedrock)" }
+             MODEL_TITAN_EMBED -.->|uses| AUTH_AWS_AUTH
+             INDEX_RECIPE_INDEX@{shape: cyl, label: "🗄️ recipe_index"}
+             EMB_TITAN_EMBED@{shape: rounded, label: "🎯 titan_embed"}
+             INDEX_RECIPE_INDEX -.->|embeds| EMB_TITAN_EMBED
+             MEM_RECIPE_CHAT_MEMORY@{shape: win-pane, label: "🧠 recipe_chat_memory (10KT)"}
+         end
+
+     end
+
+     FLOW_0_S1 -.-> INDEX_RECIPE_INDEX
+     FLOW_0_S3 -.->|uses| MODEL_CLAUDE_SONNET
+     FLOW_0_S3 -.->|stores| MEM_RECIPE_CHAT_MEMORY
+     FLOW_1_S3 -.->|writes| INDEX_RECIPE_INDEX
+
+     %% Styling
+     classDef appBox fill:none,stroke:#495057,stroke-width:3px
+     classDef flowBox fill:#e1f5fe,stroke:#0277bd,stroke-width:2px
+     classDef llmNode fill:#f3e5f5,stroke:#7b1fa2,stroke-width:2px
+     classDef modelNode fill:#e8f5e8,stroke:#2e7d32,stroke-width:2px
+     classDef authNode fill:#fff3e0,stroke:#ef6c00,stroke-width:2px
+     classDef telemetryNode fill:#fce4ec,stroke:#c2185b,stroke-width:2px
+     classDef resourceBox fill:#f5f5f5,stroke:#616161,stroke-width:1px
+
+     class APP appBox
+     class FLOW_0 flowBox
+     class RESOURCES resourceBox
+     class TELEMETRY telemetryNode
qtype/docs/Gallery/recipe_chatbot.png ADDED
Binary file
qtype/docs/Gallery/research_assistant.md CHANGED
@@ -59,7 +59,7 @@ qtype validate examples/research_assistant/research_assistant.qtype.yaml
 
  # Run directly
  qtype run -i '{"topic":"Latest developments in retrieval augmented generation"}' \
-   examples/research_assistant/research_assistant.qtype.yaml
+   --show-output examples/research_assistant/research_assistant.qtype.yaml
  ```
 
  ### Example Output
qtype/docs/How To/Command Line Usage/pass_inputs_on_the_cli.md CHANGED
@@ -2,6 +2,8 @@
 
  Provide input values to your QType flows directly from the command line using JSON-formatted input data, enabling dynamic parameterization of applications without modifying YAML files.
 
+ **Note:** Inputs are optional. Flows with source steps (like `DocumentSource` or `SQLSource`) can run without any inputs, as these steps generate their own data.
+
  ### CLI Usage
 
  ```bash
@@ -20,10 +22,11 @@ qtype run -f analyze_data -i '{"threshold":0.85}' app.qtype.yaml
 
  ### Explanation
 
- - **`-i`, `--input`**: Accepts a JSON blob containing key-value pairs where keys match variable names declared in your flow's `inputs` field
+ - **`-i`, `--input`**: Accepts a JSON blob containing key-value pairs where keys match variable names declared in your flow's `inputs` field (optional - omit for flows with source steps)
  - **JSON format**: Must be valid JSON with double quotes for strings, properly escaped special characters
  - **Flow inputs**: The variables must match those declared in the flow's `inputs` list or the application's `inputs` list
  - **`-f`, `--flow`**: Specifies which flow to run when your application contains multiple flows (defaults to first flow if omitted)
+ - **Source steps**: Flows containing source steps like `DocumentSource`, `SQLSource`, or `FileSource` can run without inputs, as these steps generate data independently
 
  ## Complete Example
 
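Because `-i` must receive valid JSON with double-quoted strings, building the argument programmatically avoids shell-quoting mistakes. A small sketch using only the standard library (the input keys and `app.qtype.yaml` are placeholders from the doc above):

```python
import json
import subprocess

# json.dumps guarantees double quotes and proper escaping for the -i payload.
inputs = {"topic": 'Latest developments in "RAG"', "threshold": 0.85}

subprocess.run(
    ["qtype", "run", "-i", json.dumps(inputs), "app.qtype.yaml"],
    check=True,
)
```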
qtype/docs/How To/Data Processing/load_documents.md ADDED
@@ -0,0 +1,74 @@
+ # Load Documents
+
+ Load documents from files, directories, or external systems using LlamaIndex readers with DocumentSource.
+
+ **Note:** DocumentSource is a source step that generates data independently, so flows using it typically require no inputs.
+
+ ### QType YAML
+
+ ```yaml
+ steps:
+   - type: DocumentSource
+     id: load_docs
+     reader_module: llama_index.core.SimpleDirectoryReader
+     args:
+       input_dir: ./data
+       required_exts: [".md", ".txt"]
+       recursive: true
+     loader_args:
+       num_workers: 4
+     outputs:
+       - document
+ ```
+
+ ### Explanation
+
+ - **reader_module**: Python module path to a class that inherits from `llama_index.core.readers.base.BaseReader` (most common: `llama_index.core.SimpleDirectoryReader`)
+ - **args**: Arguments passed to the reader class constructor (e.g., `input_dir`, `required_exts`, `recursive`, `file_extractor`)
+ - **loader_args**: Arguments passed to the reader's `load_data()` method (e.g., `num_workers` for parallel processing)
+ - **outputs**: Variable to store loaded documents (type: `RAGDocument`) - DocumentSource fans out, emitting one message per document
+ - **Critical distinction**: Constructor args configure the reader instance; `load_data` args control how documents are loaded
+
+ ### Common Reader Modules
+
+ **SimpleDirectoryReader** (`llama_index.core.SimpleDirectoryReader`):
+ - Constructor args: `input_dir`, `input_files`, `required_exts`, `exclude`, `recursive`, `file_extractor`, `file_metadata`, `encoding`
+ - Loader args: `num_workers` (parallel processing)
+ - Supports 15+ file types including PDF, DOCX, CSV, Markdown, images, audio/video
+ - [Full documentation](https://developers.llamaindex.ai/python/framework/module_guides/loading/simpledirectoryreader/)
+
+ **JSONReader** (`llama_index.readers.json.JSONReader`):
+ - Constructor args: `levels_back`, `collapse_length`, `ensure_ascii`, `is_jsonl`, `clean_json`
+ - Loader args: `input_file`, `extra_info`
+ - Supports both JSON and JSONL (JSON Lines) formats
+ - [Full documentation](https://developers.llamaindex.ai/typescript/framework/modules/data/readers/json/)
+
+ ### Dynamic Arguments
+
+ You can pass flow variables as constructor arguments by including them in `args`. At runtime, QType merges message variables with the configured args:
+
+ ```yaml
+ variables:
+   - id: data_path
+     type: text
+
+ steps:
+   - type: DocumentSource
+     id: load_docs
+     reader_module: llama_index.core.SimpleDirectoryReader
+     args:
+       input_dir: data_path  # References variable from message
+     inputs: [data_path]
+ ```
+
+ ## Complete Example
+
+ ```yaml
+ --8<-- "../examples/data_processing/load_documents.qtype.yaml"
+ ```
+
+ ## See Also
+
+ - [DocumentSource Reference](../../components/DocumentSource.md)
+ - [DocumentSplitter How-To](chunk_documents.md)
+ - [RAG Tutorial](../../Tutorials/rag_tutorial.md)
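The constructor-versus-`load_data` split in this how-to maps directly onto how LlamaIndex readers are called. A rough Python equivalent of the YAML above, assuming LlamaIndex is installed and `./data` is the same illustrative directory:

```python
from llama_index.core import SimpleDirectoryReader

# "args" in the YAML become constructor arguments for the reader...
reader = SimpleDirectoryReader(
    input_dir="./data",
    required_exts=[".md", ".txt"],
    recursive=True,
)

# ...while "loader_args" go to load_data(); each returned Document roughly
# corresponds to one message emitted by the DocumentSource step.
documents = reader.load_data(num_workers=4)
print(f"loaded {len(documents)} documents")
```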