qtype 0.1.13__py3-none-any.whl → 0.1.14__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- qtype/base/__init__.py +8 -2
- qtype/base/logging.py +0 -17
- qtype/base/resources.py +193 -0
- qtype/cli.py +5 -9
- qtype/commands/generate.py +6 -1
- qtype/commands/run.py +37 -10
- qtype/docs/Gallery/dataflow_pipelines.md +15 -2
- qtype/docs/Gallery/recipe_chatbot.md +103 -0
- qtype/docs/Gallery/recipe_chatbot.mermaid +62 -0
- qtype/docs/Gallery/recipe_chatbot.png +0 -0
- qtype/docs/Gallery/research_assistant.md +1 -1
- qtype/docs/How To/Command Line Usage/pass_inputs_on_the_cli.md +4 -1
- qtype/docs/How To/Data Processing/load_documents.md +74 -0
- qtype/docs/How To/Data Processing/read_sql_databases.md +2 -0
- qtype/docs/Reference/cli.md +3 -2
- qtype/docs/Reference/plugins.md +0 -4
- qtype/docs/Reference/semantic-validation-rules.md +1 -6
- qtype/docs/Tutorials/01-first-qtype-application.md +1 -1
- qtype/docs/Tutorials/03-structured-data.md +1 -1
- qtype/docs/Tutorials/04-tools-and-function-calling.md +1 -1
- qtype/examples/conversational_ai/simple_chatbot_with_auth.qtype.yaml +48 -0
- qtype/examples/data_processing/load_documents.qtype.yaml +31 -0
- qtype/examples/invoke_models/invoke_embedding_aws.qtype.yaml +45 -0
- qtype/examples/rag/recipe_chatbot.qtype.yaml +216 -0
- qtype/interpreter/auth/aws.py +94 -17
- qtype/interpreter/auth/generic.py +11 -12
- qtype/interpreter/base/secrets.py +4 -2
- qtype/interpreter/conversions.py +15 -14
- qtype/interpreter/converters.py +1 -1
- qtype/interpreter/executors/bedrock_reranker_executor.py +17 -28
- qtype/interpreter/executors/document_embedder_executor.py +1 -12
- qtype/interpreter/executors/invoke_embedding_executor.py +23 -33
- qtype/interpreter/executors/llm_inference_executor.py +2 -0
- qtype/interpreter/executors/sql_source_executor.py +6 -2
- qtype/interpreter/flow.py +11 -1
- qtype/mcp/server.py +11 -158
- qtype/semantic/visualize.py +10 -3
- {qtype-0.1.13.dist-info → qtype-0.1.14.dist-info}/METADATA +2 -2
- {qtype-0.1.13.dist-info → qtype-0.1.14.dist-info}/RECORD +42 -33
- {qtype-0.1.13.dist-info → qtype-0.1.14.dist-info}/WHEEL +0 -0
- {qtype-0.1.13.dist-info → qtype-0.1.14.dist-info}/entry_points.txt +0 -0
- {qtype-0.1.13.dist-info → qtype-0.1.14.dist-info}/licenses/LICENSE +0 -0
qtype/base/__init__.py
CHANGED
@@ -3,12 +3,18 @@
 from __future__ import annotations
 
 from .exceptions import QTypeError, ValidationError
-from .
+from .resources import (
+    ResourceDirectory,
+    get_docs_resource,
+    get_examples_resource,
+)
 from .types import JSONValue
 
 __all__ = [
     "QTypeError",
     "ValidationError",
-    "get_logger",
     "JSONValue",
+    "ResourceDirectory",
+    "get_docs_resource",
+    "get_examples_resource",
 ]
qtype/base/logging.py
CHANGED
@@ -5,23 +5,6 @@ from __future__ import annotations
 import logging
 
 
-def get_logger(name: str) -> logging.Logger:
-    """Get a logger with the given name and consistent formatting."""
-    logger = logging.getLogger(f"qtype.{name}")
-
-    # Only configure if not already configured
-    if not logger.handlers:
-        handler = logging.StreamHandler()
-        formatter = logging.Formatter(
-            "%(asctime)s - %(name)s - %(levelname)s - %(message)s"
-        )
-        handler.setFormatter(formatter)
-        logger.addHandler(handler)
-        logger.setLevel(logging.INFO)
-
-    return logger
-
-
 def configure_logging(
     level: str = "INFO", format_string: str | None = None
 ) -> None:
qtype/base/resources.py
ADDED
@@ -0,0 +1,193 @@
+"""Resource directory access utilities for QType package resources."""
+
+from __future__ import annotations
+
+import re
+from functools import lru_cache
+from importlib.resources import files
+from pathlib import Path
+
+# Regex for pymdownx snippets: --8<-- "path/to/file"
+SNIPPET_REGEX = re.compile(r'--8<--\s+"([^"]+)"')
+
+
+class ResourceDirectory:
+    """Abstraction for accessing resource directories (docs, examples, etc.)."""
+
+    def __init__(
+        self, name: str, file_extension: str, resolve_snippets: bool = False
+    ):
+        """Initialize a resource directory.
+
+        Args:
+            name: Directory name (e.g., "docs", "examples")
+            file_extension: File extension to search for (e.g., ".md", ".yaml")
+            resolve_snippets: Whether to resolve MkDocs snippets in file content
+        """
+        self.name = name
+        self.file_extension = file_extension
+        self.resolve_snippets = resolve_snippets
+        self._path_cache: Path | None = None
+
+    def get_path(self) -> Path:
+        """Get the path to this resource directory.
+
+        Returns:
+            Path to the resource directory, trying installed package first,
+            then falling back to development path.
+        """
+        if self._path_cache is not None:
+            return self._path_cache
+
+        try:
+            # Try to get from installed package
+            resource_root = files("qtype") / self.name
+            # Check if it exists by trying to iterate
+            list(resource_root.iterdir())
+            self._path_cache = Path(str(resource_root))
+        except (FileNotFoundError, AttributeError, TypeError):
+            # Fall back to development path
+            self._path_cache = Path(__file__).parent.parent.parent / self.name
+
+        return self._path_cache
+
+    def get_file(self, file_path: str) -> str:
+        """Get the content of a specific file.
+
+        Args:
+            file_path: Relative path to the file from the resource root.
+
+        Returns:
+            The full content of the file.
+
+        Raises:
+            FileNotFoundError: If the specified file doesn't exist.
+            ValueError: If the path tries to access files outside the directory.
+        """
+        resource_path = self.get_path()
+
+        # Resolve the requested file path
+        requested_file = (resource_path / file_path).resolve()
+
+        # Security check: ensure the resolved path is within resource directory
+        try:
+            requested_file.relative_to(resource_path.resolve())
+        except ValueError as e:
+            raise ValueError(
+                f"Invalid path: '{file_path}' is outside {self.name} directory"
+            ) from e
+
+        if not requested_file.exists():
+            raise FileNotFoundError(
+                (
+                    f"{self.name.capitalize()} file not found: '{file_path}'. "
+                    f"Use list_{self.name} to see available files."
+                )
+            )
+
+        if not requested_file.is_file():
+            raise ValueError(f"Path is not a file: '{file_path}'")
+
+        content = requested_file.read_text(encoding="utf-8")
+
+        # Apply snippet resolution if enabled
+        if self.resolve_snippets:
+            content = _resolve_snippets(content, requested_file, self)
+
+        return content
+
+    def list_files(self) -> list[str]:
+        """List all files in this resource directory.
+
+        Returns:
+            Sorted list of relative paths to all files with the configured extension.
+
+        Raises:
+            FileNotFoundError: If the resource directory doesn't exist.
+        """
+        resource_path = self.get_path()
+
+        if not resource_path.exists():
+            raise FileNotFoundError(
+                (
+                    f"{self.name.capitalize()} directory not found: "
+                    f"{resource_path}"
+                )
+            )
+
+        # Find all files with the configured extension
+        pattern = f"*{self.file_extension}"
+        files_list = []
+        for file in resource_path.rglob(pattern):
+            # Get relative path from resource root
+            rel_path = file.relative_to(resource_path)
+            files_list.append(str(rel_path))
+
+        return sorted(files_list)
+
+
+def _resolve_snippets(
+    content: str, base_path: Path, docs_resource: ResourceDirectory
+) -> str:
+    """Recursively resolve MkDocs snippets in markdown content.
+
+    Mimics the behavior of pymdownx.snippets.
+
+    Args:
+        content: The markdown content to process
+        base_path: Path to the file being processed (for resolving relative paths)
+        docs_resource: The docs ResourceDirectory for resolving snippet paths
+
+    Returns:
+        Content with all snippets resolved
+    """
+    docs_root = docs_resource.get_path()
+    project_root = docs_root.parent
+
+    def replace_match(match: re.Match) -> str:
+        snippet_path = match.group(1)
+
+        # pymdownx logic: try relative to current file, then docs, then project
+        candidates = [
+            base_path.parent / snippet_path,  # Relative to the doc file
+            docs_root / snippet_path,  # Relative to docs root
+            project_root / snippet_path,  # Relative to project root
+        ]
+
+        for candidate in candidates:
+            if candidate.exists() and candidate.is_file():
+                # Recursively resolve snippets inside the included file
+                return _resolve_snippets(
+                    candidate.read_text(encoding="utf-8"),
+                    candidate,
+                    docs_resource,
+                )
+
+        return f"> [!WARNING] Could not resolve snippet: {snippet_path}"
+
+    return SNIPPET_REGEX.sub(replace_match, content)
+
+
+# Initialize singleton resource directories
+_docs_resource = ResourceDirectory("docs", ".md", resolve_snippets=True)
+_examples_resource = ResourceDirectory("examples", ".yaml")
+
+
+@lru_cache(maxsize=1)
+def get_docs_resource() -> ResourceDirectory:
+    """Get the singleton docs resource directory.
+
+    Returns:
+        ResourceDirectory instance for documentation files.
+    """
+    return _docs_resource
+
+
+@lru_cache(maxsize=1)
+def get_examples_resource() -> ResourceDirectory:
+    """Get the singleton examples resource directory.
+
+    Returns:
+        ResourceDirectory instance for example files.
+    """
+    return _examples_resource
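The new `ResourceDirectory` accessors are what `qtype generate` (see the `generate.py` change below) uses to read the docs and example YAML bundled in the wheel. A minimal usage sketch based on the API shown above; the `"Reference/cli.md"` path is only an illustration taken from the file list of this release:

```python
from qtype.base.resources import get_docs_resource, get_examples_resource

# Singleton accessors return preconfigured ResourceDirectory instances:
# docs resolve MkDocs snippets, examples are plain YAML.
docs = get_docs_resource()
examples = get_examples_resource()

# List packaged resource files (sorted relative paths).
print(docs.list_files()[:5])
print(examples.list_files()[:5])

# Read one docs file. get_file() raises ValueError for paths that escape
# the resource directory and FileNotFoundError for missing files.
content = docs.get_file("Reference/cli.md")  # hypothetical path for illustration
print(content[:200])
```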
qtype/cli.py
CHANGED
@@ -7,9 +7,9 @@ import importlib
 import logging
 from pathlib import Path
 
-from qtype.base.logging import
+from qtype.base.logging import configure_logging
 
-logger =
+logger = logging.getLogger(__name__)
 
 try:
     from importlib.metadata import entry_points
@@ -59,9 +59,8 @@ def _discover_local_commands(subparsers: argparse._SubParsersAction) -> None:
                 f"Built-in command module {module_name} does not have a 'parser' function"
             )
         except Exception as e:
-            logging.
-                f"Failed to load built-in command module {module_name}: {e}"
-                exc_info=True,
+            logging.debug(
+                f"Failed to load built-in command module {module_name}: {e} -- you may need the mcp or interpreter extras."
             )
 
 
@@ -133,10 +132,7 @@ def main() -> None:
     args = parser.parse_args()
 
     # Set logging level based on user input
-
-        level=getattr(logging, args.log_level),
-        format="%(asctime)s - %(levelname)s: %(message)s",
-    )
+    configure_logging(level=args.log_level)
 
     # Dispatch to the selected subcommand
     args.func(args)
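Taken together with the `logging.py` change above, the CLI now follows the standard-library pattern: modules create plain `logging.getLogger(__name__)` loggers and only the entry point attaches handlers. A small sketch of that pattern, assuming `configure_logging` keeps the signature shown in the `logging.py` diff:

```python
import logging

from qtype.base.logging import configure_logging

# Module-level logger: no handlers attached here, just a named logger.
logger = logging.getLogger(__name__)


def main() -> None:
    # One-time configuration at the entry point; in the real CLI the level
    # comes from the parsed --log-level argument.
    configure_logging(level="INFO")
    logger.info("logging configured")
```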
qtype/commands/generate.py
CHANGED
@@ -117,7 +117,10 @@ def run_generate_skill(args: argparse.Namespace) -> None:
     Args:
         args: Command-line arguments with 'output' path.
     """
-    from qtype.
+    from qtype.base.resources import get_docs_resource, get_examples_resource
+
+    _docs_resource = get_docs_resource()
+    _examples_resource = get_examples_resource()
 
     output_path = Path(args.output) / "qtype-architect"
 
@@ -156,6 +159,7 @@ def generate_schema(args: argparse.Namespace) -> None:
         args (argparse.Namespace): Command-line arguments with an optional
             'output' attribute specifying the output file path.
     """
+    logger.info("Generating QType DSL JSON schema...")
     schema = Document.model_json_schema()
 
     # Add the $schema property to indicate JSON Schema version
@@ -235,6 +239,7 @@ def parser(subparsers: argparse._SubParsersAction) -> None:
         "-o",
         "--output",
         type=str,
+        default=None,
         help="Output file for the schema (default: stdout)",
     )
     schema_parser.set_defaults(func=generate_schema)
qtype/commands/run.py
CHANGED
@@ -185,7 +185,7 @@ def run_flow(args: Any) -> None:
 
     # Display results
     if len(result_df) > 0:
-        logger.info(f"Processed {len(result_df)}
+        logger.info(f"Processed {len(result_df)} rows")
 
         # Remove 'row' and 'error' columns for display if all errors are None
        display_df = result_df.copy()
@@ -197,15 +197,37 @@
         if "row" in display_df.columns:
             display_df = display_df.drop(columns=["row"])
 
-
-
-
-
-
-
-
-
-
+        # Show summary for console display
+        logger.info(
+            f"\nResults summary: {len(display_df)} rows, "
+            f"{len(display_df.columns)} columns: {list(display_df.columns)}"
+        )
+
+        # Optionally show full output
+        if args.show_output:
+            # Truncate long strings for display
+            max_col_width = 100
+            for col in display_df.columns:
+                display_df[col] = display_df[col].apply(
+                    lambda x: (
+                        f"{str(x)[:max_col_width]}..."
+                        if isinstance(x, str)
+                        and len(str(x)) > max_col_width
+                        else x
+                    )
+                )
+
+            if len(display_df) > 1:
+                logger.info(
+                    f"\nResults:\n{display_df[0:10].to_string()}\n..."
+                )
+            else:
+                # Print the first row with column_name: value one per line
+                fmt_str = []
+                for col, val in display_df.iloc[0].items():
+                    fmt_str.append(f"{col}: {val}")
+                fmt_str = "\n".join(fmt_str)
+                logger.info(f"\nResults:\n{fmt_str}")
 
         # Save the output
         if args.output:
@@ -267,6 +289,11 @@ def parser(subparsers: argparse._SubParsersAction) -> None:
         action="store_true",
         help="Show progress bars during flow execution.",
     )
+    cmd_parser.add_argument(
+        "--show-output",
+        action="store_true",
+        help="Display full output data in console (default: summary only).",
+    )
 
     cmd_parser.add_argument(
         "spec", type=str, help="Path to the QType YAML spec file."
qtype/docs/Gallery/dataflow_pipelines.md
CHANGED
@@ -64,11 +64,24 @@
 
 You'll notice that the output shows 1 message for `write_results` and 10 for the others. That is because it is reporting the number of messages _emitted_ from each step, and `write_results` is a sink that collects all messages.
 
-
+By default, QType shows a summary of the results. The final message will show:
 
 ```
 2026-01-16 11:23:35,151 - INFO: ✅ Flow execution completed successfully
-2026-01-16 11:23:35,151 - INFO: Processed 1
+2026-01-16 11:23:35,151 - INFO: Processed 1 rows
+2026-01-16 11:23:35,152 - INFO:
+Results summary: 1 rows, 1 columns: ['result_file']
+```
+
+To see the full output data, add the `--show-output` flag:
+
+```bash
+qtype run -i '{"output_path":"results.parquet"}' --progress --show-output examples/data_processing/dataflow_pipelines.qtype.yaml
+```
+
+This will display:
+
+```
 2026-01-16 11:23:35,152 - INFO:
 Results:
 result_file: results.parquet
qtype/docs/Gallery/recipe_chatbot.md
ADDED
@@ -0,0 +1,103 @@
+# Retrieval Augmented Generation Chatbot
+
+## Overview
+
+A complete RAG (Retrieval Augmented Generation) chatbot that answers cooking questions using a recipe collection from GitHub. The system ingests markdown recipe files, splits them into chunks, generates embeddings, stores them in a vector database, and provides conversational search with context-aware responses using memory to maintain conversation history.
+
+## Architecture
+
+```mermaid
+--8<-- "Gallery/recipe_chatbot.mermaid"
+```
+
+## Complete Code
+
+```yaml
+--8<-- "../examples/rag/recipe_chatbot.qtype.yaml"
+```
+
+## Running the Example
+
+### Prerequisites
+
+Start Qdrant vector database locally:
+```bash
+docker run -p 6333:6333 qdrant/qdrant
+```
+
+Clone the recipe repository:
+```bash
+git clone https://github.com/clarklab/chowdown.git
+```
+
+### Ingest Recipe Documents
+
+Run the ingestion flow to populate the vector index:
+```bash
+AWS_PROFILE=my_profile qtype run examples/rag/recipe_chatbot.qtype.yaml --flow recipe_ingestion --progress
+```
+
+This will:
+1. Load all markdown files from `chowdown/_recipes/`
+2. Split them into 512-token chunks with 50-token overlap
+3. Generate embeddings using AWS Bedrock Titan
+4. Store vectors in Qdrant collection `chowdown_recipes`
+
+You should see the output similar to:
+```
+2026-02-04 06:38:06,222 - qtype.commands.run - INFO - Running flow from recipe_chatbot.qtype.yaml
+2026-02-04 06:38:06,315 - qtype.commands.run - INFO - Executing flow recipe_ingestion from recipe_chatbot.qtype.yaml
+/Users/lou.kratz/repos/qtype-cicd-fix/.venv/lib/python3.13/site-packages/llama_index/vector_stores/qdrant/base.py:238: UserWarning: Api key is used with an insecure connection.
+  self._client = qdrant_client.QdrantClient(
+/Users/lou.kratz/repos/qtype-cicd-fix/.venv/lib/python3.13/site-packages/llama_index/vector_stores/qdrant/base.py:241: UserWarning: Api key is used with an insecure connection.
+  self._aclient = qdrant_client.AsyncQdrantClient(
+╭─────────────────────────────────────────────── Flow Progress ────────────────────────────────────────────────╮
+│                                                                                                               │
+│ Step load_recipes 12.9 msg/s ▁▁▂▄▄▅▅▅▅▄▆▆▆▇▇█▇▇… ✔ 34 succeeded ✖ 0 errors ⟳ - hits ✗ - misses 0:00:02 │
+│ Step split_recipes 14.9 msg/s ▁▁▁▃▂▅▅▅▆▅▆▆▇▇▇█▇▇… ✔ 39 succeeded ✖ 0 errors ⟳ - hits ✗ - misses 0:00:02 │
+│ Step embed_chunks 18.7 msg/s ██▃▃▁▂▂▁▂▁▁▁▁▁▁▁▁▁… ✔ 39 succeeded ✖ 0 errors ⟳ - hits ✗ - misses 0:00:02 │
+│ Step index_recipes 47.0 msg/s ████████▁ ✔ 39 succeeded ✖ 0 errors ⟳ - hits ✗ - misses 0:00:00 │
+│                                                                                                               │
+╰──────────────────────────────────────────────────────────────────────────────────────────────────────────────╯
+2026-02-04 06:38:11,141 - qtype.commands.run - INFO - ✅ Flow execution completed successfully
+2026-02-04 06:38:11,141 - qtype.commands.run - INFO - Processed 39 rows
+2026-02-04 06:38:11,141 - qtype.commands.run - INFO -
+Results summary: 39 rows, 1 columns: ['embedded_chunk']
+```
+
+### Start the Chatbot
+
+Launch the conversational UI:
+```bash
+AWS_PROFILE=my_profile qtype serve examples/rag/recipe_chatbot.qtype.yaml --flow recipe_chat
+```
+
+Then open http://localhost:8000 and ask questions like:
+- "What dessert recipes do you have?"
+- "What can I make with chicken?"
+
+
+
+## Key Features
+
+- **Conversational Interface**: Flow interface type that accumulates messages in `conversation_history` for stateful multi-turn chat
+- **Memory**: Conversation buffer with `token_limit` (10,000) and `chat_history_token_ratio` (0.7) that auto-flushes oldest messages when limit exceeded
+- **DocumentSource**: Loads markdown files via LlamaIndex `SimpleDirectoryReader` with `required_exts` file filter
+- **DocumentSplitter**: Splits documents with `SentenceSplitter` using `chunk_size` (512) and `chunk_overlap` (50) parameters
+- **DocumentEmbedder**: Generates embeddings with AWS Bedrock Titan, processes chunks concurrently via `num_workers` (5)
+- **VectorIndex**: Qdrant vector store with `embedding_model` reference and dimensions (1024)
+- **IndexUpsert**: Writes to vector index in batches via `batch_size` (25)
+- **VectorSearch**: Semantic search with `default_top_k` (5) returns chunks by embedding distance
+- **FieldExtractor**: Extracts text from ChatMessage using JSONPath `$.blocks[?(@.type == 'text')].content`
+- **PromptTemplate**: Injects search results and query into template string for LLM context
+- **LLMInference**: Calls model with `system_message` and `memory` reference for conversation history
+- **RAGDocument**: Domain type with `content`, `file_id`, `file_name`, `metadata` fields
+- **RAGChunk**: Domain type with `content`, `chunk_id`, `document_id`, `vector` fields
+- **RAGSearchResult**: Domain type with `content` (RAGChunk), `doc_id`, `score` fields
+
+
+## Learn More
+
+- Tutorial: [Building a Stateful Chatbot](../../Tutorials/building-a-stateful-chatbot.md)
+- How-To: [Use Environment Variables](../../How-To%20Guides/Language%20Features/use-environment-variables.md)
+- How-To: [Configure AWS Authentication](../../How-To%20Guides/Authentication/configure-aws-authentication.md)
qtype/docs/Gallery/recipe_chatbot.mermaid
ADDED
@@ -0,0 +1,62 @@
+flowchart TD
+    subgraph APP ["📱 recipe_rag_chatbot"]
+        direction TB
+
+        subgraph FLOW_0 ["🔄 recipe_chat"]
+            direction LR
+            FLOW_0_START@{shape: circle, label: "▶️ Start"}
+            FLOW_0_S0@{shape: rect, label: "⚙️ extract_question"}
+            FLOW_0_S1@{shape: cyl, label: "🔎 search_recipes"}
+            FLOW_0_S2@{shape: doc, label: "📄 build_context_prompt"}
+            FLOW_0_S3@{shape: rounded, label: "✨ generate_response"}
+            FLOW_0_START -->|user_message| FLOW_0_S0
+            FLOW_0_S0 -->|user_question| FLOW_0_S1
+            FLOW_0_S1 -->|search_results| FLOW_0_S2
+            FLOW_0_S0 -->|user_question| FLOW_0_S2
+            FLOW_0_S2 -->|context_prompt| FLOW_0_S3
+        end
+
+        subgraph FLOW_1 ["🔄 recipe_ingestion"]
+            direction TB
+            FLOW_1_S0@{shape: rect, label: "⚙️ load_recipes"}
+            FLOW_1_S1@{shape: rect, label: "⚙️ split_recipes"}
+            FLOW_1_S2@{shape: rect, label: "⚙️ embed_chunks"}
+            FLOW_1_S3@{shape: rect, label: "💾 index_recipes"}
+            FLOW_1_S0 -->|recipe_document| FLOW_1_S1
+            FLOW_1_S1 -->|recipe_chunk| FLOW_1_S2
+            FLOW_1_S2 -->|embedded_chunk| FLOW_1_S3
+        end
+
+        subgraph RESOURCES ["🔧 Shared Resources"]
+            direction LR
+            AUTH_AWS_AUTH@{shape: hex, label: "🔐 aws_auth (AWS)"}
+            MODEL_CLAUDE_SONNET@{shape: rounded, label: "✨ claude_sonnet (aws-bedrock)" }
+            MODEL_CLAUDE_SONNET -.->|uses| AUTH_AWS_AUTH
+            MODEL_TITAN_EMBED@{shape: rounded, label: "✨ titan_embed (aws-bedrock)" }
+            MODEL_TITAN_EMBED -.->|uses| AUTH_AWS_AUTH
+            INDEX_RECIPE_INDEX@{shape: cyl, label: "🗄️ recipe_index"}
+            EMB_TITAN_EMBED@{shape: rounded, label: "🎯 titan_embed"}
+            INDEX_RECIPE_INDEX -.->|embeds| EMB_TITAN_EMBED
+            MEM_RECIPE_CHAT_MEMORY@{shape: win-pane, label: "🧠 recipe_chat_memory (10KT)"}
+        end
+
+    end
+
+    FLOW_0_S1 -.-> INDEX_RECIPE_INDEX
+    FLOW_0_S3 -.->|uses| MODEL_CLAUDE_SONNET
+    FLOW_0_S3 -.->|stores| MEM_RECIPE_CHAT_MEMORY
+    FLOW_1_S3 -.->|writes| INDEX_RECIPE_INDEX
+
+    %% Styling
+    classDef appBox fill:none,stroke:#495057,stroke-width:3px
+    classDef flowBox fill:#e1f5fe,stroke:#0277bd,stroke-width:2px
+    classDef llmNode fill:#f3e5f5,stroke:#7b1fa2,stroke-width:2px
+    classDef modelNode fill:#e8f5e8,stroke:#2e7d32,stroke-width:2px
+    classDef authNode fill:#fff3e0,stroke:#ef6c00,stroke-width:2px
+    classDef telemetryNode fill:#fce4ec,stroke:#c2185b,stroke-width:2px
+    classDef resourceBox fill:#f5f5f5,stroke:#616161,stroke-width:1px
+
+    class APP appBox
+    class FLOW_0 flowBox
+    class RESOURCES resourceBox
+    class TELEMETRY telemetryNode
qtype/docs/Gallery/recipe_chatbot.png
ADDED
Binary file
qtype/docs/Gallery/research_assistant.md
CHANGED
@@ -59,7 +59,7 @@ qtype validate examples/research_assistant/research_assistant.qtype.yaml
 
 # Run directly
 qtype run -i '{"topic":"Latest developments in retrieval augmented generation"}' \
-  examples/research_assistant/research_assistant.qtype.yaml
+  --show-output examples/research_assistant/research_assistant.qtype.yaml
 ```
 
 ### Example Output
qtype/docs/How To/Command Line Usage/pass_inputs_on_the_cli.md
CHANGED
@@ -2,6 +2,8 @@
 
 Provide input values to your QType flows directly from the command line using JSON-formatted input data, enabling dynamic parameterization of applications without modifying YAML files.
 
+**Note:** Inputs are optional. Flows with source steps (like `DocumentSource` or `SQLSource`) can run without any inputs, as these steps generate their own data.
+
 ### CLI Usage
 
 ```bash
@@ -20,10 +22,11 @@ qtype run -f analyze_data -i '{"threshold":0.85}' app.qtype.yaml
 
 ### Explanation
 
-- **`-i`, `--input`**: Accepts a JSON blob containing key-value pairs where keys match variable names declared in your flow's `inputs` field
+- **`-i`, `--input`**: Accepts a JSON blob containing key-value pairs where keys match variable names declared in your flow's `inputs` field (optional - omit for flows with source steps)
 - **JSON format**: Must be valid JSON with double quotes for strings, properly escaped special characters
 - **Flow inputs**: The variables must match those declared in the flow's `inputs` list or the application's `inputs` list
 - **`-f`, `--flow`**: Specifies which flow to run when your application contains multiple flows (defaults to first flow if omitted)
+- **Source steps**: Flows containing source steps like `DocumentSource`, `SQLSource`, or `FileSource` can run without inputs, as these steps generate data independently
 
 ## Complete Example
 
qtype/docs/How To/Data Processing/load_documents.md
ADDED
@@ -0,0 +1,74 @@
+# Load Documents
+
+Load documents from files, directories, or external systems using LlamaIndex readers with DocumentSource.
+
+**Note:** DocumentSource is a source step that generates data independently, so flows using it typically require no inputs.
+
+### QType YAML
+
+```yaml
+steps:
+  - type: DocumentSource
+    id: load_docs
+    reader_module: llama_index.core.SimpleDirectoryReader
+    args:
+      input_dir: ./data
+      required_exts: [".md", ".txt"]
+      recursive: true
+    loader_args:
+      num_workers: 4
+    outputs:
+      - document
+```
+
+### Explanation
+
+- **reader_module**: Python module path to a class that inherits from `llama_index.core.readers.base.BaseReader` (most common: `llama_index.core.SimpleDirectoryReader`)
+- **args**: Arguments passed to the reader class constructor (e.g., `input_dir`, `required_exts`, `recursive`, `file_extractor`)
+- **loader_args**: Arguments passed to the reader's `load_data()` method (e.g., `num_workers` for parallel processing)
+- **outputs**: Variable to store loaded documents (type: `RAGDocument`) - DocumentSource fans out, emitting one message per document
+- **Critical distinction**: Constructor args configure the reader instance; `load_data` args control how documents are loaded
+
+### Common Reader Modules
+
+**SimpleDirectoryReader** (`llama_index.core.SimpleDirectoryReader`):
+- Constructor args: `input_dir`, `input_files`, `required_exts`, `exclude`, `recursive`, `file_extractor`, `file_metadata`, `encoding`
+- Loader args: `num_workers` (parallel processing)
+- Supports 15+ file types including PDF, DOCX, CSV, Markdown, images, audio/video
+- [Full documentation](https://developers.llamaindex.ai/python/framework/module_guides/loading/simpledirectoryreader/)
+
+**JSONReader** (`llama_index.readers.json.JSONReader`):
+- Constructor args: `levels_back`, `collapse_length`, `ensure_ascii`, `is_jsonl`, `clean_json`
+- Loader args: `input_file`, `extra_info`
+- Supports both JSON and JSONL (JSON Lines) formats
+- [Full documentation](https://developers.llamaindex.ai/typescript/framework/modules/data/readers/json/)
+
+### Dynamic Arguments
+
+You can pass flow variables as constructor arguments by including them in `args`. At runtime, QType merges message variables with the configured args:
+
+```yaml
+variables:
+  - id: data_path
+    type: text
+
+steps:
+  - type: DocumentSource
+    id: load_docs
+    reader_module: llama_index.core.SimpleDirectoryReader
+    args:
+      input_dir: data_path  # References variable from message
+    inputs: [data_path]
+```
+
+## Complete Example
+
+```yaml
+--8<-- "../examples/data_processing/load_documents.qtype.yaml"
+```
+
+## See Also
+
+- [DocumentSource Reference](../../components/DocumentSource.md)
+- [DocumentSplitter How-To](chunk_documents.md)
+- [RAG Tutorial](../../Tutorials/rag_tutorial.md)
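The constructor vs. `load_data` distinction this new guide emphasizes maps directly onto plain LlamaIndex usage. A rough Python equivalent of the DocumentSource step configured above, assuming `llama-index` is installed and a `./data` directory exists (not a QType API, just the underlying reader calls):

```python
from llama_index.core import SimpleDirectoryReader

# Constructor args (DocumentSource `args`): configure which files the reader sees.
reader = SimpleDirectoryReader(
    input_dir="./data",
    required_exts=[".md", ".txt"],
    recursive=True,
)

# Loader args (DocumentSource `loader_args`): control how documents are loaded.
documents = reader.load_data(num_workers=4)

# QType fans these out as one message per document; here we just inspect them.
for doc in documents[:3]:
    print(doc.metadata.get("file_name"), len(doc.text))
```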