qtype 0.1.12__py3-none-any.whl → 0.1.14__py3-none-any.whl

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Files changed (272)
  1. qtype/` +0 -0
  2. qtype/application/__init__.py +0 -2
  3. qtype/application/converters/tools_from_api.py +28 -22
  4. qtype/application/converters/tools_from_module.py +66 -32
  5. qtype/base/__init__.py +8 -2
  6. qtype/base/logging.py +0 -17
  7. qtype/base/resources.py +193 -0
  8. qtype/cli.py +5 -9
  9. qtype/commands/generate.py +95 -7
  10. qtype/commands/run.py +153 -54
  11. qtype/docs/.pages +8 -0
  12. {docs → qtype/docs}/Concepts/mental-model-and-philosophy.md +1 -1
  13. qtype/docs/Contributing/.pages +4 -0
  14. {docs → qtype/docs}/Contributing/index.md +8 -1
  15. {docs → qtype/docs}/Gallery/dataflow_pipelines.md +18 -4
  16. qtype/docs/Gallery/recipe_chatbot.md +103 -0
  17. qtype/docs/Gallery/recipe_chatbot.mermaid +62 -0
  18. qtype/docs/Gallery/recipe_chatbot.png +0 -0
  19. {docs → qtype/docs}/Gallery/research_assistant.md +4 -5
  20. {docs → qtype/docs}/Gallery/simple_chatbot.md +3 -1
  21. {docs → qtype/docs}/How To/Authentication/configure_aws_authentication.md +2 -2
  22. {docs → qtype/docs}/How To/Authentication/use_api_key_authentication.md +2 -2
  23. {docs → qtype/docs}/How To/Command Line Usage/load_multiple_inputs_from_files.md +24 -9
  24. {docs → qtype/docs}/How To/Command Line Usage/pass_inputs_on_the_cli.md +7 -4
  25. {docs → qtype/docs}/How To/Command Line Usage/serve_with_auto_reload.md +3 -2
  26. {docs → qtype/docs}/How To/Data Processing/adjust_concurrency.md +3 -4
  27. {docs → qtype/docs}/How To/Data Processing/cache_step_results.md +2 -2
  28. {docs → qtype/docs}/How To/Data Processing/decode_json_xml.md +1 -1
  29. {docs → qtype/docs}/How To/Data Processing/explode_collections.md +2 -2
  30. {docs → qtype/docs}/How To/Data Processing/gather_results.md +4 -4
  31. qtype/docs/How To/Data Processing/invoke_other_flows.md +71 -0
  32. qtype/docs/How To/Data Processing/load_data_from_athena.md +49 -0
  33. qtype/docs/How To/Data Processing/load_documents.md +74 -0
  34. qtype/docs/How To/Data Processing/read_data_from_files.md +61 -0
  35. {docs → qtype/docs}/How To/Data Processing/read_sql_databases.md +4 -3
  36. {docs → qtype/docs}/How To/Data Processing/write_data_to_file.md +1 -2
  37. {docs → qtype/docs}/How To/Invoke Models/call_large_language_models.md +1 -1
  38. {docs → qtype/docs}/How To/Invoke Models/create_embeddings.md +1 -1
  39. {docs → qtype/docs}/How To/Invoke Models/reuse_prompts_with_templates.md +2 -3
  40. {docs → qtype/docs}/How To/Language Features/include_raw_text_from_other_files.md +2 -1
  41. {docs → qtype/docs}/How To/Language Features/reference_entities_by_id.md +2 -2
  42. qtype/docs/How To/Language Features/use_agent_skills.md +29 -0
  43. {docs → qtype/docs}/How To/Language Features/use_environment_variables.md +2 -1
  44. qtype/docs/How To/Language Features/use_optional_variables.md +42 -0
  45. {docs → qtype/docs}/How To/Language Features/use_qtype_mcp.md +4 -4
  46. {docs → qtype/docs}/How To/Observability & Debugging/trace_calls_with_open_telemetry.md +1 -1
  47. {docs → qtype/docs}/How To/Observability & Debugging/validate_qtype_yaml.md +3 -2
  48. {docs → qtype/docs}/How To/Observability & Debugging/visualize_application_architecture.md +1 -1
  49. {docs → qtype/docs}/How To/Qtype Server/serve_flows_as_apis.md +3 -3
  50. {docs → qtype/docs}/How To/Qtype Server/serve_flows_as_ui.md +2 -3
  51. {docs → qtype/docs}/How To/Qtype Server/use_conversational_interfaces.md +1 -4
  52. {docs → qtype/docs}/How To/Qtype Server/use_variables_with_ui_hints.md +3 -2
  53. {docs → qtype/docs}/How To/Tools & Integration/bind_tool_inputs_and_outputs.md +1 -2
  54. {docs → qtype/docs}/How To/Tools & Integration/create_tools_from_openapi_specifications.md +10 -14
  55. {docs → qtype/docs}/How To/Tools & Integration/create_tools_from_python_modules.md +5 -8
  56. {docs → qtype/docs}/Reference/cli.md +16 -17
  57. qtype/docs/Tutorials/.pages +1 -0
  58. {docs → qtype/docs}/Tutorials/01-first-qtype-application.md +4 -3
  59. {docs → qtype/docs}/Tutorials/02-conversational-chatbot.md +3 -3
  60. {docs → qtype/docs}/Tutorials/03-structured-data.md +10 -11
  61. {docs → qtype/docs}/Tutorials/04-tools-and-function-calling.md +13 -20
  62. {docs → qtype/docs}/components/APITool.md +1 -1
  63. qtype/docs/components/Aggregate.md +7 -0
  64. qtype/docs/components/Collect.md +6 -0
  65. qtype/docs/components/Construct.md +6 -0
  66. {docs → qtype/docs}/components/DocumentEmbedder.md +0 -1
  67. {docs → qtype/docs}/components/DocumentSplitter.md +0 -1
  68. qtype/docs/components/Explode.md +5 -0
  69. {docs → qtype/docs}/components/FieldExtractor.md +2 -1
  70. qtype/docs/components/InvokeFlow.md +8 -0
  71. qtype/docs/components/InvokeTool.md +8 -0
  72. {docs → qtype/docs}/components/PrimitiveTypeEnum.md +0 -1
  73. {docs → qtype/docs}/components/Source.md +0 -1
  74. {docs → qtype/docs}/components/Step.md +0 -1
  75. {docs → qtype/docs}/components/Tool.md +2 -2
  76. {docs → qtype/docs}/components/Variable.md +2 -0
  77. qtype/docs/legacy_how_tos/.pages +6 -0
  78. qtype/docs/skills/architect/SKILL.md +188 -0
  79. qtype/docs/skills/architect/references/cheatsheet.md +198 -0
  80. qtype/docs/skills/architect/references/patterns.md +29 -0
  81. qtype/docs/stylesheets/extra.css +27 -0
  82. qtype/dsl/linker.py +8 -0
  83. qtype/dsl/model.py +177 -84
  84. qtype/examples/conversational_ai/simple_chatbot_with_auth.qtype.yaml +48 -0
  85. qtype/examples/data_processing/athena_query.qtype.yaml +56 -0
  86. qtype/examples/data_processing/batch_inputs.csv +5 -0
  87. qtype/examples/data_processing/create_sample_db.py +129 -0
  88. qtype/examples/data_processing/invoke_other_flows.qtype.yaml +98 -0
  89. qtype/examples/data_processing/load_documents.qtype.yaml +31 -0
  90. qtype/examples/data_processing/reviews.db +0 -0
  91. qtype/examples/data_processing/sample_article.txt +1 -0
  92. qtype/examples/data_processing/sample_documents.jsonl +5 -0
  93. qtype/examples/invoke_models/invoke_embedding_aws.qtype.yaml +45 -0
  94. qtype/examples/language_features/optional_variables.qtype.yaml +32 -0
  95. qtype/examples/language_features/story_prompt.txt +6 -0
  96. qtype/examples/legacy/data/customers.csv +6 -0
  97. qtype/examples/legacy/echo/readme.md +29 -0
  98. qtype/examples/legacy/qtype_plugin_example.py +51 -0
  99. qtype/examples/legacy/sample_data.txt +43 -0
  100. qtype/examples/legacy/vertex/README.md +11 -0
  101. qtype/examples/rag/recipe_chatbot.qtype.yaml +216 -0
  102. qtype/examples/research_assistant/tavily.qtype.yaml +216 -0
  103. {examples → qtype/examples}/tutorials/03_structured_data.qtype.yaml +2 -2
  104. {examples → qtype/examples}/tutorials/04_tools_and_function_calling.qtype.yaml +5 -5
  105. qtype/interpreter/auth/aws.py +94 -17
  106. qtype/interpreter/auth/generic.py +11 -12
  107. qtype/interpreter/base/secrets.py +4 -2
  108. qtype/interpreter/base/stream_emitter.py +19 -13
  109. qtype/interpreter/conversions.py +15 -14
  110. qtype/interpreter/converters.py +142 -26
  111. qtype/interpreter/executors/agent_executor.py +2 -3
  112. qtype/interpreter/executors/aggregate_executor.py +3 -4
  113. qtype/interpreter/executors/bedrock_reranker_executor.py +17 -28
  114. qtype/interpreter/executors/construct_executor.py +15 -15
  115. qtype/interpreter/executors/doc_to_text_executor.py +1 -3
  116. qtype/interpreter/executors/document_embedder_executor.py +1 -12
  117. qtype/interpreter/executors/field_extractor_executor.py +13 -12
  118. qtype/interpreter/executors/file_source_executor.py +18 -31
  119. qtype/interpreter/executors/invoke_embedding_executor.py +24 -37
  120. qtype/interpreter/executors/invoke_flow_executor.py +2 -2
  121. qtype/interpreter/executors/invoke_tool_executor.py +19 -18
  122. qtype/interpreter/executors/llm_inference_executor.py +18 -18
  123. qtype/interpreter/executors/prompt_template_executor.py +1 -3
  124. qtype/interpreter/executors/sql_source_executor.py +6 -2
  125. qtype/interpreter/flow.py +11 -1
  126. qtype/interpreter/tools/function_tool_helper.py +11 -10
  127. qtype/interpreter/types.py +89 -4
  128. qtype/interpreter/typing.py +31 -32
  129. qtype/mcp/server.py +194 -86
  130. {schema → qtype/schema}/qtype.schema.json +77 -79
  131. qtype/semantic/checker.py +19 -0
  132. qtype/semantic/generate.py +3 -6
  133. qtype/semantic/model.py +26 -33
  134. qtype/semantic/resolver.py +7 -0
  135. qtype/semantic/visualize.py +18 -6
  136. {qtype-0.1.12.dist-info → qtype-0.1.14.dist-info}/METADATA +47 -46
  137. qtype-0.1.14.dist-info/RECORD +361 -0
  138. {qtype-0.1.12.dist-info → qtype-0.1.14.dist-info}/WHEEL +1 -2
  139. docs/How To/Data Processing/read_data_from_files.md +0 -35
  140. docs/components/Aggregate.md +0 -8
  141. docs/components/InvokeFlow.md +0 -8
  142. docs/components/InvokeTool.md +0 -8
  143. docs/components/ToolParameter.md +0 -6
  144. examples/research_assistant/tavily.qtype.yaml +0 -289
  145. qtype/application/facade.py +0 -177
  146. qtype-0.1.12.dist-info/RECORD +0 -325
  147. qtype-0.1.12.dist-info/top_level.txt +0 -1
  148. {docs → qtype/docs}/Contributing/roadmap.md +0 -0
  149. {docs → qtype/docs}/Decisions/ADR-001-Chat-vs-Completion-Endpoint-Features.md +0 -0
  150. {docs → qtype/docs}/Gallery/dataflow_pipelines.mermaid +0 -0
  151. {docs → qtype/docs}/Gallery/research_assistant.mermaid +0 -0
  152. {docs → qtype/docs}/Gallery/simple_chatbot.mermaid +0 -0
  153. {docs → qtype/docs}/How To/Language Features/include_qtype_yaml.md +0 -0
  154. {docs → qtype/docs}/How To/Observability & Debugging/visualize_example.mermaid +0 -0
  155. {docs → qtype/docs}/How To/Qtype Server/flow_as_ui.png +0 -0
  156. {docs → qtype/docs}/Reference/plugins.md +0 -0
  157. {docs → qtype/docs}/Reference/semantic-validation-rules.md +0 -0
  158. {docs → qtype/docs}/Tutorials/example_chat.png +0 -0
  159. {docs → qtype/docs}/Tutorials/index.md +0 -0
  160. {docs → qtype/docs}/components/APIKeyAuthProvider.md +0 -0
  161. {docs → qtype/docs}/components/AWSAuthProvider.md +0 -0
  162. {docs → qtype/docs}/components/AWSSecretManager.md +0 -0
  163. {docs → qtype/docs}/components/Agent.md +0 -0
  164. {docs → qtype/docs}/components/AggregateStats.md +0 -0
  165. {docs → qtype/docs}/components/Application.md +0 -0
  166. {docs → qtype/docs}/components/AuthorizationProvider.md +0 -0
  167. {docs → qtype/docs}/components/AuthorizationProviderList.md +0 -0
  168. {docs → qtype/docs}/components/BearerTokenAuthProvider.md +0 -0
  169. {docs → qtype/docs}/components/BedrockReranker.md +0 -0
  170. {docs → qtype/docs}/components/ChatContent.md +0 -0
  171. {docs → qtype/docs}/components/ChatMessage.md +0 -0
  172. {docs → qtype/docs}/components/ConstantPath.md +0 -0
  173. {docs → qtype/docs}/components/CustomType.md +0 -0
  174. {docs → qtype/docs}/components/Decoder.md +0 -0
  175. {docs → qtype/docs}/components/DecoderFormat.md +0 -0
  176. {docs → qtype/docs}/components/DocToTextConverter.md +0 -0
  177. {docs → qtype/docs}/components/Document.md +0 -0
  178. {docs → qtype/docs}/components/DocumentIndex.md +0 -0
  179. {docs → qtype/docs}/components/DocumentSearch.md +0 -0
  180. {docs → qtype/docs}/components/DocumentSource.md +0 -0
  181. {docs → qtype/docs}/components/Echo.md +0 -0
  182. {docs → qtype/docs}/components/Embedding.md +0 -0
  183. {docs → qtype/docs}/components/EmbeddingModel.md +0 -0
  184. {docs → qtype/docs}/components/FileSource.md +0 -0
  185. {docs → qtype/docs}/components/FileWriter.md +0 -0
  186. {docs → qtype/docs}/components/Flow.md +0 -0
  187. {docs → qtype/docs}/components/FlowInterface.md +0 -0
  188. {docs → qtype/docs}/components/Index.md +0 -0
  189. {docs → qtype/docs}/components/IndexUpsert.md +0 -0
  190. {docs → qtype/docs}/components/InvokeEmbedding.md +0 -0
  191. {docs → qtype/docs}/components/LLMInference.md +0 -0
  192. {docs → qtype/docs}/components/ListType.md +0 -0
  193. {docs → qtype/docs}/components/Memory.md +0 -0
  194. {docs → qtype/docs}/components/MessageRole.md +0 -0
  195. {docs → qtype/docs}/components/Model.md +0 -0
  196. {docs → qtype/docs}/components/ModelList.md +0 -0
  197. {docs → qtype/docs}/components/OAuth2AuthProvider.md +0 -0
  198. {docs → qtype/docs}/components/PromptTemplate.md +0 -0
  199. {docs → qtype/docs}/components/PythonFunctionTool.md +0 -0
  200. {docs → qtype/docs}/components/RAGChunk.md +0 -0
  201. {docs → qtype/docs}/components/RAGDocument.md +0 -0
  202. {docs → qtype/docs}/components/RAGSearchResult.md +0 -0
  203. {docs → qtype/docs}/components/Reranker.md +0 -0
  204. {docs → qtype/docs}/components/SQLSource.md +0 -0
  205. {docs → qtype/docs}/components/Search.md +0 -0
  206. {docs → qtype/docs}/components/SearchResult.md +0 -0
  207. {docs → qtype/docs}/components/SecretManager.md +0 -0
  208. {docs → qtype/docs}/components/SecretReference.md +0 -0
  209. {docs → qtype/docs}/components/TelemetrySink.md +0 -0
  210. {docs → qtype/docs}/components/ToolList.md +0 -0
  211. {docs → qtype/docs}/components/TypeList.md +0 -0
  212. {docs → qtype/docs}/components/VariableList.md +0 -0
  213. {docs → qtype/docs}/components/VectorIndex.md +0 -0
  214. {docs → qtype/docs}/components/VectorSearch.md +0 -0
  215. {docs → qtype/docs}/components/VertexAuthProvider.md +0 -0
  216. {docs → qtype/docs}/components/Writer.md +0 -0
  217. {docs → qtype/docs}/example_ui.png +0 -0
  218. {docs → qtype/docs}/index.md +0 -0
  219. {docs → qtype/docs}/legacy_how_tos/Configuration/modular-yaml.md +0 -0
  220. {docs → qtype/docs}/legacy_how_tos/Configuration/phoenix_projects.png +0 -0
  221. {docs → qtype/docs}/legacy_how_tos/Configuration/phoenix_traces.png +0 -0
  222. {docs → qtype/docs}/legacy_how_tos/Configuration/reference-by-id.md +0 -0
  223. {docs → qtype/docs}/legacy_how_tos/Configuration/telemetry-setup.md +0 -0
  224. {docs → qtype/docs}/legacy_how_tos/Data Types/custom-types.md +0 -0
  225. {docs → qtype/docs}/legacy_how_tos/Data Types/domain-types.md +0 -0
  226. {docs → qtype/docs}/legacy_how_tos/Debugging/visualize-apps.md +0 -0
  227. {docs → qtype/docs}/legacy_how_tos/Tools/api-tools.md +0 -0
  228. {docs → qtype/docs}/legacy_how_tos/Tools/python-tools.md +0 -0
  229. {examples → qtype/examples}/authentication/aws_authentication.qtype.yaml +0 -0
  230. {examples → qtype/examples}/conversational_ai/hello_world_chat.qtype.yaml +0 -0
  231. {examples → qtype/examples}/conversational_ai/simple_chatbot.qtype.yaml +0 -0
  232. {examples → qtype/examples}/data_processing/batch_processing.qtype.yaml +0 -0
  233. {examples → qtype/examples}/data_processing/cache_step_results.qtype.yaml +0 -0
  234. {examples → qtype/examples}/data_processing/collect_results.qtype.yaml +0 -0
  235. {examples → qtype/examples}/data_processing/dataflow_pipelines.qtype.yaml +0 -0
  236. {examples → qtype/examples}/data_processing/decode_json.qtype.yaml +0 -0
  237. {examples → qtype/examples}/data_processing/explode_items.qtype.yaml +0 -0
  238. {examples → qtype/examples}/data_processing/read_file.qtype.yaml +0 -0
  239. {examples → qtype/examples}/invoke_models/create_embeddings.qtype.yaml +0 -0
  240. {examples → qtype/examples}/invoke_models/simple_llm_call.qtype.yaml +0 -0
  241. {examples → qtype/examples}/language_features/include_raw.qtype.yaml +0 -0
  242. {examples → qtype/examples}/language_features/ui_hints.qtype.yaml +0 -0
  243. {examples → qtype/examples}/legacy/bedrock/data_analysis_with_telemetry.qtype.yaml +0 -0
  244. {examples → qtype/examples}/legacy/bedrock/hello_world.qtype.yaml +0 -0
  245. {examples → qtype/examples}/legacy/bedrock/hello_world_chat.qtype.yaml +0 -0
  246. {examples → qtype/examples}/legacy/bedrock/hello_world_chat_with_telemetry.qtype.yaml +0 -0
  247. {examples → qtype/examples}/legacy/bedrock/hello_world_chat_with_thinking.qtype.yaml +0 -0
  248. {examples → qtype/examples}/legacy/bedrock/hello_world_completion.qtype.yaml +0 -0
  249. {examples → qtype/examples}/legacy/bedrock/hello_world_completion_with_auth.qtype.yaml +0 -0
  250. {examples → qtype/examples}/legacy/bedrock/simple_agent_chat.qtype.yaml +0 -0
  251. {examples → qtype/examples}/legacy/chat_with_langfuse.qtype.yaml +0 -0
  252. {examples → qtype/examples}/legacy/data_processor.qtype.yaml +0 -0
  253. {examples → qtype/examples}/legacy/echo/debug_example.qtype.yaml +0 -0
  254. {examples → qtype/examples}/legacy/echo/prompt.qtype.yaml +0 -0
  255. {examples → qtype/examples}/legacy/echo/test.qtype.yaml +0 -0
  256. {examples → qtype/examples}/legacy/echo/video.qtype.yaml +0 -0
  257. {examples → qtype/examples}/legacy/field_extractor_example.qtype.yaml +0 -0
  258. {examples → qtype/examples}/legacy/multi_flow_example.qtype.yaml +0 -0
  259. {examples → qtype/examples}/legacy/openai/hello_world_chat.qtype.yaml +0 -0
  260. {examples → qtype/examples}/legacy/openai/hello_world_chat_with_telemetry.qtype.yaml +0 -0
  261. {examples → qtype/examples}/legacy/rag.qtype.yaml +0 -0
  262. {examples → qtype/examples}/legacy/time_utilities.qtype.yaml +0 -0
  263. {examples → qtype/examples}/legacy/vertex/hello_world_chat.qtype.yaml +0 -0
  264. {examples → qtype/examples}/legacy/vertex/hello_world_completion.qtype.yaml +0 -0
  265. {examples → qtype/examples}/legacy/vertex/hello_world_completion_with_auth.qtype.yaml +0 -0
  266. {examples → qtype/examples}/observability_debugging/trace_with_opentelemetry.qtype.yaml +0 -0
  267. {examples → qtype/examples}/research_assistant/research_assistant.qtype.yaml +0 -0
  268. {examples → qtype/examples}/research_assistant/tavily.oas.yaml +0 -0
  269. {examples → qtype/examples}/tutorials/01_hello_world.qtype.yaml +0 -0
  270. {examples → qtype/examples}/tutorials/02_conversational_chat.qtype.yaml +0 -0
  271. {qtype-0.1.12.dist-info → qtype-0.1.14.dist-info}/entry_points.txt +0 -0
  272. {qtype-0.1.12.dist-info → qtype-0.1.14.dist-info}/licenses/LICENSE +0 -0
@@ -2,37 +2,16 @@
2
2
 
3
3
  from __future__ import annotations
4
4
 
5
- from collections.abc import AsyncIterator
6
- from typing import Any, cast
5
+ from pathlib import Path
6
+ from typing import Any
7
7
 
8
+ import fsspec
8
9
  import pandas as pd
9
10
  from pydantic import BaseModel
10
11
 
11
12
  from qtype.interpreter.types import FlowMessage, Session
12
- from qtype.semantic.model import Flow
13
-
14
-
15
- async def dataframe_to_flow_messages(
16
- df: pd.DataFrame, session: Session
17
- ) -> AsyncIterator[FlowMessage]:
18
- """
19
- Convert a DataFrame to an async generator of FlowMessages.
20
-
21
- Each row in the DataFrame becomes a FlowMessage with the same session.
22
-
23
- Args:
24
- df: DataFrame where each row represents one set of inputs
25
- session: Session object to use for all messages
26
-
27
- Yields:
28
- FlowMessages, one per DataFrame row
29
- """
30
- # Use to_dict with orient='records' - much faster than iterrows
31
- # This returns a list of dicts directly without Series overhead
32
- records = cast(list[dict[str, Any]], df.to_dict(orient="records"))
33
-
34
- for record in records:
35
- yield FlowMessage(session=session, variables=record)
13
+ from qtype.interpreter.typing import convert_dict_to_typed_variables
14
+ from qtype.semantic.model import Flow, Variable
36
15
 
37
16
 
38
17
  def flow_messages_to_dataframe(
@@ -77,3 +56,140 @@ def flow_messages_to_dataframe(
77
56
  results.append(row_data)
78
57
 
79
58
  return pd.DataFrame(results)
59
+
60
+
61
+ def read_dataframe_from_file(
62
+ file_path: str,
63
+ ) -> pd.DataFrame:
64
+ """
65
+ Read a file into a pandas DataFrame.
66
+
67
+ Automatically detects file format based on MIME type and supports both
68
+ local and remote files via fsspec. Returns raw DataFrame without type
69
+ conversion.
70
+
71
+ Args:
72
+ file_path: Path to the file (local or remote, e.g., s3://bucket/file)
73
+
74
+ Returns:
75
+ DataFrame with data from the file
76
+
77
+ Raises:
78
+ ValueError: If file format is not supported or mime type detection fails
79
+ FileNotFoundError: If file does not exist
80
+
81
+ Supported formats:
82
+ - CSV (.csv)
83
+ - JSON (.json)
84
+ - JSONL (.jsonl, JSON Lines)
85
+ - Parquet (.parquet)
86
+ - Excel (.xlsx, .xls)
87
+
88
+ Examples:
89
+ >>> # Read CSV
90
+ >>> df = read_dataframe_from_file("data.csv")
91
+ >>>
92
+ >>> # Read from S3
93
+ >>> df = read_dataframe_from_file("s3://bucket/data.parquet")
94
+ """
95
+ import magic
96
+
97
+ ext_to_mime = {
98
+ ".csv": "text/csv",
99
+ ".json": "application/json",
100
+ ".jsonl": "application/jsonlines",
101
+ ".parquet": "application/vnd.parquet",
102
+ ".xlsx": "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
103
+ ".xls": "application/vnd.ms-excel",
104
+ }
105
+ # Detect MIME type - handle both local and remote files
106
+ # For remote files, we'll need to download a sample first
107
+ if file_path.startswith(("http://", "https://", "s3://", "gs://")):
108
+ # For remote files, infer from extension as fallback
109
+ extension = Path(file_path).suffix.lower()
110
+ # Map extensions to mime types
111
+ mime_type = ext_to_mime.get(extension, "application/octet-stream")
112
+ else:
113
+ # Local file - use magic to detect mime type
114
+ try:
115
+ mime_type = magic.Magic(mime=True).from_file(file_path)
116
+ except Exception as e:
117
+ # Fallback to extension-based detection
118
+ extension = Path(file_path).suffix.lower()
119
+ mime_type = ext_to_mime.get(extension, "application/octet-stream")
120
+ if mime_type == "application/octet-stream":
121
+ raise ValueError(
122
+ f"Could not determine file type for {file_path}: {e}"
123
+ )
124
+
125
+ # Open file with fsspec (supports local and remote files)
126
+ with fsspec.open(file_path, "rb") as file_handle:
127
+ # Read based on MIME type
128
+ if mime_type == "text/csv" or mime_type == "text/plain":
129
+ df = pd.read_csv(file_handle, keep_default_na=False) # type: ignore[arg-type]
130
+ elif mime_type in ["application/json", "application/jsonlines"]:
131
+ # Check if it's JSONL by extension
132
+ if Path(file_path).suffix.lower() == ".jsonl":
133
+ df = pd.read_json(
134
+ file_handle, # type: ignore[arg-type]
135
+ lines=True,
136
+ )
137
+ else:
138
+ df = pd.read_json(file_handle) # type: ignore[arg-type]
139
+ elif mime_type in [
140
+ "application/vnd.parquet",
141
+ "application/octet-stream",
142
+ ]:
143
+ # Parquet is often detected as octet-stream
144
+ df = pd.read_parquet(file_handle) # type: ignore[arg-type]
145
+ elif mime_type in [
146
+ "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
147
+ "application/vnd.ms-excel",
148
+ ]:
149
+ df = pd.read_excel(file_handle) # type: ignore[arg-type]
150
+ else:
151
+ raise ValueError(
152
+ f"Unsupported MIME type for file {file_path}: {mime_type}"
153
+ )
154
+
155
+ return df
156
+
157
+
158
+ def dataframe_to_flow_messages(
159
+ df: pd.DataFrame,
160
+ variables: list[Variable],
161
+ session: Session = Session(session_id="default"),
162
+ ) -> list[FlowMessage]:
163
+ """
164
+ Convert a DataFrame to FlowMessages with type conversion.
165
+
166
+ Each row in the DataFrame becomes a FlowMessage with variables converted
167
+ to their proper types based on the Variable definitions.
168
+
169
+ Args:
170
+ df: DataFrame with raw data
171
+ variables: List of Variable definitions for type conversion
172
+ session: Session to use for all FlowMessages (default: Session(session_id="default"))
173
+
174
+ Returns:
175
+ List of FlowMessages, one per row, with typed variables
176
+
177
+ Examples:
178
+ >>> from qtype.semantic.model import Variable
179
+ >>> from qtype.base.types import PrimitiveTypeEnum
180
+ >>> import pandas as pd
181
+ >>>
182
+ >>> df = pd.DataFrame({"age": ["30"], "score": ["95.5"]})
183
+ >>> vars = [
184
+ ... Variable(id="age", type=PrimitiveTypeEnum.int),
185
+ ... Variable(id="score", type=PrimitiveTypeEnum.float),
186
+ ... ]
187
+ >>> messages = dataframe_to_flow_messages(df, vars)
188
+ """
189
+ messages = []
190
+
191
+ for row_dict in df.to_dict(orient="records"):
192
+ typed_vars = convert_dict_to_typed_variables(row_dict, variables)
193
+ messages.append(FlowMessage(session=session, variables=typed_vars))
194
+
195
+ return messages
@@ -112,9 +112,8 @@ class AgentExecutor(StepExecutor, ToolExecutionMixin, FunctionToolHelper):
112
112
  # Convert input variables to chat messages
113
113
  inputs = []
114
114
  for input_var in self.step.inputs:
115
- value = message.variables.get(input_var.id)
116
- if value and isinstance(value, ChatMessage):
117
- inputs.append(to_chat_message(value))
115
+ value = message.get_variable(input_var.id)
116
+ inputs.append(to_chat_message(value))
118
117
 
119
118
  # Get session ID for memory isolation
120
119
  session_id = message.session.session_id
@@ -11,10 +11,9 @@ class AggregateExecutor(BatchedStepExecutor):
11
11
  """
12
12
  Executor for the Aggregate step.
13
13
 
14
- This is a terminal, many-to-one operation that reduces an entire stream
15
- to a single summary message containing counts of successful and failed
16
- messages. It processes all messages without modification during the
17
- processing phase, then emits a single aggregate summary during finalization.
14
+ A step that, after all messages have been processed,
15
+ returns a single message containing the counts of successful and failed
16
+ messages. Other messages are passed through unchanged.
18
17
  """
19
18
 
20
19
  def __init__(
@@ -2,7 +2,6 @@
2
2
 
3
3
  from __future__ import annotations
4
4
 
5
- import asyncio
6
5
  import logging
7
6
  from typing import AsyncIterator
8
7
 
@@ -56,10 +55,10 @@ class BedrockRerankerExecutor(StepExecutor):
56
55
  )
57
56
  return
58
57
 
59
- # Get session for region info
58
+ # Get region from auth or default session
60
59
  if self.step.auth is not None:
61
- with aws(self.step.auth, self.context.secret_manager) as s:
62
- region_name = s.region_name
60
+ with aws(self.step.auth, self.context.secret_manager) as creds:
61
+ region_name = creds.region_name
63
62
  else:
64
63
  import boto3
65
64
 
@@ -120,31 +119,21 @@ class BedrockRerankerExecutor(StepExecutor):
120
119
  },
121
120
  }
122
121
 
123
- def _call_bedrock_rerank():
124
- """Create client and call rerank in executor thread."""
125
- if self.step.auth is not None:
126
- with aws(self.step.auth, self.context.secret_manager) as s:
127
- client = s.client("bedrock-agent-runtime")
128
- return client.rerank(
129
- queries=queries,
130
- sources=documents,
131
- rerankingConfiguration=reranking_configuration,
132
- )
133
- else:
134
- import boto3
135
-
136
- session = boto3.Session()
137
- client = session.client("bedrock-agent-runtime")
138
- return client.rerank(
139
- queries=queries,
140
- sources=documents,
141
- rerankingConfiguration=reranking_configuration,
142
- )
122
+ # Create async bedrock client and call rerank
123
+ import aioboto3
143
124
 
144
- loop = asyncio.get_running_loop()
145
- response = await loop.run_in_executor(
146
- self.context.thread_pool, _call_bedrock_rerank
147
- )
125
+ creds_kwargs = {}
126
+ if self.step.auth is not None:
127
+ with aws(self.step.auth, self.context.secret_manager) as creds:
128
+ creds_kwargs = creds.as_kwargs()
129
+
130
+ session = aioboto3.Session(**creds_kwargs)
131
+ async with session.client("bedrock-agent-runtime") as client:
132
+ response = await client.rerank(
133
+ queries=queries,
134
+ sources=documents,
135
+ rerankingConfiguration=reranking_configuration,
136
+ )
148
137
 
149
138
  results = []
150
139
  for d in response["results"]:
@@ -4,7 +4,7 @@ from qtype.dsl.model import ListType
4
4
  from qtype.interpreter.base.base_step_executor import StepExecutor
5
5
  from qtype.interpreter.base.executor_context import ExecutorContext
6
6
  from qtype.interpreter.types import FlowMessage
7
- from qtype.interpreter.typing import instantiate_variable
7
+ from qtype.interpreter.typing import convert_dict_to_typed_variables
8
8
  from qtype.semantic.model import Construct
9
9
 
10
10
 
@@ -43,26 +43,26 @@ class ConstructExecutor(StepExecutor):
43
43
  isinstance(output_var.type, ListType)
44
44
  or len(self.step.inputs) == 1
45
45
  ):
46
- inputs = message.variables[self.step.inputs[0].id]
46
+ # Single input: pass value directly
47
+ data = {
48
+ output_var.id: message.variables[self.step.inputs[0].id]
49
+ }
47
50
  elif hasattr(output_var.type, "model_validate"):
48
51
  # This is a custom type (Pydantic model)
49
- # So input should be a dict
50
- input_values = {
51
- input_var.id: message.variables[input_var.id]
52
- for input_var in self.step.inputs
53
- }
54
- # use the mapping to convert variable names to
55
- inputs = {
56
- self.step.field_mapping.get(var_name, var_name): value # type: ignore[attr-defined]
57
- for var_name, value in input_values.items()
52
+ # field_bindings maps type field names to Variables
53
+ data = {
54
+ output_var.id: {
55
+ field_name: message.variables[var.id]
56
+ for field_name, var in self.step.field_bindings.items()
57
+ }
58
58
  }
59
59
  else:
60
60
  raise ValueError(
61
61
  "Construct step must have either a single input or output of a custom type."
62
62
  )
63
- constructed_value = instantiate_variable(output_var, inputs)
64
- yield message.copy_with_variables(
65
- {output_var.id: constructed_value}
66
- )
63
+
64
+ # Use convert_dict_to_typed_variables to validate and convert
65
+ result = convert_dict_to_typed_variables(data, self.step.outputs)
66
+ yield message.copy_with_variables(result)
67
67
  except Exception as e:
68
68
  yield message.copy_with_error(self.step.id, e)
@@ -49,9 +49,7 @@ class DocToTextConverterExecutor(StepExecutor):
49
49
 
50
50
  try:
51
51
  # Get the input document
52
- if input_id not in message.variables:
53
- raise ValueError(f"Input variable '{input_id}' is missing")
54
- doc = message.variables.get(input_id)
52
+ doc = message.get_variable(input_id)
55
53
  if not isinstance(doc, RAGDocument):
56
54
  raise ValueError(
57
55
  f"Input variable '{input_id}' must be a RAGDocument"
@@ -1,4 +1,3 @@
1
- import asyncio
2
1
  import logging
3
2
  from typing import AsyncIterator
4
3
 
@@ -60,17 +59,7 @@ class DocumentEmbedderExecutor(StepExecutor):
60
59
  Returns:
61
60
  The embedding vector as a list of floats.
62
61
  """
63
-
64
- # TODO: switch back to async once aws auth supports it.
65
- # https://github.com/bazaarvoice/qtype/issues/108
66
- def _call():
67
- return self.embedding_model.get_text_embedding(text=text)
68
-
69
- loop = asyncio.get_running_loop()
70
- response = await loop.run_in_executor(self.context.thread_pool, _call)
71
-
72
- return response
73
- # return await self.embedding_model.aget_text_embedding(text=text)
62
+ return await self.embedding_model.aget_text_embedding(text=text)
74
63
 
75
64
  async def process_message(
76
65
  self,
@@ -111,15 +111,11 @@ class FieldExtractorExecutor(StepExecutor):
111
111
  Multiple messages may be yielded if JSONPath matches multiple values.
112
112
  """
113
113
  input_id = self.step.inputs[0].id
114
- output_id = self.step.outputs[0].id
114
+ output_var = self.step.outputs[0]
115
115
 
116
116
  try:
117
117
  # Get the input value
118
- input_value = message.variables.get(input_id)
119
- if input_value is None:
120
- raise ValueError(
121
- f"Input variable '{input_id}' is not set or is None"
122
- )
118
+ input_value = message.get_variable(input_id)
123
119
 
124
120
  await self.stream_emitter.status(
125
121
  f"Extracting fields using JSONPath: {self.step.json_path}"
@@ -132,17 +128,20 @@ class FieldExtractorExecutor(StepExecutor):
132
128
  matches = self.jsonpath_expr.find(input_dict)
133
129
 
134
130
  if not matches:
135
- if self.step.fail_on_missing:
131
+ if output_var.optional:
132
+ # Yield message with None output
133
+ await self.stream_emitter.status(
134
+ "JSONPath matched 0 value(s)"
135
+ )
136
+ yield message.copy_with_variables({output_var.id: None})
137
+ return
138
+ else:
136
139
  raise ValueError(
137
140
  (
138
141
  f"JSONPath expression '{self.step.json_path}' "
139
142
  f"did not match any data in input"
140
143
  )
141
144
  )
142
- else:
143
- # Yield message with None output
144
- yield message.copy_with_variables({output_id: None})
145
- return
146
145
 
147
146
  await self.stream_emitter.status(
148
147
  f"JSONPath matched {len(matches)} value(s)"
@@ -156,7 +155,9 @@ class FieldExtractorExecutor(StepExecutor):
156
155
  output_value = self._construct_output(extracted_data)
157
156
 
158
157
  # Yield message with the constructed output
159
- yield message.copy_with_variables({output_id: output_value})
158
+ yield message.copy_with_variables(
159
+ {output_var.id: output_value}
160
+ )
160
161
 
161
162
  except Exception as e:
162
163
  # Emit error event to stream so frontend can display it
@@ -1,13 +1,13 @@
1
1
  from __future__ import annotations
2
2
 
3
- from pathlib import Path
4
3
  from typing import AsyncIterator
5
4
 
6
- import fsspec
7
- import pandas as pd
8
-
9
5
  from qtype.interpreter.base.base_step_executor import StepExecutor
10
6
  from qtype.interpreter.base.executor_context import ExecutorContext
7
+ from qtype.interpreter.converters import (
8
+ dataframe_to_flow_messages,
9
+ read_dataframe_from_file,
10
+ )
11
11
  from qtype.interpreter.types import FlowMessage
12
12
  from qtype.semantic.model import ConstantPath, FileSource
13
13
 
@@ -37,8 +37,6 @@ class FileSourceExecutor(StepExecutor):
37
37
  Yields:
38
38
  FlowMessages with the results of processing.
39
39
  """
40
- output_columns = {output.id for output in self.step.outputs}
41
-
42
40
  # get the path
43
41
  if isinstance(self.step.path, ConstantPath): # type: ignore[attr-defined]
44
42
  file_path = self.step.path # type: ignore[attr-defined]
@@ -55,30 +53,16 @@ class FileSourceExecutor(StepExecutor):
55
53
  f"Reading file from path: {file_path}"
56
54
  )
57
55
 
58
- # Determine file format from extension
56
+ # Get file path as string
59
57
  file_path_str = (
60
58
  file_path.uri if isinstance(file_path, ConstantPath) else file_path
61
59
  )
62
- extension = Path(file_path_str).suffix.lower()
63
60
 
64
- # Use fsspec to open the file and read with pandas
65
- with fsspec.open(file_path_str, "rb") as file_handle:
66
- if extension == ".csv":
67
- df = pd.read_csv(file_handle) # type: ignore[arg-type]
68
- elif extension == ".parquet":
69
- df = pd.read_parquet(file_handle) # type: ignore[arg-type]
70
- elif extension == ".json":
71
- df = pd.read_json(file_handle) # type: ignore[arg-type]
72
- elif extension == ".jsonl":
73
- df = pd.read_json(
74
- file_handle,
75
- lines=True, # type: ignore[arg-type]
76
- )
77
- else:
78
- # Default to parquet if no extension or unknown
79
- df = pd.read_parquet(file_handle) # type: ignore[arg-type]
61
+ # Read file into DataFrame using helper function
62
+ df = read_dataframe_from_file(file_path_str)
80
63
 
81
- # confirm the outputs exist in the dataframe
64
+ # Validate that expected output columns are present
65
+ output_columns = {output.id for output in self.step.outputs}
82
66
  columns = set(df.columns)
83
67
  missing_columns = output_columns - columns
84
68
  if missing_columns:
@@ -90,12 +74,15 @@ class FileSourceExecutor(StepExecutor):
90
74
  )
91
75
  )
92
76
 
93
- for row in df.to_dict(orient="records"):
94
- # Filter to only the expected output columns if they exist
95
- row = {
96
- str(k): v for k, v in row.items() if str(k) in output_columns
97
- }
98
- yield message.copy_with_variables(new_variables=row)
77
+ # Convert DataFrame to FlowMessages with type conversion
78
+ flow_messages = dataframe_to_flow_messages(
79
+ df, self.step.outputs, session=message.session
80
+ )
81
+
82
+ # Yield each message
83
+ for flow_message in flow_messages:
84
+ yield flow_message
85
+
99
86
  await self.stream_emitter.status(
100
87
  f"Emitted {len(df)} rows from: {file_path_str}"
101
88
  )
@@ -1,4 +1,3 @@
1
- import asyncio
2
1
  from typing import AsyncIterator
3
2
 
4
3
  from openinference.semconv.trace import OpenInferenceSpanKindValues
@@ -52,46 +51,34 @@ class InvokeEmbeddingExecutor(StepExecutor):
52
51
 
53
52
  try:
54
53
  # Get the input value
55
- input_value = message.variables.get(input_id)
54
+ input_value = message.get_variable(input_id)
56
55
 
57
- if input_value is None:
58
- raise ValueError(f"Input variable '{input_id}' is missing")
59
-
60
- def _call(input_value=input_value):
61
- # Generate embedding based on input type
62
- if input_type == PrimitiveTypeEnum.text:
63
- if not isinstance(input_value, str):
64
- input_value = str(input_value)
65
- vector = self.embedding_model.get_text_embedding(
66
- text=input_value
67
- )
68
- content = input_value
69
- elif input_type == PrimitiveTypeEnum.image:
70
- # For image embeddings
71
- vector = self.embedding_model.get_image_embedding(
72
- image_path=input_value
73
- )
74
- content = input_value
75
- else:
76
- raise ValueError(
77
- (
78
- f"Unsupported input type for embedding: "
79
- f"{input_type}. Must be 'text' or 'image'."
80
- )
56
+ # Generate embedding based on input type
57
+ if input_type == PrimitiveTypeEnum.text:
58
+ if not isinstance(input_value, str):
59
+ input_value = str(input_value)
60
+ vector = await self.embedding_model.aget_text_embedding(
61
+ text=input_value
62
+ )
63
+ content = input_value
64
+ elif input_type == PrimitiveTypeEnum.image:
65
+ # For image embeddings
66
+ vector = await self.embedding_model.aget_image_embedding(
67
+ image_path=input_value
68
+ )
69
+ content = input_value
70
+ else:
71
+ raise ValueError(
72
+ (
73
+ f"Unsupported input type for embedding: "
74
+ f"{input_type}. Must be 'text' or 'image'."
81
75
  )
82
-
83
- # Create the Embedding object
84
- embedding = Embedding(
85
- vector=vector,
86
- content=content,
87
76
  )
88
- return embedding
89
77
 
90
- # TODO: switch back to async once aws auth supports it.
91
- # https://github.com/bazaarvoice/qtype/issues/108
92
- loop = asyncio.get_running_loop()
93
- embedding = await loop.run_in_executor(
94
- self.context.thread_pool, _call
78
+ # Create the Embedding object
79
+ embedding = Embedding(
80
+ vector=vector,
81
+ content=content,
95
82
  )
96
83
 
97
84
  # Yield the result
@@ -34,7 +34,7 @@ class InvokeFlowExecutor(StepExecutor):
34
34
  initial = message.copy_with_variables(
35
35
  {
36
36
  id: message.variables.get(var.id)
37
- for var, id in self.step.input_bindings.items()
37
+ for id, var in self.step.input_bindings.items()
38
38
  }
39
39
  )
40
40
  # Pass through context (already available as self.context)
@@ -46,6 +46,6 @@ class InvokeFlowExecutor(StepExecutor):
46
46
  yield msg.copy_with_variables(
47
47
  {
48
48
  var.id: msg.variables.get(id)
49
- for var, id in self.step.output_bindings.items()
49
+ for id, var in self.step.output_bindings.items()
50
50
  }
51
51
  )
@@ -247,28 +247,26 @@ class InvokeToolExecutor(StepExecutor, ToolExecutionMixin):
247
247
  """
248
248
  tool_inputs = {}
249
249
 
250
- for tool_param_name, step_var_id in self.step.input_bindings.items():
250
+ for tool_param_name, step_variable in self.step.input_bindings.items():
251
251
  # Get tool parameter definition
252
- tool_param = self.step.tool.inputs.get(tool_param_name)
252
+ tool_param = next(
253
+ (p for p in self.step.tool.inputs if p.id == tool_param_name),
254
+ None,
255
+ )
253
256
  if not tool_param:
254
257
  raise ValueError(
255
258
  f"Tool parameter '{tool_param_name}' not defined in tool"
256
259
  )
257
260
 
258
261
  # Get value from message variables
259
- value = message.variables.get(step_var_id)
260
-
261
- # Handle missing values
262
- if value is None:
263
- if not tool_param.optional:
264
- raise ValueError(
265
- (
266
- f"Required input '{step_var_id}' for tool "
267
- f"parameter '{tool_param_name}' is missing"
268
- )
269
- )
270
- # Skip optional parameters that are missing
271
- continue
262
+ # Use default=None for optional params, let get_variable raise for required
263
+ if tool_param.optional:
264
+ value = message.get_variable(step_variable.id, default=None)
265
+ if value is None:
266
+ # Skip optional parameters that are unset
267
+ continue
268
+ else:
269
+ value = message.get_variable(step_variable.id)
272
270
 
273
271
  tool_inputs[tool_param_name] = value
274
272
 
@@ -288,9 +286,12 @@ class InvokeToolExecutor(StepExecutor, ToolExecutionMixin):
288
286
  """
289
287
  output_vars = {}
290
288
 
291
- for tool_param_name, step_var_id in self.step.output_bindings.items():
289
+ for tool_param_name, step_var in self.step.output_bindings.items():
292
290
  # Get tool parameter definition
293
- tool_param = self.step.tool.outputs.get(tool_param_name)
291
+ tool_param = next(
292
+ (p for p in self.step.tool.outputs if p.id == tool_param_name),
293
+ None,
294
+ )
294
295
  if not tool_param:
295
296
  raise ValueError(
296
297
  f"Tool parameter '{tool_param_name}' not defined in tool"
@@ -311,7 +312,7 @@ class InvokeToolExecutor(StepExecutor, ToolExecutionMixin):
311
312
  value = result
312
313
 
313
314
  if value is not None:
314
- output_vars[step_var_id] = value
315
+ output_vars[step_var.id] = value
315
316
 
316
317
  return output_vars
317
318