qtype 0.1.12__py3-none-any.whl → 0.1.13__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (252) hide show
  1. qtype/` +0 -0
  2. qtype/application/__init__.py +0 -2
  3. qtype/application/converters/tools_from_api.py +28 -22
  4. qtype/application/converters/tools_from_module.py +66 -32
  5. qtype/commands/generate.py +90 -7
  6. qtype/commands/run.py +116 -44
  7. qtype/docs/.pages +8 -0
  8. {docs → qtype/docs}/Concepts/mental-model-and-philosophy.md +1 -1
  9. qtype/docs/Contributing/.pages +4 -0
  10. {docs → qtype/docs}/Contributing/index.md +8 -1
  11. {docs → qtype/docs}/Gallery/dataflow_pipelines.md +3 -2
  12. {docs → qtype/docs}/Gallery/research_assistant.md +3 -4
  13. {docs → qtype/docs}/Gallery/simple_chatbot.md +3 -1
  14. {docs → qtype/docs}/How To/Authentication/configure_aws_authentication.md +2 -2
  15. {docs → qtype/docs}/How To/Authentication/use_api_key_authentication.md +2 -2
  16. {docs → qtype/docs}/How To/Command Line Usage/load_multiple_inputs_from_files.md +24 -9
  17. {docs → qtype/docs}/How To/Command Line Usage/pass_inputs_on_the_cli.md +3 -3
  18. {docs → qtype/docs}/How To/Command Line Usage/serve_with_auto_reload.md +3 -2
  19. {docs → qtype/docs}/How To/Data Processing/adjust_concurrency.md +3 -4
  20. {docs → qtype/docs}/How To/Data Processing/cache_step_results.md +2 -2
  21. {docs → qtype/docs}/How To/Data Processing/decode_json_xml.md +1 -1
  22. {docs → qtype/docs}/How To/Data Processing/explode_collections.md +2 -2
  23. {docs → qtype/docs}/How To/Data Processing/gather_results.md +4 -4
  24. qtype/docs/How To/Data Processing/invoke_other_flows.md +71 -0
  25. qtype/docs/How To/Data Processing/load_data_from_athena.md +49 -0
  26. qtype/docs/How To/Data Processing/read_data_from_files.md +61 -0
  27. {docs → qtype/docs}/How To/Data Processing/read_sql_databases.md +2 -3
  28. {docs → qtype/docs}/How To/Data Processing/write_data_to_file.md +1 -2
  29. {docs → qtype/docs}/How To/Invoke Models/call_large_language_models.md +1 -1
  30. {docs → qtype/docs}/How To/Invoke Models/create_embeddings.md +1 -1
  31. {docs → qtype/docs}/How To/Invoke Models/reuse_prompts_with_templates.md +2 -3
  32. {docs → qtype/docs}/How To/Language Features/include_raw_text_from_other_files.md +2 -1
  33. {docs → qtype/docs}/How To/Language Features/reference_entities_by_id.md +2 -2
  34. qtype/docs/How To/Language Features/use_agent_skills.md +29 -0
  35. {docs → qtype/docs}/How To/Language Features/use_environment_variables.md +2 -1
  36. qtype/docs/How To/Language Features/use_optional_variables.md +42 -0
  37. {docs → qtype/docs}/How To/Language Features/use_qtype_mcp.md +4 -4
  38. {docs → qtype/docs}/How To/Observability & Debugging/trace_calls_with_open_telemetry.md +1 -1
  39. {docs → qtype/docs}/How To/Observability & Debugging/validate_qtype_yaml.md +3 -2
  40. {docs → qtype/docs}/How To/Observability & Debugging/visualize_application_architecture.md +1 -1
  41. {docs → qtype/docs}/How To/Qtype Server/serve_flows_as_apis.md +3 -3
  42. {docs → qtype/docs}/How To/Qtype Server/serve_flows_as_ui.md +2 -3
  43. {docs → qtype/docs}/How To/Qtype Server/use_conversational_interfaces.md +1 -4
  44. {docs → qtype/docs}/How To/Qtype Server/use_variables_with_ui_hints.md +3 -2
  45. {docs → qtype/docs}/How To/Tools & Integration/bind_tool_inputs_and_outputs.md +1 -2
  46. {docs → qtype/docs}/How To/Tools & Integration/create_tools_from_openapi_specifications.md +10 -14
  47. {docs → qtype/docs}/How To/Tools & Integration/create_tools_from_python_modules.md +5 -8
  48. {docs → qtype/docs}/Reference/cli.md +13 -15
  49. {docs → qtype/docs}/Reference/plugins.md +4 -0
  50. {docs → qtype/docs}/Reference/semantic-validation-rules.md +6 -1
  51. qtype/docs/Tutorials/.pages +1 -0
  52. {docs → qtype/docs}/Tutorials/01-first-qtype-application.md +3 -2
  53. {docs → qtype/docs}/Tutorials/02-conversational-chatbot.md +3 -3
  54. {docs → qtype/docs}/Tutorials/03-structured-data.md +9 -10
  55. {docs → qtype/docs}/Tutorials/04-tools-and-function-calling.md +12 -19
  56. {docs → qtype/docs}/components/APITool.md +1 -1
  57. qtype/docs/components/Aggregate.md +7 -0
  58. qtype/docs/components/Collect.md +6 -0
  59. qtype/docs/components/Construct.md +6 -0
  60. {docs → qtype/docs}/components/DocumentEmbedder.md +0 -1
  61. {docs → qtype/docs}/components/DocumentSplitter.md +0 -1
  62. qtype/docs/components/Explode.md +5 -0
  63. {docs → qtype/docs}/components/FieldExtractor.md +2 -1
  64. qtype/docs/components/InvokeFlow.md +8 -0
  65. qtype/docs/components/InvokeTool.md +8 -0
  66. {docs → qtype/docs}/components/PrimitiveTypeEnum.md +0 -1
  67. {docs → qtype/docs}/components/Source.md +0 -1
  68. {docs → qtype/docs}/components/Step.md +0 -1
  69. {docs → qtype/docs}/components/Tool.md +2 -2
  70. {docs → qtype/docs}/components/Variable.md +2 -0
  71. qtype/docs/legacy_how_tos/.pages +6 -0
  72. qtype/docs/skills/architect/SKILL.md +188 -0
  73. qtype/docs/skills/architect/references/cheatsheet.md +198 -0
  74. qtype/docs/skills/architect/references/patterns.md +29 -0
  75. qtype/docs/stylesheets/extra.css +27 -0
  76. qtype/dsl/linker.py +8 -0
  77. qtype/dsl/model.py +177 -84
  78. qtype/examples/data_processing/athena_query.qtype.yaml +56 -0
  79. qtype/examples/data_processing/batch_inputs.csv +5 -0
  80. qtype/examples/data_processing/create_sample_db.py +129 -0
  81. qtype/examples/data_processing/invoke_other_flows.qtype.yaml +98 -0
  82. qtype/examples/data_processing/reviews.db +0 -0
  83. qtype/examples/data_processing/sample_article.txt +1 -0
  84. qtype/examples/data_processing/sample_documents.jsonl +5 -0
  85. qtype/examples/language_features/optional_variables.qtype.yaml +32 -0
  86. qtype/examples/language_features/story_prompt.txt +6 -0
  87. qtype/examples/legacy/data/customers.csv +6 -0
  88. qtype/examples/legacy/echo/readme.md +29 -0
  89. qtype/examples/legacy/qtype_plugin_example.py +51 -0
  90. qtype/examples/legacy/sample_data.txt +43 -0
  91. qtype/examples/legacy/vertex/README.md +11 -0
  92. qtype/examples/research_assistant/tavily.qtype.yaml +216 -0
  93. {examples → qtype/examples}/tutorials/03_structured_data.qtype.yaml +2 -2
  94. {examples → qtype/examples}/tutorials/04_tools_and_function_calling.qtype.yaml +5 -5
  95. qtype/interpreter/base/stream_emitter.py +19 -13
  96. qtype/interpreter/converters.py +142 -26
  97. qtype/interpreter/executors/agent_executor.py +2 -3
  98. qtype/interpreter/executors/aggregate_executor.py +3 -4
  99. qtype/interpreter/executors/construct_executor.py +15 -15
  100. qtype/interpreter/executors/doc_to_text_executor.py +1 -3
  101. qtype/interpreter/executors/field_extractor_executor.py +13 -12
  102. qtype/interpreter/executors/file_source_executor.py +18 -31
  103. qtype/interpreter/executors/invoke_embedding_executor.py +1 -4
  104. qtype/interpreter/executors/invoke_flow_executor.py +2 -2
  105. qtype/interpreter/executors/invoke_tool_executor.py +19 -18
  106. qtype/interpreter/executors/llm_inference_executor.py +16 -18
  107. qtype/interpreter/executors/prompt_template_executor.py +1 -3
  108. qtype/interpreter/tools/function_tool_helper.py +11 -10
  109. qtype/interpreter/types.py +89 -4
  110. qtype/interpreter/typing.py +31 -32
  111. qtype/mcp/server.py +312 -57
  112. {schema → qtype/schema}/qtype.schema.json +77 -79
  113. qtype/semantic/checker.py +19 -0
  114. qtype/semantic/generate.py +3 -6
  115. qtype/semantic/model.py +26 -33
  116. qtype/semantic/resolver.py +7 -0
  117. qtype/semantic/visualize.py +8 -3
  118. {qtype-0.1.12.dist-info → qtype-0.1.13.dist-info}/METADATA +47 -46
  119. qtype-0.1.13.dist-info/RECORD +352 -0
  120. {qtype-0.1.12.dist-info → qtype-0.1.13.dist-info}/WHEEL +1 -2
  121. docs/How To/Data Processing/read_data_from_files.md +0 -35
  122. docs/components/Aggregate.md +0 -8
  123. docs/components/InvokeFlow.md +0 -8
  124. docs/components/InvokeTool.md +0 -8
  125. docs/components/ToolParameter.md +0 -6
  126. examples/research_assistant/tavily.qtype.yaml +0 -289
  127. qtype/application/facade.py +0 -177
  128. qtype-0.1.12.dist-info/RECORD +0 -325
  129. qtype-0.1.12.dist-info/top_level.txt +0 -1
  130. {docs → qtype/docs}/Contributing/roadmap.md +0 -0
  131. {docs → qtype/docs}/Decisions/ADR-001-Chat-vs-Completion-Endpoint-Features.md +0 -0
  132. {docs → qtype/docs}/Gallery/dataflow_pipelines.mermaid +0 -0
  133. {docs → qtype/docs}/Gallery/research_assistant.mermaid +0 -0
  134. {docs → qtype/docs}/Gallery/simple_chatbot.mermaid +0 -0
  135. {docs → qtype/docs}/How To/Language Features/include_qtype_yaml.md +0 -0
  136. {docs → qtype/docs}/How To/Observability & Debugging/visualize_example.mermaid +0 -0
  137. {docs → qtype/docs}/How To/Qtype Server/flow_as_ui.png +0 -0
  138. {docs → qtype/docs}/Tutorials/example_chat.png +0 -0
  139. {docs → qtype/docs}/Tutorials/index.md +0 -0
  140. {docs → qtype/docs}/components/APIKeyAuthProvider.md +0 -0
  141. {docs → qtype/docs}/components/AWSAuthProvider.md +0 -0
  142. {docs → qtype/docs}/components/AWSSecretManager.md +0 -0
  143. {docs → qtype/docs}/components/Agent.md +0 -0
  144. {docs → qtype/docs}/components/AggregateStats.md +0 -0
  145. {docs → qtype/docs}/components/Application.md +0 -0
  146. {docs → qtype/docs}/components/AuthorizationProvider.md +0 -0
  147. {docs → qtype/docs}/components/AuthorizationProviderList.md +0 -0
  148. {docs → qtype/docs}/components/BearerTokenAuthProvider.md +0 -0
  149. {docs → qtype/docs}/components/BedrockReranker.md +0 -0
  150. {docs → qtype/docs}/components/ChatContent.md +0 -0
  151. {docs → qtype/docs}/components/ChatMessage.md +0 -0
  152. {docs → qtype/docs}/components/ConstantPath.md +0 -0
  153. {docs → qtype/docs}/components/CustomType.md +0 -0
  154. {docs → qtype/docs}/components/Decoder.md +0 -0
  155. {docs → qtype/docs}/components/DecoderFormat.md +0 -0
  156. {docs → qtype/docs}/components/DocToTextConverter.md +0 -0
  157. {docs → qtype/docs}/components/Document.md +0 -0
  158. {docs → qtype/docs}/components/DocumentIndex.md +0 -0
  159. {docs → qtype/docs}/components/DocumentSearch.md +0 -0
  160. {docs → qtype/docs}/components/DocumentSource.md +0 -0
  161. {docs → qtype/docs}/components/Echo.md +0 -0
  162. {docs → qtype/docs}/components/Embedding.md +0 -0
  163. {docs → qtype/docs}/components/EmbeddingModel.md +0 -0
  164. {docs → qtype/docs}/components/FileSource.md +0 -0
  165. {docs → qtype/docs}/components/FileWriter.md +0 -0
  166. {docs → qtype/docs}/components/Flow.md +0 -0
  167. {docs → qtype/docs}/components/FlowInterface.md +0 -0
  168. {docs → qtype/docs}/components/Index.md +0 -0
  169. {docs → qtype/docs}/components/IndexUpsert.md +0 -0
  170. {docs → qtype/docs}/components/InvokeEmbedding.md +0 -0
  171. {docs → qtype/docs}/components/LLMInference.md +0 -0
  172. {docs → qtype/docs}/components/ListType.md +0 -0
  173. {docs → qtype/docs}/components/Memory.md +0 -0
  174. {docs → qtype/docs}/components/MessageRole.md +0 -0
  175. {docs → qtype/docs}/components/Model.md +0 -0
  176. {docs → qtype/docs}/components/ModelList.md +0 -0
  177. {docs → qtype/docs}/components/OAuth2AuthProvider.md +0 -0
  178. {docs → qtype/docs}/components/PromptTemplate.md +0 -0
  179. {docs → qtype/docs}/components/PythonFunctionTool.md +0 -0
  180. {docs → qtype/docs}/components/RAGChunk.md +0 -0
  181. {docs → qtype/docs}/components/RAGDocument.md +0 -0
  182. {docs → qtype/docs}/components/RAGSearchResult.md +0 -0
  183. {docs → qtype/docs}/components/Reranker.md +0 -0
  184. {docs → qtype/docs}/components/SQLSource.md +0 -0
  185. {docs → qtype/docs}/components/Search.md +0 -0
  186. {docs → qtype/docs}/components/SearchResult.md +0 -0
  187. {docs → qtype/docs}/components/SecretManager.md +0 -0
  188. {docs → qtype/docs}/components/SecretReference.md +0 -0
  189. {docs → qtype/docs}/components/TelemetrySink.md +0 -0
  190. {docs → qtype/docs}/components/ToolList.md +0 -0
  191. {docs → qtype/docs}/components/TypeList.md +0 -0
  192. {docs → qtype/docs}/components/VariableList.md +0 -0
  193. {docs → qtype/docs}/components/VectorIndex.md +0 -0
  194. {docs → qtype/docs}/components/VectorSearch.md +0 -0
  195. {docs → qtype/docs}/components/VertexAuthProvider.md +0 -0
  196. {docs → qtype/docs}/components/Writer.md +0 -0
  197. {docs → qtype/docs}/example_ui.png +0 -0
  198. {docs → qtype/docs}/index.md +0 -0
  199. {docs → qtype/docs}/legacy_how_tos/Configuration/modular-yaml.md +0 -0
  200. {docs → qtype/docs}/legacy_how_tos/Configuration/phoenix_projects.png +0 -0
  201. {docs → qtype/docs}/legacy_how_tos/Configuration/phoenix_traces.png +0 -0
  202. {docs → qtype/docs}/legacy_how_tos/Configuration/reference-by-id.md +0 -0
  203. {docs → qtype/docs}/legacy_how_tos/Configuration/telemetry-setup.md +0 -0
  204. {docs → qtype/docs}/legacy_how_tos/Data Types/custom-types.md +0 -0
  205. {docs → qtype/docs}/legacy_how_tos/Data Types/domain-types.md +0 -0
  206. {docs → qtype/docs}/legacy_how_tos/Debugging/visualize-apps.md +0 -0
  207. {docs → qtype/docs}/legacy_how_tos/Tools/api-tools.md +0 -0
  208. {docs → qtype/docs}/legacy_how_tos/Tools/python-tools.md +0 -0
  209. {examples → qtype/examples}/authentication/aws_authentication.qtype.yaml +0 -0
  210. {examples → qtype/examples}/conversational_ai/hello_world_chat.qtype.yaml +0 -0
  211. {examples → qtype/examples}/conversational_ai/simple_chatbot.qtype.yaml +0 -0
  212. {examples → qtype/examples}/data_processing/batch_processing.qtype.yaml +0 -0
  213. {examples → qtype/examples}/data_processing/cache_step_results.qtype.yaml +0 -0
  214. {examples → qtype/examples}/data_processing/collect_results.qtype.yaml +0 -0
  215. {examples → qtype/examples}/data_processing/dataflow_pipelines.qtype.yaml +0 -0
  216. {examples → qtype/examples}/data_processing/decode_json.qtype.yaml +0 -0
  217. {examples → qtype/examples}/data_processing/explode_items.qtype.yaml +0 -0
  218. {examples → qtype/examples}/data_processing/read_file.qtype.yaml +0 -0
  219. {examples → qtype/examples}/invoke_models/create_embeddings.qtype.yaml +0 -0
  220. {examples → qtype/examples}/invoke_models/simple_llm_call.qtype.yaml +0 -0
  221. {examples → qtype/examples}/language_features/include_raw.qtype.yaml +0 -0
  222. {examples → qtype/examples}/language_features/ui_hints.qtype.yaml +0 -0
  223. {examples → qtype/examples}/legacy/bedrock/data_analysis_with_telemetry.qtype.yaml +0 -0
  224. {examples → qtype/examples}/legacy/bedrock/hello_world.qtype.yaml +0 -0
  225. {examples → qtype/examples}/legacy/bedrock/hello_world_chat.qtype.yaml +0 -0
  226. {examples → qtype/examples}/legacy/bedrock/hello_world_chat_with_telemetry.qtype.yaml +0 -0
  227. {examples → qtype/examples}/legacy/bedrock/hello_world_chat_with_thinking.qtype.yaml +0 -0
  228. {examples → qtype/examples}/legacy/bedrock/hello_world_completion.qtype.yaml +0 -0
  229. {examples → qtype/examples}/legacy/bedrock/hello_world_completion_with_auth.qtype.yaml +0 -0
  230. {examples → qtype/examples}/legacy/bedrock/simple_agent_chat.qtype.yaml +0 -0
  231. {examples → qtype/examples}/legacy/chat_with_langfuse.qtype.yaml +0 -0
  232. {examples → qtype/examples}/legacy/data_processor.qtype.yaml +0 -0
  233. {examples → qtype/examples}/legacy/echo/debug_example.qtype.yaml +0 -0
  234. {examples → qtype/examples}/legacy/echo/prompt.qtype.yaml +0 -0
  235. {examples → qtype/examples}/legacy/echo/test.qtype.yaml +0 -0
  236. {examples → qtype/examples}/legacy/echo/video.qtype.yaml +0 -0
  237. {examples → qtype/examples}/legacy/field_extractor_example.qtype.yaml +0 -0
  238. {examples → qtype/examples}/legacy/multi_flow_example.qtype.yaml +0 -0
  239. {examples → qtype/examples}/legacy/openai/hello_world_chat.qtype.yaml +0 -0
  240. {examples → qtype/examples}/legacy/openai/hello_world_chat_with_telemetry.qtype.yaml +0 -0
  241. {examples → qtype/examples}/legacy/rag.qtype.yaml +0 -0
  242. {examples → qtype/examples}/legacy/time_utilities.qtype.yaml +0 -0
  243. {examples → qtype/examples}/legacy/vertex/hello_world_chat.qtype.yaml +0 -0
  244. {examples → qtype/examples}/legacy/vertex/hello_world_completion.qtype.yaml +0 -0
  245. {examples → qtype/examples}/legacy/vertex/hello_world_completion_with_auth.qtype.yaml +0 -0
  246. {examples → qtype/examples}/observability_debugging/trace_with_opentelemetry.qtype.yaml +0 -0
  247. {examples → qtype/examples}/research_assistant/research_assistant.qtype.yaml +0 -0
  248. {examples → qtype/examples}/research_assistant/tavily.oas.yaml +0 -0
  249. {examples → qtype/examples}/tutorials/01_hello_world.qtype.yaml +0 -0
  250. {examples → qtype/examples}/tutorials/02_conversational_chat.qtype.yaml +0 -0
  251. {qtype-0.1.12.dist-info → qtype-0.1.13.dist-info}/entry_points.txt +0 -0
  252. {qtype-0.1.12.dist-info → qtype-0.1.13.dist-info}/licenses/LICENSE +0 -0
@@ -2,37 +2,16 @@
2
2
 
3
3
  from __future__ import annotations
4
4
 
5
- from collections.abc import AsyncIterator
6
- from typing import Any, cast
5
+ from pathlib import Path
6
+ from typing import Any
7
7
 
8
+ import fsspec
8
9
  import pandas as pd
9
10
  from pydantic import BaseModel
10
11
 
11
12
  from qtype.interpreter.types import FlowMessage, Session
12
- from qtype.semantic.model import Flow
13
-
14
-
15
- async def dataframe_to_flow_messages(
16
- df: pd.DataFrame, session: Session
17
- ) -> AsyncIterator[FlowMessage]:
18
- """
19
- Convert a DataFrame to an async generator of FlowMessages.
20
-
21
- Each row in the DataFrame becomes a FlowMessage with the same session.
22
-
23
- Args:
24
- df: DataFrame where each row represents one set of inputs
25
- session: Session object to use for all messages
26
-
27
- Yields:
28
- FlowMessages, one per DataFrame row
29
- """
30
- # Use to_dict with orient='records' - much faster than iterrows
31
- # This returns a list of dicts directly without Series overhead
32
- records = cast(list[dict[str, Any]], df.to_dict(orient="records"))
33
-
34
- for record in records:
35
- yield FlowMessage(session=session, variables=record)
13
+ from qtype.interpreter.typing import convert_dict_to_typed_variables
14
+ from qtype.semantic.model import Flow, Variable
36
15
 
37
16
 
38
17
  def flow_messages_to_dataframe(
@@ -77,3 +56,140 @@ def flow_messages_to_dataframe(
77
56
  results.append(row_data)
78
57
 
79
58
  return pd.DataFrame(results)
59
+
60
+
61
+ def read_dataframe_from_file(
62
+ file_path: str,
63
+ ) -> pd.DataFrame:
64
+ """
65
+ Read a file into a pandas DataFrame.
66
+
67
+ Automatically detects file format based on MIME type and supports both
68
+ local and remote files via fsspec. Returns raw DataFrame without type
69
+ conversion.
70
+
71
+ Args:
72
+ file_path: Path to the file (local or remote, e.g., s3://bucket/file)
73
+
74
+ Returns:
75
+ DataFrame with data from the file
76
+
77
+ Raises:
78
+ ValueError: If file format is not supported or mime type detection fails
79
+ FileNotFoundError: If file does not exist
80
+
81
+ Supported formats:
82
+ - CSV (.csv)
83
+ - JSON (.json)
84
+ - JSONL (.jsonl, JSON Lines)
85
+ - Parquet (.parquet)
86
+ - Excel (.xlsx, .xls)
87
+
88
+ Examples:
89
+ >>> # Read CSV
90
+ >>> df = read_dataframe_from_file("data.csv")
91
+ >>>
92
+ >>> # Read from S3
93
+ >>> df = read_dataframe_from_file("s3://bucket/data.parquet")
94
+ """
95
+ import magic
96
+
97
+ ext_to_mime = {
98
+ ".csv": "text/csv",
99
+ ".json": "application/json",
100
+ ".jsonl": "application/jsonlines",
101
+ ".parquet": "application/vnd.parquet",
102
+ ".xlsx": "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
103
+ ".xls": "application/vnd.ms-excel",
104
+ }
105
+ # Detect MIME type - handle both local and remote files
106
+ # For remote files, we'll need to download a sample first
107
+ if file_path.startswith(("http://", "https://", "s3://", "gs://")):
108
+ # For remote files, infer from extension as fallback
109
+ extension = Path(file_path).suffix.lower()
110
+ # Map extensions to mime types
111
+ mime_type = ext_to_mime.get(extension, "application/octet-stream")
112
+ else:
113
+ # Local file - use magic to detect mime type
114
+ try:
115
+ mime_type = magic.Magic(mime=True).from_file(file_path)
116
+ except Exception as e:
117
+ # Fallback to extension-based detection
118
+ extension = Path(file_path).suffix.lower()
119
+ mime_type = ext_to_mime.get(extension, "application/octet-stream")
120
+ if mime_type == "application/octet-stream":
121
+ raise ValueError(
122
+ f"Could not determine file type for {file_path}: {e}"
123
+ )
124
+
125
+ # Open file with fsspec (supports local and remote files)
126
+ with fsspec.open(file_path, "rb") as file_handle:
127
+ # Read based on MIME type
128
+ if mime_type == "text/csv" or mime_type == "text/plain":
129
+ df = pd.read_csv(file_handle) # type: ignore[arg-type]
130
+ elif mime_type in ["application/json", "application/jsonlines"]:
131
+ # Check if it's JSONL by extension
132
+ if Path(file_path).suffix.lower() == ".jsonl":
133
+ df = pd.read_json(
134
+ file_handle, # type: ignore[arg-type]
135
+ lines=True,
136
+ )
137
+ else:
138
+ df = pd.read_json(file_handle) # type: ignore[arg-type]
139
+ elif mime_type in [
140
+ "application/vnd.parquet",
141
+ "application/octet-stream",
142
+ ]:
143
+ # Parquet is often detected as octet-stream
144
+ df = pd.read_parquet(file_handle) # type: ignore[arg-type]
145
+ elif mime_type in [
146
+ "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
147
+ "application/vnd.ms-excel",
148
+ ]:
149
+ df = pd.read_excel(file_handle) # type: ignore[arg-type]
150
+ else:
151
+ raise ValueError(
152
+ f"Unsupported MIME type for file {file_path}: {mime_type}"
153
+ )
154
+
155
+ return df
156
+
157
+
158
+ def dataframe_to_flow_messages(
159
+ df: pd.DataFrame,
160
+ variables: list[Variable],
161
+ session: Session = Session(session_id="default"),
162
+ ) -> list[FlowMessage]:
163
+ """
164
+ Convert a DataFrame to FlowMessages with type conversion.
165
+
166
+ Each row in the DataFrame becomes a FlowMessage with variables converted
167
+ to their proper types based on the Variable definitions.
168
+
169
+ Args:
170
+ df: DataFrame with raw data
171
+ variables: List of Variable definitions for type conversion
172
+ session: Session to use for all FlowMessages (default: Session(session_id="default"))
173
+
174
+ Returns:
175
+ List of FlowMessages, one per row, with typed variables
176
+
177
+ Examples:
178
+ >>> from qtype.semantic.model import Variable
179
+ >>> from qtype.base.types import PrimitiveTypeEnum
180
+ >>> import pandas as pd
181
+ >>>
182
+ >>> df = pd.DataFrame({"age": ["30"], "score": ["95.5"]})
183
+ >>> vars = [
184
+ ... Variable(id="age", type=PrimitiveTypeEnum.int),
185
+ ... Variable(id="score", type=PrimitiveTypeEnum.float),
186
+ ... ]
187
+ >>> messages = dataframe_to_flow_messages(df, vars)
188
+ """
189
+ messages = []
190
+
191
+ for row_dict in df.to_dict(orient="records"):
192
+ typed_vars = convert_dict_to_typed_variables(row_dict, variables)
193
+ messages.append(FlowMessage(session=session, variables=typed_vars))
194
+
195
+ return messages
@@ -112,9 +112,8 @@ class AgentExecutor(StepExecutor, ToolExecutionMixin, FunctionToolHelper):
112
112
  # Convert input variables to chat messages
113
113
  inputs = []
114
114
  for input_var in self.step.inputs:
115
- value = message.variables.get(input_var.id)
116
- if value and isinstance(value, ChatMessage):
117
- inputs.append(to_chat_message(value))
115
+ value = message.get_variable(input_var.id)
116
+ inputs.append(to_chat_message(value))
118
117
 
119
118
  # Get session ID for memory isolation
120
119
  session_id = message.session.session_id
@@ -11,10 +11,9 @@ class AggregateExecutor(BatchedStepExecutor):
11
11
  """
12
12
  Executor for the Aggregate step.
13
13
 
14
- This is a terminal, many-to-one operation that reduces an entire stream
15
- to a single summary message containing counts of successful and failed
16
- messages. It processes all messages without modification during the
17
- processing phase, then emits a single aggregate summary during finalization.
14
+ A step that, after all messages have been processed,
15
+ returns a single message containing the counts of successful and failed
16
+ messages. Other messages are passed through unchanged.
18
17
  """
19
18
 
20
19
  def __init__(
@@ -4,7 +4,7 @@ from qtype.dsl.model import ListType
4
4
  from qtype.interpreter.base.base_step_executor import StepExecutor
5
5
  from qtype.interpreter.base.executor_context import ExecutorContext
6
6
  from qtype.interpreter.types import FlowMessage
7
- from qtype.interpreter.typing import instantiate_variable
7
+ from qtype.interpreter.typing import convert_dict_to_typed_variables
8
8
  from qtype.semantic.model import Construct
9
9
 
10
10
 
@@ -43,26 +43,26 @@ class ConstructExecutor(StepExecutor):
43
43
  isinstance(output_var.type, ListType)
44
44
  or len(self.step.inputs) == 1
45
45
  ):
46
- inputs = message.variables[self.step.inputs[0].id]
46
+ # Single input: pass value directly
47
+ data = {
48
+ output_var.id: message.variables[self.step.inputs[0].id]
49
+ }
47
50
  elif hasattr(output_var.type, "model_validate"):
48
51
  # This is a custom type (Pydantic model)
49
- # So input should be a dict
50
- input_values = {
51
- input_var.id: message.variables[input_var.id]
52
- for input_var in self.step.inputs
53
- }
54
- # use the mapping to convert variable names to
55
- inputs = {
56
- self.step.field_mapping.get(var_name, var_name): value # type: ignore[attr-defined]
57
- for var_name, value in input_values.items()
52
+ # field_bindings maps type field names to Variables
53
+ data = {
54
+ output_var.id: {
55
+ field_name: message.variables[var.id]
56
+ for field_name, var in self.step.field_bindings.items()
57
+ }
58
58
  }
59
59
  else:
60
60
  raise ValueError(
61
61
  "Construct step must have either a single input or output of a custom type."
62
62
  )
63
- constructed_value = instantiate_variable(output_var, inputs)
64
- yield message.copy_with_variables(
65
- {output_var.id: constructed_value}
66
- )
63
+
64
+ # Use convert_dict_to_typed_variables to validate and convert
65
+ result = convert_dict_to_typed_variables(data, self.step.outputs)
66
+ yield message.copy_with_variables(result)
67
67
  except Exception as e:
68
68
  yield message.copy_with_error(self.step.id, e)
@@ -49,9 +49,7 @@ class DocToTextConverterExecutor(StepExecutor):
49
49
 
50
50
  try:
51
51
  # Get the input document
52
- if input_id not in message.variables:
53
- raise ValueError(f"Input variable '{input_id}' is missing")
54
- doc = message.variables.get(input_id)
52
+ doc = message.get_variable(input_id)
55
53
  if not isinstance(doc, RAGDocument):
56
54
  raise ValueError(
57
55
  f"Input variable '{input_id}' must be a RAGDocument"
@@ -111,15 +111,11 @@ class FieldExtractorExecutor(StepExecutor):
111
111
  Multiple messages may be yielded if JSONPath matches multiple values.
112
112
  """
113
113
  input_id = self.step.inputs[0].id
114
- output_id = self.step.outputs[0].id
114
+ output_var = self.step.outputs[0]
115
115
 
116
116
  try:
117
117
  # Get the input value
118
- input_value = message.variables.get(input_id)
119
- if input_value is None:
120
- raise ValueError(
121
- f"Input variable '{input_id}' is not set or is None"
122
- )
118
+ input_value = message.get_variable(input_id)
123
119
 
124
120
  await self.stream_emitter.status(
125
121
  f"Extracting fields using JSONPath: {self.step.json_path}"
@@ -132,17 +128,20 @@ class FieldExtractorExecutor(StepExecutor):
132
128
  matches = self.jsonpath_expr.find(input_dict)
133
129
 
134
130
  if not matches:
135
- if self.step.fail_on_missing:
131
+ if output_var.optional:
132
+ # Yield message with None output
133
+ await self.stream_emitter.status(
134
+ "JSONPath matched 0 value(s)"
135
+ )
136
+ yield message.copy_with_variables({output_var.id: None})
137
+ return
138
+ else:
136
139
  raise ValueError(
137
140
  (
138
141
  f"JSONPath expression '{self.step.json_path}' "
139
142
  f"did not match any data in input"
140
143
  )
141
144
  )
142
- else:
143
- # Yield message with None output
144
- yield message.copy_with_variables({output_id: None})
145
- return
146
145
 
147
146
  await self.stream_emitter.status(
148
147
  f"JSONPath matched {len(matches)} value(s)"
@@ -156,7 +155,9 @@ class FieldExtractorExecutor(StepExecutor):
156
155
  output_value = self._construct_output(extracted_data)
157
156
 
158
157
  # Yield message with the constructed output
159
- yield message.copy_with_variables({output_id: output_value})
158
+ yield message.copy_with_variables(
159
+ {output_var.id: output_value}
160
+ )
160
161
 
161
162
  except Exception as e:
162
163
  # Emit error event to stream so frontend can display it
@@ -1,13 +1,13 @@
1
1
  from __future__ import annotations
2
2
 
3
- from pathlib import Path
4
3
  from typing import AsyncIterator
5
4
 
6
- import fsspec
7
- import pandas as pd
8
-
9
5
  from qtype.interpreter.base.base_step_executor import StepExecutor
10
6
  from qtype.interpreter.base.executor_context import ExecutorContext
7
+ from qtype.interpreter.converters import (
8
+ dataframe_to_flow_messages,
9
+ read_dataframe_from_file,
10
+ )
11
11
  from qtype.interpreter.types import FlowMessage
12
12
  from qtype.semantic.model import ConstantPath, FileSource
13
13
 
@@ -37,8 +37,6 @@ class FileSourceExecutor(StepExecutor):
37
37
  Yields:
38
38
  FlowMessages with the results of processing.
39
39
  """
40
- output_columns = {output.id for output in self.step.outputs}
41
-
42
40
  # get the path
43
41
  if isinstance(self.step.path, ConstantPath): # type: ignore[attr-defined]
44
42
  file_path = self.step.path # type: ignore[attr-defined]
@@ -55,30 +53,16 @@ class FileSourceExecutor(StepExecutor):
55
53
  f"Reading file from path: {file_path}"
56
54
  )
57
55
 
58
- # Determine file format from extension
56
+ # Get file path as string
59
57
  file_path_str = (
60
58
  file_path.uri if isinstance(file_path, ConstantPath) else file_path
61
59
  )
62
- extension = Path(file_path_str).suffix.lower()
63
60
 
64
- # Use fsspec to open the file and read with pandas
65
- with fsspec.open(file_path_str, "rb") as file_handle:
66
- if extension == ".csv":
67
- df = pd.read_csv(file_handle) # type: ignore[arg-type]
68
- elif extension == ".parquet":
69
- df = pd.read_parquet(file_handle) # type: ignore[arg-type]
70
- elif extension == ".json":
71
- df = pd.read_json(file_handle) # type: ignore[arg-type]
72
- elif extension == ".jsonl":
73
- df = pd.read_json(
74
- file_handle,
75
- lines=True, # type: ignore[arg-type]
76
- )
77
- else:
78
- # Default to parquet if no extension or unknown
79
- df = pd.read_parquet(file_handle) # type: ignore[arg-type]
61
+ # Read file into DataFrame using helper function
62
+ df = read_dataframe_from_file(file_path_str)
80
63
 
81
- # confirm the outputs exist in the dataframe
64
+ # Validate that expected output columns are present
65
+ output_columns = {output.id for output in self.step.outputs}
82
66
  columns = set(df.columns)
83
67
  missing_columns = output_columns - columns
84
68
  if missing_columns:
@@ -90,12 +74,15 @@ class FileSourceExecutor(StepExecutor):
90
74
  )
91
75
  )
92
76
 
93
- for row in df.to_dict(orient="records"):
94
- # Filter to only the expected output columns if they exist
95
- row = {
96
- str(k): v for k, v in row.items() if str(k) in output_columns
97
- }
98
- yield message.copy_with_variables(new_variables=row)
77
+ # Convert DataFrame to FlowMessages with type conversion
78
+ flow_messages = dataframe_to_flow_messages(
79
+ df, self.step.outputs, session=message.session
80
+ )
81
+
82
+ # Yield each message
83
+ for flow_message in flow_messages:
84
+ yield flow_message
85
+
99
86
  await self.stream_emitter.status(
100
87
  f"Emitted {len(df)} rows from: {file_path_str}"
101
88
  )
@@ -52,10 +52,7 @@ class InvokeEmbeddingExecutor(StepExecutor):
52
52
 
53
53
  try:
54
54
  # Get the input value
55
- input_value = message.variables.get(input_id)
56
-
57
- if input_value is None:
58
- raise ValueError(f"Input variable '{input_id}' is missing")
55
+ input_value = message.get_variable(input_id)
59
56
 
60
57
  def _call(input_value=input_value):
61
58
  # Generate embedding based on input type
@@ -34,7 +34,7 @@ class InvokeFlowExecutor(StepExecutor):
34
34
  initial = message.copy_with_variables(
35
35
  {
36
36
  id: message.variables.get(var.id)
37
- for var, id in self.step.input_bindings.items()
37
+ for id, var in self.step.input_bindings.items()
38
38
  }
39
39
  )
40
40
  # Pass through context (already available as self.context)
@@ -46,6 +46,6 @@ class InvokeFlowExecutor(StepExecutor):
46
46
  yield msg.copy_with_variables(
47
47
  {
48
48
  var.id: msg.variables.get(id)
49
- for var, id in self.step.output_bindings.items()
49
+ for id, var in self.step.output_bindings.items()
50
50
  }
51
51
  )
@@ -247,28 +247,26 @@ class InvokeToolExecutor(StepExecutor, ToolExecutionMixin):
247
247
  """
248
248
  tool_inputs = {}
249
249
 
250
- for tool_param_name, step_var_id in self.step.input_bindings.items():
250
+ for tool_param_name, step_variable in self.step.input_bindings.items():
251
251
  # Get tool parameter definition
252
- tool_param = self.step.tool.inputs.get(tool_param_name)
252
+ tool_param = next(
253
+ (p for p in self.step.tool.inputs if p.id == tool_param_name),
254
+ None,
255
+ )
253
256
  if not tool_param:
254
257
  raise ValueError(
255
258
  f"Tool parameter '{tool_param_name}' not defined in tool"
256
259
  )
257
260
 
258
261
  # Get value from message variables
259
- value = message.variables.get(step_var_id)
260
-
261
- # Handle missing values
262
- if value is None:
263
- if not tool_param.optional:
264
- raise ValueError(
265
- (
266
- f"Required input '{step_var_id}' for tool "
267
- f"parameter '{tool_param_name}' is missing"
268
- )
269
- )
270
- # Skip optional parameters that are missing
271
- continue
262
+ # Use default=None for optional params, let get_variable raise for required
263
+ if tool_param.optional:
264
+ value = message.get_variable(step_variable.id, default=None)
265
+ if value is None:
266
+ # Skip optional parameters that are unset
267
+ continue
268
+ else:
269
+ value = message.get_variable(step_variable.id)
272
270
 
273
271
  tool_inputs[tool_param_name] = value
274
272
 
@@ -288,9 +286,12 @@ class InvokeToolExecutor(StepExecutor, ToolExecutionMixin):
288
286
  """
289
287
  output_vars = {}
290
288
 
291
- for tool_param_name, step_var_id in self.step.output_bindings.items():
289
+ for tool_param_name, step_var in self.step.output_bindings.items():
292
290
  # Get tool parameter definition
293
- tool_param = self.step.tool.outputs.get(tool_param_name)
291
+ tool_param = next(
292
+ (p for p in self.step.tool.outputs if p.id == tool_param_name),
293
+ None,
294
+ )
294
295
  if not tool_param:
295
296
  raise ValueError(
296
297
  f"Tool parameter '{tool_param_name}' not defined in tool"
@@ -311,7 +312,7 @@ class InvokeToolExecutor(StepExecutor, ToolExecutionMixin):
311
312
  value = result
312
313
 
313
314
  if value is not None:
314
- output_vars[step_var_id] = value
315
+ output_vars[step_var.id] = value
315
316
 
316
317
  return output_vars
317
318
 
@@ -109,7 +109,7 @@ class LLMInferenceExecutor(StepExecutor):
109
109
  # Convert input variables to chat messages
110
110
  inputs = []
111
111
  for input_var in self.step.inputs:
112
- value = message.variables.get(input_var.id)
112
+ value = message.get_variable(input_var.id)
113
113
  # Convert any value type to ChatMessage, then to LlamaChatMessage
114
114
  chat_msg = variable_to_chat_message(value, input_var)
115
115
  inputs.append(to_chat_message(chat_msg))
@@ -160,9 +160,14 @@ class LLMInferenceExecutor(StepExecutor):
160
160
  if self.context.on_stream_event:
161
161
  # Generate a unique stream ID for this inference
162
162
  stream_id = f"llm-{self.step.id}-{id(message)}"
163
- async with self.stream_emitter.reasoning_stream(
164
- f"llm-{self.step.id}-{id(message)}-reasoning"
165
- ) as reasoning:
163
+ reasoning_stream_id = f"llm-{self.step.id}-{id(message)}-reasoning"
164
+
165
+ async with (
166
+ self.stream_emitter.reasoning_stream(
167
+ reasoning_stream_id
168
+ ) as reasoning,
169
+ self.stream_emitter.text_stream(stream_id) as streamer,
170
+ ):
166
171
  generator = await model.astream_chat(
167
172
  messages=inputs,
168
173
  **(
@@ -171,26 +176,19 @@ class LLMInferenceExecutor(StepExecutor):
171
176
  else {}
172
177
  ),
173
178
  )
174
- async for complete_response in generator:
179
+ async for chat_response in generator:
180
+ # Extract and emit reasoning if present
175
181
  reasoning_text = self.__extract_stream_reasoning_(
176
- complete_response
182
+ chat_response
177
183
  )
178
184
  if reasoning_text:
179
185
  await reasoning.delta(reasoning_text)
180
186
 
181
- async with self.stream_emitter.text_stream(stream_id) as streamer:
182
- generator = await model.astream_chat(
183
- messages=inputs,
184
- **(
185
- self.step.model.inference_params
186
- if self.step.model.inference_params
187
- else {}
188
- ),
189
- )
190
- async for chat_response in generator:
187
+ # Emit text delta
191
188
  chat_text = chat_response.delta
192
- if chat_text.strip() != "":
193
- await streamer.delta(chat_response.delta)
189
+ if chat_text is not None and chat_text.strip() != "":
190
+ await streamer.delta(chat_text)
191
+
194
192
  # Get the final result
195
193
  chat_result = chat_response
196
194
  else:
@@ -51,9 +51,7 @@ class PromptTemplateExecutor(StepExecutor):
51
51
  input_map = {}
52
52
  for var in self.step.inputs:
53
53
  if var.id in format_args:
54
- value = message.variables.get(var.id)
55
- if value is not None:
56
- input_map[var.id] = value
54
+ input_map[var.id] = message.get_variable(var.id)
57
55
 
58
56
  missing = format_args - input_map.keys()
59
57
  if missing:
@@ -14,7 +14,7 @@ from pydantic import create_model
14
14
  from qtype.base.types import PrimitiveTypeEnum
15
15
  from qtype.dsl.model import ListType
16
16
  from qtype.dsl.types import PRIMITIVE_TO_PYTHON_TYPE
17
- from qtype.semantic.model import APITool, PythonFunctionTool, ToolParameter
17
+ from qtype.semantic.model import APITool, PythonFunctionTool, Variable
18
18
 
19
19
  logger = logging.getLogger(__name__)
20
20
 
@@ -29,9 +29,9 @@ class FunctionToolHelper:
29
29
 
30
30
  @staticmethod
31
31
  def _qtype_type_to_python_type(
32
- param: ToolParameter,
32
+ param: Variable,
33
33
  ) -> type:
34
- """Convert QType ToolParameter type to Python type for Pydantic.
34
+ """Convert QType Variable type to Python type for Pydantic.
35
35
 
36
36
  The param.type has already been resolved during semantic model
37
37
  creation, so we just need to convert it to the appropriate Python
@@ -42,7 +42,7 @@ class FunctionToolHelper:
42
42
  - Unknown → str
43
43
 
44
44
  Args:
45
- param: The QType ToolParameter to convert.
45
+ param: The QType Variable to convert.
46
46
 
47
47
  Returns:
48
48
  Python type suitable for Pydantic field annotation.
@@ -55,7 +55,8 @@ class FunctionToolHelper:
55
55
  if isinstance(param.type, ListType):
56
56
  # Create a mock parameter with the element type to recursively
57
57
  # resolve it
58
- element_param = ToolParameter(
58
+ element_param = Variable(
59
+ id="temp",
59
60
  type=param.type.element_type,
60
61
  optional=False,
61
62
  )
@@ -74,13 +75,13 @@ class FunctionToolHelper:
74
75
  @staticmethod
75
76
  def _create_fn_schema(
76
77
  tool_name: str,
77
- inputs: dict[str, ToolParameter],
78
+ inputs: list[Variable],
78
79
  ) -> type[BaseModel] | None:
79
80
  """Create a Pydantic model from QType tool input parameters.
80
81
 
81
82
  Args:
82
83
  tool_name: Name of the tool (used for model name).
83
- inputs: Dictionary of input parameter names to ToolParameter.
84
+ inputs: List of input Variables.
84
85
 
85
86
  Returns:
86
87
  Pydantic BaseModel class representing the tool's input schema.
@@ -91,17 +92,17 @@ class FunctionToolHelper:
91
92
  # Each field is a tuple of (type_annotation, field_info)
92
93
  field_definitions: dict[str, Any] = {}
93
94
 
94
- for param_name, param in inputs.items():
95
+ for param in inputs:
95
96
  python_type = FunctionToolHelper._qtype_type_to_python_type(param)
96
97
 
97
98
  # Create field with optional annotation
98
99
  if param.optional:
99
- field_definitions[param_name] = (
100
+ field_definitions[param.id] = (
100
101
  python_type | None, # type: ignore[valid-type]
101
102
  PydanticField(default=None),
102
103
  )
103
104
  else:
104
- field_definitions[param_name] = (
105
+ field_definitions[param.id] = (
105
106
  python_type,
106
107
  PydanticField(...),
107
108
  )