qtype 0.0.12__py3-none-any.whl → 0.1.7__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (137)
  1. qtype/application/commons/tools.py +1 -1
  2. qtype/application/converters/tools_from_api.py +476 -11
  3. qtype/application/converters/tools_from_module.py +38 -14
  4. qtype/application/converters/types.py +15 -30
  5. qtype/application/documentation.py +1 -1
  6. qtype/application/facade.py +102 -85
  7. qtype/base/types.py +227 -7
  8. qtype/cli.py +5 -1
  9. qtype/commands/convert.py +52 -6
  10. qtype/commands/generate.py +44 -4
  11. qtype/commands/run.py +78 -36
  12. qtype/commands/serve.py +74 -44
  13. qtype/commands/validate.py +37 -14
  14. qtype/commands/visualize.py +46 -25
  15. qtype/dsl/__init__.py +6 -5
  16. qtype/dsl/custom_types.py +1 -1
  17. qtype/dsl/domain_types.py +86 -5
  18. qtype/dsl/linker.py +384 -0
  19. qtype/dsl/loader.py +315 -0
  20. qtype/dsl/model.py +753 -264
  21. qtype/dsl/parser.py +200 -0
  22. qtype/dsl/types.py +50 -0
  23. qtype/interpreter/api.py +63 -136
  24. qtype/interpreter/auth/aws.py +19 -9
  25. qtype/interpreter/auth/generic.py +93 -16
  26. qtype/interpreter/base/base_step_executor.py +436 -0
  27. qtype/interpreter/base/batch_step_executor.py +171 -0
  28. qtype/interpreter/base/exceptions.py +50 -0
  29. qtype/interpreter/base/executor_context.py +91 -0
  30. qtype/interpreter/base/factory.py +84 -0
  31. qtype/interpreter/base/progress_tracker.py +110 -0
  32. qtype/interpreter/base/secrets.py +339 -0
  33. qtype/interpreter/base/step_cache.py +74 -0
  34. qtype/interpreter/base/stream_emitter.py +469 -0
  35. qtype/interpreter/conversions.py +495 -24
  36. qtype/interpreter/converters.py +79 -0
  37. qtype/interpreter/endpoints.py +355 -0
  38. qtype/interpreter/executors/agent_executor.py +242 -0
  39. qtype/interpreter/executors/aggregate_executor.py +93 -0
  40. qtype/interpreter/executors/bedrock_reranker_executor.py +195 -0
  41. qtype/interpreter/executors/decoder_executor.py +163 -0
  42. qtype/interpreter/executors/doc_to_text_executor.py +112 -0
  43. qtype/interpreter/executors/document_embedder_executor.py +123 -0
  44. qtype/interpreter/executors/document_search_executor.py +113 -0
  45. qtype/interpreter/executors/document_source_executor.py +118 -0
  46. qtype/interpreter/executors/document_splitter_executor.py +105 -0
  47. qtype/interpreter/executors/echo_executor.py +63 -0
  48. qtype/interpreter/executors/field_extractor_executor.py +165 -0
  49. qtype/interpreter/executors/file_source_executor.py +101 -0
  50. qtype/interpreter/executors/file_writer_executor.py +110 -0
  51. qtype/interpreter/executors/index_upsert_executor.py +232 -0
  52. qtype/interpreter/executors/invoke_embedding_executor.py +104 -0
  53. qtype/interpreter/executors/invoke_flow_executor.py +51 -0
  54. qtype/interpreter/executors/invoke_tool_executor.py +358 -0
  55. qtype/interpreter/executors/llm_inference_executor.py +272 -0
  56. qtype/interpreter/executors/prompt_template_executor.py +78 -0
  57. qtype/interpreter/executors/sql_source_executor.py +106 -0
  58. qtype/interpreter/executors/vector_search_executor.py +91 -0
  59. qtype/interpreter/flow.py +172 -22
  60. qtype/interpreter/logging_progress.py +61 -0
  61. qtype/interpreter/metadata_api.py +115 -0
  62. qtype/interpreter/resource_cache.py +5 -4
  63. qtype/interpreter/rich_progress.py +225 -0
  64. qtype/interpreter/stream/chat/__init__.py +15 -0
  65. qtype/interpreter/stream/chat/converter.py +391 -0
  66. qtype/interpreter/{chat → stream/chat}/file_conversions.py +2 -2
  67. qtype/interpreter/stream/chat/ui_request_to_domain_type.py +140 -0
  68. qtype/interpreter/stream/chat/vercel.py +609 -0
  69. qtype/interpreter/stream/utils/__init__.py +15 -0
  70. qtype/interpreter/stream/utils/build_vercel_ai_formatter.py +74 -0
  71. qtype/interpreter/stream/utils/callback_to_stream.py +66 -0
  72. qtype/interpreter/stream/utils/create_streaming_response.py +18 -0
  73. qtype/interpreter/stream/utils/default_chat_extract_text.py +20 -0
  74. qtype/interpreter/stream/utils/error_streaming_response.py +20 -0
  75. qtype/interpreter/telemetry.py +135 -8
  76. qtype/interpreter/tools/__init__.py +5 -0
  77. qtype/interpreter/tools/function_tool_helper.py +265 -0
  78. qtype/interpreter/types.py +330 -0
  79. qtype/interpreter/typing.py +83 -89
  80. qtype/interpreter/ui/404/index.html +1 -1
  81. qtype/interpreter/ui/404.html +1 -1
  82. qtype/interpreter/ui/_next/static/{OT8QJQW3J70VbDWWfrEMT → 20HoJN6otZ_LyHLHpCPE6}/_buildManifest.js +1 -1
  83. qtype/interpreter/ui/_next/static/chunks/434-b2112d19f25c44ff.js +36 -0
  84. qtype/interpreter/ui/_next/static/chunks/{964-ed4ab073db645007.js → 964-2b041321a01cbf56.js} +1 -1
  85. qtype/interpreter/ui/_next/static/chunks/app/{layout-5ccbc44fd528d089.js → layout-a05273ead5de2c41.js} +1 -1
  86. qtype/interpreter/ui/_next/static/chunks/app/page-8c67d16ac90d23cb.js +1 -0
  87. qtype/interpreter/ui/_next/static/chunks/ba12c10f-546f2714ff8abc66.js +1 -0
  88. qtype/interpreter/ui/_next/static/chunks/{main-6d261b6c5d6fb6c2.js → main-e26b9cb206da2cac.js} +1 -1
  89. qtype/interpreter/ui/_next/static/chunks/webpack-08642e441b39b6c2.js +1 -0
  90. qtype/interpreter/ui/_next/static/css/8a8d1269e362fef7.css +3 -0
  91. qtype/interpreter/ui/_next/static/media/4cf2300e9c8272f7-s.p.woff2 +0 -0
  92. qtype/interpreter/ui/icon.png +0 -0
  93. qtype/interpreter/ui/index.html +1 -1
  94. qtype/interpreter/ui/index.txt +5 -5
  95. qtype/semantic/checker.py +643 -0
  96. qtype/semantic/generate.py +268 -85
  97. qtype/semantic/loader.py +95 -0
  98. qtype/semantic/model.py +535 -163
  99. qtype/semantic/resolver.py +63 -19
  100. qtype/semantic/visualize.py +50 -35
  101. {qtype-0.0.12.dist-info → qtype-0.1.7.dist-info}/METADATA +22 -5
  102. qtype-0.1.7.dist-info/RECORD +137 -0
  103. qtype/dsl/base_types.py +0 -38
  104. qtype/dsl/validator.py +0 -464
  105. qtype/interpreter/batch/__init__.py +0 -0
  106. qtype/interpreter/batch/flow.py +0 -95
  107. qtype/interpreter/batch/sql_source.py +0 -95
  108. qtype/interpreter/batch/step.py +0 -63
  109. qtype/interpreter/batch/types.py +0 -41
  110. qtype/interpreter/batch/utils.py +0 -179
  111. qtype/interpreter/chat/chat_api.py +0 -237
  112. qtype/interpreter/chat/vercel.py +0 -314
  113. qtype/interpreter/exceptions.py +0 -10
  114. qtype/interpreter/step.py +0 -67
  115. qtype/interpreter/steps/__init__.py +0 -0
  116. qtype/interpreter/steps/agent.py +0 -114
  117. qtype/interpreter/steps/condition.py +0 -36
  118. qtype/interpreter/steps/decoder.py +0 -88
  119. qtype/interpreter/steps/llm_inference.py +0 -150
  120. qtype/interpreter/steps/prompt_template.py +0 -54
  121. qtype/interpreter/steps/search.py +0 -24
  122. qtype/interpreter/steps/tool.py +0 -53
  123. qtype/interpreter/streaming_helpers.py +0 -123
  124. qtype/interpreter/ui/_next/static/chunks/736-7fc606e244fedcb1.js +0 -36
  125. qtype/interpreter/ui/_next/static/chunks/app/page-c72e847e888e549d.js +0 -1
  126. qtype/interpreter/ui/_next/static/chunks/ba12c10f-22556063851a6df2.js +0 -1
  127. qtype/interpreter/ui/_next/static/chunks/webpack-8289c17c67827f22.js +0 -1
  128. qtype/interpreter/ui/_next/static/css/a262c53826df929b.css +0 -3
  129. qtype/interpreter/ui/_next/static/media/569ce4b8f30dc480-s.p.woff2 +0 -0
  130. qtype/interpreter/ui/favicon.ico +0 -0
  131. qtype/loader.py +0 -389
  132. qtype-0.0.12.dist-info/RECORD +0 -105
  133. /qtype/interpreter/ui/_next/static/{OT8QJQW3J70VbDWWfrEMT → 20HoJN6otZ_LyHLHpCPE6}/_ssgManifest.js +0 -0
  134. {qtype-0.0.12.dist-info → qtype-0.1.7.dist-info}/WHEEL +0 -0
  135. {qtype-0.0.12.dist-info → qtype-0.1.7.dist-info}/entry_points.txt +0 -0
  136. {qtype-0.0.12.dist-info → qtype-0.1.7.dist-info}/licenses/LICENSE +0 -0
  137. {qtype-0.0.12.dist-info → qtype-0.1.7.dist-info}/top_level.txt +0 -0
qtype/interpreter/executors/document_search_executor.py
@@ -0,0 +1,113 @@
+ from __future__ import annotations
+
+ from typing import AsyncIterator
+
+ from qtype.dsl.domain_types import SearchResult
+ from qtype.interpreter.base.base_step_executor import StepExecutor
+ from qtype.interpreter.base.executor_context import ExecutorContext
+ from qtype.interpreter.conversions import to_opensearch_client
+ from qtype.interpreter.types import FlowMessage
+ from qtype.semantic.model import DocumentSearch
+
+
+ class DocumentSearchExecutor(StepExecutor):
+     """Executor for DocumentSearch steps using OpenSearch/Elasticsearch."""
+
+     def __init__(
+         self, step: DocumentSearch, context: ExecutorContext, **dependencies
+     ):
+         super().__init__(step, context, **dependencies)
+         if not isinstance(step, DocumentSearch):
+             raise ValueError(
+                 (
+                     "DocumentSearchExecutor can only execute "
+                     "DocumentSearch steps."
+                 )
+             )
+         self.step: DocumentSearch = step
+         # Initialize the OpenSearch client once for the executor
+         self.client = to_opensearch_client(
+             self.step.index, self._secret_manager
+         )
+         self.index_name = self.step.index.name
+
+     async def finalize(self) -> AsyncIterator[FlowMessage]:
+         """Clean up resources after all messages are processed."""
+         if hasattr(self, "client") and self.client:
+             try:
+                 await self.client.close()
+             except Exception:
+                 pass
+         # Make this an async generator
+         return
+         yield  # type: ignore[unreachable]
+
+     async def process_message(
+         self,
+         message: FlowMessage,
+     ) -> AsyncIterator[FlowMessage]:
+         """Process a single FlowMessage for the DocumentSearch step.
+
+         Args:
+             message: The FlowMessage to process.
+
+         Yields:
+             A FlowMessage whose output variable holds a list of SearchResult objects.
+         """
+         input_id = self.step.inputs[0].id
+         output_id = self.step.outputs[0].id
+
+         try:
+             # Get the search query text
+             query_text = message.variables.get(input_id)
+             if not isinstance(query_text, str):
+                 raise ValueError(
+                     (
+                         f"Input variable '{input_id}' must be a string "
+                         f"(text query), got {type(query_text)}"
+                     )
+                 )
+
+             # Build the search query
+             search_body = {
+                 "query": {
+                     "multi_match": {"query": query_text} | self.step.query_args
+                 },
+                 "size": self.step.default_top_k,
+             }
+
+             # Apply any filters if specified
+             if self.step.filters:
+                 search_body["query"] = {
+                     "bool": {
+                         "must": [search_body["query"]],
+                         "filter": [
+                             {"term": {k: v}}
+                             for k, v in self.step.filters.items()
+                         ],
+                     }
+                 }
+
+             # Execute the search asynchronously using AsyncOpenSearch
+             response = await self.client.search(
+                 index=self.index_name, body=search_body
+             )
+
+             # Process each hit and yield as SearchResult
+             # TODO: add support for decomposing a RAGSearchResult for hybrid search
+             search_results = []
+             for hit in response["hits"]["hits"]:
+                 search_results.append(
+                     SearchResult(
+                         content=hit["_source"],
+                         doc_id=hit["_id"],
+                         score=hit["_score"],
+                     )
+                 )
+             yield message.copy_with_variables({output_id: search_results})
+
+         except Exception as e:
+             # Emit error event to stream so frontend can display it
+             await self.stream_emitter.error(str(e))
+             message.set_error(self.step.id, e)
+             yield message
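
For reference, this is the shape of the request body the executor sends when filters are present. A hand-written sketch with made-up values for the query text, query_args, filters, and top_k, not output captured from qtype:

    # Illustrative values only; "fields" stands in for whatever query_args holds.
    search_body = {
        "query": {
            "bool": {
                "must": [
                    {
                        "multi_match": {
                            "query": "what is qtype",
                            "fields": ["title", "body"],
                        }
                    }
                ],
                "filter": [{"term": {"lang": "en"}}],
            }
        },
        "size": 5,
    }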
qtype/interpreter/executors/document_source_executor.py
@@ -0,0 +1,118 @@
+ import importlib
+ from typing import AsyncIterator
+
+ from qtype.interpreter.base.base_step_executor import StepExecutor
+ from qtype.interpreter.base.executor_context import ExecutorContext
+ from qtype.interpreter.conversions import from_llama_document
+ from qtype.interpreter.types import FlowMessage
+ from qtype.semantic.model import DocumentSource
+
+
+ class DocumentSourceExecutor(StepExecutor):
+     """Executor for DocumentSource steps."""
+
+     def __init__(
+         self, step: DocumentSource, context: ExecutorContext, **dependencies
+     ):
+         super().__init__(step, context, **dependencies)
+         if not isinstance(step, DocumentSource):
+             raise ValueError(
+                 (
+                     "DocumentSourceExecutor can only execute "
+                     "DocumentSource steps."
+                 )
+             )
+         self.step: DocumentSource = step
+         # Initialize the reader class once for the executor
+         self.reader_class = self._load_reader_class()
+
+     def _load_reader_class(self) -> type:
+         """Load the LlamaIndex reader class dynamically.
+
+         Returns:
+             The reader class.
+
+         Raises:
+             ImportError: If the reader class cannot be imported.
+         """
+         # reader_module is a dotted path: everything before the last dot
+         # is the module to import, and the final component is the reader
+         # class name (e.g. 'llama_index.core.SimpleDirectoryReader').
+         parts = self.step.reader_module.split(".")
+         module_path = ".".join(parts[:-1])
+         class_name = parts[-1]
+
+         # Dynamically import the reader module and get the class
+         try:
+             module = importlib.import_module(module_path)
+             reader_class = getattr(module, class_name)
+             return reader_class
+         except (ImportError, AttributeError) as e:
+             raise ImportError(
+                 (
+                     f"Failed to import reader class '{class_name}' "
+                     f"from '{module_path}': {e}"
+                 )
+             ) from e
+
+     async def process_message(
+         self,
+         message: FlowMessage,
+     ) -> AsyncIterator[FlowMessage]:
+         """Process a single FlowMessage for the DocumentSource step.
+
+         Args:
+             message: The FlowMessage to process.
+         Yields:
+             FlowMessages with loaded documents.
+         """
+         output_id = self.step.outputs[0].id
+
+         try:
+             # Resolve any SecretReferences in step args
+             context = f"step '{self.step.id}'"
+             resolved_args = self._secret_manager.resolve_secrets_in_dict(
+                 self.step.args, context
+             )
+
+             # Combine resolved step args with message variables as runtime args
+             runtime_args = {
+                 key: message.variables.get(key)
+                 for key in message.variables.keys()
+             }
+             combined_args = {**resolved_args, **runtime_args}
+
+             # Instantiate the reader with combined arguments
+             loader = self.reader_class(**combined_args)
+
+             # Load documents using the loader
+             if not hasattr(loader, "load_data"):
+                 raise AttributeError(
+                     (
+                         f"Reader class '{self.reader_class.__name__}' "
+                         "does not have a 'load_data' method"
+                     )
+                 )
+             load_args = self.step.loader_args or {}
+
+             llama_documents = loader.load_data(**load_args)
+
+             # Convert LlamaIndex Documents to RAGDocuments
+             rag_documents = [
+                 from_llama_document(doc) for doc in llama_documents
+             ]
+
+             # Emit feedback about total documents loaded
+             await self.stream_emitter.status(
+                 f"Loaded {len(rag_documents)} documents"
+             )
+
+             # Yield one message per document (fan-out)
+             for doc in rag_documents:
+                 yield message.copy_with_variables({output_id: doc})
+
+         except Exception as e:
+             # Emit error event to stream so frontend can display it
+             await self.stream_emitter.error(str(e))
+             message.set_error(self.step.id, e)
+             yield message
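
The reader loading is a standard importlib pattern. A standalone sketch follows; the reader_module value and the SimpleDirectoryReader kwargs are assumptions for illustration, not taken from this diff:

    import importlib

    # Assumed example value; the exact reader_module format is defined by the DSL.
    reader_module = "llama_index.core.SimpleDirectoryReader"
    module_path, _, class_name = reader_module.rpartition(".")
    reader_class = getattr(importlib.import_module(module_path), class_name)

    loader = reader_class(input_dir="./docs")  # hypothetical reader kwargs
    documents = loader.load_data()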
qtype/interpreter/executors/document_splitter_executor.py
@@ -0,0 +1,105 @@
+ from typing import AsyncIterator
+
+ from llama_index.core.schema import Document as LlamaDocument
+
+ from qtype.dsl.domain_types import RAGChunk, RAGDocument
+ from qtype.interpreter.base.base_step_executor import StepExecutor
+ from qtype.interpreter.base.executor_context import ExecutorContext
+ from qtype.interpreter.conversions import to_text_splitter
+ from qtype.interpreter.types import FlowMessage
+ from qtype.semantic.model import DocumentSplitter
+
+
+ class DocumentSplitterExecutor(StepExecutor):
+     """Executor for DocumentSplitter steps."""
+
+     def __init__(
+         self, step: DocumentSplitter, context: ExecutorContext, **dependencies
+     ):
+         super().__init__(step, context, **dependencies)
+         if not isinstance(step, DocumentSplitter):
+             raise ValueError(
+                 (
+                     "DocumentSplitterExecutor can only execute "
+                     "DocumentSplitter steps."
+                 )
+             )
+         self.step: DocumentSplitter = step
+         # Initialize the text splitter once for the executor
+         self.llama_splitter = to_text_splitter(self.step)
+
+     async def process_message(
+         self,
+         message: FlowMessage,
+     ) -> AsyncIterator[FlowMessage]:
+         """Process a single FlowMessage for the DocumentSplitter step.
+
+         Args:
+             message: The FlowMessage to process.
+
+         Yields:
+             FlowMessages with document chunks.
+         """
+         input_id = self.step.inputs[0].id
+         output_id = self.step.outputs[0].id
+
+         try:
+             # Get the document from the input
+             document = message.variables.get(input_id)
+             if not isinstance(document, RAGDocument):
+                 raise ValueError(
+                     f"Input variable '{input_id}' must be a RAGDocument"
+                 )
+
+             await self.stream_emitter.status(
+                 f"Splitting document: {document.file_name}"
+             )
+
+             # Convert content to text if needed
+             if isinstance(document.content, bytes):
+                 content_text = document.content.decode("utf-8")
+             elif isinstance(document.content, str):
+                 content_text = document.content
+             else:
+                 raise ValueError(
+                     (
+                         f"Unsupported document content type: "
+                         f"{type(document.content)}"
+                     )
+                 )
+
+             # Convert to LlamaIndex Document for splitting
+             llama_doc = LlamaDocument(
+                 text=content_text,
+                 metadata={},  # omit metadata here; it is merged back in below and we don't want it in the chunk-size checks
+                 doc_id=document.file_id,
+             )
+
+             # Split the document using the LlamaIndex splitter
+             nodes = self.llama_splitter.get_nodes_from_documents([llama_doc])
+
+             await self.stream_emitter.status(
+                 f"Split {document.file_name} into {len(nodes)} chunks"
+             )
+
+             # Create a RAGChunk for each node and yield (fan-out)
+             for node in nodes:
+                 merged_metadata = {}
+                 merged_metadata.update(document.metadata)
+                 if node.metadata:
+                     merged_metadata.update(node.metadata)
+
+                 chunk = RAGChunk(
+                     content=node.text,
+                     chunk_id=node.node_id,
+                     document_id=document.file_id,
+                     vector=None,  # Embedding will be added later
+                     metadata=merged_metadata,
+                 )
+                 yield message.copy_with_variables({output_id: chunk})
+
+         except Exception as e:
+             # Emit error event to stream so frontend can display it
+             await self.stream_emitter.error(str(e))
+             message.set_error(self.step.id, e)
+             yield message
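
The chunking itself is delegated to whatever LlamaIndex splitter to_text_splitter returns. A minimal sketch of that style of API, assuming a SentenceSplitter with illustrative parameters (the actual splitter and settings come from the DocumentSplitter step):

    from llama_index.core.node_parser import SentenceSplitter
    from llama_index.core.schema import Document

    splitter = SentenceSplitter(chunk_size=512, chunk_overlap=64)
    nodes = splitter.get_nodes_from_documents([Document(text="some long text ...")])
    for node in nodes:
        print(node.node_id, len(node.text))  # one RAGChunk per node in the executor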
qtype/interpreter/executors/echo_executor.py
@@ -0,0 +1,63 @@
+ from typing import AsyncIterator
+
+ from qtype.interpreter.base.base_step_executor import StepExecutor
+ from qtype.interpreter.base.executor_context import ExecutorContext
+ from qtype.interpreter.types import FlowMessage
+ from qtype.semantic.model import Echo
+
+
+ class EchoExecutor(StepExecutor):
+     """Executor for Echo steps.
+
+     Passes through input variables as outputs without modification.
+     Useful for debugging flows by inspecting variable values at specific
+     points in the execution pipeline.
+     """
+
+     def __init__(
+         self,
+         step: Echo,
+         context: ExecutorContext,
+         **dependencies: object,
+     ):
+         super().__init__(step, context, **dependencies)
+         if not isinstance(step, Echo):
+             raise ValueError("EchoExecutor can only execute Echo steps.")
+         self.step: Echo = step
+
+     async def process_message(
+         self,
+         message: FlowMessage,
+     ) -> AsyncIterator[FlowMessage]:
+         """Process a single FlowMessage for the Echo step.
+
+         Reads all input variables from the message and copies them to
+         the output variables with the same IDs.
+
+         Args:
+             message: The FlowMessage to process.
+
+         Yields:
+             FlowMessage with the echoed variables.
+         """
+         try:
+             # Build a dict of output variable values by reading from inputs
+             output_vars = {}
+             for input_var in self.step.inputs:
+                 value = message.variables.get(input_var.id)
+                 # Find the corresponding output variable ID (should match)
+                 for output_var in self.step.outputs:
+                     if output_var.id == input_var.id:
+                         output_vars[output_var.id] = value
+                         break
+
+             await self.stream_emitter.status(
+                 f"Echoed {len(output_vars)} variable(s) in step {self.step.id}",
+             )
+             yield message.copy_with_variables(output_vars)
+
+         except Exception as e:
+             # Emit error event to stream so frontend can display it
+             await self.stream_emitter.error(str(e))
+             message.set_error(self.step.id, e)
+             yield message
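
All of these executors share the same contract: process_message is an async generator that takes one FlowMessage and yields zero or more of them. A self-contained sketch of that contract, with plain dicts standing in for FlowMessage:

    import asyncio
    from typing import AsyncIterator

    async def echo(variables: dict) -> AsyncIterator[dict]:
        # Pass inputs through unchanged, as EchoExecutor does.
        yield dict(variables)

    async def main() -> None:
        async for out in echo({"greeting": "hello"}):
            print(out)  # {'greeting': 'hello'}

    asyncio.run(main())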
qtype/interpreter/executors/field_extractor_executor.py
@@ -0,0 +1,165 @@
+ from typing import Any, AsyncIterator
+
+ from jsonpath_ng.ext import parse  # type: ignore[import-untyped]
+ from pydantic import BaseModel
+
+ from qtype.base.types import PrimitiveTypeEnum
+ from qtype.dsl.model import ListType
+ from qtype.interpreter.base.base_step_executor import StepExecutor
+ from qtype.interpreter.base.executor_context import ExecutorContext
+ from qtype.interpreter.types import FlowMessage
+ from qtype.semantic.model import FieldExtractor
+
+
+ class FieldExtractorExecutor(StepExecutor):
+     """Executor for FieldExtractor steps.
+
+     Extracts fields from input data using JSONPath expressions and
+     constructs output instances. Supports 1-to-many cardinality when
+     the JSONPath matches multiple values.
+     """
+
+     def __init__(
+         self,
+         step: FieldExtractor,
+         context: ExecutorContext,
+         **dependencies: object,
+     ):
+         super().__init__(step, context, **dependencies)
+         if not isinstance(step, FieldExtractor):
+             raise ValueError(
+                 "FieldExtractorExecutor can only execute FieldExtractor steps."
+             )
+         self.step: FieldExtractor = step
+
+         # Parse the JSONPath expression once at initialization
+         try:
+             self.jsonpath_expr = parse(self.step.json_path)
+         except Exception as e:
+             raise ValueError(
+                 f"Invalid JSONPath expression '{self.step.json_path}': {e}"
+             ) from e
+
+     def _to_dict(self, value: Any) -> Any:
+         """Convert value to dict representation for JSONPath processing.
+
+         Args:
+             value: The value to convert (could be BaseModel, dict, list, etc.)
+
+         Returns:
+             Dict representation suitable for JSONPath processing
+         """
+         if isinstance(value, BaseModel):
+             return value.model_dump()
+         return value
+
+     def _construct_output(self, extracted_data: Any) -> Any:
+         """Construct the output value from extracted data.
+
+         Args:
+             extracted_data: The data extracted by JSONPath
+
+         Returns:
+             Constructed output value based on the output variable type
+         """
+         output_var = self.step.outputs[0]
+         output_type = output_var.type
+
+         # Handle primitive types - just return the extracted data
+         if isinstance(output_type, PrimitiveTypeEnum):
+             return extracted_data
+
+         # Handle list types
+         if isinstance(output_type, ListType):
+             # The extracted_data should already be a list
+             if not isinstance(extracted_data, list):
+                 extracted_data = [extracted_data]
+             return extracted_data
+
+         # Handle BaseModel types (domain types and custom types)
+         if isinstance(output_type, type) and issubclass(
+             output_type, BaseModel
+         ):
+             # If extracted_data is a dict, use it as kwargs
+             if isinstance(extracted_data, dict):
+                 return output_type(**extracted_data)
+             # If it's already the right type, return it
+             elif isinstance(extracted_data, output_type):
+                 return extracted_data
+             else:
+                 raise ValueError(
+                     (
+                         f"Cannot construct {output_type.__name__} from "
+                         f"{type(extracted_data).__name__}"
+                     )
+                 )
+
+         # Fallback - return as-is
+         return extracted_data
+
+     async def process_message(
+         self,
+         message: FlowMessage,
+     ) -> AsyncIterator[FlowMessage]:
+         """Process a single FlowMessage for the FieldExtractor step.
+
+         Args:
+             message: The FlowMessage to process.
+
+         Yields:
+             FlowMessage(s) with extracted and constructed output values.
+             Multiple messages may be yielded if JSONPath matches multiple values.
+         """
+         input_id = self.step.inputs[0].id
+         output_id = self.step.outputs[0].id
+
+         try:
+             # Get the input value
+             input_value = message.variables.get(input_id)
+             if input_value is None:
+                 raise ValueError(
+                     f"Input variable '{input_id}' is not set or is None"
+                 )
+
+             await self.stream_emitter.status(
+                 f"Extracting fields using JSONPath: {self.step.json_path}"
+             )
+
+             # Convert input to dict for JSONPath processing
+             input_dict = self._to_dict(input_value)
+
+             # Apply JSONPath expression
+             matches = self.jsonpath_expr.find(input_dict)
+
+             if not matches:
+                 if self.step.fail_on_missing:
+                     raise ValueError(
+                         (
+                             f"JSONPath expression '{self.step.json_path}' "
+                             f"did not match any data in input"
+                         )
+                     )
+                 else:
+                     # Yield message with None output
+                     yield message.copy_with_variables({output_id: None})
+                     return
+
+             await self.stream_emitter.status(
+                 f"JSONPath matched {len(matches)} value(s)"
+             )
+
+             # Yield one message per match (1-to-many)
+             for match in matches:
+                 extracted_data = match.value
+
+                 # Construct the output value
+                 output_value = self._construct_output(extracted_data)
+
+                 # Yield message with the constructed output
+                 yield message.copy_with_variables({output_id: output_value})
+
+         except Exception as e:
+             # Emit error event to stream so frontend can display it
+             await self.stream_emitter.error(str(e))
+             message.set_error(self.step.id, e)
+             yield message
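
The 1-to-many behavior comes directly from jsonpath_ng: find() returns one match per hit, and the executor emits one message per match. A standalone illustration with made-up data:

    from jsonpath_ng.ext import parse

    data = {"hits": [{"name": "a", "score": 1.2}, {"name": "b", "score": 0.4}]}
    matches = parse("$.hits[*].name").find(data)
    print([m.value for m in matches])  # ['a', 'b'], so two output messages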
qtype/interpreter/executors/file_source_executor.py
@@ -0,0 +1,101 @@
+ from __future__ import annotations
+
+ from pathlib import Path
+ from typing import AsyncIterator
+
+ import fsspec
+ import pandas as pd
+
+ from qtype.interpreter.base.base_step_executor import StepExecutor
+ from qtype.interpreter.base.executor_context import ExecutorContext
+ from qtype.interpreter.types import FlowMessage
+ from qtype.semantic.model import ConstantPath, FileSource
+
+
+ class FileSourceExecutor(StepExecutor):
+     """Executor for FileSource steps."""
+
+     def __init__(
+         self, step: FileSource, context: ExecutorContext, **dependencies
+     ):
+         super().__init__(step, context, **dependencies)
+         if not isinstance(step, FileSource):
+             raise ValueError(
+                 "FileSourceExecutor can only execute FileSource steps."
+             )
+         self.step = step
+
+     async def process_message(
+         self,
+         message: FlowMessage,
+     ) -> AsyncIterator[FlowMessage]:
+         """Process a single FlowMessage for the FileSource step.
+
+         Args:
+             message: The FlowMessage to process.
+
+         Yields:
+             FlowMessages with the results of processing.
+         """
+         output_columns = {output.id for output in self.step.outputs}
+
+         # get the path
+         if isinstance(self.step.path, ConstantPath):
+             file_path = self.step.path
+         else:
+             file_path = message.variables.get(self.step.path.id)
+             if not file_path:
+                 raise ValueError(
+                     (
+                         f"FileSource step {self.step.id} requires a path "
+                         "variable."
+                     )
+                 )
+         await self.stream_emitter.status(
+             f"Reading file from path: {file_path}"
+         )
+
+         # Determine file format from extension
+         file_path_str = (
+             file_path.uri if isinstance(file_path, ConstantPath) else file_path
+         )
+         extension = Path(file_path_str).suffix.lower()
+
+         # Use fsspec to open the file and read with pandas
+         with fsspec.open(file_path_str, "rb") as file_handle:
+             if extension == ".csv":
+                 df = pd.read_csv(file_handle)  # type: ignore[arg-type]
+             elif extension == ".parquet":
+                 df = pd.read_parquet(file_handle)  # type: ignore[arg-type]
+             elif extension == ".json":
+                 df = pd.read_json(file_handle)  # type: ignore[arg-type]
+             elif extension == ".jsonl":
+                 df = pd.read_json(
+                     file_handle,
+                     lines=True,  # type: ignore[arg-type]
+                 )
+             else:
+                 # Default to parquet if no extension or unknown
+                 df = pd.read_parquet(file_handle)  # type: ignore[arg-type]
+
+         # confirm the outputs exist in the dataframe
+         columns = set(df.columns)
+         missing_columns = output_columns - columns
+         if missing_columns:
+             raise ValueError(
+                 (
+                     f"File {file_path_str} missing expected columns: "
+                     f"{', '.join(missing_columns)}. Available columns: "
+                     f"{', '.join(columns)}"
+                 )
+             )
+
+         for row in df.to_dict(orient="records"):
+             # Filter to only the expected output columns if they exist
+             row = {
+                 str(k): v for k, v in row.items() if str(k) in output_columns
+             }
+             yield message.copy_with_variables(new_variables=row)
+         await self.stream_emitter.status(
+             f"Emitted {len(df)} rows from: {file_path_str}"
+         )
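
The read path is plain fsspec plus pandas, followed by a per-row fan-out. A minimal sketch with an illustrative local path (fsspec accepts remote URLs such as s3:// or gs:// as well):

    import fsspec
    import pandas as pd

    with fsspec.open("rows.csv", "rb") as f:  # hypothetical file
        df = pd.read_csv(f)

    for row in df.to_dict(orient="records"):
        print(row)  # each row becomes one FlowMessage in the executor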