qtype-0.1.7-py3-none-any.whl → qtype-0.1.8-py3-none-any.whl

Files changed (40)
  1. qtype/base/types.py +1 -9
  2. qtype/base/ui_shapes.py +38 -0
  3. qtype/dsl/model.py +43 -18
  4. qtype/interpreter/base/base_step_executor.py +10 -10
  5. qtype/interpreter/base/factory.py +6 -0
  6. qtype/interpreter/base/step_cache.py +3 -3
  7. qtype/interpreter/conversions.py +7 -4
  8. qtype/interpreter/executors/agent_executor.py +1 -2
  9. qtype/interpreter/executors/bedrock_reranker_executor.py +1 -2
  10. qtype/interpreter/executors/collect_executor.py +76 -0
  11. qtype/interpreter/executors/construct_executor.py +68 -0
  12. qtype/interpreter/executors/decoder_executor.py +1 -2
  13. qtype/interpreter/executors/doc_to_text_executor.py +1 -2
  14. qtype/interpreter/executors/document_embedder_executor.py +1 -2
  15. qtype/interpreter/executors/document_search_executor.py +1 -2
  16. qtype/interpreter/executors/document_source_executor.py +1 -2
  17. qtype/interpreter/executors/document_splitter_executor.py +1 -2
  18. qtype/interpreter/executors/echo_executor.py +1 -2
  19. qtype/interpreter/executors/explode_executor.py +49 -0
  20. qtype/interpreter/executors/field_extractor_executor.py +1 -2
  21. qtype/interpreter/executors/file_writer_executor.py +12 -13
  22. qtype/interpreter/executors/index_upsert_executor.py +3 -4
  23. qtype/interpreter/executors/invoke_embedding_executor.py +1 -2
  24. qtype/interpreter/executors/invoke_tool_executor.py +1 -2
  25. qtype/interpreter/executors/llm_inference_executor.py +1 -2
  26. qtype/interpreter/executors/prompt_template_executor.py +1 -2
  27. qtype/interpreter/executors/sql_source_executor.py +1 -2
  28. qtype/interpreter/executors/vector_search_executor.py +1 -2
  29. qtype/interpreter/flow.py +5 -2
  30. qtype/interpreter/types.py +18 -10
  31. qtype/interpreter/typing.py +53 -4
  32. qtype/semantic/checker.py +83 -0
  33. qtype/semantic/generate.py +0 -1
  34. qtype/semantic/model.py +25 -18
  35. {qtype-0.1.7.dist-info → qtype-0.1.8.dist-info}/METADATA +1 -1
  36. {qtype-0.1.7.dist-info → qtype-0.1.8.dist-info}/RECORD +40 -36
  37. {qtype-0.1.7.dist-info → qtype-0.1.8.dist-info}/WHEEL +0 -0
  38. {qtype-0.1.7.dist-info → qtype-0.1.8.dist-info}/entry_points.txt +0 -0
  39. {qtype-0.1.7.dist-info → qtype-0.1.8.dist-info}/licenses/LICENSE +0 -0
  40. {qtype-0.1.7.dist-info → qtype-0.1.8.dist-info}/top_level.txt +0 -0
qtype/base/types.py CHANGED
@@ -54,21 +54,13 @@ class PrimitiveTypeEnum(str, Enum):
     citation_url = "citation_url"
     date = "date"
     datetime = "datetime"
-    int = "int"
     file = "file"
     float = "float"
     image = "image"
+    int = "int"
     text = "text"
     time = "time"
     video = "video"
-    thinking = "thinking"
-
-
-class StepCardinality(str, Enum):
-    """Does this step emit 1 (one) or 0...N (many) items?"""
-
-    one = "one"
-    many = "many"
 
 
 ReferenceT = TypeVar("ReferenceT")
qtype/base/ui_shapes.py ADDED
@@ -0,0 +1,38 @@
+from enum import Enum
+from typing import Union
+
+from pydantic import BaseModel
+from pydantic import ConfigDict as PydanticConfigDict
+from pydantic import Field
+
+from qtype.base.types import PrimitiveTypeEnum
+
+
+class TextWidget(str, Enum):
+    text = "text"  # Simple text widget input, default
+    textarea = "textarea"  # A paragraph editor
+
+
+class TextInputUI(BaseModel):
+    model_config = PydanticConfigDict(extra="forbid")
+
+    widget: TextWidget = Field(
+        default=TextWidget.text,
+        description="What kind of text ui to present",
+    )
+
+
+class FileUploadUI(BaseModel):
+    model_config = PydanticConfigDict(extra="forbid")
+    accept: str = Field(
+        default="*/*",
+        description="The mime type(s) to accept in the file upload.",
+    )
+
+
+UIType = Union[TextInputUI, FileUploadUI]
+
+UI_INPUT_TO_TYPE = {
+    (TextInputUI, PrimitiveTypeEnum.text),
+    (FileUploadUI, PrimitiveTypeEnum.file),
+}
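
Despite its name, UI_INPUT_TO_TYPE is a set of (widget class, primitive type) pairs rather than a mapping, so compatibility checks are plain membership tests. A minimal sketch of the check, assuming the package is importable as qtype:

```python
# Membership test against the compatibility set; mirrors what the new
# Variable.validate_ui_type validator (below) does.
from qtype.base.types import PrimitiveTypeEnum
from qtype.base.ui_shapes import UI_INPUT_TO_TYPE, FileUploadUI

ui = FileUploadUI(accept="application/pdf")
assert (type(ui), PrimitiveTypeEnum.file) in UI_INPUT_TO_TYPE      # compatible
assert (type(ui), PrimitiveTypeEnum.text) not in UI_INPUT_TO_TYPE  # rejected
```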
qtype/dsl/model.py CHANGED
@@ -23,9 +23,9 @@ from qtype.base.types import (
     ConcurrentStepMixin,
     PrimitiveTypeEnum,
     Reference,
-    StepCardinality,
     StrictBaseModel,
 )
+from qtype.base.ui_shapes import UI_INPUT_TO_TYPE, UIType
 from qtype.dsl.domain_types import (
     ChatContent,
     ChatMessage,
@@ -212,12 +212,24 @@ class Variable(StrictBaseModel):
         ),
     )
 
+    ui: UIType | None = Field(None, description="Hints for the UI if needed.")
+
     @model_validator(mode="before")
     @classmethod
     def resolve_type(cls, data: Any, info: ValidationInfo) -> Any:
         """Resolve string-based type references using the shared validator."""
         return _resolve_type_field_validator(data, info)
 
+    @model_validator(mode="after")
+    def validate_ui_type(self) -> Variable:
+        """Ensure at least one credential source is provided."""
+        if self.ui is not None:
+            if (type(self.ui), self.type) not in UI_INPUT_TO_TYPE:
+                raise ValueError(
+                    f"Variable of {self.type} is not comptabile with UI configuration {self.ui}"
+                )
+        return self
+
 
 class SecretReference(StrictBaseModel):
     """
@@ -350,10 +362,6 @@ class Step(CachedStepMixin, StrictBaseModel, ABC):
 
     id: str = Field(..., description="Unique ID of this component.")
     type: str = Field(..., description="Type of the step component.")
-    cardinality: StepCardinality = Field(
-        default=StepCardinality.one,
-        description="Does this step emit 1 (one) or 0...N (many) instances of the outputs?",
-    )
     inputs: list[Reference[Variable] | str] = Field(
         default_factory=list,
         description="References to the variables required by this step.",
@@ -364,6 +372,33 @@ class Step(CachedStepMixin, StrictBaseModel, ABC):
     )
 
 
+class Explode(Step):
+    """A step that takes a list input and produces multiple outputs, one per item in the list."""
+
+    type: Literal["Explode"] = "Explode"
+
+
+class Collect(Step, BatchableStepMixin):
+    """A step that collects all inputs and creates a single list to return."""
+
+    type: Literal["Collect"] = "Collect"
+
+    batch_config: BatchConfig = Field(
+        default_factory=partial(BatchConfig, batch_size=sys.maxsize),
+        description="Configuration for processing the input stream in batches. If omitted, the step processes items one by one.",
+    )
+
+
+class Construct(Step):
+    """A step that converts variables into an instance of a Custom or Domain Type"""
+
+    type: Literal["Construct"] = "Construct"
+    field_mapping: dict[str, str] = Field(
+        ...,
+        description="Mapping of type inputs to variable names, if needed.",
+    )
+
+
 class PromptTemplate(Step):
     """Defines a prompt template with a string format and variable bindings.
     This is used to generate prompts dynamically based on input variables."""
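
Explode and Collect are duals: one fans a list out into per-item messages, the other fans them back in. Note that Collect defaults batch_size to sys.maxsize, so a single batch spans the entire input stream. A pure-Python illustration of the semantics over plain dicts, not package code:

```python
# Illustration only: the fan-out/fan-in semantics of Explode and Collect,
# reduced to plain functions over dicts.
from typing import Any, Iterator

def explode(variables: dict[str, Any], key: str) -> Iterator[dict[str, Any]]:
    """One output record per item of the list stored under `key`."""
    for item in variables[key]:
        yield {**variables, key: item}

def collect(records: list[dict[str, Any]], key: str) -> dict[str, Any]:
    """Gather the per-item values back into a single list."""
    return {key: [r[key] for r in records]}

rows = list(explode({"ids": [1, 2, 3]}, "ids"))    # three records
assert collect(rows, "ids") == {"ids": [1, 2, 3]}  # round-trips
```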
@@ -892,10 +927,6 @@ class Source(Step):
     """Base class for data sources"""
 
     id: str = Field(..., description="Unique ID of the data source.")
-    cardinality: Literal[StepCardinality.many] = Field(
-        default=StepCardinality.many,
-        description="Sources always emit 0...N instances of the outputs.",
-    )
 
 
 class SQLSource(Source):
@@ -952,7 +983,6 @@ class Aggregate(Step):
     """
 
     type: Literal["Aggregate"] = "Aggregate"
-    cardinality: Literal[StepCardinality.one] = StepCardinality.one
 
     # Outputs are now optional. The user can provide 0, 1, 2, or 3 names.
     # The order will be: success_count, error_count, total_count
@@ -1006,10 +1036,6 @@ class DocumentSplitter(Step, ConcurrentStepMixin):
     """Configuration for chunking/splitting documents into embeddable nodes/chunks."""
 
     type: Literal["DocumentSplitter"] = "DocumentSplitter"
-    cardinality: Literal[StepCardinality.many] = Field(
-        default=StepCardinality.many,
-        description="Consumes one document and emits 0...N nodes/chunks.",
-    )
 
     splitter_name: str = Field(
         default="SentenceSplitter",
@@ -1029,10 +1055,6 @@ class DocumentEmbedder(Step, ConcurrentStepMixin):
     """Embeds document chunks using a specified embedding model."""
 
     type: Literal["DocumentEmbedder"] = "DocumentEmbedder"
-    cardinality: Literal[StepCardinality.many] = Field(
-        default=StepCardinality.many,
-        description="Consumes one chunk and emits one embedded chunk.",
-    )
     model: Reference[EmbeddingModel] | str = Field(
         ..., description="Embedding model to use for vectorization."
     )
@@ -1200,6 +1222,8 @@ StepType = Annotated[
     Agent,
     Aggregate,
     BedrockReranker,
+    Collect,
+    Construct,
     Decoder,
     DocToTextConverter,
     DocumentEmbedder,
@@ -1207,6 +1231,7 @@ StepType = Annotated[
     DocumentSplitter,
     DocumentSource,
     Echo,
+    Explode,
     FieldExtractor,
     FileSource,
     FileWriter,
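
The Literal["..."] type tags on each step suggest StepType is a pydantic discriminated union keyed on `type`; the annotation's full definition is not shown in this diff, but the generic pattern looks like this:

```python
# Generic sketch of the tagged-union pattern the Literal["..."] fields imply;
# qtype's actual StepType annotation is not shown in this diff.
from typing import Annotated, Literal, Union
from pydantic import BaseModel, Field, TypeAdapter

class Explode(BaseModel):
    type: Literal["Explode"] = "Explode"
    id: str

class Collect(BaseModel):
    type: Literal["Collect"] = "Collect"
    id: str

StepType = Annotated[Union[Explode, Collect], Field(discriminator="type")]

step = TypeAdapter(StepType).validate_python({"type": "Explode", "id": "s1"})
assert isinstance(step, Explode)  # dispatched on the "type" tag
```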
qtype/interpreter/base/base_step_executor.py CHANGED
@@ -230,21 +230,12 @@ class StepExecutor(ABC):
             prepared_messages, process_item, task_limit=num_workers
         )
 
-        # Combine all streams
-        async def emit_failed_messages() -> AsyncIterator[FlowMessage]:
-            for msg in failed_messages:
-                yield msg
-
-        all_results = stream.concat(
-            stream.iterate([result_stream, emit_failed_messages()])
-        )
-
         # Track message counts for telemetry
         message_count = 0
         error_count = 0
 
         # Stream results and track progress
-        async with all_results.stream() as streamer:
+        async with result_stream.stream() as streamer:
             result: FlowMessage
             async for result in streamer:
                 message_count += 1
@@ -255,6 +246,15 @@
                 )
                 yield result
 
+        # Emit failed messages after processing completes
+        for msg in failed_messages:
+            message_count += 1
+            error_count += 1
+            self.progress.update_for_message(
+                msg, self.context.on_progress
+            )
+            yield msg
+
         # Finalize and track those messages too
         async for msg in self.finalize():
             message_count += 1
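
The old code concatenated a failure stream onto the results with aiostream; the new code yields failed messages only after the result stream finishes, which also routes them through the message_count/error_count telemetry that the old path bypassed. A reduced sketch of the new control flow, with the aiostream plumbing elided:

```python
# Simplified control flow of the patched executor (assumed shape; only the
# ordering and counting behavior is taken from the diff).
from typing import AsyncIterator, Iterable, TypeVar

T = TypeVar("T")

async def emit_all(results: AsyncIterator[T], failures: Iterable[T]) -> AsyncIterator[T]:
    message_count = error_count = 0
    async for item in results:  # live results, as they complete
        message_count += 1
        yield item
    for item in failures:       # pre-collected failures, appended at the end
        message_count += 1
        error_count += 1        # failures now hit the telemetry counters too
        yield item
```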
qtype/interpreter/base/factory.py CHANGED
@@ -2,6 +2,8 @@ from qtype.semantic.model import (
     Agent,
     Aggregate,
     BedrockReranker,
+    Collect,
+    Construct,
     Decoder,
     DocToTextConverter,
     DocumentEmbedder,
@@ -9,6 +11,7 @@ from qtype.semantic.model import (
     DocumentSource,
     DocumentSplitter,
     Echo,
+    Explode,
     FieldExtractor,
     FileSource,
     FileWriter,
@@ -32,6 +35,8 @@ EXECUTOR_REGISTRY = {
     Agent: "qtype.interpreter.executors.agent_executor.AgentExecutor",
     Aggregate: "qtype.interpreter.executors.aggregate_executor.AggregateExecutor",
     BedrockReranker: "qtype.interpreter.executors.bedrock_reranker_executor.BedrockRerankerExecutor",
+    Collect: "qtype.interpreter.executors.collect_executor.CollectExecutor",
+    Construct: "qtype.interpreter.executors.construct_executor.ConstructExecutor",
     Decoder: "qtype.interpreter.executors.decoder_executor.DecoderExecutor",
     DocToTextConverter: "qtype.interpreter.executors.doc_to_text_executor.DocToTextConverterExecutor",
     DocumentEmbedder: "qtype.interpreter.executors.document_embedder_executor.DocumentEmbedderExecutor",
@@ -39,6 +44,7 @@ EXECUTOR_REGISTRY = {
     DocumentSource: "qtype.interpreter.executors.document_source_executor.DocumentSourceExecutor",
     DocumentSplitter: "qtype.interpreter.executors.document_splitter_executor.DocumentSplitterExecutor",
     Echo: "qtype.interpreter.executors.echo_executor.EchoExecutor",
+    Explode: "qtype.interpreter.executors.explode_executor.ExplodeExecutor",
    FieldExtractor: "qtype.interpreter.executors.field_extractor_executor.FieldExtractorExecutor",
     FileSource: "qtype.interpreter.executors.file_source_executor.FileSourceExecutor",
     FileWriter: "qtype.interpreter.executors.file_writer_executor.FileWriterExecutor",
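
EXECUTOR_REGISTRY maps semantic model classes to dotted import strings, so executors can be imported lazily. The factory's resolution code is not part of this diff; a sketch of how such a string is typically resolved:

```python
from importlib import import_module

def resolve(dotted_path: str) -> type:
    """Import 'pkg.module.ClassName' lazily and return the class."""
    module_path, _, class_name = dotted_path.rpartition(".")
    return getattr(import_module(module_path), class_name)

# e.g. resolve("qtype.interpreter.executors.echo_executor.EchoExecutor")
```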
qtype/interpreter/base/step_cache.py CHANGED
@@ -67,8 +67,8 @@ def from_cache_value(
 ) -> FlowMessage:
     """Reconstructs a FlowMessage from cached output values."""
     if "FlowMessage.__error__" in cache_value:
-        msg = message.model_copy(deep=True)
-        msg.error = cache_value["FlowMessage.__error__"]
-        return msg
+        return message.model_copy(
+            deep=True, update={"error": cache_value["FlowMessage.__error__"]}
+        )
     else:
         return message.copy_with_variables(cache_value)
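
The rewrite uses pydantic v2's model_copy(update=...), which applies new field values during the copy instead of mutating the copy afterwards, so it also works on frozen models. A standalone demonstration:

```python
from pydantic import BaseModel

class Msg(BaseModel, frozen=True):
    error: str | None = None

m = Msg()
m2 = m.model_copy(deep=True, update={"error": "boom"})  # no mutation of m
assert m.error is None and m2.error == "boom"
```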
qtype/interpreter/conversions.py CHANGED
@@ -18,7 +18,8 @@ from llama_index.core.base.llms.types import (
 from llama_index.core.memory import Memory as LlamaMemory
 from llama_index.core.schema import Document as LlamaDocument
 from llama_index.core.vector_stores.types import BasePydanticVectorStore
-from opensearchpy import AsyncOpenSearch, AWSV4SignerAuth
+from opensearchpy import AsyncHttpConnection, AsyncOpenSearch
+from opensearchpy.helpers.asyncsigner import AWSV4SignerAsyncAuth
 
 from qtype.base.types import PrimitiveTypeEnum
 from qtype.dsl.domain_types import (
@@ -369,7 +370,7 @@ def to_opensearch_client(
         InterpreterError: If authentication fails or configuration is invalid
     """
     client_kwargs: dict[str, Any] = {
-        "hosts": [index.endpoint],
+        "hosts": index.endpoint,
         **index.args,
     }
 
@@ -390,15 +391,17 @@ def to_opensearch_client(
             f"Failed to obtain AWS credentials for DocumentIndex '{index.id}'"
         )
 
-    # Use opensearch-py's built-in AWS auth
-    aws_auth = AWSV4SignerAuth(
+    # Use opensearch-py's async AWS auth
+    aws_auth = AWSV4SignerAsyncAuth(
         credentials,
         auth_session.region_name or "us-east-1",  # type: ignore
+        "aoss",  # service name for OpenSearch Serverless
     )
 
     client_kwargs["http_auth"] = aws_auth
     client_kwargs["use_ssl"] = True
     client_kwargs["verify_certs"] = True
+    client_kwargs["connection_class"] = AsyncHttpConnection
 else:
     raise InterpreterError(
         f"Unsupported authentication type for DocumentIndex: {type(index.auth)}"
qtype/interpreter/executors/agent_executor.py CHANGED
@@ -93,8 +93,7 @@ class AgentExecutor(StepExecutor, ToolExecutionMixin, FunctionToolHelper):
             logger.error(f"Agent execution failed: {e}", exc_info=True)
             # Emit error event to stream so frontend can display it
             await self.stream_emitter.error(str(e))
-            message.set_error(self.step.id, e)
-            yield message
+            yield message.copy_with_error(self.step.id, e)
 
     async def _process_chat(
         self,
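
This release replaces the mutate-then-yield pair (set_error followed by yield message) with a single copy_with_error call across every executor. copy_with_error itself is not shown in this diff; given the model_copy(update=...) pattern in step_cache.py above, a plausible, purely hypothetical shape is:

```python
# Hypothetical sketch of FlowMessage.copy_with_error; not package code.
from pydantic import BaseModel

class FlowMessage(BaseModel):
    variables: dict = {}
    error: str | None = None

    def copy_with_error(self, step_id: str, exc: Exception) -> "FlowMessage":
        # Return a copy carrying the error rather than mutating in place,
        # keeping the original message intact for reuse (e.g. caching).
        return self.model_copy(deep=True, update={"error": f"{step_id}: {exc}"})
```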
qtype/interpreter/executors/bedrock_reranker_executor.py CHANGED
@@ -160,8 +160,7 @@ class BedrockRerankerExecutor(StepExecutor):
             logger.error(f"Reranking failed: {e}", exc_info=True)
             # Emit error event to stream so frontend can display it
             await self.stream_emitter.error(str(e))
-            message.set_error(self.step.id, e)
-            yield message
+            yield message.copy_with_error(self.step.id, e)
 
     def _query(self, message: FlowMessage) -> str:
         """Extract the query string from the FlowMessage.
qtype/interpreter/executors/collect_executor.py ADDED
@@ -0,0 +1,76 @@
+from typing import Any, AsyncIterator
+
+from qtype.interpreter.base.batch_step_executor import BatchedStepExecutor
+from qtype.interpreter.base.executor_context import ExecutorContext
+from qtype.interpreter.types import FlowMessage
+from qtype.semantic.model import Collect
+
+
+def _find_common_ancestors(messages: list[FlowMessage]) -> dict[str, Any]:
+    if not messages:
+        return {}
+
+    # 1. Start with all variables from the first message
+    common_vars = messages[0].variables.copy()
+
+    for msg in messages[1:]:
+        # 2. Identify keys that either don't exist in this message
+        #    OR have a different value (diverged)
+        diverged_keys = [
+            k
+            for k, v in common_vars.items()
+            if k not in msg.variables or msg.variables[k] != v
+        ]
+        # 3. Remove diverged keys to leave only the "Common Ancestors"
+        for k in diverged_keys:
+            common_vars.pop(k)
+
+    return common_vars
+
+
+class CollectExecutor(BatchedStepExecutor):
+    """Executor for Collect steps."""
+
+    def __init__(
+        self,
+        step: Collect,
+        context: ExecutorContext,
+        **dependencies,
+    ):
+        super().__init__(step, context, **dependencies)
+        if not isinstance(step, Collect):
+            raise ValueError("CollectExecutor can only execute Collect steps.")
+        self.step = step
+
+    async def process_batch(
+        self,
+        batch: list[FlowMessage],
+    ) -> AsyncIterator[FlowMessage]:
+        """Process a batch of FlowMessages for the Collect step.
+
+        Args:
+            batch: A list of FlowMessages to process.
+
+        Yields:
+            FlowMessages with the results of processing.
+        """
+
+        # Note that the batch processor accumulates the messages that we need,
+        # so this function isn't called until collection is ready.
+
+        # outputs[0] and inputs[0] is safe here since semantic validation ensures only one output
+        output_name = self.step.outputs[0].id
+        input_name = self.step.inputs[0].id
+
+        if len(batch) == 0:
+            # No messages to process -- yield nothing
+            return
+
+        results = []
+        for msg in batch:
+            results.append(msg.variables[input_name])
+
+        # Only variables common to all input messages are propagated at the end
+        common_ancestors = _find_common_ancestors(batch)
+        new_variables = {output_name: results} | common_ancestors
+        yield FlowMessage(session=batch[0].session, variables=new_variables)
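
_find_common_ancestors keeps only the variables that appear with an equal value in every collected message, so shared lineage (a run id, say) survives the fan-in while per-item values are dropped. The same pruning over plain dicts standing in for FlowMessage.variables:

```python
msgs = [
    {"run_id": "r1", "chunk": 1},
    {"run_id": "r1", "chunk": 2},
    {"run_id": "r1", "chunk": 3},
]

common = msgs[0].copy()
for variables in msgs[1:]:
    common = {k: v for k, v in common.items()
              if k in variables and variables[k] == v}

assert common == {"run_id": "r1"}  # "chunk" diverged, so it is dropped
```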
qtype/interpreter/executors/construct_executor.py ADDED
@@ -0,0 +1,68 @@
+from typing import AsyncIterator
+
+from qtype.dsl.model import ListType
+from qtype.interpreter.base.base_step_executor import StepExecutor
+from qtype.interpreter.base.executor_context import ExecutorContext
+from qtype.interpreter.types import FlowMessage
+from qtype.interpreter.typing import instantiate_variable
+from qtype.semantic.model import Construct
+
+
+class ConstructExecutor(StepExecutor):
+    """Executor for Construct steps."""
+
+    def __init__(
+        self,
+        step: Construct,
+        context: ExecutorContext,
+        **dependencies,
+    ):
+        super().__init__(step, context, **dependencies)
+        if not isinstance(step, Construct):
+            raise ValueError(
+                "ConstructExecutor can only execute Construct steps."
+            )
+        self.step = step
+
+    async def process_message(
+        self,
+        message: FlowMessage,
+    ) -> AsyncIterator[FlowMessage]:
+        """Process a FlowMessage for the Construct step.
+
+        Args:
+            message: The FlowMessage to process.
+        Yields:
+            FlowMessages with the results of processing.
+        """
+        try:
+            # Safe since semantic validation ensures exactly one output variable
+            output_var = self.step.outputs[0]
+
+            if (
+                isinstance(output_var.type, ListType)
+                or len(self.step.inputs) == 1
+            ):
+                inputs = message.variables[self.step.inputs[0].id]
+            elif hasattr(output_var.type, "model_validate"):
+                # This is a custom type (Pydantic model)
+                # So input should be a dict
+                input_values = {
+                    input_var.id: message.variables[input_var.id]
+                    for input_var in self.step.inputs
+                }
+                # use the mapping to convert variable names to
+                inputs = {
+                    self.step.field_mapping.get(var_name, var_name): value
+                    for var_name, value in input_values.items()
+                }
+            else:
+                raise ValueError(
+                    "Construct step must have either a single input or output of a custom type."
+                )
+            constructed_value = instantiate_variable(output_var, inputs)
+            yield message.copy_with_variables(
+                {output_var.id: constructed_value}
+            )
+        except Exception as e:
+            yield message.copy_with_error(self.step.id, e)
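
The field_mapping remap in isolation: step input variable names become the output type's field names, and unmapped names pass through unchanged. Illustrative names only:

```python
field_mapping = {"q": "query", "k": "top_k"}
input_values = {"q": "what is qtype?", "k": 5, "filter": None}

inputs = {field_mapping.get(name, name): value
          for name, value in input_values.items()}

assert inputs == {"query": "what is qtype?", "top_k": 5, "filter": None}
```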
qtype/interpreter/executors/decoder_executor.py CHANGED
@@ -159,5 +159,4 @@ class DecoderExecutor(StepExecutor):
         except Exception as e:
             # Emit error event to stream so frontend can display it
             await self.stream_emitter.error(str(e))
-            message.set_error(self.step.id, e)
-            yield message
+            yield message.copy_with_error(self.step.id, e)
qtype/interpreter/executors/doc_to_text_executor.py CHANGED
@@ -74,8 +74,7 @@ class DocToTextConverterExecutor(StepExecutor):
         except Exception as e:
             # Emit error event to stream so frontend can display it
             await self.stream_emitter.error(str(e))
-            message.set_error(self.step.id, e)
-            yield message
+            yield message.copy_with_error(self.step.id, e)
 
     def _convert_doc(self, doc: RAGDocument) -> RAGDocument:
         """Convert a RAGDocument to text/markdown format.
qtype/interpreter/executors/document_embedder_executor.py CHANGED
@@ -119,5 +119,4 @@ class DocumentEmbedderExecutor(StepExecutor):
                 f"Error processing DocumentEmbedder step {self.step.id}",
                 exc_info=e,
             )
-            message.set_error(self.step.id, e)
-            yield message
+            yield message.copy_with_error(self.step.id, e)
qtype/interpreter/executors/document_search_executor.py CHANGED
@@ -109,5 +109,4 @@ class DocumentSearchExecutor(StepExecutor):
         except Exception as e:
             # Emit error event to stream so frontend can display it
             await self.stream_emitter.error(str(e))
-            message.set_error(self.step.id, e)
-            yield message
+            yield message.copy_with_error(self.step.id, e)
qtype/interpreter/executors/document_source_executor.py CHANGED
@@ -114,5 +114,4 @@ class DocumentSourceExecutor(StepExecutor):
         except Exception as e:
             # Emit error event to stream so frontend can display it
             await self.stream_emitter.error(str(e))
-            message.set_error(self.step.id, e)
-            yield message
+            yield message.copy_with_error(self.step.id, e)
qtype/interpreter/executors/document_splitter_executor.py CHANGED
@@ -101,5 +101,4 @@ class DocumentSplitterExecutor(StepExecutor):
         except Exception as e:
             # Emit error event to stream so frontend can display it
             await self.stream_emitter.error(str(e))
-            message.set_error(self.step.id, e)
-            yield message
+            yield message.copy_with_error(self.step.id, e)
qtype/interpreter/executors/echo_executor.py CHANGED
@@ -59,5 +59,4 @@ class EchoExecutor(StepExecutor):
         except Exception as e:
             # Emit error event to stream so frontend can display it
             await self.stream_emitter.error(str(e))
-            message.set_error(self.step.id, e)
-            yield message
+            yield message.copy_with_error(self.step.id, e)
qtype/interpreter/executors/explode_executor.py ADDED
@@ -0,0 +1,49 @@
+from typing import AsyncIterator
+
+from qtype.interpreter.base.base_step_executor import StepExecutor
+from qtype.interpreter.base.executor_context import ExecutorContext
+from qtype.interpreter.types import FlowMessage
+from qtype.semantic.model import Explode
+
+
+class ExplodeExecutor(StepExecutor):
+    """Executor for Explode steps."""
+
+    def __init__(
+        self,
+        step: Explode,
+        context: ExecutorContext,
+        **dependencies,
+    ):
+        super().__init__(step, context, **dependencies)
+        if not isinstance(step, Explode):
+            raise ValueError("ExplodeExecutor can only execute Explode steps.")
+        self.step = step
+
+    async def process_message(
+        self,
+        message: FlowMessage,
+    ) -> AsyncIterator[FlowMessage]:
+        """Process a FlowMessage for the Explode step.
+
+        Args:
+            message: A FlowMessage to process.
+        Yields:
+            FlowMessages with the results of processing.
+        """
+        try:
+            input_name = self.step.inputs[0].id
+            output_name = self.step.outputs[0].id
+
+            input_value = message.variables.get(input_name)
+
+            if not isinstance(input_value, list):
+                raise ValueError(
+                    f"Explode step expected a list for input '{input_name}', "
+                    f"but got: {type(input_value).__name__}"
+                )
+
+            for item in input_value:
+                yield message.copy_with_variables({output_name: item})
+        except Exception as e:
+            yield message.copy_with_error(self.step.id, e)
qtype/interpreter/executors/field_extractor_executor.py CHANGED
@@ -161,5 +161,4 @@ class FieldExtractorExecutor(StepExecutor):
         except Exception as e:
             # Emit error event to stream so frontend can display it
             await self.stream_emitter.error(str(e))
-            message.set_error(self.step.id, e)
-            yield message
+            yield message.copy_with_error(self.step.id, e)
qtype/interpreter/executors/file_writer_executor.py CHANGED
@@ -6,6 +6,7 @@ import pandas as pd
 
 from qtype.interpreter.base.batch_step_executor import BatchedStepExecutor
 from qtype.interpreter.base.executor_context import ExecutorContext
+from qtype.interpreter.executors.collect_executor import _find_common_ancestors
 from qtype.interpreter.types import FlowMessage
 from qtype.semantic.model import ConstantPath, FileWriter, Variable
 
@@ -59,12 +60,11 @@ class FileWriterExecutor(BatchedStepExecutor):
             await self.stream_emitter.status(
                 f"Wrote {len(df)} records to {file_path}"
             )
-            for msg in batch:
-                yield (
-                    msg
-                    if not output_name
-                    else msg.copy_with_variables({output_name: file_path})
-                )
+            # Identify the common ancestors to propagate
+            result_vars = _find_common_ancestors(batch)
+            result_vars[output_name] = file_path  # type: ignore[index]
+            yield FlowMessage(session=batch[0].session, variables=result_vars)
+
         else:
             # Group messages by file path (path is a Variable in this branch)
             if not isinstance(self.step.path, Variable):
@@ -101,10 +101,9 @@ class FileWriterExecutor(BatchedStepExecutor):
                 await self.stream_emitter.status(
                     f"Wrote {len(df_group)} records to {file_path}"
                 )
-                # Re-yield the original messages for this group
-                for msg in batch:
-                    yield (
-                        msg
-                        if not output_name
-                        else msg.copy_with_variables({output_name: file_path})
-                    )
+                # Identify the common ancestors to propagate
+                result_vars = _find_common_ancestors(msg_list)
+                result_vars[output_name] = file_path  # type: ignore[index]
+                yield FlowMessage(
+                    session=msg_list[0].session, variables=result_vars
+                )
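
Note the behavioral change: FileWriter now emits one summary message per written file, carrying the batch's common variables plus the output path, instead of re-yielding every input message. A reduced sketch with illustrative names:

```python
def summarize(messages: list[dict], file_path: str, output_name: str) -> dict:
    # Same pruning as _find_common_ancestors, then attach the written path.
    common = messages[0].copy()
    for variables in messages[1:]:
        common = {k: v for k, v in common.items()
                  if k in variables and variables[k] == v}
    return {**common, output_name: file_path}

batch = [{"run_id": "r1", "row": 1}, {"run_id": "r1", "row": 2}]
assert summarize(batch, "out/data.parquet", "written_path") == {
    "run_id": "r1", "written_path": "out/data.parquet",
}
```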