qtype-0.1.0-py3-none-any.whl → qtype-0.1.2-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (33)
  1. qtype/application/facade.py +16 -17
  2. qtype/cli.py +5 -1
  3. qtype/commands/generate.py +1 -1
  4. qtype/commands/run.py +28 -5
  5. qtype/dsl/domain_types.py +24 -3
  6. qtype/dsl/model.py +56 -3
  7. qtype/interpreter/base/base_step_executor.py +8 -1
  8. qtype/interpreter/base/executor_context.py +18 -1
  9. qtype/interpreter/base/factory.py +33 -66
  10. qtype/interpreter/base/progress_tracker.py +35 -0
  11. qtype/interpreter/base/step_cache.py +3 -2
  12. qtype/interpreter/conversions.py +34 -19
  13. qtype/interpreter/converters.py +19 -13
  14. qtype/interpreter/executors/bedrock_reranker_executor.py +195 -0
  15. qtype/interpreter/executors/document_embedder_executor.py +36 -4
  16. qtype/interpreter/executors/document_search_executor.py +37 -46
  17. qtype/interpreter/executors/document_splitter_executor.py +1 -1
  18. qtype/interpreter/executors/field_extractor_executor.py +10 -5
  19. qtype/interpreter/executors/index_upsert_executor.py +115 -111
  20. qtype/interpreter/executors/invoke_embedding_executor.py +2 -2
  21. qtype/interpreter/executors/invoke_tool_executor.py +6 -1
  22. qtype/interpreter/flow.py +47 -32
  23. qtype/interpreter/rich_progress.py +225 -0
  24. qtype/interpreter/types.py +2 -0
  25. qtype/semantic/checker.py +79 -19
  26. qtype/semantic/model.py +43 -3
  27. qtype/semantic/resolver.py +4 -2
  28. {qtype-0.1.0.dist-info → qtype-0.1.2.dist-info}/METADATA +12 -11
  29. {qtype-0.1.0.dist-info → qtype-0.1.2.dist-info}/RECORD +33 -31
  30. {qtype-0.1.0.dist-info → qtype-0.1.2.dist-info}/WHEEL +0 -0
  31. {qtype-0.1.0.dist-info → qtype-0.1.2.dist-info}/entry_points.txt +0 -0
  32. {qtype-0.1.0.dist-info → qtype-0.1.2.dist-info}/licenses/LICENSE +0 -0
  33. {qtype-0.1.0.dist-info → qtype-0.1.2.dist-info}/top_level.txt +0 -0
--- a/qtype/interpreter/conversions.py
+++ b/qtype/interpreter/conversions.py
@@ -1,6 +1,7 @@
 from __future__ import annotations
 
 import importlib
+import uuid
 from typing import Any
 
 from llama_index.core.base.embeddings.base import BaseEmbedding
@@ -17,10 +18,15 @@ from llama_index.core.base.llms.types import (
 from llama_index.core.memory import Memory as LlamaMemory
 from llama_index.core.schema import Document as LlamaDocument
 from llama_index.core.vector_stores.types import BasePydanticVectorStore
-from opensearchpy import AWSV4SignerAuth, OpenSearch
+from opensearchpy import AsyncOpenSearch, AWSV4SignerAuth
 
 from qtype.base.types import PrimitiveTypeEnum
-from qtype.dsl.domain_types import ChatContent, ChatMessage, RAGDocument
+from qtype.dsl.domain_types import (
+    ChatContent,
+    ChatMessage,
+    RAGDocument,
+    RAGSearchResult,
+)
 from qtype.dsl.model import Memory
 from qtype.interpreter.auth.aws import aws
 from qtype.interpreter.auth.generic import auth
@@ -305,7 +311,8 @@ def to_embedding_model(model: Model) -> BaseEmbedding:
         )
 
         bedrock_embedding: BaseEmbedding = BedrockEmbedding(
-            model_name=model.model_id if model.model_id else model.id
+            model_name=model.model_id if model.model_id else model.id,
+            max_retries=100,
         )
         return bedrock_embedding
     elif model.provider == "openai":
@@ -326,7 +333,7 @@ def to_embedding_model(model: Model) -> BaseEmbedding:
 @cached_resource
 def to_opensearch_client(
     index: DocumentIndex, secret_manager: SecretManagerBase
-) -> OpenSearch:
+) -> AsyncOpenSearch:
     """
     Convert a DocumentIndex to an OpenSearch/Elasticsearch client.
 
@@ -375,7 +382,7 @@ def to_opensearch_client(
             f"Unsupported authentication type for DocumentIndex: {type(index.auth)}"
         )
 
-    return OpenSearch(**client_kwargs)
+    return AsyncOpenSearch(**client_kwargs)
 
 
 def to_content_block(content: ChatContent) -> ContentBlock:
@@ -506,26 +513,30 @@ def to_text_splitter(splitter: DocumentSplitter) -> Any:
     Raises:
         InterpreterError: If the splitter class cannot be found or instantiated.
     """
-    from llama_index.core.node_parser import SentenceSplitter
 
-    # Map common splitter names to their classes
-    splitter_classes = {
-        "SentenceSplitter": SentenceSplitter,
-    }
+    module_path = "llama_index.core.node_parser"
+    class_name = splitter.splitter_name
+    try:
+        reader_module = importlib.import_module(module_path)
+        splitter_class = getattr(reader_module, class_name)
+    except (ImportError, AttributeError) as e:
+        raise ImportError(
+            f"Failed to import reader class '{class_name}' from '{module_path}': {e}"
+        ) from e
+    from llama_index.core.schema import BaseNode
 
-    # Get the splitter class
-    splitter_class = splitter_classes.get(splitter.splitter_name)
+    # TODO: let the user specify a custom ID namespace
+    namespace = uuid.UUID("12345678-1234-5678-1234-567812345678")
 
-    if splitter_class is None:
-        raise InterpreterError(
-            f"Unsupported text splitter: {splitter.splitter_name}. "
-            f"Supported splitters: {', '.join(splitter_classes.keys())}"
-        )
+    def id_func(i: int, doc: BaseNode) -> str:
+        u = uuid.uuid5(namespace, f"{doc.node_id}_{i}")
+        return str(u)
 
     # Prepare arguments for the splitter
     splitter_args = {
         "chunk_size": splitter.chunk_size,
         "chunk_overlap": splitter.chunk_overlap,
+        "id_func": id_func,
         **splitter.args,
     }
 
@@ -569,7 +580,7 @@ def to_llama_vector_store_and_retriever(
     return vector_store, retriever
 
 
-def from_node_with_score(node_with_score) -> Any:
+def from_node_with_score(node_with_score) -> RAGSearchResult:
    """Convert a LlamaIndex NodeWithScore to a RAGSearchResult.
 
     Args:
@@ -597,4 +608,8 @@ def from_node_with_score(node_with_score) -> Any:
     )
 
     # Wrap in RAGSearchResult with score
-    return RAGSearchResult(chunk=chunk, score=node_with_score.score or 0.0)
+    return RAGSearchResult(
+        content=chunk,
+        doc_id=chunk.document_id,
+        score=node_with_score.score or 0.0,
+    )
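
Note on the to_text_splitter change above: uuid.uuid5 is name-based, so re-splitting the same document produces identical chunk IDs, which keeps downstream index upserts idempotent instead of accumulating duplicate chunks. A minimal standalone sketch of the idea (not qtype code):

    import uuid

    # A fixed namespace plus the same name always yields the same UUID.
    namespace = uuid.UUID("12345678-1234-5678-1234-567812345678")
    first = uuid.uuid5(namespace, "doc-1_0")
    second = uuid.uuid5(namespace, "doc-1_0")
    assert first == second  # re-running a pipeline regenerates the same chunk IDs
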
--- a/qtype/interpreter/converters.py
+++ b/qtype/interpreter/converters.py
@@ -2,17 +2,21 @@
 
 from __future__ import annotations
 
+from collections.abc import AsyncIterator
+from typing import Any, cast
+
 import pandas as pd
+from pydantic import BaseModel
 
 from qtype.interpreter.types import FlowMessage, Session
 from qtype.semantic.model import Flow
 
 
-def dataframe_to_flow_messages(
+async def dataframe_to_flow_messages(
     df: pd.DataFrame, session: Session
-) -> list[FlowMessage]:
+) -> AsyncIterator[FlowMessage]:
     """
-    Convert a DataFrame to a list of FlowMessages.
+    Convert a DataFrame to an async generator of FlowMessages.
 
     Each row in the DataFrame becomes a FlowMessage with the same session.
 
@@ -20,14 +24,15 @@ def dataframe_to_flow_messages(
         df: DataFrame where each row represents one set of inputs
         session: Session object to use for all messages
 
-    Returns:
-        List of FlowMessages, one per DataFrame row
+    Yields:
+        FlowMessages, one per DataFrame row
     """
-    messages = []
-    for _, row in df.iterrows():
-        variables = row.to_dict()
-        messages.append(FlowMessage(session=session, variables=variables))
-    return messages
+    # Use to_dict with orient='records' - much faster than iterrows
+    # This returns a list of dicts directly without Series overhead
+    records = cast(list[dict[str, Any]], df.to_dict(orient="records"))
+
+    for record in records:
+        yield FlowMessage(session=session, variables=record)
 
 
 def flow_messages_to_dataframe(
@@ -45,8 +50,6 @@
     Returns:
         DataFrame with one row per message, columns for each output variable
     """
-    from typing import Any
-
     results = []
     for idx, message in enumerate(messages):
         row_data: dict[str, Any] = {"row": idx}
@@ -54,7 +57,10 @@
         # Extract output variables
         for var in flow.outputs:
             if var.id in message.variables:
-                row_data[var.id] = message.variables[var.id]
+                value = message.variables[var.id]
+                if isinstance(value, BaseModel):
+                    value = value.model_dump()
+                row_data[var.id] = value
             else:
                 row_data[var.id] = None
 
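
Note on the converters.py changes above: dataframe_to_flow_messages is now an async generator, so callers iterate it with async for rather than receiving a list. A minimal usage sketch, assuming a session object is available (None stands in here and may not pass validation in real qtype code):

    import asyncio

    import pandas as pd

    async def main() -> None:
        df = pd.DataFrame([{"q": "hello"}, {"q": "world"}])
        # session would be a real qtype Session in practice.
        async for msg in dataframe_to_flow_messages(df, session=None):
            print(msg.variables)

    asyncio.run(main())
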
--- /dev/null
+++ b/qtype/interpreter/executors/bedrock_reranker_executor.py
@@ -0,0 +1,195 @@
+"""BedrockReranker executor for reordering search results by relevance."""
+
+from __future__ import annotations
+
+import asyncio
+import logging
+from typing import AsyncIterator
+
+from pydantic import BaseModel
+
+from qtype.base.types import PrimitiveTypeEnum
+from qtype.dsl.domain_types import RAGChunk, SearchResult
+from qtype.interpreter.auth.aws import aws
+from qtype.interpreter.base.base_step_executor import StepExecutor
+from qtype.interpreter.base.executor_context import ExecutorContext
+from qtype.interpreter.types import FlowMessage
+from qtype.semantic.model import BedrockReranker, ListType
+
+logger = logging.getLogger(__name__)
+
+
+class BedrockRerankerExecutor(StepExecutor):
+    """Executor for BedrockReranker steps that reorder search results by relevance."""
+
+    def __init__(
+        self, step: BedrockReranker, context: ExecutorContext, **dependencies
+    ):
+        super().__init__(step, context, **dependencies)
+        if not isinstance(step, BedrockReranker):
+            raise ValueError(
+                "BedrockRerankerExecutor can only execute BedrockReranker steps."
+            )
+        self.step: BedrockReranker = step
+
+    async def process_message(
+        self,
+        message: FlowMessage,
+    ) -> AsyncIterator[FlowMessage]:
+        """Process a single FlowMessage for the BedrockReranker step.
+
+        Args:
+            message: The FlowMessage to process.
+
+        Yields:
+            FlowMessage with reranked results.
+        """
+        try:
+            # Get the inputs
+            query = self._query(message)
+            docs = self._docs(message)
+
+            if len(docs) == 0:
+                # No documents to rerank, yield original message
+                yield message.copy_with_variables(
+                    {self.step.outputs[0].id: docs}
+                )
+                return
+
+            # Get session for region info
+            if self.step.auth is not None:
+                with aws(self.step.auth, self.context.secret_manager) as s:
+                    region_name = s.region_name
+            else:
+                import boto3
+
+                region_name = boto3.Session().region_name
+
+            # Convert the types
+            queries = [
+                {
+                    "type": "TEXT",
+                    "textQuery": {"text": query},
+                }
+            ]
+            documents = []
+
+            for doc in docs:
+                if isinstance(doc.content, RAGChunk):
+                    documents.append(
+                        {
+                            "type": "INLINE",
+                            "inlineDocumentSource": {
+                                "type": "TEXT",
+                                "textDocument": {"text": str(doc.content)},
+                            },
+                        }
+                    )
+                elif isinstance(doc.content, dict):
+                    documents.append(
+                        {
+                            "type": "INLINE",
+                            "inlineDocumentSource": {
+                                "type": "JSON",
+                                "jsonDocument": doc.content,
+                            },
+                        }
+                    )
+                elif isinstance(doc.content, BaseModel):
+                    documents.append(
+                        {
+                            "type": "INLINE",
+                            "inlineDocumentSource": {
+                                "type": "JSON",
+                                "jsonDocument": doc.content.model_dump(),
+                            },
+                        }
+                    )
+                else:
+                    raise ValueError(
+                        f"Unsupported document content type for BedrockReranker: {type(doc.content)}"
+                    )
+
+            reranking_configuration = {
+                "type": "BEDROCK_RERANKING_MODEL",
+                "bedrockRerankingConfiguration": {
+                    "numberOfResults": self.step.num_results or len(docs),
+                    "modelConfiguration": {
+                        "modelArn": f"arn:aws:bedrock:{region_name}::foundation-model/{self.step.model_id}"
+                    },
+                },
+            }
+
+            def _call_bedrock_rerank():
+                """Create client and call rerank in executor thread."""
+                if self.step.auth is not None:
+                    with aws(self.step.auth, self.context.secret_manager) as s:
+                        client = s.client("bedrock-agent-runtime")
+                        return client.rerank(
+                            queries=queries,
+                            sources=documents,
+                            rerankingConfiguration=reranking_configuration,
+                        )
+                else:
+                    import boto3
+
+                    session = boto3.Session()
+                    client = session.client("bedrock-agent-runtime")
+                    return client.rerank(
+                        queries=queries,
+                        sources=documents,
+                        rerankingConfiguration=reranking_configuration,
+                    )
+
+            loop = asyncio.get_running_loop()
+            response = await loop.run_in_executor(
+                self.context.thread_pool, _call_bedrock_rerank
+            )
+
+            results = []
+            for d in response["results"]:
+                doc = docs[d["index"]]
+                new_score = d["relevanceScore"]
+                results.append(doc.copy(update={"score": new_score}))
+
+            # Update the message with reranked results
+            yield message.copy_with_variables(
+                {self.step.outputs[0].id: results}
+            )
+        except Exception as e:
+            logger.error(f"Reranking failed: {e}", exc_info=True)
+            # Emit error event to stream so frontend can display it
+            await self.stream_emitter.error(str(e))
+            message.set_error(self.step.id, e)
+            yield message
+
+    def _query(self, message: FlowMessage) -> str:
+        """Extract the query string from the FlowMessage.
+
+        Args:
+            message: The FlowMessage containing the query variable.
+        Returns:
+            The query string.
+        """
+        for i in self.step.inputs:
+            if i.type == PrimitiveTypeEnum.text:
+                return message.variables[i.id]
+        raise ValueError(
+            f"No text input found for BedrockReranker step {self.step.id}"
+        )
+
+    def _docs(self, message: FlowMessage) -> list[SearchResult]:
+        """Extract the list of SearchResult documents from the FlowMessage.
+
+        Args:
+            message: The FlowMessage containing the document variable.
+        Returns:
+            The list of SearchResult documents.
+        """
+        for i in self.step.inputs:
+            if i.type == ListType(element_type="SearchResult"):
+                docs = message.variables[i.id]
+                return docs
+        raise ValueError(
+            f"No list of SearchResults input found for BedrockReranker step {self.step.id}"
+        )
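
Note on the new executor above: it wraps the Bedrock Agent Runtime rerank API via boto3, offloading the blocking call to a thread pool. A trimmed standalone sketch of the same request shape (the region and model ARN below are placeholders, not values from this package):

    import boto3

    client = boto3.Session().client("bedrock-agent-runtime")
    response = client.rerank(
        queries=[{"type": "TEXT", "textQuery": {"text": "example query"}}],
        sources=[
            {
                "type": "INLINE",
                "inlineDocumentSource": {
                    "type": "TEXT",
                    "textDocument": {"text": "an example document to rerank"},
                },
            }
        ],
        rerankingConfiguration={
            "type": "BEDROCK_RERANKING_MODEL",
            "bedrockRerankingConfiguration": {
                "numberOfResults": 1,
                "modelConfiguration": {
                    # Placeholder ARN; substitute your region and reranker model.
                    "modelArn": "arn:aws:bedrock:us-west-2::foundation-model/amazon.rerank-v1:0"
                },
            },
        },
    )
    # Each result refers back to a source by index and carries a relevance score.
    for r in response["results"]:
        print(r["index"], r["relevanceScore"])
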
--- a/qtype/interpreter/executors/document_embedder_executor.py
+++ b/qtype/interpreter/executors/document_embedder_executor.py
@@ -1,5 +1,14 @@
 from typing import AsyncIterator
 
+from botocore.exceptions import ClientError
+from llama_index.core.base.embeddings.base import BaseEmbedding
+from tenacity import (
+    retry,
+    retry_if_exception,
+    stop_after_attempt,
+    wait_exponential,
+)
+
 from qtype.dsl.domain_types import RAGChunk
 from qtype.interpreter.base.base_step_executor import StepExecutor
 from qtype.interpreter.base.executor_context import ExecutorContext
@@ -8,6 +17,13 @@ from qtype.interpreter.types import FlowMessage
 from qtype.semantic.model import DocumentEmbedder
 
 
+def is_throttling_error(e):
+    return (
+        isinstance(e, ClientError)
+        and e.response["Error"]["Code"] == "ThrottlingException"
+    )
+
+
 class DocumentEmbedderExecutor(StepExecutor):
     """Executor for DocumentEmbedder steps."""
 
@@ -24,7 +40,25 @@ class DocumentEmbedderExecutor(StepExecutor):
             )
         self.step: DocumentEmbedder = step
         # Initialize the embedding model once for the executor
-        self.embedding_model = to_embedding_model(self.step.model)
+        self.embedding_model: BaseEmbedding = to_embedding_model(
+            self.step.model
+        )
+
+    # TODO: properly abstract this into a mixin
+    @retry(
+        retry=retry_if_exception(is_throttling_error),
+        wait=wait_exponential(multiplier=0.5, min=1, max=30),
+        stop=stop_after_attempt(10),
+    )
+    async def _embed(self, text: str) -> list[float]:
+        """Generate embedding for the given text using the embedding model.
+
+        Args:
+            text: The text to embed.
+        Returns:
+            The embedding vector as a list of floats.
+        """
+        return await self.embedding_model.aget_text_embedding(text=text)
 
     async def process_message(
         self,
@@ -52,9 +86,7 @@ class DocumentEmbedderExecutor(StepExecutor):
             )
 
             # Generate embedding for the chunk content
-            vector = self.embedding_model.get_text_embedding(
-                text=str(chunk.content)
-            )
+            vector = await self._embed(str(chunk.content))
 
             # Create the output chunk with the vector
             embedded_chunk = RAGChunk(
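
Note on the embedder changes above: the retry policy is plain tenacity, whose retry decorator also wraps async methods like _embed. A condensed sketch of the same policy on a standalone function:

    from botocore.exceptions import ClientError
    from tenacity import (
        retry,
        retry_if_exception,
        stop_after_attempt,
        wait_exponential,
    )

    def is_throttling_error(e: BaseException) -> bool:
        # Retry only AWS throttling errors, not other ClientErrors.
        return (
            isinstance(e, ClientError)
            and e.response["Error"]["Code"] == "ThrottlingException"
        )

    @retry(
        retry=retry_if_exception(is_throttling_error),
        wait=wait_exponential(multiplier=0.5, min=1, max=30),  # backoff capped at 30s
        stop=stop_after_attempt(10),  # after 10 tries, give up and re-raise
    )
    async def embed(text: str) -> list[float]:
        ...  # any awaitable call that may raise ThrottlingException
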
--- a/qtype/interpreter/executors/document_search_executor.py
+++ b/qtype/interpreter/executors/document_search_executor.py
@@ -1,6 +1,8 @@
+from __future__ import annotations
+
 from typing import AsyncIterator
 
-from qtype.dsl.domain_types import RAGChunk, RAGSearchResult
+from qtype.dsl.domain_types import SearchResult
 from qtype.interpreter.base.base_step_executor import StepExecutor
 from qtype.interpreter.base.executor_context import ExecutorContext
 from qtype.interpreter.conversions import to_opensearch_client
@@ -29,6 +31,17 @@ class DocumentSearchExecutor(StepExecutor):
             )
         self.index_name = self.step.index.name
 
+    async def finalize(self) -> AsyncIterator[FlowMessage]:
+        """Clean up resources after all messages are processed."""
+        if hasattr(self, "client") and self.client:
+            try:
+                await self.client.close()
+            except Exception:
+                pass
+        # Make this an async generator
+        return
+        yield  # type: ignore[unreachable]
+
     async def process_message(
         self,
         message: FlowMessage,
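
Note on the finalize hook above: the bare return followed by an unreachable yield is the standard idiom for an async generator that yields nothing; without the yield, the function would compile as a plain coroutine and would not satisfy the AsyncIterator signature. A minimal sketch:

    from typing import AsyncIterator

    async def no_op() -> AsyncIterator[int]:
        return  # exits immediately...
        yield  # ...but this makes Python treat the function as an async generator

    async def demo() -> None:
        async for _ in no_op():
            raise AssertionError("never reached")
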
@@ -39,7 +52,7 @@
             message: The FlowMessage to process.
 
         Yields:
-            FlowMessage with search results as RAGSearchResult instances.
+            A list of dictionaries with _source, _search_score, and _search_id fields.
         """
         input_id = self.step.inputs[0].id
         output_id = self.step.outputs[0].id
@@ -58,62 +71,40 @@
             # Build the search query
             search_body = {
                 "query": {
-                    "multi_match": {
-                        "query": query_text,
-                        "fields": ["content^2", "title", "*"],
-                        "type": "best_fields",
-                    }
+                    "multi_match": {"query": query_text} | self.step.query_args
                 },
-                "size": 10,  # Default top 10 results
+                "size": self.step.default_top_k,
             }
 
             # Apply any filters if specified
             if self.step.filters:
-                if "query" in search_body:
-                    search_body["query"] = {
-                        "bool": {
-                            "must": [search_body["query"]],
-                            "filter": [
-                                {"term": {k: v}}
-                                for k, v in self.step.filters.items()
-                            ],
-                        }
+                search_body["query"] = {
+                    "bool": {
+                        "must": [search_body["query"]],
+                        "filter": [
+                            {"term": {k: v}}
+                            for k, v in self.step.filters.items()
+                        ],
                     }
+                }
 
-            # Execute the search
-            response = self.client.search(
+            # Execute the search asynchronously using AsyncOpenSearch
+            response = await self.client.search(
                 index=self.index_name, body=search_body
             )
 
-            # Process each hit and yield as RAGSearchResult
+            # Process each hit and yield as SearchResult
+            # TODO: add support for decomposing a RAGSearchResult for hybrid search
+            search_results = []
             for hit in response["hits"]["hits"]:
-                source = hit["_source"]
-                doc_id = hit["_id"]
-                score = hit["_score"]
-
-                # Extract content (adjust field name based on your schema)
-                content = source.get("content", "")
-
-                # Build metadata from the source, excluding content field
-                metadata = {
-                    k: v for k, v in source.items() if k not in ["content"]
-                }
-
-                # Create a RAGChunk from the search result
-                # Use the document ID as both chunk_id and document_id
-                chunk = RAGChunk(
-                    content=content,
-                    chunk_id=doc_id,
-                    document_id=source.get("document_id", doc_id),
-                    vector=None,  # Document search doesn't return embeddings
-                    metadata=metadata,
+                search_results.append(
+                    SearchResult(
+                        content=hit["_source"],
+                        doc_id=hit["_id"],
+                        score=hit["_score"],
+                    )
                 )
-
-                # Wrap in RAGSearchResult with the score
-                search_result = RAGSearchResult(chunk=chunk, score=score)
-
-                # Yield result for each document
-                yield message.copy_with_variables({output_id: search_result})
+            yield message.copy_with_variables({output_id: search_results})
 
         except Exception as e:
             # Emit error event to stream so frontend can display it
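
Note on the search-body change above: {"query": query_text} | self.step.query_args uses the PEP 584 dict union, so user-supplied query_args can add multi_match fields or override defaults, with the right-hand operand winning on key conflicts. A quick sketch:

    base = {"query": "llama"}
    query_args = {"fields": ["content^2", "title"], "type": "best_fields"}

    merged = base | query_args  # Python 3.9+; right side wins on duplicate keys
    print(merged)
    # {'query': 'llama', 'fields': ['content^2', 'title'], 'type': 'best_fields'}
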
--- a/qtype/interpreter/executors/document_splitter_executor.py
+++ b/qtype/interpreter/executors/document_splitter_executor.py
@@ -72,7 +72,7 @@ class DocumentSplitterExecutor(StepExecutor):
             llama_doc = LlamaDocument(
                 text=content_text,
                 metadata=document.metadata or {},
-                id_=document.file_id,
+                doc_id=document.file_id,
             )
 
             # Split the document using the LlamaIndex splitter
--- a/qtype/interpreter/executors/field_extractor_executor.py
+++ b/qtype/interpreter/executors/field_extractor_executor.py
@@ -132,12 +132,17 @@ class FieldExtractorExecutor(StepExecutor):
             matches = self.jsonpath_expr.find(input_dict)
 
             if not matches:
-                raise ValueError(
-                    (
-                        f"JSONPath expression '{self.step.json_path}' "
-                        f"did not match any data in input"
+                if self.step.fail_on_missing:
+                    raise ValueError(
+                        (
+                            f"JSONPath expression '{self.step.json_path}' "
+                            f"did not match any data in input"
+                        )
                     )
-                )
+                else:
+                    # Yield message with None output
+                    yield message.copy_with_variables({output_id: None})
+                    return
 
             await self.stream_emitter.status(
                 f"JSONPath matched {len(matches)} value(s)"
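
Note on the field-extractor change above: jsonpath-ng's find returns an empty list when nothing matches, so with fail_on_missing disabled the step can yield None instead of raising. A small sketch of the underlying behavior:

    from jsonpath_ng import parse

    expr = parse("results[0].title")
    matches = expr.find({"results": []})  # nothing matches
    print(matches)  # [] -> the step yields None when fail_on_missing is False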