qtype 0.1.3__py3-none-any.whl → 0.1.9__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (74) hide show
  1. qtype/base/types.py +1 -9
  2. qtype/base/ui_shapes.py +38 -0
  3. qtype/dsl/model.py +45 -19
  4. qtype/dsl/parser.py +3 -3
  5. qtype/interpreter/base/base_step_executor.py +10 -10
  6. qtype/interpreter/base/factory.py +6 -0
  7. qtype/interpreter/base/step_cache.py +3 -3
  8. qtype/interpreter/conversions.py +32 -7
  9. qtype/interpreter/executors/agent_executor.py +1 -2
  10. qtype/interpreter/executors/bedrock_reranker_executor.py +1 -2
  11. qtype/interpreter/executors/collect_executor.py +76 -0
  12. qtype/interpreter/executors/construct_executor.py +68 -0
  13. qtype/interpreter/executors/decoder_executor.py +1 -2
  14. qtype/interpreter/executors/doc_to_text_executor.py +1 -2
  15. qtype/interpreter/executors/document_embedder_executor.py +19 -4
  16. qtype/interpreter/executors/document_search_executor.py +1 -2
  17. qtype/interpreter/executors/document_source_executor.py +1 -2
  18. qtype/interpreter/executors/document_splitter_executor.py +1 -2
  19. qtype/interpreter/executors/echo_executor.py +1 -2
  20. qtype/interpreter/executors/explode_executor.py +49 -0
  21. qtype/interpreter/executors/field_extractor_executor.py +1 -2
  22. qtype/interpreter/executors/file_writer_executor.py +12 -13
  23. qtype/interpreter/executors/index_upsert_executor.py +3 -4
  24. qtype/interpreter/executors/invoke_embedding_executor.py +37 -26
  25. qtype/interpreter/executors/invoke_tool_executor.py +1 -2
  26. qtype/interpreter/executors/llm_inference_executor.py +1 -2
  27. qtype/interpreter/executors/prompt_template_executor.py +1 -2
  28. qtype/interpreter/executors/sql_source_executor.py +1 -2
  29. qtype/interpreter/executors/vector_search_executor.py +1 -2
  30. qtype/interpreter/flow.py +6 -4
  31. qtype/interpreter/types.py +18 -10
  32. qtype/interpreter/typing.py +53 -4
  33. qtype/interpreter/ui/404/index.html +1 -1
  34. qtype/interpreter/ui/404.html +1 -1
  35. qtype/interpreter/ui/_next/static/{20HoJN6otZ_LyHLHpCPE6 → YR7FgQl_Kpwjcqho0ogG4}/_buildManifest.js +1 -1
  36. qtype/interpreter/ui/_next/static/chunks/230-e22a519211de9624.js +33 -0
  37. qtype/interpreter/ui/_next/static/chunks/255-268261a96ffee0a9.js +1 -0
  38. qtype/interpreter/ui/_next/static/chunks/4bd1b696-c023c6e3521b1417.js +1 -0
  39. qtype/interpreter/ui/_next/static/chunks/app/_not-found/page-8f0e059659d8afd7.js +1 -0
  40. qtype/interpreter/ui/_next/static/chunks/app/layout-1615bcffa82ff884.js +1 -0
  41. qtype/interpreter/ui/_next/static/chunks/app/page-bfbb263eb356c6eb.js +1 -0
  42. qtype/interpreter/ui/_next/static/chunks/ba12c10f-8d3b3efcaddd4728.js +1 -0
  43. qtype/interpreter/ui/_next/static/chunks/framework-d7de93249215fb06.js +1 -0
  44. qtype/interpreter/ui/_next/static/chunks/main-090112344d9a19ce.js +1 -0
  45. qtype/interpreter/ui/_next/static/chunks/main-app-a5425c8070223ccc.js +1 -0
  46. qtype/interpreter/ui/_next/static/chunks/pages/{_app-0a0020ddd67f79cf.js → _app-7d307437aca18ad4.js} +1 -1
  47. qtype/interpreter/ui/_next/static/chunks/pages/{_error-03529f2c21436739.js → _error-cb2a52f75f2162e2.js} +1 -1
  48. qtype/interpreter/ui/_next/static/chunks/webpack-bd0771cb30dd12fa.js +1 -0
  49. qtype/interpreter/ui/_next/static/css/7906798f56607e8f.css +3 -0
  50. qtype/interpreter/ui/assets/video-sample.mp4 +0 -0
  51. qtype/interpreter/ui/index.html +1 -1
  52. qtype/interpreter/ui/index.txt +12 -12
  53. qtype/semantic/checker.py +83 -0
  54. qtype/semantic/generate.py +0 -1
  55. qtype/semantic/model.py +25 -18
  56. {qtype-0.1.3.dist-info → qtype-0.1.9.dist-info}/METADATA +2 -2
  57. {qtype-0.1.3.dist-info → qtype-0.1.9.dist-info}/RECORD +62 -57
  58. qtype/interpreter/ui/_next/static/chunks/434-b2112d19f25c44ff.js +0 -36
  59. qtype/interpreter/ui/_next/static/chunks/4bd1b696-cf72ae8a39fa05aa.js +0 -1
  60. qtype/interpreter/ui/_next/static/chunks/964-2b041321a01cbf56.js +0 -1
  61. qtype/interpreter/ui/_next/static/chunks/app/_not-found/page-e110d2a9d0a83d82.js +0 -1
  62. qtype/interpreter/ui/_next/static/chunks/app/layout-a05273ead5de2c41.js +0 -1
  63. qtype/interpreter/ui/_next/static/chunks/app/page-8c67d16ac90d23cb.js +0 -1
  64. qtype/interpreter/ui/_next/static/chunks/ba12c10f-546f2714ff8abc66.js +0 -1
  65. qtype/interpreter/ui/_next/static/chunks/framework-7c95b8e5103c9e90.js +0 -1
  66. qtype/interpreter/ui/_next/static/chunks/main-app-6fc6346bc8f7f163.js +0 -1
  67. qtype/interpreter/ui/_next/static/chunks/main-e26b9cb206da2cac.js +0 -1
  68. qtype/interpreter/ui/_next/static/chunks/webpack-08642e441b39b6c2.js +0 -1
  69. qtype/interpreter/ui/_next/static/css/8a8d1269e362fef7.css +0 -3
  70. /qtype/interpreter/ui/_next/static/{20HoJN6otZ_LyHLHpCPE6 → YR7FgQl_Kpwjcqho0ogG4}/_ssgManifest.js +0 -0
  71. {qtype-0.1.3.dist-info → qtype-0.1.9.dist-info}/WHEEL +0 -0
  72. {qtype-0.1.3.dist-info → qtype-0.1.9.dist-info}/entry_points.txt +0 -0
  73. {qtype-0.1.3.dist-info → qtype-0.1.9.dist-info}/licenses/LICENSE +0 -0
  74. {qtype-0.1.3.dist-info → qtype-0.1.9.dist-info}/top_level.txt +0 -0
qtype/base/types.py CHANGED
@@ -54,21 +54,13 @@ class PrimitiveTypeEnum(str, Enum):
54
54
  citation_url = "citation_url"
55
55
  date = "date"
56
56
  datetime = "datetime"
57
- int = "int"
58
57
  file = "file"
59
58
  float = "float"
60
59
  image = "image"
60
+ int = "int"
61
61
  text = "text"
62
62
  time = "time"
63
63
  video = "video"
64
- thinking = "thinking"
65
-
66
-
67
- class StepCardinality(str, Enum):
68
- """Does this step emit 1 (one) or 0...N (many) items?"""
69
-
70
- one = "one"
71
- many = "many"
72
64
 
73
65
 
74
66
  ReferenceT = TypeVar("ReferenceT")
@@ -0,0 +1,38 @@
1
+ from enum import Enum
2
+ from typing import Union
3
+
4
+ from pydantic import BaseModel
5
+ from pydantic import ConfigDict as PydanticConfigDict
6
+ from pydantic import Field
7
+
8
+ from qtype.base.types import PrimitiveTypeEnum
9
+
10
+
11
+ class TextWidget(str, Enum):
12
+ text = "text" # Simple text widget input, default
13
+ textarea = "textarea" # A paragraph editor
14
+
15
+
16
+ class TextInputUI(BaseModel):
17
+ model_config = PydanticConfigDict(extra="forbid")
18
+
19
+ widget: TextWidget = Field(
20
+ default=TextWidget.text,
21
+ description="What kind of text ui to present",
22
+ )
23
+
24
+
25
+ class FileUploadUI(BaseModel):
26
+ model_config = PydanticConfigDict(extra="forbid")
27
+ accept: str = Field(
28
+ default="*/*",
29
+ description="The mime type(s) to accept in the file upload.",
30
+ )
31
+
32
+
33
+ UIType = Union[TextInputUI, FileUploadUI]
34
+
35
+ UI_INPUT_TO_TYPE = {
36
+ (TextInputUI, PrimitiveTypeEnum.text),
37
+ (FileUploadUI, PrimitiveTypeEnum.file),
38
+ }
qtype/dsl/model.py CHANGED
@@ -23,9 +23,9 @@ from qtype.base.types import (
23
23
  ConcurrentStepMixin,
24
24
  PrimitiveTypeEnum,
25
25
  Reference,
26
- StepCardinality,
27
26
  StrictBaseModel,
28
27
  )
28
+ from qtype.base.ui_shapes import UI_INPUT_TO_TYPE, UIType
29
29
  from qtype.dsl.domain_types import (
30
30
  ChatContent,
31
31
  ChatMessage,
@@ -212,12 +212,24 @@ class Variable(StrictBaseModel):
212
212
  ),
213
213
  )
214
214
 
215
+ ui: UIType | None = Field(None, description="Hints for the UI if needed.")
216
+
215
217
  @model_validator(mode="before")
216
218
  @classmethod
217
219
  def resolve_type(cls, data: Any, info: ValidationInfo) -> Any:
218
220
  """Resolve string-based type references using the shared validator."""
219
221
  return _resolve_type_field_validator(data, info)
220
222
 
223
+ @model_validator(mode="after")
224
+ def validate_ui_type(self) -> Variable:
225
 """Ensure the UI configuration is compatible with the variable's type."""
226
+ if self.ui is not None:
227
+ if (type(self.ui), self.type) not in UI_INPUT_TO_TYPE:
228
+ raise ValueError(
229
 f"Variable of {self.type} is not compatible with UI configuration {self.ui}"
230
+ )
231
+ return self
232
+
221
233
 
222
234
  class SecretReference(StrictBaseModel):
223
235
  """
@@ -350,10 +362,6 @@ class Step(CachedStepMixin, StrictBaseModel, ABC):
350
362
 
351
363
  id: str = Field(..., description="Unique ID of this component.")
352
364
  type: str = Field(..., description="Type of the step component.")
353
- cardinality: StepCardinality = Field(
354
- default=StepCardinality.one,
355
- description="Does this step emit 1 (one) or 0...N (many) instances of the outputs?",
356
- )
357
365
  inputs: list[Reference[Variable] | str] = Field(
358
366
  default_factory=list,
359
367
  description="References to the variables required by this step.",
@@ -364,6 +372,33 @@ class Step(CachedStepMixin, StrictBaseModel, ABC):
364
372
  )
365
373
 
366
374
 
375
+ class Explode(Step):
376
+ """A step that takes a list input and produces multiple outputs, one per item in the list."""
377
+
378
+ type: Literal["Explode"] = "Explode"
379
+
380
+
381
+ class Collect(Step, BatchableStepMixin):
382
+ """A step that collects all inputs and creates a single list to return."""
383
+
384
+ type: Literal["Collect"] = "Collect"
385
+
386
+ batch_config: BatchConfig = Field(
387
+ default_factory=partial(BatchConfig, batch_size=sys.maxsize),
388
+ description="Configuration for processing the input stream in batches. If omitted, the step processes items one by one.",
389
+ )
390
+
391
+
392
+ class Construct(Step):
393
+ """A step that converts variables into an instance of a Custom or Domain Type"""
394
+
395
+ type: Literal["Construct"] = "Construct"
396
+ field_mapping: dict[str, str] = Field(
397
+ ...,
398
+ description="Mapping of type inputs to variable names, if needed.",
399
+ )
400
+
401
+
367
402
  class PromptTemplate(Step):
368
403
  """Defines a prompt template with a string format and variable bindings.
369
404
  This is used to generate prompts dynamically based on input variables."""
@@ -765,8 +800,9 @@ class AWSAuthProvider(AuthorizationProvider):
765
800
  has_keys = self.access_key_id and self.secret_access_key
766
801
  has_profile = self.profile_name
767
802
  has_role = self.role_arn
803
+ has_region = self.region
768
804
 
769
- if not (has_keys or has_profile or has_role):
805
+ if not (has_keys or has_profile or has_role or has_region):
770
806
  raise ValueError(
771
807
  "AWSAuthProvider must specify at least one authentication method: "
772
808
  "access keys, profile name, role ARN, or region."
@@ -891,10 +927,6 @@ class Source(Step):
891
927
  """Base class for data sources"""
892
928
 
893
929
  id: str = Field(..., description="Unique ID of the data source.")
894
- cardinality: Literal[StepCardinality.many] = Field(
895
- default=StepCardinality.many,
896
- description="Sources always emit 0...N instances of the outputs.",
897
- )
898
930
 
899
931
 
900
932
  class SQLSource(Source):
@@ -951,7 +983,6 @@ class Aggregate(Step):
951
983
  """
952
984
 
953
985
  type: Literal["Aggregate"] = "Aggregate"
954
- cardinality: Literal[StepCardinality.one] = StepCardinality.one
955
986
 
956
987
  # Outputs are now optional. The user can provide 0, 1, 2, or 3 names.
957
988
  # The order will be: success_count, error_count, total_count
@@ -1005,10 +1036,6 @@ class DocumentSplitter(Step, ConcurrentStepMixin):
1005
1036
  """Configuration for chunking/splitting documents into embeddable nodes/chunks."""
1006
1037
 
1007
1038
  type: Literal["DocumentSplitter"] = "DocumentSplitter"
1008
- cardinality: Literal[StepCardinality.many] = Field(
1009
- default=StepCardinality.many,
1010
- description="Consumes one document and emits 0...N nodes/chunks.",
1011
- )
1012
1039
 
1013
1040
  splitter_name: str = Field(
1014
1041
  default="SentenceSplitter",
@@ -1028,10 +1055,6 @@ class DocumentEmbedder(Step, ConcurrentStepMixin):
1028
1055
  """Embeds document chunks using a specified embedding model."""
1029
1056
 
1030
1057
  type: Literal["DocumentEmbedder"] = "DocumentEmbedder"
1031
- cardinality: Literal[StepCardinality.many] = Field(
1032
- default=StepCardinality.many,
1033
- description="Consumes one chunk and emits one embedded chunk.",
1034
- )
1035
1058
  model: Reference[EmbeddingModel] | str = Field(
1036
1059
  ..., description="Embedding model to use for vectorization."
1037
1060
  )
@@ -1199,6 +1222,8 @@ StepType = Annotated[
1199
1222
  Agent,
1200
1223
  Aggregate,
1201
1224
  BedrockReranker,
1225
+ Collect,
1226
+ Construct,
1202
1227
  Decoder,
1203
1228
  DocToTextConverter,
1204
1229
  DocumentEmbedder,
@@ -1206,6 +1231,7 @@ StepType = Annotated[
1206
1231
  DocumentSplitter,
1207
1232
  DocumentSource,
1208
1233
  Echo,
1234
+ Explode,
1209
1235
  FieldExtractor,
1210
1236
  FileSource,
1211
1237
  FileWriter,
qtype/dsl/parser.py CHANGED
@@ -153,12 +153,12 @@ def _format_validation_errors(
153
153
  error_msg = "Validation failed (see details above)"
154
154
  else:
155
155
  error_msg = "Validation failed:\n"
156
- for error in relevant_errors[:5]: # Show max 5 errors
156
 for error in relevant_errors[:30]: # Show max 30 errors
157
157
  loc_path = _simplify_field_path(error["loc"])
158
158
  error_msg += f" {loc_path}: {error['msg']}\n"
159
159
 
160
- if len(relevant_errors) > 5:
161
- error_msg += f" ... and {len(relevant_errors) - 5} more errors\n"
160
+ if len(relevant_errors) > 30:
161
+ error_msg += f" ... and {len(relevant_errors) - 30} more errors\n"
162
162
 
163
163
  if source_name:
164
164
  error_msg = f"In {source_name}:\n{error_msg}"
@@ -230,21 +230,12 @@ class StepExecutor(ABC):
230
230
  prepared_messages, process_item, task_limit=num_workers
231
231
  )
232
232
 
233
- # Combine all streams
234
- async def emit_failed_messages() -> AsyncIterator[FlowMessage]:
235
- for msg in failed_messages:
236
- yield msg
237
-
238
- all_results = stream.concat(
239
- stream.iterate([result_stream, emit_failed_messages()])
240
- )
241
-
242
233
  # Track message counts for telemetry
243
234
  message_count = 0
244
235
  error_count = 0
245
236
 
246
237
  # Stream results and track progress
247
- async with all_results.stream() as streamer:
238
+ async with result_stream.stream() as streamer:
248
239
  result: FlowMessage
249
240
  async for result in streamer:
250
241
  message_count += 1
@@ -255,6 +246,15 @@ class StepExecutor(ABC):
255
246
  )
256
247
  yield result
257
248
 
249
+ # Emit failed messages after processing completes
250
+ for msg in failed_messages:
251
+ message_count += 1
252
+ error_count += 1
253
+ self.progress.update_for_message(
254
+ msg, self.context.on_progress
255
+ )
256
+ yield msg
257
+
258
258
  # Finalize and track those messages too
259
259
  async for msg in self.finalize():
260
260
  message_count += 1
@@ -2,6 +2,8 @@ from qtype.semantic.model import (
2
2
  Agent,
3
3
  Aggregate,
4
4
  BedrockReranker,
5
+ Collect,
6
+ Construct,
5
7
  Decoder,
6
8
  DocToTextConverter,
7
9
  DocumentEmbedder,
@@ -9,6 +11,7 @@ from qtype.semantic.model import (
9
11
  DocumentSource,
10
12
  DocumentSplitter,
11
13
  Echo,
14
+ Explode,
12
15
  FieldExtractor,
13
16
  FileSource,
14
17
  FileWriter,
@@ -32,6 +35,8 @@ EXECUTOR_REGISTRY = {
32
35
  Agent: "qtype.interpreter.executors.agent_executor.AgentExecutor",
33
36
  Aggregate: "qtype.interpreter.executors.aggregate_executor.AggregateExecutor",
34
37
  BedrockReranker: "qtype.interpreter.executors.bedrock_reranker_executor.BedrockRerankerExecutor",
38
+ Collect: "qtype.interpreter.executors.collect_executor.CollectExecutor",
39
+ Construct: "qtype.interpreter.executors.construct_executor.ConstructExecutor",
35
40
  Decoder: "qtype.interpreter.executors.decoder_executor.DecoderExecutor",
36
41
  DocToTextConverter: "qtype.interpreter.executors.doc_to_text_executor.DocToTextConverterExecutor",
37
42
  DocumentEmbedder: "qtype.interpreter.executors.document_embedder_executor.DocumentEmbedderExecutor",
@@ -39,6 +44,7 @@ EXECUTOR_REGISTRY = {
39
44
  DocumentSource: "qtype.interpreter.executors.document_source_executor.DocumentSourceExecutor",
40
45
  DocumentSplitter: "qtype.interpreter.executors.document_splitter_executor.DocumentSplitterExecutor",
41
46
  Echo: "qtype.interpreter.executors.echo_executor.EchoExecutor",
47
+ Explode: "qtype.interpreter.executors.explode_executor.ExplodeExecutor",
42
48
  FieldExtractor: "qtype.interpreter.executors.field_extractor_executor.FieldExtractorExecutor",
43
49
  FileSource: "qtype.interpreter.executors.file_source_executor.FileSourceExecutor",
44
50
  FileWriter: "qtype.interpreter.executors.file_writer_executor.FileWriterExecutor",
@@ -67,8 +67,8 @@ def from_cache_value(
67
67
  ) -> FlowMessage:
68
68
  """Reconstructs a FlowMessage from cached output values."""
69
69
  if "FlowMessage.__error__" in cache_value:
70
- msg = message.model_copy(deep=True)
71
- msg.error = cache_value["FlowMessage.__error__"]
72
- return msg
70
+ return message.model_copy(
71
+ deep=True, update={"error": cache_value["FlowMessage.__error__"]}
72
+ )
73
73
  else:
74
74
  return message.copy_with_variables(cache_value)
@@ -18,7 +18,8 @@ from llama_index.core.base.llms.types import (
18
18
  from llama_index.core.memory import Memory as LlamaMemory
19
19
  from llama_index.core.schema import Document as LlamaDocument
20
20
  from llama_index.core.vector_stores.types import BasePydanticVectorStore
21
- from opensearchpy import AsyncOpenSearch, AWSV4SignerAuth
21
+ from opensearchpy import AsyncHttpConnection, AsyncOpenSearch
22
+ from opensearchpy.helpers.asyncsigner import AWSV4SignerAsyncAuth
22
23
 
23
24
  from qtype.base.types import PrimitiveTypeEnum
24
25
  from qtype.dsl.domain_types import (
@@ -34,6 +35,7 @@ from qtype.interpreter.base.secrets import SecretManagerBase
34
35
  from qtype.interpreter.types import InterpreterError
35
36
  from qtype.semantic.model import (
36
37
  APIKeyAuthProvider,
38
+ AWSAuthProvider,
37
39
  DocumentIndex,
38
40
  DocumentSplitter,
39
41
  Model,
@@ -302,7 +304,9 @@ def to_vector_store(
302
304
 
303
305
 
304
306
  @cached_resource
305
- def to_embedding_model(model: Model) -> BaseEmbedding:
307
+ def to_embedding_model(
308
+ model: Model, secret_manager: SecretManagerBase
309
+ ) -> BaseEmbedding:
306
310
  """Convert a qtype Model to a LlamaIndex embedding model."""
307
311
 
308
312
  if model.provider == "aws-bedrock":
@@ -310,7 +314,14 @@ def to_embedding_model(model: Model) -> BaseEmbedding:
310
314
  BedrockEmbedding,
311
315
  )
312
316
 
317
+ session = None
318
+ if model.auth is not None:
319
+ assert isinstance(model.auth, AWSAuthProvider)
320
+ with aws(model.auth, secret_manager) as session:
321
+ session = session._session
322
+
313
323
  bedrock_embedding: BaseEmbedding = BedrockEmbedding(
324
+ botocore_session=session,
314
325
  model_name=model.model_id if model.model_id else model.id,
315
326
  max_retries=100,
316
327
  )
@@ -320,8 +331,20 @@ def to_embedding_model(model: Model) -> BaseEmbedding:
320
331
  OpenAIEmbedding,
321
332
  )
322
333
 
334
+ api_key = None
335
+ if model.auth:
336
+ with auth(model.auth, secret_manager) as provider:
337
+ if not isinstance(provider, APIKeyAuthProvider):
338
+ raise InterpreterError(
339
+ f"OpenAI provider requires APIKeyAuthProvider, "
340
+ f"got {type(provider).__name__}"
341
+ )
342
+ # api_key is guaranteed to be str after auth() resolves it
343
+ api_key = provider.api_key # type: ignore[assignment]
344
+
323
345
  openai_embedding: BaseEmbedding = OpenAIEmbedding(
324
- model_name=model.model_id if model.model_id else model.id
346
+ api_key=api_key,
347
+ model_name=model.model_id if model.model_id else model.id,
325
348
  )
326
349
  return openai_embedding
327
350
  else:
@@ -347,7 +370,7 @@ def to_opensearch_client(
347
370
  InterpreterError: If authentication fails or configuration is invalid
348
371
  """
349
372
  client_kwargs: dict[str, Any] = {
350
- "hosts": [index.endpoint],
373
+ "hosts": index.endpoint,
351
374
  **index.args,
352
375
  }
353
376
 
@@ -368,15 +391,17 @@ def to_opensearch_client(
368
391
  f"Failed to obtain AWS credentials for DocumentIndex '{index.id}'"
369
392
  )
370
393
 
371
- # Use opensearch-py's built-in AWS auth
372
- aws_auth = AWSV4SignerAuth(
394
+ # Use opensearch-py's async AWS auth
395
+ aws_auth = AWSV4SignerAsyncAuth(
373
396
  credentials,
374
397
  auth_session.region_name or "us-east-1", # type: ignore
398
+ "aoss", # service name for OpenSearch Serverless
375
399
  )
376
400
 
377
401
  client_kwargs["http_auth"] = aws_auth
378
402
  client_kwargs["use_ssl"] = True
379
403
  client_kwargs["verify_certs"] = True
404
+ client_kwargs["connection_class"] = AsyncHttpConnection
380
405
  else:
381
406
  raise InterpreterError(
382
407
  f"Unsupported authentication type for DocumentIndex: {type(index.auth)}"
@@ -566,7 +591,7 @@ def to_llama_vector_store_and_retriever(
566
591
  vector_store = to_vector_store(index, secret_manager)
567
592
 
568
593
  # Get the embedding model
569
- embedding_model = to_embedding_model(index.embedding_model)
594
+ embedding_model = to_embedding_model(index.embedding_model, secret_manager)
570
595
 
571
596
  # Create a VectorStoreIndex with the vector store and embedding model
572
597
  vector_index = VectorStoreIndex.from_vector_store(
@@ -93,8 +93,7 @@ class AgentExecutor(StepExecutor, ToolExecutionMixin, FunctionToolHelper):
93
93
  logger.error(f"Agent execution failed: {e}", exc_info=True)
94
94
  # Emit error event to stream so frontend can display it
95
95
  await self.stream_emitter.error(str(e))
96
- message.set_error(self.step.id, e)
97
- yield message
96
+ yield message.copy_with_error(self.step.id, e)
98
97
 
99
98
  async def _process_chat(
100
99
  self,
@@ -160,8 +160,7 @@ class BedrockRerankerExecutor(StepExecutor):
160
160
  logger.error(f"Reranking failed: {e}", exc_info=True)
161
161
  # Emit error event to stream so frontend can display it
162
162
  await self.stream_emitter.error(str(e))
163
- message.set_error(self.step.id, e)
164
- yield message
163
+ yield message.copy_with_error(self.step.id, e)
165
164
 
166
165
  def _query(self, message: FlowMessage) -> str:
167
166
  """Extract the query string from the FlowMessage.
@@ -0,0 +1,76 @@
1
+ from typing import Any, AsyncIterator
2
+
3
+ from qtype.interpreter.base.batch_step_executor import BatchedStepExecutor
4
+ from qtype.interpreter.base.executor_context import ExecutorContext
5
+ from qtype.interpreter.types import FlowMessage
6
+ from qtype.semantic.model import Collect
7
+
8
+
9
+ def _find_common_ancestors(messages: list[FlowMessage]) -> dict[str, Any]:
10
+ if not messages:
11
+ return {}
12
+
13
+ # 1. Start with all variables from the first message
14
+ common_vars = messages[0].variables.copy()
15
+
16
+ for msg in messages[1:]:
17
+ # 2. Identify keys that either don't exist in this message
18
+ # OR have a different value (diverged)
19
+ diverged_keys = [
20
+ k
21
+ for k, v in common_vars.items()
22
+ if k not in msg.variables or msg.variables[k] != v
23
+ ]
24
+ # 3. Remove diverged keys to leave only the "Common Ancestors"
25
+ for k in diverged_keys:
26
+ common_vars.pop(k)
27
+
28
+ return common_vars
29
+
30
+
31
+ class CollectExecutor(BatchedStepExecutor):
32
+ """Executor for Collect steps."""
33
+
34
+ def __init__(
35
+ self,
36
+ step: Collect,
37
+ context: ExecutorContext,
38
+ **dependencies,
39
+ ):
40
+ super().__init__(step, context, **dependencies)
41
+ if not isinstance(step, Collect):
42
+ raise ValueError("CollectExecutor can only execute Collect steps.")
43
+ self.step = step
44
+
45
+ async def process_batch(
46
+ self,
47
+ batch: list[FlowMessage],
48
+ ) -> AsyncIterator[FlowMessage]:
49
+ """Process a batch of FlowMessages for the Collect step.
50
+
51
+ Args:
52
+ batch: A list of FlowMessages to process.
53
+
54
+ Yields:
55
+ FlowMessages with the results of processing.
56
+ """
57
+
58
+ # Note that the batch processor accumulates the messages that we need,
59
+ # so this function isn't called until collection is ready.
60
+
61
+ # outputs[0] and inputs[0] is safe here since semantic validation ensures only one output
62
+ output_name = self.step.outputs[0].id
63
+ input_name = self.step.inputs[0].id
64
+
65
+ if len(batch) == 0:
66
+ # No messages to process -- yield nothing
67
+ return
68
+
69
+ results = []
70
+ for msg in batch:
71
+ results.append(msg.variables[input_name])
72
+
73
+ # Only variables common to all input messages are propagated at the end
74
+ common_ancestors = _find_common_ancestors(batch)
75
+ new_variables = {output_name: results} | common_ancestors
76
+ yield FlowMessage(session=batch[0].session, variables=new_variables)
@@ -0,0 +1,68 @@
1
+ from typing import AsyncIterator
2
+
3
+ from qtype.dsl.model import ListType
4
+ from qtype.interpreter.base.base_step_executor import StepExecutor
5
+ from qtype.interpreter.base.executor_context import ExecutorContext
6
+ from qtype.interpreter.types import FlowMessage
7
+ from qtype.interpreter.typing import instantiate_variable
8
+ from qtype.semantic.model import Construct
9
+
10
+
11
+ class ConstructExecutor(StepExecutor):
12
+ """Executor for Construct steps."""
13
+
14
+ def __init__(
15
+ self,
16
+ step: Construct,
17
+ context: ExecutorContext,
18
+ **dependencies,
19
+ ):
20
+ super().__init__(step, context, **dependencies)
21
+ if not isinstance(step, Construct):
22
+ raise ValueError(
23
+ "ConstructExecutor can only execute Construct steps."
24
+ )
25
+ self.step = step
26
+
27
+ async def process_message(
28
+ self,
29
+ message: FlowMessage,
30
+ ) -> AsyncIterator[FlowMessage]:
31
+ """Process a FlowMessage for the Construct step.
32
+
33
+ Args:
34
+ message: The FlowMessage to process.
35
+ Yields:
36
+ FlowMessages with the results of processing.
37
+ """
38
+ try:
39
+ # Safe since semantic validation ensures exactly one output variable
40
+ output_var = self.step.outputs[0]
41
+
42
+ if (
43
+ isinstance(output_var.type, ListType)
44
+ or len(self.step.inputs) == 1
45
+ ):
46
+ inputs = message.variables[self.step.inputs[0].id]
47
+ elif hasattr(output_var.type, "model_validate"):
48
+ # This is a custom type (Pydantic model)
49
+ # So input should be a dict
50
+ input_values = {
51
+ input_var.id: message.variables[input_var.id]
52
+ for input_var in self.step.inputs
53
+ }
54
+ # use the mapping to convert variable names to
55
+ inputs = {
56
+ self.step.field_mapping.get(var_name, var_name): value
57
+ for var_name, value in input_values.items()
58
+ }
59
+ else:
60
+ raise ValueError(
61
+ "Construct step must have either a single input or output of a custom type."
62
+ )
63
+ constructed_value = instantiate_variable(output_var, inputs)
64
+ yield message.copy_with_variables(
65
+ {output_var.id: constructed_value}
66
+ )
67
+ except Exception as e:
68
+ yield message.copy_with_error(self.step.id, e)
@@ -159,5 +159,4 @@ class DecoderExecutor(StepExecutor):
159
159
  except Exception as e:
160
160
  # Emit error event to stream so frontend can display it
161
161
  await self.stream_emitter.error(str(e))
162
- message.set_error(self.step.id, e)
163
- yield message
162
+ yield message.copy_with_error(self.step.id, e)
@@ -74,8 +74,7 @@ class DocToTextConverterExecutor(StepExecutor):
74
74
  except Exception as e:
75
75
  # Emit error event to stream so frontend can display it
76
76
  await self.stream_emitter.error(str(e))
77
- message.set_error(self.step.id, e)
78
- yield message
77
+ yield message.copy_with_error(self.step.id, e)
79
78
 
80
79
  def _convert_doc(self, doc: RAGDocument) -> RAGDocument:
81
80
  """Convert a RAGDocument to text/markdown format.
@@ -1,3 +1,5 @@
1
+ import asyncio
2
+ import logging
1
3
  from typing import AsyncIterator
2
4
 
3
5
  from botocore.exceptions import ClientError
@@ -41,7 +43,7 @@ class DocumentEmbedderExecutor(StepExecutor):
41
43
  self.step: DocumentEmbedder = step
42
44
  # Initialize the embedding model once for the executor
43
45
  self.embedding_model: BaseEmbedding = to_embedding_model(
44
- self.step.model
46
+ self.step.model, context.secret_manager
45
47
  )
46
48
 
47
49
  # TODO: properly abstract this into a mixin
@@ -58,7 +60,17 @@ class DocumentEmbedderExecutor(StepExecutor):
58
60
  Returns:
59
61
  The embedding vector as a list of floats.
60
62
  """
61
- return await self.embedding_model.aget_text_embedding(text=text)
63
+
64
+ # TODO: switch back to async once aws auth supports it.
65
+ # https://github.com/bazaarvoice/qtype/issues/108
66
+ def _call():
67
+ return self.embedding_model.get_text_embedding(text=text)
68
+
69
+ loop = asyncio.get_running_loop()
70
+ response = await loop.run_in_executor(self.context.thread_pool, _call)
71
+
72
+ return response
73
+ # return await self.embedding_model.aget_text_embedding(text=text)
62
74
 
63
75
  async def process_message(
64
76
  self,
@@ -103,5 +115,8 @@ class DocumentEmbedderExecutor(StepExecutor):
103
115
  except Exception as e:
104
116
  # Emit error event to stream so frontend can display it
105
117
  await self.stream_emitter.error(str(e))
106
- message.set_error(self.step.id, e)
107
- yield message
118
+ logging.error(
119
+ f"Error processing DocumentEmbedder step {self.step.id}",
120
+ exc_info=e,
121
+ )
122
+ yield message.copy_with_error(self.step.id, e)
@@ -109,5 +109,4 @@ class DocumentSearchExecutor(StepExecutor):
109
109
  except Exception as e:
110
110
  # Emit error event to stream so frontend can display it
111
111
  await self.stream_emitter.error(str(e))
112
- message.set_error(self.step.id, e)
113
- yield message
112
+ yield message.copy_with_error(self.step.id, e)
@@ -114,5 +114,4 @@ class DocumentSourceExecutor(StepExecutor):
114
114
  except Exception as e:
115
115
  # Emit error event to stream so frontend can display it
116
116
  await self.stream_emitter.error(str(e))
117
- message.set_error(self.step.id, e)
118
- yield message
117
+ yield message.copy_with_error(self.step.id, e)
@@ -101,5 +101,4 @@ class DocumentSplitterExecutor(StepExecutor):
101
101
  except Exception as e:
102
102
  # Emit error event to stream so frontend can display it
103
103
  await self.stream_emitter.error(str(e))
104
- message.set_error(self.step.id, e)
105
- yield message
104
+ yield message.copy_with_error(self.step.id, e)