qtype 0.0.12__py3-none-any.whl → 0.1.3__py3-none-any.whl

This diff shows the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Files changed (137)
  1. qtype/application/commons/tools.py +1 -1
  2. qtype/application/converters/tools_from_api.py +476 -11
  3. qtype/application/converters/tools_from_module.py +38 -14
  4. qtype/application/converters/types.py +15 -30
  5. qtype/application/documentation.py +1 -1
  6. qtype/application/facade.py +102 -85
  7. qtype/base/types.py +227 -7
  8. qtype/cli.py +5 -1
  9. qtype/commands/convert.py +52 -6
  10. qtype/commands/generate.py +44 -4
  11. qtype/commands/run.py +78 -36
  12. qtype/commands/serve.py +74 -44
  13. qtype/commands/validate.py +37 -14
  14. qtype/commands/visualize.py +46 -25
  15. qtype/dsl/__init__.py +6 -5
  16. qtype/dsl/custom_types.py +1 -1
  17. qtype/dsl/domain_types.py +86 -5
  18. qtype/dsl/linker.py +384 -0
  19. qtype/dsl/loader.py +315 -0
  20. qtype/dsl/model.py +751 -263
  21. qtype/dsl/parser.py +200 -0
  22. qtype/dsl/types.py +50 -0
  23. qtype/interpreter/api.py +63 -136
  24. qtype/interpreter/auth/aws.py +19 -9
  25. qtype/interpreter/auth/generic.py +93 -16
  26. qtype/interpreter/base/base_step_executor.py +436 -0
  27. qtype/interpreter/base/batch_step_executor.py +171 -0
  28. qtype/interpreter/base/exceptions.py +50 -0
  29. qtype/interpreter/base/executor_context.py +91 -0
  30. qtype/interpreter/base/factory.py +84 -0
  31. qtype/interpreter/base/progress_tracker.py +110 -0
  32. qtype/interpreter/base/secrets.py +339 -0
  33. qtype/interpreter/base/step_cache.py +74 -0
  34. qtype/interpreter/base/stream_emitter.py +469 -0
  35. qtype/interpreter/conversions.py +471 -22
  36. qtype/interpreter/converters.py +79 -0
  37. qtype/interpreter/endpoints.py +355 -0
  38. qtype/interpreter/executors/agent_executor.py +242 -0
  39. qtype/interpreter/executors/aggregate_executor.py +93 -0
  40. qtype/interpreter/executors/bedrock_reranker_executor.py +195 -0
  41. qtype/interpreter/executors/decoder_executor.py +163 -0
  42. qtype/interpreter/executors/doc_to_text_executor.py +112 -0
  43. qtype/interpreter/executors/document_embedder_executor.py +107 -0
  44. qtype/interpreter/executors/document_search_executor.py +113 -0
  45. qtype/interpreter/executors/document_source_executor.py +118 -0
  46. qtype/interpreter/executors/document_splitter_executor.py +105 -0
  47. qtype/interpreter/executors/echo_executor.py +63 -0
  48. qtype/interpreter/executors/field_extractor_executor.py +165 -0
  49. qtype/interpreter/executors/file_source_executor.py +101 -0
  50. qtype/interpreter/executors/file_writer_executor.py +110 -0
  51. qtype/interpreter/executors/index_upsert_executor.py +232 -0
  52. qtype/interpreter/executors/invoke_embedding_executor.py +92 -0
  53. qtype/interpreter/executors/invoke_flow_executor.py +51 -0
  54. qtype/interpreter/executors/invoke_tool_executor.py +358 -0
  55. qtype/interpreter/executors/llm_inference_executor.py +272 -0
  56. qtype/interpreter/executors/prompt_template_executor.py +78 -0
  57. qtype/interpreter/executors/sql_source_executor.py +106 -0
  58. qtype/interpreter/executors/vector_search_executor.py +91 -0
  59. qtype/interpreter/flow.py +173 -22
  60. qtype/interpreter/logging_progress.py +61 -0
  61. qtype/interpreter/metadata_api.py +115 -0
  62. qtype/interpreter/resource_cache.py +5 -4
  63. qtype/interpreter/rich_progress.py +225 -0
  64. qtype/interpreter/stream/chat/__init__.py +15 -0
  65. qtype/interpreter/stream/chat/converter.py +391 -0
  66. qtype/interpreter/{chat → stream/chat}/file_conversions.py +2 -2
  67. qtype/interpreter/stream/chat/ui_request_to_domain_type.py +140 -0
  68. qtype/interpreter/stream/chat/vercel.py +609 -0
  69. qtype/interpreter/stream/utils/__init__.py +15 -0
  70. qtype/interpreter/stream/utils/build_vercel_ai_formatter.py +74 -0
  71. qtype/interpreter/stream/utils/callback_to_stream.py +66 -0
  72. qtype/interpreter/stream/utils/create_streaming_response.py +18 -0
  73. qtype/interpreter/stream/utils/default_chat_extract_text.py +20 -0
  74. qtype/interpreter/stream/utils/error_streaming_response.py +20 -0
  75. qtype/interpreter/telemetry.py +135 -8
  76. qtype/interpreter/tools/__init__.py +5 -0
  77. qtype/interpreter/tools/function_tool_helper.py +265 -0
  78. qtype/interpreter/types.py +330 -0
  79. qtype/interpreter/typing.py +83 -89
  80. qtype/interpreter/ui/404/index.html +1 -1
  81. qtype/interpreter/ui/404.html +1 -1
  82. qtype/interpreter/ui/_next/static/{OT8QJQW3J70VbDWWfrEMT → 20HoJN6otZ_LyHLHpCPE6}/_buildManifest.js +1 -1
  83. qtype/interpreter/ui/_next/static/chunks/434-b2112d19f25c44ff.js +36 -0
  84. qtype/interpreter/ui/_next/static/chunks/{964-ed4ab073db645007.js → 964-2b041321a01cbf56.js} +1 -1
  85. qtype/interpreter/ui/_next/static/chunks/app/{layout-5ccbc44fd528d089.js → layout-a05273ead5de2c41.js} +1 -1
  86. qtype/interpreter/ui/_next/static/chunks/app/page-8c67d16ac90d23cb.js +1 -0
  87. qtype/interpreter/ui/_next/static/chunks/ba12c10f-546f2714ff8abc66.js +1 -0
  88. qtype/interpreter/ui/_next/static/chunks/{main-6d261b6c5d6fb6c2.js → main-e26b9cb206da2cac.js} +1 -1
  89. qtype/interpreter/ui/_next/static/chunks/webpack-08642e441b39b6c2.js +1 -0
  90. qtype/interpreter/ui/_next/static/css/8a8d1269e362fef7.css +3 -0
  91. qtype/interpreter/ui/_next/static/media/4cf2300e9c8272f7-s.p.woff2 +0 -0
  92. qtype/interpreter/ui/icon.png +0 -0
  93. qtype/interpreter/ui/index.html +1 -1
  94. qtype/interpreter/ui/index.txt +5 -5
  95. qtype/semantic/checker.py +643 -0
  96. qtype/semantic/generate.py +268 -85
  97. qtype/semantic/loader.py +95 -0
  98. qtype/semantic/model.py +535 -163
  99. qtype/semantic/resolver.py +63 -19
  100. qtype/semantic/visualize.py +50 -35
  101. {qtype-0.0.12.dist-info → qtype-0.1.3.dist-info}/METADATA +21 -4
  102. qtype-0.1.3.dist-info/RECORD +137 -0
  103. qtype/dsl/base_types.py +0 -38
  104. qtype/dsl/validator.py +0 -464
  105. qtype/interpreter/batch/__init__.py +0 -0
  106. qtype/interpreter/batch/flow.py +0 -95
  107. qtype/interpreter/batch/sql_source.py +0 -95
  108. qtype/interpreter/batch/step.py +0 -63
  109. qtype/interpreter/batch/types.py +0 -41
  110. qtype/interpreter/batch/utils.py +0 -179
  111. qtype/interpreter/chat/chat_api.py +0 -237
  112. qtype/interpreter/chat/vercel.py +0 -314
  113. qtype/interpreter/exceptions.py +0 -10
  114. qtype/interpreter/step.py +0 -67
  115. qtype/interpreter/steps/__init__.py +0 -0
  116. qtype/interpreter/steps/agent.py +0 -114
  117. qtype/interpreter/steps/condition.py +0 -36
  118. qtype/interpreter/steps/decoder.py +0 -88
  119. qtype/interpreter/steps/llm_inference.py +0 -150
  120. qtype/interpreter/steps/prompt_template.py +0 -54
  121. qtype/interpreter/steps/search.py +0 -24
  122. qtype/interpreter/steps/tool.py +0 -53
  123. qtype/interpreter/streaming_helpers.py +0 -123
  124. qtype/interpreter/ui/_next/static/chunks/736-7fc606e244fedcb1.js +0 -36
  125. qtype/interpreter/ui/_next/static/chunks/app/page-c72e847e888e549d.js +0 -1
  126. qtype/interpreter/ui/_next/static/chunks/ba12c10f-22556063851a6df2.js +0 -1
  127. qtype/interpreter/ui/_next/static/chunks/webpack-8289c17c67827f22.js +0 -1
  128. qtype/interpreter/ui/_next/static/css/a262c53826df929b.css +0 -3
  129. qtype/interpreter/ui/_next/static/media/569ce4b8f30dc480-s.p.woff2 +0 -0
  130. qtype/interpreter/ui/favicon.ico +0 -0
  131. qtype/loader.py +0 -389
  132. qtype-0.0.12.dist-info/RECORD +0 -105
  133. /qtype/interpreter/ui/_next/static/{OT8QJQW3J70VbDWWfrEMT → 20HoJN6otZ_LyHLHpCPE6}/_ssgManifest.js +0 -0
  134. {qtype-0.0.12.dist-info → qtype-0.1.3.dist-info}/WHEEL +0 -0
  135. {qtype-0.0.12.dist-info → qtype-0.1.3.dist-info}/entry_points.txt +0 -0
  136. {qtype-0.0.12.dist-info → qtype-0.1.3.dist-info}/licenses/LICENSE +0 -0
  137. {qtype-0.0.12.dist-info → qtype-0.1.3.dist-info}/top_level.txt +0 -0
qtype/dsl/model.py CHANGED
@@ -1,9 +1,11 @@
1
1
  from __future__ import annotations
2
2
 
3
3
  import inspect
4
+ import sys
4
5
  from abc import ABC
5
6
  from enum import Enum
6
- from typing import Any, Literal, Type, Union
7
+ from functools import partial
8
+ from typing import Annotated, Any, Literal, Type, Union
7
9
 
8
10
  from pydantic import (
9
11
  BaseModel,
@@ -14,20 +16,23 @@ from pydantic import (
14
16
  )
15
17
 
16
18
  import qtype.dsl.domain_types as domain_types
17
- from qtype.dsl.base_types import (
19
+ from qtype.base.types import (
20
+ BatchableStepMixin,
21
+ BatchConfig,
22
+ CachedStepMixin,
23
+ ConcurrentStepMixin,
18
24
  PrimitiveTypeEnum,
25
+ Reference,
19
26
  StepCardinality,
20
27
  StrictBaseModel,
21
28
  )
22
- from qtype.dsl.domain_types import ChatContent, ChatMessage, Embedding
23
-
24
-
25
- class StructuralTypeEnum(str, Enum):
26
- """Represents a structured type that can be used in the DSL."""
27
-
28
- object = "object"
29
- array = "array"
30
-
29
+ from qtype.dsl.domain_types import (
30
+ ChatContent,
31
+ ChatMessage,
32
+ Embedding,
33
+ RAGChunk,
34
+ RAGDocument,
35
+ )
31
36
 
32
37
  DOMAIN_CLASSES = {
33
38
  name: obj
@@ -36,36 +41,164 @@ DOMAIN_CLASSES = {
36
41
  }
37
42
 
38
43
 
44
+ def _resolve_list_type(
45
+ element_type_str: str, custom_type_registry: dict[str, Type[BaseModel]]
46
+ ) -> ListType:
47
+ """
48
+ Resolve a list element type and return a ListType.
49
+
50
+ Args:
51
+ element_type_str: The element type string (e.g., "text", "ChatMessage")
52
+ custom_type_registry: Registry of custom types
53
+
54
+ Returns:
55
+ ListType with resolved element type
56
+
57
+ Raises:
58
+ ValueError: If element type is invalid for lists
59
+ """
60
+ # Recursively resolve the element type
61
+ element_type = _resolve_variable_type(
62
+ element_type_str, custom_type_registry
63
+ )
64
+
65
+ # Allow both primitive types and custom types (but no nested lists)
66
+ if isinstance(element_type, PrimitiveTypeEnum):
67
+ return ListType(element_type=element_type)
68
+ elif isinstance(element_type, str):
69
+ # This is a custom type reference - store as string for later resolution
70
+ return ListType(element_type=element_type)
71
+ elif element_type in DOMAIN_CLASSES.values():
72
+ # Domain class - store its name as string reference
73
+ for name, cls in DOMAIN_CLASSES.items():
74
+ if cls == element_type:
75
+ return ListType(element_type=name)
76
+ return ListType(element_type=str(element_type))
77
+ else:
78
+ raise ValueError(
79
+ (
80
+ "List element type must be a primitive or custom type "
81
+ f"reference, got: {element_type}"
82
+ )
83
+ )
84
+
85
+
86
+ def _resolve_primitive_type(type_str: str) -> PrimitiveTypeEnum | None:
87
+ """
88
+ Try to resolve a string as a primitive type.
89
+
90
+ Args:
91
+ type_str: The type string to resolve
92
+
93
+ Returns:
94
+ PrimitiveTypeEnum if it matches, None otherwise
95
+ """
96
+ try:
97
+ return PrimitiveTypeEnum(type_str)
98
+ except ValueError:
99
+ return None
100
+
101
+
102
+ def _resolve_domain_type(type_str: str) -> Type[BaseModel] | None:
103
+ """
104
+ Try to resolve a string as a built-in domain entity class.
105
+
106
+ Args:
107
+ type_str: The type string to resolve
108
+
109
+ Returns:
110
+ Domain class if found, None otherwise
111
+ """
112
+ return DOMAIN_CLASSES.get(type_str)
113
+
114
+
115
+ def _resolve_custom_type(
116
+ type_str: str, custom_type_registry: dict[str, Type[BaseModel]]
117
+ ) -> Type[BaseModel] | None:
118
+ """
119
+ Try to resolve a string as a custom type from the registry.
120
+
121
+ Args:
122
+ type_str: The type string to resolve
123
+ custom_type_registry: Registry of custom types
124
+
125
+ Returns:
126
+ Custom type class if found, None otherwise
127
+ """
128
+ return custom_type_registry.get(type_str)
129
+
130
+
39
131
  def _resolve_variable_type(
40
132
  parsed_type: Any, custom_type_registry: dict[str, Type[BaseModel]]
41
133
  ) -> Any:
42
- """Resolve a type string to its corresponding PrimitiveTypeEnum or return as is."""
134
+ """
135
+ Resolve a type to its corresponding representation.
136
+
137
+ Handles primitive types, list types, domain types, and custom types.
138
+
139
+ Args:
140
+ parsed_type: The type to resolve (can be string or already resolved)
141
+ custom_type_registry: Registry of dynamically created custom types
142
+
143
+ Returns:
144
+ Resolved type (PrimitiveTypeEnum, ListType, domain class, or string)
145
+ """
43
146
  # If the type is already resolved or is a structured definition, pass it through.
44
147
  if not isinstance(parsed_type, str):
45
148
  return parsed_type
46
149
 
47
- # --- Case 1: The type is a string ---
48
- # Try to resolve it as a primitive type first.
49
- try:
50
- return PrimitiveTypeEnum(parsed_type)
51
- except ValueError:
52
- pass # Not a primitive, continue to the next check.
150
+ # Check if it's a list type (e.g., "list[text]")
151
+ if parsed_type.startswith("list[") and parsed_type.endswith("]"):
152
+ element_type_str = parsed_type[5:-1] # Remove "list[" and "]"
153
+ return _resolve_list_type(element_type_str, custom_type_registry)
154
+
155
+ # Try to resolve as primitive type
156
+ primitive = _resolve_primitive_type(parsed_type)
157
+ if primitive is not None:
158
+ return primitive
53
159
 
54
- # Try to resolve it as a built-in Domain Entity class.
55
- # (Assuming domain_types and inspect are defined elsewhere)
56
- if parsed_type in DOMAIN_CLASSES:
57
- return DOMAIN_CLASSES[parsed_type]
160
+ # Try to resolve as built-in domain entity class
161
+ domain = _resolve_domain_type(parsed_type)
162
+ if domain is not None:
163
+ return domain
58
164
 
59
- # Check the registry of dynamically created custom types
60
- if parsed_type in custom_type_registry:
61
- return custom_type_registry[parsed_type]
165
+ # Try to resolve as custom type
166
+ custom = _resolve_custom_type(parsed_type, custom_type_registry)
167
+ if custom is not None:
168
+ return custom
62
169
 
63
- # If it's not a primitive or a known domain entity, return it as a string.
64
- # This assumes it might be a reference ID to another custom type.
170
+ # If it's not any known type, return it as a string.
171
+ # This assumes it might be a forward reference to a custom type.
65
172
  return parsed_type
66
173
 
67
174
 
68
- class Variable(BaseModel):
175
+ def _resolve_type_field_validator(data: Any, info: ValidationInfo) -> Any:
176
+ """
177
+ Shared validator for resolving 'type' fields in models.
178
+
179
+ This validator resolves string-based type references using the custom
180
+ type registry from the validation context.
181
+
182
+ Args:
183
+ data: The data dict being validated
184
+ info: Pydantic validation info containing context
185
+
186
+ Returns:
187
+ Updated data dict with resolved type field
188
+ """
189
+ if (
190
+ isinstance(data, dict)
191
+ and "type" in data
192
+ and isinstance(data["type"], str)
193
+ ):
194
+ # Get the registry of custom types from the validation context.
195
+ custom_types = (info.context or {}).get("custom_types", {})
196
+ resolved = _resolve_variable_type(data["type"], custom_types)
197
+ data["type"] = resolved
198
+ return data
199
+
200
+
201
+ class Variable(StrictBaseModel):
69
202
  """Schema for a variable that can serve as input, output, or parameter within the DSL."""
70
203
 
71
204
  id: str = Field(
@@ -82,21 +215,24 @@ class Variable(BaseModel):
82
215
  @model_validator(mode="before")
83
216
  @classmethod
84
217
  def resolve_type(cls, data: Any, info: ValidationInfo) -> Any:
85
- """
86
- This validator runs during the main validation pass. It uses the
87
- context to resolve string-based type references.
88
- """
89
- if (
90
- isinstance(data, dict)
91
- and "type" in data
92
- and isinstance(data["type"], str)
93
- ):
94
- # Get the registry of custom types from the validation context.
95
- custom_types = (info.context or {}).get("custom_types", {})
96
- resolved = _resolve_variable_type(data["type"], custom_types)
97
- # {'id': 'user_message', 'type': 'ChatMessage'}
98
- data["type"] = resolved
99
- return data
218
+ """Resolve string-based type references using the shared validator."""
219
+ return _resolve_type_field_validator(data, info)
220
+
221
+
222
+ class SecretReference(StrictBaseModel):
223
+ """
224
+ A reference to a secret in the application's configured SecretManager.
225
+ This value is resolved at runtime by the interpreter.
226
+ """
227
+
228
+ secret_name: str = Field(
229
+ ...,
230
+ description="The name, ID, or ARN of the secret to fetch (e.g., 'my-project/db-password').",
231
+ )
232
+ key: str | None = Field(
233
+ default=None,
234
+ description="Optional key if the secret is a JSON blob or map (e.g., a specific key in a K8s secret).",
235
+ )
100
236
 
101
237
 
102
238
  class CustomType(StrictBaseModel):
@@ -107,40 +243,77 @@ class CustomType(StrictBaseModel):
107
243
  properties: dict[str, str]
108
244
 
109
245
 
246
+ class ToolParameter(BaseModel):
247
+ """Defines a tool input or output parameter with type and optional flag."""
248
+
249
+ type: VariableType | str
250
+ optional: bool = Field(
251
+ default=False, description="Whether this parameter is optional"
252
+ )
253
+
254
+ @model_validator(mode="before")
255
+ @classmethod
256
+ def resolve_type(cls, data: Any, info: ValidationInfo) -> Any:
257
+ """Resolve string-based type references using the shared validator."""
258
+ return _resolve_type_field_validator(data, info)
259
+
260
+
261
+ class ListType(BaseModel):
262
+ """Represents a list type with a specific element type."""
263
+
264
+ element_type: PrimitiveTypeEnum | str = Field(
265
+ ...,
266
+ description="Type of elements in the list (primitive type or custom type reference)",
267
+ )
268
+
269
+ def __str__(self) -> str:
270
+ """String representation for list type."""
271
+ if isinstance(self.element_type, PrimitiveTypeEnum):
272
+ return f"list[{self.element_type.value}]"
273
+ else:
274
+ return f"list[{self.element_type}]"
275
+
276
+
110
277
  VariableType = (
111
278
  PrimitiveTypeEnum
112
279
  | Type[Embedding]
113
280
  | Type[ChatMessage]
114
281
  | Type[ChatContent]
115
282
  | Type[BaseModel]
283
+ | Type[RAGDocument]
284
+ | Type[RAGChunk]
285
+ | ListType
116
286
  )
117
287
 
118
288
 
119
289
  class Model(StrictBaseModel):
120
290
  """Describes a generative model configuration, including provider and model ID."""
121
291
 
292
+ type: Literal["Model"] = "Model"
122
293
  id: str = Field(..., description="Unique ID for the model.")
123
- auth: AuthProviderType | str | None = Field(
294
+ auth: Reference[AuthProviderType] | str | None = Field(
124
295
  default=None,
125
296
  description="AuthorizationProvider used for model access.",
126
297
  )
127
- inference_params: dict[str, Any] | None = Field(
128
- default=None,
298
+ inference_params: dict[str, Any] = Field(
299
+ default_factory=dict,
129
300
  description="Optional inference parameters like temperature or max_tokens.",
130
301
  )
131
302
  model_id: str | None = Field(
132
303
  default=None,
133
304
  description="The specific model name or ID for the provider. If None, id is used",
134
305
  )
135
- # TODO(maybe): Make this an enum?
136
- provider: str = Field(
137
- ..., description="Name of the provider, e.g., openai or anthropic."
306
+ provider: Literal["openai", "anthropic", "aws-bedrock", "gcp-vertex"] = (
307
+ Field(
308
+ ..., description="Name of the provider, e.g., openai or anthropic."
309
+ )
138
310
  )
139
311
 
140
312
 
141
313
  class EmbeddingModel(Model):
142
314
  """Describes an embedding model configuration, extending the base Model class."""
143
315
 
316
+ type: Literal["EmbeddingModel"] = "EmbeddingModel"
144
317
  dimensions: int = Field(
145
318
  ...,
146
319
  description="Dimensionality of the embedding vectors produced by this model.",
@@ -172,20 +345,22 @@ class Memory(StrictBaseModel):
172
345
  #
173
346
 
174
347
 
175
- class Step(StrictBaseModel, ABC):
348
+ class Step(CachedStepMixin, StrictBaseModel, ABC):
176
349
  """Base class for components that take inputs and produce outputs."""
177
350
 
178
351
  id: str = Field(..., description="Unique ID of this component.")
352
+ type: str = Field(..., description="Type of the step component.")
179
353
  cardinality: StepCardinality = Field(
180
354
  default=StepCardinality.one,
181
355
  description="Does this step emit 1 (one) or 0...N (many) instances of the outputs?",
182
356
  )
183
- inputs: list[Variable | str] | None = Field(
184
- default=None,
185
- description="Input variables required by this step.",
357
+ inputs: list[Reference[Variable] | str] = Field(
358
+ default_factory=list,
359
+ description="References to the variables required by this step.",
186
360
  )
187
- outputs: list[Variable | str] | None = Field(
188
- default=None, description="Variable where output is stored."
361
+ outputs: list[Reference[Variable] | str] = Field(
362
+ default_factory=list,
363
+ description="References to the variables where output is stored.",
189
364
  )
190
365
 
191
366
 
@@ -193,65 +368,37 @@ class PromptTemplate(Step):
193
368
  """Defines a prompt template with a string format and variable bindings.
194
369
  This is used to generate prompts dynamically based on input variables."""
195
370
 
371
+ type: Literal["PromptTemplate"] = "PromptTemplate" # type: ignore
196
372
  template: str = Field(
197
373
  ...,
198
374
  description="String template for the prompt with variable placeholders.",
199
375
  )
200
376
 
201
- @model_validator(mode="after")
202
- def set_default_outputs(self) -> "PromptTemplate":
203
- """Set default output variable if none provided."""
204
- if self.outputs is None:
205
- self.outputs = [
206
- Variable(id=f"{self.id}.prompt", type=PrimitiveTypeEnum.text)
207
- ]
208
- if len(self.outputs) != 1:
209
- raise ValueError(
210
- "PromptTemplate steps must have exactly one output variable -- the result of applying the template."
211
- )
212
- return self
213
-
214
-
215
- class Condition(Step):
216
- """Conditional logic gate within a flow. Supports branching logic for execution based on variable values."""
217
-
218
- # TODO: Add support for more complex conditions
219
- else_: StepType | str | None = Field(
220
- default=None,
221
- alias="else",
222
- description="Optional step to run if condition fails.",
223
- )
224
- equals: Variable | str | None = Field(
225
- default=None, description="Match condition for equality check."
226
- )
227
- then: StepType | str = Field(
228
- ..., description="Step to run if condition matches."
229
- )
230
-
231
- @model_validator(mode="after")
232
- def set_default_outputs(self) -> "Condition":
233
- """Set default output variable if none provided."""
234
- if not self.inputs or len(self.inputs) != 1:
235
- raise ValueError(
236
- "Condition steps must have exactly one input variable."
237
- )
238
- return self
239
-
240
377
 
241
- class Tool(Step, ABC):
378
+ class Tool(StrictBaseModel, ABC):
242
379
  """
243
380
  Base class for callable functions or external operations available to the model or as a step in a flow.
244
381
  """
245
382
 
383
+ id: str = Field(..., description="Unique ID of this component.")
246
384
  name: str = Field(..., description="Name of the tool function.")
247
385
  description: str = Field(
248
386
  ..., description="Description of what the tool does."
249
387
  )
388
+ inputs: dict[str, ToolParameter] = Field(
389
+ default_factory=dict,
390
+ description="Input parameters required by this tool.",
391
+ )
392
+ outputs: dict[str, ToolParameter] = Field(
393
+ default_factory=dict,
394
+ description="Output parameters produced by this tool.",
395
+ )
250
396
 
251
397
 
252
398
  class PythonFunctionTool(Tool):
253
399
  """Tool that calls a Python function."""
254
400
 
401
+ type: Literal["PythonFunctionTool"] = "PythonFunctionTool"
255
402
  function_name: str = Field(
256
403
  ..., description="Name of the Python function to call."
257
404
  )
@@ -264,30 +411,36 @@ class PythonFunctionTool(Tool):
264
411
  class APITool(Tool):
265
412
  """Tool that invokes an API endpoint."""
266
413
 
414
+ type: Literal["APITool"] = "APITool"
267
415
  endpoint: str = Field(..., description="API endpoint URL to call.")
268
416
  method: str = Field(
269
417
  default="GET",
270
418
  description="HTTP method to use (GET, POST, PUT, DELETE, etc.).",
271
419
  )
272
- auth: AuthProviderType | str | None = Field(
420
+ auth: Reference[AuthProviderType] | str | None = Field(
273
421
  default=None,
274
422
  description="Optional AuthorizationProvider for API authentication.",
275
423
  )
276
- headers: dict[str, str] | None = Field(
277
- default=None,
424
+ headers: dict[str, str] = Field(
425
+ default_factory=dict,
278
426
  description="Optional HTTP headers to include in the request.",
279
427
  )
428
+ parameters: dict[str, ToolParameter] = Field(
429
+ default_factory=dict,
430
+ description="Output parameters produced by this tool.",
431
+ )
280
432
 
281
433
 
282
- class LLMInference(Step):
434
+ class LLMInference(Step, ConcurrentStepMixin):
283
435
  """Defines a step that performs inference using a language model.
284
436
  It can take input variables and produce output variables based on the model's response."""
285
437
 
286
- memory: Memory | str | None = Field(
438
+ type: Literal["LLMInference"] = "LLMInference"
439
+ memory: Reference[Memory] | str | None = Field(
287
440
  default=None,
288
- description="Memory object to retain context across interactions.",
441
+ description="A reference to a Memory object to retain context across interactions.",
289
442
  )
290
- model: ModelType | str = Field(
443
+ model: Reference[Model] | str = Field(
291
444
  ..., description="The model to use for inference."
292
445
  )
293
446
  system_message: str | None = Field(
@@ -295,43 +448,70 @@ class LLMInference(Step):
295
448
  description="Optional system message to set the context for the model.",
296
449
  )
297
450
 
298
- @model_validator(mode="after")
299
- def set_default_outputs(self) -> "LLMInference":
300
- """Set default output variable if none provided."""
301
- if self.outputs is None:
302
- self.outputs = [
303
- Variable(id=f"{self.id}.response", type=PrimitiveTypeEnum.text)
304
- ]
305
- return self
451
+
452
+ class InvokeEmbedding(Step, ConcurrentStepMixin):
453
+ """Defines a step that generates embeddings using an embedding model.
454
+ It takes input variables and produces output variables containing the embeddings."""
455
+
456
+ type: Literal["InvokeEmbedding"] = "InvokeEmbedding"
457
+ model: Reference[EmbeddingModel] | str = Field(
458
+ ..., description="The embedding model to use."
459
+ )
306
460
 
307
461
 
308
462
  class Agent(LLMInference):
309
463
  """Defines an agent that can perform tasks and make decisions based on user input and context."""
310
464
 
311
- tools: list[ToolType | str] = Field(
312
- ..., description="List of tools available to the agent."
465
+ type: Literal["Agent"] = "Agent"
466
+
467
+ tools: list[Reference[ToolType] | str] = Field(
468
+ default_factory=list,
469
+ description="List of tools available to the agent.",
313
470
  )
314
471
 
315
472
 
316
- class Flow(Step):
473
+ class Flow(StrictBaseModel):
317
474
  """Defines a flow of steps that can be executed in sequence or parallel.
318
475
  If input or output variables are not specified, they are inferred from
319
- the first and last step, respectively.
320
- """
476
+ the first and last step, respectively."""
321
477
 
478
+ id: str = Field(..., description="Unique ID of the flow.")
479
+ type: Literal["Flow"] = "Flow"
322
480
  description: str | None = Field(
323
481
  default=None, description="Optional description of the flow."
324
482
  )
483
+ steps: list[StepType | Reference[StepType]] = Field(
484
+ default_factory=list,
485
+ description="List of steps or references to steps",
486
+ )
325
487
 
326
- cardinality: StepCardinality = Field(
327
- default=StepCardinality.auto,
328
- description="The cardinality of the flow, inferred from its steps when set to 'auto'.",
488
+ interface: FlowInterface | None = Field(default=None)
489
+ variables: list[Variable] = Field(
490
+ default_factory=list,
491
+ description="List of variables available at the application scope.",
492
+ )
493
+ inputs: list[Reference[Variable] | str] = Field(
494
+ default_factory=list,
495
+ description="Input variables required by this step.",
329
496
  )
497
+ outputs: list[Reference[Variable] | str] = Field(
498
+ default_factory=list, description="Resulting variables"
499
+ )
500
+
501
+
502
+ class FlowInterface(StrictBaseModel):
503
+ """
504
+ Defines the public-facing contract for a Flow, guiding the UI
505
+ and session management.
506
+ """
330
507
 
331
- mode: Literal["Complete", "Chat"] = "Complete"
508
+ # 1. Tells the UI how to render this flow
509
+ type: Literal["Complete", "Conversational"] = "Complete"
332
510
 
333
- steps: list[StepType | str] = Field(
334
- default_factory=list, description="List of steps or step IDs."
511
+ # 2. Declares which inputs are "sticky" and persisted in the session
512
+ session_inputs: list[Reference[Variable] | str] = Field(
513
+ default_factory=list,
514
+ description="A list of input variable IDs that are set once and then persisted across a session.",
335
515
  )
336
516
 
337
517
 
@@ -346,34 +526,119 @@ class Decoder(Step):
346
526
  """Defines a step that decodes string data into structured outputs.
347
527
 
348
528
  If parsing fails, the step will raise an error and halt execution.
349
- Use conditional logic in your flow to handle potential parsing errors.
350
- """
529
+ Use conditional logic in your flow to handle potential parsing errors."""
530
+
531
+ type: Literal["Decoder"] = "Decoder"
351
532
 
352
533
  format: DecoderFormat = Field(
353
534
  DecoderFormat.json,
354
535
  description="Format in which the decoder processes data. Defaults to JSON.",
355
536
  )
356
537
 
357
- @model_validator(mode="after")
358
- def set_default_outputs(self) -> "Decoder":
359
- """Set default output variable if none provided."""
360
-
361
- if (
362
- self.inputs is None
363
- or len(self.inputs) != 1
364
- or (
365
- isinstance(self.inputs[0], Variable)
366
- and self.inputs[0].type != PrimitiveTypeEnum.text
367
- )
368
- ):
369
- raise ValueError(
370
- f"Decoder steps must have exactly one input variable of type 'text'. Found: {self.inputs}"
371
- )
372
- if self.outputs is None:
373
- raise ValueError(
374
- "Decoder steps must have at least one output variable defined."
375
- )
376
- return self
538
+
539
+ class Echo(Step):
540
+ """Defines a step that echoes its inputs as outputs.
541
+
542
+ Useful for debugging flows by inspecting variable values at a specific
543
+ point in the execution pipeline. The step simply passes through all input
544
+ variables as outputs without modification.
545
+ """
546
+
547
+ type: Literal["Echo"] = "Echo"
548
+
549
+
550
+ class FieldExtractor(Step):
551
+ """Extracts specific fields from input data using JSONPath expressions.
552
+
553
+ This step uses JSONPath syntax to extract data from structured inputs
554
+ (Pydantic models, dicts, lists). The input is first converted to a dict
555
+ using model_dump() if it's a Pydantic model, then the JSONPath expression
556
+ is evaluated.
557
+
558
+ If the JSONPath matches multiple values, the step yields multiple output
559
+ messages (1-to-many cardinality). If it matches a single value, it yields
560
+ one output message. If it matches nothing, it raises an error.
561
+
562
+ The extracted data is used to construct the output variable by passing it
563
+ as keyword arguments to the output type's constructor.
564
+
565
+ Example JSONPath expressions:
566
+ - `$.field_name` - Extract a single field
567
+ - `$.items[*]` - Extract all items from a list
568
+ - `$.items[?(@.price > 10)]` - Filter items by condition
569
+ """
570
+
571
+ type: Literal["FieldExtractor"] = "FieldExtractor"
572
+ json_path: str = Field(
573
+ ...,
574
+ description="JSONPath expression to extract data from the input. Uses jsonpath-ng syntax.",
575
+ )
576
+ fail_on_missing: bool = Field(
577
+ default=True,
578
+ description="Whether to raise an error if the JSONPath matches no data. If False, returns None.",
579
+ )
580
+
581
+
582
+ class InvokeTool(Step, ConcurrentStepMixin):
583
+ """Invokes a tool with input and output bindings."""
584
+
585
+ type: Literal["InvokeTool"] = "InvokeTool"
586
+
587
+ tool: Reference[ToolType] | str = Field(
588
+ ...,
589
+ description="Tool to invoke.",
590
+ )
591
+ input_bindings: dict[str, str] = Field(
592
+ ...,
593
+ description="Mapping from variable references to tool input parameter names.",
594
+ )
595
+ output_bindings: dict[str, str] = Field(
596
+ ...,
597
+ description="Mapping from variable references to tool output parameter names.",
598
+ )
599
+
600
+
601
+ class InvokeFlow(Step):
602
+ """Invokes a flow with input and output bindings."""
603
+
604
+ type: Literal["InvokeFlow"] = "InvokeFlow"
605
+
606
+ flow: Reference[Flow] | str = Field(
607
+ ...,
608
+ description="Flow to invoke.",
609
+ )
610
+ input_bindings: dict[Reference[Variable], str] = Field(
611
+ ...,
612
+ description="Mapping from variable references to flow input variable IDs.",
613
+ )
614
+ output_bindings: dict[Reference[Variable], str] = Field(
615
+ ...,
616
+ description="Mapping from variable references to flow output variable IDs.",
617
+ )
618
+
619
+
620
+ #
621
+ # ---------------- Secret Manager Component ----------------
622
+ #
623
+
624
+
625
+ class SecretManager(StrictBaseModel, ABC):
626
+ """Base class for secret manager configurations."""
627
+
628
+ id: str = Field(
629
+ ..., description="Unique ID for this secret manager configuration."
630
+ )
631
+ type: str = Field(..., description="The type of secret manager.")
632
+ auth: Reference[AuthProviderType] | str = Field(
633
+ ...,
634
+ description="AuthorizationProvider used to access this secret manager.",
635
+ )
636
+
637
+
638
+ class AWSSecretManager(SecretManager):
639
+ """Configuration for AWS Secrets Manager."""
640
+
641
+ type: Literal["aws_secret_manager"] = "aws_secret_manager"
377
642
 
378
643
 
379
644
  #
@@ -394,22 +659,67 @@ class APIKeyAuthProvider(AuthorizationProvider):
394
659
  """API key-based authentication provider."""
395
660
 
396
661
  type: Literal["api_key"] = "api_key"
397
- api_key: str = Field(..., description="API key for authentication.")
662
+ api_key: str | SecretReference = Field(
663
+ ..., description="API key for authentication."
664
+ )
398
665
  host: str | None = Field(
399
666
  default=None, description="Base URL or domain of the provider."
400
667
  )
401
668
 
402
669
 
670
+ class BearerTokenAuthProvider(AuthorizationProvider):
671
+ """Bearer token authentication provider."""
672
+
673
+ type: Literal["bearer_token"] = "bearer_token"
674
+ token: str | SecretReference = Field(
675
+ ..., description="Bearer token for authentication."
676
+ )
677
+
678
+
403
679
  class OAuth2AuthProvider(AuthorizationProvider):
404
680
  """OAuth2 authentication provider."""
405
681
 
406
682
  type: Literal["oauth2"] = "oauth2"
407
683
  client_id: str = Field(..., description="OAuth2 client ID.")
408
- client_secret: str = Field(..., description="OAuth2 client secret.")
684
+ client_secret: str | SecretReference = Field(
685
+ ..., description="OAuth2 client secret."
686
+ )
409
687
  token_url: str = Field(..., description="Token endpoint URL.")
410
- scopes: list[str] | None = Field(
411
- default=None, description="OAuth2 scopes required."
688
+ scopes: list[str] = Field(
689
+ default_factory=list, description="OAuth2 scopes required."
690
+ )
691
+
692
+
693
+ class VertexAuthProvider(AuthorizationProvider):
694
+ """Google Vertex authentication provider supporting gcloud profile or service account."""
695
+
696
+ type: Literal["vertex"] = "vertex"
697
+ profile_name: str | None = Field(
698
+ default=None,
699
+ description="Local gcloud profile name (if using existing CLI credentials).",
412
700
  )
701
+ project_id: str | None = Field(
702
+ default=None,
703
+ description="Explicit GCP project ID override (if different from profile).",
704
+ )
705
+ service_account_file: str | None = Field(
706
+ default=None,
707
+ description="Path to a service account JSON key file.",
708
+ )
709
+ region: str | None = Field(
710
+ default=None,
711
+ description="Vertex region (e.g., us-central1).",
712
+ )
713
+
714
+ @model_validator(mode="after")
715
+ def validate_vertex_auth(self) -> VertexAuthProvider:
716
+ """Ensure at least one credential source is provided."""
717
+ if not (self.profile_name or self.service_account_file):
718
+ raise ValueError(
719
+ "VertexAuthProvider requires either a profile_name or a "
720
+ "service_account_file."
721
+ )
722
+ return self
413
723
 
414
724
 
415
725
  class AWSAuthProvider(AuthorizationProvider):
@@ -418,13 +728,13 @@ class AWSAuthProvider(AuthorizationProvider):
418
728
  type: Literal["aws"] = "aws"
419
729
 
420
730
  # Method 1: Access key/secret/session
421
- access_key_id: str | None = Field(
731
+ access_key_id: str | SecretReference | None = Field(
422
732
  default=None, description="AWS access key ID."
423
733
  )
424
- secret_access_key: str | None = Field(
734
+ secret_access_key: str | SecretReference | None = Field(
425
735
  default=None, description="AWS secret access key."
426
736
  )
427
- session_token: str | None = Field(
737
+ session_token: str | SecretReference | None = Field(
428
738
  default=None,
429
739
  description="AWS session token for temporary credentials.",
430
740
  )
@@ -449,7 +759,7 @@ class AWSAuthProvider(AuthorizationProvider):
449
759
  region: str | None = Field(default=None, description="AWS region.")
450
760
 
451
761
  @model_validator(mode="after")
452
- def validate_aws_auth(self) -> "AWSAuthProvider":
762
+ def validate_aws_auth(self) -> AWSAuthProvider:
453
763
  """Validate AWS authentication configuration."""
454
764
  # At least one auth method must be specified
455
765
  has_keys = self.access_key_id and self.secret_access_key
@@ -477,13 +787,18 @@ class TelemetrySink(StrictBaseModel):
477
787
  id: str = Field(
478
788
  ..., description="Unique ID of the telemetry sink configuration."
479
789
  )
480
- auth: AuthProviderType | str | None = Field(
790
+ provider: Literal["Phoenix", "Langfuse"] = "Phoenix"
791
+ auth: Reference[AuthProviderType] | str | None = Field(
481
792
  default=None,
482
793
  description="AuthorizationProvider used to authenticate telemetry data transmission.",
483
794
  )
484
- endpoint: str = Field(
795
+ endpoint: str | SecretReference = Field(
485
796
  ..., description="URL endpoint where telemetry data will be sent."
486
797
  )
798
+ args: dict[str, Any] = Field(
799
+ default_factory=dict,
800
+ description="Additional configuration arguments specific to the telemetry sink type.",
801
+ )
487
802
 
488
803
 
489
804
  #
@@ -508,48 +823,53 @@ class Application(StrictBaseModel):
508
823
  )
509
824
 
510
825
  # Core components
511
- memories: list[Memory] | None = Field(
512
- default=None,
826
+ memories: list[Memory] = Field(
827
+ default_factory=list,
513
828
  description="List of memory definitions used in this application.",
514
829
  )
515
- models: list[ModelType] | None = Field(
516
- default=None, description="List of models used in this application."
830
+ models: list[ModelType] = Field(
831
+ default_factory=list,
832
+ description="List of models used in this application.",
517
833
  )
518
- types: list[CustomType] | None = Field(
519
- default=None,
834
+ types: list[CustomType] = Field(
835
+ default_factory=list,
520
836
  description="List of custom types defined in this application.",
521
837
  )
522
- variables: list[Variable] | None = Field(
523
- default=None, description="List of variables used in this application."
524
- )
525
838
 
526
839
  # Orchestration
527
- flows: list[Flow] | None = Field(
528
- default=None, description="List of flows defined in this application."
840
+ flows: list[Flow] = Field(
841
+ default_factory=list,
842
+ description="List of flows defined in this application.",
529
843
  )
530
844
 
531
845
  # External integrations
532
- auths: list[AuthProviderType] | None = Field(
533
- default=None,
846
+ auths: list[AuthProviderType] = Field(
847
+ default_factory=list,
534
848
  description="List of authorization providers used for API access.",
535
849
  )
536
- tools: list[ToolType] | None = Field(
537
- default=None,
850
+ tools: list[ToolType] = Field(
851
+ default_factory=list,
538
852
  description="List of tools available in this application.",
539
853
  )
540
- indexes: list[IndexType] | None = Field(
541
- default=None,
854
+ indexes: list[IndexType] = Field(
855
+ default_factory=list,
542
856
  description="List of indexes available for search operations.",
543
857
  )
544
858
 
859
+ # Secret management
860
+ secret_manager: SecretManagerType | None = Field(
861
+ default=None,
862
+ description="Optional secret manager configuration for the application.",
863
+ )
864
+
545
865
  # Observability
546
866
  telemetry: TelemetrySink | None = Field(
547
867
  default=None, description="Optional telemetry sink for observability."
548
868
  )
549
869
 
550
870
  # Extensibility
551
- references: list[Document] | None = Field(
552
- default=None,
871
+ references: list[Document] = Field(
872
+ default_factory=list,
553
873
  description="List of other q-type documents you may use. This allows modular composition and reuse of components across applications.",
554
874
  )
555
875
 
@@ -559,6 +879,14 @@ class Application(StrictBaseModel):
559
879
  #
560
880
 
561
881
 
882
+ class ConstantPath(StrictBaseModel):
883
+ uri: str = Field(..., description="A constant Fsspec URI.")
884
+
885
+
886
+ # Lets the user use a constant path or reference a variable
887
+ PathType = ConstantPath | Reference[Variable] | str
888
+
889
+
562
890
  class Source(Step):
563
891
  """Base class for data sources"""
564
892
 
@@ -572,37 +900,64 @@ class Source(Step):
572
900
  class SQLSource(Source):
573
901
  """SQL database source that executes queries and emits rows."""
574
902
 
903
+ type: Literal["SQLSource"] = "SQLSource"
575
904
  query: str = Field(
576
905
  ..., description="SQL query to execute. Inputs are injected as params."
577
906
  )
578
- connection: str = Field(
907
+ connection: str | SecretReference = Field(
579
908
  ...,
580
909
  description="Database connection string or reference to auth provider. Typically in SQLAlchemy format.",
581
910
  )
582
- auth: AuthProviderType | str | None = Field(
911
+ auth: Reference[AuthProviderType] | str | None = Field(
583
912
  default=None,
584
913
  description="Optional AuthorizationProvider for database authentication.",
585
914
  )
586
915
 
587
- @model_validator(mode="after")
588
- def validate_sql_source(self) -> "SQLSource":
589
- """Validate SQL source configuration."""
590
- if self.outputs is None:
591
- raise ValueError(
592
- "SQLSource must define output variables that match the result columns."
593
- )
594
- return self
595
916
 
917
+ class FileSource(Source):
918
+ """File source that reads data from a file using fsspec-compatible URIs."""
596
919
 
597
- class Sink(Step):
598
- """Base class for data sinks"""
920
+ type: Literal["FileSource"] = "FileSource"
921
+ path: PathType = Field(
922
+ default=...,
923
+ description="Reference to a variable with an fsspec-compatible URI to read from, or the uri itself.",
924
+ )
599
925
 
600
- id: str = Field(..., description="Unique ID of the data sink.")
601
- # Remove cardinality field - it's always one for sinks
602
- # ...existing code...
603
- cardinality: Literal[StepCardinality.one] = Field(
604
- default=StepCardinality.one,
605
- description="Flows always emit exactly one instance of the outputs.",
926
+
927
+ class Writer(Step, BatchableStepMixin):
928
+ """Base class for things that write data in batches."""
929
+
930
+ id: str = Field(..., description="Unique ID of the data writer.")
931
+
932
+
933
+ class FileWriter(Writer, BatchableStepMixin):
934
+ """File writer that writes data to a file using fsspec-compatible URIs."""
935
+
936
+ type: Literal["FileWriter"] = "FileWriter"
937
+ path: PathType = Field(
938
+ default=...,
939
+ description="Reference to a variable with an fsspec-compatible URI to read from, or the uri itself.",
940
+ )
941
+ batch_config: BatchConfig = Field(
942
+ default_factory=partial(BatchConfig, batch_size=sys.maxsize),
943
+ description="Configuration for processing the input stream in batches. If omitted, the step processes items one by one.",
944
+ )
945
+
946
+
947
+ class Aggregate(Step):
948
+ """
949
+ A terminal step that consumes an entire input stream and produces a single
950
+ summary message with success/error counts.
951
+ """
952
+
953
+ type: Literal["Aggregate"] = "Aggregate"
954
+ cardinality: Literal[StepCardinality.one] = StepCardinality.one
955
+
956
+ # Outputs are now optional. The user can provide 0, 1, 2, or 3 names.
957
+ # The order will be: success_count, error_count, total_count
958
+ outputs: list[Reference[Variable] | str] = Field(
959
+ default_factory=list,
960
+ description="References to the variables for the output. There should be one and only one output with type AggregateStats",
606
961
  )
607
962
 
608
963
 
@@ -611,23 +966,95 @@ class Sink(Step):
611
966
  #
612
967
 
613
968
 
969
+ class DocumentSource(Source):
970
+ """A source of documents that will be used in retrieval augmented generation.
971
+ It uses LlamaIndex readers to load one or more raw Documents
972
+ from a specified path or system (e.g., Google Drive, web page).
973
+ See https://github.com/run-llama/llama_index/tree/main/llama-index-integrations/readers
974
+ """
975
+
976
+ type: Literal["DocumentSource"] = "DocumentSource"
977
+ reader_module: str = Field(
978
+ ...,
979
+ description="Module path of the LlamaIndex Reader).",
980
+ )
981
+ args: dict[str, Any] = Field(
982
+ default_factory=dict,
983
+ description="Reader-specific arguments to pass to the Reader constructor.",
984
+ )
985
+ loader_args: dict[str, Any] = Field(
986
+ default_factory=dict,
987
+ description="Loader-specific arguments to pass to the load_data method.",
988
+ )
989
+ auth: Reference[AuthProviderType] | str | None = Field(
990
+ default=None,
991
+ description="AuthorizationProvider for accessing the source.",
992
+ )
993
+
994
+
995
+ class DocToTextConverter(Step, ConcurrentStepMixin):
996
+ """Defines a step to convert raw documents (e.g., PDF, DOCX) loaded by a DocumentSource into plain text
997
+ using an external tool like Docling or LlamaParse for pre-processing before chunking.
998
+ The input and output are both RAGDocument, but the output after processing will have content of type markdown.
999
+ """
1000
+
1001
+ type: Literal["DocToTextConverter"] = "DocToTextConverter"
1002
+
1003
+
1004
+ class DocumentSplitter(Step, ConcurrentStepMixin):
1005
+ """Configuration for chunking/splitting documents into embeddable nodes/chunks."""
1006
+
1007
+ type: Literal["DocumentSplitter"] = "DocumentSplitter"
1008
+ cardinality: Literal[StepCardinality.many] = Field(
1009
+ default=StepCardinality.many,
1010
+ description="Consumes one document and emits 0...N nodes/chunks.",
1011
+ )
1012
+
1013
+ splitter_name: str = Field(
1014
+ default="SentenceSplitter",
1015
+ description="Name of the LlamaIndex TextSplitter class.",
1016
+ )
1017
+ chunk_size: int = Field(default=1024, description="Size of each chunk.")
1018
+ chunk_overlap: int = Field(
1019
+ default=20, description="Overlap between consecutive chunks."
1020
+ )
1021
+ args: dict[str, Any] = Field(
1022
+ default_factory=dict,
1023
+ description="Additional arguments specific to the chosen splitter class.",
1024
+ )
1025
+
1026
+
1027
+ class DocumentEmbedder(Step, ConcurrentStepMixin):
1028
+ """Embeds document chunks using a specified embedding model."""
1029
+
1030
+ type: Literal["DocumentEmbedder"] = "DocumentEmbedder"
1031
+ cardinality: Literal[StepCardinality.many] = Field(
1032
+ default=StepCardinality.many,
1033
+ description="Consumes one chunk and emits one embedded chunk.",
1034
+ )
1035
+ model: Reference[EmbeddingModel] | str = Field(
1036
+ ..., description="Embedding model to use for vectorization."
1037
+ )
1038
+
1039
+
614
1040
  class Index(StrictBaseModel, ABC):
615
1041
  """Base class for searchable indexes that can be queried by search steps."""
616
1042
 
617
1043
  id: str = Field(..., description="Unique ID of the index.")
618
- args: dict[str, Any] | None = Field(
619
- default=None,
1044
+ args: dict[str, Any] = Field(
1045
+ default_factory=dict,
620
1046
  description="Index-specific configuration and connection parameters.",
621
1047
  )
622
- auth: AuthProviderType | str | None = Field(
1048
+ auth: Reference[AuthProviderType] | str | None = Field(
623
1049
  default=None,
624
1050
  description="AuthorizationProvider for accessing the index.",
625
1051
  )
626
1052
  name: str = Field(..., description="Name of the index/collection/table.")
627
1053
 
628
1054
 
629
- class IndexUpsert(Sink):
630
- index: IndexType | str = Field(
1055
+ class IndexUpsert(Writer):
1056
+ type: Literal["IndexUpsert"] = "IndexUpsert"
1057
+ index: Reference[IndexType] | str = Field(
631
1058
  ..., description="Index to upsert into (object or ID reference)."
632
1059
  )
633
1060
 
@@ -635,7 +1062,12 @@ class IndexUpsert(Sink):
635
1062
  class VectorIndex(Index):
636
1063
  """Vector database index for similarity search using embeddings."""
637
1064
 
638
- embedding_model: EmbeddingModel | str = Field(
1065
+ type: Literal["VectorIndex"] = "VectorIndex"
1066
+ module: str = Field(
1067
+ ...,
1068
+ description="Python module path for the vector store implementation (e.g., 'llama_index.vector_stores.qdrant.QdrantVectorStore').",
1069
+ )
1070
+ embedding_model: Reference[EmbeddingModel] | str = Field(
639
1071
  ...,
640
1072
  description="Embedding model used to vectorize queries and documents.",
641
1073
  )
@@ -644,102 +1076,167 @@ class VectorIndex(Index):
644
1076
  class DocumentIndex(Index):
645
1077
  """Document search index for text-based search (e.g., Elasticsearch, OpenSearch)."""
646
1078
 
647
- # TODO: add anything that is needed for document search indexes
648
- pass
1079
+ type: Literal["DocumentIndex"] = "DocumentIndex"
1080
+ endpoint: str = Field(
1081
+ ...,
1082
+ description="URL endpoint for the search cluster (e.g., https://my-cluster.es.amazonaws.com).",
1083
+ )
1084
+ id_field: str | None = Field(
1085
+ default=None,
1086
+ description=(
1087
+ "Field name to use as document ID. "
1088
+ "If not specified, auto-detects from: _id, id, doc_id, document_id, or uuid. "
1089
+ "If all are missing, a UUID is generated."
1090
+ ),
1091
+ )
649
1092
 
650
1093
 
651
1094
  class Search(Step, ABC):
652
1095
  """Base class for search operations against indexes."""
653
1096
 
654
- filters: dict[str, Any] | None = Field(
655
- default=None, description="Optional filters to apply during search."
1097
+ filters: dict[str, Any] = Field(
1098
+ default_factory=dict,
1099
+ description="Optional filters to apply during search.",
656
1100
  )
657
- index: IndexType | str = Field(
1101
+ index: Reference[IndexType] | str = Field(
658
1102
  ..., description="Index to search against (object or ID reference)."
659
1103
  )
1104
+ default_top_k: int | None = Field(
1105
+ default=10,
1106
+ description="Number of top results to retrieve if not provided in the inputs.",
1107
+ )
660
1108
 
661
1109
 
662
- class VectorSearch(Search):
1110
+ class VectorSearch(Search, BatchableStepMixin):
663
1111
  """Performs vector similarity search against a vector index."""
664
1112
 
665
- default_top_k: int | None = Field(
666
- default=50,
667
- description="Number of top results to retrieve if not provided in the inputs.",
1113
+ type: Literal["VectorSearch"] = "VectorSearch"
1114
+ index: Reference[VectorIndex] | str = Field(
1115
+ ..., description="Index to search against (object or ID reference)."
668
1116
  )
669
1117
 
670
- @model_validator(mode="after")
671
- def set_default_inputs_outputs(self) -> "VectorSearch":
672
- """Set default input and output variables if none provided."""
673
- if self.inputs is None:
674
- self.inputs = [
675
- Variable(id="top_k", type=PrimitiveTypeEnum.int),
676
- Variable(id="query", type=PrimitiveTypeEnum.text),
677
- ]
678
-
679
- if self.outputs is None:
680
- self.outputs = [Variable(id=f"{self.id}.results", type=Embedding)]
681
- return self
682
-
683
1118
 
684
- class DocumentSearch(Search):
1119
+ class DocumentSearch(Search, ConcurrentStepMixin):
685
1120
  """Performs document search against a document index."""
686
1121
 
687
- @model_validator(mode="after")
688
- def set_default_inputs_outputs(self) -> "DocumentSearch":
689
- """Set default input and output variables if none provided."""
690
- if self.inputs is None:
691
- self.inputs = [Variable(id="query", type=PrimitiveTypeEnum.text)]
692
-
693
- if self.outputs is None:
694
- self.outputs = [
695
- Variable(id=f"{self.id}.results", type=PrimitiveTypeEnum.text)
696
- ]
697
- return self
1122
+ type: Literal["DocumentSearch"] = "DocumentSearch"
1123
+ index: Reference[DocumentIndex] | str = Field(
1124
+ ..., description="Index to search against (object or ID reference)."
1125
+ )
1126
+ query_args: dict[str, Any] = Field(
1127
+ default={
1128
+ "type": "best_fields",
1129
+ "fields": ["*"],
1130
+ },
1131
+ description="The arguments (other than 'query') to specify to the query shape (see https://docs.opensearch.org/latest/query-dsl/full-text/multi-match/).",
1132
+ )
1133
+
1134
+
1135
+ class Reranker(Step):
1136
+ """Reranks a list of documents based on relevance to a query using an LLM."""
1137
+
1138
+ type: Literal["Reranker"] = "Reranker"
1139
+
1140
+
1141
+ # TODO: create a reranker that supports llamaindex rerankers...
1142
+
1143
+
1144
+ class BedrockReranker(Reranker, ConcurrentStepMixin):
1145
+ """Reranks documents using an AWS Bedrock model."""
1146
+
1147
+ type: Literal["BedrockReranker"] = "BedrockReranker"
1148
+ auth: Reference[AWSAuthProvider] | str | None = Field(
1149
+ default=None,
1150
+ description="AWS authorization provider for Bedrock access.",
1151
+ )
1152
+ model_id: str = Field(
1153
+ ...,
1154
+ description="Bedrock model ID to use for reranking. See https://docs.aws.amazon.com/bedrock/latest/userguide/rerank-supported.html",
1155
+ )
1156
+ num_results: int | None = Field(
1157
+ default=None,
1158
+ description="Return this many results.",
1159
+ )
698
1160
 
699
1161
 
700
1162
  # Create a union type for all tool types
701
- ToolType = Union[
702
- APITool,
703
- PythonFunctionTool,
1163
+ ToolType = Annotated[
1164
+ Union[
1165
+ APITool,
1166
+ PythonFunctionTool,
1167
+ ],
1168
+ Field(discriminator="type"),
704
1169
  ]
705
1170
 
706
1171
  # Create a union type for all source types
707
- SourceType = Union[SQLSource,]
1172
+ SourceType = Union[
1173
+ DocumentSource,
1174
+ FileSource,
1175
+ SQLSource,
1176
+ ]
708
1177
 
709
1178
  # Create a union type for all authorization provider types
710
1179
  AuthProviderType = Union[
711
1180
  APIKeyAuthProvider,
1181
+ BearerTokenAuthProvider,
712
1182
  AWSAuthProvider,
713
1183
  OAuth2AuthProvider,
1184
+ VertexAuthProvider,
1185
+ ]
1186
+
1187
+ # Create a union type for all secret manager types
1188
+ SecretManagerType = Annotated[
1189
+ Union[
1190
+ AWSSecretManager
1191
+ # Add future managers like KubernetesSecretManager here
1192
+ ],
1193
+ Field(discriminator="type"),
714
1194
  ]
715
1195
 
716
1196
  # Create a union type for all step types
717
- StepType = Union[
718
- Agent,
719
- APITool,
720
- Condition,
721
- Decoder,
722
- DocumentSearch,
723
- Flow,
724
- IndexUpsert,
725
- LLMInference,
726
- PromptTemplate,
727
- PythonFunctionTool,
728
- SQLSource,
729
- Sink,
730
- VectorSearch,
1197
+ StepType = Annotated[
1198
+ Union[
1199
+ Agent,
1200
+ Aggregate,
1201
+ BedrockReranker,
1202
+ Decoder,
1203
+ DocToTextConverter,
1204
+ DocumentEmbedder,
1205
+ DocumentSearch,
1206
+ DocumentSplitter,
1207
+ DocumentSource,
1208
+ Echo,
1209
+ FieldExtractor,
1210
+ FileSource,
1211
+ FileWriter,
1212
+ IndexUpsert,
1213
+ InvokeEmbedding,
1214
+ InvokeFlow,
1215
+ InvokeTool,
1216
+ LLMInference,
1217
+ PromptTemplate,
1218
+ SQLSource,
1219
+ VectorSearch,
1220
+ ],
1221
+ Field(discriminator="type"),
731
1222
  ]
732
1223
 
733
1224
  # Create a union type for all index types
734
- IndexType = Union[
735
- DocumentIndex,
736
- VectorIndex,
1225
+ IndexType = Annotated[
1226
+ Union[
1227
+ DocumentIndex,
1228
+ VectorIndex,
1229
+ ],
1230
+ Field(discriminator="type"),
737
1231
  ]
738
1232
 
739
1233
  # Create a union type for all model types
740
- ModelType = Union[
741
- EmbeddingModel,
742
- Model,
1234
+ ModelType = Annotated[
1235
+ Union[
1236
+ EmbeddingModel,
1237
+ Model,
1238
+ ],
1239
+ Field(discriminator="type"),
743
1240
  ]
744
1241
 
745
1242
  #
@@ -754,12 +1251,6 @@ class AuthorizationProviderList(RootModel[list[AuthProviderType]]):
754
1251
  root: list[AuthProviderType]
755
1252
 
756
1253
 
757
- class IndexList(RootModel[list[IndexType]]):
758
- """Schema for a standalone list of indexes."""
759
-
760
- root: list[IndexType]
761
-
762
-
763
1254
  class ModelList(RootModel[list[ModelType]]):
764
1255
  """Schema for a standalone list of models."""
765
1256
 
@@ -785,11 +1276,8 @@ class VariableList(RootModel[list[Variable]]):
785
1276
 
786
1277
 
787
1278
  DocumentType = Union[
788
- Agent,
789
1279
  Application,
790
1280
  AuthorizationProviderList,
791
- Flow,
792
- IndexList,
793
1281
  ModelList,
794
1282
  ToolList,
795
1283
  TypeList,