qtype-0.0.16-py3-none-any.whl → qtype-0.1.1-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (128)
  1. qtype/application/commons/tools.py +1 -1
  2. qtype/application/converters/tools_from_api.py +5 -5
  3. qtype/application/converters/tools_from_module.py +2 -2
  4. qtype/application/converters/types.py +14 -43
  5. qtype/application/documentation.py +1 -1
  6. qtype/application/facade.py +94 -73
  7. qtype/base/types.py +227 -7
  8. qtype/cli.py +4 -0
  9. qtype/commands/convert.py +20 -8
  10. qtype/commands/generate.py +19 -27
  11. qtype/commands/run.py +73 -36
  12. qtype/commands/serve.py +74 -54
  13. qtype/commands/validate.py +34 -8
  14. qtype/commands/visualize.py +46 -22
  15. qtype/dsl/__init__.py +6 -5
  16. qtype/dsl/custom_types.py +1 -1
  17. qtype/dsl/domain_types.py +65 -5
  18. qtype/dsl/linker.py +384 -0
  19. qtype/dsl/loader.py +315 -0
  20. qtype/dsl/model.py +612 -363
  21. qtype/dsl/parser.py +200 -0
  22. qtype/dsl/types.py +50 -0
  23. qtype/interpreter/api.py +57 -136
  24. qtype/interpreter/auth/aws.py +19 -9
  25. qtype/interpreter/auth/generic.py +93 -16
  26. qtype/interpreter/base/base_step_executor.py +436 -0
  27. qtype/interpreter/base/batch_step_executor.py +171 -0
  28. qtype/interpreter/base/exceptions.py +50 -0
  29. qtype/interpreter/base/executor_context.py +74 -0
  30. qtype/interpreter/base/factory.py +117 -0
  31. qtype/interpreter/base/progress_tracker.py +110 -0
  32. qtype/interpreter/base/secrets.py +339 -0
  33. qtype/interpreter/base/step_cache.py +74 -0
  34. qtype/interpreter/base/stream_emitter.py +469 -0
  35. qtype/interpreter/conversions.py +462 -22
  36. qtype/interpreter/converters.py +77 -0
  37. qtype/interpreter/endpoints.py +355 -0
  38. qtype/interpreter/executors/agent_executor.py +242 -0
  39. qtype/interpreter/executors/aggregate_executor.py +93 -0
  40. qtype/interpreter/executors/decoder_executor.py +163 -0
  41. qtype/interpreter/executors/doc_to_text_executor.py +112 -0
  42. qtype/interpreter/executors/document_embedder_executor.py +107 -0
  43. qtype/interpreter/executors/document_search_executor.py +122 -0
  44. qtype/interpreter/executors/document_source_executor.py +118 -0
  45. qtype/interpreter/executors/document_splitter_executor.py +105 -0
  46. qtype/interpreter/executors/echo_executor.py +63 -0
  47. qtype/interpreter/executors/field_extractor_executor.py +160 -0
  48. qtype/interpreter/executors/file_source_executor.py +101 -0
  49. qtype/interpreter/executors/file_writer_executor.py +110 -0
  50. qtype/interpreter/executors/index_upsert_executor.py +228 -0
  51. qtype/interpreter/executors/invoke_embedding_executor.py +92 -0
  52. qtype/interpreter/executors/invoke_flow_executor.py +51 -0
  53. qtype/interpreter/executors/invoke_tool_executor.py +358 -0
  54. qtype/interpreter/executors/llm_inference_executor.py +272 -0
  55. qtype/interpreter/executors/prompt_template_executor.py +78 -0
  56. qtype/interpreter/executors/sql_source_executor.py +106 -0
  57. qtype/interpreter/executors/vector_search_executor.py +91 -0
  58. qtype/interpreter/flow.py +159 -22
  59. qtype/interpreter/metadata_api.py +115 -0
  60. qtype/interpreter/resource_cache.py +5 -4
  61. qtype/interpreter/rich_progress.py +225 -0
  62. qtype/interpreter/stream/chat/__init__.py +15 -0
  63. qtype/interpreter/stream/chat/converter.py +391 -0
  64. qtype/interpreter/{chat → stream/chat}/file_conversions.py +2 -2
  65. qtype/interpreter/stream/chat/ui_request_to_domain_type.py +140 -0
  66. qtype/interpreter/stream/chat/vercel.py +609 -0
  67. qtype/interpreter/stream/utils/__init__.py +15 -0
  68. qtype/interpreter/stream/utils/build_vercel_ai_formatter.py +74 -0
  69. qtype/interpreter/stream/utils/callback_to_stream.py +66 -0
  70. qtype/interpreter/stream/utils/create_streaming_response.py +18 -0
  71. qtype/interpreter/stream/utils/default_chat_extract_text.py +20 -0
  72. qtype/interpreter/stream/utils/error_streaming_response.py +20 -0
  73. qtype/interpreter/telemetry.py +135 -8
  74. qtype/interpreter/tools/__init__.py +5 -0
  75. qtype/interpreter/tools/function_tool_helper.py +265 -0
  76. qtype/interpreter/types.py +330 -0
  77. qtype/interpreter/typing.py +83 -89
  78. qtype/interpreter/ui/404/index.html +1 -1
  79. qtype/interpreter/ui/404.html +1 -1
  80. qtype/interpreter/ui/_next/static/{nUaw6_IwRwPqkzwe5s725 → 20HoJN6otZ_LyHLHpCPE6}/_buildManifest.js +1 -1
  81. qtype/interpreter/ui/_next/static/chunks/{393-8fd474427f8e19ce.js → 434-b2112d19f25c44ff.js} +3 -3
  82. qtype/interpreter/ui/_next/static/chunks/app/page-8c67d16ac90d23cb.js +1 -0
  83. qtype/interpreter/ui/_next/static/chunks/ba12c10f-546f2714ff8abc66.js +1 -0
  84. qtype/interpreter/ui/_next/static/css/8a8d1269e362fef7.css +3 -0
  85. qtype/interpreter/ui/icon.png +0 -0
  86. qtype/interpreter/ui/index.html +1 -1
  87. qtype/interpreter/ui/index.txt +4 -4
  88. qtype/semantic/checker.py +583 -0
  89. qtype/semantic/generate.py +262 -83
  90. qtype/semantic/loader.py +95 -0
  91. qtype/semantic/model.py +436 -159
  92. qtype/semantic/resolver.py +63 -19
  93. qtype/semantic/visualize.py +28 -31
  94. {qtype-0.0.16.dist-info → qtype-0.1.1.dist-info}/METADATA +16 -3
  95. qtype-0.1.1.dist-info/RECORD +135 -0
  96. qtype/dsl/base_types.py +0 -38
  97. qtype/dsl/validator.py +0 -465
  98. qtype/interpreter/batch/__init__.py +0 -0
  99. qtype/interpreter/batch/file_sink_source.py +0 -162
  100. qtype/interpreter/batch/flow.py +0 -95
  101. qtype/interpreter/batch/sql_source.py +0 -92
  102. qtype/interpreter/batch/step.py +0 -74
  103. qtype/interpreter/batch/types.py +0 -41
  104. qtype/interpreter/batch/utils.py +0 -178
  105. qtype/interpreter/chat/chat_api.py +0 -237
  106. qtype/interpreter/chat/vercel.py +0 -314
  107. qtype/interpreter/exceptions.py +0 -10
  108. qtype/interpreter/step.py +0 -67
  109. qtype/interpreter/steps/__init__.py +0 -0
  110. qtype/interpreter/steps/agent.py +0 -114
  111. qtype/interpreter/steps/condition.py +0 -36
  112. qtype/interpreter/steps/decoder.py +0 -88
  113. qtype/interpreter/steps/llm_inference.py +0 -171
  114. qtype/interpreter/steps/prompt_template.py +0 -54
  115. qtype/interpreter/steps/search.py +0 -24
  116. qtype/interpreter/steps/tool.py +0 -219
  117. qtype/interpreter/streaming_helpers.py +0 -123
  118. qtype/interpreter/ui/_next/static/chunks/app/page-7e26b6156cfb55d3.js +0 -1
  119. qtype/interpreter/ui/_next/static/chunks/ba12c10f-22556063851a6df2.js +0 -1
  120. qtype/interpreter/ui/_next/static/css/b40532b0db09cce3.css +0 -3
  121. qtype/interpreter/ui/favicon.ico +0 -0
  122. qtype/loader.py +0 -390
  123. qtype-0.0.16.dist-info/RECORD +0 -106
  124. /qtype/interpreter/ui/_next/static/{nUaw6_IwRwPqkzwe5s725 → 20HoJN6otZ_LyHLHpCPE6}/_ssgManifest.js +0 -0
  125. {qtype-0.0.16.dist-info → qtype-0.1.1.dist-info}/WHEEL +0 -0
  126. {qtype-0.0.16.dist-info → qtype-0.1.1.dist-info}/entry_points.txt +0 -0
  127. {qtype-0.0.16.dist-info → qtype-0.1.1.dist-info}/licenses/LICENSE +0 -0
  128. {qtype-0.0.16.dist-info → qtype-0.1.1.dist-info}/top_level.txt +0 -0
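Most of the churn in this release is in qtype/dsl/model.py, shown below: every step and component model gains a `type` discriminator, object-valued fields become `Reference[...]` (or plain ID strings), and credential fields accept a `SecretReference` that is resolved at runtime by the configured SecretManager. The following is a hypothetical usage sketch, not taken from the qtype documentation: it constructs a few of the 0.1.1 models directly in Python using field names visible in the diff (the provider `id` field is assumed from the base class); all IDs, model names, and the secret path are invented for illustration.

    # Hypothetical sketch against the 0.1.1 models in qtype/dsl/model.py (diff below).
    # Field names follow the diff; every concrete value here is invented.
    from qtype.dsl.model import (
        APIKeyAuthProvider,
        Flow,
        LLMInference,
        Model,
        SecretReference,
        Variable,
    )

    auth = APIKeyAuthProvider(
        id="openai-auth",
        # api_key can now be a SecretReference instead of a literal string.
        api_key=SecretReference(secret_name="my-project/openai-api-key"),
    )
    model = Model(id="gpt", provider="openai", model_id="gpt-4o", auth="openai-auth")
    flow = Flow(
        id="answer_question",
        variables=[
            Variable(id="question", type="text"),
            Variable(id="answer", type="text"),
        ],
        inputs=["question"],
        outputs=["answer"],
        steps=[
            LLMInference(
                id="llm",
                model="gpt",  # string ID reference to the Model above
                inputs=["question"],
                outputs=["answer"],
            )
        ],
    )

The `type` literals ("Model", "Flow", "LLMInference", "api_key", ...) have defaults, so they are omitted here; they are what the new discriminated unions (StepType, ModelType, IndexType, SecretManagerType) key on when a document is parsed.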
qtype/dsl/model.py CHANGED
@@ -1,9 +1,11 @@
  from __future__ import annotations

  import inspect
+ import sys
  from abc import ABC
  from enum import Enum
- from typing import Any, Literal, Type, Union
+ from functools import partial
+ from typing import Annotated, Any, Literal, Type, Union

  from pydantic import (
      BaseModel,
@@ -14,20 +16,23 @@ from pydantic import (
  )

  import qtype.dsl.domain_types as domain_types
- from qtype.dsl.base_types import (
+ from qtype.base.types import (
+     BatchableStepMixin,
+     BatchConfig,
+     CachedStepMixin,
+     ConcurrentStepMixin,
      PrimitiveTypeEnum,
+     Reference,
      StepCardinality,
      StrictBaseModel,
  )
- from qtype.dsl.domain_types import ChatContent, ChatMessage, Embedding
-
-
- class StructuralTypeEnum(str, Enum):
-     """Represents a structured type that can be used in the DSL."""
-
-     object = "object"
-     array = "array"
-
+ from qtype.dsl.domain_types import (
+     ChatContent,
+     ChatMessage,
+     Embedding,
+     RAGChunk,
+     RAGDocument,
+ )

  DOMAIN_CLASSES = {
      name: obj
@@ -36,63 +41,164 @@ DOMAIN_CLASSES = {
  }


+ def _resolve_list_type(
+     element_type_str: str, custom_type_registry: dict[str, Type[BaseModel]]
+ ) -> ListType:
+     """
+     Resolve a list element type and return a ListType.
+
+     Args:
+         element_type_str: The element type string (e.g., "text", "ChatMessage")
+         custom_type_registry: Registry of custom types
+
+     Returns:
+         ListType with resolved element type
+
+     Raises:
+         ValueError: If element type is invalid for lists
+     """
+     # Recursively resolve the element type
+     element_type = _resolve_variable_type(
+         element_type_str, custom_type_registry
+     )
+
+     # Allow both primitive types and custom types (but no nested lists)
+     if isinstance(element_type, PrimitiveTypeEnum):
+         return ListType(element_type=element_type)
+     elif isinstance(element_type, str):
+         # This is a custom type reference - store as string for later resolution
+         return ListType(element_type=element_type)
+     elif element_type in DOMAIN_CLASSES.values():
+         # Domain class - store its name as string reference
+         for name, cls in DOMAIN_CLASSES.items():
+             if cls == element_type:
+                 return ListType(element_type=name)
+         return ListType(element_type=str(element_type))
+     else:
+         raise ValueError(
+             (
+                 "List element type must be a primitive or custom type "
+                 f"reference, got: {element_type}"
+             )
+         )
+
+
+ def _resolve_primitive_type(type_str: str) -> PrimitiveTypeEnum | None:
+     """
+     Try to resolve a string as a primitive type.
+
+     Args:
+         type_str: The type string to resolve
+
+     Returns:
+         PrimitiveTypeEnum if it matches, None otherwise
+     """
+     try:
+         return PrimitiveTypeEnum(type_str)
+     except ValueError:
+         return None
+
+
+ def _resolve_domain_type(type_str: str) -> Type[BaseModel] | None:
+     """
+     Try to resolve a string as a built-in domain entity class.
+
+     Args:
+         type_str: The type string to resolve
+
+     Returns:
+         Domain class if found, None otherwise
+     """
+     return DOMAIN_CLASSES.get(type_str)
+
+
+ def _resolve_custom_type(
+     type_str: str, custom_type_registry: dict[str, Type[BaseModel]]
+ ) -> Type[BaseModel] | None:
+     """
+     Try to resolve a string as a custom type from the registry.
+
+     Args:
+         type_str: The type string to resolve
+         custom_type_registry: Registry of custom types
+
+     Returns:
+         Custom type class if found, None otherwise
+     """
+     return custom_type_registry.get(type_str)
+
+
  def _resolve_variable_type(
      parsed_type: Any, custom_type_registry: dict[str, Type[BaseModel]]
  ) -> Any:
-     """Resolve a type string to its corresponding PrimitiveTypeEnum or return as is."""
+     """
+     Resolve a type to its corresponding representation.
+
+     Handles primitive types, list types, domain types, and custom types.
+
+     Args:
+         parsed_type: The type to resolve (can be string or already resolved)
+         custom_type_registry: Registry of dynamically created custom types
+
+     Returns:
+         Resolved type (PrimitiveTypeEnum, ListType, domain class, or string)
+     """
      # If the type is already resolved or is a structured definition, pass it through.
      if not isinstance(parsed_type, str):
          return parsed_type

-     # --- Case 1: The type is a string ---
      # Check if it's a list type (e.g., "list[text]")
      if parsed_type.startswith("list[") and parsed_type.endswith("]"):
-         # Extract the element type from "list[element_type]"
          element_type_str = parsed_type[5:-1]  # Remove "list[" and "]"
+         return _resolve_list_type(element_type_str, custom_type_registry)

-         # Recursively resolve the element type
-         element_type = _resolve_variable_type(
-             element_type_str, custom_type_registry
-         )
+     # Try to resolve as primitive type
+     primitive = _resolve_primitive_type(parsed_type)
+     if primitive is not None:
+         return primitive

-         # Allow both primitive types and custom types (but no nested lists)
-         if isinstance(element_type, PrimitiveTypeEnum):
-             return ListType(element_type=element_type)
-         elif isinstance(element_type, str):
-             # This is a custom type reference - store as string for later resolution
-             return ListType(element_type=element_type)
-         elif element_type in DOMAIN_CLASSES.values():
-             # Domain class - store its name as string reference
-             for name, cls in DOMAIN_CLASSES.items():
-                 if cls == element_type:
-                     return ListType(element_type=name)
-             return ListType(element_type=str(element_type))
-         else:
-             raise ValueError(
-                 f"List element type must be a primitive type or custom type reference, got: {element_type}"
-             )
+     # Try to resolve as built-in domain entity class
+     domain = _resolve_domain_type(parsed_type)
+     if domain is not None:
+         return domain

-     # Try to resolve it as a primitive type first.
-     try:
-         return PrimitiveTypeEnum(parsed_type)
-     except ValueError:
-         pass  # Not a primitive, continue to the next check.
+     # Try to resolve as custom type
+     custom = _resolve_custom_type(parsed_type, custom_type_registry)
+     if custom is not None:
+         return custom

-     # Try to resolve it as a built-in Domain Entity class.
-     # (Assuming domain_types and inspect are defined elsewhere)
-     if parsed_type in DOMAIN_CLASSES:
-         return DOMAIN_CLASSES[parsed_type]
+     # If it's not any known type, return it as a string.
+     # This assumes it might be a forward reference to a custom type.
+     return parsed_type

-     # Check the registry of dynamically created custom types
-     if parsed_type in custom_type_registry:
-         return custom_type_registry[parsed_type]

-     # If it's not a primitive or a known domain entity, return it as a string.
-     # This assumes it might be a reference ID to another custom type.
-     return parsed_type
+ def _resolve_type_field_validator(data: Any, info: ValidationInfo) -> Any:
+     """
+     Shared validator for resolving 'type' fields in models.

+     This validator resolves string-based type references using the custom
+     type registry from the validation context.

- class Variable(BaseModel):
+     Args:
+         data: The data dict being validated
+         info: Pydantic validation info containing context
+
+     Returns:
+         Updated data dict with resolved type field
+     """
+     if (
+         isinstance(data, dict)
+         and "type" in data
+         and isinstance(data["type"], str)
+     ):
+         # Get the registry of custom types from the validation context.
+         custom_types = (info.context or {}).get("custom_types", {})
+         resolved = _resolve_variable_type(data["type"], custom_types)
+         data["type"] = resolved
+     return data
+
+
+ class Variable(StrictBaseModel):
      """Schema for a variable that can serve as input, output, or parameter within the DSL."""

      id: str = Field(
@@ -109,21 +215,24 @@ class Variable(BaseModel):
      @model_validator(mode="before")
      @classmethod
      def resolve_type(cls, data: Any, info: ValidationInfo) -> Any:
-         """
-         This validator runs during the main validation pass. It uses the
-         context to resolve string-based type references.
-         """
-         if (
-             isinstance(data, dict)
-             and "type" in data
-             and isinstance(data["type"], str)
-         ):
-             # Get the registry of custom types from the validation context.
-             custom_types = (info.context or {}).get("custom_types", {})
-             resolved = _resolve_variable_type(data["type"], custom_types)
-             # {'id': 'user_message', 'type': 'ChatMessage'}
-             data["type"] = resolved
-         return data
+         """Resolve string-based type references using the shared validator."""
+         return _resolve_type_field_validator(data, info)
+
+
+ class SecretReference(StrictBaseModel):
+     """
+     A reference to a secret in the application's configured SecretManager.
+     This value is resolved at runtime by the interpreter.
+     """
+
+     secret_name: str = Field(
+         ...,
+         description="The name, ID, or ARN of the secret to fetch (e.g., 'my-project/db-password').",
+     )
+     key: str | None = Field(
+         default=None,
+         description="Optional key if the secret is a JSON blob or map (e.g., a specific key in a K8s secret).",
+     )


  class CustomType(StrictBaseModel):
@@ -145,20 +254,8 @@ class ToolParameter(BaseModel):
      @model_validator(mode="before")
      @classmethod
      def resolve_type(cls, data: Any, info: ValidationInfo) -> Any:
-         """
-         This validator runs during the main validation pass. It uses the
-         context to resolve string-based type references.
-         """
-         if (
-             isinstance(data, dict)
-             and "type" in data
-             and isinstance(data["type"], str)
-         ):
-             # Get the registry of custom types from the validation context.
-             custom_types = (info.context or {}).get("custom_types", {})
-             resolved = _resolve_variable_type(data["type"], custom_types)
-             data["type"] = resolved
-         return data
+         """Resolve string-based type references using the shared validator."""
+         return _resolve_type_field_validator(data, info)


  class ListType(BaseModel):
@@ -183,6 +280,8 @@ VariableType = (
      | Type[ChatMessage]
      | Type[ChatContent]
      | Type[BaseModel]
+     | Type[RAGDocument]
+     | Type[RAGChunk]
      | ListType
  )

@@ -190,28 +289,31 @@ VariableType = (
  class Model(StrictBaseModel):
      """Describes a generative model configuration, including provider and model ID."""

+     type: Literal["Model"] = "Model"
      id: str = Field(..., description="Unique ID for the model.")
-     auth: AuthProviderType | str | None = Field(
+     auth: Reference[AuthProviderType] | str | None = Field(
          default=None,
          description="AuthorizationProvider used for model access.",
      )
-     inference_params: dict[str, Any] | None = Field(
-         default=None,
+     inference_params: dict[str, Any] = Field(
+         default_factory=dict,
          description="Optional inference parameters like temperature or max_tokens.",
      )
      model_id: str | None = Field(
          default=None,
          description="The specific model name or ID for the provider. If None, id is used",
      )
-     # TODO(maybe): Make this an enum?
-     provider: str = Field(
-         ..., description="Name of the provider, e.g., openai or anthropic."
+     provider: Literal["openai", "anthropic", "aws-bedrock", "gcp-vertex"] = (
+         Field(
+             ..., description="Name of the provider, e.g., openai or anthropic."
+         )
      )


  class EmbeddingModel(Model):
      """Describes an embedding model configuration, extending the base Model class."""

+     type: Literal["EmbeddingModel"] = "EmbeddingModel"
      dimensions: int = Field(
          ...,
          description="Dimensionality of the embedding vectors produced by this model.",
@@ -243,20 +345,22 @@ class Memory(StrictBaseModel):
  #


- class Step(StrictBaseModel, ABC):
+ class Step(CachedStepMixin, StrictBaseModel, ABC):
      """Base class for components that take inputs and produce outputs."""

      id: str = Field(..., description="Unique ID of this component.")
+     type: str = Field(..., description="Type of the step component.")
      cardinality: StepCardinality = Field(
          default=StepCardinality.one,
          description="Does this step emit 1 (one) or 0...N (many) instances of the outputs?",
      )
-     inputs: list[Variable | str] | None = Field(
-         default=None,
-         description="Input variables required by this step.",
+     inputs: list[Reference[Variable] | str] = Field(
+         default_factory=list,
+         description="References to the variables required by this step.",
      )
-     outputs: list[Variable | str] | None = Field(
-         default=None, description="Variable where output is stored."
+     outputs: list[Reference[Variable] | str] = Field(
+         default_factory=list,
+         description="References to the variables where output is stored.",
      )


@@ -264,50 +368,12 @@ class PromptTemplate(Step):
      """Defines a prompt template with a string format and variable bindings.
      This is used to generate prompts dynamically based on input variables."""

+     type: Literal["PromptTemplate"] = "PromptTemplate"  # type: ignore
      template: str = Field(
          ...,
          description="String template for the prompt with variable placeholders.",
      )

-     @model_validator(mode="after")
-     def set_default_outputs(self) -> "PromptTemplate":
-         """Set default output variable if none provided."""
-         if self.outputs is None:
-             self.outputs = [
-                 Variable(id=f"{self.id}.prompt", type=PrimitiveTypeEnum.text)
-             ]
-         if len(self.outputs) != 1:
-             raise ValueError(
-                 "PromptTemplate steps must have exactly one output variable -- the result of applying the template."
-             )
-         return self
-
-
- class Condition(Step):
-     """Conditional logic gate within a flow. Supports branching logic for execution based on variable values."""
-
-     # TODO: Add support for more complex conditions
-     else_: StepType | str | None = Field(
-         default=None,
-         alias="else",
-         description="Optional step to run if condition fails.",
-     )
-     equals: Variable | str | None = Field(
-         default=None, description="Match condition for equality check."
-     )
-     then: StepType | str = Field(
-         ..., description="Step to run if condition matches."
-     )
-
-     @model_validator(mode="after")
-     def set_default_outputs(self) -> "Condition":
-         """Set default output variable if none provided."""
-         if not self.inputs or len(self.inputs) != 1:
-             raise ValueError(
-                 "Condition steps must have exactly one input variable."
-             )
-         return self
-

  class Tool(StrictBaseModel, ABC):
      """
@@ -319,12 +385,12 @@ class Tool(StrictBaseModel, ABC):
      description: str = Field(
          ..., description="Description of what the tool does."
      )
-     inputs: dict[str, ToolParameter] | None = Field(
-         default=None,
+     inputs: dict[str, ToolParameter] = Field(
+         default_factory=dict,
          description="Input parameters required by this tool.",
      )
-     outputs: dict[str, ToolParameter] | None = Field(
-         default=None,
+     outputs: dict[str, ToolParameter] = Field(
+         default_factory=dict,
          description="Output parameters produced by this tool.",
      )

@@ -332,6 +398,7 @@ class Tool(StrictBaseModel, ABC):
  class PythonFunctionTool(Tool):
      """Tool that calls a Python function."""

+     type: Literal["PythonFunctionTool"] = "PythonFunctionTool"
      function_name: str = Field(
          ..., description="Name of the Python function to call."
      )
@@ -344,34 +411,36 @@ class PythonFunctionTool(Tool):
  class APITool(Tool):
      """Tool that invokes an API endpoint."""

+     type: Literal["APITool"] = "APITool"
      endpoint: str = Field(..., description="API endpoint URL to call.")
      method: str = Field(
          default="GET",
          description="HTTP method to use (GET, POST, PUT, DELETE, etc.).",
      )
-     auth: AuthProviderType | str | None = Field(
+     auth: Reference[AuthProviderType] | str | None = Field(
          default=None,
          description="Optional AuthorizationProvider for API authentication.",
      )
-     headers: dict[str, str] | None = Field(
-         default=None,
+     headers: dict[str, str] = Field(
+         default_factory=dict,
          description="Optional HTTP headers to include in the request.",
      )
-     parameters: dict[str, ToolParameter] | None = Field(
-         default=None,
+     parameters: dict[str, ToolParameter] = Field(
+         default_factory=dict,
          description="Output parameters produced by this tool.",
      )


- class LLMInference(Step):
+ class LLMInference(Step, ConcurrentStepMixin):
      """Defines a step that performs inference using a language model.
      It can take input variables and produce output variables based on the model's response."""

-     memory: Memory | str | None = Field(
+     type: Literal["LLMInference"] = "LLMInference"
+     memory: Reference[Memory] | str | None = Field(
          default=None,
-         description="Memory object to retain context across interactions.",
+         description="A reference to a Memory object to retain context across interactions.",
      )
-     model: ModelType | str = Field(
+     model: Reference[Model] | str = Field(
          ..., description="The model to use for inference."
      )
      system_message: str | None = Field(
@@ -379,43 +448,70 @@ class LLMInference(Step):
          description="Optional system message to set the context for the model.",
      )

-     @model_validator(mode="after")
-     def set_default_outputs(self) -> "LLMInference":
-         """Set default output variable if none provided."""
-         if self.outputs is None:
-             self.outputs = [
-                 Variable(id=f"{self.id}.response", type=PrimitiveTypeEnum.text)
-             ]
-         return self
+
+ class InvokeEmbedding(Step, ConcurrentStepMixin):
+     """Defines a step that generates embeddings using an embedding model.
+     It takes input variables and produces output variables containing the embeddings."""
+
+     type: Literal["InvokeEmbedding"] = "InvokeEmbedding"
+     model: Reference[EmbeddingModel] | str = Field(
+         ..., description="The embedding model to use."
+     )


  class Agent(LLMInference):
      """Defines an agent that can perform tasks and make decisions based on user input and context."""

-     tools: list[ToolType | str] = Field(
-         ..., description="List of tools available to the agent."
+     type: Literal["Agent"] = "Agent"
+
+     tools: list[Reference[ToolType] | str] = Field(
+         default_factory=list,
+         description="List of tools available to the agent.",
      )


- class Flow(Step):
+ class Flow(StrictBaseModel):
      """Defines a flow of steps that can be executed in sequence or parallel.
      If input or output variables are not specified, they are inferred from
-     the first and last step, respectively.
-     """
+     the first and last step, respectively."""

+     id: str = Field(..., description="Unique ID of the flow.")
+     type: Literal["Flow"] = "Flow"
      description: str | None = Field(
          default=None, description="Optional description of the flow."
      )
+     steps: list[StepType | Reference[StepType]] = Field(
+         default_factory=list,
+         description="List of steps or references to steps",
+     )

-     cardinality: StepCardinality = Field(
-         default=StepCardinality.auto,
-         description="The cardinality of the flow, inferred from its steps when set to 'auto'.",
+     interface: FlowInterface | None = Field(default=None)
+     variables: list[Variable] = Field(
+         default_factory=list,
+         description="List of variables available at the application scope.",
+     )
+     inputs: list[Reference[Variable] | str] = Field(
+         default_factory=list,
+         description="Input variables required by this step.",
      )
+     outputs: list[Reference[Variable] | str] = Field(
+         default_factory=list, description="Resulting variables"
+     )
+
+
+ class FlowInterface(StrictBaseModel):
+     """
+     Defines the public-facing contract for a Flow, guiding the UI
+     and session management.
+     """

-     mode: Literal["Complete", "Chat"] = "Complete"
+     # 1. Tells the UI how to render this flow
+     type: Literal["Complete", "Conversational"] = "Complete"

-     steps: list[StepType | str] = Field(
-         default_factory=list, description="List of steps or step IDs."
+     # 2. Declares which inputs are "sticky" and persisted in the session
+     session_inputs: list[Reference[Variable] | str] = Field(
+         default_factory=list,
+         description="A list of input variable IDs that are set once and then persisted across a session.",
      )


@@ -430,53 +526,117 @@ class Decoder(Step):
      """Defines a step that decodes string data into structured outputs.

      If parsing fails, the step will raise an error and halt execution.
-     Use conditional logic in your flow to handle potential parsing errors.
-     """
+     Use conditional logic in your flow to handle potential parsing errors."""
+
+     type: Literal["Decoder"] = "Decoder"

      format: DecoderFormat = Field(
          DecoderFormat.json,
          description="Format in which the decoder processes data. Defaults to JSON.",
      )

-     @model_validator(mode="after")
-     def set_default_outputs(self) -> "Decoder":
-         """Set default output variable if none provided."""
-
-         if (
-             self.inputs is None
-             or len(self.inputs) != 1
-             or (
-                 isinstance(self.inputs[0], Variable)
-                 and self.inputs[0].type != PrimitiveTypeEnum.text
-             )
-         ):
-             raise ValueError(
-                 f"Decoder steps must have exactly one input variable of type 'text'. Found: {self.inputs}"
-             )
-         if self.outputs is None:
-             raise ValueError(
-                 "Decoder steps must have at least one output variable defined."
-             )
-         return self

+ class Echo(Step):
+     """Defines a step that echoes its inputs as outputs.
+
+     Useful for debugging flows by inspecting variable values at a specific
+     point in the execution pipeline. The step simply passes through all input
+     variables as outputs without modification.
+     """

- class Invoke(Step):
+     type: Literal["Echo"] = "Echo"
+
+
+ class FieldExtractor(Step):
+     """Extracts specific fields from input data using JSONPath expressions.
+
+     This step uses JSONPath syntax to extract data from structured inputs
+     (Pydantic models, dicts, lists). The input is first converted to a dict
+     using model_dump() if it's a Pydantic model, then the JSONPath expression
+     is evaluated.
+
+     If the JSONPath matches multiple values, the step yields multiple output
+     messages (1-to-many cardinality). If it matches a single value, it yields
+     one output message. If it matches nothing, it raises an error.
+
+     The extracted data is used to construct the output variable by passing it
+     as keyword arguments to the output type's constructor.
+
+     Example JSONPath expressions:
+     - `$.field_name` - Extract a single field
+     - `$.items[*]` - Extract all items from a list
+     - `$.items[?(@.price > 10)]` - Filter items by condition
+     """
+
+     type: Literal["FieldExtractor"] = "FieldExtractor"
+     json_path: str = Field(
+         ...,
+         description="JSONPath expression to extract data from the input. Uses jsonpath-ng syntax.",
+     )
+
+
+ class InvokeTool(Step, ConcurrentStepMixin):
      """Invokes a tool with input and output bindings."""

-     tool: ToolType | str = Field(
+     type: Literal["InvokeTool"] = "InvokeTool"
+
+     tool: Reference[ToolType] | str = Field(
          ...,
          description="Tool to invoke.",
      )
      input_bindings: dict[str, str] = Field(
          ...,
-         description="Mapping from step input IDs to tool input parameter names.",
+         description="Mapping from variable references to tool input parameter names.",
      )
      output_bindings: dict[str, str] = Field(
          ...,
-         description="Mapping from tool output parameter names to step output IDs.",
+         description="Mapping from variable references to tool output parameter names.",
      )


+ class InvokeFlow(Step):
+     """Invokes a flow with input and output bindings."""
+
+     type: Literal["InvokeFlow"] = "InvokeFlow"
+
+     flow: Reference[Flow] | str = Field(
+         ...,
+         description="Flow to invoke.",
+     )
+     input_bindings: dict[Reference[Variable], str] = Field(
+         ...,
+         description="Mapping from variable references to flow input variable IDs.",
+     )
+     output_bindings: dict[Reference[Variable], str] = Field(
+         ...,
+         description="Mapping from variable references to flow output variable IDs.",
+     )
+
+
+ #
+ # ---------------- Secret Manager Component ----------------
+ #
+
+
+ class SecretManager(StrictBaseModel, ABC):
+     """Base class for secret manager configurations."""
+
+     id: str = Field(
+         ..., description="Unique ID for this secret manager configuration."
+     )
+     type: str = Field(..., description="The type of secret manager.")
+     auth: Reference[AuthProviderType] | str = Field(
+         ...,
+         description="AuthorizationProvider used to access this secret manager.",
+     )
+
+
+ class AWSSecretManager(SecretManager):
+     """Configuration for AWS Secrets Manager."""
+
+     type: Literal["aws_secret_manager"] = "aws_secret_manager"
+
+
  #
  # ---------------- Observability and Authentication Components ----------------
  #
@@ -495,7 +655,9 @@ class APIKeyAuthProvider(AuthorizationProvider):
      """API key-based authentication provider."""

      type: Literal["api_key"] = "api_key"
-     api_key: str = Field(..., description="API key for authentication.")
+     api_key: str | SecretReference = Field(
+         ..., description="API key for authentication."
+     )
      host: str | None = Field(
          default=None, description="Base URL or domain of the provider."
      )
@@ -505,7 +667,9 @@ class BearerTokenAuthProvider(AuthorizationProvider):
      """Bearer token authentication provider."""

      type: Literal["bearer_token"] = "bearer_token"
-     token: str = Field(..., description="Bearer token for authentication.")
+     token: str | SecretReference = Field(
+         ..., description="Bearer token for authentication."
+     )


  class OAuth2AuthProvider(AuthorizationProvider):
@@ -513,26 +677,60 @@ class OAuth2AuthProvider(AuthorizationProvider):

      type: Literal["oauth2"] = "oauth2"
      client_id: str = Field(..., description="OAuth2 client ID.")
-     client_secret: str = Field(..., description="OAuth2 client secret.")
+     client_secret: str | SecretReference = Field(
+         ..., description="OAuth2 client secret."
+     )
      token_url: str = Field(..., description="Token endpoint URL.")
-     scopes: list[str] | None = Field(
-         default=None, description="OAuth2 scopes required."
+     scopes: list[str] = Field(
+         default_factory=list, description="OAuth2 scopes required."
      )


+ class VertexAuthProvider(AuthorizationProvider):
+     """Google Vertex authentication provider supporting gcloud profile or service account."""
+
+     type: Literal["vertex"] = "vertex"
+     profile_name: str | None = Field(
+         default=None,
+         description="Local gcloud profile name (if using existing CLI credentials).",
+     )
+     project_id: str | None = Field(
+         default=None,
+         description="Explicit GCP project ID override (if different from profile).",
+     )
+     service_account_file: str | None = Field(
+         default=None,
+         description="Path to a service account JSON key file.",
+     )
+     region: str | None = Field(
+         default=None,
+         description="Vertex region (e.g., us-central1).",
+     )
+
+     @model_validator(mode="after")
+     def validate_vertex_auth(self) -> VertexAuthProvider:
+         """Ensure at least one credential source is provided."""
+         if not (self.profile_name or self.service_account_file):
+             raise ValueError(
+                 "VertexAuthProvider requires either a profile_name or a "
+                 "service_account_file."
+             )
+         return self
+
+
  class AWSAuthProvider(AuthorizationProvider):
      """AWS authentication provider supporting multiple credential methods."""

      type: Literal["aws"] = "aws"

      # Method 1: Access key/secret/session
-     access_key_id: str | None = Field(
+     access_key_id: str | SecretReference | None = Field(
          default=None, description="AWS access key ID."
      )
-     secret_access_key: str | None = Field(
+     secret_access_key: str | SecretReference | None = Field(
          default=None, description="AWS secret access key."
      )
-     session_token: str | None = Field(
+     session_token: str | SecretReference | None = Field(
          default=None,
          description="AWS session token for temporary credentials.",
      )
@@ -557,7 +755,7 @@ class AWSAuthProvider(AuthorizationProvider):
      region: str | None = Field(default=None, description="AWS region.")

      @model_validator(mode="after")
-     def validate_aws_auth(self) -> "AWSAuthProvider":
+     def validate_aws_auth(self) -> AWSAuthProvider:
          """Validate AWS authentication configuration."""
          # At least one auth method must be specified
          has_keys = self.access_key_id and self.secret_access_key
@@ -585,13 +783,18 @@ class TelemetrySink(StrictBaseModel):
      id: str = Field(
          ..., description="Unique ID of the telemetry sink configuration."
      )
-     auth: AuthProviderType | str | None = Field(
+     provider: Literal["Phoenix", "Langfuse"] = "Phoenix"
+     auth: Reference[AuthProviderType] | str | None = Field(
          default=None,
          description="AuthorizationProvider used to authenticate telemetry data transmission.",
      )
-     endpoint: str = Field(
+     endpoint: str | SecretReference = Field(
          ..., description="URL endpoint where telemetry data will be sent."
      )
+     args: dict[str, Any] = Field(
+         default_factory=dict,
+         description="Additional configuration arguments specific to the telemetry sink type.",
+     )


  #
@@ -616,48 +819,53 @@ class Application(StrictBaseModel):
      )

      # Core components
-     memories: list[Memory] | None = Field(
-         default=None,
+     memories: list[Memory] = Field(
+         default_factory=list,
          description="List of memory definitions used in this application.",
      )
-     models: list[ModelType] | None = Field(
-         default=None, description="List of models used in this application."
+     models: list[ModelType] = Field(
+         default_factory=list,
+         description="List of models used in this application.",
      )
-     types: list[CustomType] | None = Field(
-         default=None,
+     types: list[CustomType] = Field(
+         default_factory=list,
          description="List of custom types defined in this application.",
      )
-     variables: list[Variable] | None = Field(
-         default=None, description="List of variables used in this application."
-     )

      # Orchestration
-     flows: list[Flow] | None = Field(
-         default=None, description="List of flows defined in this application."
+     flows: list[Flow] = Field(
+         default_factory=list,
+         description="List of flows defined in this application.",
      )

      # External integrations
-     auths: list[AuthProviderType] | None = Field(
-         default=None,
+     auths: list[AuthProviderType] = Field(
+         default_factory=list,
          description="List of authorization providers used for API access.",
      )
-     tools: list[ToolType] | None = Field(
-         default=None,
+     tools: list[ToolType] = Field(
+         default_factory=list,
          description="List of tools available in this application.",
      )
-     indexes: list[IndexType] | None = Field(
-         default=None,
+     indexes: list[IndexType] = Field(
+         default_factory=list,
          description="List of indexes available for search operations.",
      )

+     # Secret management
+     secret_manager: SecretManagerType | None = Field(
+         default=None,
+         description="Optional secret manager configuration for the application.",
+     )
+
      # Observability
      telemetry: TelemetrySink | None = Field(
          default=None, description="Optional telemetry sink for observability."
      )

      # Extensibility
-     references: list[Document] | None = Field(
-         default=None,
+     references: list[Document] = Field(
+         default_factory=list,
          description="List of other q-type documents you may use. This allows modular composition and reuse of components across applications.",
      )

@@ -667,6 +875,14 @@
  #


+ class ConstantPath(StrictBaseModel):
+     uri: str = Field(..., description="A constant Fsspec URI.")
+
+
+ # Let's the user use a constant path or reference a variable
+ PathType = ConstantPath | Reference[Variable] | str
+
+
  class Source(Step):
      """Base class for data sources"""

@@ -680,135 +896,161 @@ class Source(Step):
  class SQLSource(Source):
      """SQL database source that executes queries and emits rows."""

+     type: Literal["SQLSource"] = "SQLSource"
      query: str = Field(
          ..., description="SQL query to execute. Inputs are injected as params."
      )
-     connection: str = Field(
+     connection: str | SecretReference = Field(
          ...,
          description="Database connection string or reference to auth provider. Typically in SQLAlchemy format.",
      )
-     auth: AuthProviderType | str | None = Field(
+     auth: Reference[AuthProviderType] | str | None = Field(
          default=None,
          description="Optional AuthorizationProvider for database authentication.",
      )

-     @model_validator(mode="after")
-     def validate_sql_source(self) -> "SQLSource":
-         """Validate SQL source configuration."""
-         if self.outputs is None:
-             raise ValueError(
-                 "SQLSource must define output variables that match the result columns."
-             )
-         return self
-

  class FileSource(Source):
      """File source that reads data from a file using fsspec-compatible URIs."""

-     path: str | None = Field(
-         default=None,
-         description="fsspec-compatible URI to read from. If None, expects 'path' input variable.",
+     type: Literal["FileSource"] = "FileSource"
+     path: PathType = Field(
+         default=...,
+         description="Reference to a variable with an fsspec-compatible URI to read from, or the uri itself.",
      )

-     @model_validator(mode="after")
-     def validate_file_source(self) -> "FileSource":
-         """Validate that either path is specified or 'path' input variable exists."""
-         if self.path is None:
-             # Check if 'path' input variable exists
-             if self.inputs is None:
-                 raise ValueError(
-                     "FileSource must either specify 'path' field or have a 'path' input variable."
-                 )
-
-             path_input_exists = any(
-                 (isinstance(inp, Variable) and inp.id == "path")
-                 or (isinstance(inp, str) and inp == "path")
-                 for inp in self.inputs
-             )

-             if not path_input_exists:
-                 raise ValueError(
-                     "FileSource must either specify 'path' field or have a 'path' input variable."
-                 )
+ class Writer(Step, BatchableStepMixin):
+     """Base class for things that write data in batches."""

-         return self
+     id: str = Field(..., description="Unique ID of the data writer.")


- class Sink(Step):
-     """Base class for data sinks"""
+ class FileWriter(Writer, BatchableStepMixin):
+     """File writer that writes data to a file using fsspec-compatible URIs."""

-     id: str = Field(..., description="Unique ID of the data sink.")
-     # Remove cardinality field - it's always one for sinks
-     # ...existing code...
-     cardinality: Literal[StepCardinality.one] = Field(
-         default=StepCardinality.one,
-         description="Flows always emit exactly one instance of the outputs.",
+     type: Literal["FileWriter"] = "FileWriter"
+     path: PathType = Field(
+         default=...,
+         description="Reference to a variable with an fsspec-compatible URI to read from, or the uri itself.",
+     )
+     batch_config: BatchConfig = Field(
+         default_factory=partial(BatchConfig, batch_size=sys.maxsize),
+         description="Configuration for processing the input stream in batches. If omitted, the step processes items one by one.",
+     )
+
+
+ class Aggregate(Step):
+     """
+     A terminal step that consumes an entire input stream and produces a single
+     summary message with success/error counts.
+     """
+
+     type: Literal["Aggregate"] = "Aggregate"
+     cardinality: Literal[StepCardinality.one] = StepCardinality.one
+
+     # Outputs are now optional. The user can provide 0, 1, 2, or 3 names.
+     # The order will be: success_count, error_count, total_count
+     outputs: list[Reference[Variable] | str] = Field(
+         default_factory=list,
+         description="References to the variables for the output. There should be one and only one output with type AggregateStats",
      )


- class FileSink(Sink):
-     """File sink that writes data to a file using fsspec-compatible URIs."""
+ #
+ # ---------------- Retrieval Augmented Generation Components ----------------
+ #

-     path: str | None = Field(
+
+ class DocumentSource(Source):
+     """A source of documents that will be used in retrieval augmented generation.
+     It uses LlamaIndex readers to load one or more raw Documents
+     from a specified path or system (e.g., Google Drive, web page).
+     See https://github.com/run-llama/llama_index/tree/main/llama-index-integrations/readers
+     """
+
+     type: Literal["DocumentSource"] = "DocumentSource"
+     reader_module: str = Field(
+         ...,
+         description="Module path of the LlamaIndex Reader).",
+     )
+     args: dict[str, Any] = Field(
+         default_factory=dict,
+         description="Reader-specific arguments to pass to the Reader constructor.",
+     )
+     loader_args: dict[str, Any] = Field(
+         default_factory=dict,
+         description="Loader-specific arguments to pass to the load_data method.",
+     )
+     auth: Reference[AuthProviderType] | str | None = Field(
          default=None,
-         description="fsspec-compatible URI to write to. If None, expects 'path' input variable.",
+         description="AuthorizationProvider for accessing the source.",
      )

-     @model_validator(mode="after")
-     def validate_file_sink(self) -> "FileSink":
-         """Validate that either path is specified or 'path' input variable exists."""
-         # Ensure user does not set any output variables
-         if self.outputs is not None and len(self.outputs) > 0:
-             raise ValueError(
-                 "FileSink outputs are automatically generated. Do not specify outputs."
-             )

-         # Automatically set the output variable
-         self.outputs = [Variable(id=f"{self.id}-file-uri", type="text")]
+ class DocToTextConverter(Step, ConcurrentStepMixin):
+     """Defines a step to convert raw documents (e.g., PDF, DOCX) loaded by a DocumentSource into plain text
+     using an external tool like Docling or LlamaParse for pre-processing before chunking.
+     The input and output are both RAGDocument, but the output after processing with have content of type markdown.
+     """

-         if self.path is None:
-             # Check if 'path' input variable exists
-             if self.inputs is None:
-                 raise ValueError(
-                     "FileSink must either specify 'path' field or have a 'path' input variable."
-                 )
+     type: Literal["DocToTextConverter"] = "DocToTextConverter"

-             path_input_exists = any(
-                 (isinstance(inp, Variable) and inp.id == "path")
-                 or (isinstance(inp, str) and inp == "path")
-                 for inp in self.inputs
-             )

-             if not path_input_exists:
-                 raise ValueError(
-                     "FileSink must either specify 'path' field or have a 'path' input variable."
-                 )
+ class DocumentSplitter(Step, ConcurrentStepMixin):
+     """Configuration for chunking/splitting documents into embeddable nodes/chunks."""

-         return self
+     type: Literal["DocumentSplitter"] = "DocumentSplitter"
+     cardinality: Literal[StepCardinality.many] = Field(
+         default=StepCardinality.many,
+         description="Consumes one document and emits 0...N nodes/chunks.",
+     )
+
+     splitter_name: str = Field(
+         default="SentenceSplitter",
+         description="Name of the LlamaIndex TextSplitter class.",
+     )
+     chunk_size: int = Field(default=1024, description="Size of each chunk.")
+     chunk_overlap: int = Field(
+         default=20, description="Overlap between consecutive chunks."
+     )
+     args: dict[str, Any] = Field(
+         default_factory=dict,
+         description="Additional arguments specific to the chosen splitter class.",
+     )


- #
- # ---------------- Retrieval Augmented Generation Components ----------------
- #
+ class DocumentEmbedder(Step, ConcurrentStepMixin):
+     """Embeds document chunks using a specified embedding model."""
+
+     type: Literal["DocumentEmbedder"] = "DocumentEmbedder"
+     cardinality: Literal[StepCardinality.many] = Field(
+         default=StepCardinality.many,
+         description="Consumes one chunk and emits one embedded chunk.",
+     )
+     model: Reference[EmbeddingModel] | str = Field(
+         ..., description="Embedding model to use for vectorization."
+     )


  class Index(StrictBaseModel, ABC):
      """Base class for searchable indexes that can be queried by search steps."""

      id: str = Field(..., description="Unique ID of the index.")
-     args: dict[str, Any] | None = Field(
-         default=None,
+     args: dict[str, Any] = Field(
+         default_factory=dict,
          description="Index-specific configuration and connection parameters.",
      )
-     auth: AuthProviderType | str | None = Field(
+     auth: Reference[AuthProviderType] | str | None = Field(
          default=None,
          description="AuthorizationProvider for accessing the index.",
      )
      name: str = Field(..., description="Name of the index/collection/table.")


- class IndexUpsert(Sink):
-     index: IndexType | str = Field(
+ class IndexUpsert(Writer):
+     type: Literal["IndexUpsert"] = "IndexUpsert"
+     index: Reference[IndexType] | str = Field(
          ..., description="Index to upsert into (object or ID reference)."
      )

@@ -816,7 +1058,12 @@ class IndexUpsert(Sink):
  class VectorIndex(Index):
      """Vector database index for similarity search using embeddings."""

-     embedding_model: EmbeddingModel | str = Field(
+     type: Literal["VectorIndex"] = "VectorIndex"
+     module: str = Field(
+         ...,
+         description="Python module path for the vector store implementation (e.g., 'llama_index.vector_stores.qdrant.QdrantVectorStore').",
+     )
+     embedding_model: Reference[EmbeddingModel] | str = Field(
          ...,
          description="Embedding model used to vectorize queries and documents.",
      )
@@ -825,67 +1072,53 @@ class VectorIndex(Index):
  class DocumentIndex(Index):
      """Document search index for text-based search (e.g., Elasticsearch, OpenSearch)."""

-     # TODO: add anything that is needed for document search indexes
-     pass
+     type: Literal["DocumentIndex"] = "DocumentIndex"
+     endpoint: str = Field(
+         ...,
+         description="URL endpoint for the search cluster (e.g., https://my-cluster.es.amazonaws.com).",
+     )


  class Search(Step, ABC):
      """Base class for search operations against indexes."""

-     filters: dict[str, Any] | None = Field(
-         default=None, description="Optional filters to apply during search."
+     filters: dict[str, Any] = Field(
+         default_factory=dict,
+         description="Optional filters to apply during search.",
      )
-     index: IndexType | str = Field(
+     index: Reference[IndexType] | str = Field(
          ..., description="Index to search against (object or ID reference)."
      )


- class VectorSearch(Search):
+ class VectorSearch(Search, BatchableStepMixin):
      """Performs vector similarity search against a vector index."""

+     type: Literal["VectorSearch"] = "VectorSearch"
      default_top_k: int | None = Field(
          default=50,
          description="Number of top results to retrieve if not provided in the inputs.",
      )

-     @model_validator(mode="after")
-     def set_default_inputs_outputs(self) -> "VectorSearch":
-         """Set default input and output variables if none provided."""
-         if self.inputs is None:
-             self.inputs = [
-                 Variable(id="top_k", type=PrimitiveTypeEnum.int),
-                 Variable(id="query", type=PrimitiveTypeEnum.text),
-             ]
-
-         if self.outputs is None:
-             self.outputs = [Variable(id=f"{self.id}.results", type=Embedding)]
-         return self
-

- class DocumentSearch(Search):
+ class DocumentSearch(Search, ConcurrentStepMixin):
      """Performs document search against a document index."""

-     @model_validator(mode="after")
-     def set_default_inputs_outputs(self) -> "DocumentSearch":
-         """Set default input and output variables if none provided."""
-         if self.inputs is None:
-             self.inputs = [Variable(id="query", type=PrimitiveTypeEnum.text)]
-
-         if self.outputs is None:
-             self.outputs = [
-                 Variable(id=f"{self.id}.results", type=PrimitiveTypeEnum.text)
-             ]
-         return self
+     type: Literal["DocumentSearch"] = "DocumentSearch"


  # Create a union type for all tool types
- ToolType = Union[
-     APITool,
-     PythonFunctionTool,
+ ToolType = Annotated[
+     Union[
+         APITool,
+         PythonFunctionTool,
+     ],
+     Field(discriminator="type"),
  ]

  # Create a union type for all source types
  SourceType = Union[
+     DocumentSource,
      FileSource,
      SQLSource,
  ]
@@ -896,36 +1129,61 @@ AuthProviderType = Union[
      BearerTokenAuthProvider,
      AWSAuthProvider,
      OAuth2AuthProvider,
+     VertexAuthProvider,
+ ]
+
+ # Create a union type for all secret manager types
+ SecretManagerType = Annotated[
+     Union[
+         AWSSecretManager
+         # Add future managers like KubernetesSecretManager here
+     ],
+     Field(discriminator="type"),
  ]

  # Create a union type for all step types
- StepType = Union[
-     Agent,
-     Condition,
-     Decoder,
-     DocumentSearch,
-     FileSink,
-     FileSource,
-     Flow,
-     IndexUpsert,
-     Invoke,
-     LLMInference,
-     PromptTemplate,
-     SQLSource,
-     Sink,
-     VectorSearch,
+ StepType = Annotated[
+     Union[
+         Agent,
+         Aggregate,
+         Decoder,
+         DocToTextConverter,
+         DocumentEmbedder,
+         DocumentSearch,
+         DocumentSplitter,
+         DocumentSource,
+         Echo,
+         FieldExtractor,
+         FileSource,
+         FileWriter,
+         IndexUpsert,
+         InvokeEmbedding,
+         InvokeFlow,
+         InvokeTool,
+         LLMInference,
+         PromptTemplate,
+         SQLSource,
+         VectorSearch,
+     ],
+     Field(discriminator="type"),
  ]

  # Create a union type for all index types
- IndexType = Union[
-     DocumentIndex,
-     VectorIndex,
+ IndexType = Annotated[
+     Union[
+         DocumentIndex,
+         VectorIndex,
+     ],
+     Field(discriminator="type"),
  ]

  # Create a union type for all model types
- ModelType = Union[
-     EmbeddingModel,
-     Model,
+ ModelType = Annotated[
+     Union[
+         EmbeddingModel,
+         Model,
+     ],
+     Field(discriminator="type"),
  ]

  #
@@ -940,12 +1198,6 @@ class AuthorizationProviderList(RootModel[list[AuthProviderType]]):
      root: list[AuthProviderType]


- class IndexList(RootModel[list[IndexType]]):
-     """Schema for a standalone list of indexes."""
-
-     root: list[IndexType]
-
-
  class ModelList(RootModel[list[ModelType]]):
      """Schema for a standalone list of models."""

@@ -971,11 +1223,8 @@ class VariableList(RootModel[list[Variable]]):


  DocumentType = Union[
-     Agent,
      Application,
      AuthorizationProviderList,
-     Flow,
-     IndexList,
      ModelList,
      ToolList,
      TypeList,