qtype 0.0.12__py3-none-any.whl → 0.1.7__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (137)
  1. qtype/application/commons/tools.py +1 -1
  2. qtype/application/converters/tools_from_api.py +476 -11
  3. qtype/application/converters/tools_from_module.py +38 -14
  4. qtype/application/converters/types.py +15 -30
  5. qtype/application/documentation.py +1 -1
  6. qtype/application/facade.py +102 -85
  7. qtype/base/types.py +227 -7
  8. qtype/cli.py +5 -1
  9. qtype/commands/convert.py +52 -6
  10. qtype/commands/generate.py +44 -4
  11. qtype/commands/run.py +78 -36
  12. qtype/commands/serve.py +74 -44
  13. qtype/commands/validate.py +37 -14
  14. qtype/commands/visualize.py +46 -25
  15. qtype/dsl/__init__.py +6 -5
  16. qtype/dsl/custom_types.py +1 -1
  17. qtype/dsl/domain_types.py +86 -5
  18. qtype/dsl/linker.py +384 -0
  19. qtype/dsl/loader.py +315 -0
  20. qtype/dsl/model.py +753 -264
  21. qtype/dsl/parser.py +200 -0
  22. qtype/dsl/types.py +50 -0
  23. qtype/interpreter/api.py +63 -136
  24. qtype/interpreter/auth/aws.py +19 -9
  25. qtype/interpreter/auth/generic.py +93 -16
  26. qtype/interpreter/base/base_step_executor.py +436 -0
  27. qtype/interpreter/base/batch_step_executor.py +171 -0
  28. qtype/interpreter/base/exceptions.py +50 -0
  29. qtype/interpreter/base/executor_context.py +91 -0
  30. qtype/interpreter/base/factory.py +84 -0
  31. qtype/interpreter/base/progress_tracker.py +110 -0
  32. qtype/interpreter/base/secrets.py +339 -0
  33. qtype/interpreter/base/step_cache.py +74 -0
  34. qtype/interpreter/base/stream_emitter.py +469 -0
  35. qtype/interpreter/conversions.py +495 -24
  36. qtype/interpreter/converters.py +79 -0
  37. qtype/interpreter/endpoints.py +355 -0
  38. qtype/interpreter/executors/agent_executor.py +242 -0
  39. qtype/interpreter/executors/aggregate_executor.py +93 -0
  40. qtype/interpreter/executors/bedrock_reranker_executor.py +195 -0
  41. qtype/interpreter/executors/decoder_executor.py +163 -0
  42. qtype/interpreter/executors/doc_to_text_executor.py +112 -0
  43. qtype/interpreter/executors/document_embedder_executor.py +123 -0
  44. qtype/interpreter/executors/document_search_executor.py +113 -0
  45. qtype/interpreter/executors/document_source_executor.py +118 -0
  46. qtype/interpreter/executors/document_splitter_executor.py +105 -0
  47. qtype/interpreter/executors/echo_executor.py +63 -0
  48. qtype/interpreter/executors/field_extractor_executor.py +165 -0
  49. qtype/interpreter/executors/file_source_executor.py +101 -0
  50. qtype/interpreter/executors/file_writer_executor.py +110 -0
  51. qtype/interpreter/executors/index_upsert_executor.py +232 -0
  52. qtype/interpreter/executors/invoke_embedding_executor.py +104 -0
  53. qtype/interpreter/executors/invoke_flow_executor.py +51 -0
  54. qtype/interpreter/executors/invoke_tool_executor.py +358 -0
  55. qtype/interpreter/executors/llm_inference_executor.py +272 -0
  56. qtype/interpreter/executors/prompt_template_executor.py +78 -0
  57. qtype/interpreter/executors/sql_source_executor.py +106 -0
  58. qtype/interpreter/executors/vector_search_executor.py +91 -0
  59. qtype/interpreter/flow.py +172 -22
  60. qtype/interpreter/logging_progress.py +61 -0
  61. qtype/interpreter/metadata_api.py +115 -0
  62. qtype/interpreter/resource_cache.py +5 -4
  63. qtype/interpreter/rich_progress.py +225 -0
  64. qtype/interpreter/stream/chat/__init__.py +15 -0
  65. qtype/interpreter/stream/chat/converter.py +391 -0
  66. qtype/interpreter/{chat → stream/chat}/file_conversions.py +2 -2
  67. qtype/interpreter/stream/chat/ui_request_to_domain_type.py +140 -0
  68. qtype/interpreter/stream/chat/vercel.py +609 -0
  69. qtype/interpreter/stream/utils/__init__.py +15 -0
  70. qtype/interpreter/stream/utils/build_vercel_ai_formatter.py +74 -0
  71. qtype/interpreter/stream/utils/callback_to_stream.py +66 -0
  72. qtype/interpreter/stream/utils/create_streaming_response.py +18 -0
  73. qtype/interpreter/stream/utils/default_chat_extract_text.py +20 -0
  74. qtype/interpreter/stream/utils/error_streaming_response.py +20 -0
  75. qtype/interpreter/telemetry.py +135 -8
  76. qtype/interpreter/tools/__init__.py +5 -0
  77. qtype/interpreter/tools/function_tool_helper.py +265 -0
  78. qtype/interpreter/types.py +330 -0
  79. qtype/interpreter/typing.py +83 -89
  80. qtype/interpreter/ui/404/index.html +1 -1
  81. qtype/interpreter/ui/404.html +1 -1
  82. qtype/interpreter/ui/_next/static/{OT8QJQW3J70VbDWWfrEMT → 20HoJN6otZ_LyHLHpCPE6}/_buildManifest.js +1 -1
  83. qtype/interpreter/ui/_next/static/chunks/434-b2112d19f25c44ff.js +36 -0
  84. qtype/interpreter/ui/_next/static/chunks/{964-ed4ab073db645007.js → 964-2b041321a01cbf56.js} +1 -1
  85. qtype/interpreter/ui/_next/static/chunks/app/{layout-5ccbc44fd528d089.js → layout-a05273ead5de2c41.js} +1 -1
  86. qtype/interpreter/ui/_next/static/chunks/app/page-8c67d16ac90d23cb.js +1 -0
  87. qtype/interpreter/ui/_next/static/chunks/ba12c10f-546f2714ff8abc66.js +1 -0
  88. qtype/interpreter/ui/_next/static/chunks/{main-6d261b6c5d6fb6c2.js → main-e26b9cb206da2cac.js} +1 -1
  89. qtype/interpreter/ui/_next/static/chunks/webpack-08642e441b39b6c2.js +1 -0
  90. qtype/interpreter/ui/_next/static/css/8a8d1269e362fef7.css +3 -0
  91. qtype/interpreter/ui/_next/static/media/4cf2300e9c8272f7-s.p.woff2 +0 -0
  92. qtype/interpreter/ui/icon.png +0 -0
  93. qtype/interpreter/ui/index.html +1 -1
  94. qtype/interpreter/ui/index.txt +5 -5
  95. qtype/semantic/checker.py +643 -0
  96. qtype/semantic/generate.py +268 -85
  97. qtype/semantic/loader.py +95 -0
  98. qtype/semantic/model.py +535 -163
  99. qtype/semantic/resolver.py +63 -19
  100. qtype/semantic/visualize.py +50 -35
  101. {qtype-0.0.12.dist-info → qtype-0.1.7.dist-info}/METADATA +22 -5
  102. qtype-0.1.7.dist-info/RECORD +137 -0
  103. qtype/dsl/base_types.py +0 -38
  104. qtype/dsl/validator.py +0 -464
  105. qtype/interpreter/batch/__init__.py +0 -0
  106. qtype/interpreter/batch/flow.py +0 -95
  107. qtype/interpreter/batch/sql_source.py +0 -95
  108. qtype/interpreter/batch/step.py +0 -63
  109. qtype/interpreter/batch/types.py +0 -41
  110. qtype/interpreter/batch/utils.py +0 -179
  111. qtype/interpreter/chat/chat_api.py +0 -237
  112. qtype/interpreter/chat/vercel.py +0 -314
  113. qtype/interpreter/exceptions.py +0 -10
  114. qtype/interpreter/step.py +0 -67
  115. qtype/interpreter/steps/__init__.py +0 -0
  116. qtype/interpreter/steps/agent.py +0 -114
  117. qtype/interpreter/steps/condition.py +0 -36
  118. qtype/interpreter/steps/decoder.py +0 -88
  119. qtype/interpreter/steps/llm_inference.py +0 -150
  120. qtype/interpreter/steps/prompt_template.py +0 -54
  121. qtype/interpreter/steps/search.py +0 -24
  122. qtype/interpreter/steps/tool.py +0 -53
  123. qtype/interpreter/streaming_helpers.py +0 -123
  124. qtype/interpreter/ui/_next/static/chunks/736-7fc606e244fedcb1.js +0 -36
  125. qtype/interpreter/ui/_next/static/chunks/app/page-c72e847e888e549d.js +0 -1
  126. qtype/interpreter/ui/_next/static/chunks/ba12c10f-22556063851a6df2.js +0 -1
  127. qtype/interpreter/ui/_next/static/chunks/webpack-8289c17c67827f22.js +0 -1
  128. qtype/interpreter/ui/_next/static/css/a262c53826df929b.css +0 -3
  129. qtype/interpreter/ui/_next/static/media/569ce4b8f30dc480-s.p.woff2 +0 -0
  130. qtype/interpreter/ui/favicon.ico +0 -0
  131. qtype/loader.py +0 -389
  132. qtype-0.0.12.dist-info/RECORD +0 -105
  133. qtype/interpreter/ui/_next/static/{OT8QJQW3J70VbDWWfrEMT → 20HoJN6otZ_LyHLHpCPE6}/_ssgManifest.js +0 -0
  134. {qtype-0.0.12.dist-info → qtype-0.1.7.dist-info}/WHEEL +0 -0
  135. {qtype-0.0.12.dist-info → qtype-0.1.7.dist-info}/entry_points.txt +0 -0
  136. {qtype-0.0.12.dist-info → qtype-0.1.7.dist-info}/licenses/LICENSE +0 -0
  137. {qtype-0.0.12.dist-info → qtype-0.1.7.dist-info}/top_level.txt +0 -0
qtype/dsl/model.py CHANGED
@@ -1,9 +1,11 @@
1
1
  from __future__ import annotations
2
2
 
3
3
  import inspect
4
+ import sys
4
5
  from abc import ABC
5
6
  from enum import Enum
6
- from typing import Any, Literal, Type, Union
7
+ from functools import partial
8
+ from typing import Annotated, Any, Literal, Type, Union
7
9
 
8
10
  from pydantic import (
9
11
  BaseModel,
@@ -14,20 +16,23 @@ from pydantic import (
14
16
  )
15
17
 
16
18
  import qtype.dsl.domain_types as domain_types
17
- from qtype.dsl.base_types import (
19
+ from qtype.base.types import (
20
+ BatchableStepMixin,
21
+ BatchConfig,
22
+ CachedStepMixin,
23
+ ConcurrentStepMixin,
18
24
  PrimitiveTypeEnum,
25
+ Reference,
19
26
  StepCardinality,
20
27
  StrictBaseModel,
21
28
  )
22
- from qtype.dsl.domain_types import ChatContent, ChatMessage, Embedding
23
-
24
-
25
- class StructuralTypeEnum(str, Enum):
26
- """Represents a structured type that can be used in the DSL."""
27
-
28
- object = "object"
29
- array = "array"
30
-
29
+ from qtype.dsl.domain_types import (
30
+ ChatContent,
31
+ ChatMessage,
32
+ Embedding,
33
+ RAGChunk,
34
+ RAGDocument,
35
+ )
31
36
 
32
37
  DOMAIN_CLASSES = {
33
38
  name: obj
@@ -36,36 +41,164 @@ DOMAIN_CLASSES = {
36
41
  }
37
42
 
38
43
 
44
+ def _resolve_list_type(
45
+ element_type_str: str, custom_type_registry: dict[str, Type[BaseModel]]
46
+ ) -> ListType:
47
+ """
48
+ Resolve a list element type and return a ListType.
49
+
50
+ Args:
51
+ element_type_str: The element type string (e.g., "text", "ChatMessage")
52
+ custom_type_registry: Registry of custom types
53
+
54
+ Returns:
55
+ ListType with resolved element type
56
+
57
+ Raises:
58
+ ValueError: If element type is invalid for lists
59
+ """
60
+ # Recursively resolve the element type
61
+ element_type = _resolve_variable_type(
62
+ element_type_str, custom_type_registry
63
+ )
64
+
65
+ # Allow both primitive types and custom types (but no nested lists)
66
+ if isinstance(element_type, PrimitiveTypeEnum):
67
+ return ListType(element_type=element_type)
68
+ elif isinstance(element_type, str):
69
+ # This is a custom type reference - store as string for later resolution
70
+ return ListType(element_type=element_type)
71
+ elif element_type in DOMAIN_CLASSES.values():
72
+ # Domain class - store its name as string reference
73
+ for name, cls in DOMAIN_CLASSES.items():
74
+ if cls == element_type:
75
+ return ListType(element_type=name)
76
+ return ListType(element_type=str(element_type))
77
+ else:
78
+ raise ValueError(
79
+ (
80
+ "List element type must be a primitive or custom type "
81
+ f"reference, got: {element_type}"
82
+ )
83
+ )
84
+
85
+
86
+ def _resolve_primitive_type(type_str: str) -> PrimitiveTypeEnum | None:
87
+ """
88
+ Try to resolve a string as a primitive type.
89
+
90
+ Args:
91
+ type_str: The type string to resolve
92
+
93
+ Returns:
94
+ PrimitiveTypeEnum if it matches, None otherwise
95
+ """
96
+ try:
97
+ return PrimitiveTypeEnum(type_str)
98
+ except ValueError:
99
+ return None
100
+
101
+
102
+ def _resolve_domain_type(type_str: str) -> Type[BaseModel] | None:
103
+ """
104
+ Try to resolve a string as a built-in domain entity class.
105
+
106
+ Args:
107
+ type_str: The type string to resolve
108
+
109
+ Returns:
110
+ Domain class if found, None otherwise
111
+ """
112
+ return DOMAIN_CLASSES.get(type_str)
113
+
114
+
115
+ def _resolve_custom_type(
116
+ type_str: str, custom_type_registry: dict[str, Type[BaseModel]]
117
+ ) -> Type[BaseModel] | None:
118
+ """
119
+ Try to resolve a string as a custom type from the registry.
120
+
121
+ Args:
122
+ type_str: The type string to resolve
123
+ custom_type_registry: Registry of custom types
124
+
125
+ Returns:
126
+ Custom type class if found, None otherwise
127
+ """
128
+ return custom_type_registry.get(type_str)
129
+
130
+
39
131
  def _resolve_variable_type(
40
132
  parsed_type: Any, custom_type_registry: dict[str, Type[BaseModel]]
41
133
  ) -> Any:
42
- """Resolve a type string to its corresponding PrimitiveTypeEnum or return as is."""
134
+ """
135
+ Resolve a type to its corresponding representation.
136
+
137
+ Handles primitive types, list types, domain types, and custom types.
138
+
139
+ Args:
140
+ parsed_type: The type to resolve (can be string or already resolved)
141
+ custom_type_registry: Registry of dynamically created custom types
142
+
143
+ Returns:
144
+ Resolved type (PrimitiveTypeEnum, ListType, domain class, or string)
145
+ """
43
146
  # If the type is already resolved or is a structured definition, pass it through.
44
147
  if not isinstance(parsed_type, str):
45
148
  return parsed_type
46
149
 
47
- # --- Case 1: The type is a string ---
48
- # Try to resolve it as a primitive type first.
49
- try:
50
- return PrimitiveTypeEnum(parsed_type)
51
- except ValueError:
52
- pass # Not a primitive, continue to the next check.
150
+ # Check if it's a list type (e.g., "list[text]")
151
+ if parsed_type.startswith("list[") and parsed_type.endswith("]"):
152
+ element_type_str = parsed_type[5:-1] # Remove "list[" and "]"
153
+ return _resolve_list_type(element_type_str, custom_type_registry)
154
+
155
+ # Try to resolve as primitive type
156
+ primitive = _resolve_primitive_type(parsed_type)
157
+ if primitive is not None:
158
+ return primitive
53
159
 
54
- # Try to resolve it as a built-in Domain Entity class.
55
- # (Assuming domain_types and inspect are defined elsewhere)
56
- if parsed_type in DOMAIN_CLASSES:
57
- return DOMAIN_CLASSES[parsed_type]
160
+ # Try to resolve as built-in domain entity class
161
+ domain = _resolve_domain_type(parsed_type)
162
+ if domain is not None:
163
+ return domain
58
164
 
59
- # Check the registry of dynamically created custom types
60
- if parsed_type in custom_type_registry:
61
- return custom_type_registry[parsed_type]
165
+ # Try to resolve as custom type
166
+ custom = _resolve_custom_type(parsed_type, custom_type_registry)
167
+ if custom is not None:
168
+ return custom
62
169
 
63
- # If it's not a primitive or a known domain entity, return it as a string.
64
- # This assumes it might be a reference ID to another custom type.
170
+ # If it's not any known type, return it as a string.
171
+ # This assumes it might be a forward reference to a custom type.
65
172
  return parsed_type
66
173
 
67
174
 
68
- class Variable(BaseModel):
175
+ def _resolve_type_field_validator(data: Any, info: ValidationInfo) -> Any:
176
+ """
177
+ Shared validator for resolving 'type' fields in models.
178
+
179
+ This validator resolves string-based type references using the custom
180
+ type registry from the validation context.
181
+
182
+ Args:
183
+ data: The data dict being validated
184
+ info: Pydantic validation info containing context
185
+
186
+ Returns:
187
+ Updated data dict with resolved type field
188
+ """
189
+ if (
190
+ isinstance(data, dict)
191
+ and "type" in data
192
+ and isinstance(data["type"], str)
193
+ ):
194
+ # Get the registry of custom types from the validation context.
195
+ custom_types = (info.context or {}).get("custom_types", {})
196
+ resolved = _resolve_variable_type(data["type"], custom_types)
197
+ data["type"] = resolved
198
+ return data
199
+
200
+
201
+ class Variable(StrictBaseModel):
69
202
  """Schema for a variable that can serve as input, output, or parameter within the DSL."""
70
203
 
71
204
  id: str = Field(
@@ -82,21 +215,24 @@ class Variable(BaseModel):
82
215
  @model_validator(mode="before")
83
216
  @classmethod
84
217
  def resolve_type(cls, data: Any, info: ValidationInfo) -> Any:
85
- """
86
- This validator runs during the main validation pass. It uses the
87
- context to resolve string-based type references.
88
- """
89
- if (
90
- isinstance(data, dict)
91
- and "type" in data
92
- and isinstance(data["type"], str)
93
- ):
94
- # Get the registry of custom types from the validation context.
95
- custom_types = (info.context or {}).get("custom_types", {})
96
- resolved = _resolve_variable_type(data["type"], custom_types)
97
- # {'id': 'user_message', 'type': 'ChatMessage'}
98
- data["type"] = resolved
99
- return data
218
+ """Resolve string-based type references using the shared validator."""
219
+ return _resolve_type_field_validator(data, info)
220
+
221
+
222
+ class SecretReference(StrictBaseModel):
223
+ """
224
+ A reference to a secret in the application's configured SecretManager.
225
+ This value is resolved at runtime by the interpreter.
226
+ """
227
+
228
+ secret_name: str = Field(
229
+ ...,
230
+ description="The name, ID, or ARN of the secret to fetch (e.g., 'my-project/db-password').",
231
+ )
232
+ key: str | None = Field(
233
+ default=None,
234
+ description="Optional key if the secret is a JSON blob or map (e.g., a specific key in a K8s secret).",
235
+ )
100
236
 
101
237
 
102
238
  class CustomType(StrictBaseModel):
@@ -107,40 +243,77 @@ class CustomType(StrictBaseModel):
107
243
  properties: dict[str, str]
108
244
 
109
245
 
246
+ class ToolParameter(BaseModel):
247
+ """Defines a tool input or output parameter with type and optional flag."""
248
+
249
+ type: VariableType | str
250
+ optional: bool = Field(
251
+ default=False, description="Whether this parameter is optional"
252
+ )
253
+
254
+ @model_validator(mode="before")
255
+ @classmethod
256
+ def resolve_type(cls, data: Any, info: ValidationInfo) -> Any:
257
+ """Resolve string-based type references using the shared validator."""
258
+ return _resolve_type_field_validator(data, info)
259
+
260
+
261
+ class ListType(BaseModel):
262
+ """Represents a list type with a specific element type."""
263
+
264
+ element_type: PrimitiveTypeEnum | str = Field(
265
+ ...,
266
+ description="Type of elements in the list (primitive type or custom type reference)",
267
+ )
268
+
269
+ def __str__(self) -> str:
270
+ """String representation for list type."""
271
+ if isinstance(self.element_type, PrimitiveTypeEnum):
272
+ return f"list[{self.element_type.value}]"
273
+ else:
274
+ return f"list[{self.element_type}]"
275
+
276
+
110
277
  VariableType = (
111
278
  PrimitiveTypeEnum
112
279
  | Type[Embedding]
113
280
  | Type[ChatMessage]
114
281
  | Type[ChatContent]
115
282
  | Type[BaseModel]
283
+ | Type[RAGDocument]
284
+ | Type[RAGChunk]
285
+ | ListType
116
286
  )
117
287
 
118
288
 
119
289
  class Model(StrictBaseModel):
120
290
  """Describes a generative model configuration, including provider and model ID."""
121
291
 
292
+ type: Literal["Model"] = "Model"
122
293
  id: str = Field(..., description="Unique ID for the model.")
123
- auth: AuthProviderType | str | None = Field(
294
+ auth: Reference[AuthProviderType] | str | None = Field(
124
295
  default=None,
125
296
  description="AuthorizationProvider used for model access.",
126
297
  )
127
- inference_params: dict[str, Any] | None = Field(
128
- default=None,
298
+ inference_params: dict[str, Any] = Field(
299
+ default_factory=dict,
129
300
  description="Optional inference parameters like temperature or max_tokens.",
130
301
  )
131
302
  model_id: str | None = Field(
132
303
  default=None,
133
304
  description="The specific model name or ID for the provider. If None, id is used",
134
305
  )
135
- # TODO(maybe): Make this an enum?
136
- provider: str = Field(
137
- ..., description="Name of the provider, e.g., openai or anthropic."
306
+ provider: Literal["openai", "anthropic", "aws-bedrock", "gcp-vertex"] = (
307
+ Field(
308
+ ..., description="Name of the provider, e.g., openai or anthropic."
309
+ )
138
310
  )
139
311
 
140
312
 
141
313
  class EmbeddingModel(Model):
142
314
  """Describes an embedding model configuration, extending the base Model class."""
143
315
 
316
+ type: Literal["EmbeddingModel"] = "EmbeddingModel"
144
317
  dimensions: int = Field(
145
318
  ...,
146
319
  description="Dimensionality of the embedding vectors produced by this model.",
@@ -172,20 +345,22 @@ class Memory(StrictBaseModel):
172
345
  #
173
346
 
174
347
 
175
- class Step(StrictBaseModel, ABC):
348
+ class Step(CachedStepMixin, StrictBaseModel, ABC):
176
349
  """Base class for components that take inputs and produce outputs."""
177
350
 
178
351
  id: str = Field(..., description="Unique ID of this component.")
352
+ type: str = Field(..., description="Type of the step component.")
179
353
  cardinality: StepCardinality = Field(
180
354
  default=StepCardinality.one,
181
355
  description="Does this step emit 1 (one) or 0...N (many) instances of the outputs?",
182
356
  )
183
- inputs: list[Variable | str] | None = Field(
184
- default=None,
185
- description="Input variables required by this step.",
357
+ inputs: list[Reference[Variable] | str] = Field(
358
+ default_factory=list,
359
+ description="References to the variables required by this step.",
186
360
  )
187
- outputs: list[Variable | str] | None = Field(
188
- default=None, description="Variable where output is stored."
361
+ outputs: list[Reference[Variable] | str] = Field(
362
+ default_factory=list,
363
+ description="References to the variables where output is stored.",
189
364
  )
190
365
 
191
366
 
@@ -193,65 +368,37 @@ class PromptTemplate(Step):
193
368
  """Defines a prompt template with a string format and variable bindings.
194
369
  This is used to generate prompts dynamically based on input variables."""
195
370
 
371
+ type: Literal["PromptTemplate"] = "PromptTemplate" # type: ignore
196
372
  template: str = Field(
197
373
  ...,
198
374
  description="String template for the prompt with variable placeholders.",
199
375
  )
200
376
 
201
- @model_validator(mode="after")
202
- def set_default_outputs(self) -> "PromptTemplate":
203
- """Set default output variable if none provided."""
204
- if self.outputs is None:
205
- self.outputs = [
206
- Variable(id=f"{self.id}.prompt", type=PrimitiveTypeEnum.text)
207
- ]
208
- if len(self.outputs) != 1:
209
- raise ValueError(
210
- "PromptTemplate steps must have exactly one output variable -- the result of applying the template."
211
- )
212
- return self
213
-
214
-
215
- class Condition(Step):
216
- """Conditional logic gate within a flow. Supports branching logic for execution based on variable values."""
217
-
218
- # TODO: Add support for more complex conditions
219
- else_: StepType | str | None = Field(
220
- default=None,
221
- alias="else",
222
- description="Optional step to run if condition fails.",
223
- )
224
- equals: Variable | str | None = Field(
225
- default=None, description="Match condition for equality check."
226
- )
227
- then: StepType | str = Field(
228
- ..., description="Step to run if condition matches."
229
- )
230
-
231
- @model_validator(mode="after")
232
- def set_default_outputs(self) -> "Condition":
233
- """Set default output variable if none provided."""
234
- if not self.inputs or len(self.inputs) != 1:
235
- raise ValueError(
236
- "Condition steps must have exactly one input variable."
237
- )
238
- return self
239
-
240
377
 
241
- class Tool(Step, ABC):
378
+ class Tool(StrictBaseModel, ABC):
242
379
  """
243
380
  Base class for callable functions or external operations available to the model or as a step in a flow.
244
381
  """
245
382
 
383
+ id: str = Field(..., description="Unique ID of this component.")
246
384
  name: str = Field(..., description="Name of the tool function.")
247
385
  description: str = Field(
248
386
  ..., description="Description of what the tool does."
249
387
  )
388
+ inputs: dict[str, ToolParameter] = Field(
389
+ default_factory=dict,
390
+ description="Input parameters required by this tool.",
391
+ )
392
+ outputs: dict[str, ToolParameter] = Field(
393
+ default_factory=dict,
394
+ description="Output parameters produced by this tool.",
395
+ )
250
396
 
251
397
 
252
398
  class PythonFunctionTool(Tool):
253
399
  """Tool that calls a Python function."""
254
400
 
401
+ type: Literal["PythonFunctionTool"] = "PythonFunctionTool"
255
402
  function_name: str = Field(
256
403
  ..., description="Name of the Python function to call."
257
404
  )
@@ -264,30 +411,36 @@ class PythonFunctionTool(Tool):
264
411
  class APITool(Tool):
265
412
  """Tool that invokes an API endpoint."""
266
413
 
414
+ type: Literal["APITool"] = "APITool"
267
415
  endpoint: str = Field(..., description="API endpoint URL to call.")
268
416
  method: str = Field(
269
417
  default="GET",
270
418
  description="HTTP method to use (GET, POST, PUT, DELETE, etc.).",
271
419
  )
272
- auth: AuthProviderType | str | None = Field(
420
+ auth: Reference[AuthProviderType] | str | None = Field(
273
421
  default=None,
274
422
  description="Optional AuthorizationProvider for API authentication.",
275
423
  )
276
- headers: dict[str, str] | None = Field(
277
- default=None,
424
+ headers: dict[str, str] = Field(
425
+ default_factory=dict,
278
426
  description="Optional HTTP headers to include in the request.",
279
427
  )
428
+ parameters: dict[str, ToolParameter] = Field(
429
+ default_factory=dict,
430
+ description="Output parameters produced by this tool.",
431
+ )
280
432
 
281
433
 
282
- class LLMInference(Step):
434
+ class LLMInference(Step, ConcurrentStepMixin):
283
435
  """Defines a step that performs inference using a language model.
284
436
  It can take input variables and produce output variables based on the model's response."""
285
437
 
286
- memory: Memory | str | None = Field(
438
+ type: Literal["LLMInference"] = "LLMInference"
439
+ memory: Reference[Memory] | str | None = Field(
287
440
  default=None,
288
- description="Memory object to retain context across interactions.",
441
+ description="A reference to a Memory object to retain context across interactions.",
289
442
  )
290
- model: ModelType | str = Field(
443
+ model: Reference[Model] | str = Field(
291
444
  ..., description="The model to use for inference."
292
445
  )
293
446
  system_message: str | None = Field(
@@ -295,43 +448,70 @@ class LLMInference(Step):
295
448
  description="Optional system message to set the context for the model.",
296
449
  )
297
450
 
298
- @model_validator(mode="after")
299
- def set_default_outputs(self) -> "LLMInference":
300
- """Set default output variable if none provided."""
301
- if self.outputs is None:
302
- self.outputs = [
303
- Variable(id=f"{self.id}.response", type=PrimitiveTypeEnum.text)
304
- ]
305
- return self
451
+
452
+ class InvokeEmbedding(Step, ConcurrentStepMixin):
453
+ """Defines a step that generates embeddings using an embedding model.
454
+ It takes input variables and produces output variables containing the embeddings."""
455
+
456
+ type: Literal["InvokeEmbedding"] = "InvokeEmbedding"
457
+ model: Reference[EmbeddingModel] | str = Field(
458
+ ..., description="The embedding model to use."
459
+ )
306
460
 
307
461
 
308
462
  class Agent(LLMInference):
309
463
  """Defines an agent that can perform tasks and make decisions based on user input and context."""
310
464
 
311
- tools: list[ToolType | str] = Field(
312
- ..., description="List of tools available to the agent."
465
+ type: Literal["Agent"] = "Agent"
466
+
467
+ tools: list[Reference[ToolType] | str] = Field(
468
+ default_factory=list,
469
+ description="List of tools available to the agent.",
313
470
  )
314
471
 
315
472
 
316
- class Flow(Step):
473
+ class Flow(StrictBaseModel):
317
474
  """Defines a flow of steps that can be executed in sequence or parallel.
318
475
  If input or output variables are not specified, they are inferred from
319
- the first and last step, respectively.
320
- """
476
+ the first and last step, respectively."""
321
477
 
478
+ id: str = Field(..., description="Unique ID of the flow.")
479
+ type: Literal["Flow"] = "Flow"
322
480
  description: str | None = Field(
323
481
  default=None, description="Optional description of the flow."
324
482
  )
483
+ steps: list[StepType | Reference[StepType]] = Field(
484
+ default_factory=list,
485
+ description="List of steps or references to steps",
486
+ )
325
487
 
326
- cardinality: StepCardinality = Field(
327
- default=StepCardinality.auto,
328
- description="The cardinality of the flow, inferred from its steps when set to 'auto'.",
488
+ interface: FlowInterface | None = Field(default=None)
489
+ variables: list[Variable] = Field(
490
+ default_factory=list,
491
+ description="List of variables available at the application scope.",
492
+ )
493
+ inputs: list[Reference[Variable] | str] = Field(
494
+ default_factory=list,
495
+ description="Input variables required by this step.",
329
496
  )
497
+ outputs: list[Reference[Variable] | str] = Field(
498
+ default_factory=list, description="Resulting variables"
499
+ )
500
+
501
+
502
+ class FlowInterface(StrictBaseModel):
503
+ """
504
+ Defines the public-facing contract for a Flow, guiding the UI
505
+ and session management.
506
+ """
330
507
 
331
- mode: Literal["Complete", "Chat"] = "Complete"
508
+ # 1. Tells the UI how to render this flow
509
+ type: Literal["Complete", "Conversational"] = "Complete"
332
510
 
333
- steps: list[StepType | str] = Field(
334
- default_factory=list, description="List of steps or step IDs."
511
+ # 2. Declares which inputs are "sticky" and persisted in the session
512
+ session_inputs: list[Reference[Variable] | str] = Field(
513
+ default_factory=list,
514
+ description="A list of input variable IDs that are set once and then persisted across a session.",
335
515
  )
336
516
 
337
517
 
@@ -346,34 +526,119 @@ class Decoder(Step):
346
526
  """Defines a step that decodes string data into structured outputs.
347
527
 
348
528
  If parsing fails, the step will raise an error and halt execution.
349
- Use conditional logic in your flow to handle potential parsing errors.
350
- """
529
+ Use conditional logic in your flow to handle potential parsing errors."""
530
+
531
+ type: Literal["Decoder"] = "Decoder"
351
532
 
352
533
  format: DecoderFormat = Field(
353
534
  DecoderFormat.json,
354
535
  description="Format in which the decoder processes data. Defaults to JSON.",
355
536
  )
356
537
 
357
- @model_validator(mode="after")
358
- def set_default_outputs(self) -> "Decoder":
359
- """Set default output variable if none provided."""
360
-
361
- if (
362
- self.inputs is None
363
- or len(self.inputs) != 1
364
- or (
365
- isinstance(self.inputs[0], Variable)
366
- and self.inputs[0].type != PrimitiveTypeEnum.text
367
- )
368
- ):
369
- raise ValueError(
370
- f"Decoder steps must have exactly one input variable of type 'text'. Found: {self.inputs}"
371
- )
372
- if self.outputs is None:
373
- raise ValueError(
374
- "Decoder steps must have at least one output variable defined."
375
- )
376
- return self
538
+
539
+ class Echo(Step):
540
+ """Defines a step that echoes its inputs as outputs.
541
+
542
+ Useful for debugging flows by inspecting variable values at a specific
543
+ point in the execution pipeline. The step simply passes through all input
544
+ variables as outputs without modification.
545
+ """
546
+
547
+ type: Literal["Echo"] = "Echo"
548
+
549
+
550
+ class FieldExtractor(Step):
551
+ """Extracts specific fields from input data using JSONPath expressions.
552
+
553
+ This step uses JSONPath syntax to extract data from structured inputs
554
+ (Pydantic models, dicts, lists). The input is first converted to a dict
555
+ using model_dump() if it's a Pydantic model, then the JSONPath expression
556
+ is evaluated.
557
+
558
+ If the JSONPath matches multiple values, the step yields multiple output
559
+ messages (1-to-many cardinality). If it matches a single value, it yields
560
+ one output message. If it matches nothing, it raises an error.
561
+
562
+ The extracted data is used to construct the output variable by passing it
563
+ as keyword arguments to the output type's constructor.
564
+
565
+ Example JSONPath expressions:
566
+ - `$.field_name` - Extract a single field
567
+ - `$.items[*]` - Extract all items from a list
568
+ - `$.items[?(@.price > 10)]` - Filter items by condition
569
+ """
570
+
571
+ type: Literal["FieldExtractor"] = "FieldExtractor"
572
+ json_path: str = Field(
573
+ ...,
574
+ description="JSONPath expression to extract data from the input. Uses jsonpath-ng syntax.",
575
+ )
576
+ fail_on_missing: bool = Field(
577
+ default=True,
578
+ description="Whether to raise an error if the JSONPath matches no data. If False, returns None.",
579
+ )
580
+
581
+
582
+ class InvokeTool(Step, ConcurrentStepMixin):
583
+ """Invokes a tool with input and output bindings."""
584
+
585
+ type: Literal["InvokeTool"] = "InvokeTool"
586
+
587
+ tool: Reference[ToolType] | str = Field(
588
+ ...,
589
+ description="Tool to invoke.",
590
+ )
591
+ input_bindings: dict[str, str] = Field(
592
+ ...,
593
+ description="Mapping from variable references to tool input parameter names.",
594
+ )
595
+ output_bindings: dict[str, str] = Field(
596
+ ...,
597
+ description="Mapping from variable references to tool output parameter names.",
598
+ )
599
+
600
+
601
+ class InvokeFlow(Step):
602
+ """Invokes a flow with input and output bindings."""
603
+
604
+ type: Literal["InvokeFlow"] = "InvokeFlow"
605
+
606
+ flow: Reference[Flow] | str = Field(
607
+ ...,
608
+ description="Flow to invoke.",
609
+ )
610
+ input_bindings: dict[Reference[Variable], str] = Field(
611
+ ...,
612
+ description="Mapping from variable references to flow input variable IDs.",
613
+ )
614
+ output_bindings: dict[Reference[Variable], str] = Field(
615
+ ...,
616
+ description="Mapping from variable references to flow output variable IDs.",
617
+ )
618
+
619
+
620
+ #
621
+ # ---------------- Secret Manager Component ----------------
622
+ #
623
+
624
+
625
+ class SecretManager(StrictBaseModel, ABC):
626
+ """Base class for secret manager configurations."""
627
+
628
+ id: str = Field(
629
+ ..., description="Unique ID for this secret manager configuration."
630
+ )
631
+ type: str = Field(..., description="The type of secret manager.")
632
+ auth: Reference[AuthProviderType] | str = Field(
633
+ ...,
634
+ description="AuthorizationProvider used to access this secret manager.",
635
+ )
636
+
637
+
638
+ class AWSSecretManager(SecretManager):
639
+ """Configuration for AWS Secrets Manager."""
640
+
641
+ type: Literal["aws_secret_manager"] = "aws_secret_manager"
377
642
 
378
643
 
379
644
  #
@@ -394,22 +659,67 @@ class APIKeyAuthProvider(AuthorizationProvider):
394
659
  """API key-based authentication provider."""
395
660
 
396
661
  type: Literal["api_key"] = "api_key"
397
- api_key: str = Field(..., description="API key for authentication.")
662
+ api_key: str | SecretReference = Field(
663
+ ..., description="API key for authentication."
664
+ )
398
665
  host: str | None = Field(
399
666
  default=None, description="Base URL or domain of the provider."
400
667
  )
401
668
 
402
669
 
670
+ class BearerTokenAuthProvider(AuthorizationProvider):
671
+ """Bearer token authentication provider."""
672
+
673
+ type: Literal["bearer_token"] = "bearer_token"
674
+ token: str | SecretReference = Field(
675
+ ..., description="Bearer token for authentication."
676
+ )
677
+
678
+
403
679
  class OAuth2AuthProvider(AuthorizationProvider):
404
680
  """OAuth2 authentication provider."""
405
681
 
406
682
  type: Literal["oauth2"] = "oauth2"
407
683
  client_id: str = Field(..., description="OAuth2 client ID.")
408
- client_secret: str = Field(..., description="OAuth2 client secret.")
684
+ client_secret: str | SecretReference = Field(
685
+ ..., description="OAuth2 client secret."
686
+ )
409
687
  token_url: str = Field(..., description="Token endpoint URL.")
410
- scopes: list[str] | None = Field(
411
- default=None, description="OAuth2 scopes required."
688
+ scopes: list[str] = Field(
689
+ default_factory=list, description="OAuth2 scopes required."
690
+ )
691
+
692
+
693
+ class VertexAuthProvider(AuthorizationProvider):
694
+ """Google Vertex authentication provider supporting gcloud profile or service account."""
695
+
696
+ type: Literal["vertex"] = "vertex"
697
+ profile_name: str | None = Field(
698
+ default=None,
699
+ description="Local gcloud profile name (if using existing CLI credentials).",
412
700
  )
701
+ project_id: str | None = Field(
702
+ default=None,
703
+ description="Explicit GCP project ID override (if different from profile).",
704
+ )
705
+ service_account_file: str | None = Field(
706
+ default=None,
707
+ description="Path to a service account JSON key file.",
708
+ )
709
+ region: str | None = Field(
710
+ default=None,
711
+ description="Vertex region (e.g., us-central1).",
712
+ )
713
+
714
+ @model_validator(mode="after")
715
+ def validate_vertex_auth(self) -> VertexAuthProvider:
716
+ """Ensure at least one credential source is provided."""
717
+ if not (self.profile_name or self.service_account_file):
718
+ raise ValueError(
719
+ "VertexAuthProvider requires either a profile_name or a "
720
+ "service_account_file."
721
+ )
722
+ return self
413
723
 
414
724
 
415
725
  class AWSAuthProvider(AuthorizationProvider):
@@ -418,13 +728,13 @@ class AWSAuthProvider(AuthorizationProvider):
418
728
  type: Literal["aws"] = "aws"
419
729
 
420
730
  # Method 1: Access key/secret/session
421
- access_key_id: str | None = Field(
731
+ access_key_id: str | SecretReference | None = Field(
422
732
  default=None, description="AWS access key ID."
423
733
  )
424
- secret_access_key: str | None = Field(
734
+ secret_access_key: str | SecretReference | None = Field(
425
735
  default=None, description="AWS secret access key."
426
736
  )
427
- session_token: str | None = Field(
737
+ session_token: str | SecretReference | None = Field(
428
738
  default=None,
429
739
  description="AWS session token for temporary credentials.",
430
740
  )
@@ -449,14 +759,15 @@ class AWSAuthProvider(AuthorizationProvider):
449
759
  region: str | None = Field(default=None, description="AWS region.")
450
760
 
451
761
  @model_validator(mode="after")
452
- def validate_aws_auth(self) -> "AWSAuthProvider":
762
+ def validate_aws_auth(self) -> AWSAuthProvider:
453
763
  """Validate AWS authentication configuration."""
454
764
  # At least one auth method must be specified
455
765
  has_keys = self.access_key_id and self.secret_access_key
456
766
  has_profile = self.profile_name
457
767
  has_role = self.role_arn
768
+ has_region = self.region
458
769
 
459
- if not (has_keys or has_profile or has_role):
770
+ if not (has_keys or has_profile or has_role or has_region):
460
771
  raise ValueError(
461
772
  "AWSAuthProvider must specify at least one authentication method: "
462
773
  "access keys, profile name, or role ARN."
@@ -477,13 +788,18 @@ class TelemetrySink(StrictBaseModel):
477
788
  id: str = Field(
478
789
  ..., description="Unique ID of the telemetry sink configuration."
479
790
  )
480
- auth: AuthProviderType | str | None = Field(
791
+ provider: Literal["Phoenix", "Langfuse"] = "Phoenix"
792
+ auth: Reference[AuthProviderType] | str | None = Field(
481
793
  default=None,
482
794
  description="AuthorizationProvider used to authenticate telemetry data transmission.",
483
795
  )
484
- endpoint: str = Field(
796
+ endpoint: str | SecretReference = Field(
485
797
  ..., description="URL endpoint where telemetry data will be sent."
486
798
  )
799
+ args: dict[str, Any] = Field(
800
+ default_factory=dict,
801
+ description="Additional configuration arguments specific to the telemetry sink type.",
802
+ )
487
803
 
488
804
 
489
805
  #
@@ -508,48 +824,53 @@ class Application(StrictBaseModel):
508
824
  )
509
825
 
510
826
  # Core components
511
- memories: list[Memory] | None = Field(
512
- default=None,
827
+ memories: list[Memory] = Field(
828
+ default_factory=list,
513
829
  description="List of memory definitions used in this application.",
514
830
  )
515
- models: list[ModelType] | None = Field(
516
- default=None, description="List of models used in this application."
831
+ models: list[ModelType] = Field(
832
+ default_factory=list,
833
+ description="List of models used in this application.",
517
834
  )
518
- types: list[CustomType] | None = Field(
519
- default=None,
835
+ types: list[CustomType] = Field(
836
+ default_factory=list,
520
837
  description="List of custom types defined in this application.",
521
838
  )
522
- variables: list[Variable] | None = Field(
523
- default=None, description="List of variables used in this application."
524
- )
525
839
 
526
840
  # Orchestration
527
- flows: list[Flow] | None = Field(
528
- default=None, description="List of flows defined in this application."
841
+ flows: list[Flow] = Field(
842
+ default_factory=list,
843
+ description="List of flows defined in this application.",
529
844
  )
530
845
 
531
846
  # External integrations
532
- auths: list[AuthProviderType] | None = Field(
533
- default=None,
847
+ auths: list[AuthProviderType] = Field(
848
+ default_factory=list,
534
849
  description="List of authorization providers used for API access.",
535
850
  )
536
- tools: list[ToolType] | None = Field(
537
- default=None,
851
+ tools: list[ToolType] = Field(
852
+ default_factory=list,
538
853
  description="List of tools available in this application.",
539
854
  )
540
- indexes: list[IndexType] | None = Field(
541
- default=None,
855
+ indexes: list[IndexType] = Field(
856
+ default_factory=list,
542
857
  description="List of indexes available for search operations.",
543
858
  )
544
859
 
860
+ # Secret management
861
+ secret_manager: SecretManagerType | None = Field(
862
+ default=None,
863
+ description="Optional secret manager configuration for the application.",
864
+ )
865
+
545
866
  # Observability
546
867
  telemetry: TelemetrySink | None = Field(
547
868
  default=None, description="Optional telemetry sink for observability."
548
869
  )
549
870
 
550
871
  # Extensibility
551
- references: list[Document] | None = Field(
552
- default=None,
872
+ references: list[Document] = Field(
873
+ default_factory=list,
553
874
  description="List of other q-type documents you may use. This allows modular composition and reuse of components across applications.",
554
875
  )
555
876
 
@@ -559,6 +880,14 @@ class Application(StrictBaseModel):
559
880
  #
560
881
 
561
882
 
883
+ class ConstantPath(StrictBaseModel):
884
+ uri: str = Field(..., description="A constant Fsspec URI.")
885
+
886
+
887
+ # Lets the user use a constant path or reference a variable
888
+ PathType = ConstantPath | Reference[Variable] | str
889
+
890
+
562
891
  class Source(Step):
563
892
  """Base class for data sources"""
564
893
 
@@ -572,37 +901,64 @@ class Source(Step):
572
901
  class SQLSource(Source):
573
902
  """SQL database source that executes queries and emits rows."""
574
903
 
904
+ type: Literal["SQLSource"] = "SQLSource"
575
905
  query: str = Field(
576
906
  ..., description="SQL query to execute. Inputs are injected as params."
577
907
  )
578
- connection: str = Field(
908
+ connection: str | SecretReference = Field(
579
909
  ...,
580
910
  description="Database connection string or reference to auth provider. Typically in SQLAlchemy format.",
581
911
  )
582
- auth: AuthProviderType | str | None = Field(
912
+ auth: Reference[AuthProviderType] | str | None = Field(
583
913
  default=None,
584
914
  description="Optional AuthorizationProvider for database authentication.",
585
915
  )
586
916
 
587
- @model_validator(mode="after")
588
- def validate_sql_source(self) -> "SQLSource":
589
- """Validate SQL source configuration."""
590
- if self.outputs is None:
591
- raise ValueError(
592
- "SQLSource must define output variables that match the result columns."
593
- )
594
- return self
595
917
 
918
+ class FileSource(Source):
919
+ """File source that reads data from a file using fsspec-compatible URIs."""
596
920
 
597
- class Sink(Step):
598
- """Base class for data sinks"""
921
+ type: Literal["FileSource"] = "FileSource"
922
+ path: PathType = Field(
923
+ default=...,
924
+ description="Reference to a variable with an fsspec-compatible URI to read from, or the uri itself.",
925
+ )
599
926
 
600
- id: str = Field(..., description="Unique ID of the data sink.")
601
- # Remove cardinality field - it's always one for sinks
602
- # ...existing code...
603
- cardinality: Literal[StepCardinality.one] = Field(
604
- default=StepCardinality.one,
605
- description="Flows always emit exactly one instance of the outputs.",
927
+
928
+ class Writer(Step, BatchableStepMixin):
929
+ """Base class for things that write data in batches."""
930
+
931
+ id: str = Field(..., description="Unique ID of the data writer.")
932
+
933
+
934
+ class FileWriter(Writer, BatchableStepMixin):
935
+ """File writer that writes data to a file using fsspec-compatible URIs."""
936
+
937
+ type: Literal["FileWriter"] = "FileWriter"
938
+ path: PathType = Field(
939
+ default=...,
940
+ description="Reference to a variable with an fsspec-compatible URI to read from, or the uri itself.",
941
+ )
942
+ batch_config: BatchConfig = Field(
943
+ default_factory=partial(BatchConfig, batch_size=sys.maxsize),
944
+ description="Configuration for processing the input stream in batches. If omitted, the step processes items one by one.",
945
+ )
946
+
947
+
948
+ class Aggregate(Step):
949
+ """
950
+ A terminal step that consumes an entire input stream and produces a single
951
+ summary message with success/error counts.
952
+ """
953
+
954
+ type: Literal["Aggregate"] = "Aggregate"
955
+ cardinality: Literal[StepCardinality.one] = StepCardinality.one
956
+
957
+ # Outputs are now optional. The user can provide 0, 1, 2, or 3 names.
958
+ # The order will be: success_count, error_count, total_count
959
+ outputs: list[Reference[Variable] | str] = Field(
960
+ default_factory=list,
961
+ description="References to the variables for the output. There should be one and only one output with type AggregateStats",
606
962
  )
607
963
 
608
964
 
@@ -611,23 +967,95 @@ class Sink(Step):
611
967
  #
612
968
 
613
969
 
970
+ class DocumentSource(Source):
971
+ """A source of documents that will be used in retrieval augmented generation.
972
+ It uses LlamaIndex readers to load one or more raw Documents
973
+ from a specified path or system (e.g., Google Drive, web page).
974
+ See https://github.com/run-llama/llama_index/tree/main/llama-index-integrations/readers
975
+ """
976
+
977
+ type: Literal["DocumentSource"] = "DocumentSource"
978
+ reader_module: str = Field(
979
+ ...,
980
+ description="Module path of the LlamaIndex Reader).",
981
+ )
982
+ args: dict[str, Any] = Field(
983
+ default_factory=dict,
984
+ description="Reader-specific arguments to pass to the Reader constructor.",
985
+ )
986
+ loader_args: dict[str, Any] = Field(
987
+ default_factory=dict,
988
+ description="Loader-specific arguments to pass to the load_data method.",
989
+ )
990
+ auth: Reference[AuthProviderType] | str | None = Field(
991
+ default=None,
992
+ description="AuthorizationProvider for accessing the source.",
993
+ )
994
+
995
+
996
+ class DocToTextConverter(Step, ConcurrentStepMixin):
997
+ """Defines a step to convert raw documents (e.g., PDF, DOCX) loaded by a DocumentSource into plain text
998
+ using an external tool like Docling or LlamaParse for pre-processing before chunking.
999
+ The input and output are both RAGDocument, but the output after processing will have content of type markdown.
1000
+ """
1001
+
1002
+ type: Literal["DocToTextConverter"] = "DocToTextConverter"
1003
+
1004
+
1005
+ class DocumentSplitter(Step, ConcurrentStepMixin):
1006
+ """Configuration for chunking/splitting documents into embeddable nodes/chunks."""
1007
+
1008
+ type: Literal["DocumentSplitter"] = "DocumentSplitter"
1009
+ cardinality: Literal[StepCardinality.many] = Field(
1010
+ default=StepCardinality.many,
1011
+ description="Consumes one document and emits 0...N nodes/chunks.",
1012
+ )
1013
+
1014
+ splitter_name: str = Field(
1015
+ default="SentenceSplitter",
1016
+ description="Name of the LlamaIndex TextSplitter class.",
1017
+ )
1018
+ chunk_size: int = Field(default=1024, description="Size of each chunk.")
1019
+ chunk_overlap: int = Field(
1020
+ default=20, description="Overlap between consecutive chunks."
1021
+ )
1022
+ args: dict[str, Any] = Field(
1023
+ default_factory=dict,
1024
+ description="Additional arguments specific to the chosen splitter class.",
1025
+ )
1026
+
1027
+
1028
+ class DocumentEmbedder(Step, ConcurrentStepMixin):
1029
+ """Embeds document chunks using a specified embedding model."""
1030
+
1031
+ type: Literal["DocumentEmbedder"] = "DocumentEmbedder"
1032
+ cardinality: Literal[StepCardinality.many] = Field(
1033
+ default=StepCardinality.many,
1034
+ description="Consumes one chunk and emits one embedded chunk.",
1035
+ )
1036
+ model: Reference[EmbeddingModel] | str = Field(
1037
+ ..., description="Embedding model to use for vectorization."
1038
+ )
1039
+
1040
+
614
1041
  class Index(StrictBaseModel, ABC):
615
1042
  """Base class for searchable indexes that can be queried by search steps."""
616
1043
 
617
1044
  id: str = Field(..., description="Unique ID of the index.")
618
- args: dict[str, Any] | None = Field(
619
- default=None,
1045
+ args: dict[str, Any] = Field(
1046
+ default_factory=dict,
620
1047
  description="Index-specific configuration and connection parameters.",
621
1048
  )
622
- auth: AuthProviderType | str | None = Field(
1049
+ auth: Reference[AuthProviderType] | str | None = Field(
623
1050
  default=None,
624
1051
  description="AuthorizationProvider for accessing the index.",
625
1052
  )
626
1053
  name: str = Field(..., description="Name of the index/collection/table.")
627
1054
 
628
1055
 
629
- class IndexUpsert(Sink):
630
- index: IndexType | str = Field(
1056
+ class IndexUpsert(Writer):
1057
+ type: Literal["IndexUpsert"] = "IndexUpsert"
1058
+ index: Reference[IndexType] | str = Field(
631
1059
  ..., description="Index to upsert into (object or ID reference)."
632
1060
  )
633
1061
 
@@ -635,7 +1063,12 @@ class IndexUpsert(Sink):
635
1063
  class VectorIndex(Index):
636
1064
  """Vector database index for similarity search using embeddings."""
637
1065
 
638
- embedding_model: EmbeddingModel | str = Field(
1066
+ type: Literal["VectorIndex"] = "VectorIndex"
1067
+ module: str = Field(
1068
+ ...,
1069
+ description="Python module path for the vector store implementation (e.g., 'llama_index.vector_stores.qdrant.QdrantVectorStore').",
1070
+ )
1071
+ embedding_model: Reference[EmbeddingModel] | str = Field(
639
1072
  ...,
640
1073
  description="Embedding model used to vectorize queries and documents.",
641
1074
  )
@@ -644,102 +1077,167 @@ class VectorIndex(Index):
644
1077
  class DocumentIndex(Index):
645
1078
  """Document search index for text-based search (e.g., Elasticsearch, OpenSearch)."""
646
1079
 
647
- # TODO: add anything that is needed for document search indexes
648
- pass
1080
+ type: Literal["DocumentIndex"] = "DocumentIndex"
1081
+ endpoint: str = Field(
1082
+ ...,
1083
+ description="URL endpoint for the search cluster (e.g., https://my-cluster.es.amazonaws.com).",
1084
+ )
1085
+ id_field: str | None = Field(
1086
+ default=None,
1087
+ description=(
1088
+ "Field name to use as document ID. "
1089
+ "If not specified, auto-detects from: _id, id, doc_id, document_id, or uuid. "
1090
+ "If all are missing, a UUID is generated."
1091
+ ),
1092
+ )
649
1093
 
650
1094
 
651
1095
  class Search(Step, ABC):
652
1096
  """Base class for search operations against indexes."""
653
1097
 
654
- filters: dict[str, Any] | None = Field(
655
- default=None, description="Optional filters to apply during search."
1098
+ filters: dict[str, Any] = Field(
1099
+ default_factory=dict,
1100
+ description="Optional filters to apply during search.",
656
1101
  )
657
- index: IndexType | str = Field(
1102
+ index: Reference[IndexType] | str = Field(
658
1103
  ..., description="Index to search against (object or ID reference)."
659
1104
  )
1105
+ default_top_k: int | None = Field(
1106
+ default=10,
1107
+ description="Number of top results to retrieve if not provided in the inputs.",
1108
+ )
660
1109
 
661
1110
 
662
- class VectorSearch(Search):
1111
+ class VectorSearch(Search, BatchableStepMixin):
663
1112
  """Performs vector similarity search against a vector index."""
664
1113
 
665
- default_top_k: int | None = Field(
666
- default=50,
667
- description="Number of top results to retrieve if not provided in the inputs.",
1114
+ type: Literal["VectorSearch"] = "VectorSearch"
1115
+ index: Reference[VectorIndex] | str = Field(
1116
+ ..., description="Index to search against (object or ID reference)."
668
1117
  )
669
1118
 
670
- @model_validator(mode="after")
671
- def set_default_inputs_outputs(self) -> "VectorSearch":
672
- """Set default input and output variables if none provided."""
673
- if self.inputs is None:
674
- self.inputs = [
675
- Variable(id="top_k", type=PrimitiveTypeEnum.int),
676
- Variable(id="query", type=PrimitiveTypeEnum.text),
677
- ]
678
-
679
- if self.outputs is None:
680
- self.outputs = [Variable(id=f"{self.id}.results", type=Embedding)]
681
- return self
682
-
683
1119
 
684
- class DocumentSearch(Search):
1120
+ class DocumentSearch(Search, ConcurrentStepMixin):
685
1121
  """Performs document search against a document index."""
686
1122
 
687
- @model_validator(mode="after")
688
- def set_default_inputs_outputs(self) -> "DocumentSearch":
689
- """Set default input and output variables if none provided."""
690
- if self.inputs is None:
691
- self.inputs = [Variable(id="query", type=PrimitiveTypeEnum.text)]
692
-
693
- if self.outputs is None:
694
- self.outputs = [
695
- Variable(id=f"{self.id}.results", type=PrimitiveTypeEnum.text)
696
- ]
697
- return self
1123
+ type: Literal["DocumentSearch"] = "DocumentSearch"
1124
+ index: Reference[DocumentIndex] | str = Field(
1125
+ ..., description="Index to search against (object or ID reference)."
1126
+ )
1127
+ query_args: dict[str, Any] = Field(
1128
+ default={
1129
+ "type": "best_fields",
1130
+ "fields": ["*"],
1131
+ },
1132
+ description="The arguments (other than 'query') to specify to the query shape (see https://docs.opensearch.org/latest/query-dsl/full-text/multi-match/).",
1133
+ )
1134
+
1135
+
1136
+ class Reranker(Step):
1137
+ """Reranks a list of documents based on relevance to a query using an LLM."""
1138
+
1139
+ type: Literal["Reranker"] = "Reranker"
1140
+
1141
+
1142
+ # TODO: create a reranker that supports llamaindex rerankers...
1143
+
1144
+
1145
+ class BedrockReranker(Reranker, ConcurrentStepMixin):
1146
+ """Reranks documents using an AWS Bedrock model."""
1147
+
1148
+ type: Literal["BedrockReranker"] = "BedrockReranker"
1149
+ auth: Reference[AWSAuthProvider] | str | None = Field(
1150
+ default=None,
1151
+ description="AWS authorization provider for Bedrock access.",
1152
+ )
1153
+ model_id: str = Field(
1154
+ ...,
1155
+ description="Bedrock model ID to use for reranking. See https://docs.aws.amazon.com/bedrock/latest/userguide/rerank-supported.html",
1156
+ )
1157
+ num_results: int | None = Field(
1158
+ default=None,
1159
+ description="Return this many results.",
1160
+ )
698
1161
 
699
1162
 
700
1163
  # Create a union type for all tool types
701
- ToolType = Union[
702
- APITool,
703
- PythonFunctionTool,
1164
+ ToolType = Annotated[
1165
+ Union[
1166
+ APITool,
1167
+ PythonFunctionTool,
1168
+ ],
1169
+ Field(discriminator="type"),
704
1170
  ]
705
1171
 
706
1172
  # Create a union type for all source types
707
- SourceType = Union[SQLSource,]
1173
+ SourceType = Union[
1174
+ DocumentSource,
1175
+ FileSource,
1176
+ SQLSource,
1177
+ ]
708
1178
 
709
1179
  # Create a union type for all authorization provider types
710
1180
  AuthProviderType = Union[
711
1181
  APIKeyAuthProvider,
1182
+ BearerTokenAuthProvider,
712
1183
  AWSAuthProvider,
713
1184
  OAuth2AuthProvider,
1185
+ VertexAuthProvider,
1186
+ ]
1187
+
1188
+ # Create a union type for all secret manager types
1189
+ SecretManagerType = Annotated[
1190
+ Union[
1191
+ AWSSecretManager
1192
+ # Add future managers like KubernetesSecretManager here
1193
+ ],
1194
+ Field(discriminator="type"),
714
1195
  ]
715
1196
 
716
1197
  # Create a union type for all step types
717
- StepType = Union[
718
- Agent,
719
- APITool,
720
- Condition,
721
- Decoder,
722
- DocumentSearch,
723
- Flow,
724
- IndexUpsert,
725
- LLMInference,
726
- PromptTemplate,
727
- PythonFunctionTool,
728
- SQLSource,
729
- Sink,
730
- VectorSearch,
1198
+ StepType = Annotated[
1199
+ Union[
1200
+ Agent,
1201
+ Aggregate,
1202
+ BedrockReranker,
1203
+ Decoder,
1204
+ DocToTextConverter,
1205
+ DocumentEmbedder,
1206
+ DocumentSearch,
1207
+ DocumentSplitter,
1208
+ DocumentSource,
1209
+ Echo,
1210
+ FieldExtractor,
1211
+ FileSource,
1212
+ FileWriter,
1213
+ IndexUpsert,
1214
+ InvokeEmbedding,
1215
+ InvokeFlow,
1216
+ InvokeTool,
1217
+ LLMInference,
1218
+ PromptTemplate,
1219
+ SQLSource,
1220
+ VectorSearch,
1221
+ ],
1222
+ Field(discriminator="type"),
731
1223
  ]
732
1224
 
733
1225
  # Create a union type for all index types
734
- IndexType = Union[
735
- DocumentIndex,
736
- VectorIndex,
1226
+ IndexType = Annotated[
1227
+ Union[
1228
+ DocumentIndex,
1229
+ VectorIndex,
1230
+ ],
1231
+ Field(discriminator="type"),
737
1232
  ]
738
1233
 
739
1234
  # Create a union type for all model types
740
- ModelType = Union[
741
- EmbeddingModel,
742
- Model,
1235
+ ModelType = Annotated[
1236
+ Union[
1237
+ EmbeddingModel,
1238
+ Model,
1239
+ ],
1240
+ Field(discriminator="type"),
743
1241
  ]
744
1242
 
745
1243
  #
@@ -754,12 +1252,6 @@ class AuthorizationProviderList(RootModel[list[AuthProviderType]]):
754
1252
  root: list[AuthProviderType]
755
1253
 
756
1254
 
757
- class IndexList(RootModel[list[IndexType]]):
758
- """Schema for a standalone list of indexes."""
759
-
760
- root: list[IndexType]
761
-
762
-
763
1255
  class ModelList(RootModel[list[ModelType]]):
764
1256
  """Schema for a standalone list of models."""
765
1257
 
@@ -785,11 +1277,8 @@ class VariableList(RootModel[list[Variable]]):
785
1277
 
786
1278
 
787
1279
  DocumentType = Union[
788
- Agent,
789
1280
  Application,
790
1281
  AuthorizationProviderList,
791
- Flow,
792
- IndexList,
793
1282
  ModelList,
794
1283
  ToolList,
795
1284
  TypeList,