qtype 0.0.12__py3-none-any.whl → 0.1.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (137) hide show
  1. qtype/application/commons/tools.py +1 -1
  2. qtype/application/converters/tools_from_api.py +476 -11
  3. qtype/application/converters/tools_from_module.py +38 -14
  4. qtype/application/converters/types.py +15 -30
  5. qtype/application/documentation.py +1 -1
  6. qtype/application/facade.py +102 -85
  7. qtype/base/types.py +227 -7
  8. qtype/cli.py +5 -1
  9. qtype/commands/convert.py +52 -6
  10. qtype/commands/generate.py +44 -4
  11. qtype/commands/run.py +78 -36
  12. qtype/commands/serve.py +74 -44
  13. qtype/commands/validate.py +37 -14
  14. qtype/commands/visualize.py +46 -25
  15. qtype/dsl/__init__.py +6 -5
  16. qtype/dsl/custom_types.py +1 -1
  17. qtype/dsl/domain_types.py +86 -5
  18. qtype/dsl/linker.py +384 -0
  19. qtype/dsl/loader.py +315 -0
  20. qtype/dsl/model.py +751 -263
  21. qtype/dsl/parser.py +200 -0
  22. qtype/dsl/types.py +50 -0
  23. qtype/interpreter/api.py +63 -136
  24. qtype/interpreter/auth/aws.py +19 -9
  25. qtype/interpreter/auth/generic.py +93 -16
  26. qtype/interpreter/base/base_step_executor.py +436 -0
  27. qtype/interpreter/base/batch_step_executor.py +171 -0
  28. qtype/interpreter/base/exceptions.py +50 -0
  29. qtype/interpreter/base/executor_context.py +91 -0
  30. qtype/interpreter/base/factory.py +84 -0
  31. qtype/interpreter/base/progress_tracker.py +110 -0
  32. qtype/interpreter/base/secrets.py +339 -0
  33. qtype/interpreter/base/step_cache.py +74 -0
  34. qtype/interpreter/base/stream_emitter.py +469 -0
  35. qtype/interpreter/conversions.py +471 -22
  36. qtype/interpreter/converters.py +79 -0
  37. qtype/interpreter/endpoints.py +355 -0
  38. qtype/interpreter/executors/agent_executor.py +242 -0
  39. qtype/interpreter/executors/aggregate_executor.py +93 -0
  40. qtype/interpreter/executors/bedrock_reranker_executor.py +195 -0
  41. qtype/interpreter/executors/decoder_executor.py +163 -0
  42. qtype/interpreter/executors/doc_to_text_executor.py +112 -0
  43. qtype/interpreter/executors/document_embedder_executor.py +107 -0
  44. qtype/interpreter/executors/document_search_executor.py +113 -0
  45. qtype/interpreter/executors/document_source_executor.py +118 -0
  46. qtype/interpreter/executors/document_splitter_executor.py +105 -0
  47. qtype/interpreter/executors/echo_executor.py +63 -0
  48. qtype/interpreter/executors/field_extractor_executor.py +165 -0
  49. qtype/interpreter/executors/file_source_executor.py +101 -0
  50. qtype/interpreter/executors/file_writer_executor.py +110 -0
  51. qtype/interpreter/executors/index_upsert_executor.py +232 -0
  52. qtype/interpreter/executors/invoke_embedding_executor.py +92 -0
  53. qtype/interpreter/executors/invoke_flow_executor.py +51 -0
  54. qtype/interpreter/executors/invoke_tool_executor.py +358 -0
  55. qtype/interpreter/executors/llm_inference_executor.py +272 -0
  56. qtype/interpreter/executors/prompt_template_executor.py +78 -0
  57. qtype/interpreter/executors/sql_source_executor.py +106 -0
  58. qtype/interpreter/executors/vector_search_executor.py +91 -0
  59. qtype/interpreter/flow.py +173 -22
  60. qtype/interpreter/logging_progress.py +61 -0
  61. qtype/interpreter/metadata_api.py +115 -0
  62. qtype/interpreter/resource_cache.py +5 -4
  63. qtype/interpreter/rich_progress.py +225 -0
  64. qtype/interpreter/stream/chat/__init__.py +15 -0
  65. qtype/interpreter/stream/chat/converter.py +391 -0
  66. qtype/interpreter/{chat → stream/chat}/file_conversions.py +2 -2
  67. qtype/interpreter/stream/chat/ui_request_to_domain_type.py +140 -0
  68. qtype/interpreter/stream/chat/vercel.py +609 -0
  69. qtype/interpreter/stream/utils/__init__.py +15 -0
  70. qtype/interpreter/stream/utils/build_vercel_ai_formatter.py +74 -0
  71. qtype/interpreter/stream/utils/callback_to_stream.py +66 -0
  72. qtype/interpreter/stream/utils/create_streaming_response.py +18 -0
  73. qtype/interpreter/stream/utils/default_chat_extract_text.py +20 -0
  74. qtype/interpreter/stream/utils/error_streaming_response.py +20 -0
  75. qtype/interpreter/telemetry.py +135 -8
  76. qtype/interpreter/tools/__init__.py +5 -0
  77. qtype/interpreter/tools/function_tool_helper.py +265 -0
  78. qtype/interpreter/types.py +330 -0
  79. qtype/interpreter/typing.py +83 -89
  80. qtype/interpreter/ui/404/index.html +1 -1
  81. qtype/interpreter/ui/404.html +1 -1
  82. qtype/interpreter/ui/_next/static/{OT8QJQW3J70VbDWWfrEMT → 20HoJN6otZ_LyHLHpCPE6}/_buildManifest.js +1 -1
  83. qtype/interpreter/ui/_next/static/chunks/434-b2112d19f25c44ff.js +36 -0
  84. qtype/interpreter/ui/_next/static/chunks/{964-ed4ab073db645007.js → 964-2b041321a01cbf56.js} +1 -1
  85. qtype/interpreter/ui/_next/static/chunks/app/{layout-5ccbc44fd528d089.js → layout-a05273ead5de2c41.js} +1 -1
  86. qtype/interpreter/ui/_next/static/chunks/app/page-8c67d16ac90d23cb.js +1 -0
  87. qtype/interpreter/ui/_next/static/chunks/ba12c10f-546f2714ff8abc66.js +1 -0
  88. qtype/interpreter/ui/_next/static/chunks/{main-6d261b6c5d6fb6c2.js → main-e26b9cb206da2cac.js} +1 -1
  89. qtype/interpreter/ui/_next/static/chunks/webpack-08642e441b39b6c2.js +1 -0
  90. qtype/interpreter/ui/_next/static/css/8a8d1269e362fef7.css +3 -0
  91. qtype/interpreter/ui/_next/static/media/4cf2300e9c8272f7-s.p.woff2 +0 -0
  92. qtype/interpreter/ui/icon.png +0 -0
  93. qtype/interpreter/ui/index.html +1 -1
  94. qtype/interpreter/ui/index.txt +5 -5
  95. qtype/semantic/checker.py +643 -0
  96. qtype/semantic/generate.py +268 -85
  97. qtype/semantic/loader.py +95 -0
  98. qtype/semantic/model.py +535 -163
  99. qtype/semantic/resolver.py +63 -19
  100. qtype/semantic/visualize.py +50 -35
  101. {qtype-0.0.12.dist-info → qtype-0.1.3.dist-info}/METADATA +21 -4
  102. qtype-0.1.3.dist-info/RECORD +137 -0
  103. qtype/dsl/base_types.py +0 -38
  104. qtype/dsl/validator.py +0 -464
  105. qtype/interpreter/batch/__init__.py +0 -0
  106. qtype/interpreter/batch/flow.py +0 -95
  107. qtype/interpreter/batch/sql_source.py +0 -95
  108. qtype/interpreter/batch/step.py +0 -63
  109. qtype/interpreter/batch/types.py +0 -41
  110. qtype/interpreter/batch/utils.py +0 -179
  111. qtype/interpreter/chat/chat_api.py +0 -237
  112. qtype/interpreter/chat/vercel.py +0 -314
  113. qtype/interpreter/exceptions.py +0 -10
  114. qtype/interpreter/step.py +0 -67
  115. qtype/interpreter/steps/__init__.py +0 -0
  116. qtype/interpreter/steps/agent.py +0 -114
  117. qtype/interpreter/steps/condition.py +0 -36
  118. qtype/interpreter/steps/decoder.py +0 -88
  119. qtype/interpreter/steps/llm_inference.py +0 -150
  120. qtype/interpreter/steps/prompt_template.py +0 -54
  121. qtype/interpreter/steps/search.py +0 -24
  122. qtype/interpreter/steps/tool.py +0 -53
  123. qtype/interpreter/streaming_helpers.py +0 -123
  124. qtype/interpreter/ui/_next/static/chunks/736-7fc606e244fedcb1.js +0 -36
  125. qtype/interpreter/ui/_next/static/chunks/app/page-c72e847e888e549d.js +0 -1
  126. qtype/interpreter/ui/_next/static/chunks/ba12c10f-22556063851a6df2.js +0 -1
  127. qtype/interpreter/ui/_next/static/chunks/webpack-8289c17c67827f22.js +0 -1
  128. qtype/interpreter/ui/_next/static/css/a262c53826df929b.css +0 -3
  129. qtype/interpreter/ui/_next/static/media/569ce4b8f30dc480-s.p.woff2 +0 -0
  130. qtype/interpreter/ui/favicon.ico +0 -0
  131. qtype/loader.py +0 -389
  132. qtype-0.0.12.dist-info/RECORD +0 -105
  133. /qtype/interpreter/ui/_next/static/{OT8QJQW3J70VbDWWfrEMT → 20HoJN6otZ_LyHLHpCPE6}/_ssgManifest.js +0 -0
  134. {qtype-0.0.12.dist-info → qtype-0.1.3.dist-info}/WHEEL +0 -0
  135. {qtype-0.0.12.dist-info → qtype-0.1.3.dist-info}/entry_points.txt +0 -0
  136. {qtype-0.0.12.dist-info → qtype-0.1.3.dist-info}/licenses/LICENSE +0 -0
  137. {qtype-0.0.12.dist-info → qtype-0.1.3.dist-info}/top_level.txt +0 -0
qtype/semantic/model.py CHANGED
@@ -13,18 +13,27 @@ Types are ignored since they should reflect dsl directly, which is type checked.
13
13
 
14
14
  from __future__ import annotations
15
15
 
16
- from typing import Any, Literal
17
-
18
- from pydantic import BaseModel, Field, model_validator
19
-
20
- # Import enums and type aliases from DSL
16
+ from functools import partial
17
+ from typing import Any, Literal, Union
18
+
19
+ from pydantic import BaseModel, Field
20
+
21
+ # Import enums, mixins, and type aliases
22
+ from qtype.base.types import ( # noqa: F401
23
+ BatchableStepMixin,
24
+ BatchConfig,
25
+ CachedStepMixin,
26
+ ConcurrencyConfig,
27
+ ConcurrentStepMixin,
28
+ )
21
29
  from qtype.dsl.model import VariableType # noqa: F401
22
30
  from qtype.dsl.model import ( # noqa: F401
23
31
  CustomType,
24
32
  DecoderFormat,
33
+ ListType,
25
34
  PrimitiveTypeEnum,
26
35
  StepCardinality,
27
- StructuralTypeEnum,
36
+ ToolParameter,
28
37
  )
29
38
  from qtype.dsl.model import Variable as DSLVariable # noqa: F401
30
39
  from qtype.semantic.base_types import ImmutableModel
@@ -48,6 +57,58 @@ class AuthorizationProvider(ImmutableModel):
48
57
  type: str = Field(..., description="Authorization method type.")
49
58
 
50
59
 
60
+ class Tool(ImmutableModel):
61
+ """
62
+ Base class for callable functions or external operations available to the model or as a step in a flow.
63
+ """
64
+
65
+ id: str = Field(..., description="Unique ID of this component.")
66
+ name: str = Field(..., description="Name of the tool function.")
67
+ description: str = Field(
68
+ ..., description="Description of what the tool does."
69
+ )
70
+ inputs: dict[str, ToolParameter] = Field(
71
+ default_factory=dict,
72
+ description="Input parameters required by this tool.",
73
+ )
74
+ outputs: dict[str, ToolParameter] = Field(
75
+ default_factory=dict,
76
+ description="Output parameters produced by this tool.",
77
+ )
78
+
79
+
80
+ class SecretManager(BaseModel):
81
+ """Base class for secret manager configurations."""
82
+
83
+ id: str = Field(
84
+ ..., description="Unique ID for this secret manager configuration."
85
+ )
86
+ type: str = Field(..., description="The type of secret manager.")
87
+ auth: AuthorizationProvider = Field(
88
+ ...,
89
+ description="AuthorizationProvider used to access this secret manager.",
90
+ )
91
+
92
+
93
+ class Step(CachedStepMixin, BaseModel):
94
+ """Base class for components that take inputs and produce outputs."""
95
+
96
+ id: str = Field(..., description="Unique ID of this component.")
97
+ type: str = Field(..., description="Type of the step component.")
98
+ cardinality: StepCardinality = Field(
99
+ StepCardinality.one,
100
+ description="Does this step emit 1 (one) or 0...N (many) instances of the outputs?",
101
+ )
102
+ inputs: list[Variable] = Field(
103
+ default_factory=list,
104
+ description="References to the variables required by this step.",
105
+ )
106
+ outputs: list[Variable] = Field(
107
+ default_factory=list,
108
+ description="References to the variables where output is stored.",
109
+ )
110
+
111
+
51
112
  class Application(BaseModel):
52
113
  """Defines a complete QType application specification.
53
114
 
@@ -64,51 +125,52 @@ class Application(BaseModel):
64
125
  None, description="Optional description of the application."
65
126
  )
66
127
  memories: list[Memory] = Field(
67
- [], description="List of memory definitions used in this application."
128
+ default_factory=list,
129
+ description="List of memory definitions used in this application.",
68
130
  )
69
131
  models: list[Model] = Field(
70
- [], description="List of models used in this application."
132
+ default_factory=list,
133
+ description="List of models used in this application.",
71
134
  )
72
135
  types: list[CustomType] = Field(
73
- [], description="List of custom types defined in this application."
74
- )
75
- variables: list[Variable] = Field(
76
- [], description="List of variables used in this application."
136
+ default_factory=list,
137
+ description="List of custom types defined in this application.",
77
138
  )
78
139
  flows: list[Flow] = Field(
79
- [], description="List of flows defined in this application."
140
+ default_factory=list,
141
+ description="List of flows defined in this application.",
80
142
  )
81
- auths: list[APIKeyAuthProvider | AWSAuthProvider | OAuth2AuthProvider] = (
82
- Field(
83
- [],
84
- description="List of authorization providers used for API access.",
85
- )
143
+ auths: list[AuthorizationProvider] = Field(
144
+ default_factory=list,
145
+ description="List of authorization providers used for API access.",
86
146
  )
87
147
  tools: list[Tool] = Field(
88
- [], description="List of tools available in this application."
148
+ default_factory=list,
149
+ description="List of tools available in this application.",
89
150
  )
90
151
  indexes: list[Index] = Field(
91
- [], description="List of indexes available for search operations."
152
+ default_factory=list,
153
+ description="List of indexes available for search operations.",
154
+ )
155
+ secret_manager: AWSSecretManager | None = Field(
156
+ None,
157
+ description="Optional secret manager configuration for the application.",
92
158
  )
93
159
  telemetry: TelemetrySink | None = Field(
94
160
  None, description="Optional telemetry sink for observability."
95
161
  )
96
162
 
97
163
 
98
- class Step(BaseModel):
99
- """Base class for components that take inputs and produce outputs."""
164
+ class AuthorizationProviderList(BaseModel):
165
+ """Schema for a standalone list of authorization providers."""
100
166
 
101
- id: str = Field(..., description="Unique ID of this component.")
102
- cardinality: StepCardinality = Field(
103
- StepCardinality.one,
104
- description="Does this step emit 1 (one) or 0...N (many) instances of the outputs?",
105
- )
106
- inputs: list[Variable] = Field(
107
- [], description="Input variables required by this step."
108
- )
109
- outputs: list[Variable] = Field(
110
- [], description="Variable where output is stored."
111
- )
167
+ root: list[AuthorizationProvider] = Field(...)
168
+
169
+
170
+ class ConstantPath(BaseModel):
171
+ """Semantic version of ConstantPath."""
172
+
173
+ uri: str = Field(..., description="A constant Fsspec URI.")
112
174
 
113
175
 
114
176
  class Index(ImmutableModel):
@@ -116,13 +178,11 @@ class Index(ImmutableModel):
116
178
 
117
179
  id: str = Field(..., description="Unique ID of the index.")
118
180
  args: dict[str, Any] = Field(
119
- {},
181
+ default_factory=dict,
120
182
  description="Index-specific configuration and connection parameters.",
121
183
  )
122
- auth: APIKeyAuthProvider | AWSAuthProvider | OAuth2AuthProvider | None = (
123
- Field(
124
- None, description="AuthorizationProvider for accessing the index."
125
- )
184
+ auth: AuthorizationProvider | None = Field(
185
+ None, description="AuthorizationProvider for accessing the index."
126
186
  )
127
187
  name: str = Field(..., description="Name of the index/collection/table.")
128
188
 
@@ -130,20 +190,64 @@ class Index(ImmutableModel):
130
190
  class Model(ImmutableModel):
131
191
  """Describes a generative model configuration, including provider and model ID."""
132
192
 
193
+ type: Literal["Model"] = Field("Model")
133
194
  id: str = Field(..., description="Unique ID for the model.")
134
- auth: APIKeyAuthProvider | AWSAuthProvider | OAuth2AuthProvider | None = (
135
- Field(None, description="AuthorizationProvider used for model access.")
195
+ auth: AuthorizationProvider | None = Field(
196
+ None, description="AuthorizationProvider used for model access."
136
197
  )
137
198
  inference_params: dict[str, Any] = Field(
138
- {},
199
+ default_factory=dict,
139
200
  description="Optional inference parameters like temperature or max_tokens.",
140
201
  )
141
202
  model_id: str | None = Field(
142
203
  None,
143
204
  description="The specific model name or ID for the provider. If None, id is used",
144
205
  )
145
- provider: str = Field(
146
- ..., description="Name of the provider, e.g., openai or anthropic."
206
+ provider: Literal["openai", "anthropic", "aws-bedrock", "gcp-vertex"] = (
207
+ Field(
208
+ ..., description="Name of the provider, e.g., openai or anthropic."
209
+ )
210
+ )
211
+
212
+
213
+ class Flow(BaseModel):
214
+ """Defines a flow of steps that can be executed in sequence or parallel.
215
+ If input or output variables are not specified, they are inferred from
216
+ the first and last step, respectively."""
217
+
218
+ id: str = Field(..., description="Unique ID of the flow.")
219
+ type: Literal["Flow"] = Field("Flow")
220
+ description: str | None = Field(
221
+ None, description="Optional description of the flow."
222
+ )
223
+ steps: list[Step | Step] = Field(
224
+ default_factory=list,
225
+ description="List of steps or references to steps",
226
+ )
227
+ interface: FlowInterface | None = Field(None)
228
+ variables: list[Variable] = Field(
229
+ default_factory=list,
230
+ description="List of variables available at the application scope.",
231
+ )
232
+ inputs: list[Variable] = Field(
233
+ default_factory=list,
234
+ description="Input variables required by this step.",
235
+ )
236
+ outputs: list[Variable] = Field(
237
+ default_factory=list, description="Resulting variables"
238
+ )
239
+
240
+
241
+ class FlowInterface(BaseModel):
242
+ """
243
+ Defines the public-facing contract for a Flow, guiding the UI
244
+ and session management.
245
+ """
246
+
247
+ type: Literal["Complete", "Conversational"] = Field("Complete")
248
+ session_inputs: list[Variable] = Field(
249
+ default_factory=list,
250
+ description="A list of input variable IDs that are set once and then persisted across a session.",
147
251
  )
148
252
 
149
253
 
@@ -163,28 +267,73 @@ class Memory(ImmutableModel):
163
267
  )
164
268
 
165
269
 
270
+ class ModelList(BaseModel):
271
+ """Schema for a standalone list of models."""
272
+
273
+ root: list[Model] = Field(...)
274
+
275
+
276
+ class SecretReference(BaseModel):
277
+ """
278
+ A reference to a secret in the application's configured SecretManager.
279
+ This value is resolved at runtime by the interpreter.
280
+ """
281
+
282
+ secret_name: str = Field(
283
+ ...,
284
+ description="The name, ID, or ARN of the secret to fetch (e.g., 'my-project/db-password').",
285
+ )
286
+ key: str | None = Field(
287
+ None,
288
+ description="Optional key if the secret is a JSON blob or map (e.g., a specific key in a K8s secret).",
289
+ )
290
+
291
+
166
292
  class TelemetrySink(BaseModel):
167
293
  """Defines an observability endpoint for collecting telemetry data from the QType runtime."""
168
294
 
169
295
  id: str = Field(
170
296
  ..., description="Unique ID of the telemetry sink configuration."
171
297
  )
172
- auth: APIKeyAuthProvider | AWSAuthProvider | OAuth2AuthProvider | None = (
173
- Field(
174
- None,
175
- description="AuthorizationProvider used to authenticate telemetry data transmission.",
176
- )
298
+ provider: Literal["Phoenix", "Langfuse"] = Field("Phoenix")
299
+ auth: AuthorizationProvider | None = Field(
300
+ None,
301
+ description="AuthorizationProvider used to authenticate telemetry data transmission.",
177
302
  )
178
- endpoint: str = Field(
303
+ endpoint: str | SecretReference = Field(
179
304
  ..., description="URL endpoint where telemetry data will be sent."
180
305
  )
306
+ args: dict[str, Any] = Field(
307
+ default_factory=dict,
308
+ description="Additional configuration arguments specific to the telemetry sink type.",
309
+ )
310
+
311
+
312
+ class ToolList(BaseModel):
313
+ """Schema for a standalone list of tools."""
314
+
315
+ root: list[Tool] = Field(...)
316
+
317
+
318
+ class TypeList(BaseModel):
319
+ """Schema for a standalone list of type definitions."""
320
+
321
+ root: list[CustomType] = Field(...)
322
+
323
+
324
+ class VariableList(BaseModel):
325
+ """Schema for a standalone list of variables."""
326
+
327
+ root: list[Variable] = Field(...)
181
328
 
182
329
 
183
330
  class APIKeyAuthProvider(AuthorizationProvider):
184
331
  """API key-based authentication provider."""
185
332
 
186
333
  type: Literal["api_key"] = Field("api_key")
187
- api_key: str = Field(..., description="API key for authentication.")
334
+ api_key: str | SecretReference = Field(
335
+ ..., description="API key for authentication."
336
+ )
188
337
  host: str | None = Field(
189
338
  None, description="Base URL or domain of the provider."
190
339
  )
@@ -194,11 +343,13 @@ class AWSAuthProvider(AuthorizationProvider):
194
343
  """AWS authentication provider supporting multiple credential methods."""
195
344
 
196
345
  type: Literal["aws"] = Field("aws")
197
- access_key_id: str | None = Field(None, description="AWS access key ID.")
198
- secret_access_key: str | None = Field(
346
+ access_key_id: str | SecretReference | None = Field(
347
+ None, description="AWS access key ID."
348
+ )
349
+ secret_access_key: str | SecretReference | None = Field(
199
350
  None, description="AWS secret access key."
200
351
  )
201
- session_token: str | None = Field(
352
+ session_token: str | SecretReference | None = Field(
202
353
  None, description="AWS session token for temporary credentials."
203
354
  )
204
355
  profile_name: str | None = Field(
@@ -216,50 +367,251 @@ class AWSAuthProvider(AuthorizationProvider):
216
367
  region: str | None = Field(None, description="AWS region.")
217
368
 
218
369
 
370
+ class BearerTokenAuthProvider(AuthorizationProvider):
371
+ """Bearer token authentication provider."""
372
+
373
+ type: Literal["bearer_token"] = Field("bearer_token")
374
+ token: str | SecretReference = Field(
375
+ ..., description="Bearer token for authentication."
376
+ )
377
+
378
+
219
379
  class OAuth2AuthProvider(AuthorizationProvider):
220
380
  """OAuth2 authentication provider."""
221
381
 
222
382
  type: Literal["oauth2"] = Field("oauth2")
223
383
  client_id: str = Field(..., description="OAuth2 client ID.")
224
- client_secret: str = Field(..., description="OAuth2 client secret.")
384
+ client_secret: str | SecretReference = Field(
385
+ ..., description="OAuth2 client secret."
386
+ )
225
387
  token_url: str = Field(..., description="Token endpoint URL.")
226
- scopes: list[str] = Field([], description="OAuth2 scopes required.")
388
+ scopes: list[str] = Field(
389
+ default_factory=list, description="OAuth2 scopes required."
390
+ )
391
+
392
+
393
+ class VertexAuthProvider(AuthorizationProvider):
394
+ """Google Vertex authentication provider supporting gcloud profile or service account."""
395
+
396
+ type: Literal["vertex"] = Field("vertex")
397
+ profile_name: str | None = Field(
398
+ None,
399
+ description="Local gcloud profile name (if using existing CLI credentials).",
400
+ )
401
+ project_id: str | None = Field(
402
+ None,
403
+ description="Explicit GCP project ID override (if different from profile).",
404
+ )
405
+ service_account_file: str | None = Field(
406
+ None, description="Path to a service account JSON key file."
407
+ )
408
+ region: str | None = Field(
409
+ None, description="Vertex region (e.g., us-central1)."
410
+ )
227
411
 
228
412
 
229
- class Condition(Step):
230
- """Conditional logic gate within a flow. Supports branching logic for execution based on variable values."""
413
+ class APITool(Tool):
414
+ """Tool that invokes an API endpoint."""
231
415
 
232
- else_: Step | None = Field(
416
+ type: Literal["APITool"] = Field("APITool")
417
+ endpoint: str = Field(..., description="API endpoint URL to call.")
418
+ method: str = Field(
419
+ "GET", description="HTTP method to use (GET, POST, PUT, DELETE, etc.)."
420
+ )
421
+ auth: AuthorizationProvider | None = Field(
233
422
  None,
234
- description="Optional step to run if condition fails.",
235
- alias="else",
423
+ description="Optional AuthorizationProvider for API authentication.",
424
+ )
425
+ headers: dict[str, str] = Field(
426
+ default_factory=dict,
427
+ description="Optional HTTP headers to include in the request.",
428
+ )
429
+ parameters: dict[str, ToolParameter] = Field(
430
+ default_factory=dict,
431
+ description="Output parameters produced by this tool.",
432
+ )
433
+
434
+
435
+ class PythonFunctionTool(Tool):
436
+ """Tool that calls a Python function."""
437
+
438
+ type: Literal["PythonFunctionTool"] = Field("PythonFunctionTool")
439
+ function_name: str = Field(
440
+ ..., description="Name of the Python function to call."
236
441
  )
237
- equals: Variable | None = Field(
238
- None, description="Match condition for equality check."
442
+ module_path: str = Field(
443
+ ..., description="Optional module path where the function is defined."
444
+ )
445
+
446
+
447
+ class AWSSecretManager(SecretManager):
448
+ """Configuration for AWS Secrets Manager."""
449
+
450
+ type: Literal["aws_secret_manager"] = Field("aws_secret_manager")
451
+
452
+
453
+ class Aggregate(Step):
454
+ """
455
+ A terminal step that consumes an entire input stream and produces a single
456
+ summary message with success/error counts.
457
+ """
458
+
459
+ type: Literal["Aggregate"] = Field("Aggregate")
460
+ cardinality: Literal[StepCardinality.one] = Field(StepCardinality.one)
461
+ outputs: list[Variable] = Field(
462
+ default_factory=list,
463
+ description="References to the variables for the output. There should be one and only one output with type AggregateStats",
239
464
  )
240
- then: Step = Field(..., description="Step to run if condition matches.")
241
465
 
242
466
 
243
467
  class Decoder(Step):
244
468
  """Defines a step that decodes string data into structured outputs.
245
469
 
246
470
  If parsing fails, the step will raise an error and halt execution.
247
- Use conditional logic in your flow to handle potential parsing errors.
248
- """
471
+ Use conditional logic in your flow to handle potential parsing errors."""
249
472
 
473
+ type: Literal["Decoder"] = Field("Decoder")
250
474
  format: DecoderFormat = Field(
251
475
  DecoderFormat.json,
252
476
  description="Format in which the decoder processes data. Defaults to JSON.",
253
477
  )
254
478
 
255
479
 
256
- class LLMInference(Step):
480
+ class DocToTextConverter(Step, ConcurrentStepMixin):
481
+ """Defines a step to convert raw documents (e.g., PDF, DOCX) loaded by a DocumentSource into plain text
482
+ using an external tool like Docling or LlamaParse for pre-processing before chunking.
483
+ The input and output are both RAGDocument, but the output after processing will have content of type markdown.
484
+ """
485
+
486
+ type: Literal["DocToTextConverter"] = Field("DocToTextConverter")
487
+
488
+
489
+ class DocumentEmbedder(Step, ConcurrentStepMixin):
490
+ """Embeds document chunks using a specified embedding model."""
491
+
492
+ type: Literal["DocumentEmbedder"] = Field("DocumentEmbedder")
493
+ cardinality: Literal[StepCardinality.many] = Field(
494
+ StepCardinality.many,
495
+ description="Consumes one chunk and emits one embedded chunk.",
496
+ )
497
+ model: EmbeddingModel = Field(
498
+ ..., description="Embedding model to use for vectorization."
499
+ )
500
+
501
+
502
+ class DocumentSplitter(Step, ConcurrentStepMixin):
503
+ """Configuration for chunking/splitting documents into embeddable nodes/chunks."""
504
+
505
+ type: Literal["DocumentSplitter"] = Field("DocumentSplitter")
506
+ cardinality: Literal[StepCardinality.many] = Field(
507
+ StepCardinality.many,
508
+ description="Consumes one document and emits 0...N nodes/chunks.",
509
+ )
510
+ splitter_name: str = Field(
511
+ "SentenceSplitter",
512
+ description="Name of the LlamaIndex TextSplitter class.",
513
+ )
514
+ chunk_size: int = Field(1024, description="Size of each chunk.")
515
+ chunk_overlap: int = Field(
516
+ 20, description="Overlap between consecutive chunks."
517
+ )
518
+ args: dict[str, Any] = Field(
519
+ default_factory=dict,
520
+ description="Additional arguments specific to the chosen splitter class.",
521
+ )
522
+
523
+
524
+ class Echo(Step):
525
+ """Defines a step that echoes its inputs as outputs.
526
+
527
+ Useful for debugging flows by inspecting variable values at a specific
528
+ point in the execution pipeline. The step simply passes through all input
529
+ variables as outputs without modification.
530
+ """
531
+
532
+ type: Literal["Echo"] = Field("Echo")
533
+
534
+
535
+ class FieldExtractor(Step):
536
+ """Extracts specific fields from input data using JSONPath expressions.
537
+
538
+ This step uses JSONPath syntax to extract data from structured inputs
539
+ (Pydantic models, dicts, lists). The input is first converted to a dict
540
+ using model_dump() if it's a Pydantic model, then the JSONPath expression
541
+ is evaluated.
542
+
543
+ If the JSONPath matches multiple values, the step yields multiple output
544
+ messages (1-to-many cardinality). If it matches a single value, it yields
545
+ one output message. If it matches nothing, it raises an error.
546
+
547
+ The extracted data is used to construct the output variable by passing it
548
+ as keyword arguments to the output type's constructor.
549
+
550
+ Example JSONPath expressions:
551
+ - `$.field_name` - Extract a single field
552
+ - `$.items[*]` - Extract all items from a list
553
+ - `$.items[?(@.price > 10)]` - Filter items by condition
554
+ """
555
+
556
+ type: Literal["FieldExtractor"] = Field("FieldExtractor")
557
+ json_path: str = Field(
558
+ ...,
559
+ description="JSONPath expression to extract data from the input. Uses jsonpath-ng syntax.",
560
+ )
561
+ fail_on_missing: bool = Field(
562
+ True,
563
+ description="Whether to raise an error if the JSONPath matches no data. If False, returns None.",
564
+ )
565
+
566
+
567
+ class InvokeEmbedding(Step, ConcurrentStepMixin):
568
+ """Defines a step that generates embeddings using an embedding model.
569
+ It takes input variables and produces output variables containing the embeddings."""
570
+
571
+ type: Literal["InvokeEmbedding"] = Field("InvokeEmbedding")
572
+ model: EmbeddingModel = Field(
573
+ ..., description="The embedding model to use."
574
+ )
575
+
576
+
577
+ class InvokeFlow(Step):
578
+ """Invokes a flow with input and output bindings."""
579
+
580
+ type: Literal["InvokeFlow"] = Field("InvokeFlow")
581
+ flow: Flow = Field(..., description="Flow to invoke.")
582
+ input_bindings: dict[Variable, str] = Field(
583
+ ...,
584
+ description="Mapping from variable references to flow input variable IDs.",
585
+ )
586
+ output_bindings: dict[Variable, str] = Field(
587
+ ...,
588
+ description="Mapping from variable references to flow output variable IDs.",
589
+ )
590
+
591
+
592
+ class InvokeTool(Step, ConcurrentStepMixin):
593
+ """Invokes a tool with input and output bindings."""
594
+
595
+ type: Literal["InvokeTool"] = Field("InvokeTool")
596
+ tool: Tool = Field(..., description="Tool to invoke.")
597
+ input_bindings: dict[str, str] = Field(
598
+ ...,
599
+ description="Mapping from variable references to tool input parameter names.",
600
+ )
601
+ output_bindings: dict[str, str] = Field(
602
+ ...,
603
+ description="Mapping from variable references to tool output parameter names.",
604
+ )
605
+
606
+
607
+ class LLMInference(Step, ConcurrentStepMixin):
257
608
  """Defines a step that performs inference using a language model.
258
609
  It can take input variables and produce output variables based on the model's response."""
259
610
 
611
+ type: Literal["LLMInference"] = Field("LLMInference")
260
612
  memory: Memory | None = Field(
261
613
  None,
262
- description="Memory object to retain context across interactions.",
614
+ description="A reference to a Memory object to retain context across interactions.",
263
615
  )
264
616
  model: Model = Field(..., description="The model to use for inference.")
265
617
  system_message: str | None = Field(
@@ -272,30 +624,32 @@ class PromptTemplate(Step):
272
624
  """Defines a prompt template with a string format and variable bindings.
273
625
  This is used to generate prompts dynamically based on input variables."""
274
626
 
627
+ type: Literal["PromptTemplate"] = Field("PromptTemplate")
275
628
  template: str = Field(
276
629
  ...,
277
630
  description="String template for the prompt with variable placeholders.",
278
631
  )
279
632
 
280
633
 
634
+ class Reranker(Step):
635
+ """Reranks a list of documents based on relevance to a query using an LLM."""
636
+
637
+ type: Literal["Reranker"] = Field("Reranker")
638
+
639
+
281
640
  class Search(Step):
282
641
  """Base class for search operations against indexes."""
283
642
 
284
643
  filters: dict[str, Any] = Field(
285
- {}, description="Optional filters to apply during search."
644
+ default_factory=dict,
645
+ description="Optional filters to apply during search.",
286
646
  )
287
647
  index: Index = Field(
288
648
  ..., description="Index to search against (object or ID reference)."
289
649
  )
290
-
291
-
292
- class Sink(Step):
293
- """Base class for data sinks"""
294
-
295
- id: str = Field(..., description="Unique ID of the data sink.")
296
- cardinality: Literal["one"] = Field(
297
- StepCardinality.one,
298
- description="Flows always emit exactly one instance of the outputs.",
650
+ default_top_k: int | None = Field(
651
+ 10,
652
+ description="Number of top results to retrieve if not provided in the inputs.",
299
653
  )
300
654
 
301
655
 
@@ -303,32 +657,40 @@ class Source(Step):
303
657
  """Base class for data sources"""
304
658
 
305
659
  id: str = Field(..., description="Unique ID of the data source.")
306
- cardinality: Literal["many"] = Field(
660
+ cardinality: Literal[StepCardinality.many] = Field(
307
661
  StepCardinality.many,
308
662
  description="Sources always emit 0...N instances of the outputs.",
309
663
  )
310
664
 
311
665
 
312
- class Tool(Step, ImmutableModel):
313
- """
314
- Base class for callable functions or external operations available to the model or as a step in a flow.
315
- """
666
+ class Writer(Step, BatchableStepMixin):
667
+ """Base class for things that write data in batches."""
316
668
 
317
- name: str = Field(..., description="Name of the tool function.")
318
- description: str = Field(
319
- ..., description="Description of what the tool does."
320
- )
669
+ id: str = Field(..., description="Unique ID of the data writer.")
321
670
 
322
671
 
323
672
  class DocumentIndex(Index):
324
673
  """Document search index for text-based search (e.g., Elasticsearch, OpenSearch)."""
325
674
 
326
- pass
675
+ type: Literal["DocumentIndex"] = Field("DocumentIndex")
676
+ endpoint: str = Field(
677
+ ...,
678
+ description="URL endpoint for the search cluster (e.g., https://my-cluster.es.amazonaws.com).",
679
+ )
680
+ id_field: str | None = Field(
681
+ None,
682
+ description="Field name to use as document ID. If not specified, auto-detects from: _id, id, doc_id, document_id, or uuid. If all are missing, a UUID is generated.",
683
+ )
327
684
 
328
685
 
329
686
  class VectorIndex(Index):
330
687
  """Vector database index for similarity search using embeddings."""
331
688
 
689
+ type: Literal["VectorIndex"] = Field("VectorIndex")
690
+ module: str = Field(
691
+ ...,
692
+ description="Python module path for the vector store implementation (e.g., 'llama_index.vector_stores.qdrant.QdrantVectorStore').",
693
+ )
332
694
  embedding_model: EmbeddingModel = Field(
333
695
  ...,
334
696
  description="Embedding model used to vectorize queries and documents.",
@@ -338,6 +700,7 @@ class VectorIndex(Index):
338
700
  class EmbeddingModel(Model):
339
701
  """Describes an embedding model configuration, extending the base Model class."""
340
702
 
703
+ type: Literal["EmbeddingModel"] = Field("EmbeddingModel")
341
704
  dimensions: int = Field(
342
705
  ...,
343
706
  description="Dimensionality of the embedding vectors produced by this model.",
@@ -347,121 +710,130 @@ class EmbeddingModel(Model):
347
710
  class Agent(LLMInference):
348
711
  """Defines an agent that can perform tasks and make decisions based on user input and context."""
349
712
 
713
+ type: Literal["Agent"] = Field("Agent")
350
714
  tools: list[Tool] = Field(
351
- ..., description="List of tools available to the agent."
715
+ default_factory=list,
716
+ description="List of tools available to the agent.",
352
717
  )
353
718
 
354
719
 
355
- class DocumentSearch(Search):
720
+ class BedrockReranker(Reranker, ConcurrentStepMixin):
721
+ """Reranks documents using an AWS Bedrock model."""
722
+
723
+ type: Literal["BedrockReranker"] = Field("BedrockReranker")
724
+ auth: AWSAuthProvider | None = Field(
725
+ None, description="AWS authorization provider for Bedrock access."
726
+ )
727
+ model_id: str = Field(
728
+ ...,
729
+ description="Bedrock model ID to use for reranking. See https://docs.aws.amazon.com/bedrock/latest/userguide/rerank-supported.html",
730
+ )
731
+ num_results: int | None = Field(
732
+ None, description="Return this many results."
733
+ )
734
+
735
+
736
+ class DocumentSearch(Search, ConcurrentStepMixin):
356
737
  """Performs document search against a document index."""
357
738
 
358
- pass
739
+ type: Literal["DocumentSearch"] = Field("DocumentSearch")
740
+ index: DocumentIndex = Field(
741
+ ..., description="Index to search against (object or ID reference)."
742
+ )
743
+ query_args: dict[str, Any] = Field(
744
+ {"type": "best_fields", "fields": ["*"]},
745
+ description="The arguments (other than 'query') to specify to the query shape (see https://docs.opensearch.org/latest/query-dsl/full-text/multi-match/).",
746
+ )
359
747
 
360
748
 
361
- class VectorSearch(Search):
749
+ class VectorSearch(Search, BatchableStepMixin):
362
750
  """Performs vector similarity search against a vector index."""
363
751
 
364
- default_top_k: int | None = Field(
365
- 50,
366
- description="Number of top results to retrieve if not provided in the inputs.",
752
+ type: Literal["VectorSearch"] = Field("VectorSearch")
753
+ index: VectorIndex = Field(
754
+ ..., description="Index to search against (object or ID reference)."
367
755
  )
368
756
 
369
757
 
370
- class IndexUpsert(Sink):
371
- """Semantic version of IndexUpsert."""
758
+ class DocumentSource(Source):
759
+ """A source of documents that will be used in retrieval augmented generation.
760
+ It uses LlamaIndex readers to load one or more raw Documents
761
+ from a specified path or system (e.g., Google Drive, web page).
762
+ See https://github.com/run-llama/llama_index/tree/main/llama-index-integrations/readers
763
+ """
372
764
 
373
- index: Index = Field(
374
- ..., description="Index to upsert into (object or ID reference)."
765
+ type: Literal["DocumentSource"] = Field("DocumentSource")
766
+ reader_module: str = Field(
767
+ ..., description="Module path of the LlamaIndex Reader)."
768
+ )
769
+ args: dict[str, Any] = Field(
770
+ default_factory=dict,
771
+ description="Reader-specific arguments to pass to the Reader constructor.",
772
+ )
773
+ loader_args: dict[str, Any] = Field(
774
+ default_factory=dict,
775
+ description="Loader-specific arguments to pass to the load_data method.",
776
+ )
777
+ auth: AuthorizationProvider | None = Field(
778
+ None, description="AuthorizationProvider for accessing the source."
375
779
  )
376
780
 
377
781
 
378
- class SQLSource(Source):
379
- """SQL database source that executes queries and emits rows."""
782
+ class FileSource(Source):
783
+ """File source that reads data from a file using fsspec-compatible URIs."""
380
784
 
381
- query: str = Field(
382
- ..., description="SQL query to execute. Inputs are injected as params."
383
- )
384
- connection: str = Field(
785
+ type: Literal["FileSource"] = Field("FileSource")
786
+ path: ConstantPath | Variable = Field(
385
787
  ...,
386
- description="Database connection string or reference to auth provider. Typically in SQLAlchemy format.",
387
- )
388
- auth: APIKeyAuthProvider | AWSAuthProvider | OAuth2AuthProvider | None = (
389
- Field(
390
- None,
391
- description="Optional AuthorizationProvider for database authentication.",
392
- )
788
+ description="Reference to a variable with an fsspec-compatible URI to read from, or the uri itself.",
393
789
  )
394
790
 
395
791
 
396
- class SourceType(Source):
792
+ class SQLSource(Source):
397
793
  """SQL database source that executes queries and emits rows."""
398
794
 
795
+ type: Literal["SQLSource"] = Field("SQLSource")
399
796
  query: str = Field(
400
797
  ..., description="SQL query to execute. Inputs are injected as params."
401
798
  )
402
- connection: str = Field(
799
+ connection: str | SecretReference = Field(
403
800
  ...,
404
801
  description="Database connection string or reference to auth provider. Typically in SQLAlchemy format.",
405
802
  )
406
- auth: APIKeyAuthProvider | AWSAuthProvider | OAuth2AuthProvider | None = (
407
- Field(
408
- None,
409
- description="Optional AuthorizationProvider for database authentication.",
410
- )
803
+ auth: AuthorizationProvider | None = Field(
804
+ None,
805
+ description="Optional AuthorizationProvider for database authentication.",
411
806
  )
412
807
 
413
808
 
414
- class APITool(Tool):
415
- """Tool that invokes an API endpoint."""
809
+ class FileWriter(Writer, BatchableStepMixin):
810
+ """File writer that writes data to a file using fsspec-compatible URIs."""
416
811
 
417
- endpoint: str = Field(..., description="API endpoint URL to call.")
418
- method: str = Field(
419
- "GET", description="HTTP method to use (GET, POST, PUT, DELETE, etc.)."
420
- )
421
- auth: APIKeyAuthProvider | AWSAuthProvider | OAuth2AuthProvider | None = (
422
- Field(
423
- None,
424
- description="Optional AuthorizationProvider for API authentication.",
425
- )
812
+ type: Literal["FileWriter"] = Field("FileWriter")
813
+ path: ConstantPath | Variable = Field(
814
+ ...,
815
+ description="Reference to a variable with an fsspec-compatible URI to read from, or the uri itself.",
426
816
  )
427
- headers: dict[str, str] = Field(
428
- {}, description="Optional HTTP headers to include in the request."
817
+ batch_config: BatchConfig = Field(
818
+ default_factory=partial(BatchConfig, batch_size=9223372036854775807),
819
+ description="Configuration for processing the input stream in batches. If omitted, the step processes items one by one.",
429
820
  )
430
821
 
431
822
 
432
- class PythonFunctionTool(Tool):
433
- """Tool that calls a Python function."""
823
+ class IndexUpsert(Writer):
824
+ """Semantic version of IndexUpsert."""
434
825
 
435
- function_name: str = Field(
436
- ..., description="Name of the Python function to call."
437
- )
438
- module_path: str = Field(
439
- ..., description="Optional module path where the function is defined."
826
+ type: Literal["IndexUpsert"] = Field("IndexUpsert")
827
+ index: Index = Field(
828
+ ..., description="Index to upsert into (object or ID reference)."
440
829
  )
441
830
 
442
831
 
443
- class Flow(Step):
444
- """Defines a flow of steps that can be executed in sequence or parallel.
445
- If input or output variables are not specified, they are inferred from
446
- the first and last step, respectively.
447
- """
448
-
449
- description: str | None = Field(
450
- None, description="Optional description of the flow."
451
- )
452
- cardinality: StepCardinality = Field(
453
- StepCardinality.auto,
454
- description="The cardinality of the flow, inferred from its steps when set to 'auto'.",
455
- )
456
- mode: Literal["Complete", "Chat"] = Field("Complete")
457
- steps: list[Step] = Field(..., description="List of steps or step IDs.")
458
-
459
- @model_validator(mode="after")
460
- def infer_cardinality(self) -> "Flow":
461
- if self.cardinality == StepCardinality.auto:
462
- self.cardinality = StepCardinality.one
463
- for step in self.steps:
464
- if step.cardinality == StepCardinality.many:
465
- self.cardinality = StepCardinality.many
466
- break
467
- return self
832
+ DocumentType = Union[
833
+ Application,
834
+ AuthorizationProviderList,
835
+ ModelList,
836
+ ToolList,
837
+ TypeList,
838
+ VariableList,
839
+ ]