qtype 0.0.16__py3-none-any.whl → 0.1.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (128)
  1. qtype/application/commons/tools.py +1 -1
  2. qtype/application/converters/tools_from_api.py +5 -5
  3. qtype/application/converters/tools_from_module.py +2 -2
  4. qtype/application/converters/types.py +14 -43
  5. qtype/application/documentation.py +1 -1
  6. qtype/application/facade.py +94 -73
  7. qtype/base/types.py +227 -7
  8. qtype/cli.py +4 -0
  9. qtype/commands/convert.py +20 -8
  10. qtype/commands/generate.py +19 -27
  11. qtype/commands/run.py +73 -36
  12. qtype/commands/serve.py +74 -54
  13. qtype/commands/validate.py +34 -8
  14. qtype/commands/visualize.py +46 -22
  15. qtype/dsl/__init__.py +6 -5
  16. qtype/dsl/custom_types.py +1 -1
  17. qtype/dsl/domain_types.py +65 -5
  18. qtype/dsl/linker.py +384 -0
  19. qtype/dsl/loader.py +315 -0
  20. qtype/dsl/model.py +612 -363
  21. qtype/dsl/parser.py +200 -0
  22. qtype/dsl/types.py +50 -0
  23. qtype/interpreter/api.py +57 -136
  24. qtype/interpreter/auth/aws.py +19 -9
  25. qtype/interpreter/auth/generic.py +93 -16
  26. qtype/interpreter/base/base_step_executor.py +436 -0
  27. qtype/interpreter/base/batch_step_executor.py +171 -0
  28. qtype/interpreter/base/exceptions.py +50 -0
  29. qtype/interpreter/base/executor_context.py +74 -0
  30. qtype/interpreter/base/factory.py +117 -0
  31. qtype/interpreter/base/progress_tracker.py +110 -0
  32. qtype/interpreter/base/secrets.py +339 -0
  33. qtype/interpreter/base/step_cache.py +74 -0
  34. qtype/interpreter/base/stream_emitter.py +469 -0
  35. qtype/interpreter/conversions.py +462 -22
  36. qtype/interpreter/converters.py +77 -0
  37. qtype/interpreter/endpoints.py +355 -0
  38. qtype/interpreter/executors/agent_executor.py +242 -0
  39. qtype/interpreter/executors/aggregate_executor.py +93 -0
  40. qtype/interpreter/executors/decoder_executor.py +163 -0
  41. qtype/interpreter/executors/doc_to_text_executor.py +112 -0
  42. qtype/interpreter/executors/document_embedder_executor.py +107 -0
  43. qtype/interpreter/executors/document_search_executor.py +122 -0
  44. qtype/interpreter/executors/document_source_executor.py +118 -0
  45. qtype/interpreter/executors/document_splitter_executor.py +105 -0
  46. qtype/interpreter/executors/echo_executor.py +63 -0
  47. qtype/interpreter/executors/field_extractor_executor.py +160 -0
  48. qtype/interpreter/executors/file_source_executor.py +101 -0
  49. qtype/interpreter/executors/file_writer_executor.py +110 -0
  50. qtype/interpreter/executors/index_upsert_executor.py +228 -0
  51. qtype/interpreter/executors/invoke_embedding_executor.py +92 -0
  52. qtype/interpreter/executors/invoke_flow_executor.py +51 -0
  53. qtype/interpreter/executors/invoke_tool_executor.py +358 -0
  54. qtype/interpreter/executors/llm_inference_executor.py +272 -0
  55. qtype/interpreter/executors/prompt_template_executor.py +78 -0
  56. qtype/interpreter/executors/sql_source_executor.py +106 -0
  57. qtype/interpreter/executors/vector_search_executor.py +91 -0
  58. qtype/interpreter/flow.py +159 -22
  59. qtype/interpreter/metadata_api.py +115 -0
  60. qtype/interpreter/resource_cache.py +5 -4
  61. qtype/interpreter/rich_progress.py +225 -0
  62. qtype/interpreter/stream/chat/__init__.py +15 -0
  63. qtype/interpreter/stream/chat/converter.py +391 -0
  64. qtype/interpreter/{chat → stream/chat}/file_conversions.py +2 -2
  65. qtype/interpreter/stream/chat/ui_request_to_domain_type.py +140 -0
  66. qtype/interpreter/stream/chat/vercel.py +609 -0
  67. qtype/interpreter/stream/utils/__init__.py +15 -0
  68. qtype/interpreter/stream/utils/build_vercel_ai_formatter.py +74 -0
  69. qtype/interpreter/stream/utils/callback_to_stream.py +66 -0
  70. qtype/interpreter/stream/utils/create_streaming_response.py +18 -0
  71. qtype/interpreter/stream/utils/default_chat_extract_text.py +20 -0
  72. qtype/interpreter/stream/utils/error_streaming_response.py +20 -0
  73. qtype/interpreter/telemetry.py +135 -8
  74. qtype/interpreter/tools/__init__.py +5 -0
  75. qtype/interpreter/tools/function_tool_helper.py +265 -0
  76. qtype/interpreter/types.py +330 -0
  77. qtype/interpreter/typing.py +83 -89
  78. qtype/interpreter/ui/404/index.html +1 -1
  79. qtype/interpreter/ui/404.html +1 -1
  80. qtype/interpreter/ui/_next/static/{nUaw6_IwRwPqkzwe5s725 → 20HoJN6otZ_LyHLHpCPE6}/_buildManifest.js +1 -1
  81. qtype/interpreter/ui/_next/static/chunks/{393-8fd474427f8e19ce.js → 434-b2112d19f25c44ff.js} +3 -3
  82. qtype/interpreter/ui/_next/static/chunks/app/page-8c67d16ac90d23cb.js +1 -0
  83. qtype/interpreter/ui/_next/static/chunks/ba12c10f-546f2714ff8abc66.js +1 -0
  84. qtype/interpreter/ui/_next/static/css/8a8d1269e362fef7.css +3 -0
  85. qtype/interpreter/ui/icon.png +0 -0
  86. qtype/interpreter/ui/index.html +1 -1
  87. qtype/interpreter/ui/index.txt +4 -4
  88. qtype/semantic/checker.py +583 -0
  89. qtype/semantic/generate.py +262 -83
  90. qtype/semantic/loader.py +95 -0
  91. qtype/semantic/model.py +436 -159
  92. qtype/semantic/resolver.py +63 -19
  93. qtype/semantic/visualize.py +28 -31
  94. {qtype-0.0.16.dist-info → qtype-0.1.1.dist-info}/METADATA +16 -3
  95. qtype-0.1.1.dist-info/RECORD +135 -0
  96. qtype/dsl/base_types.py +0 -38
  97. qtype/dsl/validator.py +0 -465
  98. qtype/interpreter/batch/__init__.py +0 -0
  99. qtype/interpreter/batch/file_sink_source.py +0 -162
  100. qtype/interpreter/batch/flow.py +0 -95
  101. qtype/interpreter/batch/sql_source.py +0 -92
  102. qtype/interpreter/batch/step.py +0 -74
  103. qtype/interpreter/batch/types.py +0 -41
  104. qtype/interpreter/batch/utils.py +0 -178
  105. qtype/interpreter/chat/chat_api.py +0 -237
  106. qtype/interpreter/chat/vercel.py +0 -314
  107. qtype/interpreter/exceptions.py +0 -10
  108. qtype/interpreter/step.py +0 -67
  109. qtype/interpreter/steps/__init__.py +0 -0
  110. qtype/interpreter/steps/agent.py +0 -114
  111. qtype/interpreter/steps/condition.py +0 -36
  112. qtype/interpreter/steps/decoder.py +0 -88
  113. qtype/interpreter/steps/llm_inference.py +0 -171
  114. qtype/interpreter/steps/prompt_template.py +0 -54
  115. qtype/interpreter/steps/search.py +0 -24
  116. qtype/interpreter/steps/tool.py +0 -219
  117. qtype/interpreter/streaming_helpers.py +0 -123
  118. qtype/interpreter/ui/_next/static/chunks/app/page-7e26b6156cfb55d3.js +0 -1
  119. qtype/interpreter/ui/_next/static/chunks/ba12c10f-22556063851a6df2.js +0 -1
  120. qtype/interpreter/ui/_next/static/css/b40532b0db09cce3.css +0 -3
  121. qtype/interpreter/ui/favicon.ico +0 -0
  122. qtype/loader.py +0 -390
  123. qtype-0.0.16.dist-info/RECORD +0 -106
  124. /qtype/interpreter/ui/_next/static/{nUaw6_IwRwPqkzwe5s725 → 20HoJN6otZ_LyHLHpCPE6}/_ssgManifest.js +0 -0
  125. {qtype-0.0.16.dist-info → qtype-0.1.1.dist-info}/WHEEL +0 -0
  126. {qtype-0.0.16.dist-info → qtype-0.1.1.dist-info}/entry_points.txt +0 -0
  127. {qtype-0.0.16.dist-info → qtype-0.1.1.dist-info}/licenses/LICENSE +0 -0
  128. {qtype-0.0.16.dist-info → qtype-0.1.1.dist-info}/top_level.txt +0 -0
qtype/semantic/model.py CHANGED
@@ -13,11 +13,19 @@ Types are ignored since they should reflect dsl directly, which is type checked.
13
13
 
14
14
  from __future__ import annotations
15
15
 
16
- from typing import Any, Literal
17
-
18
- from pydantic import BaseModel, Field, model_validator
19
-
20
- # Import enums and type aliases from DSL
16
+ from functools import partial
17
+ from typing import Any, Literal, Union
18
+
19
+ from pydantic import BaseModel, Field
20
+
21
+ # Import enums, mixins, and type aliases
22
+ from qtype.base.types import ( # noqa: F401
23
+ BatchableStepMixin,
24
+ BatchConfig,
25
+ CachedStepMixin,
26
+ ConcurrencyConfig,
27
+ ConcurrentStepMixin,
28
+ )
21
29
  from qtype.dsl.model import VariableType # noqa: F401
22
30
  from qtype.dsl.model import ( # noqa: F401
23
31
  CustomType,
@@ -25,7 +33,6 @@ from qtype.dsl.model import ( # noqa: F401
25
33
  ListType,
26
34
  PrimitiveTypeEnum,
27
35
  StepCardinality,
28
- StructuralTypeEnum,
29
36
  ToolParameter,
30
37
  )
31
38
  from qtype.dsl.model import Variable as DSLVariable # noqa: F401
@@ -61,10 +68,44 @@ class Tool(ImmutableModel):
61
68
  ..., description="Description of what the tool does."
62
69
  )
63
70
  inputs: dict[str, ToolParameter] = Field(
64
- {}, description="Input parameters required by this tool."
71
+ default_factory=dict,
72
+ description="Input parameters required by this tool.",
65
73
  )
66
74
  outputs: dict[str, ToolParameter] = Field(
67
- {}, description="Output parameters produced by this tool."
75
+ default_factory=dict,
76
+ description="Output parameters produced by this tool.",
77
+ )
78
+
79
+
80
+ class SecretManager(BaseModel):
81
+ """Base class for secret manager configurations."""
82
+
83
+ id: str = Field(
84
+ ..., description="Unique ID for this secret manager configuration."
85
+ )
86
+ type: str = Field(..., description="The type of secret manager.")
87
+ auth: AuthorizationProvider = Field(
88
+ ...,
89
+ description="AuthorizationProvider used to access this secret manager.",
90
+ )
91
+
92
+
93
+ class Step(CachedStepMixin, BaseModel):
94
+ """Base class for components that take inputs and produce outputs."""
95
+
96
+ id: str = Field(..., description="Unique ID of this component.")
97
+ type: str = Field(..., description="Type of the step component.")
98
+ cardinality: StepCardinality = Field(
99
+ StepCardinality.one,
100
+ description="Does this step emit 1 (one) or 0...N (many) instances of the outputs?",
101
+ )
102
+ inputs: list[Variable] = Field(
103
+ default_factory=list,
104
+ description="References to the variables required by this step.",
105
+ )
106
+ outputs: list[Variable] = Field(
107
+ default_factory=list,
108
+ description="References to the variables where output is stored.",
68
109
  )
69
110
 
70
111
 
@@ -84,53 +125,52 @@ class Application(BaseModel):
84
125
  None, description="Optional description of the application."
85
126
  )
86
127
  memories: list[Memory] = Field(
87
- [], description="List of memory definitions used in this application."
128
+ default_factory=list,
129
+ description="List of memory definitions used in this application.",
88
130
  )
89
131
  models: list[Model] = Field(
90
- [], description="List of models used in this application."
132
+ default_factory=list,
133
+ description="List of models used in this application.",
91
134
  )
92
135
  types: list[CustomType] = Field(
93
- [], description="List of custom types defined in this application."
94
- )
95
- variables: list[Variable] = Field(
96
- [], description="List of variables used in this application."
136
+ default_factory=list,
137
+ description="List of custom types defined in this application.",
97
138
  )
98
139
  flows: list[Flow] = Field(
99
- [], description="List of flows defined in this application."
140
+ default_factory=list,
141
+ description="List of flows defined in this application.",
100
142
  )
101
- auths: list[
102
- APIKeyAuthProvider
103
- | BearerTokenAuthProvider
104
- | AWSAuthProvider
105
- | OAuth2AuthProvider
106
- ] = Field(
107
- [], description="List of authorization providers used for API access."
143
+ auths: list[AuthorizationProvider] = Field(
144
+ default_factory=list,
145
+ description="List of authorization providers used for API access.",
108
146
  )
109
147
  tools: list[Tool] = Field(
110
- [], description="List of tools available in this application."
148
+ default_factory=list,
149
+ description="List of tools available in this application.",
111
150
  )
112
151
  indexes: list[Index] = Field(
113
- [], description="List of indexes available for search operations."
152
+ default_factory=list,
153
+ description="List of indexes available for search operations.",
154
+ )
155
+ secret_manager: AWSSecretManager | None = Field(
156
+ None,
157
+ description="Optional secret manager configuration for the application.",
114
158
  )
115
159
  telemetry: TelemetrySink | None = Field(
116
160
  None, description="Optional telemetry sink for observability."
117
161
  )
118
162
 
119
163
 
120
- class Step(BaseModel):
121
- """Base class for components that take inputs and produce outputs."""
164
+ class AuthorizationProviderList(BaseModel):
165
+ """Schema for a standalone list of authorization providers."""
166
+
167
+ root: list[AuthorizationProvider] = Field(...)
122
168
 
123
- id: str = Field(..., description="Unique ID of this component.")
124
- cardinality: StepCardinality = Field(
125
- StepCardinality.one,
126
- description="Does this step emit 1 (one) or 0...N (many) instances of the outputs?",
127
- )
128
- inputs: list[Variable] = Field(
129
- [], description="Input variables required by this step."
130
- )
131
- outputs: list[Variable] = Field(
132
- [], description="Variable where output is stored."
133
- )
169
+
170
+ class ConstantPath(BaseModel):
171
+ """Semantic version of ConstantPath."""
172
+
173
+ uri: str = Field(..., description="A constant Fsspec URI.")
134
174
 
135
175
 
136
176
  class Index(ImmutableModel):
@@ -138,16 +178,10 @@ class Index(ImmutableModel):
138
178
 
139
179
  id: str = Field(..., description="Unique ID of the index.")
140
180
  args: dict[str, Any] = Field(
141
- {},
181
+ default_factory=dict,
142
182
  description="Index-specific configuration and connection parameters.",
143
183
  )
144
- auth: (
145
- APIKeyAuthProvider
146
- | BearerTokenAuthProvider
147
- | AWSAuthProvider
148
- | OAuth2AuthProvider
149
- | None
150
- ) = Field(
184
+ auth: AuthorizationProvider | None = Field(
151
185
  None, description="AuthorizationProvider for accessing the index."
152
186
  )
153
187
  name: str = Field(..., description="Name of the index/collection/table.")
@@ -156,24 +190,64 @@ class Index(ImmutableModel):
156
190
  class Model(ImmutableModel):
157
191
  """Describes a generative model configuration, including provider and model ID."""
158
192
 
193
+ type: Literal["Model"] = Field("Model")
159
194
  id: str = Field(..., description="Unique ID for the model.")
160
- auth: (
161
- APIKeyAuthProvider
162
- | BearerTokenAuthProvider
163
- | AWSAuthProvider
164
- | OAuth2AuthProvider
165
- | None
166
- ) = Field(None, description="AuthorizationProvider used for model access.")
195
+ auth: AuthorizationProvider | None = Field(
196
+ None, description="AuthorizationProvider used for model access."
197
+ )
167
198
  inference_params: dict[str, Any] = Field(
168
- {},
199
+ default_factory=dict,
169
200
  description="Optional inference parameters like temperature or max_tokens.",
170
201
  )
171
202
  model_id: str | None = Field(
172
203
  None,
173
204
  description="The specific model name or ID for the provider. If None, id is used",
174
205
  )
175
- provider: str = Field(
176
- ..., description="Name of the provider, e.g., openai or anthropic."
206
+ provider: Literal["openai", "anthropic", "aws-bedrock", "gcp-vertex"] = (
207
+ Field(
208
+ ..., description="Name of the provider, e.g., openai or anthropic."
209
+ )
210
+ )
211
+
212
+
213
+ class Flow(BaseModel):
214
+ """Defines a flow of steps that can be executed in sequence or parallel.
215
+ If input or output variables are not specified, they are inferred from
216
+ the first and last step, respectively."""
217
+
218
+ id: str = Field(..., description="Unique ID of the flow.")
219
+ type: Literal["Flow"] = Field("Flow")
220
+ description: str | None = Field(
221
+ None, description="Optional description of the flow."
222
+ )
223
+ steps: list[Step | Step] = Field(
224
+ default_factory=list,
225
+ description="List of steps or references to steps",
226
+ )
227
+ interface: FlowInterface | None = Field(None)
228
+ variables: list[Variable] = Field(
229
+ default_factory=list,
230
+ description="List of variables available at the application scope.",
231
+ )
232
+ inputs: list[Variable] = Field(
233
+ default_factory=list,
234
+ description="Input variables required by this step.",
235
+ )
236
+ outputs: list[Variable] = Field(
237
+ default_factory=list, description="Resulting variables"
238
+ )
239
+
240
+
241
+ class FlowInterface(BaseModel):
242
+ """
243
+ Defines the public-facing contract for a Flow, guiding the UI
244
+ and session management.
245
+ """
246
+
247
+ type: Literal["Complete", "Conversational"] = Field("Complete")
248
+ session_inputs: list[Variable] = Field(
249
+ default_factory=list,
250
+ description="A list of input variable IDs that are set once and then persisted across a session.",
177
251
  )
178
252
 
179
253
 
@@ -193,32 +267,73 @@ class Memory(ImmutableModel):
193
267
  )
194
268
 
195
269
 
270
+ class ModelList(BaseModel):
271
+ """Schema for a standalone list of models."""
272
+
273
+ root: list[Model] = Field(...)
274
+
275
+
276
+ class SecretReference(BaseModel):
277
+ """
278
+ A reference to a secret in the application's configured SecretManager.
279
+ This value is resolved at runtime by the interpreter.
280
+ """
281
+
282
+ secret_name: str = Field(
283
+ ...,
284
+ description="The name, ID, or ARN of the secret to fetch (e.g., 'my-project/db-password').",
285
+ )
286
+ key: str | None = Field(
287
+ None,
288
+ description="Optional key if the secret is a JSON blob or map (e.g., a specific key in a K8s secret).",
289
+ )
290
+
291
+
196
292
  class TelemetrySink(BaseModel):
197
293
  """Defines an observability endpoint for collecting telemetry data from the QType runtime."""
198
294
 
199
295
  id: str = Field(
200
296
  ..., description="Unique ID of the telemetry sink configuration."
201
297
  )
202
- auth: (
203
- APIKeyAuthProvider
204
- | BearerTokenAuthProvider
205
- | AWSAuthProvider
206
- | OAuth2AuthProvider
207
- | None
208
- ) = Field(
298
+ provider: Literal["Phoenix", "Langfuse"] = Field("Phoenix")
299
+ auth: AuthorizationProvider | None = Field(
209
300
  None,
210
301
  description="AuthorizationProvider used to authenticate telemetry data transmission.",
211
302
  )
212
- endpoint: str = Field(
303
+ endpoint: str | SecretReference = Field(
213
304
  ..., description="URL endpoint where telemetry data will be sent."
214
305
  )
306
+ args: dict[str, Any] = Field(
307
+ default_factory=dict,
308
+ description="Additional configuration arguments specific to the telemetry sink type.",
309
+ )
310
+
311
+
312
+ class ToolList(BaseModel):
313
+ """Schema for a standalone list of tools."""
314
+
315
+ root: list[Tool] = Field(...)
316
+
317
+
318
+ class TypeList(BaseModel):
319
+ """Schema for a standalone list of type definitions."""
320
+
321
+ root: list[CustomType] = Field(...)
322
+
323
+
324
+ class VariableList(BaseModel):
325
+ """Schema for a standalone list of variables."""
326
+
327
+ root: list[Variable] = Field(...)
215
328
 
216
329
 
217
330
  class APIKeyAuthProvider(AuthorizationProvider):
218
331
  """API key-based authentication provider."""
219
332
 
220
333
  type: Literal["api_key"] = Field("api_key")
221
- api_key: str = Field(..., description="API key for authentication.")
334
+ api_key: str | SecretReference = Field(
335
+ ..., description="API key for authentication."
336
+ )
222
337
  host: str | None = Field(
223
338
  None, description="Base URL or domain of the provider."
224
339
  )
@@ -228,11 +343,13 @@ class AWSAuthProvider(AuthorizationProvider):
228
343
  """AWS authentication provider supporting multiple credential methods."""
229
344
 
230
345
  type: Literal["aws"] = Field("aws")
231
- access_key_id: str | None = Field(None, description="AWS access key ID.")
232
- secret_access_key: str | None = Field(
346
+ access_key_id: str | SecretReference | None = Field(
347
+ None, description="AWS access key ID."
348
+ )
349
+ secret_access_key: str | SecretReference | None = Field(
233
350
  None, description="AWS secret access key."
234
351
  )
235
- session_token: str | None = Field(
352
+ session_token: str | SecretReference | None = Field(
236
353
  None, description="AWS session token for temporary credentials."
237
354
  )
238
355
  profile_name: str | None = Field(
@@ -254,7 +371,9 @@ class BearerTokenAuthProvider(AuthorizationProvider):
254
371
  """Bearer token authentication provider."""
255
372
 
256
373
  type: Literal["bearer_token"] = Field("bearer_token")
257
- token: str = Field(..., description="Bearer token for authentication.")
374
+ token: str | SecretReference = Field(
375
+ ..., description="Bearer token for authentication."
376
+ )
258
377
 
259
378
 
260
379
  class OAuth2AuthProvider(AuthorizationProvider):
@@ -262,39 +381,61 @@ class OAuth2AuthProvider(AuthorizationProvider):
262
381
 
263
382
  type: Literal["oauth2"] = Field("oauth2")
264
383
  client_id: str = Field(..., description="OAuth2 client ID.")
265
- client_secret: str = Field(..., description="OAuth2 client secret.")
384
+ client_secret: str | SecretReference = Field(
385
+ ..., description="OAuth2 client secret."
386
+ )
266
387
  token_url: str = Field(..., description="Token endpoint URL.")
267
- scopes: list[str] = Field([], description="OAuth2 scopes required.")
388
+ scopes: list[str] = Field(
389
+ default_factory=list, description="OAuth2 scopes required."
390
+ )
391
+
392
+
393
+ class VertexAuthProvider(AuthorizationProvider):
394
+ """Google Vertex authentication provider supporting gcloud profile or service account."""
395
+
396
+ type: Literal["vertex"] = Field("vertex")
397
+ profile_name: str | None = Field(
398
+ None,
399
+ description="Local gcloud profile name (if using existing CLI credentials).",
400
+ )
401
+ project_id: str | None = Field(
402
+ None,
403
+ description="Explicit GCP project ID override (if different from profile).",
404
+ )
405
+ service_account_file: str | None = Field(
406
+ None, description="Path to a service account JSON key file."
407
+ )
408
+ region: str | None = Field(
409
+ None, description="Vertex region (e.g., us-central1)."
410
+ )
268
411
 
269
412
 
270
413
  class APITool(Tool):
271
414
  """Tool that invokes an API endpoint."""
272
415
 
416
+ type: Literal["APITool"] = Field("APITool")
273
417
  endpoint: str = Field(..., description="API endpoint URL to call.")
274
418
  method: str = Field(
275
419
  "GET", description="HTTP method to use (GET, POST, PUT, DELETE, etc.)."
276
420
  )
277
- auth: (
278
- APIKeyAuthProvider
279
- | BearerTokenAuthProvider
280
- | AWSAuthProvider
281
- | OAuth2AuthProvider
282
- | None
283
- ) = Field(
421
+ auth: AuthorizationProvider | None = Field(
284
422
  None,
285
423
  description="Optional AuthorizationProvider for API authentication.",
286
424
  )
287
425
  headers: dict[str, str] = Field(
288
- {}, description="Optional HTTP headers to include in the request."
426
+ default_factory=dict,
427
+ description="Optional HTTP headers to include in the request.",
289
428
  )
290
429
  parameters: dict[str, ToolParameter] = Field(
291
- {}, description="Output parameters produced by this tool."
430
+ default_factory=dict,
431
+ description="Output parameters produced by this tool.",
292
432
  )
293
433
 
294
434
 
295
435
  class PythonFunctionTool(Tool):
296
436
  """Tool that calls a Python function."""
297
437
 
438
+ type: Literal["PythonFunctionTool"] = Field("PythonFunctionTool")
298
439
  function_name: str = Field(
299
440
  ..., description="Name of the Python function to call."
300
441
  )
@@ -303,54 +444,170 @@ class PythonFunctionTool(Tool):
303
444
  )
304
445
 
305
446
 
306
- class Condition(Step):
307
- """Conditional logic gate within a flow. Supports branching logic for execution based on variable values."""
447
+ class AWSSecretManager(SecretManager):
448
+ """Configuration for AWS Secrets Manager."""
308
449
 
309
- else_: Step | None = Field(
310
- None,
311
- description="Optional step to run if condition fails.",
312
- alias="else",
313
- )
314
- equals: Variable | None = Field(
315
- None, description="Match condition for equality check."
450
+ type: Literal["aws_secret_manager"] = Field("aws_secret_manager")
451
+
452
+
453
+ class Aggregate(Step):
454
+ """
455
+ A terminal step that consumes an entire input stream and produces a single
456
+ summary message with success/error counts.
457
+ """
458
+
459
+ type: Literal["Aggregate"] = Field("Aggregate")
460
+ cardinality: Literal[StepCardinality.one] = Field(StepCardinality.one)
461
+ outputs: list[Variable] = Field(
462
+ default_factory=list,
463
+ description="References to the variables for the output. There should be one and only one output with type AggregateStats",
316
464
  )
317
- then: Step = Field(..., description="Step to run if condition matches.")
318
465
 
319
466
 
320
467
  class Decoder(Step):
321
468
  """Defines a step that decodes string data into structured outputs.
322
469
 
323
470
  If parsing fails, the step will raise an error and halt execution.
324
- Use conditional logic in your flow to handle potential parsing errors.
325
- """
471
+ Use conditional logic in your flow to handle potential parsing errors."""
326
472
 
473
+ type: Literal["Decoder"] = Field("Decoder")
327
474
  format: DecoderFormat = Field(
328
475
  DecoderFormat.json,
329
476
  description="Format in which the decoder processes data. Defaults to JSON.",
330
477
  )
331
478
 
332
479
 
333
- class Invoke(Step):
480
+ class DocToTextConverter(Step, ConcurrentStepMixin):
481
+ """Defines a step to convert raw documents (e.g., PDF, DOCX) loaded by a DocumentSource into plain text
482
+ using an external tool like Docling or LlamaParse for pre-processing before chunking.
483
+ The input and output are both RAGDocument, but the output after processing with have content of type markdown.
484
+ """
485
+
486
+ type: Literal["DocToTextConverter"] = Field("DocToTextConverter")
487
+
488
+
489
+ class DocumentEmbedder(Step, ConcurrentStepMixin):
490
+ """Embeds document chunks using a specified embedding model."""
491
+
492
+ type: Literal["DocumentEmbedder"] = Field("DocumentEmbedder")
493
+ cardinality: Literal[StepCardinality.many] = Field(
494
+ StepCardinality.many,
495
+ description="Consumes one chunk and emits one embedded chunk.",
496
+ )
497
+ model: EmbeddingModel = Field(
498
+ ..., description="Embedding model to use for vectorization."
499
+ )
500
+
501
+
502
+ class DocumentSplitter(Step, ConcurrentStepMixin):
503
+ """Configuration for chunking/splitting documents into embeddable nodes/chunks."""
504
+
505
+ type: Literal["DocumentSplitter"] = Field("DocumentSplitter")
506
+ cardinality: Literal[StepCardinality.many] = Field(
507
+ StepCardinality.many,
508
+ description="Consumes one document and emits 0...N nodes/chunks.",
509
+ )
510
+ splitter_name: str = Field(
511
+ "SentenceSplitter",
512
+ description="Name of the LlamaIndex TextSplitter class.",
513
+ )
514
+ chunk_size: int = Field(1024, description="Size of each chunk.")
515
+ chunk_overlap: int = Field(
516
+ 20, description="Overlap between consecutive chunks."
517
+ )
518
+ args: dict[str, Any] = Field(
519
+ default_factory=dict,
520
+ description="Additional arguments specific to the chosen splitter class.",
521
+ )
522
+
523
+
524
+ class Echo(Step):
525
+ """Defines a step that echoes its inputs as outputs.
526
+
527
+ Useful for debugging flows by inspecting variable values at a specific
528
+ point in the execution pipeline. The step simply passes through all input
529
+ variables as outputs without modification.
530
+ """
531
+
532
+ type: Literal["Echo"] = Field("Echo")
533
+
534
+
535
+ class FieldExtractor(Step):
536
+ """Extracts specific fields from input data using JSONPath expressions.
537
+
538
+ This step uses JSONPath syntax to extract data from structured inputs
539
+ (Pydantic models, dicts, lists). The input is first converted to a dict
540
+ using model_dump() if it's a Pydantic model, then the JSONPath expression
541
+ is evaluated.
542
+
543
+ If the JSONPath matches multiple values, the step yields multiple output
544
+ messages (1-to-many cardinality). If it matches a single value, it yields
545
+ one output message. If it matches nothing, it raises an error.
546
+
547
+ The extracted data is used to construct the output variable by passing it
548
+ as keyword arguments to the output type's constructor.
549
+
550
+ Example JSONPath expressions:
551
+ - `$.field_name` - Extract a single field
552
+ - `$.items[*]` - Extract all items from a list
553
+ - `$.items[?(@.price > 10)]` - Filter items by condition
554
+ """
555
+
556
+ type: Literal["FieldExtractor"] = Field("FieldExtractor")
557
+ json_path: str = Field(
558
+ ...,
559
+ description="JSONPath expression to extract data from the input. Uses jsonpath-ng syntax.",
560
+ )
561
+
562
+
563
+ class InvokeEmbedding(Step, ConcurrentStepMixin):
564
+ """Defines a step that generates embeddings using an embedding model.
565
+ It takes input variables and produces output variables containing the embeddings."""
566
+
567
+ type: Literal["InvokeEmbedding"] = Field("InvokeEmbedding")
568
+ model: EmbeddingModel = Field(
569
+ ..., description="The embedding model to use."
570
+ )
571
+
572
+
573
+ class InvokeFlow(Step):
574
+ """Invokes a flow with input and output bindings."""
575
+
576
+ type: Literal["InvokeFlow"] = Field("InvokeFlow")
577
+ flow: Flow = Field(..., description="Flow to invoke.")
578
+ input_bindings: dict[Variable, str] = Field(
579
+ ...,
580
+ description="Mapping from variable references to flow input variable IDs.",
581
+ )
582
+ output_bindings: dict[Variable, str] = Field(
583
+ ...,
584
+ description="Mapping from variable references to flow output variable IDs.",
585
+ )
586
+
587
+
588
+ class InvokeTool(Step, ConcurrentStepMixin):
334
589
  """Invokes a tool with input and output bindings."""
335
590
 
591
+ type: Literal["InvokeTool"] = Field("InvokeTool")
336
592
  tool: Tool = Field(..., description="Tool to invoke.")
337
593
  input_bindings: dict[str, str] = Field(
338
594
  ...,
339
- description="Mapping from step input IDs to tool input parameter names.",
595
+ description="Mapping from variable references to tool input parameter names.",
340
596
  )
341
597
  output_bindings: dict[str, str] = Field(
342
598
  ...,
343
- description="Mapping from tool output parameter names to step output IDs.",
599
+ description="Mapping from variable references to tool output parameter names.",
344
600
  )
345
601
 
346
602
 
347
- class LLMInference(Step):
603
+ class LLMInference(Step, ConcurrentStepMixin):
348
604
  """Defines a step that performs inference using a language model.
349
605
  It can take input variables and produce output variables based on the model's response."""
350
606
 
607
+ type: Literal["LLMInference"] = Field("LLMInference")
351
608
  memory: Memory | None = Field(
352
609
  None,
353
- description="Memory object to retain context across interactions.",
610
+ description="A reference to a Memory object to retain context across interactions.",
354
611
  )
355
612
  model: Model = Field(..., description="The model to use for inference.")
356
613
  system_message: str | None = Field(
@@ -363,6 +620,7 @@ class PromptTemplate(Step):
363
620
  """Defines a prompt template with a string format and variable bindings.
364
621
  This is used to generate prompts dynamically based on input variables."""
365
622
 
623
+ type: Literal["PromptTemplate"] = Field("PromptTemplate")
366
624
  template: str = Field(
367
625
  ...,
368
626
  description="String template for the prompt with variable placeholders.",
@@ -373,42 +631,48 @@ class Search(Step):
373
631
  """Base class for search operations against indexes."""
374
632
 
375
633
  filters: dict[str, Any] = Field(
376
- {}, description="Optional filters to apply during search."
634
+ default_factory=dict,
635
+ description="Optional filters to apply during search.",
377
636
  )
378
637
  index: Index = Field(
379
638
  ..., description="Index to search against (object or ID reference)."
380
639
  )
381
640
 
382
641
 
383
- class Sink(Step):
384
- """Base class for data sinks"""
385
-
386
- id: str = Field(..., description="Unique ID of the data sink.")
387
- cardinality: Literal["one"] = Field(
388
- StepCardinality.one,
389
- description="Flows always emit exactly one instance of the outputs.",
390
- )
391
-
392
-
393
642
  class Source(Step):
394
643
  """Base class for data sources"""
395
644
 
396
645
  id: str = Field(..., description="Unique ID of the data source.")
397
- cardinality: Literal["many"] = Field(
646
+ cardinality: Literal[StepCardinality.many] = Field(
398
647
  StepCardinality.many,
399
648
  description="Sources always emit 0...N instances of the outputs.",
400
649
  )
401
650
 
402
651
 
652
+ class Writer(Step, BatchableStepMixin):
653
+ """Base class for things that write data in batches."""
654
+
655
+ id: str = Field(..., description="Unique ID of the data writer.")
656
+
657
+
403
658
  class DocumentIndex(Index):
404
659
  """Document search index for text-based search (e.g., Elasticsearch, OpenSearch)."""
405
660
 
406
- pass
661
+ type: Literal["DocumentIndex"] = Field("DocumentIndex")
662
+ endpoint: str = Field(
663
+ ...,
664
+ description="URL endpoint for the search cluster (e.g., https://my-cluster.es.amazonaws.com).",
665
+ )
407
666
 
408
667
 
409
668
  class VectorIndex(Index):
410
669
  """Vector database index for similarity search using embeddings."""
411
670
 
671
+ type: Literal["VectorIndex"] = Field("VectorIndex")
672
+ module: str = Field(
673
+ ...,
674
+ description="Python module path for the vector store implementation (e.g., 'llama_index.vector_stores.qdrant.QdrantVectorStore').",
675
+ )
412
676
  embedding_model: EmbeddingModel = Field(
413
677
  ...,
414
678
  description="Embedding model used to vectorize queries and documents.",
@@ -418,6 +682,7 @@ class VectorIndex(Index):
418
682
  class EmbeddingModel(Model):
419
683
  """Describes an embedding model configuration, extending the base Model class."""
420
684
 
685
+ type: Literal["EmbeddingModel"] = Field("EmbeddingModel")
421
686
  dimensions: int = Field(
422
687
  ...,
423
688
  description="Dimensionality of the embedding vectors produced by this model.",
@@ -427,96 +692,108 @@ class EmbeddingModel(Model):
427
692
  class Agent(LLMInference):
428
693
  """Defines an agent that can perform tasks and make decisions based on user input and context."""
429
694
 
695
+ type: Literal["Agent"] = Field("Agent")
430
696
  tools: list[Tool] = Field(
431
- ..., description="List of tools available to the agent."
697
+ default_factory=list,
698
+ description="List of tools available to the agent.",
432
699
  )
433
700
 
434
701
 
435
- class DocumentSearch(Search):
702
+ class DocumentSearch(Search, ConcurrentStepMixin):
436
703
  """Performs document search against a document index."""
437
704
 
438
- pass
705
+ type: Literal["DocumentSearch"] = Field("DocumentSearch")
439
706
 
440
707
 
441
- class VectorSearch(Search):
708
+ class VectorSearch(Search, BatchableStepMixin):
442
709
  """Performs vector similarity search against a vector index."""
443
710
 
711
+ type: Literal["VectorSearch"] = Field("VectorSearch")
444
712
  default_top_k: int | None = Field(
445
713
  50,
446
714
  description="Number of top results to retrieve if not provided in the inputs.",
447
715
  )
448
716
 
449
717
 
450
- class FileSink(Sink):
451
- """File sink that writes data to a file using fsspec-compatible URIs."""
718
+ class DocumentSource(Source):
719
+ """A source of documents that will be used in retrieval augmented generation.
720
+ It uses LlamaIndex readers to load one or more raw Documents
721
+ from a specified path or system (e.g., Google Drive, web page).
722
+ See https://github.com/run-llama/llama_index/tree/main/llama-index-integrations/readers
723
+ """
452
724
 
453
- path: str | None = Field(
454
- None,
455
- description="fsspec-compatible URI to write to. If None, expects 'path' input variable.",
725
+ type: Literal["DocumentSource"] = Field("DocumentSource")
726
+ reader_module: str = Field(
727
+ ..., description="Module path of the LlamaIndex Reader)."
456
728
  )
457
-
458
-
459
- class IndexUpsert(Sink):
460
- """Semantic version of IndexUpsert."""
461
-
462
- index: Index = Field(
463
- ..., description="Index to upsert into (object or ID reference)."
729
+ args: dict[str, Any] = Field(
730
+ default_factory=dict,
731
+ description="Reader-specific arguments to pass to the Reader constructor.",
732
+ )
733
+ loader_args: dict[str, Any] = Field(
734
+ default_factory=dict,
735
+ description="Loader-specific arguments to pass to the load_data method.",
736
+ )
737
+ auth: AuthorizationProvider | None = Field(
738
+ None, description="AuthorizationProvider for accessing the source."
464
739
  )
465
740
 
466
741
 
467
742
  class FileSource(Source):
468
743
  """File source that reads data from a file using fsspec-compatible URIs."""
469
744
 
470
- path: str | None = Field(
471
- None,
472
- description="fsspec-compatible URI to read from. If None, expects 'path' input variable.",
745
+ type: Literal["FileSource"] = Field("FileSource")
746
+ path: ConstantPath | Variable = Field(
747
+ ...,
748
+ description="Reference to a variable with an fsspec-compatible URI to read from, or the uri itself.",
473
749
  )
474
750
 
475
751
 
476
752
  class SQLSource(Source):
477
753
  """SQL database source that executes queries and emits rows."""
478
754
 
755
+ type: Literal["SQLSource"] = Field("SQLSource")
479
756
  query: str = Field(
480
757
  ..., description="SQL query to execute. Inputs are injected as params."
481
758
  )
482
- connection: str = Field(
759
+ connection: str | SecretReference = Field(
483
760
  ...,
484
761
  description="Database connection string or reference to auth provider. Typically in SQLAlchemy format.",
485
762
  )
486
- auth: (
487
- APIKeyAuthProvider
488
- | BearerTokenAuthProvider
489
- | AWSAuthProvider
490
- | OAuth2AuthProvider
491
- | None
492
- ) = Field(
763
+ auth: AuthorizationProvider | None = Field(
493
764
  None,
494
765
  description="Optional AuthorizationProvider for database authentication.",
495
766
  )
496
767
 
497
768
 
498
- class Flow(Step):
499
- """Defines a flow of steps that can be executed in sequence or parallel.
500
- If input or output variables are not specified, they are inferred from
501
- the first and last step, respectively.
502
- """
769
+ class FileWriter(Writer, BatchableStepMixin):
770
+ """File writer that writes data to a file using fsspec-compatible URIs."""
503
771
 
504
- description: str | None = Field(
505
- None, description="Optional description of the flow."
772
+ type: Literal["FileWriter"] = Field("FileWriter")
773
+ path: ConstantPath | Variable = Field(
774
+ ...,
775
+ description="Reference to a variable with an fsspec-compatible URI to read from, or the uri itself.",
506
776
  )
507
- cardinality: StepCardinality = Field(
508
- StepCardinality.auto,
509
- description="The cardinality of the flow, inferred from its steps when set to 'auto'.",
510
- )
511
- mode: Literal["Complete", "Chat"] = Field("Complete")
512
- steps: list[Step] = Field(..., description="List of steps or step IDs.")
513
-
514
- @model_validator(mode="after")
515
- def infer_cardinality(self) -> "Flow":
516
- if self.cardinality == StepCardinality.auto:
517
- self.cardinality = StepCardinality.one
518
- for step in self.steps:
519
- if step.cardinality == StepCardinality.many:
520
- self.cardinality = StepCardinality.many
521
- break
522
- return self
777
+ batch_config: BatchConfig = Field(
778
+ default_factory=partial(BatchConfig, batch_size=9223372036854775807),
779
+ description="Configuration for processing the input stream in batches. If omitted, the step processes items one by one.",
780
+ )
781
+
782
+
783
+ class IndexUpsert(Writer):
784
+ """Semantic version of IndexUpsert."""
785
+
786
+ type: Literal["IndexUpsert"] = Field("IndexUpsert")
787
+ index: Index = Field(
788
+ ..., description="Index to upsert into (object or ID reference)."
789
+ )
790
+
791
+
792
+ DocumentType = Union[
793
+ Application,
794
+ AuthorizationProviderList,
795
+ ModelList,
796
+ ToolList,
797
+ TypeList,
798
+ VariableList,
799
+ ]