qtype 0.0.12__py3-none-any.whl → 0.1.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (137) hide show
  1. qtype/application/commons/tools.py +1 -1
  2. qtype/application/converters/tools_from_api.py +476 -11
  3. qtype/application/converters/tools_from_module.py +38 -14
  4. qtype/application/converters/types.py +15 -30
  5. qtype/application/documentation.py +1 -1
  6. qtype/application/facade.py +102 -85
  7. qtype/base/types.py +227 -7
  8. qtype/cli.py +5 -1
  9. qtype/commands/convert.py +52 -6
  10. qtype/commands/generate.py +44 -4
  11. qtype/commands/run.py +78 -36
  12. qtype/commands/serve.py +74 -44
  13. qtype/commands/validate.py +37 -14
  14. qtype/commands/visualize.py +46 -25
  15. qtype/dsl/__init__.py +6 -5
  16. qtype/dsl/custom_types.py +1 -1
  17. qtype/dsl/domain_types.py +86 -5
  18. qtype/dsl/linker.py +384 -0
  19. qtype/dsl/loader.py +315 -0
  20. qtype/dsl/model.py +751 -263
  21. qtype/dsl/parser.py +200 -0
  22. qtype/dsl/types.py +50 -0
  23. qtype/interpreter/api.py +63 -136
  24. qtype/interpreter/auth/aws.py +19 -9
  25. qtype/interpreter/auth/generic.py +93 -16
  26. qtype/interpreter/base/base_step_executor.py +436 -0
  27. qtype/interpreter/base/batch_step_executor.py +171 -0
  28. qtype/interpreter/base/exceptions.py +50 -0
  29. qtype/interpreter/base/executor_context.py +91 -0
  30. qtype/interpreter/base/factory.py +84 -0
  31. qtype/interpreter/base/progress_tracker.py +110 -0
  32. qtype/interpreter/base/secrets.py +339 -0
  33. qtype/interpreter/base/step_cache.py +74 -0
  34. qtype/interpreter/base/stream_emitter.py +469 -0
  35. qtype/interpreter/conversions.py +471 -22
  36. qtype/interpreter/converters.py +79 -0
  37. qtype/interpreter/endpoints.py +355 -0
  38. qtype/interpreter/executors/agent_executor.py +242 -0
  39. qtype/interpreter/executors/aggregate_executor.py +93 -0
  40. qtype/interpreter/executors/bedrock_reranker_executor.py +195 -0
  41. qtype/interpreter/executors/decoder_executor.py +163 -0
  42. qtype/interpreter/executors/doc_to_text_executor.py +112 -0
  43. qtype/interpreter/executors/document_embedder_executor.py +107 -0
  44. qtype/interpreter/executors/document_search_executor.py +113 -0
  45. qtype/interpreter/executors/document_source_executor.py +118 -0
  46. qtype/interpreter/executors/document_splitter_executor.py +105 -0
  47. qtype/interpreter/executors/echo_executor.py +63 -0
  48. qtype/interpreter/executors/field_extractor_executor.py +165 -0
  49. qtype/interpreter/executors/file_source_executor.py +101 -0
  50. qtype/interpreter/executors/file_writer_executor.py +110 -0
  51. qtype/interpreter/executors/index_upsert_executor.py +232 -0
  52. qtype/interpreter/executors/invoke_embedding_executor.py +92 -0
  53. qtype/interpreter/executors/invoke_flow_executor.py +51 -0
  54. qtype/interpreter/executors/invoke_tool_executor.py +358 -0
  55. qtype/interpreter/executors/llm_inference_executor.py +272 -0
  56. qtype/interpreter/executors/prompt_template_executor.py +78 -0
  57. qtype/interpreter/executors/sql_source_executor.py +106 -0
  58. qtype/interpreter/executors/vector_search_executor.py +91 -0
  59. qtype/interpreter/flow.py +173 -22
  60. qtype/interpreter/logging_progress.py +61 -0
  61. qtype/interpreter/metadata_api.py +115 -0
  62. qtype/interpreter/resource_cache.py +5 -4
  63. qtype/interpreter/rich_progress.py +225 -0
  64. qtype/interpreter/stream/chat/__init__.py +15 -0
  65. qtype/interpreter/stream/chat/converter.py +391 -0
  66. qtype/interpreter/{chat → stream/chat}/file_conversions.py +2 -2
  67. qtype/interpreter/stream/chat/ui_request_to_domain_type.py +140 -0
  68. qtype/interpreter/stream/chat/vercel.py +609 -0
  69. qtype/interpreter/stream/utils/__init__.py +15 -0
  70. qtype/interpreter/stream/utils/build_vercel_ai_formatter.py +74 -0
  71. qtype/interpreter/stream/utils/callback_to_stream.py +66 -0
  72. qtype/interpreter/stream/utils/create_streaming_response.py +18 -0
  73. qtype/interpreter/stream/utils/default_chat_extract_text.py +20 -0
  74. qtype/interpreter/stream/utils/error_streaming_response.py +20 -0
  75. qtype/interpreter/telemetry.py +135 -8
  76. qtype/interpreter/tools/__init__.py +5 -0
  77. qtype/interpreter/tools/function_tool_helper.py +265 -0
  78. qtype/interpreter/types.py +330 -0
  79. qtype/interpreter/typing.py +83 -89
  80. qtype/interpreter/ui/404/index.html +1 -1
  81. qtype/interpreter/ui/404.html +1 -1
  82. qtype/interpreter/ui/_next/static/{OT8QJQW3J70VbDWWfrEMT → 20HoJN6otZ_LyHLHpCPE6}/_buildManifest.js +1 -1
  83. qtype/interpreter/ui/_next/static/chunks/434-b2112d19f25c44ff.js +36 -0
  84. qtype/interpreter/ui/_next/static/chunks/{964-ed4ab073db645007.js → 964-2b041321a01cbf56.js} +1 -1
  85. qtype/interpreter/ui/_next/static/chunks/app/{layout-5ccbc44fd528d089.js → layout-a05273ead5de2c41.js} +1 -1
  86. qtype/interpreter/ui/_next/static/chunks/app/page-8c67d16ac90d23cb.js +1 -0
  87. qtype/interpreter/ui/_next/static/chunks/ba12c10f-546f2714ff8abc66.js +1 -0
  88. qtype/interpreter/ui/_next/static/chunks/{main-6d261b6c5d6fb6c2.js → main-e26b9cb206da2cac.js} +1 -1
  89. qtype/interpreter/ui/_next/static/chunks/webpack-08642e441b39b6c2.js +1 -0
  90. qtype/interpreter/ui/_next/static/css/8a8d1269e362fef7.css +3 -0
  91. qtype/interpreter/ui/_next/static/media/4cf2300e9c8272f7-s.p.woff2 +0 -0
  92. qtype/interpreter/ui/icon.png +0 -0
  93. qtype/interpreter/ui/index.html +1 -1
  94. qtype/interpreter/ui/index.txt +5 -5
  95. qtype/semantic/checker.py +643 -0
  96. qtype/semantic/generate.py +268 -85
  97. qtype/semantic/loader.py +95 -0
  98. qtype/semantic/model.py +535 -163
  99. qtype/semantic/resolver.py +63 -19
  100. qtype/semantic/visualize.py +50 -35
  101. {qtype-0.0.12.dist-info → qtype-0.1.3.dist-info}/METADATA +21 -4
  102. qtype-0.1.3.dist-info/RECORD +137 -0
  103. qtype/dsl/base_types.py +0 -38
  104. qtype/dsl/validator.py +0 -464
  105. qtype/interpreter/batch/__init__.py +0 -0
  106. qtype/interpreter/batch/flow.py +0 -95
  107. qtype/interpreter/batch/sql_source.py +0 -95
  108. qtype/interpreter/batch/step.py +0 -63
  109. qtype/interpreter/batch/types.py +0 -41
  110. qtype/interpreter/batch/utils.py +0 -179
  111. qtype/interpreter/chat/chat_api.py +0 -237
  112. qtype/interpreter/chat/vercel.py +0 -314
  113. qtype/interpreter/exceptions.py +0 -10
  114. qtype/interpreter/step.py +0 -67
  115. qtype/interpreter/steps/__init__.py +0 -0
  116. qtype/interpreter/steps/agent.py +0 -114
  117. qtype/interpreter/steps/condition.py +0 -36
  118. qtype/interpreter/steps/decoder.py +0 -88
  119. qtype/interpreter/steps/llm_inference.py +0 -150
  120. qtype/interpreter/steps/prompt_template.py +0 -54
  121. qtype/interpreter/steps/search.py +0 -24
  122. qtype/interpreter/steps/tool.py +0 -53
  123. qtype/interpreter/streaming_helpers.py +0 -123
  124. qtype/interpreter/ui/_next/static/chunks/736-7fc606e244fedcb1.js +0 -36
  125. qtype/interpreter/ui/_next/static/chunks/app/page-c72e847e888e549d.js +0 -1
  126. qtype/interpreter/ui/_next/static/chunks/ba12c10f-22556063851a6df2.js +0 -1
  127. qtype/interpreter/ui/_next/static/chunks/webpack-8289c17c67827f22.js +0 -1
  128. qtype/interpreter/ui/_next/static/css/a262c53826df929b.css +0 -3
  129. qtype/interpreter/ui/_next/static/media/569ce4b8f30dc480-s.p.woff2 +0 -0
  130. qtype/interpreter/ui/favicon.ico +0 -0
  131. qtype/loader.py +0 -389
  132. qtype-0.0.12.dist-info/RECORD +0 -105
  133. /qtype/interpreter/ui/_next/static/{OT8QJQW3J70VbDWWfrEMT → 20HoJN6otZ_LyHLHpCPE6}/_ssgManifest.js +0 -0
  134. {qtype-0.0.12.dist-info → qtype-0.1.3.dist-info}/WHEEL +0 -0
  135. {qtype-0.0.12.dist-info → qtype-0.1.3.dist-info}/entry_points.txt +0 -0
  136. {qtype-0.0.12.dist-info → qtype-0.1.3.dist-info}/licenses/LICENSE +0 -0
  137. {qtype-0.0.12.dist-info → qtype-0.1.3.dist-info}/top_level.txt +0 -0
qtype/dsl/validator.py DELETED
@@ -1,464 +0,0 @@
1
- from typing import Any, Dict, Union, get_args, get_origin
2
-
3
- import qtype.dsl.base_types as base_types
4
- import qtype.dsl.domain_types
5
- import qtype.dsl.model as dsl
6
-
7
-
8
class QTypeValidationError(Exception):
    """Error raised when validation of a QType definition fails."""
12
-
13
-
14
class DuplicateComponentError(QTypeValidationError):
    """Error raised when two components are registered under one ID."""

    def __init__(
        self,
        obj_id: str,
        found_obj: qtype.dsl.domain_types.StrictBaseModel,
        existing_obj: qtype.dsl.domain_types.StrictBaseModel,
    ):
        # Serialize both components so the message shows exactly what clashed.
        found_json = found_obj.model_dump_json()
        existing_json = existing_obj.model_dump_json()
        message = (
            f"Duplicate component with ID '{obj_id}' found:\n{found_json}\n"
            f"Already exists:\n{existing_json}"
        )
        super().__init__(message)
26
-
27
-
28
class ComponentNotFoundError(QTypeValidationError):
    """Error raised when the DSL Application lacks a requested component."""

    def __init__(self, component_id: str):
        message = (
            f"Component with ID '{component_id}' not found in the DSL Application."
        )
        super().__init__(message)
35
-
36
-
37
class ReferenceNotFoundError(QTypeValidationError):
    """Error raised when an ID reference has no entry in the lookup map."""

    def __init__(self, reference: str, type_hint: str | None = None):
        # The expected type is appended to the message only when supplied.
        if type_hint is None:
            msg = f"Reference '{reference}' not found in lookup map."
        else:
            msg = f"Reference '{reference}' not found in lookup map for type '{type_hint}'."
        super().__init__(msg)
47
-
48
-
49
class FlowHasNoStepsError(QTypeValidationError):
    """Error raised for a flow whose step list is empty."""

    def __init__(self, flow_id: str):
        message = f"Flow {flow_id} has no steps defined."
        super().__init__(message)
54
-
55
-
56
- # These types are used only for the DSL and should not be converted to semantic types
57
- # They are used for JSON schema generation
58
- # They will be switched to their semantic abstract class in the generation.
59
- # i.e., `ToolType` will be switched to `Tool`
60
- def _update_map_with_unique_check(
61
- current_map: Dict[str, qtype.dsl.domain_types.StrictBaseModel],
62
- new_objects: list[qtype.dsl.domain_types.StrictBaseModel],
63
- ) -> None:
64
- """
65
- Update a map with new objects, ensuring unique IDs.
66
-
67
- Args:
68
- current_map: The current map of objects by ID.
69
- new_objects: List of new objects to add to the map.
70
-
71
- Returns:
72
- Updated map with new objects added, ensuring unique IDs.
73
- """
74
- for obj in new_objects:
75
- if obj is None:
76
- # If the object is None, we skip it.
77
- continue
78
- if isinstance(obj, str):
79
- # If the object is a string, we assume it is an ID and skip it.
80
- # This is a special case where we do not want to add the string itself.
81
- continue
82
- # Note: There is no current abstraction for the `id` field, so we assume it exists.
83
- obj_id = obj.id # type: ignore[attr-defined]
84
- # If the object already exists in the map, we check if it is the same object.
85
- # If it is not the same object, we raise an error.
86
- # This ensures that we do not have duplicate components with the same ID.
87
- if obj_id in current_map and id(current_map[obj_id]) != id(obj):
88
- raise DuplicateComponentError(obj_id, obj, current_map[obj_id])
89
- else:
90
- current_map[obj_id] = obj
91
-
92
-
93
def _update_maps_with_embedded_objects(
    lookup_map: Dict[str, qtype.dsl.domain_types.StrictBaseModel],
    embedded_objects: list[qtype.dsl.domain_types.StrictBaseModel],
) -> None:
    """
    Update the lookup map with embedded objects.

    Embedded objects are when the user specifies the object and not just the ID.
    For example, a prompt template may have variables embedded:
    ```yaml
    steps:
    - id: my_prompt
      variables:
      - id: my_var
        type: text
      outputs:
      - id: my_output
        type: text
    ```

    Args:
        lookup_map: The ID -> component map to update (mutated in place).
        embedded_objects: List of objects whose embedded components should
            be added to the map.

    Raises:
        DuplicateComponentError: Propagated from
            ``_update_map_with_unique_check`` when two distinct objects
            share an ID.
    """
    for obj in embedded_objects:
        # The checks below are independent ``if`` statements (not ``elif``):
        # every branch whose class matches ``obj`` runs.
        if isinstance(obj, dsl.Step):
            # Register the step's input/output variables and the step itself.
            _update_map_with_unique_check(lookup_map, obj.inputs or [])  # type: ignore
            _update_map_with_unique_check(lookup_map, obj.outputs or [])  # type: ignore
            _update_map_with_unique_check(lookup_map, [obj])

        if isinstance(obj, dsl.Model):
            # Models may embed their auth provider.
            _update_map_with_unique_check(lookup_map, [obj.auth])  # type: ignore

        if isinstance(obj, dsl.Condition):
            # Register both branches and the comparison target, then descend
            # into any branch that is an embedded step (not just an ID).
            _update_map_with_unique_check(lookup_map, [obj.then, obj.else_])  # type: ignore
            _update_map_with_unique_check(lookup_map, [obj.equals])  # type: ignore
            if obj.then and isinstance(obj.then, dsl.Step):
                _update_maps_with_embedded_objects(lookup_map, [obj.then])
            if obj.else_ and isinstance(obj.else_, dsl.Step):
                _update_maps_with_embedded_objects(lookup_map, [obj.else_])

        if isinstance(obj, dsl.APITool):
            # API tools may embed their auth provider.
            _update_map_with_unique_check(lookup_map, [obj.auth])  # type: ignore

        if isinstance(obj, dsl.LLMInference):
            # Register the model (and whatever it embeds) plus the memory.
            _update_map_with_unique_check(lookup_map, [obj.model])  # type: ignore
            _update_maps_with_embedded_objects(lookup_map, [obj.model])  # type: ignore
            _update_map_with_unique_check(lookup_map, [obj.memory])  # type: ignore

        if isinstance(obj, dsl.Agent):
            _update_map_with_unique_check(lookup_map, obj.tools or [])  # type: ignore
            _update_maps_with_embedded_objects(lookup_map, obj.tools or [])  # type: ignore

        if isinstance(obj, dsl.Flow):
            _update_map_with_unique_check(lookup_map, [obj])
            _update_map_with_unique_check(lookup_map, obj.steps or [])  # type: ignore
            _update_maps_with_embedded_objects(lookup_map, obj.steps or [])  # type: ignore

        if isinstance(obj, dsl.TelemetrySink):
            # Telemetry sinks may have auth references
            _update_map_with_unique_check(lookup_map, [obj.auth])  # type: ignore

        if isinstance(obj, dsl.Index):
            # Indexes may have auth references
            _update_map_with_unique_check(lookup_map, [obj.auth])  # type: ignore

        if isinstance(obj, dsl.VectorIndex):
            # Only an embedded model is registered; a plain ID is left alone.
            if isinstance(obj.embedding_model, dsl.EmbeddingModel):
                _update_map_with_unique_check(
                    lookup_map, [obj.embedding_model]
                )
                _update_maps_with_embedded_objects(
                    lookup_map, [obj.embedding_model]
                )

        if isinstance(obj, dsl.Search):
            # Only an embedded index is registered; a plain ID is left alone.
            if isinstance(obj.index, dsl.Index):
                _update_map_with_unique_check(lookup_map, [obj.index])
                _update_maps_with_embedded_objects(lookup_map, [obj.index])

        if isinstance(
            obj,
            (
                dsl.AuthorizationProviderList,
                dsl.IndexList,
                dsl.ModelList,
                dsl.ToolList,
            ),
        ):
            # Root-list wrappers: register every member, then descend into
            # each member for its own embedded objects.
            _update_map_with_unique_check(lookup_map, obj.root)  # type: ignore
            _update_maps_with_embedded_objects(lookup_map, obj.root)  # type: ignore

        if isinstance(obj, (dsl.TypeList, dsl.VariableList)):
            # Members of these lists are registered without descending.
            _update_map_with_unique_check(lookup_map, obj.root)  # type: ignore
208
-
209
-
210
def _build_lookup_maps(
    dsl_application: dsl.Application,
    lookup_map: Dict[str, qtype.dsl.domain_types.StrictBaseModel]
    | None = None,
) -> Dict[str, qtype.dsl.domain_types.StrictBaseModel]:
    """
    Build the lookup map for all objects in the DSL Application.

    This function creates a dictionary of id -> component, where each key is a
    component id and the value is the component.

    Args:
        dsl_application: The DSL Application to build the lookup map for.
        lookup_map: Optional existing map to extend in place; used when
            recursing into referenced applications. A new map is created
            when omitted.

    Returns:
        The (possibly newly created) lookup map, including an entry for
        ``dsl_application`` itself.

    Raises:
        ComponentNotFoundError: If the application object lacks one of the
            fields declared on ``dsl.Application``.
        DuplicateComponentError: If two distinct components share an ID.
    """
    if lookup_map is None:
        lookup_map = {}

    # Every Application field except these two holds components. A list is
    # used (instead of a set) so fields are processed in declaration order
    # and the first duplicate reported is the same on every run.
    excluded_fields = {"id", "references"}
    component_names = [
        name
        for name in dsl.Application.model_fields
        if name not in excluded_fields
    ]

    for component_name in component_names:
        if not hasattr(dsl_application, component_name):
            raise ComponentNotFoundError(component_name)
        components = getattr(dsl_application, component_name) or []
        if not isinstance(components, list):
            components = [components]  # Ensure we have a list
        _update_map_with_unique_check(lookup_map, components)
        _update_maps_with_embedded_objects(lookup_map, components)

    # Now deal with the references: unwrap each one once.
    reference_roots = [
        ref.root  # type: ignore
        for ref in dsl_application.references or []
    ]

    # Referenced applications are merged recursively into the same map.
    for root in reference_roots:
        if isinstance(root, dsl.Application):
            _build_lookup_maps(root, lookup_map)

    # Anything in the reference list that is not an Application is handled
    # by the embedded object resolver.
    _update_maps_with_embedded_objects(
        lookup_map,
        [
            root
            for root in reference_roots
            if not isinstance(root, dsl.Application)
        ],  # type: ignore
    )

    # The application itself is addressable by its own ID.
    lookup_map[dsl_application.id] = dsl_application

    return lookup_map
263
-
264
-
265
- def _is_dsl_type(type_obj: Any) -> bool:
266
- """Check if a type is a DSL type that should be converted to semantic."""
267
- if not hasattr(type_obj, "__name__"):
268
- return False
269
-
270
- # Check if it's defined in the DSL module
271
- return (
272
- hasattr(type_obj, "__module__")
273
- and (
274
- type_obj.__module__ == dsl.__name__
275
- or type_obj.__module__ == base_types.__name__
276
- )
277
- and not type_obj.__name__.startswith("_")
278
- )
279
-
280
-
281
- def _resolve_forward_ref(field_type: Any) -> Any:
282
- """
283
- Resolve a ForwardRef type to its actual type.
284
- This is used to handle cases where the type is a string that refers to a class.
285
- """
286
- if hasattr(field_type, "__forward_arg__"):
287
- # Extract the string from ForwardRef and process it
288
- forward_ref_str = field_type.__forward_arg__
289
- # Use eval to get the actual type from the string
290
- return eval(forward_ref_str, dict(vars(dsl)))
291
- return field_type
292
-
293
-
294
- def _is_union(type: Any) -> bool:
295
- """
296
- Indicates if the provided type is a Union type.
297
- """
298
- origin = get_origin(type)
299
- return origin is Union or (
300
- hasattr(type, "__class__") and type.__class__.__name__ == "UnionType"
301
- )
302
-
303
-
304
def _is_reference_type(field_type: Any) -> bool:
    """
    Report whether an annotation admits an ID reference.

    That is the case for a union that contains both ``str`` and at least
    one DSL type.
    """
    resolved = _resolve_forward_ref(field_type)
    if not _is_union(resolved):
        return False

    members = get_args(resolved)
    contains_str = any(member is str for member in members)
    contains_dsl_type = any(_is_dsl_type(member) for member in members)
    return contains_str and contains_dsl_type
317
-
318
-
319
def _container_item_type(field_type: Any, container: type, position: int) -> Any:
    """Return one type argument of the ``container`` generic in ``field_type``.

    ``field_type`` may be the parameterized container itself (``list[X]``,
    ``dict[K, V]``) or a union containing it (``list[X] | None``), in any
    member order. ``position`` selects the type argument (0 for list items,
    1 for dict values). Returns ``None`` when no parameterized ``container``
    is present.
    """
    candidates = get_args(field_type) if _is_union(field_type) else (field_type,)
    for candidate in candidates:
        if get_origin(candidate) is container:
            type_args = get_args(candidate)
            if len(type_args) > position:
                return type_args[position]
    return None


def _resolve_id_references(
    dslobj: qtype.dsl.domain_types.StrictBaseModel | str,
    lookup_map: Dict[str, qtype.dsl.domain_types.StrictBaseModel],
) -> Any:
    """
    Resolve ID references in a DSL object, replacing them with the objects.

    Args:
        dslobj: Either an ID string (looked up and returned directly) or a
            DSL object whose fields are resolved in place.
        lookup_map: Map of component ID -> component.

    Returns:
        The object registered under ``dslobj`` when it is a string;
        otherwise ``dslobj`` itself after its fields were updated.

    Raises:
        ReferenceNotFoundError: If an ID is not present in ``lookup_map``.
    """
    if isinstance(dslobj, str):
        # If the object is a string, we assume it is an ID and look it up.
        if dslobj in lookup_map:
            return lookup_map[dslobj]
        raise ReferenceNotFoundError(dslobj)

    def lookup_reference(val: Any, typ: Any) -> Any:
        # Only strings in a "DSL type | str" slot are treated as references.
        # The last check excludes values whose own class is a DSL type --
        # presumably str subclasses declared in the DSL modules; verify.
        if (
            isinstance(val, str)
            and _is_reference_type(typ)
            and not _is_dsl_type(type(val))
        ):
            if val in lookup_map:
                return lookup_map[val]
            raise ReferenceNotFoundError(val, str(typ))
        return val

    # Iterate over all fields in the object.
    for field_name, field_value in dslobj:
        field_info = dslobj.__class__.model_fields[field_name]
        field_type = _resolve_forward_ref(field_info.annotation)

        if isinstance(field_value, list):
            # Resolve each item against the list's item type, whether the
            # annotation is ``list[X]`` or a union such as ``list[X] | None``.
            item_type = _container_item_type(field_type, list, 0)
            resolved_list = [
                lookup_reference(item, item_type) for item in field_value
            ]
            setattr(dslobj, field_name, resolved_list)
        elif isinstance(field_value, dict):
            # Resolve each value against the dict's *value* type (type
            # argument 1); the key type is never a reference slot.
            value_type = _container_item_type(field_type, dict, 1)
            resolved_dict = {
                key: lookup_reference(value, value_type)
                for key, value in field_value.items()
            }
            setattr(dslobj, field_name, resolved_dict)
        elif field_value is None:
            # Convert list | None to an empty list
            # and dict | None to an empty dict
            if _is_union(field_type):
                members = get_args(field_type)
                if any(get_origin(member) is list for member in members):
                    setattr(dslobj, field_name, [])
                elif any(get_origin(member) is dict for member in members):
                    setattr(dslobj, field_name, {})
        else:
            setattr(
                dslobj, field_name, lookup_reference(field_value, field_type)
            )

    return dslobj
391
-
392
-
393
def _boundary_step(
    flow: Any,
    position: int,
    lookup_map: Dict[str, qtype.dsl.domain_types.StrictBaseModel],
) -> Any:
    """Return the flow step at ``position``, dereferencing it if it is an ID."""
    step = flow.steps[position]
    if not isinstance(step, str):
        return step
    if step not in lookup_map:
        raise ReferenceNotFoundError(step)
    return lookup_map[step]


def validate(
    dsl_application: dsl.Application,
) -> dsl.Application:
    """
    Validate the semantics of a DSL Application and resolve its references.

    The application is modified in place: flows without inputs/outputs
    inherit them from their first/last step, and ID references are replaced
    by the objects they name. The same instance is returned (no copy is
    made).

    Args:
        dsl_application: The DSL Application to validate.

    Returns:
        dsl.Application: ``dsl_application`` with all internal references
        resolved.

    Raises:
        DuplicateComponentError: If two distinct components share an ID.
        FlowHasNoStepsError: If a flow has no steps.
        ReferenceNotFoundError: If an ID reference cannot be resolved.
        QTypeValidationError: If a chat flow lacks a ChatMessage input or
            does not have exactly one ChatMessage output.
    """
    # First, make a lookup map of all objects in the DSL Application.
    # This ensures that all object ids are unique.
    lookup_map = _build_lookup_maps(dsl_application)

    for flow in dsl_application.flows or []:
        # If any flows have no steps, we raise an error.
        if not flow.steps:
            raise FlowHasNoStepsError(flow.id)
        # If any flow doesn't have inputs, copy the inputs from the first step.
        if not flow.inputs:
            first_step = _boundary_step(flow, 0, lookup_map)
            flow.inputs = first_step.inputs or []  # type: ignore
        # If any flow doesn't have outputs, copy them from the last step.
        if not flow.outputs:
            last_step = _boundary_step(flow, -1, lookup_map)
            flow.outputs = last_step.outputs or []  # type: ignore

    # Now we resolve all ID references in the DSL Application. Resolution
    # updates each registered object in place; the map itself is unchanged.
    for obj in lookup_map.values():
        _resolve_id_references(obj, lookup_map)

    # If any chat flow doesn't have an input variable that is a chat message,
    # raise an error.
    for flow in dsl_application.flows or []:
        if flow.mode != "Chat":
            continue
        inputs = flow.inputs or []
        if not any(
            input_var.type == qtype.dsl.domain_types.ChatMessage
            for input_var in inputs
            if isinstance(input_var, dsl.Variable)
        ):
            raise QTypeValidationError(
                f"Chat flow {flow.id} must have at least one input variable of type ChatMessage."
            )
        if (
            not flow.outputs
            or len(flow.outputs) != 1
            or (
                isinstance(flow.outputs[0], dsl.Variable)
                and flow.outputs[0].type
                != qtype.dsl.domain_types.ChatMessage
            )
        ):
            raise QTypeValidationError(
                f"Chat flow {flow.id} must have exactly one output variable of type ChatMessage."
            )

    return dsl_application
File without changes
@@ -1,95 +0,0 @@
1
- from __future__ import annotations
2
-
3
- import logging
4
- from typing import Any, Tuple
5
-
6
- import pandas as pd
7
-
8
- from qtype.interpreter.batch.step import batch_execute_step
9
- from qtype.interpreter.batch.types import BatchConfig
10
- from qtype.interpreter.batch.utils import reconcile_results_and_errors
11
- from qtype.semantic.model import Flow, Sink
12
-
13
- logger = logging.getLogger(__name__)
14
-
15
-
16
def batch_execute_flow(
    flow: Flow,
    inputs: pd.DataFrame,
    batch_config: BatchConfig,
    **kwargs: dict[Any, Any],
) -> Tuple[pd.DataFrame, pd.DataFrame]:
    """Executes a flow in a batch context.

    Each step consumes the reconciled results of the previous step. Sink
    steps receive the whole frame in one call; every other step receives it
    in slices of at most ``batch_config.batch_size`` rows.

    Args:
        flow: The flow to execute.
        inputs: The rows fed to the first step.
        batch_config: The batch configuration to use.
        **kwargs: Accepted for interface compatibility; not used here.

    Returns:
        A tuple ``(results, errors)``: the results of the last step, and the
        errors of all steps concatenated (an empty DataFrame when no step
        reported errors).
    """
    previous_outputs = inputs
    all_errors: list[pd.DataFrame] = []

    # Iterate over each step in the flow
    for step in flow.steps:
        if isinstance(step, Sink):
            # Send the entire batch to the sink
            batches = [previous_outputs]
        else:
            # Batch the current data into dataframes of max size batch_size
            batch_size = batch_config.batch_size
            batches = [
                previous_outputs.iloc[start : start + batch_size]
                for start in range(0, len(previous_outputs), batch_size)
            ]

        results: list[pd.DataFrame] = []
        errors: list[pd.DataFrame] = []
        for batch in batches:
            batch_results, batch_errors = batch_execute_step(
                step, batch, batch_config
            )
            results.append(batch_results)
            # Keep every non-empty error frame, including one-row frames.
            if len(batch_errors) > 0:
                errors.append(batch_errors)

        previous_outputs, errors_df = reconcile_results_and_errors(
            results, errors
        )

        if len(errors_df):
            all_errors.append(errors_df)
            if batch_config.write_errors_to:
                output_file = (
                    f"{batch_config.write_errors_to}/{step.id}.errors.parquet"
                )
                # Persisting errors is best-effort: a failed write is logged
                # and the flow continues.
                try:
                    errors_df.to_parquet(
                        output_file, engine="pyarrow", compression="snappy"
                    )
                    logger.info(
                        "Saved errors for step %s to %s", step.id, output_file
                    )
                except Exception as e:
                    logger.warning(
                        "Could not save errors step %s to %s",
                        step.id,
                        output_file,
                        exc_info=e,
                        stack_info=True,
                    )

    # Return the last step's results and the errors of all steps
    rv_errors = (
        pd.concat(all_errors, ignore_index=True)
        if all_errors
        else pd.DataFrame({})
    )
    return previous_outputs, rv_errors
@@ -1,95 +0,0 @@
1
- from typing import Any, Tuple
2
-
3
- import boto3 # type: ignore[import-untyped]
4
- import pandas as pd
5
- import sqlalchemy
6
- from sqlalchemy import create_engine
7
- from sqlalchemy.exc import SQLAlchemyError
8
-
9
- from qtype.base.exceptions import InterpreterError
10
- from qtype.interpreter.auth.generic import auth
11
- from qtype.interpreter.batch.types import BatchConfig, ErrorMode
12
- from qtype.interpreter.batch.utils import (
13
- reconcile_results_and_errors,
14
- validate_inputs,
15
- )
16
- from qtype.semantic.model import SQLSource
17
-
18
-
19
def to_output_columns(
    df: pd.DataFrame, output_columns: set[str]
) -> pd.DataFrame:
    """Restrict a DataFrame to the requested output columns.

    Args:
        df: The input DataFrame.
        output_columns: Names of the columns to keep.

    Returns:
        ``df`` itself when it has no rows; otherwise a DataFrame holding
        only the requested columns, in the order they appear in ``df``.

    Raises:
        InterpreterError: If a requested column is absent from a non-empty
            ``df``.
    """
    # A frame without rows is passed through untouched.
    if not len(df):
        return df

    present = set(df.columns)
    missing = output_columns - present
    if missing:
        raise InterpreterError(
            f"SQL Result was missing expected columns: {','.join(missing)}, it has columns: {','.join(df.columns)}"
        )

    kept = []
    for col in df.columns:
        if col in output_columns:
            kept.append(col)
    return df[kept]
40
-
41
-
42
def execute_sql_source(
    step: SQLSource,
    inputs: pd.DataFrame,
    batch_config: BatchConfig,
    **kwargs: dict[Any, Any],
) -> Tuple[pd.DataFrame, pd.DataFrame]:
    """Executes a SQLSource step to retrieve data from a SQL database.

    The step's query is run once per input row, with the row's values for
    the step's declared inputs bound as query parameters.

    Args:
        step: The SQLSource step to execute.
        inputs: One row per query execution.
        batch_config: Batch settings; ``error_mode`` decides whether a SQL
            error aborts the run or is collected.
        **kwargs: Accepted for interface compatibility; not used here.

    Returns:
        A tuple containing two DataFrames:
            - The first DataFrame contains the successfully retrieved data.
            - The second DataFrame contains rows that encountered errors with an 'error' column.

    Raises:
        SQLAlchemyError: Re-raised when a query fails and
            ``batch_config.error_mode`` is ``ErrorMode.FAIL``.
        InterpreterError: If a non-empty query result lacks one of the
            step's output columns (raised by ``to_output_columns``).
    """
    validate_inputs(inputs, step)

    connect_args: dict[str, Any] = {}
    if step.auth:
        with auth(step.auth) as creds:
            # Only a boto3 session is forwarded to the driver.
            if isinstance(creds, boto3.Session):
                connect_args["session"] = creds

    # Create a database engine
    engine = create_engine(step.connection, connect_args=connect_args)

    output_columns = {output.id for output in step.outputs}
    step_inputs = {i.id for i in step.inputs}

    results = []
    errors = []
    try:
        for _, row in inputs.iterrows():
            try:
                # Make a dictionary of column_name: value from row
                params = {
                    col: row[col] for col in row.index if col in step_inputs
                }
                # Execute the query and fetch the results into a DataFrame
                with engine.connect() as connection:
                    result = connection.execute(
                        sqlalchemy.text(step.query),
                        parameters=params or None,
                    )
                    df = pd.DataFrame(
                        result.fetchall(), columns=list(result.keys())
                    )
                df = to_output_columns(df, output_columns)
                results.append(df)
            except SQLAlchemyError as e:
                if batch_config.error_mode == ErrorMode.FAIL:
                    raise
                # Otherwise record the error message for this row and carry on.
                errors.append(pd.DataFrame([{"error": str(e)}]))
    finally:
        # Release the engine's connection pool; all results have already
        # been materialized into DataFrames.
        engine.dispose()

    return reconcile_results_and_errors(results, errors)