qtype 0.0.16__py3-none-any.whl → 0.1.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (128)
  1. qtype/application/commons/tools.py +1 -1
  2. qtype/application/converters/tools_from_api.py +5 -5
  3. qtype/application/converters/tools_from_module.py +2 -2
  4. qtype/application/converters/types.py +14 -43
  5. qtype/application/documentation.py +1 -1
  6. qtype/application/facade.py +94 -73
  7. qtype/base/types.py +227 -7
  8. qtype/cli.py +4 -0
  9. qtype/commands/convert.py +20 -8
  10. qtype/commands/generate.py +19 -27
  11. qtype/commands/run.py +73 -36
  12. qtype/commands/serve.py +74 -54
  13. qtype/commands/validate.py +34 -8
  14. qtype/commands/visualize.py +46 -22
  15. qtype/dsl/__init__.py +6 -5
  16. qtype/dsl/custom_types.py +1 -1
  17. qtype/dsl/domain_types.py +65 -5
  18. qtype/dsl/linker.py +384 -0
  19. qtype/dsl/loader.py +315 -0
  20. qtype/dsl/model.py +612 -363
  21. qtype/dsl/parser.py +200 -0
  22. qtype/dsl/types.py +50 -0
  23. qtype/interpreter/api.py +57 -136
  24. qtype/interpreter/auth/aws.py +19 -9
  25. qtype/interpreter/auth/generic.py +93 -16
  26. qtype/interpreter/base/base_step_executor.py +436 -0
  27. qtype/interpreter/base/batch_step_executor.py +171 -0
  28. qtype/interpreter/base/exceptions.py +50 -0
  29. qtype/interpreter/base/executor_context.py +74 -0
  30. qtype/interpreter/base/factory.py +117 -0
  31. qtype/interpreter/base/progress_tracker.py +110 -0
  32. qtype/interpreter/base/secrets.py +339 -0
  33. qtype/interpreter/base/step_cache.py +74 -0
  34. qtype/interpreter/base/stream_emitter.py +469 -0
  35. qtype/interpreter/conversions.py +462 -22
  36. qtype/interpreter/converters.py +77 -0
  37. qtype/interpreter/endpoints.py +355 -0
  38. qtype/interpreter/executors/agent_executor.py +242 -0
  39. qtype/interpreter/executors/aggregate_executor.py +93 -0
  40. qtype/interpreter/executors/decoder_executor.py +163 -0
  41. qtype/interpreter/executors/doc_to_text_executor.py +112 -0
  42. qtype/interpreter/executors/document_embedder_executor.py +107 -0
  43. qtype/interpreter/executors/document_search_executor.py +122 -0
  44. qtype/interpreter/executors/document_source_executor.py +118 -0
  45. qtype/interpreter/executors/document_splitter_executor.py +105 -0
  46. qtype/interpreter/executors/echo_executor.py +63 -0
  47. qtype/interpreter/executors/field_extractor_executor.py +160 -0
  48. qtype/interpreter/executors/file_source_executor.py +101 -0
  49. qtype/interpreter/executors/file_writer_executor.py +110 -0
  50. qtype/interpreter/executors/index_upsert_executor.py +228 -0
  51. qtype/interpreter/executors/invoke_embedding_executor.py +92 -0
  52. qtype/interpreter/executors/invoke_flow_executor.py +51 -0
  53. qtype/interpreter/executors/invoke_tool_executor.py +358 -0
  54. qtype/interpreter/executors/llm_inference_executor.py +272 -0
  55. qtype/interpreter/executors/prompt_template_executor.py +78 -0
  56. qtype/interpreter/executors/sql_source_executor.py +106 -0
  57. qtype/interpreter/executors/vector_search_executor.py +91 -0
  58. qtype/interpreter/flow.py +159 -22
  59. qtype/interpreter/metadata_api.py +115 -0
  60. qtype/interpreter/resource_cache.py +5 -4
  61. qtype/interpreter/rich_progress.py +225 -0
  62. qtype/interpreter/stream/chat/__init__.py +15 -0
  63. qtype/interpreter/stream/chat/converter.py +391 -0
  64. qtype/interpreter/{chat → stream/chat}/file_conversions.py +2 -2
  65. qtype/interpreter/stream/chat/ui_request_to_domain_type.py +140 -0
  66. qtype/interpreter/stream/chat/vercel.py +609 -0
  67. qtype/interpreter/stream/utils/__init__.py +15 -0
  68. qtype/interpreter/stream/utils/build_vercel_ai_formatter.py +74 -0
  69. qtype/interpreter/stream/utils/callback_to_stream.py +66 -0
  70. qtype/interpreter/stream/utils/create_streaming_response.py +18 -0
  71. qtype/interpreter/stream/utils/default_chat_extract_text.py +20 -0
  72. qtype/interpreter/stream/utils/error_streaming_response.py +20 -0
  73. qtype/interpreter/telemetry.py +135 -8
  74. qtype/interpreter/tools/__init__.py +5 -0
  75. qtype/interpreter/tools/function_tool_helper.py +265 -0
  76. qtype/interpreter/types.py +330 -0
  77. qtype/interpreter/typing.py +83 -89
  78. qtype/interpreter/ui/404/index.html +1 -1
  79. qtype/interpreter/ui/404.html +1 -1
  80. qtype/interpreter/ui/_next/static/{nUaw6_IwRwPqkzwe5s725 → 20HoJN6otZ_LyHLHpCPE6}/_buildManifest.js +1 -1
  81. qtype/interpreter/ui/_next/static/chunks/{393-8fd474427f8e19ce.js → 434-b2112d19f25c44ff.js} +3 -3
  82. qtype/interpreter/ui/_next/static/chunks/app/page-8c67d16ac90d23cb.js +1 -0
  83. qtype/interpreter/ui/_next/static/chunks/ba12c10f-546f2714ff8abc66.js +1 -0
  84. qtype/interpreter/ui/_next/static/css/8a8d1269e362fef7.css +3 -0
  85. qtype/interpreter/ui/icon.png +0 -0
  86. qtype/interpreter/ui/index.html +1 -1
  87. qtype/interpreter/ui/index.txt +4 -4
  88. qtype/semantic/checker.py +583 -0
  89. qtype/semantic/generate.py +262 -83
  90. qtype/semantic/loader.py +95 -0
  91. qtype/semantic/model.py +436 -159
  92. qtype/semantic/resolver.py +63 -19
  93. qtype/semantic/visualize.py +28 -31
  94. {qtype-0.0.16.dist-info → qtype-0.1.1.dist-info}/METADATA +16 -3
  95. qtype-0.1.1.dist-info/RECORD +135 -0
  96. qtype/dsl/base_types.py +0 -38
  97. qtype/dsl/validator.py +0 -465
  98. qtype/interpreter/batch/__init__.py +0 -0
  99. qtype/interpreter/batch/file_sink_source.py +0 -162
  100. qtype/interpreter/batch/flow.py +0 -95
  101. qtype/interpreter/batch/sql_source.py +0 -92
  102. qtype/interpreter/batch/step.py +0 -74
  103. qtype/interpreter/batch/types.py +0 -41
  104. qtype/interpreter/batch/utils.py +0 -178
  105. qtype/interpreter/chat/chat_api.py +0 -237
  106. qtype/interpreter/chat/vercel.py +0 -314
  107. qtype/interpreter/exceptions.py +0 -10
  108. qtype/interpreter/step.py +0 -67
  109. qtype/interpreter/steps/__init__.py +0 -0
  110. qtype/interpreter/steps/agent.py +0 -114
  111. qtype/interpreter/steps/condition.py +0 -36
  112. qtype/interpreter/steps/decoder.py +0 -88
  113. qtype/interpreter/steps/llm_inference.py +0 -171
  114. qtype/interpreter/steps/prompt_template.py +0 -54
  115. qtype/interpreter/steps/search.py +0 -24
  116. qtype/interpreter/steps/tool.py +0 -219
  117. qtype/interpreter/streaming_helpers.py +0 -123
  118. qtype/interpreter/ui/_next/static/chunks/app/page-7e26b6156cfb55d3.js +0 -1
  119. qtype/interpreter/ui/_next/static/chunks/ba12c10f-22556063851a6df2.js +0 -1
  120. qtype/interpreter/ui/_next/static/css/b40532b0db09cce3.css +0 -3
  121. qtype/interpreter/ui/favicon.ico +0 -0
  122. qtype/loader.py +0 -390
  123. qtype-0.0.16.dist-info/RECORD +0 -106
  124. /qtype/interpreter/ui/_next/static/{nUaw6_IwRwPqkzwe5s725 → 20HoJN6otZ_LyHLHpCPE6}/_ssgManifest.js +0 -0
  125. {qtype-0.0.16.dist-info → qtype-0.1.1.dist-info}/WHEEL +0 -0
  126. {qtype-0.0.16.dist-info → qtype-0.1.1.dist-info}/entry_points.txt +0 -0
  127. {qtype-0.0.16.dist-info → qtype-0.1.1.dist-info}/licenses/LICENSE +0 -0
  128. {qtype-0.0.16.dist-info → qtype-0.1.1.dist-info}/top_level.txt +0 -0
qtype/dsl/validator.py DELETED
@@ -1,465 +0,0 @@
-from typing import Any, Dict, Union, get_args, get_origin
-
-import qtype.dsl.base_types as base_types
-import qtype.dsl.domain_types
-import qtype.dsl.model as dsl
-
-
-class QTypeValidationError(Exception):
-    """Raised when there's an error during QType validation."""
-
-    pass
-
-
-class DuplicateComponentError(QTypeValidationError):
-    """Raised when there are duplicate components with the same ID."""
-
-    def __init__(
-        self,
-        obj_id: str,
-        found_obj: qtype.dsl.domain_types.StrictBaseModel,
-        existing_obj: qtype.dsl.domain_types.StrictBaseModel,
-    ):
-        super().__init__(
-            f'Duplicate component with ID "{obj_id}" found.'
-            # f"Duplicate component with ID \"{obj_id}\" found:\n{found_obj.model_dump_json()}\nAlready exists:\n{existing_obj.model_dump_json()}"
-        )
-
-
-class ComponentNotFoundError(QTypeValidationError):
-    """Raised when a component is not found in the DSL Application."""
-
-    def __init__(self, component_id: str):
-        super().__init__(
-            f"Component with ID '{component_id}' not found in the DSL Application."
-        )
-
-
-class ReferenceNotFoundError(QTypeValidationError):
-    """Raised when a reference is not found in the lookup map."""
-
-    def __init__(self, reference: str, type_hint: str | None = None):
-        msg = (
-            f"Reference '{reference}' not found in lookup map."
-            if type_hint is None
-            else f"Reference '{reference}' not found in lookup map for type '{type_hint}'."
-        )
-        super().__init__(msg)
-
-
-class FlowHasNoStepsError(QTypeValidationError):
-    """Raised when a flow has no steps defined."""
-
-    def __init__(self, flow_id: str):
-        super().__init__(f"Flow {flow_id} has no steps defined.")
-
-
-# These types are used only for the DSL and should not be converted to semantic types
-# They are used for JSON schema generation
-# They will be switched to their semantic abstract class in the generation.
-# i.e., `ToolType` will be switched to `Tool`
-def _update_map_with_unique_check(
-    current_map: Dict[str, qtype.dsl.domain_types.StrictBaseModel],
-    new_objects: list[qtype.dsl.domain_types.StrictBaseModel],
-) -> None:
-    """
-    Update a map with new objects, ensuring unique IDs.
-
-    Args:
-        current_map: The current map of objects by ID.
-        new_objects: List of new objects to add to the map.
-
-    Returns:
-        Updated map with new objects added, ensuring unique IDs.
-    """
-    for obj in new_objects:
-        if obj is None:
-            # If the object is None, we skip it.
-            continue
-        if isinstance(obj, str):
-            # If the object is a string, we assume it is an ID and skip it.
-            # This is a special case where we do not want to add the string itself.
-            continue
-        # Note: There is no current abstraction for the `id` field, so we assume it exists.
-        obj_id = obj.id  # type: ignore[attr-defined]
-        # If the object already exists in the map, we check if it is the same object.
-        # If it is not the same object, we raise an error.
-        # This ensures that we do not have duplicate components with the same ID.
-        if obj_id in current_map and id(current_map[obj_id]) != id(obj):
-            raise DuplicateComponentError(obj_id, obj, current_map[obj_id])
-        else:
-            current_map[obj_id] = obj
-
-
-def _update_maps_with_embedded_objects(
-    lookup_map: Dict[str, qtype.dsl.domain_types.StrictBaseModel],
-    embedded_objects: list[qtype.dsl.domain_types.StrictBaseModel],
-) -> None:
-    """
-    Update lookup maps with embedded objects.
-    Embedded objects are when the user specifies the object and not just the ID.
-    For example, a prompt template may have variables embedded:
-    ```yaml
-    steps:
-      - id: my_prompt
-        variables:
-          - id: my_var
-            type: text
-        outputs:
-          - id: my_output
-            type: text
-    ```
-
-    Args:
-        lookup_maps: The current lookup maps to update.
-        embedded_objects: List of embedded objects to add to the maps.
-    """
-    for obj in embedded_objects:
-        if isinstance(obj, dsl.Step):
-            # All steps have inputs and outputs
-            _update_map_with_unique_check(lookup_map, obj.inputs or [])  # type: ignore
-            _update_map_with_unique_check(lookup_map, obj.outputs or [])  # type: ignore
-            _update_map_with_unique_check(lookup_map, [obj])
-
-        if isinstance(obj, dsl.Model):
-            # note inputs
-            _update_map_with_unique_check(lookup_map, [obj.auth])  # type: ignore
-
-        if isinstance(obj, dsl.Condition):
-            # Conditions have inputs and outputs
-            _update_map_with_unique_check(lookup_map, [obj.then, obj.else_])  # type: ignore
-            _update_map_with_unique_check(lookup_map, [obj.equals])  # type: ignore
-            if obj.then and isinstance(obj.then, dsl.Step):
-                _update_maps_with_embedded_objects(lookup_map, [obj.then])
-            if obj.else_ and isinstance(obj.else_, dsl.Step):
-                _update_maps_with_embedded_objects(lookup_map, [obj.else_])
-
-        if isinstance(obj, dsl.APITool):
-            # API tools have inputs and outputs
-            _update_map_with_unique_check(lookup_map, [obj.auth])  # type: ignore
-
-        if isinstance(obj, dsl.LLMInference):
-            # LLM Inference steps have inputs and outputs
-            _update_map_with_unique_check(lookup_map, [obj.model])  # type: ignore
-            _update_maps_with_embedded_objects(lookup_map, [obj.model])  # type: ignore
-            _update_map_with_unique_check(lookup_map, [obj.memory])  # type: ignore
-
-        if isinstance(obj, dsl.Agent):
-            _update_map_with_unique_check(lookup_map, obj.tools or [])  # type: ignore
-            _update_maps_with_embedded_objects(lookup_map, obj.tools or [])  # type: ignore
-
-        if isinstance(obj, dsl.Flow):
-            _update_map_with_unique_check(lookup_map, [obj])
-            _update_map_with_unique_check(lookup_map, obj.steps or [])  # type: ignore
-            _update_maps_with_embedded_objects(lookup_map, obj.steps or [])  # type: ignore
-
-        if isinstance(obj, dsl.TelemetrySink):
-            # Telemetry sinks may have auth references
-            _update_map_with_unique_check(lookup_map, [obj.auth])  # type: ignore
-
-        if isinstance(obj, dsl.Index):
-            # Indexes may have auth references
-            _update_map_with_unique_check(lookup_map, [obj.auth])  # type: ignore
-
-        if isinstance(obj, dsl.VectorIndex):
-            if isinstance(obj.embedding_model, dsl.EmbeddingModel):
-                _update_map_with_unique_check(
-                    lookup_map, [obj.embedding_model]
-                )
-                _update_maps_with_embedded_objects(
-                    lookup_map, [obj.embedding_model]
-                )
-
-        if isinstance(obj, dsl.Search):
-            if isinstance(obj.index, dsl.Index):
-                _update_map_with_unique_check(lookup_map, [obj.index])
-                _update_maps_with_embedded_objects(lookup_map, [obj.index])
-
-        if isinstance(obj, dsl.AuthorizationProviderList):
-            # AuthorizationProviderList is a list of AuthorizationProvider objects
-            _update_map_with_unique_check(lookup_map, obj.root)  # type: ignore
-            _update_maps_with_embedded_objects(lookup_map, obj.root)  # type: ignore
-
-        if isinstance(obj, dsl.IndexList):
-            # IndexList is a list of Index objects
-            _update_map_with_unique_check(lookup_map, obj.root)  # type: ignore
-            _update_maps_with_embedded_objects(lookup_map, obj.root)  # type: ignore
-
-        if isinstance(obj, dsl.ModelList):
-            # ModelList is a list of Model objects
-            _update_map_with_unique_check(lookup_map, obj.root)  # type: ignore
-            _update_maps_with_embedded_objects(lookup_map, obj.root)  # type: ignore
-
-        if isinstance(obj, dsl.ToolList):
-            # ToolList is a list of Tool objects
-            _update_map_with_unique_check(lookup_map, obj.root)  # type: ignore
-            _update_maps_with_embedded_objects(lookup_map, obj.root)  # type: ignore
-
-        if isinstance(obj, dsl.TypeList):
-            # TypeList is a list of Type objects
-            _update_map_with_unique_check(lookup_map, obj.root)  # type: ignore
-
-        if isinstance(obj, dsl.VariableList):
-            # VariableList is a list of Variable objects
-            _update_map_with_unique_check(lookup_map, obj.root)  # type: ignore
-
-        if isinstance(obj, dsl.TelemetrySink):
-            # TelemetrySink is a list of TelemetrySink objects
-            _update_map_with_unique_check(lookup_map, [obj.auth])  # type: ignore
-
-
-def _build_lookup_maps(
-    dsl_application: dsl.Application,
-    lookup_map: Dict[str, qtype.dsl.domain_types.StrictBaseModel]
-    | None = None,
-) -> Dict[str, qtype.dsl.domain_types.StrictBaseModel]:
-    """
-    Build lookup map for all objects in the DSL Application.
-    This function creates a dictionary of id -> component, where each key is a
-    component id and the value is the component.
-    Args:
-        dsl_application: The DSL Application to build lookup maps for.
-    Returns:
-        Dict[str, dsl.StrictBaseModel]: A dictionary of lookup maps
-    Throws:
-        SemanticResolutionError: If there are duplicate components with the same ID.
-    """
-    component_names = {
-        f
-        for f in dsl.Application.model_fields.keys()
-        if f not in set(["id", "references"])
-    }
-
-    if lookup_map is None:
-        lookup_map = {}
-
-    for component_name in component_names:
-        if not hasattr(dsl_application, component_name):
-            raise ComponentNotFoundError(component_name)
-        components = getattr(dsl_application, component_name) or []
-        if not isinstance(components, list):
-            components = [components]  # Ensure we have a list
-        _update_map_with_unique_check(lookup_map, components)
-        _update_maps_with_embedded_objects(lookup_map, components)
-
-    # now deal with the references.
-    for ref in dsl_application.references or []:
-        ref = ref.root  # type: ignore
-        if isinstance(ref, dsl.Application):
-            _build_lookup_maps(ref, lookup_map)
-
-    # Anything in the reference list that is not an Application is handled by the embedded object resolver.
-    _update_maps_with_embedded_objects(
-        lookup_map,
-        [
-            ref.root  # type: ignore
-            for ref in dsl_application.references or []
-            if not isinstance(ref.root, dsl.Application)
-        ],  # type: ignore
-    )
-
-    lookup_map[dsl_application.id] = dsl_application
-
-    return lookup_map
-
-
-def _is_dsl_type(type_obj: Any) -> bool:
-    """Check if a type is a DSL type that should be converted to semantic."""
-    if not hasattr(type_obj, "__name__"):
-        return False
-
-    # Check if it's defined in the DSL module
-    return (
-        hasattr(type_obj, "__module__")
-        and (
-            type_obj.__module__ == dsl.__name__
-            or type_obj.__module__ == base_types.__name__
-        )
-        and not type_obj.__name__.startswith("_")
-    )
-
-
-def _resolve_forward_ref(field_type: Any) -> Any:
-    """
-    Resolve a ForwardRef type to its actual type.
-    This is used to handle cases where the type is a string that refers to a class.
-    """
-    if hasattr(field_type, "__forward_arg__"):
-        # Extract the string from ForwardRef and process it
-        forward_ref_str = field_type.__forward_arg__
-        # Use eval to get the actual type from the string
-        return eval(forward_ref_str, dict(vars(dsl)))
-    return field_type
-
-
-def _is_union(type: Any) -> bool:
-    """
-    Indicates if the provided type is a Union type.
-    """
-    origin = get_origin(type)
-    return origin is Union or (
-        hasattr(type, "__class__") and type.__class__.__name__ == "UnionType"
-    )
-
-
-def _is_reference_type(field_type: Any) -> bool:
-    """
-    Indicates if the provided type can be a reference -- i.e., a union between a dsl type and a string.
-    """
-    field_type = _resolve_forward_ref(field_type)
-
-    if _is_union(field_type):
-        args = get_args(field_type)
-        has_str = any(arg is str for arg in args)
-        has_dsl_type = any(_is_dsl_type(arg) for arg in args)
-        return has_str and has_dsl_type
-    else:
-        return False
-
-
-def _resolve_id_references(
-    dslobj: qtype.dsl.domain_types.StrictBaseModel | str,
-    lookup_map: Dict[str, qtype.dsl.domain_types.StrictBaseModel],
-) -> Any:
-    """
-    Resolves ID references in a DSL object such that all references are replaced with the actual object.
-    """
-
-    if isinstance(dslobj, str):
-        # If the object is a string, we assume it is an ID and look it up in the map.
-        if dslobj in lookup_map:
-            return lookup_map[dslobj]
-        else:
-            raise ReferenceNotFoundError(dslobj)
-
-    # iterate over all fields in the object
-    def lookup_reference(val: str, typ: Any) -> Any:
-        if (
-            isinstance(val, str)
-            and _is_reference_type(typ)
-            and not _is_dsl_type(type(val))
-        ):
-            if val in lookup_map:
-                return lookup_map[val]
-            else:
-                raise ReferenceNotFoundError(val, str(typ))
-        return val
-
-    for field_name, field_value in dslobj:
-        field_info = dslobj.__class__.model_fields[field_name]
-        field_type = _resolve_forward_ref(field_info.annotation)
-
-        if isinstance(field_value, list):
-            # If the field value is a list, resolve each item in the list
-            # Get the type of the items of the list
-            field_type = field_type.__args__[0]  # type: ignore
-            if (
-                get_origin(field_type) is list
-            ):  # handles case where we have list[Class] | None -- in this case field_type is Union and item_type is now the list...
-                field_type = field_type.__args__[0]
-            resolved_list = [
-                lookup_reference(item, field_type)  # type: ignore
-                for item in field_value
-            ]
-            setattr(dslobj, field_name, resolved_list)
-        elif isinstance(field_value, dict):
-            field_type = field_type.__args__[0]
-            if (
-                get_origin(field_type) is dict
-            ):  # handles case where we have dict[Class] | None -- in this case field_type is Union and item_type is now the dict...
-                field_type = field_type.__args__[1]
-            # If the field value is a dict, resolve each value in the dict
-            resolved_dict = {
-                k: lookup_reference(v, field_type)  # type: ignore
-                for k, v in field_value.items()
-            }
-            setattr(dslobj, field_name, resolved_dict)
-        elif field_value is None:
-            # Convert lst | None to an empty list
-            # and dict | None to an empty dict
-            if _is_union(field_type):
-                args = field_type.__args__  # type: ignore
-                if any(str(arg).startswith("list") for arg in args):
-                    setattr(dslobj, field_name, [])
-                elif any(str(arg).startswith("dict") for arg in args):
-                    setattr(dslobj, field_name, {})
-        else:
-            setattr(
-                dslobj, field_name, lookup_reference(field_value, field_type)
-            )
-
-    return dslobj
-
-
-def validate(
-    dsl_application: dsl.Application,
-) -> dsl.Application:
-    """
-    Validates the semantics of a DSL Application and returns a copy of it with all
-    internal references resolved to their actual objects.
-    Args:
-        dsl_application: The DSL Application to validate.
-    Returns:
-        dsl.Application: A copy of the DSL Application with all internal references resolved.
-    Throws:
-        SemanticResolutionError: If there are semantic errors in the DSL Application.
-    """
-
-    # First, make a lookup map of all objects in the DSL Application.
-    # This ensures that all object ids are unique.
-    lookup_map = _build_lookup_maps(dsl_application)
-
-    # If any flows have no steps, we raise an error.
-    for flow in dsl_application.flows or []:
-        if not flow.steps:
-            raise FlowHasNoStepsError(flow.id)
-        # If any flow doesn't have inputs, copy the inputs from the first step.
-        if not flow.inputs:
-            first_step = (
-                lookup_map[flow.steps[0]]
-                if isinstance(flow.steps[0], str)
-                else flow.steps[0]
-            )
-            flow.inputs = first_step.inputs or []  # type: ignore
-
-        # If any flow doesn't have outputs, copy them from the last step.
-        if not flow.outputs:
-            last_step = (
-                lookup_map[flow.steps[-1]]
-                if isinstance(flow.steps[-1], str)
-                else flow.steps[-1]
-            )
-            flow.outputs = last_step.outputs or []  # type: ignore
-
-    # Now we resolve all ID references in the DSL Application.
-    lookup_map = {
-        obj_id: _resolve_id_references(obj, lookup_map)
-        for obj_id, obj in lookup_map.items()
-    }
-
-    # If any chat flow doesn't have an input variable that is a chat message, raise an error.
-    for flow in dsl_application.flows or []:
-        if flow.mode == "Chat":
-            inputs = flow.inputs or []
-            if not any(
-                input_var.type == qtype.dsl.domain_types.ChatMessage
-                for input_var in inputs
-                if isinstance(input_var, dsl.Variable)
-            ):
-                raise QTypeValidationError(
-                    f"Chat flow {flow.id} must have at least one input variable of type ChatMessage."
-                )
-            if (
-                not flow.outputs
-                or len(flow.outputs) != 1
-                or (
-                    isinstance(flow.outputs[0], dsl.Variable)
-                    and flow.outputs[0].type
-                    != qtype.dsl.domain_types.ChatMessage
-                )
-            ):
-                raise QTypeValidationError(
-                    f"Chat flow {flow.id} must have exactly one output variable of type ChatMessage."
-                )
-
-    return dsl_application
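
The deleted validator implemented a two-phase resolution: build a single id-to-object lookup map while rejecting duplicate IDs, then rewrite every string reference to the object it names. Judging by the file list, `qtype/dsl/linker.py` and `qtype/semantic/checker.py` appear to take over these duties in 0.1.1. Below is a minimal, self-contained sketch of that pattern; `Component`, `build_lookup`, and `resolve_refs` are illustrative names, not qtype APIs.

```python
from dataclasses import dataclass, field


@dataclass
class Component:
    id: str
    # References are stored either as string ids or as embedded Component objects.
    refs: list["Component | str"] = field(default_factory=list)


def build_lookup(components: list[Component]) -> dict[str, Component]:
    """Index every component (and embedded children) by id, rejecting duplicates."""
    lookup: dict[str, Component] = {}

    def add(obj: Component) -> None:
        existing = lookup.get(obj.id)
        if existing is not None and existing is not obj:
            raise ValueError(f'Duplicate component with ID "{obj.id}" found.')
        lookup[obj.id] = obj
        for ref in obj.refs:
            if isinstance(ref, Component):  # embedded object: index it too
                add(ref)

    for c in components:
        add(c)
    return lookup


def resolve_refs(components: list[Component], lookup: dict[str, Component]) -> None:
    """Replace string ids with the objects they name (KeyError for unknown ids,
    analogous to ReferenceNotFoundError above)."""
    for c in components:
        c.refs = [lookup[r] if isinstance(r, str) else r for r in c.refs]


model = Component("gpt4")
flow = Component("main_flow", refs=["gpt4"])
lookup = build_lookup([model, flow])
resolve_refs([model, flow], lookup)
assert flow.refs[0] is model  # the string id now points at the real object
```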
qtype/interpreter/batch/file_sink_source.py DELETED
@@ -1,162 +0,0 @@
-from typing import Any, Tuple
-
-import fsspec  # type: ignore[import-untyped]
-import pandas as pd
-
-from qtype.base.exceptions import InterpreterError
-from qtype.interpreter.batch.types import BatchConfig, ErrorMode
-from qtype.interpreter.batch.utils import reconcile_results_and_errors
-from qtype.semantic.model import FileSink, FileSource
-
-
-def execute_file_source(
-    step: FileSource,
-    inputs: pd.DataFrame,
-    batch_config: BatchConfig,
-    **kwargs: dict[Any, Any],
-) -> Tuple[pd.DataFrame, pd.DataFrame]:
-    """Executes a FileSource step to read data from a file using fsspec.
-
-    Args:
-        step: The FileSource step to execute.
-        inputs: Input DataFrame (may contain path variable).
-        batch_config: Configuration for batch processing.
-        **kwargs: Additional keyword arguments.
-
-    Returns:
-        A tuple containing two DataFrames:
-        - The first DataFrame contains the successfully read data.
-        - The second DataFrame contains rows that encountered errors with an 'error' column.
-    """
-    output_columns = {output.id for output in step.outputs}
-
-    results = []
-    errors = []
-
-    # FileSource has cardinality 'many', so it reads once and produces multiple output rows
-    # We process each input row (which might have different paths) separately
-    for _, row in inputs.iterrows():
-        try:
-            file_path = step.path if step.path else row.get("path")
-            if not file_path:
-                raise InterpreterError(
-                    f"No path specified for {type(step).__name__}. "
-                    "Either set the 'path' field or provide a 'path' input variable."
-                )
-
-            # Use fsspec to open the file and read with pandas
-            with fsspec.open(file_path, "rb") as file_handle:
-                df = pd.read_parquet(file_handle)  # type: ignore[arg-type]
-
-            # Filter to only the expected output columns if they exist
-            if output_columns and len(df) > 0:
-                available_columns = set(df.columns)
-                missing_columns = output_columns - available_columns
-                if missing_columns:
-                    raise InterpreterError(
-                        f"File {file_path} missing expected columns: {', '.join(missing_columns)}. "
-                        f"Available columns: {', '.join(available_columns)}"
-                    )
-                df = df[[col for col in df.columns if col in output_columns]]
-
-            results.append(df)
-
-        except Exception as e:
-            if batch_config.error_mode == ErrorMode.FAIL:
-                raise e
-
-            # If there's an error, add it to the errors list
-            error_df = pd.DataFrame([{"error": str(e)}])
-            errors.append(error_df)
-
-    return reconcile_results_and_errors(results, errors)
-
-
-def execute_file_sink(
-    step: FileSink,
-    inputs: pd.DataFrame,
-    batch_config: BatchConfig,
-    **kwargs: dict[Any, Any],
-) -> Tuple[pd.DataFrame, pd.DataFrame]:
-    """Executes a FileSink step to write data to a file using fsspec.
-
-    Args:
-        step: The FileSink step to execute.
-        inputs: Input DataFrame containing data to write.
-        batch_config: Configuration for batch processing.
-        **kwargs: Additional keyword arguments.
-
-    Returns:
-        A tuple containing two DataFrames:
-        - The first DataFrame contains success indicators.
-        - The second DataFrame contains rows that encountered errors with an 'error' column.
-    """
-    # this is enforced by the dsl, but we'll check here to confirm
-    if len(step.outputs) > 1:
-        raise InterpreterError(
-            f"There should only be one output variable for {type(step).__name__}."
-        )
-    output_column_name = step.outputs[0].id
-
-    # make a list of all file paths
-    try:
-        if step.path:
-            file_paths = [step.path] * len(inputs)
-        else:
-            if "path" not in inputs.columns:
-                raise InterpreterError(
-                    f"No path specified for {type(step).__name__}. "
-                    "Either set the 'path' field or provide a 'path' input variable."
-                )
-            file_paths = inputs["path"].tolist()
-    except Exception as e:
-        if batch_config.error_mode == ErrorMode.FAIL:
-            raise e
-        # If we can't get the path, we can't proceed
-        return pd.DataFrame(), pd.DataFrame([{"error": str(e)}])
-
-    # Check if all paths are the same
-    unique_paths = list(set(file_paths))
-
-    if len(unique_paths) == 1:
-        # All rows write to the same file - process as one batch
-        file_path = unique_paths[0]
-
-        try:
-            # Use fsspec to write the parquet file
-            input_columns = [i.id for i in step.inputs]
-            with fsspec.open(file_path, "wb") as file_handle:
-                inputs[input_columns].to_parquet(file_handle, index=False)  # type: ignore[arg-type]
-
-            inputs[output_column_name] = file_path
-            return inputs, pd.DataFrame()
-
-        except Exception as e:
-            if batch_config.error_mode == ErrorMode.FAIL:
-                raise e
-
-            # If there's an error, return error for all rows
-            error_df = pd.DataFrame([{"error": str(e)}])
-            return pd.DataFrame(), error_df
-
-    else:
-        # Multiple unique paths - split inputs and process recursively
-        all_results = []
-        all_errors = []
-
-        for unique_path in unique_paths:
-            # Create mask for rows with this path
-            path_mask = [p == unique_path for p in file_paths]
-            sliced_inputs = inputs[path_mask].copy()
-
-            # Recursively call execute_file_sink with the sliced DataFrame
-            results, errors = execute_file_sink(
-                step, sliced_inputs, batch_config, **kwargs
-            )
-
-            if len(results) > 0:
-                all_results.append(results)
-            if len(errors) > 0:
-                all_errors.append(errors)
-
-        return reconcile_results_and_errors(all_results, all_errors)
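
The `execute_file_sink` above groups rows by destination path by recursing once per unique path. The same behavior can be expressed directly with a pandas `groupby`; here is a minimal sketch under that assumption (the helper name `write_grouped_parquet` is hypothetical, and the replacement `file_writer_executor.py` in 0.1.1 may be organized differently).

```python
import fsspec
import pandas as pd


def write_grouped_parquet(inputs: pd.DataFrame, path_column: str = "path") -> None:
    """Write each group of rows sharing the same path to its own parquet file."""
    for file_path, group in inputs.groupby(path_column):
        # Drop the routing column so only the payload columns are persisted.
        data = group.drop(columns=[path_column])
        with fsspec.open(file_path, "wb") as fh:
            data.to_parquet(fh, index=False)


df = pd.DataFrame(
    {
        "text": ["a", "b", "c"],
        "path": ["out/part-0.parquet", "out/part-0.parquet", "out/part-1.parquet"],
    }
)
# write_grouped_parquet(df)  # would write two files under out/
```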