qtype 0.0.16__py3-none-any.whl → 0.1.1__py3-none-any.whl
This diff compares the contents of two publicly available package versions as released to a supported registry. It is provided for informational purposes only and reflects the changes between the versions as they appear in their respective public registries.
- qtype/application/commons/tools.py +1 -1
- qtype/application/converters/tools_from_api.py +5 -5
- qtype/application/converters/tools_from_module.py +2 -2
- qtype/application/converters/types.py +14 -43
- qtype/application/documentation.py +1 -1
- qtype/application/facade.py +94 -73
- qtype/base/types.py +227 -7
- qtype/cli.py +4 -0
- qtype/commands/convert.py +20 -8
- qtype/commands/generate.py +19 -27
- qtype/commands/run.py +73 -36
- qtype/commands/serve.py +74 -54
- qtype/commands/validate.py +34 -8
- qtype/commands/visualize.py +46 -22
- qtype/dsl/__init__.py +6 -5
- qtype/dsl/custom_types.py +1 -1
- qtype/dsl/domain_types.py +65 -5
- qtype/dsl/linker.py +384 -0
- qtype/dsl/loader.py +315 -0
- qtype/dsl/model.py +612 -363
- qtype/dsl/parser.py +200 -0
- qtype/dsl/types.py +50 -0
- qtype/interpreter/api.py +57 -136
- qtype/interpreter/auth/aws.py +19 -9
- qtype/interpreter/auth/generic.py +93 -16
- qtype/interpreter/base/base_step_executor.py +436 -0
- qtype/interpreter/base/batch_step_executor.py +171 -0
- qtype/interpreter/base/exceptions.py +50 -0
- qtype/interpreter/base/executor_context.py +74 -0
- qtype/interpreter/base/factory.py +117 -0
- qtype/interpreter/base/progress_tracker.py +110 -0
- qtype/interpreter/base/secrets.py +339 -0
- qtype/interpreter/base/step_cache.py +74 -0
- qtype/interpreter/base/stream_emitter.py +469 -0
- qtype/interpreter/conversions.py +462 -22
- qtype/interpreter/converters.py +77 -0
- qtype/interpreter/endpoints.py +355 -0
- qtype/interpreter/executors/agent_executor.py +242 -0
- qtype/interpreter/executors/aggregate_executor.py +93 -0
- qtype/interpreter/executors/decoder_executor.py +163 -0
- qtype/interpreter/executors/doc_to_text_executor.py +112 -0
- qtype/interpreter/executors/document_embedder_executor.py +107 -0
- qtype/interpreter/executors/document_search_executor.py +122 -0
- qtype/interpreter/executors/document_source_executor.py +118 -0
- qtype/interpreter/executors/document_splitter_executor.py +105 -0
- qtype/interpreter/executors/echo_executor.py +63 -0
- qtype/interpreter/executors/field_extractor_executor.py +160 -0
- qtype/interpreter/executors/file_source_executor.py +101 -0
- qtype/interpreter/executors/file_writer_executor.py +110 -0
- qtype/interpreter/executors/index_upsert_executor.py +228 -0
- qtype/interpreter/executors/invoke_embedding_executor.py +92 -0
- qtype/interpreter/executors/invoke_flow_executor.py +51 -0
- qtype/interpreter/executors/invoke_tool_executor.py +358 -0
- qtype/interpreter/executors/llm_inference_executor.py +272 -0
- qtype/interpreter/executors/prompt_template_executor.py +78 -0
- qtype/interpreter/executors/sql_source_executor.py +106 -0
- qtype/interpreter/executors/vector_search_executor.py +91 -0
- qtype/interpreter/flow.py +159 -22
- qtype/interpreter/metadata_api.py +115 -0
- qtype/interpreter/resource_cache.py +5 -4
- qtype/interpreter/rich_progress.py +225 -0
- qtype/interpreter/stream/chat/__init__.py +15 -0
- qtype/interpreter/stream/chat/converter.py +391 -0
- qtype/interpreter/{chat → stream/chat}/file_conversions.py +2 -2
- qtype/interpreter/stream/chat/ui_request_to_domain_type.py +140 -0
- qtype/interpreter/stream/chat/vercel.py +609 -0
- qtype/interpreter/stream/utils/__init__.py +15 -0
- qtype/interpreter/stream/utils/build_vercel_ai_formatter.py +74 -0
- qtype/interpreter/stream/utils/callback_to_stream.py +66 -0
- qtype/interpreter/stream/utils/create_streaming_response.py +18 -0
- qtype/interpreter/stream/utils/default_chat_extract_text.py +20 -0
- qtype/interpreter/stream/utils/error_streaming_response.py +20 -0
- qtype/interpreter/telemetry.py +135 -8
- qtype/interpreter/tools/__init__.py +5 -0
- qtype/interpreter/tools/function_tool_helper.py +265 -0
- qtype/interpreter/types.py +330 -0
- qtype/interpreter/typing.py +83 -89
- qtype/interpreter/ui/404/index.html +1 -1
- qtype/interpreter/ui/404.html +1 -1
- qtype/interpreter/ui/_next/static/{nUaw6_IwRwPqkzwe5s725 → 20HoJN6otZ_LyHLHpCPE6}/_buildManifest.js +1 -1
- qtype/interpreter/ui/_next/static/chunks/{393-8fd474427f8e19ce.js → 434-b2112d19f25c44ff.js} +3 -3
- qtype/interpreter/ui/_next/static/chunks/app/page-8c67d16ac90d23cb.js +1 -0
- qtype/interpreter/ui/_next/static/chunks/ba12c10f-546f2714ff8abc66.js +1 -0
- qtype/interpreter/ui/_next/static/css/8a8d1269e362fef7.css +3 -0
- qtype/interpreter/ui/icon.png +0 -0
- qtype/interpreter/ui/index.html +1 -1
- qtype/interpreter/ui/index.txt +4 -4
- qtype/semantic/checker.py +583 -0
- qtype/semantic/generate.py +262 -83
- qtype/semantic/loader.py +95 -0
- qtype/semantic/model.py +436 -159
- qtype/semantic/resolver.py +63 -19
- qtype/semantic/visualize.py +28 -31
- {qtype-0.0.16.dist-info → qtype-0.1.1.dist-info}/METADATA +16 -3
- qtype-0.1.1.dist-info/RECORD +135 -0
- qtype/dsl/base_types.py +0 -38
- qtype/dsl/validator.py +0 -465
- qtype/interpreter/batch/__init__.py +0 -0
- qtype/interpreter/batch/file_sink_source.py +0 -162
- qtype/interpreter/batch/flow.py +0 -95
- qtype/interpreter/batch/sql_source.py +0 -92
- qtype/interpreter/batch/step.py +0 -74
- qtype/interpreter/batch/types.py +0 -41
- qtype/interpreter/batch/utils.py +0 -178
- qtype/interpreter/chat/chat_api.py +0 -237
- qtype/interpreter/chat/vercel.py +0 -314
- qtype/interpreter/exceptions.py +0 -10
- qtype/interpreter/step.py +0 -67
- qtype/interpreter/steps/__init__.py +0 -0
- qtype/interpreter/steps/agent.py +0 -114
- qtype/interpreter/steps/condition.py +0 -36
- qtype/interpreter/steps/decoder.py +0 -88
- qtype/interpreter/steps/llm_inference.py +0 -171
- qtype/interpreter/steps/prompt_template.py +0 -54
- qtype/interpreter/steps/search.py +0 -24
- qtype/interpreter/steps/tool.py +0 -219
- qtype/interpreter/streaming_helpers.py +0 -123
- qtype/interpreter/ui/_next/static/chunks/app/page-7e26b6156cfb55d3.js +0 -1
- qtype/interpreter/ui/_next/static/chunks/ba12c10f-22556063851a6df2.js +0 -1
- qtype/interpreter/ui/_next/static/css/b40532b0db09cce3.css +0 -3
- qtype/interpreter/ui/favicon.ico +0 -0
- qtype/loader.py +0 -390
- qtype-0.0.16.dist-info/RECORD +0 -106
- /qtype/interpreter/ui/_next/static/{nUaw6_IwRwPqkzwe5s725 → 20HoJN6otZ_LyHLHpCPE6}/_ssgManifest.js +0 -0
- {qtype-0.0.16.dist-info → qtype-0.1.1.dist-info}/WHEEL +0 -0
- {qtype-0.0.16.dist-info → qtype-0.1.1.dist-info}/entry_points.txt +0 -0
- {qtype-0.0.16.dist-info → qtype-0.1.1.dist-info}/licenses/LICENSE +0 -0
- {qtype-0.0.16.dist-info → qtype-0.1.1.dist-info}/top_level.txt +0 -0
qtype/dsl/validator.py
DELETED
@@ -1,465 +0,0 @@
-from typing import Any, Dict, Union, get_args, get_origin
-
-import qtype.dsl.base_types as base_types
-import qtype.dsl.domain_types
-import qtype.dsl.model as dsl
-
-
-class QTypeValidationError(Exception):
-    """Raised when there's an error during QType validation."""
-
-    pass
-
-
-class DuplicateComponentError(QTypeValidationError):
-    """Raised when there are duplicate components with the same ID."""
-
-    def __init__(
-        self,
-        obj_id: str,
-        found_obj: qtype.dsl.domain_types.StrictBaseModel,
-        existing_obj: qtype.dsl.domain_types.StrictBaseModel,
-    ):
-        super().__init__(
-            f'Duplicate component with ID "{obj_id}" found.'
-            # f"Duplicate component with ID \"{obj_id}\" found:\n{found_obj.model_dump_json()}\nAlready exists:\n{existing_obj.model_dump_json()}"
-        )
-
-
-class ComponentNotFoundError(QTypeValidationError):
-    """Raised when a component is not found in the DSL Application."""
-
-    def __init__(self, component_id: str):
-        super().__init__(
-            f"Component with ID '{component_id}' not found in the DSL Application."
-        )
-
-
-class ReferenceNotFoundError(QTypeValidationError):
-    """Raised when a reference is not found in the lookup map."""
-
-    def __init__(self, reference: str, type_hint: str | None = None):
-        msg = (
-            f"Reference '{reference}' not found in lookup map."
-            if type_hint is None
-            else f"Reference '{reference}' not found in lookup map for type '{type_hint}'."
-        )
-        super().__init__(msg)
-
-
-class FlowHasNoStepsError(QTypeValidationError):
-    """Raised when a flow has no steps defined."""
-
-    def __init__(self, flow_id: str):
-        super().__init__(f"Flow {flow_id} has no steps defined.")
-
-
-# These types are used only for the DSL and should not be converted to semantic types
-# They are used for JSON schema generation
-# They will be switched to their semantic abstract class in the generation.
-# i.e., `ToolType` will be switched to `Tool`
-def _update_map_with_unique_check(
-    current_map: Dict[str, qtype.dsl.domain_types.StrictBaseModel],
-    new_objects: list[qtype.dsl.domain_types.StrictBaseModel],
-) -> None:
-    """
-    Update a map with new objects, ensuring unique IDs.
-
-    Args:
-        current_map: The current map of objects by ID.
-        new_objects: List of new objects to add to the map.
-
-    Returns:
-        Updated map with new objects added, ensuring unique IDs.
-    """
-    for obj in new_objects:
-        if obj is None:
-            # If the object is None, we skip it.
-            continue
-        if isinstance(obj, str):
-            # If the object is a string, we assume it is an ID and skip it.
-            # This is a special case where we do not want to add the string itself.
-            continue
-        # Note: There is no current abstraction for the `id` field, so we assume it exists.
-        obj_id = obj.id  # type: ignore[attr-defined]
-        # If the object already exists in the map, we check if it is the same object.
-        # If it is not the same object, we raise an error.
-        # This ensures that we do not have duplicate components with the same ID.
-        if obj_id in current_map and id(current_map[obj_id]) != id(obj):
-            raise DuplicateComponentError(obj_id, obj, current_map[obj_id])
-        else:
-            current_map[obj_id] = obj
-
-
-def _update_maps_with_embedded_objects(
-    lookup_map: Dict[str, qtype.dsl.domain_types.StrictBaseModel],
-    embedded_objects: list[qtype.dsl.domain_types.StrictBaseModel],
-) -> None:
-    """
-    Update lookup maps with embedded objects.
-    Embedded objects are when the user specifies the object and not just the ID.
-    For example, a prompt template may have variables embedded:
-    ```yaml
-    steps:
-      - id: my_prompt
-        variables:
-          - id: my_var
-            type: text
-        outputs:
-          - id: my_output
-            type: text
-    ```
-
-    Args:
-        lookup_maps: The current lookup maps to update.
-        embedded_objects: List of embedded objects to add to the maps.
-    """
-    for obj in embedded_objects:
-        if isinstance(obj, dsl.Step):
-            # All steps have inputs and outputs
-            _update_map_with_unique_check(lookup_map, obj.inputs or [])  # type: ignore
-            _update_map_with_unique_check(lookup_map, obj.outputs or [])  # type: ignore
-            _update_map_with_unique_check(lookup_map, [obj])
-
-        if isinstance(obj, dsl.Model):
-            # note inputs
-            _update_map_with_unique_check(lookup_map, [obj.auth])  # type: ignore
-
-        if isinstance(obj, dsl.Condition):
-            # Conditions have inputs and outputs
-            _update_map_with_unique_check(lookup_map, [obj.then, obj.else_])  # type: ignore
-            _update_map_with_unique_check(lookup_map, [obj.equals])  # type: ignore
-            if obj.then and isinstance(obj.then, dsl.Step):
-                _update_maps_with_embedded_objects(lookup_map, [obj.then])
-            if obj.else_ and isinstance(obj.else_, dsl.Step):
-                _update_maps_with_embedded_objects(lookup_map, [obj.else_])
-
-        if isinstance(obj, dsl.APITool):
-            # API tools have inputs and outputs
-            _update_map_with_unique_check(lookup_map, [obj.auth])  # type: ignore
-
-        if isinstance(obj, dsl.LLMInference):
-            # LLM Inference steps have inputs and outputs
-            _update_map_with_unique_check(lookup_map, [obj.model])  # type: ignore
-            _update_maps_with_embedded_objects(lookup_map, [obj.model])  # type: ignore
-            _update_map_with_unique_check(lookup_map, [obj.memory])  # type: ignore
-
-        if isinstance(obj, dsl.Agent):
-            _update_map_with_unique_check(lookup_map, obj.tools or [])  # type: ignore
-            _update_maps_with_embedded_objects(lookup_map, obj.tools or [])  # type: ignore
-
-        if isinstance(obj, dsl.Flow):
-            _update_map_with_unique_check(lookup_map, [obj])
-            _update_map_with_unique_check(lookup_map, obj.steps or [])  # type: ignore
-            _update_maps_with_embedded_objects(lookup_map, obj.steps or [])  # type: ignore
-
-        if isinstance(obj, dsl.TelemetrySink):
-            # Telemetry sinks may have auth references
-            _update_map_with_unique_check(lookup_map, [obj.auth])  # type: ignore
-
-        if isinstance(obj, dsl.Index):
-            # Indexes may have auth references
-            _update_map_with_unique_check(lookup_map, [obj.auth])  # type: ignore
-
-        if isinstance(obj, dsl.VectorIndex):
-            if isinstance(obj.embedding_model, dsl.EmbeddingModel):
-                _update_map_with_unique_check(
-                    lookup_map, [obj.embedding_model]
-                )
-                _update_maps_with_embedded_objects(
-                    lookup_map, [obj.embedding_model]
-                )
-
-        if isinstance(obj, dsl.Search):
-            if isinstance(obj.index, dsl.Index):
-                _update_map_with_unique_check(lookup_map, [obj.index])
-                _update_maps_with_embedded_objects(lookup_map, [obj.index])
-
-        if isinstance(obj, dsl.AuthorizationProviderList):
-            # AuthorizationProviderList is a list of AuthorizationProvider objects
-            _update_map_with_unique_check(lookup_map, obj.root)  # type: ignore
-            _update_maps_with_embedded_objects(lookup_map, obj.root)  # type: ignore
-
-        if isinstance(obj, dsl.IndexList):
-            # IndexList is a list of Index objects
-            _update_map_with_unique_check(lookup_map, obj.root)  # type: ignore
-            _update_maps_with_embedded_objects(lookup_map, obj.root)  # type: ignore
-
-        if isinstance(obj, dsl.ModelList):
-            # ModelList is a list of Model objects
-            _update_map_with_unique_check(lookup_map, obj.root)  # type: ignore
-            _update_maps_with_embedded_objects(lookup_map, obj.root)  # type: ignore
-
-        if isinstance(obj, dsl.ToolList):
-            # ToolList is a list of Tool objects
-            _update_map_with_unique_check(lookup_map, obj.root)  # type: ignore
-            _update_maps_with_embedded_objects(lookup_map, obj.root)  # type: ignore
-
-        if isinstance(obj, dsl.TypeList):
-            # TypeList is a list of Type objects
-            _update_map_with_unique_check(lookup_map, obj.root)  # type: ignore
-
-        if isinstance(obj, dsl.VariableList):
-            # VariableList is a list of Variable objects
-            _update_map_with_unique_check(lookup_map, obj.root)  # type: ignore
-
-        if isinstance(obj, dsl.TelemetrySink):
-            # TelemetrySink is a list of TelemetrySink objects
-            _update_map_with_unique_check(lookup_map, [obj.auth])  # type: ignore
-
-
-def _build_lookup_maps(
-    dsl_application: dsl.Application,
-    lookup_map: Dict[str, qtype.dsl.domain_types.StrictBaseModel]
-    | None = None,
-) -> Dict[str, qtype.dsl.domain_types.StrictBaseModel]:
-    """
-    Build lookup map for all objects in the DSL Application.
-    This function creates a dictionary of id -> component, where each key is a
-    component id and the value is the component.
-    Args:
-        dsl_application: The DSL Application to build lookup maps for.
-    Returns:
-        Dict[str, dsl.StrictBaseModel]: A dictionary of lookup maps
-    Throws:
-        SemanticResolutionError: If there are duplicate components with the same ID.
-    """
-    component_names = {
-        f
-        for f in dsl.Application.model_fields.keys()
-        if f not in set(["id", "references"])
-    }
-
-    if lookup_map is None:
-        lookup_map = {}
-
-    for component_name in component_names:
-        if not hasattr(dsl_application, component_name):
-            raise ComponentNotFoundError(component_name)
-        components = getattr(dsl_application, component_name) or []
-        if not isinstance(components, list):
-            components = [components]  # Ensure we have a list
-        _update_map_with_unique_check(lookup_map, components)
-        _update_maps_with_embedded_objects(lookup_map, components)
-
-    # now deal with the references.
-    for ref in dsl_application.references or []:
-        ref = ref.root  # type: ignore
-        if isinstance(ref, dsl.Application):
-            _build_lookup_maps(ref, lookup_map)
-
-    # Anything in the reference list that is not an Application is handled by the embedded object resolver.
-    _update_maps_with_embedded_objects(
-        lookup_map,
-        [
-            ref.root  # type: ignore
-            for ref in dsl_application.references or []
-            if not isinstance(ref.root, dsl.Application)
-        ],  # type: ignore
-    )
-
-    lookup_map[dsl_application.id] = dsl_application
-
-    return lookup_map
-
-
-def _is_dsl_type(type_obj: Any) -> bool:
-    """Check if a type is a DSL type that should be converted to semantic."""
-    if not hasattr(type_obj, "__name__"):
-        return False
-
-    # Check if it's defined in the DSL module
-    return (
-        hasattr(type_obj, "__module__")
-        and (
-            type_obj.__module__ == dsl.__name__
-            or type_obj.__module__ == base_types.__name__
-        )
-        and not type_obj.__name__.startswith("_")
-    )
-
-
-def _resolve_forward_ref(field_type: Any) -> Any:
-    """
-    Resolve a ForwardRef type to its actual type.
-    This is used to handle cases where the type is a string that refers to a class.
-    """
-    if hasattr(field_type, "__forward_arg__"):
-        # Extract the string from ForwardRef and process it
-        forward_ref_str = field_type.__forward_arg__
-        # Use eval to get the actual type from the string
-        return eval(forward_ref_str, dict(vars(dsl)))
-    return field_type
-
-
-def _is_union(type: Any) -> bool:
-    """
-    Indicates if the provided type is a Union type.
-    """
-    origin = get_origin(type)
-    return origin is Union or (
-        hasattr(type, "__class__") and type.__class__.__name__ == "UnionType"
-    )
-
-
-def _is_reference_type(field_type: Any) -> bool:
-    """
-    Indicates if the provided type can be a reference -- i.e., a union between a dsl type and a string.
-    """
-    field_type = _resolve_forward_ref(field_type)
-
-    if _is_union(field_type):
-        args = get_args(field_type)
-        has_str = any(arg is str for arg in args)
-        has_dsl_type = any(_is_dsl_type(arg) for arg in args)
-        return has_str and has_dsl_type
-    else:
-        return False
-
-
-def _resolve_id_references(
-    dslobj: qtype.dsl.domain_types.StrictBaseModel | str,
-    lookup_map: Dict[str, qtype.dsl.domain_types.StrictBaseModel],
-) -> Any:
-    """
-    Resolves ID references in a DSL object such that all references are replaced with the actual object.
-    """
-
-    if isinstance(dslobj, str):
-        # If the object is a string, we assume it is an ID and look it up in the map.
-        if dslobj in lookup_map:
-            return lookup_map[dslobj]
-        else:
-            raise ReferenceNotFoundError(dslobj)
-
-    # iterate over all fields in the object
-    def lookup_reference(val: str, typ: Any) -> Any:
-        if (
-            isinstance(val, str)
-            and _is_reference_type(typ)
-            and not _is_dsl_type(type(val))
-        ):
-            if val in lookup_map:
-                return lookup_map[val]
-            else:
-                raise ReferenceNotFoundError(val, str(typ))
-        return val
-
-    for field_name, field_value in dslobj:
-        field_info = dslobj.__class__.model_fields[field_name]
-        field_type = _resolve_forward_ref(field_info.annotation)
-
-        if isinstance(field_value, list):
-            # If the field value is a list, resolve each item in the list
-            # Get the type of the items of the list
-            field_type = field_type.__args__[0]  # type: ignore
-            if (
-                get_origin(field_type) is list
-            ):  # handles case where we have list[Class] | None -- in this case field_type is Union and item_type is now the list...
-                field_type = field_type.__args__[0]
-            resolved_list = [
-                lookup_reference(item, field_type)  # type: ignore
-                for item in field_value
-            ]
-            setattr(dslobj, field_name, resolved_list)
-        elif isinstance(field_value, dict):
-            field_type = field_type.__args__[0]
-            if (
-                get_origin(field_type) is dict
-            ):  # handles case where we have dict[Class] | None -- in this case field_type is Union and item_type is now the dict...
-                field_type = field_type.__args__[1]
-            # If the field value is a dict, resolve each value in the dict
-            resolved_dict = {
-                k: lookup_reference(v, field_type)  # type: ignore
-                for k, v in field_value.items()
-            }
-            setattr(dslobj, field_name, resolved_dict)
-        elif field_value is None:
-            # Convert lst | None to an empty list
-            # and dict | None to an empty dict
-            if _is_union(field_type):
-                args = field_type.__args__  # type: ignore
-                if any(str(arg).startswith("list") for arg in args):
-                    setattr(dslobj, field_name, [])
-                elif any(str(arg).startswith("dict") for arg in args):
-                    setattr(dslobj, field_name, {})
-        else:
-            setattr(
-                dslobj, field_name, lookup_reference(field_value, field_type)
-            )
-
-    return dslobj
-
-
-def validate(
-    dsl_application: dsl.Application,
-) -> dsl.Application:
-    """
-    Validates the semantics of a DSL Application and returns a copy of it with all
-    internal references resolved to their actual objects.
-    Args:
-        dsl_application: The DSL Application to validate.
-    Returns:
-        dsl.Application: A copy of the DSL Application with all internal references resolved.
-    Throws:
-        SemanticResolutionError: If there are semantic errors in the DSL Application.
-    """
-
-    # First, make a lookup map of all objects in the DSL Application.
-    # This ensures that all object ids are unique.
-    lookup_map = _build_lookup_maps(dsl_application)
-
-    # If any flows have no steps, we raise an error.
-    for flow in dsl_application.flows or []:
-        if not flow.steps:
-            raise FlowHasNoStepsError(flow.id)
-        # If any flow doesn't have inputs, copy the inputs from the first step.
-        if not flow.inputs:
-            first_step = (
-                lookup_map[flow.steps[0]]
-                if isinstance(flow.steps[0], str)
-                else flow.steps[0]
-            )
-            flow.inputs = first_step.inputs or []  # type: ignore
-
-        # If any flow doesn't have outputs, copy them from the last step.
-        if not flow.outputs:
-            last_step = (
-                lookup_map[flow.steps[-1]]
-                if isinstance(flow.steps[-1], str)
-                else flow.steps[-1]
-            )
-            flow.outputs = last_step.outputs or []  # type: ignore
-
-    # Now we resolve all ID references in the DSL Application.
-    lookup_map = {
-        obj_id: _resolve_id_references(obj, lookup_map)
-        for obj_id, obj in lookup_map.items()
-    }
-
-    # If any chat flow doesn't have an input variable that is a chat message, raise an error.
-    for flow in dsl_application.flows or []:
-        if flow.mode == "Chat":
-            inputs = flow.inputs or []
-            if not any(
-                input_var.type == qtype.dsl.domain_types.ChatMessage
-                for input_var in inputs
-                if isinstance(input_var, dsl.Variable)
-            ):
-                raise QTypeValidationError(
-                    f"Chat flow {flow.id} must have at least one input variable of type ChatMessage."
-                )
-            if (
-                not flow.outputs
-                or len(flow.outputs) != 1
-                or (
-                    isinstance(flow.outputs[0], dsl.Variable)
-                    and flow.outputs[0].type
-                    != qtype.dsl.domain_types.ChatMessage
-                )
-            ):
-                raise QTypeValidationError(
-                    f"Chat flow {flow.id} must have exactly one output variable of type ChatMessage."
-                )
-
-    return dsl_application

qtype/interpreter/batch/__init__.py
DELETED
File without changes

qtype/interpreter/batch/file_sink_source.py
DELETED
@@ -1,162 +0,0 @@
-from typing import Any, Tuple
-
-import fsspec  # type: ignore[import-untyped]
-import pandas as pd
-
-from qtype.base.exceptions import InterpreterError
-from qtype.interpreter.batch.types import BatchConfig, ErrorMode
-from qtype.interpreter.batch.utils import reconcile_results_and_errors
-from qtype.semantic.model import FileSink, FileSource
-
-
-def execute_file_source(
-    step: FileSource,
-    inputs: pd.DataFrame,
-    batch_config: BatchConfig,
-    **kwargs: dict[Any, Any],
-) -> Tuple[pd.DataFrame, pd.DataFrame]:
-    """Executes a FileSource step to read data from a file using fsspec.
-
-    Args:
-        step: The FileSource step to execute.
-        inputs: Input DataFrame (may contain path variable).
-        batch_config: Configuration for batch processing.
-        **kwargs: Additional keyword arguments.
-
-    Returns:
-        A tuple containing two DataFrames:
-        - The first DataFrame contains the successfully read data.
-        - The second DataFrame contains rows that encountered errors with an 'error' column.
-    """
-    output_columns = {output.id for output in step.outputs}
-
-    results = []
-    errors = []
-
-    # FileSource has cardinality 'many', so it reads once and produces multiple output rows
-    # We process each input row (which might have different paths) separately
-    for _, row in inputs.iterrows():
-        try:
-            file_path = step.path if step.path else row.get("path")
-            if not file_path:
-                raise InterpreterError(
-                    f"No path specified for {type(step).__name__}. "
-                    "Either set the 'path' field or provide a 'path' input variable."
-                )
-
-            # Use fsspec to open the file and read with pandas
-            with fsspec.open(file_path, "rb") as file_handle:
-                df = pd.read_parquet(file_handle)  # type: ignore[arg-type]
-
-            # Filter to only the expected output columns if they exist
-            if output_columns and len(df) > 0:
-                available_columns = set(df.columns)
-                missing_columns = output_columns - available_columns
-                if missing_columns:
-                    raise InterpreterError(
-                        f"File {file_path} missing expected columns: {', '.join(missing_columns)}. "
-                        f"Available columns: {', '.join(available_columns)}"
-                    )
-                df = df[[col for col in df.columns if col in output_columns]]
-
-            results.append(df)
-
-        except Exception as e:
-            if batch_config.error_mode == ErrorMode.FAIL:
-                raise e
-
-            # If there's an error, add it to the errors list
-            error_df = pd.DataFrame([{"error": str(e)}])
-            errors.append(error_df)
-
-    return reconcile_results_and_errors(results, errors)
-
-
-def execute_file_sink(
-    step: FileSink,
-    inputs: pd.DataFrame,
-    batch_config: BatchConfig,
-    **kwargs: dict[Any, Any],
-) -> Tuple[pd.DataFrame, pd.DataFrame]:
-    """Executes a FileSink step to write data to a file using fsspec.
-
-    Args:
-        step: The FileSink step to execute.
-        inputs: Input DataFrame containing data to write.
-        batch_config: Configuration for batch processing.
-        **kwargs: Additional keyword arguments.
-
-    Returns:
-        A tuple containing two DataFrames:
-        - The first DataFrame contains success indicators.
-        - The second DataFrame contains rows that encountered errors with an 'error' column.
-    """
-    # this is enforced by the dsl, but we'll check here to confirm
-    if len(step.outputs) > 1:
-        raise InterpreterError(
-            f"There should only be one output variable for {type(step).__name__}."
-        )
-    output_column_name = step.outputs[0].id
-
-    # make a list of all file paths
-    try:
-        if step.path:
-            file_paths = [step.path] * len(inputs)
-        else:
-            if "path" not in inputs.columns:
-                raise InterpreterError(
-                    f"No path specified for {type(step).__name__}. "
-                    "Either set the 'path' field or provide a 'path' input variable."
-                )
-            file_paths = inputs["path"].tolist()
-    except Exception as e:
-        if batch_config.error_mode == ErrorMode.FAIL:
-            raise e
-        # If we can't get the path, we can't proceed
-        return pd.DataFrame(), pd.DataFrame([{"error": str(e)}])
-
-    # Check if all paths are the same
-    unique_paths = list(set(file_paths))
-
-    if len(unique_paths) == 1:
-        # All rows write to the same file - process as one batch
-        file_path = unique_paths[0]
-
-        try:
-            # Use fsspec to write the parquet file
-            input_columns = [i.id for i in step.inputs]
-            with fsspec.open(file_path, "wb") as file_handle:
-                inputs[input_columns].to_parquet(file_handle, index=False)  # type: ignore[arg-type]
-
-            inputs[output_column_name] = file_path
-            return inputs, pd.DataFrame()
-
-        except Exception as e:
-            if batch_config.error_mode == ErrorMode.FAIL:
-                raise e
-
-            # If there's an error, return error for all rows
-            error_df = pd.DataFrame([{"error": str(e)}])
-            return pd.DataFrame(), error_df
-
-    else:
-        # Multiple unique paths - split inputs and process recursively
-        all_results = []
-        all_errors = []
-
-        for unique_path in unique_paths:
-            # Create mask for rows with this path
-            path_mask = [p == unique_path for p in file_paths]
-            sliced_inputs = inputs[path_mask].copy()
-
-            # Recursively call execute_file_sink with the sliced DataFrame
-            results, errors = execute_file_sink(
-                step, sliced_inputs, batch_config, **kwargs
-            )
-
-            if len(results) > 0:
-                all_results.append(results)
-            if len(errors) > 0:
-                all_errors.append(errors)
-
-        return reconcile_results_and_errors(all_results, all_errors)