qtype 0.0.12__py3-none-any.whl → 0.1.7__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- qtype/application/commons/tools.py +1 -1
- qtype/application/converters/tools_from_api.py +476 -11
- qtype/application/converters/tools_from_module.py +38 -14
- qtype/application/converters/types.py +15 -30
- qtype/application/documentation.py +1 -1
- qtype/application/facade.py +102 -85
- qtype/base/types.py +227 -7
- qtype/cli.py +5 -1
- qtype/commands/convert.py +52 -6
- qtype/commands/generate.py +44 -4
- qtype/commands/run.py +78 -36
- qtype/commands/serve.py +74 -44
- qtype/commands/validate.py +37 -14
- qtype/commands/visualize.py +46 -25
- qtype/dsl/__init__.py +6 -5
- qtype/dsl/custom_types.py +1 -1
- qtype/dsl/domain_types.py +86 -5
- qtype/dsl/linker.py +384 -0
- qtype/dsl/loader.py +315 -0
- qtype/dsl/model.py +753 -264
- qtype/dsl/parser.py +200 -0
- qtype/dsl/types.py +50 -0
- qtype/interpreter/api.py +63 -136
- qtype/interpreter/auth/aws.py +19 -9
- qtype/interpreter/auth/generic.py +93 -16
- qtype/interpreter/base/base_step_executor.py +436 -0
- qtype/interpreter/base/batch_step_executor.py +171 -0
- qtype/interpreter/base/exceptions.py +50 -0
- qtype/interpreter/base/executor_context.py +91 -0
- qtype/interpreter/base/factory.py +84 -0
- qtype/interpreter/base/progress_tracker.py +110 -0
- qtype/interpreter/base/secrets.py +339 -0
- qtype/interpreter/base/step_cache.py +74 -0
- qtype/interpreter/base/stream_emitter.py +469 -0
- qtype/interpreter/conversions.py +495 -24
- qtype/interpreter/converters.py +79 -0
- qtype/interpreter/endpoints.py +355 -0
- qtype/interpreter/executors/agent_executor.py +242 -0
- qtype/interpreter/executors/aggregate_executor.py +93 -0
- qtype/interpreter/executors/bedrock_reranker_executor.py +195 -0
- qtype/interpreter/executors/decoder_executor.py +163 -0
- qtype/interpreter/executors/doc_to_text_executor.py +112 -0
- qtype/interpreter/executors/document_embedder_executor.py +123 -0
- qtype/interpreter/executors/document_search_executor.py +113 -0
- qtype/interpreter/executors/document_source_executor.py +118 -0
- qtype/interpreter/executors/document_splitter_executor.py +105 -0
- qtype/interpreter/executors/echo_executor.py +63 -0
- qtype/interpreter/executors/field_extractor_executor.py +165 -0
- qtype/interpreter/executors/file_source_executor.py +101 -0
- qtype/interpreter/executors/file_writer_executor.py +110 -0
- qtype/interpreter/executors/index_upsert_executor.py +232 -0
- qtype/interpreter/executors/invoke_embedding_executor.py +104 -0
- qtype/interpreter/executors/invoke_flow_executor.py +51 -0
- qtype/interpreter/executors/invoke_tool_executor.py +358 -0
- qtype/interpreter/executors/llm_inference_executor.py +272 -0
- qtype/interpreter/executors/prompt_template_executor.py +78 -0
- qtype/interpreter/executors/sql_source_executor.py +106 -0
- qtype/interpreter/executors/vector_search_executor.py +91 -0
- qtype/interpreter/flow.py +172 -22
- qtype/interpreter/logging_progress.py +61 -0
- qtype/interpreter/metadata_api.py +115 -0
- qtype/interpreter/resource_cache.py +5 -4
- qtype/interpreter/rich_progress.py +225 -0
- qtype/interpreter/stream/chat/__init__.py +15 -0
- qtype/interpreter/stream/chat/converter.py +391 -0
- qtype/interpreter/{chat → stream/chat}/file_conversions.py +2 -2
- qtype/interpreter/stream/chat/ui_request_to_domain_type.py +140 -0
- qtype/interpreter/stream/chat/vercel.py +609 -0
- qtype/interpreter/stream/utils/__init__.py +15 -0
- qtype/interpreter/stream/utils/build_vercel_ai_formatter.py +74 -0
- qtype/interpreter/stream/utils/callback_to_stream.py +66 -0
- qtype/interpreter/stream/utils/create_streaming_response.py +18 -0
- qtype/interpreter/stream/utils/default_chat_extract_text.py +20 -0
- qtype/interpreter/stream/utils/error_streaming_response.py +20 -0
- qtype/interpreter/telemetry.py +135 -8
- qtype/interpreter/tools/__init__.py +5 -0
- qtype/interpreter/tools/function_tool_helper.py +265 -0
- qtype/interpreter/types.py +330 -0
- qtype/interpreter/typing.py +83 -89
- qtype/interpreter/ui/404/index.html +1 -1
- qtype/interpreter/ui/404.html +1 -1
- qtype/interpreter/ui/_next/static/{OT8QJQW3J70VbDWWfrEMT → 20HoJN6otZ_LyHLHpCPE6}/_buildManifest.js +1 -1
- qtype/interpreter/ui/_next/static/chunks/434-b2112d19f25c44ff.js +36 -0
- qtype/interpreter/ui/_next/static/chunks/{964-ed4ab073db645007.js → 964-2b041321a01cbf56.js} +1 -1
- qtype/interpreter/ui/_next/static/chunks/app/{layout-5ccbc44fd528d089.js → layout-a05273ead5de2c41.js} +1 -1
- qtype/interpreter/ui/_next/static/chunks/app/page-8c67d16ac90d23cb.js +1 -0
- qtype/interpreter/ui/_next/static/chunks/ba12c10f-546f2714ff8abc66.js +1 -0
- qtype/interpreter/ui/_next/static/chunks/{main-6d261b6c5d6fb6c2.js → main-e26b9cb206da2cac.js} +1 -1
- qtype/interpreter/ui/_next/static/chunks/webpack-08642e441b39b6c2.js +1 -0
- qtype/interpreter/ui/_next/static/css/8a8d1269e362fef7.css +3 -0
- qtype/interpreter/ui/_next/static/media/4cf2300e9c8272f7-s.p.woff2 +0 -0
- qtype/interpreter/ui/icon.png +0 -0
- qtype/interpreter/ui/index.html +1 -1
- qtype/interpreter/ui/index.txt +5 -5
- qtype/semantic/checker.py +643 -0
- qtype/semantic/generate.py +268 -85
- qtype/semantic/loader.py +95 -0
- qtype/semantic/model.py +535 -163
- qtype/semantic/resolver.py +63 -19
- qtype/semantic/visualize.py +50 -35
- {qtype-0.0.12.dist-info → qtype-0.1.7.dist-info}/METADATA +22 -5
- qtype-0.1.7.dist-info/RECORD +137 -0
- qtype/dsl/base_types.py +0 -38
- qtype/dsl/validator.py +0 -464
- qtype/interpreter/batch/__init__.py +0 -0
- qtype/interpreter/batch/flow.py +0 -95
- qtype/interpreter/batch/sql_source.py +0 -95
- qtype/interpreter/batch/step.py +0 -63
- qtype/interpreter/batch/types.py +0 -41
- qtype/interpreter/batch/utils.py +0 -179
- qtype/interpreter/chat/chat_api.py +0 -237
- qtype/interpreter/chat/vercel.py +0 -314
- qtype/interpreter/exceptions.py +0 -10
- qtype/interpreter/step.py +0 -67
- qtype/interpreter/steps/__init__.py +0 -0
- qtype/interpreter/steps/agent.py +0 -114
- qtype/interpreter/steps/condition.py +0 -36
- qtype/interpreter/steps/decoder.py +0 -88
- qtype/interpreter/steps/llm_inference.py +0 -150
- qtype/interpreter/steps/prompt_template.py +0 -54
- qtype/interpreter/steps/search.py +0 -24
- qtype/interpreter/steps/tool.py +0 -53
- qtype/interpreter/streaming_helpers.py +0 -123
- qtype/interpreter/ui/_next/static/chunks/736-7fc606e244fedcb1.js +0 -36
- qtype/interpreter/ui/_next/static/chunks/app/page-c72e847e888e549d.js +0 -1
- qtype/interpreter/ui/_next/static/chunks/ba12c10f-22556063851a6df2.js +0 -1
- qtype/interpreter/ui/_next/static/chunks/webpack-8289c17c67827f22.js +0 -1
- qtype/interpreter/ui/_next/static/css/a262c53826df929b.css +0 -3
- qtype/interpreter/ui/_next/static/media/569ce4b8f30dc480-s.p.woff2 +0 -0
- qtype/interpreter/ui/favicon.ico +0 -0
- qtype/loader.py +0 -389
- qtype-0.0.12.dist-info/RECORD +0 -105
- /qtype/interpreter/ui/_next/static/{OT8QJQW3J70VbDWWfrEMT → 20HoJN6otZ_LyHLHpCPE6}/_ssgManifest.js +0 -0
- {qtype-0.0.12.dist-info → qtype-0.1.7.dist-info}/WHEEL +0 -0
- {qtype-0.0.12.dist-info → qtype-0.1.7.dist-info}/entry_points.txt +0 -0
- {qtype-0.0.12.dist-info → qtype-0.1.7.dist-info}/licenses/LICENSE +0 -0
- {qtype-0.0.12.dist-info → qtype-0.1.7.dist-info}/top_level.txt +0 -0
qtype/dsl/validator.py
DELETED
@@ -1,464 +0,0 @@
-from typing import Any, Dict, Union, get_args, get_origin
-
-import qtype.dsl.base_types as base_types
-import qtype.dsl.domain_types
-import qtype.dsl.model as dsl
-
-
-class QTypeValidationError(Exception):
-    """Raised when there's an error during QType validation."""
-
-    pass
-
-
-class DuplicateComponentError(QTypeValidationError):
-    """Raised when there are duplicate components with the same ID."""
-
-    def __init__(
-        self,
-        obj_id: str,
-        found_obj: qtype.dsl.domain_types.StrictBaseModel,
-        existing_obj: qtype.dsl.domain_types.StrictBaseModel,
-    ):
-        super().__init__(
-            f"Duplicate component with ID '{obj_id}' found:\n{found_obj.model_dump_json()}\nAlready exists:\n{existing_obj.model_dump_json()}"
-        )
-
-
-class ComponentNotFoundError(QTypeValidationError):
-    """Raised when a component is not found in the DSL Application."""
-
-    def __init__(self, component_id: str):
-        super().__init__(
-            f"Component with ID '{component_id}' not found in the DSL Application."
-        )
-
-
-class ReferenceNotFoundError(QTypeValidationError):
-    """Raised when a reference is not found in the lookup map."""
-
-    def __init__(self, reference: str, type_hint: str | None = None):
-        msg = (
-            f"Reference '{reference}' not found in lookup map."
-            if type_hint is None
-            else f"Reference '{reference}' not found in lookup map for type '{type_hint}'."
-        )
-        super().__init__(msg)
-
-
-class FlowHasNoStepsError(QTypeValidationError):
-    """Raised when a flow has no steps defined."""
-
-    def __init__(self, flow_id: str):
-        super().__init__(f"Flow {flow_id} has no steps defined.")
-
-
-# These types are used only for the DSL and should not be converted to semantic types
-# They are used for JSON schema generation
-# They will be switched to their semantic abstract class in the generation.
-# i.e., `ToolType` will be switched to `Tool`
-def _update_map_with_unique_check(
-    current_map: Dict[str, qtype.dsl.domain_types.StrictBaseModel],
-    new_objects: list[qtype.dsl.domain_types.StrictBaseModel],
-) -> None:
-    """
-    Update a map with new objects, ensuring unique IDs.
-
-    Args:
-        current_map: The current map of objects by ID.
-        new_objects: List of new objects to add to the map.
-
-    Returns:
-        Updated map with new objects added, ensuring unique IDs.
-    """
-    for obj in new_objects:
-        if obj is None:
-            # If the object is None, we skip it.
-            continue
-        if isinstance(obj, str):
-            # If the object is a string, we assume it is an ID and skip it.
-            # This is a special case where we do not want to add the string itself.
-            continue
-        # Note: There is no current abstraction for the `id` field, so we assume it exists.
-        obj_id = obj.id  # type: ignore[attr-defined]
-        # If the object already exists in the map, we check if it is the same object.
-        # If it is not the same object, we raise an error.
-        # This ensures that we do not have duplicate components with the same ID.
-        if obj_id in current_map and id(current_map[obj_id]) != id(obj):
-            raise DuplicateComponentError(obj_id, obj, current_map[obj_id])
-        else:
-            current_map[obj_id] = obj
-
-
-def _update_maps_with_embedded_objects(
-    lookup_map: Dict[str, qtype.dsl.domain_types.StrictBaseModel],
-    embedded_objects: list[qtype.dsl.domain_types.StrictBaseModel],
-) -> None:
-    """
-    Update lookup maps with embedded objects.
-    Embedded objects are when the user specifies the object and not just the ID.
-    For example, a prompt template may have variables embedded:
-    ```yaml
-    steps:
-      - id: my_prompt
-        variables:
-          - id: my_var
-            type: text
-        outputs:
-          - id: my_output
-            type: text
-    ```
-
-    Args:
-        lookup_maps: The current lookup maps to update.
-        embedded_objects: List of embedded objects to add to the maps.
-    """
-    for obj in embedded_objects:
-        if isinstance(obj, dsl.Step):
-            # All steps have inputs and outputs
-            _update_map_with_unique_check(lookup_map, obj.inputs or [])  # type: ignore
-            _update_map_with_unique_check(lookup_map, obj.outputs or [])  # type: ignore
-            _update_map_with_unique_check(lookup_map, [obj])
-
-        if isinstance(obj, dsl.Model):
-            # note inputs
-            _update_map_with_unique_check(lookup_map, [obj.auth])  # type: ignore
-
-        if isinstance(obj, dsl.Condition):
-            # Conditions have inputs and outputs
-            _update_map_with_unique_check(lookup_map, [obj.then, obj.else_])  # type: ignore
-            _update_map_with_unique_check(lookup_map, [obj.equals])  # type: ignore
-            if obj.then and isinstance(obj.then, dsl.Step):
-                _update_maps_with_embedded_objects(lookup_map, [obj.then])
-            if obj.else_ and isinstance(obj.else_, dsl.Step):
-                _update_maps_with_embedded_objects(lookup_map, [obj.else_])
-
-        if isinstance(obj, dsl.APITool):
-            # API tools have inputs and outputs
-            _update_map_with_unique_check(lookup_map, [obj.auth])  # type: ignore
-
-        if isinstance(obj, dsl.LLMInference):
-            # LLM Inference steps have inputs and outputs
-            _update_map_with_unique_check(lookup_map, [obj.model])  # type: ignore
-            _update_maps_with_embedded_objects(lookup_map, [obj.model])  # type: ignore
-            _update_map_with_unique_check(lookup_map, [obj.memory])  # type: ignore
-
-        if isinstance(obj, dsl.Agent):
-            _update_map_with_unique_check(lookup_map, obj.tools or [])  # type: ignore
-            _update_maps_with_embedded_objects(lookup_map, obj.tools or [])  # type: ignore
-
-        if isinstance(obj, dsl.Flow):
-            _update_map_with_unique_check(lookup_map, [obj])
-            _update_map_with_unique_check(lookup_map, obj.steps or [])  # type: ignore
-            _update_maps_with_embedded_objects(lookup_map, obj.steps or [])  # type: ignore
-
-        if isinstance(obj, dsl.TelemetrySink):
-            # Telemetry sinks may have auth references
-            _update_map_with_unique_check(lookup_map, [obj.auth])  # type: ignore
-
-        if isinstance(obj, dsl.Index):
-            # Indexes may have auth references
-            _update_map_with_unique_check(lookup_map, [obj.auth])  # type: ignore
-
-        if isinstance(obj, dsl.VectorIndex):
-            if isinstance(obj.embedding_model, dsl.EmbeddingModel):
-                _update_map_with_unique_check(
-                    lookup_map, [obj.embedding_model]
-                )
-                _update_maps_with_embedded_objects(
-                    lookup_map, [obj.embedding_model]
-                )
-
-        if isinstance(obj, dsl.Search):
-            if isinstance(obj.index, dsl.Index):
-                _update_map_with_unique_check(lookup_map, [obj.index])
-                _update_maps_with_embedded_objects(lookup_map, [obj.index])
-
-        if isinstance(obj, dsl.AuthorizationProviderList):
-            # AuthorizationProviderList is a list of AuthorizationProvider objects
-            _update_map_with_unique_check(lookup_map, obj.root)  # type: ignore
-            _update_maps_with_embedded_objects(lookup_map, obj.root)  # type: ignore
-
-        if isinstance(obj, dsl.IndexList):
-            # IndexList is a list of Index objects
-            _update_map_with_unique_check(lookup_map, obj.root)  # type: ignore
-            _update_maps_with_embedded_objects(lookup_map, obj.root)  # type: ignore
-
-        if isinstance(obj, dsl.ModelList):
-            # ModelList is a list of Model objects
-            _update_map_with_unique_check(lookup_map, obj.root)  # type: ignore
-            _update_maps_with_embedded_objects(lookup_map, obj.root)  # type: ignore
-
-        if isinstance(obj, dsl.ToolList):
-            # ToolList is a list of Tool objects
-            _update_map_with_unique_check(lookup_map, obj.root)  # type: ignore
-            _update_maps_with_embedded_objects(lookup_map, obj.root)  # type: ignore
-
-        if isinstance(obj, dsl.TypeList):
-            # TypeList is a list of Type objects
-            _update_map_with_unique_check(lookup_map, obj.root)  # type: ignore
-
-        if isinstance(obj, dsl.VariableList):
-            # VariableList is a list of Variable objects
-            _update_map_with_unique_check(lookup_map, obj.root)  # type: ignore
-
-        if isinstance(obj, dsl.TelemetrySink):
-            # TelemetrySink is a list of TelemetrySink objects
-            _update_map_with_unique_check(lookup_map, [obj.auth])  # type: ignore
-
-
-def _build_lookup_maps(
-    dsl_application: dsl.Application,
-    lookup_map: Dict[str, qtype.dsl.domain_types.StrictBaseModel]
-    | None = None,
-) -> Dict[str, qtype.dsl.domain_types.StrictBaseModel]:
-    """
-    Build lookup map for all objects in the DSL Application.
-    This function creates a dictionary of id -> component, where each key is a
-    component id and the value is the component.
-    Args:
-        dsl_application: The DSL Application to build lookup maps for.
-    Returns:
-        Dict[str, dsl.StrictBaseModel]: A dictionary of lookup maps
-    Throws:
-        SemanticResolutionError: If there are duplicate components with the same ID.
-    """
-    component_names = {
-        f
-        for f in dsl.Application.model_fields.keys()
-        if f not in set(["id", "references"])
-    }
-
-    if lookup_map is None:
-        lookup_map = {}
-
-    for component_name in component_names:
-        if not hasattr(dsl_application, component_name):
-            raise ComponentNotFoundError(component_name)
-        components = getattr(dsl_application, component_name) or []
-        if not isinstance(components, list):
-            components = [components]  # Ensure we have a list
-        _update_map_with_unique_check(lookup_map, components)
-        _update_maps_with_embedded_objects(lookup_map, components)
-
-    # now deal with the references.
-    for ref in dsl_application.references or []:
-        ref = ref.root  # type: ignore
-        if isinstance(ref, dsl.Application):
-            _build_lookup_maps(ref, lookup_map)
-
-    # Anything in the reference list that is not an Application is handled by the embedded object resolver.
-    _update_maps_with_embedded_objects(
-        lookup_map,
-        [
-            ref.root  # type: ignore
-            for ref in dsl_application.references or []
-            if not isinstance(ref.root, dsl.Application)
-        ],  # type: ignore
-    )
-
-    lookup_map[dsl_application.id] = dsl_application
-
-    return lookup_map
-
-
-def _is_dsl_type(type_obj: Any) -> bool:
-    """Check if a type is a DSL type that should be converted to semantic."""
-    if not hasattr(type_obj, "__name__"):
-        return False
-
-    # Check if it's defined in the DSL module
-    return (
-        hasattr(type_obj, "__module__")
-        and (
-            type_obj.__module__ == dsl.__name__
-            or type_obj.__module__ == base_types.__name__
-        )
-        and not type_obj.__name__.startswith("_")
-    )
-
-
-def _resolve_forward_ref(field_type: Any) -> Any:
-    """
-    Resolve a ForwardRef type to its actual type.
-    This is used to handle cases where the type is a string that refers to a class.
-    """
-    if hasattr(field_type, "__forward_arg__"):
-        # Extract the string from ForwardRef and process it
-        forward_ref_str = field_type.__forward_arg__
-        # Use eval to get the actual type from the string
-        return eval(forward_ref_str, dict(vars(dsl)))
-    return field_type
-
-
-def _is_union(type: Any) -> bool:
-    """
-    Indicates if the provided type is a Union type.
-    """
-    origin = get_origin(type)
-    return origin is Union or (
-        hasattr(type, "__class__") and type.__class__.__name__ == "UnionType"
-    )
-
-
-def _is_reference_type(field_type: Any) -> bool:
-    """
-    Indicates if the provided type can be a reference -- i.e., a union between a dsl type and a string.
-    """
-    field_type = _resolve_forward_ref(field_type)
-
-    if _is_union(field_type):
-        args = get_args(field_type)
-        has_str = any(arg is str for arg in args)
-        has_dsl_type = any(_is_dsl_type(arg) for arg in args)
-        return has_str and has_dsl_type
-    else:
-        return False
-
-
-def _resolve_id_references(
-    dslobj: qtype.dsl.domain_types.StrictBaseModel | str,
-    lookup_map: Dict[str, qtype.dsl.domain_types.StrictBaseModel],
-) -> Any:
-    """
-    Resolves ID references in a DSL object such that all references are replaced with the actual object.
-    """
-
-    if isinstance(dslobj, str):
-        # If the object is a string, we assume it is an ID and look it up in the map.
-        if dslobj in lookup_map:
-            return lookup_map[dslobj]
-        else:
-            raise ReferenceNotFoundError(dslobj)
-
-    # iterate over all fields in the object
-    def lookup_reference(val: str, typ: Any) -> Any:
-        if (
-            isinstance(val, str)
-            and _is_reference_type(typ)
-            and not _is_dsl_type(type(val))
-        ):
-            if val in lookup_map:
-                return lookup_map[val]
-            else:
-                raise ReferenceNotFoundError(val, str(typ))
-        return val
-
-    for field_name, field_value in dslobj:
-        field_info = dslobj.__class__.model_fields[field_name]
-        field_type = _resolve_forward_ref(field_info.annotation)
-
-        if isinstance(field_value, list):
-            # If the field value is a list, resolve each item in the list
-            # Get the type of the items of the list
-            field_type = field_type.__args__[0]  # type: ignore
-            if (
-                get_origin(field_type) is list
-            ):  # handles case where we have list[Class] | None -- in this case field_type is Union and item_type is now the list...
-                field_type = field_type.__args__[0]
-            resolved_list = [
-                lookup_reference(item, field_type)  # type: ignore
-                for item in field_value
-            ]
-            setattr(dslobj, field_name, resolved_list)
-        elif isinstance(field_value, dict):
-            field_type = field_type.__args__[0]
-            if (
-                get_origin(field_type) is dict
-            ):  # handles case where we have dict[Class] | None -- in this case field_type is Union and item_type is now the dict...
-                field_type = field_type.__args__[1]
-            # If the field value is a dict, resolve each value in the dict
-            resolved_dict = {
-                k: lookup_reference(v, field_type)  # type: ignore
-                for k, v in field_value.items()
-            }
-            setattr(dslobj, field_name, resolved_dict)
-        elif field_value is None:
-            # Convert lst | None to an empty list
-            # and dict | None to an empty dict
-            if _is_union(field_type):
-                args = field_type.__args__  # type: ignore
-                if any(str(arg).startswith("list") for arg in args):
-                    setattr(dslobj, field_name, [])
-                elif any(str(arg).startswith("dict") for arg in args):
-                    setattr(dslobj, field_name, {})
-        else:
-            setattr(
-                dslobj, field_name, lookup_reference(field_value, field_type)
-            )
-
-    return dslobj
-
-
-def validate(
-    dsl_application: dsl.Application,
-) -> dsl.Application:
-    """
-    Validates the semantics of a DSL Application and returns a copy of it with all
-    internal references resolved to their actual objects.
-    Args:
-        dsl_application: The DSL Application to validate.
-    Returns:
-        dsl.Application: A copy of the DSL Application with all internal references resolved.
-    Throws:
-        SemanticResolutionError: If there are semantic errors in the DSL Application.
-    """
-
-    # First, make a lookup map of all objects in the DSL Application.
-    # This ensures that all object ids are unique.
-    lookup_map = _build_lookup_maps(dsl_application)
-
-    # If any flows have no steps, we raise an error.
-    for flow in dsl_application.flows or []:
-        if not flow.steps:
-            raise FlowHasNoStepsError(flow.id)
-        # If any flow doesn't have inputs, copy the inputs from the first step.
-        if not flow.inputs:
-            first_step = (
-                lookup_map[flow.steps[0]]
-                if isinstance(flow.steps[0], str)
-                else flow.steps[0]
-            )
-            flow.inputs = first_step.inputs or []  # type: ignore
-
-        # If any flow doesn't have outputs, copy them from the last step.
-        if not flow.outputs:
-            last_step = (
-                lookup_map[flow.steps[-1]]
-                if isinstance(flow.steps[-1], str)
-                else flow.steps[-1]
-            )
-            flow.outputs = last_step.outputs or []  # type: ignore
-
-    # Now we resolve all ID references in the DSL Application.
-    lookup_map = {
-        obj_id: _resolve_id_references(obj, lookup_map)
-        for obj_id, obj in lookup_map.items()
-    }
-
-    # If any chat flow doesn't have an input variable that is a chat message, raise an error.
-    for flow in dsl_application.flows or []:
-        if flow.mode == "Chat":
-            inputs = flow.inputs or []
-            if not any(
-                input_var.type == qtype.dsl.domain_types.ChatMessage
-                for input_var in inputs
-                if isinstance(input_var, dsl.Variable)
-            ):
-                raise QTypeValidationError(
-                    f"Chat flow {flow.id} must have at least one input variable of type ChatMessage."
-                )
-            if (
-                not flow.outputs
-                or len(flow.outputs) != 1
-                or (
-                    isinstance(flow.outputs[0], dsl.Variable)
-                    and flow.outputs[0].type
-                    != qtype.dsl.domain_types.ChatMessage
-                )
-            ):
-                raise QTypeValidationError(
-                    f"Chat flow {flow.id} must have exactly one output variable of type ChatMessage."
-                )
-
-    return dsl_application
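The removed validator exposed a single entry point, `validate(dsl_application)`, which enforced unique component IDs and resolved every string ID reference in an `Application` to the object it names; judging by the file list above, those responsibilities appear to move into the new `qtype/dsl/linker.py` and `qtype/semantic/checker.py` modules in 0.1.7. A minimal sketch of how the old entry point was driven (the wrapper function is hypothetical; only `validate` and the exception type come from the code above):

```python
# Hypothetical caller of the removed 0.0.12 API; the wrapper and its
# error handling are illustrative, not from the package.
import qtype.dsl.model as dsl
from qtype.dsl.validator import QTypeValidationError, validate

def resolve_or_exit(app: dsl.Application) -> dsl.Application:
    try:
        # Returns the Application with string ID references replaced by
        # the objects they name, or raises a QTypeValidationError subclass.
        return validate(app)
    except QTypeValidationError as exc:
        raise SystemExit(f"Invalid QType application: {exc}")
```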
qtype/interpreter/batch/flow.py
DELETED
@@ -1,95 +0,0 @@
-from __future__ import annotations
-
-import logging
-from typing import Any, Tuple
-
-import pandas as pd
-
-from qtype.interpreter.batch.step import batch_execute_step
-from qtype.interpreter.batch.types import BatchConfig
-from qtype.interpreter.batch.utils import reconcile_results_and_errors
-from qtype.semantic.model import Flow, Sink
-
-logger = logging.getLogger(__name__)
-
-
-def batch_execute_flow(
-    flow: Flow,
-    inputs: pd.DataFrame,
-    batch_config: BatchConfig,
-    **kwargs: dict[Any, Any],
-) -> Tuple[pd.DataFrame, pd.DataFrame]:
-    """Executes a flow in a batch context.
-
-    Args:
-        flow: The flow to execute.
-        batch_config: The batch configuration to use.
-        **kwargs: Additional keyword arguments to pass to the flow.
-
-    Returns:
-        A list of output variables produced by the flow.
-    """
-
-    previous_outputs = inputs
-
-    all_errors = []
-
-    # Iterate over each step in the flow
-    for step in flow.steps:
-        results: list[pd.DataFrame] = []
-        errors: list[pd.DataFrame] = []
-
-        if isinstance(step, Sink):
-            # Send the entire batch to the sink
-            batch_results, batch_errors = batch_execute_step(
-                step, previous_outputs, batch_config
-            )
-            results.append(batch_results)
-            if len(batch_errors) > 1:
-                errors.append(batch_errors)
-        else:
-            # batch the current data into dataframes of max size batch_size
-            batch_size = batch_config.batch_size
-            for start in range(0, len(previous_outputs), batch_size):
-                end = start + batch_size
-                batch = previous_outputs.iloc[start:end]
-                # Execute the step with the current batch
-                batch_results, batch_errors = batch_execute_step(
-                    step, batch, batch_config
-                )
-
-                results.append(batch_results)
-                if len(batch_errors) > 1:
-                    errors.append(batch_errors)
-
-        previous_outputs, errors_df = reconcile_results_and_errors(
-            results, errors
-        )
-
-        if len(errors_df):
-            all_errors.append(errors_df)
-            if batch_config.write_errors_to:
-                output_file = (
-                    f"{batch_config.write_errors_to}/{step.id}.errors.parquet"
-                )
-                try:
-                    errors_df.to_parquet(
-                        output_file, engine="pyarrow", compression="snappy"
-                    )
-                    logging.info(
-                        f"Saved errors for step {step.id} to {output_file}"
-                    )
-                except Exception as e:
-                    logging.warning(
-                        f"Could not save errors step {step.id} to {output_file}",
-                        exc_info=e,
-                        stack_info=True,
-                    )
-
-    # Return the last steps results and errors
-    rv_errors = (
-        pd.concat(all_errors, ignore_index=True)
-        if len(all_errors)
-        else pd.DataFrame({})
-    )
-    return previous_outputs, rv_errors
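The removed `batch_execute_flow` threads a DataFrame through each step, slicing it into chunks of at most `batch_size` rows with plain positional indexing and concatenating the per-chunk results afterwards. A self-contained sketch of that chunking pattern (pandas only; the toy data is illustrative, not from the package):

```python
import pandas as pd

def iter_batches(df: pd.DataFrame, batch_size: int):
    """Yield consecutive slices of at most batch_size rows, in order."""
    for start in range(0, len(df), batch_size):
        yield df.iloc[start:start + batch_size]

df = pd.DataFrame({"x": range(10)})
chunks = list(iter_batches(df, 4))              # row counts: 4, 4, 2
rebuilt = pd.concat(chunks, ignore_index=True)  # analogous to the reconcile step
assert len(rebuilt) == len(df)
```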
qtype/interpreter/batch/sql_source.py
DELETED
@@ -1,95 +0,0 @@
-from typing import Any, Tuple
-
-import boto3  # type: ignore[import-untyped]
-import pandas as pd
-import sqlalchemy
-from sqlalchemy import create_engine
-from sqlalchemy.exc import SQLAlchemyError
-
-from qtype.base.exceptions import InterpreterError
-from qtype.interpreter.auth.generic import auth
-from qtype.interpreter.batch.types import BatchConfig, ErrorMode
-from qtype.interpreter.batch.utils import (
-    reconcile_results_and_errors,
-    validate_inputs,
-)
-from qtype.semantic.model import SQLSource
-
-
-def to_output_columns(
-    df: pd.DataFrame, output_columns: set[str]
-) -> pd.DataFrame:
-    """Filters the DataFrame to only include specified output columns.
-
-    Args:
-        df: The input DataFrame.
-        output_columns: A set of column names to retain in the DataFrame.
-
-    Returns:
-        A DataFrame containing only the specified output columns.
-    """
-    if len(df) == 0:
-        return df
-    missing = output_columns - set(df.columns)
-    if missing:
-        raise InterpreterError(
-            f"SQL Result was missing expected columns: {','.join(missing)}, it has columns: {','.join(df.columns)}"
-        )
-
-    return df[[col for col in df.columns if col in output_columns]]
-
-
-def execute_sql_source(
-    step: SQLSource,
-    inputs: pd.DataFrame,
-    batch_config: BatchConfig,
-    **kwargs: dict[Any, Any],
-) -> Tuple[pd.DataFrame, pd.DataFrame]:
-    """Executes a SQLSource step to retrieve data from a SQL database.
-
-    Args:
-        step: The SQLSource step to execute.
-
-    Returns:
-        A tuple containing two DataFrames:
-        - The first DataFrame contains the successfully retrieved data.
-        - The second DataFrame contains rows that encountered errors with an 'error' column.
-    """
-    # Create a database engine
-    validate_inputs(inputs, step)
-
-    connect_args = {}
-    if step.auth:
-        with auth(step.auth) as creds:
-            if isinstance(creds, boto3.Session):
-                connect_args["session"] = creds
-    engine = create_engine(step.connection, connect_args=connect_args)
-
-    output_columns = {output.id for output in step.outputs}
-
-    results = []
-    errors = []
-    step_inputs = {i.id for i in step.inputs}
-    for _, row in inputs.iterrows():
-        try:
-            # Make a dictionary of column_name: value from row
-            params = {col: row[col] for col in row.index if col in step_inputs}
-            # Execute the query and fetch the results into a DataFrame
-            with engine.connect() as connection:
-                result = connection.execute(
-                    sqlalchemy.text(step.query),
-                    parameters=params if len(params) else None,
-                )
-                df = pd.DataFrame(
-                    result.fetchall(), columns=list(result.keys())
-                )
-                df = to_output_columns(df, output_columns)
-                results.append(df)
-        except SQLAlchemyError as e:
-            if batch_config.error_mode == ErrorMode.FAIL:
-                raise e
-            # If there's an error, return an empty DataFrame and the error message
-            error_df = pd.DataFrame([{"error": str(e)}])
-            errors.append(error_df)
-
-    return reconcile_results_and_errors(results, errors)
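The removed `execute_sql_source` ran one parameterized query per input row, binding only the columns named in `step.inputs` and collecting each result set as a DataFrame. A standalone sketch of the same SQLAlchemy pattern against an in-memory SQLite database (the schema and data are illustrative, not from the package):

```python
import pandas as pd
import sqlalchemy
from sqlalchemy import create_engine

engine = create_engine("sqlite:///:memory:")
with engine.begin() as conn:  # begin() commits on successful exit
    conn.execute(sqlalchemy.text("CREATE TABLE t (id INTEGER, name TEXT)"))
    conn.execute(
        sqlalchemy.text("INSERT INTO t VALUES (:id, :name)"),
        [{"id": 1, "name": "a"}, {"id": 2, "name": "b"}],
    )

inputs = pd.DataFrame({"id": [1, 2]})
frames = []
for _, row in inputs.iterrows():
    with engine.connect() as conn:
        # One bound query per input row, mirroring the loop above.
        result = conn.execute(
            sqlalchemy.text("SELECT name FROM t WHERE id = :id"),
            {"id": int(row["id"])},
        )
        frames.append(
            pd.DataFrame(result.fetchall(), columns=list(result.keys()))
        )
out = pd.concat(frames, ignore_index=True)
```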