qtype 0.0.12__py3-none-any.whl → 0.1.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (137) hide show
  1. qtype/application/commons/tools.py +1 -1
  2. qtype/application/converters/tools_from_api.py +476 -11
  3. qtype/application/converters/tools_from_module.py +38 -14
  4. qtype/application/converters/types.py +15 -30
  5. qtype/application/documentation.py +1 -1
  6. qtype/application/facade.py +102 -85
  7. qtype/base/types.py +227 -7
  8. qtype/cli.py +5 -1
  9. qtype/commands/convert.py +52 -6
  10. qtype/commands/generate.py +44 -4
  11. qtype/commands/run.py +78 -36
  12. qtype/commands/serve.py +74 -44
  13. qtype/commands/validate.py +37 -14
  14. qtype/commands/visualize.py +46 -25
  15. qtype/dsl/__init__.py +6 -5
  16. qtype/dsl/custom_types.py +1 -1
  17. qtype/dsl/domain_types.py +86 -5
  18. qtype/dsl/linker.py +384 -0
  19. qtype/dsl/loader.py +315 -0
  20. qtype/dsl/model.py +751 -263
  21. qtype/dsl/parser.py +200 -0
  22. qtype/dsl/types.py +50 -0
  23. qtype/interpreter/api.py +63 -136
  24. qtype/interpreter/auth/aws.py +19 -9
  25. qtype/interpreter/auth/generic.py +93 -16
  26. qtype/interpreter/base/base_step_executor.py +436 -0
  27. qtype/interpreter/base/batch_step_executor.py +171 -0
  28. qtype/interpreter/base/exceptions.py +50 -0
  29. qtype/interpreter/base/executor_context.py +91 -0
  30. qtype/interpreter/base/factory.py +84 -0
  31. qtype/interpreter/base/progress_tracker.py +110 -0
  32. qtype/interpreter/base/secrets.py +339 -0
  33. qtype/interpreter/base/step_cache.py +74 -0
  34. qtype/interpreter/base/stream_emitter.py +469 -0
  35. qtype/interpreter/conversions.py +471 -22
  36. qtype/interpreter/converters.py +79 -0
  37. qtype/interpreter/endpoints.py +355 -0
  38. qtype/interpreter/executors/agent_executor.py +242 -0
  39. qtype/interpreter/executors/aggregate_executor.py +93 -0
  40. qtype/interpreter/executors/bedrock_reranker_executor.py +195 -0
  41. qtype/interpreter/executors/decoder_executor.py +163 -0
  42. qtype/interpreter/executors/doc_to_text_executor.py +112 -0
  43. qtype/interpreter/executors/document_embedder_executor.py +107 -0
  44. qtype/interpreter/executors/document_search_executor.py +113 -0
  45. qtype/interpreter/executors/document_source_executor.py +118 -0
  46. qtype/interpreter/executors/document_splitter_executor.py +105 -0
  47. qtype/interpreter/executors/echo_executor.py +63 -0
  48. qtype/interpreter/executors/field_extractor_executor.py +165 -0
  49. qtype/interpreter/executors/file_source_executor.py +101 -0
  50. qtype/interpreter/executors/file_writer_executor.py +110 -0
  51. qtype/interpreter/executors/index_upsert_executor.py +232 -0
  52. qtype/interpreter/executors/invoke_embedding_executor.py +92 -0
  53. qtype/interpreter/executors/invoke_flow_executor.py +51 -0
  54. qtype/interpreter/executors/invoke_tool_executor.py +358 -0
  55. qtype/interpreter/executors/llm_inference_executor.py +272 -0
  56. qtype/interpreter/executors/prompt_template_executor.py +78 -0
  57. qtype/interpreter/executors/sql_source_executor.py +106 -0
  58. qtype/interpreter/executors/vector_search_executor.py +91 -0
  59. qtype/interpreter/flow.py +173 -22
  60. qtype/interpreter/logging_progress.py +61 -0
  61. qtype/interpreter/metadata_api.py +115 -0
  62. qtype/interpreter/resource_cache.py +5 -4
  63. qtype/interpreter/rich_progress.py +225 -0
  64. qtype/interpreter/stream/chat/__init__.py +15 -0
  65. qtype/interpreter/stream/chat/converter.py +391 -0
  66. qtype/interpreter/{chat → stream/chat}/file_conversions.py +2 -2
  67. qtype/interpreter/stream/chat/ui_request_to_domain_type.py +140 -0
  68. qtype/interpreter/stream/chat/vercel.py +609 -0
  69. qtype/interpreter/stream/utils/__init__.py +15 -0
  70. qtype/interpreter/stream/utils/build_vercel_ai_formatter.py +74 -0
  71. qtype/interpreter/stream/utils/callback_to_stream.py +66 -0
  72. qtype/interpreter/stream/utils/create_streaming_response.py +18 -0
  73. qtype/interpreter/stream/utils/default_chat_extract_text.py +20 -0
  74. qtype/interpreter/stream/utils/error_streaming_response.py +20 -0
  75. qtype/interpreter/telemetry.py +135 -8
  76. qtype/interpreter/tools/__init__.py +5 -0
  77. qtype/interpreter/tools/function_tool_helper.py +265 -0
  78. qtype/interpreter/types.py +330 -0
  79. qtype/interpreter/typing.py +83 -89
  80. qtype/interpreter/ui/404/index.html +1 -1
  81. qtype/interpreter/ui/404.html +1 -1
  82. qtype/interpreter/ui/_next/static/{OT8QJQW3J70VbDWWfrEMT → 20HoJN6otZ_LyHLHpCPE6}/_buildManifest.js +1 -1
  83. qtype/interpreter/ui/_next/static/chunks/434-b2112d19f25c44ff.js +36 -0
  84. qtype/interpreter/ui/_next/static/chunks/{964-ed4ab073db645007.js → 964-2b041321a01cbf56.js} +1 -1
  85. qtype/interpreter/ui/_next/static/chunks/app/{layout-5ccbc44fd528d089.js → layout-a05273ead5de2c41.js} +1 -1
  86. qtype/interpreter/ui/_next/static/chunks/app/page-8c67d16ac90d23cb.js +1 -0
  87. qtype/interpreter/ui/_next/static/chunks/ba12c10f-546f2714ff8abc66.js +1 -0
  88. qtype/interpreter/ui/_next/static/chunks/{main-6d261b6c5d6fb6c2.js → main-e26b9cb206da2cac.js} +1 -1
  89. qtype/interpreter/ui/_next/static/chunks/webpack-08642e441b39b6c2.js +1 -0
  90. qtype/interpreter/ui/_next/static/css/8a8d1269e362fef7.css +3 -0
  91. qtype/interpreter/ui/_next/static/media/4cf2300e9c8272f7-s.p.woff2 +0 -0
  92. qtype/interpreter/ui/icon.png +0 -0
  93. qtype/interpreter/ui/index.html +1 -1
  94. qtype/interpreter/ui/index.txt +5 -5
  95. qtype/semantic/checker.py +643 -0
  96. qtype/semantic/generate.py +268 -85
  97. qtype/semantic/loader.py +95 -0
  98. qtype/semantic/model.py +535 -163
  99. qtype/semantic/resolver.py +63 -19
  100. qtype/semantic/visualize.py +50 -35
  101. {qtype-0.0.12.dist-info → qtype-0.1.3.dist-info}/METADATA +21 -4
  102. qtype-0.1.3.dist-info/RECORD +137 -0
  103. qtype/dsl/base_types.py +0 -38
  104. qtype/dsl/validator.py +0 -464
  105. qtype/interpreter/batch/__init__.py +0 -0
  106. qtype/interpreter/batch/flow.py +0 -95
  107. qtype/interpreter/batch/sql_source.py +0 -95
  108. qtype/interpreter/batch/step.py +0 -63
  109. qtype/interpreter/batch/types.py +0 -41
  110. qtype/interpreter/batch/utils.py +0 -179
  111. qtype/interpreter/chat/chat_api.py +0 -237
  112. qtype/interpreter/chat/vercel.py +0 -314
  113. qtype/interpreter/exceptions.py +0 -10
  114. qtype/interpreter/step.py +0 -67
  115. qtype/interpreter/steps/__init__.py +0 -0
  116. qtype/interpreter/steps/agent.py +0 -114
  117. qtype/interpreter/steps/condition.py +0 -36
  118. qtype/interpreter/steps/decoder.py +0 -88
  119. qtype/interpreter/steps/llm_inference.py +0 -150
  120. qtype/interpreter/steps/prompt_template.py +0 -54
  121. qtype/interpreter/steps/search.py +0 -24
  122. qtype/interpreter/steps/tool.py +0 -53
  123. qtype/interpreter/streaming_helpers.py +0 -123
  124. qtype/interpreter/ui/_next/static/chunks/736-7fc606e244fedcb1.js +0 -36
  125. qtype/interpreter/ui/_next/static/chunks/app/page-c72e847e888e549d.js +0 -1
  126. qtype/interpreter/ui/_next/static/chunks/ba12c10f-22556063851a6df2.js +0 -1
  127. qtype/interpreter/ui/_next/static/chunks/webpack-8289c17c67827f22.js +0 -1
  128. qtype/interpreter/ui/_next/static/css/a262c53826df929b.css +0 -3
  129. qtype/interpreter/ui/_next/static/media/569ce4b8f30dc480-s.p.woff2 +0 -0
  130. qtype/interpreter/ui/favicon.ico +0 -0
  131. qtype/loader.py +0 -389
  132. qtype-0.0.12.dist-info/RECORD +0 -105
  133. /qtype/interpreter/ui/_next/static/{OT8QJQW3J70VbDWWfrEMT → 20HoJN6otZ_LyHLHpCPE6}/_ssgManifest.js +0 -0
  134. {qtype-0.0.12.dist-info → qtype-0.1.3.dist-info}/WHEEL +0 -0
  135. {qtype-0.0.12.dist-info → qtype-0.1.3.dist-info}/entry_points.txt +0 -0
  136. {qtype-0.0.12.dist-info → qtype-0.1.3.dist-info}/licenses/LICENSE +0 -0
  137. {qtype-0.0.12.dist-info → qtype-0.1.3.dist-info}/top_level.txt +0 -0
qtype/dsl/linker.py ADDED
@@ -0,0 +1,384 @@
1
+ from typing import Any, Dict, Type
2
+
3
+ from pydantic import BaseModel, RootModel
4
+
5
+ import qtype.base.types as base_types
6
+ import qtype.dsl.domain_types
7
+ import qtype.dsl.model as dsl
8
+
9
+
10
class QTypeValidationError(Exception):
    """Base class for every error raised while validating a QType document."""
14
+
15
+
16
class DuplicateComponentError(QTypeValidationError):
    """Signals that two distinct components were registered under one ID.

    The error message contains the JSON dump of the newly found component
    followed by the JSON dump of the component already stored under that ID.
    """

    def __init__(
        self,
        obj_id: str,
        found_obj: qtype.dsl.domain_types.StrictBaseModel,
        existing_obj: qtype.dsl.domain_types.StrictBaseModel,
    ):
        # Dump both objects up front (found first, then existing).
        found_json = str(found_obj.model_dump_json())
        existing_json = str(existing_obj.model_dump_json())
        pieces = (
            f"Duplicate component with ID {obj_id} found:\n",
            found_json,
            "\nAlready exists:\n",
            existing_json,
        )
        super().__init__("".join(pieces))
31
+
32
+
33
class ComponentNotFoundError(QTypeValidationError):
    """Signals that a named component is missing from the DSL Application."""

    def __init__(self, component_name: str):
        message = f"Component with name '{component_name}' not found in the DSL Application."
        super().__init__(message)
40
+
41
+
42
class ReferenceNotFoundError(QTypeValidationError):
    """Signals that a reference ID has no entry in the lookup map.

    When ``available_refs`` is given, the message is extended with a hint:
    up to five IDs that contain (or are contained in) the missing reference,
    compared case-insensitively; otherwise the full list when it has at most
    ten entries; otherwise just the number of available references.
    """

    def __init__(
        self,
        reference: str,
        type_hint: str | None = None,
        available_refs: list[str] | None = None,
    ):
        msg = (
            f"Reference '{reference}' not found for type '{type_hint}'.\n"
            if type_hint
            else f"Reference '{reference}' not found.\n"
        )

        if available_refs:
            # Substring match in either direction, ignoring case.
            needle = reference.lower()
            similar = []
            for candidate in available_refs:
                lowered = candidate.lower()
                if needle in lowered or lowered in needle:
                    similar.append(candidate)

            if similar:
                msg += f"Did you mean one of these? {', '.join(similar[:5])}"
            elif len(available_refs) <= 10:
                msg += f"Available references: {', '.join(available_refs)}"
            else:
                msg += (
                    f"There are {len(available_refs)} available "
                    "references. Check your spelling."
                )

        super().__init__(msg)
78
+
79
+
80
def _update_map_with_unique_check(
    current_map: Dict[str, qtype.dsl.domain_types.StrictBaseModel],
    new_objects: list[qtype.dsl.domain_types.StrictBaseModel],
) -> None:
    """
    Register objects in ``current_map`` by ID, rejecting conflicting IDs.

    The map is mutated in place; nothing is returned.

    Args:
        current_map: Map of component ID -> component, updated in place.
        new_objects: Candidates to register. ``None`` entries, plain strings
            and ``Reference`` instances are skipped.

    Raises:
        DuplicateComponentError: If an ID is already mapped to a *different*
            object (identity comparison, not equality).
    """
    for candidate in new_objects:
        if candidate is None:
            continue
        # Strings and References only name a component; they are not one.
        if isinstance(candidate, (str, base_types.Reference)):
            continue
        # There is no shared abstraction for `id`, so it is assumed to exist.
        component_id = candidate.id  # type: ignore[attr-defined]
        # setdefault stores the candidate when the ID is new and otherwise
        # hands back whatever is already registered under that ID.
        registered = current_map.setdefault(component_id, candidate)
        if registered is not candidate:
            raise DuplicateComponentError(component_id, candidate, registered)  # type: ignore
111
+
112
+
113
def _collect_components_from_object(
    obj: qtype.dsl.domain_types.StrictBaseModel,
) -> list[qtype.dsl.domain_types.StrictBaseModel]:
    """
    Gather the components contributed by a single object.

    Args:
        obj: The object to inspect.

    Returns:
        A list holding ``obj`` itself when it has an ``id`` attribute,
        followed, for a ``Flow``, by its ``steps`` and its ``variables``.
    """
    collected = [obj] if hasattr(obj, "id") else []

    # A Flow also contributes its embedded steps and variables.
    if isinstance(obj, dsl.Flow):
        collected += obj.steps or []  # type: ignore
        collected += obj.variables or []  # type: ignore

    return collected
138
+
139
+
140
def _update_maps_with_embedded_objects(
    lookup_map: Dict[str, qtype.dsl.domain_types.StrictBaseModel],
    embedded_objects: list[qtype.dsl.domain_types.StrictBaseModel],
) -> None:
    """
    Register embedded objects (and what they contain) in the lookup map.

    An embedded object is one the user spelled out in full instead of
    referring to it by ID.

    Args:
        lookup_map: Map of component ID -> component, updated in place.
        embedded_objects: Objects whose components should be registered.
    """
    for embedded in embedded_objects:
        _update_map_with_unique_check(
            lookup_map, _collect_components_from_object(embedded)
        )
155
+
156
+
157
def _build_lookup_maps(
    document: Any,
    lookup_map: Dict[str, qtype.dsl.domain_types.StrictBaseModel]
    | None = None,
) -> Dict[str, qtype.dsl.domain_types.StrictBaseModel]:
    """
    Build the id -> component lookup map for a DSL Document.

    Works with any Document type (Application, Flow, *List types, etc.).

    Args:
        document: The DSL Document to index. Can be an Application, a single
            component such as a Flow, or any RootModel list type.
        lookup_map: Existing map to extend in place (used when recursing into
            referenced documents). A new map is created when omitted.

    Returns:
        The lookup map (the same dict object as ``lookup_map`` when given).

    Raises:
        DuplicateComponentError: If two different components share an ID.
        ComponentNotFoundError: If the Application instance lacks one of the
            fields declared on ``dsl.Application``.
    """
    if lookup_map is None:
        lookup_map = {}

    # Handle Application specially since it has multiple component lists.
    if isinstance(document, dsl.Application):
        non_component_fields = {"id", "references", "description"}
        # Keep declaration order (a list, not a set) so that duplicate-ID
        # errors are reported identically from run to run.
        component_names = [
            field_name
            for field_name in dsl.Application.model_fields
            if field_name not in non_component_fields
        ]

        for component_name in component_names:
            if not hasattr(document, component_name):
                raise ComponentNotFoundError(component_name)
            components = getattr(document, component_name) or []
            if not isinstance(components, list):
                components = [components]  # Single-valued field -> list
            _update_map_with_unique_check(lookup_map, components)
            _update_maps_with_embedded_objects(lookup_map, components)

        # References can contain nested Applications or other documents.
        for ref in document.references or []:
            _build_lookup_maps(ref.root, lookup_map)  # type: ignore

        # NOTE(review): this assignment bypasses the uniqueness check, so a
        # component sharing the Application's ID is silently replaced here.
        lookup_map[document.id] = document

    # Handle RootModel list types (e.g., AuthorizationProviderList, IndexList).
    elif isinstance(getattr(document, "root", None), list):
        root_list = document.root
        _update_map_with_unique_check(lookup_map, root_list)
        _update_maps_with_embedded_objects(lookup_map, root_list)

    # Handle single component documents (e.g., Flow, Agent, etc.).
    else:
        components = _collect_components_from_object(document)
        _update_map_with_unique_check(lookup_map, components)

    return lookup_map
220
+
221
+
222
def _resolve_reference(
    ref: str, type_hint: Type, lookup_map: Dict[str, Any]
) -> Any:
    """
    Look up the object registered under a reference ID.

    Args:
        ref: The reference ID to resolve
        type_hint: Type included in the error message when lookup fails
        lookup_map: Map of component IDs to objects

    Returns:
        The object stored under ``ref``

    Raises:
        ReferenceNotFoundError: If ``ref`` has no (non-None) entry in the map
    """
    target = lookup_map.get(ref)
    if target is not None:
        return target
    # Hand every known ID to the error so it can suggest alternatives.
    raise ReferenceNotFoundError(ref, str(type_hint), list(lookup_map))
244
+
245
+
246
def _resolve_rootmodel_references(
    model: RootModel, lookup_map: Dict[str, Any]
) -> None:
    """
    Resolve references inside a list-based RootModel document.

    Does nothing when the model's ``root`` is not a list.

    Args:
        model: RootModel instance whose ``root`` list is updated in place
        lookup_map: Map of component IDs to objects
    """
    root_value = model.root  # type: ignore
    if isinstance(root_value, list):
        # A root list is processed exactly like any other list field.
        _resolve_list_references(root_value, lookup_map)
268
+
269
+
270
def _resolve_list_references(
    field_value: list, lookup_map: Dict[str, Any]
) -> None:
    """
    Resolve references held in a list, in place.

    ``Reference`` elements are replaced by the object they point to; other
    ``BaseModel`` elements are walked recursively. Anything else is left
    untouched.

    Args:
        field_value: List to process
        lookup_map: Map of component IDs to objects
    """
    for index, element in enumerate(field_value):
        # Reference must be tested before the generic BaseModel branch.
        if isinstance(element, base_types.Reference):
            field_value[index] = _resolve_reference(
                element.ref, type(element), lookup_map
            )
        elif isinstance(element, BaseModel):
            _resolve_all_references(element, lookup_map)
288
+
289
+
290
def _resolve_dict_references(
    field_value: dict, lookup_map: Dict[str, Any]
) -> None:
    """
    Resolve references held as dict values, in place.

    ``Reference`` values are replaced by the object they point to; other
    ``BaseModel`` values are walked recursively. Keys are never changed.

    Args:
        field_value: Dict to process
        lookup_map: Map of component IDs to objects
    """
    for key, entry in field_value.items():
        # Reference must be tested before the generic BaseModel branch.
        if isinstance(entry, base_types.Reference):
            # Re-assigning an existing key is safe while iterating.
            field_value[key] = _resolve_reference(
                entry.ref, type(entry), lookup_map
            )
        elif isinstance(entry, BaseModel):
            _resolve_all_references(entry, lookup_map)
306
+
307
+
308
def _resolve_all_references(
    model: BaseModel,
    lookup_map: Dict[str, Any],
) -> None:
    """
    Walk a Pydantic model tree and replace Reference objects in place.

    Args:
        model: The model to process
        lookup_map: Map of component IDs to objects
    """
    # List-based documents (ModelList, ToolList, ...) are RootModels and are
    # handled through their `root` value rather than through named fields.
    if isinstance(model, RootModel):
        _resolve_rootmodel_references(model, lookup_map)
        return

    # Regular models: inspect every (field name, value) pair.
    for name, value in model:
        # Reference is tested first, ahead of the generic BaseModel branch.
        if isinstance(value, base_types.Reference):
            resolved = _resolve_reference(value.ref, type(value), lookup_map)
            setattr(model, name, resolved)
        elif isinstance(value, BaseModel):
            _resolve_all_references(value, lookup_map)
        elif isinstance(value, list):
            if len(value) > 0:
                _resolve_list_references(value, lookup_map)
        elif isinstance(value, dict):
            _resolve_dict_references(value, lookup_map)
341
+
342
+
343
def link(document: dsl.DocumentType) -> dsl.DocumentType:
    """
    Resolve every ID reference in a DSL Document to the object it names.

    Accepts any DocumentType:
    - Application: Full application with all components
    - Flow: Individual flow definition
    - Agent: Individual agent definition
    - AuthorizationProviderList: List of authorization providers
    - IndexList: List of indexes
    - ModelList: List of models
    - ToolList: List of tools
    - TypeList: List of custom types
    - VariableList: List of variables

    IMPORTANT: The document is modified in place and no longer honours its
    declared types afterwards: every Reference[T] field holds an actual T
    object. This is intentional for the linking phase that precedes the
    transformation to semantic IR.

    Args:
        document: Any valid DSL DocumentType.

    Returns:
        The same document object, with its references resolved.

    Raises:
        DuplicateComponentError: If there are duplicate components with the same ID.
        ReferenceNotFoundError: If a reference cannot be resolved.
        ComponentNotFoundError: If an expected component is missing.
    """
    # Pass 1: index every component by ID (this also enforces unique IDs).
    components_by_id = _build_lookup_maps(document)

    # Pass 2: swap references for the indexed objects. Every DocumentType
    # variant is a BaseModel, including the RootModel-based *List types.
    if not isinstance(document, BaseModel):
        return document  # type: ignore[return-value]

    _resolve_all_references(document, components_by_id)
    return document  # type: ignore[return-value]
qtype/dsl/loader.py ADDED
@@ -0,0 +1,315 @@
1
+ """
2
+ YAML loading with environment variable and file inclusion support.
3
+
4
+ This module provides two explicit functions for loading YAML:
5
+ - load_yaml_file(path): Load YAML from a file path or URI
6
+ - load_yaml_string(content, base_path): Load YAML from a string
7
+
8
+ Both support:
9
+ - Environment variable substitution (${VAR} or ${VAR:default})
10
+ - File inclusion (!include and !include_raw)
11
+ - Multiple URI schemes via fsspec (local, http, s3, etc.)
12
+
13
+ Example:
14
+ # Load from file
15
+ data = load_yaml_file("config.yaml")
16
+ data = load_yaml_file("s3://bucket/config.yaml")
17
+
18
+ # Load from string
19
+ yaml_content = "name: test\\nvalue: ${ENV_VAR}"
20
+ data = load_yaml_string(yaml_content)
21
+
22
+ # Load string with includes (requires base_path)
23
+ data = load_yaml_string(yaml_content, base_path="/path/to/configs")
24
+ """
25
+
26
+ from __future__ import annotations
27
+
28
+ import os
29
+ import re
30
+ from pathlib import Path
31
+ from typing import Any
32
+
33
+ import fsspec
34
+ import yaml
35
+ from dotenv import load_dotenv
36
+
37
+
38
+ class YAMLLoadError(Exception):
39
+ """Error during YAML loading or parsing."""
40
+
41
+ def __init__(
42
+ self,
43
+ message: str,
44
+ line: int | None = None,
45
+ column: int | None = None,
46
+ source: str | None = None,
47
+ original_error: Exception | None = None,
48
+ ) -> None:
49
+ self.message = message
50
+ self.line = line
51
+ self.column = column
52
+ self.source = source
53
+ self.original_error = original_error
54
+ super().__init__(self._format_message())
55
+
56
+ def _format_message(self) -> str:
57
+ """Format error message with location information."""
58
+ parts = []
59
+ if self.source:
60
+ parts.append(f"in {self.source}")
61
+ if self.line is not None:
62
+ location = f"line {self.line + 1}"
63
+ if self.column is not None:
64
+ location += f", column {self.column + 1}"
65
+ parts.append(location)
66
+
67
+ if parts:
68
+ return f"{self.message} ({', '.join(parts)})"
69
+ return self.message
70
+
71
+
72
class YAMLLoader(yaml.SafeLoader):
    """SafeLoader variant that remembers a base path for resolving includes."""

    def __init__(self, stream: Any, base_path: str | None = None) -> None:
        super().__init__(stream)
        # Fall back to the current working directory when no base is given.
        if not base_path:
            base_path = str(Path.cwd())
        self.base_path = base_path
78
+
79
+
80
def _substitute_env_vars(value: str) -> str:
    """
    Expand environment variable placeholders in a string.

    Recognises ${VAR_NAME} and ${VAR_NAME:default}. A set variable always
    wins (even when empty); otherwise the default is used when one is given.

    Args:
        value: String that may contain placeholders

    Returns:
        The string with every placeholder replaced

    Raises:
        ValueError: If a variable is unset and no default was provided
    """
    pattern = r"\$\{([^}:]+)(?::([^}]*))?\}"

    def replace_env_var(match: re.Match[str]) -> str:
        var_name, fallback = match.group(1), match.group(2)
        # `fallback` is None when the placeholder has no ":default" part.
        resolved = os.environ.get(var_name, fallback)
        if resolved is None:
            raise ValueError(
                f"Environment variable '{var_name}' is required but not set"
            )
        return resolved

    return re.sub(pattern, replace_env_var, value)
113
+
114
+
115
def _resolve_path(base_path: str, target_path: str) -> str:
    """
    Resolve a target path relative to base path.

    Handles both local paths and URIs. URL bases are joined with
    ``urllib.parse.urljoin``; for schemes it does not know (e.g. ``s3``,
    ``gs``) the join is done manually with POSIX path rules.

    Args:
        base_path: Base path or URI (a file, or a directory)
        target_path: Target path to resolve (relative or absolute)

    Returns:
        Resolved path or URI. An absolute ``target_path`` is returned as-is.
    """
    import posixpath
    from urllib.parse import urljoin, urlparse

    # If target is already absolute (has scheme or starts with /), use as-is
    parsed = urlparse(target_path)
    if parsed.scheme or target_path.startswith("/"):
        return target_path

    base_parsed = urlparse(base_path)
    # A one-letter "scheme" is a Windows drive letter (C:\...), not a URL.
    if len(base_parsed.scheme) > 1:
        joined = urljoin(base_path, target_path)
        if joined != target_path:
            return joined
        # urljoin hands the target back untouched for schemes missing from
        # its internal list of relative-capable schemes, so join manually:
        # drop the last segment of the base and normalise "." / "..".
        base_dir = posixpath.dirname(base_parsed.path) or "/"
        resolved = posixpath.normpath(posixpath.join(base_dir, target_path))
        return f"{base_parsed.scheme}://{base_parsed.netloc}{resolved}"

    # Local file resolution: a non-directory base is treated as a file whose
    # parent directory is the anchor.
    base_path_obj = Path(base_path)
    if not base_path_obj.is_dir():
        base_path_obj = base_path_obj.parent
    return str(base_path_obj / target_path)
146
+
147
+
148
def _env_var_constructor(loader: YAMLLoader, node: yaml.ScalarNode) -> str:
    """Build a string scalar and expand its environment variable placeholders."""
    return _substitute_env_vars(loader.construct_scalar(node))
152
+
153
+
154
def _include_constructor(loader: YAMLLoader, node: yaml.ScalarNode) -> Any:
    """Handle ``!include``: load another YAML file and return its parsed value.

    The path is resolved against the including loader's ``base_path``; the
    included file is parsed with a ``YAMLLoader`` whose base path is the
    included file itself.

    Raises:
        FileNotFoundError: If reading (or a nested load) fails with an
            OSError; the original error is chained as the cause.
    """
    from functools import partial

    resolved_path = _resolve_path(loader.base_path, loader.construct_scalar(node))

    try:
        with fsspec.open(resolved_path, "r", encoding="utf-8") as handle:
            text = handle.read()  # type: ignore[misc]
        # Bind base_path so yaml.load can instantiate the loader on its own.
        nested_loader = partial(YAMLLoader, base_path=resolved_path)
        return yaml.load(text, nested_loader)  # type: ignore[arg-type]
    except OSError as e:
        # OSError covers FileNotFoundError and IOError (its alias).
        raise FileNotFoundError(
            f"Failed to load included file '{resolved_path}': {e}"
        ) from e
171
+
172
+
173
def _include_raw_constructor(loader: YAMLLoader, node: yaml.ScalarNode) -> str:
    """Handle ``!include_raw``: return an external file's text verbatim.

    The path is resolved against the including loader's ``base_path``.

    Raises:
        FileNotFoundError: If reading fails with an OSError; the original
            error is chained as the cause.
    """
    resolved_path = _resolve_path(loader.base_path, loader.construct_scalar(node))

    try:
        with fsspec.open(resolved_path, "r", encoding="utf-8") as handle:
            text = handle.read()
        return text  # type: ignore[no-any-return]
    except OSError as e:
        # OSError covers FileNotFoundError and IOError (its alias).
        raise FileNotFoundError(
            f"Failed to load included file '{resolved_path}': {e}"
        ) from e
185
+
186
+
187
# Hook the custom tags and string handling into YAMLLoader.
# File inclusion tags:
YAMLLoader.add_constructor("!include", _include_constructor)
YAMLLoader.add_constructor("!include_raw", _include_raw_constructor)
# Scalars tagged as plain YAML strings get environment variable expansion:
YAMLLoader.add_constructor("tag:yaml.org,2002:str", _env_var_constructor)
191
+
192
+
193
def load_yaml_file(path: str | Path) -> dict[str, Any]:
    """
    Load YAML from a file path or URI.

    Supports multiple URI schemes via fsspec (local files, http, s3, etc.).
    For local files, a ``.env`` file next to the source is loaded first
    (best effort); ``load_dotenv()`` is then called with its defaults.

    Args:
        path: File path or URI to load

    Returns:
        Parsed YAML as dictionary

    Raises:
        YAMLLoadError: If YAML parsing fails, or if a required environment
            variable is missing (the ValueError is wrapped by _parse_yaml)
        FileNotFoundError: If file doesn't exist
    """
    import logging
    from urllib.parse import urlparse

    source_str = str(path)

    # Load .env file if it exists in the source directory. This is purely a
    # convenience, so a failure must never prevent loading the YAML itself.
    try:
        parsed = urlparse(source_str)
        if parsed.scheme in ("file", ""):
            # Local file - load .env from same directory
            source_path = Path(parsed.path if parsed.path else source_str)
            if source_path.is_file():
                env_file = source_path.parent / ".env"
                if env_file.exists():
                    load_dotenv(env_file)
    except Exception as exc:
        # Still best-effort, but leave a trace instead of hiding the cause.
        logging.getLogger(__name__).debug(
            "Skipping .env lookup next to %s: %s", source_str, exc
        )

    # Also try the default .env discovery
    load_dotenv()

    # Load file content
    try:
        with fsspec.open(source_str, "r", encoding="utf-8") as f:
            content = f.read()  # type: ignore[misc]
    except FileNotFoundError as e:
        raise FileNotFoundError(f"File not found: {source_str}") from e

    # The source doubles as base path so relative includes resolve next to it.
    return _parse_yaml(content, base_path=source_str, source_name=source_str)
240
+
241
+
242
def load_yaml_string(
    content: str, base_path: str | Path | None = None
) -> dict[str, Any]:
    """
    Parse YAML held in a string.

    ``load_dotenv()`` is called with its defaults before parsing.

    Args:
        content: Raw YAML content as string
        base_path: Base path for resolving relative includes (default: cwd)

    Returns:
        Parsed YAML as dictionary

    Raises:
        YAMLLoadError: If YAML parsing fails, or if a required environment
            variable is missing (the ValueError is wrapped by _parse_yaml)
    """
    load_dotenv()

    if base_path:
        base = str(base_path)
    else:
        base = str(Path.cwd())

    return _parse_yaml(content, base_path=base, source_name="<string>")
263
+
264
+
265
def _parse_yaml(
    content: str, base_path: str, source_name: str
) -> dict[str, Any]:
    """
    Parse YAML text using YAMLLoader (env var substitution and includes).

    Args:
        content: YAML content to parse
        base_path: Base path for resolving relative includes
        source_name: Source name for error messages

    Returns:
        Parsed YAML as dictionary

    Raises:
        YAMLLoadError: If YAML parsing fails, or if a ValueError (such as a
            missing environment variable) is raised while loading
    """
    from functools import partial

    # Bind base_path so yaml.load can instantiate the loader on its own.
    loader_class = partial(YAMLLoader, base_path=base_path)

    try:
        return yaml.load(content, loader_class)  # type: ignore[arg-type,no-any-return]
    except yaml.YAMLError as e:
        # Not every YAMLError carries a position; use it when present.
        mark = getattr(e, "problem_mark", None)
        line, column = (mark.line, mark.column) if mark else (None, None)

        # Prefer the short `problem` text over the full rendering of e.
        error_msg = str(e)
        if hasattr(e, "problem"):
            error_msg = e.problem or error_msg  # type: ignore[attr-defined]

        raise YAMLLoadError(
            message=f"YAML parsing error: {error_msg}",
            line=line,
            column=column,
            source=source_name,
            original_error=e,
        ) from e
    except ValueError as e:
        # Environment variable errors
        raise YAMLLoadError(
            message=str(e),
            source=source_name,
            original_error=e,
        ) from e