hexdag-0.5.0.dev1-py3-none-any.whl

This diff shows the contents of publicly available package versions released to a supported registry. It is provided for informational purposes only and reflects the changes between package versions as they appear in their public registries.
Files changed (261)
  1. hexdag/__init__.py +116 -0
  2. hexdag/__main__.py +30 -0
  3. hexdag/adapters/executors/__init__.py +5 -0
  4. hexdag/adapters/executors/local_executor.py +316 -0
  5. hexdag/builtin/__init__.py +6 -0
  6. hexdag/builtin/adapters/__init__.py +51 -0
  7. hexdag/builtin/adapters/anthropic/__init__.py +5 -0
  8. hexdag/builtin/adapters/anthropic/anthropic_adapter.py +151 -0
  9. hexdag/builtin/adapters/database/__init__.py +6 -0
  10. hexdag/builtin/adapters/database/csv/csv_adapter.py +249 -0
  11. hexdag/builtin/adapters/database/pgvector/__init__.py +5 -0
  12. hexdag/builtin/adapters/database/pgvector/pgvector_adapter.py +478 -0
  13. hexdag/builtin/adapters/database/sqlalchemy/sqlalchemy_adapter.py +252 -0
  14. hexdag/builtin/adapters/database/sqlite/__init__.py +5 -0
  15. hexdag/builtin/adapters/database/sqlite/sqlite_adapter.py +410 -0
  16. hexdag/builtin/adapters/local/README.md +59 -0
  17. hexdag/builtin/adapters/local/__init__.py +7 -0
  18. hexdag/builtin/adapters/local/local_observer_manager.py +696 -0
  19. hexdag/builtin/adapters/memory/__init__.py +47 -0
  20. hexdag/builtin/adapters/memory/file_memory_adapter.py +297 -0
  21. hexdag/builtin/adapters/memory/in_memory_memory.py +216 -0
  22. hexdag/builtin/adapters/memory/schemas.py +57 -0
  23. hexdag/builtin/adapters/memory/session_memory.py +178 -0
  24. hexdag/builtin/adapters/memory/sqlite_memory_adapter.py +215 -0
  25. hexdag/builtin/adapters/memory/state_memory.py +280 -0
  26. hexdag/builtin/adapters/mock/README.md +89 -0
  27. hexdag/builtin/adapters/mock/__init__.py +15 -0
  28. hexdag/builtin/adapters/mock/hexdag.toml +50 -0
  29. hexdag/builtin/adapters/mock/mock_database.py +225 -0
  30. hexdag/builtin/adapters/mock/mock_embedding.py +223 -0
  31. hexdag/builtin/adapters/mock/mock_llm.py +177 -0
  32. hexdag/builtin/adapters/mock/mock_tool_adapter.py +192 -0
  33. hexdag/builtin/adapters/mock/mock_tool_router.py +232 -0
  34. hexdag/builtin/adapters/openai/__init__.py +5 -0
  35. hexdag/builtin/adapters/openai/openai_adapter.py +634 -0
  36. hexdag/builtin/adapters/secret/__init__.py +7 -0
  37. hexdag/builtin/adapters/secret/local_secret_adapter.py +248 -0
  38. hexdag/builtin/adapters/unified_tool_router.py +280 -0
  39. hexdag/builtin/macros/__init__.py +17 -0
  40. hexdag/builtin/macros/conversation_agent.py +390 -0
  41. hexdag/builtin/macros/llm_macro.py +151 -0
  42. hexdag/builtin/macros/reasoning_agent.py +423 -0
  43. hexdag/builtin/macros/tool_macro.py +380 -0
  44. hexdag/builtin/nodes/__init__.py +38 -0
  45. hexdag/builtin/nodes/_discovery.py +123 -0
  46. hexdag/builtin/nodes/agent_node.py +696 -0
  47. hexdag/builtin/nodes/base_node_factory.py +242 -0
  48. hexdag/builtin/nodes/composite_node.py +926 -0
  49. hexdag/builtin/nodes/data_node.py +201 -0
  50. hexdag/builtin/nodes/expression_node.py +487 -0
  51. hexdag/builtin/nodes/function_node.py +454 -0
  52. hexdag/builtin/nodes/llm_node.py +491 -0
  53. hexdag/builtin/nodes/loop_node.py +920 -0
  54. hexdag/builtin/nodes/mapped_input.py +518 -0
  55. hexdag/builtin/nodes/port_call_node.py +269 -0
  56. hexdag/builtin/nodes/tool_call_node.py +195 -0
  57. hexdag/builtin/nodes/tool_utils.py +390 -0
  58. hexdag/builtin/prompts/__init__.py +68 -0
  59. hexdag/builtin/prompts/base.py +422 -0
  60. hexdag/builtin/prompts/chat_prompts.py +303 -0
  61. hexdag/builtin/prompts/error_correction_prompts.py +320 -0
  62. hexdag/builtin/prompts/tool_prompts.py +160 -0
  63. hexdag/builtin/tools/builtin_tools.py +84 -0
  64. hexdag/builtin/tools/database_tools.py +164 -0
  65. hexdag/cli/__init__.py +17 -0
  66. hexdag/cli/__main__.py +7 -0
  67. hexdag/cli/commands/__init__.py +27 -0
  68. hexdag/cli/commands/build_cmd.py +812 -0
  69. hexdag/cli/commands/create_cmd.py +208 -0
  70. hexdag/cli/commands/docs_cmd.py +293 -0
  71. hexdag/cli/commands/generate_types_cmd.py +252 -0
  72. hexdag/cli/commands/init_cmd.py +188 -0
  73. hexdag/cli/commands/pipeline_cmd.py +494 -0
  74. hexdag/cli/commands/plugin_dev_cmd.py +529 -0
  75. hexdag/cli/commands/plugins_cmd.py +441 -0
  76. hexdag/cli/commands/studio_cmd.py +101 -0
  77. hexdag/cli/commands/validate_cmd.py +221 -0
  78. hexdag/cli/main.py +84 -0
  79. hexdag/core/__init__.py +83 -0
  80. hexdag/core/config/__init__.py +20 -0
  81. hexdag/core/config/loader.py +479 -0
  82. hexdag/core/config/models.py +150 -0
  83. hexdag/core/configurable.py +294 -0
  84. hexdag/core/context/__init__.py +37 -0
  85. hexdag/core/context/execution_context.py +378 -0
  86. hexdag/core/docs/__init__.py +26 -0
  87. hexdag/core/docs/extractors.py +678 -0
  88. hexdag/core/docs/generators.py +890 -0
  89. hexdag/core/docs/models.py +120 -0
  90. hexdag/core/domain/__init__.py +10 -0
  91. hexdag/core/domain/dag.py +1225 -0
  92. hexdag/core/exceptions.py +234 -0
  93. hexdag/core/expression_parser.py +569 -0
  94. hexdag/core/logging.py +449 -0
  95. hexdag/core/models/__init__.py +17 -0
  96. hexdag/core/models/base.py +138 -0
  97. hexdag/core/orchestration/__init__.py +46 -0
  98. hexdag/core/orchestration/body_executor.py +481 -0
  99. hexdag/core/orchestration/components/__init__.py +97 -0
  100. hexdag/core/orchestration/components/adapter_lifecycle_manager.py +113 -0
  101. hexdag/core/orchestration/components/checkpoint_manager.py +134 -0
  102. hexdag/core/orchestration/components/execution_coordinator.py +360 -0
  103. hexdag/core/orchestration/components/health_check_manager.py +176 -0
  104. hexdag/core/orchestration/components/input_mapper.py +143 -0
  105. hexdag/core/orchestration/components/lifecycle_manager.py +583 -0
  106. hexdag/core/orchestration/components/node_executor.py +377 -0
  107. hexdag/core/orchestration/components/secret_manager.py +202 -0
  108. hexdag/core/orchestration/components/wave_executor.py +158 -0
  109. hexdag/core/orchestration/constants.py +17 -0
  110. hexdag/core/orchestration/events/README.md +312 -0
  111. hexdag/core/orchestration/events/__init__.py +104 -0
  112. hexdag/core/orchestration/events/batching.py +330 -0
  113. hexdag/core/orchestration/events/decorators.py +139 -0
  114. hexdag/core/orchestration/events/events.py +573 -0
  115. hexdag/core/orchestration/events/observers/__init__.py +30 -0
  116. hexdag/core/orchestration/events/observers/core_observers.py +690 -0
  117. hexdag/core/orchestration/events/observers/models.py +111 -0
  118. hexdag/core/orchestration/events/taxonomy.py +269 -0
  119. hexdag/core/orchestration/hook_context.py +237 -0
  120. hexdag/core/orchestration/hooks.py +437 -0
  121. hexdag/core/orchestration/models.py +418 -0
  122. hexdag/core/orchestration/orchestrator.py +910 -0
  123. hexdag/core/orchestration/orchestrator_factory.py +275 -0
  124. hexdag/core/orchestration/port_wrappers.py +327 -0
  125. hexdag/core/orchestration/prompt/__init__.py +32 -0
  126. hexdag/core/orchestration/prompt/template.py +332 -0
  127. hexdag/core/pipeline_builder/__init__.py +21 -0
  128. hexdag/core/pipeline_builder/component_instantiator.py +386 -0
  129. hexdag/core/pipeline_builder/include_tag.py +265 -0
  130. hexdag/core/pipeline_builder/pipeline_config.py +133 -0
  131. hexdag/core/pipeline_builder/py_tag.py +223 -0
  132. hexdag/core/pipeline_builder/tag_discovery.py +268 -0
  133. hexdag/core/pipeline_builder/yaml_builder.py +1196 -0
  134. hexdag/core/pipeline_builder/yaml_validator.py +569 -0
  135. hexdag/core/ports/__init__.py +65 -0
  136. hexdag/core/ports/api_call.py +133 -0
  137. hexdag/core/ports/database.py +489 -0
  138. hexdag/core/ports/embedding.py +215 -0
  139. hexdag/core/ports/executor.py +237 -0
  140. hexdag/core/ports/file_storage.py +117 -0
  141. hexdag/core/ports/healthcheck.py +87 -0
  142. hexdag/core/ports/llm.py +551 -0
  143. hexdag/core/ports/memory.py +70 -0
  144. hexdag/core/ports/observer_manager.py +130 -0
  145. hexdag/core/ports/secret.py +145 -0
  146. hexdag/core/ports/tool_router.py +94 -0
  147. hexdag/core/ports_builder.py +623 -0
  148. hexdag/core/protocols.py +273 -0
  149. hexdag/core/resolver.py +304 -0
  150. hexdag/core/schema/__init__.py +9 -0
  151. hexdag/core/schema/generator.py +742 -0
  152. hexdag/core/secrets.py +242 -0
  153. hexdag/core/types.py +413 -0
  154. hexdag/core/utils/async_warnings.py +206 -0
  155. hexdag/core/utils/schema_conversion.py +78 -0
  156. hexdag/core/utils/sql_validation.py +86 -0
  157. hexdag/core/validation/secure_json.py +148 -0
  158. hexdag/core/yaml_macro.py +517 -0
  159. hexdag/mcp_server.py +3120 -0
  160. hexdag/studio/__init__.py +10 -0
  161. hexdag/studio/build_ui.py +92 -0
  162. hexdag/studio/server/__init__.py +1 -0
  163. hexdag/studio/server/main.py +100 -0
  164. hexdag/studio/server/routes/__init__.py +9 -0
  165. hexdag/studio/server/routes/execute.py +208 -0
  166. hexdag/studio/server/routes/export.py +558 -0
  167. hexdag/studio/server/routes/files.py +207 -0
  168. hexdag/studio/server/routes/plugins.py +419 -0
  169. hexdag/studio/server/routes/validate.py +220 -0
  170. hexdag/studio/ui/index.html +13 -0
  171. hexdag/studio/ui/package-lock.json +2992 -0
  172. hexdag/studio/ui/package.json +31 -0
  173. hexdag/studio/ui/postcss.config.js +6 -0
  174. hexdag/studio/ui/public/hexdag.svg +5 -0
  175. hexdag/studio/ui/src/App.tsx +251 -0
  176. hexdag/studio/ui/src/components/Canvas.tsx +408 -0
  177. hexdag/studio/ui/src/components/ContextMenu.tsx +187 -0
  178. hexdag/studio/ui/src/components/FileBrowser.tsx +123 -0
  179. hexdag/studio/ui/src/components/Header.tsx +181 -0
  180. hexdag/studio/ui/src/components/HexdagNode.tsx +193 -0
  181. hexdag/studio/ui/src/components/NodeInspector.tsx +512 -0
  182. hexdag/studio/ui/src/components/NodePalette.tsx +262 -0
  183. hexdag/studio/ui/src/components/NodePortsSection.tsx +403 -0
  184. hexdag/studio/ui/src/components/PluginManager.tsx +347 -0
  185. hexdag/studio/ui/src/components/PortsEditor.tsx +481 -0
  186. hexdag/studio/ui/src/components/PythonEditor.tsx +195 -0
  187. hexdag/studio/ui/src/components/ValidationPanel.tsx +105 -0
  188. hexdag/studio/ui/src/components/YamlEditor.tsx +196 -0
  189. hexdag/studio/ui/src/components/index.ts +8 -0
  190. hexdag/studio/ui/src/index.css +92 -0
  191. hexdag/studio/ui/src/main.tsx +10 -0
  192. hexdag/studio/ui/src/types/index.ts +123 -0
  193. hexdag/studio/ui/src/vite-env.d.ts +1 -0
  194. hexdag/studio/ui/tailwind.config.js +29 -0
  195. hexdag/studio/ui/tsconfig.json +37 -0
  196. hexdag/studio/ui/tsconfig.node.json +13 -0
  197. hexdag/studio/ui/vite.config.ts +35 -0
  198. hexdag/visualization/__init__.py +69 -0
  199. hexdag/visualization/dag_visualizer.py +1020 -0
  200. hexdag-0.5.0.dev1.dist-info/METADATA +369 -0
  201. hexdag-0.5.0.dev1.dist-info/RECORD +261 -0
  202. hexdag-0.5.0.dev1.dist-info/WHEEL +4 -0
  203. hexdag-0.5.0.dev1.dist-info/entry_points.txt +4 -0
  204. hexdag-0.5.0.dev1.dist-info/licenses/LICENSE +190 -0
  205. hexdag_plugins/.gitignore +43 -0
  206. hexdag_plugins/README.md +73 -0
  207. hexdag_plugins/__init__.py +1 -0
  208. hexdag_plugins/azure/LICENSE +21 -0
  209. hexdag_plugins/azure/README.md +414 -0
  210. hexdag_plugins/azure/__init__.py +21 -0
  211. hexdag_plugins/azure/azure_blob_adapter.py +450 -0
  212. hexdag_plugins/azure/azure_cosmos_adapter.py +383 -0
  213. hexdag_plugins/azure/azure_keyvault_adapter.py +314 -0
  214. hexdag_plugins/azure/azure_openai_adapter.py +415 -0
  215. hexdag_plugins/azure/pyproject.toml +107 -0
  216. hexdag_plugins/azure/tests/__init__.py +1 -0
  217. hexdag_plugins/azure/tests/test_azure_blob_adapter.py +350 -0
  218. hexdag_plugins/azure/tests/test_azure_cosmos_adapter.py +323 -0
  219. hexdag_plugins/azure/tests/test_azure_keyvault_adapter.py +330 -0
  220. hexdag_plugins/azure/tests/test_azure_openai_adapter.py +329 -0
  221. hexdag_plugins/hexdag_etl/README.md +168 -0
  222. hexdag_plugins/hexdag_etl/__init__.py +53 -0
  223. hexdag_plugins/hexdag_etl/examples/01_simple_pandas_transform.py +270 -0
  224. hexdag_plugins/hexdag_etl/examples/02_simple_pandas_only.py +149 -0
  225. hexdag_plugins/hexdag_etl/examples/03_file_io_pipeline.py +109 -0
  226. hexdag_plugins/hexdag_etl/examples/test_pandas_transform.py +84 -0
  227. hexdag_plugins/hexdag_etl/hexdag.toml +25 -0
  228. hexdag_plugins/hexdag_etl/hexdag_etl/__init__.py +48 -0
  229. hexdag_plugins/hexdag_etl/hexdag_etl/nodes/__init__.py +13 -0
  230. hexdag_plugins/hexdag_etl/hexdag_etl/nodes/api_extract.py +230 -0
  231. hexdag_plugins/hexdag_etl/hexdag_etl/nodes/base_node_factory.py +181 -0
  232. hexdag_plugins/hexdag_etl/hexdag_etl/nodes/file_io.py +415 -0
  233. hexdag_plugins/hexdag_etl/hexdag_etl/nodes/outlook.py +492 -0
  234. hexdag_plugins/hexdag_etl/hexdag_etl/nodes/pandas_transform.py +563 -0
  235. hexdag_plugins/hexdag_etl/hexdag_etl/nodes/sql_extract_load.py +112 -0
  236. hexdag_plugins/hexdag_etl/pyproject.toml +82 -0
  237. hexdag_plugins/hexdag_etl/test_transform.py +54 -0
  238. hexdag_plugins/hexdag_etl/tests/test_plugin_integration.py +62 -0
  239. hexdag_plugins/mysql_adapter/LICENSE +21 -0
  240. hexdag_plugins/mysql_adapter/README.md +224 -0
  241. hexdag_plugins/mysql_adapter/__init__.py +6 -0
  242. hexdag_plugins/mysql_adapter/mysql_adapter.py +408 -0
  243. hexdag_plugins/mysql_adapter/pyproject.toml +93 -0
  244. hexdag_plugins/mysql_adapter/tests/test_mysql_adapter.py +259 -0
  245. hexdag_plugins/storage/README.md +184 -0
  246. hexdag_plugins/storage/__init__.py +19 -0
  247. hexdag_plugins/storage/file/__init__.py +5 -0
  248. hexdag_plugins/storage/file/local.py +325 -0
  249. hexdag_plugins/storage/ports/__init__.py +5 -0
  250. hexdag_plugins/storage/ports/vector_store.py +236 -0
  251. hexdag_plugins/storage/sql/__init__.py +7 -0
  252. hexdag_plugins/storage/sql/base.py +187 -0
  253. hexdag_plugins/storage/sql/mysql.py +27 -0
  254. hexdag_plugins/storage/sql/postgresql.py +27 -0
  255. hexdag_plugins/storage/tests/__init__.py +1 -0
  256. hexdag_plugins/storage/tests/test_local_file_storage.py +161 -0
  257. hexdag_plugins/storage/tests/test_sql_adapters.py +212 -0
  258. hexdag_plugins/storage/vector/__init__.py +7 -0
  259. hexdag_plugins/storage/vector/chromadb.py +223 -0
  260. hexdag_plugins/storage/vector/in_memory.py +285 -0
  261. hexdag_plugins/storage/vector/pgvector.py +502 -0
hexdag/core/pipeline_builder/yaml_builder.py
@@ -0,0 +1,1196 @@
+"""Simplified YAML Pipeline Builder with Plugin Architecture.
+
+Philosophy:
+- Core builder: Minimal YAML → DirectedGraph parser
+- Preprocessing plugins: Environment variables, templating, validation
+- Entity plugins: Macros, nodes (each entity type handles its own YAML)
+
+Plugins provide clear value:
+- EnvironmentVariablePlugin: Resolve ${VAR} and ${VAR:default}
+- TemplatePlugin: Jinja2 templating in YAML values
+- SchemaValidationPlugin: Validate schemas before execution
+"""
+
+import os
+import re
+from contextlib import contextmanager, suppress
+from functools import lru_cache, singledispatch
+from pathlib import Path
+from typing import TYPE_CHECKING, Any, Protocol, TypeGuard, cast
+
+import yaml
+from jinja2 import TemplateSyntaxError, UndefinedError
+from jinja2.sandbox import SandboxedEnvironment
+
+from hexdag.core.configurable import ConfigurableMacro
+from hexdag.core.domain.dag import DirectedGraph, NodeSpec
+from hexdag.core.logging import get_logger
+from hexdag.core.pipeline_builder.pipeline_config import PipelineConfig
+from hexdag.core.pipeline_builder.yaml_validator import YamlValidator
+from hexdag.core.resolver import ResolveError, register_runtime, resolve
+
+if TYPE_CHECKING:
+    from collections.abc import Callable
+
+logger = get_logger(__name__)
+
+
+class YamlPipelineBuilderError(Exception):
+    """YAML pipeline building errors."""
+
+    pass
+
+
+# ============================================================================
+# Type Guards
+# ============================================================================
+
+
+def _is_dict_config(value: Any) -> TypeGuard[dict[str, Any]]:
+    """Type guard to verify value is a dictionary."""
+    return isinstance(value, dict)
+
+
+# ============================================================================
+# Plugin Protocol
+# ============================================================================
+
+
+class PreprocessPlugin(Protocol):
+    """Plugin for preprocessing YAML before building (env vars, templating, etc.)."""
+
+    def process(self, config: dict[str, Any]) -> dict[str, Any]:
+        """Process entire config, returning modified version."""
+        ...
+
+
+class EntityPlugin(Protocol):
+    """Plugin for building specific entity types (macros, nodes, etc.)."""
+
+    def can_handle(self, node_config: dict[str, Any]) -> bool:
+        """Return True if this plugin can handle the node config."""
+        ...
+
+    def build(
+        self, node_config: dict[str, Any], builder: "YamlPipelineBuilder", graph: DirectedGraph
+    ) -> NodeSpec | None:
+        """Build entity from config.
+
+        Args
+        ----
+        node_config: The node configuration dictionary.
+        builder: The YamlPipelineBuilder instance.
+        graph: The DirectedGraph instance.
+
+        Return NodeSpec or None if handled (e.g., macro merged into graph)."""
+        ...
+
+
+# ============================================================================
+# Core Builder
+# ============================================================================
+
+
+class YamlPipelineBuilder:
+    """YAML → DirectedGraph builder with plugin support.
+
+    Workflow:
+    1. Parse YAML
+    2. Select environment
+    3. Validate structure
+    4. Run preprocessing plugins (env vars, templates)
+    5. Build graph using entity plugins
+    6. Extract pipeline config
+    """
+
+    def __init__(self, base_path: Path | None = None) -> None:
+        """Initialize builder.
+
+        Args:
+            base_path: Base directory for resolving includes (default: cwd)
+        """
+        self.base_path = base_path or Path.cwd()
+        self.validator = YamlValidator()
+
+        # Plugins
+        self.preprocess_plugins: list[PreprocessPlugin] = []
+        self.entity_plugins: list[EntityPlugin] = []
+
+        self._register_default_plugins()
+
+    def _register_default_plugins(self) -> None:
+        """Register default plugins for common use cases."""
+        # Preprocessing plugins (run before building)
+        self.preprocess_plugins.append(IncludePreprocessPlugin(base_path=self.base_path))
+        self.preprocess_plugins.append(EnvironmentVariablePlugin())
+        self.preprocess_plugins.append(TemplatePlugin())
+
+        # Entity plugins (build specific entity types)
+        self.entity_plugins.append(MacroDefinitionPlugin())  # Process Macro definitions first
+        self.entity_plugins.append(MacroEntityPlugin())  # Then macro invocations
+        self.entity_plugins.append(NodeEntityPlugin(self))  # Finally regular nodes
+
+    @contextmanager
+    def _temporary_base_path(self, new_base: Path) -> Any:
+        """Context manager for temporarily changing base_path.
+
+        Args:
+            new_base: Temporary base path to use
+
+        Yields:
+            None
+        """
+        original_base = self.base_path
+        self.base_path = new_base
+
+        # Update include plugin base paths
+        for plugin in self.preprocess_plugins:
+            if isinstance(plugin, IncludePreprocessPlugin):
+                plugin.base_path = new_base
+
+        try:
+            yield
+        finally:
+            # Always restore original state
+            self.base_path = original_base
+            for plugin in self.preprocess_plugins:
+                if isinstance(plugin, IncludePreprocessPlugin):
+                    plugin.base_path = original_base
+
+    # --- Public API ---
+
+    def build_from_yaml_file(
+        self, yaml_path: str, use_cache: bool = True
+    ) -> tuple[DirectedGraph, PipelineConfig]:
+        """Build from YAML file.
+
+        Args:
+            yaml_path: Path to YAML file
+            use_cache: Whether to use cached YAML parsing
+
+        Returns:
+            Tuple of (DirectedGraph, PipelineConfig)
+        """
+        yaml_file = Path(yaml_path)
+        yaml_content = yaml_file.read_text(encoding="utf-8")
+
+        # Use context manager to temporarily change base_path for relative includes
+        with self._temporary_base_path(yaml_file.parent):
+            return self.build_from_yaml_string(yaml_content, use_cache=use_cache)
+
+    def build_from_yaml_string(
+        self, yaml_content: str, use_cache: bool = True, environment: str | None = None
+    ) -> tuple[DirectedGraph, PipelineConfig]:
+        """Build DirectedGraph + PipelineConfig from YAML string."""
+        # Step 1: Parse YAML
+        documents = self._parse_yaml(yaml_content, use_cache=use_cache)
+
+        # Step 2: Process ALL documents for macro definitions first
+        # This allows macros to be defined in multi-document YAML
+        # before the pipeline that uses them
+        for doc in documents:
+            if isinstance(doc, dict) and doc.get("kind") == "Macro":
+                # Process includes for macro definitions (but skip template rendering)
+                # Templates in macros should be preserved for expansion-time rendering
+                processed_doc = doc
+                for plugin in self.preprocess_plugins:
+                    # Skip TemplatePlugin for Macro definitions - templates should be preserved
+                    if not isinstance(plugin, TemplatePlugin):
+                        processed_doc = plugin.process(processed_doc)
+                # Validate and process macro definition
+                processed_doc = self._validate_config(processed_doc)
+                # Register macro (MacroDefinitionPlugin handles this)
+                self._process_macro_definitions([processed_doc])
+
+        # Step 3: Select pipeline environment
+        config = self._select_environment(documents, environment)
+
+        # Skip if selected document is a Macro (already processed above)
+        if config.get("kind") == "Macro":
+            # Return empty graph for macro-only documents
+            logger.info("Document contains only macro definitions, no pipeline to build")
+            return DirectedGraph(), PipelineConfig(
+                ports={}, type_ports={}, policies={}, metadata={}, nodes=[]
+            )
+
+        # Step 4: Preprocess FIRST (includes must resolve before validation)
+        for plugin in self.preprocess_plugins:
+            config = plugin.process(config)
+
+        # Step 4.5: Register user-defined aliases BEFORE validation
+        # so that custom node kinds can pass validation
+        self._register_aliases(config)
+
+        # Step 5: Validate structure (after includes are resolved)
+        config = self._validate_config(config)
+
+        # Step 6: Build graph
+        graph = self._build_graph(config)
+
+        # Step 7: Extract pipeline config
+        pipeline_config = self._extract_pipeline_config(config)
+
+        logger.info(
+            "✅ Built pipeline '{name}' with {nodes} nodes, {ports} ports, {policies} policies",
+            name=pipeline_config.metadata.get("name", "unknown"),
+            nodes=len(graph.nodes),
+            ports=len(pipeline_config.ports),
+            policies=len(pipeline_config.policies),
+        )
+
+        return graph, pipeline_config
+
+    # --- Core Logic ---
+
+    def _parse_yaml(self, yaml_content: str, use_cache: bool) -> list[dict[str, Any]]:
+        """Parse YAML into list of documents."""
+        if "---" in yaml_content:
+            return list(yaml.safe_load_all(yaml_content))
+        parsed = _parse_yaml_cached(yaml_content) if use_cache else yaml.safe_load(yaml_content)
+        return [parsed]
+
+    def _select_environment(
+        self, documents: list[dict[str, Any]], environment: str | None
+    ) -> dict[str, Any]:
+        """Select document by environment name.
+
+        Skips Macro definitions when selecting the Pipeline document.
+        """
+        # Filter out Macro definitions - they're processed separately
+        pipeline_docs = [doc for doc in documents if doc.get("kind") != "Macro"]
+
+        if not pipeline_docs:
+            # No pipeline documents, return first macro (for macro-only files)
+            return documents[0]
+
+        if environment:
+            for doc in pipeline_docs:
+                if doc.get("metadata", {}).get("namespace") == environment:
+                    logger.info(f"Selected environment '{environment}' from multi-document YAML")
+                    return doc
+
+            available_envs = [
+                doc.get("metadata", {}).get("namespace", "default") for doc in pipeline_docs
+            ]
+            raise YamlPipelineBuilderError(
+                f"Environment '{environment}' not found in YAML. "
+                f"Available environments: {', '.join(available_envs)}"
+            )
+
+        if len(pipeline_docs) > 1:
+            logger.warning(
+                f"Multi-document YAML detected ({len(pipeline_docs)} pipeline documents) "
+                "but no environment specified. "
+                "Using first pipeline. Specify environment parameter to select specific config."
+            )
+
+        return pipeline_docs[0]
+
+    def _validate_config(self, config: Any) -> dict[str, Any]:
+        """Validate YAML structure."""
+        if not isinstance(config, dict):
+            raise YamlPipelineBuilderError(
+                f"YAML document must be a dictionary, got {type(config).__name__}"
+            )
+
+        self._validate_manifest_format(config)
+
+        result = self.validator.validate(config)
+        if not result.is_valid:
+            errors = "\n".join(f"  ERROR: {error}" for error in result.errors)
+            raise YamlPipelineBuilderError(f"YAML validation failed:\n{errors}")
+
+        for warning in result.warnings:
+            logger.warning(f"YAML validation warning: {warning}")
+
+        return config
+
+    @staticmethod
+    def _validate_manifest_format(config: dict[str, Any]) -> None:
+        """Validate declarative manifest format."""
+        if "kind" not in config:
+            raise YamlPipelineBuilderError(
+                "YAML must use declarative manifest format with 'kind' field. "
+                "Example:\n"
+                "apiVersion: v1\n"
+                "kind: Pipeline\n"
+                "metadata:\n"
+                "  name: my-pipeline\n"
+                "spec:\n"
+                "  nodes: [...]"
+            )
+
+        # Macro definitions have different structure (no spec field)
+        kind = config.get("kind")
+        if kind == "Macro":
+            # Macro has: metadata, parameters, nodes (no spec)
+            if "metadata" not in config:
+                raise YamlPipelineBuilderError("Macro definition must have 'metadata' field")
+            if "nodes" not in config:
+                raise YamlPipelineBuilderError("Macro definition must have 'nodes' field")
+        else:
+            # Pipeline and other kinds require spec
+            if "spec" not in config:
+                raise YamlPipelineBuilderError("Manifest YAML must have 'spec' field")
+
+            if "metadata" not in config:
+                raise YamlPipelineBuilderError("Manifest YAML must have 'metadata' field")
+
+    def _process_macro_definitions(self, macro_configs: list[dict[str, Any]]) -> None:
+        """Process macro definitions and register them.
+
+        Parameters
+        ----------
+        macro_configs : list[dict[str, Any]]
+            List of validated macro configuration dictionaries
+        """
+        # Create temporary graph (not used for macros, but required by plugin interface)
+        temp_graph = DirectedGraph()
+
+        # Find MacroDefinitionPlugin
+        macro_plugin = next(
+            (p for p in self.entity_plugins if isinstance(p, MacroDefinitionPlugin)), None
+        )
+
+        if macro_plugin is None:
+            raise YamlPipelineBuilderError(
+                "MacroDefinitionPlugin not found. Cannot process macro definitions."
+            )
+
+        for macro_config in macro_configs:
+            # Wrap in a fake node config format expected by entity plugin
+            node_config = macro_config
+            macro_plugin.build(node_config, self, temp_graph)
+
+    def _register_aliases(self, config: dict[str, Any]) -> None:
+        """Register user-defined aliases from spec.aliases before validation.
+
+        This allows custom node kinds to be used in YAML without requiring
+        full module paths.
+
+        Parameters
+        ----------
+        config : dict[str, Any]
+            The YAML configuration dict
+        """
+        from hexdag.core.resolver import register_alias
+
+        spec = config.get("spec", {})
+        aliases = spec.get("aliases", {})
+
+        for alias, full_path in aliases.items():
+            register_alias(alias, full_path)
+            logger.debug(f"Registered alias: {alias} -> {full_path}")
+
+    def _build_graph(self, config: dict[str, Any]) -> DirectedGraph:
+        """Build DirectedGraph using entity plugins."""
+        graph = DirectedGraph()
+        spec = config.get("spec", {})
+        nodes_list = spec.get("nodes", [])
+
+        for node_config in nodes_list:
+            # Find plugin that can handle this entity
+            for plugin in self.entity_plugins:
+                if plugin.can_handle(node_config):
+                    result = plugin.build(node_config, self, graph)
+                    if result is not None:
+                        graph += result
+                    break
+            else:
+                # No plugin handled it - error
+                kind = node_config.get("kind", "unknown")
+                raise YamlPipelineBuilderError(f"No plugin can handle kind: {kind}")
+
+        return graph
+
+    @staticmethod
+    def _extract_pipeline_config(config: dict[str, Any]) -> PipelineConfig:
+        """Extract PipelineConfig from YAML."""
+        spec = config.get("spec", {})
+        metadata = config.get("metadata", {})
+
+        return PipelineConfig(
+            ports=spec.get("ports", {}),
+            type_ports=spec.get("type_ports", {}),
+            policies=spec.get("policies", {}),
+            metadata=metadata,
+            nodes=spec.get("nodes", []),
+        )
+
+
+# ============================================================================
+# Entity Plugins - Each handles one entity type
+# ============================================================================
+
+
+class MacroDefinitionPlugin:
+    """Plugin for handling Macro definitions (kind: Macro).
+
+    This plugin processes YAML macro definitions and registers them in the
+    component registry for later invocation. Macro definitions don't add
+    nodes to the graph - they just register reusable templates.
+
+    Examples
+    --------
+    YAML macro definition::
+
+        apiVersion: hexdag/v1
+        kind: Macro
+        metadata:
+          name: retry_workflow
+          description: Retry logic with exponential backoff
+        parameters:
+          - name: max_retries
+            type: int
+            default: 3
+        nodes:
+          - kind: function_node
+            metadata:
+              name: "{{name}}_attempt"
+            spec:
+              fn: "{{fn}}"
+    """
+
+    def can_handle(self, node_config: dict[str, Any]) -> bool:
+        """Handle Macro kind."""
+        return node_config.get("kind") == "Macro"
+
+    def build(
+        self, node_config: dict[str, Any], builder: YamlPipelineBuilder, graph: DirectedGraph
+    ) -> NodeSpec | None:
+        """Register YAML macro in component registry.
+
+        Parameters
+        ----------
+        node_config : dict[str, Any]
+            Macro definition configuration
+        builder : YamlPipelineBuilder
+            Builder instance (unused)
+        graph : DirectedGraph
+            Graph instance (unused - definitions don't add nodes)
+
+        Returns
+        -------
+        None
+            Macro definitions don't add nodes to the graph
+
+        Raises
+        ------
+        YamlPipelineBuilderError
+            If macro definition is invalid
+        """
+        # Import here to avoid circular dependency
+        from hexdag.core.yaml_macro import YamlMacro, YamlMacroConfig, YamlMacroParameterSpec
+
+        # Extract metadata
+        metadata = node_config.get("metadata", {})
+        macro_name = metadata.get("name")
+        if not macro_name:
+            raise YamlPipelineBuilderError("Macro definition missing 'metadata.name'")
+
+        macro_description = metadata.get("description")
+        _ = metadata.get("namespace", "user")  # Reserved for future namespace support
+
+        # Extract parameters
+        raw_parameters = node_config.get("parameters", [])
+        parameters = [YamlMacroParameterSpec(**p) for p in raw_parameters]
+
+        # Extract nodes
+        nodes = node_config.get("nodes", [])
+        if not nodes:
+            raise YamlPipelineBuilderError(
+                f"Macro '{macro_name}' has no nodes. Macros must define at least one node."
+            )
+
+        # Extract outputs (optional)
+        outputs = node_config.get("outputs")
+
+        # Create YamlMacroConfig
+        macro_config = YamlMacroConfig(
+            macro_name=macro_name,
+            macro_description=macro_description,
+            parameters=parameters,
+            nodes=nodes,
+            outputs=outputs,
+        )
+
+        # Register macro at runtime
+        # Create a dynamic class that pre-fills the YamlMacro config
+        config_dict = macro_config.model_dump()
+
+        class DynamicYamlMacro(YamlMacro):
+            """Dynamically generated YamlMacro with pre-filled configuration."""
+
+            def __init__(self, **kwargs: Any) -> None:
+                # Merge pre-filled config with any override kwargs
+                merged_config = {**config_dict, **kwargs}
+                super().__init__(**merged_config)
+
+        # Set class name for better debugging
+        DynamicYamlMacro.__name__ = f"YamlMacro_{macro_name}"
+        DynamicYamlMacro.__qualname__ = f"YamlMacro_{macro_name}"
+
+        # Register in runtime storage (for YAML-defined macros)
+        register_runtime(macro_name, DynamicYamlMacro)
+
+        logger.info(
+            f"✅ Registered YAML macro '{macro_name}' "
+            f"({len(parameters)} parameters, {len(nodes)} nodes)"
+        )
+
+        # Return None - macro definitions don't add nodes to the graph
+        return None
+
+
+class MacroEntityPlugin:
+    """Plugin for handling macro_invocation entities."""
+
+    def can_handle(self, node_config: dict[str, Any]) -> bool:
+        """Handle macro_invocation kind."""
+        return node_config.get("kind") == "macro_invocation"
+
+    def build(
+        self, node_config: dict[str, Any], builder: YamlPipelineBuilder, graph: DirectedGraph
+    ) -> NodeSpec | None:
+        """Expand macro into subgraph and merge into main graph."""
+        instance_name = node_config["metadata"]["name"]
+        spec = node_config.get("spec", {})
+        macro_ref = spec.get("macro")
+        if not macro_ref:
+            raise YamlPipelineBuilderError(f"Macro '{instance_name}' missing spec.macro field")
+
+        # macro_ref is the full module path (e.g., hexdag.builtin.macros.ReasoningAgentMacro)
+        # or a runtime-registered name for YAML-defined macros
+
+        # Get config params for macro initialization
+        config_params = spec.get("config", {}).copy()
+        inputs = spec.get("inputs", {})
+        dependencies = spec.get("dependencies", [])
+
+        # Resolve macro class - either full module path or runtime-registered name
+        try:
+            macro_cls = resolve(macro_ref)
+        except ResolveError as e:
+            raise YamlPipelineBuilderError(f"Macro '{macro_ref}' not found: {e}") from e
+
+        # Instantiate macro with config params
+        try:
+            macro_instance_obj = macro_cls(**config_params)
+        except Exception as e:
+            raise YamlPipelineBuilderError(f"Failed to instantiate macro '{macro_ref}': {e}") from e
+
+        # Validate it's actually a macro
+        if not isinstance(macro_instance_obj, ConfigurableMacro):
+            type_name = type(macro_instance_obj).__name__
+            raise YamlPipelineBuilderError(
+                f"Component '{macro_ref}' is not a ConfigurableMacro (got {type_name})"
+            )
+
+        macro_instance: ConfigurableMacro = macro_instance_obj
+
+        # Expand macro - merge config and inputs for validation
+        # config params are the parameter values for the macro
+        # inputs are the value mappings, so they should be merged
+        macro_inputs = {**config_params, **inputs}
+
+        # Expand macro
+        try:
+            subgraph = macro_instance.expand(
+                instance_name=instance_name, inputs=macro_inputs, dependencies=dependencies
+            )
+        except ValueError:
+            # Re-raise validation errors directly (e.g., required parameter, enum validation)
+            raise
+        except Exception as e:
+            raise YamlPipelineBuilderError(
+                f"Failed to expand macro '{macro_ref}' (instance '{instance_name}'): {e}"
+            ) from e
+
+        # Merge subgraph into main graph
+        self._merge_subgraph(graph, subgraph, dependencies)
+
+        logger.info(
+            "✅ Expanded macro '{macro}' as '{instance}' ({nodes} nodes)",
+            macro=macro_ref,
+            instance=instance_name,
+            nodes=len(subgraph.nodes),
+        )
+
+        # Return None - subgraph already merged into graph
+        return None
+
+    @staticmethod
+    def _merge_subgraph(
+        graph: DirectedGraph, subgraph: DirectedGraph, external_deps: list[str]
+    ) -> None:
+        """Merge subgraph into main graph with external dependencies.
+
+        Optimized to avoid unnecessary graph copies when no external dependencies exist.
+        """
+        if not external_deps:
+            # Fast path: direct merge when no external dependencies
+            graph |= subgraph
+        else:
+            # Only process entry nodes that need external dependencies
+            # Use in-place merge for better performance
+            for node in subgraph.nodes.values():
+                if not subgraph.get_dependencies(node.name):
+                    # Entry node - add external dependencies
+                    graph += node.after(*external_deps)
+                else:
+                    # Internal node - add as-is
+                    graph += node
+
+
+class NodeEntityPlugin:
+    """Plugin for handling all node types (llm, function, agent, etc.)."""
+
+    def __init__(self, builder: YamlPipelineBuilder):
+        """Initialize with reference to builder for shared state."""
+        self.builder = builder
+
+    def can_handle(self, node_config: dict[str, Any]) -> bool:
+        """Handle everything except macro_invocation."""
+        return node_config.get("kind") != "macro_invocation"
+
+    def build(
+        self, node_config: dict[str, Any], builder: YamlPipelineBuilder, graph: DirectedGraph
+    ) -> NodeSpec:
+        """Build node from config.
+
+        The 'kind' field must be a full module path to the node factory class.
+        Example: hexdag.builtin.nodes.LLMNode
+        """
+        # Validate structure
+        if "kind" not in node_config:
+            raise YamlPipelineBuilderError("Node missing 'kind' field")
+        if "metadata" not in node_config or "name" not in node_config["metadata"]:
+            raise YamlPipelineBuilderError(
+                f"Node '{node_config.get('kind')}' missing metadata.name"
+            )
+
+        kind = node_config["kind"]
+        node_id = node_config["metadata"]["name"]
+        spec = node_config.get("spec", {}).copy()
+        # Dependencies can be at node level or inside spec (for backwards compatibility)
+        deps = node_config.get("dependencies", []) or spec.pop("dependencies", [])
+
+        # Resolve factory class from full module path
+        try:
+            factory_obj = resolve(kind)
+        except ResolveError as e:
+            raise YamlPipelineBuilderError(f"Cannot resolve node kind '{kind}': {e}") from e
+
+        # Validate it's callable
+        if not callable(factory_obj):
+            raise YamlPipelineBuilderError(
+                f"Node factory '{kind}' is not callable (got {type(factory_obj).__name__})"
+            )
+
+        # Handle factory classes vs factory functions
+        # Factory classes need to be instantiated first, then called
+        # Factory functions can be called directly
+        if isinstance(factory_obj, type):
+            # It's a class - instantiate then call
+            factory_instance = factory_obj()
+            factory = cast("Callable[..., NodeSpec]", factory_instance)
+        else:
+            # It's already a callable (function or instance)
+            factory = cast("Callable[..., NodeSpec]", factory_obj)  # type: ignore[unreachable]
+
+        # Create node - pass name as first positional arg
+        node: NodeSpec = factory(node_id, **spec)
+
+        # Add dependencies
+        if deps:
+            node = node.after(*deps) if isinstance(deps, list) else node.after(deps)
+
+        return node
+
+
+# ============================================================================
+# Preprocessing Plugins
+# ============================================================================
+
+
+class IncludePreprocessPlugin:
+    """Resolve !include directives for YAML file inclusion.
+
+    Supports two syntaxes:
+    1. Simple include: !include path/to/file.yaml
+    2. Anchor include: !include path/to/file.yaml#anchor_name
+
+    Security:
+    - Only allows relative paths (no absolute paths)
+    - Prevents directory traversal attacks (no ../ beyond project root)
+    - Detects circular includes
+
+    For comprehensive examples, see notebooks/03_yaml_includes_and_composition.ipynb
+    """
+
+    def __init__(self, base_path: Path | None = None, max_depth: int = 10):
+        """Initialize include plugin.
+
+        Args:
+            base_path: Base directory for relative includes (changeable via context manager)
+            max_depth: Maximum include nesting depth to prevent circular includes
+        """
+        self.base_path = base_path or Path.cwd()
+        self.project_root = self.base_path  # Fixed project root for security validation
+        self.max_depth = max_depth
+
+    def process(self, config: dict[str, Any]) -> dict[str, Any]:
+        """Process !include directives recursively."""
+        # Create new include stack for this processing run (thread-safe)
+        include_stack: list[Path] = []
+        result = self._resolve_includes(
+            config, self.base_path, depth=0, include_stack=include_stack
+        )
+        if not _is_dict_config(result):
+            raise TypeError(
+                f"Include processing must return a dictionary, got {type(result).__name__}. "
+                "Check that your included files resolve to valid YAML dictionaries."
+            )
+        return result
+
+    def _resolve_includes(
+        self, obj: Any, current_base: Path, depth: int, include_stack: list[Path]
+    ) -> dict[str, Any] | list[Any] | Any:
+        """Recursively resolve !include directives.
+
+        Args:
+            obj: Object to process (dict, list, or primitive)
+            current_base: Base path for resolving relative includes
+            depth: Current recursion depth
+            include_stack: Stack of currently processing files (for circular detection)
+
+        Returns:
+            Processed object with includes resolved
+        """
+        if depth > self.max_depth:
+            raise YamlPipelineBuilderError(
+                f"Include nesting too deep (max {self.max_depth}). "
+                f"Possible circular include in: {' -> '.join(str(p) for p in include_stack)}"
+            )
+
+        if isinstance(obj, dict):
+            # Check for !include directive
+            if "!include" in obj and len(obj) == 1:
+                include_spec = obj["!include"]
+                return self._load_include(include_spec, current_base, depth, include_stack)
+
+            # Recurse into dict values
+            return {
+                k: self._resolve_includes(v, current_base, depth, include_stack)
+                for k, v in obj.items()
+            }
+
+        if isinstance(obj, list):
+            # Process each list item and flatten nested lists from includes
+            result = []
+            for item in obj:
+                resolved = self._resolve_includes(item, current_base, depth, include_stack)
+                # Flatten: if an include returns a list, extend rather than append
+                if isinstance(resolved, list):
+                    result.extend(resolved)
+                else:
+                    result.append(resolved)
+            return result
+
+        return obj
+
+    def _load_include(
+        self, include_spec: str, current_base: Path, depth: int, include_stack: list[Path]
+    ) -> Any:
+        """Load content from included file.
+
+        Args:
+            include_spec: Include specification (e.g., "file.yaml" or "file.yaml#anchor")
+            current_base: Base path for resolving relative paths
+            depth: Current recursion depth
+            include_stack: Stack of currently processing files (for circular detection)
+
+        Returns:
+            Loaded and processed content from included file
+        """
+        # Parse include specification (strip whitespace for better UX)
+        include_spec = include_spec.strip()
+        if "#" in include_spec:
+            file_path_str, anchor = include_spec.split("#", 1)
+            file_path_str = file_path_str.strip()
+            anchor = anchor.strip()
+        else:
+            file_path_str, anchor = include_spec, None
+
+        # Resolve file path
+        file_path = self._resolve_path(file_path_str, current_base)
+
+        # Check for circular includes
+        if file_path in include_stack:
+            cycle = " -> ".join(str(p) for p in include_stack + [file_path])
+            raise YamlPipelineBuilderError(f"Circular include detected: {cycle}")
+
+        # Load YAML file
+        try:
+            include_stack.append(file_path)
+            content = yaml.safe_load(file_path.read_text(encoding="utf-8"))
+
+            # Extract anchor if specified
+            if anchor:
+                if not isinstance(content, dict) or anchor not in content:
+                    raise YamlPipelineBuilderError(
+                        f"Anchor '{anchor}' not found in {file_path}. "
+                        f"Available: {list(content.keys()) if isinstance(content, dict) else 'N/A'}"
+                    )
+                content = content[anchor]
+
+            # Recursively resolve includes in loaded content
+            return self._resolve_includes(content, file_path.parent, depth + 1, include_stack)
+
+        except FileNotFoundError as e:
+            raise YamlPipelineBuilderError(
+                f"Include file not found: {file_path}\nSearched relative to: {current_base}"
+            ) from e
+        except yaml.YAMLError as e:
+            raise YamlPipelineBuilderError(f"Invalid YAML in included file {file_path}: {e}") from e
+        finally:
+            include_stack.pop()
+
+    def _resolve_path(self, path_str: str, current_base: Path) -> Path:
+        """Resolve and validate include path.
+
+        Args:
+            path_str: Path string from !include directive
+            current_base: Base path for resolving relative paths
+
+        Returns:
+            Validated absolute path
+
+        Raises:
+            YamlPipelineBuilderError: If path is invalid or potentially malicious
+        """
+        # Prevent absolute paths
+        if Path(path_str).is_absolute():
+            raise YamlPipelineBuilderError(
+                f"Absolute paths not allowed in !include: {path_str}\n"
+                "Use relative paths only for security."
+            )
+
+        # Resolve path relative to current base
+        resolved = (current_base / path_str).resolve()
+
+        # Prevent directory traversal outside project root
+        # Use the resolved project_root (not base_path) to handle symlinks properly
+        resolved_root = self.project_root.resolve()
+        try:
+            resolved.relative_to(resolved_root)
+        except ValueError as e:
+            raise YamlPipelineBuilderError(
+                f"Include path traverses outside project root: {path_str}\n"
+                f"Project root: {resolved_root}\n"
+                f"Attempted path: {resolved}"
+            ) from e
+
+        return resolved
+
+
+class EnvironmentVariablePlugin:
+    """Resolve ${VAR} and ${VAR:default} in YAML with deferred secret resolution.
+
+    For KeyVault/SecretPort workflows, secret-like environment variables are
+    preserved as ${VAR} for runtime resolution. This allows:
+    - Building pipelines without secrets present
+    - Runtime secret injection via SecretPort → Memory
+    - Separation of build and deployment contexts
+
+    Secret patterns (deferred to runtime):
+    - *_API_KEY, *_SECRET, *_TOKEN, *_PASSWORD, *_CREDENTIAL
+    - SECRET_*
+
+    Non-secret variables are resolved immediately at build-time.
+    """
+
+    ENV_VAR_PATTERN = re.compile(r"\$\{([A-Z_][A-Z0-9_]*?)(?::([^}]*))?\}")
+
+    # Secret patterns that should be deferred to runtime
+    SECRET_PATTERNS = frozenset({
+        r".*_API_KEY$",
+        r".*_SECRET$",
+        r".*_TOKEN$",
+        r".*_PASSWORD$",
+        r".*_CREDENTIAL$",
+        r"^SECRET_.*",
+    })
+
+    def __init__(self, defer_secrets: bool = True):
+        """Initialize environment variable plugin.
+
+        Parameters
+        ----------
+        defer_secrets : bool, default=True
+            If True, preserve ${VAR} syntax for secret-like variables,
+            allowing runtime resolution from KeyVault/Memory.
+            If False, all variables are resolved at build-time (legacy behavior).
+        """
+        self.defer_secrets = defer_secrets
+        if defer_secrets:
+            # Compile secret detection regex
+            self._secret_regex: re.Pattern[str] | None = re.compile(
+                "|".join(f"({p})" for p in self.SECRET_PATTERNS)
+            )
+        else:
+            self._secret_regex = None
+
+    def process(self, config: dict[str, Any]) -> dict[str, Any]:
+        """Recursively resolve environment variables."""
+        result = _resolve_env_vars(
+            config,
+            self.ENV_VAR_PATTERN,
+            secret_regex=self._secret_regex,
+            defer_secrets=self.defer_secrets,
+        )
+        if not _is_dict_config(result):
+            raise TypeError(
+                f"Environment variable resolution must return a dictionary, "
+                f"got {type(result).__name__}"
+            )
+        return result
+
+
+@singledispatch
+def _resolve_env_vars(
+    obj: Any,
+    pattern: re.Pattern[str],
+    secret_regex: re.Pattern[str] | None = None,
+    defer_secrets: bool = True,
+) -> Any:
+    """Recursively resolve ${VAR} in any structure.
+
+    Parameters
+    ----------
+    obj : Any
+        Object to process
+    pattern : re.Pattern[str]
+        Regex pattern for matching ${VAR} syntax
+    secret_regex : re.Pattern[str] | None
+        Regex for detecting secret-like variable names
+    defer_secrets : bool
+        If True, preserve ${VAR} for secrets
+
+    Returns
+    -------
+    Any
+        - For primitives: Returns the primitive unchanged
+        - For strings: Returns str | int | float | bool (with type coercion)
+        - For dicts: Returns dict[str, Any]
+        - For lists: Returns list[Any]
+    """
+    return obj
+
+
+@_resolve_env_vars.register(str)
+def _resolve_env_vars_str(
+    obj: str,
+    pattern: re.Pattern[str],
+    secret_regex: re.Pattern[str] | None = None,
+    defer_secrets: bool = True,
+) -> str | int | float | bool:
+    """Resolve ${VAR} in strings with optional secret deferral."""
+
+    def replacer(match: re.Match[str]) -> str:
+        var_name, default = match.group(1), match.group(2)
+
+        # Check if this looks like a secret
+        if defer_secrets and secret_regex and secret_regex.match(var_name):
+            # Secret detected - preserve ${VAR} syntax for runtime resolution
+            logger.debug(f"Deferring secret variable to runtime: {var_name}")
+            return match.group(0)  # Return original ${VAR} or ${VAR:default}
+
+        # Non-secret - resolve immediately from environment
+        env_value = os.environ.get(var_name)
+        if env_value is None:
+            if default is not None:
+                return default
+            raise YamlPipelineBuilderError(f"Environment variable '${{{var_name}}}' not set")
+        return env_value
+
+    resolved = pattern.sub(replacer, obj)
+
+    # Type coercion only if the value changed (was resolved)
+    if resolved != obj and not (defer_secrets and resolved.startswith("${")):
+        if resolved.lower() in ("true", "yes", "1"):
+            return True
+        if resolved.lower() in ("false", "no", "0"):
+            return False
+        with suppress(ValueError):
+            return int(resolved)
+        with suppress(ValueError):
+            return float(resolved)
+    return resolved
+
+
+@_resolve_env_vars.register(dict)
+def _resolve_env_vars_dict(
+    obj: dict,
+    pattern: re.Pattern[str],
+    secret_regex: re.Pattern[str] | None = None,
+    defer_secrets: bool = True,
+) -> dict[str, Any]:
+    """Resolve ${VAR} in dict values."""
+    return {k: _resolve_env_vars(v, pattern, secret_regex, defer_secrets) for k, v in obj.items()}
+
+
+@_resolve_env_vars.register(list)
+def _resolve_env_vars_list(
+    obj: list,
+    pattern: re.Pattern[str],
+    secret_regex: re.Pattern[str] | None = None,
+    defer_secrets: bool = True,
+) -> list[Any]:
+    """Resolve ${VAR} in list items."""
+    return [_resolve_env_vars(item, pattern, secret_regex, defer_secrets) for item in obj]
+
+
+class TemplatePlugin:
+    """Render Jinja2 templates in YAML with two-phase rendering strategy.
+
+    **Build-time Rendering** (YAML configuration context):
+    - Metadata fields (e.g., node names, descriptions)
+    - Pipeline-level spec fields (e.g., variables, ports, policies)
+    - Enables dynamic configuration from environment/variables
+
+    **Runtime Rendering** (node execution context):
+    - Node spec fields (e.g., template, prompt_template, initial_prompt)
+    - Pipeline outputs (e.g., spec.outputs with {{node.result}} references)
+    - Preserved to allow access to dependency/node outputs at runtime
+    - Enables dynamic prompts and output mapping based on execution results
+
+    Example:
+        ```yaml
+        spec:
+          variables:
+            node_name: analyzer
+          nodes:
+            - kind: llm_node
+              metadata:
+                name: "{{ spec.variables.node_name }}"  # Build-time: renders to "analyzer"
+              spec:
+                template: "{{input}}"  # Runtime: renders when node executes
+          outputs:
+            result: "{{analyzer.analysis}}"  # Runtime: rendered after pipeline completes
+        ```
+
+    Security:
+        Uses SandboxedEnvironment to prevent arbitrary code execution.
+    """
+
+    def __init__(self) -> None:
+        # Use sandboxed environment to prevent code execution attacks
+        # Even though this is for config files, defense in depth is important
+        self.env = SandboxedEnvironment(autoescape=False, keep_trailing_newline=True)
+
+    def process(self, config: dict[str, Any]) -> dict[str, Any]:
+        """Render Jinja2 templates with config as context."""
+        result = _render_templates(config, config, self.env)
+        if not _is_dict_config(result):
+            raise TypeError(
+                f"Template rendering must return a dictionary, got {type(result).__name__}"
+            )
+        return result
+
+
+@singledispatch
+def _render_templates(obj: Any, context: dict[str, Any], env: Any) -> Any:
+    """Recursively render Jinja2 templates.
+
+    Returns:
+        - For primitives: Returns the primitive unchanged
+        - For strings: Returns str | int | float | bool (with type coercion)
+        - For dicts: Returns dict[str, Any]
+        - For lists: Returns list[Any]
+    """
+    return obj
+
+
+@_render_templates.register(str)
+def _render_templates_str(obj: str, context: dict[str, Any], env: Any) -> str | int | float | bool:
+    """Render Jinja2 template in string."""
+    if "{{" not in obj and "{%" not in obj:
+        return obj
+
+    try:
+        rendered: str = env.from_string(obj).render(context)
+        # Type coercion
+        with suppress(ValueError):
+            return int(rendered)
+        with suppress(ValueError):
+            return float(rendered)
+        if rendered.lower() in ("true", "yes"):
+            return True
+        if rendered.lower() in ("false", "no"):
+            return False
+        return rendered
+    except TemplateSyntaxError as e:
+        raise YamlPipelineBuilderError(
+            f"Invalid Jinja2 template syntax: {e}\nTemplate: {obj}"
+        ) from e
+    except UndefinedError as e:
+        raise YamlPipelineBuilderError(
+            f"Undefined variable in template: {e}\nTemplate: {obj}"
+        ) from e
+
+
+@_render_templates.register(dict)
+def _render_templates_dict(obj: dict, context: dict[str, Any], env: Any) -> dict[str, Any]:
+    """Render templates in dict values.
+
+    Skip rendering for node spec fields and pipeline outputs to avoid conflicts between
+    YAML-level templating and runtime template strings.
+
+    Strategy:
+    - Node specs: Preserve for runtime rendering with dependency outputs
+    - Pipeline outputs: Preserve for runtime rendering with node results
+    - Metadata and config: Render at build time with YAML context
+    """
+    result = {}
+    for k, v in obj.items():
+        # Check if this is a node spec (not a Pipeline spec) by looking for the 'kind' sibling key
+        # Node kinds end with '_node' (e.g., 'prompt_node', 'llm_node', 'function_node')
+        # Pipeline kind is 'Pipeline' - we should NOT skip its spec
+        if (
+            k == "spec"
+            and isinstance(v, dict)
+            and "kind" in obj
+            and isinstance(obj.get("kind"), str)
+            and obj["kind"] != "Pipeline"
+        ):
+            # This is a node spec - preserve template strings for runtime rendering
+            result[k] = v  # Preserve entire spec as-is
+        # Also preserve 'outputs' field in Pipeline spec - references node results
+        elif k == "outputs" and isinstance(v, dict):
+            # Pipeline outputs reference node results (e.g., {{node.output}})
+            # Preserve for runtime rendering after nodes execute
+            result[k] = v
+        else:
+            result[k] = _render_templates(v, context, env)
+    return result
+
+
+@_render_templates.register(list)
+def _render_templates_list(obj: list, context: dict[str, Any], env: Any) -> list[Any]:
+    """Render templates in list items."""
+    return [_render_templates(item, context, env) for item in obj]
+
+
+# ============================================================================
+# Utilities
+# ============================================================================
+
+
+@lru_cache(maxsize=32)
+def _parse_yaml_cached(yaml_content: str) -> Any:
+    """Cached YAML parsing.
+
+    Returns dict[str, Any] in practice, but yaml.safe_load returns Any.
+    """
+    return yaml.safe_load(yaml_content)
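
For orientation, here is a minimal usage sketch of the builder this file introduces. Only the manifest shape (`apiVersion`/`kind`/`metadata`/`spec.nodes`), the `YamlPipelineBuilder.build_from_yaml_string` API, the full-module-path `kind` convention, and `${VAR:default}` resolution are taken from the diff above; the `LLMNode` spec fields are illustrative assumptions, since that factory's parameters live in `hexdag/builtin/nodes/llm_node.py` and are not shown here.

```python
# Minimal usage sketch (not part of the package); assumes only the APIs
# visible in the diff above. The LLMNode spec fields are hypothetical.
from hexdag.core.pipeline_builder.yaml_builder import YamlPipelineBuilder

PIPELINE_YAML = """
apiVersion: v1
kind: Pipeline
metadata:
  name: my-pipeline
spec:
  nodes:
    - kind: hexdag.builtin.nodes.LLMNode  # full module path, per NodeEntityPlugin
      metadata:
        name: analyzer
      spec:
        # Hypothetical fields; the real parameters are defined in llm_node.py.
        model: "${MODEL_NAME:gpt-4o-mini}"  # non-secret env var: resolved at build time
        template: "Summarize: {{input}}"    # node spec: preserved for runtime rendering
"""

builder = YamlPipelineBuilder()
graph, pipeline_config = builder.build_from_yaml_string(PIPELINE_YAML)
print(len(graph.nodes), pipeline_config.metadata.get("name"))
```

By contrast, a value such as `${OPENAI_API_KEY}` would be left as-is at build time: it matches the `*_API_KEY` pattern in `EnvironmentVariablePlugin.SECRET_PATTERNS` and is deferred for runtime secret resolution.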