hexdag-0.5.0.dev1-py3-none-any.whl
This diff shows the content of publicly available package versions released to one of the supported registries. It is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- hexdag/__init__.py +116 -0
- hexdag/__main__.py +30 -0
- hexdag/adapters/executors/__init__.py +5 -0
- hexdag/adapters/executors/local_executor.py +316 -0
- hexdag/builtin/__init__.py +6 -0
- hexdag/builtin/adapters/__init__.py +51 -0
- hexdag/builtin/adapters/anthropic/__init__.py +5 -0
- hexdag/builtin/adapters/anthropic/anthropic_adapter.py +151 -0
- hexdag/builtin/adapters/database/__init__.py +6 -0
- hexdag/builtin/adapters/database/csv/csv_adapter.py +249 -0
- hexdag/builtin/adapters/database/pgvector/__init__.py +5 -0
- hexdag/builtin/adapters/database/pgvector/pgvector_adapter.py +478 -0
- hexdag/builtin/adapters/database/sqlalchemy/sqlalchemy_adapter.py +252 -0
- hexdag/builtin/adapters/database/sqlite/__init__.py +5 -0
- hexdag/builtin/adapters/database/sqlite/sqlite_adapter.py +410 -0
- hexdag/builtin/adapters/local/README.md +59 -0
- hexdag/builtin/adapters/local/__init__.py +7 -0
- hexdag/builtin/adapters/local/local_observer_manager.py +696 -0
- hexdag/builtin/adapters/memory/__init__.py +47 -0
- hexdag/builtin/adapters/memory/file_memory_adapter.py +297 -0
- hexdag/builtin/adapters/memory/in_memory_memory.py +216 -0
- hexdag/builtin/adapters/memory/schemas.py +57 -0
- hexdag/builtin/adapters/memory/session_memory.py +178 -0
- hexdag/builtin/adapters/memory/sqlite_memory_adapter.py +215 -0
- hexdag/builtin/adapters/memory/state_memory.py +280 -0
- hexdag/builtin/adapters/mock/README.md +89 -0
- hexdag/builtin/adapters/mock/__init__.py +15 -0
- hexdag/builtin/adapters/mock/hexdag.toml +50 -0
- hexdag/builtin/adapters/mock/mock_database.py +225 -0
- hexdag/builtin/adapters/mock/mock_embedding.py +223 -0
- hexdag/builtin/adapters/mock/mock_llm.py +177 -0
- hexdag/builtin/adapters/mock/mock_tool_adapter.py +192 -0
- hexdag/builtin/adapters/mock/mock_tool_router.py +232 -0
- hexdag/builtin/adapters/openai/__init__.py +5 -0
- hexdag/builtin/adapters/openai/openai_adapter.py +634 -0
- hexdag/builtin/adapters/secret/__init__.py +7 -0
- hexdag/builtin/adapters/secret/local_secret_adapter.py +248 -0
- hexdag/builtin/adapters/unified_tool_router.py +280 -0
- hexdag/builtin/macros/__init__.py +17 -0
- hexdag/builtin/macros/conversation_agent.py +390 -0
- hexdag/builtin/macros/llm_macro.py +151 -0
- hexdag/builtin/macros/reasoning_agent.py +423 -0
- hexdag/builtin/macros/tool_macro.py +380 -0
- hexdag/builtin/nodes/__init__.py +38 -0
- hexdag/builtin/nodes/_discovery.py +123 -0
- hexdag/builtin/nodes/agent_node.py +696 -0
- hexdag/builtin/nodes/base_node_factory.py +242 -0
- hexdag/builtin/nodes/composite_node.py +926 -0
- hexdag/builtin/nodes/data_node.py +201 -0
- hexdag/builtin/nodes/expression_node.py +487 -0
- hexdag/builtin/nodes/function_node.py +454 -0
- hexdag/builtin/nodes/llm_node.py +491 -0
- hexdag/builtin/nodes/loop_node.py +920 -0
- hexdag/builtin/nodes/mapped_input.py +518 -0
- hexdag/builtin/nodes/port_call_node.py +269 -0
- hexdag/builtin/nodes/tool_call_node.py +195 -0
- hexdag/builtin/nodes/tool_utils.py +390 -0
- hexdag/builtin/prompts/__init__.py +68 -0
- hexdag/builtin/prompts/base.py +422 -0
- hexdag/builtin/prompts/chat_prompts.py +303 -0
- hexdag/builtin/prompts/error_correction_prompts.py +320 -0
- hexdag/builtin/prompts/tool_prompts.py +160 -0
- hexdag/builtin/tools/builtin_tools.py +84 -0
- hexdag/builtin/tools/database_tools.py +164 -0
- hexdag/cli/__init__.py +17 -0
- hexdag/cli/__main__.py +7 -0
- hexdag/cli/commands/__init__.py +27 -0
- hexdag/cli/commands/build_cmd.py +812 -0
- hexdag/cli/commands/create_cmd.py +208 -0
- hexdag/cli/commands/docs_cmd.py +293 -0
- hexdag/cli/commands/generate_types_cmd.py +252 -0
- hexdag/cli/commands/init_cmd.py +188 -0
- hexdag/cli/commands/pipeline_cmd.py +494 -0
- hexdag/cli/commands/plugin_dev_cmd.py +529 -0
- hexdag/cli/commands/plugins_cmd.py +441 -0
- hexdag/cli/commands/studio_cmd.py +101 -0
- hexdag/cli/commands/validate_cmd.py +221 -0
- hexdag/cli/main.py +84 -0
- hexdag/core/__init__.py +83 -0
- hexdag/core/config/__init__.py +20 -0
- hexdag/core/config/loader.py +479 -0
- hexdag/core/config/models.py +150 -0
- hexdag/core/configurable.py +294 -0
- hexdag/core/context/__init__.py +37 -0
- hexdag/core/context/execution_context.py +378 -0
- hexdag/core/docs/__init__.py +26 -0
- hexdag/core/docs/extractors.py +678 -0
- hexdag/core/docs/generators.py +890 -0
- hexdag/core/docs/models.py +120 -0
- hexdag/core/domain/__init__.py +10 -0
- hexdag/core/domain/dag.py +1225 -0
- hexdag/core/exceptions.py +234 -0
- hexdag/core/expression_parser.py +569 -0
- hexdag/core/logging.py +449 -0
- hexdag/core/models/__init__.py +17 -0
- hexdag/core/models/base.py +138 -0
- hexdag/core/orchestration/__init__.py +46 -0
- hexdag/core/orchestration/body_executor.py +481 -0
- hexdag/core/orchestration/components/__init__.py +97 -0
- hexdag/core/orchestration/components/adapter_lifecycle_manager.py +113 -0
- hexdag/core/orchestration/components/checkpoint_manager.py +134 -0
- hexdag/core/orchestration/components/execution_coordinator.py +360 -0
- hexdag/core/orchestration/components/health_check_manager.py +176 -0
- hexdag/core/orchestration/components/input_mapper.py +143 -0
- hexdag/core/orchestration/components/lifecycle_manager.py +583 -0
- hexdag/core/orchestration/components/node_executor.py +377 -0
- hexdag/core/orchestration/components/secret_manager.py +202 -0
- hexdag/core/orchestration/components/wave_executor.py +158 -0
- hexdag/core/orchestration/constants.py +17 -0
- hexdag/core/orchestration/events/README.md +312 -0
- hexdag/core/orchestration/events/__init__.py +104 -0
- hexdag/core/orchestration/events/batching.py +330 -0
- hexdag/core/orchestration/events/decorators.py +139 -0
- hexdag/core/orchestration/events/events.py +573 -0
- hexdag/core/orchestration/events/observers/__init__.py +30 -0
- hexdag/core/orchestration/events/observers/core_observers.py +690 -0
- hexdag/core/orchestration/events/observers/models.py +111 -0
- hexdag/core/orchestration/events/taxonomy.py +269 -0
- hexdag/core/orchestration/hook_context.py +237 -0
- hexdag/core/orchestration/hooks.py +437 -0
- hexdag/core/orchestration/models.py +418 -0
- hexdag/core/orchestration/orchestrator.py +910 -0
- hexdag/core/orchestration/orchestrator_factory.py +275 -0
- hexdag/core/orchestration/port_wrappers.py +327 -0
- hexdag/core/orchestration/prompt/__init__.py +32 -0
- hexdag/core/orchestration/prompt/template.py +332 -0
- hexdag/core/pipeline_builder/__init__.py +21 -0
- hexdag/core/pipeline_builder/component_instantiator.py +386 -0
- hexdag/core/pipeline_builder/include_tag.py +265 -0
- hexdag/core/pipeline_builder/pipeline_config.py +133 -0
- hexdag/core/pipeline_builder/py_tag.py +223 -0
- hexdag/core/pipeline_builder/tag_discovery.py +268 -0
- hexdag/core/pipeline_builder/yaml_builder.py +1196 -0
- hexdag/core/pipeline_builder/yaml_validator.py +569 -0
- hexdag/core/ports/__init__.py +65 -0
- hexdag/core/ports/api_call.py +133 -0
- hexdag/core/ports/database.py +489 -0
- hexdag/core/ports/embedding.py +215 -0
- hexdag/core/ports/executor.py +237 -0
- hexdag/core/ports/file_storage.py +117 -0
- hexdag/core/ports/healthcheck.py +87 -0
- hexdag/core/ports/llm.py +551 -0
- hexdag/core/ports/memory.py +70 -0
- hexdag/core/ports/observer_manager.py +130 -0
- hexdag/core/ports/secret.py +145 -0
- hexdag/core/ports/tool_router.py +94 -0
- hexdag/core/ports_builder.py +623 -0
- hexdag/core/protocols.py +273 -0
- hexdag/core/resolver.py +304 -0
- hexdag/core/schema/__init__.py +9 -0
- hexdag/core/schema/generator.py +742 -0
- hexdag/core/secrets.py +242 -0
- hexdag/core/types.py +413 -0
- hexdag/core/utils/async_warnings.py +206 -0
- hexdag/core/utils/schema_conversion.py +78 -0
- hexdag/core/utils/sql_validation.py +86 -0
- hexdag/core/validation/secure_json.py +148 -0
- hexdag/core/yaml_macro.py +517 -0
- hexdag/mcp_server.py +3120 -0
- hexdag/studio/__init__.py +10 -0
- hexdag/studio/build_ui.py +92 -0
- hexdag/studio/server/__init__.py +1 -0
- hexdag/studio/server/main.py +100 -0
- hexdag/studio/server/routes/__init__.py +9 -0
- hexdag/studio/server/routes/execute.py +208 -0
- hexdag/studio/server/routes/export.py +558 -0
- hexdag/studio/server/routes/files.py +207 -0
- hexdag/studio/server/routes/plugins.py +419 -0
- hexdag/studio/server/routes/validate.py +220 -0
- hexdag/studio/ui/index.html +13 -0
- hexdag/studio/ui/package-lock.json +2992 -0
- hexdag/studio/ui/package.json +31 -0
- hexdag/studio/ui/postcss.config.js +6 -0
- hexdag/studio/ui/public/hexdag.svg +5 -0
- hexdag/studio/ui/src/App.tsx +251 -0
- hexdag/studio/ui/src/components/Canvas.tsx +408 -0
- hexdag/studio/ui/src/components/ContextMenu.tsx +187 -0
- hexdag/studio/ui/src/components/FileBrowser.tsx +123 -0
- hexdag/studio/ui/src/components/Header.tsx +181 -0
- hexdag/studio/ui/src/components/HexdagNode.tsx +193 -0
- hexdag/studio/ui/src/components/NodeInspector.tsx +512 -0
- hexdag/studio/ui/src/components/NodePalette.tsx +262 -0
- hexdag/studio/ui/src/components/NodePortsSection.tsx +403 -0
- hexdag/studio/ui/src/components/PluginManager.tsx +347 -0
- hexdag/studio/ui/src/components/PortsEditor.tsx +481 -0
- hexdag/studio/ui/src/components/PythonEditor.tsx +195 -0
- hexdag/studio/ui/src/components/ValidationPanel.tsx +105 -0
- hexdag/studio/ui/src/components/YamlEditor.tsx +196 -0
- hexdag/studio/ui/src/components/index.ts +8 -0
- hexdag/studio/ui/src/index.css +92 -0
- hexdag/studio/ui/src/main.tsx +10 -0
- hexdag/studio/ui/src/types/index.ts +123 -0
- hexdag/studio/ui/src/vite-env.d.ts +1 -0
- hexdag/studio/ui/tailwind.config.js +29 -0
- hexdag/studio/ui/tsconfig.json +37 -0
- hexdag/studio/ui/tsconfig.node.json +13 -0
- hexdag/studio/ui/vite.config.ts +35 -0
- hexdag/visualization/__init__.py +69 -0
- hexdag/visualization/dag_visualizer.py +1020 -0
- hexdag-0.5.0.dev1.dist-info/METADATA +369 -0
- hexdag-0.5.0.dev1.dist-info/RECORD +261 -0
- hexdag-0.5.0.dev1.dist-info/WHEEL +4 -0
- hexdag-0.5.0.dev1.dist-info/entry_points.txt +4 -0
- hexdag-0.5.0.dev1.dist-info/licenses/LICENSE +190 -0
- hexdag_plugins/.gitignore +43 -0
- hexdag_plugins/README.md +73 -0
- hexdag_plugins/__init__.py +1 -0
- hexdag_plugins/azure/LICENSE +21 -0
- hexdag_plugins/azure/README.md +414 -0
- hexdag_plugins/azure/__init__.py +21 -0
- hexdag_plugins/azure/azure_blob_adapter.py +450 -0
- hexdag_plugins/azure/azure_cosmos_adapter.py +383 -0
- hexdag_plugins/azure/azure_keyvault_adapter.py +314 -0
- hexdag_plugins/azure/azure_openai_adapter.py +415 -0
- hexdag_plugins/azure/pyproject.toml +107 -0
- hexdag_plugins/azure/tests/__init__.py +1 -0
- hexdag_plugins/azure/tests/test_azure_blob_adapter.py +350 -0
- hexdag_plugins/azure/tests/test_azure_cosmos_adapter.py +323 -0
- hexdag_plugins/azure/tests/test_azure_keyvault_adapter.py +330 -0
- hexdag_plugins/azure/tests/test_azure_openai_adapter.py +329 -0
- hexdag_plugins/hexdag_etl/README.md +168 -0
- hexdag_plugins/hexdag_etl/__init__.py +53 -0
- hexdag_plugins/hexdag_etl/examples/01_simple_pandas_transform.py +270 -0
- hexdag_plugins/hexdag_etl/examples/02_simple_pandas_only.py +149 -0
- hexdag_plugins/hexdag_etl/examples/03_file_io_pipeline.py +109 -0
- hexdag_plugins/hexdag_etl/examples/test_pandas_transform.py +84 -0
- hexdag_plugins/hexdag_etl/hexdag.toml +25 -0
- hexdag_plugins/hexdag_etl/hexdag_etl/__init__.py +48 -0
- hexdag_plugins/hexdag_etl/hexdag_etl/nodes/__init__.py +13 -0
- hexdag_plugins/hexdag_etl/hexdag_etl/nodes/api_extract.py +230 -0
- hexdag_plugins/hexdag_etl/hexdag_etl/nodes/base_node_factory.py +181 -0
- hexdag_plugins/hexdag_etl/hexdag_etl/nodes/file_io.py +415 -0
- hexdag_plugins/hexdag_etl/hexdag_etl/nodes/outlook.py +492 -0
- hexdag_plugins/hexdag_etl/hexdag_etl/nodes/pandas_transform.py +563 -0
- hexdag_plugins/hexdag_etl/hexdag_etl/nodes/sql_extract_load.py +112 -0
- hexdag_plugins/hexdag_etl/pyproject.toml +82 -0
- hexdag_plugins/hexdag_etl/test_transform.py +54 -0
- hexdag_plugins/hexdag_etl/tests/test_plugin_integration.py +62 -0
- hexdag_plugins/mysql_adapter/LICENSE +21 -0
- hexdag_plugins/mysql_adapter/README.md +224 -0
- hexdag_plugins/mysql_adapter/__init__.py +6 -0
- hexdag_plugins/mysql_adapter/mysql_adapter.py +408 -0
- hexdag_plugins/mysql_adapter/pyproject.toml +93 -0
- hexdag_plugins/mysql_adapter/tests/test_mysql_adapter.py +259 -0
- hexdag_plugins/storage/README.md +184 -0
- hexdag_plugins/storage/__init__.py +19 -0
- hexdag_plugins/storage/file/__init__.py +5 -0
- hexdag_plugins/storage/file/local.py +325 -0
- hexdag_plugins/storage/ports/__init__.py +5 -0
- hexdag_plugins/storage/ports/vector_store.py +236 -0
- hexdag_plugins/storage/sql/__init__.py +7 -0
- hexdag_plugins/storage/sql/base.py +187 -0
- hexdag_plugins/storage/sql/mysql.py +27 -0
- hexdag_plugins/storage/sql/postgresql.py +27 -0
- hexdag_plugins/storage/tests/__init__.py +1 -0
- hexdag_plugins/storage/tests/test_local_file_storage.py +161 -0
- hexdag_plugins/storage/tests/test_sql_adapters.py +212 -0
- hexdag_plugins/storage/vector/__init__.py +7 -0
- hexdag_plugins/storage/vector/chromadb.py +223 -0
- hexdag_plugins/storage/vector/in_memory.py +285 -0
- hexdag_plugins/storage/vector/pgvector.py +502 -0

hexdag/core/pipeline_builder/yaml_builder.py
@@ -0,0 +1,1196 @@
"""Simplified YAML Pipeline Builder with Plugin Architecture.

Philosophy:
- Core builder: Minimal YAML → DirectedGraph parser
- Preprocessing plugins: Environment variables, templating, validation
- Entity plugins: Macros, nodes (each entity type handles its own YAML)

Plugins provide clear value:
- EnvironmentVariablePlugin: Resolve ${VAR} and ${VAR:default}
- TemplatePlugin: Jinja2 templating in YAML values
- SchemaValidationPlugin: Validate schemas before execution
"""

import os
import re
from contextlib import contextmanager, suppress
from functools import lru_cache, singledispatch
from pathlib import Path
from typing import TYPE_CHECKING, Any, Protocol, TypeGuard, cast

import yaml
from jinja2 import TemplateSyntaxError, UndefinedError
from jinja2.sandbox import SandboxedEnvironment

from hexdag.core.configurable import ConfigurableMacro
from hexdag.core.domain.dag import DirectedGraph, NodeSpec
from hexdag.core.logging import get_logger
from hexdag.core.pipeline_builder.pipeline_config import PipelineConfig
from hexdag.core.pipeline_builder.yaml_validator import YamlValidator
from hexdag.core.resolver import ResolveError, register_runtime, resolve

if TYPE_CHECKING:
    from collections.abc import Callable

logger = get_logger(__name__)


class YamlPipelineBuilderError(Exception):
    """YAML pipeline building errors."""


# ============================================================================
# Type Guards
# ============================================================================


def _is_dict_config(value: Any) -> TypeGuard[dict[str, Any]]:
    """Type guard to verify value is a dictionary."""
    return isinstance(value, dict)


# ============================================================================
# Plugin Protocol
# ============================================================================


class PreprocessPlugin(Protocol):
    """Plugin for preprocessing YAML before building (env vars, templating, etc.)."""

    def process(self, config: dict[str, Any]) -> dict[str, Any]:
        """Process entire config, returning modified version."""
        ...


class EntityPlugin(Protocol):
    """Plugin for building specific entity types (macros, nodes, etc.)."""

    def can_handle(self, node_config: dict[str, Any]) -> bool:
        """Return True if this plugin can handle the node config."""
        ...

    def build(
        self, node_config: dict[str, Any], builder: "YamlPipelineBuilder", graph: DirectedGraph
    ) -> NodeSpec | None:
        """Build entity from config.

        Args:
            node_config: The node configuration dictionary.
            builder: The YamlPipelineBuilder instance.
            graph: The DirectedGraph instance.

        Returns:
            NodeSpec, or None if the entity was already handled (e.g., a macro
            merged into the graph).
        """
        ...
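
# Hypothetical sketch: because PreprocessPlugin and EntityPlugin are
# structural Protocols, any object with the matching methods satisfies the
# contract and can be appended to a builder's plugin lists. The plugin
# class and example function below are illustrative, not part of this
# package.
class _DebugKindPlugin:
    """Toy preprocess plugin: logs the manifest kind, returns config unchanged."""

    def process(self, config: dict[str, Any]) -> dict[str, Any]:
        logger.debug(f"Preprocessing manifest of kind: {config.get('kind')}")
        return config


def _example_register_custom_plugin() -> None:
    builder = YamlPipelineBuilder()
    builder.preprocess_plugins.append(_DebugKindPlugin())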


# ============================================================================
# Core Builder
# ============================================================================


class YamlPipelineBuilder:
    """YAML → DirectedGraph builder with plugin support.

    Workflow:
    1. Parse YAML
    2. Select environment
    3. Validate structure
    4. Run preprocessing plugins (env vars, templates)
    5. Build graph using entity plugins
    6. Extract pipeline config
    """

    def __init__(self, base_path: Path | None = None) -> None:
        """Initialize builder.

        Args:
            base_path: Base directory for resolving includes (default: cwd)
        """
        self.base_path = base_path or Path.cwd()
        self.validator = YamlValidator()

        # Plugins
        self.preprocess_plugins: list[PreprocessPlugin] = []
        self.entity_plugins: list[EntityPlugin] = []

        self._register_default_plugins()

    def _register_default_plugins(self) -> None:
        """Register default plugins for common use cases."""
        # Preprocessing plugins (run before building)
        self.preprocess_plugins.append(IncludePreprocessPlugin(base_path=self.base_path))
        self.preprocess_plugins.append(EnvironmentVariablePlugin())
        self.preprocess_plugins.append(TemplatePlugin())

        # Entity plugins (build specific entity types)
        self.entity_plugins.append(MacroDefinitionPlugin())  # Process Macro definitions first
        self.entity_plugins.append(MacroEntityPlugin())  # Then macro invocations
        self.entity_plugins.append(NodeEntityPlugin(self))  # Finally regular nodes

    @contextmanager
    def _temporary_base_path(self, new_base: Path) -> Any:
        """Context manager for temporarily changing base_path.

        Args:
            new_base: Temporary base path to use

        Yields:
            None
        """
        original_base = self.base_path
        self.base_path = new_base

        # Update include plugin base paths
        for plugin in self.preprocess_plugins:
            if isinstance(plugin, IncludePreprocessPlugin):
                plugin.base_path = new_base

        try:
            yield
        finally:
            # Always restore original state
            self.base_path = original_base
            for plugin in self.preprocess_plugins:
                if isinstance(plugin, IncludePreprocessPlugin):
                    plugin.base_path = original_base

    # --- Public API ---

    def build_from_yaml_file(
        self, yaml_path: str, use_cache: bool = True
    ) -> tuple[DirectedGraph, PipelineConfig]:
        """Build from YAML file.

        Args:
            yaml_path: Path to YAML file
            use_cache: Whether to use cached YAML parsing

        Returns:
            Tuple of (DirectedGraph, PipelineConfig)
        """
        yaml_file = Path(yaml_path)
        yaml_content = yaml_file.read_text(encoding="utf-8")

        # Use context manager to temporarily change base_path for relative includes
        with self._temporary_base_path(yaml_file.parent):
            return self.build_from_yaml_string(yaml_content, use_cache=use_cache)

    def build_from_yaml_string(
        self, yaml_content: str, use_cache: bool = True, environment: str | None = None
    ) -> tuple[DirectedGraph, PipelineConfig]:
        """Build DirectedGraph + PipelineConfig from YAML string."""
        # Step 1: Parse YAML
        documents = self._parse_yaml(yaml_content, use_cache=use_cache)

        # Step 2: Process ALL documents for macro definitions first
        # This allows macros to be defined in multi-document YAML
        # before the pipeline that uses them
        for doc in documents:
            if isinstance(doc, dict) and doc.get("kind") == "Macro":
                # Process includes for macro definitions (but skip template rendering)
                # Templates in macros should be preserved for expansion-time rendering
                processed_doc = doc
                for plugin in self.preprocess_plugins:
                    # Skip TemplatePlugin for Macro definitions - templates should be preserved
                    if not isinstance(plugin, TemplatePlugin):
                        processed_doc = plugin.process(processed_doc)
                # Validate and process macro definition
                processed_doc = self._validate_config(processed_doc)
                # Register macro (MacroDefinitionPlugin handles this)
                self._process_macro_definitions([processed_doc])

        # Step 3: Select pipeline environment
        config = self._select_environment(documents, environment)

        # Skip if selected document is a Macro (already processed above)
        if config.get("kind") == "Macro":
            # Return empty graph for macro-only documents
            logger.info("Document contains only macro definitions, no pipeline to build")
            return DirectedGraph(), PipelineConfig(
                ports={}, type_ports={}, policies={}, metadata={}, nodes=[]
            )

        # Step 4: Preprocess FIRST (includes must resolve before validation)
        for plugin in self.preprocess_plugins:
            config = plugin.process(config)

        # Step 4.5: Register user-defined aliases BEFORE validation
        # so that custom node kinds can pass validation
        self._register_aliases(config)

        # Step 5: Validate structure (after includes are resolved)
        config = self._validate_config(config)

        # Step 6: Build graph
        graph = self._build_graph(config)

        # Step 7: Extract pipeline config
        pipeline_config = self._extract_pipeline_config(config)

        logger.info(
            "✅ Built pipeline '{name}' with {nodes} nodes, {ports} ports, {policies} policies",
            name=pipeline_config.metadata.get("name", "unknown"),
            nodes=len(graph.nodes),
            ports=len(pipeline_config.ports),
            policies=len(pipeline_config.policies),
        )

        return graph, pipeline_config

    # --- Core Logic ---

    def _parse_yaml(self, yaml_content: str, use_cache: bool) -> list[dict[str, Any]]:
        """Parse YAML into list of documents."""
        if "---" in yaml_content:
            return list(yaml.safe_load_all(yaml_content))
        parsed = _parse_yaml_cached(yaml_content) if use_cache else yaml.safe_load(yaml_content)
        return [parsed]

    def _select_environment(
        self, documents: list[dict[str, Any]], environment: str | None
    ) -> dict[str, Any]:
        """Select document by environment name.

        Skips Macro definitions when selecting the Pipeline document.
        """
        # Filter out Macro definitions - they're processed separately
        pipeline_docs = [doc for doc in documents if doc.get("kind") != "Macro"]

        if not pipeline_docs:
            # No pipeline documents, return first macro (for macro-only files)
            return documents[0]

        if environment:
            for doc in pipeline_docs:
                if doc.get("metadata", {}).get("namespace") == environment:
                    logger.info(f"Selected environment '{environment}' from multi-document YAML")
                    return doc

            available_envs = [
                doc.get("metadata", {}).get("namespace", "default") for doc in pipeline_docs
            ]
            raise YamlPipelineBuilderError(
                f"Environment '{environment}' not found in YAML. "
                f"Available environments: {', '.join(available_envs)}"
            )

        if len(pipeline_docs) > 1:
            logger.warning(
                f"Multi-document YAML detected ({len(pipeline_docs)} pipeline documents) "
                "but no environment specified. "
                "Using first pipeline. Specify environment parameter to select specific config."
            )

        return pipeline_docs[0]

    def _validate_config(self, config: Any) -> dict[str, Any]:
        """Validate YAML structure."""
        if not isinstance(config, dict):
            raise YamlPipelineBuilderError(
                f"YAML document must be a dictionary, got {type(config).__name__}"
            )

        self._validate_manifest_format(config)

        result = self.validator.validate(config)
        if not result.is_valid:
            errors = "\n".join(f"  ERROR: {error}" for error in result.errors)
            raise YamlPipelineBuilderError(f"YAML validation failed:\n{errors}")

        for warning in result.warnings:
            logger.warning(f"YAML validation warning: {warning}")

        return config

    @staticmethod
    def _validate_manifest_format(config: dict[str, Any]) -> None:
        """Validate declarative manifest format."""
        if "kind" not in config:
            raise YamlPipelineBuilderError(
                "YAML must use declarative manifest format with 'kind' field. "
                "Example:\n"
                "apiVersion: v1\n"
                "kind: Pipeline\n"
                "metadata:\n"
                "  name: my-pipeline\n"
                "spec:\n"
                "  nodes: [...]"
            )

        # Macro definitions have different structure (no spec field)
        kind = config.get("kind")
        if kind == "Macro":
            # Macro has: metadata, parameters, nodes (no spec)
            if "metadata" not in config:
                raise YamlPipelineBuilderError("Macro definition must have 'metadata' field")
            if "nodes" not in config:
                raise YamlPipelineBuilderError("Macro definition must have 'nodes' field")
        else:
            # Pipeline and other kinds require spec
            if "spec" not in config:
                raise YamlPipelineBuilderError("Manifest YAML must have 'spec' field")

            if "metadata" not in config:
                raise YamlPipelineBuilderError("Manifest YAML must have 'metadata' field")

    def _process_macro_definitions(self, macro_configs: list[dict[str, Any]]) -> None:
        """Process macro definitions and register them.

        Parameters
        ----------
        macro_configs : list[dict[str, Any]]
            List of validated macro configuration dictionaries
        """
        # Create temporary graph (not used for macros, but required by plugin interface)
        temp_graph = DirectedGraph()

        # Find MacroDefinitionPlugin
        macro_plugin = next(
            (p for p in self.entity_plugins if isinstance(p, MacroDefinitionPlugin)), None
        )

        if macro_plugin is None:
            raise YamlPipelineBuilderError(
                "MacroDefinitionPlugin not found. Cannot process macro definitions."
            )

        for macro_config in macro_configs:
            # Wrap in a fake node config format expected by entity plugin
            node_config = macro_config
            macro_plugin.build(node_config, self, temp_graph)

    def _register_aliases(self, config: dict[str, Any]) -> None:
        """Register user-defined aliases from spec.aliases before validation.

        This allows custom node kinds to be used in YAML without requiring
        full module paths.

        Parameters
        ----------
        config : dict[str, Any]
            The YAML configuration dict
        """
        from hexdag.core.resolver import register_alias

        spec = config.get("spec", {})
        aliases = spec.get("aliases", {})

        for alias, full_path in aliases.items():
            register_alias(alias, full_path)
            logger.debug(f"Registered alias: {alias} -> {full_path}")

    def _build_graph(self, config: dict[str, Any]) -> DirectedGraph:
        """Build DirectedGraph using entity plugins."""
        graph = DirectedGraph()
        spec = config.get("spec", {})
        nodes_list = spec.get("nodes", [])

        for node_config in nodes_list:
            # Find plugin that can handle this entity
            for plugin in self.entity_plugins:
                if plugin.can_handle(node_config):
                    result = plugin.build(node_config, self, graph)
                    if result is not None:
                        graph += result
                    break
            else:
                # No plugin handled it - error
                kind = node_config.get("kind", "unknown")
                raise YamlPipelineBuilderError(f"No plugin can handle kind: {kind}")

        return graph

    @staticmethod
    def _extract_pipeline_config(config: dict[str, Any]) -> PipelineConfig:
        """Extract PipelineConfig from YAML."""
        spec = config.get("spec", {})
        metadata = config.get("metadata", {})

        return PipelineConfig(
            ports=spec.get("ports", {}),
            type_ports=spec.get("type_ports", {}),
            policies=spec.get("policies", {}),
            metadata=metadata,
            nodes=spec.get("nodes", []),
        )
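
# Hypothetical usage sketch of the workflow documented on
# YamlPipelineBuilder (parse -> select environment -> preprocess ->
# validate -> build -> extract config). The node kind and fn value below
# are illustrative; real kinds must resolve via hexdag.core.resolver.
def _example_build_pipeline() -> tuple[DirectedGraph, PipelineConfig]:
    pipeline_yaml = """
    apiVersion: v1
    kind: Pipeline
    metadata:
      name: my-pipeline
    spec:
      nodes:
        - kind: hexdag.builtin.nodes.FunctionNode
          metadata:
            name: greet
          spec:
            fn: my_module.greet
    """
    builder = YamlPipelineBuilder()
    graph, pipeline_config = builder.build_from_yaml_string(pipeline_yaml)
    return graph, pipeline_config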


# ============================================================================
# Entity Plugins - Each handles one entity type
# ============================================================================


class MacroDefinitionPlugin:
    """Plugin for handling Macro definitions (kind: Macro).

    This plugin processes YAML macro definitions and registers them in the
    component registry for later invocation. Macro definitions don't add
    nodes to the graph - they just register reusable templates.

    Examples
    --------
    YAML macro definition::

        apiVersion: hexdag/v1
        kind: Macro
        metadata:
          name: retry_workflow
          description: Retry logic with exponential backoff
        parameters:
          - name: max_retries
            type: int
            default: 3
        nodes:
          - kind: function_node
            metadata:
              name: "{{name}}_attempt"
            spec:
              fn: "{{fn}}"
    """

    def can_handle(self, node_config: dict[str, Any]) -> bool:
        """Handle Macro kind."""
        return node_config.get("kind") == "Macro"

    def build(
        self, node_config: dict[str, Any], builder: YamlPipelineBuilder, graph: DirectedGraph
    ) -> NodeSpec | None:
        """Register YAML macro in component registry.

        Parameters
        ----------
        node_config : dict[str, Any]
            Macro definition configuration
        builder : YamlPipelineBuilder
            Builder instance (unused)
        graph : DirectedGraph
            Graph instance (unused - definitions don't add nodes)

        Returns
        -------
        None
            Macro definitions don't add nodes to the graph

        Raises
        ------
        YamlPipelineBuilderError
            If macro definition is invalid
        """
        # Import here to avoid circular dependency
        from hexdag.core.yaml_macro import YamlMacro, YamlMacroConfig, YamlMacroParameterSpec

        # Extract metadata
        metadata = node_config.get("metadata", {})
        macro_name = metadata.get("name")
        if not macro_name:
            raise YamlPipelineBuilderError("Macro definition missing 'metadata.name'")

        macro_description = metadata.get("description")
        _ = metadata.get("namespace", "user")  # Reserved for future namespace support

        # Extract parameters
        raw_parameters = node_config.get("parameters", [])
        parameters = [YamlMacroParameterSpec(**p) for p in raw_parameters]

        # Extract nodes
        nodes = node_config.get("nodes", [])
        if not nodes:
            raise YamlPipelineBuilderError(
                f"Macro '{macro_name}' has no nodes. Macros must define at least one node."
            )

        # Extract outputs (optional)
        outputs = node_config.get("outputs")

        # Create YamlMacroConfig
        macro_config = YamlMacroConfig(
            macro_name=macro_name,
            macro_description=macro_description,
            parameters=parameters,
            nodes=nodes,
            outputs=outputs,
        )

        # Register macro at runtime
        # Create a dynamic class that pre-fills the YamlMacro config
        config_dict = macro_config.model_dump()

        class DynamicYamlMacro(YamlMacro):
            """Dynamically generated YamlMacro with pre-filled configuration."""

            def __init__(self, **kwargs: Any) -> None:
                # Merge pre-filled config with any override kwargs
                merged_config = {**config_dict, **kwargs}
                super().__init__(**merged_config)

        # Set class name for better debugging
        DynamicYamlMacro.__name__ = f"YamlMacro_{macro_name}"
        DynamicYamlMacro.__qualname__ = f"YamlMacro_{macro_name}"

        # Register in runtime storage (for YAML-defined macros)
        register_runtime(macro_name, DynamicYamlMacro)

        logger.info(
            f"✅ Registered YAML macro '{macro_name}' "
            f"({len(parameters)} parameters, {len(nodes)} nodes)"
        )

        # Return None - macro definitions don't add nodes to the graph
        return None


class MacroEntityPlugin:
    """Plugin for handling macro_invocation entities."""

    def can_handle(self, node_config: dict[str, Any]) -> bool:
        """Handle macro_invocation kind."""
        return node_config.get("kind") == "macro_invocation"

    def build(
        self, node_config: dict[str, Any], builder: YamlPipelineBuilder, graph: DirectedGraph
    ) -> NodeSpec | None:
        """Expand macro into subgraph and merge into main graph."""
        instance_name = node_config["metadata"]["name"]
        spec = node_config.get("spec", {})
        macro_ref = spec.get("macro")
        if not macro_ref:
            raise YamlPipelineBuilderError(f"Macro '{instance_name}' missing spec.macro field")

        # macro_ref is the full module path (e.g., hexdag.builtin.macros.ReasoningAgentMacro)
        # or a runtime-registered name for YAML-defined macros

        # Get config params for macro initialization
        config_params = spec.get("config", {}).copy()
        inputs = spec.get("inputs", {})
        dependencies = spec.get("dependencies", [])

        # Resolve macro class - either full module path or runtime-registered name
        try:
            macro_cls = resolve(macro_ref)
        except ResolveError as e:
            raise YamlPipelineBuilderError(f"Macro '{macro_ref}' not found: {e}") from e

        # Instantiate macro with config params
        try:
            macro_instance_obj = macro_cls(**config_params)
        except Exception as e:
            raise YamlPipelineBuilderError(f"Failed to instantiate macro '{macro_ref}': {e}") from e

        # Validate it's actually a macro
        if not isinstance(macro_instance_obj, ConfigurableMacro):
            type_name = type(macro_instance_obj).__name__
            raise YamlPipelineBuilderError(
                f"Component '{macro_ref}' is not a ConfigurableMacro (got {type_name})"
            )

        macro_instance: ConfigurableMacro = macro_instance_obj

        # Expand macro - merge config and inputs for validation
        # config params are the parameter values for the macro
        # inputs are the value mappings, so they should be merged
        macro_inputs = {**config_params, **inputs}

        # Expand macro
        try:
            subgraph = macro_instance.expand(
                instance_name=instance_name, inputs=macro_inputs, dependencies=dependencies
            )
        except ValueError:
            # Re-raise validation errors directly (e.g., required parameter, enum validation)
            raise
        except Exception as e:
            raise YamlPipelineBuilderError(
                f"Failed to expand macro '{macro_ref}' (instance '{instance_name}'): {e}"
            ) from e

        # Merge subgraph into main graph
        self._merge_subgraph(graph, subgraph, dependencies)

        logger.info(
            "✅ Expanded macro '{macro}' as '{instance}' ({nodes} nodes)",
            macro=macro_ref,
            instance=instance_name,
            nodes=len(subgraph.nodes),
        )

        # Return None - subgraph already merged into graph
        return None

    @staticmethod
    def _merge_subgraph(
        graph: DirectedGraph, subgraph: DirectedGraph, external_deps: list[str]
    ) -> None:
        """Merge subgraph into main graph with external dependencies.

        Optimized to avoid unnecessary graph copies when no external dependencies exist.
        """
        if not external_deps:
            # Fast path: direct merge when no external dependencies
            graph |= subgraph
        else:
            # Only process entry nodes that need external dependencies
            # Use in-place merge for better performance
            for node in subgraph.nodes.values():
                if not subgraph.get_dependencies(node.name):
                    # Entry node - add external dependencies
                    graph += node.after(*external_deps)
                else:
                    # Internal node - add as-is
                    graph += node
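
# Hypothetical sketch of the node config shape MacroEntityPlugin.build
# expects. spec.macro may be a full module path or a name registered at
# runtime by MacroDefinitionPlugin (e.g. the retry_workflow definition in
# that class's docstring); the remaining values are illustrative.
_EXAMPLE_MACRO_INVOCATION: dict[str, Any] = {
    "kind": "macro_invocation",
    "metadata": {"name": "retry_fetch"},
    "spec": {
        "macro": "retry_workflow",  # resolved via resolve()
        "config": {"max_retries": 5},  # constructor params for the macro class
        "inputs": {"fn": "my_module.fetch"},  # value mappings merged into expand()
        "dependencies": ["setup"],  # external deps wired onto entry nodes
    },
}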


class NodeEntityPlugin:
    """Plugin for handling all node types (llm, function, agent, etc.)."""

    def __init__(self, builder: YamlPipelineBuilder):
        """Initialize with reference to builder for shared state."""
        self.builder = builder

    def can_handle(self, node_config: dict[str, Any]) -> bool:
        """Handle everything except macro_invocation."""
        return node_config.get("kind") != "macro_invocation"

    def build(
        self, node_config: dict[str, Any], builder: YamlPipelineBuilder, graph: DirectedGraph
    ) -> NodeSpec:
        """Build node from config.

        The 'kind' field must be a full module path to the node factory class.
        Example: hexdag.builtin.nodes.LLMNode
        """
        # Validate structure
        if "kind" not in node_config:
            raise YamlPipelineBuilderError("Node missing 'kind' field")
        if "metadata" not in node_config or "name" not in node_config["metadata"]:
            raise YamlPipelineBuilderError(
                f"Node '{node_config.get('kind')}' missing metadata.name"
            )

        kind = node_config["kind"]
        node_id = node_config["metadata"]["name"]
        spec = node_config.get("spec", {}).copy()
        # Dependencies can be at node level or inside spec (for backwards compatibility)
        deps = node_config.get("dependencies", []) or spec.pop("dependencies", [])

        # Resolve factory class from full module path
        try:
            factory_obj = resolve(kind)
        except ResolveError as e:
            raise YamlPipelineBuilderError(f"Cannot resolve node kind '{kind}': {e}") from e

        # Validate it's callable
        if not callable(factory_obj):
            raise YamlPipelineBuilderError(
                f"Node factory '{kind}' is not callable (got {type(factory_obj).__name__})"
            )

        # Handle factory classes vs factory functions
        # Factory classes need to be instantiated first, then called
        # Factory functions can be called directly
        if isinstance(factory_obj, type):
            # It's a class - instantiate then call
            factory_instance = factory_obj()
            factory = cast("Callable[..., NodeSpec]", factory_instance)
        else:
            # It's already a callable (function or instance)
            factory = cast("Callable[..., NodeSpec]", factory_obj)  # type: ignore[unreachable]

        # Create node - pass name as first positional arg
        node: NodeSpec = factory(node_id, **spec)

        # Add dependencies
        if deps:
            node = node.after(*deps) if isinstance(deps, list) else node.after(deps)

        return node
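
# Hypothetical sketch of a plain node config handled by NodeEntityPlugin:
# 'kind' must resolve to a factory class or callable returning a NodeSpec,
# and dependencies may sit at node level or (for backwards compatibility)
# inside spec. The spec contents are illustrative.
_EXAMPLE_NODE_CONFIG: dict[str, Any] = {
    "kind": "hexdag.builtin.nodes.LLMNode",  # full module path (or registered alias)
    "metadata": {"name": "analyzer"},
    "spec": {"template": "{{input}}"},
    "dependencies": ["loader"],
}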


# ============================================================================
# Preprocessing Plugins
# ============================================================================


class IncludePreprocessPlugin:
    """Resolve !include directives for YAML file inclusion.

    Supports two syntaxes:
    1. Simple include: !include path/to/file.yaml
    2. Anchor include: !include path/to/file.yaml#anchor_name

    Security:
    - Only allows relative paths (no absolute paths)
    - Prevents directory traversal attacks (no ../ beyond project root)
    - Detects circular includes

    For comprehensive examples, see notebooks/03_yaml_includes_and_composition.ipynb
    """

    def __init__(self, base_path: Path | None = None, max_depth: int = 10):
        """Initialize include plugin.

        Args:
            base_path: Base directory for relative includes (changeable via context manager)
            max_depth: Maximum include nesting depth to prevent circular includes
        """
        self.base_path = base_path or Path.cwd()
        self.project_root = self.base_path  # Fixed project root for security validation
        self.max_depth = max_depth

    def process(self, config: dict[str, Any]) -> dict[str, Any]:
        """Process !include directives recursively."""
        # Create new include stack for this processing run (thread-safe)
        include_stack: list[Path] = []
        result = self._resolve_includes(
            config, self.base_path, depth=0, include_stack=include_stack
        )
        if not _is_dict_config(result):
            raise TypeError(
                f"Include processing must return a dictionary, got {type(result).__name__}. "
                "Check that your included files resolve to valid YAML dictionaries."
            )
        return result

    def _resolve_includes(
        self, obj: Any, current_base: Path, depth: int, include_stack: list[Path]
    ) -> dict[str, Any] | list[Any] | Any:
        """Recursively resolve !include directives.

        Args:
            obj: Object to process (dict, list, or primitive)
            current_base: Base path for resolving relative includes
            depth: Current recursion depth
            include_stack: Stack of currently processing files (for circular detection)

        Returns:
            Processed object with includes resolved
        """
        if depth > self.max_depth:
            raise YamlPipelineBuilderError(
                f"Include nesting too deep (max {self.max_depth}). "
                f"Possible circular include in: {' -> '.join(str(p) for p in include_stack)}"
            )

        if isinstance(obj, dict):
            # Check for !include directive
            if "!include" in obj and len(obj) == 1:
                include_spec = obj["!include"]
                return self._load_include(include_spec, current_base, depth, include_stack)

            # Recurse into dict values
            return {
                k: self._resolve_includes(v, current_base, depth, include_stack)
                for k, v in obj.items()
            }

        if isinstance(obj, list):
            # Process each list item and flatten nested lists from includes
            result = []
            for item in obj:
                resolved = self._resolve_includes(item, current_base, depth, include_stack)
                # Flatten: if an include returns a list, extend rather than append
                if isinstance(resolved, list):
                    result.extend(resolved)
                else:
                    result.append(resolved)
            return result

        return obj

    def _load_include(
        self, include_spec: str, current_base: Path, depth: int, include_stack: list[Path]
    ) -> Any:
        """Load content from included file.

        Args:
            include_spec: Include specification (e.g., "file.yaml" or "file.yaml#anchor")
            current_base: Base path for resolving relative paths
            depth: Current recursion depth
            include_stack: Stack of currently processing files (for circular detection)

        Returns:
            Loaded and processed content from included file
        """
        # Parse include specification (strip whitespace for better UX)
        include_spec = include_spec.strip()
        if "#" in include_spec:
            file_path_str, anchor = include_spec.split("#", 1)
            file_path_str = file_path_str.strip()
            anchor = anchor.strip()
        else:
            file_path_str, anchor = include_spec, None

        # Resolve file path
        file_path = self._resolve_path(file_path_str, current_base)

        # Check for circular includes
        if file_path in include_stack:
            cycle = " -> ".join(str(p) for p in include_stack + [file_path])
            raise YamlPipelineBuilderError(f"Circular include detected: {cycle}")

        # Load YAML file
        try:
            include_stack.append(file_path)
            content = yaml.safe_load(file_path.read_text(encoding="utf-8"))

            # Extract anchor if specified
            if anchor:
                if not isinstance(content, dict) or anchor not in content:
                    raise YamlPipelineBuilderError(
                        f"Anchor '{anchor}' not found in {file_path}. "
                        f"Available: {list(content.keys()) if isinstance(content, dict) else 'N/A'}"
                    )
                content = content[anchor]

            # Recursively resolve includes in loaded content
            return self._resolve_includes(content, file_path.parent, depth + 1, include_stack)

        except FileNotFoundError as e:
            raise YamlPipelineBuilderError(
                f"Include file not found: {file_path}\nSearched relative to: {current_base}"
            ) from e
        except yaml.YAMLError as e:
            raise YamlPipelineBuilderError(f"Invalid YAML in included file {file_path}: {e}") from e
        finally:
            include_stack.pop()

    def _resolve_path(self, path_str: str, current_base: Path) -> Path:
        """Resolve and validate include path.

        Args:
            path_str: Path string from !include directive
            current_base: Base path for resolving relative paths

        Returns:
            Validated absolute path

        Raises:
            YamlPipelineBuilderError: If path is invalid or potentially malicious
        """
        # Prevent absolute paths
        if Path(path_str).is_absolute():
            raise YamlPipelineBuilderError(
                f"Absolute paths not allowed in !include: {path_str}\n"
                "Use relative paths only for security."
            )

        # Resolve path relative to current base
        resolved = (current_base / path_str).resolve()

        # Prevent directory traversal outside project root
        # Use the resolved project_root (not base_path) to handle symlinks properly
        resolved_root = self.project_root.resolve()
        try:
            resolved.relative_to(resolved_root)
        except ValueError as e:
            raise YamlPipelineBuilderError(
                f"Include path traverses outside project root: {path_str}\n"
                f"Project root: {resolved_root}\n"
                f"Attempted path: {resolved}"
            ) from e

        return resolved
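
# Hypothetical sketch: by the time this plugin runs, an `!include`
# directive appears as a single-key dict (see the `"!include" in obj`
# check above), so resolution can be exercised directly. The paths are
# illustrative and must stay inside the project root.
def _example_resolve_includes() -> dict[str, Any]:
    plugin = IncludePreprocessPlugin(base_path=Path("pipelines"))
    config = {
        "kind": "Pipeline",
        "metadata": {"name": "composed"},
        "spec": {
            "ports": {"!include": "common/ports.yaml"},  # whole-file include
            "policies": {"!include": "common/base.yaml#retry"},  # anchor include
        },
    }
    return plugin.process(config)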


class EnvironmentVariablePlugin:
    """Resolve ${VAR} and ${VAR:default} in YAML with deferred secret resolution.

    For KeyVault/SecretPort workflows, secret-like environment variables are
    preserved as ${VAR} for runtime resolution. This allows:
    - Building pipelines without secrets present
    - Runtime secret injection via SecretPort → Memory
    - Separation of build and deployment contexts

    Secret patterns (deferred to runtime):
    - *_API_KEY, *_SECRET, *_TOKEN, *_PASSWORD, *_CREDENTIAL
    - SECRET_*

    Non-secret variables are resolved immediately at build-time.
    """

    ENV_VAR_PATTERN = re.compile(r"\$\{([A-Z_][A-Z0-9_]*?)(?::([^}]*))?\}")

    # Secret patterns that should be deferred to runtime
    SECRET_PATTERNS = frozenset({
        r".*_API_KEY$",
        r".*_SECRET$",
        r".*_TOKEN$",
        r".*_PASSWORD$",
        r".*_CREDENTIAL$",
        r"^SECRET_.*",
    })

    def __init__(self, defer_secrets: bool = True):
        """Initialize environment variable plugin.

        Parameters
        ----------
        defer_secrets : bool, default=True
            If True, preserve ${VAR} syntax for secret-like variables,
            allowing runtime resolution from KeyVault/Memory.
            If False, all variables are resolved at build-time (legacy behavior).
        """
        self.defer_secrets = defer_secrets
        if defer_secrets:
            # Compile secret detection regex
            self._secret_regex: re.Pattern[str] | None = re.compile(
                "|".join(f"({p})" for p in self.SECRET_PATTERNS)
            )
        else:
            self._secret_regex = None

    def process(self, config: dict[str, Any]) -> dict[str, Any]:
        """Recursively resolve environment variables."""
        result = _resolve_env_vars(
            config,
            self.ENV_VAR_PATTERN,
            secret_regex=self._secret_regex,
            defer_secrets=self.defer_secrets,
        )
        if not _is_dict_config(result):
            raise TypeError(
                f"Environment variable resolution must return a dictionary, "
                f"got {type(result).__name__}"
            )
        return result
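
# Hypothetical sketch: non-secret variables resolve at build time (with
# the type coercion implemented below), while names matching
# SECRET_PATTERNS keep their ${VAR} form for runtime resolution. The
# variable names are illustrative.
def _example_env_resolution() -> dict[str, Any]:
    os.environ["APP_PORT"] = "8080"
    plugin = EnvironmentVariablePlugin()
    config = {
        "kind": "Pipeline",
        "spec": {
            "port": "${APP_PORT}",  # -> 8080 (coerced to int)
            "region": "${APP_REGION:eu}",  # if unset -> default "eu"
            "api_key": "${OPENAI_API_KEY}",  # matches .*_API_KEY$ -> left for runtime
        },
    }
    return plugin.process(config)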
|
|
958
|
+
|
|
959
|
+
|
|
960
|
+
@singledispatch
|
|
961
|
+
def _resolve_env_vars(
|
|
962
|
+
obj: Any,
|
|
963
|
+
pattern: re.Pattern[str],
|
|
964
|
+
secret_regex: re.Pattern[str] | None = None,
|
|
965
|
+
defer_secrets: bool = True,
|
|
966
|
+
) -> Any:
|
|
967
|
+
"""Recursively resolve ${VAR} in any structure.
|
|
968
|
+
|
|
969
|
+
Parameters
|
|
970
|
+
----------
|
|
971
|
+
obj : Any
|
|
972
|
+
Object to process
|
|
973
|
+
pattern : re.Pattern[str]
|
|
974
|
+
Regex pattern for matching ${VAR} syntax
|
|
975
|
+
secret_regex : re.Pattern[str] | None
|
|
976
|
+
Regex for detecting secret-like variable names
|
|
977
|
+
defer_secrets : bool
|
|
978
|
+
If True, preserve ${VAR} for secrets
|
|
979
|
+
|
|
980
|
+
Returns
|
|
981
|
+
-------
|
|
982
|
+
Any
|
|
983
|
+
- For primitives: Returns the primitive unchanged
|
|
984
|
+
- For strings: Returns str | int | float | bool (with type coercion)
|
|
985
|
+
- For dicts: Returns dict[str, Any]
|
|
986
|
+
- For lists: Returns list[Any]
|
|
987
|
+
"""
|
|
988
|
+
return obj
|
|
989
|
+
|
|
990
|
+
|
|
991
|
+
@_resolve_env_vars.register(str)
|
|
992
|
+
def _resolve_env_vars_str(
|
|
993
|
+
obj: str,
|
|
994
|
+
pattern: re.Pattern[str],
|
|
995
|
+
secret_regex: re.Pattern[str] | None = None,
|
|
996
|
+
defer_secrets: bool = True,
|
|
997
|
+
) -> str | int | float | bool:
|
|
998
|
+
"""Resolve ${VAR} in strings with optional secret deferral."""
|
|
999
|
+
|
|
1000
|
+
def replacer(match: re.Match[str]) -> str:
|
|
1001
|
+
var_name, default = match.group(1), match.group(2)
|
|
1002
|
+
|
|
1003
|
+
# Check if this looks like a secret
|
|
1004
|
+
if defer_secrets and secret_regex and secret_regex.match(var_name):
|
|
1005
|
+
# Secret detected - preserve ${VAR} syntax for runtime resolution
|
|
1006
|
+
logger.debug(f"Deferring secret variable to runtime: {var_name}")
|
|
1007
|
+
return match.group(0) # Return original ${VAR} or ${VAR:default}
|
|
1008
|
+
|
|
1009
|
+
# Non-secret - resolve immediately from environment
|
|
1010
|
+
env_value = os.environ.get(var_name)
|
|
1011
|
+
if env_value is None:
|
|
1012
|
+
if default is not None:
|
|
1013
|
+
return default
|
|
1014
|
+
raise YamlPipelineBuilderError(f"Environment variable '${{{var_name}}}' not set")
|
|
1015
|
+
return env_value
|
|
1016
|
+
|
|
1017
|
+
resolved = pattern.sub(replacer, obj)
|
|
1018
|
+
|
|
1019
|
+
# Type coercion only if the value changed (was resolved)
|
|
1020
|
+
if resolved != obj and not (defer_secrets and resolved.startswith("${")):
|
|
1021
|
+
if resolved.lower() in ("true", "yes", "1"):
|
|
1022
|
+
return True
|
|
1023
|
+
if resolved.lower() in ("false", "no", "0"):
|
|
1024
|
+
return False
|
|
1025
|
+
with suppress(ValueError):
|
|
1026
|
+
return int(resolved)
|
|
1027
|
+
with suppress(ValueError):
|
|
1028
|
+
return float(resolved)
|
|
1029
|
+
return resolved
|
|
1030
|
+
|
|
1031
|
+
|
|
1032
|
+
+@_resolve_env_vars.register(dict)
+def _resolve_env_vars_dict(
+    obj: dict,
+    pattern: re.Pattern[str],
+    secret_regex: re.Pattern[str] | None = None,
+    defer_secrets: bool = True,
+) -> dict[str, Any]:
+    """Resolve ${VAR} in dict values."""
+    return {k: _resolve_env_vars(v, pattern, secret_regex, defer_secrets) for k, v in obj.items()}
+
+
+@_resolve_env_vars.register(list)
+def _resolve_env_vars_list(
+    obj: list,
+    pattern: re.Pattern[str],
+    secret_regex: re.Pattern[str] | None = None,
+    defer_secrets: bool = True,
+) -> list[Any]:
+    """Resolve ${VAR} in list items."""
+    return [_resolve_env_vars(item, pattern, secret_regex, defer_secrets) for item in obj]
+
+
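The three `register` handlers give a type-dispatched recursion instead of an `isinstance` ladder: the base case passes primitives through, and the dict/list cases recurse into containers. A self-contained sketch of the same pattern, independent of this module:

```python
from functools import singledispatch
from typing import Any


@singledispatch
def walk(obj: Any) -> Any:
    return obj  # base case: ints, floats, None, ... pass through unchanged


@walk.register(str)
def _(obj: str) -> str:
    return obj.upper()  # stand-in for ${VAR} substitution


@walk.register(dict)
def _(obj: dict) -> dict:
    return {k: walk(v) for k, v in obj.items()}  # keys untouched, values recursed


@walk.register(list)
def _(obj: list) -> list:
    return [walk(item) for item in obj]


print(walk({"host": "db", "ports": [5432, "pg"], "n": 1}))
# {'host': 'DB', 'ports': [5432, 'PG'], 'n': 1}
```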
+class TemplatePlugin:
+    """Render Jinja2 templates in YAML with two-phase rendering strategy.
+
+    **Build-time Rendering** (YAML configuration context):
+    - Metadata fields (e.g., node names, descriptions)
+    - Pipeline-level spec fields (e.g., variables, ports, policies)
+    - Enables dynamic configuration from environment/variables
+
+    **Runtime Rendering** (node execution context):
+    - Node spec fields (e.g., template, prompt_template, initial_prompt)
+    - Pipeline outputs (e.g., spec.outputs with {{node.result}} references)
+    - Preserved to allow access to dependency/node outputs at runtime
+    - Enables dynamic prompts and output mapping based on execution results
+
+    Example:
+        ```yaml
+        spec:
+          variables:
+            node_name: analyzer
+          nodes:
+            - kind: llm_node
+              metadata:
+                name: "{{ spec.variables.node_name }}"  # Build-time: renders to "analyzer"
+              spec:
+                template: "{{input}}"  # Runtime: renders when node executes
+          outputs:
+            result: "{{analyzer.analysis}}"  # Runtime: rendered after pipeline completes
+        ```
+
+    Security:
+        Uses SandboxedEnvironment to prevent arbitrary code execution.
+    """
+
+    def __init__(self) -> None:
+        # Use sandboxed environment to prevent code execution attacks
+        # Even though this is for config files, defense in depth is important
+        self.env = SandboxedEnvironment(autoescape=False, keep_trailing_newline=True)
+
+    def process(self, config: dict[str, Any]) -> dict[str, Any]:
+        """Render Jinja2 templates with config as context."""
+        result = _render_templates(config, config, self.env)
+        if not _is_dict_config(result):
+            raise TypeError(
+                f"Template rendering must return a dictionary, got {type(result).__name__}"
+            )
+        return result
+
+
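The security note is observable behavior: Jinja2's `SandboxedEnvironment` renders ordinary substitutions normally but raises `SecurityError` when a template chains through unsafe attributes toward interpreter internals. A minimal sketch:

```python
from jinja2.exceptions import SecurityError
from jinja2.sandbox import SandboxedEnvironment

env = SandboxedEnvironment(autoescape=False, keep_trailing_newline=True)

# Ordinary substitution works as usual.
print(env.from_string("Hello {{ name }}").render({"name": "hexdag"}))  # Hello hexdag

# Dunder probing (a common template-injection vector) is rejected at render time.
try:
    env.from_string("{{ ''.__class__.__mro__ }}").render({})
except SecurityError:
    print("blocked by sandbox")
```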
+@singledispatch
+def _render_templates(obj: Any, context: dict[str, Any], env: Any) -> Any:
+    """Recursively render Jinja2 templates.
+
+    Returns:
+        - For primitives: Returns the primitive unchanged
+        - For strings: Returns str | int | float | bool (with type coercion)
+        - For dicts: Returns dict[str, Any]
+        - For lists: Returns list[Any]
+    """
+    return obj
+
+
+@_render_templates.register(str)
+def _render_templates_str(obj: str, context: dict[str, Any], env: Any) -> str | int | float | bool:
+    """Render Jinja2 template in string."""
+    if "{{" not in obj and "{%" not in obj:
+        return obj
+
+    try:
+        rendered: str = env.from_string(obj).render(context)
+        # Type coercion
+        with suppress(ValueError):
+            return int(rendered)
+        with suppress(ValueError):
+            return float(rendered)
+        if rendered.lower() in ("true", "yes"):
+            return True
+        if rendered.lower() in ("false", "no"):
+            return False
+        return rendered
+    except TemplateSyntaxError as e:
+        raise YamlPipelineBuilderError(
+            f"Invalid Jinja2 template syntax: {e}\nTemplate: {obj}"
+        ) from e
+    except UndefinedError as e:
+        raise YamlPipelineBuilderError(
+            f"Undefined variable in template: {e}\nTemplate: {obj}"
+        ) from e
+
+
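Note the coercion order in the string handler: `int` and `float` are tried before the boolean checks, so a template that renders to "1" becomes the integer 1, never `True` (unlike the env var resolver above, which checks booleans first). A short sketch of the resulting behavior, with a hypothetical context:

```python
from jinja2.sandbox import SandboxedEnvironment

env = SandboxedEnvironment(autoescape=False, keep_trailing_newline=True)
ctx = {"retries": "3", "debug": "true", "name": "analyzer"}

_render_templates_str("{{ retries }}", ctx, env)  # -> 3 (int)
_render_templates_str("{{ debug }}", ctx, env)    # -> True
_render_templates_str("{{ name }}", ctx, env)     # -> "analyzer"
_render_templates_str("no markers", ctx, env)     # -> "no markers" (fast path, never rendered)
```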
+@_render_templates.register(dict)
+def _render_templates_dict(obj: dict, context: dict[str, Any], env: Any) -> dict[str, Any]:
+    """Render templates in dict values.
+
+    Skip rendering for node spec fields and pipeline outputs to avoid conflicts between
+    YAML-level templating and runtime template strings.
+
+    Strategy:
+    - Node specs: Preserve for runtime rendering with dependency outputs
+    - Pipeline outputs: Preserve for runtime rendering with node results
+    - Metadata and config: Render at build time with YAML context
+    """
+    result = {}
+    for k, v in obj.items():
+        # Check if this is a node spec (not a Pipeline spec) by looking for the 'kind' sibling key
+        # Node kinds end with '_node' (e.g., 'prompt_node', 'llm_node', 'function_node')
+        # Pipeline kind is 'Pipeline' - we should NOT skip its spec
+        if (
+            k == "spec"
+            and isinstance(v, dict)
+            and "kind" in obj
+            and isinstance(obj.get("kind"), str)
+            and obj["kind"] != "Pipeline"
+        ):
+            # This is a node spec - preserve template strings for runtime rendering
+            result[k] = v  # Preserve entire spec as-is
+        # Also preserve 'outputs' field in Pipeline spec - references node results
+        elif k == "outputs" and isinstance(v, dict):
+            # Pipeline outputs reference node results (e.g., {{node.output}})
+            # Preserve for runtime rendering after nodes execute
+            result[k] = v
+        else:
+            result[k] = _render_templates(v, context, env)
+    return result
+
+
+@_render_templates.register(list)
+def _render_templates_list(obj: list, context: dict[str, Any], env: Any) -> list[Any]:
+    """Render templates in list items."""
+    return [_render_templates(item, context, env) for item in obj]
+
+
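Putting the preservation rules together: under a mapping whose `kind` is not `Pipeline`, `metadata` is rendered at build time while the sibling `spec` (and any `outputs` mapping) passes through untouched for runtime rendering. A sketch against a hypothetical node fragment:

```python
from jinja2.sandbox import SandboxedEnvironment

node = {
    "kind": "llm_node",
    "metadata": {"name": "{{ node_name }}"},  # rendered at build time
    "spec": {"template": "{{ input }}"},      # preserved: node spec
}
out = _render_templates(node, {"node_name": "analyzer"}, SandboxedEnvironment())
assert out["metadata"]["name"] == "analyzer"
assert out["spec"]["template"] == "{{ input }}"  # left for runtime rendering
```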
+# ============================================================================
+# Utilities
+# ============================================================================
+
+
+@lru_cache(maxsize=32)
+def _parse_yaml_cached(yaml_content: str) -> Any:
+    """Cached YAML parsing.
+
+    Returns dict[str, Any] in practice, but yaml.safe_load returns Any.
+    """
+    return yaml.safe_load(yaml_content)
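One consequence of `lru_cache` worth keeping in mind: a cache hit returns the identical parsed object, not a copy, so an in-place mutation by one caller is visible to every later parse of the same YAML text. A quick check of that behavior (standard `functools` semantics, not hexdag-specific):

```python
doc = "a: 1\nitems: [1, 2]\n"
first = _parse_yaml_cached(doc)
second = _parse_yaml_cached(doc)
assert first is second  # same object on a cache hit
# Callers that mutate the parsed config should copy.deepcopy() it first,
# otherwise edits leak into subsequent parses of the same YAML string.
```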