hexdag 0.5.0.dev1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- hexdag/__init__.py +116 -0
- hexdag/__main__.py +30 -0
- hexdag/adapters/executors/__init__.py +5 -0
- hexdag/adapters/executors/local_executor.py +316 -0
- hexdag/builtin/__init__.py +6 -0
- hexdag/builtin/adapters/__init__.py +51 -0
- hexdag/builtin/adapters/anthropic/__init__.py +5 -0
- hexdag/builtin/adapters/anthropic/anthropic_adapter.py +151 -0
- hexdag/builtin/adapters/database/__init__.py +6 -0
- hexdag/builtin/adapters/database/csv/csv_adapter.py +249 -0
- hexdag/builtin/adapters/database/pgvector/__init__.py +5 -0
- hexdag/builtin/adapters/database/pgvector/pgvector_adapter.py +478 -0
- hexdag/builtin/adapters/database/sqlalchemy/sqlalchemy_adapter.py +252 -0
- hexdag/builtin/adapters/database/sqlite/__init__.py +5 -0
- hexdag/builtin/adapters/database/sqlite/sqlite_adapter.py +410 -0
- hexdag/builtin/adapters/local/README.md +59 -0
- hexdag/builtin/adapters/local/__init__.py +7 -0
- hexdag/builtin/adapters/local/local_observer_manager.py +696 -0
- hexdag/builtin/adapters/memory/__init__.py +47 -0
- hexdag/builtin/adapters/memory/file_memory_adapter.py +297 -0
- hexdag/builtin/adapters/memory/in_memory_memory.py +216 -0
- hexdag/builtin/adapters/memory/schemas.py +57 -0
- hexdag/builtin/adapters/memory/session_memory.py +178 -0
- hexdag/builtin/adapters/memory/sqlite_memory_adapter.py +215 -0
- hexdag/builtin/adapters/memory/state_memory.py +280 -0
- hexdag/builtin/adapters/mock/README.md +89 -0
- hexdag/builtin/adapters/mock/__init__.py +15 -0
- hexdag/builtin/adapters/mock/hexdag.toml +50 -0
- hexdag/builtin/adapters/mock/mock_database.py +225 -0
- hexdag/builtin/adapters/mock/mock_embedding.py +223 -0
- hexdag/builtin/adapters/mock/mock_llm.py +177 -0
- hexdag/builtin/adapters/mock/mock_tool_adapter.py +192 -0
- hexdag/builtin/adapters/mock/mock_tool_router.py +232 -0
- hexdag/builtin/adapters/openai/__init__.py +5 -0
- hexdag/builtin/adapters/openai/openai_adapter.py +634 -0
- hexdag/builtin/adapters/secret/__init__.py +7 -0
- hexdag/builtin/adapters/secret/local_secret_adapter.py +248 -0
- hexdag/builtin/adapters/unified_tool_router.py +280 -0
- hexdag/builtin/macros/__init__.py +17 -0
- hexdag/builtin/macros/conversation_agent.py +390 -0
- hexdag/builtin/macros/llm_macro.py +151 -0
- hexdag/builtin/macros/reasoning_agent.py +423 -0
- hexdag/builtin/macros/tool_macro.py +380 -0
- hexdag/builtin/nodes/__init__.py +38 -0
- hexdag/builtin/nodes/_discovery.py +123 -0
- hexdag/builtin/nodes/agent_node.py +696 -0
- hexdag/builtin/nodes/base_node_factory.py +242 -0
- hexdag/builtin/nodes/composite_node.py +926 -0
- hexdag/builtin/nodes/data_node.py +201 -0
- hexdag/builtin/nodes/expression_node.py +487 -0
- hexdag/builtin/nodes/function_node.py +454 -0
- hexdag/builtin/nodes/llm_node.py +491 -0
- hexdag/builtin/nodes/loop_node.py +920 -0
- hexdag/builtin/nodes/mapped_input.py +518 -0
- hexdag/builtin/nodes/port_call_node.py +269 -0
- hexdag/builtin/nodes/tool_call_node.py +195 -0
- hexdag/builtin/nodes/tool_utils.py +390 -0
- hexdag/builtin/prompts/__init__.py +68 -0
- hexdag/builtin/prompts/base.py +422 -0
- hexdag/builtin/prompts/chat_prompts.py +303 -0
- hexdag/builtin/prompts/error_correction_prompts.py +320 -0
- hexdag/builtin/prompts/tool_prompts.py +160 -0
- hexdag/builtin/tools/builtin_tools.py +84 -0
- hexdag/builtin/tools/database_tools.py +164 -0
- hexdag/cli/__init__.py +17 -0
- hexdag/cli/__main__.py +7 -0
- hexdag/cli/commands/__init__.py +27 -0
- hexdag/cli/commands/build_cmd.py +812 -0
- hexdag/cli/commands/create_cmd.py +208 -0
- hexdag/cli/commands/docs_cmd.py +293 -0
- hexdag/cli/commands/generate_types_cmd.py +252 -0
- hexdag/cli/commands/init_cmd.py +188 -0
- hexdag/cli/commands/pipeline_cmd.py +494 -0
- hexdag/cli/commands/plugin_dev_cmd.py +529 -0
- hexdag/cli/commands/plugins_cmd.py +441 -0
- hexdag/cli/commands/studio_cmd.py +101 -0
- hexdag/cli/commands/validate_cmd.py +221 -0
- hexdag/cli/main.py +84 -0
- hexdag/core/__init__.py +83 -0
- hexdag/core/config/__init__.py +20 -0
- hexdag/core/config/loader.py +479 -0
- hexdag/core/config/models.py +150 -0
- hexdag/core/configurable.py +294 -0
- hexdag/core/context/__init__.py +37 -0
- hexdag/core/context/execution_context.py +378 -0
- hexdag/core/docs/__init__.py +26 -0
- hexdag/core/docs/extractors.py +678 -0
- hexdag/core/docs/generators.py +890 -0
- hexdag/core/docs/models.py +120 -0
- hexdag/core/domain/__init__.py +10 -0
- hexdag/core/domain/dag.py +1225 -0
- hexdag/core/exceptions.py +234 -0
- hexdag/core/expression_parser.py +569 -0
- hexdag/core/logging.py +449 -0
- hexdag/core/models/__init__.py +17 -0
- hexdag/core/models/base.py +138 -0
- hexdag/core/orchestration/__init__.py +46 -0
- hexdag/core/orchestration/body_executor.py +481 -0
- hexdag/core/orchestration/components/__init__.py +97 -0
- hexdag/core/orchestration/components/adapter_lifecycle_manager.py +113 -0
- hexdag/core/orchestration/components/checkpoint_manager.py +134 -0
- hexdag/core/orchestration/components/execution_coordinator.py +360 -0
- hexdag/core/orchestration/components/health_check_manager.py +176 -0
- hexdag/core/orchestration/components/input_mapper.py +143 -0
- hexdag/core/orchestration/components/lifecycle_manager.py +583 -0
- hexdag/core/orchestration/components/node_executor.py +377 -0
- hexdag/core/orchestration/components/secret_manager.py +202 -0
- hexdag/core/orchestration/components/wave_executor.py +158 -0
- hexdag/core/orchestration/constants.py +17 -0
- hexdag/core/orchestration/events/README.md +312 -0
- hexdag/core/orchestration/events/__init__.py +104 -0
- hexdag/core/orchestration/events/batching.py +330 -0
- hexdag/core/orchestration/events/decorators.py +139 -0
- hexdag/core/orchestration/events/events.py +573 -0
- hexdag/core/orchestration/events/observers/__init__.py +30 -0
- hexdag/core/orchestration/events/observers/core_observers.py +690 -0
- hexdag/core/orchestration/events/observers/models.py +111 -0
- hexdag/core/orchestration/events/taxonomy.py +269 -0
- hexdag/core/orchestration/hook_context.py +237 -0
- hexdag/core/orchestration/hooks.py +437 -0
- hexdag/core/orchestration/models.py +418 -0
- hexdag/core/orchestration/orchestrator.py +910 -0
- hexdag/core/orchestration/orchestrator_factory.py +275 -0
- hexdag/core/orchestration/port_wrappers.py +327 -0
- hexdag/core/orchestration/prompt/__init__.py +32 -0
- hexdag/core/orchestration/prompt/template.py +332 -0
- hexdag/core/pipeline_builder/__init__.py +21 -0
- hexdag/core/pipeline_builder/component_instantiator.py +386 -0
- hexdag/core/pipeline_builder/include_tag.py +265 -0
- hexdag/core/pipeline_builder/pipeline_config.py +133 -0
- hexdag/core/pipeline_builder/py_tag.py +223 -0
- hexdag/core/pipeline_builder/tag_discovery.py +268 -0
- hexdag/core/pipeline_builder/yaml_builder.py +1196 -0
- hexdag/core/pipeline_builder/yaml_validator.py +569 -0
- hexdag/core/ports/__init__.py +65 -0
- hexdag/core/ports/api_call.py +133 -0
- hexdag/core/ports/database.py +489 -0
- hexdag/core/ports/embedding.py +215 -0
- hexdag/core/ports/executor.py +237 -0
- hexdag/core/ports/file_storage.py +117 -0
- hexdag/core/ports/healthcheck.py +87 -0
- hexdag/core/ports/llm.py +551 -0
- hexdag/core/ports/memory.py +70 -0
- hexdag/core/ports/observer_manager.py +130 -0
- hexdag/core/ports/secret.py +145 -0
- hexdag/core/ports/tool_router.py +94 -0
- hexdag/core/ports_builder.py +623 -0
- hexdag/core/protocols.py +273 -0
- hexdag/core/resolver.py +304 -0
- hexdag/core/schema/__init__.py +9 -0
- hexdag/core/schema/generator.py +742 -0
- hexdag/core/secrets.py +242 -0
- hexdag/core/types.py +413 -0
- hexdag/core/utils/async_warnings.py +206 -0
- hexdag/core/utils/schema_conversion.py +78 -0
- hexdag/core/utils/sql_validation.py +86 -0
- hexdag/core/validation/secure_json.py +148 -0
- hexdag/core/yaml_macro.py +517 -0
- hexdag/mcp_server.py +3120 -0
- hexdag/studio/__init__.py +10 -0
- hexdag/studio/build_ui.py +92 -0
- hexdag/studio/server/__init__.py +1 -0
- hexdag/studio/server/main.py +100 -0
- hexdag/studio/server/routes/__init__.py +9 -0
- hexdag/studio/server/routes/execute.py +208 -0
- hexdag/studio/server/routes/export.py +558 -0
- hexdag/studio/server/routes/files.py +207 -0
- hexdag/studio/server/routes/plugins.py +419 -0
- hexdag/studio/server/routes/validate.py +220 -0
- hexdag/studio/ui/index.html +13 -0
- hexdag/studio/ui/package-lock.json +2992 -0
- hexdag/studio/ui/package.json +31 -0
- hexdag/studio/ui/postcss.config.js +6 -0
- hexdag/studio/ui/public/hexdag.svg +5 -0
- hexdag/studio/ui/src/App.tsx +251 -0
- hexdag/studio/ui/src/components/Canvas.tsx +408 -0
- hexdag/studio/ui/src/components/ContextMenu.tsx +187 -0
- hexdag/studio/ui/src/components/FileBrowser.tsx +123 -0
- hexdag/studio/ui/src/components/Header.tsx +181 -0
- hexdag/studio/ui/src/components/HexdagNode.tsx +193 -0
- hexdag/studio/ui/src/components/NodeInspector.tsx +512 -0
- hexdag/studio/ui/src/components/NodePalette.tsx +262 -0
- hexdag/studio/ui/src/components/NodePortsSection.tsx +403 -0
- hexdag/studio/ui/src/components/PluginManager.tsx +347 -0
- hexdag/studio/ui/src/components/PortsEditor.tsx +481 -0
- hexdag/studio/ui/src/components/PythonEditor.tsx +195 -0
- hexdag/studio/ui/src/components/ValidationPanel.tsx +105 -0
- hexdag/studio/ui/src/components/YamlEditor.tsx +196 -0
- hexdag/studio/ui/src/components/index.ts +8 -0
- hexdag/studio/ui/src/index.css +92 -0
- hexdag/studio/ui/src/main.tsx +10 -0
- hexdag/studio/ui/src/types/index.ts +123 -0
- hexdag/studio/ui/src/vite-env.d.ts +1 -0
- hexdag/studio/ui/tailwind.config.js +29 -0
- hexdag/studio/ui/tsconfig.json +37 -0
- hexdag/studio/ui/tsconfig.node.json +13 -0
- hexdag/studio/ui/vite.config.ts +35 -0
- hexdag/visualization/__init__.py +69 -0
- hexdag/visualization/dag_visualizer.py +1020 -0
- hexdag-0.5.0.dev1.dist-info/METADATA +369 -0
- hexdag-0.5.0.dev1.dist-info/RECORD +261 -0
- hexdag-0.5.0.dev1.dist-info/WHEEL +4 -0
- hexdag-0.5.0.dev1.dist-info/entry_points.txt +4 -0
- hexdag-0.5.0.dev1.dist-info/licenses/LICENSE +190 -0
- hexdag_plugins/.gitignore +43 -0
- hexdag_plugins/README.md +73 -0
- hexdag_plugins/__init__.py +1 -0
- hexdag_plugins/azure/LICENSE +21 -0
- hexdag_plugins/azure/README.md +414 -0
- hexdag_plugins/azure/__init__.py +21 -0
- hexdag_plugins/azure/azure_blob_adapter.py +450 -0
- hexdag_plugins/azure/azure_cosmos_adapter.py +383 -0
- hexdag_plugins/azure/azure_keyvault_adapter.py +314 -0
- hexdag_plugins/azure/azure_openai_adapter.py +415 -0
- hexdag_plugins/azure/pyproject.toml +107 -0
- hexdag_plugins/azure/tests/__init__.py +1 -0
- hexdag_plugins/azure/tests/test_azure_blob_adapter.py +350 -0
- hexdag_plugins/azure/tests/test_azure_cosmos_adapter.py +323 -0
- hexdag_plugins/azure/tests/test_azure_keyvault_adapter.py +330 -0
- hexdag_plugins/azure/tests/test_azure_openai_adapter.py +329 -0
- hexdag_plugins/hexdag_etl/README.md +168 -0
- hexdag_plugins/hexdag_etl/__init__.py +53 -0
- hexdag_plugins/hexdag_etl/examples/01_simple_pandas_transform.py +270 -0
- hexdag_plugins/hexdag_etl/examples/02_simple_pandas_only.py +149 -0
- hexdag_plugins/hexdag_etl/examples/03_file_io_pipeline.py +109 -0
- hexdag_plugins/hexdag_etl/examples/test_pandas_transform.py +84 -0
- hexdag_plugins/hexdag_etl/hexdag.toml +25 -0
- hexdag_plugins/hexdag_etl/hexdag_etl/__init__.py +48 -0
- hexdag_plugins/hexdag_etl/hexdag_etl/nodes/__init__.py +13 -0
- hexdag_plugins/hexdag_etl/hexdag_etl/nodes/api_extract.py +230 -0
- hexdag_plugins/hexdag_etl/hexdag_etl/nodes/base_node_factory.py +181 -0
- hexdag_plugins/hexdag_etl/hexdag_etl/nodes/file_io.py +415 -0
- hexdag_plugins/hexdag_etl/hexdag_etl/nodes/outlook.py +492 -0
- hexdag_plugins/hexdag_etl/hexdag_etl/nodes/pandas_transform.py +563 -0
- hexdag_plugins/hexdag_etl/hexdag_etl/nodes/sql_extract_load.py +112 -0
- hexdag_plugins/hexdag_etl/pyproject.toml +82 -0
- hexdag_plugins/hexdag_etl/test_transform.py +54 -0
- hexdag_plugins/hexdag_etl/tests/test_plugin_integration.py +62 -0
- hexdag_plugins/mysql_adapter/LICENSE +21 -0
- hexdag_plugins/mysql_adapter/README.md +224 -0
- hexdag_plugins/mysql_adapter/__init__.py +6 -0
- hexdag_plugins/mysql_adapter/mysql_adapter.py +408 -0
- hexdag_plugins/mysql_adapter/pyproject.toml +93 -0
- hexdag_plugins/mysql_adapter/tests/test_mysql_adapter.py +259 -0
- hexdag_plugins/storage/README.md +184 -0
- hexdag_plugins/storage/__init__.py +19 -0
- hexdag_plugins/storage/file/__init__.py +5 -0
- hexdag_plugins/storage/file/local.py +325 -0
- hexdag_plugins/storage/ports/__init__.py +5 -0
- hexdag_plugins/storage/ports/vector_store.py +236 -0
- hexdag_plugins/storage/sql/__init__.py +7 -0
- hexdag_plugins/storage/sql/base.py +187 -0
- hexdag_plugins/storage/sql/mysql.py +27 -0
- hexdag_plugins/storage/sql/postgresql.py +27 -0
- hexdag_plugins/storage/tests/__init__.py +1 -0
- hexdag_plugins/storage/tests/test_local_file_storage.py +161 -0
- hexdag_plugins/storage/tests/test_sql_adapters.py +212 -0
- hexdag_plugins/storage/vector/__init__.py +7 -0
- hexdag_plugins/storage/vector/chromadb.py +223 -0
- hexdag_plugins/storage/vector/in_memory.py +285 -0
- hexdag_plugins/storage/vector/pgvector.py +502 -0
|
@@ -0,0 +1,569 @@
|
|
|
1
|
+
"""YAML Pipeline Validator - Validates pipeline configurations."""
|
|
2
|
+
|
|
3
|
+
from typing import Any
|
|
4
|
+
|
|
5
|
+
from hexdag.core.domain.dag import DirectedGraph
|
|
6
|
+
|
|
7
|
+
# Separator for namespace:name format
|
|
8
|
+
NAMESPACE_SEPARATOR = ":"
|
|
9
|
+
|
|
10
|
+
# Lazy-loaded known node types (derived from resolver's builtin aliases)
|
|
11
|
+
_known_node_types: frozenset[str] | None = None
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
def _get_known_node_types() -> frozenset[str]:
    """Return the known node types, loading them on the first call.

    The names are the keys of the mapping returned by
    ``hexdag.core.resolver.get_builtin_aliases``. The resolver is imported
    inside the function rather than at module level, so core does not import
    from builtin (hexagonal architecture). The result is stored in the
    module-level ``_known_node_types`` and reused by every later call.

    Returns
    -------
    frozenset[str]
        Alias names reported by the resolver when the cache was first filled
    """
    global _known_node_types

    # Fast path: the cache has already been populated
    if _known_node_types is not None:
        return _known_node_types

    from hexdag.core.resolver import get_builtin_aliases

    _known_node_types = frozenset(get_builtin_aliases().keys())
    return _known_node_types
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
# Keep KNOWN_NODE_TYPES as a module-level reference for backwards compatibility
|
|
30
|
+
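# in tests that may import it directly. Note: this assignment runs at import
# time, so the "lazy" cache above is populated as soon as this module loads.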
|
|
31
|
+
KNOWN_NODE_TYPES = _get_known_node_types()
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
class ValidationReport:
    """Collects the errors, warnings and suggestions produced by a validation run.

    Instances use ``__slots__``, so they carry no per-instance ``__dict__``.
    A report counts as valid while it holds no errors; warnings and
    suggestions never affect validity.
    """

    __slots__ = ("_errors", "_warnings", "_suggestions")

    def __init__(self) -> None:
        """Create an empty report."""
        self._errors: list[str] = []
        self._warnings: list[str] = []
        self._suggestions: list[str] = []

    @property
    def is_valid(self) -> bool:
        """Tell whether validation passed.

        Returns
        -------
        bool
            True when no error has been recorded, False otherwise
        """
        return not self._errors

    def add_error(self, message: str) -> None:
        """Record an error message."""
        self._errors.append(message)

    def add_warning(self, message: str) -> None:
        """Record a warning message."""
        self._warnings.append(message)

    def add_suggestion(self, message: str) -> None:
        """Record a suggestion message."""
        self._suggestions.append(message)

    @property
    def errors(self) -> list[str]:
        """Recorded error messages.

        Returns
        -------
        list[str]
            The internal list of errors (not a copy), in insertion order
        """
        return self._errors

    @property
    def warnings(self) -> list[str]:
        """Recorded warning messages.

        Returns
        -------
        list[str]
            The internal list of warnings (not a copy), in insertion order
        """
        return self._warnings

    @property
    def suggestions(self) -> list[str]:
        """Recorded suggestion messages.

        Returns
        -------
        list[str]
            The internal list of suggestions (not a copy), in insertion order
        """
        return self._suggestions
|
|
100
|
+
|
|
101
|
+
|
|
102
|
+
class _SchemaValidator:
|
|
103
|
+
"""Validates YAML node specs against known schemas.
|
|
104
|
+
|
|
105
|
+
Since we no longer have a registry, schema validation is simplified
|
|
106
|
+
to basic structural validation.
|
|
107
|
+
|
|
108
|
+
Note: This is an internal class. Use YamlValidator for public validation API.
|
|
109
|
+
"""
|
|
110
|
+
|
|
111
|
+
def validate_node_spec(
|
|
112
|
+
self,
|
|
113
|
+
node_type: str,
|
|
114
|
+
spec: dict[str, Any],
|
|
115
|
+
namespace: str = "core",
|
|
116
|
+
) -> list[str]:
|
|
117
|
+
"""Validate a node's spec with basic structural checks.
|
|
118
|
+
|
|
119
|
+
Args
|
|
120
|
+
----
|
|
121
|
+
node_type: Type of node (e.g., "llm", "agent", "function")
|
|
122
|
+
spec: Node specification from YAML manifest
|
|
123
|
+
namespace: Component namespace (default: "core")
|
|
124
|
+
|
|
125
|
+
Returns
|
|
126
|
+
-------
|
|
127
|
+
List of validation error messages (empty if valid)
|
|
128
|
+
"""
|
|
129
|
+
# Basic validation - without registry, we can only do structural checks
|
|
130
|
+
errors: list[str] = []
|
|
131
|
+
|
|
132
|
+
# LLM nodes require template or prompt_template
|
|
133
|
+
if (
|
|
134
|
+
node_type in ("llm", "llm_node")
|
|
135
|
+
and "template" not in spec
|
|
136
|
+
and "prompt_template" not in spec
|
|
137
|
+
):
|
|
138
|
+
errors.append("Missing required field 'template' (or 'prompt_template')")
|
|
139
|
+
|
|
140
|
+
# Prompt nodes require template
|
|
141
|
+
if (
|
|
142
|
+
node_type in ("prompt", "prompt_node")
|
|
143
|
+
and "template" not in spec
|
|
144
|
+
and "prompt_ref" not in spec
|
|
145
|
+
):
|
|
146
|
+
errors.append("Missing required field 'template' or 'prompt_ref'")
|
|
147
|
+
|
|
148
|
+
# Agent nodes require initial_prompt_template or main_prompt
|
|
149
|
+
if (
|
|
150
|
+
node_type in ("agent", "agent_node")
|
|
151
|
+
and "initial_prompt_template" not in spec
|
|
152
|
+
and "main_prompt" not in spec
|
|
153
|
+
):
|
|
154
|
+
errors.append("Missing required field 'initial_prompt_template' (or 'main_prompt')")
|
|
155
|
+
|
|
156
|
+
# Function nodes require fn
|
|
157
|
+
if node_type in ("function", "function_node") and "fn" not in spec:
|
|
158
|
+
errors.append("Missing required field 'fn'")
|
|
159
|
+
|
|
160
|
+
# Composite nodes require mode
|
|
161
|
+
if node_type in ("composite", "composite_node"):
|
|
162
|
+
errors.extend(self._validate_composite_spec(spec))
|
|
163
|
+
|
|
164
|
+
return errors
|
|
165
|
+
|
|
166
|
+
def _validate_composite_spec(self, spec: dict[str, Any]) -> list[str]:
|
|
167
|
+
"""Validate composite_node specific requirements.
|
|
168
|
+
|
|
169
|
+
Parameters
|
|
170
|
+
----------
|
|
171
|
+
spec : dict[str, Any]
|
|
172
|
+
Node specification from YAML manifest
|
|
173
|
+
|
|
174
|
+
Returns
|
|
175
|
+
-------
|
|
176
|
+
list[str]
|
|
177
|
+
List of validation error messages
|
|
178
|
+
"""
|
|
179
|
+
errors: list[str] = []
|
|
180
|
+
|
|
181
|
+
if "mode" not in spec:
|
|
182
|
+
errors.append("Missing required field 'mode'")
|
|
183
|
+
return errors
|
|
184
|
+
|
|
185
|
+
mode = spec.get("mode")
|
|
186
|
+
valid_modes = ("while", "for-each", "times", "if-else", "switch")
|
|
187
|
+
if mode not in valid_modes:
|
|
188
|
+
errors.append(f"Invalid mode '{mode}'. Valid modes: {', '.join(valid_modes)}")
|
|
189
|
+
return errors
|
|
190
|
+
|
|
191
|
+
# Mode-specific validation
|
|
192
|
+
match mode:
|
|
193
|
+
case "while":
|
|
194
|
+
if "condition" not in spec:
|
|
195
|
+
errors.append("Mode 'while' requires 'condition' field")
|
|
196
|
+
case "for-each":
|
|
197
|
+
if "items" not in spec:
|
|
198
|
+
errors.append("Mode 'for-each' requires 'items' field")
|
|
199
|
+
case "times":
|
|
200
|
+
if "count" not in spec:
|
|
201
|
+
errors.append("Mode 'times' requires 'count' field")
|
|
202
|
+
elif not isinstance(spec.get("count"), int):
|
|
203
|
+
errors.append("Field 'count' must be an integer")
|
|
204
|
+
case "if-else":
|
|
205
|
+
if "condition" not in spec:
|
|
206
|
+
errors.append("Mode 'if-else' requires 'condition' field")
|
|
207
|
+
case "switch":
|
|
208
|
+
if "branches" not in spec:
|
|
209
|
+
errors.append("Mode 'switch' requires 'branches' field")
|
|
210
|
+
elif not isinstance(spec.get("branches"), list):
|
|
211
|
+
errors.append("Field 'branches' must be a list")
|
|
212
|
+
|
|
213
|
+
# Validate body field if present (can be string, list, or callable from !py)
|
|
214
|
+
body = spec.get("body")
|
|
215
|
+
body_pipeline = spec.get("body_pipeline")
|
|
216
|
+
|
|
217
|
+
if body is not None and body_pipeline is not None:
|
|
218
|
+
errors.append("Cannot specify both 'body' and 'body_pipeline'")
|
|
219
|
+
|
|
220
|
+
# If body is a callable (from !py tag), it's already validated at parse time
|
|
221
|
+
# If body is a list, it should be inline nodes
|
|
222
|
+
if isinstance(body, list):
|
|
223
|
+
for i, node_config in enumerate(body):
|
|
224
|
+
if not isinstance(node_config, dict):
|
|
225
|
+
errors.append(f"body[{i}] must be a node configuration dict")
|
|
226
|
+
elif "kind" not in node_config:
|
|
227
|
+
errors.append(f"body[{i}] missing required 'kind' field")
|
|
228
|
+
|
|
229
|
+
return errors
|
|
230
|
+
|
|
231
|
+
|
|
232
|
+
class YamlValidator:
|
|
233
|
+
"""Validates YAML pipeline configurations with optimized performance."""
|
|
234
|
+
|
|
235
|
+
def __init__(
    self,
    valid_node_types: set[str] | frozenset[str] | None = None,
) -> None:
    """Set up the validator, optionally with an explicit set of node types.

    Args
    ----
        valid_node_types: Node type names to accept. When None, the types
            are looked up later through ``valid_node_types``.
    """
    # Helper that performs the per-node structural spec checks
    self.schema_validator = _SchemaValidator()

    # Filled by the ``valid_node_types`` property when no explicit set was given
    self._cached_node_types: frozenset[str] | None = None

    # Freeze an explicit collection so the validator holds its own immutable copy
    if valid_node_types is None:
        self._provided_node_types = None
    else:
        self._provided_node_types = frozenset(valid_node_types)
|
|
252
|
+
|
|
253
|
+
@property
def valid_node_types(self) -> frozenset[str]:
    """Node type names this validator accepts.

    Returns
    -------
    frozenset[str]
        The explicitly provided types if any were given at construction,
        otherwise the auto-discovered types (looked up once, then cached
        on the instance)
    """
    explicit = self._provided_node_types
    if explicit is not None:
        return explicit

    # No explicit set: discover on first access and remember the result
    if self._cached_node_types is None:
        self._cached_node_types = _get_known_node_types()
    return self._cached_node_types
|
|
271
|
+
|
|
272
|
+
def validate(self, config: Any) -> ValidationReport:
    """Validate a complete YAML configuration.

    Expects declarative manifest format: {kind: Pipeline,
    spec: {nodes: [{kind, metadata, spec}]}}

    The manifest structure is checked first. If that produced any error the
    report is returned immediately; otherwise the nodes are validated and the
    node IDs / macro instance names collected there are reused for dependency
    validation.

    Args
    ----
        config: Parsed YAML configuration

    Returns
    -------
    ValidationReport
        ValidationReport with errors, warnings, and suggestions
    """
    result = ValidationReport()

    # Validate manifest structure
    self._validate_manifest_structure(config, result)

    if not result.is_valid:
        return result

    # Structure validation returns early for 'Macro' manifests without
    # checking 'spec', so 'spec' may be absent, null or a non-dict here.
    # Treat anything that is not a usable node list as "no nodes" rather
    # than crashing.
    spec = config.get("spec")
    nodes = spec.get("nodes") if isinstance(spec, dict) else None
    if not isinstance(nodes, list):
        nodes = []

    # Validate nodes and keep the IDs and macro instances for reuse
    node_ids, macro_instances = self._validate_nodes(nodes, result)

    # Reuse the collected node_ids and macro_instances for dependency validation
    self._validate_dependencies_with_cache(nodes, result, node_ids, macro_instances)

    return result
|
|
305
|
+
|
|
306
|
+
def _validate_manifest_structure(self, config: Any, result: ValidationReport) -> None:
    """Check the top-level shape of a declarative manifest.

    Each failed check records one error and stops further checking, except
    the final ``common_field_mappings`` check and the empty-nodes warning.

    Args
    ----
        config: Parsed YAML configuration
        result: ValidationReport to add errors to
    """
    if not isinstance(config, dict):
        result.add_error("Configuration must be a dictionary")
        return

    # Fields every manifest needs, checked in this order
    required_top_level = (
        (
            "kind",
            "Configuration must contain 'kind' field (declarative manifest format required)",
        ),
        ("metadata", "Configuration must contain 'metadata' field"),
    )
    for field_name, message in required_top_level:
        if field_name not in config:
            result.add_error(message)
            return

    # Macro definitions have: metadata, parameters, nodes (no spec),
    # so nothing below applies to them.
    if config.get("kind") == "Macro":
        if "nodes" not in config:
            result.add_error("Macro definition must contain 'nodes' field")
        return

    # For Pipeline and other kinds, validate spec
    if "spec" not in config:
        result.add_error("Configuration must contain 'spec' field")
        return

    spec = config["spec"]
    if not isinstance(spec, dict):
        result.add_error("'spec' field must be a dictionary")
        return

    if "nodes" not in spec:
        result.add_error("'spec' must contain 'nodes' field")
        return

    node_list = spec["nodes"]
    if not isinstance(node_list, list):
        result.add_error("'spec.nodes' field must be a list")
        return

    if not node_list:
        result.add_warning("Pipeline has no nodes defined")

    # common_field_mappings is optional, but must be a dictionary when given
    common_mappings = spec.get("common_field_mappings")
    if not (common_mappings is None or isinstance(common_mappings, dict)):
        result.add_error("'spec.common_field_mappings' must be a dictionary")
|
|
363
|
+
|
|
364
|
+
def _validate_nodes(
    self, nodes: list[dict[str, Any]], result: ValidationReport
) -> tuple[set[str], set[str]]:
    """Validate nodes and return node IDs and macro instance names.

    Expects declarative node format: {kind, metadata: {name}, spec: {dependencies}}

    Malformed entries (a node that is not a dict, a non-dict ``metadata``,
    a non-string ``kind``, a non-dict ``spec``) are reported as errors
    instead of raising. A null ``spec`` is treated as an empty one.

    Args
    ----
        nodes: Node configurations from ``spec.nodes``
        result: ValidationReport to add errors to

    Returns
    -------
    tuple[set[str], set[str]]
        Tuple of (node_ids, macro_instance_names) for caching and reuse in dependency validation
    """
    node_ids: set[str] = set()
    macro_instances: set[str] = set()
    # User-registered aliases; fetched from the resolver on first need and
    # then reused for the rest of this call.
    registered_aliases: Any = None

    for i, node in enumerate(nodes):
        if not isinstance(node, dict):
            result.add_error(f"Node {i}: Must be a dictionary")
            continue

        # Validate node has required fields
        if "kind" not in node:
            result.add_error(f"Node {i}: Missing 'kind' field")
            continue

        if "metadata" not in node:
            result.add_error(f"Node {i}: Missing 'metadata' field")
            continue

        metadata = node["metadata"]
        node_id = metadata.get("name") if isinstance(metadata, dict) else None
        if not node_id:
            result.add_error(f"Node {i}: Missing 'metadata.name'")
            continue

        # Check node ID uniqueness
        if node_id in node_ids:
            result.add_error(f"Duplicate node ID: '{node_id}'")
        node_ids.add(node_id)

        kind = node["kind"]
        if not isinstance(kind, str):
            result.add_error(f"Node '{node_id}': 'kind' must be a string")
            continue

        is_macro_invocation = kind == "macro_invocation"
        if is_macro_invocation:
            # Registered even if its spec turns out to be malformed, so
            # dependency validation still knows the name.
            macro_instances.add(node_id)

        params = node.get("spec")
        if params is None:
            params = {}
        elif not isinstance(params, dict):
            result.add_error(f"Node '{node_id}': 'spec' must be a dictionary")
            continue

        # Special case: macro_invocation is not a node type, skip node type validation
        if is_macro_invocation:
            if "macro" not in params:
                result.add_error(
                    f"Node '{node_id}': macro_invocation must specify 'spec.macro' field"
                )
            continue

        # Full module paths (e.g., hexdag.builtin.nodes.LLMNode) are resolved
        # at build time, so node type validation is skipped for them.
        if "." in kind and NAMESPACE_SEPARATOR not in kind:
            continue

        # User-registered aliases (e.g., "fn" -> "hexdag.builtin.nodes.FunctionNode")
        # are also resolved at build time via the resolver.
        if registered_aliases is None:
            from hexdag.core.resolver import get_registered_aliases

            registered_aliases = get_registered_aliases()
        if kind in registered_aliases:
            continue

        namespace, separator, node_kind = kind.partition(NAMESPACE_SEPARATOR)
        if not separator:
            namespace, node_kind = "core", kind

        # Remove '_node' suffix if present
        node_type = node_kind.removesuffix("_node")
        qualified_node_type = f"{namespace}:{node_type}"

        # Validate node type
        # Support both qualified (namespace:type) and simple (type) formats
        valid_types = self.valid_node_types
        if (
            qualified_node_type not in valid_types
            and node_type not in valid_types
            and node_kind not in valid_types
        ):
            result.add_error(
                self._format_invalid_type_error(node_id, node_type, qualified_node_type)
            )

        # Validate node-specific requirements and schema
        self._validate_node_params(node_id, node_type, params, namespace, result)

    return node_ids, macro_instances

def _format_invalid_type_error(
    self, node_id: str, node_type: str, qualified_node_type: str
) -> str:
    """Build the 'Invalid type' error message listing the valid node types."""
    by_namespace: dict[str, list[str]] = {}
    simple_types: list[str] = []

    # Iterating in sorted order keeps every per-namespace list sorted too
    for valid_type in sorted(self.valid_node_types):
        if ":" in valid_type:
            ns, nt = valid_type.split(":", 1)
            by_namespace.setdefault(ns, []).append(nt)
        else:
            # Legacy format without namespace
            simple_types.append(valid_type)

    parts: list[str] = []
    if by_namespace:
        parts.append(
            ", ".join(
                f"{ns}:[{', '.join(types)}]" for ns, types in sorted(by_namespace.items())
            )
        )
    if simple_types:
        parts.append(", ".join(simple_types))

    valid_types_str = ", ".join(parts) if parts else "none"

    # Use simple node_type in error if no valid types have namespaces (legacy mode)
    invalid_type_str = qualified_node_type if by_namespace else node_type

    return f"Node '{node_id}': Invalid type '{invalid_type_str}'. Valid types: {valid_types_str}"
|
|
487
|
+
|
|
488
|
+
def _validate_node_params(
|
|
489
|
+
self,
|
|
490
|
+
node_id: str | None,
|
|
491
|
+
node_type: str,
|
|
492
|
+
params: dict[str, Any],
|
|
493
|
+
namespace: str,
|
|
494
|
+
result: ValidationReport,
|
|
495
|
+
) -> None:
|
|
496
|
+
"""Validate node-specific parameters using basic structural validation.
|
|
497
|
+
|
|
498
|
+
Args
|
|
499
|
+
----
|
|
500
|
+
node_id: Node identifier
|
|
501
|
+
node_type: Type of node (e.g., "llm", "function")
|
|
502
|
+
params: Node spec parameters
|
|
503
|
+
namespace: Component namespace
|
|
504
|
+
result: ValidationReport to add errors to
|
|
505
|
+
"""
|
|
506
|
+
# Schema-based validation
|
|
507
|
+
schema_errors = self.schema_validator.validate_node_spec(
|
|
508
|
+
node_type, params, namespace=namespace
|
|
509
|
+
)
|
|
510
|
+
for error in schema_errors:
|
|
511
|
+
result.add_error(f"Node '{node_id}': {error}")
|
|
512
|
+
|
|
513
|
+
def _validate_dependencies_with_cache(
    self,
    nodes: list[dict[str, Any]],
    result: ValidationReport,
    node_ids: set[str],
    macro_instances: set[str],
) -> None:
    """Validate node dependencies using cached node IDs and check for cycles.

    Dependencies are read from each node's ``spec.dependencies`` field. A
    value that is not a list is treated as a single dependency. A dependency
    is accepted when it is a known node ID, or when it is a string starting
    with ``"<macro_instance>_"`` for one of the given macro instances. Every
    other entry is reported as an error on ``result`` instead of raising, so
    malformed entries (``None``, numbers, nested lists or mappings) do not
    abort validation.

    Nodes without a ``metadata.name`` are skipped.

    Parameters
    ----------
    nodes : list[dict[str, Any]]
        List of node configurations
    result : ValidationReport
        Report to add errors to
    node_ids : set[str]
        Cached set of valid node IDs from _validate_nodes
    macro_instances : set[str]
        Set of macro instance names (nodes will be generated at runtime)
    """
    # str.startswith accepts a tuple of prefixes, so build them once up front.
    # An empty tuple never matches, which covers the "no macros" case.
    macro_prefixes = tuple(f"{macro_instance}_" for macro_instance in macro_instances)

    dependency_graph = {}

    for node in nodes:
        # "or {}" tolerates an explicit null (e.g. an empty "metadata:" key in YAML).
        node_id = (node.get("metadata") or {}).get("name")
        if not node_id:
            continue

        deps = (node.get("spec") or {}).get("dependencies", [])
        if not isinstance(deps, list):
            deps = [deps]

        valid_deps = set()
        for dep in deps:
            try:
                is_known = dep in node_ids
            except TypeError:
                # Unhashable entries (nested list/mapping) can never be a node ID.
                is_known = False

            # Only strings can carry a macro prefix; guarding the type keeps
            # None or numeric entries from raising AttributeError here.
            is_macro_generated = isinstance(dep, str) and dep.startswith(macro_prefixes)

            if is_known or is_macro_generated:
                valid_deps.add(dep)
            else:
                # Neither a known node nor macro-generated: report it.
                result.add_error(f"Node '{node_id}': Dependency '{dep}' does not exist")

        dependency_graph[node_id] = valid_deps

    # Check for cycles using DirectedGraph's public static method
    if cycle_message := DirectedGraph.detect_cycle(dependency_graph):
        result.add_error(cycle_message)
|
|
@@ -0,0 +1,65 @@
|
|
|
1
|
+
"""Port interfaces for the application."""
|
|
2
|
+
|
|
3
|
+
from hexdag.core.ports.api_call import APICall
|
|
4
|
+
from hexdag.core.ports.database import (
|
|
5
|
+
ColumnSchema,
|
|
6
|
+
ColumnType,
|
|
7
|
+
DatabasePort,
|
|
8
|
+
SupportsRawSQL,
|
|
9
|
+
SupportsReadOnly,
|
|
10
|
+
SupportsStreamingQuery,
|
|
11
|
+
SupportsVectorSearch,
|
|
12
|
+
TableSchema,
|
|
13
|
+
)
|
|
14
|
+
from hexdag.core.ports.embedding import Embedding
|
|
15
|
+
from hexdag.core.ports.executor import (
|
|
16
|
+
ExecutionResult,
|
|
17
|
+
ExecutionTask,
|
|
18
|
+
ExecutorPort,
|
|
19
|
+
)
|
|
20
|
+
from hexdag.core.ports.file_storage import FileStoragePort
|
|
21
|
+
from hexdag.core.ports.healthcheck import HealthStatus
|
|
22
|
+
from hexdag.core.ports.llm import (
|
|
23
|
+
LLM,
|
|
24
|
+
ImageContent,
|
|
25
|
+
ImageInput,
|
|
26
|
+
SupportsEmbedding,
|
|
27
|
+
SupportsFunctionCalling,
|
|
28
|
+
SupportsGeneration,
|
|
29
|
+
SupportsVision,
|
|
30
|
+
VisionMessage,
|
|
31
|
+
)
|
|
32
|
+
from hexdag.core.ports.memory import Memory
|
|
33
|
+
from hexdag.core.ports.observer_manager import ObserverManagerPort
|
|
34
|
+
from hexdag.core.ports.secret import SecretPort
|
|
35
|
+
from hexdag.core.ports.tool_router import ToolRouter
|
|
36
|
+
|
|
37
|
+
# Public names of the ports package: every name imported in this module is
# re-exported here, kept in alphabetical order.
__all__ = [
    "APICall",
    "ColumnSchema",
    "ColumnType",
    "DatabasePort",
    "Embedding",  # Deprecated; still exported for backward compatibility
    "ExecutionResult",
    "ExecutionTask",
    "ExecutorPort",
    "FileStoragePort",
    "HealthStatus",
    "ImageContent",
    "ImageInput",
    "LLM",
    "Memory",
    "ObserverManagerPort",
    "SecretPort",
    "SupportsEmbedding",  # Newer: unified embedding support in the LLM port
    "SupportsFunctionCalling",
    "SupportsGeneration",  # Newer: text generation support in the LLM port
    "SupportsRawSQL",
    "SupportsReadOnly",
    "SupportsStreamingQuery",
    "SupportsVectorSearch",
    "SupportsVision",
    "TableSchema",
    "ToolRouter",
    "VisionMessage",
]
|