hexdag 0.5.0.dev1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (261) hide show
  1. hexdag/__init__.py +116 -0
  2. hexdag/__main__.py +30 -0
  3. hexdag/adapters/executors/__init__.py +5 -0
  4. hexdag/adapters/executors/local_executor.py +316 -0
  5. hexdag/builtin/__init__.py +6 -0
  6. hexdag/builtin/adapters/__init__.py +51 -0
  7. hexdag/builtin/adapters/anthropic/__init__.py +5 -0
  8. hexdag/builtin/adapters/anthropic/anthropic_adapter.py +151 -0
  9. hexdag/builtin/adapters/database/__init__.py +6 -0
  10. hexdag/builtin/adapters/database/csv/csv_adapter.py +249 -0
  11. hexdag/builtin/adapters/database/pgvector/__init__.py +5 -0
  12. hexdag/builtin/adapters/database/pgvector/pgvector_adapter.py +478 -0
  13. hexdag/builtin/adapters/database/sqlalchemy/sqlalchemy_adapter.py +252 -0
  14. hexdag/builtin/adapters/database/sqlite/__init__.py +5 -0
  15. hexdag/builtin/adapters/database/sqlite/sqlite_adapter.py +410 -0
  16. hexdag/builtin/adapters/local/README.md +59 -0
  17. hexdag/builtin/adapters/local/__init__.py +7 -0
  18. hexdag/builtin/adapters/local/local_observer_manager.py +696 -0
  19. hexdag/builtin/adapters/memory/__init__.py +47 -0
  20. hexdag/builtin/adapters/memory/file_memory_adapter.py +297 -0
  21. hexdag/builtin/adapters/memory/in_memory_memory.py +216 -0
  22. hexdag/builtin/adapters/memory/schemas.py +57 -0
  23. hexdag/builtin/adapters/memory/session_memory.py +178 -0
  24. hexdag/builtin/adapters/memory/sqlite_memory_adapter.py +215 -0
  25. hexdag/builtin/adapters/memory/state_memory.py +280 -0
  26. hexdag/builtin/adapters/mock/README.md +89 -0
  27. hexdag/builtin/adapters/mock/__init__.py +15 -0
  28. hexdag/builtin/adapters/mock/hexdag.toml +50 -0
  29. hexdag/builtin/adapters/mock/mock_database.py +225 -0
  30. hexdag/builtin/adapters/mock/mock_embedding.py +223 -0
  31. hexdag/builtin/adapters/mock/mock_llm.py +177 -0
  32. hexdag/builtin/adapters/mock/mock_tool_adapter.py +192 -0
  33. hexdag/builtin/adapters/mock/mock_tool_router.py +232 -0
  34. hexdag/builtin/adapters/openai/__init__.py +5 -0
  35. hexdag/builtin/adapters/openai/openai_adapter.py +634 -0
  36. hexdag/builtin/adapters/secret/__init__.py +7 -0
  37. hexdag/builtin/adapters/secret/local_secret_adapter.py +248 -0
  38. hexdag/builtin/adapters/unified_tool_router.py +280 -0
  39. hexdag/builtin/macros/__init__.py +17 -0
  40. hexdag/builtin/macros/conversation_agent.py +390 -0
  41. hexdag/builtin/macros/llm_macro.py +151 -0
  42. hexdag/builtin/macros/reasoning_agent.py +423 -0
  43. hexdag/builtin/macros/tool_macro.py +380 -0
  44. hexdag/builtin/nodes/__init__.py +38 -0
  45. hexdag/builtin/nodes/_discovery.py +123 -0
  46. hexdag/builtin/nodes/agent_node.py +696 -0
  47. hexdag/builtin/nodes/base_node_factory.py +242 -0
  48. hexdag/builtin/nodes/composite_node.py +926 -0
  49. hexdag/builtin/nodes/data_node.py +201 -0
  50. hexdag/builtin/nodes/expression_node.py +487 -0
  51. hexdag/builtin/nodes/function_node.py +454 -0
  52. hexdag/builtin/nodes/llm_node.py +491 -0
  53. hexdag/builtin/nodes/loop_node.py +920 -0
  54. hexdag/builtin/nodes/mapped_input.py +518 -0
  55. hexdag/builtin/nodes/port_call_node.py +269 -0
  56. hexdag/builtin/nodes/tool_call_node.py +195 -0
  57. hexdag/builtin/nodes/tool_utils.py +390 -0
  58. hexdag/builtin/prompts/__init__.py +68 -0
  59. hexdag/builtin/prompts/base.py +422 -0
  60. hexdag/builtin/prompts/chat_prompts.py +303 -0
  61. hexdag/builtin/prompts/error_correction_prompts.py +320 -0
  62. hexdag/builtin/prompts/tool_prompts.py +160 -0
  63. hexdag/builtin/tools/builtin_tools.py +84 -0
  64. hexdag/builtin/tools/database_tools.py +164 -0
  65. hexdag/cli/__init__.py +17 -0
  66. hexdag/cli/__main__.py +7 -0
  67. hexdag/cli/commands/__init__.py +27 -0
  68. hexdag/cli/commands/build_cmd.py +812 -0
  69. hexdag/cli/commands/create_cmd.py +208 -0
  70. hexdag/cli/commands/docs_cmd.py +293 -0
  71. hexdag/cli/commands/generate_types_cmd.py +252 -0
  72. hexdag/cli/commands/init_cmd.py +188 -0
  73. hexdag/cli/commands/pipeline_cmd.py +494 -0
  74. hexdag/cli/commands/plugin_dev_cmd.py +529 -0
  75. hexdag/cli/commands/plugins_cmd.py +441 -0
  76. hexdag/cli/commands/studio_cmd.py +101 -0
  77. hexdag/cli/commands/validate_cmd.py +221 -0
  78. hexdag/cli/main.py +84 -0
  79. hexdag/core/__init__.py +83 -0
  80. hexdag/core/config/__init__.py +20 -0
  81. hexdag/core/config/loader.py +479 -0
  82. hexdag/core/config/models.py +150 -0
  83. hexdag/core/configurable.py +294 -0
  84. hexdag/core/context/__init__.py +37 -0
  85. hexdag/core/context/execution_context.py +378 -0
  86. hexdag/core/docs/__init__.py +26 -0
  87. hexdag/core/docs/extractors.py +678 -0
  88. hexdag/core/docs/generators.py +890 -0
  89. hexdag/core/docs/models.py +120 -0
  90. hexdag/core/domain/__init__.py +10 -0
  91. hexdag/core/domain/dag.py +1225 -0
  92. hexdag/core/exceptions.py +234 -0
  93. hexdag/core/expression_parser.py +569 -0
  94. hexdag/core/logging.py +449 -0
  95. hexdag/core/models/__init__.py +17 -0
  96. hexdag/core/models/base.py +138 -0
  97. hexdag/core/orchestration/__init__.py +46 -0
  98. hexdag/core/orchestration/body_executor.py +481 -0
  99. hexdag/core/orchestration/components/__init__.py +97 -0
  100. hexdag/core/orchestration/components/adapter_lifecycle_manager.py +113 -0
  101. hexdag/core/orchestration/components/checkpoint_manager.py +134 -0
  102. hexdag/core/orchestration/components/execution_coordinator.py +360 -0
  103. hexdag/core/orchestration/components/health_check_manager.py +176 -0
  104. hexdag/core/orchestration/components/input_mapper.py +143 -0
  105. hexdag/core/orchestration/components/lifecycle_manager.py +583 -0
  106. hexdag/core/orchestration/components/node_executor.py +377 -0
  107. hexdag/core/orchestration/components/secret_manager.py +202 -0
  108. hexdag/core/orchestration/components/wave_executor.py +158 -0
  109. hexdag/core/orchestration/constants.py +17 -0
  110. hexdag/core/orchestration/events/README.md +312 -0
  111. hexdag/core/orchestration/events/__init__.py +104 -0
  112. hexdag/core/orchestration/events/batching.py +330 -0
  113. hexdag/core/orchestration/events/decorators.py +139 -0
  114. hexdag/core/orchestration/events/events.py +573 -0
  115. hexdag/core/orchestration/events/observers/__init__.py +30 -0
  116. hexdag/core/orchestration/events/observers/core_observers.py +690 -0
  117. hexdag/core/orchestration/events/observers/models.py +111 -0
  118. hexdag/core/orchestration/events/taxonomy.py +269 -0
  119. hexdag/core/orchestration/hook_context.py +237 -0
  120. hexdag/core/orchestration/hooks.py +437 -0
  121. hexdag/core/orchestration/models.py +418 -0
  122. hexdag/core/orchestration/orchestrator.py +910 -0
  123. hexdag/core/orchestration/orchestrator_factory.py +275 -0
  124. hexdag/core/orchestration/port_wrappers.py +327 -0
  125. hexdag/core/orchestration/prompt/__init__.py +32 -0
  126. hexdag/core/orchestration/prompt/template.py +332 -0
  127. hexdag/core/pipeline_builder/__init__.py +21 -0
  128. hexdag/core/pipeline_builder/component_instantiator.py +386 -0
  129. hexdag/core/pipeline_builder/include_tag.py +265 -0
  130. hexdag/core/pipeline_builder/pipeline_config.py +133 -0
  131. hexdag/core/pipeline_builder/py_tag.py +223 -0
  132. hexdag/core/pipeline_builder/tag_discovery.py +268 -0
  133. hexdag/core/pipeline_builder/yaml_builder.py +1196 -0
  134. hexdag/core/pipeline_builder/yaml_validator.py +569 -0
  135. hexdag/core/ports/__init__.py +65 -0
  136. hexdag/core/ports/api_call.py +133 -0
  137. hexdag/core/ports/database.py +489 -0
  138. hexdag/core/ports/embedding.py +215 -0
  139. hexdag/core/ports/executor.py +237 -0
  140. hexdag/core/ports/file_storage.py +117 -0
  141. hexdag/core/ports/healthcheck.py +87 -0
  142. hexdag/core/ports/llm.py +551 -0
  143. hexdag/core/ports/memory.py +70 -0
  144. hexdag/core/ports/observer_manager.py +130 -0
  145. hexdag/core/ports/secret.py +145 -0
  146. hexdag/core/ports/tool_router.py +94 -0
  147. hexdag/core/ports_builder.py +623 -0
  148. hexdag/core/protocols.py +273 -0
  149. hexdag/core/resolver.py +304 -0
  150. hexdag/core/schema/__init__.py +9 -0
  151. hexdag/core/schema/generator.py +742 -0
  152. hexdag/core/secrets.py +242 -0
  153. hexdag/core/types.py +413 -0
  154. hexdag/core/utils/async_warnings.py +206 -0
  155. hexdag/core/utils/schema_conversion.py +78 -0
  156. hexdag/core/utils/sql_validation.py +86 -0
  157. hexdag/core/validation/secure_json.py +148 -0
  158. hexdag/core/yaml_macro.py +517 -0
  159. hexdag/mcp_server.py +3120 -0
  160. hexdag/studio/__init__.py +10 -0
  161. hexdag/studio/build_ui.py +92 -0
  162. hexdag/studio/server/__init__.py +1 -0
  163. hexdag/studio/server/main.py +100 -0
  164. hexdag/studio/server/routes/__init__.py +9 -0
  165. hexdag/studio/server/routes/execute.py +208 -0
  166. hexdag/studio/server/routes/export.py +558 -0
  167. hexdag/studio/server/routes/files.py +207 -0
  168. hexdag/studio/server/routes/plugins.py +419 -0
  169. hexdag/studio/server/routes/validate.py +220 -0
  170. hexdag/studio/ui/index.html +13 -0
  171. hexdag/studio/ui/package-lock.json +2992 -0
  172. hexdag/studio/ui/package.json +31 -0
  173. hexdag/studio/ui/postcss.config.js +6 -0
  174. hexdag/studio/ui/public/hexdag.svg +5 -0
  175. hexdag/studio/ui/src/App.tsx +251 -0
  176. hexdag/studio/ui/src/components/Canvas.tsx +408 -0
  177. hexdag/studio/ui/src/components/ContextMenu.tsx +187 -0
  178. hexdag/studio/ui/src/components/FileBrowser.tsx +123 -0
  179. hexdag/studio/ui/src/components/Header.tsx +181 -0
  180. hexdag/studio/ui/src/components/HexdagNode.tsx +193 -0
  181. hexdag/studio/ui/src/components/NodeInspector.tsx +512 -0
  182. hexdag/studio/ui/src/components/NodePalette.tsx +262 -0
  183. hexdag/studio/ui/src/components/NodePortsSection.tsx +403 -0
  184. hexdag/studio/ui/src/components/PluginManager.tsx +347 -0
  185. hexdag/studio/ui/src/components/PortsEditor.tsx +481 -0
  186. hexdag/studio/ui/src/components/PythonEditor.tsx +195 -0
  187. hexdag/studio/ui/src/components/ValidationPanel.tsx +105 -0
  188. hexdag/studio/ui/src/components/YamlEditor.tsx +196 -0
  189. hexdag/studio/ui/src/components/index.ts +8 -0
  190. hexdag/studio/ui/src/index.css +92 -0
  191. hexdag/studio/ui/src/main.tsx +10 -0
  192. hexdag/studio/ui/src/types/index.ts +123 -0
  193. hexdag/studio/ui/src/vite-env.d.ts +1 -0
  194. hexdag/studio/ui/tailwind.config.js +29 -0
  195. hexdag/studio/ui/tsconfig.json +37 -0
  196. hexdag/studio/ui/tsconfig.node.json +13 -0
  197. hexdag/studio/ui/vite.config.ts +35 -0
  198. hexdag/visualization/__init__.py +69 -0
  199. hexdag/visualization/dag_visualizer.py +1020 -0
  200. hexdag-0.5.0.dev1.dist-info/METADATA +369 -0
  201. hexdag-0.5.0.dev1.dist-info/RECORD +261 -0
  202. hexdag-0.5.0.dev1.dist-info/WHEEL +4 -0
  203. hexdag-0.5.0.dev1.dist-info/entry_points.txt +4 -0
  204. hexdag-0.5.0.dev1.dist-info/licenses/LICENSE +190 -0
  205. hexdag_plugins/.gitignore +43 -0
  206. hexdag_plugins/README.md +73 -0
  207. hexdag_plugins/__init__.py +1 -0
  208. hexdag_plugins/azure/LICENSE +21 -0
  209. hexdag_plugins/azure/README.md +414 -0
  210. hexdag_plugins/azure/__init__.py +21 -0
  211. hexdag_plugins/azure/azure_blob_adapter.py +450 -0
  212. hexdag_plugins/azure/azure_cosmos_adapter.py +383 -0
  213. hexdag_plugins/azure/azure_keyvault_adapter.py +314 -0
  214. hexdag_plugins/azure/azure_openai_adapter.py +415 -0
  215. hexdag_plugins/azure/pyproject.toml +107 -0
  216. hexdag_plugins/azure/tests/__init__.py +1 -0
  217. hexdag_plugins/azure/tests/test_azure_blob_adapter.py +350 -0
  218. hexdag_plugins/azure/tests/test_azure_cosmos_adapter.py +323 -0
  219. hexdag_plugins/azure/tests/test_azure_keyvault_adapter.py +330 -0
  220. hexdag_plugins/azure/tests/test_azure_openai_adapter.py +329 -0
  221. hexdag_plugins/hexdag_etl/README.md +168 -0
  222. hexdag_plugins/hexdag_etl/__init__.py +53 -0
  223. hexdag_plugins/hexdag_etl/examples/01_simple_pandas_transform.py +270 -0
  224. hexdag_plugins/hexdag_etl/examples/02_simple_pandas_only.py +149 -0
  225. hexdag_plugins/hexdag_etl/examples/03_file_io_pipeline.py +109 -0
  226. hexdag_plugins/hexdag_etl/examples/test_pandas_transform.py +84 -0
  227. hexdag_plugins/hexdag_etl/hexdag.toml +25 -0
  228. hexdag_plugins/hexdag_etl/hexdag_etl/__init__.py +48 -0
  229. hexdag_plugins/hexdag_etl/hexdag_etl/nodes/__init__.py +13 -0
  230. hexdag_plugins/hexdag_etl/hexdag_etl/nodes/api_extract.py +230 -0
  231. hexdag_plugins/hexdag_etl/hexdag_etl/nodes/base_node_factory.py +181 -0
  232. hexdag_plugins/hexdag_etl/hexdag_etl/nodes/file_io.py +415 -0
  233. hexdag_plugins/hexdag_etl/hexdag_etl/nodes/outlook.py +492 -0
  234. hexdag_plugins/hexdag_etl/hexdag_etl/nodes/pandas_transform.py +563 -0
  235. hexdag_plugins/hexdag_etl/hexdag_etl/nodes/sql_extract_load.py +112 -0
  236. hexdag_plugins/hexdag_etl/pyproject.toml +82 -0
  237. hexdag_plugins/hexdag_etl/test_transform.py +54 -0
  238. hexdag_plugins/hexdag_etl/tests/test_plugin_integration.py +62 -0
  239. hexdag_plugins/mysql_adapter/LICENSE +21 -0
  240. hexdag_plugins/mysql_adapter/README.md +224 -0
  241. hexdag_plugins/mysql_adapter/__init__.py +6 -0
  242. hexdag_plugins/mysql_adapter/mysql_adapter.py +408 -0
  243. hexdag_plugins/mysql_adapter/pyproject.toml +93 -0
  244. hexdag_plugins/mysql_adapter/tests/test_mysql_adapter.py +259 -0
  245. hexdag_plugins/storage/README.md +184 -0
  246. hexdag_plugins/storage/__init__.py +19 -0
  247. hexdag_plugins/storage/file/__init__.py +5 -0
  248. hexdag_plugins/storage/file/local.py +325 -0
  249. hexdag_plugins/storage/ports/__init__.py +5 -0
  250. hexdag_plugins/storage/ports/vector_store.py +236 -0
  251. hexdag_plugins/storage/sql/__init__.py +7 -0
  252. hexdag_plugins/storage/sql/base.py +187 -0
  253. hexdag_plugins/storage/sql/mysql.py +27 -0
  254. hexdag_plugins/storage/sql/postgresql.py +27 -0
  255. hexdag_plugins/storage/tests/__init__.py +1 -0
  256. hexdag_plugins/storage/tests/test_local_file_storage.py +161 -0
  257. hexdag_plugins/storage/tests/test_sql_adapters.py +212 -0
  258. hexdag_plugins/storage/vector/__init__.py +7 -0
  259. hexdag_plugins/storage/vector/chromadb.py +223 -0
  260. hexdag_plugins/storage/vector/in_memory.py +285 -0
  261. hexdag_plugins/storage/vector/pgvector.py +502 -0
@@ -0,0 +1,569 @@
1
+ """YAML Pipeline Validator - Validates pipeline configurations."""
2
+
3
+ from typing import Any
4
+
5
+ from hexdag.core.domain.dag import DirectedGraph
6
+
7
+ # Separator for namespace:name format
8
+ NAMESPACE_SEPARATOR = ":"
9
+
10
+ # Lazy-loaded known node types (derived from resolver's builtin aliases)
11
+ _known_node_types: frozenset[str] | None = None
12
+
13
+
14
def _get_known_node_types() -> frozenset[str]:
    """Return the known node type names, loading them on first use.

    The names are the keys of the mapping returned by
    ``hexdag.core.resolver.get_builtin_aliases``. The resolver is imported
    inside the function rather than at module import time, and the resulting
    frozenset is stored in the module-level ``_known_node_types`` cache so
    later calls return the same object without querying the resolver again.

    Returns
    -------
    frozenset[str]
        Names of the builtin node type aliases
    """
    global _known_node_types

    # Fast path: the cache has already been populated by an earlier call.
    if _known_node_types is not None:
        return _known_node_types

    from hexdag.core.resolver import get_builtin_aliases

    alias_names = get_builtin_aliases().keys()
    _known_node_types = frozenset(alias_names)
    return _known_node_types
27
+
28
+
29
+ # Keep KNOWN_NODE_TYPES as a module-level reference for backwards compatibility
30
+ # with tests that may import it directly (note: it is evaluated eagerly at import time).
31
+ KNOWN_NODE_TYPES = _get_known_node_types()
32
+
33
+
34
class ValidationReport:
    """Collects the errors, warnings and suggestions found during validation.

    ``__slots__`` is declared so instances carry no per-instance ``__dict__``.
    """

    __slots__ = ("_errors", "_warnings", "_suggestions")

    def __init__(self) -> None:
        """Create an empty report with no messages recorded."""
        self._errors: list[str] = []
        self._warnings: list[str] = []
        self._suggestions: list[str] = []

    # -- recording -----------------------------------------------------

    def add_error(self, message: str) -> None:
        """Record an error; the report is no longer valid afterwards."""
        self._errors.append(message)

    def add_warning(self, message: str) -> None:
        """Record a warning (does not affect ``is_valid``)."""
        self._warnings.append(message)

    def add_suggestion(self, message: str) -> None:
        """Record a suggestion (does not affect ``is_valid``)."""
        self._suggestions.append(message)

    # -- inspection ----------------------------------------------------

    @property
    def is_valid(self) -> bool:
        """Tell whether validation passed.

        Returns
        -------
        bool
            True when no error has been recorded, False otherwise
        """
        return not self._errors

    @property
    def errors(self) -> list[str]:
        """Recorded error messages.

        Returns
        -------
        list[str]
            The report's own error list (not a copy)
        """
        return self._errors

    @property
    def warnings(self) -> list[str]:
        """Recorded warning messages.

        Returns
        -------
        list[str]
            The report's own warning list (not a copy)
        """
        return self._warnings

    @property
    def suggestions(self) -> list[str]:
        """Recorded suggestion messages.

        Returns
        -------
        list[str]
            The report's own suggestion list (not a copy)
        """
        return self._suggestions
100
+
101
+
102
+ class _SchemaValidator:
103
+ """Validates YAML node specs against known schemas.
104
+
105
+ Since we no longer have a registry, schema validation is simplified
106
+ to basic structural validation.
107
+
108
+ Note: This is an internal class. Use YamlValidator for public validation API.
109
+ """
110
+
111
+ def validate_node_spec(
112
+ self,
113
+ node_type: str,
114
+ spec: dict[str, Any],
115
+ namespace: str = "core",
116
+ ) -> list[str]:
117
+ """Validate a node's spec with basic structural checks.
118
+
119
+ Args
120
+ ----
121
+ node_type: Type of node (e.g., "llm", "agent", "function")
122
+ spec: Node specification from YAML manifest
123
+ namespace: Component namespace (default: "core")
124
+
125
+ Returns
126
+ -------
127
+ List of validation error messages (empty if valid)
128
+ """
129
+ # Basic validation - without registry, we can only do structural checks
130
+ errors: list[str] = []
131
+
132
+ # LLM nodes require template or prompt_template
133
+ if (
134
+ node_type in ("llm", "llm_node")
135
+ and "template" not in spec
136
+ and "prompt_template" not in spec
137
+ ):
138
+ errors.append("Missing required field 'template' (or 'prompt_template')")
139
+
140
+ # Prompt nodes require template
141
+ if (
142
+ node_type in ("prompt", "prompt_node")
143
+ and "template" not in spec
144
+ and "prompt_ref" not in spec
145
+ ):
146
+ errors.append("Missing required field 'template' or 'prompt_ref'")
147
+
148
+ # Agent nodes require initial_prompt_template or main_prompt
149
+ if (
150
+ node_type in ("agent", "agent_node")
151
+ and "initial_prompt_template" not in spec
152
+ and "main_prompt" not in spec
153
+ ):
154
+ errors.append("Missing required field 'initial_prompt_template' (or 'main_prompt')")
155
+
156
+ # Function nodes require fn
157
+ if node_type in ("function", "function_node") and "fn" not in spec:
158
+ errors.append("Missing required field 'fn'")
159
+
160
+ # Composite nodes require mode
161
+ if node_type in ("composite", "composite_node"):
162
+ errors.extend(self._validate_composite_spec(spec))
163
+
164
+ return errors
165
+
166
+ def _validate_composite_spec(self, spec: dict[str, Any]) -> list[str]:
167
+ """Validate composite_node specific requirements.
168
+
169
+ Parameters
170
+ ----------
171
+ spec : dict[str, Any]
172
+ Node specification from YAML manifest
173
+
174
+ Returns
175
+ -------
176
+ list[str]
177
+ List of validation error messages
178
+ """
179
+ errors: list[str] = []
180
+
181
+ if "mode" not in spec:
182
+ errors.append("Missing required field 'mode'")
183
+ return errors
184
+
185
+ mode = spec.get("mode")
186
+ valid_modes = ("while", "for-each", "times", "if-else", "switch")
187
+ if mode not in valid_modes:
188
+ errors.append(f"Invalid mode '{mode}'. Valid modes: {', '.join(valid_modes)}")
189
+ return errors
190
+
191
+ # Mode-specific validation
192
+ match mode:
193
+ case "while":
194
+ if "condition" not in spec:
195
+ errors.append("Mode 'while' requires 'condition' field")
196
+ case "for-each":
197
+ if "items" not in spec:
198
+ errors.append("Mode 'for-each' requires 'items' field")
199
+ case "times":
200
+ if "count" not in spec:
201
+ errors.append("Mode 'times' requires 'count' field")
202
+ elif not isinstance(spec.get("count"), int):
203
+ errors.append("Field 'count' must be an integer")
204
+ case "if-else":
205
+ if "condition" not in spec:
206
+ errors.append("Mode 'if-else' requires 'condition' field")
207
+ case "switch":
208
+ if "branches" not in spec:
209
+ errors.append("Mode 'switch' requires 'branches' field")
210
+ elif not isinstance(spec.get("branches"), list):
211
+ errors.append("Field 'branches' must be a list")
212
+
213
+ # Validate body field if present (can be string, list, or callable from !py)
214
+ body = spec.get("body")
215
+ body_pipeline = spec.get("body_pipeline")
216
+
217
+ if body is not None and body_pipeline is not None:
218
+ errors.append("Cannot specify both 'body' and 'body_pipeline'")
219
+
220
+ # If body is a callable (from !py tag), it's already validated at parse time
221
+ # If body is a list, it should be inline nodes
222
+ if isinstance(body, list):
223
+ for i, node_config in enumerate(body):
224
+ if not isinstance(node_config, dict):
225
+ errors.append(f"body[{i}] must be a node configuration dict")
226
+ elif "kind" not in node_config:
227
+ errors.append(f"body[{i}] missing required 'kind' field")
228
+
229
+ return errors
230
+
231
+
232
class YamlValidator:
    """Validates parsed YAML pipeline configurations.

    Validation never raises on malformed input produced by a YAML parser
    (null values, scalars where mappings are expected, ...); such problems
    are reported as errors on the returned ``ValidationReport``.
    """

    def __init__(
        self,
        valid_node_types: set[str] | frozenset[str] | None = None,
    ) -> None:
        """Initialize validator with configurable node types.

        Args
        ----
        valid_node_types: Set of valid node type names. If None, the types
            returned by ``_get_known_node_types()`` are used.
        """
        self._provided_node_types = (
            frozenset(valid_node_types) if valid_node_types is not None else None
        )
        self._cached_node_types: frozenset[str] | None = None

        # Schema validator for spec validation
        self.schema_validator = _SchemaValidator()

    @property
    def valid_node_types(self) -> frozenset[str]:
        """Get valid node types.

        Returns
        -------
        frozenset[str]
            The explicitly provided node types if any were given to the
            constructor, otherwise the auto-discovered ones
        """
        if self._provided_node_types is not None:
            return self._provided_node_types

        # Discover once per validator instance and reuse afterwards.
        if self._cached_node_types is None:
            self._cached_node_types = _get_known_node_types()

        return self._cached_node_types

    def validate(self, config: Any) -> ValidationReport:
        """Validate a complete YAML configuration.

        Expects declarative manifest format: {kind: Pipeline,
        spec: {nodes: [{kind, metadata, spec}]}}

        Args
        ----
        config: Parsed YAML configuration

        Returns
        -------
        ValidationReport
            ValidationReport with errors, warnings, and suggestions
        """
        result = ValidationReport()

        self._validate_manifest_structure(config, result)
        if not result.is_valid:
            return result

        # Macro manifests pass the structure check without a 'spec', so the
        # shape of 'spec' / 'spec.nodes' is not guaranteed at this point.
        spec = config.get("spec")
        nodes = spec.get("nodes") if isinstance(spec, dict) else None
        if not isinstance(nodes, list):
            nodes = []

        # Node IDs and macro instance names are collected once and reused
        # for dependency validation.
        node_ids, macro_instances = self._validate_nodes(nodes, result)
        self._validate_dependencies_with_cache(nodes, result, node_ids, macro_instances)

        return result

    def _validate_manifest_structure(self, config: Any, result: ValidationReport) -> None:
        """Validate declarative manifest YAML structure.

        Stops at the first structural error found.

        Args
        ----
        config: Parsed YAML configuration
        result: ValidationReport to add errors to
        """
        if not isinstance(config, dict):
            result.add_error("Configuration must be a dictionary")
            return

        if "kind" not in config:
            result.add_error(
                "Configuration must contain 'kind' field (declarative manifest format required)"
            )
            return

        if "metadata" not in config:
            result.add_error("Configuration must contain 'metadata' field")
            return

        # Macro definitions have a different structure: metadata, parameters
        # and nodes at the top level, with no 'spec' field.
        if config.get("kind") == "Macro":
            if "nodes" not in config:
                result.add_error("Macro definition must contain 'nodes' field")
            return

        # For Pipeline and other kinds, validate spec
        if "spec" not in config:
            result.add_error("Configuration must contain 'spec' field")
            return

        spec = config.get("spec", {})
        if not isinstance(spec, dict):
            result.add_error("'spec' field must be a dictionary")
            return

        if "nodes" not in spec:
            result.add_error("'spec' must contain 'nodes' field")
            return

        if not isinstance(spec["nodes"], list):
            result.add_error("'spec.nodes' field must be a list")
            return

        if not spec["nodes"]:
            result.add_warning("Pipeline has no nodes defined")

        # Validate common_field_mappings structure if present
        common_mappings = spec.get("common_field_mappings")
        if common_mappings is not None and not isinstance(common_mappings, dict):
            result.add_error("'spec.common_field_mappings' must be a dictionary")

    @staticmethod
    def _node_name(node: Any) -> Any:
        """Return ``metadata.name`` of a node entry, or None if unavailable."""
        if not isinstance(node, dict):
            return None
        metadata = node.get("metadata")
        return metadata.get("name") if isinstance(metadata, dict) else None

    @staticmethod
    def _node_spec(node: dict[str, Any]) -> dict[str, Any] | None:
        """Return the node's spec, ``{}`` if absent or null, None if not a mapping."""
        spec = node.get("spec")
        if spec is None:
            return {}
        return spec if isinstance(spec, dict) else None

    def _describe_valid_types(self) -> tuple[str, bool]:
        """Build the "Valid types" text and tell whether any type is namespaced."""
        namespaced: dict[str, list[str]] = {}
        plain: list[str] = []

        for valid_type in sorted(self.valid_node_types):
            ns, sep, name = valid_type.partition(":")
            if sep:
                namespaced.setdefault(ns, []).append(name)
            else:
                # Legacy format without namespace
                plain.append(valid_type)

        parts = []
        if namespaced:
            parts.append(
                ", ".join(
                    f"{ns}:[{', '.join(names)}]" for ns, names in sorted(namespaced.items())
                )
            )
        if plain:
            parts.append(", ".join(plain))

        return (", ".join(parts) if parts else "none"), bool(namespaced)

    def _validate_nodes(
        self, nodes: list[dict[str, Any]], result: ValidationReport
    ) -> tuple[set[str], set[str]]:
        """Validate nodes and return node IDs and macro instance names.

        Expects declarative node format: {kind, metadata: {name}, spec: {dependencies}}

        Returns
        -------
        tuple[set[str], set[str]]
            Tuple of (node_ids, macro_instance_names) for reuse in dependency validation
        """
        # Imported here (not at module level) like the other resolver lookups
        # in this module; looked up once instead of once per node.
        from hexdag.core.resolver import get_registered_aliases

        registered_aliases = get_registered_aliases()

        node_ids: set[str] = set()
        macro_instances: set[str] = set()

        for i, node in enumerate(nodes):
            if not isinstance(node, dict):
                result.add_error(f"Node {i}: Must be a dictionary")
                continue

            if "kind" not in node:
                result.add_error(f"Node {i}: Missing 'kind' field")
                continue

            if "metadata" not in node:
                result.add_error(f"Node {i}: Missing 'metadata' field")
                continue

            node_id = self._node_name(node)
            if not node_id:
                result.add_error(f"Node {i}: Missing 'metadata.name'")
                continue

            # Duplicates are reported but the ID stays registered so that
            # dependencies on it still resolve.
            if node_id in node_ids:
                result.add_error(f"Duplicate node ID: '{node_id}'")
            node_ids.add(node_id)

            spec = self._node_spec(node)
            if spec is None:
                result.add_error(f"Node '{node_id}': 'spec' must be a dictionary")

            kind = node.get("kind", "")
            if not isinstance(kind, str):
                result.add_error(f"Node '{node_id}': 'kind' must be a string")
                continue

            # Special case: macro_invocation is not a node type, skip node type validation
            if kind == "macro_invocation":
                if spec is not None and "macro" not in spec:
                    result.add_error(
                        f"Node '{node_id}': macro_invocation must specify 'spec.macro' field"
                    )
                macro_instances.add(node_id)
                continue

            # Full module paths (e.g., hexdag.builtin.nodes.LLMNode) are not
            # checked against the known node types.
            if "." in kind and ":" not in kind:
                continue

            # User-registered aliases are likewise not checked here.
            if kind in registered_aliases:
                continue

            if NAMESPACE_SEPARATOR in kind:
                namespace, node_kind = kind.split(NAMESPACE_SEPARATOR, 1)
            else:
                namespace = "core"
                node_kind = kind

            # Remove '_node' suffix if present
            node_type = node_kind[:-5] if node_kind.endswith("_node") else node_kind
            qualified_node_type = f"{namespace}:{node_type}"

            # Support both qualified (namespace:type) and simple (type) formats
            valid_types = self.valid_node_types
            if (
                qualified_node_type not in valid_types
                and node_type not in valid_types
                and node_kind not in valid_types
            ):
                valid_types_str, has_namespaced = self._describe_valid_types()

                # Use simple node_type in error if no valid types have namespaces (legacy mode)
                invalid_type_str = qualified_node_type if has_namespaced else node_type

                result.add_error(
                    f"Node '{node_id}': Invalid type '{invalid_type_str}'. "
                    f"Valid types: {valid_types_str}"
                )

            # A malformed spec was already reported above; checking required
            # fields against it would only add misleading follow-up errors.
            if spec is not None:
                self._validate_node_params(node_id, node_type, spec, namespace, result)

        return node_ids, macro_instances

    def _validate_node_params(
        self,
        node_id: str | None,
        node_type: str,
        params: dict[str, Any],
        namespace: str,
        result: ValidationReport,
    ) -> None:
        """Validate node-specific parameters using basic structural validation.

        Args
        ----
        node_id: Node identifier
        node_type: Type of node (e.g., "llm", "function")
        params: Node spec parameters
        namespace: Component namespace
        result: ValidationReport to add errors to
        """
        schema_errors = self.schema_validator.validate_node_spec(
            node_type, params, namespace=namespace
        )
        for error in schema_errors:
            result.add_error(f"Node '{node_id}': {error}")

    @staticmethod
    def _is_known_node(dep: Any, node_ids: set[str]) -> bool:
        """Membership test that tolerates unhashable dependency values."""
        try:
            return dep in node_ids
        except TypeError:
            # e.g. a nested list or mapping given as a dependency
            return False

    def _validate_dependencies_with_cache(
        self,
        nodes: list[dict[str, Any]],
        result: ValidationReport,
        node_ids: set[str],
        macro_instances: set[str],
    ) -> None:
        """Validate node dependencies using cached node IDs and check for cycles.

        Dependencies are in spec.dependencies field. A missing or null
        ``dependencies`` value means "no dependencies"; a single non-list
        value is treated as a one-element list.

        Parameters
        ----------
        nodes : list[dict[str, Any]]
            List of node configurations
        result : ValidationReport
            Report to add errors to
        node_ids : set[str]
            Cached set of valid node IDs from _validate_nodes
        macro_instances : set[str]
            Set of macro instance names; a dependency named
            ``<macro_instance>_...`` is accepted without being in node_ids
        """
        # Prefixes identifying nodes generated by a macro instance.
        macro_prefixes = tuple(f"{macro_instance}_" for macro_instance in macro_instances)

        dependency_graph = {}

        for node in nodes:
            node_id = self._node_name(node)
            if not node_id:
                continue

            spec = node.get("spec")
            deps = spec.get("dependencies") if isinstance(spec, dict) else None

            if deps is None:
                deps = []
            elif not isinstance(deps, list):
                deps = [deps]

            valid_deps = set()
            for dep in deps:
                if self._is_known_node(dep, node_ids):
                    valid_deps.add(dep)
                    continue

                # str.startswith accepts a tuple; an empty tuple never matches.
                if isinstance(dep, str) and dep.startswith(macro_prefixes):
                    valid_deps.add(dep)
                    continue

                # If not a known node and not macro-generated, it's an error
                result.add_error(f"Node '{node_id}': Dependency '{dep}' does not exist")

            dependency_graph[node_id] = valid_deps

        # Check for cycles using DirectedGraph's public static method
        if cycle_message := DirectedGraph.detect_cycle(dependency_graph):
            result.add_error(cycle_message)
@@ -0,0 +1,65 @@
1
+ """Port interfaces for the application."""
2
+
3
+ from hexdag.core.ports.api_call import APICall
4
+ from hexdag.core.ports.database import (
5
+ ColumnSchema,
6
+ ColumnType,
7
+ DatabasePort,
8
+ SupportsRawSQL,
9
+ SupportsReadOnly,
10
+ SupportsStreamingQuery,
11
+ SupportsVectorSearch,
12
+ TableSchema,
13
+ )
14
+ from hexdag.core.ports.embedding import Embedding
15
+ from hexdag.core.ports.executor import (
16
+ ExecutionResult,
17
+ ExecutionTask,
18
+ ExecutorPort,
19
+ )
20
+ from hexdag.core.ports.file_storage import FileStoragePort
21
+ from hexdag.core.ports.healthcheck import HealthStatus
22
+ from hexdag.core.ports.llm import (
23
+ LLM,
24
+ ImageContent,
25
+ ImageInput,
26
+ SupportsEmbedding,
27
+ SupportsFunctionCalling,
28
+ SupportsGeneration,
29
+ SupportsVision,
30
+ VisionMessage,
31
+ )
32
+ from hexdag.core.ports.memory import Memory
33
+ from hexdag.core.ports.observer_manager import ObserverManagerPort
34
+ from hexdag.core.ports.secret import SecretPort
35
+ from hexdag.core.ports.tool_router import ToolRouter
36
+
37
+ __all__ = [
38
+ "APICall",
39
+ "ColumnSchema",
40
+ "ColumnType",
41
+ "DatabasePort",
42
+ "Embedding", # Deprecated - kept for backward compatibility
43
+ "ExecutionResult",
44
+ "ExecutionTask",
45
+ "ExecutorPort",
46
+ "FileStoragePort",
47
+ "HealthStatus",
48
+ "ImageContent",
49
+ "ImageInput",
50
+ "LLM",
51
+ "Memory",
52
+ "ObserverManagerPort",
53
+ "SecretPort",
54
+ "SupportsEmbedding", # New - unified embedding support in LLM port
55
+ "SupportsFunctionCalling",
56
+ "SupportsGeneration", # New - text generation support in LLM port
57
+ "SupportsRawSQL",
58
+ "SupportsReadOnly",
59
+ "SupportsStreamingQuery",
60
+ "SupportsVectorSearch",
61
+ "SupportsVision",
62
+ "TableSchema",
63
+ "ToolRouter",
64
+ "VisionMessage",
65
+ ]