hexdag 0.5.0.dev1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (261) hide show
  1. hexdag/__init__.py +116 -0
  2. hexdag/__main__.py +30 -0
  3. hexdag/adapters/executors/__init__.py +5 -0
  4. hexdag/adapters/executors/local_executor.py +316 -0
  5. hexdag/builtin/__init__.py +6 -0
  6. hexdag/builtin/adapters/__init__.py +51 -0
  7. hexdag/builtin/adapters/anthropic/__init__.py +5 -0
  8. hexdag/builtin/adapters/anthropic/anthropic_adapter.py +151 -0
  9. hexdag/builtin/adapters/database/__init__.py +6 -0
  10. hexdag/builtin/adapters/database/csv/csv_adapter.py +249 -0
  11. hexdag/builtin/adapters/database/pgvector/__init__.py +5 -0
  12. hexdag/builtin/adapters/database/pgvector/pgvector_adapter.py +478 -0
  13. hexdag/builtin/adapters/database/sqlalchemy/sqlalchemy_adapter.py +252 -0
  14. hexdag/builtin/adapters/database/sqlite/__init__.py +5 -0
  15. hexdag/builtin/adapters/database/sqlite/sqlite_adapter.py +410 -0
  16. hexdag/builtin/adapters/local/README.md +59 -0
  17. hexdag/builtin/adapters/local/__init__.py +7 -0
  18. hexdag/builtin/adapters/local/local_observer_manager.py +696 -0
  19. hexdag/builtin/adapters/memory/__init__.py +47 -0
  20. hexdag/builtin/adapters/memory/file_memory_adapter.py +297 -0
  21. hexdag/builtin/adapters/memory/in_memory_memory.py +216 -0
  22. hexdag/builtin/adapters/memory/schemas.py +57 -0
  23. hexdag/builtin/adapters/memory/session_memory.py +178 -0
  24. hexdag/builtin/adapters/memory/sqlite_memory_adapter.py +215 -0
  25. hexdag/builtin/adapters/memory/state_memory.py +280 -0
  26. hexdag/builtin/adapters/mock/README.md +89 -0
  27. hexdag/builtin/adapters/mock/__init__.py +15 -0
  28. hexdag/builtin/adapters/mock/hexdag.toml +50 -0
  29. hexdag/builtin/adapters/mock/mock_database.py +225 -0
  30. hexdag/builtin/adapters/mock/mock_embedding.py +223 -0
  31. hexdag/builtin/adapters/mock/mock_llm.py +177 -0
  32. hexdag/builtin/adapters/mock/mock_tool_adapter.py +192 -0
  33. hexdag/builtin/adapters/mock/mock_tool_router.py +232 -0
  34. hexdag/builtin/adapters/openai/__init__.py +5 -0
  35. hexdag/builtin/adapters/openai/openai_adapter.py +634 -0
  36. hexdag/builtin/adapters/secret/__init__.py +7 -0
  37. hexdag/builtin/adapters/secret/local_secret_adapter.py +248 -0
  38. hexdag/builtin/adapters/unified_tool_router.py +280 -0
  39. hexdag/builtin/macros/__init__.py +17 -0
  40. hexdag/builtin/macros/conversation_agent.py +390 -0
  41. hexdag/builtin/macros/llm_macro.py +151 -0
  42. hexdag/builtin/macros/reasoning_agent.py +423 -0
  43. hexdag/builtin/macros/tool_macro.py +380 -0
  44. hexdag/builtin/nodes/__init__.py +38 -0
  45. hexdag/builtin/nodes/_discovery.py +123 -0
  46. hexdag/builtin/nodes/agent_node.py +696 -0
  47. hexdag/builtin/nodes/base_node_factory.py +242 -0
  48. hexdag/builtin/nodes/composite_node.py +926 -0
  49. hexdag/builtin/nodes/data_node.py +201 -0
  50. hexdag/builtin/nodes/expression_node.py +487 -0
  51. hexdag/builtin/nodes/function_node.py +454 -0
  52. hexdag/builtin/nodes/llm_node.py +491 -0
  53. hexdag/builtin/nodes/loop_node.py +920 -0
  54. hexdag/builtin/nodes/mapped_input.py +518 -0
  55. hexdag/builtin/nodes/port_call_node.py +269 -0
  56. hexdag/builtin/nodes/tool_call_node.py +195 -0
  57. hexdag/builtin/nodes/tool_utils.py +390 -0
  58. hexdag/builtin/prompts/__init__.py +68 -0
  59. hexdag/builtin/prompts/base.py +422 -0
  60. hexdag/builtin/prompts/chat_prompts.py +303 -0
  61. hexdag/builtin/prompts/error_correction_prompts.py +320 -0
  62. hexdag/builtin/prompts/tool_prompts.py +160 -0
  63. hexdag/builtin/tools/builtin_tools.py +84 -0
  64. hexdag/builtin/tools/database_tools.py +164 -0
  65. hexdag/cli/__init__.py +17 -0
  66. hexdag/cli/__main__.py +7 -0
  67. hexdag/cli/commands/__init__.py +27 -0
  68. hexdag/cli/commands/build_cmd.py +812 -0
  69. hexdag/cli/commands/create_cmd.py +208 -0
  70. hexdag/cli/commands/docs_cmd.py +293 -0
  71. hexdag/cli/commands/generate_types_cmd.py +252 -0
  72. hexdag/cli/commands/init_cmd.py +188 -0
  73. hexdag/cli/commands/pipeline_cmd.py +494 -0
  74. hexdag/cli/commands/plugin_dev_cmd.py +529 -0
  75. hexdag/cli/commands/plugins_cmd.py +441 -0
  76. hexdag/cli/commands/studio_cmd.py +101 -0
  77. hexdag/cli/commands/validate_cmd.py +221 -0
  78. hexdag/cli/main.py +84 -0
  79. hexdag/core/__init__.py +83 -0
  80. hexdag/core/config/__init__.py +20 -0
  81. hexdag/core/config/loader.py +479 -0
  82. hexdag/core/config/models.py +150 -0
  83. hexdag/core/configurable.py +294 -0
  84. hexdag/core/context/__init__.py +37 -0
  85. hexdag/core/context/execution_context.py +378 -0
  86. hexdag/core/docs/__init__.py +26 -0
  87. hexdag/core/docs/extractors.py +678 -0
  88. hexdag/core/docs/generators.py +890 -0
  89. hexdag/core/docs/models.py +120 -0
  90. hexdag/core/domain/__init__.py +10 -0
  91. hexdag/core/domain/dag.py +1225 -0
  92. hexdag/core/exceptions.py +234 -0
  93. hexdag/core/expression_parser.py +569 -0
  94. hexdag/core/logging.py +449 -0
  95. hexdag/core/models/__init__.py +17 -0
  96. hexdag/core/models/base.py +138 -0
  97. hexdag/core/orchestration/__init__.py +46 -0
  98. hexdag/core/orchestration/body_executor.py +481 -0
  99. hexdag/core/orchestration/components/__init__.py +97 -0
  100. hexdag/core/orchestration/components/adapter_lifecycle_manager.py +113 -0
  101. hexdag/core/orchestration/components/checkpoint_manager.py +134 -0
  102. hexdag/core/orchestration/components/execution_coordinator.py +360 -0
  103. hexdag/core/orchestration/components/health_check_manager.py +176 -0
  104. hexdag/core/orchestration/components/input_mapper.py +143 -0
  105. hexdag/core/orchestration/components/lifecycle_manager.py +583 -0
  106. hexdag/core/orchestration/components/node_executor.py +377 -0
  107. hexdag/core/orchestration/components/secret_manager.py +202 -0
  108. hexdag/core/orchestration/components/wave_executor.py +158 -0
  109. hexdag/core/orchestration/constants.py +17 -0
  110. hexdag/core/orchestration/events/README.md +312 -0
  111. hexdag/core/orchestration/events/__init__.py +104 -0
  112. hexdag/core/orchestration/events/batching.py +330 -0
  113. hexdag/core/orchestration/events/decorators.py +139 -0
  114. hexdag/core/orchestration/events/events.py +573 -0
  115. hexdag/core/orchestration/events/observers/__init__.py +30 -0
  116. hexdag/core/orchestration/events/observers/core_observers.py +690 -0
  117. hexdag/core/orchestration/events/observers/models.py +111 -0
  118. hexdag/core/orchestration/events/taxonomy.py +269 -0
  119. hexdag/core/orchestration/hook_context.py +237 -0
  120. hexdag/core/orchestration/hooks.py +437 -0
  121. hexdag/core/orchestration/models.py +418 -0
  122. hexdag/core/orchestration/orchestrator.py +910 -0
  123. hexdag/core/orchestration/orchestrator_factory.py +275 -0
  124. hexdag/core/orchestration/port_wrappers.py +327 -0
  125. hexdag/core/orchestration/prompt/__init__.py +32 -0
  126. hexdag/core/orchestration/prompt/template.py +332 -0
  127. hexdag/core/pipeline_builder/__init__.py +21 -0
  128. hexdag/core/pipeline_builder/component_instantiator.py +386 -0
  129. hexdag/core/pipeline_builder/include_tag.py +265 -0
  130. hexdag/core/pipeline_builder/pipeline_config.py +133 -0
  131. hexdag/core/pipeline_builder/py_tag.py +223 -0
  132. hexdag/core/pipeline_builder/tag_discovery.py +268 -0
  133. hexdag/core/pipeline_builder/yaml_builder.py +1196 -0
  134. hexdag/core/pipeline_builder/yaml_validator.py +569 -0
  135. hexdag/core/ports/__init__.py +65 -0
  136. hexdag/core/ports/api_call.py +133 -0
  137. hexdag/core/ports/database.py +489 -0
  138. hexdag/core/ports/embedding.py +215 -0
  139. hexdag/core/ports/executor.py +237 -0
  140. hexdag/core/ports/file_storage.py +117 -0
  141. hexdag/core/ports/healthcheck.py +87 -0
  142. hexdag/core/ports/llm.py +551 -0
  143. hexdag/core/ports/memory.py +70 -0
  144. hexdag/core/ports/observer_manager.py +130 -0
  145. hexdag/core/ports/secret.py +145 -0
  146. hexdag/core/ports/tool_router.py +94 -0
  147. hexdag/core/ports_builder.py +623 -0
  148. hexdag/core/protocols.py +273 -0
  149. hexdag/core/resolver.py +304 -0
  150. hexdag/core/schema/__init__.py +9 -0
  151. hexdag/core/schema/generator.py +742 -0
  152. hexdag/core/secrets.py +242 -0
  153. hexdag/core/types.py +413 -0
  154. hexdag/core/utils/async_warnings.py +206 -0
  155. hexdag/core/utils/schema_conversion.py +78 -0
  156. hexdag/core/utils/sql_validation.py +86 -0
  157. hexdag/core/validation/secure_json.py +148 -0
  158. hexdag/core/yaml_macro.py +517 -0
  159. hexdag/mcp_server.py +3120 -0
  160. hexdag/studio/__init__.py +10 -0
  161. hexdag/studio/build_ui.py +92 -0
  162. hexdag/studio/server/__init__.py +1 -0
  163. hexdag/studio/server/main.py +100 -0
  164. hexdag/studio/server/routes/__init__.py +9 -0
  165. hexdag/studio/server/routes/execute.py +208 -0
  166. hexdag/studio/server/routes/export.py +558 -0
  167. hexdag/studio/server/routes/files.py +207 -0
  168. hexdag/studio/server/routes/plugins.py +419 -0
  169. hexdag/studio/server/routes/validate.py +220 -0
  170. hexdag/studio/ui/index.html +13 -0
  171. hexdag/studio/ui/package-lock.json +2992 -0
  172. hexdag/studio/ui/package.json +31 -0
  173. hexdag/studio/ui/postcss.config.js +6 -0
  174. hexdag/studio/ui/public/hexdag.svg +5 -0
  175. hexdag/studio/ui/src/App.tsx +251 -0
  176. hexdag/studio/ui/src/components/Canvas.tsx +408 -0
  177. hexdag/studio/ui/src/components/ContextMenu.tsx +187 -0
  178. hexdag/studio/ui/src/components/FileBrowser.tsx +123 -0
  179. hexdag/studio/ui/src/components/Header.tsx +181 -0
  180. hexdag/studio/ui/src/components/HexdagNode.tsx +193 -0
  181. hexdag/studio/ui/src/components/NodeInspector.tsx +512 -0
  182. hexdag/studio/ui/src/components/NodePalette.tsx +262 -0
  183. hexdag/studio/ui/src/components/NodePortsSection.tsx +403 -0
  184. hexdag/studio/ui/src/components/PluginManager.tsx +347 -0
  185. hexdag/studio/ui/src/components/PortsEditor.tsx +481 -0
  186. hexdag/studio/ui/src/components/PythonEditor.tsx +195 -0
  187. hexdag/studio/ui/src/components/ValidationPanel.tsx +105 -0
  188. hexdag/studio/ui/src/components/YamlEditor.tsx +196 -0
  189. hexdag/studio/ui/src/components/index.ts +8 -0
  190. hexdag/studio/ui/src/index.css +92 -0
  191. hexdag/studio/ui/src/main.tsx +10 -0
  192. hexdag/studio/ui/src/types/index.ts +123 -0
  193. hexdag/studio/ui/src/vite-env.d.ts +1 -0
  194. hexdag/studio/ui/tailwind.config.js +29 -0
  195. hexdag/studio/ui/tsconfig.json +37 -0
  196. hexdag/studio/ui/tsconfig.node.json +13 -0
  197. hexdag/studio/ui/vite.config.ts +35 -0
  198. hexdag/visualization/__init__.py +69 -0
  199. hexdag/visualization/dag_visualizer.py +1020 -0
  200. hexdag-0.5.0.dev1.dist-info/METADATA +369 -0
  201. hexdag-0.5.0.dev1.dist-info/RECORD +261 -0
  202. hexdag-0.5.0.dev1.dist-info/WHEEL +4 -0
  203. hexdag-0.5.0.dev1.dist-info/entry_points.txt +4 -0
  204. hexdag-0.5.0.dev1.dist-info/licenses/LICENSE +190 -0
  205. hexdag_plugins/.gitignore +43 -0
  206. hexdag_plugins/README.md +73 -0
  207. hexdag_plugins/__init__.py +1 -0
  208. hexdag_plugins/azure/LICENSE +21 -0
  209. hexdag_plugins/azure/README.md +414 -0
  210. hexdag_plugins/azure/__init__.py +21 -0
  211. hexdag_plugins/azure/azure_blob_adapter.py +450 -0
  212. hexdag_plugins/azure/azure_cosmos_adapter.py +383 -0
  213. hexdag_plugins/azure/azure_keyvault_adapter.py +314 -0
  214. hexdag_plugins/azure/azure_openai_adapter.py +415 -0
  215. hexdag_plugins/azure/pyproject.toml +107 -0
  216. hexdag_plugins/azure/tests/__init__.py +1 -0
  217. hexdag_plugins/azure/tests/test_azure_blob_adapter.py +350 -0
  218. hexdag_plugins/azure/tests/test_azure_cosmos_adapter.py +323 -0
  219. hexdag_plugins/azure/tests/test_azure_keyvault_adapter.py +330 -0
  220. hexdag_plugins/azure/tests/test_azure_openai_adapter.py +329 -0
  221. hexdag_plugins/hexdag_etl/README.md +168 -0
  222. hexdag_plugins/hexdag_etl/__init__.py +53 -0
  223. hexdag_plugins/hexdag_etl/examples/01_simple_pandas_transform.py +270 -0
  224. hexdag_plugins/hexdag_etl/examples/02_simple_pandas_only.py +149 -0
  225. hexdag_plugins/hexdag_etl/examples/03_file_io_pipeline.py +109 -0
  226. hexdag_plugins/hexdag_etl/examples/test_pandas_transform.py +84 -0
  227. hexdag_plugins/hexdag_etl/hexdag.toml +25 -0
  228. hexdag_plugins/hexdag_etl/hexdag_etl/__init__.py +48 -0
  229. hexdag_plugins/hexdag_etl/hexdag_etl/nodes/__init__.py +13 -0
  230. hexdag_plugins/hexdag_etl/hexdag_etl/nodes/api_extract.py +230 -0
  231. hexdag_plugins/hexdag_etl/hexdag_etl/nodes/base_node_factory.py +181 -0
  232. hexdag_plugins/hexdag_etl/hexdag_etl/nodes/file_io.py +415 -0
  233. hexdag_plugins/hexdag_etl/hexdag_etl/nodes/outlook.py +492 -0
  234. hexdag_plugins/hexdag_etl/hexdag_etl/nodes/pandas_transform.py +563 -0
  235. hexdag_plugins/hexdag_etl/hexdag_etl/nodes/sql_extract_load.py +112 -0
  236. hexdag_plugins/hexdag_etl/pyproject.toml +82 -0
  237. hexdag_plugins/hexdag_etl/test_transform.py +54 -0
  238. hexdag_plugins/hexdag_etl/tests/test_plugin_integration.py +62 -0
  239. hexdag_plugins/mysql_adapter/LICENSE +21 -0
  240. hexdag_plugins/mysql_adapter/README.md +224 -0
  241. hexdag_plugins/mysql_adapter/__init__.py +6 -0
  242. hexdag_plugins/mysql_adapter/mysql_adapter.py +408 -0
  243. hexdag_plugins/mysql_adapter/pyproject.toml +93 -0
  244. hexdag_plugins/mysql_adapter/tests/test_mysql_adapter.py +259 -0
  245. hexdag_plugins/storage/README.md +184 -0
  246. hexdag_plugins/storage/__init__.py +19 -0
  247. hexdag_plugins/storage/file/__init__.py +5 -0
  248. hexdag_plugins/storage/file/local.py +325 -0
  249. hexdag_plugins/storage/ports/__init__.py +5 -0
  250. hexdag_plugins/storage/ports/vector_store.py +236 -0
  251. hexdag_plugins/storage/sql/__init__.py +7 -0
  252. hexdag_plugins/storage/sql/base.py +187 -0
  253. hexdag_plugins/storage/sql/mysql.py +27 -0
  254. hexdag_plugins/storage/sql/postgresql.py +27 -0
  255. hexdag_plugins/storage/tests/__init__.py +1 -0
  256. hexdag_plugins/storage/tests/test_local_file_storage.py +161 -0
  257. hexdag_plugins/storage/tests/test_sql_adapters.py +212 -0
  258. hexdag_plugins/storage/vector/__init__.py +7 -0
  259. hexdag_plugins/storage/vector/chromadb.py +223 -0
  260. hexdag_plugins/storage/vector/in_memory.py +285 -0
  261. hexdag_plugins/storage/vector/pgvector.py +502 -0
@@ -0,0 +1,133 @@
1
+ """Pipeline configuration models for YAML-based component configuration.
2
+
3
+ This module provides Pydantic models for declaring and configuring:
4
+ - Global ports (adapters)
5
+ - Global policies
6
+ - Per-type port defaults (type_ports)
7
+ - Per-node port/policy overrides
8
+
9
+ Note
10
+ ----
11
+ All configuration dictionaries use native YAML dict format.
12
+ Formats:
13
+ - ports: port_name -> {namespace: str, name: str, params: dict}
14
+ - type_ports: node_type -> {port_name -> {namespace: str, name: str, params: dict}}
15
+ - policies: policy_name -> {namespace: str, name: str, params: dict}
16
+ """
17
+
18
+ from typing import Any
19
+
20
+ from pydantic import BaseModel, Field
21
+
22
+
23
class PipelineConfig(BaseModel):
    """Full pipeline configuration parsed from a YAML manifest.

    Holds everything needed to instantiate and configure the orchestrator:
    global ports, per-node-type port defaults, global policies, pipeline
    metadata and the raw node specifications.

    Attributes
    ----------
    ports : dict[str, dict[str, Any]]
        Global port (adapter) specs, keyed by port name.
        Each spec is a native YAML dict: {namespace: str, name: str, params: dict}
    type_ports : dict[str, dict[str, dict[str, Any]]]
        Port defaults per node type: node_type -> {port_name -> spec dict}
    policies : dict[str, dict[str, Any]]
        Global policy specs, keyed by policy name.
        Each spec is a native YAML dict: {namespace: str, name: str, params: dict}
    metadata : dict[str, Any]
        Pipeline metadata (name, description, version, etc.)
    nodes : list[dict[str, Any]]
        Node specifications (handled by existing builder)

    Examples
    --------
    ```yaml
    apiVersion: hexdag.omniviser.io/v1alpha1
    kind: Pipeline
    metadata:
      name: my-pipeline
      description: Example pipeline

    spec:
      # Global ports (adapters) - native YAML dict format
      ports:
        llm:
          namespace: core
          name: openai
          params:
            model: gpt-4
            temperature: 0.7
            api_key: ${OPENAI_API_KEY}
        database:
          namespace: core
          name: postgres
          params:
            connection_string: ${DB_URL}

      # Per-type port defaults
      type_ports:
        agent:
          llm:
            namespace: core
            name: anthropic
            params:
              model: claude-3-5-sonnet

      # Global policies - native YAML dict format
      policies:
        retry:
          name: retry
          params:
            max_retries: 3
        timeout:
          name: timeout
          params:
            timeout_seconds: 300

      # Nodes
      nodes:
        - kind: core:agent_node
          metadata:
            name: researcher
          spec:
            initial_prompt_template: "Research: {{topic}}"
            max_steps: 10
            # Node-level port override
            ports:
              llm:
                namespace: core
                name: openai
                params:
                  model: gpt-4o
            # Node-level policy override
            policies:
              timeout:
                name: timeout
                params:
                  timeout_seconds: 600
    ```
    """

    # --- Global configuration -------------------------------------------
    ports: dict[str, dict[str, Any]] = Field(
        description="Global adapter configurations",
        default_factory=dict,
    )
    type_ports: dict[str, dict[str, dict[str, Any]]] = Field(
        description="Per-type port defaults",
        default_factory=dict,
    )
    policies: dict[str, dict[str, Any]] = Field(
        description="Global policy configurations",
        default_factory=dict,
    )

    # --- Metadata ---------------------------------------------------------
    metadata: dict[str, Any] = Field(
        description="Pipeline metadata",
        default_factory=dict,
    )

    # --- Nodes (raw specs; the existing builder interprets them) ---------
    nodes: list[dict[str, Any]] = Field(
        description="Node specifications",
        default_factory=list,
    )

    class Config:
        """Pydantic model configuration."""

        # Unknown keys are kept rather than rejected, for extensibility.
        extra = "allow"
@@ -0,0 +1,223 @@
1
+ """!py YAML custom tag handler for inline Python functions.
2
+
3
+ This module provides a YAML custom tag constructor that compiles inline
4
+ Python code into callable functions, enabling inline Python in YAML pipelines.
5
+
6
+ Security Warning
7
+ ----------------
8
+ The !py tag executes arbitrary Python code. Only use with trusted YAML files.
9
+ For untrusted input, use module path strings instead.
10
+
11
+ Examples
12
+ --------
13
+ YAML usage::
14
+
15
+ body: !py |
16
+ async def process(item, index, state, **ports):
17
+ '''Process an item with database lookup.'''
18
+ db = ports.get('database')
19
+ if db:
20
+ context = await db.aquery(item['id'])
21
+ else:
22
+ context = {}
23
+ return {"item": item, "context": context}
24
+
25
+ The function will be compiled and made available as a callable.
26
+ """
27
+
28
+ from collections.abc import Callable
29
+ from typing import Any
30
+
31
+ import yaml
32
+
33
+ from hexdag.core.logging import get_logger
34
+
35
# Module-level logger, named after this module.
logger = get_logger(__name__)
36
+
37
+
38
class PyTagError(Exception):
    """Raised when a ``!py`` tagged block cannot be turned into a function.

    Covers empty blocks, compile errors, errors raised while executing the
    block, and blocks that do not define a function.
    """
42
+
43
+
44
def py_constructor(loader: yaml.SafeLoader, node: yaml.ScalarNode) -> Callable[..., Any]:
    """Compile !py tagged Python code into a callable function.

    The !py block is expected to define one function. The function can be
    sync or async, and should follow the signature convention:

        async def process(item, index, state, **ports) -> Any:
            '''Docstring.'''
            ...

    Selection rule: among the public (non-underscore) callables left in the
    block's namespace, classes excluded, the first one whose code was
    compiled from this block wins. This keeps helpers imported at the top of
    the block (``from math import sqrt``) from being returned in place of
    the function the block defines. If no such callable exists, the first
    public callable of any origin is used.

    Security: the block is executed with ``exec``. Only load trusted YAML.

    Parameters
    ----------
    loader : yaml.SafeLoader
        YAML loader instance
    node : yaml.ScalarNode
        YAML node containing Python source code

    Returns
    -------
    Callable
        Compiled Python function

    Raises
    ------
    PyTagError
        If the block is empty, fails to compile, raises while executing,
        or defines no function

    Examples
    --------
    The following YAML::

        body: !py |
            def double(item, index, state, **ports):
                return item * 2

    Will compile to a callable function that can be invoked.
    """
    import inspect  # function-scope import keeps this block self-contained

    source_name = "<yaml-!py>"
    source_code = loader.construct_scalar(node)

    if not isinstance(source_code, str) or not source_code.strip():
        raise PyTagError("!py block must be a non-empty string defining a function.")

    # Compile the source code
    try:
        compiled = compile(source_code, source_name, "exec")
    except SyntaxError as e:
        raise PyTagError(f"Syntax error in !py block at line {e.lineno}: {e.msg}") from e
    except Exception as e:
        raise PyTagError(f"Failed to compile !py block: {e}") from e

    # Execute to get the function.
    # SECURITY: this runs arbitrary code from the YAML file.
    namespace: dict[str, Any] = {}
    try:
        exec(compiled, namespace)  # noqa: S102
    except Exception as e:
        raise PyTagError(f"Failed to execute !py block: {e}") from e

    def _defined_in_block(obj: Any) -> bool:
        """Return True if ``obj`` (or what it wraps) was compiled from this block."""
        try:
            target = inspect.unwrap(obj)
        except ValueError:
            # Cyclic ``__wrapped__`` chain; inspect the object itself.
            target = obj
        code = getattr(target, "__code__", None)
        return getattr(code, "co_filename", None) == source_name

    # Public callables in definition order; classes are not functions.
    candidates = [
        obj
        for name, obj in namespace.items()
        if not name.startswith("_") and callable(obj) and not isinstance(obj, type)
    ]

    func: Callable[..., Any] | None = next(
        (obj for obj in candidates if _defined_in_block(obj)), None
    )
    if func is None and candidates:
        # Nothing traceable to the block: keep the historical
        # "first public callable" behaviour.
        func = candidates[0]

    if func is None:
        defined = [k for k in namespace if not k.startswith("_")]
        raise PyTagError(
            f"!py block must define a function. Found: {defined if defined else 'nothing'}"
        )

    logger.debug(
        "Compiled !py function",
        function_name=getattr(func, "__name__", "<anonymous>"),
    )

    return func
122
+
123
+
124
def validate_py_source(source_code: str) -> list[str]:
    """Validate !py source code without executing it.

    This function performs static validation of Python source code intended
    for !py tags. The source is parsed and byte-compiled (to surface syntax
    errors) and its top-level statements are inspected for a function
    definition. The code is never run, so validating a YAML file cannot
    trigger side effects from the block.

    A block is considered to define a function when it contains, at top
    level, a ``def`` / ``async def`` with a public (non-underscore) name, or
    an assignment of a ``lambda`` to a public name. Imported callables do
    not count.

    Parameters
    ----------
    source_code : str
        Python source code to validate

    Returns
    -------
    list[str]
        List of validation error messages (empty if valid)

    Examples
    --------
    Validate valid code::

        >>> errors = validate_py_source("def process(item, index, state, **ports): return item")
        >>> assert errors == []

    Validate invalid syntax::

        >>> errors = validate_py_source("def process( invalid")
        >>> assert "Syntax error" in errors[0]

    Validate missing function::

        >>> errors = validate_py_source("x = 1")
        >>> assert "must define a function" in errors[0]
    """
    import ast  # function-scope import keeps this block self-contained

    if not source_code or not source_code.strip():
        return ["!py block is empty. Must define a function."]

    # Check syntax. Compiling the parsed tree also catches errors that only
    # the compiler reports (e.g. 'return' outside a function).
    try:
        tree = ast.parse(source_code, filename="<yaml-!py>")
        compile(tree, "<yaml-!py>", "exec")
    except SyntaxError as e:
        return [f"Syntax error at line {e.lineno}: {e.msg}"]
    except Exception as e:  # noqa: BLE001 - a validator reports, never raises
        return [f"Compilation error: {e}"]

    # Walk top-level statements: note every public name they bind and
    # whether any of them binds a function.
    has_function = False
    defined: list[str] = []
    for stmt in tree.body:
        binds_function = False
        if isinstance(stmt, (ast.FunctionDef, ast.AsyncFunctionDef)):
            names = [stmt.name]
            binds_function = True
        elif isinstance(stmt, ast.ClassDef):
            names = [stmt.name]
        elif isinstance(stmt, ast.Assign):
            names = [t.id for t in stmt.targets if isinstance(t, ast.Name)]
            binds_function = isinstance(stmt.value, ast.Lambda)
        elif isinstance(stmt, ast.AnnAssign) and stmt.value is not None:
            names = [stmt.target.id] if isinstance(stmt.target, ast.Name) else []
            binds_function = isinstance(stmt.value, ast.Lambda)
        elif isinstance(stmt, ast.Import):
            names = [alias.asname or alias.name.split(".")[0] for alias in stmt.names]
        elif isinstance(stmt, ast.ImportFrom):
            names = [alias.asname or alias.name for alias in stmt.names]
        else:
            continue

        public = [n for n in names if not n.startswith("_")]
        defined.extend(n for n in public if n not in defined)
        if binds_function and public:
            has_function = True

    if has_function:
        return []

    return [f"!py block must define a function. Found: {defined if defined else 'nothing'}"]
197
+
198
+
199
def register_py_tag() -> None:
    """Register the ``!py`` constructor on ``yaml.SafeLoader``.

    Safe to call repeatedly: when ``!py`` is already present in the loader's
    constructor table, nothing happens.

    Examples
    --------
    Import the module to auto-register::

        import hexdag.core.pipeline_builder.py_tag  # Registers !py tag

    Or explicitly register::

        from hexdag.core.pipeline_builder.py_tag import register_py_tag
        register_py_tag()
    """
    # Already registered: avoid a duplicate registration.
    if "!py" in yaml.SafeLoader.yaml_constructors:
        return

    yaml.SafeLoader.add_constructor("!py", py_constructor)
    logger.debug("Registered !py YAML tag")


# Importing this module is enough to enable !py in YAML parsing.
register_py_tag()
@@ -0,0 +1,268 @@
1
+ """Auto-discovery and introspection of YAML custom tags.
2
+
3
+ This module provides functions to discover available YAML custom tags
4
+ and extract documentation from their constructors.
5
+
6
+ Usage
7
+ -----
8
+ >>> from hexdag.core.pipeline_builder.tag_discovery import discover_tags
9
+ >>> tags = discover_tags()
10
+ >>> tags["!py"]["name"]
11
+ '!py'
12
+ """
13
+
14
+ from __future__ import annotations
15
+
16
+ import importlib
17
+ import inspect
18
+ from functools import lru_cache
19
+ from typing import Any
20
+
21
+ import yaml
22
+
23
+ # Tag metadata registry
24
+ # Maps tag name -> (module, constructor_func, description)
25
+ _TAG_REGISTRY: dict[str, tuple[str, str, str]] = {
26
+ "!py": (
27
+ "hexdag.core.pipeline_builder.py_tag",
28
+ "py_constructor",
29
+ "Compile inline Python code into callable functions",
30
+ ),
31
+ "!include": (
32
+ "hexdag.core.pipeline_builder.include_tag",
33
+ "include_constructor",
34
+ "Include content from external YAML files",
35
+ ),
36
+ }
37
+
38
+
39
@lru_cache(maxsize=1)
def discover_tags() -> dict[str, dict[str, Any]]:
    """Build the metadata table for every tag listed in ``_TAG_REGISTRY``.

    The result is memoized by ``lru_cache``, so the same dict object is
    returned on every call after the first.

    Returns
    -------
    dict[str, dict[str, Any]]
        Mapping of tag name to tag info dict with keys:
        - name: Tag name (e.g., "!py")
        - module: Full module path
        - constructor: Constructor function name
        - description: Short description
        - documentation: Full docstring
        - syntax: Syntax patterns
        - is_registered: Whether tag is registered with YAML SafeLoader

    Examples
    --------
    >>> tags = discover_tags()
    >>> "!py" in tags
    True
    >>> tags["!py"]["description"]
    'Compile inline Python code into callable functions'
    """
    return {
        tag: _extract_tag_info(tag, module_path, constructor_name, summary)
        for tag, (module_path, constructor_name, summary) in _TAG_REGISTRY.items()
    }
70
+
71
+
72
def _extract_tag_info(
    tag_name: str,
    module_path: str,
    constructor_name: str,
    description: str,
) -> dict[str, Any]:
    """Extract comprehensive information about a tag.

    Imports the tag's module, pulls documentation from the constructor
    (falling back to the module docstring when the constructor has none),
    attaches the syntax patterns, and records whether the tag is registered
    on ``yaml.SafeLoader``.

    The registration check runs after the import attempt: a tag module may
    register its constructor as an import side effect, so checking first
    could report a tag as unregistered that is registered by the time this
    function returns.

    Parameters
    ----------
    tag_name : str
        The YAML tag (e.g., "!py")
    module_path : str
        Full module path to the tag module
    constructor_name : str
        Name of the constructor function
    description : str
        Short description

    Returns
    -------
    dict[str, Any]
        Tag information dictionary. Contains an extra ``error`` key with the
        exception text when the module could not be imported or inspected.
    """
    tag_info: dict[str, Any] = {
        "name": tag_name,
        "module": module_path,
        "constructor": constructor_name,
        "description": description,
        "documentation": "",
        "syntax": [],
        # Placeholder to fix the key order; the real value is set below,
        # once the module import has been attempted.
        "is_registered": False,
    }

    try:
        module = importlib.import_module(module_path)
        constructor = getattr(module, constructor_name, None)

        if constructor:
            # Extract full docstring from constructor
            tag_info["documentation"] = inspect.getdoc(constructor) or ""

        # Fall back to the module-level docstring when nothing was found
        if not tag_info["documentation"]:
            tag_info["documentation"] = inspect.getdoc(module) or ""

        # Extract syntax patterns
        tag_info["syntax"] = _get_tag_syntax(tag_name)

    except (ImportError, AttributeError) as e:
        tag_info["error"] = str(e)

    # Checked last so that import-time registration is taken into account.
    tag_info["is_registered"] = tag_name in yaml.SafeLoader.yaml_constructors

    return tag_info
127
+
128
+
129
+ def _get_tag_syntax(tag_name: str) -> list[str]:
130
+ """Get syntax patterns for a tag.
131
+
132
+ Parameters
133
+ ----------
134
+ tag_name : str
135
+ The tag name (e.g., "!py")
136
+
137
+ Returns
138
+ -------
139
+ list[str]
140
+ List of syntax pattern descriptions
141
+ """
142
+ if tag_name == "!py":
143
+ return [
144
+ "!py | <python_code> # Inline Python code block",
145
+ "The code block must define exactly one function",
146
+ "Function signature: async def process(item, index, state, **ports)",
147
+ ]
148
+ if tag_name == "!include":
149
+ return [
150
+ "!include ./path/to/file.yaml # Simple file inclusion",
151
+ "!include {path: ./file.yaml, vars: {key: value}} # With variable substitution",
152
+ "Variables use {{var}} placeholder syntax in included files",
153
+ ]
154
+ return []
155
+
156
+
157
def get_tag_schema(tag_name: str) -> dict[str, Any]:
    """Describe a tag's usage as a JSON Schema-like dict.

    The tag name may be given with or without its leading ``!``.

    Parameters
    ----------
    tag_name : str
        Tag name (e.g., "!py" or "!include")

    Returns
    -------
    dict[str, Any]
        Schema-like dict with the tag's general information, plus
        ``input_schema`` / ``output`` (and ``security_warning`` for ``!py``)
        for the tags that have a specific description

    Raises
    ------
    ValueError
        If tag is not found

    Examples
    --------
    >>> schema = get_tag_schema("!py")
    >>> schema["name"]
    '!py'
    >>> schema["type"]
    'yaml_tag'
    """
    # Accept both "py" and "!py"
    lookup_key = tag_name if tag_name.startswith("!") else f"!{tag_name}"

    known_tags = discover_tags()
    if lookup_key not in known_tags:
        available = list(known_tags.keys())
        raise ValueError(f"Unknown tag: {tag_name}. Available: {available}")

    info = known_tags[lookup_key]

    # Generic part, shared by every tag
    schema: dict[str, Any] = {"name": info["name"], "type": "yaml_tag"}
    for field in ("description", "module", "documentation", "syntax", "is_registered"):
        schema[field] = info[field]

    # Tag-specific part
    if lookup_key == "!py":
        schema.update(
            input_schema={
                "type": "string",
                "format": "python_code",
                "description": "Python source code defining a single function",
                "examples": ["async def process(item, index, state, **ports):\n return item * 2"],
            },
            output={
                "type": "callable",
                "description": "Compiled Python function",
            },
            security_warning=(
                "Executes arbitrary Python code. Only use with trusted YAML files."
            ),
        )
    elif lookup_key == "!include":
        plain_path_form = {
            "type": "string",
            "description": "Path to YAML file to include",
            "examples": ["./shared/nodes.yaml"],
        }
        mapping_form = {
            "type": "object",
            "properties": {
                "path": {
                    "type": "string",
                    "description": "Path to YAML file",
                },
                "vars": {
                    "type": "object",
                    "description": "Variables to substitute using {{var}} syntax",
                },
            },
            "required": ["path"],
        }
        schema.update(
            input_schema={"oneOf": [plain_path_form, mapping_form]},
            output={
                "type": "any",
                "description": "Parsed YAML content from the included file",
            },
        )

    return schema
250
+
251
+
252
def get_known_tag_names() -> frozenset[str]:
    """Return the names of all discovered tags as an immutable set.

    Returns
    -------
    frozenset[str]
        Set of all tag names (e.g., {"!py", "!include"})

    Examples
    --------
    >>> names = get_known_tag_names()
    >>> "!py" in names
    True
    >>> "!include" in names
    True
    """
    discovered = discover_tags()
    return frozenset(discovered.keys())