hexdag-0.5.0.dev1-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (261)
  1. hexdag/__init__.py +116 -0
  2. hexdag/__main__.py +30 -0
  3. hexdag/adapters/executors/__init__.py +5 -0
  4. hexdag/adapters/executors/local_executor.py +316 -0
  5. hexdag/builtin/__init__.py +6 -0
  6. hexdag/builtin/adapters/__init__.py +51 -0
  7. hexdag/builtin/adapters/anthropic/__init__.py +5 -0
  8. hexdag/builtin/adapters/anthropic/anthropic_adapter.py +151 -0
  9. hexdag/builtin/adapters/database/__init__.py +6 -0
  10. hexdag/builtin/adapters/database/csv/csv_adapter.py +249 -0
  11. hexdag/builtin/adapters/database/pgvector/__init__.py +5 -0
  12. hexdag/builtin/adapters/database/pgvector/pgvector_adapter.py +478 -0
  13. hexdag/builtin/adapters/database/sqlalchemy/sqlalchemy_adapter.py +252 -0
  14. hexdag/builtin/adapters/database/sqlite/__init__.py +5 -0
  15. hexdag/builtin/adapters/database/sqlite/sqlite_adapter.py +410 -0
  16. hexdag/builtin/adapters/local/README.md +59 -0
  17. hexdag/builtin/adapters/local/__init__.py +7 -0
  18. hexdag/builtin/adapters/local/local_observer_manager.py +696 -0
  19. hexdag/builtin/adapters/memory/__init__.py +47 -0
  20. hexdag/builtin/adapters/memory/file_memory_adapter.py +297 -0
  21. hexdag/builtin/adapters/memory/in_memory_memory.py +216 -0
  22. hexdag/builtin/adapters/memory/schemas.py +57 -0
  23. hexdag/builtin/adapters/memory/session_memory.py +178 -0
  24. hexdag/builtin/adapters/memory/sqlite_memory_adapter.py +215 -0
  25. hexdag/builtin/adapters/memory/state_memory.py +280 -0
  26. hexdag/builtin/adapters/mock/README.md +89 -0
  27. hexdag/builtin/adapters/mock/__init__.py +15 -0
  28. hexdag/builtin/adapters/mock/hexdag.toml +50 -0
  29. hexdag/builtin/adapters/mock/mock_database.py +225 -0
  30. hexdag/builtin/adapters/mock/mock_embedding.py +223 -0
  31. hexdag/builtin/adapters/mock/mock_llm.py +177 -0
  32. hexdag/builtin/adapters/mock/mock_tool_adapter.py +192 -0
  33. hexdag/builtin/adapters/mock/mock_tool_router.py +232 -0
  34. hexdag/builtin/adapters/openai/__init__.py +5 -0
  35. hexdag/builtin/adapters/openai/openai_adapter.py +634 -0
  36. hexdag/builtin/adapters/secret/__init__.py +7 -0
  37. hexdag/builtin/adapters/secret/local_secret_adapter.py +248 -0
  38. hexdag/builtin/adapters/unified_tool_router.py +280 -0
  39. hexdag/builtin/macros/__init__.py +17 -0
  40. hexdag/builtin/macros/conversation_agent.py +390 -0
  41. hexdag/builtin/macros/llm_macro.py +151 -0
  42. hexdag/builtin/macros/reasoning_agent.py +423 -0
  43. hexdag/builtin/macros/tool_macro.py +380 -0
  44. hexdag/builtin/nodes/__init__.py +38 -0
  45. hexdag/builtin/nodes/_discovery.py +123 -0
  46. hexdag/builtin/nodes/agent_node.py +696 -0
  47. hexdag/builtin/nodes/base_node_factory.py +242 -0
  48. hexdag/builtin/nodes/composite_node.py +926 -0
  49. hexdag/builtin/nodes/data_node.py +201 -0
  50. hexdag/builtin/nodes/expression_node.py +487 -0
  51. hexdag/builtin/nodes/function_node.py +454 -0
  52. hexdag/builtin/nodes/llm_node.py +491 -0
  53. hexdag/builtin/nodes/loop_node.py +920 -0
  54. hexdag/builtin/nodes/mapped_input.py +518 -0
  55. hexdag/builtin/nodes/port_call_node.py +269 -0
  56. hexdag/builtin/nodes/tool_call_node.py +195 -0
  57. hexdag/builtin/nodes/tool_utils.py +390 -0
  58. hexdag/builtin/prompts/__init__.py +68 -0
  59. hexdag/builtin/prompts/base.py +422 -0
  60. hexdag/builtin/prompts/chat_prompts.py +303 -0
  61. hexdag/builtin/prompts/error_correction_prompts.py +320 -0
  62. hexdag/builtin/prompts/tool_prompts.py +160 -0
  63. hexdag/builtin/tools/builtin_tools.py +84 -0
  64. hexdag/builtin/tools/database_tools.py +164 -0
  65. hexdag/cli/__init__.py +17 -0
  66. hexdag/cli/__main__.py +7 -0
  67. hexdag/cli/commands/__init__.py +27 -0
  68. hexdag/cli/commands/build_cmd.py +812 -0
  69. hexdag/cli/commands/create_cmd.py +208 -0
  70. hexdag/cli/commands/docs_cmd.py +293 -0
  71. hexdag/cli/commands/generate_types_cmd.py +252 -0
  72. hexdag/cli/commands/init_cmd.py +188 -0
  73. hexdag/cli/commands/pipeline_cmd.py +494 -0
  74. hexdag/cli/commands/plugin_dev_cmd.py +529 -0
  75. hexdag/cli/commands/plugins_cmd.py +441 -0
  76. hexdag/cli/commands/studio_cmd.py +101 -0
  77. hexdag/cli/commands/validate_cmd.py +221 -0
  78. hexdag/cli/main.py +84 -0
  79. hexdag/core/__init__.py +83 -0
  80. hexdag/core/config/__init__.py +20 -0
  81. hexdag/core/config/loader.py +479 -0
  82. hexdag/core/config/models.py +150 -0
  83. hexdag/core/configurable.py +294 -0
  84. hexdag/core/context/__init__.py +37 -0
  85. hexdag/core/context/execution_context.py +378 -0
  86. hexdag/core/docs/__init__.py +26 -0
  87. hexdag/core/docs/extractors.py +678 -0
  88. hexdag/core/docs/generators.py +890 -0
  89. hexdag/core/docs/models.py +120 -0
  90. hexdag/core/domain/__init__.py +10 -0
  91. hexdag/core/domain/dag.py +1225 -0
  92. hexdag/core/exceptions.py +234 -0
  93. hexdag/core/expression_parser.py +569 -0
  94. hexdag/core/logging.py +449 -0
  95. hexdag/core/models/__init__.py +17 -0
  96. hexdag/core/models/base.py +138 -0
  97. hexdag/core/orchestration/__init__.py +46 -0
  98. hexdag/core/orchestration/body_executor.py +481 -0
  99. hexdag/core/orchestration/components/__init__.py +97 -0
  100. hexdag/core/orchestration/components/adapter_lifecycle_manager.py +113 -0
  101. hexdag/core/orchestration/components/checkpoint_manager.py +134 -0
  102. hexdag/core/orchestration/components/execution_coordinator.py +360 -0
  103. hexdag/core/orchestration/components/health_check_manager.py +176 -0
  104. hexdag/core/orchestration/components/input_mapper.py +143 -0
  105. hexdag/core/orchestration/components/lifecycle_manager.py +583 -0
  106. hexdag/core/orchestration/components/node_executor.py +377 -0
  107. hexdag/core/orchestration/components/secret_manager.py +202 -0
  108. hexdag/core/orchestration/components/wave_executor.py +158 -0
  109. hexdag/core/orchestration/constants.py +17 -0
  110. hexdag/core/orchestration/events/README.md +312 -0
  111. hexdag/core/orchestration/events/__init__.py +104 -0
  112. hexdag/core/orchestration/events/batching.py +330 -0
  113. hexdag/core/orchestration/events/decorators.py +139 -0
  114. hexdag/core/orchestration/events/events.py +573 -0
  115. hexdag/core/orchestration/events/observers/__init__.py +30 -0
  116. hexdag/core/orchestration/events/observers/core_observers.py +690 -0
  117. hexdag/core/orchestration/events/observers/models.py +111 -0
  118. hexdag/core/orchestration/events/taxonomy.py +269 -0
  119. hexdag/core/orchestration/hook_context.py +237 -0
  120. hexdag/core/orchestration/hooks.py +437 -0
  121. hexdag/core/orchestration/models.py +418 -0
  122. hexdag/core/orchestration/orchestrator.py +910 -0
  123. hexdag/core/orchestration/orchestrator_factory.py +275 -0
  124. hexdag/core/orchestration/port_wrappers.py +327 -0
  125. hexdag/core/orchestration/prompt/__init__.py +32 -0
  126. hexdag/core/orchestration/prompt/template.py +332 -0
  127. hexdag/core/pipeline_builder/__init__.py +21 -0
  128. hexdag/core/pipeline_builder/component_instantiator.py +386 -0
  129. hexdag/core/pipeline_builder/include_tag.py +265 -0
  130. hexdag/core/pipeline_builder/pipeline_config.py +133 -0
  131. hexdag/core/pipeline_builder/py_tag.py +223 -0
  132. hexdag/core/pipeline_builder/tag_discovery.py +268 -0
  133. hexdag/core/pipeline_builder/yaml_builder.py +1196 -0
  134. hexdag/core/pipeline_builder/yaml_validator.py +569 -0
  135. hexdag/core/ports/__init__.py +65 -0
  136. hexdag/core/ports/api_call.py +133 -0
  137. hexdag/core/ports/database.py +489 -0
  138. hexdag/core/ports/embedding.py +215 -0
  139. hexdag/core/ports/executor.py +237 -0
  140. hexdag/core/ports/file_storage.py +117 -0
  141. hexdag/core/ports/healthcheck.py +87 -0
  142. hexdag/core/ports/llm.py +551 -0
  143. hexdag/core/ports/memory.py +70 -0
  144. hexdag/core/ports/observer_manager.py +130 -0
  145. hexdag/core/ports/secret.py +145 -0
  146. hexdag/core/ports/tool_router.py +94 -0
  147. hexdag/core/ports_builder.py +623 -0
  148. hexdag/core/protocols.py +273 -0
  149. hexdag/core/resolver.py +304 -0
  150. hexdag/core/schema/__init__.py +9 -0
  151. hexdag/core/schema/generator.py +742 -0
  152. hexdag/core/secrets.py +242 -0
  153. hexdag/core/types.py +413 -0
  154. hexdag/core/utils/async_warnings.py +206 -0
  155. hexdag/core/utils/schema_conversion.py +78 -0
  156. hexdag/core/utils/sql_validation.py +86 -0
  157. hexdag/core/validation/secure_json.py +148 -0
  158. hexdag/core/yaml_macro.py +517 -0
  159. hexdag/mcp_server.py +3120 -0
  160. hexdag/studio/__init__.py +10 -0
  161. hexdag/studio/build_ui.py +92 -0
  162. hexdag/studio/server/__init__.py +1 -0
  163. hexdag/studio/server/main.py +100 -0
  164. hexdag/studio/server/routes/__init__.py +9 -0
  165. hexdag/studio/server/routes/execute.py +208 -0
  166. hexdag/studio/server/routes/export.py +558 -0
  167. hexdag/studio/server/routes/files.py +207 -0
  168. hexdag/studio/server/routes/plugins.py +419 -0
  169. hexdag/studio/server/routes/validate.py +220 -0
  170. hexdag/studio/ui/index.html +13 -0
  171. hexdag/studio/ui/package-lock.json +2992 -0
  172. hexdag/studio/ui/package.json +31 -0
  173. hexdag/studio/ui/postcss.config.js +6 -0
  174. hexdag/studio/ui/public/hexdag.svg +5 -0
  175. hexdag/studio/ui/src/App.tsx +251 -0
  176. hexdag/studio/ui/src/components/Canvas.tsx +408 -0
  177. hexdag/studio/ui/src/components/ContextMenu.tsx +187 -0
  178. hexdag/studio/ui/src/components/FileBrowser.tsx +123 -0
  179. hexdag/studio/ui/src/components/Header.tsx +181 -0
  180. hexdag/studio/ui/src/components/HexdagNode.tsx +193 -0
  181. hexdag/studio/ui/src/components/NodeInspector.tsx +512 -0
  182. hexdag/studio/ui/src/components/NodePalette.tsx +262 -0
  183. hexdag/studio/ui/src/components/NodePortsSection.tsx +403 -0
  184. hexdag/studio/ui/src/components/PluginManager.tsx +347 -0
  185. hexdag/studio/ui/src/components/PortsEditor.tsx +481 -0
  186. hexdag/studio/ui/src/components/PythonEditor.tsx +195 -0
  187. hexdag/studio/ui/src/components/ValidationPanel.tsx +105 -0
  188. hexdag/studio/ui/src/components/YamlEditor.tsx +196 -0
  189. hexdag/studio/ui/src/components/index.ts +8 -0
  190. hexdag/studio/ui/src/index.css +92 -0
  191. hexdag/studio/ui/src/main.tsx +10 -0
  192. hexdag/studio/ui/src/types/index.ts +123 -0
  193. hexdag/studio/ui/src/vite-env.d.ts +1 -0
  194. hexdag/studio/ui/tailwind.config.js +29 -0
  195. hexdag/studio/ui/tsconfig.json +37 -0
  196. hexdag/studio/ui/tsconfig.node.json +13 -0
  197. hexdag/studio/ui/vite.config.ts +35 -0
  198. hexdag/visualization/__init__.py +69 -0
  199. hexdag/visualization/dag_visualizer.py +1020 -0
  200. hexdag-0.5.0.dev1.dist-info/METADATA +369 -0
  201. hexdag-0.5.0.dev1.dist-info/RECORD +261 -0
  202. hexdag-0.5.0.dev1.dist-info/WHEEL +4 -0
  203. hexdag-0.5.0.dev1.dist-info/entry_points.txt +4 -0
  204. hexdag-0.5.0.dev1.dist-info/licenses/LICENSE +190 -0
  205. hexdag_plugins/.gitignore +43 -0
  206. hexdag_plugins/README.md +73 -0
  207. hexdag_plugins/__init__.py +1 -0
  208. hexdag_plugins/azure/LICENSE +21 -0
  209. hexdag_plugins/azure/README.md +414 -0
  210. hexdag_plugins/azure/__init__.py +21 -0
  211. hexdag_plugins/azure/azure_blob_adapter.py +450 -0
  212. hexdag_plugins/azure/azure_cosmos_adapter.py +383 -0
  213. hexdag_plugins/azure/azure_keyvault_adapter.py +314 -0
  214. hexdag_plugins/azure/azure_openai_adapter.py +415 -0
  215. hexdag_plugins/azure/pyproject.toml +107 -0
  216. hexdag_plugins/azure/tests/__init__.py +1 -0
  217. hexdag_plugins/azure/tests/test_azure_blob_adapter.py +350 -0
  218. hexdag_plugins/azure/tests/test_azure_cosmos_adapter.py +323 -0
  219. hexdag_plugins/azure/tests/test_azure_keyvault_adapter.py +330 -0
  220. hexdag_plugins/azure/tests/test_azure_openai_adapter.py +329 -0
  221. hexdag_plugins/hexdag_etl/README.md +168 -0
  222. hexdag_plugins/hexdag_etl/__init__.py +53 -0
  223. hexdag_plugins/hexdag_etl/examples/01_simple_pandas_transform.py +270 -0
  224. hexdag_plugins/hexdag_etl/examples/02_simple_pandas_only.py +149 -0
  225. hexdag_plugins/hexdag_etl/examples/03_file_io_pipeline.py +109 -0
  226. hexdag_plugins/hexdag_etl/examples/test_pandas_transform.py +84 -0
  227. hexdag_plugins/hexdag_etl/hexdag.toml +25 -0
  228. hexdag_plugins/hexdag_etl/hexdag_etl/__init__.py +48 -0
  229. hexdag_plugins/hexdag_etl/hexdag_etl/nodes/__init__.py +13 -0
  230. hexdag_plugins/hexdag_etl/hexdag_etl/nodes/api_extract.py +230 -0
  231. hexdag_plugins/hexdag_etl/hexdag_etl/nodes/base_node_factory.py +181 -0
  232. hexdag_plugins/hexdag_etl/hexdag_etl/nodes/file_io.py +415 -0
  233. hexdag_plugins/hexdag_etl/hexdag_etl/nodes/outlook.py +492 -0
  234. hexdag_plugins/hexdag_etl/hexdag_etl/nodes/pandas_transform.py +563 -0
  235. hexdag_plugins/hexdag_etl/hexdag_etl/nodes/sql_extract_load.py +112 -0
  236. hexdag_plugins/hexdag_etl/pyproject.toml +82 -0
  237. hexdag_plugins/hexdag_etl/test_transform.py +54 -0
  238. hexdag_plugins/hexdag_etl/tests/test_plugin_integration.py +62 -0
  239. hexdag_plugins/mysql_adapter/LICENSE +21 -0
  240. hexdag_plugins/mysql_adapter/README.md +224 -0
  241. hexdag_plugins/mysql_adapter/__init__.py +6 -0
  242. hexdag_plugins/mysql_adapter/mysql_adapter.py +408 -0
  243. hexdag_plugins/mysql_adapter/pyproject.toml +93 -0
  244. hexdag_plugins/mysql_adapter/tests/test_mysql_adapter.py +259 -0
  245. hexdag_plugins/storage/README.md +184 -0
  246. hexdag_plugins/storage/__init__.py +19 -0
  247. hexdag_plugins/storage/file/__init__.py +5 -0
  248. hexdag_plugins/storage/file/local.py +325 -0
  249. hexdag_plugins/storage/ports/__init__.py +5 -0
  250. hexdag_plugins/storage/ports/vector_store.py +236 -0
  251. hexdag_plugins/storage/sql/__init__.py +7 -0
  252. hexdag_plugins/storage/sql/base.py +187 -0
  253. hexdag_plugins/storage/sql/mysql.py +27 -0
  254. hexdag_plugins/storage/sql/postgresql.py +27 -0
  255. hexdag_plugins/storage/tests/__init__.py +1 -0
  256. hexdag_plugins/storage/tests/test_local_file_storage.py +161 -0
  257. hexdag_plugins/storage/tests/test_sql_adapters.py +212 -0
  258. hexdag_plugins/storage/vector/__init__.py +7 -0
  259. hexdag_plugins/storage/vector/chromadb.py +223 -0
  260. hexdag_plugins/storage/vector/in_memory.py +285 -0
  261. hexdag_plugins/storage/vector/pgvector.py +502 -0
hexdag/cli/commands/build_cmd.py
@@ -0,0 +1,812 @@
+ """Docker build commands for HexDAG CLI."""
+
+ import os
+ import re
+ import shlex
+ from pathlib import Path
+ from typing import Annotated
+
+ import typer
+ import yaml
+ from rich.console import Console
+
+ app = typer.Typer()
+ console = Console()
+
+ # Valid identifier pattern for pipeline names (alphanumeric, dash, underscore only)
+ _VALID_IDENTIFIER_PATTERN = re.compile(r"^[a-zA-Z0-9_-]+$")
+
+
+ def _sanitize_shell_string(value: str) -> str:
+     """Sanitize a string for safe use in shell scripts.
+
+     Uses shlex.quote() to properly escape the value for shell interpolation.
+
+     Parameters
+     ----------
+     value : str
+         Value to sanitize
+
+     Returns
+     -------
+     str
+         Sanitized value safe for shell interpolation
+
+     Examples
+     --------
+     >>> _sanitize_shell_string("my-pipeline")
+     "'my-pipeline'"
+     >>> _sanitize_shell_string("pipe; rm -rf /")
+     "'pipe; rm -rf /'"
+     """
+     return shlex.quote(value)
+
+
+ def _validate_identifier(name: str, context: str = "identifier") -> None:
+     """Validate that a name is a safe identifier.
+
+     Parameters
+     ----------
+     name : str
+         Name to validate
+     context : str
+         Context for error messages (e.g., "pipeline name", "image name")
+
+     Raises
+     ------
+     typer.Exit
+         If name contains invalid characters
+     """
+     if not _VALID_IDENTIFIER_PATTERN.match(name):
+         console.print(
+             f"[red]Error:[/red] Invalid {context}: {name!r}. "
+             f"Only alphanumeric characters, dashes, and underscores are allowed."
+         )
+         raise typer.Exit(1)
+
+
+ def _read_pipeline_yaml(path: Path) -> dict:
+     """Read and validate pipeline YAML file."""
+     if not path.exists():
+         console.print(f"[red]Error:[/red] Pipeline file not found: {path}")
+         raise typer.Exit(1)
+
+     with Path.open(path) as f:
+         try:
+             data = yaml.safe_load(f)
+             if not isinstance(data, dict):
+                 console.print(f"[red]Error:[/red] Invalid YAML structure in {path}")
+                 raise typer.Exit(1)
+             return data
+         except yaml.YAMLError as e:
+             console.print(f"[red]Error:[/red] Failed to parse YAML: {e}")
+             raise typer.Exit(1) from e
+
+
+ def _validate_pipeline_structure(pipeline_path: Path, data: dict) -> None:
+     """Validate pipeline structure before building Docker image.
+
+     Parameters
+     ----------
+     pipeline_path : Path
+         Path to the pipeline file (for error messages)
+     data : dict
+         Parsed pipeline data
+
+     Raises
+     ------
+     typer.Exit
+         If validation fails
+     """
+     try:
+         # Check for required top-level keys (support both old and new format)
+         if "name" not in data and "metadata" not in data:
+             raise ValueError("Pipeline must have a 'name' or 'metadata' field")
+
+         if "nodes" not in data and "spec" not in data:
+             raise ValueError("Pipeline must have a 'nodes' or 'spec' field")
+
+         nodes = data.get("nodes") or data.get("spec", {}).get("nodes", [])
+
+         if not isinstance(nodes, list):
+             raise ValueError("'nodes' field must be a list")
+
+         if not nodes:
+             raise ValueError("Pipeline must have at least one node")
+
+         # Validate each node has required fields
+         for i, node in enumerate(nodes):
+             if not isinstance(node, dict):
+                 raise ValueError(f"Node {i} must be a dictionary")
+
+             # Support both old (type/id) and new (kind/metadata.name) formats
+             node_type = node.get("type") or node.get("kind")
+             if not node_type:
+                 raise ValueError(f"Node {i} must have a 'type' or 'kind' field")
+
+             node_id = node.get("id") or node.get("name")
+             if not node_id and isinstance(node.get("metadata"), dict):
+                 node_id = node["metadata"].get("name")
+
+             if not node_id:
+                 raise ValueError(f"Node {i} must have an 'id', 'name', or 'metadata.name' field")
+
+     except ValueError as e:
+         console.print(f"[red]Error:[/red] Invalid pipeline structure in {pipeline_path.name}: {e}")
+         raise typer.Exit(1) from e
+
+
+ def _generate_dockerfile(
+     pipeline_files: list[Path],
+     output_dir: Path,
+     python_version: str = "3.12",
+     base_image: str | None = None,
+     local_install: bool = False,
+     extras: str = "",
+ ) -> Path:
+     """Generate Dockerfile for pipeline(s)."""
+     if base_image is None:
+         base_image = f"python:{python_version}-slim"
+
+     # Collect all pipeline names for the image
+     pipeline_names = [p.stem for p in pipeline_files]
+
+     # Format extras for pip install
+     extras_str = f"[{extras}]" if extras else ""
+
+     dockerfile_content = (
+         f"""# HexDAG Pipeline Container
+ # Generated by hexdag build
+ """ # nosec B608
+         f"""# Pipelines: {", ".join(pipeline_names)}
+
+ FROM {base_image}
+
+ # Install system dependencies
+ RUN apt-get update && apt-get install -y \\
+ git \\
+ && rm -rf /var/lib/apt/lists/*
+
+ WORKDIR /app
+
+ RUN mkdir -p /app/pipelines /app/src
+
+ """
+     )
+
+     # Install hexdag - either from PyPI or local source
+     if local_install:
+         dockerfile_content += f"""# Install hexdag from local source
+ # Extras: {extras if extras else "none (base install only)"}
+ COPY hexdag/ /tmp/hexdag/
+ RUN pip install --no-cache-dir /tmp/hexdag{extras_str} && rm -rf /tmp/hexdag
+
+ """
+     else:
+         dockerfile_content += f"""# Install hexdag with dependencies
+ # Extras: {extras if extras else "none (base install only)"}
+ RUN pip install --no-cache-dir hexdag{extras_str}
+
+ """
+
+     dockerfile_content += "# Copy pipeline files\n"
+
+     for pipeline_file in pipeline_files:
+         dockerfile_content += (
+             f"COPY pipelines/{pipeline_file.name} /app/pipelines/{pipeline_file.name}\n"
+         )
+
+     # Copy requirements.txt and src directory
+     dockerfile_content += """
+ # Copy custom dependencies and code
+ COPY requirements.txt /app/requirements.txt
+ COPY src/ /app/src/
+
+ # Install custom requirements if not empty
+ RUN if [ -s requirements.txt ]; then pip install --no-cache-dir -r requirements.txt; fi
+
+ ENV PYTHONPATH=/app:$PYTHONPATH
+
+ COPY docker-entrypoint.sh /usr/local/bin/
+ RUN chmod +x /usr/local/bin/docker-entrypoint.sh
+
+ ENTRYPOINT ["docker-entrypoint.sh"]
+ CMD ["--help"]
+ """
+
+     dockerfile_path = output_dir / "Dockerfile"
+     with Path.open(dockerfile_path, "w") as f:
+         f.write(dockerfile_content)
+
+     return dockerfile_path
+
+
+ def _generate_entrypoint_script(output_dir: Path, pipeline_files: list[Path]) -> Path:
+     """Generate docker-entrypoint.sh script."""
+     pipeline_names = [p.stem for p in pipeline_files]
+
+     # Validate all pipeline names are safe identifiers
+     for name in pipeline_names:
+         _validate_identifier(name, "pipeline name")
+
+     script_content = """#!/bin/bash
+ set -e
+
+ # HexDAG Pipeline Entrypoint
+ # This script runs hexdag pipelines in a containerized environment
+
+ """
+
+     script_content += "# Available pipelines:\n"
+     for name in pipeline_names:
+         script_content += f"# - {name}\n"
+
+     script_content += """
+ # Parse arguments
+ PIPELINE_NAME="${1:-}"
+ PIPELINE_INPUT="${2:-\\{\\}}"
+
+ if [ "$PIPELINE_NAME" = "--help" ] || [ -z "$PIPELINE_NAME" ]; then
+ echo "Usage: docker run <image> <pipeline-name> [input-json]"
+ echo ""
+ echo "Available pipelines:"
+ """
+
+     # Use sanitized names for echo commands (defense in depth)
+     for name in pipeline_names:
+         safe_name = _sanitize_shell_string(name)
+         script_content += f' echo " - {safe_name}"\n'
+
+     script_content += """ echo ""
+ echo "Examples:"
+ echo " docker run <image> my-pipeline '{\"input\": \"data\"}'"
+ echo " docker run <image> my-pipeline @input.json"
+ exit 0
+ fi
+
+ # Find pipeline file
+ PIPELINE_FILE="/app/pipelines/${PIPELINE_NAME}.yaml"
+ if [ ! -f "$PIPELINE_FILE" ]; then
+ PIPELINE_FILE="/app/pipelines/${PIPELINE_NAME}.yml"
+ fi
+
+ if [ ! -f "$PIPELINE_FILE" ]; then
+ echo "Error: Pipeline '${PIPELINE_NAME}' not found"
+ echo "Available pipelines:"
+ """
+
+     # Use sanitized names for echo commands
+     for name in pipeline_names:
+         safe_name = _sanitize_shell_string(name)
+         script_content += f' echo " - {safe_name}"\n'
+
+     script_content += ''' exit 1
+ fi
+
+ # Run pipeline using Python
+ python3 <<EOF
+ import asyncio
+ import json
+ import sys
+ from pathlib import Path
+
+ from hexdag.core.pipeline_builder import YamlPipelineBuilder
+
+ async def main():
+     try:
+         # Load pipeline
+         builder = YamlPipelineBuilder()
+         with Path.open("$PIPELINE_FILE") as f:
+             builder.load_from_yaml(f.read())
+
+         # Parse input - support both JSON string and file input
+         input_str = """$PIPELINE_INPUT"""
+
+         if input_str.startswith("@"):
+             input_file = input_str[1:] # Remove @ prefix
+             try:
+                 with Path.open(input_file) as f:
+                     input_data = json.load(f)
+             except FileNotFoundError:
+                 print(f"Error: Input file not found: {input_file}", file=sys.stderr)
+                 sys.exit(1)
+             except json.JSONDecodeError as e:
+                 print(f"Error: Invalid JSON in file {input_file}: {e}", file=sys.stderr)
+                 sys.exit(1)
+         else:
+             # Parse as JSON string
+             try:
+                 input_data = json.loads(input_str)
+             except json.JSONDecodeError as e:
+                 print(f"Error: Invalid JSON input: {e}", file=sys.stderr)
+                 print(f"Received: {input_str[:100]}...", file=sys.stderr)
+                 sys.exit(1)
+
+         dag = builder.build()
+         results = await dag.aexecute(input_data)
+
+         # Output results
+         print(json.dumps(results, indent=2))
+
+     except Exception as e:
+         print(f"Error executing pipeline: {e}", file=sys.stderr)
+         import traceback
+         traceback.print_exc()
+         sys.exit(1)
+
+ if __name__ == "__main__":
+     asyncio.run(main())
+ EOF
+ '''
+
+     script_path = output_dir / "docker-entrypoint.sh"
+     with Path.open(script_path, "w") as f:
+         f.write(script_content)
+
+     return script_path
+
+
+ def _generate_docker_compose(
+     pipeline_files: list[Path],
+     output_dir: Path,
+     image_name: str,
+ ) -> Path:
+     """Generate docker-compose.yml for multi-pipeline orchestration."""
+     services = {}
+
+     for pipeline_file in pipeline_files:
+         pipeline_name = pipeline_file.stem
+         pipeline_data = _read_pipeline_yaml(pipeline_file)
+
+         env_vars = {}
+         if "metadata" in pipeline_data:
+             metadata = pipeline_data["metadata"]
+             if "environment" in metadata:
+                 env_vars = metadata["environment"]
+
+         service_config: dict[str, str | list[str] | dict[str, str]] = {
+             "image": image_name,
+             "container_name": f"hexdag-{pipeline_name}",
+             "command": [pipeline_name, "${{INPUT:-{}}}"],
+             "env_file": [".env"], # Support .env file for configuration
+             "volumes": ["./data:/app/data"],
+             "restart": "unless-stopped",
+         }
+
+         if env_vars:
+             service_config["environment"] = env_vars
+
+         services[pipeline_name] = service_config
+
+     compose_data = {
+         "version": "3.8",
+         "services": services,
+         "volumes": {"data": {"driver": "local"}},
+     }
+
+     compose_path = output_dir / "docker-compose.yml"
+     with Path.open(compose_path, "w") as f:
+         yaml.dump(compose_data, f, default_flow_style=False, sort_keys=False)
+
+     return compose_path
+
+
+ def _generate_readme(
+     output_dir: Path,
+     pipeline_files: list[Path],
+     image_name: str,
+ ) -> Path:
+     """Generate README.md with usage instructions."""
+     pipeline_names = [p.stem for p in pipeline_files]
+
+     readme_content = f"""# HexDAG Pipeline Container
+
+ This container includes the following pipelines:
+ {chr(10).join(f"- `{name}`" for name in pipeline_names)}
+
+ ## Building the Image
+
+ > **Note**: If hexdag is not published to PyPI, you'll need to install it from source.
+ > Replace the `RUN pip install --no-cache-dir hexdag[all]` line in the Dockerfile with:
+ > ```dockerfile
+ > COPY . /tmp/hexdag
+ > RUN pip install --no-cache-dir /tmp/hexdag[all]
+ > ```
+ > And copy the hexdag source code to the build directory before building.
+
+ ```bash
+ docker build -t {image_name} .
+ ```
+
+ ## Running Pipelines
+
+ ### Single Pipeline
+
+ ```bash
+ # Run with JSON input
+ docker run {image_name} <pipeline-name> '{{"input": "data"}}'
+
+ # Run with input file
+ docker run -v $(pwd)/input.json:/app/input.json {image_name} <pipeline-name> @/app/input.json
+ ```
+
+ ### Using Docker Compose
+
+ ```bash
+ # Start all pipelines
+ docker-compose up -d
+
+ # Run specific pipeline
+ docker-compose run <pipeline-name>
+
+ # View logs
+ docker-compose logs -f <pipeline-name>
+
+ # Stop all pipelines
+ docker-compose down
+ ```
+
+ ## Configuration
+
+ ### Environment Variables
+
+ Configure adapters using environment variables:
+
+ ```bash
+ # LLM Configuration
+ export OPENAI_API_KEY="your-key"
+ export ANTHROPIC_API_KEY="your-key"
+
+ # Database Configuration
+ export DATABASE_URL="postgresql://..."
+
+ # Run with environment
+ docker run --env-file .env {image_name} <pipeline-name> '{{"input": "data"}}'
+ ```
+
+ ### Custom Dependencies
+
+ Add custom Python dependencies to `requirements.txt` before building:
+
+ ```bash
+ echo "pandas>=2.0.0" >> requirements.txt
+ docker build -t {image_name} .
+ ```
+
+ ### Custom Code
+
+ Add custom Python modules to `src/` directory:
+
+ ```
+ src/
+ ├── my_module/
+ │ ├── __init__.py
+ │ └── functions.py
+ ```
+
+ ## Pipeline Details
+
+ """
+
+     for pipeline_file in pipeline_files:
+         pipeline_data = _read_pipeline_yaml(pipeline_file)
+         name = pipeline_file.stem
+         description = pipeline_data.get("metadata", {}).get("description", "No description")
+
+         readme_content += f"""### {name}
+
+ **Description:** {description}
+
+ **Run:**
+ ```bash
+ docker run {image_name} {name} '{{"input": "data"}}'
+ ```
+
+ """
+
+     readme_path = output_dir / "README.md"
+     with Path.open(readme_path, "w") as f:
+         f.write(readme_content)
+
+     return readme_path
+
+
+ @app.command()
+ def build(
+     pipeline: Annotated[
+         list[Path],
+         typer.Argument(
+             help="Pipeline YAML file(s) to build",
+             exists=True,
+             dir_okay=False,
+         ),
+     ],
+     output: Annotated[
+         str,
+         typer.Option(
+             "--output",
+             "-o",
+             help="Output directory for Docker files (default: ./build)",
+         ),
+     ] = "./build",
+     image: Annotated[
+         str | None,
+         typer.Option(
+             "--image",
+             "-i",
+             help="Docker image name (default: hexdag-<pipeline-name>)",
+         ),
+     ] = None,
+     python_version: Annotated[
+         str,
+         typer.Option(
+             "--python-version",
+             "-p",
+             help="Python version for base image",
+         ),
+     ] = "3.12",
+     base_image: Annotated[
+         str | None,
+         typer.Option(
+             "--base-image",
+             "-b",
+             help="Custom base Docker image",
+         ),
+     ] = None,
+     compose: Annotated[
+         bool,
+         typer.Option(
+             "--compose/--no-compose",
+             "-c/-C",
+             help="Generate docker-compose.yml for multi-pipeline orchestration",
+         ),
+     ] = True,
+     local: Annotated[
+         bool,
+         typer.Option(
+             "--local",
+             "-l",
+             help="Install hexdag from local source (copies hexdag/ directory to build context)",
+         ),
+     ] = False,
+     extras: Annotated[
+         str,
+         typer.Option(
+             "--extras",
+             "-e",
+             help=(
+                 "Comma-separated list of extras to install (e.g., 'yaml,openai,anthropic,cli'). "
+                 "Available: yaml, viz, openai, anthropic, database, cli, docs, all"
+             ),
+         ),
+     ] = "yaml,openai,anthropic,cli",
+ ) -> None:
+     """Build Docker container(s) for pipeline(s).
+
+     Security Warning
+     ----------------
+     This command is intended for DEVELOPMENT USE ONLY with trusted pipelines.
+
+     ⚠️ DO NOT use in production environments with untrusted YAML files.
+
+     For production deployments:
+     - Build containers in CI/CD with trusted YAML files
+     - Deploy pre-built, verified images only
+     - Set HEXDAG_DISABLE_BUILD=1 to disable this command
+
+     Examples
+     --------
+     # Single pipeline
+     hexdag build my-pipeline.yaml
+
+     # Multiple pipelines
+     hexdag build pipeline1.yaml pipeline2.yaml pipeline3.yaml
+
+     # Custom output directory
+     hexdag build my-pipeline.yaml --output ./docker
+
+     # Custom image name
+     hexdag build my-pipeline.yaml --image my-org/my-pipeline:latest
+
+     # Skip docker-compose generation
+     hexdag build my-pipeline.yaml --no-compose
+     """
+     # Security: Check if build command is disabled (production safety)
+
+     if os.getenv("HEXDAG_DISABLE_BUILD", "").lower() in ("1", "true", "yes"):
+         console.print(
+             "[red]Error:[/red] Docker build command is disabled "
+             "(HEXDAG_DISABLE_BUILD is set).\n"
+             "[yellow]This is a security feature to prevent untrusted pipeline execution.[/yellow]\n"
+             "If you need to build containers, unset HEXDAG_DISABLE_BUILD in development."
+         )
+         raise typer.Exit(1)
+
+     output_path = Path(output)
+
+     output_path.mkdir(parents=True, exist_ok=True)
+
+     pipelines_dir = output_path / "pipelines"
+     pipelines_dir.mkdir(exist_ok=True)
+
+     # Determine image name
+     if image is None:
+         image = f"hexdag-{pipeline[0].stem}" if len(pipeline) == 1 else "hexdag-pipelines"
+
+     console.print(f"[cyan]Building Docker container for {len(pipeline)} pipeline(s)...[/cyan]\n")
+
+     # Validate all pipeline files and names before building
+     console.print("[cyan]Validating pipeline files...[/cyan]")
+     for p in pipeline:
+         # Validate pipeline filename is a safe identifier
+         _validate_identifier(p.stem, f"pipeline filename '{p.name}'")
+
+         pipeline_data = _read_pipeline_yaml(p)
+         _validate_pipeline_structure(p, pipeline_data)
+         console.print(f" [green]✓[/green] Valid: {p.name}")
+     console.print()
+
+     # Copy pipeline files to output
+     copied_pipelines = []
+     for p in pipeline:
+         dest = pipelines_dir / p.name
+         with Path.open(p) as src_file, open(dest, "w") as dst_file:
+             dst_file.write(src_file.read())
+         copied_pipelines.append(dest)
+         console.print(f" [green]✓[/green] Copied: {p.name}")
+
+     # Copy hexdag source if --local flag is set
+     if local:
+         import shutil
+
+         # Find hexdag source directory more robustly
+         import hexdag
+
+         hexdag_src = Path(hexdag.__file__).parent.parent
+
+         # Verify we found the right directory
+         if not (hexdag_src / "pyproject.toml").exists():
+             console.print(
+                 f"[red]Error:[/red] Could not find hexdag source directory. "
+                 f"Expected pyproject.toml in {hexdag_src}"
+             )
+             raise typer.Exit(1)
+
+         hexdag_dest = output_path / "hexdag"
+
+         console.print("\n[cyan]Copying hexdag source for local installation...[/cyan]")
+
+         # Remove existing directory with error handling
+         if hexdag_dest.exists():
+             try:
+                 shutil.rmtree(hexdag_dest)
+             except PermissionError as e:
+                 console.print(f"[red]Error:[/red] Cannot remove {hexdag_dest}: {e}")
+                 console.print(
+                     "[yellow]Hint:[/yellow] Check file permissions or close any programs "
+                     "using these files"
+                 )
+                 raise typer.Exit(1) from e
+
+         try:
+             shutil.copytree(
+                 hexdag_src,
+                 hexdag_dest,
+                 ignore=shutil.ignore_patterns(
+                     "__pycache__",
+                     "*.pyc",
+                     "*.pyo",
+                     ".git",
+                     ".venv",
+                     "venv",
+                     "*.egg-info",
+                     "build",
+                     "dist",
+                     ".pytest_cache",
+                     ".mypy_cache",
+                     "tests",
+                     "examples",
+                     "docs",
+                     ".coverage",
+                     "htmlcov",
+                     "*.egg",
+                     ".eggs",
+                     ".tox",
+                     ".ruff_cache",
+                 ),
+             )
+             console.print(f" [green]✓[/green] Copied hexdag source from {hexdag_src}")
+         except Exception as e:
+             console.print(f"[red]Error:[/red] Failed to copy hexdag source: {e}")
+             raise typer.Exit(1) from e
+
+     # Generate Dockerfile
+     console.print("\n[cyan]Generating Dockerfile...[/cyan]")
+     dockerfile = _generate_dockerfile(
+         [pipelines_dir / p.name for p in pipeline],
+         output_path,
+         python_version,
+         base_image,
+         local_install=local,
+         extras=extras,
+     )
+     console.print(f" [green]✓[/green] Created: {dockerfile.relative_to(output_path.parent)}")
+     if extras:
+         console.print(f" [dim]Installing with extras: {extras}[/dim]")
+     else:
+         console.print(" [dim]Installing base package only (no extras)[/dim]")
+
+     # Generate entrypoint script
+     console.print("\n[cyan]Generating entrypoint script...[/cyan]")
+     entrypoint = _generate_entrypoint_script(
+         output_path,
+         [pipelines_dir / p.name for p in pipeline],
+     )
+     console.print(f" [green]✓[/green] Created: {entrypoint.relative_to(output_path.parent)}")
+
+     # Generate docker-compose if requested and multiple pipelines
+     if compose and len(pipeline) > 1:
+         console.print("\n[cyan]Generating docker-compose.yml...[/cyan]")
+         compose_file = _generate_docker_compose(
+             [pipelines_dir / p.name for p in pipeline],
+             output_path,
+             image,
+         )
+         console.print(f" [green]✓[/green] Created: {compose_file.relative_to(output_path.parent)}")
+
+     # Generate README
+     console.print("\n[cyan]Generating README.md...[/cyan]")
+     readme = _generate_readme(output_path, [pipelines_dir / p.name for p in pipeline], image)
+     console.print(f" [green]✓[/green] Created: {readme.relative_to(output_path.parent)}")
+
+     dockerignore = output_path / ".dockerignore"
+     with Path.open(dockerignore, "w") as f:
+         f.write("**/__pycache__\n**/*.pyc\n**/*.pyo\n**/.git\n**/.venv\n**/venv\n")
+     console.print(f" [green]✓[/green] Created: {dockerignore.relative_to(output_path.parent)}")
+
+     requirements = output_path / "requirements.txt"
+     if not requirements.exists():
+         requirements.touch()
+     console.print(f" [green]✓[/green] Created: {requirements.relative_to(output_path.parent)}")
+
+     src_dir = output_path / "src"
+     src_dir.mkdir(exist_ok=True)
+     (src_dir / "__init__.py").touch()
+     console.print(f" [green]✓[/green] Created: {src_dir.relative_to(output_path.parent)}/")
+
+     env_file = output_path / ".env.example"
+     with Path.open(env_file, "w") as f:
+         f.write("""# HexDAG Environment Configuration
+ # Copy this file to .env and fill in your values
+
+ # LLM API Keys
+ OPENAI_API_KEY=your-openai-api-key-here
+ ANTHROPIC_API_KEY=your-anthropic-api-key-here
+
+ # Database Configuration (if using database extra)
+ DATABASE_URL=sqlite:///app/data/hexdag.db
+
+ # Logging
+ LOG_LEVEL=INFO
+
+ # Custom environment variables
+ """)
+     console.print(f" [green]✓[/green] Created: {env_file.relative_to(output_path.parent)}")
+
+     console.print("\n[green]✓ Docker build files generated successfully![/green]\n")
+     console.print("[cyan]Next steps:[/cyan]")
+     console.print(f" 1. Review generated files in: [bold]{output_path}[/bold]")
+     console.print(f" 2. Build image: [bold]cd {output_path} && docker build -t {image} .[/bold]")
+
+     if compose and len(pipeline) > 1:
+         console.print(f" 3. Run with compose: [bold]cd {output_path} && docker-compose up[/bold]")
+     else:
+         pipeline_name = pipeline[0].stem
+         console.print(
+             f" 3. Run pipeline: [bold]docker run {image} {pipeline_name} "
+             f'\'{{"{pipeline_name}_input": "value"}}\'[/bold]'
+         )
+
+     console.print(
+         f"\n[dim]See {readme.relative_to(output_path.parent)} for detailed usage instructions[/dim]"
+     )