hexdag 0.5.0.dev1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (261) hide show
  1. hexdag/__init__.py +116 -0
  2. hexdag/__main__.py +30 -0
  3. hexdag/adapters/executors/__init__.py +5 -0
  4. hexdag/adapters/executors/local_executor.py +316 -0
  5. hexdag/builtin/__init__.py +6 -0
  6. hexdag/builtin/adapters/__init__.py +51 -0
  7. hexdag/builtin/adapters/anthropic/__init__.py +5 -0
  8. hexdag/builtin/adapters/anthropic/anthropic_adapter.py +151 -0
  9. hexdag/builtin/adapters/database/__init__.py +6 -0
  10. hexdag/builtin/adapters/database/csv/csv_adapter.py +249 -0
  11. hexdag/builtin/adapters/database/pgvector/__init__.py +5 -0
  12. hexdag/builtin/adapters/database/pgvector/pgvector_adapter.py +478 -0
  13. hexdag/builtin/adapters/database/sqlalchemy/sqlalchemy_adapter.py +252 -0
  14. hexdag/builtin/adapters/database/sqlite/__init__.py +5 -0
  15. hexdag/builtin/adapters/database/sqlite/sqlite_adapter.py +410 -0
  16. hexdag/builtin/adapters/local/README.md +59 -0
  17. hexdag/builtin/adapters/local/__init__.py +7 -0
  18. hexdag/builtin/adapters/local/local_observer_manager.py +696 -0
  19. hexdag/builtin/adapters/memory/__init__.py +47 -0
  20. hexdag/builtin/adapters/memory/file_memory_adapter.py +297 -0
  21. hexdag/builtin/adapters/memory/in_memory_memory.py +216 -0
  22. hexdag/builtin/adapters/memory/schemas.py +57 -0
  23. hexdag/builtin/adapters/memory/session_memory.py +178 -0
  24. hexdag/builtin/adapters/memory/sqlite_memory_adapter.py +215 -0
  25. hexdag/builtin/adapters/memory/state_memory.py +280 -0
  26. hexdag/builtin/adapters/mock/README.md +89 -0
  27. hexdag/builtin/adapters/mock/__init__.py +15 -0
  28. hexdag/builtin/adapters/mock/hexdag.toml +50 -0
  29. hexdag/builtin/adapters/mock/mock_database.py +225 -0
  30. hexdag/builtin/adapters/mock/mock_embedding.py +223 -0
  31. hexdag/builtin/adapters/mock/mock_llm.py +177 -0
  32. hexdag/builtin/adapters/mock/mock_tool_adapter.py +192 -0
  33. hexdag/builtin/adapters/mock/mock_tool_router.py +232 -0
  34. hexdag/builtin/adapters/openai/__init__.py +5 -0
  35. hexdag/builtin/adapters/openai/openai_adapter.py +634 -0
  36. hexdag/builtin/adapters/secret/__init__.py +7 -0
  37. hexdag/builtin/adapters/secret/local_secret_adapter.py +248 -0
  38. hexdag/builtin/adapters/unified_tool_router.py +280 -0
  39. hexdag/builtin/macros/__init__.py +17 -0
  40. hexdag/builtin/macros/conversation_agent.py +390 -0
  41. hexdag/builtin/macros/llm_macro.py +151 -0
  42. hexdag/builtin/macros/reasoning_agent.py +423 -0
  43. hexdag/builtin/macros/tool_macro.py +380 -0
  44. hexdag/builtin/nodes/__init__.py +38 -0
  45. hexdag/builtin/nodes/_discovery.py +123 -0
  46. hexdag/builtin/nodes/agent_node.py +696 -0
  47. hexdag/builtin/nodes/base_node_factory.py +242 -0
  48. hexdag/builtin/nodes/composite_node.py +926 -0
  49. hexdag/builtin/nodes/data_node.py +201 -0
  50. hexdag/builtin/nodes/expression_node.py +487 -0
  51. hexdag/builtin/nodes/function_node.py +454 -0
  52. hexdag/builtin/nodes/llm_node.py +491 -0
  53. hexdag/builtin/nodes/loop_node.py +920 -0
  54. hexdag/builtin/nodes/mapped_input.py +518 -0
  55. hexdag/builtin/nodes/port_call_node.py +269 -0
  56. hexdag/builtin/nodes/tool_call_node.py +195 -0
  57. hexdag/builtin/nodes/tool_utils.py +390 -0
  58. hexdag/builtin/prompts/__init__.py +68 -0
  59. hexdag/builtin/prompts/base.py +422 -0
  60. hexdag/builtin/prompts/chat_prompts.py +303 -0
  61. hexdag/builtin/prompts/error_correction_prompts.py +320 -0
  62. hexdag/builtin/prompts/tool_prompts.py +160 -0
  63. hexdag/builtin/tools/builtin_tools.py +84 -0
  64. hexdag/builtin/tools/database_tools.py +164 -0
  65. hexdag/cli/__init__.py +17 -0
  66. hexdag/cli/__main__.py +7 -0
  67. hexdag/cli/commands/__init__.py +27 -0
  68. hexdag/cli/commands/build_cmd.py +812 -0
  69. hexdag/cli/commands/create_cmd.py +208 -0
  70. hexdag/cli/commands/docs_cmd.py +293 -0
  71. hexdag/cli/commands/generate_types_cmd.py +252 -0
  72. hexdag/cli/commands/init_cmd.py +188 -0
  73. hexdag/cli/commands/pipeline_cmd.py +494 -0
  74. hexdag/cli/commands/plugin_dev_cmd.py +529 -0
  75. hexdag/cli/commands/plugins_cmd.py +441 -0
  76. hexdag/cli/commands/studio_cmd.py +101 -0
  77. hexdag/cli/commands/validate_cmd.py +221 -0
  78. hexdag/cli/main.py +84 -0
  79. hexdag/core/__init__.py +83 -0
  80. hexdag/core/config/__init__.py +20 -0
  81. hexdag/core/config/loader.py +479 -0
  82. hexdag/core/config/models.py +150 -0
  83. hexdag/core/configurable.py +294 -0
  84. hexdag/core/context/__init__.py +37 -0
  85. hexdag/core/context/execution_context.py +378 -0
  86. hexdag/core/docs/__init__.py +26 -0
  87. hexdag/core/docs/extractors.py +678 -0
  88. hexdag/core/docs/generators.py +890 -0
  89. hexdag/core/docs/models.py +120 -0
  90. hexdag/core/domain/__init__.py +10 -0
  91. hexdag/core/domain/dag.py +1225 -0
  92. hexdag/core/exceptions.py +234 -0
  93. hexdag/core/expression_parser.py +569 -0
  94. hexdag/core/logging.py +449 -0
  95. hexdag/core/models/__init__.py +17 -0
  96. hexdag/core/models/base.py +138 -0
  97. hexdag/core/orchestration/__init__.py +46 -0
  98. hexdag/core/orchestration/body_executor.py +481 -0
  99. hexdag/core/orchestration/components/__init__.py +97 -0
  100. hexdag/core/orchestration/components/adapter_lifecycle_manager.py +113 -0
  101. hexdag/core/orchestration/components/checkpoint_manager.py +134 -0
  102. hexdag/core/orchestration/components/execution_coordinator.py +360 -0
  103. hexdag/core/orchestration/components/health_check_manager.py +176 -0
  104. hexdag/core/orchestration/components/input_mapper.py +143 -0
  105. hexdag/core/orchestration/components/lifecycle_manager.py +583 -0
  106. hexdag/core/orchestration/components/node_executor.py +377 -0
  107. hexdag/core/orchestration/components/secret_manager.py +202 -0
  108. hexdag/core/orchestration/components/wave_executor.py +158 -0
  109. hexdag/core/orchestration/constants.py +17 -0
  110. hexdag/core/orchestration/events/README.md +312 -0
  111. hexdag/core/orchestration/events/__init__.py +104 -0
  112. hexdag/core/orchestration/events/batching.py +330 -0
  113. hexdag/core/orchestration/events/decorators.py +139 -0
  114. hexdag/core/orchestration/events/events.py +573 -0
  115. hexdag/core/orchestration/events/observers/__init__.py +30 -0
  116. hexdag/core/orchestration/events/observers/core_observers.py +690 -0
  117. hexdag/core/orchestration/events/observers/models.py +111 -0
  118. hexdag/core/orchestration/events/taxonomy.py +269 -0
  119. hexdag/core/orchestration/hook_context.py +237 -0
  120. hexdag/core/orchestration/hooks.py +437 -0
  121. hexdag/core/orchestration/models.py +418 -0
  122. hexdag/core/orchestration/orchestrator.py +910 -0
  123. hexdag/core/orchestration/orchestrator_factory.py +275 -0
  124. hexdag/core/orchestration/port_wrappers.py +327 -0
  125. hexdag/core/orchestration/prompt/__init__.py +32 -0
  126. hexdag/core/orchestration/prompt/template.py +332 -0
  127. hexdag/core/pipeline_builder/__init__.py +21 -0
  128. hexdag/core/pipeline_builder/component_instantiator.py +386 -0
  129. hexdag/core/pipeline_builder/include_tag.py +265 -0
  130. hexdag/core/pipeline_builder/pipeline_config.py +133 -0
  131. hexdag/core/pipeline_builder/py_tag.py +223 -0
  132. hexdag/core/pipeline_builder/tag_discovery.py +268 -0
  133. hexdag/core/pipeline_builder/yaml_builder.py +1196 -0
  134. hexdag/core/pipeline_builder/yaml_validator.py +569 -0
  135. hexdag/core/ports/__init__.py +65 -0
  136. hexdag/core/ports/api_call.py +133 -0
  137. hexdag/core/ports/database.py +489 -0
  138. hexdag/core/ports/embedding.py +215 -0
  139. hexdag/core/ports/executor.py +237 -0
  140. hexdag/core/ports/file_storage.py +117 -0
  141. hexdag/core/ports/healthcheck.py +87 -0
  142. hexdag/core/ports/llm.py +551 -0
  143. hexdag/core/ports/memory.py +70 -0
  144. hexdag/core/ports/observer_manager.py +130 -0
  145. hexdag/core/ports/secret.py +145 -0
  146. hexdag/core/ports/tool_router.py +94 -0
  147. hexdag/core/ports_builder.py +623 -0
  148. hexdag/core/protocols.py +273 -0
  149. hexdag/core/resolver.py +304 -0
  150. hexdag/core/schema/__init__.py +9 -0
  151. hexdag/core/schema/generator.py +742 -0
  152. hexdag/core/secrets.py +242 -0
  153. hexdag/core/types.py +413 -0
  154. hexdag/core/utils/async_warnings.py +206 -0
  155. hexdag/core/utils/schema_conversion.py +78 -0
  156. hexdag/core/utils/sql_validation.py +86 -0
  157. hexdag/core/validation/secure_json.py +148 -0
  158. hexdag/core/yaml_macro.py +517 -0
  159. hexdag/mcp_server.py +3120 -0
  160. hexdag/studio/__init__.py +10 -0
  161. hexdag/studio/build_ui.py +92 -0
  162. hexdag/studio/server/__init__.py +1 -0
  163. hexdag/studio/server/main.py +100 -0
  164. hexdag/studio/server/routes/__init__.py +9 -0
  165. hexdag/studio/server/routes/execute.py +208 -0
  166. hexdag/studio/server/routes/export.py +558 -0
  167. hexdag/studio/server/routes/files.py +207 -0
  168. hexdag/studio/server/routes/plugins.py +419 -0
  169. hexdag/studio/server/routes/validate.py +220 -0
  170. hexdag/studio/ui/index.html +13 -0
  171. hexdag/studio/ui/package-lock.json +2992 -0
  172. hexdag/studio/ui/package.json +31 -0
  173. hexdag/studio/ui/postcss.config.js +6 -0
  174. hexdag/studio/ui/public/hexdag.svg +5 -0
  175. hexdag/studio/ui/src/App.tsx +251 -0
  176. hexdag/studio/ui/src/components/Canvas.tsx +408 -0
  177. hexdag/studio/ui/src/components/ContextMenu.tsx +187 -0
  178. hexdag/studio/ui/src/components/FileBrowser.tsx +123 -0
  179. hexdag/studio/ui/src/components/Header.tsx +181 -0
  180. hexdag/studio/ui/src/components/HexdagNode.tsx +193 -0
  181. hexdag/studio/ui/src/components/NodeInspector.tsx +512 -0
  182. hexdag/studio/ui/src/components/NodePalette.tsx +262 -0
  183. hexdag/studio/ui/src/components/NodePortsSection.tsx +403 -0
  184. hexdag/studio/ui/src/components/PluginManager.tsx +347 -0
  185. hexdag/studio/ui/src/components/PortsEditor.tsx +481 -0
  186. hexdag/studio/ui/src/components/PythonEditor.tsx +195 -0
  187. hexdag/studio/ui/src/components/ValidationPanel.tsx +105 -0
  188. hexdag/studio/ui/src/components/YamlEditor.tsx +196 -0
  189. hexdag/studio/ui/src/components/index.ts +8 -0
  190. hexdag/studio/ui/src/index.css +92 -0
  191. hexdag/studio/ui/src/main.tsx +10 -0
  192. hexdag/studio/ui/src/types/index.ts +123 -0
  193. hexdag/studio/ui/src/vite-env.d.ts +1 -0
  194. hexdag/studio/ui/tailwind.config.js +29 -0
  195. hexdag/studio/ui/tsconfig.json +37 -0
  196. hexdag/studio/ui/tsconfig.node.json +13 -0
  197. hexdag/studio/ui/vite.config.ts +35 -0
  198. hexdag/visualization/__init__.py +69 -0
  199. hexdag/visualization/dag_visualizer.py +1020 -0
  200. hexdag-0.5.0.dev1.dist-info/METADATA +369 -0
  201. hexdag-0.5.0.dev1.dist-info/RECORD +261 -0
  202. hexdag-0.5.0.dev1.dist-info/WHEEL +4 -0
  203. hexdag-0.5.0.dev1.dist-info/entry_points.txt +4 -0
  204. hexdag-0.5.0.dev1.dist-info/licenses/LICENSE +190 -0
  205. hexdag_plugins/.gitignore +43 -0
  206. hexdag_plugins/README.md +73 -0
  207. hexdag_plugins/__init__.py +1 -0
  208. hexdag_plugins/azure/LICENSE +21 -0
  209. hexdag_plugins/azure/README.md +414 -0
  210. hexdag_plugins/azure/__init__.py +21 -0
  211. hexdag_plugins/azure/azure_blob_adapter.py +450 -0
  212. hexdag_plugins/azure/azure_cosmos_adapter.py +383 -0
  213. hexdag_plugins/azure/azure_keyvault_adapter.py +314 -0
  214. hexdag_plugins/azure/azure_openai_adapter.py +415 -0
  215. hexdag_plugins/azure/pyproject.toml +107 -0
  216. hexdag_plugins/azure/tests/__init__.py +1 -0
  217. hexdag_plugins/azure/tests/test_azure_blob_adapter.py +350 -0
  218. hexdag_plugins/azure/tests/test_azure_cosmos_adapter.py +323 -0
  219. hexdag_plugins/azure/tests/test_azure_keyvault_adapter.py +330 -0
  220. hexdag_plugins/azure/tests/test_azure_openai_adapter.py +329 -0
  221. hexdag_plugins/hexdag_etl/README.md +168 -0
  222. hexdag_plugins/hexdag_etl/__init__.py +53 -0
  223. hexdag_plugins/hexdag_etl/examples/01_simple_pandas_transform.py +270 -0
  224. hexdag_plugins/hexdag_etl/examples/02_simple_pandas_only.py +149 -0
  225. hexdag_plugins/hexdag_etl/examples/03_file_io_pipeline.py +109 -0
  226. hexdag_plugins/hexdag_etl/examples/test_pandas_transform.py +84 -0
  227. hexdag_plugins/hexdag_etl/hexdag.toml +25 -0
  228. hexdag_plugins/hexdag_etl/hexdag_etl/__init__.py +48 -0
  229. hexdag_plugins/hexdag_etl/hexdag_etl/nodes/__init__.py +13 -0
  230. hexdag_plugins/hexdag_etl/hexdag_etl/nodes/api_extract.py +230 -0
  231. hexdag_plugins/hexdag_etl/hexdag_etl/nodes/base_node_factory.py +181 -0
  232. hexdag_plugins/hexdag_etl/hexdag_etl/nodes/file_io.py +415 -0
  233. hexdag_plugins/hexdag_etl/hexdag_etl/nodes/outlook.py +492 -0
  234. hexdag_plugins/hexdag_etl/hexdag_etl/nodes/pandas_transform.py +563 -0
  235. hexdag_plugins/hexdag_etl/hexdag_etl/nodes/sql_extract_load.py +112 -0
  236. hexdag_plugins/hexdag_etl/pyproject.toml +82 -0
  237. hexdag_plugins/hexdag_etl/test_transform.py +54 -0
  238. hexdag_plugins/hexdag_etl/tests/test_plugin_integration.py +62 -0
  239. hexdag_plugins/mysql_adapter/LICENSE +21 -0
  240. hexdag_plugins/mysql_adapter/README.md +224 -0
  241. hexdag_plugins/mysql_adapter/__init__.py +6 -0
  242. hexdag_plugins/mysql_adapter/mysql_adapter.py +408 -0
  243. hexdag_plugins/mysql_adapter/pyproject.toml +93 -0
  244. hexdag_plugins/mysql_adapter/tests/test_mysql_adapter.py +259 -0
  245. hexdag_plugins/storage/README.md +184 -0
  246. hexdag_plugins/storage/__init__.py +19 -0
  247. hexdag_plugins/storage/file/__init__.py +5 -0
  248. hexdag_plugins/storage/file/local.py +325 -0
  249. hexdag_plugins/storage/ports/__init__.py +5 -0
  250. hexdag_plugins/storage/ports/vector_store.py +236 -0
  251. hexdag_plugins/storage/sql/__init__.py +7 -0
  252. hexdag_plugins/storage/sql/base.py +187 -0
  253. hexdag_plugins/storage/sql/mysql.py +27 -0
  254. hexdag_plugins/storage/sql/postgresql.py +27 -0
  255. hexdag_plugins/storage/tests/__init__.py +1 -0
  256. hexdag_plugins/storage/tests/test_local_file_storage.py +161 -0
  257. hexdag_plugins/storage/tests/test_sql_adapters.py +212 -0
  258. hexdag_plugins/storage/vector/__init__.py +7 -0
  259. hexdag_plugins/storage/vector/chromadb.py +223 -0
  260. hexdag_plugins/storage/vector/in_memory.py +285 -0
  261. hexdag_plugins/storage/vector/pgvector.py +502 -0
@@ -0,0 +1,109 @@
1
+ """Example: File-based ETL pipeline with CSV input and output.
2
+
3
+ This example demonstrates:
4
+ 1. Reading CSV files with FileReaderNode
5
+ 2. Writing results to CSV with FileWriterNode
6
+
7
+ For a complete example with transforms, use YAML pipelines where
8
+ template expressions ({{ }}) are properly resolved.
9
+ """
10
+
11
+ import asyncio
12
+ import tempfile
13
+ from pathlib import Path
14
+
15
+ import pandas as pd
16
+
17
+
18
async def main() -> None:
    """Run a minimal file-based ETL pipeline (read CSV -> write CSV).

    A sample CSV is generated inside a temporary directory, read back through a
    ``FileReaderNode`` and written out again through a ``FileWriterNode``. The
    per-node results and the contents of the written file are printed.
    """
    # The project root has to be importable before the hexdag imports below run.
    import sys

    sys.path.insert(0, str(Path(__file__).parent.parent.parent.parent))

    from hexdag.core.domain.dag import DirectedGraph
    from hexdag.core.orchestration.orchestrator import Orchestrator

    # ETL node factories provided by the plugin
    from hexdag_plugins.hexdag_etl.hexdag_etl.nodes.file_io import (
        FileReaderNode,
        FileWriterNode,
    )

    # Everything lives in a throwaway directory that is removed on exit.
    with tempfile.TemporaryDirectory() as tmpdir:
        workdir = Path(tmpdir)
        input_csv = workdir / "sales_data.csv"
        # CSV output keeps the example portable (no pyarrow required).
        output_csv = workdir / "sales_copy.csv"

        # Sample input data
        sample_data = pd.DataFrame(
            {
                "product": ["Widget A", "Widget B", "Widget C", "Widget A", "Widget B"],
                "quantity": [10, 5, 8, 15, 3],
                "price": [29.99, 49.99, 19.99, 29.99, 49.99],
                "region": ["North", "South", "North", "East", "West"],
            }
        )
        sample_data.to_csv(input_csv, index=False)
        print(f"Created input CSV: {input_csv}")
        print(f"Input data:\n{sample_data}\n")

        # Pipeline graph: read_sales -> write_results
        graph = DirectedGraph()

        make_reader = FileReaderNode()
        graph.add(
            make_reader(
                name="read_sales",
                file_path=str(input_csv),
                format="csv",
            )
        )

        make_writer = FileWriterNode()
        graph.add(
            make_writer(
                name="write_results",
                file_path=str(output_csv),
                format="csv",
                input_key="data",
                deps=["read_sales"],
            )
        )

        # Run the pipeline with an empty initial input.
        print("Executing ETL pipeline...")
        results = await Orchestrator().run(graph, {})

        # Report what every node produced.
        print("\n=== Pipeline Results ===")
        for node_name, result in results.items():
            print(f"\n{node_name}:")
            if not isinstance(result, dict):
                print(f" {result}")
                continue
            for key, value in result.items():
                # DataFrames are summarised by row count; everything else is printed as-is.
                if key == "data" and isinstance(value, pd.DataFrame):
                    print(f" {key}: DataFrame with {len(value)} rows")
                else:
                    print(f" {key}: {value}")

        # Confirm that the writer actually produced the file.
        if not output_csv.exists():
            print(f"\nWarning: Output file not created: {output_csv}")
        else:
            output_df = pd.read_csv(output_csv)
            print("\n=== Output CSV Contents ===")
            print(output_df)
            print(f"\nRows written: {len(output_df)}")


if __name__ == "__main__":
    asyncio.run(main())
@@ -0,0 +1,84 @@
1
+ """Test pandas transform node directly without plugin registration."""
2
+
3
+ import sys
4
+ from pathlib import Path
5
+
6
+ # Add parent directories to path
7
+ sys.path.insert(0, str(Path(__file__).parent.parent.parent.parent))
8
+ sys.path.insert(0, str(Path(__file__).parent.parent))
9
+
10
+ from hexdag.core.pipeline_builder.yaml_builder import YamlPipelineBuilder
11
+
12
+
13
def test_node_registration() -> bool:
    """Check that the ``pandas_transform`` node can be looked up in the registry.

    Returns
    -------
    bool
        True if ``registry.get("pandas_transform", namespace="etl")`` succeeds,
        False if importing the registry or performing the lookup raises.
    """
    print("Testing node registration...")

    try:
        # The import lives inside the ``try`` so that a missing or broken registry
        # module is reported as a failed check rather than aborting the whole script
        # with a traceback before the remaining checks can run.
        from hexdag.core.registry import registry

        # Try to get pandas_transform
        pandas_factory = registry.get("pandas_transform", namespace="etl")
        print(f"✓ PandasTransformNode factory: {pandas_factory}")
        print(f"✓ PandasTransformNode class: {pandas_factory.__class__}")
        return True
    except Exception as e:
        print(f"❌ Error getting pandas_transform: {e}")
        print(" This is expected if nodes aren't auto-registered yet")
        return False
30
+
31
+
32
def test_yaml_parsing():
    """Build a one-node pipeline from an inline YAML document.

    Returns True when ``YamlPipelineBuilder`` builds the pipeline and its node
    count can be read; on any exception the traceback is printed and False is
    returned.
    """
    print("\nTesting YAML pipeline...")

    pipeline_yaml = """
apiVersion: hexdag/v1
kind: Pipeline
metadata:
  name: test-pandas-transform
spec:
  nodes:
    - kind: etl:pandas_transform
      metadata:
        name: test_transform
      spec:
        operations:
          - type: transform
            method: pandas.DataFrame.head
            kwargs:
              n: 5
"""

    try:
        graph, _config = YamlPipelineBuilder().build_from_string(pipeline_yaml)
        print("\n✓ Pipeline built successfully!")
        # Reads the builder's underlying graph object directly to count nodes.
        node_count = len(graph._graph.nodes())
        print(f"✓ Nodes: {node_count}")
    except Exception as e:
        print(f"\n❌ Error building pipeline: {e}")
        import traceback

        traceback.print_exc()
        return False
    return True
66
+
67
+
68
if __name__ == "__main__":
    # Script entry point: run both checks and exit with status 1 if either fails.
    banner = "=" * 80

    print(banner)
    print("Testing Pandas Transform Node")
    print(banner)

    # Both checks always run so that each one prints its own report.
    reg_ok = test_node_registration()
    yaml_ok = test_yaml_parsing()
    all_passed = reg_ok and yaml_ok

    print("\n" + banner)
    print("✓ All tests passed!" if all_passed else "❌ Some tests failed")
    print(banner)

    if not all_passed:
        sys.exit(1)
@@ -0,0 +1,25 @@
1
+ # hexDAG ETL Plugin Configuration
2
+ # This file configures how the plugin loads its components
3
+
4
+ [tool.hexdag]
5
+ # Core modules to load - include the plugin's nodes
6
+ modules = [
7
+ "hexdag.core.ports", # Core port definitions
8
+ "hexdag.builtin.nodes", # Core node factories
9
+ "hexdag.builtin.tools.builtin_tools",
10
+ "hexdag.builtin.policies.execution_policies",
11
+ "hexdag.builtin.macros",
12
+ "hexdag_etl.nodes", # ETL plugin nodes (pandas_transform, etc.)
13
+ ]
14
+
15
+ # No additional plugins needed for ETL plugin
16
+ plugins = []
17
+
18
+ # Development mode enabled
19
+ dev_mode = true
20
+
21
+ [tool.hexdag.logging]
22
+ level = "DEBUG" # More verbose for plugin development
23
+ format = "structured"
24
+ use_color = true
25
+ include_timestamp = true
@@ -0,0 +1,48 @@
1
+ """hexdag-etl: ETL infrastructure for hexDAG pipelines.
2
+
3
+ Provides file I/O and multi-operation pandas transform nodes for data transformation pipelines.
4
+
5
+ This plugin extends hexDAG with ETL capabilities:
6
+ - FileReaderNode: Read CSV, Parquet, JSON, Excel files
7
+ - FileWriterNode: Write data to various file formats
8
+ - PandasTransformNode: Chain pandas operations
9
+
10
+ Example Pipeline:
11
+ - kind: etl:file_reader_node
12
+ metadata:
13
+ name: load_data
14
+ spec:
15
+ file_path: data/input.csv
16
+ format: csv
17
+
18
+ - kind: etl:pandas_transform_node
19
+ metadata:
20
+ name: transform
21
+ spec:
22
+ operations:
23
+ - type: filter
24
+ condition: "{{ df['value'] > 0 }}"
25
+ dependencies: [load_data]
26
+
27
+ - kind: etl:file_writer_node
28
+ metadata:
29
+ name: save_results
30
+ spec:
31
+ file_path: output/results.parquet
32
+ format: parquet
33
+ dependencies: [transform]
34
+ """
35
+
36
+ from .nodes.file_io import FileReaderNode, FileWriterNode
37
+ from .nodes.outlook import OutlookReaderNode, OutlookSenderNode
38
+ from .nodes.pandas_transform import PandasTransformNode
39
+
40
+ __version__ = "0.1.0"
41
+
42
+ __all__ = [
43
+ "FileReaderNode",
44
+ "FileWriterNode",
45
+ "OutlookReaderNode",
46
+ "OutlookSenderNode",
47
+ "PandasTransformNode",
48
+ ]
@@ -0,0 +1,13 @@
1
+ """ETL nodes for data extraction, transformation, and loading."""
2
+
3
+ from .file_io import FileReaderNode, FileWriterNode
4
+ from .outlook import OutlookReaderNode, OutlookSenderNode
5
+ from .pandas_transform import PandasTransformNode
6
+
7
+ __all__ = [
8
+ "FileReaderNode",
9
+ "FileWriterNode",
10
+ "OutlookReaderNode",
11
+ "OutlookSenderNode",
12
+ "PandasTransformNode",
13
+ ]
@@ -0,0 +1,230 @@
1
+ """API extraction node for HTTP/REST API data extraction."""
2
+
3
+ from typing import Any
4
+
5
+ from hexdag.core.domain.dag import NodeSpec
6
+ from hexdag.core.registry import node
7
+ from hexdag.core.registry.models import NodeSubtype
8
+
9
+ from .base_node_factory import BaseNodeFactory
10
+
11
+
12
@node(name="api_extract", subtype=NodeSubtype.TOOL, namespace="etl")
class APIExtractNode(BaseNodeFactory):
    """Node factory for extracting data from REST APIs.

    NOTE: the node function produced by this factory is currently a
    placeholder. It performs no HTTP request and always returns an empty
    ``data`` list together with descriptive ``metadata``. The configuration
    options below are accepted and stored on the ``NodeSpec`` so that pipelines
    can already be declared against the intended interface.

    Intended feature set:
    - GET/POST requests
    - Bearer token, API key, OAuth authentication
    - Pagination (cursor, offset, page)
    - Rate limiting
    - Retry logic
    - Structured error handling

    Examples
    --------
    YAML pipeline::

        - kind: api_extract_node
          metadata:
            name: fetch_customers
          spec:
            endpoint: https://api.example.com/v1/customers
            method: GET
            params:
              limit: 100
              status: active
            pagination:
              type: cursor
              cursor_param: after
              cursor_path: meta.next_cursor
              has_more_path: meta.has_more
            auth:
              type: bearer
              token: ${API_TOKEN}
            output_artifact:
              slot: raw_customers
              key: customers_2024_01_15
              format: json
    """

    def __call__(
        self,
        name: str,
        endpoint: str,
        method: str = "GET",
        params: dict[str, Any] | None = None,
        headers: dict[str, str] | None = None,
        auth: dict[str, Any] | None = None,
        pagination: dict[str, Any] | None = None,
        output_artifact: dict[str, Any] | None = None,
        timeout: int = 30,
        max_retries: int = 3,
        backoff_factor: float = 0.3,
        rate_limit: dict[str, Any] | None = None,
        deps: list[str] | None = None,
        **kwargs: Any,
    ) -> NodeSpec:
        """Create API extraction node specification.

        Parameters
        ----------
        name : str
            Node name
        endpoint : str
            API endpoint URL
        method : str
            HTTP method: "GET", "POST", "PUT", "DELETE"
        params : dict, optional
            Query parameters or request body
        headers : dict, optional
            Additional HTTP headers
        auth : dict, optional
            Authentication configuration
        pagination : dict, optional
            Pagination configuration
        output_artifact : dict, optional
            Output artifact configuration
        timeout : int
            Request timeout in seconds
        max_retries : int
            Maximum retry attempts
        backoff_factor : float
            Backoff multiplier for retries
        rate_limit : dict, optional
            Rate limiting configuration
        deps : list[str], optional
            Dependency node names
        **kwargs : Any
            Additional parameters, stored verbatim in the node's ``params``

        Returns
        -------
        NodeSpec
            Node specification ready for execution
        """
        # Create wrapped function
        wrapped_fn = self._create_api_function(
            name,
            endpoint,
            method,
            params,
            headers,
            auth,
            pagination,
            output_artifact,
            timeout,
            max_retries,
            backoff_factor,
            rate_limit,
        )

        # Define schemas
        # NOTE(review): "**ports" is passed to create_pydantic_model as a literal
        # field name - confirm this is the intended way to declare injected ports.
        input_schema = {"input_data": dict, "**ports": dict}
        # The keys must mirror what the wrapped function returns ("data" and
        # "metadata"); otherwise the output model describes a payload the node
        # never produces.
        output_schema = {"data": list, "metadata": dict}

        input_model = self.create_pydantic_model(f"{name}Input", input_schema)
        output_model = self.create_pydantic_model(f"{name}Output", output_schema)

        # Store parameters
        node_params = {
            "endpoint": endpoint,
            "method": method,
            "params": params,
            "headers": headers,
            "auth": auth,
            "pagination": pagination,
            "output_artifact": output_artifact,
            "timeout": timeout,
            "max_retries": max_retries,
            "backoff_factor": backoff_factor,
            "rate_limit": rate_limit,
            **kwargs,
        }

        return NodeSpec(
            name=name,
            fn=wrapped_fn,
            in_model=input_model,
            out_model=output_model,
            deps=frozenset(deps or []),
            params=node_params,
        )

    def _create_api_function(
        self,
        name: str,
        endpoint: str,
        method: str,
        params: dict[str, Any] | None,
        headers: dict[str, str] | None,
        auth: dict[str, Any] | None,
        pagination: dict[str, Any] | None,
        output_artifact: dict[str, Any] | None,
        timeout: int,
        max_retries: int,
        backoff_factor: float,
        rate_limit: dict[str, Any] | None,
    ) -> Any:
        """Create the wrapped API extraction function.

        The returned coroutine function is a placeholder: only ``name``,
        ``endpoint`` and ``method`` are used; the remaining arguments are
        accepted for interface completeness and are currently ignored.

        Parameters
        ----------
        name : str
            Node name
        endpoint : str
            API endpoint
        method : str
            HTTP method
        params : dict, optional
            Request parameters
        headers : dict, optional
            Request headers
        auth : dict, optional
            Authentication config
        pagination : dict, optional
            Pagination config
        output_artifact : dict, optional
            Output artifact config
        timeout : int
            Request timeout
        max_retries : int
            Maximum retries
        backoff_factor : float
            Backoff factor
        rate_limit : dict, optional
            Rate limiting config

        Returns
        -------
        Callable
            Async function named ``api_extract_<name>`` that returns a dict with
            ``data`` and ``metadata`` keys
        """

        # Implementation would go here - simplified for the example
        async def wrapped_fn(input_data: Any, **ports: Any) -> dict[str, Any]:
            """Placeholder implementation."""
            # In a real implementation, this would:
            # 1. Make HTTP requests with aiohttp
            # 2. Handle pagination
            # 3. Apply authentication
            # 4. Store results in artifact store
            # 5. Return structured results

            return {
                "data": [],
                "metadata": {
                    "endpoint": endpoint,
                    "method": method,
                    "records_extracted": 0,
                    "status": "placeholder_implementation",
                },
            }

        wrapped_fn.__name__ = f"api_extract_{name}"
        wrapped_fn.__doc__ = f"API extraction: {endpoint}"

        return wrapped_fn
@@ -0,0 +1,181 @@
1
+ """Simplified BaseNodeFactory for creating nodes with Pydantic models."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from abc import ABC, abstractmethod
6
+ from typing import Any, cast
7
+
8
+ from hexdag.core.domain.dag import NodeSpec
9
+ from hexdag.core.orchestration.prompt.template import PromptTemplate
10
+ from hexdag.core.protocols import is_schema_type
11
+ from pydantic import BaseModel, create_model
12
+
13
+
14
+ class BaseNodeFactory(ABC):
15
+ """Minimal base class for node factories with Pydantic models."""
16
+
17
+ # Note: Event emission has been removed as it's now handled by the orchestrator
18
+ # The new event system uses ObserverManager at the orchestrator level
19
+
20
+ def create_pydantic_model(
21
+ self, name: str, schema: dict[str, Any] | type[BaseModel] | type[Any] | None
22
+ ) -> type[BaseModel] | None:
23
+ """Create a Pydantic model from a schema.
24
+
25
+ Raises
26
+ ------
27
+ ValueError
28
+ If schema type is not supported
29
+ """
30
+ if schema is None:
31
+ return None
32
+
33
+ if is_schema_type(schema):
34
+ return schema # type: ignore[return-value] # is_schema_type checks for BaseModel subclass
35
+
36
+ if isinstance(schema, dict):
37
+ # String type names mapping (for when field_type is a string)
38
+ type_map = {
39
+ "str": str,
40
+ "int": int,
41
+ "float": float,
42
+ "bool": bool,
43
+ "list": list,
44
+ "dict": dict,
45
+ "Any": Any,
46
+ }
47
+
48
+ field_definitions: dict[str, Any] = {}
49
+ for field_name, field_type in schema.items():
50
+ # Dispatch based on field_type's type using match pattern
51
+ match field_type:
52
+ case str():
53
+ # String type names - convert to actual types
54
+ actual_type = type_map.get(field_type, Any)
55
+ field_definitions[field_name] = (actual_type, ...)
56
+ case type():
57
+ # Already a type
58
+ field_definitions[field_name] = (field_type, ...)
59
+ case tuple():
60
+ # Already in the correct format (type, default)
61
+ field_definitions[field_name] = field_type
62
+ case _:
63
+ # Unknown type specification - use Any
64
+ field_definitions[field_name] = (Any, ...)
65
+
66
+ return create_model(name, **field_definitions)
67
+
68
+ # At this point, schema should be a type
69
+ try:
70
+ return cast("type[Any] | None", create_model(name, value=(schema, ...)))
71
+ except (TypeError, AttributeError) as e:
72
+ # If we get here, schema is an unexpected type
73
+ raise ValueError(f"Schema must be a dict, type, or Pydantic model, got {type(schema).__name__}") from e
74
+
75
+ @staticmethod
76
+ def infer_input_schema_from_template(
77
+ template: str | PromptTemplate,
78
+ special_params: set[str] | None = None,
79
+ ) -> dict[str, Any]:
80
+ """Infer input schema from template variables with configurable filtering.
81
+
82
+ This method extracts variable names from a prompt template and creates
83
+ a schema dictionary mapping those variables to string types. It supports
84
+ filtering out special parameters that are not user inputs.
85
+
86
+ Parameters
87
+ ----------
88
+ template : str | PromptTemplate
89
+ The prompt template to analyze. Can be a string or PromptTemplate instance.
90
+ special_params : set[str] | None, optional
91
+ Set of parameter names to exclude from the schema (e.g., "context_history").
92
+ If None, no filtering is applied.
93
+
94
+ Returns
95
+ -------
96
+ dict[str, Any]
97
+ Schema dictionary mapping variable names to str type.
98
+ Returns {"input": str} if no variables found.
99
+
100
+ Examples
101
+ --------
102
+ >>> BaseNodeFactory.infer_input_schema_from_template("Hello {{name}}")
103
+ {'name': <class 'str'>}
104
+
105
+ >>> BaseNodeFactory.infer_input_schema_from_template(
106
+ ... "Process {{user}} with {{context_history}}",
107
+ ... special_params={"context_history"}
108
+ ... )
109
+ {'user': <class 'str'>}
110
+
111
+ >>> BaseNodeFactory.infer_input_schema_from_template("No variables")
112
+ {'input': <class 'str'>}
113
+ """
114
+
115
+ if isinstance(template, str):
116
+ template = PromptTemplate(template)
117
+
118
+ variables = getattr(template, "input_vars", [])
119
+
120
+ if special_params:
121
+ variables = [v for v in variables if v not in special_params]
122
+
123
+ if not variables:
124
+ return {"input": str}
125
+
126
+ schema: dict[str, Any] = {}
127
+ for var in variables:
128
+ base_var = var.split(".")[0]
129
+ # Double-check against special params for nested variables
130
+ if not special_params or base_var not in special_params:
131
+ schema[base_var] = str
132
+
133
+ return schema
134
+
135
+ def _copy_required_ports_to_wrapper(self, wrapper_fn: Any) -> None:
136
+ """Copy required_ports metadata from factory class to wrapper function.
137
+
138
+ This ensures port requirements are preserved when creating node functions.
139
+ """
140
+ if hasattr(self.__class__, "_hexdag_required_ports"):
141
+ # _hexdag_required_ports is added dynamically by @node decorator
142
+ wrapper_fn._hexdag_required_ports = self.__class__._hexdag_required_ports # pyright: ignore[reportAttributeAccessIssue] # noqa: B010
143
+
144
+ def create_node_with_mapping(
145
+ self,
146
+ name: str,
147
+ wrapped_fn: Any,
148
+ input_schema: dict[str, Any] | None,
149
+ output_schema: dict[str, Any] | type[BaseModel] | None,
150
+ deps: list[str] | None = None,
151
+ **kwargs: Any,
152
+ ) -> NodeSpec:
153
+ """Universal NodeSpec creation."""
154
+ # Copy required_ports metadata to wrapper
155
+ self._copy_required_ports_to_wrapper(wrapped_fn)
156
+
157
+ input_model = self.create_pydantic_model(f"{name}Input", input_schema)
158
+ output_model = self.create_pydantic_model(f"{name}Output", output_schema)
159
+
160
+ return NodeSpec(
161
+ name=name,
162
+ fn=wrapped_fn,
163
+ in_model=input_model,
164
+ out_model=output_model,
165
+ deps=frozenset(deps or []),
166
+ params=kwargs,
167
+ )
168
+
169
+ @abstractmethod
170
+ def __call__(self, name: str, *args: Any, **kwargs: Any) -> NodeSpec: # noqa: ARG002
171
+ """Create a NodeSpec.
172
+
173
+ Must be implemented by subclasses.
174
+
175
+ Args:
176
+ name: Name of the node
177
+ *args: Additional positional arguments (unused, for subclass flexibility)
178
+ **kwargs: Additional keyword arguments
179
+ """
180
+ _ = args # Marked as intentionally unused for subclass API flexibility
181
+ raise NotImplementedError