hexdag 0.5.0.dev1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- hexdag/__init__.py +116 -0
- hexdag/__main__.py +30 -0
- hexdag/adapters/executors/__init__.py +5 -0
- hexdag/adapters/executors/local_executor.py +316 -0
- hexdag/builtin/__init__.py +6 -0
- hexdag/builtin/adapters/__init__.py +51 -0
- hexdag/builtin/adapters/anthropic/__init__.py +5 -0
- hexdag/builtin/adapters/anthropic/anthropic_adapter.py +151 -0
- hexdag/builtin/adapters/database/__init__.py +6 -0
- hexdag/builtin/adapters/database/csv/csv_adapter.py +249 -0
- hexdag/builtin/adapters/database/pgvector/__init__.py +5 -0
- hexdag/builtin/adapters/database/pgvector/pgvector_adapter.py +478 -0
- hexdag/builtin/adapters/database/sqlalchemy/sqlalchemy_adapter.py +252 -0
- hexdag/builtin/adapters/database/sqlite/__init__.py +5 -0
- hexdag/builtin/adapters/database/sqlite/sqlite_adapter.py +410 -0
- hexdag/builtin/adapters/local/README.md +59 -0
- hexdag/builtin/adapters/local/__init__.py +7 -0
- hexdag/builtin/adapters/local/local_observer_manager.py +696 -0
- hexdag/builtin/adapters/memory/__init__.py +47 -0
- hexdag/builtin/adapters/memory/file_memory_adapter.py +297 -0
- hexdag/builtin/adapters/memory/in_memory_memory.py +216 -0
- hexdag/builtin/adapters/memory/schemas.py +57 -0
- hexdag/builtin/adapters/memory/session_memory.py +178 -0
- hexdag/builtin/adapters/memory/sqlite_memory_adapter.py +215 -0
- hexdag/builtin/adapters/memory/state_memory.py +280 -0
- hexdag/builtin/adapters/mock/README.md +89 -0
- hexdag/builtin/adapters/mock/__init__.py +15 -0
- hexdag/builtin/adapters/mock/hexdag.toml +50 -0
- hexdag/builtin/adapters/mock/mock_database.py +225 -0
- hexdag/builtin/adapters/mock/mock_embedding.py +223 -0
- hexdag/builtin/adapters/mock/mock_llm.py +177 -0
- hexdag/builtin/adapters/mock/mock_tool_adapter.py +192 -0
- hexdag/builtin/adapters/mock/mock_tool_router.py +232 -0
- hexdag/builtin/adapters/openai/__init__.py +5 -0
- hexdag/builtin/adapters/openai/openai_adapter.py +634 -0
- hexdag/builtin/adapters/secret/__init__.py +7 -0
- hexdag/builtin/adapters/secret/local_secret_adapter.py +248 -0
- hexdag/builtin/adapters/unified_tool_router.py +280 -0
- hexdag/builtin/macros/__init__.py +17 -0
- hexdag/builtin/macros/conversation_agent.py +390 -0
- hexdag/builtin/macros/llm_macro.py +151 -0
- hexdag/builtin/macros/reasoning_agent.py +423 -0
- hexdag/builtin/macros/tool_macro.py +380 -0
- hexdag/builtin/nodes/__init__.py +38 -0
- hexdag/builtin/nodes/_discovery.py +123 -0
- hexdag/builtin/nodes/agent_node.py +696 -0
- hexdag/builtin/nodes/base_node_factory.py +242 -0
- hexdag/builtin/nodes/composite_node.py +926 -0
- hexdag/builtin/nodes/data_node.py +201 -0
- hexdag/builtin/nodes/expression_node.py +487 -0
- hexdag/builtin/nodes/function_node.py +454 -0
- hexdag/builtin/nodes/llm_node.py +491 -0
- hexdag/builtin/nodes/loop_node.py +920 -0
- hexdag/builtin/nodes/mapped_input.py +518 -0
- hexdag/builtin/nodes/port_call_node.py +269 -0
- hexdag/builtin/nodes/tool_call_node.py +195 -0
- hexdag/builtin/nodes/tool_utils.py +390 -0
- hexdag/builtin/prompts/__init__.py +68 -0
- hexdag/builtin/prompts/base.py +422 -0
- hexdag/builtin/prompts/chat_prompts.py +303 -0
- hexdag/builtin/prompts/error_correction_prompts.py +320 -0
- hexdag/builtin/prompts/tool_prompts.py +160 -0
- hexdag/builtin/tools/builtin_tools.py +84 -0
- hexdag/builtin/tools/database_tools.py +164 -0
- hexdag/cli/__init__.py +17 -0
- hexdag/cli/__main__.py +7 -0
- hexdag/cli/commands/__init__.py +27 -0
- hexdag/cli/commands/build_cmd.py +812 -0
- hexdag/cli/commands/create_cmd.py +208 -0
- hexdag/cli/commands/docs_cmd.py +293 -0
- hexdag/cli/commands/generate_types_cmd.py +252 -0
- hexdag/cli/commands/init_cmd.py +188 -0
- hexdag/cli/commands/pipeline_cmd.py +494 -0
- hexdag/cli/commands/plugin_dev_cmd.py +529 -0
- hexdag/cli/commands/plugins_cmd.py +441 -0
- hexdag/cli/commands/studio_cmd.py +101 -0
- hexdag/cli/commands/validate_cmd.py +221 -0
- hexdag/cli/main.py +84 -0
- hexdag/core/__init__.py +83 -0
- hexdag/core/config/__init__.py +20 -0
- hexdag/core/config/loader.py +479 -0
- hexdag/core/config/models.py +150 -0
- hexdag/core/configurable.py +294 -0
- hexdag/core/context/__init__.py +37 -0
- hexdag/core/context/execution_context.py +378 -0
- hexdag/core/docs/__init__.py +26 -0
- hexdag/core/docs/extractors.py +678 -0
- hexdag/core/docs/generators.py +890 -0
- hexdag/core/docs/models.py +120 -0
- hexdag/core/domain/__init__.py +10 -0
- hexdag/core/domain/dag.py +1225 -0
- hexdag/core/exceptions.py +234 -0
- hexdag/core/expression_parser.py +569 -0
- hexdag/core/logging.py +449 -0
- hexdag/core/models/__init__.py +17 -0
- hexdag/core/models/base.py +138 -0
- hexdag/core/orchestration/__init__.py +46 -0
- hexdag/core/orchestration/body_executor.py +481 -0
- hexdag/core/orchestration/components/__init__.py +97 -0
- hexdag/core/orchestration/components/adapter_lifecycle_manager.py +113 -0
- hexdag/core/orchestration/components/checkpoint_manager.py +134 -0
- hexdag/core/orchestration/components/execution_coordinator.py +360 -0
- hexdag/core/orchestration/components/health_check_manager.py +176 -0
- hexdag/core/orchestration/components/input_mapper.py +143 -0
- hexdag/core/orchestration/components/lifecycle_manager.py +583 -0
- hexdag/core/orchestration/components/node_executor.py +377 -0
- hexdag/core/orchestration/components/secret_manager.py +202 -0
- hexdag/core/orchestration/components/wave_executor.py +158 -0
- hexdag/core/orchestration/constants.py +17 -0
- hexdag/core/orchestration/events/README.md +312 -0
- hexdag/core/orchestration/events/__init__.py +104 -0
- hexdag/core/orchestration/events/batching.py +330 -0
- hexdag/core/orchestration/events/decorators.py +139 -0
- hexdag/core/orchestration/events/events.py +573 -0
- hexdag/core/orchestration/events/observers/__init__.py +30 -0
- hexdag/core/orchestration/events/observers/core_observers.py +690 -0
- hexdag/core/orchestration/events/observers/models.py +111 -0
- hexdag/core/orchestration/events/taxonomy.py +269 -0
- hexdag/core/orchestration/hook_context.py +237 -0
- hexdag/core/orchestration/hooks.py +437 -0
- hexdag/core/orchestration/models.py +418 -0
- hexdag/core/orchestration/orchestrator.py +910 -0
- hexdag/core/orchestration/orchestrator_factory.py +275 -0
- hexdag/core/orchestration/port_wrappers.py +327 -0
- hexdag/core/orchestration/prompt/__init__.py +32 -0
- hexdag/core/orchestration/prompt/template.py +332 -0
- hexdag/core/pipeline_builder/__init__.py +21 -0
- hexdag/core/pipeline_builder/component_instantiator.py +386 -0
- hexdag/core/pipeline_builder/include_tag.py +265 -0
- hexdag/core/pipeline_builder/pipeline_config.py +133 -0
- hexdag/core/pipeline_builder/py_tag.py +223 -0
- hexdag/core/pipeline_builder/tag_discovery.py +268 -0
- hexdag/core/pipeline_builder/yaml_builder.py +1196 -0
- hexdag/core/pipeline_builder/yaml_validator.py +569 -0
- hexdag/core/ports/__init__.py +65 -0
- hexdag/core/ports/api_call.py +133 -0
- hexdag/core/ports/database.py +489 -0
- hexdag/core/ports/embedding.py +215 -0
- hexdag/core/ports/executor.py +237 -0
- hexdag/core/ports/file_storage.py +117 -0
- hexdag/core/ports/healthcheck.py +87 -0
- hexdag/core/ports/llm.py +551 -0
- hexdag/core/ports/memory.py +70 -0
- hexdag/core/ports/observer_manager.py +130 -0
- hexdag/core/ports/secret.py +145 -0
- hexdag/core/ports/tool_router.py +94 -0
- hexdag/core/ports_builder.py +623 -0
- hexdag/core/protocols.py +273 -0
- hexdag/core/resolver.py +304 -0
- hexdag/core/schema/__init__.py +9 -0
- hexdag/core/schema/generator.py +742 -0
- hexdag/core/secrets.py +242 -0
- hexdag/core/types.py +413 -0
- hexdag/core/utils/async_warnings.py +206 -0
- hexdag/core/utils/schema_conversion.py +78 -0
- hexdag/core/utils/sql_validation.py +86 -0
- hexdag/core/validation/secure_json.py +148 -0
- hexdag/core/yaml_macro.py +517 -0
- hexdag/mcp_server.py +3120 -0
- hexdag/studio/__init__.py +10 -0
- hexdag/studio/build_ui.py +92 -0
- hexdag/studio/server/__init__.py +1 -0
- hexdag/studio/server/main.py +100 -0
- hexdag/studio/server/routes/__init__.py +9 -0
- hexdag/studio/server/routes/execute.py +208 -0
- hexdag/studio/server/routes/export.py +558 -0
- hexdag/studio/server/routes/files.py +207 -0
- hexdag/studio/server/routes/plugins.py +419 -0
- hexdag/studio/server/routes/validate.py +220 -0
- hexdag/studio/ui/index.html +13 -0
- hexdag/studio/ui/package-lock.json +2992 -0
- hexdag/studio/ui/package.json +31 -0
- hexdag/studio/ui/postcss.config.js +6 -0
- hexdag/studio/ui/public/hexdag.svg +5 -0
- hexdag/studio/ui/src/App.tsx +251 -0
- hexdag/studio/ui/src/components/Canvas.tsx +408 -0
- hexdag/studio/ui/src/components/ContextMenu.tsx +187 -0
- hexdag/studio/ui/src/components/FileBrowser.tsx +123 -0
- hexdag/studio/ui/src/components/Header.tsx +181 -0
- hexdag/studio/ui/src/components/HexdagNode.tsx +193 -0
- hexdag/studio/ui/src/components/NodeInspector.tsx +512 -0
- hexdag/studio/ui/src/components/NodePalette.tsx +262 -0
- hexdag/studio/ui/src/components/NodePortsSection.tsx +403 -0
- hexdag/studio/ui/src/components/PluginManager.tsx +347 -0
- hexdag/studio/ui/src/components/PortsEditor.tsx +481 -0
- hexdag/studio/ui/src/components/PythonEditor.tsx +195 -0
- hexdag/studio/ui/src/components/ValidationPanel.tsx +105 -0
- hexdag/studio/ui/src/components/YamlEditor.tsx +196 -0
- hexdag/studio/ui/src/components/index.ts +8 -0
- hexdag/studio/ui/src/index.css +92 -0
- hexdag/studio/ui/src/main.tsx +10 -0
- hexdag/studio/ui/src/types/index.ts +123 -0
- hexdag/studio/ui/src/vite-env.d.ts +1 -0
- hexdag/studio/ui/tailwind.config.js +29 -0
- hexdag/studio/ui/tsconfig.json +37 -0
- hexdag/studio/ui/tsconfig.node.json +13 -0
- hexdag/studio/ui/vite.config.ts +35 -0
- hexdag/visualization/__init__.py +69 -0
- hexdag/visualization/dag_visualizer.py +1020 -0
- hexdag-0.5.0.dev1.dist-info/METADATA +369 -0
- hexdag-0.5.0.dev1.dist-info/RECORD +261 -0
- hexdag-0.5.0.dev1.dist-info/WHEEL +4 -0
- hexdag-0.5.0.dev1.dist-info/entry_points.txt +4 -0
- hexdag-0.5.0.dev1.dist-info/licenses/LICENSE +190 -0
- hexdag_plugins/.gitignore +43 -0
- hexdag_plugins/README.md +73 -0
- hexdag_plugins/__init__.py +1 -0
- hexdag_plugins/azure/LICENSE +21 -0
- hexdag_plugins/azure/README.md +414 -0
- hexdag_plugins/azure/__init__.py +21 -0
- hexdag_plugins/azure/azure_blob_adapter.py +450 -0
- hexdag_plugins/azure/azure_cosmos_adapter.py +383 -0
- hexdag_plugins/azure/azure_keyvault_adapter.py +314 -0
- hexdag_plugins/azure/azure_openai_adapter.py +415 -0
- hexdag_plugins/azure/pyproject.toml +107 -0
- hexdag_plugins/azure/tests/__init__.py +1 -0
- hexdag_plugins/azure/tests/test_azure_blob_adapter.py +350 -0
- hexdag_plugins/azure/tests/test_azure_cosmos_adapter.py +323 -0
- hexdag_plugins/azure/tests/test_azure_keyvault_adapter.py +330 -0
- hexdag_plugins/azure/tests/test_azure_openai_adapter.py +329 -0
- hexdag_plugins/hexdag_etl/README.md +168 -0
- hexdag_plugins/hexdag_etl/__init__.py +53 -0
- hexdag_plugins/hexdag_etl/examples/01_simple_pandas_transform.py +270 -0
- hexdag_plugins/hexdag_etl/examples/02_simple_pandas_only.py +149 -0
- hexdag_plugins/hexdag_etl/examples/03_file_io_pipeline.py +109 -0
- hexdag_plugins/hexdag_etl/examples/test_pandas_transform.py +84 -0
- hexdag_plugins/hexdag_etl/hexdag.toml +25 -0
- hexdag_plugins/hexdag_etl/hexdag_etl/__init__.py +48 -0
- hexdag_plugins/hexdag_etl/hexdag_etl/nodes/__init__.py +13 -0
- hexdag_plugins/hexdag_etl/hexdag_etl/nodes/api_extract.py +230 -0
- hexdag_plugins/hexdag_etl/hexdag_etl/nodes/base_node_factory.py +181 -0
- hexdag_plugins/hexdag_etl/hexdag_etl/nodes/file_io.py +415 -0
- hexdag_plugins/hexdag_etl/hexdag_etl/nodes/outlook.py +492 -0
- hexdag_plugins/hexdag_etl/hexdag_etl/nodes/pandas_transform.py +563 -0
- hexdag_plugins/hexdag_etl/hexdag_etl/nodes/sql_extract_load.py +112 -0
- hexdag_plugins/hexdag_etl/pyproject.toml +82 -0
- hexdag_plugins/hexdag_etl/test_transform.py +54 -0
- hexdag_plugins/hexdag_etl/tests/test_plugin_integration.py +62 -0
- hexdag_plugins/mysql_adapter/LICENSE +21 -0
- hexdag_plugins/mysql_adapter/README.md +224 -0
- hexdag_plugins/mysql_adapter/__init__.py +6 -0
- hexdag_plugins/mysql_adapter/mysql_adapter.py +408 -0
- hexdag_plugins/mysql_adapter/pyproject.toml +93 -0
- hexdag_plugins/mysql_adapter/tests/test_mysql_adapter.py +259 -0
- hexdag_plugins/storage/README.md +184 -0
- hexdag_plugins/storage/__init__.py +19 -0
- hexdag_plugins/storage/file/__init__.py +5 -0
- hexdag_plugins/storage/file/local.py +325 -0
- hexdag_plugins/storage/ports/__init__.py +5 -0
- hexdag_plugins/storage/ports/vector_store.py +236 -0
- hexdag_plugins/storage/sql/__init__.py +7 -0
- hexdag_plugins/storage/sql/base.py +187 -0
- hexdag_plugins/storage/sql/mysql.py +27 -0
- hexdag_plugins/storage/sql/postgresql.py +27 -0
- hexdag_plugins/storage/tests/__init__.py +1 -0
- hexdag_plugins/storage/tests/test_local_file_storage.py +161 -0
- hexdag_plugins/storage/tests/test_sql_adapters.py +212 -0
- hexdag_plugins/storage/vector/__init__.py +7 -0
- hexdag_plugins/storage/vector/chromadb.py +223 -0
- hexdag_plugins/storage/vector/in_memory.py +285 -0
- hexdag_plugins/storage/vector/pgvector.py +502 -0
|
@@ -0,0 +1,109 @@
|
|
|
1
|
+
"""Example: File-based ETL pipeline with CSV input and output.
|
|
2
|
+
|
|
3
|
+
This example demonstrates:
|
|
4
|
+
1. Reading CSV files with FileReaderNode
|
|
5
|
+
2. Writing results to CSV with FileWriterNode
|
|
6
|
+
|
|
7
|
+
For a complete example with transforms, use YAML pipelines where
|
|
8
|
+
template expressions ({{ }}) are properly resolved.
|
|
9
|
+
"""
|
|
10
|
+
|
|
11
|
+
import asyncio
|
|
12
|
+
import tempfile
|
|
13
|
+
from pathlib import Path
|
|
14
|
+
|
|
15
|
+
import pandas as pd
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
async def main() -> None:
|
|
19
|
+
"""Run a simple file-based ETL pipeline (read -> write)."""
|
|
20
|
+
# Import after ensuring the module path is set
|
|
21
|
+
import sys
|
|
22
|
+
|
|
23
|
+
sys.path.insert(0, str(Path(__file__).parent.parent.parent.parent))
|
|
24
|
+
|
|
25
|
+
from hexdag.core.domain.dag import DirectedGraph
|
|
26
|
+
from hexdag.core.orchestration.orchestrator import Orchestrator
|
|
27
|
+
|
|
28
|
+
# Import ETL nodes
|
|
29
|
+
from hexdag_plugins.hexdag_etl.hexdag_etl.nodes.file_io import (
|
|
30
|
+
FileReaderNode,
|
|
31
|
+
FileWriterNode,
|
|
32
|
+
)
|
|
33
|
+
|
|
34
|
+
# Create temporary directory for test files
|
|
35
|
+
with tempfile.TemporaryDirectory() as tmpdir:
|
|
36
|
+
tmpdir_path = Path(tmpdir)
|
|
37
|
+
|
|
38
|
+
# Create sample input CSV
|
|
39
|
+
input_csv = tmpdir_path / "sales_data.csv"
|
|
40
|
+
sample_data = pd.DataFrame(
|
|
41
|
+
{
|
|
42
|
+
"product": ["Widget A", "Widget B", "Widget C", "Widget A", "Widget B"],
|
|
43
|
+
"quantity": [10, 5, 8, 15, 3],
|
|
44
|
+
"price": [29.99, 49.99, 19.99, 29.99, 49.99],
|
|
45
|
+
"region": ["North", "South", "North", "East", "West"],
|
|
46
|
+
}
|
|
47
|
+
)
|
|
48
|
+
sample_data.to_csv(input_csv, index=False)
|
|
49
|
+
print(f"Created input CSV: {input_csv}")
|
|
50
|
+
print(f"Input data:\n{sample_data}\n")
|
|
51
|
+
|
|
52
|
+
# Create output path (using CSV for portability - no pyarrow required)
|
|
53
|
+
output_csv = tmpdir_path / "sales_copy.csv"
|
|
54
|
+
|
|
55
|
+
# Build the pipeline graph
|
|
56
|
+
graph = DirectedGraph()
|
|
57
|
+
|
|
58
|
+
# Step 1: Read CSV file
|
|
59
|
+
reader = FileReaderNode()
|
|
60
|
+
read_spec = reader(
|
|
61
|
+
name="read_sales",
|
|
62
|
+
file_path=str(input_csv),
|
|
63
|
+
format="csv",
|
|
64
|
+
)
|
|
65
|
+
graph.add(read_spec)
|
|
66
|
+
|
|
67
|
+
# Step 2: Write to CSV (directly from reader output)
|
|
68
|
+
writer = FileWriterNode()
|
|
69
|
+
write_spec = writer(
|
|
70
|
+
name="write_results",
|
|
71
|
+
file_path=str(output_csv),
|
|
72
|
+
format="csv",
|
|
73
|
+
input_key="data",
|
|
74
|
+
deps=["read_sales"],
|
|
75
|
+
)
|
|
76
|
+
graph.add(write_spec)
|
|
77
|
+
|
|
78
|
+
# Execute the pipeline
|
|
79
|
+
print("Executing ETL pipeline...")
|
|
80
|
+
orchestrator = Orchestrator()
|
|
81
|
+
results = await orchestrator.run(graph, {})
|
|
82
|
+
|
|
83
|
+
# Display results
|
|
84
|
+
print("\n=== Pipeline Results ===")
|
|
85
|
+
for node_name, result in results.items():
|
|
86
|
+
print(f"\n{node_name}:")
|
|
87
|
+
if isinstance(result, dict):
|
|
88
|
+
for key, value in result.items():
|
|
89
|
+
if key == "data" and isinstance(value, pd.DataFrame):
|
|
90
|
+
print(f" {key}: DataFrame with {len(value)} rows")
|
|
91
|
+
elif key == "columns":
|
|
92
|
+
print(f" {key}: {value}")
|
|
93
|
+
else:
|
|
94
|
+
print(f" {key}: {value}")
|
|
95
|
+
else:
|
|
96
|
+
print(f" {result}")
|
|
97
|
+
|
|
98
|
+
# Verify output file
|
|
99
|
+
if output_csv.exists():
|
|
100
|
+
output_df = pd.read_csv(output_csv)
|
|
101
|
+
print("\n=== Output CSV Contents ===")
|
|
102
|
+
print(output_df)
|
|
103
|
+
print(f"\nRows written: {len(output_df)}")
|
|
104
|
+
else:
|
|
105
|
+
print(f"\nWarning: Output file not created: {output_csv}")
|
|
106
|
+
|
|
107
|
+
|
|
108
|
+
if __name__ == "__main__":
|
|
109
|
+
asyncio.run(main())
|
|
@@ -0,0 +1,84 @@
|
|
|
1
|
+
"""Test pandas transform node directly without plugin registration."""
|
|
2
|
+
|
|
3
|
+
import sys
|
|
4
|
+
from pathlib import Path
|
|
5
|
+
|
|
6
|
+
# Add parent directories to path
|
|
7
|
+
sys.path.insert(0, str(Path(__file__).parent.parent.parent.parent))
|
|
8
|
+
sys.path.insert(0, str(Path(__file__).parent.parent))
|
|
9
|
+
|
|
10
|
+
from hexdag.core.pipeline_builder.yaml_builder import YamlPipelineBuilder
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
def test_node_registration():
    """Smoke-test that the etl:pandas_transform factory is discoverable."""
    print("Testing node registration...")

    # Deferred import so the sys.path setup at module scope runs first.
    from hexdag.core.registry import registry

    try:
        # Look the factory up under the plugin's "etl" namespace.
        factory = registry.get("pandas_transform", namespace="etl")
        print(f"✓ PandasTransformNode factory: {factory}")
        print(f"✓ PandasTransformNode class: {factory.__class__}")
        return True
    except Exception as exc:
        # Broad catch is deliberate: this is a diagnostic script, not a unit test.
        print(f"❌ Error getting pandas_transform: {exc}")
        print("   This is expected if nodes aren't auto-registered yet")
        return False
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
def test_yaml_parsing():
    """Smoke-test that a minimal ETL pipeline YAML builds into a graph."""
    print("\nTesting YAML pipeline...")

    pipeline_yaml = """
apiVersion: hexdag/v1
kind: Pipeline
metadata:
  name: test-pandas-transform
spec:
  nodes:
    - kind: etl:pandas_transform
      metadata:
        name: test_transform
      spec:
        operations:
          - type: transform
            method: pandas.DataFrame.head
            kwargs:
              n: 5
"""

    try:
        # Build the graph; the config half of the tuple is unused here.
        graph, _config = YamlPipelineBuilder().build_from_string(pipeline_yaml)
        print("\n✓ Pipeline built successfully!")
        print(f"✓ Nodes: {len(graph._graph.nodes())}")
        return True
    except Exception as exc:
        print(f"\n❌ Error building pipeline: {exc}")
        import traceback

        traceback.print_exc()
        return False
|
|
66
|
+
|
|
67
|
+
|
|
68
|
+
if __name__ == "__main__":
|
|
69
|
+
print("=" * 80)
|
|
70
|
+
print("Testing Pandas Transform Node")
|
|
71
|
+
print("=" * 80)
|
|
72
|
+
|
|
73
|
+
reg_ok = test_node_registration()
|
|
74
|
+
yaml_ok = test_yaml_parsing()
|
|
75
|
+
|
|
76
|
+
if reg_ok and yaml_ok:
|
|
77
|
+
print("\n" + "=" * 80)
|
|
78
|
+
print("✓ All tests passed!")
|
|
79
|
+
print("=" * 80)
|
|
80
|
+
else:
|
|
81
|
+
print("\n" + "=" * 80)
|
|
82
|
+
print("❌ Some tests failed")
|
|
83
|
+
print("=" * 80)
|
|
84
|
+
sys.exit(1)
|
|
@@ -0,0 +1,25 @@
|
|
|
1
|
+
# hexDAG ETL Plugin Configuration
# This file configures how the plugin loads its components.

[tool.hexdag]
# Modules the hexDAG loader imports at startup.  Core hexdag modules come
# first; the plugin's own node factories ("hexdag_etl.nodes") are appended so
# they register under the "etl" namespace.
modules = [
    "hexdag.core.ports",                       # Core port definitions
    "hexdag.builtin.nodes",                    # Core node factories
    "hexdag.builtin.tools.builtin_tools",      # Built-in tool functions
    # NOTE(review): "hexdag.builtin.policies.execution_policies" does not
    # appear in the wheel's file listing — confirm this module exists.
    "hexdag.builtin.policies.execution_policies",
    "hexdag.builtin.macros",                   # Built-in macro expansions
    "hexdag_etl.nodes",                        # ETL plugin nodes (pandas_transform, etc.)
]

# No additional plugins needed for ETL plugin
plugins = []

# Development mode enabled
dev_mode = true

[tool.hexdag.logging]
level = "DEBUG"           # More verbose for plugin development
format = "structured"
use_color = true
include_timestamp = true
|
|
@@ -0,0 +1,48 @@
|
|
|
1
|
+
"""hexdag-etl: ETL infrastructure for hexDAG pipelines.
|
|
2
|
+
|
|
3
|
+
Provides file I/O and multi-operation pandas transform nodes for data transformation pipelines.
|
|
4
|
+
|
|
5
|
+
This plugin extends hexDAG with ETL capabilities:
|
|
6
|
+
- FileReaderNode: Read CSV, Parquet, JSON, Excel files
|
|
7
|
+
- FileWriterNode: Write data to various file formats
|
|
8
|
+
- PandasTransformNode: Chain pandas operations
|
|
9
|
+
|
|
10
|
+
Example Pipeline:
|
|
11
|
+
- kind: etl:file_reader_node
|
|
12
|
+
metadata:
|
|
13
|
+
name: load_data
|
|
14
|
+
spec:
|
|
15
|
+
file_path: data/input.csv
|
|
16
|
+
format: csv
|
|
17
|
+
|
|
18
|
+
- kind: etl:pandas_transform_node
|
|
19
|
+
metadata:
|
|
20
|
+
name: transform
|
|
21
|
+
spec:
|
|
22
|
+
operations:
|
|
23
|
+
- type: filter
|
|
24
|
+
condition: "{{ df['value'] > 0 }}"
|
|
25
|
+
dependencies: [load_data]
|
|
26
|
+
|
|
27
|
+
- kind: etl:file_writer_node
|
|
28
|
+
metadata:
|
|
29
|
+
name: save_results
|
|
30
|
+
spec:
|
|
31
|
+
file_path: output/results.parquet
|
|
32
|
+
format: parquet
|
|
33
|
+
dependencies: [transform]
|
|
34
|
+
"""
|
|
35
|
+
|
|
36
|
+
from .nodes.file_io import FileReaderNode, FileWriterNode
|
|
37
|
+
from .nodes.outlook import OutlookReaderNode, OutlookSenderNode
|
|
38
|
+
from .nodes.pandas_transform import PandasTransformNode
|
|
39
|
+
|
|
40
|
+
__version__ = "0.1.0"
|
|
41
|
+
|
|
42
|
+
__all__ = [
|
|
43
|
+
"FileReaderNode",
|
|
44
|
+
"FileWriterNode",
|
|
45
|
+
"OutlookReaderNode",
|
|
46
|
+
"OutlookSenderNode",
|
|
47
|
+
"PandasTransformNode",
|
|
48
|
+
]
|
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
"""ETL nodes for data extraction, transformation, and loading."""
|
|
2
|
+
|
|
3
|
+
from .file_io import FileReaderNode, FileWriterNode
|
|
4
|
+
from .outlook import OutlookReaderNode, OutlookSenderNode
|
|
5
|
+
from .pandas_transform import PandasTransformNode
|
|
6
|
+
|
|
7
|
+
__all__ = [
|
|
8
|
+
"FileReaderNode",
|
|
9
|
+
"FileWriterNode",
|
|
10
|
+
"OutlookReaderNode",
|
|
11
|
+
"OutlookSenderNode",
|
|
12
|
+
"PandasTransformNode",
|
|
13
|
+
]
|
|
@@ -0,0 +1,230 @@
|
|
|
1
|
+
"""API extraction node for HTTP/REST API data extraction."""
|
|
2
|
+
|
|
3
|
+
from typing import Any
|
|
4
|
+
|
|
5
|
+
from hexdag.core.domain.dag import NodeSpec
|
|
6
|
+
from hexdag.core.registry import node
|
|
7
|
+
from hexdag.core.registry.models import NodeSubtype
|
|
8
|
+
|
|
9
|
+
from .base_node_factory import BaseNodeFactory
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
@node(name="api_extract", subtype=NodeSubtype.TOOL, namespace="etl")
|
|
13
|
+
class APIExtractNode(BaseNodeFactory):
|
|
14
|
+
"""Extract data from REST APIs with pagination, authentication, and error handling.
|
|
15
|
+
|
|
16
|
+
Supports:
|
|
17
|
+
- GET/POST requests
|
|
18
|
+
- Bearer token, API key, OAuth authentication
|
|
19
|
+
- Pagination (cursor, offset, page)
|
|
20
|
+
- Rate limiting
|
|
21
|
+
- Retry logic
|
|
22
|
+
- Structured error handling
|
|
23
|
+
|
|
24
|
+
Examples
|
|
25
|
+
--------
|
|
26
|
+
YAML pipeline::
|
|
27
|
+
|
|
28
|
+
- kind: api_extract_node
|
|
29
|
+
metadata:
|
|
30
|
+
name: fetch_customers
|
|
31
|
+
spec:
|
|
32
|
+
endpoint: https://api.example.com/v1/customers
|
|
33
|
+
method: GET
|
|
34
|
+
params:
|
|
35
|
+
limit: 100
|
|
36
|
+
status: active
|
|
37
|
+
pagination:
|
|
38
|
+
type: cursor
|
|
39
|
+
cursor_param: after
|
|
40
|
+
cursor_path: meta.next_cursor
|
|
41
|
+
has_more_path: meta.has_more
|
|
42
|
+
auth:
|
|
43
|
+
type: bearer
|
|
44
|
+
token: ${API_TOKEN}
|
|
45
|
+
output_artifact:
|
|
46
|
+
slot: raw_customers
|
|
47
|
+
key: customers_2024_01_15
|
|
48
|
+
format: json
|
|
49
|
+
"""
|
|
50
|
+
|
|
51
|
+
def __call__(
|
|
52
|
+
self,
|
|
53
|
+
name: str,
|
|
54
|
+
endpoint: str,
|
|
55
|
+
method: str = "GET",
|
|
56
|
+
params: dict[str, Any] | None = None,
|
|
57
|
+
headers: dict[str, str] | None = None,
|
|
58
|
+
auth: dict[str, Any] | None = None,
|
|
59
|
+
pagination: dict[str, Any] | None = None,
|
|
60
|
+
output_artifact: dict[str, Any] | None = None,
|
|
61
|
+
timeout: int = 30,
|
|
62
|
+
max_retries: int = 3,
|
|
63
|
+
backoff_factor: float = 0.3,
|
|
64
|
+
rate_limit: dict[str, Any] | None = None,
|
|
65
|
+
deps: list[str] | None = None,
|
|
66
|
+
**kwargs: Any,
|
|
67
|
+
) -> NodeSpec:
|
|
68
|
+
"""Create API extraction node specification.
|
|
69
|
+
|
|
70
|
+
Parameters
|
|
71
|
+
----------
|
|
72
|
+
name : str
|
|
73
|
+
Node name
|
|
74
|
+
endpoint : str
|
|
75
|
+
API endpoint URL
|
|
76
|
+
method : str
|
|
77
|
+
HTTP method: "GET", "POST", "PUT", "DELETE"
|
|
78
|
+
params : dict, optional
|
|
79
|
+
Query parameters or request body
|
|
80
|
+
headers : dict, optional
|
|
81
|
+
Additional HTTP headers
|
|
82
|
+
auth : dict, optional
|
|
83
|
+
Authentication configuration
|
|
84
|
+
pagination : dict, optional
|
|
85
|
+
Pagination configuration
|
|
86
|
+
output_artifact : dict, optional
|
|
87
|
+
Output artifact configuration
|
|
88
|
+
timeout : int
|
|
89
|
+
Request timeout in seconds
|
|
90
|
+
max_retries : int
|
|
91
|
+
Maximum retry attempts
|
|
92
|
+
backoff_factor : float
|
|
93
|
+
Backoff multiplier for retries
|
|
94
|
+
rate_limit : dict, optional
|
|
95
|
+
Rate limiting configuration
|
|
96
|
+
deps : list[str], optional
|
|
97
|
+
Dependency node names
|
|
98
|
+
**kwargs : Any
|
|
99
|
+
Additional parameters
|
|
100
|
+
|
|
101
|
+
Returns
|
|
102
|
+
-------
|
|
103
|
+
NodeSpec
|
|
104
|
+
Node specification ready for execution
|
|
105
|
+
"""
|
|
106
|
+
# Create wrapped function
|
|
107
|
+
wrapped_fn = self._create_api_function(
|
|
108
|
+
name,
|
|
109
|
+
endpoint,
|
|
110
|
+
method,
|
|
111
|
+
params,
|
|
112
|
+
headers,
|
|
113
|
+
auth,
|
|
114
|
+
pagination,
|
|
115
|
+
output_artifact,
|
|
116
|
+
timeout,
|
|
117
|
+
max_retries,
|
|
118
|
+
backoff_factor,
|
|
119
|
+
rate_limit,
|
|
120
|
+
)
|
|
121
|
+
|
|
122
|
+
# Define schemas
|
|
123
|
+
input_schema = {"input_data": dict, "**ports": dict}
|
|
124
|
+
output_schema = {"output": dict, "metadata": dict}
|
|
125
|
+
|
|
126
|
+
input_model = self.create_pydantic_model(f"{name}Input", input_schema)
|
|
127
|
+
output_model = self.create_pydantic_model(f"{name}Output", output_schema)
|
|
128
|
+
|
|
129
|
+
# Store parameters
|
|
130
|
+
node_params = {
|
|
131
|
+
"endpoint": endpoint,
|
|
132
|
+
"method": method,
|
|
133
|
+
"params": params,
|
|
134
|
+
"headers": headers,
|
|
135
|
+
"auth": auth,
|
|
136
|
+
"pagination": pagination,
|
|
137
|
+
"output_artifact": output_artifact,
|
|
138
|
+
"timeout": timeout,
|
|
139
|
+
"max_retries": max_retries,
|
|
140
|
+
"backoff_factor": backoff_factor,
|
|
141
|
+
"rate_limit": rate_limit,
|
|
142
|
+
**kwargs,
|
|
143
|
+
}
|
|
144
|
+
|
|
145
|
+
return NodeSpec(
|
|
146
|
+
name=name,
|
|
147
|
+
fn=wrapped_fn,
|
|
148
|
+
in_model=input_model,
|
|
149
|
+
out_model=output_model,
|
|
150
|
+
deps=frozenset(deps or []),
|
|
151
|
+
params=node_params,
|
|
152
|
+
)
|
|
153
|
+
|
|
154
|
+
def _create_api_function(
|
|
155
|
+
self,
|
|
156
|
+
name: str,
|
|
157
|
+
endpoint: str,
|
|
158
|
+
method: str,
|
|
159
|
+
params: dict[str, Any] | None,
|
|
160
|
+
headers: dict[str, str] | None,
|
|
161
|
+
auth: dict[str, Any] | None,
|
|
162
|
+
pagination: dict[str, Any] | None,
|
|
163
|
+
output_artifact: dict[str, Any] | None,
|
|
164
|
+
timeout: int,
|
|
165
|
+
max_retries: int,
|
|
166
|
+
backoff_factor: float,
|
|
167
|
+
rate_limit: dict[str, Any] | None,
|
|
168
|
+
) -> Any:
|
|
169
|
+
"""Create the wrapped API extraction function.
|
|
170
|
+
|
|
171
|
+
Implementation details omitted for brevity - similar to the original
|
|
172
|
+
but simplified for demo purposes.
|
|
173
|
+
|
|
174
|
+
Parameters
|
|
175
|
+
----------
|
|
176
|
+
name : str
|
|
177
|
+
Node name
|
|
178
|
+
endpoint : str
|
|
179
|
+
API endpoint
|
|
180
|
+
method : str
|
|
181
|
+
HTTP method
|
|
182
|
+
params : dict, optional
|
|
183
|
+
Request parameters
|
|
184
|
+
headers : dict, optional
|
|
185
|
+
Request headers
|
|
186
|
+
auth : dict, optional
|
|
187
|
+
Authentication config
|
|
188
|
+
pagination : dict, optional
|
|
189
|
+
Pagination config
|
|
190
|
+
output_artifact : dict, optional
|
|
191
|
+
Output artifact config
|
|
192
|
+
timeout : int
|
|
193
|
+
Request timeout
|
|
194
|
+
max_retries : int
|
|
195
|
+
Maximum retries
|
|
196
|
+
backoff_factor : float
|
|
197
|
+
Backoff factor
|
|
198
|
+
rate_limit : dict, optional
|
|
199
|
+
Rate limiting config
|
|
200
|
+
|
|
201
|
+
Returns
|
|
202
|
+
-------
|
|
203
|
+
Callable
|
|
204
|
+
Async function that performs API extraction
|
|
205
|
+
"""
|
|
206
|
+
|
|
207
|
+
# Implementation would go here - simplified for the example
|
|
208
|
+
async def wrapped_fn(input_data: Any, **ports: Any) -> dict[str, Any]:
|
|
209
|
+
"""Placeholder implementation."""
|
|
210
|
+
# In a real implementation, this would:
|
|
211
|
+
# 1. Make HTTP requests with aiohttp
|
|
212
|
+
# 2. Handle pagination
|
|
213
|
+
# 3. Apply authentication
|
|
214
|
+
# 4. Store results in artifact store
|
|
215
|
+
# 5. Return structured results
|
|
216
|
+
|
|
217
|
+
return {
|
|
218
|
+
"data": [],
|
|
219
|
+
"metadata": {
|
|
220
|
+
"endpoint": endpoint,
|
|
221
|
+
"method": method,
|
|
222
|
+
"records_extracted": 0,
|
|
223
|
+
"status": "placeholder_implementation",
|
|
224
|
+
},
|
|
225
|
+
}
|
|
226
|
+
|
|
227
|
+
wrapped_fn.__name__ = f"api_extract_{name}"
|
|
228
|
+
wrapped_fn.__doc__ = f"API extraction: {endpoint}"
|
|
229
|
+
|
|
230
|
+
return wrapped_fn
|
|
@@ -0,0 +1,181 @@
|
|
|
1
|
+
"""Simplified BaseNodeFactory for creating nodes with Pydantic models."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from abc import ABC, abstractmethod
|
|
6
|
+
from typing import Any, cast
|
|
7
|
+
|
|
8
|
+
from hexdag.core.domain.dag import NodeSpec
|
|
9
|
+
from hexdag.core.orchestration.prompt.template import PromptTemplate
|
|
10
|
+
from hexdag.core.protocols import is_schema_type
|
|
11
|
+
from pydantic import BaseModel, create_model
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
class BaseNodeFactory(ABC):
    """Minimal base class for node factories with Pydantic models."""

    # Event emission was removed from factories; the orchestrator-level
    # ObserverManager now owns all event handling.

    def create_pydantic_model(
        self, name: str, schema: dict[str, Any] | type[BaseModel] | type[Any] | None
    ) -> type[BaseModel] | None:
        """Build a Pydantic model from a schema specification.

        Raises
        ------
        ValueError
            If schema type is not supported
        """
        if schema is None:
            return None

        # Existing BaseModel subclasses pass straight through untouched.
        if is_schema_type(schema):
            return schema  # type: ignore[return-value] # is_schema_type checks for BaseModel subclass

        if isinstance(schema, dict):
            # Names accepted when a field type is spelled as a string.
            string_types = {
                "str": str,
                "int": int,
                "float": float,
                "bool": bool,
                "list": list,
                "dict": dict,
                "Any": Any,
            }

            fields: dict[str, Any] = {}
            for key, spec in schema.items():
                if isinstance(spec, str):
                    # A type name such as "int" -> the real type (unknown names -> Any).
                    fields[key] = (string_types.get(spec, Any), ...)
                elif isinstance(spec, type):
                    # A bare type becomes a required field.
                    fields[key] = (spec, ...)
                elif isinstance(spec, tuple):
                    # Already in (type, default) form; use as-is.
                    fields[key] = spec
                else:
                    # Opaque specification -> fall back to Any, required.
                    fields[key] = (Any, ...)

            return create_model(name, **fields)

        # Fall through: schema should itself be a type; wrap it in a
        # single-field model keyed "value".
        try:
            return cast("type[Any] | None", create_model(name, value=(schema, ...)))
        except (TypeError, AttributeError) as e:
            # schema was something create_model cannot digest.
            raise ValueError(f"Schema must be a dict, type, or Pydantic model, got {type(schema).__name__}") from e

    @staticmethod
    def infer_input_schema_from_template(
        template: str | PromptTemplate,
        special_params: set[str] | None = None,
    ) -> dict[str, Any]:
        """Derive an input schema from a prompt template's variables.

        Extracts the variable names referenced by *template* and maps each
        of them to ``str``. Names listed in *special_params* are treated as
        framework-injected (not user input) and excluded.

        Parameters
        ----------
        template : str | PromptTemplate
            The prompt template to analyze. A plain string is wrapped in a
            PromptTemplate first.
        special_params : set[str] | None, optional
            Parameter names to drop from the schema (e.g. "context_history").
            When None, nothing is filtered.

        Returns
        -------
        dict[str, Any]
            Mapping of variable names to ``str``; ``{"input": str}`` when the
            template exposes no variables at all.

        Examples
        --------
        ``"Hello {{name}}"`` yields ``{"name": str}``; a template with no
        variables yields ``{"input": str}``; names in *special_params* are
        omitted from the result.
        """
        if isinstance(template, str):
            template = PromptTemplate(template)

        names = getattr(template, "input_vars", [])
        if special_params:
            names = [n for n in names if n not in special_params]

        if not names:
            # No user-facing variables: expose a single generic "input" field.
            return {"input": str}

        # Nested references like "user.name" collapse to their root variable;
        # the root is re-checked against special_params as well.
        return {
            root: str
            for root in (n.split(".")[0] for n in names)
            if not special_params or root not in special_params
        }

    def _copy_required_ports_to_wrapper(self, wrapper_fn: Any) -> None:
        """Propagate required_ports metadata from the factory class to *wrapper_fn*.

        Keeps port requirements visible on the generated node function.
        """
        factory_cls = self.__class__
        if hasattr(factory_cls, "_hexdag_required_ports"):
            # _hexdag_required_ports is attached dynamically by the @node decorator.
            wrapper_fn._hexdag_required_ports = factory_cls._hexdag_required_ports  # pyright: ignore[reportAttributeAccessIssue] # noqa: B010

    def create_node_with_mapping(
        self,
        name: str,
        wrapped_fn: Any,
        input_schema: dict[str, Any] | None,
        output_schema: dict[str, Any] | type[BaseModel] | None,
        deps: list[str] | None = None,
        **kwargs: Any,
    ) -> NodeSpec:
        """Universal NodeSpec creation."""
        # Make sure port metadata travels with the node function.
        self._copy_required_ports_to_wrapper(wrapped_fn)

        in_model = self.create_pydantic_model(f"{name}Input", input_schema)
        out_model = self.create_pydantic_model(f"{name}Output", output_schema)

        return NodeSpec(
            name=name,
            fn=wrapped_fn,
            in_model=in_model,
            out_model=out_model,
            deps=frozenset(deps or []),
            params=kwargs,
        )

    @abstractmethod
    def __call__(self, name: str, *args: Any, **kwargs: Any) -> NodeSpec:  # noqa: ARG002
        """Create a NodeSpec.

        Must be implemented by subclasses.

        Args:
            name: Name of the node
            *args: Additional positional arguments (unused, for subclass flexibility)
            **kwargs: Additional keyword arguments
        """
        _ = args  # Intentionally unused; kept so subclasses may extend the signature
        raise NotImplementedError
|