hexdag-0.5.0.dev1-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (261)
  1. hexdag/__init__.py +116 -0
  2. hexdag/__main__.py +30 -0
  3. hexdag/adapters/executors/__init__.py +5 -0
  4. hexdag/adapters/executors/local_executor.py +316 -0
  5. hexdag/builtin/__init__.py +6 -0
  6. hexdag/builtin/adapters/__init__.py +51 -0
  7. hexdag/builtin/adapters/anthropic/__init__.py +5 -0
  8. hexdag/builtin/adapters/anthropic/anthropic_adapter.py +151 -0
  9. hexdag/builtin/adapters/database/__init__.py +6 -0
  10. hexdag/builtin/adapters/database/csv/csv_adapter.py +249 -0
  11. hexdag/builtin/adapters/database/pgvector/__init__.py +5 -0
  12. hexdag/builtin/adapters/database/pgvector/pgvector_adapter.py +478 -0
  13. hexdag/builtin/adapters/database/sqlalchemy/sqlalchemy_adapter.py +252 -0
  14. hexdag/builtin/adapters/database/sqlite/__init__.py +5 -0
  15. hexdag/builtin/adapters/database/sqlite/sqlite_adapter.py +410 -0
  16. hexdag/builtin/adapters/local/README.md +59 -0
  17. hexdag/builtin/adapters/local/__init__.py +7 -0
  18. hexdag/builtin/adapters/local/local_observer_manager.py +696 -0
  19. hexdag/builtin/adapters/memory/__init__.py +47 -0
  20. hexdag/builtin/adapters/memory/file_memory_adapter.py +297 -0
  21. hexdag/builtin/adapters/memory/in_memory_memory.py +216 -0
  22. hexdag/builtin/adapters/memory/schemas.py +57 -0
  23. hexdag/builtin/adapters/memory/session_memory.py +178 -0
  24. hexdag/builtin/adapters/memory/sqlite_memory_adapter.py +215 -0
  25. hexdag/builtin/adapters/memory/state_memory.py +280 -0
  26. hexdag/builtin/adapters/mock/README.md +89 -0
  27. hexdag/builtin/adapters/mock/__init__.py +15 -0
  28. hexdag/builtin/adapters/mock/hexdag.toml +50 -0
  29. hexdag/builtin/adapters/mock/mock_database.py +225 -0
  30. hexdag/builtin/adapters/mock/mock_embedding.py +223 -0
  31. hexdag/builtin/adapters/mock/mock_llm.py +177 -0
  32. hexdag/builtin/adapters/mock/mock_tool_adapter.py +192 -0
  33. hexdag/builtin/adapters/mock/mock_tool_router.py +232 -0
  34. hexdag/builtin/adapters/openai/__init__.py +5 -0
  35. hexdag/builtin/adapters/openai/openai_adapter.py +634 -0
  36. hexdag/builtin/adapters/secret/__init__.py +7 -0
  37. hexdag/builtin/adapters/secret/local_secret_adapter.py +248 -0
  38. hexdag/builtin/adapters/unified_tool_router.py +280 -0
  39. hexdag/builtin/macros/__init__.py +17 -0
  40. hexdag/builtin/macros/conversation_agent.py +390 -0
  41. hexdag/builtin/macros/llm_macro.py +151 -0
  42. hexdag/builtin/macros/reasoning_agent.py +423 -0
  43. hexdag/builtin/macros/tool_macro.py +380 -0
  44. hexdag/builtin/nodes/__init__.py +38 -0
  45. hexdag/builtin/nodes/_discovery.py +123 -0
  46. hexdag/builtin/nodes/agent_node.py +696 -0
  47. hexdag/builtin/nodes/base_node_factory.py +242 -0
  48. hexdag/builtin/nodes/composite_node.py +926 -0
  49. hexdag/builtin/nodes/data_node.py +201 -0
  50. hexdag/builtin/nodes/expression_node.py +487 -0
  51. hexdag/builtin/nodes/function_node.py +454 -0
  52. hexdag/builtin/nodes/llm_node.py +491 -0
  53. hexdag/builtin/nodes/loop_node.py +920 -0
  54. hexdag/builtin/nodes/mapped_input.py +518 -0
  55. hexdag/builtin/nodes/port_call_node.py +269 -0
  56. hexdag/builtin/nodes/tool_call_node.py +195 -0
  57. hexdag/builtin/nodes/tool_utils.py +390 -0
  58. hexdag/builtin/prompts/__init__.py +68 -0
  59. hexdag/builtin/prompts/base.py +422 -0
  60. hexdag/builtin/prompts/chat_prompts.py +303 -0
  61. hexdag/builtin/prompts/error_correction_prompts.py +320 -0
  62. hexdag/builtin/prompts/tool_prompts.py +160 -0
  63. hexdag/builtin/tools/builtin_tools.py +84 -0
  64. hexdag/builtin/tools/database_tools.py +164 -0
  65. hexdag/cli/__init__.py +17 -0
  66. hexdag/cli/__main__.py +7 -0
  67. hexdag/cli/commands/__init__.py +27 -0
  68. hexdag/cli/commands/build_cmd.py +812 -0
  69. hexdag/cli/commands/create_cmd.py +208 -0
  70. hexdag/cli/commands/docs_cmd.py +293 -0
  71. hexdag/cli/commands/generate_types_cmd.py +252 -0
  72. hexdag/cli/commands/init_cmd.py +188 -0
  73. hexdag/cli/commands/pipeline_cmd.py +494 -0
  74. hexdag/cli/commands/plugin_dev_cmd.py +529 -0
  75. hexdag/cli/commands/plugins_cmd.py +441 -0
  76. hexdag/cli/commands/studio_cmd.py +101 -0
  77. hexdag/cli/commands/validate_cmd.py +221 -0
  78. hexdag/cli/main.py +84 -0
  79. hexdag/core/__init__.py +83 -0
  80. hexdag/core/config/__init__.py +20 -0
  81. hexdag/core/config/loader.py +479 -0
  82. hexdag/core/config/models.py +150 -0
  83. hexdag/core/configurable.py +294 -0
  84. hexdag/core/context/__init__.py +37 -0
  85. hexdag/core/context/execution_context.py +378 -0
  86. hexdag/core/docs/__init__.py +26 -0
  87. hexdag/core/docs/extractors.py +678 -0
  88. hexdag/core/docs/generators.py +890 -0
  89. hexdag/core/docs/models.py +120 -0
  90. hexdag/core/domain/__init__.py +10 -0
  91. hexdag/core/domain/dag.py +1225 -0
  92. hexdag/core/exceptions.py +234 -0
  93. hexdag/core/expression_parser.py +569 -0
  94. hexdag/core/logging.py +449 -0
  95. hexdag/core/models/__init__.py +17 -0
  96. hexdag/core/models/base.py +138 -0
  97. hexdag/core/orchestration/__init__.py +46 -0
  98. hexdag/core/orchestration/body_executor.py +481 -0
  99. hexdag/core/orchestration/components/__init__.py +97 -0
  100. hexdag/core/orchestration/components/adapter_lifecycle_manager.py +113 -0
  101. hexdag/core/orchestration/components/checkpoint_manager.py +134 -0
  102. hexdag/core/orchestration/components/execution_coordinator.py +360 -0
  103. hexdag/core/orchestration/components/health_check_manager.py +176 -0
  104. hexdag/core/orchestration/components/input_mapper.py +143 -0
  105. hexdag/core/orchestration/components/lifecycle_manager.py +583 -0
  106. hexdag/core/orchestration/components/node_executor.py +377 -0
  107. hexdag/core/orchestration/components/secret_manager.py +202 -0
  108. hexdag/core/orchestration/components/wave_executor.py +158 -0
  109. hexdag/core/orchestration/constants.py +17 -0
  110. hexdag/core/orchestration/events/README.md +312 -0
  111. hexdag/core/orchestration/events/__init__.py +104 -0
  112. hexdag/core/orchestration/events/batching.py +330 -0
  113. hexdag/core/orchestration/events/decorators.py +139 -0
  114. hexdag/core/orchestration/events/events.py +573 -0
  115. hexdag/core/orchestration/events/observers/__init__.py +30 -0
  116. hexdag/core/orchestration/events/observers/core_observers.py +690 -0
  117. hexdag/core/orchestration/events/observers/models.py +111 -0
  118. hexdag/core/orchestration/events/taxonomy.py +269 -0
  119. hexdag/core/orchestration/hook_context.py +237 -0
  120. hexdag/core/orchestration/hooks.py +437 -0
  121. hexdag/core/orchestration/models.py +418 -0
  122. hexdag/core/orchestration/orchestrator.py +910 -0
  123. hexdag/core/orchestration/orchestrator_factory.py +275 -0
  124. hexdag/core/orchestration/port_wrappers.py +327 -0
  125. hexdag/core/orchestration/prompt/__init__.py +32 -0
  126. hexdag/core/orchestration/prompt/template.py +332 -0
  127. hexdag/core/pipeline_builder/__init__.py +21 -0
  128. hexdag/core/pipeline_builder/component_instantiator.py +386 -0
  129. hexdag/core/pipeline_builder/include_tag.py +265 -0
  130. hexdag/core/pipeline_builder/pipeline_config.py +133 -0
  131. hexdag/core/pipeline_builder/py_tag.py +223 -0
  132. hexdag/core/pipeline_builder/tag_discovery.py +268 -0
  133. hexdag/core/pipeline_builder/yaml_builder.py +1196 -0
  134. hexdag/core/pipeline_builder/yaml_validator.py +569 -0
  135. hexdag/core/ports/__init__.py +65 -0
  136. hexdag/core/ports/api_call.py +133 -0
  137. hexdag/core/ports/database.py +489 -0
  138. hexdag/core/ports/embedding.py +215 -0
  139. hexdag/core/ports/executor.py +237 -0
  140. hexdag/core/ports/file_storage.py +117 -0
  141. hexdag/core/ports/healthcheck.py +87 -0
  142. hexdag/core/ports/llm.py +551 -0
  143. hexdag/core/ports/memory.py +70 -0
  144. hexdag/core/ports/observer_manager.py +130 -0
  145. hexdag/core/ports/secret.py +145 -0
  146. hexdag/core/ports/tool_router.py +94 -0
  147. hexdag/core/ports_builder.py +623 -0
  148. hexdag/core/protocols.py +273 -0
  149. hexdag/core/resolver.py +304 -0
  150. hexdag/core/schema/__init__.py +9 -0
  151. hexdag/core/schema/generator.py +742 -0
  152. hexdag/core/secrets.py +242 -0
  153. hexdag/core/types.py +413 -0
  154. hexdag/core/utils/async_warnings.py +206 -0
  155. hexdag/core/utils/schema_conversion.py +78 -0
  156. hexdag/core/utils/sql_validation.py +86 -0
  157. hexdag/core/validation/secure_json.py +148 -0
  158. hexdag/core/yaml_macro.py +517 -0
  159. hexdag/mcp_server.py +3120 -0
  160. hexdag/studio/__init__.py +10 -0
  161. hexdag/studio/build_ui.py +92 -0
  162. hexdag/studio/server/__init__.py +1 -0
  163. hexdag/studio/server/main.py +100 -0
  164. hexdag/studio/server/routes/__init__.py +9 -0
  165. hexdag/studio/server/routes/execute.py +208 -0
  166. hexdag/studio/server/routes/export.py +558 -0
  167. hexdag/studio/server/routes/files.py +207 -0
  168. hexdag/studio/server/routes/plugins.py +419 -0
  169. hexdag/studio/server/routes/validate.py +220 -0
  170. hexdag/studio/ui/index.html +13 -0
  171. hexdag/studio/ui/package-lock.json +2992 -0
  172. hexdag/studio/ui/package.json +31 -0
  173. hexdag/studio/ui/postcss.config.js +6 -0
  174. hexdag/studio/ui/public/hexdag.svg +5 -0
  175. hexdag/studio/ui/src/App.tsx +251 -0
  176. hexdag/studio/ui/src/components/Canvas.tsx +408 -0
  177. hexdag/studio/ui/src/components/ContextMenu.tsx +187 -0
  178. hexdag/studio/ui/src/components/FileBrowser.tsx +123 -0
  179. hexdag/studio/ui/src/components/Header.tsx +181 -0
  180. hexdag/studio/ui/src/components/HexdagNode.tsx +193 -0
  181. hexdag/studio/ui/src/components/NodeInspector.tsx +512 -0
  182. hexdag/studio/ui/src/components/NodePalette.tsx +262 -0
  183. hexdag/studio/ui/src/components/NodePortsSection.tsx +403 -0
  184. hexdag/studio/ui/src/components/PluginManager.tsx +347 -0
  185. hexdag/studio/ui/src/components/PortsEditor.tsx +481 -0
  186. hexdag/studio/ui/src/components/PythonEditor.tsx +195 -0
  187. hexdag/studio/ui/src/components/ValidationPanel.tsx +105 -0
  188. hexdag/studio/ui/src/components/YamlEditor.tsx +196 -0
  189. hexdag/studio/ui/src/components/index.ts +8 -0
  190. hexdag/studio/ui/src/index.css +92 -0
  191. hexdag/studio/ui/src/main.tsx +10 -0
  192. hexdag/studio/ui/src/types/index.ts +123 -0
  193. hexdag/studio/ui/src/vite-env.d.ts +1 -0
  194. hexdag/studio/ui/tailwind.config.js +29 -0
  195. hexdag/studio/ui/tsconfig.json +37 -0
  196. hexdag/studio/ui/tsconfig.node.json +13 -0
  197. hexdag/studio/ui/vite.config.ts +35 -0
  198. hexdag/visualization/__init__.py +69 -0
  199. hexdag/visualization/dag_visualizer.py +1020 -0
  200. hexdag-0.5.0.dev1.dist-info/METADATA +369 -0
  201. hexdag-0.5.0.dev1.dist-info/RECORD +261 -0
  202. hexdag-0.5.0.dev1.dist-info/WHEEL +4 -0
  203. hexdag-0.5.0.dev1.dist-info/entry_points.txt +4 -0
  204. hexdag-0.5.0.dev1.dist-info/licenses/LICENSE +190 -0
  205. hexdag_plugins/.gitignore +43 -0
  206. hexdag_plugins/README.md +73 -0
  207. hexdag_plugins/__init__.py +1 -0
  208. hexdag_plugins/azure/LICENSE +21 -0
  209. hexdag_plugins/azure/README.md +414 -0
  210. hexdag_plugins/azure/__init__.py +21 -0
  211. hexdag_plugins/azure/azure_blob_adapter.py +450 -0
  212. hexdag_plugins/azure/azure_cosmos_adapter.py +383 -0
  213. hexdag_plugins/azure/azure_keyvault_adapter.py +314 -0
  214. hexdag_plugins/azure/azure_openai_adapter.py +415 -0
  215. hexdag_plugins/azure/pyproject.toml +107 -0
  216. hexdag_plugins/azure/tests/__init__.py +1 -0
  217. hexdag_plugins/azure/tests/test_azure_blob_adapter.py +350 -0
  218. hexdag_plugins/azure/tests/test_azure_cosmos_adapter.py +323 -0
  219. hexdag_plugins/azure/tests/test_azure_keyvault_adapter.py +330 -0
  220. hexdag_plugins/azure/tests/test_azure_openai_adapter.py +329 -0
  221. hexdag_plugins/hexdag_etl/README.md +168 -0
  222. hexdag_plugins/hexdag_etl/__init__.py +53 -0
  223. hexdag_plugins/hexdag_etl/examples/01_simple_pandas_transform.py +270 -0
  224. hexdag_plugins/hexdag_etl/examples/02_simple_pandas_only.py +149 -0
  225. hexdag_plugins/hexdag_etl/examples/03_file_io_pipeline.py +109 -0
  226. hexdag_plugins/hexdag_etl/examples/test_pandas_transform.py +84 -0
  227. hexdag_plugins/hexdag_etl/hexdag.toml +25 -0
  228. hexdag_plugins/hexdag_etl/hexdag_etl/__init__.py +48 -0
  229. hexdag_plugins/hexdag_etl/hexdag_etl/nodes/__init__.py +13 -0
  230. hexdag_plugins/hexdag_etl/hexdag_etl/nodes/api_extract.py +230 -0
  231. hexdag_plugins/hexdag_etl/hexdag_etl/nodes/base_node_factory.py +181 -0
  232. hexdag_plugins/hexdag_etl/hexdag_etl/nodes/file_io.py +415 -0
  233. hexdag_plugins/hexdag_etl/hexdag_etl/nodes/outlook.py +492 -0
  234. hexdag_plugins/hexdag_etl/hexdag_etl/nodes/pandas_transform.py +563 -0
  235. hexdag_plugins/hexdag_etl/hexdag_etl/nodes/sql_extract_load.py +112 -0
  236. hexdag_plugins/hexdag_etl/pyproject.toml +82 -0
  237. hexdag_plugins/hexdag_etl/test_transform.py +54 -0
  238. hexdag_plugins/hexdag_etl/tests/test_plugin_integration.py +62 -0
  239. hexdag_plugins/mysql_adapter/LICENSE +21 -0
  240. hexdag_plugins/mysql_adapter/README.md +224 -0
  241. hexdag_plugins/mysql_adapter/__init__.py +6 -0
  242. hexdag_plugins/mysql_adapter/mysql_adapter.py +408 -0
  243. hexdag_plugins/mysql_adapter/pyproject.toml +93 -0
  244. hexdag_plugins/mysql_adapter/tests/test_mysql_adapter.py +259 -0
  245. hexdag_plugins/storage/README.md +184 -0
  246. hexdag_plugins/storage/__init__.py +19 -0
  247. hexdag_plugins/storage/file/__init__.py +5 -0
  248. hexdag_plugins/storage/file/local.py +325 -0
  249. hexdag_plugins/storage/ports/__init__.py +5 -0
  250. hexdag_plugins/storage/ports/vector_store.py +236 -0
  251. hexdag_plugins/storage/sql/__init__.py +7 -0
  252. hexdag_plugins/storage/sql/base.py +187 -0
  253. hexdag_plugins/storage/sql/mysql.py +27 -0
  254. hexdag_plugins/storage/sql/postgresql.py +27 -0
  255. hexdag_plugins/storage/tests/__init__.py +1 -0
  256. hexdag_plugins/storage/tests/test_local_file_storage.py +161 -0
  257. hexdag_plugins/storage/tests/test_sql_adapters.py +212 -0
  258. hexdag_plugins/storage/vector/__init__.py +7 -0
  259. hexdag_plugins/storage/vector/chromadb.py +223 -0
  260. hexdag_plugins/storage/vector/in_memory.py +285 -0
  261. hexdag_plugins/storage/vector/pgvector.py +502 -0
hexdag_plugins/hexdag_etl/hexdag_etl/nodes/pandas_transform.py
@@ -0,0 +1,563 @@
+ """Pandas transform node with multi-operation support for ETL pipelines."""
+
+ import asyncio
+ import importlib
+ from collections.abc import Callable
+ from dataclasses import asdict
+ from typing import Any
+
+ import pandas as pd
+ from hexdag.builtin.nodes.base_node_factory import BaseNodeFactory
+ from hexdag.core.domain.dag import NodeSpec
+ from hexdag.core.registry import node
+ from hexdag.core.registry.models import NodeSubtype
+ from pydantic import BaseModel
+
+
+ class PandasOperation(BaseModel):
+     """Single pandas operation configuration."""
+
+     type: str = "transform"
+     """Operation type: 'transform', 'map', 'filter', 'assign'"""
+
+     method: str | None = None
+     """Pandas method path (e.g., 'pandas.DataFrame.groupby', 'pandas.merge')"""
+
+     args: list[Any] | None = None
+     """Positional arguments for the operation"""
+
+     kwargs: dict[str, Any] | None = None
+     """Keyword arguments for the operation"""
+
+     columns: dict[str, str] | None = None
+     """Column mappings (for 'map' or 'rename' operations)"""
+
+     condition: str | None = None
+     """Filter condition expression (for 'filter' operations)"""
+
+
+ @node(name="pandas_transform_node", subtype=NodeSubtype.FUNCTION, namespace="etl")
+ class PandasTransformNode(BaseNodeFactory):
+     """Node factory for multi-operation pandas transforms.
+
+     Executes a sequence of pandas operations on DataFrames, supporting:
+     - Chained transformations
+     - Multiple input DataFrames
+     - Artifact storage integration
+     - Complex data cleaning and enrichment
+
+     Examples
+     --------
+     YAML pipeline::
+
+         - kind: pandas_transform_node
+           metadata:
+             name: clean_and_aggregate
+           spec:
+             input_artifacts:
+               - slot: raw_customers
+                 key: customers_v1
+               - slot: raw_transactions
+                 key: transactions_v1
+             operations:
+               # Operation 1: Join DataFrames
+               - type: transform
+                 method: pandas.merge
+                 args:
+                   - "{{ input_artifacts[0] }}"
+                   - "{{ input_artifacts[1] }}"
+                 kwargs:
+                   on: customer_id
+                   how: left
+
+               # Operation 2: Drop missing values
+               - type: transform
+                 method: pandas.DataFrame.dropna
+                 kwargs:
+                   subset: [customer_id, amount]
+
+               # Operation 3: Calculate new column
+               - type: transform
+                 method: pandas.DataFrame.assign
+                 kwargs:
+                   revenue_tier: |
+                     lambda df: pd.cut(
+                         df['amount'],
+                         bins=[0, 100, 500, float('inf')],
+                         labels=['Low', 'Medium', 'High']
+                     )
+
+               # Operation 4: Rename columns
+               - type: map
+                 columns:
+                   transaction_id: txn_id
+                   customer_id: cust_id
+                   amount: total_amount
+
+               # Operation 5: Filter rows
+               - type: filter
+                 condition: "{{ df['amount'] > 0 }}"
+
+               # Operation 6: Group and aggregate
+               - type: transform
+                 method: pandas.DataFrame.groupby
+                 args:
+                   - customer_id
+                 kwargs:
+                   as_index: false
+
+               # Operation 7: Calculate aggregations
+               - type: transform
+                 method: pandas.DataFrame.agg
+                 kwargs:
+                   amount: ['count', 'sum', 'mean']
+                   customer_id: 'count'
+
+             output_artifact:
+               slot: enriched_customers
+               key: enriched_v1
+               format: parquet
+               compression: snappy
+     """
+
+     def __call__(
+         self,
+         name: str,
+         operations: list[dict[str, Any]],
+         input_artifacts: list[dict[str, Any]] | None = None,
+         output_artifact: dict[str, Any] | None = None,
+         deps: list[str] | None = None,
+         **kwargs: Any,
+     ) -> NodeSpec:
+         """Create a pandas transform node specification.
+
+         Parameters
+         ----------
+         name : str
+             Node name
+         operations : list[dict]
+             List of pandas operation configurations
+         input_artifacts : list[dict], optional
+             Artifact references for input DataFrames
+         output_artifact : dict, optional
+             Artifact configuration for output DataFrame
+         deps : list[str], optional
+             Dependency node names
+         **kwargs : Any
+             Additional node parameters
+
+         Returns
+         -------
+         NodeSpec
+             Node specification ready for execution
+         """
+         # Convert operation dicts to Pydantic models for validation
+         operation_models = [PandasOperation(**op) for op in operations]
+
+         # Create wrapped function
+         wrapped_fn = self._create_transform_function(name, operation_models, input_artifacts, output_artifact)
+
+         # Define input schema (the same whether or not input artifacts are used)
+         input_schema = {"input_data": dict, "**ports": dict}
+
+         # Define output schema
+         output_schema = {"output": dict}
+
+         input_model = self.create_pydantic_model(f"{name}Input", input_schema)
+         output_model = self.create_pydantic_model(f"{name}Output", output_schema)
+
+         # Store parameters
+         node_params = {
+             "operations": operations,
+             "input_artifacts": input_artifacts,
+             "output_artifact": output_artifact,
+             **kwargs,
+         }
+
+         return NodeSpec(
+             name=name,
+             fn=wrapped_fn,
+             in_model=input_model,
+             out_model=output_model,
+             deps=frozenset(deps or []),
+             params=node_params,
+         )
+
+     def _create_transform_function(
+         self,
+         name: str,
+         operations: list[PandasOperation],
+         input_artifacts: list[dict[str, Any]] | None,
+         output_artifact: dict[str, Any] | None,
+     ) -> Callable[..., dict[str, Any]]:
+         """Create the wrapped transformation function.
+
+         Parameters
+         ----------
+         name : str
+             Node name
+         operations : list[PandasOperation]
+             Operations to execute
+         input_artifacts : list[dict], optional
+             Input artifact references
+         output_artifact : dict, optional
+             Output artifact configuration
+
+         Returns
+         -------
+         Callable
+             Async function that executes the transformation
+         """
+
+         async def wrapped_fn(input_data: Any, **ports: Any) -> dict[str, Any]:
+             """Execute pandas transformation operations."""
+             # Initialize result DataFrame
+             df = None
+
+             # Load input artifacts if specified
+             if input_artifacts:
+                 artifact_store = ports.get("artifact_store")
+                 if not artifact_store:
+                     raise ValueError("artifact_store port required when using input_artifacts")
+
+                 loaded_dfs = []
+                 for artifact_ref in input_artifacts:
+                     slot = artifact_ref.get("slot")
+                     key = artifact_ref.get("key")
+                     format = artifact_ref.get("format")
+
+                     if not slot or not key:
+                         raise ValueError(f"Invalid artifact reference: {artifact_ref}")
+
+                     # Load from artifact store
+                     df_loaded = await artifact_store.read(name=slot, key=key, format=format)
+                     loaded_dfs.append(df_loaded)
+
+                 # Start with first DataFrame if available
+                 if loaded_dfs:
+                     df = loaded_dfs[0]
+             else:
+                 # Use input_data directly
+                 if isinstance(input_data, dict) and "data" in input_data:
+                     df = input_data["data"]
+                 else:
+                     df = input_data
+
+             if df is None:
+                 raise ValueError("No input DataFrame available")
+
+             if not isinstance(df, pd.DataFrame):
+                 # Try to convert to DataFrame
+                 try:
+                     df = pd.DataFrame(df)
+                 except Exception as e:
+                     raise ValueError(f"Could not convert input to DataFrame: {e}") from e
+
+             # Execute operations sequentially
+             for op in operations:
+                 df = await self._execute_operation(df, op, loaded_dfs if input_artifacts else [df])
+
+             # Store output artifact if specified
+             result = {"output": df}
+
+             if output_artifact:
+                 artifact_store = ports.get("artifact_store")
+                 if not artifact_store:
+                     raise ValueError("artifact_store port required when using output_artifact")
+
+                 slot = output_artifact.get("slot")
+                 key = output_artifact.get("key", f"{name}_{pd.Timestamp.now().strftime('%Y%m%d_%H%M%S')}")
+                 format = output_artifact.get("format", "pickle")
+                 compression = output_artifact.get("compression")
+                 metadata = output_artifact.get("metadata")
+
+                 if not slot:
+                     raise ValueError("output_artifact must specify 'slot' name")
+
+                 # Write to artifact store
+                 artifact_info = await artifact_store.write(
+                     name=slot,
+                     key=key,
+                     data=df,
+                     format=format,
+                     compression=compression,
+                     metadata=metadata,
+                 )
+
+                 result["artifact_info"] = asdict(artifact_info)
+                 result["records"] = len(df)
+
+             return result
+
+         # Preserve function metadata
+         wrapped_fn.__name__ = f"pandas_transform_{name}"
+         wrapped_fn.__doc__ = f"Multi-operation pandas transform: {name}"
+
+         return wrapped_fn
+
+     async def _execute_operation(
+         self, df: pd.DataFrame, op: PandasOperation, input_dfs: list[pd.DataFrame]
+     ) -> pd.DataFrame:
+         """Execute a single pandas operation.
+
+         Parameters
+         ----------
+         df : pd.DataFrame
+             Current DataFrame
+         op : PandasOperation
+             Operation to execute
+         input_dfs : list[pd.DataFrame]
+             Available input DataFrames
+
+         Returns
+         -------
+         pd.DataFrame
+             Transformed DataFrame
+         """
+         op_type = op.type or "transform"
+
+         if op_type == "transform":
+             return await self._execute_transform(df, op, input_dfs)
+
+         elif op_type == "map":
+             return await self._execute_map(df, op)
+
+         elif op_type == "filter":
+             return await self._execute_filter(df, op)
+
+         elif op_type == "assign":
+             return await self._execute_assign(df, op)
+
+         else:
+             raise ValueError(f"Unknown operation type: {op_type}")
+
+     async def _execute_transform(
+         self, df: pd.DataFrame, op: PandasOperation, input_dfs: list[pd.DataFrame]
+     ) -> pd.DataFrame:
+         """Execute a transform operation (calls a pandas method).
+
+         Parameters
+         ----------
+         df : pd.DataFrame
+             Current DataFrame
+         op : PandasOperation
+             Operation configuration
+         input_dfs : list[pd.DataFrame]
+             Available input DataFrames
+
+         Returns
+         -------
+         pd.DataFrame
+             Transformed DataFrame
+         """
+         if not op.method:
+             raise ValueError("Transform operation requires 'method' parameter")
+
+         # Resolve method
+         method = self._resolve_method(op.method)
+
+         # Prepare arguments (resolve template expressions)
+         args = []
+         if op.args:
+             for arg in op.args:
+                 args.append(self._resolve_arg(arg, df, input_dfs))
+
+         # Prepare keyword arguments
+         kwargs = {}
+         if op.kwargs:
+             for k, v in op.kwargs.items():
+                 kwargs[k] = self._resolve_arg(v, df, input_dfs)
+
+         # Execute method (handle both sync and async)
+         if asyncio.iscoroutinefunction(method):
+             result = await method(df, *args, **kwargs)
+         else:
+             result = method(df, *args, **kwargs)
+
+         return result
+
+     async def _execute_map(self, df: pd.DataFrame, op: PandasOperation) -> pd.DataFrame:
+         """Execute a map operation (column rename/mapping).
+
+         Parameters
+         ----------
+         df : pd.DataFrame
+             Current DataFrame
+         op : PandasOperation
+             Operation configuration with columns mapping
+
+         Returns
+         -------
+         pd.DataFrame
+             DataFrame with renamed columns
+         """
+         if not op.columns:
+             return df
+
+         return df.rename(columns=op.columns)
+
+     async def _execute_filter(self, df: pd.DataFrame, op: PandasOperation) -> pd.DataFrame:
+         """Execute a filter operation.
+
+         Parameters
+         ----------
+         df : pd.DataFrame
+             Current DataFrame
+         op : PandasOperation
+             Operation configuration with filter condition
+
+         Returns
+         -------
+         pd.DataFrame
+             Filtered DataFrame
+         """
+         if not op.condition:
+             return df
+
+         # Evaluate condition
+         # Note: This is a simplified implementation - production code should validate
+         # the condition for security
+         condition_result = self._resolve_arg(op.condition, df, [df])
+
+         return df[condition_result]
+
+     async def _execute_assign(self, df: pd.DataFrame, op: PandasOperation) -> pd.DataFrame:
+         """Execute an assign operation (add new columns).
+
+         Parameters
+         ----------
+         df : pd.DataFrame
+             Current DataFrame
+         op : PandasOperation
+             Operation configuration
+
+         Returns
+         -------
+         pd.DataFrame
+             DataFrame with new columns
+         """
+         if not op.kwargs:
+             return df
+
+         # Prepare new column assignments
+         new_cols = {}
+         for col_name, col_expr in op.kwargs.items():
+             new_cols[col_name] = self._resolve_arg(col_expr, df, [df])
+
+         return df.assign(**new_cols)
+
+     def _resolve_method(self, method_path: str) -> Callable:
+         """Resolve a method from a path string or return callable directly.
+
+         Parameters
+         ----------
+         method_path : str
+             Path like "pandas.DataFrame.groupby" or "pandas.merge"
+
+         Returns
+         -------
+         Callable
+             The resolved method
+         """
+         # Already callable
+         if callable(method_path):
+             return method_path
+
+         # Parse module path
+         if "." not in method_path:
+             raise ValueError(f"Method path must contain '.', got: {method_path}")
+
+         try:
+             # Handle pandas class paths like pandas.DataFrame.sort_values
+             if method_path.startswith("pandas."):
+                 parts = method_path.split(".")
+                 if len(parts) >= 3 and parts[1] == "DataFrame":
+                     # It's a DataFrame method: pandas.DataFrame.method_name
+                     method_name = parts[2]
+                     return getattr(pd.DataFrame, method_name)
+                 else:
+                     # It's a module-level function like pandas.merge
+                     module_path = ".".join(parts[:-1])
+                     attr_path = parts[-1]
+                     module = importlib.import_module(module_path)
+                     method = getattr(module, attr_path)
+
+                     if not callable(method):
+                         raise ValueError(f"'{method_path}' is not callable")
+
+                     return method
+             else:
+                 # Standard module attribute resolution
+                 module_path, attr_path = method_path.rsplit(".", 1)
+                 module = importlib.import_module(module_path)
+                 method = getattr(module, attr_path)
+
+                 if not callable(method):
+                     raise ValueError(f"'{method_path}' is not callable")
+
+                 return method
+         except Exception as e:
+             raise ValueError(f"Could not resolve method '{method_path}': {e}") from e
+
+     def _resolve_arg(self, arg: Any, df: pd.DataFrame, input_dfs: list[pd.DataFrame]) -> Any:
+         """Resolve an argument value (handles templates and expressions).
+
+         Parameters
+         ----------
+         arg : Any
+             Argument value or template expression
+         df : pd.DataFrame
+             Current DataFrame for context
+         input_dfs : list[pd.DataFrame]
+             All input DataFrames
+
+         Returns
+         -------
+         Any
+             Resolved argument value
+         """
+         # If it's a string template expression
+         if isinstance(arg, str) and "{{" in arg and "}}" in arg:
+             # Parse template expression
+             import re
+
+             pattern = r"\{\{\s*(.+?)\s*\}\}"
+             match = re.search(pattern, arg)
+
+             if match:
+                 expr = match.group(1)
+
+                 # Handle special variables
+                 if expr == "df":
+                     return df
+                 elif expr.startswith("input_artifacts["):
+                     # Extract index
+                     idx_match = re.search(r"input_artifacts\[(\d+)\]", expr)
+                     if idx_match:
+                         idx = int(idx_match.group(1))
+                         if 0 <= idx < len(input_dfs):
+                             return input_dfs[idx]
+                         else:
+                             raise IndexError(f"input_artifacts[{idx}] out of range")
+
+                 # Try to evaluate as Python expression
+                 try:
+                     # Restricted evaluation (no builtins) - still not a real security boundary
+                     scope = {"df": df, "input_artifacts": input_dfs, "pd": pd}
+                     return eval(expr, {"__builtins__": {}}, scope)
+                 except Exception:
+                     # Return as-is if evaluation fails
+                     return arg
+
+         # If it's a dict with lambda expression
+         if isinstance(arg, dict):
+             resolved = {}
+             for k, v in arg.items():
+                 resolved[k] = self._resolve_arg(v, df, input_dfs)
+             return resolved
+
+         # Return as-is
+         return arg
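
For local experimentation, the factory above can also be driven directly from Python instead of through a YAML pipeline. The sketch below is illustrative only: the `__call__` signature and operation fields come from the code in this diff, while the import path, direct instantiation of the factory, the sample data, and calling the returned spec's `fn` coroutine outside the orchestrator are assumptions::

    import asyncio

    import pandas as pd

    # Assumed import path for the plugin package shown in this diff.
    from hexdag_etl.nodes.pandas_transform import PandasTransformNode

    factory = PandasTransformNode()
    spec = factory(
        name="clean_orders",
        operations=[
            {"type": "transform", "method": "pandas.DataFrame.dropna",
             "kwargs": {"subset": ["order_id"]}},
            {"type": "map", "columns": {"order_id": "id"}},
            {"type": "filter", "condition": "{{ df['amount'] > 0 }}"},
        ],
    )

    df = pd.DataFrame({"order_id": [1, 2, None], "amount": [10.0, -5.0, 3.0]})

    # NodeSpec stores the wrapped coroutine as `fn`; calling it directly here
    # bypasses the orchestrator and its ports and is purely for illustration.
    result = asyncio.run(spec.fn(input_data={"data": df}))
    print(result["output"])  # rows with a non-null order_id and amount > 0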
hexdag_plugins/hexdag_etl/hexdag_etl/nodes/sql_extract_load.py
@@ -0,0 +1,112 @@
+ """SQL extraction and loading nodes for database operations."""
+
+ from typing import Any
+
+ from hexdag.core.domain.dag import NodeSpec
+ from hexdag.core.registry import node
+ from hexdag.core.registry.models import NodeSubtype
+
+ from .base_node_factory import BaseNodeFactory
+
+
+ @node(name="sql_extract", subtype=NodeSubtype.TOOL, namespace="etl")
+ class SQLExtractNode(BaseNodeFactory):
+     """Extract data from SQL databases.
+
+     Placeholder implementation - to be completed with full SQLAlchemy integration.
+     """
+
+     def __call__(
+         self, name: str, query: str, database: str | None = None, deps: list[str] | None = None, **kwargs: Any
+     ) -> NodeSpec:
+         """Create SQL extract node.
+
+         Parameters
+         ----------
+         name : str
+             Node name
+         query : str
+             SQL query to execute
+         database : str, optional
+             Database connection reference
+         deps : list, optional
+             Dependencies
+         **kwargs : Any
+             Additional parameters
+
+         Returns
+         -------
+         NodeSpec
+             Node specification
+         """
+
+         async def wrapped_fn(input_data: dict, **ports: dict) -> dict:
+             """Placeholder implementation."""
+             return {"output": [], "metadata": {"query": query, "database": database, "status": "placeholder"}}
+
+         wrapped_fn.__name__ = f"sql_extract_{name}"
+
+         return self.create_node_with_mapping(
+             name=name,
+             wrapped_fn=wrapped_fn,
+             input_schema={"input_data": dict, "**ports": dict},
+             output_schema={"output": dict, "metadata": dict},
+             deps=deps or [],
+             **kwargs,
+         )
+
+
+ @node(name="sql_load", subtype=NodeSubtype.TOOL, namespace="etl")
+ class SQLLoadNode(BaseNodeFactory):
+     """Load data into SQL databases.
+
+     Placeholder implementation - to be completed with SQLAlchemy integration.
+     """
+
+     def __call__(
+         self,
+         name: str,
+         table: str,
+         mode: str = "append",
+         database: str | None = None,
+         deps: list[str] | None = None,
+         **kwargs: Any,
+     ) -> NodeSpec:
+         """Create SQL load node.
+
+         Parameters
+         ----------
+         name : str
+             Node name
+         table : str
+             Target table name
+         mode : str
+             Load mode: "append", "replace", "truncate_insert", "merge"
+         database : str, optional
+             Database connection reference
+         deps : list, optional
+             Dependencies
+         **kwargs : Any
+             Additional parameters
+
+         Returns
+         -------
+         NodeSpec
+             Node specification
+         """
+
+         async def wrapped_fn(input_data: dict, **ports: dict) -> dict:
+             """Placeholder implementation."""
+             row_count = len(input_data.get("output", [])) if isinstance(input_data, dict) else 0
+             return {"status": "loaded", "table": table, "rows": row_count}
+
+         wrapped_fn.__name__ = f"sql_load_{name}"
+
+         return self.create_node_with_mapping(
+             name=name,
+             wrapped_fn=wrapped_fn,
+             input_schema={"input_data": dict, "**ports": dict},
+             output_schema={"status": dict, "table": dict, "rows": dict},
+             deps=deps or [],
+             **kwargs,
+         )
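
Both placeholders register under the `etl` namespace via their `@node` decorators, alongside `pandas_transform_node`, so they can already be wired into a pipeline while the SQLAlchemy-backed implementations are pending. A minimal sketch of that wiring, assuming the same plugin import path as above and that `create_node_with_mapping` returns a NodeSpec wrapping the placeholder coroutine::

    # Assumed import path; both factories currently return canned placeholder results.
    from hexdag_etl.nodes.sql_extract_load import SQLExtractNode, SQLLoadNode

    extract_spec = SQLExtractNode()(
        name="extract_orders",
        query="SELECT id, amount FROM orders",
        database="analytics",
    )
    load_spec = SQLLoadNode()(
        name="load_orders",
        table="clean_orders",
        mode="append",
        deps=["extract_orders"],
    )

    # Until the SQLAlchemy integration lands, sql_extract yields an empty result
    # set plus query metadata, and sql_load reports zero rows loaded.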