pycharter 0.0.20__py3-none-any.whl → 0.0.22__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- api/dependencies/__init__.py +2 -1
- api/dependencies/database.py +71 -5
- api/main.py +47 -8
- api/models/contracts.py +6 -4
- api/models/metadata.py +11 -7
- api/models/schemas.py +16 -10
- api/routes/v1/contracts.py +498 -226
- api/routes/v1/metadata.py +52 -211
- api/routes/v1/schemas.py +1 -1
- api/routes/v1/settings.py +88 -1
- api/utils.py +224 -0
- pycharter/__init__.py +149 -93
- pycharter/data/templates/template_transform_advanced.yaml +50 -0
- pycharter/data/templates/template_transform_simple.yaml +59 -0
- pycharter/db/models/base.py +1 -2
- pycharter/etl_generator/orchestrator.py +463 -487
- pycharter/metadata_store/postgres.py +16 -191
- pycharter/metadata_store/sqlite.py +12 -41
- {pycharter-0.0.20.dist-info → pycharter-0.0.22.dist-info}/METADATA +284 -62
- pycharter-0.0.22.dist-info/RECORD +358 -0
- ui/static/404/index.html +1 -1
- ui/static/404.html +1 -1
- ui/static/__next.__PAGE__.txt +1 -1
- ui/static/__next._full.txt +2 -2
- ui/static/__next._head.txt +1 -1
- ui/static/__next._index.txt +2 -2
- ui/static/__next._tree.txt +2 -2
- ui/static/_next/static/chunks/13d4a0fbd74c1ee4.js +1 -0
- ui/static/_next/static/chunks/2edb43b48432ac04.js +441 -0
- ui/static/_next/static/chunks/c4fa4f4114b7c352.js +1 -0
- ui/static/_next/static/chunks/d2363397e1b2bcab.css +1 -0
- ui/static/_next/static/chunks/f7d1a90dd75d2572.js +1 -0
- ui/static/_not-found/__next._full.txt +2 -2
- ui/static/_not-found/__next._head.txt +1 -1
- ui/static/_not-found/__next._index.txt +2 -2
- ui/static/_not-found/__next._not-found.__PAGE__.txt +1 -1
- ui/static/_not-found/__next._not-found.txt +1 -1
- ui/static/_not-found/__next._tree.txt +2 -2
- ui/static/_not-found/index.html +1 -1
- ui/static/_not-found/index.txt +2 -2
- ui/static/contracts/__next._full.txt +3 -3
- ui/static/contracts/__next._head.txt +1 -1
- ui/static/contracts/__next._index.txt +2 -2
- ui/static/contracts/__next._tree.txt +2 -2
- ui/static/contracts/__next.contracts.__PAGE__.txt +2 -2
- ui/static/contracts/__next.contracts.txt +1 -1
- ui/static/contracts/index.html +1 -1
- ui/static/contracts/index.txt +3 -3
- ui/static/documentation/__next._full.txt +3 -3
- ui/static/documentation/__next._head.txt +1 -1
- ui/static/documentation/__next._index.txt +2 -2
- ui/static/documentation/__next._tree.txt +2 -2
- ui/static/documentation/__next.documentation.__PAGE__.txt +2 -2
- ui/static/documentation/__next.documentation.txt +1 -1
- ui/static/documentation/index.html +2 -2
- ui/static/documentation/index.txt +3 -3
- ui/static/index.html +1 -1
- ui/static/index.txt +2 -2
- ui/static/metadata/__next._full.txt +2 -2
- ui/static/metadata/__next._head.txt +1 -1
- ui/static/metadata/__next._index.txt +2 -2
- ui/static/metadata/__next._tree.txt +2 -2
- ui/static/metadata/__next.metadata.__PAGE__.txt +1 -1
- ui/static/metadata/__next.metadata.txt +1 -1
- ui/static/metadata/index.html +1 -1
- ui/static/metadata/index.txt +2 -2
- ui/static/quality/__next._full.txt +2 -2
- ui/static/quality/__next._head.txt +1 -1
- ui/static/quality/__next._index.txt +2 -2
- ui/static/quality/__next._tree.txt +2 -2
- ui/static/quality/__next.quality.__PAGE__.txt +1 -1
- ui/static/quality/__next.quality.txt +1 -1
- ui/static/quality/index.html +2 -2
- ui/static/quality/index.txt +2 -2
- ui/static/rules/__next._full.txt +2 -2
- ui/static/rules/__next._head.txt +1 -1
- ui/static/rules/__next._index.txt +2 -2
- ui/static/rules/__next._tree.txt +2 -2
- ui/static/rules/__next.rules.__PAGE__.txt +1 -1
- ui/static/rules/__next.rules.txt +1 -1
- ui/static/rules/index.html +1 -1
- ui/static/rules/index.txt +2 -2
- ui/static/schemas/__next._full.txt +2 -2
- ui/static/schemas/__next._head.txt +1 -1
- ui/static/schemas/__next._index.txt +2 -2
- ui/static/schemas/__next._tree.txt +2 -2
- ui/static/schemas/__next.schemas.__PAGE__.txt +1 -1
- ui/static/schemas/__next.schemas.txt +1 -1
- ui/static/schemas/index.html +1 -1
- ui/static/schemas/index.txt +2 -2
- ui/static/settings/__next._full.txt +2 -2
- ui/static/settings/__next._head.txt +1 -1
- ui/static/settings/__next._index.txt +2 -2
- ui/static/settings/__next._tree.txt +2 -2
- ui/static/settings/__next.settings.__PAGE__.txt +1 -1
- ui/static/settings/__next.settings.txt +1 -1
- ui/static/settings/index.html +1 -1
- ui/static/settings/index.txt +2 -2
- ui/static/static/.gitkeep +0 -0
- ui/static/static/404/index.html +1 -0
- ui/static/static/404.html +1 -0
- ui/static/static/__next.__PAGE__.txt +10 -0
- ui/static/static/__next._full.txt +30 -0
- ui/static/static/__next._head.txt +7 -0
- ui/static/static/__next._index.txt +9 -0
- ui/static/static/__next._tree.txt +2 -0
- ui/static/static/_next/static/chunks/222442f6da32302a.js +1 -0
- ui/static/static/_next/static/chunks/247eb132b7f7b574.js +1 -0
- ui/static/static/_next/static/chunks/297d55555b71baba.js +1 -0
- ui/static/static/_next/static/chunks/2ab439ce003cd691.js +1 -0
- ui/static/static/_next/static/chunks/414e77373f8ff61c.js +1 -0
- ui/static/static/_next/static/chunks/49ca65abd26ae49e.js +1 -0
- ui/static/static/_next/static/chunks/5e04d10c4a7b58a3.js +1 -0
- ui/static/static/_next/static/chunks/652ad0aa26265c47.js +2 -0
- ui/static/static/_next/static/chunks/75d88a058d8ffaa6.js +1 -0
- ui/static/static/_next/static/chunks/8c89634cf6bad76f.js +1 -0
- ui/static/static/_next/static/chunks/9667e7a3d359eb39.js +1 -0
- ui/static/static/_next/static/chunks/9c23f44fff36548a.js +1 -0
- ui/static/static/_next/static/chunks/a6dad97d9634a72d.js +1 -0
- ui/static/static/_next/static/chunks/b32a0963684b9933.js +4 -0
- ui/static/static/_next/static/chunks/c69f6cba366bd988.js +1 -0
- ui/static/static/_next/static/chunks/db913959c675cea6.js +1 -0
- ui/static/static/_next/static/chunks/f061a4be97bfc3b3.js +1 -0
- ui/static/static/_next/static/chunks/f2e7afeab1178138.js +1 -0
- ui/static/static/_next/static/chunks/ff1a16fafef87110.js +1 -0
- ui/static/static/_next/static/chunks/turbopack-ffcb7ab6794027ef.js +3 -0
- ui/static/static/_next/static/tNTkVW6puVXC4bAm4WrHl/_buildManifest.js +11 -0
- ui/static/static/_next/static/tNTkVW6puVXC4bAm4WrHl/_ssgManifest.js +1 -0
- ui/static/static/_not-found/__next._full.txt +17 -0
- ui/static/static/_not-found/__next._head.txt +7 -0
- ui/static/static/_not-found/__next._index.txt +9 -0
- ui/static/static/_not-found/__next._not-found.__PAGE__.txt +5 -0
- ui/static/static/_not-found/__next._not-found.txt +4 -0
- ui/static/static/_not-found/__next._tree.txt +2 -0
- ui/static/static/_not-found/index.html +1 -0
- ui/static/static/_not-found/index.txt +17 -0
- ui/static/static/contracts/__next._full.txt +21 -0
- ui/static/static/contracts/__next._head.txt +7 -0
- ui/static/static/contracts/__next._index.txt +9 -0
- ui/static/static/contracts/__next._tree.txt +2 -0
- ui/static/static/contracts/__next.contracts.__PAGE__.txt +9 -0
- ui/static/static/contracts/__next.contracts.txt +4 -0
- ui/static/static/contracts/index.html +1 -0
- ui/static/static/contracts/index.txt +21 -0
- ui/static/static/documentation/__next._full.txt +21 -0
- ui/static/static/documentation/__next._head.txt +7 -0
- ui/static/static/documentation/__next._index.txt +9 -0
- ui/static/static/documentation/__next._tree.txt +2 -0
- ui/static/static/documentation/__next.documentation.__PAGE__.txt +9 -0
- ui/static/static/documentation/__next.documentation.txt +4 -0
- ui/static/static/documentation/index.html +93 -0
- ui/static/static/documentation/index.txt +21 -0
- ui/static/static/index.html +1 -0
- ui/static/static/index.txt +30 -0
- ui/static/static/metadata/__next._full.txt +21 -0
- ui/static/static/metadata/__next._head.txt +7 -0
- ui/static/static/metadata/__next._index.txt +9 -0
- ui/static/static/metadata/__next._tree.txt +2 -0
- ui/static/static/metadata/__next.metadata.__PAGE__.txt +9 -0
- ui/static/static/metadata/__next.metadata.txt +4 -0
- ui/static/static/metadata/index.html +1 -0
- ui/static/static/metadata/index.txt +21 -0
- ui/static/static/quality/__next._full.txt +21 -0
- ui/static/static/quality/__next._head.txt +7 -0
- ui/static/static/quality/__next._index.txt +9 -0
- ui/static/static/quality/__next._tree.txt +2 -0
- ui/static/static/quality/__next.quality.__PAGE__.txt +9 -0
- ui/static/static/quality/__next.quality.txt +4 -0
- ui/static/static/quality/index.html +2 -0
- ui/static/static/quality/index.txt +21 -0
- ui/static/static/rules/__next._full.txt +21 -0
- ui/static/static/rules/__next._head.txt +7 -0
- ui/static/static/rules/__next._index.txt +9 -0
- ui/static/static/rules/__next._tree.txt +2 -0
- ui/static/static/rules/__next.rules.__PAGE__.txt +9 -0
- ui/static/static/rules/__next.rules.txt +4 -0
- ui/static/static/rules/index.html +1 -0
- ui/static/static/rules/index.txt +21 -0
- ui/static/static/schemas/__next._full.txt +21 -0
- ui/static/static/schemas/__next._head.txt +7 -0
- ui/static/static/schemas/__next._index.txt +9 -0
- ui/static/static/schemas/__next._tree.txt +2 -0
- ui/static/static/schemas/__next.schemas.__PAGE__.txt +9 -0
- ui/static/static/schemas/__next.schemas.txt +4 -0
- ui/static/static/schemas/index.html +1 -0
- ui/static/static/schemas/index.txt +21 -0
- ui/static/static/settings/__next._full.txt +21 -0
- ui/static/static/settings/__next._head.txt +7 -0
- ui/static/static/settings/__next._index.txt +9 -0
- ui/static/static/settings/__next._tree.txt +2 -0
- ui/static/static/settings/__next.settings.__PAGE__.txt +9 -0
- ui/static/static/settings/__next.settings.txt +4 -0
- ui/static/static/settings/index.html +1 -0
- ui/static/static/settings/index.txt +21 -0
- ui/static/static/validation/__next._full.txt +21 -0
- ui/static/static/validation/__next._head.txt +7 -0
- ui/static/static/validation/__next._index.txt +9 -0
- ui/static/static/validation/__next._tree.txt +2 -0
- ui/static/static/validation/__next.validation.__PAGE__.txt +9 -0
- ui/static/static/validation/__next.validation.txt +4 -0
- ui/static/static/validation/index.html +1 -0
- ui/static/static/validation/index.txt +21 -0
- ui/static/validation/__next._full.txt +2 -2
- ui/static/validation/__next._head.txt +1 -1
- ui/static/validation/__next._index.txt +2 -2
- ui/static/validation/__next._tree.txt +2 -2
- ui/static/validation/__next.validation.__PAGE__.txt +1 -1
- ui/static/validation/__next.validation.txt +1 -1
- ui/static/validation/index.html +1 -1
- ui/static/validation/index.txt +2 -2
- pycharter/db/schemas/.ipynb_checkpoints/data_contract-checkpoint.py +0 -160
- pycharter-0.0.20.dist-info/RECORD +0 -247
- {pycharter-0.0.20.dist-info → pycharter-0.0.22.dist-info}/WHEEL +0 -0
- {pycharter-0.0.20.dist-info → pycharter-0.0.22.dist-info}/entry_points.txt +0 -0
- {pycharter-0.0.20.dist-info → pycharter-0.0.22.dist-info}/licenses/LICENSE +0 -0
- {pycharter-0.0.20.dist-info → pycharter-0.0.22.dist-info}/top_level.txt +0 -0
- /ui/static/_next/static/{tNTkVW6puVXC4bAm4WrHl → 0rYA78L88aUyD2Uh38hhX}/_buildManifest.js +0 -0
- /ui/static/_next/static/{tNTkVW6puVXC4bAm4WrHl → 0rYA78L88aUyD2Uh38hhX}/_ssgManifest.js +0 -0
- /ui/static/{_next → static/_next}/static/chunks/4e310fe5005770a3.css +0 -0
- /ui/static/{_next → static/_next}/static/chunks/5fc14c00a2779dc5.js +0 -0
- /ui/static/{_next → static/_next}/static/chunks/b584574fdc8ab13e.js +0 -0
- /ui/static/{_next → static/_next}/static/chunks/d5989c94d3614b3a.js +0 -0
|
@@ -1,29 +1,32 @@
|
|
|
1
1
|
"""
|
|
2
|
-
|
|
2
|
+
ETL Orchestrator - Streaming ETL pipeline with simple operations, JSONata, and custom functions.
|
|
3
3
|
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
|
|
4
|
+
Executes ETL pipelines: Extract → Transform (Simple Operations → JSONata → Custom Functions) → Load.
|
|
5
|
+
|
|
6
|
+
Transformation Pipeline:
|
|
7
|
+
1. Simple Operations: rename, convert, defaults, add, select, drop (declarative, easy to use)
|
|
8
|
+
2. JSONata: Powerful query language for complex transformations (full JSONata support)
|
|
9
|
+
3. Custom Functions: Import and run external Python modules/functions
|
|
7
10
|
"""
|
|
8
11
|
|
|
9
12
|
import asyncio
|
|
10
13
|
import gc
|
|
14
|
+
import importlib
|
|
11
15
|
import logging
|
|
16
|
+
import re
|
|
12
17
|
import uuid
|
|
13
18
|
import warnings
|
|
14
19
|
from collections import Counter, defaultdict
|
|
15
|
-
from datetime import
|
|
20
|
+
from datetime import datetime
|
|
16
21
|
from pathlib import Path
|
|
17
22
|
from typing import Any, AsyncIterator, Callable, Dict, List, Optional, Tuple
|
|
18
23
|
|
|
24
|
+
import jsonata
|
|
19
25
|
import yaml
|
|
20
26
|
|
|
21
27
|
from pycharter.contract_parser import ContractMetadata, parse_contract_file
|
|
22
28
|
from pycharter.etl_generator.checkpoint import CheckpointManager
|
|
23
|
-
from pycharter.etl_generator.database import
|
|
24
|
-
get_database_connection,
|
|
25
|
-
load_data,
|
|
26
|
-
)
|
|
29
|
+
from pycharter.etl_generator.database import get_database_connection, load_data
|
|
27
30
|
from pycharter.etl_generator.dlq import DeadLetterQueue, DLQReason
|
|
28
31
|
from pycharter.etl_generator.extraction import extract_with_pagination_streaming
|
|
29
32
|
from pycharter.etl_generator.progress import ETLProgress, ProgressTracker
|
|
@@ -31,32 +34,14 @@ from pycharter.utils.value_injector import resolve_values
|
|
|
31
34
|
|
|
32
35
|
logger = logging.getLogger(__name__)
|
|
33
36
|
|
|
34
|
-
# Optional
|
|
37
|
+
# Optional memory monitoring
|
|
35
38
|
try:
|
|
36
39
|
import psutil
|
|
37
40
|
PSUTIL_AVAILABLE = True
|
|
38
41
|
except ImportError:
|
|
39
42
|
PSUTIL_AVAILABLE = False
|
|
40
43
|
|
|
41
|
-
|
|
42
|
-
# ============================================================================
|
|
43
|
-
# CONSTANTS
|
|
44
|
-
# ============================================================================
|
|
45
|
-
|
|
46
|
-
COMPUTED_DATETIME_NOW = "@now"
|
|
47
|
-
COMPUTED_DATETIME_UTC_NOW = "@utcnow"
|
|
48
|
-
COMPUTED_WEEK_START = "@week_start"
|
|
49
|
-
COMPUTED_WEEK_END = "@week_end"
|
|
50
44
|
DEFAULT_BATCH_SIZE = 1000
|
|
51
|
-
DEFAULT_MAX_DEPTH = 10
|
|
52
|
-
DEFAULT_SEPARATOR = "_"
|
|
53
|
-
|
|
54
|
-
# Datetime parsing formats (in order of preference)
|
|
55
|
-
DATETIME_FORMATS = [
|
|
56
|
-
'%Y-%m-%dT%H:%M:%S',
|
|
57
|
-
'%Y-%m-%d %H:%M:%S',
|
|
58
|
-
'%Y-%m-%d'
|
|
59
|
-
]
|
|
60
45
|
|
|
61
46
|
|
|
62
47
|
class ETLOrchestrator:
|
|
@@ -473,526 +458,517 @@ class ETLOrchestrator:
|
|
|
473
458
|
yield batch
|
|
474
459
|
|
|
475
460
|
# ============================================================================
|
|
476
|
-
# TRANSFORMATION
|
|
461
|
+
# TRANSFORMATION (Simple Operations → JSONata → Custom Functions)
|
|
477
462
|
# ============================================================================
|
|
478
463
|
|
|
479
464
|
def transform(self, raw_data: List[Dict[str, Any]], **kwargs) -> List[Dict[str, Any]]:
|
|
480
465
|
"""
|
|
481
|
-
Transform
|
|
466
|
+
Transform data using simple operations, JSONata expressions, and/or custom Python functions.
|
|
482
467
|
|
|
483
|
-
|
|
484
|
-
1.
|
|
485
|
-
2.
|
|
486
|
-
3.
|
|
487
|
-
4. Apply type conversions
|
|
488
|
-
5. Apply fill_null rules
|
|
489
|
-
6. Drop specified fields
|
|
468
|
+
Pipeline order (applied sequentially):
|
|
469
|
+
1. Simple operations (rename, select, drop, convert, defaults, add)
|
|
470
|
+
2. JSONata transformation (if configured)
|
|
471
|
+
3. Custom function execution (if configured)
|
|
490
472
|
|
|
491
473
|
Args:
|
|
492
474
|
raw_data: Raw data from extraction
|
|
493
|
-
**kwargs: Additional
|
|
475
|
+
**kwargs: Additional parameters (passed to custom functions)
|
|
494
476
|
|
|
495
477
|
Returns:
|
|
496
478
|
Transformed data
|
|
479
|
+
|
|
480
|
+
Example - Simple operations:
|
|
481
|
+
transform_config:
|
|
482
|
+
rename:
|
|
483
|
+
oldName: new_name
|
|
484
|
+
camelCase: snake_case
|
|
485
|
+
select:
|
|
486
|
+
- field1
|
|
487
|
+
- field2
|
|
488
|
+
convert:
|
|
489
|
+
price: float
|
|
490
|
+
quantity: integer
|
|
491
|
+
defaults:
|
|
492
|
+
status: "pending"
|
|
493
|
+
|
|
494
|
+
Example - JSONata (advanced):
|
|
495
|
+
transform_config:
|
|
496
|
+
jsonata:
|
|
497
|
+
expression: |
|
|
498
|
+
$.{
|
|
499
|
+
"ticker": symbol,
|
|
500
|
+
"avg_price": $average(prices)
|
|
501
|
+
}
|
|
502
|
+
|
|
503
|
+
Example - Custom function:
|
|
504
|
+
transform_config:
|
|
505
|
+
custom_function:
|
|
506
|
+
module: "myproject.transforms"
|
|
507
|
+
function: "optimize_data"
|
|
508
|
+
mode: "batch"
|
|
497
509
|
"""
|
|
498
510
|
if not self.transform_config:
|
|
499
511
|
return raw_data
|
|
500
512
|
|
|
501
|
-
|
|
502
|
-
|
|
503
|
-
|
|
504
|
-
|
|
505
|
-
|
|
506
|
-
|
|
507
|
-
|
|
508
|
-
|
|
509
|
-
|
|
510
|
-
|
|
511
|
-
|
|
512
|
-
|
|
513
|
-
|
|
514
|
-
|
|
515
|
-
|
|
516
|
-
|
|
517
|
-
'
|
|
518
|
-
|
|
519
|
-
'
|
|
520
|
-
|
|
521
|
-
|
|
513
|
+
data = raw_data
|
|
514
|
+
|
|
515
|
+
# Step 1: Apply simple operations (in order)
|
|
516
|
+
# Support both new 'transform' key and legacy top-level keys for backward compatibility
|
|
517
|
+
simple_ops = {}
|
|
518
|
+
|
|
519
|
+
# New format: transform: { rename: {...}, select: [...] }
|
|
520
|
+
if 'transform' in self.transform_config:
|
|
521
|
+
simple_ops = self.transform_config.get('transform', {})
|
|
522
|
+
|
|
523
|
+
# Legacy format: rename: {...} at top level (for backward compatibility)
|
|
524
|
+
if 'rename' in self.transform_config and 'transform' not in self.transform_config:
|
|
525
|
+
simple_ops['rename'] = self.transform_config.get('rename')
|
|
526
|
+
if 'select' in self.transform_config and 'transform' not in self.transform_config:
|
|
527
|
+
simple_ops['select'] = self.transform_config.get('select')
|
|
528
|
+
if 'drop' in self.transform_config and 'transform' not in self.transform_config:
|
|
529
|
+
simple_ops['drop'] = self.transform_config.get('drop')
|
|
530
|
+
if 'convert' in self.transform_config and 'transform' not in self.transform_config:
|
|
531
|
+
simple_ops['convert'] = self.transform_config.get('convert')
|
|
532
|
+
if 'defaults' in self.transform_config and 'transform' not in self.transform_config:
|
|
533
|
+
simple_ops['defaults'] = self.transform_config.get('defaults')
|
|
534
|
+
if 'add' in self.transform_config and 'transform' not in self.transform_config:
|
|
535
|
+
simple_ops['add'] = self.transform_config.get('add')
|
|
536
|
+
|
|
537
|
+
if simple_ops:
|
|
538
|
+
data = self._apply_simple_operations(data, simple_ops)
|
|
539
|
+
|
|
540
|
+
# Step 2: Apply JSONata transformation (if configured)
|
|
541
|
+
jsonata_config = self.transform_config.get('jsonata')
|
|
542
|
+
if jsonata_config:
|
|
543
|
+
data = self._apply_jsonata(data, jsonata_config)
|
|
544
|
+
|
|
545
|
+
# Step 3: Apply custom function (if configured)
|
|
546
|
+
custom_func_config = self.transform_config.get('custom_function')
|
|
547
|
+
if custom_func_config:
|
|
548
|
+
data = self._apply_custom_function(data, custom_func_config, **kwargs)
|
|
549
|
+
|
|
550
|
+
return data
|
|
522
551
|
|
|
523
|
-
def
|
|
552
|
+
def _apply_simple_operations(
|
|
524
553
|
self,
|
|
525
|
-
|
|
526
|
-
|
|
527
|
-
|
|
528
|
-
|
|
529
|
-
|
|
530
|
-
rename_rules = transform_rules['rename']
|
|
531
|
-
flatten_rules = transform_rules['flatten']
|
|
532
|
-
type_rules = transform_rules['type']
|
|
533
|
-
fill_null_rules = transform_rules['fill_null']
|
|
534
|
-
drop_fields = transform_rules['drop']
|
|
535
|
-
|
|
536
|
-
transformed_record = {}
|
|
537
|
-
|
|
538
|
-
# Step 1: Apply rename transformations (with flattening if configured)
|
|
539
|
-
transformed_record.update(
|
|
540
|
-
self._apply_rename_transformations(record, rename_rules, flatten_rules)
|
|
541
|
-
)
|
|
554
|
+
data: List[Dict[str, Any]],
|
|
555
|
+
config: Dict[str, Any]
|
|
556
|
+
) -> List[Dict[str, Any]]:
|
|
557
|
+
"""
|
|
558
|
+
Apply simple declarative transformation operations.
|
|
542
559
|
|
|
543
|
-
|
|
544
|
-
|
|
545
|
-
|
|
546
|
-
|
|
560
|
+
Operations are applied in this order:
|
|
561
|
+
1. rename - Rename fields (old_name: new_name)
|
|
562
|
+
2. convert - Convert field types (field: type)
|
|
563
|
+
3. defaults - Set default values for missing fields
|
|
564
|
+
4. add - Add computed fields with expressions
|
|
565
|
+
5. select - Keep only specified fields
|
|
566
|
+
6. drop - Remove specified fields
|
|
547
567
|
|
|
548
|
-
|
|
549
|
-
|
|
568
|
+
Args:
|
|
569
|
+
data: Input data (list of records)
|
|
570
|
+
config: Simple operations configuration
|
|
550
571
|
|
|
551
|
-
|
|
552
|
-
|
|
572
|
+
Returns:
|
|
573
|
+
Transformed data
|
|
553
574
|
|
|
554
|
-
|
|
555
|
-
|
|
575
|
+
Example config:
|
|
576
|
+
transform:
|
|
577
|
+
rename:
|
|
578
|
+
oldName: new_name
|
|
579
|
+
camelCase: snake_case
|
|
580
|
+
convert:
|
|
581
|
+
price: float
|
|
582
|
+
quantity: integer
|
|
583
|
+
active: boolean
|
|
584
|
+
defaults:
|
|
585
|
+
status: "pending"
|
|
586
|
+
priority: 0
|
|
587
|
+
add:
|
|
588
|
+
full_name: "${first_name} ${last_name}"
|
|
589
|
+
created_at: "now()"
|
|
590
|
+
record_id: "uuid()"
|
|
591
|
+
select:
|
|
592
|
+
- field1
|
|
593
|
+
- field2
|
|
594
|
+
drop:
|
|
595
|
+
- internal_id
|
|
596
|
+
- debug_info
|
|
597
|
+
"""
|
|
598
|
+
if not data:
|
|
599
|
+
return data
|
|
600
|
+
|
|
601
|
+
result = []
|
|
602
|
+
|
|
603
|
+
# Get available fields from first record for validation
|
|
604
|
+
available_fields = set(data[0].keys()) if data else set()
|
|
605
|
+
|
|
606
|
+
# Step 1: Rename fields
|
|
607
|
+
rename_map = config.get('rename', {})
|
|
608
|
+
if rename_map:
|
|
609
|
+
# Validate rename mappings
|
|
610
|
+
missing_fields = [old for old in rename_map.keys() if old not in available_fields]
|
|
611
|
+
if missing_fields:
|
|
612
|
+
logger.warning(
|
|
613
|
+
f"Rename operation: Fields not found in data: {missing_fields}. "
|
|
614
|
+
f"Available fields: {sorted(available_fields)}"
|
|
615
|
+
)
|
|
556
616
|
|
|
557
|
-
# Step
|
|
558
|
-
|
|
617
|
+
# Step 2: Convert types
|
|
618
|
+
convert_map = config.get('convert', {})
|
|
559
619
|
|
|
560
|
-
|
|
561
|
-
|
|
562
|
-
def _apply_rename_transformations(
|
|
563
|
-
self,
|
|
564
|
-
record: Dict[str, Any],
|
|
565
|
-
rename_rules: Dict[str, str],
|
|
566
|
-
flatten_rules: Dict[str, Any]
|
|
567
|
-
) -> Dict[str, Any]:
|
|
568
|
-
"""Apply rename transformations, handling flattening if configured."""
|
|
569
|
-
transformed = {}
|
|
570
|
-
|
|
571
|
-
for source_field, target_field in rename_rules.items():
|
|
572
|
-
if source_field in record:
|
|
573
|
-
value = record[source_field]
|
|
574
|
-
flattened = self._maybe_flatten_field(source_field, value, flatten_rules)
|
|
575
|
-
if flattened is not None:
|
|
576
|
-
transformed.update(flattened)
|
|
577
|
-
else:
|
|
578
|
-
transformed[target_field] = value
|
|
579
|
-
elif target_field in record:
|
|
580
|
-
transformed[target_field] = record[target_field]
|
|
620
|
+
# Step 3: Defaults
|
|
621
|
+
defaults_map = config.get('defaults', {})
|
|
581
622
|
|
|
582
|
-
|
|
583
|
-
|
|
584
|
-
|
|
585
|
-
|
|
586
|
-
|
|
587
|
-
|
|
588
|
-
|
|
589
|
-
drop_fields
|
|
590
|
-
) -> Dict[str, Any]:
|
|
591
|
-
"""Copy remaining fields not in rename rules, handling flattening if configured."""
|
|
592
|
-
transformed = {}
|
|
593
|
-
|
|
594
|
-
for key, value in record.items():
|
|
595
|
-
if key not in rename_rules and key not in transformed:
|
|
596
|
-
if key not in drop_fields:
|
|
597
|
-
flattened = self._maybe_flatten_field(key, value, flatten_rules)
|
|
598
|
-
if flattened is not None:
|
|
599
|
-
transformed.update(flattened)
|
|
600
|
-
else:
|
|
601
|
-
transformed[key] = value
|
|
623
|
+
# Step 4: Add computed fields
|
|
624
|
+
add_map = config.get('add', {})
|
|
625
|
+
|
|
626
|
+
# Step 5: Select fields (keep only these)
|
|
627
|
+
select_fields = config.get('select')
|
|
628
|
+
|
|
629
|
+
# Step 6: Drop fields (remove these)
|
|
630
|
+
drop_fields = set(config.get('drop', []))
|
|
602
631
|
|
|
603
|
-
|
|
632
|
+
for record in data:
|
|
633
|
+
transformed = dict(record)
|
|
634
|
+
|
|
635
|
+
# 1. Rename
|
|
636
|
+
if rename_map:
|
|
637
|
+
for old_name, new_name in rename_map.items():
|
|
638
|
+
if old_name in transformed:
|
|
639
|
+
transformed[new_name] = transformed.pop(old_name)
|
|
640
|
+
|
|
641
|
+
# 2. Convert types
|
|
642
|
+
if convert_map:
|
|
643
|
+
for field_name, target_type in convert_map.items():
|
|
644
|
+
if field_name in transformed:
|
|
645
|
+
try:
|
|
646
|
+
transformed[field_name] = self._convert_type(
|
|
647
|
+
transformed[field_name], target_type
|
|
648
|
+
)
|
|
649
|
+
except (ValueError, TypeError) as e:
|
|
650
|
+
logger.warning(
|
|
651
|
+
f"Failed to convert field '{field_name}' to {target_type}: {e}. "
|
|
652
|
+
f"Keeping original value."
|
|
653
|
+
)
|
|
654
|
+
|
|
655
|
+
# 3. Apply defaults
|
|
656
|
+
if defaults_map:
|
|
657
|
+
for field_name, default_value in defaults_map.items():
|
|
658
|
+
if field_name not in transformed or transformed[field_name] is None:
|
|
659
|
+
transformed[field_name] = default_value
|
|
660
|
+
|
|
661
|
+
# 4. Add computed fields
|
|
662
|
+
if add_map:
|
|
663
|
+
for field_name, expression in add_map.items():
|
|
664
|
+
try:
|
|
665
|
+
transformed[field_name] = self._evaluate_expression(
|
|
666
|
+
expression, transformed
|
|
667
|
+
)
|
|
668
|
+
except Exception as e:
|
|
669
|
+
logger.warning(
|
|
670
|
+
f"Failed to compute field '{field_name}': {e}. "
|
|
671
|
+
f"Skipping this field."
|
|
672
|
+
)
|
|
673
|
+
|
|
674
|
+
# 5. Select (keep only specified fields)
|
|
675
|
+
if select_fields:
|
|
676
|
+
transformed = {
|
|
677
|
+
k: v for k, v in transformed.items()
|
|
678
|
+
if k in select_fields
|
|
679
|
+
}
|
|
680
|
+
|
|
681
|
+
# 6. Drop (remove specified fields)
|
|
682
|
+
if drop_fields:
|
|
683
|
+
transformed = {
|
|
684
|
+
k: v for k, v in transformed.items()
|
|
685
|
+
if k not in drop_fields
|
|
686
|
+
}
|
|
687
|
+
|
|
688
|
+
result.append(transformed)
|
|
689
|
+
|
|
690
|
+
return result
|
|
604
691
|
|
|
605
|
-
def
|
|
606
|
-
self,
|
|
607
|
-
field_name: str,
|
|
608
|
-
value: Any,
|
|
609
|
-
flatten_rules: Dict[str, Any]
|
|
610
|
-
) -> Optional[Dict[str, Any]]:
|
|
692
|
+
def _convert_type(self, value: Any, target_type: str) -> Any:
|
|
611
693
|
"""
|
|
612
|
-
|
|
694
|
+
Convert a value to the specified type.
|
|
695
|
+
|
|
696
|
+
Args:
|
|
697
|
+
value: Value to convert
|
|
698
|
+
target_type: Target type (string, integer, float, boolean, datetime, date)
|
|
613
699
|
|
|
614
700
|
Returns:
|
|
615
|
-
|
|
701
|
+
Converted value
|
|
616
702
|
"""
|
|
617
|
-
if
|
|
618
|
-
return None
|
|
619
|
-
|
|
620
|
-
flatten_config = flatten_rules[field_name]
|
|
621
|
-
if not flatten_config.get('enabled', True):
|
|
703
|
+
if value is None:
|
|
622
704
|
return None
|
|
623
705
|
|
|
624
|
-
|
|
625
|
-
return self._flatten_nested_object(value, field_name, flatten_config)
|
|
626
|
-
elif isinstance(value, list):
|
|
627
|
-
return self._flatten_array(value, field_name, flatten_config)
|
|
706
|
+
target_type_lower = target_type.lower().strip()
|
|
628
707
|
|
|
629
|
-
|
|
708
|
+
if target_type_lower in ('str', 'string'):
|
|
709
|
+
return str(value)
|
|
710
|
+
elif target_type_lower in ('int', 'integer'):
|
|
711
|
+
if isinstance(value, str):
|
|
712
|
+
# Try to parse as float first (handles "1.0" -> 1)
|
|
713
|
+
try:
|
|
714
|
+
return int(float(value))
|
|
715
|
+
except ValueError:
|
|
716
|
+
return int(value)
|
|
717
|
+
return int(value)
|
|
718
|
+
elif target_type_lower in ('float', 'number', 'numeric'):
|
|
719
|
+
if isinstance(value, str):
|
|
720
|
+
return float(value)
|
|
721
|
+
return float(value)
|
|
722
|
+
elif target_type_lower in ('bool', 'boolean'):
|
|
723
|
+
if isinstance(value, str):
|
|
724
|
+
return value.lower() in ('true', '1', 'yes', 'on')
|
|
725
|
+
return bool(value)
|
|
726
|
+
elif target_type_lower == 'datetime':
|
|
727
|
+
from datetime import datetime
|
|
728
|
+
if isinstance(value, str):
|
|
729
|
+
# Try common datetime formats
|
|
730
|
+
for fmt in [
|
|
731
|
+
'%Y-%m-%dT%H:%M:%S',
|
|
732
|
+
'%Y-%m-%dT%H:%M:%S.%f',
|
|
733
|
+
'%Y-%m-%dT%H:%M:%SZ',
|
|
734
|
+
'%Y-%m-%dT%H:%M:%S.%fZ',
|
|
735
|
+
'%Y-%m-%d %H:%M:%S',
|
|
736
|
+
'%Y-%m-%d %H:%M:%S.%f',
|
|
737
|
+
]:
|
|
738
|
+
try:
|
|
739
|
+
return datetime.strptime(value, fmt)
|
|
740
|
+
except ValueError:
|
|
741
|
+
continue
|
|
742
|
+
raise ValueError(f"Cannot parse datetime: {value}")
|
|
743
|
+
return value
|
|
744
|
+
elif target_type_lower == 'date':
|
|
745
|
+
from datetime import date, datetime
|
|
746
|
+
if isinstance(value, str):
|
|
747
|
+
# Try common date formats
|
|
748
|
+
for fmt in ['%Y-%m-%d', '%Y/%m/%d', '%m/%d/%Y']:
|
|
749
|
+
try:
|
|
750
|
+
dt = datetime.strptime(value, fmt)
|
|
751
|
+
return dt.date()
|
|
752
|
+
except ValueError:
|
|
753
|
+
continue
|
|
754
|
+
raise ValueError(f"Cannot parse date: {value}")
|
|
755
|
+
elif isinstance(value, datetime):
|
|
756
|
+
return value.date()
|
|
757
|
+
return value
|
|
758
|
+
else:
|
|
759
|
+
raise ValueError(f"Unsupported target type: {target_type}")
|
|
630
760
|
|
|
631
|
-
def
|
|
632
|
-
self,
|
|
633
|
-
transformed_record: Dict[str, Any],
|
|
634
|
-
rename_rules: Dict[str, str],
|
|
635
|
-
**kwargs
|
|
636
|
-
) -> None:
|
|
761
|
+
def _evaluate_expression(self, expression: str, record: Dict[str, Any]) -> Any:
|
|
637
762
|
"""
|
|
638
|
-
|
|
639
|
-
|
|
640
|
-
This method adds input parameters (defined in input_params) to transformed records,
|
|
641
|
-
overriding any values from the API response. It also applies rename rules to input
|
|
642
|
-
parameters, allowing them to be stored with different names (e.g., 'type' -> 'direction').
|
|
763
|
+
Evaluate a simple expression in the context of a record.
|
|
643
764
|
|
|
644
|
-
|
|
645
|
-
-
|
|
646
|
-
-
|
|
647
|
-
-
|
|
648
|
-
-
|
|
765
|
+
Supports:
|
|
766
|
+
- Field references: "${field_name}"
|
|
767
|
+
- String concatenation: "${field1} ${field2}"
|
|
768
|
+
- Simple functions: "now()", "uuid()"
|
|
769
|
+
- Literal values (if no placeholders)
|
|
649
770
|
|
|
650
771
|
Args:
|
|
651
|
-
|
|
652
|
-
|
|
653
|
-
**kwargs: Input parameters passed to the pipeline
|
|
654
|
-
"""
|
|
655
|
-
# Always override input parameters from kwargs to ensure consistency
|
|
656
|
-
# This ensures that request parameters (like symbol, period) always use
|
|
657
|
-
# the values from the input parameters, not from the API response
|
|
658
|
-
for param_name in self.input_params.keys():
|
|
659
|
-
if param_name in kwargs:
|
|
660
|
-
# Apply rename rule if one exists for this parameter
|
|
661
|
-
# This allows input parameters to be stored with different names
|
|
662
|
-
# (e.g., 'type' parameter -> 'direction' field)
|
|
663
|
-
target_name = rename_rules.get(param_name, param_name)
|
|
664
|
-
transformed_record[target_name] = kwargs[param_name]
|
|
665
|
-
|
|
666
|
-
def _apply_type_conversions(
|
|
667
|
-
self,
|
|
668
|
-
transformed_record: Dict[str, Any],
|
|
669
|
-
type_rules: Dict[str, str]
|
|
670
|
-
) -> None:
|
|
671
|
-
"""Apply type conversions to fields."""
|
|
672
|
-
for field, field_type in type_rules.items():
|
|
673
|
-
if field in transformed_record:
|
|
674
|
-
transformed_record[field] = self._convert_type(
|
|
675
|
-
transformed_record[field], field_type
|
|
676
|
-
)
|
|
677
|
-
|
|
678
|
-
def _apply_fill_null_rules(
|
|
679
|
-
self,
|
|
680
|
-
transformed_record: Dict[str, Any],
|
|
681
|
-
fill_null_rules: Dict[str, Any]
|
|
682
|
-
) -> None:
|
|
683
|
-
"""Apply fill_null rules, handling computed datetime values."""
|
|
684
|
-
for field, config in fill_null_rules.items():
|
|
685
|
-
if field not in transformed_record or transformed_record[field] is None:
|
|
686
|
-
default_value = config.get('default') if isinstance(config, dict) else config
|
|
687
|
-
computed_value = self._compute_datetime_value(default_value)
|
|
688
|
-
transformed_record[field] = computed_value if computed_value is not None else default_value
|
|
689
|
-
|
|
690
|
-
def _compute_datetime_value(self, value: Any) -> Optional[datetime]:
|
|
691
|
-
"""
|
|
692
|
-
Compute datetime value from special constants.
|
|
772
|
+
expression: Expression string
|
|
773
|
+
record: Record dictionary for context
|
|
693
774
|
|
|
694
|
-
Args:
|
|
695
|
-
value: Value to check (may be a computed datetime constant)
|
|
696
|
-
|
|
697
775
|
Returns:
|
|
698
|
-
|
|
776
|
+
Evaluated result
|
|
777
|
+
|
|
778
|
+
Examples:
|
|
779
|
+
"${first_name} ${last_name}" -> "John Doe"
|
|
780
|
+
"now()" -> "2024-01-01T12:00:00"
|
|
781
|
+
"uuid()" -> "123e4567-e89b-12d3-a456-426614174000"
|
|
782
|
+
"static_value" -> "static_value"
|
|
699
783
|
"""
|
|
700
|
-
if
|
|
701
|
-
return
|
|
702
|
-
|
|
703
|
-
|
|
704
|
-
|
|
705
|
-
|
|
706
|
-
|
|
707
|
-
return
|
|
708
|
-
|
|
709
|
-
|
|
710
|
-
|
|
711
|
-
|
|
712
|
-
|
|
713
|
-
|
|
714
|
-
|
|
715
|
-
|
|
716
|
-
|
|
717
|
-
|
|
718
|
-
|
|
719
|
-
|
|
720
|
-
|
|
721
|
-
|
|
722
|
-
|
|
723
|
-
|
|
724
|
-
|
|
725
|
-
|
|
784
|
+
if not isinstance(expression, str):
|
|
785
|
+
return expression
|
|
786
|
+
|
|
787
|
+
expression = expression.strip()
|
|
788
|
+
|
|
789
|
+
# Handle special functions
|
|
790
|
+
if expression == 'now()':
|
|
791
|
+
return datetime.now().isoformat()
|
|
792
|
+
elif expression == 'uuid()':
|
|
793
|
+
return str(uuid.uuid4())
|
|
794
|
+
|
|
795
|
+
# Handle field references and string interpolation
|
|
796
|
+
try:
|
|
797
|
+
# Simple string interpolation: "${field1} ${field2}"
|
|
798
|
+
result = expression
|
|
799
|
+
placeholders_found = False
|
|
800
|
+
|
|
801
|
+
# Find all ${...} placeholders
|
|
802
|
+
placeholder_pattern = r'\$\{([^}]+)\}'
|
|
803
|
+
matches = re.findall(placeholder_pattern, expression)
|
|
804
|
+
|
|
805
|
+
if matches:
|
|
806
|
+
placeholders_found = True
|
|
807
|
+
for field_name in matches:
|
|
808
|
+
if field_name in record:
|
|
809
|
+
value = record[field_name]
|
|
810
|
+
placeholder = f"${{{field_name}}}"
|
|
811
|
+
result = result.replace(placeholder, str(value) if value is not None else '')
|
|
812
|
+
else:
|
|
813
|
+
logger.warning(
|
|
814
|
+
f"Expression '{expression}': Field '{field_name}' not found in record. "
|
|
815
|
+
f"Available fields: {sorted(record.keys())}"
|
|
816
|
+
)
|
|
817
|
+
# Replace with empty string if field not found
|
|
818
|
+
placeholder = f"${{{field_name}}}"
|
|
819
|
+
result = result.replace(placeholder, '')
|
|
820
|
+
|
|
821
|
+
# If no placeholders were found and it's not a function, return as literal
|
|
822
|
+
if not placeholders_found and not expression.endswith('()'):
|
|
823
|
+
return expression
|
|
824
|
+
|
|
825
|
+
return result
|
|
826
|
+
except Exception as e:
|
|
827
|
+
raise ValueError(f"Failed to evaluate expression '{expression}': {e}") from e
|
|
726
828
|
|
|
727
|
-
def
|
|
728
|
-
self,
|
|
729
|
-
|
|
730
|
-
|
|
731
|
-
|
|
732
|
-
depth: int = 0,
|
|
733
|
-
max_depth: int = 10
|
|
734
|
-
) -> Dict[str, Any]:
|
|
829
|
+
def _apply_jsonata(
    self,
    data: List[Dict[str, Any]],
    config: Dict[str, Any]
) -> List[Dict[str, Any]]:
    """
    Apply a JSONata expression to transform data.

    Args:
        data: Input data (list of records)
        config: JSONata configuration with 'expression' and optional 'mode'
            ('batch' applies the expression to the whole list at once,
            'record' applies it to each record individually)

    Returns:
        Transformed data

    Raises:
        ValueError: If compiling or evaluating the expression fails.

    Example config:
        jsonata:
          expression: |
            $.{
              "ticker": symbol,
              "avg_price": $average(prices),
              "total_volume": $sum(volumes)
            }
          mode: "batch"  # or "record"
    """
    expression_str = config.get('expression')
    if not expression_str:
        # No expression configured: pass data through unchanged.
        return data

    mode = config.get('mode', 'batch')

    try:
        expr = jsonata.Jsonata(expression_str)

        if mode == 'batch':
            # Apply the expression to the entire dataset at once.
            result = expr.evaluate(data)
            if result is None:
                return []
            return result if isinstance(result, list) else [result]

        # Record mode: evaluate each record exactly once.
        # The previous implementation evaluated the expression twice per
        # record (once in the comprehension's filter and once for its
        # output), doubling the work and running any stateful expression
        # twice. Records whose evaluation yields None are dropped.
        results = []
        for record in data:
            evaluated = expr.evaluate(record)
            if evaluated is not None:
                results.append(evaluated)
        return results

    except Exception as e:
        logger.error(f"JSONata transformation failed: {e}")
        raise ValueError(f"JSONata transformation error: {e}") from e
|
|
864
876
|
|
|
865
|
-
def
|
|
877
|
+
def _apply_custom_function(
|
|
866
878
|
self,
|
|
867
|
-
|
|
868
|
-
field_name: str,
|
|
879
|
+
data: List[Dict[str, Any]],
|
|
869
880
|
config: Dict[str, Any],
|
|
870
|
-
|
|
871
|
-
|
|
872
|
-
) -> Dict[str, Any]:
|
|
881
|
+
**kwargs
|
|
882
|
+
) -> List[Dict[str, Any]]:
|
|
873
883
|
"""
|
|
874
|
-
|
|
884
|
+
Execute a custom Python function for transformation.
|
|
875
885
|
|
|
876
886
|
Args:
|
|
877
|
-
|
|
878
|
-
|
|
879
|
-
|
|
880
|
-
depth: Current recursion depth
|
|
881
|
-
max_depth: Maximum recursion depth
|
|
887
|
+
data: Input data
|
|
888
|
+
config: Custom function configuration
|
|
889
|
+
**kwargs: Additional parameters passed to the function
|
|
882
890
|
|
|
883
891
|
Returns:
|
|
884
|
-
|
|
885
|
-
"""
|
|
886
|
-
if not isinstance(array_obj, list):
|
|
887
|
-
return {field_name: array_obj}
|
|
888
|
-
|
|
889
|
-
flattened = {}
|
|
890
|
-
strategy = config.get('strategy', 'array_flatten')
|
|
891
|
-
separator = config.get('separator', DEFAULT_SEPARATOR)
|
|
892
|
-
max_depth_config = config.get('max_depth', max_depth)
|
|
893
|
-
item_flatten = config.get('item_flatten', {})
|
|
894
|
-
aggregate = config.get('aggregate', False) # If True, aggregate all items into single keys
|
|
895
|
-
|
|
896
|
-
if depth >= max_depth_config:
|
|
897
|
-
return {field_name: array_obj}
|
|
898
|
-
|
|
899
|
-
if strategy == 'array_flatten' or strategy == 'simple':
|
|
900
|
-
# Flatten each item in the array
|
|
901
|
-
for idx, item in enumerate(array_obj):
|
|
902
|
-
if isinstance(item, dict):
|
|
903
|
-
# Use item_flatten config if provided, otherwise use parent config
|
|
904
|
-
item_config = item_flatten if item_flatten else config
|
|
905
|
-
|
|
906
|
-
if aggregate:
|
|
907
|
-
# Aggregate: all items contribute to same keys (last wins or merge)
|
|
908
|
-
item_flattened = self._flatten_nested_object(
|
|
909
|
-
item,
|
|
910
|
-
field_name, # Same base name for all items
|
|
911
|
-
item_config,
|
|
912
|
-
depth + 1,
|
|
913
|
-
max_depth_config
|
|
914
|
-
)
|
|
915
|
-
# Merge or overwrite (last item wins)
|
|
916
|
-
flattened.update(item_flattened)
|
|
917
|
-
else:
|
|
918
|
-
# Indexed: each item gets its own keys with index
|
|
919
|
-
# Create a new config that uses the indexed field name as base
|
|
920
|
-
indexed_config = item_config.copy()
|
|
921
|
-
indexed_config['_base_field'] = f"{field_name}{separator}{idx}"
|
|
922
|
-
|
|
923
|
-
item_flattened = self._flatten_nested_object(
|
|
924
|
-
item,
|
|
925
|
-
f"{field_name}{separator}{idx}",
|
|
926
|
-
indexed_config,
|
|
927
|
-
depth + 1,
|
|
928
|
-
max_depth_config
|
|
929
|
-
)
|
|
930
|
-
flattened.update(item_flattened)
|
|
931
|
-
else:
|
|
932
|
-
# Simple value in array
|
|
933
|
-
if aggregate:
|
|
934
|
-
# For aggregate, use field name directly (last value wins)
|
|
935
|
-
flattened[field_name] = item
|
|
936
|
-
else:
|
|
937
|
-
flattened[f"{field_name}{separator}{idx}"] = item
|
|
938
|
-
else:
|
|
939
|
-
# Keep array as-is
|
|
940
|
-
flattened[field_name] = array_obj
|
|
892
|
+
Transformed data
|
|
941
893
|
|
|
942
|
-
|
|
943
|
-
|
|
944
|
-
|
|
945
|
-
|
|
946
|
-
|
|
947
|
-
|
|
894
|
+
Example config:
|
|
895
|
+
custom_function:
|
|
896
|
+
module: "pyoptima"
|
|
897
|
+
function: "optimize_from_etl_inputs"
|
|
898
|
+
mode: "batch"
|
|
899
|
+
kwargs:
|
|
900
|
+
method: "min_volatility"
|
|
901
|
+
solver: "ipopt"
|
|
902
|
+
|
|
903
|
+
Alternative config (using callable path):
|
|
904
|
+
custom_function:
|
|
905
|
+
callable: "myproject.transforms.optimize_portfolio"
|
|
906
|
+
mode: "batch"
|
|
907
|
+
"""
|
|
908
|
+
# Get module and function
|
|
909
|
+
callable_path = config.get('callable')
|
|
910
|
+
module_path = config.get('module')
|
|
911
|
+
func_name = config.get('function')
|
|
912
|
+
|
|
913
|
+
if callable_path:
|
|
914
|
+
# Parse "module.submodule.function" format
|
|
915
|
+
parts = callable_path.rsplit('.', 1)
|
|
916
|
+
if len(parts) != 2:
|
|
917
|
+
raise ValueError(f"Invalid callable path: {callable_path}. Use 'module.function' format.")
|
|
918
|
+
module_path, func_name = parts
|
|
919
|
+
|
|
920
|
+
if not module_path or not func_name:
|
|
921
|
+
raise ValueError("custom_function requires either 'callable' or 'module' + 'function'")
|
|
922
|
+
|
|
923
|
+
# Dynamic import
|
|
924
|
+
try:
|
|
925
|
+
module = importlib.import_module(module_path)
|
|
926
|
+
func = getattr(module, func_name)
|
|
927
|
+
except ImportError as e:
|
|
928
|
+
raise ValueError(f"Cannot import module '{module_path}': {e}") from e
|
|
929
|
+
except AttributeError as e:
|
|
930
|
+
raise ValueError(f"Function '{func_name}' not found in module '{module_path}'") from e
|
|
931
|
+
|
|
932
|
+
# Handle class-based methods (e.g., pyoptima optimization methods)
|
|
933
|
+
if isinstance(func, type):
|
|
934
|
+
instance = func()
|
|
935
|
+
if hasattr(instance, 'optimize'):
|
|
936
|
+
func = instance.optimize
|
|
937
|
+
elif hasattr(instance, 'run'):
|
|
938
|
+
func = instance.run
|
|
939
|
+
elif hasattr(instance, '__call__'):
|
|
940
|
+
func = instance
|
|
941
|
+
else:
|
|
942
|
+
raise ValueError(f"Class '{func_name}' has no 'optimize', 'run', or '__call__' method")
|
|
948
943
|
|
|
949
|
-
|
|
950
|
-
|
|
951
|
-
|
|
952
|
-
'int': int,
|
|
953
|
-
'float': float,
|
|
954
|
-
'double': float,
|
|
955
|
-
'boolean': bool,
|
|
956
|
-
'bool': bool,
|
|
957
|
-
'datetime': self._parse_datetime,
|
|
958
|
-
'timestamp': self._parse_datetime,
|
|
959
|
-
'date': self._parse_date,
|
|
960
|
-
}
|
|
944
|
+
# Get mode and kwargs
|
|
945
|
+
mode = config.get('mode', 'batch')
|
|
946
|
+
func_kwargs = config.get('kwargs', {})
|
|
961
947
|
|
|
962
|
-
|
|
963
|
-
|
|
964
|
-
try:
|
|
965
|
-
# Converter is either a type (callable) or a method (also callable)
|
|
966
|
-
return converter(value)
|
|
967
|
-
except (ValueError, TypeError):
|
|
968
|
-
return value
|
|
948
|
+
# Merge with runtime kwargs
|
|
949
|
+
merged_kwargs = {**func_kwargs, **kwargs}
|
|
969
950
|
|
|
970
|
-
|
|
971
|
-
|
|
972
|
-
|
|
973
|
-
|
|
974
|
-
|
|
975
|
-
|
|
976
|
-
|
|
977
|
-
|
|
978
|
-
|
|
979
|
-
|
|
980
|
-
|
|
981
|
-
|
|
982
|
-
|
|
983
|
-
|
|
984
|
-
|
|
985
|
-
|
|
986
|
-
|
|
987
|
-
|
|
988
|
-
|
|
989
|
-
|
|
990
|
-
|
|
991
|
-
try:
|
|
992
|
-
return datetime.strptime(value, '%Y-%m-%d').date()
|
|
993
|
-
except ValueError:
|
|
994
|
-
pass
|
|
995
|
-
return value
|
|
951
|
+
try:
|
|
952
|
+
if mode == 'batch':
|
|
953
|
+
result = func(data, **merged_kwargs)
|
|
954
|
+
if result is None:
|
|
955
|
+
return []
|
|
956
|
+
return result if isinstance(result, list) else [result]
|
|
957
|
+
else:
|
|
958
|
+
# Record mode
|
|
959
|
+
results = []
|
|
960
|
+
for record in data:
|
|
961
|
+
record_result = func(record, **merged_kwargs)
|
|
962
|
+
if record_result is not None:
|
|
963
|
+
if isinstance(record_result, list):
|
|
964
|
+
results.extend(record_result)
|
|
965
|
+
else:
|
|
966
|
+
results.append(record_result)
|
|
967
|
+
return results
|
|
968
|
+
|
|
969
|
+
except Exception as e:
|
|
970
|
+
logger.error(f"Custom function '{func_name}' failed: {e}")
|
|
971
|
+
raise ValueError(f"Custom function error: {e}") from e
|
|
996
972
|
|
|
997
973
|
# ============================================================================
|
|
998
974
|
# LOADING
|