pycharter 0.0.20__py3-none-any.whl → 0.0.22__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (222) hide show
  1. api/dependencies/__init__.py +2 -1
  2. api/dependencies/database.py +71 -5
  3. api/main.py +47 -8
  4. api/models/contracts.py +6 -4
  5. api/models/metadata.py +11 -7
  6. api/models/schemas.py +16 -10
  7. api/routes/v1/contracts.py +498 -226
  8. api/routes/v1/metadata.py +52 -211
  9. api/routes/v1/schemas.py +1 -1
  10. api/routes/v1/settings.py +88 -1
  11. api/utils.py +224 -0
  12. pycharter/__init__.py +149 -93
  13. pycharter/data/templates/template_transform_advanced.yaml +50 -0
  14. pycharter/data/templates/template_transform_simple.yaml +59 -0
  15. pycharter/db/models/base.py +1 -2
  16. pycharter/etl_generator/orchestrator.py +463 -487
  17. pycharter/metadata_store/postgres.py +16 -191
  18. pycharter/metadata_store/sqlite.py +12 -41
  19. {pycharter-0.0.20.dist-info → pycharter-0.0.22.dist-info}/METADATA +284 -62
  20. pycharter-0.0.22.dist-info/RECORD +358 -0
  21. ui/static/404/index.html +1 -1
  22. ui/static/404.html +1 -1
  23. ui/static/__next.__PAGE__.txt +1 -1
  24. ui/static/__next._full.txt +2 -2
  25. ui/static/__next._head.txt +1 -1
  26. ui/static/__next._index.txt +2 -2
  27. ui/static/__next._tree.txt +2 -2
  28. ui/static/_next/static/chunks/13d4a0fbd74c1ee4.js +1 -0
  29. ui/static/_next/static/chunks/2edb43b48432ac04.js +441 -0
  30. ui/static/_next/static/chunks/c4fa4f4114b7c352.js +1 -0
  31. ui/static/_next/static/chunks/d2363397e1b2bcab.css +1 -0
  32. ui/static/_next/static/chunks/f7d1a90dd75d2572.js +1 -0
  33. ui/static/_not-found/__next._full.txt +2 -2
  34. ui/static/_not-found/__next._head.txt +1 -1
  35. ui/static/_not-found/__next._index.txt +2 -2
  36. ui/static/_not-found/__next._not-found.__PAGE__.txt +1 -1
  37. ui/static/_not-found/__next._not-found.txt +1 -1
  38. ui/static/_not-found/__next._tree.txt +2 -2
  39. ui/static/_not-found/index.html +1 -1
  40. ui/static/_not-found/index.txt +2 -2
  41. ui/static/contracts/__next._full.txt +3 -3
  42. ui/static/contracts/__next._head.txt +1 -1
  43. ui/static/contracts/__next._index.txt +2 -2
  44. ui/static/contracts/__next._tree.txt +2 -2
  45. ui/static/contracts/__next.contracts.__PAGE__.txt +2 -2
  46. ui/static/contracts/__next.contracts.txt +1 -1
  47. ui/static/contracts/index.html +1 -1
  48. ui/static/contracts/index.txt +3 -3
  49. ui/static/documentation/__next._full.txt +3 -3
  50. ui/static/documentation/__next._head.txt +1 -1
  51. ui/static/documentation/__next._index.txt +2 -2
  52. ui/static/documentation/__next._tree.txt +2 -2
  53. ui/static/documentation/__next.documentation.__PAGE__.txt +2 -2
  54. ui/static/documentation/__next.documentation.txt +1 -1
  55. ui/static/documentation/index.html +2 -2
  56. ui/static/documentation/index.txt +3 -3
  57. ui/static/index.html +1 -1
  58. ui/static/index.txt +2 -2
  59. ui/static/metadata/__next._full.txt +2 -2
  60. ui/static/metadata/__next._head.txt +1 -1
  61. ui/static/metadata/__next._index.txt +2 -2
  62. ui/static/metadata/__next._tree.txt +2 -2
  63. ui/static/metadata/__next.metadata.__PAGE__.txt +1 -1
  64. ui/static/metadata/__next.metadata.txt +1 -1
  65. ui/static/metadata/index.html +1 -1
  66. ui/static/metadata/index.txt +2 -2
  67. ui/static/quality/__next._full.txt +2 -2
  68. ui/static/quality/__next._head.txt +1 -1
  69. ui/static/quality/__next._index.txt +2 -2
  70. ui/static/quality/__next._tree.txt +2 -2
  71. ui/static/quality/__next.quality.__PAGE__.txt +1 -1
  72. ui/static/quality/__next.quality.txt +1 -1
  73. ui/static/quality/index.html +2 -2
  74. ui/static/quality/index.txt +2 -2
  75. ui/static/rules/__next._full.txt +2 -2
  76. ui/static/rules/__next._head.txt +1 -1
  77. ui/static/rules/__next._index.txt +2 -2
  78. ui/static/rules/__next._tree.txt +2 -2
  79. ui/static/rules/__next.rules.__PAGE__.txt +1 -1
  80. ui/static/rules/__next.rules.txt +1 -1
  81. ui/static/rules/index.html +1 -1
  82. ui/static/rules/index.txt +2 -2
  83. ui/static/schemas/__next._full.txt +2 -2
  84. ui/static/schemas/__next._head.txt +1 -1
  85. ui/static/schemas/__next._index.txt +2 -2
  86. ui/static/schemas/__next._tree.txt +2 -2
  87. ui/static/schemas/__next.schemas.__PAGE__.txt +1 -1
  88. ui/static/schemas/__next.schemas.txt +1 -1
  89. ui/static/schemas/index.html +1 -1
  90. ui/static/schemas/index.txt +2 -2
  91. ui/static/settings/__next._full.txt +2 -2
  92. ui/static/settings/__next._head.txt +1 -1
  93. ui/static/settings/__next._index.txt +2 -2
  94. ui/static/settings/__next._tree.txt +2 -2
  95. ui/static/settings/__next.settings.__PAGE__.txt +1 -1
  96. ui/static/settings/__next.settings.txt +1 -1
  97. ui/static/settings/index.html +1 -1
  98. ui/static/settings/index.txt +2 -2
  99. ui/static/static/.gitkeep +0 -0
  100. ui/static/static/404/index.html +1 -0
  101. ui/static/static/404.html +1 -0
  102. ui/static/static/__next.__PAGE__.txt +10 -0
  103. ui/static/static/__next._full.txt +30 -0
  104. ui/static/static/__next._head.txt +7 -0
  105. ui/static/static/__next._index.txt +9 -0
  106. ui/static/static/__next._tree.txt +2 -0
  107. ui/static/static/_next/static/chunks/222442f6da32302a.js +1 -0
  108. ui/static/static/_next/static/chunks/247eb132b7f7b574.js +1 -0
  109. ui/static/static/_next/static/chunks/297d55555b71baba.js +1 -0
  110. ui/static/static/_next/static/chunks/2ab439ce003cd691.js +1 -0
  111. ui/static/static/_next/static/chunks/414e77373f8ff61c.js +1 -0
  112. ui/static/static/_next/static/chunks/49ca65abd26ae49e.js +1 -0
  113. ui/static/static/_next/static/chunks/5e04d10c4a7b58a3.js +1 -0
  114. ui/static/static/_next/static/chunks/652ad0aa26265c47.js +2 -0
  115. ui/static/static/_next/static/chunks/75d88a058d8ffaa6.js +1 -0
  116. ui/static/static/_next/static/chunks/8c89634cf6bad76f.js +1 -0
  117. ui/static/static/_next/static/chunks/9667e7a3d359eb39.js +1 -0
  118. ui/static/static/_next/static/chunks/9c23f44fff36548a.js +1 -0
  119. ui/static/static/_next/static/chunks/a6dad97d9634a72d.js +1 -0
  120. ui/static/static/_next/static/chunks/b32a0963684b9933.js +4 -0
  121. ui/static/static/_next/static/chunks/c69f6cba366bd988.js +1 -0
  122. ui/static/static/_next/static/chunks/db913959c675cea6.js +1 -0
  123. ui/static/static/_next/static/chunks/f061a4be97bfc3b3.js +1 -0
  124. ui/static/static/_next/static/chunks/f2e7afeab1178138.js +1 -0
  125. ui/static/static/_next/static/chunks/ff1a16fafef87110.js +1 -0
  126. ui/static/static/_next/static/chunks/turbopack-ffcb7ab6794027ef.js +3 -0
  127. ui/static/static/_next/static/tNTkVW6puVXC4bAm4WrHl/_buildManifest.js +11 -0
  128. ui/static/static/_next/static/tNTkVW6puVXC4bAm4WrHl/_ssgManifest.js +1 -0
  129. ui/static/static/_not-found/__next._full.txt +17 -0
  130. ui/static/static/_not-found/__next._head.txt +7 -0
  131. ui/static/static/_not-found/__next._index.txt +9 -0
  132. ui/static/static/_not-found/__next._not-found.__PAGE__.txt +5 -0
  133. ui/static/static/_not-found/__next._not-found.txt +4 -0
  134. ui/static/static/_not-found/__next._tree.txt +2 -0
  135. ui/static/static/_not-found/index.html +1 -0
  136. ui/static/static/_not-found/index.txt +17 -0
  137. ui/static/static/contracts/__next._full.txt +21 -0
  138. ui/static/static/contracts/__next._head.txt +7 -0
  139. ui/static/static/contracts/__next._index.txt +9 -0
  140. ui/static/static/contracts/__next._tree.txt +2 -0
  141. ui/static/static/contracts/__next.contracts.__PAGE__.txt +9 -0
  142. ui/static/static/contracts/__next.contracts.txt +4 -0
  143. ui/static/static/contracts/index.html +1 -0
  144. ui/static/static/contracts/index.txt +21 -0
  145. ui/static/static/documentation/__next._full.txt +21 -0
  146. ui/static/static/documentation/__next._head.txt +7 -0
  147. ui/static/static/documentation/__next._index.txt +9 -0
  148. ui/static/static/documentation/__next._tree.txt +2 -0
  149. ui/static/static/documentation/__next.documentation.__PAGE__.txt +9 -0
  150. ui/static/static/documentation/__next.documentation.txt +4 -0
  151. ui/static/static/documentation/index.html +93 -0
  152. ui/static/static/documentation/index.txt +21 -0
  153. ui/static/static/index.html +1 -0
  154. ui/static/static/index.txt +30 -0
  155. ui/static/static/metadata/__next._full.txt +21 -0
  156. ui/static/static/metadata/__next._head.txt +7 -0
  157. ui/static/static/metadata/__next._index.txt +9 -0
  158. ui/static/static/metadata/__next._tree.txt +2 -0
  159. ui/static/static/metadata/__next.metadata.__PAGE__.txt +9 -0
  160. ui/static/static/metadata/__next.metadata.txt +4 -0
  161. ui/static/static/metadata/index.html +1 -0
  162. ui/static/static/metadata/index.txt +21 -0
  163. ui/static/static/quality/__next._full.txt +21 -0
  164. ui/static/static/quality/__next._head.txt +7 -0
  165. ui/static/static/quality/__next._index.txt +9 -0
  166. ui/static/static/quality/__next._tree.txt +2 -0
  167. ui/static/static/quality/__next.quality.__PAGE__.txt +9 -0
  168. ui/static/static/quality/__next.quality.txt +4 -0
  169. ui/static/static/quality/index.html +2 -0
  170. ui/static/static/quality/index.txt +21 -0
  171. ui/static/static/rules/__next._full.txt +21 -0
  172. ui/static/static/rules/__next._head.txt +7 -0
  173. ui/static/static/rules/__next._index.txt +9 -0
  174. ui/static/static/rules/__next._tree.txt +2 -0
  175. ui/static/static/rules/__next.rules.__PAGE__.txt +9 -0
  176. ui/static/static/rules/__next.rules.txt +4 -0
  177. ui/static/static/rules/index.html +1 -0
  178. ui/static/static/rules/index.txt +21 -0
  179. ui/static/static/schemas/__next._full.txt +21 -0
  180. ui/static/static/schemas/__next._head.txt +7 -0
  181. ui/static/static/schemas/__next._index.txt +9 -0
  182. ui/static/static/schemas/__next._tree.txt +2 -0
  183. ui/static/static/schemas/__next.schemas.__PAGE__.txt +9 -0
  184. ui/static/static/schemas/__next.schemas.txt +4 -0
  185. ui/static/static/schemas/index.html +1 -0
  186. ui/static/static/schemas/index.txt +21 -0
  187. ui/static/static/settings/__next._full.txt +21 -0
  188. ui/static/static/settings/__next._head.txt +7 -0
  189. ui/static/static/settings/__next._index.txt +9 -0
  190. ui/static/static/settings/__next._tree.txt +2 -0
  191. ui/static/static/settings/__next.settings.__PAGE__.txt +9 -0
  192. ui/static/static/settings/__next.settings.txt +4 -0
  193. ui/static/static/settings/index.html +1 -0
  194. ui/static/static/settings/index.txt +21 -0
  195. ui/static/static/validation/__next._full.txt +21 -0
  196. ui/static/static/validation/__next._head.txt +7 -0
  197. ui/static/static/validation/__next._index.txt +9 -0
  198. ui/static/static/validation/__next._tree.txt +2 -0
  199. ui/static/static/validation/__next.validation.__PAGE__.txt +9 -0
  200. ui/static/static/validation/__next.validation.txt +4 -0
  201. ui/static/static/validation/index.html +1 -0
  202. ui/static/static/validation/index.txt +21 -0
  203. ui/static/validation/__next._full.txt +2 -2
  204. ui/static/validation/__next._head.txt +1 -1
  205. ui/static/validation/__next._index.txt +2 -2
  206. ui/static/validation/__next._tree.txt +2 -2
  207. ui/static/validation/__next.validation.__PAGE__.txt +1 -1
  208. ui/static/validation/__next.validation.txt +1 -1
  209. ui/static/validation/index.html +1 -1
  210. ui/static/validation/index.txt +2 -2
  211. pycharter/db/schemas/.ipynb_checkpoints/data_contract-checkpoint.py +0 -160
  212. pycharter-0.0.20.dist-info/RECORD +0 -247
  213. {pycharter-0.0.20.dist-info → pycharter-0.0.22.dist-info}/WHEEL +0 -0
  214. {pycharter-0.0.20.dist-info → pycharter-0.0.22.dist-info}/entry_points.txt +0 -0
  215. {pycharter-0.0.20.dist-info → pycharter-0.0.22.dist-info}/licenses/LICENSE +0 -0
  216. {pycharter-0.0.20.dist-info → pycharter-0.0.22.dist-info}/top_level.txt +0 -0
  217. /ui/static/_next/static/{tNTkVW6puVXC4bAm4WrHl → 0rYA78L88aUyD2Uh38hhX}/_buildManifest.js +0 -0
  218. /ui/static/_next/static/{tNTkVW6puVXC4bAm4WrHl → 0rYA78L88aUyD2Uh38hhX}/_ssgManifest.js +0 -0
  219. /ui/static/{_next → static/_next}/static/chunks/4e310fe5005770a3.css +0 -0
  220. /ui/static/{_next → static/_next}/static/chunks/5fc14c00a2779dc5.js +0 -0
  221. /ui/static/{_next → static/_next}/static/chunks/b584574fdc8ab13e.js +0 -0
  222. /ui/static/{_next → static/_next}/static/chunks/d5989c94d3614b3a.js +0 -0
@@ -1,29 +1,32 @@
1
1
  """
2
- Generic ETL Orchestrator - Runtime ETL pipeline execution from contract artifacts.
2
+ ETL Orchestrator - Streaming ETL pipeline with simple operations, JSONata, and custom functions.
3
3
 
4
- This orchestrator reads contract artifacts (schema, coercion rules, validation rules)
5
- and ETL configuration files (extract, transform, load) and executes the ETL pipeline
6
- dynamically using streaming mode for memory-efficient processing.
4
+ Executes ETL pipelines: Extract Transform (Simple Operations JSONata → Custom Functions) → Load.
5
+
6
+ Transformation Pipeline:
7
+ 1. Simple Operations: rename, convert, defaults, add, select, drop (declarative, easy to use)
8
+ 2. JSONata: Powerful query language for complex transformations (full JSONata support)
9
+ 3. Custom Functions: Import and run external Python modules/functions
7
10
  """
8
11
 
9
12
  import asyncio
10
13
  import gc
14
+ import importlib
11
15
  import logging
16
+ import re
12
17
  import uuid
13
18
  import warnings
14
19
  from collections import Counter, defaultdict
15
- from datetime import date, datetime, timedelta
20
+ from datetime import datetime
16
21
  from pathlib import Path
17
22
  from typing import Any, AsyncIterator, Callable, Dict, List, Optional, Tuple
18
23
 
24
+ import jsonata
19
25
  import yaml
20
26
 
21
27
  from pycharter.contract_parser import ContractMetadata, parse_contract_file
22
28
  from pycharter.etl_generator.checkpoint import CheckpointManager
23
- from pycharter.etl_generator.database import (
24
- get_database_connection,
25
- load_data,
26
- )
29
+ from pycharter.etl_generator.database import get_database_connection, load_data
27
30
  from pycharter.etl_generator.dlq import DeadLetterQueue, DLQReason
28
31
  from pycharter.etl_generator.extraction import extract_with_pagination_streaming
29
32
  from pycharter.etl_generator.progress import ETLProgress, ProgressTracker
@@ -31,32 +34,14 @@ from pycharter.utils.value_injector import resolve_values
31
34
 
32
35
  logger = logging.getLogger(__name__)
33
36
 
34
- # Optional dependency for memory monitoring
37
+ # Optional memory monitoring
35
38
  try:
36
39
  import psutil
37
40
  PSUTIL_AVAILABLE = True
38
41
  except ImportError:
39
42
  PSUTIL_AVAILABLE = False
40
43
 
41
-
42
- # ============================================================================
43
- # CONSTANTS
44
- # ============================================================================
45
-
46
- COMPUTED_DATETIME_NOW = "@now"
47
- COMPUTED_DATETIME_UTC_NOW = "@utcnow"
48
- COMPUTED_WEEK_START = "@week_start"
49
- COMPUTED_WEEK_END = "@week_end"
50
44
  DEFAULT_BATCH_SIZE = 1000
51
- DEFAULT_MAX_DEPTH = 10
52
- DEFAULT_SEPARATOR = "_"
53
-
54
- # Datetime parsing formats (in order of preference)
55
- DATETIME_FORMATS = [
56
- '%Y-%m-%dT%H:%M:%S',
57
- '%Y-%m-%d %H:%M:%S',
58
- '%Y-%m-%d'
59
- ]
60
45
 
61
46
 
62
47
  class ETLOrchestrator:
@@ -473,526 +458,517 @@ class ETLOrchestrator:
473
458
  yield batch
474
459
 
475
460
  # ============================================================================
476
- # TRANSFORMATION
461
+ # TRANSFORMATION (Simple Operations → JSONata → Custom Functions)
477
462
  # ============================================================================
478
463
 
479
464
  def transform(self, raw_data: List[Dict[str, Any]], **kwargs) -> List[Dict[str, Any]]:
480
465
  """
481
- Transform extracted data according to transformation rules.
466
+ Transform data using simple operations, JSONata expressions, and/or custom Python functions.
482
467
 
483
- Transformation steps (in order):
484
- 1. Rename fields (with optional flattening)
485
- 2. Copy remaining fields (with optional flattening)
486
- 3. Add computed fields from kwargs
487
- 4. Apply type conversions
488
- 5. Apply fill_null rules
489
- 6. Drop specified fields
468
+ Pipeline order (applied sequentially):
469
+ 1. Simple operations (rename, select, drop, convert, defaults, add)
470
+ 2. JSONata transformation (if configured)
471
+ 3. Custom function execution (if configured)
490
472
 
491
473
  Args:
492
474
  raw_data: Raw data from extraction
493
- **kwargs: Additional transformation parameters
475
+ **kwargs: Additional parameters (passed to custom functions)
494
476
 
495
477
  Returns:
496
478
  Transformed data
479
+
480
+ Example - Simple operations:
481
+ transform_config:
482
+ rename:
483
+ oldName: new_name
484
+ camelCase: snake_case
485
+ select:
486
+ - field1
487
+ - field2
488
+ convert:
489
+ price: float
490
+ quantity: integer
491
+ defaults:
492
+ status: "pending"
493
+
494
+ Example - JSONata (advanced):
495
+ transform_config:
496
+ jsonata:
497
+ expression: |
498
+ $.{
499
+ "ticker": symbol,
500
+ "avg_price": $average(prices)
501
+ }
502
+
503
+ Example - Custom function:
504
+ transform_config:
505
+ custom_function:
506
+ module: "myproject.transforms"
507
+ function: "optimize_data"
508
+ mode: "batch"
497
509
  """
498
510
  if not self.transform_config:
499
511
  return raw_data
500
512
 
501
- # Extract transformation rules once
502
- transform_rules = self._extract_transform_rules()
503
-
504
- transformed_data = []
505
- for record in raw_data:
506
- transformed_record = self._transform_single_record(
507
- record, transform_rules, **kwargs
508
- )
509
- transformed_data.append(transformed_record)
510
-
511
- return transformed_data
512
-
513
- def _extract_transform_rules(self) -> Dict[str, Any]:
514
- """Extract and return all transformation rules from config."""
515
- return {
516
- 'rename': self.transform_config.get('rename', {}),
517
- 'flatten': self.transform_config.get('flatten', {}),
518
- 'type': self.transform_config.get('type', {}),
519
- 'fill_null': self.transform_config.get('fill_null', {}),
520
- 'drop': self.transform_config.get('drop', []),
521
- }
513
+ data = raw_data
514
+
515
+ # Step 1: Apply simple operations (in order)
516
+ # Support both new 'transform' key and legacy top-level keys for backward compatibility
517
+ simple_ops = {}
518
+
519
+ # New format: transform: { rename: {...}, select: [...] }
520
+ if 'transform' in self.transform_config:
521
+ simple_ops = self.transform_config.get('transform', {})
522
+
523
+ # Legacy format: rename: {...} at top level (for backward compatibility)
524
+ if 'rename' in self.transform_config and 'transform' not in self.transform_config:
525
+ simple_ops['rename'] = self.transform_config.get('rename')
526
+ if 'select' in self.transform_config and 'transform' not in self.transform_config:
527
+ simple_ops['select'] = self.transform_config.get('select')
528
+ if 'drop' in self.transform_config and 'transform' not in self.transform_config:
529
+ simple_ops['drop'] = self.transform_config.get('drop')
530
+ if 'convert' in self.transform_config and 'transform' not in self.transform_config:
531
+ simple_ops['convert'] = self.transform_config.get('convert')
532
+ if 'defaults' in self.transform_config and 'transform' not in self.transform_config:
533
+ simple_ops['defaults'] = self.transform_config.get('defaults')
534
+ if 'add' in self.transform_config and 'transform' not in self.transform_config:
535
+ simple_ops['add'] = self.transform_config.get('add')
536
+
537
+ if simple_ops:
538
+ data = self._apply_simple_operations(data, simple_ops)
539
+
540
+ # Step 2: Apply JSONata transformation (if configured)
541
+ jsonata_config = self.transform_config.get('jsonata')
542
+ if jsonata_config:
543
+ data = self._apply_jsonata(data, jsonata_config)
544
+
545
+ # Step 3: Apply custom function (if configured)
546
+ custom_func_config = self.transform_config.get('custom_function')
547
+ if custom_func_config:
548
+ data = self._apply_custom_function(data, custom_func_config, **kwargs)
549
+
550
+ return data
522
551
 
523
- def _transform_single_record(
552
+ def _apply_simple_operations(
524
553
  self,
525
- record: Dict[str, Any],
526
- transform_rules: Dict[str, Any],
527
- **kwargs
528
- ) -> Dict[str, Any]:
529
- """Transform a single record through all transformation steps."""
530
- rename_rules = transform_rules['rename']
531
- flatten_rules = transform_rules['flatten']
532
- type_rules = transform_rules['type']
533
- fill_null_rules = transform_rules['fill_null']
534
- drop_fields = transform_rules['drop']
535
-
536
- transformed_record = {}
537
-
538
- # Step 1: Apply rename transformations (with flattening if configured)
539
- transformed_record.update(
540
- self._apply_rename_transformations(record, rename_rules, flatten_rules)
541
- )
554
+ data: List[Dict[str, Any]],
555
+ config: Dict[str, Any]
556
+ ) -> List[Dict[str, Any]]:
557
+ """
558
+ Apply simple declarative transformation operations.
542
559
 
543
- # Step 2: Copy remaining fields (with flattening if configured)
544
- transformed_record.update(
545
- self._copy_remaining_fields(record, rename_rules, flatten_rules, drop_fields)
546
- )
560
+ Operations are applied in this order:
561
+ 1. rename - Rename fields (old_name: new_name)
562
+ 2. convert - Convert field types (field: type)
563
+ 3. defaults - Set default values for missing fields
564
+ 4. add - Add computed fields with expressions
565
+ 5. select - Keep only specified fields
566
+ 6. drop - Remove specified fields
547
567
 
548
- # Step 3: Add computed fields from kwargs (with rename rules applied)
549
- self._add_computed_fields(transformed_record, rename_rules, **kwargs)
568
+ Args:
569
+ data: Input data (list of records)
570
+ config: Simple operations configuration
550
571
 
551
- # Step 4: Apply type conversions
552
- self._apply_type_conversions(transformed_record, type_rules)
572
+ Returns:
573
+ Transformed data
553
574
 
554
- # Step 5: Apply fill_null rules
555
- self._apply_fill_null_rules(transformed_record, fill_null_rules)
575
+ Example config:
576
+ transform:
577
+ rename:
578
+ oldName: new_name
579
+ camelCase: snake_case
580
+ convert:
581
+ price: float
582
+ quantity: integer
583
+ active: boolean
584
+ defaults:
585
+ status: "pending"
586
+ priority: 0
587
+ add:
588
+ full_name: "${first_name} ${last_name}"
589
+ created_at: "now()"
590
+ record_id: "uuid()"
591
+ select:
592
+ - field1
593
+ - field2
594
+ drop:
595
+ - internal_id
596
+ - debug_info
597
+ """
598
+ if not data:
599
+ return data
600
+
601
+ result = []
602
+
603
+ # Get available fields from first record for validation
604
+ available_fields = set(data[0].keys()) if data else set()
605
+
606
+ # Step 1: Rename fields
607
+ rename_map = config.get('rename', {})
608
+ if rename_map:
609
+ # Validate rename mappings
610
+ missing_fields = [old for old in rename_map.keys() if old not in available_fields]
611
+ if missing_fields:
612
+ logger.warning(
613
+ f"Rename operation: Fields not found in data: {missing_fields}. "
614
+ f"Available fields: {sorted(available_fields)}"
615
+ )
556
616
 
557
- # Step 6: Drop specified fields
558
- self._drop_fields(transformed_record, drop_fields)
617
+ # Step 2: Convert types
618
+ convert_map = config.get('convert', {})
559
619
 
560
- return transformed_record
561
-
562
- def _apply_rename_transformations(
563
- self,
564
- record: Dict[str, Any],
565
- rename_rules: Dict[str, str],
566
- flatten_rules: Dict[str, Any]
567
- ) -> Dict[str, Any]:
568
- """Apply rename transformations, handling flattening if configured."""
569
- transformed = {}
570
-
571
- for source_field, target_field in rename_rules.items():
572
- if source_field in record:
573
- value = record[source_field]
574
- flattened = self._maybe_flatten_field(source_field, value, flatten_rules)
575
- if flattened is not None:
576
- transformed.update(flattened)
577
- else:
578
- transformed[target_field] = value
579
- elif target_field in record:
580
- transformed[target_field] = record[target_field]
620
+ # Step 3: Defaults
621
+ defaults_map = config.get('defaults', {})
581
622
 
582
- return transformed
583
-
584
- def _copy_remaining_fields(
585
- self,
586
- record: Dict[str, Any],
587
- rename_rules: Dict[str, str],
588
- flatten_rules: Dict[str, Any],
589
- drop_fields: List[str]
590
- ) -> Dict[str, Any]:
591
- """Copy remaining fields not in rename rules, handling flattening if configured."""
592
- transformed = {}
593
-
594
- for key, value in record.items():
595
- if key not in rename_rules and key not in transformed:
596
- if key not in drop_fields:
597
- flattened = self._maybe_flatten_field(key, value, flatten_rules)
598
- if flattened is not None:
599
- transformed.update(flattened)
600
- else:
601
- transformed[key] = value
623
+ # Step 4: Add computed fields
624
+ add_map = config.get('add', {})
625
+
626
+ # Step 5: Select fields (keep only these)
627
+ select_fields = config.get('select')
628
+
629
+ # Step 6: Drop fields (remove these)
630
+ drop_fields = set(config.get('drop', []))
602
631
 
603
- return transformed
632
+ for record in data:
633
+ transformed = dict(record)
634
+
635
+ # 1. Rename
636
+ if rename_map:
637
+ for old_name, new_name in rename_map.items():
638
+ if old_name in transformed:
639
+ transformed[new_name] = transformed.pop(old_name)
640
+
641
+ # 2. Convert types
642
+ if convert_map:
643
+ for field_name, target_type in convert_map.items():
644
+ if field_name in transformed:
645
+ try:
646
+ transformed[field_name] = self._convert_type(
647
+ transformed[field_name], target_type
648
+ )
649
+ except (ValueError, TypeError) as e:
650
+ logger.warning(
651
+ f"Failed to convert field '{field_name}' to {target_type}: {e}. "
652
+ f"Keeping original value."
653
+ )
654
+
655
+ # 3. Apply defaults
656
+ if defaults_map:
657
+ for field_name, default_value in defaults_map.items():
658
+ if field_name not in transformed or transformed[field_name] is None:
659
+ transformed[field_name] = default_value
660
+
661
+ # 4. Add computed fields
662
+ if add_map:
663
+ for field_name, expression in add_map.items():
664
+ try:
665
+ transformed[field_name] = self._evaluate_expression(
666
+ expression, transformed
667
+ )
668
+ except Exception as e:
669
+ logger.warning(
670
+ f"Failed to compute field '{field_name}': {e}. "
671
+ f"Skipping this field."
672
+ )
673
+
674
+ # 5. Select (keep only specified fields)
675
+ if select_fields:
676
+ transformed = {
677
+ k: v for k, v in transformed.items()
678
+ if k in select_fields
679
+ }
680
+
681
+ # 6. Drop (remove specified fields)
682
+ if drop_fields:
683
+ transformed = {
684
+ k: v for k, v in transformed.items()
685
+ if k not in drop_fields
686
+ }
687
+
688
+ result.append(transformed)
689
+
690
+ return result
604
691
 
605
- def _maybe_flatten_field(
606
- self,
607
- field_name: str,
608
- value: Any,
609
- flatten_rules: Dict[str, Any]
610
- ) -> Optional[Dict[str, Any]]:
692
+ def _convert_type(self, value: Any, target_type: str) -> Any:
611
693
  """
612
- Flatten a field if it's configured for flattening, otherwise return None.
694
+ Convert a value to the specified type.
695
+
696
+ Args:
697
+ value: Value to convert
698
+ target_type: Target type (string, integer, float, boolean, datetime, date)
613
699
 
614
700
  Returns:
615
- Flattened dictionary if field should be flattened, None otherwise
701
+ Converted value
616
702
  """
617
- if field_name not in flatten_rules:
618
- return None
619
-
620
- flatten_config = flatten_rules[field_name]
621
- if not flatten_config.get('enabled', True):
703
+ if value is None:
622
704
  return None
623
705
 
624
- if isinstance(value, dict):
625
- return self._flatten_nested_object(value, field_name, flatten_config)
626
- elif isinstance(value, list):
627
- return self._flatten_array(value, field_name, flatten_config)
706
+ target_type_lower = target_type.lower().strip()
628
707
 
629
- return None
708
+ if target_type_lower in ('str', 'string'):
709
+ return str(value)
710
+ elif target_type_lower in ('int', 'integer'):
711
+ if isinstance(value, str):
712
+ # Try to parse as float first (handles "1.0" -> 1)
713
+ try:
714
+ return int(float(value))
715
+ except ValueError:
716
+ return int(value)
717
+ return int(value)
718
+ elif target_type_lower in ('float', 'number', 'numeric'):
719
+ if isinstance(value, str):
720
+ return float(value)
721
+ return float(value)
722
+ elif target_type_lower in ('bool', 'boolean'):
723
+ if isinstance(value, str):
724
+ return value.lower() in ('true', '1', 'yes', 'on')
725
+ return bool(value)
726
+ elif target_type_lower == 'datetime':
727
+ from datetime import datetime
728
+ if isinstance(value, str):
729
+ # Try common datetime formats
730
+ for fmt in [
731
+ '%Y-%m-%dT%H:%M:%S',
732
+ '%Y-%m-%dT%H:%M:%S.%f',
733
+ '%Y-%m-%dT%H:%M:%SZ',
734
+ '%Y-%m-%dT%H:%M:%S.%fZ',
735
+ '%Y-%m-%d %H:%M:%S',
736
+ '%Y-%m-%d %H:%M:%S.%f',
737
+ ]:
738
+ try:
739
+ return datetime.strptime(value, fmt)
740
+ except ValueError:
741
+ continue
742
+ raise ValueError(f"Cannot parse datetime: {value}")
743
+ return value
744
+ elif target_type_lower == 'date':
745
+ from datetime import date, datetime
746
+ if isinstance(value, str):
747
+ # Try common date formats
748
+ for fmt in ['%Y-%m-%d', '%Y/%m/%d', '%m/%d/%Y']:
749
+ try:
750
+ dt = datetime.strptime(value, fmt)
751
+ return dt.date()
752
+ except ValueError:
753
+ continue
754
+ raise ValueError(f"Cannot parse date: {value}")
755
+ elif isinstance(value, datetime):
756
+ return value.date()
757
+ return value
758
+ else:
759
+ raise ValueError(f"Unsupported target type: {target_type}")
630
760
 
631
- def _add_computed_fields(
632
- self,
633
- transformed_record: Dict[str, Any],
634
- rename_rules: Dict[str, str],
635
- **kwargs
636
- ) -> None:
761
+ def _evaluate_expression(self, expression: str, record: Dict[str, Any]) -> Any:
637
762
  """
638
- Add computed fields from kwargs to transformed records.
639
-
640
- This method adds input parameters (defined in input_params) to transformed records,
641
- overriding any values from the API response. It also applies rename rules to input
642
- parameters, allowing them to be stored with different names (e.g., 'type' -> 'direction').
763
+ Evaluate a simple expression in the context of a record.
643
764
 
644
- This ensures that:
645
- - Request parameters (e.g., symbol, period) always use the input values
646
- - Input parameters take precedence over API response values for consistency
647
- - Missing parameters are added if not present in the API response
648
- - Input parameters can be renamed according to transform rules
765
+ Supports:
766
+ - Field references: "${field_name}"
767
+ - String concatenation: "${field1} ${field2}"
768
+ - Simple functions: "now()", "uuid()"
769
+ - Literal values (if no placeholders)
649
770
 
650
771
  Args:
651
- transformed_record: The record being transformed (modified in place)
652
- rename_rules: Dictionary mapping source field names to target field names
653
- **kwargs: Input parameters passed to the pipeline
654
- """
655
- # Always override input parameters from kwargs to ensure consistency
656
- # This ensures that request parameters (like symbol, period) always use
657
- # the values from the input parameters, not from the API response
658
- for param_name in self.input_params.keys():
659
- if param_name in kwargs:
660
- # Apply rename rule if one exists for this parameter
661
- # This allows input parameters to be stored with different names
662
- # (e.g., 'type' parameter -> 'direction' field)
663
- target_name = rename_rules.get(param_name, param_name)
664
- transformed_record[target_name] = kwargs[param_name]
665
-
666
- def _apply_type_conversions(
667
- self,
668
- transformed_record: Dict[str, Any],
669
- type_rules: Dict[str, str]
670
- ) -> None:
671
- """Apply type conversions to fields."""
672
- for field, field_type in type_rules.items():
673
- if field in transformed_record:
674
- transformed_record[field] = self._convert_type(
675
- transformed_record[field], field_type
676
- )
677
-
678
- def _apply_fill_null_rules(
679
- self,
680
- transformed_record: Dict[str, Any],
681
- fill_null_rules: Dict[str, Any]
682
- ) -> None:
683
- """Apply fill_null rules, handling computed datetime values."""
684
- for field, config in fill_null_rules.items():
685
- if field not in transformed_record or transformed_record[field] is None:
686
- default_value = config.get('default') if isinstance(config, dict) else config
687
- computed_value = self._compute_datetime_value(default_value)
688
- transformed_record[field] = computed_value if computed_value is not None else default_value
689
-
690
- def _compute_datetime_value(self, value: Any) -> Optional[datetime]:
691
- """
692
- Compute datetime value from special constants.
772
+ expression: Expression string
773
+ record: Record dictionary for context
693
774
 
694
- Args:
695
- value: Value to check (may be a computed datetime constant)
696
-
697
775
  Returns:
698
- Computed datetime if value is a computed constant, None otherwise
776
+ Evaluated result
777
+
778
+ Examples:
779
+ "${first_name} ${last_name}" -> "John Doe"
780
+ "now()" -> "2024-01-01T12:00:00"
781
+ "uuid()" -> "123e4567-e89b-12d3-a456-426614174000"
782
+ "static_value" -> "static_value"
699
783
  """
700
- if value == COMPUTED_DATETIME_NOW:
701
- return datetime.now()
702
- elif value == COMPUTED_DATETIME_UTC_NOW:
703
- return datetime.utcnow()
704
- elif value == COMPUTED_WEEK_START:
705
- return self._get_week_start()
706
- elif value == COMPUTED_WEEK_END:
707
- return self._get_week_end()
708
- return None
709
-
710
- def _get_week_start(self) -> datetime:
711
- """Calculate Monday of current week (00:00:00 UTC)."""
712
- now = datetime.utcnow()
713
- days_since_monday = now.weekday()
714
- week_start = now - timedelta(days=days_since_monday)
715
- return week_start.replace(hour=0, minute=0, second=0, microsecond=0)
716
-
717
- def _get_week_end(self) -> datetime:
718
- """Calculate Sunday of current week (23:59:59.999999 UTC)."""
719
- week_start = self._get_week_start()
720
- return week_start + timedelta(days=6, hours=23, minutes=59, seconds=59, microseconds=999999)
721
-
722
- def _drop_fields(self, transformed_record: Dict[str, Any], drop_fields: List[str]) -> None:
723
- """Remove specified fields from the record."""
724
- for field in drop_fields:
725
- transformed_record.pop(field, None)
784
+ if not isinstance(expression, str):
785
+ return expression
786
+
787
+ expression = expression.strip()
788
+
789
+ # Handle special functions
790
+ if expression == 'now()':
791
+ return datetime.now().isoformat()
792
+ elif expression == 'uuid()':
793
+ return str(uuid.uuid4())
794
+
795
+ # Handle field references and string interpolation
796
+ try:
797
+ # Simple string interpolation: "${field1} ${field2}"
798
+ result = expression
799
+ placeholders_found = False
800
+
801
+ # Find all ${...} placeholders
802
+ placeholder_pattern = r'\$\{([^}]+)\}'
803
+ matches = re.findall(placeholder_pattern, expression)
804
+
805
+ if matches:
806
+ placeholders_found = True
807
+ for field_name in matches:
808
+ if field_name in record:
809
+ value = record[field_name]
810
+ placeholder = f"${{{field_name}}}"
811
+ result = result.replace(placeholder, str(value) if value is not None else '')
812
+ else:
813
+ logger.warning(
814
+ f"Expression '{expression}': Field '{field_name}' not found in record. "
815
+ f"Available fields: {sorted(record.keys())}"
816
+ )
817
+ # Replace with empty string if field not found
818
+ placeholder = f"${{{field_name}}}"
819
+ result = result.replace(placeholder, '')
820
+
821
+ # If no placeholders were found and it's not a function, return as literal
822
+ if not placeholders_found and not expression.endswith('()'):
823
+ return expression
824
+
825
+ return result
826
+ except Exception as e:
827
+ raise ValueError(f"Failed to evaluate expression '{expression}': {e}") from e
726
828
 
727
- def _flatten_nested_object(
728
- self,
729
- nested_obj: Dict[str, Any],
730
- field_name: str,
731
- config: Dict[str, Any],
732
- depth: int = 0,
733
- max_depth: int = 10
734
- ) -> Dict[str, Any]:
829
+ def _apply_jsonata(
830
+ self,
831
+ data: List[Dict[str, Any]],
832
+ config: Dict[str, Any]
833
+ ) -> List[Dict[str, Any]]:
735
834
  """
736
- Flatten a nested object according to configuration.
737
- Supports simple key mapping, recursive flattening, and prefix patterns.
835
+ Apply JSONata expression to transform data.
738
836
 
739
837
  Args:
740
- nested_obj: The nested dictionary to flatten
741
- field_name: Name of the field containing the nested object
742
- config: Flatten configuration from transform.yaml
743
- depth: Current recursion depth
744
- max_depth: Maximum recursion depth to prevent infinite loops
838
+ data: Input data (list of records)
839
+ config: JSONata configuration with 'expression' and optional 'mode'
745
840
 
746
841
  Returns:
747
- Dictionary with flattened keys
842
+ Transformed data
843
+
844
+ Example config:
845
+ jsonata:
846
+ expression: |
847
+ $.{
848
+ "ticker": symbol,
849
+ "avg_price": $average(prices),
850
+ "total_volume": $sum(volumes)
851
+ }
852
+ mode: "batch" # or "record"
748
853
  """
749
- if not isinstance(nested_obj, dict):
750
- return {field_name: nested_obj}
751
-
752
- flattened = {}
753
- strategy = config.get('strategy', 'simple') # simple, recursive
754
- separator = config.get('separator', DEFAULT_SEPARATOR)
755
- max_depth_config = config.get('max_depth', max_depth)
756
- key_mapping = config.get('key_mapping', {})
757
- prefix = config.get('prefix', '')
758
-
759
- if depth >= max_depth_config:
760
- # Prevent infinite recursion
761
- return {field_name: nested_obj}
762
-
763
- if strategy == 'recursive':
764
- # Recursively flatten all nested objects
765
- for key, value in nested_obj.items():
766
- if isinstance(value, dict):
767
- # Recursively flatten nested dict
768
- nested_flattened = self._flatten_nested_object(
769
- value,
770
- f"{field_name}{separator}{key}",
771
- config,
772
- depth + 1,
773
- max_depth_config
774
- )
775
- flattened.update(nested_flattened)
776
- elif isinstance(value, list):
777
- # Handle arrays
778
- array_config = config.get('array_fields', [])
779
- flatten_arrays = config.get('flatten_arrays', False)
780
- if key in array_config or flatten_arrays:
781
- # Flatten array items
782
- for idx, item in enumerate(value):
783
- if isinstance(item, dict):
784
- item_flattened = self._flatten_nested_object(
785
- item,
786
- f"{field_name}{separator}{key}{separator}{idx}",
787
- config,
788
- depth + 1,
789
- max_depth_config
790
- )
791
- flattened.update(item_flattened)
792
- else:
793
- flattened[f"{field_name}{separator}{key}{separator}{idx}"] = item
794
- else:
795
- # Keep array as-is
796
- flattened[f"{field_name}{separator}{key}"] = value
797
- else:
798
- # Simple value
799
- flattened[f"{field_name}{separator}{key}"] = value
800
- else: # strategy == 'simple' (default)
801
- # Simple flattening with key mapping or prefix
802
- for nested_key, nested_value in nested_obj.items():
803
- if isinstance(nested_value, dict):
804
- # Nested dict - recursively flatten if recursive is enabled
805
- if config.get('recursive', False):
806
- nested_flattened = self._flatten_nested_object(
807
- nested_value,
808
- f"{field_name}{separator}{nested_key}",
809
- config,
810
- depth + 1,
811
- max_depth_config
812
- )
813
- flattened.update(nested_flattened)
814
- else:
815
- # Keep as nested or use prefix
816
- if prefix:
817
- flattened[f"{prefix}{nested_key}"] = nested_value
818
- else:
819
- flattened[f"{field_name}{separator}{nested_key}"] = nested_value
820
- elif isinstance(nested_value, list):
821
- # Array - handle if configured
822
- if config.get('flatten_arrays', False):
823
- for idx, item in enumerate(nested_value):
824
- if isinstance(item, dict):
825
- item_flattened = self._flatten_nested_object(
826
- item,
827
- f"{field_name}{separator}{nested_key}{separator}{idx}",
828
- config,
829
- depth + 1,
830
- max_depth_config
831
- )
832
- flattened.update(item_flattened)
833
- else:
834
- flattened[f"{field_name}{separator}{nested_key}{separator}{idx}"] = item
835
- else:
836
- # Keep array as-is
837
- if key_mapping and nested_key in key_mapping:
838
- flattened_key = key_mapping[nested_key]
839
- elif prefix:
840
- flattened_key = f"{prefix}{nested_key}"
841
- else:
842
- flattened_key = f"{field_name}{separator}{nested_key}"
843
- flattened[flattened_key] = nested_value
844
- else:
845
- # Simple value - use key mapping, prefix, or default
846
- if key_mapping and nested_key in key_mapping:
847
- # Use mapped key directly for simple strategy
848
- # For array flattening, field_name will already include index (e.g., "orders_0")
849
- mapped_key = key_mapping[nested_key]
850
- # Only add field_name prefix if we're in array context (field_name contains separator)
851
- if field_name and separator in field_name:
852
- # This is from array flattening - preserve the indexed prefix
853
- flattened_key = f"{field_name}{separator}{mapped_key}"
854
- else:
855
- # Simple strategy - use mapped key directly (no prefix)
856
- flattened_key = mapped_key
857
- elif prefix:
858
- flattened_key = f"{prefix}{nested_key}"
859
- else:
860
- flattened_key = f"{field_name}{separator}{nested_key}"
861
- flattened[flattened_key] = nested_value
854
+ expression_str = config.get('expression')
855
+ if not expression_str:
856
+ return data
857
+
858
+ mode = config.get('mode', 'batch')
862
859
 
863
- return flattened
860
+ try:
861
+ expr = jsonata.Jsonata(expression_str)
862
+
863
+ if mode == 'batch':
864
+ # Apply expression to entire dataset
865
+ result = expr.evaluate(data)
866
+ if result is None:
867
+ return []
868
+ return result if isinstance(result, list) else [result]
869
+ else:
870
+ # Apply expression to each record individually
871
+ return [expr.evaluate(record) for record in data if expr.evaluate(record) is not None]
872
+
873
+ except Exception as e:
874
+ logger.error(f"JSONata transformation failed: {e}")
875
+ raise ValueError(f"JSONata transformation error: {e}") from e
864
876
 
865
- def _flatten_array(
877
+ def _apply_custom_function(
866
878
  self,
867
- array_obj: List[Any],
868
- field_name: str,
879
+ data: List[Dict[str, Any]],
869
880
  config: Dict[str, Any],
870
- depth: int = 0,
871
- max_depth: int = 10
872
- ) -> Dict[str, Any]:
881
+ **kwargs
882
+ ) -> List[Dict[str, Any]]:
873
883
  """
874
- Flatten an array of nested objects according to configuration.
884
+ Execute a custom Python function for transformation.
875
885
 
876
886
  Args:
877
- array_obj: The array to flatten
878
- field_name: Name of the field containing the array
879
- config: Flatten configuration from transform.yaml
880
- depth: Current recursion depth
881
- max_depth: Maximum recursion depth
887
+ data: Input data
888
+ config: Custom function configuration
889
+ **kwargs: Additional parameters passed to the function
882
890
 
883
891
  Returns:
884
- Dictionary with flattened keys
885
- """
886
- if not isinstance(array_obj, list):
887
- return {field_name: array_obj}
888
-
889
- flattened = {}
890
- strategy = config.get('strategy', 'array_flatten')
891
- separator = config.get('separator', DEFAULT_SEPARATOR)
892
- max_depth_config = config.get('max_depth', max_depth)
893
- item_flatten = config.get('item_flatten', {})
894
- aggregate = config.get('aggregate', False) # If True, aggregate all items into single keys
895
-
896
- if depth >= max_depth_config:
897
- return {field_name: array_obj}
898
-
899
- if strategy == 'array_flatten' or strategy == 'simple':
900
- # Flatten each item in the array
901
- for idx, item in enumerate(array_obj):
902
- if isinstance(item, dict):
903
- # Use item_flatten config if provided, otherwise use parent config
904
- item_config = item_flatten if item_flatten else config
905
-
906
- if aggregate:
907
- # Aggregate: all items contribute to same keys (last wins or merge)
908
- item_flattened = self._flatten_nested_object(
909
- item,
910
- field_name, # Same base name for all items
911
- item_config,
912
- depth + 1,
913
- max_depth_config
914
- )
915
- # Merge or overwrite (last item wins)
916
- flattened.update(item_flattened)
917
- else:
918
- # Indexed: each item gets its own keys with index
919
- # Create a new config that uses the indexed field name as base
920
- indexed_config = item_config.copy()
921
- indexed_config['_base_field'] = f"{field_name}{separator}{idx}"
922
-
923
- item_flattened = self._flatten_nested_object(
924
- item,
925
- f"{field_name}{separator}{idx}",
926
- indexed_config,
927
- depth + 1,
928
- max_depth_config
929
- )
930
- flattened.update(item_flattened)
931
- else:
932
- # Simple value in array
933
- if aggregate:
934
- # For aggregate, use field name directly (last value wins)
935
- flattened[field_name] = item
936
- else:
937
- flattened[f"{field_name}{separator}{idx}"] = item
938
- else:
939
- # Keep array as-is
940
- flattened[field_name] = array_obj
892
+ Transformed data
941
893
 
942
- return flattened
943
-
944
- def _convert_type(self, value: Any, target_type: str) -> Any:
945
- """Convert value to target type."""
946
- if value is None:
947
- return None
894
+ Example config:
895
+ custom_function:
896
+ module: "pyoptima"
897
+ function: "optimize_from_etl_inputs"
898
+ mode: "batch"
899
+ kwargs:
900
+ method: "min_volatility"
901
+ solver: "ipopt"
902
+
903
+ Alternative config (using callable path):
904
+ custom_function:
905
+ callable: "myproject.transforms.optimize_portfolio"
906
+ mode: "batch"
907
+ """
908
+ # Get module and function
909
+ callable_path = config.get('callable')
910
+ module_path = config.get('module')
911
+ func_name = config.get('function')
912
+
913
+ if callable_path:
914
+ # Parse "module.submodule.function" format
915
+ parts = callable_path.rsplit('.', 1)
916
+ if len(parts) != 2:
917
+ raise ValueError(f"Invalid callable path: {callable_path}. Use 'module.function' format.")
918
+ module_path, func_name = parts
919
+
920
+ if not module_path or not func_name:
921
+ raise ValueError("custom_function requires either 'callable' or 'module' + 'function'")
922
+
923
+ # Dynamic import
924
+ try:
925
+ module = importlib.import_module(module_path)
926
+ func = getattr(module, func_name)
927
+ except ImportError as e:
928
+ raise ValueError(f"Cannot import module '{module_path}': {e}") from e
929
+ except AttributeError as e:
930
+ raise ValueError(f"Function '{func_name}' not found in module '{module_path}'") from e
931
+
932
+ # Handle class-based methods (e.g., pyoptima optimization methods)
933
+ if isinstance(func, type):
934
+ instance = func()
935
+ if hasattr(instance, 'optimize'):
936
+ func = instance.optimize
937
+ elif hasattr(instance, 'run'):
938
+ func = instance.run
939
+ elif hasattr(instance, '__call__'):
940
+ func = instance
941
+ else:
942
+ raise ValueError(f"Class '{func_name}' has no 'optimize', 'run', or '__call__' method")
948
943
 
949
- type_map = {
950
- 'string': str,
951
- 'integer': int,
952
- 'int': int,
953
- 'float': float,
954
- 'double': float,
955
- 'boolean': bool,
956
- 'bool': bool,
957
- 'datetime': self._parse_datetime,
958
- 'timestamp': self._parse_datetime,
959
- 'date': self._parse_date,
960
- }
944
+ # Get mode and kwargs
945
+ mode = config.get('mode', 'batch')
946
+ func_kwargs = config.get('kwargs', {})
961
947
 
962
- converter = type_map.get(target_type.lower())
963
- if converter:
964
- try:
965
- # Converter is either a type (callable) or a method (also callable)
966
- return converter(value)
967
- except (ValueError, TypeError):
968
- return value
948
+ # Merge with runtime kwargs
949
+ merged_kwargs = {**func_kwargs, **kwargs}
969
950
 
970
- return value
971
-
972
- def _parse_datetime(self, value: Any) -> Any:
973
- """Parse datetime value from string or return datetime object."""
974
- if isinstance(value, datetime):
975
- return value
976
- if isinstance(value, str):
977
- for fmt in DATETIME_FORMATS:
978
- try:
979
- return datetime.strptime(value, fmt)
980
- except ValueError:
981
- continue
982
- return value
983
-
984
- def _parse_date(self, value: Any) -> Any:
985
- """Parse date value."""
986
- if isinstance(value, date):
987
- return value
988
- if isinstance(value, datetime):
989
- return value.date()
990
- if isinstance(value, str):
991
- try:
992
- return datetime.strptime(value, '%Y-%m-%d').date()
993
- except ValueError:
994
- pass
995
- return value
951
+ try:
952
+ if mode == 'batch':
953
+ result = func(data, **merged_kwargs)
954
+ if result is None:
955
+ return []
956
+ return result if isinstance(result, list) else [result]
957
+ else:
958
+ # Record mode
959
+ results = []
960
+ for record in data:
961
+ record_result = func(record, **merged_kwargs)
962
+ if record_result is not None:
963
+ if isinstance(record_result, list):
964
+ results.extend(record_result)
965
+ else:
966
+ results.append(record_result)
967
+ return results
968
+
969
+ except Exception as e:
970
+ logger.error(f"Custom function '{func_name}' failed: {e}")
971
+ raise ValueError(f"Custom function error: {e}") from e
996
972
 
997
973
  # ============================================================================
998
974
  # LOADING