informatica-python 1.4.2__tar.gz → 1.5.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (30)
  1. {informatica_python-1.4.2 → informatica_python-1.5.0}/PKG-INFO +1 -1
  2. {informatica_python-1.4.2 → informatica_python-1.5.0}/informatica_python/__init__.py +1 -1
  3. {informatica_python-1.4.2 → informatica_python-1.5.0}/informatica_python/cli.py +5 -0
  4. {informatica_python-1.4.2 → informatica_python-1.5.0}/informatica_python/generators/helper_gen.py +81 -7
  5. {informatica_python-1.4.2 → informatica_python-1.5.0}/informatica_python/generators/mapping_gen.py +79 -80
  6. informatica_python-1.5.0/informatica_python/utils/expression_converter.py +437 -0
  7. informatica_python-1.5.0/informatica_python/utils/lib_adapters.py +164 -0
  8. {informatica_python-1.4.2 → informatica_python-1.5.0}/informatica_python.egg-info/PKG-INFO +1 -1
  9. {informatica_python-1.4.2 → informatica_python-1.5.0}/informatica_python.egg-info/SOURCES.txt +3 -1
  10. {informatica_python-1.4.2 → informatica_python-1.5.0}/pyproject.toml +1 -1
  11. informatica_python-1.5.0/tests/test_integration.py +518 -0
  12. informatica_python-1.4.2/informatica_python/utils/expression_converter.py +0 -264
  13. {informatica_python-1.4.2 → informatica_python-1.5.0}/LICENSE +0 -0
  14. {informatica_python-1.4.2 → informatica_python-1.5.0}/README.md +0 -0
  15. {informatica_python-1.4.2 → informatica_python-1.5.0}/informatica_python/converter.py +0 -0
  16. {informatica_python-1.4.2 → informatica_python-1.5.0}/informatica_python/generators/__init__.py +0 -0
  17. {informatica_python-1.4.2 → informatica_python-1.5.0}/informatica_python/generators/config_gen.py +0 -0
  18. {informatica_python-1.4.2 → informatica_python-1.5.0}/informatica_python/generators/error_log_gen.py +0 -0
  19. {informatica_python-1.4.2 → informatica_python-1.5.0}/informatica_python/generators/sql_gen.py +0 -0
  20. {informatica_python-1.4.2 → informatica_python-1.5.0}/informatica_python/generators/workflow_gen.py +0 -0
  21. {informatica_python-1.4.2 → informatica_python-1.5.0}/informatica_python/models.py +0 -0
  22. {informatica_python-1.4.2 → informatica_python-1.5.0}/informatica_python/parser.py +0 -0
  23. {informatica_python-1.4.2 → informatica_python-1.5.0}/informatica_python/utils/__init__.py +0 -0
  24. {informatica_python-1.4.2 → informatica_python-1.5.0}/informatica_python/utils/datatype_map.py +0 -0
  25. {informatica_python-1.4.2 → informatica_python-1.5.0}/informatica_python.egg-info/dependency_links.txt +0 -0
  26. {informatica_python-1.4.2 → informatica_python-1.5.0}/informatica_python.egg-info/entry_points.txt +0 -0
  27. {informatica_python-1.4.2 → informatica_python-1.5.0}/informatica_python.egg-info/requires.txt +0 -0
  28. {informatica_python-1.4.2 → informatica_python-1.5.0}/informatica_python.egg-info/top_level.txt +0 -0
  29. {informatica_python-1.4.2 → informatica_python-1.5.0}/setup.cfg +0 -0
  30. {informatica_python-1.4.2 → informatica_python-1.5.0}/tests/test_converter.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: informatica-python
3
- Version: 1.4.2
3
+ Version: 1.5.0
4
4
  Summary: Convert Informatica PowerCenter workflow XML to Python/PySpark code
5
5
  Author: Nick
6
6
  License: MIT
@@ -7,7 +7,7 @@ Licensed under the MIT License.
7
7
 
8
8
  from informatica_python.converter import InformaticaConverter
9
9
 
10
- __version__ = "1.4.2"
10
+ __version__ = "1.5.0"
11
11
  __author__ = "Nick"
12
12
  __license__ = "MIT"
13
13
  __all__ = ["InformaticaConverter"]
@@ -41,6 +41,11 @@ def main():
41
41
  default=None,
42
42
  help="Save parsed JSON to a file",
43
43
  )
44
+ parser.add_argument(
45
+ "--param-file",
46
+ default=None,
47
+ help="Path to Informatica .param file for variable substitution",
48
+ )
44
49
 
45
50
  args = parser.parse_args()
46
51
 
@@ -44,13 +44,20 @@ def generate_helper_functions(folder: FolderDef, data_lib: str = "pandas") -> st
44
44
  lines.append('logger = logging.getLogger("informatica_converter")')
45
45
  lines.append("")
46
46
  lines.append("")
47
- lines.append("def load_config(config_path='config.yml'):")
48
- lines.append(' """Load configuration from YAML file."""')
47
+ lines.append("def load_config(config_path='config.yml', param_file=None):")
48
+ lines.append(' """Load configuration from YAML file, optionally merging Informatica .param file."""')
49
49
  lines.append(" with open(config_path, 'r') as f:")
50
- lines.append(" return yaml.safe_load(f)")
50
+ lines.append(" config = yaml.safe_load(f) or {}")
51
+ lines.append(" if param_file:")
52
+ lines.append(" params = parse_param_file(param_file)")
53
+ lines.append(" config['params'] = params")
54
+ lines.append(" for key, val in params.items():")
55
+ lines.append(" os.environ[f'INFA_VAR_{key}'] = str(val)")
56
+ lines.append(" return config")
51
57
  lines.append("")
52
58
  lines.append("")
53
59
 
60
+ _add_param_file_functions(lines)
54
61
  _add_db_functions(lines, data_lib)
55
62
  _add_file_functions(lines, data_lib)
56
63
  _add_expression_helpers(lines)
@@ -59,6 +66,61 @@ def generate_helper_functions(folder: FolderDef, data_lib: str = "pandas") -> st
59
66
  return "\n".join(lines)
60
67
 
61
68
 
69
+ def _add_param_file_functions(lines):
70
+ lines.append("# ============================================================")
71
+ lines.append("# Informatica Parameter File Support")
72
+ lines.append("# ============================================================")
73
+ lines.append("")
74
+ lines.append("")
75
+ lines.append("def parse_param_file(param_path):")
76
+ lines.append(' """')
77
+ lines.append(" Parse an Informatica .param file into a flat dict of variable names to values.")
78
+ lines.append(" Supports standard Informatica parameter file format:")
79
+ lines.append(" [Global]")
80
+ lines.append(" $$VAR_NAME=value")
81
+ lines.append(" [folder_name.WF:workflow_name.ST:session_name]")
82
+ lines.append(" $$CONN_NAME=value")
83
+ lines.append(' """')
84
+ lines.append(" params = {}")
85
+ lines.append(" if not os.path.exists(param_path):")
86
+ lines.append(" logger.warning(f'Parameter file not found: {param_path}')")
87
+ lines.append(" return params")
88
+ lines.append("")
89
+ lines.append(" current_section = 'Global'")
90
+ lines.append(" with open(param_path, 'r') as f:")
91
+ lines.append(" for line_num, line in enumerate(f, 1):")
92
+ lines.append(" line = line.strip()")
93
+ lines.append(" if not line or line.startswith('#'):")
94
+ lines.append(" continue")
95
+ lines.append(" if line.startswith('[') and line.endswith(']'):")
96
+ lines.append(" current_section = line[1:-1].strip()")
97
+ lines.append(" continue")
98
+ lines.append(" if '=' in line:")
99
+ lines.append(" key, _, value = line.partition('=')")
100
+ lines.append(" key = key.strip()")
101
+ lines.append(" value = value.strip()")
102
+ lines.append(" clean_key = key.lstrip('$')")
103
+ lines.append(" params[clean_key] = value")
104
+ lines.append(" if current_section != 'Global':")
105
+ lines.append(" params[f'{current_section}.{clean_key}'] = value")
106
+ lines.append(" logger.info(f'Loaded {len(params)} parameters from {param_path}')")
107
+ lines.append(" return params")
108
+ lines.append("")
109
+ lines.append("")
110
+ lines.append("def get_param(config, var_name, default=''):")
111
+ lines.append(' """Get a parameter value from config params, then env vars, then default."""')
112
+ lines.append(" clean = var_name.lstrip('$')")
113
+ lines.append(" params = config.get('params', {})")
114
+ lines.append(" if clean in params:")
115
+ lines.append(" return params[clean]")
116
+ lines.append(" env_val = os.environ.get(f'INFA_VAR_{clean}')")
117
+ lines.append(" if env_val is not None:")
118
+ lines.append(" return env_val")
119
+ lines.append(" return default")
120
+ lines.append("")
121
+ lines.append("")
122
+
123
+
62
124
  def _add_db_functions(lines, data_lib):
63
125
  lines.append("# ============================================================")
64
126
  lines.append("# Database Operations")
@@ -1060,14 +1122,26 @@ def _add_expression_helpers(lines):
1060
1122
  lines.append(" return None")
1061
1123
  lines.append("")
1062
1124
  lines.append("")
1063
- lines.append("def get_variable(var_name):")
1064
- lines.append(' """Get workflow/mapping variable value."""')
1065
- lines.append(" return os.environ.get(f'INFA_VAR_{var_name}', '')")
1125
+ lines.append("_param_store = {}")
1126
+ lines.append("")
1127
+ lines.append("")
1128
+ lines.append("def get_variable(var_name, config=None):")
1129
+ lines.append(' """Get workflow/mapping variable value from params, env vars, or param store."""')
1130
+ lines.append(" clean = var_name.lstrip('$')")
1131
+ lines.append(" if config and 'params' in config:")
1132
+ lines.append(" val = config['params'].get(clean)")
1133
+ lines.append(" if val is not None:")
1134
+ lines.append(" return val")
1135
+ lines.append(" if clean in _param_store:")
1136
+ lines.append(" return _param_store[clean]")
1137
+ lines.append(" return os.environ.get(f'INFA_VAR_{clean}', '')")
1066
1138
  lines.append("")
1067
1139
  lines.append("")
1068
1140
  lines.append("def set_variable(var_name, value):")
1069
1141
  lines.append(' """Set workflow/mapping variable value."""')
1070
- lines.append(" os.environ[f'INFA_VAR_{var_name}'] = str(value)")
1142
+ lines.append(" clean = var_name.lstrip('$')")
1143
+ lines.append(" _param_store[clean] = value")
1144
+ lines.append(" os.environ[f'INFA_VAR_{clean}'] = str(value)")
1071
1145
  lines.append(" return value")
1072
1146
  lines.append("")
1073
1147
  lines.append("")
@@ -4,11 +4,16 @@ from informatica_python.models import (
4
4
  TransformationDef, ConnectorDef, InstanceDef, MappletDef,
5
5
  )
6
6
  from informatica_python.utils.expression_converter import (
7
- convert_expression, convert_sql_expression,
7
+ convert_expression, convert_expression_vectorized,
8
+ convert_sql_expression, convert_filter_vectorized,
8
9
  parse_join_condition, parse_lookup_condition,
9
10
  parse_aggregate_expression, PANDAS_AGG_MAP,
10
11
  )
11
12
  from informatica_python.utils.datatype_map import get_python_type
13
+ from informatica_python.utils.lib_adapters import (
14
+ lib_merge, lib_sort, lib_groupby_agg, lib_groupby_first,
15
+ lib_concat, lib_empty_df, lib_copy, lib_rank,
16
+ )
12
17
 
13
18
 
14
19
  def _inline_mapplets(mapping, folder):
@@ -184,6 +189,7 @@ def generate_mapping_code(mapping: MappingDef, folder: FolderDef,
184
189
  lines.append(f"Auto-generated by informatica-python")
185
190
  lines.append('"""')
186
191
  lines.append("")
192
+ lines.append("import numpy as np")
187
193
  lines.append("from helper_functions import *")
188
194
  lines.append("")
189
195
  lines.append("")
@@ -266,7 +272,7 @@ def generate_mapping_code(mapping: MappingDef, folder: FolderDef,
266
272
  for tx in processing_order:
267
273
  if tx.type in ("Source Qualifier", "Application Source Qualifier"):
268
274
  continue
269
- _generate_transformation(lines, tx, connector_graph, source_dfs, transform_map, instance_map)
275
+ _generate_transformation(lines, tx, connector_graph, source_dfs, transform_map, instance_map, data_lib)
270
276
 
271
277
  for tgt_name, tgt_def in target_map.items():
272
278
  _generate_target_write(lines, tgt_name, tgt_def, connector_graph, source_dfs, transform_map, instance_map, session_overrides)
@@ -560,7 +566,7 @@ def _generate_source_qualifier(lines, sq, source_map, source_dfs, connector_grap
560
566
  lines.append("")
561
567
 
562
568
 
563
- def _generate_transformation(lines, tx, connector_graph, source_dfs, transform_map, instance_map):
569
+ def _generate_transformation(lines, tx, connector_graph, source_dfs, transform_map, instance_map, data_lib="pandas"):
564
570
  tx_safe = _safe_name(tx.name)
565
571
  tx_type = tx.type.lower().strip()
566
572
 
@@ -588,17 +594,17 @@ def _generate_transformation(lines, tx, connector_graph, source_dfs, transform_m
588
594
  elif tx_type == "filter":
589
595
  _gen_filter_transform(lines, tx, tx_safe, input_df, source_dfs)
590
596
  elif tx_type in ("aggregator",):
591
- _gen_aggregator_transform(lines, tx, tx_safe, input_df, source_dfs)
597
+ _gen_aggregator_transform(lines, tx, tx_safe, input_df, source_dfs, data_lib)
592
598
  elif tx_type == "sorter":
593
- _gen_sorter_transform(lines, tx, tx_safe, input_df, source_dfs)
599
+ _gen_sorter_transform(lines, tx, tx_safe, input_df, source_dfs, data_lib)
594
600
  elif tx_type in ("joiner",):
595
- _gen_joiner_transform(lines, tx, tx_safe, input_df, input_sources, source_dfs, connector_graph)
601
+ _gen_joiner_transform(lines, tx, tx_safe, input_df, input_sources, source_dfs, connector_graph, data_lib)
596
602
  elif tx_type in ("lookup procedure", "lookup"):
597
- _gen_lookup_transform(lines, tx, tx_safe, input_df, source_dfs)
603
+ _gen_lookup_transform(lines, tx, tx_safe, input_df, source_dfs, data_lib)
598
604
  elif tx_type == "router":
599
605
  _gen_router_transform(lines, tx, tx_safe, input_df, source_dfs)
600
606
  elif tx_type in ("union",):
601
- _gen_union_transform(lines, tx, tx_safe, input_sources, source_dfs)
607
+ _gen_union_transform(lines, tx, tx_safe, input_sources, source_dfs, data_lib)
602
608
  elif tx_type in ("update strategy",):
603
609
  _gen_update_strategy(lines, tx, tx_safe, input_df, source_dfs)
604
610
  elif tx_type == "sequence generator":
@@ -606,9 +612,9 @@ def _generate_transformation(lines, tx, connector_graph, source_dfs, transform_m
606
612
  elif tx_type in ("normalizer",):
607
613
  _gen_normalizer_transform(lines, tx, tx_safe, input_df, source_dfs)
608
614
  elif tx_type in ("rank",):
609
- _gen_rank_transform(lines, tx, tx_safe, input_df, source_dfs)
615
+ _gen_rank_transform(lines, tx, tx_safe, input_df, source_dfs, data_lib)
610
616
  elif tx_type in ("custom transformation",):
611
- _gen_custom_transform(lines, tx, tx_safe, input_df, input_sources, source_dfs)
617
+ _gen_custom_transform(lines, tx, tx_safe, input_df, input_sources, source_dfs, data_lib)
612
618
  elif tx_type in ("stored procedure",):
613
619
  _gen_stored_proc(lines, tx, tx_safe, input_df, source_dfs)
614
620
  elif tx_type in ("java",):
@@ -617,7 +623,8 @@ def _generate_transformation(lines, tx, connector_graph, source_dfs, transform_m
617
623
  _gen_sql_transform(lines, tx, tx_safe, input_df, source_dfs)
618
624
  else:
619
625
  lines.append(f" # TODO: Unsupported transformation type '{tx.type}' - passing through")
620
- lines.append(f" df_{tx_safe} = {input_df}.copy() if hasattr({input_df}, 'copy') else {input_df}")
626
+ copy_expr = lib_copy(data_lib, input_df)
627
+ lines.append(f" df_{tx_safe} = {copy_expr}")
621
628
  source_dfs[tx.name] = f"df_{tx_safe}"
622
629
 
623
630
  lines.append("")
@@ -629,12 +636,12 @@ def _gen_expression_transform(lines, tx, tx_safe, input_df, source_dfs):
629
636
  for fld in tx.fields:
630
637
  if fld.expression and fld.expression.strip() and fld.expression.strip() != fld.name:
631
638
  has_expressions = True
632
- expr_py = convert_expression(fld.expression)
639
+ expr_vec = convert_expression_vectorized(fld.expression, f"df_{tx_safe}")
633
640
  lines.append(f" # {fld.name} = {fld.expression}")
634
641
  if fld.porttype and "OUTPUT" in fld.porttype.upper() and "INPUT" not in fld.porttype.upper():
635
- lines.append(f" df_{tx_safe}['{fld.name}'] = {expr_py} # output-only port")
642
+ lines.append(f" df_{tx_safe}['{fld.name}'] = {expr_vec}")
636
643
  else:
637
- lines.append(f" df_{tx_safe}['{fld.name}'] = {expr_py}")
644
+ lines.append(f" df_{tx_safe}['{fld.name}'] = {expr_vec}")
638
645
  if not has_expressions:
639
646
  lines.append(f" # Pass-through expression (no transformations)")
640
647
  source_dfs[tx.name] = f"df_{tx_safe}"
@@ -646,15 +653,15 @@ def _gen_filter_transform(lines, tx, tx_safe, input_df, source_dfs):
646
653
  if attr.name == "Filter Condition":
647
654
  filter_condition = attr.value
648
655
  if filter_condition:
649
- expr_py = convert_expression(filter_condition)
656
+ expr_vec = convert_filter_vectorized(filter_condition, input_df)
650
657
  lines.append(f" # Filter: {filter_condition}")
651
- lines.append(f" df_{tx_safe} = {input_df}[{expr_py}].copy()")
658
+ lines.append(f" df_{tx_safe} = {input_df}[{expr_vec}].copy()")
652
659
  else:
653
660
  lines.append(f" df_{tx_safe} = {input_df}.copy()")
654
661
  source_dfs[tx.name] = f"df_{tx_safe}"
655
662
 
656
663
 
657
- def _gen_aggregator_transform(lines, tx, tx_safe, input_df, source_dfs):
664
+ def _gen_aggregator_transform(lines, tx, tx_safe, input_df, source_dfs, data_lib="pandas"):
658
665
  group_by_ports = []
659
666
  agg_ports = []
660
667
  for fld in tx.fields:
@@ -686,22 +693,18 @@ def _gen_aggregator_transform(lines, tx, tx_safe, input_df, source_dfs):
686
693
 
687
694
  if group_by_ports and agg_dict:
688
695
  lines.append(f" # Aggregator: group by {group_by_ports}")
689
- agg_spec = {}
690
- for out_name, (col, func) in agg_dict.items():
691
- agg_spec[out_name] = f"pd.NamedAgg(column='{col}', aggfunc='{func}')"
692
-
693
- lines.append(f" df_{tx_safe} = {input_df}.groupby({group_by_ports}, as_index=False).agg(")
694
- for out_name, spec in agg_spec.items():
695
- lines.append(f" {out_name}={spec},")
696
- lines.append(f" )")
696
+ agg_expr = lib_groupby_agg(data_lib, input_df, group_by_ports, agg_dict)
697
+ lines.append(f" df_{tx_safe} = {agg_expr}")
697
698
 
698
699
  if rename_map:
699
700
  lines.append(f" df_{tx_safe} = df_{tx_safe}.rename(columns={rename_map})")
700
701
  elif group_by_ports:
701
702
  lines.append(f" # Aggregator: group by {group_by_ports}")
702
- lines.append(f" df_{tx_safe} = {input_df}.groupby({group_by_ports}, as_index=False).agg('first')")
703
+ first_expr = lib_groupby_first(data_lib, input_df, group_by_ports)
704
+ lines.append(f" df_{tx_safe} = {first_expr}")
703
705
  else:
704
- lines.append(f" df_{tx_safe} = {input_df}.copy()")
706
+ copy_expr = lib_copy(data_lib, input_df)
707
+ lines.append(f" df_{tx_safe} = {copy_expr}")
705
708
 
706
709
  for col_name, expr_text in computed_aggs:
707
710
  expr_py = convert_expression(expr_text)
@@ -711,20 +714,22 @@ def _gen_aggregator_transform(lines, tx, tx_safe, input_df, source_dfs):
711
714
  source_dfs[tx.name] = f"df_{tx_safe}"
712
715
 
713
716
 
714
- def _gen_sorter_transform(lines, tx, tx_safe, input_df, source_dfs):
717
+ def _gen_sorter_transform(lines, tx, tx_safe, input_df, source_dfs, data_lib="pandas"):
715
718
  sort_keys = []
716
719
  sort_dirs = []
717
720
  for fld in tx.fields:
718
721
  sort_keys.append(fld.name)
719
722
  sort_dirs.append(True)
720
723
  if sort_keys:
721
- lines.append(f" df_{tx_safe} = {input_df}.sort_values(by={sort_keys}, ascending={sort_dirs}).reset_index(drop=True)")
724
+ sort_expr = lib_sort(data_lib, input_df, sort_keys, sort_dirs)
725
+ lines.append(f" df_{tx_safe} = {sort_expr}")
722
726
  else:
723
- lines.append(f" df_{tx_safe} = {input_df}.copy()")
727
+ copy_expr = lib_copy(data_lib, input_df)
728
+ lines.append(f" df_{tx_safe} = {copy_expr}")
724
729
  source_dfs[tx.name] = f"df_{tx_safe}"
725
730
 
726
731
 
727
- def _gen_joiner_transform(lines, tx, tx_safe, input_df, input_sources, source_dfs, connector_graph=None):
732
+ def _gen_joiner_transform(lines, tx, tx_safe, input_df, input_sources, source_dfs, connector_graph=None, data_lib="pandas"):
728
733
  join_type = "inner"
729
734
  join_condition = ""
730
735
  for attr in tx.attributes:
@@ -778,33 +783,29 @@ def _gen_joiner_transform(lines, tx, tx_safe, input_df, input_sources, source_df
778
783
 
779
784
  lines.append(f" # Join ({join_type}): {join_condition or 'auto'}")
780
785
  if left_keys and right_keys:
781
- lines.append(f" df_{tx_safe} = {df_detail}.merge(")
782
- lines.append(f" {df_master},")
783
- lines.append(f" left_on={left_keys},")
784
- lines.append(f" right_on={right_keys},")
785
- lines.append(f" how='{join_type}',")
786
- lines.append(f" suffixes=('', '_master')")
787
- lines.append(f" )")
786
+ merge_expr = lib_merge(data_lib, df_detail, df_master,
787
+ left_on=left_keys, right_on=right_keys, how=join_type)
788
+ lines.append(f" df_{tx_safe} = {merge_expr}")
788
789
  else:
789
790
  common_cols = [f for f in detail_fields if f in master_fields]
790
791
  if common_cols:
791
- lines.append(f" df_{tx_safe} = {df_detail}.merge(")
792
- lines.append(f" {df_master},")
793
- lines.append(f" on={common_cols},")
794
- lines.append(f" how='{join_type}',")
795
- lines.append(f" suffixes=('', '_master')")
796
- lines.append(f" )")
792
+ merge_expr = lib_merge(data_lib, df_detail, df_master,
793
+ on=common_cols, how=join_type)
794
+ lines.append(f" df_{tx_safe} = {merge_expr}")
797
795
  else:
798
- lines.append(f" df_{tx_safe} = {df_detail}.merge({df_master}, how='{join_type}', suffixes=('', '_master'))")
796
+ merge_expr = lib_merge(data_lib, df_detail, df_master, how=join_type)
797
+ lines.append(f" df_{tx_safe} = {merge_expr}")
799
798
  elif len(src_list) == 1:
800
799
  df1 = source_dfs.get(src_list[0], f"df_{_safe_name(src_list[0])}")
801
- lines.append(f" df_{tx_safe} = {df1}.copy()")
800
+ copy_expr = lib_copy(data_lib, df1)
801
+ lines.append(f" df_{tx_safe} = {copy_expr}")
802
802
  else:
803
- lines.append(f" df_{tx_safe} = {input_df}.copy()")
803
+ copy_expr = lib_copy(data_lib, input_df)
804
+ lines.append(f" df_{tx_safe} = {copy_expr}")
804
805
  source_dfs[tx.name] = f"df_{tx_safe}"
805
806
 
806
807
 
807
- def _gen_lookup_transform(lines, tx, tx_safe, input_df, source_dfs):
808
+ def _gen_lookup_transform(lines, tx, tx_safe, input_df, source_dfs, data_lib="pandas"):
808
809
  lookup_table = ""
809
810
  lookup_sql = ""
810
811
  lookup_condition = ""
@@ -862,13 +863,10 @@ def _gen_lookup_transform(lines, tx, tx_safe, input_df, source_dfs):
862
863
  else:
863
864
  lines.append(f" df_lkp_{tx_safe} = df_lkp_{tx_safe}[lkp_select_cols_{tx_safe}].drop_duplicates(subset={lookup_keys}, keep='first')")
864
865
 
865
- lines.append(f" df_{tx_safe} = {input_df}.merge(")
866
- lines.append(f" df_lkp_{tx_safe},")
867
- lines.append(f" left_on={input_keys},")
868
- lines.append(f" right_on={lookup_keys},")
869
- lines.append(f" how='left',")
870
- lines.append(f" suffixes=('', '_lkp')")
871
- lines.append(f" )")
866
+ merge_expr = lib_merge(data_lib, input_df, f"df_lkp_{tx_safe}",
867
+ left_on=input_keys, right_on=lookup_keys,
868
+ how="left", suffixes=("", "_lkp"))
869
+ lines.append(f" df_{tx_safe} = {merge_expr}")
872
870
 
873
871
  drop_cols = [k for k in lookup_keys if k not in input_keys]
874
872
  if drop_cols:
@@ -910,7 +908,7 @@ def _gen_router_transform(lines, tx, tx_safe, input_df, source_dfs):
910
908
  source_dfs[tx.name] = f"df_{tx_safe}"
911
909
 
912
910
 
913
- def _gen_union_transform(lines, tx, tx_safe, input_sources, source_dfs):
911
+ def _gen_union_transform(lines, tx, tx_safe, input_sources, source_dfs, data_lib="pandas"):
914
912
  dfs_to_union = []
915
913
  for src in input_sources:
916
914
  df_name = source_dfs.get(src, f"df_{_safe_name(src)}")
@@ -918,11 +916,14 @@ def _gen_union_transform(lines, tx, tx_safe, input_sources, source_dfs):
918
916
 
919
917
  if len(dfs_to_union) > 1:
920
918
  df_list = ", ".join(dfs_to_union)
921
- lines.append(f" df_{tx_safe} = pd.concat([{df_list}], ignore_index=True)")
919
+ concat_expr = lib_concat(data_lib, df_list)
920
+ lines.append(f" df_{tx_safe} = {concat_expr}")
922
921
  elif dfs_to_union:
923
- lines.append(f" df_{tx_safe} = {dfs_to_union[0]}.copy()")
922
+ copy_expr = lib_copy(data_lib, dfs_to_union[0])
923
+ lines.append(f" df_{tx_safe} = {copy_expr}")
924
924
  else:
925
- lines.append(f" df_{tx_safe} = pd.DataFrame()")
925
+ empty_expr = lib_empty_df(data_lib)
926
+ lines.append(f" df_{tx_safe} = {empty_expr}")
926
927
  source_dfs[tx.name] = f"df_{tx_safe}"
927
928
 
928
929
 
@@ -1037,7 +1038,7 @@ def _gen_normalizer_transform(lines, tx, tx_safe, input_df, source_dfs):
1037
1038
  source_dfs[tx.name] = f"df_{tx_safe}"
1038
1039
 
1039
1040
 
1040
- def _gen_rank_transform(lines, tx, tx_safe, input_df, source_dfs):
1041
+ def _gen_rank_transform(lines, tx, tx_safe, input_df, source_dfs, data_lib="pandas"):
1041
1042
  rank_port = None
1042
1043
  group_by_ports = []
1043
1044
  top_bottom = "TOP"
@@ -1080,19 +1081,15 @@ def _gen_rank_transform(lines, tx, tx_safe, input_df, source_dfs):
1080
1081
  rank_out_field = fld.name
1081
1082
  break
1082
1083
 
1083
- lines.append(f" df_{tx_safe} = {input_df}.copy()")
1084
- if rank_port and group_by_ports:
1085
- lines.append(f" # Rank by '{rank_port}' within groups {group_by_ports}")
1086
- lines.append(f" _rank_vals = df_{tx_safe}.groupby({group_by_ports})['{rank_port}'].rank(")
1087
- lines.append(f" method='min', ascending={ascending}")
1088
- lines.append(f" )")
1089
- lines.append(f" df_{tx_safe}['{rank_out_field}'] = _rank_vals.fillna(0).astype(int)")
1090
- if top_n:
1091
- lines.append(f" df_{tx_safe} = df_{tx_safe}[df_{tx_safe}['{rank_out_field}'] <= {top_n}].reset_index(drop=True)")
1092
- elif rank_port:
1093
- lines.append(f" # Rank by '{rank_port}' (no group-by)")
1094
- lines.append(f" _rank_vals = df_{tx_safe}['{rank_port}'].rank(method='min', ascending={ascending})")
1095
- lines.append(f" df_{tx_safe}['{rank_out_field}'] = _rank_vals.fillna(0).astype(int)")
1084
+ copy_expr = lib_copy(data_lib, input_df)
1085
+ lines.append(f" df_{tx_safe} = {copy_expr}")
1086
+ if rank_port:
1087
+ rank_code = lib_rank(data_lib, f"df_{tx_safe}", group_by_ports, rank_port, ascending, rank_out_field)
1088
+ if group_by_ports:
1089
+ lines.append(f" # Rank by '{rank_port}' within groups {group_by_ports}")
1090
+ else:
1091
+ lines.append(f" # Rank by '{rank_port}' (no group-by)")
1092
+ lines.append(f" {rank_code}")
1096
1093
  if top_n:
1097
1094
  lines.append(f" df_{tx_safe} = df_{tx_safe}[df_{tx_safe}['{rank_out_field}'] <= {top_n}].reset_index(drop=True)")
1098
1095
  else:
@@ -1100,7 +1097,7 @@ def _gen_rank_transform(lines, tx, tx_safe, input_df, source_dfs):
1100
1097
  source_dfs[tx.name] = f"df_{tx_safe}"
1101
1098
 
1102
1099
 
1103
- def _gen_custom_transform(lines, tx, tx_safe, input_df, input_sources, source_dfs):
1100
+ def _gen_custom_transform(lines, tx, tx_safe, input_df, input_sources, source_dfs, data_lib="pandas"):
1104
1101
  is_union = False
1105
1102
  output_fields = []
1106
1103
  input_groups = {}
@@ -1108,11 +1105,9 @@ def _gen_custom_transform(lines, tx, tx_safe, input_df, input_sources, source_df
1108
1105
  for fld in tx.fields:
1109
1106
  if "OUTPUT" in (fld.porttype or "").upper():
1110
1107
  output_fields.append(fld)
1111
- group_suffix_match = None
1112
1108
  import re
1113
1109
  m = re.match(r'^(.+?)(\d+)$', fld.name)
1114
1110
  if m and "INPUT" in (fld.porttype or "").upper():
1115
- base_name = m.group(1)
1116
1111
  group_idx = m.group(2)
1117
1112
  if group_idx not in input_groups:
1118
1113
  input_groups[group_idx] = []
@@ -1128,14 +1123,18 @@ def _gen_custom_transform(lines, tx, tx_safe, input_df, input_sources, source_df
1128
1123
  dfs_to_union.append(df_name)
1129
1124
  if len(dfs_to_union) > 1:
1130
1125
  df_list = ", ".join(dfs_to_union)
1131
- lines.append(f" df_{tx_safe} = pd.concat([{df_list}], ignore_index=True)")
1126
+ concat_expr = lib_concat(data_lib, df_list)
1127
+ lines.append(f" df_{tx_safe} = {concat_expr}")
1132
1128
  elif dfs_to_union:
1133
- lines.append(f" df_{tx_safe} = {dfs_to_union[0]}.copy()")
1129
+ copy_expr = lib_copy(data_lib, dfs_to_union[0])
1130
+ lines.append(f" df_{tx_safe} = {copy_expr}")
1134
1131
  else:
1135
- lines.append(f" df_{tx_safe} = pd.DataFrame()")
1132
+ empty_expr = lib_empty_df(data_lib)
1133
+ lines.append(f" df_{tx_safe} = {empty_expr}")
1136
1134
  else:
1137
1135
  lines.append(f" # Custom transformation: {tx.name}")
1138
- lines.append(f" df_{tx_safe} = {input_df}.copy()")
1136
+ copy_expr = lib_copy(data_lib, input_df)
1137
+ lines.append(f" df_{tx_safe} = {copy_expr}")
1139
1138
 
1140
1139
  source_dfs[tx.name] = f"df_{tx_safe}"
1141
1140