informatica-python 1.4.2__tar.gz → 1.5.0__tar.gz
This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.
- {informatica_python-1.4.2 → informatica_python-1.5.0}/PKG-INFO +1 -1
- {informatica_python-1.4.2 → informatica_python-1.5.0}/informatica_python/__init__.py +1 -1
- {informatica_python-1.4.2 → informatica_python-1.5.0}/informatica_python/cli.py +5 -0
- {informatica_python-1.4.2 → informatica_python-1.5.0}/informatica_python/generators/helper_gen.py +81 -7
- {informatica_python-1.4.2 → informatica_python-1.5.0}/informatica_python/generators/mapping_gen.py +79 -80
- informatica_python-1.5.0/informatica_python/utils/expression_converter.py +437 -0
- informatica_python-1.5.0/informatica_python/utils/lib_adapters.py +164 -0
- {informatica_python-1.4.2 → informatica_python-1.5.0}/informatica_python.egg-info/PKG-INFO +1 -1
- {informatica_python-1.4.2 → informatica_python-1.5.0}/informatica_python.egg-info/SOURCES.txt +3 -1
- {informatica_python-1.4.2 → informatica_python-1.5.0}/pyproject.toml +1 -1
- informatica_python-1.5.0/tests/test_integration.py +518 -0
- informatica_python-1.4.2/informatica_python/utils/expression_converter.py +0 -264
- {informatica_python-1.4.2 → informatica_python-1.5.0}/LICENSE +0 -0
- {informatica_python-1.4.2 → informatica_python-1.5.0}/README.md +0 -0
- {informatica_python-1.4.2 → informatica_python-1.5.0}/informatica_python/converter.py +0 -0
- {informatica_python-1.4.2 → informatica_python-1.5.0}/informatica_python/generators/__init__.py +0 -0
- {informatica_python-1.4.2 → informatica_python-1.5.0}/informatica_python/generators/config_gen.py +0 -0
- {informatica_python-1.4.2 → informatica_python-1.5.0}/informatica_python/generators/error_log_gen.py +0 -0
- {informatica_python-1.4.2 → informatica_python-1.5.0}/informatica_python/generators/sql_gen.py +0 -0
- {informatica_python-1.4.2 → informatica_python-1.5.0}/informatica_python/generators/workflow_gen.py +0 -0
- {informatica_python-1.4.2 → informatica_python-1.5.0}/informatica_python/models.py +0 -0
- {informatica_python-1.4.2 → informatica_python-1.5.0}/informatica_python/parser.py +0 -0
- {informatica_python-1.4.2 → informatica_python-1.5.0}/informatica_python/utils/__init__.py +0 -0
- {informatica_python-1.4.2 → informatica_python-1.5.0}/informatica_python/utils/datatype_map.py +0 -0
- {informatica_python-1.4.2 → informatica_python-1.5.0}/informatica_python.egg-info/dependency_links.txt +0 -0
- {informatica_python-1.4.2 → informatica_python-1.5.0}/informatica_python.egg-info/entry_points.txt +0 -0
- {informatica_python-1.4.2 → informatica_python-1.5.0}/informatica_python.egg-info/requires.txt +0 -0
- {informatica_python-1.4.2 → informatica_python-1.5.0}/informatica_python.egg-info/top_level.txt +0 -0
- {informatica_python-1.4.2 → informatica_python-1.5.0}/setup.cfg +0 -0
- {informatica_python-1.4.2 → informatica_python-1.5.0}/tests/test_converter.py +0 -0
{informatica_python-1.4.2 → informatica_python-1.5.0}/informatica_python/generators/helper_gen.py
RENAMED
|
@@ -44,13 +44,20 @@ def generate_helper_functions(folder: FolderDef, data_lib: str = "pandas") -> st
|
|
|
44
44
|
lines.append('logger = logging.getLogger("informatica_converter")')
|
|
45
45
|
lines.append("")
|
|
46
46
|
lines.append("")
|
|
47
|
-
lines.append("def load_config(config_path='config.yml'):")
|
|
48
|
-
lines.append(' """Load configuration from YAML file."""')
|
|
47
|
+
lines.append("def load_config(config_path='config.yml', param_file=None):")
|
|
48
|
+
lines.append(' """Load configuration from YAML file, optionally merging Informatica .param file."""')
|
|
49
49
|
lines.append(" with open(config_path, 'r') as f:")
|
|
50
|
-
lines.append("
|
|
50
|
+
lines.append(" config = yaml.safe_load(f) or {}")
|
|
51
|
+
lines.append(" if param_file:")
|
|
52
|
+
lines.append(" params = parse_param_file(param_file)")
|
|
53
|
+
lines.append(" config['params'] = params")
|
|
54
|
+
lines.append(" for key, val in params.items():")
|
|
55
|
+
lines.append(" os.environ[f'INFA_VAR_{key}'] = str(val)")
|
|
56
|
+
lines.append(" return config")
|
|
51
57
|
lines.append("")
|
|
52
58
|
lines.append("")
|
|
53
59
|
|
|
60
|
+
_add_param_file_functions(lines)
|
|
54
61
|
_add_db_functions(lines, data_lib)
|
|
55
62
|
_add_file_functions(lines, data_lib)
|
|
56
63
|
_add_expression_helpers(lines)
|
|
@@ -59,6 +66,61 @@ def generate_helper_functions(folder: FolderDef, data_lib: str = "pandas") -> st
|
|
|
59
66
|
return "\n".join(lines)
|
|
60
67
|
|
|
61
68
|
|
|
69
|
+
def _add_param_file_functions(lines):
|
|
70
|
+
lines.append("# ============================================================")
|
|
71
|
+
lines.append("# Informatica Parameter File Support")
|
|
72
|
+
lines.append("# ============================================================")
|
|
73
|
+
lines.append("")
|
|
74
|
+
lines.append("")
|
|
75
|
+
lines.append("def parse_param_file(param_path):")
|
|
76
|
+
lines.append(' """')
|
|
77
|
+
lines.append(" Parse an Informatica .param file into a flat dict of variable names to values.")
|
|
78
|
+
lines.append(" Supports standard Informatica parameter file format:")
|
|
79
|
+
lines.append(" [Global]")
|
|
80
|
+
lines.append(" $$VAR_NAME=value")
|
|
81
|
+
lines.append(" [folder_name.WF:workflow_name.ST:session_name]")
|
|
82
|
+
lines.append(" $$CONN_NAME=value")
|
|
83
|
+
lines.append(' """')
|
|
84
|
+
lines.append(" params = {}")
|
|
85
|
+
lines.append(" if not os.path.exists(param_path):")
|
|
86
|
+
lines.append(" logger.warning(f'Parameter file not found: {param_path}')")
|
|
87
|
+
lines.append(" return params")
|
|
88
|
+
lines.append("")
|
|
89
|
+
lines.append(" current_section = 'Global'")
|
|
90
|
+
lines.append(" with open(param_path, 'r') as f:")
|
|
91
|
+
lines.append(" for line_num, line in enumerate(f, 1):")
|
|
92
|
+
lines.append(" line = line.strip()")
|
|
93
|
+
lines.append(" if not line or line.startswith('#'):")
|
|
94
|
+
lines.append(" continue")
|
|
95
|
+
lines.append(" if line.startswith('[') and line.endswith(']'):")
|
|
96
|
+
lines.append(" current_section = line[1:-1].strip()")
|
|
97
|
+
lines.append(" continue")
|
|
98
|
+
lines.append(" if '=' in line:")
|
|
99
|
+
lines.append(" key, _, value = line.partition('=')")
|
|
100
|
+
lines.append(" key = key.strip()")
|
|
101
|
+
lines.append(" value = value.strip()")
|
|
102
|
+
lines.append(" clean_key = key.lstrip('$')")
|
|
103
|
+
lines.append(" params[clean_key] = value")
|
|
104
|
+
lines.append(" if current_section != 'Global':")
|
|
105
|
+
lines.append(" params[f'{current_section}.{clean_key}'] = value")
|
|
106
|
+
lines.append(" logger.info(f'Loaded {len(params)} parameters from {param_path}')")
|
|
107
|
+
lines.append(" return params")
|
|
108
|
+
lines.append("")
|
|
109
|
+
lines.append("")
|
|
110
|
+
lines.append("def get_param(config, var_name, default=''):")
|
|
111
|
+
lines.append(' """Get a parameter value from config params, then env vars, then default."""')
|
|
112
|
+
lines.append(" clean = var_name.lstrip('$')")
|
|
113
|
+
lines.append(" params = config.get('params', {})")
|
|
114
|
+
lines.append(" if clean in params:")
|
|
115
|
+
lines.append(" return params[clean]")
|
|
116
|
+
lines.append(" env_val = os.environ.get(f'INFA_VAR_{clean}')")
|
|
117
|
+
lines.append(" if env_val is not None:")
|
|
118
|
+
lines.append(" return env_val")
|
|
119
|
+
lines.append(" return default")
|
|
120
|
+
lines.append("")
|
|
121
|
+
lines.append("")
|
|
122
|
+
|
|
123
|
+
|
|
62
124
|
def _add_db_functions(lines, data_lib):
|
|
63
125
|
lines.append("# ============================================================")
|
|
64
126
|
lines.append("# Database Operations")
|
|
@@ -1060,14 +1122,26 @@ def _add_expression_helpers(lines):
|
|
|
1060
1122
|
lines.append(" return None")
|
|
1061
1123
|
lines.append("")
|
|
1062
1124
|
lines.append("")
|
|
1063
|
-
lines.append("
|
|
1064
|
-
lines.append(
|
|
1065
|
-
lines.append("
|
|
1125
|
+
lines.append("_param_store = {}")
|
|
1126
|
+
lines.append("")
|
|
1127
|
+
lines.append("")
|
|
1128
|
+
lines.append("def get_variable(var_name, config=None):")
|
|
1129
|
+
lines.append(' """Get workflow/mapping variable value from params, env vars, or param store."""')
|
|
1130
|
+
lines.append(" clean = var_name.lstrip('$')")
|
|
1131
|
+
lines.append(" if config and 'params' in config:")
|
|
1132
|
+
lines.append(" val = config['params'].get(clean)")
|
|
1133
|
+
lines.append(" if val is not None:")
|
|
1134
|
+
lines.append(" return val")
|
|
1135
|
+
lines.append(" if clean in _param_store:")
|
|
1136
|
+
lines.append(" return _param_store[clean]")
|
|
1137
|
+
lines.append(" return os.environ.get(f'INFA_VAR_{clean}', '')")
|
|
1066
1138
|
lines.append("")
|
|
1067
1139
|
lines.append("")
|
|
1068
1140
|
lines.append("def set_variable(var_name, value):")
|
|
1069
1141
|
lines.append(' """Set workflow/mapping variable value."""')
|
|
1070
|
-
lines.append("
|
|
1142
|
+
lines.append(" clean = var_name.lstrip('$')")
|
|
1143
|
+
lines.append(" _param_store[clean] = value")
|
|
1144
|
+
lines.append(" os.environ[f'INFA_VAR_{clean}'] = str(value)")
|
|
1071
1145
|
lines.append(" return value")
|
|
1072
1146
|
lines.append("")
|
|
1073
1147
|
lines.append("")
|
{informatica_python-1.4.2 → informatica_python-1.5.0}/informatica_python/generators/mapping_gen.py
RENAMED
|
@@ -4,11 +4,16 @@ from informatica_python.models import (
|
|
|
4
4
|
TransformationDef, ConnectorDef, InstanceDef, MappletDef,
|
|
5
5
|
)
|
|
6
6
|
from informatica_python.utils.expression_converter import (
|
|
7
|
-
convert_expression,
|
|
7
|
+
convert_expression, convert_expression_vectorized,
|
|
8
|
+
convert_sql_expression, convert_filter_vectorized,
|
|
8
9
|
parse_join_condition, parse_lookup_condition,
|
|
9
10
|
parse_aggregate_expression, PANDAS_AGG_MAP,
|
|
10
11
|
)
|
|
11
12
|
from informatica_python.utils.datatype_map import get_python_type
|
|
13
|
+
from informatica_python.utils.lib_adapters import (
|
|
14
|
+
lib_merge, lib_sort, lib_groupby_agg, lib_groupby_first,
|
|
15
|
+
lib_concat, lib_empty_df, lib_copy, lib_rank,
|
|
16
|
+
)
|
|
12
17
|
|
|
13
18
|
|
|
14
19
|
def _inline_mapplets(mapping, folder):
|
|
@@ -184,6 +189,7 @@ def generate_mapping_code(mapping: MappingDef, folder: FolderDef,
|
|
|
184
189
|
lines.append(f"Auto-generated by informatica-python")
|
|
185
190
|
lines.append('"""')
|
|
186
191
|
lines.append("")
|
|
192
|
+
lines.append("import numpy as np")
|
|
187
193
|
lines.append("from helper_functions import *")
|
|
188
194
|
lines.append("")
|
|
189
195
|
lines.append("")
|
|
@@ -266,7 +272,7 @@ def generate_mapping_code(mapping: MappingDef, folder: FolderDef,
|
|
|
266
272
|
for tx in processing_order:
|
|
267
273
|
if tx.type in ("Source Qualifier", "Application Source Qualifier"):
|
|
268
274
|
continue
|
|
269
|
-
_generate_transformation(lines, tx, connector_graph, source_dfs, transform_map, instance_map)
|
|
275
|
+
_generate_transformation(lines, tx, connector_graph, source_dfs, transform_map, instance_map, data_lib)
|
|
270
276
|
|
|
271
277
|
for tgt_name, tgt_def in target_map.items():
|
|
272
278
|
_generate_target_write(lines, tgt_name, tgt_def, connector_graph, source_dfs, transform_map, instance_map, session_overrides)
|
|
@@ -560,7 +566,7 @@ def _generate_source_qualifier(lines, sq, source_map, source_dfs, connector_grap
|
|
|
560
566
|
lines.append("")
|
|
561
567
|
|
|
562
568
|
|
|
563
|
-
def _generate_transformation(lines, tx, connector_graph, source_dfs, transform_map, instance_map):
|
|
569
|
+
def _generate_transformation(lines, tx, connector_graph, source_dfs, transform_map, instance_map, data_lib="pandas"):
|
|
564
570
|
tx_safe = _safe_name(tx.name)
|
|
565
571
|
tx_type = tx.type.lower().strip()
|
|
566
572
|
|
|
@@ -588,17 +594,17 @@ def _generate_transformation(lines, tx, connector_graph, source_dfs, transform_m
|
|
|
588
594
|
elif tx_type == "filter":
|
|
589
595
|
_gen_filter_transform(lines, tx, tx_safe, input_df, source_dfs)
|
|
590
596
|
elif tx_type in ("aggregator",):
|
|
591
|
-
_gen_aggregator_transform(lines, tx, tx_safe, input_df, source_dfs)
|
|
597
|
+
_gen_aggregator_transform(lines, tx, tx_safe, input_df, source_dfs, data_lib)
|
|
592
598
|
elif tx_type == "sorter":
|
|
593
|
-
_gen_sorter_transform(lines, tx, tx_safe, input_df, source_dfs)
|
|
599
|
+
_gen_sorter_transform(lines, tx, tx_safe, input_df, source_dfs, data_lib)
|
|
594
600
|
elif tx_type in ("joiner",):
|
|
595
|
-
_gen_joiner_transform(lines, tx, tx_safe, input_df, input_sources, source_dfs, connector_graph)
|
|
601
|
+
_gen_joiner_transform(lines, tx, tx_safe, input_df, input_sources, source_dfs, connector_graph, data_lib)
|
|
596
602
|
elif tx_type in ("lookup procedure", "lookup"):
|
|
597
|
-
_gen_lookup_transform(lines, tx, tx_safe, input_df, source_dfs)
|
|
603
|
+
_gen_lookup_transform(lines, tx, tx_safe, input_df, source_dfs, data_lib)
|
|
598
604
|
elif tx_type == "router":
|
|
599
605
|
_gen_router_transform(lines, tx, tx_safe, input_df, source_dfs)
|
|
600
606
|
elif tx_type in ("union",):
|
|
601
|
-
_gen_union_transform(lines, tx, tx_safe, input_sources, source_dfs)
|
|
607
|
+
_gen_union_transform(lines, tx, tx_safe, input_sources, source_dfs, data_lib)
|
|
602
608
|
elif tx_type in ("update strategy",):
|
|
603
609
|
_gen_update_strategy(lines, tx, tx_safe, input_df, source_dfs)
|
|
604
610
|
elif tx_type == "sequence generator":
|
|
@@ -606,9 +612,9 @@ def _generate_transformation(lines, tx, connector_graph, source_dfs, transform_m
|
|
|
606
612
|
elif tx_type in ("normalizer",):
|
|
607
613
|
_gen_normalizer_transform(lines, tx, tx_safe, input_df, source_dfs)
|
|
608
614
|
elif tx_type in ("rank",):
|
|
609
|
-
_gen_rank_transform(lines, tx, tx_safe, input_df, source_dfs)
|
|
615
|
+
_gen_rank_transform(lines, tx, tx_safe, input_df, source_dfs, data_lib)
|
|
610
616
|
elif tx_type in ("custom transformation",):
|
|
611
|
-
_gen_custom_transform(lines, tx, tx_safe, input_df, input_sources, source_dfs)
|
|
617
|
+
_gen_custom_transform(lines, tx, tx_safe, input_df, input_sources, source_dfs, data_lib)
|
|
612
618
|
elif tx_type in ("stored procedure",):
|
|
613
619
|
_gen_stored_proc(lines, tx, tx_safe, input_df, source_dfs)
|
|
614
620
|
elif tx_type in ("java",):
|
|
@@ -617,7 +623,8 @@ def _generate_transformation(lines, tx, connector_graph, source_dfs, transform_m
|
|
|
617
623
|
_gen_sql_transform(lines, tx, tx_safe, input_df, source_dfs)
|
|
618
624
|
else:
|
|
619
625
|
lines.append(f" # TODO: Unsupported transformation type '{tx.type}' - passing through")
|
|
620
|
-
|
|
626
|
+
copy_expr = lib_copy(data_lib, input_df)
|
|
627
|
+
lines.append(f" df_{tx_safe} = {copy_expr}")
|
|
621
628
|
source_dfs[tx.name] = f"df_{tx_safe}"
|
|
622
629
|
|
|
623
630
|
lines.append("")
|
|
@@ -629,12 +636,12 @@ def _gen_expression_transform(lines, tx, tx_safe, input_df, source_dfs):
|
|
|
629
636
|
for fld in tx.fields:
|
|
630
637
|
if fld.expression and fld.expression.strip() and fld.expression.strip() != fld.name:
|
|
631
638
|
has_expressions = True
|
|
632
|
-
|
|
639
|
+
expr_vec = convert_expression_vectorized(fld.expression, f"df_{tx_safe}")
|
|
633
640
|
lines.append(f" # {fld.name} = {fld.expression}")
|
|
634
641
|
if fld.porttype and "OUTPUT" in fld.porttype.upper() and "INPUT" not in fld.porttype.upper():
|
|
635
|
-
lines.append(f" df_{tx_safe}['{fld.name}'] = {
|
|
642
|
+
lines.append(f" df_{tx_safe}['{fld.name}'] = {expr_vec}")
|
|
636
643
|
else:
|
|
637
|
-
lines.append(f" df_{tx_safe}['{fld.name}'] = {
|
|
644
|
+
lines.append(f" df_{tx_safe}['{fld.name}'] = {expr_vec}")
|
|
638
645
|
if not has_expressions:
|
|
639
646
|
lines.append(f" # Pass-through expression (no transformations)")
|
|
640
647
|
source_dfs[tx.name] = f"df_{tx_safe}"
|
|
@@ -646,15 +653,15 @@ def _gen_filter_transform(lines, tx, tx_safe, input_df, source_dfs):
|
|
|
646
653
|
if attr.name == "Filter Condition":
|
|
647
654
|
filter_condition = attr.value
|
|
648
655
|
if filter_condition:
|
|
649
|
-
|
|
656
|
+
expr_vec = convert_filter_vectorized(filter_condition, input_df)
|
|
650
657
|
lines.append(f" # Filter: {filter_condition}")
|
|
651
|
-
lines.append(f" df_{tx_safe} = {input_df}[{
|
|
658
|
+
lines.append(f" df_{tx_safe} = {input_df}[{expr_vec}].copy()")
|
|
652
659
|
else:
|
|
653
660
|
lines.append(f" df_{tx_safe} = {input_df}.copy()")
|
|
654
661
|
source_dfs[tx.name] = f"df_{tx_safe}"
|
|
655
662
|
|
|
656
663
|
|
|
657
|
-
def _gen_aggregator_transform(lines, tx, tx_safe, input_df, source_dfs):
|
|
664
|
+
def _gen_aggregator_transform(lines, tx, tx_safe, input_df, source_dfs, data_lib="pandas"):
|
|
658
665
|
group_by_ports = []
|
|
659
666
|
agg_ports = []
|
|
660
667
|
for fld in tx.fields:
|
|
@@ -686,22 +693,18 @@ def _gen_aggregator_transform(lines, tx, tx_safe, input_df, source_dfs):
|
|
|
686
693
|
|
|
687
694
|
if group_by_ports and agg_dict:
|
|
688
695
|
lines.append(f" # Aggregator: group by {group_by_ports}")
|
|
689
|
-
|
|
690
|
-
|
|
691
|
-
agg_spec[out_name] = f"pd.NamedAgg(column='{col}', aggfunc='{func}')"
|
|
692
|
-
|
|
693
|
-
lines.append(f" df_{tx_safe} = {input_df}.groupby({group_by_ports}, as_index=False).agg(")
|
|
694
|
-
for out_name, spec in agg_spec.items():
|
|
695
|
-
lines.append(f" {out_name}={spec},")
|
|
696
|
-
lines.append(f" )")
|
|
696
|
+
agg_expr = lib_groupby_agg(data_lib, input_df, group_by_ports, agg_dict)
|
|
697
|
+
lines.append(f" df_{tx_safe} = {agg_expr}")
|
|
697
698
|
|
|
698
699
|
if rename_map:
|
|
699
700
|
lines.append(f" df_{tx_safe} = df_{tx_safe}.rename(columns={rename_map})")
|
|
700
701
|
elif group_by_ports:
|
|
701
702
|
lines.append(f" # Aggregator: group by {group_by_ports}")
|
|
702
|
-
|
|
703
|
+
first_expr = lib_groupby_first(data_lib, input_df, group_by_ports)
|
|
704
|
+
lines.append(f" df_{tx_safe} = {first_expr}")
|
|
703
705
|
else:
|
|
704
|
-
|
|
706
|
+
copy_expr = lib_copy(data_lib, input_df)
|
|
707
|
+
lines.append(f" df_{tx_safe} = {copy_expr}")
|
|
705
708
|
|
|
706
709
|
for col_name, expr_text in computed_aggs:
|
|
707
710
|
expr_py = convert_expression(expr_text)
|
|
@@ -711,20 +714,22 @@ def _gen_aggregator_transform(lines, tx, tx_safe, input_df, source_dfs):
|
|
|
711
714
|
source_dfs[tx.name] = f"df_{tx_safe}"
|
|
712
715
|
|
|
713
716
|
|
|
714
|
-
def _gen_sorter_transform(lines, tx, tx_safe, input_df, source_dfs):
|
|
717
|
+
def _gen_sorter_transform(lines, tx, tx_safe, input_df, source_dfs, data_lib="pandas"):
|
|
715
718
|
sort_keys = []
|
|
716
719
|
sort_dirs = []
|
|
717
720
|
for fld in tx.fields:
|
|
718
721
|
sort_keys.append(fld.name)
|
|
719
722
|
sort_dirs.append(True)
|
|
720
723
|
if sort_keys:
|
|
721
|
-
|
|
724
|
+
sort_expr = lib_sort(data_lib, input_df, sort_keys, sort_dirs)
|
|
725
|
+
lines.append(f" df_{tx_safe} = {sort_expr}")
|
|
722
726
|
else:
|
|
723
|
-
|
|
727
|
+
copy_expr = lib_copy(data_lib, input_df)
|
|
728
|
+
lines.append(f" df_{tx_safe} = {copy_expr}")
|
|
724
729
|
source_dfs[tx.name] = f"df_{tx_safe}"
|
|
725
730
|
|
|
726
731
|
|
|
727
|
-
def _gen_joiner_transform(lines, tx, tx_safe, input_df, input_sources, source_dfs, connector_graph=None):
|
|
732
|
+
def _gen_joiner_transform(lines, tx, tx_safe, input_df, input_sources, source_dfs, connector_graph=None, data_lib="pandas"):
|
|
728
733
|
join_type = "inner"
|
|
729
734
|
join_condition = ""
|
|
730
735
|
for attr in tx.attributes:
|
|
@@ -778,33 +783,29 @@ def _gen_joiner_transform(lines, tx, tx_safe, input_df, input_sources, source_df
|
|
|
778
783
|
|
|
779
784
|
lines.append(f" # Join ({join_type}): {join_condition or 'auto'}")
|
|
780
785
|
if left_keys and right_keys:
|
|
781
|
-
|
|
782
|
-
|
|
783
|
-
lines.append(f"
|
|
784
|
-
lines.append(f" right_on={right_keys},")
|
|
785
|
-
lines.append(f" how='{join_type}',")
|
|
786
|
-
lines.append(f" suffixes=('', '_master')")
|
|
787
|
-
lines.append(f" )")
|
|
786
|
+
merge_expr = lib_merge(data_lib, df_detail, df_master,
|
|
787
|
+
left_on=left_keys, right_on=right_keys, how=join_type)
|
|
788
|
+
lines.append(f" df_{tx_safe} = {merge_expr}")
|
|
788
789
|
else:
|
|
789
790
|
common_cols = [f for f in detail_fields if f in master_fields]
|
|
790
791
|
if common_cols:
|
|
791
|
-
|
|
792
|
-
|
|
793
|
-
lines.append(f"
|
|
794
|
-
lines.append(f" how='{join_type}',")
|
|
795
|
-
lines.append(f" suffixes=('', '_master')")
|
|
796
|
-
lines.append(f" )")
|
|
792
|
+
merge_expr = lib_merge(data_lib, df_detail, df_master,
|
|
793
|
+
on=common_cols, how=join_type)
|
|
794
|
+
lines.append(f" df_{tx_safe} = {merge_expr}")
|
|
797
795
|
else:
|
|
798
|
-
|
|
796
|
+
merge_expr = lib_merge(data_lib, df_detail, df_master, how=join_type)
|
|
797
|
+
lines.append(f" df_{tx_safe} = {merge_expr}")
|
|
799
798
|
elif len(src_list) == 1:
|
|
800
799
|
df1 = source_dfs.get(src_list[0], f"df_{_safe_name(src_list[0])}")
|
|
801
|
-
|
|
800
|
+
copy_expr = lib_copy(data_lib, df1)
|
|
801
|
+
lines.append(f" df_{tx_safe} = {copy_expr}")
|
|
802
802
|
else:
|
|
803
|
-
|
|
803
|
+
copy_expr = lib_copy(data_lib, input_df)
|
|
804
|
+
lines.append(f" df_{tx_safe} = {copy_expr}")
|
|
804
805
|
source_dfs[tx.name] = f"df_{tx_safe}"
|
|
805
806
|
|
|
806
807
|
|
|
807
|
-
def _gen_lookup_transform(lines, tx, tx_safe, input_df, source_dfs):
|
|
808
|
+
def _gen_lookup_transform(lines, tx, tx_safe, input_df, source_dfs, data_lib="pandas"):
|
|
808
809
|
lookup_table = ""
|
|
809
810
|
lookup_sql = ""
|
|
810
811
|
lookup_condition = ""
|
|
@@ -862,13 +863,10 @@ def _gen_lookup_transform(lines, tx, tx_safe, input_df, source_dfs):
|
|
|
862
863
|
else:
|
|
863
864
|
lines.append(f" df_lkp_{tx_safe} = df_lkp_{tx_safe}[lkp_select_cols_{tx_safe}].drop_duplicates(subset={lookup_keys}, keep='first')")
|
|
864
865
|
|
|
865
|
-
|
|
866
|
-
|
|
867
|
-
|
|
868
|
-
lines.append(f"
|
|
869
|
-
lines.append(f" how='left',")
|
|
870
|
-
lines.append(f" suffixes=('', '_lkp')")
|
|
871
|
-
lines.append(f" )")
|
|
866
|
+
merge_expr = lib_merge(data_lib, input_df, f"df_lkp_{tx_safe}",
|
|
867
|
+
left_on=input_keys, right_on=lookup_keys,
|
|
868
|
+
how="left", suffixes=("", "_lkp"))
|
|
869
|
+
lines.append(f" df_{tx_safe} = {merge_expr}")
|
|
872
870
|
|
|
873
871
|
drop_cols = [k for k in lookup_keys if k not in input_keys]
|
|
874
872
|
if drop_cols:
|
|
@@ -910,7 +908,7 @@ def _gen_router_transform(lines, tx, tx_safe, input_df, source_dfs):
|
|
|
910
908
|
source_dfs[tx.name] = f"df_{tx_safe}"
|
|
911
909
|
|
|
912
910
|
|
|
913
|
-
def _gen_union_transform(lines, tx, tx_safe, input_sources, source_dfs):
|
|
911
|
+
def _gen_union_transform(lines, tx, tx_safe, input_sources, source_dfs, data_lib="pandas"):
|
|
914
912
|
dfs_to_union = []
|
|
915
913
|
for src in input_sources:
|
|
916
914
|
df_name = source_dfs.get(src, f"df_{_safe_name(src)}")
|
|
@@ -918,11 +916,14 @@ def _gen_union_transform(lines, tx, tx_safe, input_sources, source_dfs):
|
|
|
918
916
|
|
|
919
917
|
if len(dfs_to_union) > 1:
|
|
920
918
|
df_list = ", ".join(dfs_to_union)
|
|
921
|
-
|
|
919
|
+
concat_expr = lib_concat(data_lib, df_list)
|
|
920
|
+
lines.append(f" df_{tx_safe} = {concat_expr}")
|
|
922
921
|
elif dfs_to_union:
|
|
923
|
-
|
|
922
|
+
copy_expr = lib_copy(data_lib, dfs_to_union[0])
|
|
923
|
+
lines.append(f" df_{tx_safe} = {copy_expr}")
|
|
924
924
|
else:
|
|
925
|
-
|
|
925
|
+
empty_expr = lib_empty_df(data_lib)
|
|
926
|
+
lines.append(f" df_{tx_safe} = {empty_expr}")
|
|
926
927
|
source_dfs[tx.name] = f"df_{tx_safe}"
|
|
927
928
|
|
|
928
929
|
|
|
@@ -1037,7 +1038,7 @@ def _gen_normalizer_transform(lines, tx, tx_safe, input_df, source_dfs):
|
|
|
1037
1038
|
source_dfs[tx.name] = f"df_{tx_safe}"
|
|
1038
1039
|
|
|
1039
1040
|
|
|
1040
|
-
def _gen_rank_transform(lines, tx, tx_safe, input_df, source_dfs):
|
|
1041
|
+
def _gen_rank_transform(lines, tx, tx_safe, input_df, source_dfs, data_lib="pandas"):
|
|
1041
1042
|
rank_port = None
|
|
1042
1043
|
group_by_ports = []
|
|
1043
1044
|
top_bottom = "TOP"
|
|
@@ -1080,19 +1081,15 @@ def _gen_rank_transform(lines, tx, tx_safe, input_df, source_dfs):
|
|
|
1080
1081
|
rank_out_field = fld.name
|
|
1081
1082
|
break
|
|
1082
1083
|
|
|
1083
|
-
|
|
1084
|
-
|
|
1085
|
-
|
|
1086
|
-
|
|
1087
|
-
|
|
1088
|
-
|
|
1089
|
-
|
|
1090
|
-
|
|
1091
|
-
|
|
1092
|
-
elif rank_port:
|
|
1093
|
-
lines.append(f" # Rank by '{rank_port}' (no group-by)")
|
|
1094
|
-
lines.append(f" _rank_vals = df_{tx_safe}['{rank_port}'].rank(method='min', ascending={ascending})")
|
|
1095
|
-
lines.append(f" df_{tx_safe}['{rank_out_field}'] = _rank_vals.fillna(0).astype(int)")
|
|
1084
|
+
copy_expr = lib_copy(data_lib, input_df)
|
|
1085
|
+
lines.append(f" df_{tx_safe} = {copy_expr}")
|
|
1086
|
+
if rank_port:
|
|
1087
|
+
rank_code = lib_rank(data_lib, f"df_{tx_safe}", group_by_ports, rank_port, ascending, rank_out_field)
|
|
1088
|
+
if group_by_ports:
|
|
1089
|
+
lines.append(f" # Rank by '{rank_port}' within groups {group_by_ports}")
|
|
1090
|
+
else:
|
|
1091
|
+
lines.append(f" # Rank by '{rank_port}' (no group-by)")
|
|
1092
|
+
lines.append(f" {rank_code}")
|
|
1096
1093
|
if top_n:
|
|
1097
1094
|
lines.append(f" df_{tx_safe} = df_{tx_safe}[df_{tx_safe}['{rank_out_field}'] <= {top_n}].reset_index(drop=True)")
|
|
1098
1095
|
else:
|
|
@@ -1100,7 +1097,7 @@ def _gen_rank_transform(lines, tx, tx_safe, input_df, source_dfs):
|
|
|
1100
1097
|
source_dfs[tx.name] = f"df_{tx_safe}"
|
|
1101
1098
|
|
|
1102
1099
|
|
|
1103
|
-
def _gen_custom_transform(lines, tx, tx_safe, input_df, input_sources, source_dfs):
|
|
1100
|
+
def _gen_custom_transform(lines, tx, tx_safe, input_df, input_sources, source_dfs, data_lib="pandas"):
|
|
1104
1101
|
is_union = False
|
|
1105
1102
|
output_fields = []
|
|
1106
1103
|
input_groups = {}
|
|
@@ -1108,11 +1105,9 @@ def _gen_custom_transform(lines, tx, tx_safe, input_df, input_sources, source_df
|
|
|
1108
1105
|
for fld in tx.fields:
|
|
1109
1106
|
if "OUTPUT" in (fld.porttype or "").upper():
|
|
1110
1107
|
output_fields.append(fld)
|
|
1111
|
-
group_suffix_match = None
|
|
1112
1108
|
import re
|
|
1113
1109
|
m = re.match(r'^(.+?)(\d+)$', fld.name)
|
|
1114
1110
|
if m and "INPUT" in (fld.porttype or "").upper():
|
|
1115
|
-
base_name = m.group(1)
|
|
1116
1111
|
group_idx = m.group(2)
|
|
1117
1112
|
if group_idx not in input_groups:
|
|
1118
1113
|
input_groups[group_idx] = []
|
|
@@ -1128,14 +1123,18 @@ def _gen_custom_transform(lines, tx, tx_safe, input_df, input_sources, source_df
|
|
|
1128
1123
|
dfs_to_union.append(df_name)
|
|
1129
1124
|
if len(dfs_to_union) > 1:
|
|
1130
1125
|
df_list = ", ".join(dfs_to_union)
|
|
1131
|
-
|
|
1126
|
+
concat_expr = lib_concat(data_lib, df_list)
|
|
1127
|
+
lines.append(f" df_{tx_safe} = {concat_expr}")
|
|
1132
1128
|
elif dfs_to_union:
|
|
1133
|
-
|
|
1129
|
+
copy_expr = lib_copy(data_lib, dfs_to_union[0])
|
|
1130
|
+
lines.append(f" df_{tx_safe} = {copy_expr}")
|
|
1134
1131
|
else:
|
|
1135
|
-
|
|
1132
|
+
empty_expr = lib_empty_df(data_lib)
|
|
1133
|
+
lines.append(f" df_{tx_safe} = {empty_expr}")
|
|
1136
1134
|
else:
|
|
1137
1135
|
lines.append(f" # Custom transformation: {tx.name}")
|
|
1138
|
-
|
|
1136
|
+
copy_expr = lib_copy(data_lib, input_df)
|
|
1137
|
+
lines.append(f" df_{tx_safe} = {copy_expr}")
|
|
1139
1138
|
|
|
1140
1139
|
source_dfs[tx.name] = f"df_{tx_safe}"
|
|
1141
1140
|
|