informatica-python 1.4.2__tar.gz → 1.5.1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {informatica_python-1.4.2 → informatica_python-1.5.1}/PKG-INFO +1 -1
- {informatica_python-1.4.2 → informatica_python-1.5.1}/informatica_python/__init__.py +1 -1
- {informatica_python-1.4.2 → informatica_python-1.5.1}/informatica_python/cli.py +6 -0
- {informatica_python-1.4.2 → informatica_python-1.5.1}/informatica_python/converter.py +6 -4
- {informatica_python-1.4.2 → informatica_python-1.5.1}/informatica_python/generators/helper_gen.py +81 -7
- {informatica_python-1.4.2 → informatica_python-1.5.1}/informatica_python/generators/mapping_gen.py +96 -88
- {informatica_python-1.4.2 → informatica_python-1.5.1}/informatica_python/generators/workflow_gen.py +6 -1
- informatica_python-1.5.1/informatica_python/utils/expression_converter.py +444 -0
- informatica_python-1.5.1/informatica_python/utils/lib_adapters.py +164 -0
- {informatica_python-1.4.2 → informatica_python-1.5.1}/informatica_python.egg-info/PKG-INFO +1 -1
- {informatica_python-1.4.2 → informatica_python-1.5.1}/informatica_python.egg-info/SOURCES.txt +3 -1
- {informatica_python-1.4.2 → informatica_python-1.5.1}/pyproject.toml +1 -1
- informatica_python-1.5.1/tests/test_integration.py +540 -0
- informatica_python-1.4.2/informatica_python/utils/expression_converter.py +0 -264
- {informatica_python-1.4.2 → informatica_python-1.5.1}/LICENSE +0 -0
- {informatica_python-1.4.2 → informatica_python-1.5.1}/README.md +0 -0
- {informatica_python-1.4.2 → informatica_python-1.5.1}/informatica_python/generators/__init__.py +0 -0
- {informatica_python-1.4.2 → informatica_python-1.5.1}/informatica_python/generators/config_gen.py +0 -0
- {informatica_python-1.4.2 → informatica_python-1.5.1}/informatica_python/generators/error_log_gen.py +0 -0
- {informatica_python-1.4.2 → informatica_python-1.5.1}/informatica_python/generators/sql_gen.py +0 -0
- {informatica_python-1.4.2 → informatica_python-1.5.1}/informatica_python/models.py +0 -0
- {informatica_python-1.4.2 → informatica_python-1.5.1}/informatica_python/parser.py +0 -0
- {informatica_python-1.4.2 → informatica_python-1.5.1}/informatica_python/utils/__init__.py +0 -0
- {informatica_python-1.4.2 → informatica_python-1.5.1}/informatica_python/utils/datatype_map.py +0 -0
- {informatica_python-1.4.2 → informatica_python-1.5.1}/informatica_python.egg-info/dependency_links.txt +0 -0
- {informatica_python-1.4.2 → informatica_python-1.5.1}/informatica_python.egg-info/entry_points.txt +0 -0
- {informatica_python-1.4.2 → informatica_python-1.5.1}/informatica_python.egg-info/requires.txt +0 -0
- {informatica_python-1.4.2 → informatica_python-1.5.1}/informatica_python.egg-info/top_level.txt +0 -0
- {informatica_python-1.4.2 → informatica_python-1.5.1}/setup.cfg +0 -0
- {informatica_python-1.4.2 → informatica_python-1.5.1}/tests/test_converter.py +0 -0
|
@@ -41,6 +41,11 @@ def main():
|
|
|
41
41
|
default=None,
|
|
42
42
|
help="Save parsed JSON to a file",
|
|
43
43
|
)
|
|
44
|
+
parser.add_argument(
|
|
45
|
+
"--param-file",
|
|
46
|
+
default=None,
|
|
47
|
+
help="Path to Informatica .param file for variable substitution",
|
|
48
|
+
)
|
|
44
49
|
|
|
45
50
|
args = parser.parse_args()
|
|
46
51
|
|
|
@@ -61,6 +66,7 @@ def main():
|
|
|
61
66
|
args.input_file,
|
|
62
67
|
output_dir=args.output,
|
|
63
68
|
output_zip=args.zip,
|
|
69
|
+
param_file=args.param_file,
|
|
64
70
|
)
|
|
65
71
|
print(f"Conversion complete! Output: {output_path}")
|
|
66
72
|
print(f"Files generated:")
|
|
@@ -33,7 +33,8 @@ class InformaticaConverter:
|
|
|
33
33
|
return self._powermart_to_dict(self.powermart)
|
|
34
34
|
|
|
35
35
|
def convert(self, file_path: str, output_dir: str = "output",
|
|
36
|
-
output_zip: Optional[str] = None
|
|
36
|
+
output_zip: Optional[str] = None,
|
|
37
|
+
param_file: Optional[str] = None) -> str:
|
|
37
38
|
self.powermart = self.parser.parse_file(file_path)
|
|
38
39
|
|
|
39
40
|
if not self.powermart.repositories:
|
|
@@ -47,7 +48,7 @@ class InformaticaConverter:
|
|
|
47
48
|
raise ValueError("No folder found in XML file")
|
|
48
49
|
|
|
49
50
|
if len(all_folders) == 1:
|
|
50
|
-
return self._convert_folder(all_folders[0], output_dir, output_zip)
|
|
51
|
+
return self._convert_folder(all_folders[0], output_dir, output_zip, param_file)
|
|
51
52
|
|
|
52
53
|
result_path = output_dir if not output_zip else os.path.dirname(output_zip) or "."
|
|
53
54
|
for folder in all_folders:
|
|
@@ -56,7 +57,7 @@ class InformaticaConverter:
|
|
|
56
57
|
if output_zip:
|
|
57
58
|
base, ext = os.path.splitext(output_zip)
|
|
58
59
|
folder_zip = f"{base}_{folder.name}{ext}"
|
|
59
|
-
self._convert_folder(folder, folder_dir, folder_zip)
|
|
60
|
+
self._convert_folder(folder, folder_dir, folder_zip, param_file)
|
|
60
61
|
return result_path
|
|
61
62
|
|
|
62
63
|
def convert_string(self, xml_string: str, output_dir: str = "output",
|
|
@@ -87,7 +88,8 @@ class InformaticaConverter:
|
|
|
87
88
|
return result_path
|
|
88
89
|
|
|
89
90
|
def _convert_folder(self, folder: FolderDef, output_dir: str,
|
|
90
|
-
output_zip: Optional[str] = None
|
|
91
|
+
output_zip: Optional[str] = None,
|
|
92
|
+
param_file: Optional[str] = None) -> str:
|
|
91
93
|
files = {}
|
|
92
94
|
|
|
93
95
|
files["helper_functions.py"] = generate_helper_functions(folder, self.data_lib)
|
{informatica_python-1.4.2 → informatica_python-1.5.1}/informatica_python/generators/helper_gen.py
RENAMED
|
@@ -44,13 +44,20 @@ def generate_helper_functions(folder: FolderDef, data_lib: str = "pandas") -> st
|
|
|
44
44
|
lines.append('logger = logging.getLogger("informatica_converter")')
|
|
45
45
|
lines.append("")
|
|
46
46
|
lines.append("")
|
|
47
|
-
lines.append("def load_config(config_path='config.yml'):")
|
|
48
|
-
lines.append(' """Load configuration from YAML file."""')
|
|
47
|
+
lines.append("def load_config(config_path='config.yml', param_file=None):")
|
|
48
|
+
lines.append(' """Load configuration from YAML file, optionally merging Informatica .param file."""')
|
|
49
49
|
lines.append(" with open(config_path, 'r') as f:")
|
|
50
|
-
lines.append("
|
|
50
|
+
lines.append(" config = yaml.safe_load(f) or {}")
|
|
51
|
+
lines.append(" if param_file:")
|
|
52
|
+
lines.append(" params = parse_param_file(param_file)")
|
|
53
|
+
lines.append(" config['params'] = params")
|
|
54
|
+
lines.append(" for key, val in params.items():")
|
|
55
|
+
lines.append(" os.environ[f'INFA_VAR_{key}'] = str(val)")
|
|
56
|
+
lines.append(" return config")
|
|
51
57
|
lines.append("")
|
|
52
58
|
lines.append("")
|
|
53
59
|
|
|
60
|
+
_add_param_file_functions(lines)
|
|
54
61
|
_add_db_functions(lines, data_lib)
|
|
55
62
|
_add_file_functions(lines, data_lib)
|
|
56
63
|
_add_expression_helpers(lines)
|
|
@@ -59,6 +66,61 @@ def generate_helper_functions(folder: FolderDef, data_lib: str = "pandas") -> st
|
|
|
59
66
|
return "\n".join(lines)
|
|
60
67
|
|
|
61
68
|
|
|
69
|
+
def _add_param_file_functions(lines):
|
|
70
|
+
lines.append("# ============================================================")
|
|
71
|
+
lines.append("# Informatica Parameter File Support")
|
|
72
|
+
lines.append("# ============================================================")
|
|
73
|
+
lines.append("")
|
|
74
|
+
lines.append("")
|
|
75
|
+
lines.append("def parse_param_file(param_path):")
|
|
76
|
+
lines.append(' """')
|
|
77
|
+
lines.append(" Parse an Informatica .param file into a flat dict of variable names to values.")
|
|
78
|
+
lines.append(" Supports standard Informatica parameter file format:")
|
|
79
|
+
lines.append(" [Global]")
|
|
80
|
+
lines.append(" $$VAR_NAME=value")
|
|
81
|
+
lines.append(" [folder_name.WF:workflow_name.ST:session_name]")
|
|
82
|
+
lines.append(" $$CONN_NAME=value")
|
|
83
|
+
lines.append(' """')
|
|
84
|
+
lines.append(" params = {}")
|
|
85
|
+
lines.append(" if not os.path.exists(param_path):")
|
|
86
|
+
lines.append(" logger.warning(f'Parameter file not found: {param_path}')")
|
|
87
|
+
lines.append(" return params")
|
|
88
|
+
lines.append("")
|
|
89
|
+
lines.append(" current_section = 'Global'")
|
|
90
|
+
lines.append(" with open(param_path, 'r') as f:")
|
|
91
|
+
lines.append(" for line_num, line in enumerate(f, 1):")
|
|
92
|
+
lines.append(" line = line.strip()")
|
|
93
|
+
lines.append(" if not line or line.startswith('#'):")
|
|
94
|
+
lines.append(" continue")
|
|
95
|
+
lines.append(" if line.startswith('[') and line.endswith(']'):")
|
|
96
|
+
lines.append(" current_section = line[1:-1].strip()")
|
|
97
|
+
lines.append(" continue")
|
|
98
|
+
lines.append(" if '=' in line:")
|
|
99
|
+
lines.append(" key, _, value = line.partition('=')")
|
|
100
|
+
lines.append(" key = key.strip()")
|
|
101
|
+
lines.append(" value = value.strip()")
|
|
102
|
+
lines.append(" clean_key = key.lstrip('$')")
|
|
103
|
+
lines.append(" params[clean_key] = value")
|
|
104
|
+
lines.append(" if current_section != 'Global':")
|
|
105
|
+
lines.append(" params[f'{current_section}.{clean_key}'] = value")
|
|
106
|
+
lines.append(" logger.info(f'Loaded {len(params)} parameters from {param_path}')")
|
|
107
|
+
lines.append(" return params")
|
|
108
|
+
lines.append("")
|
|
109
|
+
lines.append("")
|
|
110
|
+
lines.append("def get_param(config, var_name, default=''):")
|
|
111
|
+
lines.append(' """Get a parameter value from config params, then env vars, then default."""')
|
|
112
|
+
lines.append(" clean = var_name.lstrip('$')")
|
|
113
|
+
lines.append(" params = config.get('params', {})")
|
|
114
|
+
lines.append(" if clean in params:")
|
|
115
|
+
lines.append(" return params[clean]")
|
|
116
|
+
lines.append(" env_val = os.environ.get(f'INFA_VAR_{clean}')")
|
|
117
|
+
lines.append(" if env_val is not None:")
|
|
118
|
+
lines.append(" return env_val")
|
|
119
|
+
lines.append(" return default")
|
|
120
|
+
lines.append("")
|
|
121
|
+
lines.append("")
|
|
122
|
+
|
|
123
|
+
|
|
62
124
|
def _add_db_functions(lines, data_lib):
|
|
63
125
|
lines.append("# ============================================================")
|
|
64
126
|
lines.append("# Database Operations")
|
|
@@ -1060,14 +1122,26 @@ def _add_expression_helpers(lines):
|
|
|
1060
1122
|
lines.append(" return None")
|
|
1061
1123
|
lines.append("")
|
|
1062
1124
|
lines.append("")
|
|
1063
|
-
lines.append("
|
|
1064
|
-
lines.append(
|
|
1065
|
-
lines.append("
|
|
1125
|
+
lines.append("_param_store = {}")
|
|
1126
|
+
lines.append("")
|
|
1127
|
+
lines.append("")
|
|
1128
|
+
lines.append("def get_variable(var_name, config=None):")
|
|
1129
|
+
lines.append(' """Get workflow/mapping variable value from params, env vars, or param store."""')
|
|
1130
|
+
lines.append(" clean = var_name.lstrip('$')")
|
|
1131
|
+
lines.append(" if config and 'params' in config:")
|
|
1132
|
+
lines.append(" val = config['params'].get(clean)")
|
|
1133
|
+
lines.append(" if val is not None:")
|
|
1134
|
+
lines.append(" return val")
|
|
1135
|
+
lines.append(" if clean in _param_store:")
|
|
1136
|
+
lines.append(" return _param_store[clean]")
|
|
1137
|
+
lines.append(" return os.environ.get(f'INFA_VAR_{clean}', '')")
|
|
1066
1138
|
lines.append("")
|
|
1067
1139
|
lines.append("")
|
|
1068
1140
|
lines.append("def set_variable(var_name, value):")
|
|
1069
1141
|
lines.append(' """Set workflow/mapping variable value."""')
|
|
1070
|
-
lines.append("
|
|
1142
|
+
lines.append(" clean = var_name.lstrip('$')")
|
|
1143
|
+
lines.append(" _param_store[clean] = value")
|
|
1144
|
+
lines.append(" os.environ[f'INFA_VAR_{clean}'] = str(value)")
|
|
1071
1145
|
lines.append(" return value")
|
|
1072
1146
|
lines.append("")
|
|
1073
1147
|
lines.append("")
|
{informatica_python-1.4.2 → informatica_python-1.5.1}/informatica_python/generators/mapping_gen.py
RENAMED
|
@@ -4,11 +4,16 @@ from informatica_python.models import (
|
|
|
4
4
|
TransformationDef, ConnectorDef, InstanceDef, MappletDef,
|
|
5
5
|
)
|
|
6
6
|
from informatica_python.utils.expression_converter import (
|
|
7
|
-
convert_expression,
|
|
7
|
+
convert_expression, convert_expression_vectorized,
|
|
8
|
+
convert_sql_expression, convert_filter_vectorized,
|
|
8
9
|
parse_join_condition, parse_lookup_condition,
|
|
9
10
|
parse_aggregate_expression, PANDAS_AGG_MAP,
|
|
10
11
|
)
|
|
11
12
|
from informatica_python.utils.datatype_map import get_python_type
|
|
13
|
+
from informatica_python.utils.lib_adapters import (
|
|
14
|
+
lib_merge, lib_sort, lib_groupby_agg, lib_groupby_first,
|
|
15
|
+
lib_concat, lib_empty_df, lib_copy, lib_rank,
|
|
16
|
+
)
|
|
12
17
|
|
|
13
18
|
|
|
14
19
|
def _inline_mapplets(mapping, folder):
|
|
@@ -184,6 +189,7 @@ def generate_mapping_code(mapping: MappingDef, folder: FolderDef,
|
|
|
184
189
|
lines.append(f"Auto-generated by informatica-python")
|
|
185
190
|
lines.append('"""')
|
|
186
191
|
lines.append("")
|
|
192
|
+
lines.append("import numpy as np")
|
|
187
193
|
lines.append("from helper_functions import *")
|
|
188
194
|
lines.append("")
|
|
189
195
|
lines.append("")
|
|
@@ -266,7 +272,7 @@ def generate_mapping_code(mapping: MappingDef, folder: FolderDef,
|
|
|
266
272
|
for tx in processing_order:
|
|
267
273
|
if tx.type in ("Source Qualifier", "Application Source Qualifier"):
|
|
268
274
|
continue
|
|
269
|
-
_generate_transformation(lines, tx, connector_graph, source_dfs, transform_map, instance_map)
|
|
275
|
+
_generate_transformation(lines, tx, connector_graph, source_dfs, transform_map, instance_map, data_lib)
|
|
270
276
|
|
|
271
277
|
for tgt_name, tgt_def in target_map.items():
|
|
272
278
|
_generate_target_write(lines, tgt_name, tgt_def, connector_graph, source_dfs, transform_map, instance_map, session_overrides)
|
|
@@ -277,7 +283,12 @@ def generate_mapping_code(mapping: MappingDef, folder: FolderDef,
|
|
|
277
283
|
lines.append("")
|
|
278
284
|
lines.append("")
|
|
279
285
|
lines.append("if __name__ == '__main__':")
|
|
280
|
-
lines.append("
|
|
286
|
+
lines.append(" import argparse as _ap")
|
|
287
|
+
lines.append(" _parser = _ap.ArgumentParser()")
|
|
288
|
+
lines.append(" _parser.add_argument('--param-file', default=None)")
|
|
289
|
+
lines.append(" _parser.add_argument('--config', default='config.yml')")
|
|
290
|
+
lines.append(" _args = _parser.parse_args()")
|
|
291
|
+
lines.append(" config = load_config(_args.config, param_file=_args.param_file)")
|
|
281
292
|
lines.append(f" run_{_safe_name(mapping.name)}(config)")
|
|
282
293
|
lines.append("")
|
|
283
294
|
|
|
@@ -560,7 +571,7 @@ def _generate_source_qualifier(lines, sq, source_map, source_dfs, connector_grap
|
|
|
560
571
|
lines.append("")
|
|
561
572
|
|
|
562
573
|
|
|
563
|
-
def _generate_transformation(lines, tx, connector_graph, source_dfs, transform_map, instance_map):
|
|
574
|
+
def _generate_transformation(lines, tx, connector_graph, source_dfs, transform_map, instance_map, data_lib="pandas"):
|
|
564
575
|
tx_safe = _safe_name(tx.name)
|
|
565
576
|
tx_type = tx.type.lower().strip()
|
|
566
577
|
|
|
@@ -584,21 +595,21 @@ def _generate_transformation(lines, tx, connector_graph, source_dfs, transform_m
|
|
|
584
595
|
lines.append(f" # Transformation: {tx.name} (Type: {tx.type})")
|
|
585
596
|
|
|
586
597
|
if tx_type == "expression":
|
|
587
|
-
_gen_expression_transform(lines, tx, tx_safe, input_df, source_dfs)
|
|
598
|
+
_gen_expression_transform(lines, tx, tx_safe, input_df, source_dfs, data_lib)
|
|
588
599
|
elif tx_type == "filter":
|
|
589
|
-
_gen_filter_transform(lines, tx, tx_safe, input_df, source_dfs)
|
|
600
|
+
_gen_filter_transform(lines, tx, tx_safe, input_df, source_dfs, data_lib)
|
|
590
601
|
elif tx_type in ("aggregator",):
|
|
591
|
-
_gen_aggregator_transform(lines, tx, tx_safe, input_df, source_dfs)
|
|
602
|
+
_gen_aggregator_transform(lines, tx, tx_safe, input_df, source_dfs, data_lib)
|
|
592
603
|
elif tx_type == "sorter":
|
|
593
|
-
_gen_sorter_transform(lines, tx, tx_safe, input_df, source_dfs)
|
|
604
|
+
_gen_sorter_transform(lines, tx, tx_safe, input_df, source_dfs, data_lib)
|
|
594
605
|
elif tx_type in ("joiner",):
|
|
595
|
-
_gen_joiner_transform(lines, tx, tx_safe, input_df, input_sources, source_dfs, connector_graph)
|
|
606
|
+
_gen_joiner_transform(lines, tx, tx_safe, input_df, input_sources, source_dfs, connector_graph, data_lib)
|
|
596
607
|
elif tx_type in ("lookup procedure", "lookup"):
|
|
597
|
-
_gen_lookup_transform(lines, tx, tx_safe, input_df, source_dfs)
|
|
608
|
+
_gen_lookup_transform(lines, tx, tx_safe, input_df, source_dfs, data_lib)
|
|
598
609
|
elif tx_type == "router":
|
|
599
610
|
_gen_router_transform(lines, tx, tx_safe, input_df, source_dfs)
|
|
600
611
|
elif tx_type in ("union",):
|
|
601
|
-
_gen_union_transform(lines, tx, tx_safe, input_sources, source_dfs)
|
|
612
|
+
_gen_union_transform(lines, tx, tx_safe, input_sources, source_dfs, data_lib)
|
|
602
613
|
elif tx_type in ("update strategy",):
|
|
603
614
|
_gen_update_strategy(lines, tx, tx_safe, input_df, source_dfs)
|
|
604
615
|
elif tx_type == "sequence generator":
|
|
@@ -606,9 +617,9 @@ def _generate_transformation(lines, tx, connector_graph, source_dfs, transform_m
|
|
|
606
617
|
elif tx_type in ("normalizer",):
|
|
607
618
|
_gen_normalizer_transform(lines, tx, tx_safe, input_df, source_dfs)
|
|
608
619
|
elif tx_type in ("rank",):
|
|
609
|
-
_gen_rank_transform(lines, tx, tx_safe, input_df, source_dfs)
|
|
620
|
+
_gen_rank_transform(lines, tx, tx_safe, input_df, source_dfs, data_lib)
|
|
610
621
|
elif tx_type in ("custom transformation",):
|
|
611
|
-
_gen_custom_transform(lines, tx, tx_safe, input_df, input_sources, source_dfs)
|
|
622
|
+
_gen_custom_transform(lines, tx, tx_safe, input_df, input_sources, source_dfs, data_lib)
|
|
612
623
|
elif tx_type in ("stored procedure",):
|
|
613
624
|
_gen_stored_proc(lines, tx, tx_safe, input_df, source_dfs)
|
|
614
625
|
elif tx_type in ("java",):
|
|
@@ -617,44 +628,48 @@ def _generate_transformation(lines, tx, connector_graph, source_dfs, transform_m
|
|
|
617
628
|
_gen_sql_transform(lines, tx, tx_safe, input_df, source_dfs)
|
|
618
629
|
else:
|
|
619
630
|
lines.append(f" # TODO: Unsupported transformation type '{tx.type}' - passing through")
|
|
620
|
-
|
|
631
|
+
copy_expr = lib_copy(data_lib, input_df)
|
|
632
|
+
lines.append(f" df_{tx_safe} = {copy_expr}")
|
|
621
633
|
source_dfs[tx.name] = f"df_{tx_safe}"
|
|
622
634
|
|
|
623
635
|
lines.append("")
|
|
624
636
|
|
|
625
637
|
|
|
626
|
-
def _gen_expression_transform(lines, tx, tx_safe, input_df, source_dfs):
|
|
627
|
-
|
|
638
|
+
def _gen_expression_transform(lines, tx, tx_safe, input_df, source_dfs, data_lib="pandas"):
    """Emit generated code for an Expression transformation.

    First emits ``df_<tx_safe> = <copy of input_df>`` using the expression
    returned by ``lib_copy`` for ``data_lib``.  Then, for every field whose
    expression is non-blank and differs from the field's own name, emits a
    comment echoing the original expression followed by a column assignment
    built by ``convert_expression_vectorized``.  If no field qualifies, a
    pass-through comment is emitted instead.

    Args:
        lines: list of generated source lines; appended to in place.
        tx: transformation definition; ``tx.fields`` and ``tx.name`` are read.
        tx_safe: identifier-safe form of the transformation name.
        input_df: name of the upstream dataframe variable in generated code.
        source_dfs: mapping of transformation name -> dataframe variable name;
            updated with this transformation's output.
        data_lib: dataframe library selector forwarded to ``lib_copy``.
    """
    out_df = f"df_{tx_safe}"
    # NOTE(review): emitted lines are indented 4 spaces on the assumption that
    # they land inside the generated mapping function body - confirm against
    # the other generators.
    lines.append(f"    {out_df} = {lib_copy(data_lib, input_df)}")

    has_expressions = False
    for fld in tx.fields:
        stripped = (fld.expression or "").strip()
        if not stripped or stripped == fld.name:
            continue
        has_expressions = True
        expr_vec = convert_expression_vectorized(fld.expression, out_df)
        # Collapse whitespace so a multi-line expression stays inside the
        # single generated comment line rather than spilling out as code.
        comment_text = " ".join(fld.expression.split())
        lines.append(f"    # {fld.name} = {comment_text}")
        # Output-only and input/output ports were assigned identically, so the
        # former port-type branch is folded into one statement.
        lines.append(f"    {out_df}['{fld.name}'] = {expr_vec}")

    if not has_expressions:
        lines.append("    # Pass-through expression (no transformations)")
    source_dfs[tx.name] = out_df
|
|
641
654
|
|
|
642
655
|
|
|
643
|
-
def _gen_filter_transform(lines, tx, tx_safe, input_df, source_dfs):
|
|
656
|
+
def _gen_filter_transform(lines, tx, tx_safe, input_df, source_dfs, data_lib="pandas"):
    """Emit generated code for a Filter transformation.

    Reads the ``Filter Condition`` attribute of ``tx`` (the last one wins if
    several are present).  When a condition exists, emits a comment echoing it
    and assigns ``df_<tx_safe>`` to a copy of ``input_df`` indexed by the mask
    expression from ``convert_filter_vectorized``.  Otherwise assigns a plain
    copy of ``input_df``.

    Args:
        lines: list of generated source lines; appended to in place.
        tx: transformation definition; ``tx.attributes`` and ``tx.name`` are read.
        tx_safe: identifier-safe form of the transformation name.
        input_df: name of the upstream dataframe variable in generated code.
        source_dfs: mapping of transformation name -> dataframe variable name;
            updated with this transformation's output.
        data_lib: dataframe library selector forwarded to ``lib_copy``.
    """
    filter_condition = ""
    for attr in tx.attributes:
        if attr.name == "Filter Condition":
            filter_condition = attr.value

    if filter_condition:
        expr_vec = convert_filter_vectorized(filter_condition, input_df)
        # Collapse whitespace so a multi-line condition cannot break out of
        # the generated comment line.
        comment_text = " ".join(filter_condition.split())
        lines.append(f"    # Filter: {comment_text}")
        copy_expr = lib_copy(data_lib, f"{input_df}[{expr_vec}]")
    else:
        copy_expr = lib_copy(data_lib, input_df)

    # NOTE(review): 4-space indent assumes the line is emitted inside the
    # generated mapping function body - confirm against the other generators.
    lines.append(f"    df_{tx_safe} = {copy_expr}")
    source_dfs[tx.name] = f"df_{tx_safe}"
|
|
655
670
|
|
|
656
671
|
|
|
657
|
-
def _gen_aggregator_transform(lines, tx, tx_safe, input_df, source_dfs):
|
|
672
|
+
def _gen_aggregator_transform(lines, tx, tx_safe, input_df, source_dfs, data_lib="pandas"):
|
|
658
673
|
group_by_ports = []
|
|
659
674
|
agg_ports = []
|
|
660
675
|
for fld in tx.fields:
|
|
@@ -686,22 +701,18 @@ def _gen_aggregator_transform(lines, tx, tx_safe, input_df, source_dfs):
|
|
|
686
701
|
|
|
687
702
|
if group_by_ports and agg_dict:
|
|
688
703
|
lines.append(f" # Aggregator: group by {group_by_ports}")
|
|
689
|
-
|
|
690
|
-
|
|
691
|
-
agg_spec[out_name] = f"pd.NamedAgg(column='{col}', aggfunc='{func}')"
|
|
692
|
-
|
|
693
|
-
lines.append(f" df_{tx_safe} = {input_df}.groupby({group_by_ports}, as_index=False).agg(")
|
|
694
|
-
for out_name, spec in agg_spec.items():
|
|
695
|
-
lines.append(f" {out_name}={spec},")
|
|
696
|
-
lines.append(f" )")
|
|
704
|
+
agg_expr = lib_groupby_agg(data_lib, input_df, group_by_ports, agg_dict)
|
|
705
|
+
lines.append(f" df_{tx_safe} = {agg_expr}")
|
|
697
706
|
|
|
698
707
|
if rename_map:
|
|
699
708
|
lines.append(f" df_{tx_safe} = df_{tx_safe}.rename(columns={rename_map})")
|
|
700
709
|
elif group_by_ports:
|
|
701
710
|
lines.append(f" # Aggregator: group by {group_by_ports}")
|
|
702
|
-
|
|
711
|
+
first_expr = lib_groupby_first(data_lib, input_df, group_by_ports)
|
|
712
|
+
lines.append(f" df_{tx_safe} = {first_expr}")
|
|
703
713
|
else:
|
|
704
|
-
|
|
714
|
+
copy_expr = lib_copy(data_lib, input_df)
|
|
715
|
+
lines.append(f" df_{tx_safe} = {copy_expr}")
|
|
705
716
|
|
|
706
717
|
for col_name, expr_text in computed_aggs:
|
|
707
718
|
expr_py = convert_expression(expr_text)
|
|
@@ -711,20 +722,22 @@ def _gen_aggregator_transform(lines, tx, tx_safe, input_df, source_dfs):
|
|
|
711
722
|
source_dfs[tx.name] = f"df_{tx_safe}"
|
|
712
723
|
|
|
713
724
|
|
|
714
|
-
def _gen_sorter_transform(lines, tx, tx_safe, input_df, source_dfs):
|
|
725
|
+
def _gen_sorter_transform(lines, tx, tx_safe, input_df, source_dfs, data_lib="pandas"):
    """Emit generated code for a Sorter transformation.

    Every field of ``tx`` is used as a sort key, each with the direction flag
    ``True``.  When there is at least one key, ``df_<tx_safe>`` is assigned
    the expression returned by ``lib_sort``; otherwise it is assigned the
    copy expression returned by ``lib_copy``.

    Args:
        lines: list of generated source lines; appended to in place.
        tx: transformation definition; ``tx.fields`` and ``tx.name`` are read.
        tx_safe: identifier-safe form of the transformation name.
        input_df: name of the upstream dataframe variable in generated code.
        source_dfs: mapping of transformation name -> dataframe variable name;
            updated with this transformation's output.
        data_lib: dataframe library selector forwarded to the ``lib_*`` helpers.
    """
    # NOTE(review): all ports are treated as keys and all flags are True;
    # presumably True means ascending - per-port sort-key/direction metadata
    # is not consulted here. Verify against lib_sort and the model.
    sort_keys = [fld.name for fld in tx.fields]
    sort_dirs = [True] * len(sort_keys)

    if sort_keys:
        result_expr = lib_sort(data_lib, input_df, sort_keys, sort_dirs)
    else:
        result_expr = lib_copy(data_lib, input_df)

    # NOTE(review): 4-space indent assumes the line is emitted inside the
    # generated mapping function body - confirm against the other generators.
    lines.append(f"    df_{tx_safe} = {result_expr}")
    source_dfs[tx.name] = f"df_{tx_safe}"
|
|
725
738
|
|
|
726
739
|
|
|
727
|
-
def _gen_joiner_transform(lines, tx, tx_safe, input_df, input_sources, source_dfs, connector_graph=None):
|
|
740
|
+
def _gen_joiner_transform(lines, tx, tx_safe, input_df, input_sources, source_dfs, connector_graph=None, data_lib="pandas"):
|
|
728
741
|
join_type = "inner"
|
|
729
742
|
join_condition = ""
|
|
730
743
|
for attr in tx.attributes:
|
|
@@ -778,33 +791,29 @@ def _gen_joiner_transform(lines, tx, tx_safe, input_df, input_sources, source_df
|
|
|
778
791
|
|
|
779
792
|
lines.append(f" # Join ({join_type}): {join_condition or 'auto'}")
|
|
780
793
|
if left_keys and right_keys:
|
|
781
|
-
|
|
782
|
-
|
|
783
|
-
lines.append(f"
|
|
784
|
-
lines.append(f" right_on={right_keys},")
|
|
785
|
-
lines.append(f" how='{join_type}',")
|
|
786
|
-
lines.append(f" suffixes=('', '_master')")
|
|
787
|
-
lines.append(f" )")
|
|
794
|
+
merge_expr = lib_merge(data_lib, df_detail, df_master,
|
|
795
|
+
left_on=left_keys, right_on=right_keys, how=join_type)
|
|
796
|
+
lines.append(f" df_{tx_safe} = {merge_expr}")
|
|
788
797
|
else:
|
|
789
798
|
common_cols = [f for f in detail_fields if f in master_fields]
|
|
790
799
|
if common_cols:
|
|
791
|
-
|
|
792
|
-
|
|
793
|
-
lines.append(f"
|
|
794
|
-
lines.append(f" how='{join_type}',")
|
|
795
|
-
lines.append(f" suffixes=('', '_master')")
|
|
796
|
-
lines.append(f" )")
|
|
800
|
+
merge_expr = lib_merge(data_lib, df_detail, df_master,
|
|
801
|
+
on=common_cols, how=join_type)
|
|
802
|
+
lines.append(f" df_{tx_safe} = {merge_expr}")
|
|
797
803
|
else:
|
|
798
|
-
|
|
804
|
+
merge_expr = lib_merge(data_lib, df_detail, df_master, how=join_type)
|
|
805
|
+
lines.append(f" df_{tx_safe} = {merge_expr}")
|
|
799
806
|
elif len(src_list) == 1:
|
|
800
807
|
df1 = source_dfs.get(src_list[0], f"df_{_safe_name(src_list[0])}")
|
|
801
|
-
|
|
808
|
+
copy_expr = lib_copy(data_lib, df1)
|
|
809
|
+
lines.append(f" df_{tx_safe} = {copy_expr}")
|
|
802
810
|
else:
|
|
803
|
-
|
|
811
|
+
copy_expr = lib_copy(data_lib, input_df)
|
|
812
|
+
lines.append(f" df_{tx_safe} = {copy_expr}")
|
|
804
813
|
source_dfs[tx.name] = f"df_{tx_safe}"
|
|
805
814
|
|
|
806
815
|
|
|
807
|
-
def _gen_lookup_transform(lines, tx, tx_safe, input_df, source_dfs):
|
|
816
|
+
def _gen_lookup_transform(lines, tx, tx_safe, input_df, source_dfs, data_lib="pandas"):
|
|
808
817
|
lookup_table = ""
|
|
809
818
|
lookup_sql = ""
|
|
810
819
|
lookup_condition = ""
|
|
@@ -844,7 +853,8 @@ def _gen_lookup_transform(lines, tx, tx_safe, input_df, source_dfs):
|
|
|
844
853
|
elif lookup_table:
|
|
845
854
|
lines.append(f" df_lkp_{tx_safe} = read_from_db(config, 'SELECT * FROM {lookup_table}', 'default')")
|
|
846
855
|
else:
|
|
847
|
-
|
|
856
|
+
empty_expr = lib_empty_df(data_lib)
|
|
857
|
+
lines.append(f" df_lkp_{tx_safe} = {empty_expr}")
|
|
848
858
|
|
|
849
859
|
input_keys, lookup_keys = parse_lookup_condition(lookup_condition)
|
|
850
860
|
|
|
@@ -862,13 +872,10 @@ def _gen_lookup_transform(lines, tx, tx_safe, input_df, source_dfs):
|
|
|
862
872
|
else:
|
|
863
873
|
lines.append(f" df_lkp_{tx_safe} = df_lkp_{tx_safe}[lkp_select_cols_{tx_safe}].drop_duplicates(subset={lookup_keys}, keep='first')")
|
|
864
874
|
|
|
865
|
-
|
|
866
|
-
|
|
867
|
-
|
|
868
|
-
lines.append(f"
|
|
869
|
-
lines.append(f" how='left',")
|
|
870
|
-
lines.append(f" suffixes=('', '_lkp')")
|
|
871
|
-
lines.append(f" )")
|
|
875
|
+
merge_expr = lib_merge(data_lib, input_df, f"df_lkp_{tx_safe}",
|
|
876
|
+
left_on=input_keys, right_on=lookup_keys,
|
|
877
|
+
how="left", suffixes=("", "_lkp"))
|
|
878
|
+
lines.append(f" df_{tx_safe} = {merge_expr}")
|
|
872
879
|
|
|
873
880
|
drop_cols = [k for k in lookup_keys if k not in input_keys]
|
|
874
881
|
if drop_cols:
|
|
@@ -910,7 +917,7 @@ def _gen_router_transform(lines, tx, tx_safe, input_df, source_dfs):
|
|
|
910
917
|
source_dfs[tx.name] = f"df_{tx_safe}"
|
|
911
918
|
|
|
912
919
|
|
|
913
|
-
def _gen_union_transform(lines, tx, tx_safe, input_sources, source_dfs):
|
|
920
|
+
def _gen_union_transform(lines, tx, tx_safe, input_sources, source_dfs, data_lib="pandas"):
|
|
914
921
|
dfs_to_union = []
|
|
915
922
|
for src in input_sources:
|
|
916
923
|
df_name = source_dfs.get(src, f"df_{_safe_name(src)}")
|
|
@@ -918,11 +925,14 @@ def _gen_union_transform(lines, tx, tx_safe, input_sources, source_dfs):
|
|
|
918
925
|
|
|
919
926
|
if len(dfs_to_union) > 1:
|
|
920
927
|
df_list = ", ".join(dfs_to_union)
|
|
921
|
-
|
|
928
|
+
concat_expr = lib_concat(data_lib, df_list)
|
|
929
|
+
lines.append(f" df_{tx_safe} = {concat_expr}")
|
|
922
930
|
elif dfs_to_union:
|
|
923
|
-
|
|
931
|
+
copy_expr = lib_copy(data_lib, dfs_to_union[0])
|
|
932
|
+
lines.append(f" df_{tx_safe} = {copy_expr}")
|
|
924
933
|
else:
|
|
925
|
-
|
|
934
|
+
empty_expr = lib_empty_df(data_lib)
|
|
935
|
+
lines.append(f" df_{tx_safe} = {empty_expr}")
|
|
926
936
|
source_dfs[tx.name] = f"df_{tx_safe}"
|
|
927
937
|
|
|
928
938
|
|
|
@@ -1037,7 +1047,7 @@ def _gen_normalizer_transform(lines, tx, tx_safe, input_df, source_dfs):
|
|
|
1037
1047
|
source_dfs[tx.name] = f"df_{tx_safe}"
|
|
1038
1048
|
|
|
1039
1049
|
|
|
1040
|
-
def _gen_rank_transform(lines, tx, tx_safe, input_df, source_dfs):
|
|
1050
|
+
def _gen_rank_transform(lines, tx, tx_safe, input_df, source_dfs, data_lib="pandas"):
|
|
1041
1051
|
rank_port = None
|
|
1042
1052
|
group_by_ports = []
|
|
1043
1053
|
top_bottom = "TOP"
|
|
@@ -1080,19 +1090,15 @@ def _gen_rank_transform(lines, tx, tx_safe, input_df, source_dfs):
|
|
|
1080
1090
|
rank_out_field = fld.name
|
|
1081
1091
|
break
|
|
1082
1092
|
|
|
1083
|
-
|
|
1084
|
-
|
|
1085
|
-
|
|
1086
|
-
|
|
1087
|
-
|
|
1088
|
-
|
|
1089
|
-
|
|
1090
|
-
|
|
1091
|
-
|
|
1092
|
-
elif rank_port:
|
|
1093
|
-
lines.append(f" # Rank by '{rank_port}' (no group-by)")
|
|
1094
|
-
lines.append(f" _rank_vals = df_{tx_safe}['{rank_port}'].rank(method='min', ascending={ascending})")
|
|
1095
|
-
lines.append(f" df_{tx_safe}['{rank_out_field}'] = _rank_vals.fillna(0).astype(int)")
|
|
1093
|
+
copy_expr = lib_copy(data_lib, input_df)
|
|
1094
|
+
lines.append(f" df_{tx_safe} = {copy_expr}")
|
|
1095
|
+
if rank_port:
|
|
1096
|
+
rank_code = lib_rank(data_lib, f"df_{tx_safe}", group_by_ports, rank_port, ascending, rank_out_field)
|
|
1097
|
+
if group_by_ports:
|
|
1098
|
+
lines.append(f" # Rank by '{rank_port}' within groups {group_by_ports}")
|
|
1099
|
+
else:
|
|
1100
|
+
lines.append(f" # Rank by '{rank_port}' (no group-by)")
|
|
1101
|
+
lines.append(f" {rank_code}")
|
|
1096
1102
|
if top_n:
|
|
1097
1103
|
lines.append(f" df_{tx_safe} = df_{tx_safe}[df_{tx_safe}['{rank_out_field}'] <= {top_n}].reset_index(drop=True)")
|
|
1098
1104
|
else:
|
|
@@ -1100,7 +1106,7 @@ def _gen_rank_transform(lines, tx, tx_safe, input_df, source_dfs):
|
|
|
1100
1106
|
source_dfs[tx.name] = f"df_{tx_safe}"
|
|
1101
1107
|
|
|
1102
1108
|
|
|
1103
|
-
def _gen_custom_transform(lines, tx, tx_safe, input_df, input_sources, source_dfs):
|
|
1109
|
+
def _gen_custom_transform(lines, tx, tx_safe, input_df, input_sources, source_dfs, data_lib="pandas"):
|
|
1104
1110
|
is_union = False
|
|
1105
1111
|
output_fields = []
|
|
1106
1112
|
input_groups = {}
|
|
@@ -1108,11 +1114,9 @@ def _gen_custom_transform(lines, tx, tx_safe, input_df, input_sources, source_df
|
|
|
1108
1114
|
for fld in tx.fields:
|
|
1109
1115
|
if "OUTPUT" in (fld.porttype or "").upper():
|
|
1110
1116
|
output_fields.append(fld)
|
|
1111
|
-
group_suffix_match = None
|
|
1112
1117
|
import re
|
|
1113
1118
|
m = re.match(r'^(.+?)(\d+)$', fld.name)
|
|
1114
1119
|
if m and "INPUT" in (fld.porttype or "").upper():
|
|
1115
|
-
base_name = m.group(1)
|
|
1116
1120
|
group_idx = m.group(2)
|
|
1117
1121
|
if group_idx not in input_groups:
|
|
1118
1122
|
input_groups[group_idx] = []
|
|
@@ -1128,14 +1132,18 @@ def _gen_custom_transform(lines, tx, tx_safe, input_df, input_sources, source_df
|
|
|
1128
1132
|
dfs_to_union.append(df_name)
|
|
1129
1133
|
if len(dfs_to_union) > 1:
|
|
1130
1134
|
df_list = ", ".join(dfs_to_union)
|
|
1131
|
-
|
|
1135
|
+
concat_expr = lib_concat(data_lib, df_list)
|
|
1136
|
+
lines.append(f" df_{tx_safe} = {concat_expr}")
|
|
1132
1137
|
elif dfs_to_union:
|
|
1133
|
-
|
|
1138
|
+
copy_expr = lib_copy(data_lib, dfs_to_union[0])
|
|
1139
|
+
lines.append(f" df_{tx_safe} = {copy_expr}")
|
|
1134
1140
|
else:
|
|
1135
|
-
|
|
1141
|
+
empty_expr = lib_empty_df(data_lib)
|
|
1142
|
+
lines.append(f" df_{tx_safe} = {empty_expr}")
|
|
1136
1143
|
else:
|
|
1137
1144
|
lines.append(f" # Custom transformation: {tx.name}")
|
|
1138
|
-
|
|
1145
|
+
copy_expr = lib_copy(data_lib, input_df)
|
|
1146
|
+
lines.append(f" df_{tx_safe} = {copy_expr}")
|
|
1139
1147
|
|
|
1140
1148
|
source_dfs[tx.name] = f"df_{tx_safe}"
|
|
1141
1149
|
|
{informatica_python-1.4.2 → informatica_python-1.5.1}/informatica_python/generators/workflow_gen.py
RENAMED
|
@@ -51,7 +51,12 @@ def generate_workflow_code(folder: FolderDef) -> str:
|
|
|
51
51
|
|
|
52
52
|
lines.append("")
|
|
53
53
|
lines.append("if __name__ == '__main__':")
|
|
54
|
-
lines.append("
|
|
54
|
+
lines.append(" import argparse as _ap")
|
|
55
|
+
lines.append(" _parser = _ap.ArgumentParser()")
|
|
56
|
+
lines.append(" _parser.add_argument('--param-file', default=None)")
|
|
57
|
+
lines.append(" _parser.add_argument('--config', default='config.yml')")
|
|
58
|
+
lines.append(" _args = _parser.parse_args()")
|
|
59
|
+
lines.append(" config = load_config(_args.config, param_file=_args.param_file)")
|
|
55
60
|
lines.append(" success = run_workflow(config)")
|
|
56
61
|
lines.append(" sys.exit(0 if success else 1)")
|
|
57
62
|
lines.append("")
|