informatica-python 1.5.0__tar.gz → 1.5.1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {informatica_python-1.5.0 → informatica_python-1.5.1}/PKG-INFO +1 -1
- {informatica_python-1.5.0 → informatica_python-1.5.1}/informatica_python/__init__.py +1 -1
- {informatica_python-1.5.0 → informatica_python-1.5.1}/informatica_python/cli.py +1 -0
- {informatica_python-1.5.0 → informatica_python-1.5.1}/informatica_python/converter.py +6 -4
- {informatica_python-1.5.0 → informatica_python-1.5.1}/informatica_python/generators/mapping_gen.py +18 -9
- {informatica_python-1.5.0 → informatica_python-1.5.1}/informatica_python/generators/workflow_gen.py +6 -1
- {informatica_python-1.5.0 → informatica_python-1.5.1}/informatica_python/utils/expression_converter.py +16 -9
- {informatica_python-1.5.0 → informatica_python-1.5.1}/informatica_python.egg-info/PKG-INFO +1 -1
- {informatica_python-1.5.0 → informatica_python-1.5.1}/pyproject.toml +1 -1
- {informatica_python-1.5.0 → informatica_python-1.5.1}/tests/test_integration.py +22 -0
- {informatica_python-1.5.0 → informatica_python-1.5.1}/LICENSE +0 -0
- {informatica_python-1.5.0 → informatica_python-1.5.1}/README.md +0 -0
- {informatica_python-1.5.0 → informatica_python-1.5.1}/informatica_python/generators/__init__.py +0 -0
- {informatica_python-1.5.0 → informatica_python-1.5.1}/informatica_python/generators/config_gen.py +0 -0
- {informatica_python-1.5.0 → informatica_python-1.5.1}/informatica_python/generators/error_log_gen.py +0 -0
- {informatica_python-1.5.0 → informatica_python-1.5.1}/informatica_python/generators/helper_gen.py +0 -0
- {informatica_python-1.5.0 → informatica_python-1.5.1}/informatica_python/generators/sql_gen.py +0 -0
- {informatica_python-1.5.0 → informatica_python-1.5.1}/informatica_python/models.py +0 -0
- {informatica_python-1.5.0 → informatica_python-1.5.1}/informatica_python/parser.py +0 -0
- {informatica_python-1.5.0 → informatica_python-1.5.1}/informatica_python/utils/__init__.py +0 -0
- {informatica_python-1.5.0 → informatica_python-1.5.1}/informatica_python/utils/datatype_map.py +0 -0
- {informatica_python-1.5.0 → informatica_python-1.5.1}/informatica_python/utils/lib_adapters.py +0 -0
- {informatica_python-1.5.0 → informatica_python-1.5.1}/informatica_python.egg-info/SOURCES.txt +0 -0
- {informatica_python-1.5.0 → informatica_python-1.5.1}/informatica_python.egg-info/dependency_links.txt +0 -0
- {informatica_python-1.5.0 → informatica_python-1.5.1}/informatica_python.egg-info/entry_points.txt +0 -0
- {informatica_python-1.5.0 → informatica_python-1.5.1}/informatica_python.egg-info/requires.txt +0 -0
- {informatica_python-1.5.0 → informatica_python-1.5.1}/informatica_python.egg-info/top_level.txt +0 -0
- {informatica_python-1.5.0 → informatica_python-1.5.1}/setup.cfg +0 -0
- {informatica_python-1.5.0 → informatica_python-1.5.1}/tests/test_converter.py +0 -0
{informatica_python-1.5.0 → informatica_python-1.5.1}/informatica_python/converter.py
RENAMED
|
@@ -33,7 +33,8 @@ class InformaticaConverter:
|
|
|
33
33
|
return self._powermart_to_dict(self.powermart)
|
|
34
34
|
|
|
35
35
|
def convert(self, file_path: str, output_dir: str = "output",
|
|
36
|
-
output_zip: Optional[str] = None
|
|
36
|
+
output_zip: Optional[str] = None,
|
|
37
|
+
param_file: Optional[str] = None) -> str:
|
|
37
38
|
self.powermart = self.parser.parse_file(file_path)
|
|
38
39
|
|
|
39
40
|
if not self.powermart.repositories:
|
|
@@ -47,7 +48,7 @@ class InformaticaConverter:
|
|
|
47
48
|
raise ValueError("No folder found in XML file")
|
|
48
49
|
|
|
49
50
|
if len(all_folders) == 1:
|
|
50
|
-
return self._convert_folder(all_folders[0], output_dir, output_zip)
|
|
51
|
+
return self._convert_folder(all_folders[0], output_dir, output_zip, param_file)
|
|
51
52
|
|
|
52
53
|
result_path = output_dir if not output_zip else os.path.dirname(output_zip) or "."
|
|
53
54
|
for folder in all_folders:
|
|
@@ -56,7 +57,7 @@ class InformaticaConverter:
|
|
|
56
57
|
if output_zip:
|
|
57
58
|
base, ext = os.path.splitext(output_zip)
|
|
58
59
|
folder_zip = f"{base}_{folder.name}{ext}"
|
|
59
|
-
self._convert_folder(folder, folder_dir, folder_zip)
|
|
60
|
+
self._convert_folder(folder, folder_dir, folder_zip, param_file)
|
|
60
61
|
return result_path
|
|
61
62
|
|
|
62
63
|
def convert_string(self, xml_string: str, output_dir: str = "output",
|
|
@@ -87,7 +88,8 @@ class InformaticaConverter:
|
|
|
87
88
|
return result_path
|
|
88
89
|
|
|
89
90
|
def _convert_folder(self, folder: FolderDef, output_dir: str,
|
|
90
|
-
output_zip: Optional[str] = None
|
|
91
|
+
output_zip: Optional[str] = None,
|
|
92
|
+
param_file: Optional[str] = None) -> str:
|
|
91
93
|
files = {}
|
|
92
94
|
|
|
93
95
|
files["helper_functions.py"] = generate_helper_functions(folder, self.data_lib)
|
{informatica_python-1.5.0 → informatica_python-1.5.1}/informatica_python/generators/mapping_gen.py
RENAMED
|
@@ -283,7 +283,12 @@ def generate_mapping_code(mapping: MappingDef, folder: FolderDef,
|
|
|
283
283
|
lines.append("")
|
|
284
284
|
lines.append("")
|
|
285
285
|
lines.append("if __name__ == '__main__':")
|
|
286
|
-
lines.append("
|
|
286
|
+
lines.append(" import argparse as _ap")
|
|
287
|
+
lines.append(" _parser = _ap.ArgumentParser()")
|
|
288
|
+
lines.append(" _parser.add_argument('--param-file', default=None)")
|
|
289
|
+
lines.append(" _parser.add_argument('--config', default='config.yml')")
|
|
290
|
+
lines.append(" _args = _parser.parse_args()")
|
|
291
|
+
lines.append(" config = load_config(_args.config, param_file=_args.param_file)")
|
|
287
292
|
lines.append(f" run_{_safe_name(mapping.name)}(config)")
|
|
288
293
|
lines.append("")
|
|
289
294
|
|
|
@@ -590,9 +595,9 @@ def _generate_transformation(lines, tx, connector_graph, source_dfs, transform_m
|
|
|
590
595
|
lines.append(f" # Transformation: {tx.name} (Type: {tx.type})")
|
|
591
596
|
|
|
592
597
|
if tx_type == "expression":
|
|
593
|
-
_gen_expression_transform(lines, tx, tx_safe, input_df, source_dfs)
|
|
598
|
+
_gen_expression_transform(lines, tx, tx_safe, input_df, source_dfs, data_lib)
|
|
594
599
|
elif tx_type == "filter":
|
|
595
|
-
_gen_filter_transform(lines, tx, tx_safe, input_df, source_dfs)
|
|
600
|
+
_gen_filter_transform(lines, tx, tx_safe, input_df, source_dfs, data_lib)
|
|
596
601
|
elif tx_type in ("aggregator",):
|
|
597
602
|
_gen_aggregator_transform(lines, tx, tx_safe, input_df, source_dfs, data_lib)
|
|
598
603
|
elif tx_type == "sorter":
|
|
@@ -630,8 +635,9 @@ def _generate_transformation(lines, tx, connector_graph, source_dfs, transform_m
|
|
|
630
635
|
lines.append("")
|
|
631
636
|
|
|
632
637
|
|
|
633
|
-
def _gen_expression_transform(lines, tx, tx_safe, input_df, source_dfs):
|
|
634
|
-
|
|
638
|
+
def _gen_expression_transform(lines, tx, tx_safe, input_df, source_dfs, data_lib="pandas"):
|
|
639
|
+
copy_expr = lib_copy(data_lib, input_df)
|
|
640
|
+
lines.append(f" df_{tx_safe} = {copy_expr}")
|
|
635
641
|
has_expressions = False
|
|
636
642
|
for fld in tx.fields:
|
|
637
643
|
if fld.expression and fld.expression.strip() and fld.expression.strip() != fld.name:
|
|
@@ -647,7 +653,7 @@ def _gen_expression_transform(lines, tx, tx_safe, input_df, source_dfs):
|
|
|
647
653
|
source_dfs[tx.name] = f"df_{tx_safe}"
|
|
648
654
|
|
|
649
655
|
|
|
650
|
-
def _gen_filter_transform(lines, tx, tx_safe, input_df, source_dfs):
|
|
656
|
+
def _gen_filter_transform(lines, tx, tx_safe, input_df, source_dfs, data_lib="pandas"):
|
|
651
657
|
filter_condition = ""
|
|
652
658
|
for attr in tx.attributes:
|
|
653
659
|
if attr.name == "Filter Condition":
|
|
@@ -655,9 +661,11 @@ def _gen_filter_transform(lines, tx, tx_safe, input_df, source_dfs):
|
|
|
655
661
|
if filter_condition:
|
|
656
662
|
expr_vec = convert_filter_vectorized(filter_condition, input_df)
|
|
657
663
|
lines.append(f" # Filter: {filter_condition}")
|
|
658
|
-
|
|
664
|
+
copy_expr = lib_copy(data_lib, f"{input_df}[{expr_vec}]")
|
|
665
|
+
lines.append(f" df_{tx_safe} = {copy_expr}")
|
|
659
666
|
else:
|
|
660
|
-
|
|
667
|
+
copy_expr = lib_copy(data_lib, input_df)
|
|
668
|
+
lines.append(f" df_{tx_safe} = {copy_expr}")
|
|
661
669
|
source_dfs[tx.name] = f"df_{tx_safe}"
|
|
662
670
|
|
|
663
671
|
|
|
@@ -845,7 +853,8 @@ def _gen_lookup_transform(lines, tx, tx_safe, input_df, source_dfs, data_lib="pa
|
|
|
845
853
|
elif lookup_table:
|
|
846
854
|
lines.append(f" df_lkp_{tx_safe} = read_from_db(config, 'SELECT * FROM {lookup_table}', 'default')")
|
|
847
855
|
else:
|
|
848
|
-
|
|
856
|
+
empty_expr = lib_empty_df(data_lib)
|
|
857
|
+
lines.append(f" df_lkp_{tx_safe} = {empty_expr}")
|
|
849
858
|
|
|
850
859
|
input_keys, lookup_keys = parse_lookup_condition(lookup_condition)
|
|
851
860
|
|
{informatica_python-1.5.0 → informatica_python-1.5.1}/informatica_python/generators/workflow_gen.py
RENAMED
|
@@ -51,7 +51,12 @@ def generate_workflow_code(folder: FolderDef) -> str:
|
|
|
51
51
|
|
|
52
52
|
lines.append("")
|
|
53
53
|
lines.append("if __name__ == '__main__':")
|
|
54
|
-
lines.append("
|
|
54
|
+
lines.append(" import argparse as _ap")
|
|
55
|
+
lines.append(" _parser = _ap.ArgumentParser()")
|
|
56
|
+
lines.append(" _parser.add_argument('--param-file', default=None)")
|
|
57
|
+
lines.append(" _parser.add_argument('--config', default='config.yml')")
|
|
58
|
+
lines.append(" _args = _parser.parse_args()")
|
|
59
|
+
lines.append(" config = load_config(_args.config, param_file=_args.param_file)")
|
|
55
60
|
lines.append(" success = run_workflow(config)")
|
|
56
61
|
lines.append(" sys.exit(0 if success else 1)")
|
|
57
62
|
lines.append("")
|
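{informatica_python-1.5.0 → informatica_python-1.5.1}/informatica_python/utils/expression_converter.py
RENAMED
|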
@@ -297,21 +297,28 @@ def _vectorize_value(val, df_var="df"):
|
|
|
297
297
|
|
|
298
298
|
def _vectorize_condition(cond, df_var="df"):
|
|
299
299
|
c = cond.strip()
|
|
300
|
+
|
|
300
301
|
c = re.sub(r'\bISNULL\s*\(\s*([A-Za-z_]\w*)\s*\)',
|
|
301
302
|
lambda m: f'{df_var}["{m.group(1)}"].isna()', c, flags=re.IGNORECASE)
|
|
302
|
-
c = re.sub(r'\b([A-Za-z_]\w*)\s*IS\s+NULL\b',
|
|
303
|
-
lambda m: f'{df_var}["{m.group(1)}"].isna()', c, flags=re.IGNORECASE)
|
|
304
303
|
c = re.sub(r'\b([A-Za-z_]\w*)\s*IS\s+NOT\s+NULL\b',
|
|
305
304
|
lambda m: f'{df_var}["{m.group(1)}"].notna()', c, flags=re.IGNORECASE)
|
|
306
|
-
c = re.sub(r'
|
|
307
|
-
lambda m: f'{df_var}["{m.group(1)}"]'
|
|
308
|
-
|
|
309
|
-
'str', 'int', 'float',
|
|
310
|
-
) else m.group(1), c)
|
|
311
|
-
c = re.sub(r'<>', '!=', c)
|
|
312
|
-
c = re.sub(r'(?<![<>!=])=(?!=)', '==', c)
|
|
305
|
+
c = re.sub(r'\b([A-Za-z_]\w*)\s*IS\s+NULL\b',
|
|
306
|
+
lambda m: f'{df_var}["{m.group(1)}"].isna()', c, flags=re.IGNORECASE)
|
|
307
|
+
|
|
313
308
|
c = re.sub(r'\bAND\b', ' & ', c, flags=re.IGNORECASE)
|
|
314
309
|
c = re.sub(r'\bOR\b', ' | ', c, flags=re.IGNORECASE)
|
|
310
|
+
c = re.sub(r'\bNOT\s+', ' ~', c, flags=re.IGNORECASE)
|
|
311
|
+
c = re.sub(r'<>', '!=', c)
|
|
312
|
+
c = re.sub(r'(?<![<>!=])=(?!=)', '==', c)
|
|
313
|
+
|
|
314
|
+
skip_words = {
|
|
315
|
+
'True', 'False', 'None', 'and', 'or', 'not', 'np', 'pd',
|
|
316
|
+
'str', 'int', 'float', 'isna', 'notna', 'fillna',
|
|
317
|
+
}
|
|
318
|
+
df_base = re.escape(df_var)
|
|
319
|
+
c = re.sub(r'(?<!["\w])(?!' + df_base + r'\b)([A-Za-z_][A-Za-z0-9_]*)(?!["\w(.\[])',
|
|
320
|
+
lambda m: f'{df_var}["{m.group(1)}"]' if m.group(1) not in skip_words else m.group(1),
|
|
321
|
+
c)
|
|
315
322
|
return c
|
|
316
323
|
|
|
317
324
|
|
{informatica_python-1.5.0 → informatica_python-1.5.1}/tests/test_integration.py
RENAMED
|
@@ -202,15 +202,37 @@ class TestFilterVectorized:
|
|
|
202
202
|
result = convert_filter_vectorized("VALUE > 50", "df_src")
|
|
203
203
|
assert ">" in result
|
|
204
204
|
assert "50" in result
|
|
205
|
+
assert 'df_src["VALUE"]' in result
|
|
206
|
+
|
|
207
|
+
def test_and_condition(self):
|
|
208
|
+
result = convert_filter_vectorized("A > 1 AND B < 2", "df")
|
|
209
|
+
assert "&" in result
|
|
210
|
+
assert 'df["A"]' in result
|
|
211
|
+
assert 'df["B"]' in result
|
|
212
|
+
assert "AND" not in result
|
|
213
|
+
|
|
214
|
+
def test_or_condition(self):
|
|
215
|
+
result = convert_filter_vectorized("STATUS = 'A' OR STATUS = 'B'", "df")
|
|
216
|
+
assert "|" in result
|
|
205
217
|
|
|
206
218
|
def test_is_null_filter(self):
|
|
207
219
|
result = convert_filter_vectorized("NAME IS NULL", "df_src")
|
|
208
220
|
assert ".isna()" in result
|
|
221
|
+
assert 'df_src["NAME"]' in result
|
|
222
|
+
|
|
223
|
+
def test_is_not_null_filter(self):
|
|
224
|
+
result = convert_filter_vectorized("NAME IS NOT NULL", "df_src")
|
|
225
|
+
assert ".notna()" in result
|
|
209
226
|
|
|
210
227
|
def test_empty_filter(self):
|
|
211
228
|
assert convert_filter_vectorized("") == "True"
|
|
212
229
|
assert convert_filter_vectorized(None) == "True"
|
|
213
230
|
|
|
231
|
+
def test_compound_iif(self):
|
|
232
|
+
result = convert_expression_vectorized("IIF(A > 1 AND B < 2, 1, 0)", "df")
|
|
233
|
+
assert "np.where" in result
|
|
234
|
+
assert "&" in result
|
|
235
|
+
|
|
214
236
|
|
|
215
237
|
class TestLibAdapters:
|
|
216
238
|
|
|
File without changes
|
|
File without changes
|
{informatica_python-1.5.0 → informatica_python-1.5.1}/informatica_python/generators/__init__.py
RENAMED
|
File without changes
|
{informatica_python-1.5.0 → informatica_python-1.5.1}/informatica_python/generators/config_gen.py
RENAMED
|
File without changes
|
{informatica_python-1.5.0 → informatica_python-1.5.1}/informatica_python/generators/error_log_gen.py
RENAMED
|
File without changes
|
{informatica_python-1.5.0 → informatica_python-1.5.1}/informatica_python/generators/helper_gen.py
RENAMED
|
File without changes
|
{informatica_python-1.5.0 → informatica_python-1.5.1}/informatica_python/generators/sql_gen.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{informatica_python-1.5.0 → informatica_python-1.5.1}/informatica_python/utils/datatype_map.py
RENAMED
|
File without changes
|
{informatica_python-1.5.0 → informatica_python-1.5.1}/informatica_python/utils/lib_adapters.py
RENAMED
|
File without changes
|
{informatica_python-1.5.0 → informatica_python-1.5.1}/informatica_python.egg-info/SOURCES.txt
RENAMED
|
File without changes
|
|
File without changes
|
{informatica_python-1.5.0 → informatica_python-1.5.1}/informatica_python.egg-info/entry_points.txt
RENAMED
|
File without changes
|
{informatica_python-1.5.0 → informatica_python-1.5.1}/informatica_python.egg-info/requires.txt
RENAMED
|
File without changes
|
{informatica_python-1.5.0 → informatica_python-1.5.1}/informatica_python.egg-info/top_level.txt
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|