informatica-python 1.5.0__tar.gz → 1.5.2__tar.gz
This diff compares the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
- {informatica_python-1.5.0 → informatica_python-1.5.2}/PKG-INFO +1 -1
- {informatica_python-1.5.0 → informatica_python-1.5.2}/informatica_python/__init__.py +1 -1
- {informatica_python-1.5.0 → informatica_python-1.5.2}/informatica_python/cli.py +1 -0
- {informatica_python-1.5.0 → informatica_python-1.5.2}/informatica_python/converter.py +10 -4
- {informatica_python-1.5.0 → informatica_python-1.5.2}/informatica_python/generators/mapping_gen.py +18 -9
- {informatica_python-1.5.0 → informatica_python-1.5.2}/informatica_python/generators/workflow_gen.py +6 -1
- {informatica_python-1.5.0 → informatica_python-1.5.2}/informatica_python/utils/expression_converter.py +54 -11
- {informatica_python-1.5.0 → informatica_python-1.5.2}/informatica_python.egg-info/PKG-INFO +1 -1
- {informatica_python-1.5.0 → informatica_python-1.5.2}/pyproject.toml +1 -1
- {informatica_python-1.5.0 → informatica_python-1.5.2}/tests/test_integration.py +31 -0
- {informatica_python-1.5.0 → informatica_python-1.5.2}/LICENSE +0 -0
- {informatica_python-1.5.0 → informatica_python-1.5.2}/README.md +0 -0
- {informatica_python-1.5.0 → informatica_python-1.5.2}/informatica_python/generators/__init__.py +0 -0
- {informatica_python-1.5.0 → informatica_python-1.5.2}/informatica_python/generators/config_gen.py +0 -0
- {informatica_python-1.5.0 → informatica_python-1.5.2}/informatica_python/generators/error_log_gen.py +0 -0
- {informatica_python-1.5.0 → informatica_python-1.5.2}/informatica_python/generators/helper_gen.py +0 -0
- {informatica_python-1.5.0 → informatica_python-1.5.2}/informatica_python/generators/sql_gen.py +0 -0
- {informatica_python-1.5.0 → informatica_python-1.5.2}/informatica_python/models.py +0 -0
- {informatica_python-1.5.0 → informatica_python-1.5.2}/informatica_python/parser.py +0 -0
- {informatica_python-1.5.0 → informatica_python-1.5.2}/informatica_python/utils/__init__.py +0 -0
- {informatica_python-1.5.0 → informatica_python-1.5.2}/informatica_python/utils/datatype_map.py +0 -0
- {informatica_python-1.5.0 → informatica_python-1.5.2}/informatica_python/utils/lib_adapters.py +0 -0
- {informatica_python-1.5.0 → informatica_python-1.5.2}/informatica_python.egg-info/SOURCES.txt +0 -0
- {informatica_python-1.5.0 → informatica_python-1.5.2}/informatica_python.egg-info/dependency_links.txt +0 -0
- {informatica_python-1.5.0 → informatica_python-1.5.2}/informatica_python.egg-info/entry_points.txt +0 -0
- {informatica_python-1.5.0 → informatica_python-1.5.2}/informatica_python.egg-info/requires.txt +0 -0
- {informatica_python-1.5.0 → informatica_python-1.5.2}/informatica_python.egg-info/top_level.txt +0 -0
- {informatica_python-1.5.0 → informatica_python-1.5.2}/setup.cfg +0 -0
- {informatica_python-1.5.0 → informatica_python-1.5.2}/tests/test_converter.py +0 -0
|
{informatica_python-1.5.0 → informatica_python-1.5.2}/informatica_python/converter.py
RENAMED
|
@@ -33,7 +33,8 @@ class InformaticaConverter:
|
|
|
33
33
|
return self._powermart_to_dict(self.powermart)
|
|
34
34
|
|
|
35
35
|
def convert(self, file_path: str, output_dir: str = "output",
|
|
36
|
-
output_zip: Optional[str] = None
|
|
36
|
+
output_zip: Optional[str] = None,
|
|
37
|
+
param_file: Optional[str] = None) -> str:
|
|
37
38
|
self.powermart = self.parser.parse_file(file_path)
|
|
38
39
|
|
|
39
40
|
if not self.powermart.repositories:
|
|
@@ -47,7 +48,7 @@ class InformaticaConverter:
|
|
|
47
48
|
raise ValueError("No folder found in XML file")
|
|
48
49
|
|
|
49
50
|
if len(all_folders) == 1:
|
|
50
|
-
return self._convert_folder(all_folders[0], output_dir, output_zip)
|
|
51
|
+
return self._convert_folder(all_folders[0], output_dir, output_zip, param_file)
|
|
51
52
|
|
|
52
53
|
result_path = output_dir if not output_zip else os.path.dirname(output_zip) or "."
|
|
53
54
|
for folder in all_folders:
|
|
@@ -56,7 +57,7 @@ class InformaticaConverter:
|
|
|
56
57
|
if output_zip:
|
|
57
58
|
base, ext = os.path.splitext(output_zip)
|
|
58
59
|
folder_zip = f"{base}_{folder.name}{ext}"
|
|
59
|
-
self._convert_folder(folder, folder_dir, folder_zip)
|
|
60
|
+
self._convert_folder(folder, folder_dir, folder_zip, param_file)
|
|
60
61
|
return result_path
|
|
61
62
|
|
|
62
63
|
def convert_string(self, xml_string: str, output_dir: str = "output",
|
|
@@ -87,7 +88,12 @@ class InformaticaConverter:
|
|
|
87
88
|
return result_path
|
|
88
89
|
|
|
89
90
|
def _convert_folder(self, folder: FolderDef, output_dir: str,
|
|
90
|
-
output_zip: Optional[str] = None
|
|
91
|
+
output_zip: Optional[str] = None,
|
|
92
|
+
param_file: Optional[str] = None) -> str:
|
|
93
|
+
if param_file:
|
|
94
|
+
from informatica_python.utils.expression_converter import parse_param_file
|
|
95
|
+
parse_param_file(param_file)
|
|
96
|
+
|
|
91
97
|
files = {}
|
|
92
98
|
|
|
93
99
|
files["helper_functions.py"] = generate_helper_functions(folder, self.data_lib)
|
{informatica_python-1.5.0 → informatica_python-1.5.2}/informatica_python/generators/mapping_gen.py
RENAMED
|
@@ -283,7 +283,12 @@ def generate_mapping_code(mapping: MappingDef, folder: FolderDef,
|
|
|
283
283
|
lines.append("")
|
|
284
284
|
lines.append("")
|
|
285
285
|
lines.append("if __name__ == '__main__':")
|
|
286
|
-
lines.append("
|
|
286
|
+
lines.append(" import argparse as _ap")
|
|
287
|
+
lines.append(" _parser = _ap.ArgumentParser()")
|
|
288
|
+
lines.append(" _parser.add_argument('--param-file', default=None)")
|
|
289
|
+
lines.append(" _parser.add_argument('--config', default='config.yml')")
|
|
290
|
+
lines.append(" _args = _parser.parse_args()")
|
|
291
|
+
lines.append(" config = load_config(_args.config, param_file=_args.param_file)")
|
|
287
292
|
lines.append(f" run_{_safe_name(mapping.name)}(config)")
|
|
288
293
|
lines.append("")
|
|
289
294
|
|
|
@@ -590,9 +595,9 @@ def _generate_transformation(lines, tx, connector_graph, source_dfs, transform_m
|
|
|
590
595
|
lines.append(f" # Transformation: {tx.name} (Type: {tx.type})")
|
|
591
596
|
|
|
592
597
|
if tx_type == "expression":
|
|
593
|
-
_gen_expression_transform(lines, tx, tx_safe, input_df, source_dfs)
|
|
598
|
+
_gen_expression_transform(lines, tx, tx_safe, input_df, source_dfs, data_lib)
|
|
594
599
|
elif tx_type == "filter":
|
|
595
|
-
_gen_filter_transform(lines, tx, tx_safe, input_df, source_dfs)
|
|
600
|
+
_gen_filter_transform(lines, tx, tx_safe, input_df, source_dfs, data_lib)
|
|
596
601
|
elif tx_type in ("aggregator",):
|
|
597
602
|
_gen_aggregator_transform(lines, tx, tx_safe, input_df, source_dfs, data_lib)
|
|
598
603
|
elif tx_type == "sorter":
|
|
@@ -630,8 +635,9 @@ def _generate_transformation(lines, tx, connector_graph, source_dfs, transform_m
|
|
|
630
635
|
lines.append("")
|
|
631
636
|
|
|
632
637
|
|
|
633
|
-
def _gen_expression_transform(lines, tx, tx_safe, input_df, source_dfs):
|
|
634
|
-
|
|
638
|
+
def _gen_expression_transform(lines, tx, tx_safe, input_df, source_dfs, data_lib="pandas"):
|
|
639
|
+
copy_expr = lib_copy(data_lib, input_df)
|
|
640
|
+
lines.append(f" df_{tx_safe} = {copy_expr}")
|
|
635
641
|
has_expressions = False
|
|
636
642
|
for fld in tx.fields:
|
|
637
643
|
if fld.expression and fld.expression.strip() and fld.expression.strip() != fld.name:
|
|
@@ -647,7 +653,7 @@ def _gen_expression_transform(lines, tx, tx_safe, input_df, source_dfs):
|
|
|
647
653
|
source_dfs[tx.name] = f"df_{tx_safe}"
|
|
648
654
|
|
|
649
655
|
|
|
650
|
-
def _gen_filter_transform(lines, tx, tx_safe, input_df, source_dfs):
|
|
656
|
+
def _gen_filter_transform(lines, tx, tx_safe, input_df, source_dfs, data_lib="pandas"):
|
|
651
657
|
filter_condition = ""
|
|
652
658
|
for attr in tx.attributes:
|
|
653
659
|
if attr.name == "Filter Condition":
|
|
@@ -655,9 +661,11 @@ def _gen_filter_transform(lines, tx, tx_safe, input_df, source_dfs):
|
|
|
655
661
|
if filter_condition:
|
|
656
662
|
expr_vec = convert_filter_vectorized(filter_condition, input_df)
|
|
657
663
|
lines.append(f" # Filter: {filter_condition}")
|
|
658
|
-
|
|
664
|
+
copy_expr = lib_copy(data_lib, f"{input_df}[{expr_vec}]")
|
|
665
|
+
lines.append(f" df_{tx_safe} = {copy_expr}")
|
|
659
666
|
else:
|
|
660
|
-
|
|
667
|
+
copy_expr = lib_copy(data_lib, input_df)
|
|
668
|
+
lines.append(f" df_{tx_safe} = {copy_expr}")
|
|
661
669
|
source_dfs[tx.name] = f"df_{tx_safe}"
|
|
662
670
|
|
|
663
671
|
|
|
@@ -845,7 +853,8 @@ def _gen_lookup_transform(lines, tx, tx_safe, input_df, source_dfs, data_lib="pa
|
|
|
845
853
|
elif lookup_table:
|
|
846
854
|
lines.append(f" df_lkp_{tx_safe} = read_from_db(config, 'SELECT * FROM {lookup_table}', 'default')")
|
|
847
855
|
else:
|
|
848
|
-
|
|
856
|
+
empty_expr = lib_empty_df(data_lib)
|
|
857
|
+
lines.append(f" df_lkp_{tx_safe} = {empty_expr}")
|
|
849
858
|
|
|
850
859
|
input_keys, lookup_keys = parse_lookup_condition(lookup_condition)
|
|
851
860
|
|
{informatica_python-1.5.0 → informatica_python-1.5.2}/informatica_python/generators/workflow_gen.py
RENAMED
|
@@ -51,7 +51,12 @@ def generate_workflow_code(folder: FolderDef) -> str:
|
|
|
51
51
|
|
|
52
52
|
lines.append("")
|
|
53
53
|
lines.append("if __name__ == '__main__':")
|
|
54
|
-
lines.append("
|
|
54
|
+
lines.append(" import argparse as _ap")
|
|
55
|
+
lines.append(" _parser = _ap.ArgumentParser()")
|
|
56
|
+
lines.append(" _parser.add_argument('--param-file', default=None)")
|
|
57
|
+
lines.append(" _parser.add_argument('--config', default='config.yml')")
|
|
58
|
+
lines.append(" _args = _parser.parse_args()")
|
|
59
|
+
lines.append(" config = load_config(_args.config, param_file=_args.param_file)")
|
|
55
60
|
lines.append(" success = run_workflow(config)")
|
|
56
61
|
lines.append(" sys.exit(0 if success else 1)")
|
|
57
62
|
lines.append("")
|
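{informatica_python-1.5.0 → informatica_python-1.5.2}/informatica_python/utils/expression_converter.py
RENAMED
|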
@@ -295,26 +295,69 @@ def _vectorize_value(val, df_var="df"):
|
|
|
295
295
|
return val
|
|
296
296
|
|
|
297
297
|
|
|
298
|
-
def
|
|
299
|
-
c =
|
|
298
|
+
def _vectorize_simple(part, df_var):
|
|
299
|
+
c = part.strip()
|
|
300
|
+
|
|
300
301
|
c = re.sub(r'\bISNULL\s*\(\s*([A-Za-z_]\w*)\s*\)',
|
|
301
302
|
lambda m: f'{df_var}["{m.group(1)}"].isna()', c, flags=re.IGNORECASE)
|
|
302
|
-
c = re.sub(r'\b([A-Za-z_]\w*)\s*IS\s+NULL\b',
|
|
303
|
-
lambda m: f'{df_var}["{m.group(1)}"].isna()', c, flags=re.IGNORECASE)
|
|
304
303
|
c = re.sub(r'\b([A-Za-z_]\w*)\s*IS\s+NOT\s+NULL\b',
|
|
305
304
|
lambda m: f'{df_var}["{m.group(1)}"].notna()', c, flags=re.IGNORECASE)
|
|
306
|
-
c = re.sub(r'
|
|
307
|
-
lambda m: f'{df_var}["{m.group(1)}"]'
|
|
308
|
-
|
|
309
|
-
'str', 'int', 'float',
|
|
310
|
-
) else m.group(1), c)
|
|
305
|
+
c = re.sub(r'\b([A-Za-z_]\w*)\s*IS\s+NULL\b',
|
|
306
|
+
lambda m: f'{df_var}["{m.group(1)}"].isna()', c, flags=re.IGNORECASE)
|
|
307
|
+
|
|
311
308
|
c = re.sub(r'<>', '!=', c)
|
|
312
309
|
c = re.sub(r'(?<![<>!=])=(?!=)', '==', c)
|
|
313
|
-
|
|
314
|
-
|
|
310
|
+
|
|
311
|
+
skip_words = {
|
|
312
|
+
'True', 'False', 'None', 'and', 'or', 'not', 'np', 'pd',
|
|
313
|
+
'str', 'int', 'float', 'isna', 'notna', 'fillna',
|
|
314
|
+
}
|
|
315
|
+
df_base = re.escape(df_var)
|
|
316
|
+
c = re.sub(r'(?<!["\w])(?!' + df_base + r'\b)([A-Za-z_][A-Za-z0-9_]*)(?!["\w(.\[])',
|
|
317
|
+
lambda m: f'{df_var}["{m.group(1)}"]' if m.group(1) not in skip_words else m.group(1),
|
|
318
|
+
c)
|
|
315
319
|
return c
|
|
316
320
|
|
|
317
321
|
|
|
322
|
+
def _vectorize_condition(cond, df_var="df"):
|
|
323
|
+
c = cond.strip()
|
|
324
|
+
|
|
325
|
+
tokens = re.split(r'\b(AND|OR)\b', c, flags=re.IGNORECASE)
|
|
326
|
+
|
|
327
|
+
parts = []
|
|
328
|
+
ops = []
|
|
329
|
+
for tok in tokens:
|
|
330
|
+
stripped = tok.strip()
|
|
331
|
+
if stripped.upper() in ('AND', 'OR'):
|
|
332
|
+
ops.append('&' if stripped.upper() == 'AND' else '|')
|
|
333
|
+
elif stripped:
|
|
334
|
+
parts.append(stripped)
|
|
335
|
+
|
|
336
|
+
if not parts:
|
|
337
|
+
return "True"
|
|
338
|
+
|
|
339
|
+
vectorized = []
|
|
340
|
+
for part in parts:
|
|
341
|
+
negate = False
|
|
342
|
+
inner = part.strip()
|
|
343
|
+
if re.match(r'^NOT\s+', inner, flags=re.IGNORECASE):
|
|
344
|
+
negate = True
|
|
345
|
+
inner = re.sub(r'^NOT\s+', '', inner, flags=re.IGNORECASE).strip()
|
|
346
|
+
v = _vectorize_simple(inner, df_var)
|
|
347
|
+
if negate:
|
|
348
|
+
v = f"~({v})"
|
|
349
|
+
vectorized.append(v)
|
|
350
|
+
|
|
351
|
+
if len(vectorized) == 1:
|
|
352
|
+
return vectorized[0]
|
|
353
|
+
|
|
354
|
+
result_parts = [f"({vectorized[0]})"]
|
|
355
|
+
for i, op in enumerate(ops):
|
|
356
|
+
result_parts.append(f" {op} ")
|
|
357
|
+
result_parts.append(f"({vectorized[i + 1]})")
|
|
358
|
+
return "".join(result_parts)
|
|
359
|
+
|
|
360
|
+
|
|
318
361
|
def convert_filter_expression(expr):
|
|
319
362
|
if not expr or not expr.strip():
|
|
320
363
|
return "True"
|
{informatica_python-1.5.0 → informatica_python-1.5.2}/tests/test_integration.py
RENAMED
|
@@ -202,15 +202,46 @@ class TestFilterVectorized:
|
|
|
202
202
|
result = convert_filter_vectorized("VALUE > 50", "df_src")
|
|
203
203
|
assert ">" in result
|
|
204
204
|
assert "50" in result
|
|
205
|
+
assert 'df_src["VALUE"]' in result
|
|
206
|
+
|
|
207
|
+
def test_and_condition(self):
|
|
208
|
+
result = convert_filter_vectorized("A > 1 AND B < 2", "df")
|
|
209
|
+
assert "&" in result
|
|
210
|
+
assert 'df["A"]' in result
|
|
211
|
+
assert 'df["B"]' in result
|
|
212
|
+
assert "AND" not in result
|
|
213
|
+
assert "(df[" in result
|
|
214
|
+
|
|
215
|
+
def test_or_condition(self):
|
|
216
|
+
result = convert_filter_vectorized("STATUS = 'A' OR STATUS = 'B'", "df")
|
|
217
|
+
assert "|" in result
|
|
218
|
+
assert "(df[" in result
|
|
219
|
+
|
|
220
|
+
def test_not_condition(self):
|
|
221
|
+
result = convert_filter_vectorized("NOT A = 1", "df")
|
|
222
|
+
assert "~(" in result
|
|
223
|
+
assert 'df["A"]' in result
|
|
224
|
+
assert "==" in result
|
|
205
225
|
|
|
206
226
|
def test_is_null_filter(self):
|
|
207
227
|
result = convert_filter_vectorized("NAME IS NULL", "df_src")
|
|
208
228
|
assert ".isna()" in result
|
|
229
|
+
assert 'df_src["NAME"]' in result
|
|
230
|
+
|
|
231
|
+
def test_is_not_null_filter(self):
|
|
232
|
+
result = convert_filter_vectorized("NAME IS NOT NULL", "df_src")
|
|
233
|
+
assert ".notna()" in result
|
|
209
234
|
|
|
210
235
|
def test_empty_filter(self):
|
|
211
236
|
assert convert_filter_vectorized("") == "True"
|
|
212
237
|
assert convert_filter_vectorized(None) == "True"
|
|
213
238
|
|
|
239
|
+
def test_compound_iif(self):
|
|
240
|
+
result = convert_expression_vectorized("IIF(A > 1 AND B < 2, 1, 0)", "df")
|
|
241
|
+
assert "np.where" in result
|
|
242
|
+
assert "&" in result
|
|
243
|
+
assert "(" in result
|
|
244
|
+
|
|
214
245
|
|
|
215
246
|
class TestLibAdapters:
|
|
216
247
|
|
|
File without changes
|
|
File without changes
|
{informatica_python-1.5.0 → informatica_python-1.5.2}/informatica_python/generators/__init__.py
RENAMED
|
File without changes
|
{informatica_python-1.5.0 → informatica_python-1.5.2}/informatica_python/generators/config_gen.py
RENAMED
|
File without changes
|
{informatica_python-1.5.0 → informatica_python-1.5.2}/informatica_python/generators/error_log_gen.py
RENAMED
|
File without changes
|
{informatica_python-1.5.0 → informatica_python-1.5.2}/informatica_python/generators/helper_gen.py
RENAMED
|
File without changes
|
{informatica_python-1.5.0 → informatica_python-1.5.2}/informatica_python/generators/sql_gen.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{informatica_python-1.5.0 → informatica_python-1.5.2}/informatica_python/utils/datatype_map.py
RENAMED
|
File without changes
|
{informatica_python-1.5.0 → informatica_python-1.5.2}/informatica_python/utils/lib_adapters.py
RENAMED
|
File without changes
|
{informatica_python-1.5.0 → informatica_python-1.5.2}/informatica_python.egg-info/SOURCES.txt
RENAMED
|
File without changes
|
|
File without changes
|
{informatica_python-1.5.0 → informatica_python-1.5.2}/informatica_python.egg-info/entry_points.txt
RENAMED
|
File without changes
|
{informatica_python-1.5.0 → informatica_python-1.5.2}/informatica_python.egg-info/requires.txt
RENAMED
|
File without changes
|
{informatica_python-1.5.0 → informatica_python-1.5.2}/informatica_python.egg-info/top_level.txt
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|