informatica-python 1.9.5__tar.gz → 1.9.7__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {informatica_python-1.9.5 → informatica_python-1.9.7}/PKG-INFO +1 -1
- {informatica_python-1.9.5 → informatica_python-1.9.7}/informatica_python/__init__.py +1 -1
- {informatica_python-1.9.5 → informatica_python-1.9.7}/informatica_python/generators/helper_gen.py +1 -1
- {informatica_python-1.9.5 → informatica_python-1.9.7}/informatica_python/generators/mapping_gen.py +18 -13
- {informatica_python-1.9.5 → informatica_python-1.9.7}/informatica_python/models.py +3 -0
- {informatica_python-1.9.5 → informatica_python-1.9.7}/informatica_python/parser.py +3 -0
- {informatica_python-1.9.5 → informatica_python-1.9.7}/informatica_python/utils/expression_converter.py +40 -11
- {informatica_python-1.9.5 → informatica_python-1.9.7}/informatica_python.egg-info/PKG-INFO +1 -1
- {informatica_python-1.9.5 → informatica_python-1.9.7}/pyproject.toml +1 -1
- {informatica_python-1.9.5 → informatica_python-1.9.7}/tests/test_integration.py +204 -1
- {informatica_python-1.9.5 → informatica_python-1.9.7}/LICENSE +0 -0
- {informatica_python-1.9.5 → informatica_python-1.9.7}/README.md +0 -0
- {informatica_python-1.9.5 → informatica_python-1.9.7}/informatica_python/cli.py +0 -0
- {informatica_python-1.9.5 → informatica_python-1.9.7}/informatica_python/converter.py +0 -0
- {informatica_python-1.9.5 → informatica_python-1.9.7}/informatica_python/generators/__init__.py +0 -0
- {informatica_python-1.9.5 → informatica_python-1.9.7}/informatica_python/generators/config_gen.py +0 -0
- {informatica_python-1.9.5 → informatica_python-1.9.7}/informatica_python/generators/error_log_gen.py +0 -0
- {informatica_python-1.9.5 → informatica_python-1.9.7}/informatica_python/generators/sql_gen.py +0 -0
- {informatica_python-1.9.5 → informatica_python-1.9.7}/informatica_python/generators/workflow_gen.py +0 -0
- {informatica_python-1.9.5 → informatica_python-1.9.7}/informatica_python/utils/__init__.py +0 -0
- {informatica_python-1.9.5 → informatica_python-1.9.7}/informatica_python/utils/datatype_map.py +0 -0
- {informatica_python-1.9.5 → informatica_python-1.9.7}/informatica_python/utils/lib_adapters.py +0 -0
- {informatica_python-1.9.5 → informatica_python-1.9.7}/informatica_python/utils/sql_dialect.py +0 -0
- {informatica_python-1.9.5 → informatica_python-1.9.7}/informatica_python.egg-info/SOURCES.txt +0 -0
- {informatica_python-1.9.5 → informatica_python-1.9.7}/informatica_python.egg-info/dependency_links.txt +0 -0
- {informatica_python-1.9.5 → informatica_python-1.9.7}/informatica_python.egg-info/entry_points.txt +0 -0
- {informatica_python-1.9.5 → informatica_python-1.9.7}/informatica_python.egg-info/requires.txt +0 -0
- {informatica_python-1.9.5 → informatica_python-1.9.7}/informatica_python.egg-info/top_level.txt +0 -0
- {informatica_python-1.9.5 → informatica_python-1.9.7}/setup.cfg +0 -0
- {informatica_python-1.9.5 → informatica_python-1.9.7}/tests/test_converter.py +0 -0
- {informatica_python-1.9.5 → informatica_python-1.9.7}/tests/test_expressions.py +0 -0
{informatica_python-1.9.5 → informatica_python-1.9.7}/informatica_python/generators/helper_gen.py
RENAMED
|
@@ -360,7 +360,7 @@ def _add_db_functions(lines, data_lib):
|
|
|
360
360
|
lines.append(' """Execute a SQL statement (INSERT, UPDATE, DELETE, DDL)."""')
|
|
361
361
|
lines.append(" conn = get_db_connection(config, connection_name)")
|
|
362
362
|
lines.append(" try:")
|
|
363
|
-
lines.append(" if hasattr(conn, '
|
|
363
|
+
lines.append(" if hasattr(conn, 'dialect'):")
|
|
364
364
|
lines.append(" from sqlalchemy import text")
|
|
365
365
|
lines.append(" conn.execute(text(sql))")
|
|
366
366
|
lines.append(" conn.commit()")
|
{informatica_python-1.9.5 → informatica_python-1.9.7}/informatica_python/generators/mapping_gen.py
RENAMED
|
@@ -316,7 +316,7 @@ def generate_mapping_code(mapping: MappingDef, folder: FolderDef,
|
|
|
316
316
|
if t.type in ("Source Qualifier", "Application Source Qualifier")]
|
|
317
317
|
if sq_transforms:
|
|
318
318
|
for sq in sq_transforms:
|
|
319
|
-
_generate_source_qualifier(lines, sq, source_map, source_dfs, connector_graph, instance_map, session_overrides)
|
|
319
|
+
_generate_source_qualifier(lines, sq, source_map, source_dfs, connector_graph, instance_map, session_overrides, mapping_name=mapping.name, folder_name=folder.name)
|
|
320
320
|
else:
|
|
321
321
|
for src_name, src_def in source_map.items():
|
|
322
322
|
safe = _safe_name(src_name)
|
|
@@ -347,7 +347,7 @@ def generate_mapping_code(mapping: MappingDef, folder: FolderDef,
|
|
|
347
347
|
for tx in processing_order:
|
|
348
348
|
if tx.type in ("Source Qualifier", "Application Source Qualifier"):
|
|
349
349
|
continue
|
|
350
|
-
_generate_transformation(lines, tx, connector_graph, source_dfs, transform_map, instance_map, data_lib)
|
|
350
|
+
_generate_transformation(lines, tx, connector_graph, source_dfs, transform_map, instance_map, data_lib, mapping_name=mapping.name, folder_name=folder.name)
|
|
351
351
|
|
|
352
352
|
for tgt_name, tgt_def in target_map.items():
|
|
353
353
|
_generate_target_write(lines, tgt_name, tgt_def, connector_graph, source_dfs, transform_map, instance_map, session_overrides, validate_casts=validate_casts)
|
|
@@ -481,7 +481,12 @@ def _emit_flatfile_read(lines, var_name, src_def, indent=" ", file_path_overr
|
|
|
481
481
|
if fc.get("fixed_width"):
|
|
482
482
|
widths = []
|
|
483
483
|
for fld in src_def.fields:
|
|
484
|
-
|
|
484
|
+
if fld.physical_length and fld.physical_length > 0:
|
|
485
|
+
widths.append(fld.physical_length)
|
|
486
|
+
elif fld.precision:
|
|
487
|
+
widths.append(fld.precision)
|
|
488
|
+
else:
|
|
489
|
+
widths.append(10)
|
|
485
490
|
lines.append(f"{indent}df_{var_name} = pd.read_fwf(")
|
|
486
491
|
lines.append(f"{indent} {default_path},")
|
|
487
492
|
lines.append(f"{indent} widths={widths},")
|
|
@@ -626,7 +631,7 @@ def _get_processing_order(transformations, connector_graph, sq_transforms):
|
|
|
626
631
|
return ordered
|
|
627
632
|
|
|
628
633
|
|
|
629
|
-
def _generate_source_qualifier(lines, sq, source_map, source_dfs, connector_graph, instance_map, session_overrides=None):
|
|
634
|
+
def _generate_source_qualifier(lines, sq, source_map, source_dfs, connector_graph, instance_map, session_overrides=None, mapping_name="", folder_name=""):
|
|
630
635
|
sq_safe = _safe_name(sq.name)
|
|
631
636
|
sql_override = ""
|
|
632
637
|
pre_sql = ""
|
|
@@ -665,7 +670,7 @@ def _generate_source_qualifier(lines, sq, source_map, source_dfs, connector_grap
|
|
|
665
670
|
if not connected_sources:
|
|
666
671
|
sq_src_name = sq.name[3:] if sq.name.upper().startswith("SQ_") else sq.name
|
|
667
672
|
if sql_override:
|
|
668
|
-
_emit_sql_with_params(lines, f"sql_{sq_safe}", sql_override)
|
|
673
|
+
_emit_sql_with_params(lines, f"sql_{sq_safe}", sql_override, mapping_name=mapping_name, folder_name=folder_name)
|
|
669
674
|
lines.append(f" df_{sq_safe} = read_from_db(config, sql_{sq_safe}, 'default')")
|
|
670
675
|
else:
|
|
671
676
|
lines.append(f" df_{sq_safe} = read_file(config.get('sources', {{}}).get('{sq_src_name}', {{}}).get('file_path', '{sq_src_name}'),")
|
|
@@ -676,7 +681,7 @@ def _generate_source_qualifier(lines, sq, source_map, source_dfs, connector_grap
|
|
|
676
681
|
sq_override = (session_overrides or {}).get(sq.name, {}) or (session_overrides or {}).get(src_name, {})
|
|
677
682
|
conn_name = sq_override.get("connection_name") or (_safe_name(src_def.db_name) if src_def.db_name else "default")
|
|
678
683
|
|
|
679
|
-
_emit_sql_with_params(lines, f"sql_{sq_safe}", sql_override)
|
|
684
|
+
_emit_sql_with_params(lines, f"sql_{sq_safe}", sql_override, mapping_name=mapping_name, folder_name=folder_name)
|
|
680
685
|
lines.append(f" df_{sq_safe} = read_from_db(config, sql_{sq_safe}, '{conn_name}')")
|
|
681
686
|
elif len(connected_sources) == 1:
|
|
682
687
|
src_name = next(iter(connected_sources))
|
|
@@ -718,7 +723,7 @@ def _generate_source_qualifier(lines, sq, source_map, source_dfs, connector_grap
|
|
|
718
723
|
lines.append("")
|
|
719
724
|
|
|
720
725
|
|
|
721
|
-
def _generate_transformation(lines, tx, connector_graph, source_dfs, transform_map, instance_map, data_lib="pandas"):
|
|
726
|
+
def _generate_transformation(lines, tx, connector_graph, source_dfs, transform_map, instance_map, data_lib="pandas", mapping_name="", folder_name=""):
|
|
722
727
|
tx_safe = _safe_name(tx.name)
|
|
723
728
|
tx_type = tx.type.lower().strip()
|
|
724
729
|
|
|
@@ -765,7 +770,7 @@ def _generate_transformation(lines, tx, connector_graph, source_dfs, transform_m
|
|
|
765
770
|
elif tx_type in ("joiner",):
|
|
766
771
|
_gen_joiner_transform(lines, tx, tx_safe, input_df, input_sources, source_dfs, connector_graph, data_lib)
|
|
767
772
|
elif tx_type in ("lookup procedure", "lookup"):
|
|
768
|
-
_gen_lookup_transform(lines, tx, tx_safe, input_df, source_dfs, connector_graph, data_lib)
|
|
773
|
+
_gen_lookup_transform(lines, tx, tx_safe, input_df, source_dfs, connector_graph, data_lib, mapping_name=mapping_name, folder_name=folder_name)
|
|
769
774
|
elif tx_type == "router":
|
|
770
775
|
_gen_router_transform(lines, tx, tx_safe, input_df, source_dfs)
|
|
771
776
|
elif tx_type in ("union",):
|
|
@@ -785,7 +790,7 @@ def _generate_transformation(lines, tx, connector_graph, source_dfs, transform_m
|
|
|
785
790
|
elif tx_type in ("java",):
|
|
786
791
|
_gen_java_transform(lines, tx, tx_safe, input_df, source_dfs)
|
|
787
792
|
elif tx_type in ("sql",):
|
|
788
|
-
_gen_sql_transform(lines, tx, tx_safe, input_df, source_dfs)
|
|
793
|
+
_gen_sql_transform(lines, tx, tx_safe, input_df, source_dfs, mapping_name=mapping_name, folder_name=folder_name)
|
|
789
794
|
else:
|
|
790
795
|
lines.append(f" # TODO: Unsupported transformation type '{tx.type}' - passing through")
|
|
791
796
|
copy_expr = lib_copy(data_lib, input_df)
|
|
@@ -990,7 +995,7 @@ def _gen_joiner_transform(lines, tx, tx_safe, input_df, input_sources, source_df
|
|
|
990
995
|
source_dfs[tx.name] = f"df_{tx_safe}"
|
|
991
996
|
|
|
992
997
|
|
|
993
|
-
def _gen_lookup_transform(lines, tx, tx_safe, input_df, source_dfs, connector_graph=None, data_lib="pandas"):
|
|
998
|
+
def _gen_lookup_transform(lines, tx, tx_safe, input_df, source_dfs, connector_graph=None, data_lib="pandas", mapping_name="", folder_name=""):
|
|
994
999
|
lookup_table = ""
|
|
995
1000
|
lookup_sql = ""
|
|
996
1001
|
lookup_condition = ""
|
|
@@ -1027,7 +1032,7 @@ def _gen_lookup_transform(lines, tx, tx_safe, input_df, source_dfs, connector_gr
|
|
|
1027
1032
|
|
|
1028
1033
|
lines.append(f" # Lookup: {lookup_table or tx.name}")
|
|
1029
1034
|
if lookup_sql:
|
|
1030
|
-
_emit_sql_with_params(lines, f"lkp_sql_{tx_safe}", lookup_sql)
|
|
1035
|
+
_emit_sql_with_params(lines, f"lkp_sql_{tx_safe}", lookup_sql, mapping_name=mapping_name, folder_name=folder_name)
|
|
1031
1036
|
lines.append(f" df_lkp_{tx_safe} = read_from_db(config, lkp_sql_{tx_safe}, 'default')")
|
|
1032
1037
|
elif lookup_table:
|
|
1033
1038
|
lines.append(f" df_lkp_{tx_safe} = read_from_db(config, 'SELECT * FROM {lookup_table}', 'default')")
|
|
@@ -1423,14 +1428,14 @@ def _gen_java_transform(lines, tx, tx_safe, input_df, source_dfs):
|
|
|
1423
1428
|
source_dfs[tx.name] = f"df_{tx_safe}"
|
|
1424
1429
|
|
|
1425
1430
|
|
|
1426
|
-
def _gen_sql_transform(lines, tx, tx_safe, input_df, source_dfs):
|
|
1431
|
+
def _gen_sql_transform(lines, tx, tx_safe, input_df, source_dfs, mapping_name="", folder_name=""):
|
|
1427
1432
|
sql_query = ""
|
|
1428
1433
|
for attr in tx.attributes:
|
|
1429
1434
|
if attr.name == "Sql Query" and attr.value:
|
|
1430
1435
|
sql_query = convert_sql_expression(attr.value)
|
|
1431
1436
|
lines.append(f" # SQL Transformation: {tx.name}")
|
|
1432
1437
|
if sql_query:
|
|
1433
|
-
_emit_sql_with_params(lines, f"sql_{tx_safe}", sql_query)
|
|
1438
|
+
_emit_sql_with_params(lines, f"sql_{tx_safe}", sql_query, mapping_name=mapping_name, folder_name=folder_name)
|
|
1434
1439
|
lines.append(f" df_{tx_safe} = read_from_db(config, sql_{tx_safe}, 'default')")
|
|
1435
1440
|
else:
|
|
1436
1441
|
lines.append(f" df_{tx_safe} = {input_df}.copy()")
|
|
{informatica_python-1.9.5 → informatica_python-1.9.7}/informatica_python/parser.py
RENAMED
|
@@ -417,6 +417,9 @@ class InformaticaParser:
|
|
|
417
417
|
hidden=self._attr(elem, "HIDDEN", "NO"),
|
|
418
418
|
business_name=self._attr(elem, "BUSINESSNAME"),
|
|
419
419
|
description=self._attr(elem, "DESCRIPTION"),
|
|
420
|
+
offset=self._int_attr(elem, "OFFSET"),
|
|
421
|
+
physical_offset=self._int_attr(elem, "PHYSICALOFFSET"),
|
|
422
|
+
physical_length=self._int_attr(elem, "PHYSICALLENGTH"),
|
|
420
423
|
)
|
|
421
424
|
for fa in elem.findall("FIELDATTRIBUTE"):
|
|
422
425
|
fld.field_attributes.append({
|
|
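{informatica_python-1.9.5 → informatica_python-1.9.7}/informatica_python/utils/expression_converter.py
RENAMED
|
@@ -184,7 +184,9 @@ def convert_expression(expr):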
|
|
|
184
184
|
return cleaned
|
|
185
185
|
|
|
186
186
|
if cleaned.startswith("'") and cleaned.endswith("'"):
|
|
187
|
-
|
|
187
|
+
close_pos = cleaned.find("'", 1)
|
|
188
|
+
if close_pos == len(cleaned) - 1:
|
|
189
|
+
return cleaned
|
|
188
190
|
|
|
189
191
|
converted = cleaned
|
|
190
192
|
|
|
@@ -428,7 +430,9 @@ def _vec_recursive(expr, df_var):
|
|
|
428
430
|
return cleaned
|
|
429
431
|
|
|
430
432
|
if cleaned.startswith("'") and cleaned.endswith("'"):
|
|
431
|
-
|
|
433
|
+
close_pos = cleaned.find("'", 1)
|
|
434
|
+
if close_pos == len(cleaned) - 1:
|
|
435
|
+
return cleaned
|
|
432
436
|
|
|
433
437
|
upper = cleaned.upper()
|
|
434
438
|
|
|
@@ -452,6 +456,17 @@ def _vec_recursive(expr, df_var):
|
|
|
452
456
|
var_name = cleaned[2:]
|
|
453
457
|
return f'get_variable("{var_name}")'
|
|
454
458
|
|
|
459
|
+
if re.match(r'^\$PM\w+$', cleaned):
|
|
460
|
+
var_name = cleaned[1:]
|
|
461
|
+
return f'resolve_builtin_variable("{var_name}")'
|
|
462
|
+
|
|
463
|
+
not_result = _find_func_call(cleaned, 'NOT')
|
|
464
|
+
if not_result and not_result[0] == 0 and not_result[1] == len(cleaned):
|
|
465
|
+
_, _, args = not_result
|
|
466
|
+
if len(args) >= 1:
|
|
467
|
+
inner = _vec_recursive(args[0], df_var)
|
|
468
|
+
return f'~({inner})'
|
|
469
|
+
|
|
455
470
|
lkp_result = _find_func_call(cleaned, 'LKP')
|
|
456
471
|
if lkp_result is None:
|
|
457
472
|
lkp_match = re.match(r'^:LKP\.(\w+)\s*\(', cleaned, re.IGNORECASE)
|
|
@@ -666,6 +681,8 @@ def _vec_recursive(expr, df_var):
|
|
|
666
681
|
if len(args) >= 2:
|
|
667
682
|
fmt = _convert_infa_date_format(args[1])
|
|
668
683
|
return f'{field_val}.dt.strftime("{fmt}")'
|
|
684
|
+
if any(op in field_val for op in (' + ', ' - ', ' * ', ' / ', ' % ')):
|
|
685
|
+
return f'({field_val}).astype(str)'
|
|
669
686
|
return f'{field_val}.astype(str)'
|
|
670
687
|
|
|
671
688
|
make_dt_result = _find_func_call(cleaned, 'MAKE_DATE_TIME')
|
|
@@ -883,6 +900,7 @@ def _vec_recursive(expr, df_var):
|
|
|
883
900
|
converted = re.sub(r':LKP\.(\w+)\s*\(', r'lookup_func("\1", ', converted)
|
|
884
901
|
|
|
885
902
|
converted = re.sub(r'\$\$(\w+)', r'get_variable("\1")', converted)
|
|
903
|
+
converted = re.sub(r'\$(PM\w+)', r'resolve_builtin_variable("\1")', converted)
|
|
886
904
|
|
|
887
905
|
converted = re.sub(r'\b([A-Za-z_][A-Za-z0-9_]*)\s*IS\s+NOT\s+NULL\b',
|
|
888
906
|
lambda m: f'{df_var}["{m.group(1)}"].notna()', converted, flags=re.IGNORECASE)
|
|
@@ -895,8 +913,15 @@ def _vec_recursive(expr, df_var):
|
|
|
895
913
|
|
|
896
914
|
converted = _convert_remaining_funcs(converted, df_var)
|
|
897
915
|
|
|
916
|
+
converted = re.sub(r'\bAND\b', ' & ', converted, flags=re.IGNORECASE)
|
|
917
|
+
converted = re.sub(r'\bOR\b', ' | ', converted, flags=re.IGNORECASE)
|
|
918
|
+
converted = re.sub(r'\bNOT\b', ' ~ ', converted, flags=re.IGNORECASE)
|
|
919
|
+
converted = re.sub(r'<>', '!=', converted)
|
|
920
|
+
converted = re.sub(r'(?<![<>!=])=(?!=)', '==', converted)
|
|
921
|
+
|
|
898
922
|
skip_words = {
|
|
899
923
|
'True', 'False', 'None', 'and', 'or', 'not', 'np', 'pd', 'get_variable',
|
|
924
|
+
'resolve_builtin_variable',
|
|
900
925
|
'str', 'int', 'float', 'bool', 'len', 'abs', 'round',
|
|
901
926
|
'fillna', 'astype', 'isna', 'notna', 'where', 'errors', 'coerce',
|
|
902
927
|
'lookup_func', 'expand', 'extract', 'regex', 'contains', 'replace',
|
|
@@ -904,11 +929,6 @@ def _vec_recursive(expr, df_var):
|
|
|
904
929
|
}
|
|
905
930
|
converted = _substitute_fields(converted, df_var, skip_words)
|
|
906
931
|
|
|
907
|
-
converted = re.sub(r'\bAND\b', ' & ', converted, flags=re.IGNORECASE)
|
|
908
|
-
converted = re.sub(r'\bOR\b', ' | ', converted, flags=re.IGNORECASE)
|
|
909
|
-
converted = re.sub(r'\bNOT\b', ' ~', converted, flags=re.IGNORECASE)
|
|
910
|
-
converted = re.sub(r'<>', '!=', converted)
|
|
911
|
-
converted = re.sub(r'(?<![<>!=])=(?!=)', '==', converted)
|
|
912
932
|
converted = re.sub(r'\berrors\s*==\s*(["\'])', r'errors=\1', converted)
|
|
913
933
|
converted = re.sub(r'\bexpand\s*==\s*', 'expand=', converted)
|
|
914
934
|
converted = re.sub(r'\bregex\s*==\s*', 'regex=', converted)
|
|
@@ -1041,6 +1061,8 @@ def _vectorize_simple(part, df_var):
|
|
|
1041
1061
|
c = re.sub(r'\b([A-Za-z_]\w*)\s*IS\s+NULL\b',
|
|
1042
1062
|
lambda m: f'{df_var}["{m.group(1)}"].isna()', c, flags=re.IGNORECASE)
|
|
1043
1063
|
|
|
1064
|
+
c = re.sub(r'\$(PM\w+)', r'resolve_builtin_variable("\1")', c)
|
|
1065
|
+
|
|
1044
1066
|
c = re.sub(r'<>', '!=', c)
|
|
1045
1067
|
c = re.sub(r'(?<![<>!=])=(?!=)', '==', c)
|
|
1046
1068
|
|
|
@@ -1048,8 +1070,13 @@ def _vectorize_simple(part, df_var):
|
|
|
1048
1070
|
c = re.sub(r'\bTRUE\b', 'True', c, flags=re.IGNORECASE)
|
|
1049
1071
|
c = re.sub(r'\bFALSE\b', 'False', c, flags=re.IGNORECASE)
|
|
1050
1072
|
|
|
1073
|
+
c = re.sub(r'\bAND\b', ' & ', c, flags=re.IGNORECASE)
|
|
1074
|
+
c = re.sub(r'\bOR\b', ' | ', c, flags=re.IGNORECASE)
|
|
1075
|
+
c = re.sub(r'\bNOT\b', ' ~ ', c, flags=re.IGNORECASE)
|
|
1076
|
+
|
|
1051
1077
|
skip_words = {
|
|
1052
1078
|
'True', 'False', 'None', 'and', 'or', 'not', 'np', 'pd',
|
|
1079
|
+
'resolve_builtin_variable',
|
|
1053
1080
|
'str', 'int', 'float', 'isna', 'notna', 'fillna',
|
|
1054
1081
|
'get_variable', 'lookup_func', 'isin', 'eq',
|
|
1055
1082
|
'expand', 'extract', 'astype', 'errors', 'coerce', 'regex',
|
|
@@ -1089,8 +1116,9 @@ def _split_condition_tokens(text):
|
|
|
1089
1116
|
current.append(ch)
|
|
1090
1117
|
elif depth == 0:
|
|
1091
1118
|
rest = text[i:]
|
|
1092
|
-
|
|
1093
|
-
|
|
1119
|
+
prev_is_word = i > 0 and (text[i - 1].isalnum() or text[i - 1] == '_')
|
|
1120
|
+
and_match = re.match(r'\bAND\b', rest, re.IGNORECASE) if not prev_is_word else None
|
|
1121
|
+
or_match = re.match(r'\bOR\b', rest, re.IGNORECASE) if not prev_is_word else None
|
|
1094
1122
|
if and_match:
|
|
1095
1123
|
tokens.append(''.join(current).strip())
|
|
1096
1124
|
current = []
|
|
@@ -1134,9 +1162,10 @@ def _vectorize_condition(cond, df_var="df"):
|
|
|
1134
1162
|
for part in parts:
|
|
1135
1163
|
negate = False
|
|
1136
1164
|
inner = part.strip()
|
|
1137
|
-
|
|
1165
|
+
not_match = re.match(r'^NOT\b\s*', inner, flags=re.IGNORECASE)
|
|
1166
|
+
if not_match:
|
|
1138
1167
|
negate = True
|
|
1139
|
-
inner =
|
|
1168
|
+
inner = inner[not_match.end():].strip()
|
|
1140
1169
|
|
|
1141
1170
|
v = _vectorize_simple(inner, df_var)
|
|
1142
1171
|
if negate:
|
|
{informatica_python-1.9.5 → informatica_python-1.9.7}/tests/test_integration.py
RENAMED
|
@@ -2676,6 +2676,8 @@ class TestPMVariableHandling(unittest.TestCase):
|
|
|
2676
2676
|
if "$PMMappingName" in code:
|
|
2677
2677
|
assert "resolve_builtin_variable" in code, \
|
|
2678
2678
|
"SQL with $PMMappingName should call resolve_builtin_variable"
|
|
2679
|
+
assert "mapping_name='m_pm_vars'" in code, \
|
|
2680
|
+
"resolve_builtin_variable should receive actual mapping name"
|
|
2679
2681
|
break
|
|
2680
2682
|
finally:
|
|
2681
2683
|
shutil.rmtree(tmpdir)
|
|
@@ -2691,7 +2693,8 @@ class TestExecuteSqlAlchemy(unittest.TestCase):
|
|
|
2691
2693
|
with open(os.path.join(tmpdir, "helper_functions.py")) as f:
|
|
2692
2694
|
code = f.read()
|
|
2693
2695
|
exec_block = code.split("def execute_sql(")[1]
|
|
2694
|
-
assert "
|
|
2696
|
+
assert "text(sql)" in exec_block
|
|
2697
|
+
assert "dialect" in exec_block, "Should check for dialect attribute to detect SQLAlchemy"
|
|
2695
2698
|
finally:
|
|
2696
2699
|
shutil.rmtree(tmpdir)
|
|
2697
2700
|
|
|
@@ -2708,3 +2711,203 @@ class TestImportRe(unittest.TestCase):
|
|
|
2708
2711
|
assert "import re" in code
|
|
2709
2712
|
finally:
|
|
2710
2713
|
shutil.rmtree(tmpdir)
|
|
2714
|
+
|
|
2715
|
+
|
|
2716
|
+
class TestNotFunctionCallForm(unittest.TestCase):
|
|
2717
|
+
|
|
2718
|
+
def test_not_without_space_isnull(self):
|
|
2719
|
+
result = convert_expression_vectorized("NOT(ISNULL(Postal_Code))")
|
|
2720
|
+
assert "~" in result
|
|
2721
|
+
assert "isna" in result
|
|
2722
|
+
assert "NOT(" not in result
|
|
2723
|
+
|
|
2724
|
+
def test_not_with_space_isnull(self):
|
|
2725
|
+
result = convert_expression_vectorized("NOT ISNULL(Postal_Code)")
|
|
2726
|
+
assert "~" in result
|
|
2727
|
+
assert "isna" in result
|
|
2728
|
+
|
|
2729
|
+
def test_not_in_iif_condition(self):
|
|
2730
|
+
result = convert_expression_vectorized("IIF(NOT(ISNULL(X)), X, 'default')")
|
|
2731
|
+
assert "np.where" in result
|
|
2732
|
+
assert "~" in result
|
|
2733
|
+
assert "isna" in result
|
|
2734
|
+
|
|
2735
|
+
def test_not_vectorize_condition_no_space(self):
|
|
2736
|
+
result = convert_filter_vectorized("NOT(ISNULL(field1))")
|
|
2737
|
+
assert "~" in result
|
|
2738
|
+
assert "isna" in result
|
|
2739
|
+
assert "NOT(" not in result
|
|
2740
|
+
|
|
2741
|
+
def test_not_vectorize_condition_with_space(self):
|
|
2742
|
+
result = convert_filter_vectorized("NOT ISNULL(field1)")
|
|
2743
|
+
assert "~" in result
|
|
2744
|
+
assert "isna" in result
|
|
2745
|
+
|
|
2746
|
+
|
|
2747
|
+
class TestAndOrNotAsFieldNames(unittest.TestCase):
|
|
2748
|
+
|
|
2749
|
+
def test_and_not_treated_as_field(self):
|
|
2750
|
+
result = convert_filter_vectorized("A = 1 AND B = 2")
|
|
2751
|
+
assert 'df["AND"]' not in result
|
|
2752
|
+
assert "&" in result
|
|
2753
|
+
|
|
2754
|
+
def test_or_not_treated_as_field(self):
|
|
2755
|
+
result = convert_filter_vectorized("A = 'TRUE' OR B = 'FALSE'")
|
|
2756
|
+
assert 'df["OR"]' not in result
|
|
2757
|
+
assert "|" in result
|
|
2758
|
+
|
|
2759
|
+
def test_complex_and_or_filter(self):
|
|
2760
|
+
expr = "FILTER_FLAG = 'TRUE' OR (FILTER_FLAG='FALSE' AND ACCBALANCE='Y')"
|
|
2761
|
+
result = convert_filter_vectorized(expr)
|
|
2762
|
+
assert 'df["AND"]' not in result
|
|
2763
|
+
assert 'df["OR"]' not in result
|
|
2764
|
+
assert "&" in result
|
|
2765
|
+
assert "|" in result
|
|
2766
|
+
|
|
2767
|
+
def test_nested_and_in_iif(self):
|
|
2768
|
+
expr = "IIF(UPPER(X) = 'A' AND UPPER(Y) = 'B', 1, 0)"
|
|
2769
|
+
result = convert_expression_vectorized(expr)
|
|
2770
|
+
assert "np.where" in result
|
|
2771
|
+
assert 'df["AND"]' not in result
|
|
2772
|
+
assert "&" in result
|
|
2773
|
+
|
|
2774
|
+
def test_and_or_in_vectorize_simple(self):
|
|
2775
|
+
result = convert_filter_vectorized("(X = 1 AND Y = 2)")
|
|
2776
|
+
assert 'df["AND"]' not in result
|
|
2777
|
+
assert "&" in result
|
|
2778
|
+
|
|
2779
|
+
|
|
2780
|
+
class TestPMBuiltinVariableInExpression(unittest.TestCase):
|
|
2781
|
+
|
|
2782
|
+
def test_pm_mapping_name_standalone(self):
|
|
2783
|
+
result = convert_expression_vectorized("$PMMappingName")
|
|
2784
|
+
assert "resolve_builtin_variable" in result
|
|
2785
|
+
assert "PMMappingName" in result
|
|
2786
|
+
assert '$df[' not in result
|
|
2787
|
+
|
|
2788
|
+
def test_pm_in_concat(self):
|
|
2789
|
+
result = convert_expression_vectorized("'prefix_' || $PMSessionName || '_suffix'")
|
|
2790
|
+
assert "resolve_builtin_variable" in result
|
|
2791
|
+
assert "PMSessionName" in result
|
|
2792
|
+
assert '$df[' not in result
|
|
2793
|
+
|
|
2794
|
+
def test_pm_variable_not_mangled(self):
|
|
2795
|
+
result = convert_expression_vectorized("IIF($PMMappingName = 'test', 1, 0)")
|
|
2796
|
+
assert "resolve_builtin_variable" in result
|
|
2797
|
+
assert '$df[' not in result
|
|
2798
|
+
|
|
2799
|
+
|
|
2800
|
+
class TestToCharParenthesization(unittest.TestCase):
|
|
2801
|
+
|
|
2802
|
+
def test_to_char_with_arithmetic(self):
|
|
2803
|
+
result = convert_expression_vectorized("TO_CHAR(TO_INTEGER(x) - 1)")
|
|
2804
|
+
assert ".astype(str)" in result
|
|
2805
|
+
assert result.count("(") >= result.count(")")
|
|
2806
|
+
assert "- 1.astype(str)" not in result
|
|
2807
|
+
assert "- 1).astype(str)" in result
|
|
2808
|
+
|
|
2809
|
+
def test_to_char_simple_field(self):
|
|
2810
|
+
result = convert_expression_vectorized("TO_CHAR(x)")
|
|
2811
|
+
assert ".astype(str)" in result
|
|
2812
|
+
|
|
2813
|
+
def test_to_char_with_addition(self):
|
|
2814
|
+
result = convert_expression_vectorized("TO_CHAR(x + y)")
|
|
2815
|
+
assert "- 1.astype" not in result or "+ " not in result
|
|
2816
|
+
if " + " in result:
|
|
2817
|
+
assert ").astype(str)" in result
|
|
2818
|
+
|
|
2819
|
+
|
|
2820
|
+
class TestIifFieldEqualsNumeric(unittest.TestCase):
|
|
2821
|
+
|
|
2822
|
+
def test_iif_field_equals_zero(self):
|
|
2823
|
+
result = convert_expression_vectorized("IIF(DeletedIndicator=0,'N','Y')")
|
|
2824
|
+
assert "np.where" in result
|
|
2825
|
+
assert "==" in result
|
|
2826
|
+
assert 'DeletedIndicator' in result.replace('"', '')
|
|
2827
|
+
assert "| (" not in result
|
|
2828
|
+
|
|
2829
|
+
def test_iif_field_equals_string(self):
|
|
2830
|
+
result = convert_expression_vectorized("IIF(Status='A','Active','Inactive')")
|
|
2831
|
+
assert "np.where" in result
|
|
2832
|
+
assert "==" in result
|
|
2833
|
+
|
|
2834
|
+
|
|
2835
|
+
class TestFixedWidthPhysicalLength(unittest.TestCase):
|
|
2836
|
+
|
|
2837
|
+
def test_field_def_has_physical_length(self):
|
|
2838
|
+
from informatica_python.models import FieldDef
|
|
2839
|
+
fld = FieldDef(name="test", datatype="string", physical_length=20, offset=5)
|
|
2840
|
+
assert fld.physical_length == 20
|
|
2841
|
+
assert fld.offset == 5
|
|
2842
|
+
|
|
2843
|
+
def test_fixed_width_xml(self):
|
|
2844
|
+
xml = '''<?xml version="1.0" encoding="UTF-8"?>
|
|
2845
|
+
<!DOCTYPE POWERMART SYSTEM "powrmart.dtd">
|
|
2846
|
+
<POWERMART CREATION_DATE="01/01/2025" REPOSITORY_VERSION="1">
|
|
2847
|
+
<REPOSITORY NAME="repo" VERSION="1" CODEPAGE="UTF-8" DATABASETYPE="Oracle">
|
|
2848
|
+
<FOLDER NAME="TEST_FOLDER" OWNER="admin">
|
|
2849
|
+
<SOURCE NAME="SRC_FW" DATABASETYPE="Flat File" DBDNAME="SRC_FW">
|
|
2850
|
+
<FLATFILE ISFIXEDWIDTH="YES" PADBYTES="NO"/>
|
|
2851
|
+
<SOURCEFIELD NAME="FIELD1" DATATYPE="string" PRECISION="10" SCALE="0" FIELDNUMBER="1" PHYSICALLENGTH="15" OFFSET="0"/>
|
|
2852
|
+
<SOURCEFIELD NAME="FIELD2" DATATYPE="string" PRECISION="20" SCALE="0" FIELDNUMBER="2" PHYSICALLENGTH="25" OFFSET="15"/>
|
|
2853
|
+
</SOURCE>
|
|
2854
|
+
<TARGET NAME="TGT_FW" DATABASETYPE="Flat File">
|
|
2855
|
+
<TARGETFIELD NAME="FIELD1" DATATYPE="string" PRECISION="10" SCALE="0" FIELDNUMBER="1"/>
|
|
2856
|
+
<TARGETFIELD NAME="FIELD2" DATATYPE="string" PRECISION="20" SCALE="0" FIELDNUMBER="2"/>
|
|
2857
|
+
</TARGET>
|
|
2858
|
+
<MAPPING NAME="m_test_fw" ISVALID="YES">
|
|
2859
|
+
<TRANSFORMATION NAME="SQ_SRC_FW" TYPE="Source Qualifier" REUSABLE="NO">
|
|
2860
|
+
<TRANSFORMFIELD NAME="FIELD1" DATATYPE="string" PRECISION="10" PORTTYPE="INPUT/OUTPUT"/>
|
|
2861
|
+
<TRANSFORMFIELD NAME="FIELD2" DATATYPE="string" PRECISION="20" PORTTYPE="INPUT/OUTPUT"/>
|
|
2862
|
+
<TABLEATTRIBUTE NAME="Sql Query" VALUE=""/>
|
|
2863
|
+
<TABLEATTRIBUTE NAME="User Defined Join" VALUE=""/>
|
|
2864
|
+
<TABLEATTRIBUTE NAME="Source Filter" VALUE=""/>
|
|
2865
|
+
</TRANSFORMATION>
|
|
2866
|
+
<CONNECTOR FROMINSTANCE="SQ_SRC_FW" FROMFIELD="FIELD1" TOINSTANCE="TGT_FW" TOFIELD="FIELD1"/>
|
|
2867
|
+
<CONNECTOR FROMINSTANCE="SQ_SRC_FW" FROMFIELD="FIELD2" TOINSTANCE="TGT_FW" TOFIELD="FIELD2"/>
|
|
2868
|
+
<INSTANCE NAME="SQ_SRC_FW" TRANSFORMATION_NAME="SQ_SRC_FW" TYPE="Source Qualifier">
|
|
2869
|
+
<ASSOCIATED_SOURCE_INSTANCE NAME="SRC_FW"/>
|
|
2870
|
+
</INSTANCE>
|
|
2871
|
+
<INSTANCE NAME="SRC_FW" TRANSFORMATION_NAME="SRC_FW" TYPE="Source Definition"/>
|
|
2872
|
+
<INSTANCE NAME="TGT_FW" TRANSFORMATION_NAME="TGT_FW" TYPE="Target Definition"/>
|
|
2873
|
+
</MAPPING>
|
|
2874
|
+
<SESSION NAME="s_test_fw" MAPPINGNAME="m_test_fw" ISVALID="YES">
|
|
2875
|
+
<SESSTRANSFORMATIONINST TRANSFORMATIONNAME="SQ_SRC_FW" SINSTANCENAME="SQ_SRC_FW"/>
|
|
2876
|
+
<CONFIGREFERENCE REFOBJECTNAME="default_session_config" TYPE="Session Config"/>
|
|
2877
|
+
</SESSION>
|
|
2878
|
+
<WORKFLOW NAME="wf_test_fw" ISVALID="YES">
|
|
2879
|
+
<TASKINSTANCE NAME="s_test_fw" TASKNAME="s_test_fw" TASKTYPE="Session"/>
|
|
2880
|
+
</WORKFLOW>
|
|
2881
|
+
</FOLDER>
|
|
2882
|
+
</REPOSITORY>
|
|
2883
|
+
</POWERMART>'''
|
|
2884
|
+
converter = InformaticaConverter()
|
|
2885
|
+
tmpdir = tempfile.mkdtemp()
|
|
2886
|
+
try:
|
|
2887
|
+
converter.convert_string(xml, output_dir=tmpdir)
|
|
2888
|
+
mapping_file = os.path.join(tmpdir, "mapping_m_test_fw.py")
|
|
2889
|
+
assert os.path.exists(mapping_file), "mapping file not created"
|
|
2890
|
+
with open(mapping_file) as f:
|
|
2891
|
+
code = f.read()
|
|
2892
|
+
assert "read_fwf" in code
|
|
2893
|
+
assert "15" in code
|
|
2894
|
+
assert "25" in code
|
|
2895
|
+
finally:
|
|
2896
|
+
shutil.rmtree(tmpdir)
|
|
2897
|
+
|
|
2898
|
+
|
|
2899
|
+
class TestConcatWithLtrimRtrim(unittest.TestCase):
|
|
2900
|
+
|
|
2901
|
+
def test_concat_ltrim_rtrim(self):
|
|
2902
|
+
expr = "'PER_' || ltrim(rtrim(X)) || '_suffix'"
|
|
2903
|
+
result = convert_expression_vectorized(expr)
|
|
2904
|
+
assert "+" in result
|
|
2905
|
+
assert "||" not in result
|
|
2906
|
+
assert "lstrip" in result or "strip" in result
|
|
2907
|
+
assert "rstrip" in result or "strip" in result
|
|
2908
|
+
|
|
2909
|
+
def test_concat_simple_fields(self):
|
|
2910
|
+
expr = "A || '_' || B"
|
|
2911
|
+
result = convert_expression_vectorized(expr)
|
|
2912
|
+
assert "+" in result
|
|
2913
|
+
assert "||" not in result
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{informatica_python-1.9.5 → informatica_python-1.9.7}/informatica_python/generators/__init__.py
RENAMED
|
File without changes
|
{informatica_python-1.9.5 → informatica_python-1.9.7}/informatica_python/generators/config_gen.py
RENAMED
|
File without changes
|
{informatica_python-1.9.5 → informatica_python-1.9.7}/informatica_python/generators/error_log_gen.py
RENAMED
|
File without changes
|
{informatica_python-1.9.5 → informatica_python-1.9.7}/informatica_python/generators/sql_gen.py
RENAMED
|
File without changes
|
{informatica_python-1.9.5 → informatica_python-1.9.7}/informatica_python/generators/workflow_gen.py
RENAMED
|
File without changes
|
|
File without changes
|
{informatica_python-1.9.5 → informatica_python-1.9.7}/informatica_python/utils/datatype_map.py
RENAMED
|
File without changes
|
{informatica_python-1.9.5 → informatica_python-1.9.7}/informatica_python/utils/lib_adapters.py
RENAMED
|
File without changes
|
{informatica_python-1.9.5 → informatica_python-1.9.7}/informatica_python/utils/sql_dialect.py
RENAMED
|
File without changes
|
{informatica_python-1.9.5 → informatica_python-1.9.7}/informatica_python.egg-info/SOURCES.txt
RENAMED
|
File without changes
|
|
File without changes
|
{informatica_python-1.9.5 → informatica_python-1.9.7}/informatica_python.egg-info/entry_points.txt
RENAMED
|
File without changes
|
{informatica_python-1.9.5 → informatica_python-1.9.7}/informatica_python.egg-info/requires.txt
RENAMED
|
File without changes
|
{informatica_python-1.9.5 → informatica_python-1.9.7}/informatica_python.egg-info/top_level.txt
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|