informatica-python 1.9.5__tar.gz → 1.9.7__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (31)
  1. {informatica_python-1.9.5 → informatica_python-1.9.7}/PKG-INFO +1 -1
  2. {informatica_python-1.9.5 → informatica_python-1.9.7}/informatica_python/__init__.py +1 -1
  3. {informatica_python-1.9.5 → informatica_python-1.9.7}/informatica_python/generators/helper_gen.py +1 -1
  4. {informatica_python-1.9.5 → informatica_python-1.9.7}/informatica_python/generators/mapping_gen.py +18 -13
  5. {informatica_python-1.9.5 → informatica_python-1.9.7}/informatica_python/models.py +3 -0
  6. {informatica_python-1.9.5 → informatica_python-1.9.7}/informatica_python/parser.py +3 -0
  7. {informatica_python-1.9.5 → informatica_python-1.9.7}/informatica_python/utils/expression_converter.py +40 -11
  8. {informatica_python-1.9.5 → informatica_python-1.9.7}/informatica_python.egg-info/PKG-INFO +1 -1
  9. {informatica_python-1.9.5 → informatica_python-1.9.7}/pyproject.toml +1 -1
  10. {informatica_python-1.9.5 → informatica_python-1.9.7}/tests/test_integration.py +204 -1
  11. {informatica_python-1.9.5 → informatica_python-1.9.7}/LICENSE +0 -0
  12. {informatica_python-1.9.5 → informatica_python-1.9.7}/README.md +0 -0
  13. {informatica_python-1.9.5 → informatica_python-1.9.7}/informatica_python/cli.py +0 -0
  14. {informatica_python-1.9.5 → informatica_python-1.9.7}/informatica_python/converter.py +0 -0
  15. {informatica_python-1.9.5 → informatica_python-1.9.7}/informatica_python/generators/__init__.py +0 -0
  16. {informatica_python-1.9.5 → informatica_python-1.9.7}/informatica_python/generators/config_gen.py +0 -0
  17. {informatica_python-1.9.5 → informatica_python-1.9.7}/informatica_python/generators/error_log_gen.py +0 -0
  18. {informatica_python-1.9.5 → informatica_python-1.9.7}/informatica_python/generators/sql_gen.py +0 -0
  19. {informatica_python-1.9.5 → informatica_python-1.9.7}/informatica_python/generators/workflow_gen.py +0 -0
  20. {informatica_python-1.9.5 → informatica_python-1.9.7}/informatica_python/utils/__init__.py +0 -0
  21. {informatica_python-1.9.5 → informatica_python-1.9.7}/informatica_python/utils/datatype_map.py +0 -0
  22. {informatica_python-1.9.5 → informatica_python-1.9.7}/informatica_python/utils/lib_adapters.py +0 -0
  23. {informatica_python-1.9.5 → informatica_python-1.9.7}/informatica_python/utils/sql_dialect.py +0 -0
  24. {informatica_python-1.9.5 → informatica_python-1.9.7}/informatica_python.egg-info/SOURCES.txt +0 -0
  25. {informatica_python-1.9.5 → informatica_python-1.9.7}/informatica_python.egg-info/dependency_links.txt +0 -0
  26. {informatica_python-1.9.5 → informatica_python-1.9.7}/informatica_python.egg-info/entry_points.txt +0 -0
  27. {informatica_python-1.9.5 → informatica_python-1.9.7}/informatica_python.egg-info/requires.txt +0 -0
  28. {informatica_python-1.9.5 → informatica_python-1.9.7}/informatica_python.egg-info/top_level.txt +0 -0
  29. {informatica_python-1.9.5 → informatica_python-1.9.7}/setup.cfg +0 -0
  30. {informatica_python-1.9.5 → informatica_python-1.9.7}/tests/test_converter.py +0 -0
  31. {informatica_python-1.9.5 → informatica_python-1.9.7}/tests/test_expressions.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: informatica-python
3
- Version: 1.9.5
3
+ Version: 1.9.7
4
4
  Summary: Convert Informatica PowerCenter workflow XML to Python/PySpark code
5
5
  Author: Nick
6
6
  License: MIT
@@ -7,7 +7,7 @@ Licensed under the MIT License.
7
7
 
8
8
  from informatica_python.converter import InformaticaConverter
9
9
 
10
- __version__ = "1.9.5"
10
+ __version__ = "1.9.6"
11
11
  __author__ = "Nick"
12
12
  __license__ = "MIT"
13
13
  __all__ = ["InformaticaConverter"]
@@ -360,7 +360,7 @@ def _add_db_functions(lines, data_lib):
360
360
  lines.append(' """Execute a SQL statement (INSERT, UPDATE, DELETE, DDL)."""')
361
361
  lines.append(" conn = get_db_connection(config, connection_name)")
362
362
  lines.append(" try:")
363
- lines.append(" if hasattr(conn, 'execute'):")
363
+ lines.append(" if hasattr(conn, 'dialect'):")
364
364
  lines.append(" from sqlalchemy import text")
365
365
  lines.append(" conn.execute(text(sql))")
366
366
  lines.append(" conn.commit()")
@@ -316,7 +316,7 @@ def generate_mapping_code(mapping: MappingDef, folder: FolderDef,
316
316
  if t.type in ("Source Qualifier", "Application Source Qualifier")]
317
317
  if sq_transforms:
318
318
  for sq in sq_transforms:
319
- _generate_source_qualifier(lines, sq, source_map, source_dfs, connector_graph, instance_map, session_overrides)
319
+ _generate_source_qualifier(lines, sq, source_map, source_dfs, connector_graph, instance_map, session_overrides, mapping_name=mapping.name, folder_name=folder.name)
320
320
  else:
321
321
  for src_name, src_def in source_map.items():
322
322
  safe = _safe_name(src_name)
@@ -347,7 +347,7 @@ def generate_mapping_code(mapping: MappingDef, folder: FolderDef,
347
347
  for tx in processing_order:
348
348
  if tx.type in ("Source Qualifier", "Application Source Qualifier"):
349
349
  continue
350
- _generate_transformation(lines, tx, connector_graph, source_dfs, transform_map, instance_map, data_lib)
350
+ _generate_transformation(lines, tx, connector_graph, source_dfs, transform_map, instance_map, data_lib, mapping_name=mapping.name, folder_name=folder.name)
351
351
 
352
352
  for tgt_name, tgt_def in target_map.items():
353
353
  _generate_target_write(lines, tgt_name, tgt_def, connector_graph, source_dfs, transform_map, instance_map, session_overrides, validate_casts=validate_casts)
@@ -481,7 +481,12 @@ def _emit_flatfile_read(lines, var_name, src_def, indent=" ", file_path_overr
481
481
  if fc.get("fixed_width"):
482
482
  widths = []
483
483
  for fld in src_def.fields:
484
- widths.append(fld.precision if fld.precision else 10)
484
+ if fld.physical_length and fld.physical_length > 0:
485
+ widths.append(fld.physical_length)
486
+ elif fld.precision:
487
+ widths.append(fld.precision)
488
+ else:
489
+ widths.append(10)
485
490
  lines.append(f"{indent}df_{var_name} = pd.read_fwf(")
486
491
  lines.append(f"{indent} {default_path},")
487
492
  lines.append(f"{indent} widths={widths},")
@@ -626,7 +631,7 @@ def _get_processing_order(transformations, connector_graph, sq_transforms):
626
631
  return ordered
627
632
 
628
633
 
629
- def _generate_source_qualifier(lines, sq, source_map, source_dfs, connector_graph, instance_map, session_overrides=None):
634
+ def _generate_source_qualifier(lines, sq, source_map, source_dfs, connector_graph, instance_map, session_overrides=None, mapping_name="", folder_name=""):
630
635
  sq_safe = _safe_name(sq.name)
631
636
  sql_override = ""
632
637
  pre_sql = ""
@@ -665,7 +670,7 @@ def _generate_source_qualifier(lines, sq, source_map, source_dfs, connector_grap
665
670
  if not connected_sources:
666
671
  sq_src_name = sq.name[3:] if sq.name.upper().startswith("SQ_") else sq.name
667
672
  if sql_override:
668
- _emit_sql_with_params(lines, f"sql_{sq_safe}", sql_override)
673
+ _emit_sql_with_params(lines, f"sql_{sq_safe}", sql_override, mapping_name=mapping_name, folder_name=folder_name)
669
674
  lines.append(f" df_{sq_safe} = read_from_db(config, sql_{sq_safe}, 'default')")
670
675
  else:
671
676
  lines.append(f" df_{sq_safe} = read_file(config.get('sources', {{}}).get('{sq_src_name}', {{}}).get('file_path', '{sq_src_name}'),")
@@ -676,7 +681,7 @@ def _generate_source_qualifier(lines, sq, source_map, source_dfs, connector_grap
676
681
  sq_override = (session_overrides or {}).get(sq.name, {}) or (session_overrides or {}).get(src_name, {})
677
682
  conn_name = sq_override.get("connection_name") or (_safe_name(src_def.db_name) if src_def.db_name else "default")
678
683
 
679
- _emit_sql_with_params(lines, f"sql_{sq_safe}", sql_override)
684
+ _emit_sql_with_params(lines, f"sql_{sq_safe}", sql_override, mapping_name=mapping_name, folder_name=folder_name)
680
685
  lines.append(f" df_{sq_safe} = read_from_db(config, sql_{sq_safe}, '{conn_name}')")
681
686
  elif len(connected_sources) == 1:
682
687
  src_name = next(iter(connected_sources))
@@ -718,7 +723,7 @@ def _generate_source_qualifier(lines, sq, source_map, source_dfs, connector_grap
718
723
  lines.append("")
719
724
 
720
725
 
721
- def _generate_transformation(lines, tx, connector_graph, source_dfs, transform_map, instance_map, data_lib="pandas"):
726
+ def _generate_transformation(lines, tx, connector_graph, source_dfs, transform_map, instance_map, data_lib="pandas", mapping_name="", folder_name=""):
722
727
  tx_safe = _safe_name(tx.name)
723
728
  tx_type = tx.type.lower().strip()
724
729
 
@@ -765,7 +770,7 @@ def _generate_transformation(lines, tx, connector_graph, source_dfs, transform_m
765
770
  elif tx_type in ("joiner",):
766
771
  _gen_joiner_transform(lines, tx, tx_safe, input_df, input_sources, source_dfs, connector_graph, data_lib)
767
772
  elif tx_type in ("lookup procedure", "lookup"):
768
- _gen_lookup_transform(lines, tx, tx_safe, input_df, source_dfs, connector_graph, data_lib)
773
+ _gen_lookup_transform(lines, tx, tx_safe, input_df, source_dfs, connector_graph, data_lib, mapping_name=mapping_name, folder_name=folder_name)
769
774
  elif tx_type == "router":
770
775
  _gen_router_transform(lines, tx, tx_safe, input_df, source_dfs)
771
776
  elif tx_type in ("union",):
@@ -785,7 +790,7 @@ def _generate_transformation(lines, tx, connector_graph, source_dfs, transform_m
785
790
  elif tx_type in ("java",):
786
791
  _gen_java_transform(lines, tx, tx_safe, input_df, source_dfs)
787
792
  elif tx_type in ("sql",):
788
- _gen_sql_transform(lines, tx, tx_safe, input_df, source_dfs)
793
+ _gen_sql_transform(lines, tx, tx_safe, input_df, source_dfs, mapping_name=mapping_name, folder_name=folder_name)
789
794
  else:
790
795
  lines.append(f" # TODO: Unsupported transformation type '{tx.type}' - passing through")
791
796
  copy_expr = lib_copy(data_lib, input_df)
@@ -990,7 +995,7 @@ def _gen_joiner_transform(lines, tx, tx_safe, input_df, input_sources, source_df
990
995
  source_dfs[tx.name] = f"df_{tx_safe}"
991
996
 
992
997
 
993
- def _gen_lookup_transform(lines, tx, tx_safe, input_df, source_dfs, connector_graph=None, data_lib="pandas"):
998
+ def _gen_lookup_transform(lines, tx, tx_safe, input_df, source_dfs, connector_graph=None, data_lib="pandas", mapping_name="", folder_name=""):
994
999
  lookup_table = ""
995
1000
  lookup_sql = ""
996
1001
  lookup_condition = ""
@@ -1027,7 +1032,7 @@ def _gen_lookup_transform(lines, tx, tx_safe, input_df, source_dfs, connector_gr
1027
1032
 
1028
1033
  lines.append(f" # Lookup: {lookup_table or tx.name}")
1029
1034
  if lookup_sql:
1030
- _emit_sql_with_params(lines, f"lkp_sql_{tx_safe}", lookup_sql)
1035
+ _emit_sql_with_params(lines, f"lkp_sql_{tx_safe}", lookup_sql, mapping_name=mapping_name, folder_name=folder_name)
1031
1036
  lines.append(f" df_lkp_{tx_safe} = read_from_db(config, lkp_sql_{tx_safe}, 'default')")
1032
1037
  elif lookup_table:
1033
1038
  lines.append(f" df_lkp_{tx_safe} = read_from_db(config, 'SELECT * FROM {lookup_table}', 'default')")
@@ -1423,14 +1428,14 @@ def _gen_java_transform(lines, tx, tx_safe, input_df, source_dfs):
1423
1428
  source_dfs[tx.name] = f"df_{tx_safe}"
1424
1429
 
1425
1430
 
1426
- def _gen_sql_transform(lines, tx, tx_safe, input_df, source_dfs):
1431
+ def _gen_sql_transform(lines, tx, tx_safe, input_df, source_dfs, mapping_name="", folder_name=""):
1427
1432
  sql_query = ""
1428
1433
  for attr in tx.attributes:
1429
1434
  if attr.name == "Sql Query" and attr.value:
1430
1435
  sql_query = convert_sql_expression(attr.value)
1431
1436
  lines.append(f" # SQL Transformation: {tx.name}")
1432
1437
  if sql_query:
1433
- _emit_sql_with_params(lines, f"sql_{tx_safe}", sql_query)
1438
+ _emit_sql_with_params(lines, f"sql_{tx_safe}", sql_query, mapping_name=mapping_name, folder_name=folder_name)
1434
1439
  lines.append(f" df_{tx_safe} = read_from_db(config, sql_{tx_safe}, 'default')")
1435
1440
  else:
1436
1441
  lines.append(f" df_{tx_safe} = {input_df}.copy()")
@@ -18,6 +18,9 @@ class FieldDef:
18
18
  field_number: int = 0
19
19
  hidden: str = "NO"
20
20
  business_name: str = ""
21
+ offset: int = 0
22
+ physical_offset: int = 0
23
+ physical_length: int = 0
21
24
  field_attributes: List[Dict[str, str]] = field(default_factory=list)
22
25
 
23
26
 
@@ -417,6 +417,9 @@ class InformaticaParser:
417
417
  hidden=self._attr(elem, "HIDDEN", "NO"),
418
418
  business_name=self._attr(elem, "BUSINESSNAME"),
419
419
  description=self._attr(elem, "DESCRIPTION"),
420
+ offset=self._int_attr(elem, "OFFSET"),
421
+ physical_offset=self._int_attr(elem, "PHYSICALOFFSET"),
422
+ physical_length=self._int_attr(elem, "PHYSICALLENGTH"),
420
423
  )
421
424
  for fa in elem.findall("FIELDATTRIBUTE"):
422
425
  fld.field_attributes.append({
@@ -184,7 +184,9 @@ def convert_expression(expr):
184
184
  return cleaned
185
185
 
186
186
  if cleaned.startswith("'") and cleaned.endswith("'"):
187
- return cleaned
187
+ close_pos = cleaned.find("'", 1)
188
+ if close_pos == len(cleaned) - 1:
189
+ return cleaned
188
190
 
189
191
  converted = cleaned
190
192
 
@@ -428,7 +430,9 @@ def _vec_recursive(expr, df_var):
428
430
  return cleaned
429
431
 
430
432
  if cleaned.startswith("'") and cleaned.endswith("'"):
431
- return cleaned
433
+ close_pos = cleaned.find("'", 1)
434
+ if close_pos == len(cleaned) - 1:
435
+ return cleaned
432
436
 
433
437
  upper = cleaned.upper()
434
438
 
@@ -452,6 +456,17 @@ def _vec_recursive(expr, df_var):
452
456
  var_name = cleaned[2:]
453
457
  return f'get_variable("{var_name}")'
454
458
 
459
+ if re.match(r'^\$PM\w+$', cleaned):
460
+ var_name = cleaned[1:]
461
+ return f'resolve_builtin_variable("{var_name}")'
462
+
463
+ not_result = _find_func_call(cleaned, 'NOT')
464
+ if not_result and not_result[0] == 0 and not_result[1] == len(cleaned):
465
+ _, _, args = not_result
466
+ if len(args) >= 1:
467
+ inner = _vec_recursive(args[0], df_var)
468
+ return f'~({inner})'
469
+
455
470
  lkp_result = _find_func_call(cleaned, 'LKP')
456
471
  if lkp_result is None:
457
472
  lkp_match = re.match(r'^:LKP\.(\w+)\s*\(', cleaned, re.IGNORECASE)
@@ -666,6 +681,8 @@ def _vec_recursive(expr, df_var):
666
681
  if len(args) >= 2:
667
682
  fmt = _convert_infa_date_format(args[1])
668
683
  return f'{field_val}.dt.strftime("{fmt}")'
684
+ if any(op in field_val for op in (' + ', ' - ', ' * ', ' / ', ' % ')):
685
+ return f'({field_val}).astype(str)'
669
686
  return f'{field_val}.astype(str)'
670
687
 
671
688
  make_dt_result = _find_func_call(cleaned, 'MAKE_DATE_TIME')
@@ -883,6 +900,7 @@ def _vec_recursive(expr, df_var):
883
900
  converted = re.sub(r':LKP\.(\w+)\s*\(', r'lookup_func("\1", ', converted)
884
901
 
885
902
  converted = re.sub(r'\$\$(\w+)', r'get_variable("\1")', converted)
903
+ converted = re.sub(r'\$(PM\w+)', r'resolve_builtin_variable("\1")', converted)
886
904
 
887
905
  converted = re.sub(r'\b([A-Za-z_][A-Za-z0-9_]*)\s*IS\s+NOT\s+NULL\b',
888
906
  lambda m: f'{df_var}["{m.group(1)}"].notna()', converted, flags=re.IGNORECASE)
@@ -895,8 +913,15 @@ def _vec_recursive(expr, df_var):
895
913
 
896
914
  converted = _convert_remaining_funcs(converted, df_var)
897
915
 
916
+ converted = re.sub(r'\bAND\b', ' & ', converted, flags=re.IGNORECASE)
917
+ converted = re.sub(r'\bOR\b', ' | ', converted, flags=re.IGNORECASE)
918
+ converted = re.sub(r'\bNOT\b', ' ~ ', converted, flags=re.IGNORECASE)
919
+ converted = re.sub(r'<>', '!=', converted)
920
+ converted = re.sub(r'(?<![<>!=])=(?!=)', '==', converted)
921
+
898
922
  skip_words = {
899
923
  'True', 'False', 'None', 'and', 'or', 'not', 'np', 'pd', 'get_variable',
924
+ 'resolve_builtin_variable',
900
925
  'str', 'int', 'float', 'bool', 'len', 'abs', 'round',
901
926
  'fillna', 'astype', 'isna', 'notna', 'where', 'errors', 'coerce',
902
927
  'lookup_func', 'expand', 'extract', 'regex', 'contains', 'replace',
@@ -904,11 +929,6 @@ def _vec_recursive(expr, df_var):
904
929
  }
905
930
  converted = _substitute_fields(converted, df_var, skip_words)
906
931
 
907
- converted = re.sub(r'\bAND\b', ' & ', converted, flags=re.IGNORECASE)
908
- converted = re.sub(r'\bOR\b', ' | ', converted, flags=re.IGNORECASE)
909
- converted = re.sub(r'\bNOT\b', ' ~', converted, flags=re.IGNORECASE)
910
- converted = re.sub(r'<>', '!=', converted)
911
- converted = re.sub(r'(?<![<>!=])=(?!=)', '==', converted)
912
932
  converted = re.sub(r'\berrors\s*==\s*(["\'])', r'errors=\1', converted)
913
933
  converted = re.sub(r'\bexpand\s*==\s*', 'expand=', converted)
914
934
  converted = re.sub(r'\bregex\s*==\s*', 'regex=', converted)
@@ -1041,6 +1061,8 @@ def _vectorize_simple(part, df_var):
1041
1061
  c = re.sub(r'\b([A-Za-z_]\w*)\s*IS\s+NULL\b',
1042
1062
  lambda m: f'{df_var}["{m.group(1)}"].isna()', c, flags=re.IGNORECASE)
1043
1063
 
1064
+ c = re.sub(r'\$(PM\w+)', r'resolve_builtin_variable("\1")', c)
1065
+
1044
1066
  c = re.sub(r'<>', '!=', c)
1045
1067
  c = re.sub(r'(?<![<>!=])=(?!=)', '==', c)
1046
1068
 
@@ -1048,8 +1070,13 @@ def _vectorize_simple(part, df_var):
1048
1070
  c = re.sub(r'\bTRUE\b', 'True', c, flags=re.IGNORECASE)
1049
1071
  c = re.sub(r'\bFALSE\b', 'False', c, flags=re.IGNORECASE)
1050
1072
 
1073
+ c = re.sub(r'\bAND\b', ' & ', c, flags=re.IGNORECASE)
1074
+ c = re.sub(r'\bOR\b', ' | ', c, flags=re.IGNORECASE)
1075
+ c = re.sub(r'\bNOT\b', ' ~ ', c, flags=re.IGNORECASE)
1076
+
1051
1077
  skip_words = {
1052
1078
  'True', 'False', 'None', 'and', 'or', 'not', 'np', 'pd',
1079
+ 'resolve_builtin_variable',
1053
1080
  'str', 'int', 'float', 'isna', 'notna', 'fillna',
1054
1081
  'get_variable', 'lookup_func', 'isin', 'eq',
1055
1082
  'expand', 'extract', 'astype', 'errors', 'coerce', 'regex',
@@ -1089,8 +1116,9 @@ def _split_condition_tokens(text):
1089
1116
  current.append(ch)
1090
1117
  elif depth == 0:
1091
1118
  rest = text[i:]
1092
- and_match = re.match(r'\bAND\b', rest, re.IGNORECASE)
1093
- or_match = re.match(r'\bOR\b', rest, re.IGNORECASE)
1119
+ prev_is_word = i > 0 and (text[i - 1].isalnum() or text[i - 1] == '_')
1120
+ and_match = re.match(r'\bAND\b', rest, re.IGNORECASE) if not prev_is_word else None
1121
+ or_match = re.match(r'\bOR\b', rest, re.IGNORECASE) if not prev_is_word else None
1094
1122
  if and_match:
1095
1123
  tokens.append(''.join(current).strip())
1096
1124
  current = []
@@ -1134,9 +1162,10 @@ def _vectorize_condition(cond, df_var="df"):
1134
1162
  for part in parts:
1135
1163
  negate = False
1136
1164
  inner = part.strip()
1137
- if re.match(r'^NOT\s+', inner, flags=re.IGNORECASE):
1165
+ not_match = re.match(r'^NOT\b\s*', inner, flags=re.IGNORECASE)
1166
+ if not_match:
1138
1167
  negate = True
1139
- inner = re.sub(r'^NOT\s+', '', inner, flags=re.IGNORECASE).strip()
1168
+ inner = inner[not_match.end():].strip()
1140
1169
 
1141
1170
  v = _vectorize_simple(inner, df_var)
1142
1171
  if negate:
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: informatica-python
3
- Version: 1.9.5
3
+ Version: 1.9.7
4
4
  Summary: Convert Informatica PowerCenter workflow XML to Python/PySpark code
5
5
  Author: Nick
6
6
  License: MIT
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
4
4
 
5
5
  [project]
6
6
  name = "informatica-python"
7
- version = "1.9.5"
7
+ version = "1.9.7"
8
8
  description = "Convert Informatica PowerCenter workflow XML to Python/PySpark code"
9
9
  readme = "README.md"
10
10
  license = {text = "MIT"}
@@ -2676,6 +2676,8 @@ class TestPMVariableHandling(unittest.TestCase):
2676
2676
  if "$PMMappingName" in code:
2677
2677
  assert "resolve_builtin_variable" in code, \
2678
2678
  "SQL with $PMMappingName should call resolve_builtin_variable"
2679
+ assert "mapping_name='m_pm_vars'" in code, \
2680
+ "resolve_builtin_variable should receive actual mapping name"
2679
2681
  break
2680
2682
  finally:
2681
2683
  shutil.rmtree(tmpdir)
@@ -2691,7 +2693,8 @@ class TestExecuteSqlAlchemy(unittest.TestCase):
2691
2693
  with open(os.path.join(tmpdir, "helper_functions.py")) as f:
2692
2694
  code = f.read()
2693
2695
  exec_block = code.split("def execute_sql(")[1]
2694
- assert "sqlalchemy" in exec_block or "text(sql)" in exec_block
2696
+ assert "text(sql)" in exec_block
2697
+ assert "dialect" in exec_block, "Should check for dialect attribute to detect SQLAlchemy"
2695
2698
  finally:
2696
2699
  shutil.rmtree(tmpdir)
2697
2700
 
@@ -2708,3 +2711,203 @@ class TestImportRe(unittest.TestCase):
2708
2711
  assert "import re" in code
2709
2712
  finally:
2710
2713
  shutil.rmtree(tmpdir)
2714
+
2715
+
2716
+ class TestNotFunctionCallForm(unittest.TestCase):
2717
+
2718
+ def test_not_without_space_isnull(self):
2719
+ result = convert_expression_vectorized("NOT(ISNULL(Postal_Code))")
2720
+ assert "~" in result
2721
+ assert "isna" in result
2722
+ assert "NOT(" not in result
2723
+
2724
+ def test_not_with_space_isnull(self):
2725
+ result = convert_expression_vectorized("NOT ISNULL(Postal_Code)")
2726
+ assert "~" in result
2727
+ assert "isna" in result
2728
+
2729
+ def test_not_in_iif_condition(self):
2730
+ result = convert_expression_vectorized("IIF(NOT(ISNULL(X)), X, 'default')")
2731
+ assert "np.where" in result
2732
+ assert "~" in result
2733
+ assert "isna" in result
2734
+
2735
+ def test_not_vectorize_condition_no_space(self):
2736
+ result = convert_filter_vectorized("NOT(ISNULL(field1))")
2737
+ assert "~" in result
2738
+ assert "isna" in result
2739
+ assert "NOT(" not in result
2740
+
2741
+ def test_not_vectorize_condition_with_space(self):
2742
+ result = convert_filter_vectorized("NOT ISNULL(field1)")
2743
+ assert "~" in result
2744
+ assert "isna" in result
2745
+
2746
+
2747
+ class TestAndOrNotAsFieldNames(unittest.TestCase):
2748
+
2749
+ def test_and_not_treated_as_field(self):
2750
+ result = convert_filter_vectorized("A = 1 AND B = 2")
2751
+ assert 'df["AND"]' not in result
2752
+ assert "&" in result
2753
+
2754
+ def test_or_not_treated_as_field(self):
2755
+ result = convert_filter_vectorized("A = 'TRUE' OR B = 'FALSE'")
2756
+ assert 'df["OR"]' not in result
2757
+ assert "|" in result
2758
+
2759
+ def test_complex_and_or_filter(self):
2760
+ expr = "FILTER_FLAG = 'TRUE' OR (FILTER_FLAG='FALSE' AND ACCBALANCE='Y')"
2761
+ result = convert_filter_vectorized(expr)
2762
+ assert 'df["AND"]' not in result
2763
+ assert 'df["OR"]' not in result
2764
+ assert "&" in result
2765
+ assert "|" in result
2766
+
2767
+ def test_nested_and_in_iif(self):
2768
+ expr = "IIF(UPPER(X) = 'A' AND UPPER(Y) = 'B', 1, 0)"
2769
+ result = convert_expression_vectorized(expr)
2770
+ assert "np.where" in result
2771
+ assert 'df["AND"]' not in result
2772
+ assert "&" in result
2773
+
2774
+ def test_and_or_in_vectorize_simple(self):
2775
+ result = convert_filter_vectorized("(X = 1 AND Y = 2)")
2776
+ assert 'df["AND"]' not in result
2777
+ assert "&" in result
2778
+
2779
+
2780
+ class TestPMBuiltinVariableInExpression(unittest.TestCase):
2781
+
2782
+ def test_pm_mapping_name_standalone(self):
2783
+ result = convert_expression_vectorized("$PMMappingName")
2784
+ assert "resolve_builtin_variable" in result
2785
+ assert "PMMappingName" in result
2786
+ assert '$df[' not in result
2787
+
2788
+ def test_pm_in_concat(self):
2789
+ result = convert_expression_vectorized("'prefix_' || $PMSessionName || '_suffix'")
2790
+ assert "resolve_builtin_variable" in result
2791
+ assert "PMSessionName" in result
2792
+ assert '$df[' not in result
2793
+
2794
+ def test_pm_variable_not_mangled(self):
2795
+ result = convert_expression_vectorized("IIF($PMMappingName = 'test', 1, 0)")
2796
+ assert "resolve_builtin_variable" in result
2797
+ assert '$df[' not in result
2798
+
2799
+
2800
+ class TestToCharParenthesization(unittest.TestCase):
2801
+
2802
+ def test_to_char_with_arithmetic(self):
2803
+ result = convert_expression_vectorized("TO_CHAR(TO_INTEGER(x) - 1)")
2804
+ assert ".astype(str)" in result
2805
+ assert result.count("(") >= result.count(")")
2806
+ assert "- 1.astype(str)" not in result
2807
+ assert "- 1).astype(str)" in result
2808
+
2809
+ def test_to_char_simple_field(self):
2810
+ result = convert_expression_vectorized("TO_CHAR(x)")
2811
+ assert ".astype(str)" in result
2812
+
2813
+ def test_to_char_with_addition(self):
2814
+ result = convert_expression_vectorized("TO_CHAR(x + y)")
2815
+ assert "- 1.astype" not in result or "+ " not in result
2816
+ if " + " in result:
2817
+ assert ").astype(str)" in result
2818
+
2819
+
2820
+ class TestIifFieldEqualsNumeric(unittest.TestCase):
2821
+
2822
+ def test_iif_field_equals_zero(self):
2823
+ result = convert_expression_vectorized("IIF(DeletedIndicator=0,'N','Y')")
2824
+ assert "np.where" in result
2825
+ assert "==" in result
2826
+ assert 'DeletedIndicator' in result.replace('"', '')
2827
+ assert "| (" not in result
2828
+
2829
+ def test_iif_field_equals_string(self):
2830
+ result = convert_expression_vectorized("IIF(Status='A','Active','Inactive')")
2831
+ assert "np.where" in result
2832
+ assert "==" in result
2833
+
2834
+
2835
+ class TestFixedWidthPhysicalLength(unittest.TestCase):
2836
+
2837
+ def test_field_def_has_physical_length(self):
2838
+ from informatica_python.models import FieldDef
2839
+ fld = FieldDef(name="test", datatype="string", physical_length=20, offset=5)
2840
+ assert fld.physical_length == 20
2841
+ assert fld.offset == 5
2842
+
2843
+ def test_fixed_width_xml(self):
2844
+ xml = '''<?xml version="1.0" encoding="UTF-8"?>
2845
+ <!DOCTYPE POWERMART SYSTEM "powrmart.dtd">
2846
+ <POWERMART CREATION_DATE="01/01/2025" REPOSITORY_VERSION="1">
2847
+ <REPOSITORY NAME="repo" VERSION="1" CODEPAGE="UTF-8" DATABASETYPE="Oracle">
2848
+ <FOLDER NAME="TEST_FOLDER" OWNER="admin">
2849
+ <SOURCE NAME="SRC_FW" DATABASETYPE="Flat File" DBDNAME="SRC_FW">
2850
+ <FLATFILE ISFIXEDWIDTH="YES" PADBYTES="NO"/>
2851
+ <SOURCEFIELD NAME="FIELD1" DATATYPE="string" PRECISION="10" SCALE="0" FIELDNUMBER="1" PHYSICALLENGTH="15" OFFSET="0"/>
2852
+ <SOURCEFIELD NAME="FIELD2" DATATYPE="string" PRECISION="20" SCALE="0" FIELDNUMBER="2" PHYSICALLENGTH="25" OFFSET="15"/>
2853
+ </SOURCE>
2854
+ <TARGET NAME="TGT_FW" DATABASETYPE="Flat File">
2855
+ <TARGETFIELD NAME="FIELD1" DATATYPE="string" PRECISION="10" SCALE="0" FIELDNUMBER="1"/>
2856
+ <TARGETFIELD NAME="FIELD2" DATATYPE="string" PRECISION="20" SCALE="0" FIELDNUMBER="2"/>
2857
+ </TARGET>
2858
+ <MAPPING NAME="m_test_fw" ISVALID="YES">
2859
+ <TRANSFORMATION NAME="SQ_SRC_FW" TYPE="Source Qualifier" REUSABLE="NO">
2860
+ <TRANSFORMFIELD NAME="FIELD1" DATATYPE="string" PRECISION="10" PORTTYPE="INPUT/OUTPUT"/>
2861
+ <TRANSFORMFIELD NAME="FIELD2" DATATYPE="string" PRECISION="20" PORTTYPE="INPUT/OUTPUT"/>
2862
+ <TABLEATTRIBUTE NAME="Sql Query" VALUE=""/>
2863
+ <TABLEATTRIBUTE NAME="User Defined Join" VALUE=""/>
2864
+ <TABLEATTRIBUTE NAME="Source Filter" VALUE=""/>
2865
+ </TRANSFORMATION>
2866
+ <CONNECTOR FROMINSTANCE="SQ_SRC_FW" FROMFIELD="FIELD1" TOINSTANCE="TGT_FW" TOFIELD="FIELD1"/>
2867
+ <CONNECTOR FROMINSTANCE="SQ_SRC_FW" FROMFIELD="FIELD2" TOINSTANCE="TGT_FW" TOFIELD="FIELD2"/>
2868
+ <INSTANCE NAME="SQ_SRC_FW" TRANSFORMATION_NAME="SQ_SRC_FW" TYPE="Source Qualifier">
2869
+ <ASSOCIATED_SOURCE_INSTANCE NAME="SRC_FW"/>
2870
+ </INSTANCE>
2871
+ <INSTANCE NAME="SRC_FW" TRANSFORMATION_NAME="SRC_FW" TYPE="Source Definition"/>
2872
+ <INSTANCE NAME="TGT_FW" TRANSFORMATION_NAME="TGT_FW" TYPE="Target Definition"/>
2873
+ </MAPPING>
2874
+ <SESSION NAME="s_test_fw" MAPPINGNAME="m_test_fw" ISVALID="YES">
2875
+ <SESSTRANSFORMATIONINST TRANSFORMATIONNAME="SQ_SRC_FW" SINSTANCENAME="SQ_SRC_FW"/>
2876
+ <CONFIGREFERENCE REFOBJECTNAME="default_session_config" TYPE="Session Config"/>
2877
+ </SESSION>
2878
+ <WORKFLOW NAME="wf_test_fw" ISVALID="YES">
2879
+ <TASKINSTANCE NAME="s_test_fw" TASKNAME="s_test_fw" TASKTYPE="Session"/>
2880
+ </WORKFLOW>
2881
+ </FOLDER>
2882
+ </REPOSITORY>
2883
+ </POWERMART>'''
2884
+ converter = InformaticaConverter()
2885
+ tmpdir = tempfile.mkdtemp()
2886
+ try:
2887
+ converter.convert_string(xml, output_dir=tmpdir)
2888
+ mapping_file = os.path.join(tmpdir, "mapping_m_test_fw.py")
2889
+ assert os.path.exists(mapping_file), "mapping file not created"
2890
+ with open(mapping_file) as f:
2891
+ code = f.read()
2892
+ assert "read_fwf" in code
2893
+ assert "15" in code
2894
+ assert "25" in code
2895
+ finally:
2896
+ shutil.rmtree(tmpdir)
2897
+
2898
+
2899
+ class TestConcatWithLtrimRtrim(unittest.TestCase):
2900
+
2901
+ def test_concat_ltrim_rtrim(self):
2902
+ expr = "'PER_' || ltrim(rtrim(X)) || '_suffix'"
2903
+ result = convert_expression_vectorized(expr)
2904
+ assert "+" in result
2905
+ assert "||" not in result
2906
+ assert "lstrip" in result or "strip" in result
2907
+ assert "rstrip" in result or "strip" in result
2908
+
2909
+ def test_concat_simple_fields(self):
2910
+ expr = "A || '_' || B"
2911
+ result = convert_expression_vectorized(expr)
2912
+ assert "+" in result
2913
+ assert "||" not in result