informatica-python 1.5.2__tar.gz → 1.6.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (30) hide show
  1. {informatica_python-1.5.2 → informatica_python-1.6.1}/PKG-INFO +1 -1
  2. {informatica_python-1.5.2 → informatica_python-1.6.1}/informatica_python/__init__.py +1 -1
  3. {informatica_python-1.5.2 → informatica_python-1.6.1}/informatica_python/cli.py +6 -0
  4. {informatica_python-1.5.2 → informatica_python-1.6.1}/informatica_python/converter.py +7 -5
  5. {informatica_python-1.5.2 → informatica_python-1.6.1}/informatica_python/generators/error_log_gen.py +117 -0
  6. {informatica_python-1.5.2 → informatica_python-1.6.1}/informatica_python/generators/mapping_gen.py +80 -31
  7. {informatica_python-1.5.2 → informatica_python-1.6.1}/informatica_python/generators/sql_gen.py +43 -62
  8. {informatica_python-1.5.2 → informatica_python-1.6.1}/informatica_python/utils/expression_converter.py +2 -1
  9. informatica_python-1.6.1/informatica_python/utils/sql_dialect.py +183 -0
  10. {informatica_python-1.5.2 → informatica_python-1.6.1}/informatica_python.egg-info/PKG-INFO +1 -1
  11. {informatica_python-1.5.2 → informatica_python-1.6.1}/informatica_python.egg-info/SOURCES.txt +1 -0
  12. {informatica_python-1.5.2 → informatica_python-1.6.1}/pyproject.toml +1 -1
  13. {informatica_python-1.5.2 → informatica_python-1.6.1}/tests/test_integration.py +280 -0
  14. {informatica_python-1.5.2 → informatica_python-1.6.1}/LICENSE +0 -0
  15. {informatica_python-1.5.2 → informatica_python-1.6.1}/README.md +0 -0
  16. {informatica_python-1.5.2 → informatica_python-1.6.1}/informatica_python/generators/__init__.py +0 -0
  17. {informatica_python-1.5.2 → informatica_python-1.6.1}/informatica_python/generators/config_gen.py +0 -0
  18. {informatica_python-1.5.2 → informatica_python-1.6.1}/informatica_python/generators/helper_gen.py +0 -0
  19. {informatica_python-1.5.2 → informatica_python-1.6.1}/informatica_python/generators/workflow_gen.py +0 -0
  20. {informatica_python-1.5.2 → informatica_python-1.6.1}/informatica_python/models.py +0 -0
  21. {informatica_python-1.5.2 → informatica_python-1.6.1}/informatica_python/parser.py +0 -0
  22. {informatica_python-1.5.2 → informatica_python-1.6.1}/informatica_python/utils/__init__.py +0 -0
  23. {informatica_python-1.5.2 → informatica_python-1.6.1}/informatica_python/utils/datatype_map.py +0 -0
  24. {informatica_python-1.5.2 → informatica_python-1.6.1}/informatica_python/utils/lib_adapters.py +0 -0
  25. {informatica_python-1.5.2 → informatica_python-1.6.1}/informatica_python.egg-info/dependency_links.txt +0 -0
  26. {informatica_python-1.5.2 → informatica_python-1.6.1}/informatica_python.egg-info/entry_points.txt +0 -0
  27. {informatica_python-1.5.2 → informatica_python-1.6.1}/informatica_python.egg-info/requires.txt +0 -0
  28. {informatica_python-1.5.2 → informatica_python-1.6.1}/informatica_python.egg-info/top_level.txt +0 -0
  29. {informatica_python-1.5.2 → informatica_python-1.6.1}/setup.cfg +0 -0
  30. {informatica_python-1.5.2 → informatica_python-1.6.1}/tests/test_converter.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: informatica-python
3
- Version: 1.5.2
3
+ Version: 1.6.1
4
4
  Summary: Convert Informatica PowerCenter workflow XML to Python/PySpark code
5
5
  Author: Nick
6
6
  License: MIT
@@ -7,7 +7,7 @@ Licensed under the MIT License.
7
7
 
8
8
  from informatica_python.converter import InformaticaConverter
9
9
 
10
- __version__ = "1.5.2"
10
+ __version__ = "1.6.1"
11
11
  __author__ = "Nick"
12
12
  __license__ = "MIT"
13
13
  __all__ = ["InformaticaConverter"]
@@ -46,6 +46,11 @@ def main():
46
46
  default=None,
47
47
  help="Path to Informatica .param file for variable substitution",
48
48
  )
49
+ parser.add_argument(
50
+ "--validate-casts",
51
+ action="store_true",
52
+ help="Generate data quality validation code that logs warnings on type coercion",
53
+ )
49
54
 
50
55
  args = parser.parse_args()
51
56
 
@@ -67,6 +72,7 @@ def main():
67
72
  output_dir=args.output,
68
73
  output_zip=args.zip,
69
74
  param_file=args.param_file,
75
+ validate_casts=args.validate_casts,
70
76
  )
71
77
  print(f"Conversion complete! Output: {output_path}")
72
78
  print(f"Files generated:")
@@ -34,7 +34,8 @@ class InformaticaConverter:
34
34
 
35
35
  def convert(self, file_path: str, output_dir: str = "output",
36
36
  output_zip: Optional[str] = None,
37
- param_file: Optional[str] = None) -> str:
37
+ param_file: Optional[str] = None,
38
+ validate_casts: bool = False) -> str:
38
39
  self.powermart = self.parser.parse_file(file_path)
39
40
 
40
41
  if not self.powermart.repositories:
@@ -48,7 +49,7 @@ class InformaticaConverter:
48
49
  raise ValueError("No folder found in XML file")
49
50
 
50
51
  if len(all_folders) == 1:
51
- return self._convert_folder(all_folders[0], output_dir, output_zip, param_file)
52
+ return self._convert_folder(all_folders[0], output_dir, output_zip, param_file, validate_casts)
52
53
 
53
54
  result_path = output_dir if not output_zip else os.path.dirname(output_zip) or "."
54
55
  for folder in all_folders:
@@ -57,7 +58,7 @@ class InformaticaConverter:
57
58
  if output_zip:
58
59
  base, ext = os.path.splitext(output_zip)
59
60
  folder_zip = f"{base}_{folder.name}{ext}"
60
- self._convert_folder(folder, folder_dir, folder_zip, param_file)
61
+ self._convert_folder(folder, folder_dir, folder_zip, param_file, validate_casts)
61
62
  return result_path
62
63
 
63
64
  def convert_string(self, xml_string: str, output_dir: str = "output",
@@ -89,7 +90,8 @@ class InformaticaConverter:
89
90
 
90
91
  def _convert_folder(self, folder: FolderDef, output_dir: str,
91
92
  output_zip: Optional[str] = None,
92
- param_file: Optional[str] = None) -> str:
93
+ param_file: Optional[str] = None,
94
+ validate_casts: bool = False) -> str:
93
95
  if param_file:
94
96
  from informatica_python.utils.expression_converter import parse_param_file
95
97
  parse_param_file(param_file)
@@ -99,7 +101,7 @@ class InformaticaConverter:
99
101
  files["helper_functions.py"] = generate_helper_functions(folder, self.data_lib)
100
102
 
101
103
  for i, mapping in enumerate(folder.mappings, 1):
102
- code = generate_mapping_code(mapping, folder, self.data_lib, i)
104
+ code = generate_mapping_code(mapping, folder, self.data_lib, i, validate_casts=validate_casts)
103
105
  files[f"mapping_{i}.py"] = code
104
106
 
105
107
  files["workflow.py"] = generate_workflow_code(folder)
@@ -222,6 +222,123 @@ def generate_error_log(folder: FolderDef, parser_errors=None, parser_warnings=No
222
222
  for part in sti.partitions:
223
223
  lines.append(f"[INFO] Session '{session.name}': Partition '{part.name}' (type={part.partition_type}) on '{sti.instance_name}'")
224
224
 
225
+ lines.append("")
226
+ lines.append("-" * 70)
227
+ lines.append("UNSUPPORTED TRANSFORMS (Require Manual Review)")
228
+ lines.append("-" * 70)
229
+ lines.append("")
230
+
231
+ unsupported_types = {
232
+ "Custom Transformation", "Java", "Stored Procedure",
233
+ "External Procedure", "HTTP Transformation",
234
+ "Web Service Consumer", "SQL",
235
+ }
236
+ skipped_items = []
237
+ for mapping in folder.mappings:
238
+ for tx in mapping.transformations:
239
+ if tx.type in unsupported_types:
240
+ skipped_attrs = []
241
+ for attr in tx.attributes:
242
+ if attr.value and attr.value.strip():
243
+ skipped_attrs.append(attr.name)
244
+ skipped_items.append({
245
+ "mapping": mapping.name,
246
+ "transform": tx.name,
247
+ "type": tx.type,
248
+ "field_count": len(tx.fields),
249
+ "skipped_attrs": skipped_attrs,
250
+ })
251
+
252
+ if skipped_items:
253
+ for item in skipped_items:
254
+ lines.append(f" Mapping: {item['mapping']}")
255
+ lines.append(f" Transform: {item['transform']} (type={item['type']}, {item['field_count']} fields)")
256
+ if item['skipped_attrs']:
257
+ lines.append(f" Skipped attributes: {', '.join(item['skipped_attrs'])}")
258
+ lines.append("")
259
+ else:
260
+ lines.append(" None - all transformations are supported")
261
+ lines.append("")
262
+
263
+ lines.append("-" * 70)
264
+ lines.append("UNMAPPED PORTS (Fields with no connectors)")
265
+ lines.append("-" * 70)
266
+ lines.append("")
267
+
268
+ for mapping in folder.mappings:
269
+ connected_fields = set()
270
+ for conn in mapping.connectors:
271
+ connected_fields.add((conn.from_instance, conn.from_field))
272
+ connected_fields.add((conn.to_instance, conn.to_field))
273
+
274
+ unmapped = []
275
+ for tx in mapping.transformations:
276
+ for fld in tx.fields:
277
+ pt = (fld.porttype or "").upper()
278
+ if "OUTPUT" in pt or "INPUT/OUTPUT" in pt:
279
+ if (tx.name, fld.name) not in connected_fields:
280
+ unmapped.append((tx.name, tx.type, fld.name, fld.porttype or ""))
281
+
282
+ if unmapped:
283
+ lines.append(f" Mapping: {mapping.name}")
284
+ for tx_name, tx_type, fld_name, port_type in unmapped:
285
+ lines.append(f" {tx_name} ({tx_type}): {fld_name} [{port_type}]")
286
+ lines.append("")
287
+
288
+ lines.append("-" * 70)
289
+ lines.append("UNSUPPORTED EXPRESSION FUNCTIONS")
290
+ lines.append("-" * 70)
291
+ lines.append("")
292
+
293
+ import re
294
+ known_functions = {
295
+ "IIF", "DECODE", "CHOOSE", "IN", "LTRIM", "RTRIM", "TRIM",
296
+ "UPPER", "LOWER", "INITCAP", "SUBSTR", "LPAD", "RPAD",
297
+ "REVERSE", "CHR", "ASCII", "LEFT", "RIGHT", "INDEXOF",
298
+ "TO_CHAR", "TO_DATE", "TO_TIMESTAMP", "TO_INTEGER", "TO_BIGINT",
299
+ "TO_FLOAT", "TO_DECIMAL", "CAST", "SYSDATE", "SYSTIMESTAMP",
300
+ "GET_DATE_PART", "SET_DATE_PART", "ADD_TO_DATE", "DATE_DIFF",
301
+ "DATE_COMPARE", "LAST_DAY", "MAKE_DATE_TIME", "TRUNC", "ROUND",
302
+ "ABS", "CEIL", "CEILING", "FLOOR", "MOD", "POWER", "SQRT",
303
+ "LOG", "EXP", "SIGN", "LENGTH", "CONCAT", "INSTR", "REPLACE",
304
+ "REPLACESTR", "REPLACECHR", "REG_EXTRACT", "REG_REPLACE",
305
+ "REG_MATCH", "IS_SPACES", "IS_NUMBER", "IS_DATE", "NVL",
306
+ "NVL2", "ISNULL", "MAX", "MIN", "SUM", "AVG", "COUNT",
307
+ "FIRST", "LAST", "MEDIAN", "PERCENTILE", "VARIANCE", "STDDEV",
308
+ "LOOKUP", "ERROR", "ABORT", "SESSSTARTTIME",
309
+ "METAPHONE", "SOUNDEX", "COMPRESS", "DECOMPRESS",
310
+ "RANK", "MOVINGAVG", "MOVINGSUM", "CUME",
311
+ }
312
+ func_pattern = re.compile(r'\b([A-Z_][A-Z0-9_]*)\s*\(', re.IGNORECASE)
313
+ unsupported_funcs = {}
314
+ for mapping in folder.mappings:
315
+ for tx in mapping.transformations:
316
+ for fld in tx.fields:
317
+ if not fld.expression:
318
+ continue
319
+ for m in func_pattern.finditer(fld.expression):
320
+ func_name = m.group(1).upper()
321
+ if func_name not in known_functions:
322
+ key = func_name
323
+ if key not in unsupported_funcs:
324
+ unsupported_funcs[key] = []
325
+ unsupported_funcs[key].append(
326
+ f"{mapping.name} > {tx.name} > {fld.name}"
327
+ )
328
+
329
+ if unsupported_funcs:
330
+ for func_name in sorted(unsupported_funcs.keys()):
331
+ locations = unsupported_funcs[func_name]
332
+ lines.append(f" {func_name}() — found in {len(locations)} field(s):")
333
+ for loc in locations[:5]:
334
+ lines.append(f" - {loc}")
335
+ if len(locations) > 5:
336
+ lines.append(f" ... and {len(locations) - 5} more")
337
+ lines.append("")
338
+ else:
339
+ lines.append(" None - all expression functions are recognized")
340
+ lines.append("")
341
+
225
342
  lines.append("")
226
343
  lines.append("-" * 70)
227
344
  lines.append("PARSED XML TAG COVERAGE")
@@ -16,6 +16,62 @@ from informatica_python.utils.lib_adapters import (
16
16
  )
17
17
 
18
18
 
19
def _expand_mapplet_recursive(mapplet, mapplet_map, prefix, depth=0, max_depth=10, visited=None):
    """Recursively inline a mapplet's contents under a name prefix.

    Returns a (transformations, connectors) pair in which every internal
    transformation is cloned with a "<prefix>__" qualified name and every
    internal connector endpoint is rewritten to match. Nested mapplet
    instances are expanded the same way, up to *max_depth* levels deep;
    *visited* guards against self-referential mapplet definitions.
    """
    from informatica_python.models import ConnectorDef

    if visited is None:
        visited = set()
    # Bail out on runaway nesting, or on a mapplet already being expanded
    # along this branch (cycle guard).
    if depth > max_depth or mapplet.name in visited:
        return [], []
    visited.add(mapplet.name)

    internal_names = {t.name for t in mapplet.transformations}

    # Clone each transformation under the qualified name.
    transforms = [
        TransformationDef(
            name=f"{prefix}__{tx.name}",
            type=tx.type,
            description=tx.description,
            reusable=tx.reusable,
            fields=list(tx.fields),
            attributes=list(tx.attributes),
            groups=list(tx.groups),
            metadata_extensions=list(tx.metadata_extensions),
        )
        for tx in mapplet.transformations
    ]

    # Rewire connectors: only endpoints that are internal transformations
    # get the prefix; external endpoints are left untouched.
    connectors = []
    for conn in mapplet.connectors:
        src = f"{prefix}__{conn.from_instance}" if conn.from_instance in internal_names else conn.from_instance
        dst = f"{prefix}__{conn.to_instance}" if conn.to_instance in internal_names else conn.to_instance
        connectors.append(ConnectorDef(
            from_instance=src,
            from_field=conn.from_field,
            from_instance_type=conn.from_instance_type,
            to_instance=dst,
            to_field=conn.to_field,
            to_instance_type=conn.to_instance_type,
        ))

    # Expand mapplet instances nested inside this mapplet.
    for inst in getattr(mapplet, 'instances', []):
        if inst.type != "Mapplet" and (inst.transformation_type or "").lower() != "mapplet":
            continue
        inner = mapplet_map.get(inst.transformation_name or inst.name)
        if inner is None:
            continue
        # Each sibling branch gets its own copy of the visited set so the
        # same mapplet may legitimately appear under different instances.
        sub_tx, sub_conn = _expand_mapplet_recursive(
            inner, mapplet_map, f"{prefix}__{inst.name}",
            depth + 1, max_depth, visited.copy(),
        )
        transforms.extend(sub_tx)
        connectors.extend(sub_conn)

    return transforms, connectors
73
+
74
+
19
75
  def _inline_mapplets(mapping, folder):
20
76
  mapplet_map = {m.name: m for m in folder.mapplets}
21
77
  extra_transforms = []
@@ -31,32 +87,11 @@ def _inline_mapplets(mapping, folder):
31
87
  mapplet_instances.add(inst.name)
32
88
  prefix = inst.name
33
89
 
34
- for tx in mapplet.transformations:
35
- inlined = TransformationDef(
36
- name=f"{prefix}__{tx.name}",
37
- type=tx.type,
38
- description=tx.description,
39
- reusable=tx.reusable,
40
- fields=list(tx.fields),
41
- attributes=list(tx.attributes),
42
- groups=list(tx.groups),
43
- metadata_extensions=list(tx.metadata_extensions),
44
- )
45
- extra_transforms.append(inlined)
46
-
47
- for conn in mapplet.connectors:
48
- from informatica_python.models import ConnectorDef
49
- new_from = f"{prefix}__{conn.from_instance}" if conn.from_instance in {t.name for t in mapplet.transformations} else conn.from_instance
50
- new_to = f"{prefix}__{conn.to_instance}" if conn.to_instance in {t.name for t in mapplet.transformations} else conn.to_instance
51
- inlined_conn = ConnectorDef(
52
- from_instance=new_from,
53
- from_field=conn.from_field,
54
- from_instance_type=conn.from_instance_type,
55
- to_instance=new_to,
56
- to_field=conn.to_field,
57
- to_instance_type=conn.to_instance_type,
58
- )
59
- extra_connectors.append(inlined_conn)
90
+ nested_tx, nested_conn = _expand_mapplet_recursive(
91
+ mapplet, mapplet_map, prefix
92
+ )
93
+ extra_transforms.extend(nested_tx)
94
+ extra_connectors.extend(nested_conn)
60
95
 
61
96
  rewired_connectors = []
62
97
  mapplet_internal_names = set()
@@ -181,7 +216,8 @@ def _build_session_conn_overrides(mapping, folder):
181
216
 
182
217
 
183
218
  def generate_mapping_code(mapping: MappingDef, folder: FolderDef,
184
- data_lib: str = "pandas", mapping_index: int = 1) -> str:
219
+ data_lib: str = "pandas", mapping_index: int = 1,
220
+ validate_casts: bool = False) -> str:
185
221
  lines = []
186
222
  lines.append('"""')
187
223
  lines.append(f"Mapping: {mapping.name}")
@@ -275,7 +311,7 @@ def generate_mapping_code(mapping: MappingDef, folder: FolderDef,
275
311
  _generate_transformation(lines, tx, connector_graph, source_dfs, transform_map, instance_map, data_lib)
276
312
 
277
313
  for tgt_name, tgt_def in target_map.items():
278
- _generate_target_write(lines, tgt_name, tgt_def, connector_graph, source_dfs, transform_map, instance_map, session_overrides)
314
+ _generate_target_write(lines, tgt_name, tgt_def, connector_graph, source_dfs, transform_map, instance_map, session_overrides, validate_casts=validate_casts)
279
315
 
280
316
  lines.append("")
281
317
  lines.append(f" log_mapping_end('{mapping.name}', start_time)")
@@ -1181,7 +1217,7 @@ def _gen_sql_transform(lines, tx, tx_safe, input_df, source_dfs):
1181
1217
  source_dfs[tx.name] = f"df_{tx_safe}"
1182
1218
 
1183
1219
 
1184
- def _generate_target_write(lines, tgt_name, tgt_def, connector_graph, source_dfs, transform_map, instance_map, session_overrides=None):
1220
+ def _generate_target_write(lines, tgt_name, tgt_def, connector_graph, source_dfs, transform_map, instance_map, session_overrides=None, validate_casts=False):
1185
1221
  tgt_safe = _safe_name(tgt_name)
1186
1222
 
1187
1223
  to_conns = connector_graph.get("to", {}).get(tgt_name, [])
@@ -1213,7 +1249,7 @@ def _generate_target_write(lines, tgt_name, tgt_def, connector_graph, source_dfs
1213
1249
  else:
1214
1250
  lines.append(f" df_target_{tgt_safe} = {input_df}")
1215
1251
 
1216
- _emit_type_casting(lines, tgt_safe, tgt_def)
1252
+ _emit_type_casting(lines, tgt_safe, tgt_def, validate_casts=validate_casts)
1217
1253
 
1218
1254
  tgt_override = (session_overrides or {}).get(tgt_name, {})
1219
1255
  tgt_conn = tgt_override.get("connection_name")
@@ -1269,7 +1305,7 @@ CAST_MAP = {
1269
1305
  }
1270
1306
 
1271
1307
 
1272
- def _emit_type_casting(lines, tgt_safe, tgt_def):
1308
+ def _emit_type_casting(lines, tgt_safe, tgt_def, validate_casts=False):
1273
1309
  cast_ops = []
1274
1310
  for fld in tgt_def.fields:
1275
1311
  dt_key = fld.datatype.lower().strip()
@@ -1289,8 +1325,12 @@ def _emit_type_casting(lines, tgt_safe, tgt_def):
1289
1325
  return
1290
1326
 
1291
1327
  lines.append(f" # Type casting for target fields")
1328
+ if validate_casts:
1329
+ lines.append(f" _cast_warnings = []")
1292
1330
  for col_name, cast_type, pd_dtype, nullable in cast_ops:
1293
1331
  lines.append(f" if '{col_name}' in df_target_{tgt_safe}.columns:")
1332
+ if validate_casts:
1333
+ lines.append(f" _pre_null_{_safe_name(col_name)} = df_target_{tgt_safe}['{col_name}'].isna().sum()")
1294
1334
  if cast_type == "datetime":
1295
1335
  lines.append(f" df_target_{tgt_safe}['{col_name}'] = pd.to_datetime(df_target_{tgt_safe}['{col_name}'], errors='coerce')")
1296
1336
  elif cast_type == "int":
@@ -1302,3 +1342,12 @@ def _emit_type_casting(lines, tgt_safe, tgt_def):
1302
1342
  lines.append(f" df_target_{tgt_safe}['{col_name}'] = pd.to_numeric(df_target_{tgt_safe}['{col_name}'], errors='coerce')")
1303
1343
  elif cast_type == "bool":
1304
1344
  lines.append(f" df_target_{tgt_safe}['{col_name}'] = df_target_{tgt_safe}['{col_name}'].astype('{pd_dtype}')")
1345
+ if validate_casts:
1346
+ lines.append(f" _post_null_{_safe_name(col_name)} = df_target_{tgt_safe}['{col_name}'].isna().sum()")
1347
+ lines.append(f" _coerced_{_safe_name(col_name)} = int(_post_null_{_safe_name(col_name)} - _pre_null_{_safe_name(col_name)})")
1348
+ lines.append(f" if _coerced_{_safe_name(col_name)} > 0:")
1349
+ lines.append(f" _cast_warnings.append('{col_name}: {{}} values coerced to null during {cast_type} cast'.format(_coerced_{_safe_name(col_name)}))")
1350
+ lines.append(f" logger.warning('Column {col_name}: %d values coerced to null during {cast_type} cast', _coerced_{_safe_name(col_name)})")
1351
+ if validate_casts:
1352
+ lines.append(f" if _cast_warnings:")
1353
+ lines.append(f" logger.warning('Data quality warnings for target {tgt_safe}: %s', '; '.join(_cast_warnings))")
@@ -1,5 +1,16 @@
1
1
  from informatica_python.models import FolderDef
2
2
  from informatica_python.utils.expression_converter import convert_sql_expression, detect_sql_dialect
3
+ from informatica_python.utils.sql_dialect import translate_sql
4
+
5
+
6
+ SQL_ATTR_NAMES = {
7
+ "Sql Query": "Sql Query",
8
+ "Lookup Sql Override": "Lookup SQL Override",
9
+ "Pre SQL": "Pre-SQL",
10
+ "Post SQL": "Post-SQL",
11
+ "User Defined Join": "User Defined Join",
12
+ "Source Filter": "Source Filter",
13
+ }
3
14
 
4
15
 
5
16
  def generate_sql_file(folder: FolderDef) -> str:
@@ -7,82 +18,34 @@ def generate_sql_file(folder: FolderDef) -> str:
7
18
  lines.append("-- ============================================================")
8
19
  lines.append(f"-- All SQL Queries extracted from folder: {folder.name}")
9
20
  lines.append("-- Auto-generated by informatica-python")
21
+ lines.append("-- Includes ANSI SQL translations where dialect was detected")
10
22
  lines.append("-- ============================================================")
11
23
  lines.append("")
12
24
 
13
25
  sql_count = 0
26
+ translated_count = 0
14
27
 
15
28
  for mapping in folder.mappings:
16
29
  mapping_sqls = []
17
30
 
18
31
  for tx in mapping.transformations:
19
32
  for attr in tx.attributes:
20
- if attr.name == "Sql Query" and attr.value and attr.value.strip():
21
- sql = convert_sql_expression(attr.value)
22
- dialect = detect_sql_dialect(sql)
23
- mapping_sqls.append({
24
- "transformation": tx.name,
25
- "type": tx.type,
26
- "attribute": "Sql Query",
27
- "sql": sql,
28
- "dialect": dialect,
29
- })
30
- sql_count += 1
31
-
32
- elif attr.name == "Lookup Sql Override" and attr.value and attr.value.strip():
33
+ display_name = SQL_ATTR_NAMES.get(attr.name)
34
+ if display_name and attr.value and attr.value.strip():
33
35
  sql = convert_sql_expression(attr.value)
34
36
  dialect = detect_sql_dialect(sql)
35
- mapping_sqls.append({
37
+ ansi = translate_sql(sql, source_dialect=dialect.lower())
38
+ entry = {
36
39
  "transformation": tx.name,
37
40
  "type": tx.type,
38
- "attribute": "Lookup SQL Override",
41
+ "attribute": display_name,
39
42
  "sql": sql,
40
43
  "dialect": dialect,
41
- })
42
- sql_count += 1
43
-
44
- elif attr.name == "Pre SQL" and attr.value and attr.value.strip():
45
- sql = convert_sql_expression(attr.value)
46
- mapping_sqls.append({
47
- "transformation": tx.name,
48
- "type": tx.type,
49
- "attribute": "Pre-SQL",
50
- "sql": sql,
51
- "dialect": detect_sql_dialect(sql),
52
- })
53
- sql_count += 1
54
-
55
- elif attr.name == "Post SQL" and attr.value and attr.value.strip():
56
- sql = convert_sql_expression(attr.value)
57
- mapping_sqls.append({
58
- "transformation": tx.name,
59
- "type": tx.type,
60
- "attribute": "Post-SQL",
61
- "sql": sql,
62
- "dialect": detect_sql_dialect(sql),
63
- })
64
- sql_count += 1
65
-
66
- elif attr.name == "User Defined Join" and attr.value and attr.value.strip():
67
- sql = convert_sql_expression(attr.value)
68
- mapping_sqls.append({
69
- "transformation": tx.name,
70
- "type": tx.type,
71
- "attribute": "User Defined Join",
72
- "sql": sql,
73
- "dialect": detect_sql_dialect(sql),
74
- })
75
- sql_count += 1
76
-
77
- elif attr.name == "Source Filter" and attr.value and attr.value.strip():
78
- sql = convert_sql_expression(attr.value)
79
- mapping_sqls.append({
80
- "transformation": tx.name,
81
- "type": tx.type,
82
- "attribute": "Source Filter",
83
- "sql": sql,
84
- "dialect": detect_sql_dialect(sql),
85
- })
44
+ }
45
+ if ansi.strip() != sql.strip():
46
+ entry["translated"] = ansi
47
+ translated_count += 1
48
+ mapping_sqls.append(entry)
86
49
  sql_count += 1
87
50
 
88
51
  if mapping_sqls:
@@ -98,6 +61,10 @@ def generate_sql_file(folder: FolderDef) -> str:
98
61
  lines.append(f"-- ----")
99
62
  lines.append(sq["sql"].rstrip())
100
63
  lines.append("")
64
+ if "translated" in sq:
65
+ lines.append(f"-- >> ANSI SQL Translation:")
66
+ lines.append(sq["translated"].rstrip())
67
+ lines.append("")
101
68
  lines.append("")
102
69
 
103
70
  for session in folder.sessions:
@@ -106,11 +73,18 @@ def generate_sql_file(folder: FolderDef) -> str:
106
73
  for attr in sti.attributes:
107
74
  if "sql" in attr.name.lower() and attr.value and attr.value.strip():
108
75
  sql = convert_sql_expression(attr.value)
109
- session_sqls.append({
76
+ dialect = detect_sql_dialect(sql)
77
+ ansi = translate_sql(sql, source_dialect=dialect.lower())
78
+ entry = {
110
79
  "instance": sti.instance_name,
111
80
  "attribute": attr.name,
112
81
  "sql": sql,
113
- })
82
+ "dialect": dialect,
83
+ }
84
+ if ansi.strip() != sql.strip():
85
+ entry["translated"] = ansi
86
+ translated_count += 1
87
+ session_sqls.append(entry)
114
88
  sql_count += 1
115
89
 
116
90
  if session_sqls:
@@ -121,12 +95,19 @@ def generate_sql_file(folder: FolderDef) -> str:
121
95
  for sq in session_sqls:
122
96
  lines.append(f"-- Instance: {sq['instance']}")
123
97
  lines.append(f"-- Attribute: {sq['attribute']}")
98
+ lines.append(f"-- Detected dialect: {sq['dialect']}")
124
99
  lines.append(f"-- ----")
125
100
  lines.append(sq["sql"].rstrip())
126
101
  lines.append("")
102
+ if "translated" in sq:
103
+ lines.append(f"-- >> ANSI SQL Translation:")
104
+ lines.append(sq["translated"].rstrip())
105
+ lines.append("")
127
106
  lines.append("")
128
107
 
129
108
  lines.append(f"-- Total SQL queries extracted: {sql_count}")
109
+ if translated_count:
110
+ lines.append(f"-- SQL queries with dialect translation: {translated_count}")
130
111
  lines.append("")
131
112
 
132
113
  return "\n".join(lines)
@@ -472,7 +472,8 @@ def detect_sql_dialect(sql_text):
472
472
 
473
473
  if "GETDATE()" in sql_upper or "ISNULL(" in sql_upper or "TOP " in sql_upper:
474
474
  return "mssql"
475
- if "NVL(" in sql_upper or "SYSDATE" in sql_upper or "ROWNUM" in sql_upper:
475
+ if ("NVL(" in sql_upper or "SYSDATE" in sql_upper or "ROWNUM" in sql_upper
476
+ or "DECODE(" in sql_upper or "(+)" in sql_upper or "SYSTIMESTAMP" in sql_upper):
476
477
  return "oracle"
477
478
  if "NOW()" in sql_upper or "COALESCE(" in sql_upper:
478
479
  return "postgresql"
@@ -0,0 +1,183 @@
1
+ import re
2
+
3
+
4
+ ORACLE_TO_ANSI = [
5
+ (re.compile(r'\bNVL2\s*\(\s*([^,]+?)\s*,\s*([^,]+?)\s*,\s*([^)]+?)\s*\)', re.IGNORECASE),
6
+ r'CASE WHEN \1 IS NOT NULL THEN \2 ELSE \3 END'),
7
+ (re.compile(r'\bNVL\s*\(\s*([^,]+?)\s*,\s*([^)]+?)\s*\)', re.IGNORECASE),
8
+ r'COALESCE(\1, \2)'),
9
+ (re.compile(r'\bSYSDATE\b', re.IGNORECASE), 'CURRENT_TIMESTAMP'),
10
+ (re.compile(r'\bSYSTIMESTAMP\b', re.IGNORECASE), 'CURRENT_TIMESTAMP'),
11
+ (re.compile(r'\|\|', re.IGNORECASE), ' || '),
12
+ ]
13
+
14
+ MSSQL_TO_ANSI = [
15
+ (re.compile(r'\bISNULL\s*\(\s*([^,]+?)\s*,\s*([^)]+?)\s*\)', re.IGNORECASE),
16
+ r'COALESCE(\1, \2)'),
17
+ (re.compile(r'\bGETDATE\s*\(\s*\)', re.IGNORECASE), 'CURRENT_TIMESTAMP'),
18
+ (re.compile(r'\bCONVERT\s*\(\s*VARCHAR\s*,\s*([^,)]+?)\s*,\s*\d+\s*\)', re.IGNORECASE),
19
+ r'CAST(\1 AS VARCHAR)'),
20
+ (re.compile(r'\bLEN\s*\(', re.IGNORECASE), 'LENGTH('),
21
+ (re.compile(r'\bCHARINDEX\s*\(\s*([^,]+?)\s*,\s*([^)]+?)\s*\)', re.IGNORECASE),
22
+ r'POSITION(\1 IN \2)'),
23
+ ]
24
+
25
+ _DECODE_RE = re.compile(
26
+ r'\bDECODE\s*\(', re.IGNORECASE
27
+ )
28
+
29
+ _ORACLE_JOIN_RE = re.compile(
30
+ r'(\w+\.\w+)\s*=\s*(\w+\.\w+)\s*\(\+\)'
31
+ )
32
+
33
+ _ORACLE_JOIN_RE2 = re.compile(
34
+ r'(\w+\.\w+)\s*\(\+\)\s*=\s*(\w+\.\w+)'
35
+ )
36
+
37
+ _ROWNUM_RE = re.compile(
38
+ r'\bAND\s+ROWNUM\s*<=?\s*(\d+)\b|\bWHERE\s+ROWNUM\s*<=?\s*(\d+)\b',
39
+ re.IGNORECASE
40
+ )
41
+
42
+ _TOP_RE = re.compile(
43
+ r'\bSELECT\s+TOP\s+(\d+)\b', re.IGNORECASE
44
+ )
45
+
46
+
47
+ def _convert_decode(sql):
48
+ result = sql
49
+ idx = 0
50
+ while True:
51
+ m = _DECODE_RE.search(result, idx)
52
+ if not m:
53
+ break
54
+ start = m.start()
55
+ paren_start = m.end() - 1
56
+ depth = 1
57
+ pos = paren_start + 1
58
+ while pos < len(result) and depth > 0:
59
+ if result[pos] == '(':
60
+ depth += 1
61
+ elif result[pos] == ')':
62
+ depth -= 1
63
+ pos += 1
64
+ if depth != 0:
65
+ idx = pos
66
+ continue
67
+ inner = result[paren_start + 1:pos - 1]
68
+ args = _split_args(inner)
69
+ if len(args) < 3:
70
+ idx = pos
71
+ continue
72
+ expr = args[0].strip()
73
+ pairs = args[1:]
74
+ case_parts = [f"CASE {expr}"]
75
+ i = 0
76
+ while i < len(pairs) - 1:
77
+ case_parts.append(f" WHEN {pairs[i].strip()} THEN {pairs[i+1].strip()}")
78
+ i += 2
79
+ if i < len(pairs):
80
+ case_parts.append(f" ELSE {pairs[i].strip()}")
81
+ case_parts.append(" END")
82
+ replacement = "".join(case_parts)
83
+ result = result[:start] + replacement + result[pos:]
84
+ idx = start + len(replacement)
85
+ return result
86
+
87
+
88
+ def _split_args(s):
89
+ args = []
90
+ depth = 0
91
+ in_quote = None
92
+ current = []
93
+ for ch in s:
94
+ if in_quote:
95
+ current.append(ch)
96
+ if ch == in_quote:
97
+ in_quote = None
98
+ elif ch in ("'", '"'):
99
+ in_quote = ch
100
+ current.append(ch)
101
+ elif ch == '(':
102
+ depth += 1
103
+ current.append(ch)
104
+ elif ch == ')':
105
+ depth -= 1
106
+ current.append(ch)
107
+ elif ch == ',' and depth == 0:
108
+ args.append(''.join(current))
109
+ current = []
110
+ else:
111
+ current.append(ch)
112
+ if current:
113
+ args.append(''.join(current))
114
+ return args
115
+
116
+
117
+ def _convert_oracle_outer_join(sql):
118
+ result = _ORACLE_JOIN_RE.sub(
119
+ lambda m: f'{m.group(1)} = {m.group(2)} -- (+) converted: use LEFT JOIN',
120
+ sql
121
+ )
122
+ result = _ORACLE_JOIN_RE2.sub(
123
+ lambda m: f'{m.group(1)} = {m.group(2)} -- (+) converted: use RIGHT JOIN',
124
+ result
125
+ )
126
+ return result
127
+
128
+
129
+ def _convert_rownum(sql):
130
+ m = _ROWNUM_RE.search(sql)
131
+ if m:
132
+ limit_val = m.group(1) or m.group(2)
133
+ cleaned = _ROWNUM_RE.sub('', sql).strip()
134
+ if cleaned.endswith('AND'):
135
+ cleaned = cleaned[:-3].strip()
136
+ if cleaned.endswith('WHERE'):
137
+ cleaned = cleaned[:-5].strip()
138
+ cleaned = cleaned.rstrip(';')
139
+ return f"{cleaned}\nLIMIT {limit_val}"
140
+ return sql
141
+
142
+
143
+ def _convert_top(sql):
144
+ m = _TOP_RE.search(sql)
145
+ if m:
146
+ limit_val = m.group(1)
147
+ cleaned = _TOP_RE.sub('SELECT', sql)
148
+ cleaned = cleaned.rstrip(';')
149
+ return f"{cleaned}\nLIMIT {limit_val}"
150
+ return sql
151
+
152
+
153
def translate_sql(sql, source_dialect="auto", target_dialect="ansi"):
    """Translate *sql* from a vendor dialect into portable ANSI SQL.

    source_dialect may be "auto" (detect from the statement text), "oracle",
    "mssql"/"sql server", "generic" or "postgresql"; unknown dialects pass
    through unchanged. target_dialect is reserved for future use (only
    "ansi" output is produced today). Empty/blank input is returned as-is.
    """
    if not sql or not sql.strip():
        return sql

    from informatica_python.utils.expression_converter import detect_sql_dialect

    dialect = detect_sql_dialect(sql).lower() if source_dialect == "auto" else source_dialect

    out = sql
    if dialect == "oracle":
        # Structural rewrites first ((+) joins, DECODE, ROWNUM), then the
        # simple regex substitutions.
        out = _convert_rownum(_convert_decode(_convert_oracle_outer_join(out)))
        for rx, rep in ORACLE_TO_ANSI:
            out = rx.sub(rep, out)
    elif dialect in ("mssql", "sql server"):
        out = _convert_top(out)
        for rx, rep in MSSQL_TO_ANSI:
            out = rx.sub(rep, out)
    elif dialect in ("generic", "postgresql"):
        # Unknown origin: apply both vendor rule sets best-effort.
        out = _convert_decode(out)
        for rx, rep in ORACLE_TO_ANSI + MSSQL_TO_ANSI:
            out = rx.sub(rep, out)

    return out
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: informatica-python
3
- Version: 1.5.2
3
+ Version: 1.6.1
4
4
  Summary: Convert Informatica PowerCenter workflow XML to Python/PySpark code
5
5
  Author: Nick
6
6
  License: MIT
@@ -23,5 +23,6 @@ informatica_python/utils/__init__.py
23
23
  informatica_python/utils/datatype_map.py
24
24
  informatica_python/utils/expression_converter.py
25
25
  informatica_python/utils/lib_adapters.py
26
+ informatica_python/utils/sql_dialect.py
26
27
  tests/test_converter.py
27
28
  tests/test_integration.py
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
4
4
 
5
5
  [project]
6
6
  name = "informatica-python"
7
- version = "1.5.2"
7
+ version = "1.6.1"
8
8
  description = "Convert Informatica PowerCenter workflow XML to Python/PySpark code"
9
9
  readme = "README.md"
10
10
  license = {text = "MIT"}
@@ -547,3 +547,283 @@ class TestCLIParamFile:
547
547
  help_text = f.getvalue()
548
548
  from informatica_python.cli import main as cli_main
549
549
  assert callable(cli_main)
550
+
551
+
552
class TestSQLDialectTranslation:
    """Exercise translate_sql() conversions and dialect auto-detection."""

    def test_nvl_to_coalesce(self):
        from informatica_python.utils.sql_dialect import translate_sql
        out = translate_sql("SELECT NVL(COL1, 0) FROM T", source_dialect="oracle")
        assert "COALESCE" in out
        assert "NVL" not in out

    def test_sysdate_to_current_timestamp(self):
        from informatica_python.utils.sql_dialect import translate_sql
        out = translate_sql("SELECT SYSDATE FROM DUAL", source_dialect="oracle")
        assert "CURRENT_TIMESTAMP" in out
        assert "SYSDATE" not in out

    def test_decode_to_case(self):
        from informatica_python.utils.sql_dialect import translate_sql
        out = translate_sql(
            "SELECT DECODE(STATUS, 'A', 'Active', 'I', 'Inactive', 'Unknown') FROM T",
            source_dialect="oracle",
        )
        # DECODE must be rewritten to a searched CASE expression.
        for keyword in ("CASE", "WHEN", "ELSE"):
            assert keyword in out

    def test_nvl2_to_case(self):
        from informatica_python.utils.sql_dialect import translate_sql
        out = translate_sql(
            "SELECT NVL2(COL1, 'has value', 'null') FROM T", source_dialect="oracle"
        )
        assert "CASE WHEN" in out
        assert "IS NOT NULL" in out

    def test_getdate_to_current_timestamp(self):
        from informatica_python.utils.sql_dialect import translate_sql
        out = translate_sql("SELECT GETDATE() FROM T", source_dialect="mssql")
        assert "CURRENT_TIMESTAMP" in out

    def test_isnull_mssql_to_coalesce(self):
        from informatica_python.utils.sql_dialect import translate_sql
        out = translate_sql("SELECT ISNULL(COL1, 0) FROM T", source_dialect="mssql")
        assert "COALESCE" in out

    def test_top_to_limit(self):
        from informatica_python.utils.sql_dialect import translate_sql
        out = translate_sql("SELECT TOP 10 * FROM T", source_dialect="mssql")
        assert "LIMIT 10" in out
        assert "TOP" not in out

    def test_rownum_to_limit(self):
        from informatica_python.utils.sql_dialect import translate_sql
        out = translate_sql("SELECT * FROM T WHERE ROWNUM <= 5", source_dialect="oracle")
        assert "LIMIT 5" in out

    def test_auto_dialect_detection(self):
        # No explicit dialect: Oracle-isms should be detected and rewritten.
        from informatica_python.utils.sql_dialect import translate_sql
        out = translate_sql("SELECT NVL(A, 0), SYSDATE FROM T")
        assert "COALESCE" in out
        assert "CURRENT_TIMESTAMP" in out

    def test_decode_with_quoted_commas(self):
        # Commas inside string literals must not be treated as arg separators.
        from informatica_python.utils.sql_dialect import translate_sql
        out = translate_sql("SELECT DECODE(col, 'A,B', 1, 0) FROM T", source_dialect="oracle")
        assert "CASE" in out
        assert "'A,B'" in out

    def test_dialect_detects_decode(self):
        from informatica_python.utils.expression_converter import detect_sql_dialect
        assert detect_sql_dialect("SELECT DECODE(X, 1, 'A', 'B') FROM T") == "oracle"

    def test_dialect_detects_outer_join(self):
        from informatica_python.utils.expression_converter import detect_sql_dialect
        assert detect_sql_dialect("SELECT * FROM a, b WHERE a.id = b.id(+)") == "oracle"

    def test_no_change_for_clean_sql(self):
        from informatica_python.utils.sql_dialect import translate_sql
        plain = "SELECT * FROM employees WHERE id = 1"
        assert translate_sql(plain, source_dialect="generic").strip() == plain.strip()

    def test_sql_gen_includes_translation(self):
        from informatica_python.generators.sql_gen import generate_sql_file
        from informatica_python.models import (
            FolderDef, MappingDef, TransformationDef, TableAttribute, FieldDef
        )
        query_attr = TableAttribute(name="Sql Query", value="SELECT NVL(A, 0), SYSDATE FROM T")
        source_qualifier = TransformationDef(
            name="SQ_TEST",
            type="Source Qualifier",
            attributes=[query_attr],
        )
        folder = FolderDef(
            name="F",
            mappings=[MappingDef(name="m_test", transformations=[source_qualifier])],
        )
        generated = generate_sql_file(folder)
        assert "ANSI SQL Translation" in generated
        assert "COALESCE" in generated
640
+
641
+
642
class TestEnhancedErrorReporting:
    """Verify the error-log generator surfaces migration problem areas."""

    def test_unsupported_transforms_section(self):
        from informatica_python.generators.error_log_gen import generate_error_log
        from informatica_python.models import (
            FolderDef, MappingDef, TransformationDef, FieldDef, TableAttribute
        )
        java_tx = TransformationDef(
            name="JAVA_TX",
            type="Java",
            attributes=[TableAttribute(name="Class Name", value="com.example.Transform")],
            fields=[FieldDef(name="OUT1", datatype="string", porttype="OUTPUT")],
        )
        folder = FolderDef(
            name="F",
            mappings=[MappingDef(name="m_test", transformations=[java_tx])],
        )
        report = generate_error_log(folder)
        # The Java transform, its type, and its attributes must all be listed.
        for expected in ("UNSUPPORTED TRANSFORMS", "JAVA_TX", "Java", "Class Name"):
            assert expected in report

    def test_unmapped_ports_section(self):
        from informatica_python.generators.error_log_gen import generate_error_log
        from informatica_python.models import (
            FolderDef, MappingDef, TransformationDef, FieldDef, ConnectorDef
        )
        ports = [
            FieldDef(name="IN1", datatype="string", porttype="INPUT"),
            FieldDef(name="OUT1", datatype="string", porttype="OUTPUT"),
            FieldDef(name="OUT2", datatype="string", porttype="OUTPUT"),
        ]
        expression = TransformationDef(name="EXP1", type="Expression", fields=ports)
        # Only OUT1 is wired to the target, leaving OUT2 dangling.
        link = ConnectorDef(
            from_instance="EXP1", from_field="OUT1",
            from_instance_type="Expression",
            to_instance="TGT", to_field="COL1",
            to_instance_type="Target Definition",
        )
        folder = FolderDef(
            name="F",
            mappings=[
                MappingDef(name="m_test", transformations=[expression], connectors=[link])
            ],
        )
        report = generate_error_log(folder)
        assert "UNMAPPED PORTS" in report
        assert "OUT2" in report

    def test_unsupported_functions_section(self):
        from informatica_python.generators.error_log_gen import generate_error_log
        from informatica_python.models import (
            FolderDef, MappingDef, TransformationDef, FieldDef
        )
        out_port = FieldDef(
            name="OUT1",
            datatype="string",
            porttype="OUTPUT",
            expression="CUSTOM_FUNC(IN1, 'abc')",
        )
        expression = TransformationDef(name="EXP1", type="Expression", fields=[out_port])
        folder = FolderDef(
            name="F",
            mappings=[MappingDef(name="m_test", transformations=[expression])],
        )
        report = generate_error_log(folder)
        assert "UNSUPPORTED EXPRESSION FUNCTIONS" in report
        assert "CUSTOM_FUNC" in report
704
+
705
+
706
class TestNestedMapplets:
    """Cover recursive mapplet expansion, cycle guards, and depth limits."""

    def test_recursive_expansion(self):
        from informatica_python.generators.mapping_gen import _expand_mapplet_recursive
        from informatica_python.models import (
            MappletDef, TransformationDef, FieldDef, ConnectorDef, InstanceDef
        )
        inner = MappletDef(
            name="INNER_MPL",
            transformations=[
                TransformationDef(
                    name="INNER_EXP",
                    type="Expression",
                    fields=[FieldDef(name="F1", datatype="string", porttype="INPUT/OUTPUT")],
                ),
            ],
            connectors=[],
        )
        outer = MappletDef(
            name="OUTER_MPL",
            transformations=[
                TransformationDef(
                    name="OUTER_EXP",
                    type="Expression",
                    fields=[FieldDef(name="F1", datatype="string", porttype="INPUT/OUTPUT")],
                ),
            ],
            connectors=[],
            instances=[
                InstanceDef(name="INNER_INST", type="Mapplet",
                            transformation_name="INNER_MPL", transformation_type="Mapplet"),
            ],
        )
        registry = {"INNER_MPL": inner, "OUTER_MPL": outer}
        transforms, connectors = _expand_mapplet_recursive(outer, registry, "MPL1")
        # Both the outer transform and the nested inner transform appear,
        # each carrying the full instance-path prefix.
        expanded_names = [t.name for t in transforms]
        assert "MPL1__OUTER_EXP" in expanded_names
        assert "MPL1__INNER_INST__INNER_EXP" in expanded_names

    def test_circular_reference_protection(self):
        from informatica_python.generators.mapping_gen import _expand_mapplet_recursive
        from informatica_python.models import (
            MappletDef, TransformationDef, FieldDef, InstanceDef
        )
        # A mapplet that instantiates itself must not expand forever.
        looped = MappletDef(
            name="SELF_REF",
            transformations=[
                TransformationDef(
                    name="EXP1",
                    type="Expression",
                    fields=[FieldDef(name="F1", datatype="string")],
                ),
            ],
            connectors=[],
            instances=[
                InstanceDef(name="SELF", type="Mapplet",
                            transformation_name="SELF_REF", transformation_type="Mapplet"),
            ],
        )
        transforms, _ = _expand_mapplet_recursive(looped, {"SELF_REF": looped}, "M")
        assert len(transforms) == 1

    def test_depth_limit(self):
        from informatica_python.generators.mapping_gen import _expand_mapplet_recursive
        from informatica_python.models import (
            MappletDef, TransformationDef, FieldDef, InstanceDef
        )
        # Build a 15-deep chain MPL_0 -> MPL_1 -> ... -> MPL_14.
        registry = {}
        for depth in range(15):
            child_refs = []
            if depth < 14:
                child_refs = [
                    InstanceDef(name=f"NEST_{depth + 1}", type="Mapplet",
                                transformation_name=f"MPL_{depth + 1}",
                                transformation_type="Mapplet")
                ]
            registry[f"MPL_{depth}"] = MappletDef(
                name=f"MPL_{depth}",
                transformations=[
                    TransformationDef(
                        name=f"TX_{depth}",
                        type="Expression",
                        fields=[FieldDef(name="F", datatype="string")],
                    ),
                ],
                connectors=[],
                instances=child_refs,
            )
        expanded, _ = _expand_mapplet_recursive(registry["MPL_0"], registry, "ROOT")
        # Expansion must stop at the depth cap rather than walking all 15.
        assert len(expanded) <= 11
784
+
785
+
786
class TestDataQualityValidation:
    """Check the optional cast-validation instrumentation in generated code."""

    def test_validate_casts_generates_warnings(self):
        from informatica_python.generators.mapping_gen import _emit_type_casting, _safe_name
        from informatica_python.models import FieldDef

        class StubTarget:
            fields = [
                FieldDef(name="AGE", datatype="integer", nullable="NULL"),
                FieldDef(name="CREATED", datatype="date/time", nullable="NULL"),
            ]

        emitted = []
        _emit_type_casting(emitted, "TGT1", StubTarget(), validate_casts=True)
        generated = "\n".join(emitted)
        # With validation on, the generated code must count null coercions
        # before/after the cast and log a warning about them.
        for marker in ("_cast_warnings", "_pre_null_", "_post_null_",
                       "coerced to null", "logger.warning"):
            assert marker in generated

    def test_no_validation_by_default(self):
        from informatica_python.generators.mapping_gen import _emit_type_casting
        from informatica_python.models import FieldDef

        class StubTarget:
            fields = [FieldDef(name="AGE", datatype="integer", nullable="NULL")]

        emitted = []
        _emit_type_casting(emitted, "TGT1", StubTarget())
        generated = "\n".join(emitted)
        # Validation instrumentation is opt-in only.
        assert "_cast_warnings" not in generated
        assert "_pre_null_" not in generated

    def test_validate_casts_cli_flag(self):
        import contextlib
        import io
        from informatica_python.cli import main

        captured = io.StringIO()
        with contextlib.redirect_stdout(captured):
            try:
                sys.argv = ["informatica-python", "--help"]
                main()
            except SystemExit:
                # argparse exits after printing help; that is expected here.
                pass
        usage = captured.getvalue()
        assert "--validate-casts" in usage or "validate_casts" in usage