informatica-python 1.5.2__tar.gz → 1.6.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (30) hide show
  1. {informatica_python-1.5.2 → informatica_python-1.6.0}/PKG-INFO +1 -1
  2. {informatica_python-1.5.2 → informatica_python-1.6.0}/informatica_python/__init__.py +1 -1
  3. {informatica_python-1.5.2 → informatica_python-1.6.0}/informatica_python/cli.py +6 -0
  4. {informatica_python-1.5.2 → informatica_python-1.6.0}/informatica_python/converter.py +7 -5
  5. {informatica_python-1.5.2 → informatica_python-1.6.0}/informatica_python/generators/error_log_gen.py +117 -0
  6. {informatica_python-1.5.2 → informatica_python-1.6.0}/informatica_python/generators/mapping_gen.py +81 -31
  7. {informatica_python-1.5.2 → informatica_python-1.6.0}/informatica_python/generators/sql_gen.py +43 -62
  8. informatica_python-1.6.0/informatica_python/utils/sql_dialect.py +174 -0
  9. {informatica_python-1.5.2 → informatica_python-1.6.0}/informatica_python.egg-info/PKG-INFO +1 -1
  10. {informatica_python-1.5.2 → informatica_python-1.6.0}/informatica_python.egg-info/SOURCES.txt +1 -0
  11. {informatica_python-1.5.2 → informatica_python-1.6.0}/pyproject.toml +1 -1
  12. {informatica_python-1.5.2 → informatica_python-1.6.0}/tests/test_integration.py +266 -0
  13. {informatica_python-1.5.2 → informatica_python-1.6.0}/LICENSE +0 -0
  14. {informatica_python-1.5.2 → informatica_python-1.6.0}/README.md +0 -0
  15. {informatica_python-1.5.2 → informatica_python-1.6.0}/informatica_python/generators/__init__.py +0 -0
  16. {informatica_python-1.5.2 → informatica_python-1.6.0}/informatica_python/generators/config_gen.py +0 -0
  17. {informatica_python-1.5.2 → informatica_python-1.6.0}/informatica_python/generators/helper_gen.py +0 -0
  18. {informatica_python-1.5.2 → informatica_python-1.6.0}/informatica_python/generators/workflow_gen.py +0 -0
  19. {informatica_python-1.5.2 → informatica_python-1.6.0}/informatica_python/models.py +0 -0
  20. {informatica_python-1.5.2 → informatica_python-1.6.0}/informatica_python/parser.py +0 -0
  21. {informatica_python-1.5.2 → informatica_python-1.6.0}/informatica_python/utils/__init__.py +0 -0
  22. {informatica_python-1.5.2 → informatica_python-1.6.0}/informatica_python/utils/datatype_map.py +0 -0
  23. {informatica_python-1.5.2 → informatica_python-1.6.0}/informatica_python/utils/expression_converter.py +0 -0
  24. {informatica_python-1.5.2 → informatica_python-1.6.0}/informatica_python/utils/lib_adapters.py +0 -0
  25. {informatica_python-1.5.2 → informatica_python-1.6.0}/informatica_python.egg-info/dependency_links.txt +0 -0
  26. {informatica_python-1.5.2 → informatica_python-1.6.0}/informatica_python.egg-info/entry_points.txt +0 -0
  27. {informatica_python-1.5.2 → informatica_python-1.6.0}/informatica_python.egg-info/requires.txt +0 -0
  28. {informatica_python-1.5.2 → informatica_python-1.6.0}/informatica_python.egg-info/top_level.txt +0 -0
  29. {informatica_python-1.5.2 → informatica_python-1.6.0}/setup.cfg +0 -0
  30. {informatica_python-1.5.2 → informatica_python-1.6.0}/tests/test_converter.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: informatica-python
3
- Version: 1.5.2
3
+ Version: 1.6.0
4
4
  Summary: Convert Informatica PowerCenter workflow XML to Python/PySpark code
5
5
  Author: Nick
6
6
  License: MIT
@@ -7,7 +7,7 @@ Licensed under the MIT License.
7
7
 
8
8
  from informatica_python.converter import InformaticaConverter
9
9
 
10
- __version__ = "1.5.2"
10
+ __version__ = "1.6.0"
11
11
  __author__ = "Nick"
12
12
  __license__ = "MIT"
13
13
  __all__ = ["InformaticaConverter"]
@@ -46,6 +46,11 @@ def main():
46
46
  default=None,
47
47
  help="Path to Informatica .param file for variable substitution",
48
48
  )
49
+ parser.add_argument(
50
+ "--validate-casts",
51
+ action="store_true",
52
+ help="Generate data quality validation code that logs warnings on type coercion",
53
+ )
49
54
 
50
55
  args = parser.parse_args()
51
56
 
@@ -67,6 +72,7 @@ def main():
67
72
  output_dir=args.output,
68
73
  output_zip=args.zip,
69
74
  param_file=args.param_file,
75
+ validate_casts=args.validate_casts,
70
76
  )
71
77
  print(f"Conversion complete! Output: {output_path}")
72
78
  print(f"Files generated:")
@@ -34,7 +34,8 @@ class InformaticaConverter:
34
34
 
35
35
  def convert(self, file_path: str, output_dir: str = "output",
36
36
  output_zip: Optional[str] = None,
37
- param_file: Optional[str] = None) -> str:
37
+ param_file: Optional[str] = None,
38
+ validate_casts: bool = False) -> str:
38
39
  self.powermart = self.parser.parse_file(file_path)
39
40
 
40
41
  if not self.powermart.repositories:
@@ -48,7 +49,7 @@ class InformaticaConverter:
48
49
  raise ValueError("No folder found in XML file")
49
50
 
50
51
  if len(all_folders) == 1:
51
- return self._convert_folder(all_folders[0], output_dir, output_zip, param_file)
52
+ return self._convert_folder(all_folders[0], output_dir, output_zip, param_file, validate_casts)
52
53
 
53
54
  result_path = output_dir if not output_zip else os.path.dirname(output_zip) or "."
54
55
  for folder in all_folders:
@@ -57,7 +58,7 @@ class InformaticaConverter:
57
58
  if output_zip:
58
59
  base, ext = os.path.splitext(output_zip)
59
60
  folder_zip = f"{base}_{folder.name}{ext}"
60
- self._convert_folder(folder, folder_dir, folder_zip, param_file)
61
+ self._convert_folder(folder, folder_dir, folder_zip, param_file, validate_casts)
61
62
  return result_path
62
63
 
63
64
  def convert_string(self, xml_string: str, output_dir: str = "output",
@@ -89,7 +90,8 @@ class InformaticaConverter:
89
90
 
90
91
  def _convert_folder(self, folder: FolderDef, output_dir: str,
91
92
  output_zip: Optional[str] = None,
92
- param_file: Optional[str] = None) -> str:
93
+ param_file: Optional[str] = None,
94
+ validate_casts: bool = False) -> str:
93
95
  if param_file:
94
96
  from informatica_python.utils.expression_converter import parse_param_file
95
97
  parse_param_file(param_file)
@@ -99,7 +101,7 @@ class InformaticaConverter:
99
101
  files["helper_functions.py"] = generate_helper_functions(folder, self.data_lib)
100
102
 
101
103
  for i, mapping in enumerate(folder.mappings, 1):
102
- code = generate_mapping_code(mapping, folder, self.data_lib, i)
104
+ code = generate_mapping_code(mapping, folder, self.data_lib, i, validate_casts=validate_casts)
103
105
  files[f"mapping_{i}.py"] = code
104
106
 
105
107
  files["workflow.py"] = generate_workflow_code(folder)
@@ -222,6 +222,123 @@ def generate_error_log(folder: FolderDef, parser_errors=None, parser_warnings=No
222
222
  for part in sti.partitions:
223
223
  lines.append(f"[INFO] Session '{session.name}': Partition '{part.name}' (type={part.partition_type}) on '{sti.instance_name}'")
224
224
 
225
+ lines.append("")
226
+ lines.append("-" * 70)
227
+ lines.append("UNSUPPORTED TRANSFORMS (Require Manual Review)")
228
+ lines.append("-" * 70)
229
+ lines.append("")
230
+
231
+ unsupported_types = {
232
+ "Custom Transformation", "Java", "Stored Procedure",
233
+ "External Procedure", "HTTP Transformation",
234
+ "Web Service Consumer", "SQL",
235
+ }
236
+ skipped_items = []
237
+ for mapping in folder.mappings:
238
+ for tx in mapping.transformations:
239
+ if tx.type in unsupported_types:
240
+ skipped_attrs = []
241
+ for attr in tx.attributes:
242
+ if attr.value and attr.value.strip():
243
+ skipped_attrs.append(attr.name)
244
+ skipped_items.append({
245
+ "mapping": mapping.name,
246
+ "transform": tx.name,
247
+ "type": tx.type,
248
+ "field_count": len(tx.fields),
249
+ "skipped_attrs": skipped_attrs,
250
+ })
251
+
252
+ if skipped_items:
253
+ for item in skipped_items:
254
+ lines.append(f" Mapping: {item['mapping']}")
255
+ lines.append(f" Transform: {item['transform']} (type={item['type']}, {item['field_count']} fields)")
256
+ if item['skipped_attrs']:
257
+ lines.append(f" Skipped attributes: {', '.join(item['skipped_attrs'])}")
258
+ lines.append("")
259
+ else:
260
+ lines.append(" None - all transformations are supported")
261
+ lines.append("")
262
+
263
+ lines.append("-" * 70)
264
+ lines.append("UNMAPPED PORTS (Fields with no connectors)")
265
+ lines.append("-" * 70)
266
+ lines.append("")
267
+
268
+ for mapping in folder.mappings:
269
+ connected_fields = set()
270
+ for conn in mapping.connectors:
271
+ connected_fields.add((conn.from_instance, conn.from_field))
272
+ connected_fields.add((conn.to_instance, conn.to_field))
273
+
274
+ unmapped = []
275
+ for tx in mapping.transformations:
276
+ for fld in tx.fields:
277
+ pt = (fld.porttype or "").upper()
278
+ if "OUTPUT" in pt or "INPUT/OUTPUT" in pt:
279
+ if (tx.name, fld.name) not in connected_fields:
280
+ unmapped.append((tx.name, tx.type, fld.name, fld.porttype or ""))
281
+
282
+ if unmapped:
283
+ lines.append(f" Mapping: {mapping.name}")
284
+ for tx_name, tx_type, fld_name, port_type in unmapped:
285
+ lines.append(f" {tx_name} ({tx_type}): {fld_name} [{port_type}]")
286
+ lines.append("")
287
+
288
+ lines.append("-" * 70)
289
+ lines.append("UNSUPPORTED EXPRESSION FUNCTIONS")
290
+ lines.append("-" * 70)
291
+ lines.append("")
292
+
293
+ import re
294
+ known_functions = {
295
+ "IIF", "DECODE", "CHOOSE", "IN", "LTRIM", "RTRIM", "TRIM",
296
+ "UPPER", "LOWER", "INITCAP", "SUBSTR", "LPAD", "RPAD",
297
+ "REVERSE", "CHR", "ASCII", "LEFT", "RIGHT", "INDEXOF",
298
+ "TO_CHAR", "TO_DATE", "TO_TIMESTAMP", "TO_INTEGER", "TO_BIGINT",
299
+ "TO_FLOAT", "TO_DECIMAL", "CAST", "SYSDATE", "SYSTIMESTAMP",
300
+ "GET_DATE_PART", "SET_DATE_PART", "ADD_TO_DATE", "DATE_DIFF",
301
+ "DATE_COMPARE", "LAST_DAY", "MAKE_DATE_TIME", "TRUNC", "ROUND",
302
+ "ABS", "CEIL", "CEILING", "FLOOR", "MOD", "POWER", "SQRT",
303
+ "LOG", "EXP", "SIGN", "LENGTH", "CONCAT", "INSTR", "REPLACE",
304
+ "REPLACESTR", "REPLACECHR", "REG_EXTRACT", "REG_REPLACE",
305
+ "REG_MATCH", "IS_SPACES", "IS_NUMBER", "IS_DATE", "NVL",
306
+ "NVL2", "ISNULL", "MAX", "MIN", "SUM", "AVG", "COUNT",
307
+ "FIRST", "LAST", "MEDIAN", "PERCENTILE", "VARIANCE", "STDDEV",
308
+ "LOOKUP", "ERROR", "ABORT", "SESSSTARTTIME",
309
+ "METAPHONE", "SOUNDEX", "COMPRESS", "DECOMPRESS",
310
+ "RANK", "MOVINGAVG", "MOVINGSUM", "CUME",
311
+ }
312
+ func_pattern = re.compile(r'\b([A-Z_][A-Z0-9_]*)\s*\(', re.IGNORECASE)
313
+ unsupported_funcs = {}
314
+ for mapping in folder.mappings:
315
+ for tx in mapping.transformations:
316
+ for fld in tx.fields:
317
+ if not fld.expression:
318
+ continue
319
+ for m in func_pattern.finditer(fld.expression):
320
+ func_name = m.group(1).upper()
321
+ if func_name not in known_functions:
322
+ key = func_name
323
+ if key not in unsupported_funcs:
324
+ unsupported_funcs[key] = []
325
+ unsupported_funcs[key].append(
326
+ f"{mapping.name} > {tx.name} > {fld.name}"
327
+ )
328
+
329
+ if unsupported_funcs:
330
+ for func_name in sorted(unsupported_funcs.keys()):
331
+ locations = unsupported_funcs[func_name]
332
+ lines.append(f" {func_name}() — found in {len(locations)} field(s):")
333
+ for loc in locations[:5]:
334
+ lines.append(f" - {loc}")
335
+ if len(locations) > 5:
336
+ lines.append(f" ... and {len(locations) - 5} more")
337
+ lines.append("")
338
+ else:
339
+ lines.append(" None - all expression functions are recognized")
340
+ lines.append("")
341
+
225
342
  lines.append("")
226
343
  lines.append("-" * 70)
227
344
  lines.append("PARSED XML TAG COVERAGE")
@@ -16,6 +16,62 @@ from informatica_python.utils.lib_adapters import (
16
16
  )
17
17
 
18
18
 
19
+ def _expand_mapplet_recursive(mapplet, mapplet_map, prefix, depth=0, max_depth=10, visited=None):
20
+ if visited is None:
21
+ visited = set()
22
+ if depth > max_depth:
23
+ return [], []
24
+ if mapplet.name in visited:
25
+ return [], []
26
+ visited.add(mapplet.name)
27
+
28
+ transforms = []
29
+ connectors = []
30
+ tx_names = {t.name for t in mapplet.transformations}
31
+
32
+ for tx in mapplet.transformations:
33
+ inlined = TransformationDef(
34
+ name=f"{prefix}__{tx.name}",
35
+ type=tx.type,
36
+ description=tx.description,
37
+ reusable=tx.reusable,
38
+ fields=list(tx.fields),
39
+ attributes=list(tx.attributes),
40
+ groups=list(tx.groups),
41
+ metadata_extensions=list(tx.metadata_extensions),
42
+ )
43
+ transforms.append(inlined)
44
+
45
+ for conn in mapplet.connectors:
46
+ from informatica_python.models import ConnectorDef
47
+ new_from = f"{prefix}__{conn.from_instance}" if conn.from_instance in tx_names else conn.from_instance
48
+ new_to = f"{prefix}__{conn.to_instance}" if conn.to_instance in tx_names else conn.to_instance
49
+ connectors.append(ConnectorDef(
50
+ from_instance=new_from,
51
+ from_field=conn.from_field,
52
+ from_instance_type=conn.from_instance_type,
53
+ to_instance=new_to,
54
+ to_field=conn.to_field,
55
+ to_instance_type=conn.to_instance_type,
56
+ ))
57
+
58
+ for inst in getattr(mapplet, 'instances', []):
59
+ if inst.type == "Mapplet" or (inst.transformation_type or "").lower() == "mapplet":
60
+ nested_name = inst.transformation_name or inst.name
61
+ nested_mapplet = mapplet_map.get(nested_name)
62
+ if not nested_mapplet:
63
+ continue
64
+ nested_prefix = f"{prefix}__{inst.name}"
65
+ nested_tx, nested_conn = _expand_mapplet_recursive(
66
+ nested_mapplet, mapplet_map, nested_prefix,
67
+ depth + 1, max_depth, visited.copy()
68
+ )
69
+ transforms.extend(nested_tx)
70
+ connectors.extend(nested_conn)
71
+
72
+ return transforms, connectors
73
+
74
+
19
75
  def _inline_mapplets(mapping, folder):
20
76
  mapplet_map = {m.name: m for m in folder.mapplets}
21
77
  extra_transforms = []
@@ -31,32 +87,11 @@ def _inline_mapplets(mapping, folder):
31
87
  mapplet_instances.add(inst.name)
32
88
  prefix = inst.name
33
89
 
34
- for tx in mapplet.transformations:
35
- inlined = TransformationDef(
36
- name=f"{prefix}__{tx.name}",
37
- type=tx.type,
38
- description=tx.description,
39
- reusable=tx.reusable,
40
- fields=list(tx.fields),
41
- attributes=list(tx.attributes),
42
- groups=list(tx.groups),
43
- metadata_extensions=list(tx.metadata_extensions),
44
- )
45
- extra_transforms.append(inlined)
46
-
47
- for conn in mapplet.connectors:
48
- from informatica_python.models import ConnectorDef
49
- new_from = f"{prefix}__{conn.from_instance}" if conn.from_instance in {t.name for t in mapplet.transformations} else conn.from_instance
50
- new_to = f"{prefix}__{conn.to_instance}" if conn.to_instance in {t.name for t in mapplet.transformations} else conn.to_instance
51
- inlined_conn = ConnectorDef(
52
- from_instance=new_from,
53
- from_field=conn.from_field,
54
- from_instance_type=conn.from_instance_type,
55
- to_instance=new_to,
56
- to_field=conn.to_field,
57
- to_instance_type=conn.to_instance_type,
58
- )
59
- extra_connectors.append(inlined_conn)
90
+ nested_tx, nested_conn = _expand_mapplet_recursive(
91
+ mapplet, mapplet_map, prefix
92
+ )
93
+ extra_transforms.extend(nested_tx)
94
+ extra_connectors.extend(nested_conn)
60
95
 
61
96
  rewired_connectors = []
62
97
  mapplet_internal_names = set()
@@ -181,7 +216,8 @@ def _build_session_conn_overrides(mapping, folder):
181
216
 
182
217
 
183
218
  def generate_mapping_code(mapping: MappingDef, folder: FolderDef,
184
- data_lib: str = "pandas", mapping_index: int = 1) -> str:
219
+ data_lib: str = "pandas", mapping_index: int = 1,
220
+ validate_casts: bool = False) -> str:
185
221
  lines = []
186
222
  lines.append('"""')
187
223
  lines.append(f"Mapping: {mapping.name}")
@@ -275,7 +311,7 @@ def generate_mapping_code(mapping: MappingDef, folder: FolderDef,
275
311
  _generate_transformation(lines, tx, connector_graph, source_dfs, transform_map, instance_map, data_lib)
276
312
 
277
313
  for tgt_name, tgt_def in target_map.items():
278
- _generate_target_write(lines, tgt_name, tgt_def, connector_graph, source_dfs, transform_map, instance_map, session_overrides)
314
+ _generate_target_write(lines, tgt_name, tgt_def, connector_graph, source_dfs, transform_map, instance_map, session_overrides, validate_casts=validate_casts)
279
315
 
280
316
  lines.append("")
281
317
  lines.append(f" log_mapping_end('{mapping.name}', start_time)")
@@ -287,6 +323,7 @@ def generate_mapping_code(mapping: MappingDef, folder: FolderDef,
287
323
  lines.append(" _parser = _ap.ArgumentParser()")
288
324
  lines.append(" _parser.add_argument('--param-file', default=None)")
289
325
  lines.append(" _parser.add_argument('--config', default='config.yml')")
326
+ lines.append(" _parser.add_argument('--validate-casts', action='store_true', help='Log data quality warnings on type coercion')")
290
327
  lines.append(" _args = _parser.parse_args()")
291
328
  lines.append(" config = load_config(_args.config, param_file=_args.param_file)")
292
329
  lines.append(f" run_{_safe_name(mapping.name)}(config)")
@@ -1181,7 +1218,7 @@ def _gen_sql_transform(lines, tx, tx_safe, input_df, source_dfs):
1181
1218
  source_dfs[tx.name] = f"df_{tx_safe}"
1182
1219
 
1183
1220
 
1184
- def _generate_target_write(lines, tgt_name, tgt_def, connector_graph, source_dfs, transform_map, instance_map, session_overrides=None):
1221
+ def _generate_target_write(lines, tgt_name, tgt_def, connector_graph, source_dfs, transform_map, instance_map, session_overrides=None, validate_casts=False):
1185
1222
  tgt_safe = _safe_name(tgt_name)
1186
1223
 
1187
1224
  to_conns = connector_graph.get("to", {}).get(tgt_name, [])
@@ -1213,7 +1250,7 @@ def _generate_target_write(lines, tgt_name, tgt_def, connector_graph, source_dfs
1213
1250
  else:
1214
1251
  lines.append(f" df_target_{tgt_safe} = {input_df}")
1215
1252
 
1216
- _emit_type_casting(lines, tgt_safe, tgt_def)
1253
+ _emit_type_casting(lines, tgt_safe, tgt_def, validate_casts=validate_casts)
1217
1254
 
1218
1255
  tgt_override = (session_overrides or {}).get(tgt_name, {})
1219
1256
  tgt_conn = tgt_override.get("connection_name")
@@ -1269,7 +1306,7 @@ CAST_MAP = {
1269
1306
  }
1270
1307
 
1271
1308
 
1272
- def _emit_type_casting(lines, tgt_safe, tgt_def):
1309
+ def _emit_type_casting(lines, tgt_safe, tgt_def, validate_casts=False):
1273
1310
  cast_ops = []
1274
1311
  for fld in tgt_def.fields:
1275
1312
  dt_key = fld.datatype.lower().strip()
@@ -1289,8 +1326,12 @@ def _emit_type_casting(lines, tgt_safe, tgt_def):
1289
1326
  return
1290
1327
 
1291
1328
  lines.append(f" # Type casting for target fields")
1329
+ if validate_casts:
1330
+ lines.append(f" _cast_warnings = []")
1292
1331
  for col_name, cast_type, pd_dtype, nullable in cast_ops:
1293
1332
  lines.append(f" if '{col_name}' in df_target_{tgt_safe}.columns:")
1333
+ if validate_casts:
1334
+ lines.append(f" _pre_null_{_safe_name(col_name)} = df_target_{tgt_safe}['{col_name}'].isna().sum()")
1294
1335
  if cast_type == "datetime":
1295
1336
  lines.append(f" df_target_{tgt_safe}['{col_name}'] = pd.to_datetime(df_target_{tgt_safe}['{col_name}'], errors='coerce')")
1296
1337
  elif cast_type == "int":
@@ -1302,3 +1343,12 @@ def _emit_type_casting(lines, tgt_safe, tgt_def):
1302
1343
  lines.append(f" df_target_{tgt_safe}['{col_name}'] = pd.to_numeric(df_target_{tgt_safe}['{col_name}'], errors='coerce')")
1303
1344
  elif cast_type == "bool":
1304
1345
  lines.append(f" df_target_{tgt_safe}['{col_name}'] = df_target_{tgt_safe}['{col_name}'].astype('{pd_dtype}')")
1346
+ if validate_casts:
1347
+ lines.append(f" _post_null_{_safe_name(col_name)} = df_target_{tgt_safe}['{col_name}'].isna().sum()")
1348
+ lines.append(f" _coerced_{_safe_name(col_name)} = int(_post_null_{_safe_name(col_name)} - _pre_null_{_safe_name(col_name)})")
1349
+ lines.append(f" if _coerced_{_safe_name(col_name)} > 0:")
1350
+ lines.append(f" _cast_warnings.append('{col_name}: {{}} values coerced to null during {cast_type} cast'.format(_coerced_{_safe_name(col_name)}))")
1351
+ lines.append(f" logger.warning('Column {col_name}: %d values coerced to null during {cast_type} cast', _coerced_{_safe_name(col_name)})")
1352
+ if validate_casts:
1353
+ lines.append(f" if _cast_warnings:")
1354
+ lines.append(f" logger.warning('Data quality warnings for target {tgt_safe}: %s', '; '.join(_cast_warnings))")
@@ -1,5 +1,16 @@
1
1
  from informatica_python.models import FolderDef
2
2
  from informatica_python.utils.expression_converter import convert_sql_expression, detect_sql_dialect
3
+ from informatica_python.utils.sql_dialect import translate_sql
4
+
5
+
6
+ SQL_ATTR_NAMES = {
7
+ "Sql Query": "Sql Query",
8
+ "Lookup Sql Override": "Lookup SQL Override",
9
+ "Pre SQL": "Pre-SQL",
10
+ "Post SQL": "Post-SQL",
11
+ "User Defined Join": "User Defined Join",
12
+ "Source Filter": "Source Filter",
13
+ }
3
14
 
4
15
 
5
16
  def generate_sql_file(folder: FolderDef) -> str:
@@ -7,82 +18,34 @@ def generate_sql_file(folder: FolderDef) -> str:
7
18
  lines.append("-- ============================================================")
8
19
  lines.append(f"-- All SQL Queries extracted from folder: {folder.name}")
9
20
  lines.append("-- Auto-generated by informatica-python")
21
+ lines.append("-- Includes ANSI SQL translations where dialect was detected")
10
22
  lines.append("-- ============================================================")
11
23
  lines.append("")
12
24
 
13
25
  sql_count = 0
26
+ translated_count = 0
14
27
 
15
28
  for mapping in folder.mappings:
16
29
  mapping_sqls = []
17
30
 
18
31
  for tx in mapping.transformations:
19
32
  for attr in tx.attributes:
20
- if attr.name == "Sql Query" and attr.value and attr.value.strip():
21
- sql = convert_sql_expression(attr.value)
22
- dialect = detect_sql_dialect(sql)
23
- mapping_sqls.append({
24
- "transformation": tx.name,
25
- "type": tx.type,
26
- "attribute": "Sql Query",
27
- "sql": sql,
28
- "dialect": dialect,
29
- })
30
- sql_count += 1
31
-
32
- elif attr.name == "Lookup Sql Override" and attr.value and attr.value.strip():
33
+ display_name = SQL_ATTR_NAMES.get(attr.name)
34
+ if display_name and attr.value and attr.value.strip():
33
35
  sql = convert_sql_expression(attr.value)
34
36
  dialect = detect_sql_dialect(sql)
35
- mapping_sqls.append({
37
+ ansi = translate_sql(sql, source_dialect=dialect.lower())
38
+ entry = {
36
39
  "transformation": tx.name,
37
40
  "type": tx.type,
38
- "attribute": "Lookup SQL Override",
41
+ "attribute": display_name,
39
42
  "sql": sql,
40
43
  "dialect": dialect,
41
- })
42
- sql_count += 1
43
-
44
- elif attr.name == "Pre SQL" and attr.value and attr.value.strip():
45
- sql = convert_sql_expression(attr.value)
46
- mapping_sqls.append({
47
- "transformation": tx.name,
48
- "type": tx.type,
49
- "attribute": "Pre-SQL",
50
- "sql": sql,
51
- "dialect": detect_sql_dialect(sql),
52
- })
53
- sql_count += 1
54
-
55
- elif attr.name == "Post SQL" and attr.value and attr.value.strip():
56
- sql = convert_sql_expression(attr.value)
57
- mapping_sqls.append({
58
- "transformation": tx.name,
59
- "type": tx.type,
60
- "attribute": "Post-SQL",
61
- "sql": sql,
62
- "dialect": detect_sql_dialect(sql),
63
- })
64
- sql_count += 1
65
-
66
- elif attr.name == "User Defined Join" and attr.value and attr.value.strip():
67
- sql = convert_sql_expression(attr.value)
68
- mapping_sqls.append({
69
- "transformation": tx.name,
70
- "type": tx.type,
71
- "attribute": "User Defined Join",
72
- "sql": sql,
73
- "dialect": detect_sql_dialect(sql),
74
- })
75
- sql_count += 1
76
-
77
- elif attr.name == "Source Filter" and attr.value and attr.value.strip():
78
- sql = convert_sql_expression(attr.value)
79
- mapping_sqls.append({
80
- "transformation": tx.name,
81
- "type": tx.type,
82
- "attribute": "Source Filter",
83
- "sql": sql,
84
- "dialect": detect_sql_dialect(sql),
85
- })
44
+ }
45
+ if ansi.strip() != sql.strip():
46
+ entry["translated"] = ansi
47
+ translated_count += 1
48
+ mapping_sqls.append(entry)
86
49
  sql_count += 1
87
50
 
88
51
  if mapping_sqls:
@@ -98,6 +61,10 @@ def generate_sql_file(folder: FolderDef) -> str:
98
61
  lines.append(f"-- ----")
99
62
  lines.append(sq["sql"].rstrip())
100
63
  lines.append("")
64
+ if "translated" in sq:
65
+ lines.append(f"-- >> ANSI SQL Translation:")
66
+ lines.append(sq["translated"].rstrip())
67
+ lines.append("")
101
68
  lines.append("")
102
69
 
103
70
  for session in folder.sessions:
@@ -106,11 +73,18 @@ def generate_sql_file(folder: FolderDef) -> str:
106
73
  for attr in sti.attributes:
107
74
  if "sql" in attr.name.lower() and attr.value and attr.value.strip():
108
75
  sql = convert_sql_expression(attr.value)
109
- session_sqls.append({
76
+ dialect = detect_sql_dialect(sql)
77
+ ansi = translate_sql(sql, source_dialect=dialect.lower())
78
+ entry = {
110
79
  "instance": sti.instance_name,
111
80
  "attribute": attr.name,
112
81
  "sql": sql,
113
- })
82
+ "dialect": dialect,
83
+ }
84
+ if ansi.strip() != sql.strip():
85
+ entry["translated"] = ansi
86
+ translated_count += 1
87
+ session_sqls.append(entry)
114
88
  sql_count += 1
115
89
 
116
90
  if session_sqls:
@@ -121,12 +95,19 @@ def generate_sql_file(folder: FolderDef) -> str:
121
95
  for sq in session_sqls:
122
96
  lines.append(f"-- Instance: {sq['instance']}")
123
97
  lines.append(f"-- Attribute: {sq['attribute']}")
98
+ lines.append(f"-- Detected dialect: {sq['dialect']}")
124
99
  lines.append(f"-- ----")
125
100
  lines.append(sq["sql"].rstrip())
126
101
  lines.append("")
102
+ if "translated" in sq:
103
+ lines.append(f"-- >> ANSI SQL Translation:")
104
+ lines.append(sq["translated"].rstrip())
105
+ lines.append("")
127
106
  lines.append("")
128
107
 
129
108
  lines.append(f"-- Total SQL queries extracted: {sql_count}")
109
+ if translated_count:
110
+ lines.append(f"-- SQL queries with dialect translation: {translated_count}")
130
111
  lines.append("")
131
112
 
132
113
  return "\n".join(lines)
@@ -0,0 +1,174 @@
1
+ import re
2
+
3
+
4
+ ORACLE_TO_ANSI = [
5
+ (re.compile(r'\bNVL2\s*\(\s*([^,]+?)\s*,\s*([^,]+?)\s*,\s*([^)]+?)\s*\)', re.IGNORECASE),
6
+ r'CASE WHEN \1 IS NOT NULL THEN \2 ELSE \3 END'),
7
+ (re.compile(r'\bNVL\s*\(\s*([^,]+?)\s*,\s*([^)]+?)\s*\)', re.IGNORECASE),
8
+ r'COALESCE(\1, \2)'),
9
+ (re.compile(r'\bSYSDATE\b', re.IGNORECASE), 'CURRENT_TIMESTAMP'),
10
+ (re.compile(r'\bSYSTIMESTAMP\b', re.IGNORECASE), 'CURRENT_TIMESTAMP'),
11
+ (re.compile(r'\|\|', re.IGNORECASE), ' || '),
12
+ ]
13
+
14
+ MSSQL_TO_ANSI = [
15
+ (re.compile(r'\bISNULL\s*\(\s*([^,]+?)\s*,\s*([^)]+?)\s*\)', re.IGNORECASE),
16
+ r'COALESCE(\1, \2)'),
17
+ (re.compile(r'\bGETDATE\s*\(\s*\)', re.IGNORECASE), 'CURRENT_TIMESTAMP'),
18
+ (re.compile(r'\bCONVERT\s*\(\s*VARCHAR\s*,\s*([^,)]+?)\s*,\s*\d+\s*\)', re.IGNORECASE),
19
+ r'CAST(\1 AS VARCHAR)'),
20
+ (re.compile(r'\bLEN\s*\(', re.IGNORECASE), 'LENGTH('),
21
+ (re.compile(r'\bCHARINDEX\s*\(\s*([^,]+?)\s*,\s*([^)]+?)\s*\)', re.IGNORECASE),
22
+ r'POSITION(\1 IN \2)'),
23
+ ]
24
+
25
+ _DECODE_RE = re.compile(
26
+ r'\bDECODE\s*\(', re.IGNORECASE
27
+ )
28
+
29
+ _ORACLE_JOIN_RE = re.compile(
30
+ r'(\w+\.\w+)\s*=\s*(\w+\.\w+)\s*\(\+\)'
31
+ )
32
+
33
+ _ORACLE_JOIN_RE2 = re.compile(
34
+ r'(\w+\.\w+)\s*\(\+\)\s*=\s*(\w+\.\w+)'
35
+ )
36
+
37
+ _ROWNUM_RE = re.compile(
38
+ r'\bAND\s+ROWNUM\s*<=?\s*(\d+)\b|\bWHERE\s+ROWNUM\s*<=?\s*(\d+)\b',
39
+ re.IGNORECASE
40
+ )
41
+
42
+ _TOP_RE = re.compile(
43
+ r'\bSELECT\s+TOP\s+(\d+)\b', re.IGNORECASE
44
+ )
45
+
46
+
47
+ def _convert_decode(sql):
48
+ result = sql
49
+ idx = 0
50
+ while True:
51
+ m = _DECODE_RE.search(result, idx)
52
+ if not m:
53
+ break
54
+ start = m.start()
55
+ paren_start = m.end() - 1
56
+ depth = 1
57
+ pos = paren_start + 1
58
+ while pos < len(result) and depth > 0:
59
+ if result[pos] == '(':
60
+ depth += 1
61
+ elif result[pos] == ')':
62
+ depth -= 1
63
+ pos += 1
64
+ if depth != 0:
65
+ idx = pos
66
+ continue
67
+ inner = result[paren_start + 1:pos - 1]
68
+ args = _split_args(inner)
69
+ if len(args) < 3:
70
+ idx = pos
71
+ continue
72
+ expr = args[0].strip()
73
+ pairs = args[1:]
74
+ case_parts = [f"CASE {expr}"]
75
+ i = 0
76
+ while i < len(pairs) - 1:
77
+ case_parts.append(f" WHEN {pairs[i].strip()} THEN {pairs[i+1].strip()}")
78
+ i += 2
79
+ if i < len(pairs):
80
+ case_parts.append(f" ELSE {pairs[i].strip()}")
81
+ case_parts.append(" END")
82
+ replacement = "".join(case_parts)
83
+ result = result[:start] + replacement + result[pos:]
84
+ idx = start + len(replacement)
85
+ return result
86
+
87
+
88
+ def _split_args(s):
89
+ args = []
90
+ depth = 0
91
+ current = []
92
+ for ch in s:
93
+ if ch == '(':
94
+ depth += 1
95
+ current.append(ch)
96
+ elif ch == ')':
97
+ depth -= 1
98
+ current.append(ch)
99
+ elif ch == ',' and depth == 0:
100
+ args.append(''.join(current))
101
+ current = []
102
+ else:
103
+ current.append(ch)
104
+ if current:
105
+ args.append(''.join(current))
106
+ return args
107
+
108
+
109
+ def _convert_oracle_outer_join(sql):
110
+ result = _ORACLE_JOIN_RE.sub(
111
+ lambda m: f'{m.group(1)} = {m.group(2)} -- (+) converted: use LEFT JOIN',
112
+ sql
113
+ )
114
+ result = _ORACLE_JOIN_RE2.sub(
115
+ lambda m: f'{m.group(1)} = {m.group(2)} -- (+) converted: use RIGHT JOIN',
116
+ result
117
+ )
118
+ return result
119
+
120
+
121
+ def _convert_rownum(sql):
122
+ m = _ROWNUM_RE.search(sql)
123
+ if m:
124
+ limit_val = m.group(1) or m.group(2)
125
+ cleaned = _ROWNUM_RE.sub('', sql).strip()
126
+ if cleaned.endswith('AND'):
127
+ cleaned = cleaned[:-3].strip()
128
+ if cleaned.endswith('WHERE'):
129
+ cleaned = cleaned[:-5].strip()
130
+ cleaned = cleaned.rstrip(';')
131
+ return f"{cleaned}\nLIMIT {limit_val}"
132
+ return sql
133
+
134
+
135
+ def _convert_top(sql):
136
+ m = _TOP_RE.search(sql)
137
+ if m:
138
+ limit_val = m.group(1)
139
+ cleaned = _TOP_RE.sub('SELECT', sql)
140
+ cleaned = cleaned.rstrip(';')
141
+ return f"{cleaned}\nLIMIT {limit_val}"
142
+ return sql
143
+
144
+
145
+ def translate_sql(sql, source_dialect="auto", target_dialect="ansi"):
146
+ if not sql or not sql.strip():
147
+ return sql
148
+
149
+ from informatica_python.utils.expression_converter import detect_sql_dialect
150
+
151
+ if source_dialect == "auto":
152
+ source_dialect = detect_sql_dialect(sql).lower()
153
+
154
+ translated = sql
155
+
156
+ if source_dialect == "oracle":
157
+ translated = _convert_oracle_outer_join(translated)
158
+ translated = _convert_decode(translated)
159
+ translated = _convert_rownum(translated)
160
+ for pattern, replacement in ORACLE_TO_ANSI:
161
+ translated = pattern.sub(replacement, translated)
162
+
163
+ elif source_dialect in ("mssql", "sql server"):
164
+ translated = _convert_top(translated)
165
+ for pattern, replacement in MSSQL_TO_ANSI:
166
+ translated = pattern.sub(replacement, translated)
167
+
168
+ elif source_dialect in ("generic", "postgresql"):
169
+ for pattern, replacement in ORACLE_TO_ANSI:
170
+ translated = pattern.sub(replacement, translated)
171
+ for pattern, replacement in MSSQL_TO_ANSI:
172
+ translated = pattern.sub(replacement, translated)
173
+
174
+ return translated
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: informatica-python
3
- Version: 1.5.2
3
+ Version: 1.6.0
4
4
  Summary: Convert Informatica PowerCenter workflow XML to Python/PySpark code
5
5
  Author: Nick
6
6
  License: MIT
@@ -23,5 +23,6 @@ informatica_python/utils/__init__.py
23
23
  informatica_python/utils/datatype_map.py
24
24
  informatica_python/utils/expression_converter.py
25
25
  informatica_python/utils/lib_adapters.py
26
+ informatica_python/utils/sql_dialect.py
26
27
  tests/test_converter.py
27
28
  tests/test_integration.py
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
4
4
 
5
5
  [project]
6
6
  name = "informatica-python"
7
- version = "1.5.2"
7
+ version = "1.6.0"
8
8
  description = "Convert Informatica PowerCenter workflow XML to Python/PySpark code"
9
9
  readme = "README.md"
10
10
  license = {text = "MIT"}
@@ -547,3 +547,269 @@ class TestCLIParamFile:
547
547
  help_text = f.getvalue()
548
548
  from informatica_python.cli import main as cli_main
549
549
  assert callable(cli_main)
550
+
551
+
552
class TestSQLDialectTranslation:
    """Behavioral checks for informatica_python.utils.sql_dialect.translate_sql."""

    @staticmethod
    def _translate(sql, **kwargs):
        # Local import keeps test collection independent of the package import.
        from informatica_python.utils.sql_dialect import translate_sql
        return translate_sql(sql, **kwargs)

    def test_nvl_to_coalesce(self):
        out = self._translate("SELECT NVL(COL1, 0) FROM T", source_dialect="oracle")
        assert "COALESCE" in out
        assert "NVL" not in out

    def test_sysdate_to_current_timestamp(self):
        out = self._translate("SELECT SYSDATE FROM DUAL", source_dialect="oracle")
        assert "CURRENT_TIMESTAMP" in out
        assert "SYSDATE" not in out

    def test_decode_to_case(self):
        query = "SELECT DECODE(STATUS, 'A', 'Active', 'I', 'Inactive', 'Unknown') FROM T"
        out = self._translate(query, source_dialect="oracle")
        for keyword in ("CASE", "WHEN", "ELSE"):
            assert keyword in out

    def test_nvl2_to_case(self):
        out = self._translate("SELECT NVL2(COL1, 'has value', 'null') FROM T",
                              source_dialect="oracle")
        assert "CASE WHEN" in out
        assert "IS NOT NULL" in out

    def test_getdate_to_current_timestamp(self):
        out = self._translate("SELECT GETDATE() FROM T", source_dialect="mssql")
        assert "CURRENT_TIMESTAMP" in out

    def test_isnull_mssql_to_coalesce(self):
        out = self._translate("SELECT ISNULL(COL1, 0) FROM T", source_dialect="mssql")
        assert "COALESCE" in out

    def test_top_to_limit(self):
        out = self._translate("SELECT TOP 10 * FROM T", source_dialect="mssql")
        assert "LIMIT 10" in out
        assert "TOP" not in out

    def test_rownum_to_limit(self):
        out = self._translate("SELECT * FROM T WHERE ROWNUM <= 5", source_dialect="oracle")
        assert "LIMIT 5" in out

    def test_auto_dialect_detection(self):
        # No source_dialect argument -> exercises the "auto" detection path.
        out = self._translate("SELECT NVL(A, 0), SYSDATE FROM T")
        assert "COALESCE" in out
        assert "CURRENT_TIMESTAMP" in out

    def test_no_change_for_clean_sql(self):
        query = "SELECT * FROM employees WHERE id = 1"
        out = self._translate(query, source_dialect="generic")
        assert out.strip() == query.strip()

    def test_sql_gen_includes_translation(self):
        from informatica_python.generators.sql_gen import generate_sql_file
        from informatica_python.models import (
            FolderDef, MappingDef, TransformationDef, TableAttribute, FieldDef
        )
        qualifier = TransformationDef(
            name="SQ_TEST",
            type="Source Qualifier",
            attributes=[
                TableAttribute(name="Sql Query", value="SELECT NVL(A, 0), SYSDATE FROM T"),
            ],
        )
        folder = FolderDef(
            name="F",
            mappings=[MappingDef(name="m_test", transformations=[qualifier])],
        )
        rendered = generate_sql_file(folder)
        assert "ANSI SQL Translation" in rendered
        assert "COALESCE" in rendered
627
+
628
class TestEnhancedErrorReporting:
    """Checks the extra report sections emitted by generators.error_log_gen."""

    def test_unsupported_transforms_section(self):
        from informatica_python.generators.error_log_gen import generate_error_log
        from informatica_python.models import (
            FolderDef, MappingDef, TransformationDef, FieldDef, TableAttribute
        )
        java_tx = TransformationDef(
            name="JAVA_TX",
            type="Java",
            attributes=[TableAttribute(name="Class Name", value="com.example.Transform")],
            fields=[FieldDef(name="OUT1", datatype="string", porttype="OUTPUT")],
        )
        folder = FolderDef(
            name="F",
            mappings=[MappingDef(name="m_test", transformations=[java_tx])],
        )
        report = generate_error_log(folder)
        for expected in ("UNSUPPORTED TRANSFORMS", "JAVA_TX", "Java", "Class Name"):
            assert expected in report

    def test_unmapped_ports_section(self):
        from informatica_python.generators.error_log_gen import generate_error_log
        from informatica_python.models import (
            FolderDef, MappingDef, TransformationDef, FieldDef, ConnectorDef
        )
        expr = TransformationDef(
            name="EXP1",
            type="Expression",
            fields=[
                FieldDef(name="IN1", datatype="string", porttype="INPUT"),
                FieldDef(name="OUT1", datatype="string", porttype="OUTPUT"),
                FieldDef(name="OUT2", datatype="string", porttype="OUTPUT"),
            ],
        )
        # Only OUT1 is wired downstream; OUT2 should be reported as unmapped.
        link = ConnectorDef(
            from_instance="EXP1", from_field="OUT1",
            from_instance_type="Expression",
            to_instance="TGT", to_field="COL1",
            to_instance_type="Target Definition",
        )
        folder = FolderDef(
            name="F",
            mappings=[
                MappingDef(name="m_test", transformations=[expr], connectors=[link]),
            ],
        )
        report = generate_error_log(folder)
        assert "UNMAPPED PORTS" in report
        assert "OUT2" in report

    def test_unsupported_functions_section(self):
        from informatica_python.generators.error_log_gen import generate_error_log
        from informatica_python.models import (
            FolderDef, MappingDef, TransformationDef, FieldDef
        )
        expr = TransformationDef(
            name="EXP1",
            type="Expression",
            fields=[
                FieldDef(
                    name="OUT1", datatype="string", porttype="OUTPUT",
                    expression="CUSTOM_FUNC(IN1, 'abc')",
                ),
            ],
        )
        folder = FolderDef(
            name="F",
            mappings=[MappingDef(name="m_test", transformations=[expr])],
        )
        report = generate_error_log(folder)
        assert "UNSUPPORTED EXPRESSION FUNCTIONS" in report
        assert "CUSTOM_FUNC" in report
691
+
692
class TestNestedMapplets:
    """Recursive mapplet expansion: nesting, cycles, and depth limiting."""

    def test_recursive_expansion(self):
        from informatica_python.generators.mapping_gen import _expand_mapplet_recursive
        from informatica_python.models import (
            MappletDef, TransformationDef, FieldDef, ConnectorDef, InstanceDef
        )

        def passthrough_tx(name):
            # One in/out field so the expression survives expansion.
            return TransformationDef(
                name=name, type="Expression",
                fields=[FieldDef(name="F1", datatype="string", porttype="INPUT/OUTPUT")],
            )

        inner = MappletDef(
            name="INNER_MPL",
            transformations=[passthrough_tx("INNER_EXP")],
            connectors=[],
        )
        outer = MappletDef(
            name="OUTER_MPL",
            transformations=[passthrough_tx("OUTER_EXP")],
            connectors=[],
            instances=[
                InstanceDef(name="INNER_INST", type="Mapplet",
                            transformation_name="INNER_MPL",
                            transformation_type="Mapplet"),
            ],
        )
        registry = {"INNER_MPL": inner, "OUTER_MPL": outer}
        transforms, _connectors = _expand_mapplet_recursive(outer, registry, "MPL1")
        expanded_names = {t.name for t in transforms}
        assert "MPL1__OUTER_EXP" in expanded_names
        assert "MPL1__INNER_INST__INNER_EXP" in expanded_names

    def test_circular_reference_protection(self):
        from informatica_python.generators.mapping_gen import _expand_mapplet_recursive
        from informatica_python.models import (
            MappletDef, TransformationDef, FieldDef, InstanceDef
        )
        # A mapplet containing an instance of itself must not recurse forever.
        self_ref = MappletDef(
            name="SELF_REF",
            transformations=[
                TransformationDef(
                    name="EXP1", type="Expression",
                    fields=[FieldDef(name="F1", datatype="string")],
                ),
            ],
            connectors=[],
            instances=[
                InstanceDef(name="SELF", type="Mapplet",
                            transformation_name="SELF_REF",
                            transformation_type="Mapplet"),
            ],
        )
        transforms, _ = _expand_mapplet_recursive(
            self_ref, {"SELF_REF": self_ref}, "M"
        )
        assert len(transforms) == 1

    def test_depth_limit(self):
        from informatica_python.generators.mapping_gen import _expand_mapplet_recursive
        from informatica_python.models import (
            MappletDef, TransformationDef, FieldDef, InstanceDef
        )
        # Chain of 15 mapplets, each nesting the next; expansion should stop
        # at the implementation's depth ceiling.
        registry = {}
        for depth in range(15):
            nested = []
            if depth < 14:
                nested = [
                    InstanceDef(name=f"NEST_{depth + 1}", type="Mapplet",
                                transformation_name=f"MPL_{depth + 1}",
                                transformation_type="Mapplet"),
                ]
            registry[f"MPL_{depth}"] = MappletDef(
                name=f"MPL_{depth}",
                transformations=[
                    TransformationDef(
                        name=f"TX_{depth}", type="Expression",
                        fields=[FieldDef(name="F", datatype="string")],
                    ),
                ],
                connectors=[],
                instances=nested,
            )
        transforms, _ = _expand_mapplet_recursive(registry["MPL_0"], registry, "ROOT")
        assert len(transforms) <= 11
771
+
772
class TestDataQualityValidation:
    """Covers opt-in cast-validation code generation and its CLI flag."""

    def test_validate_casts_generates_warnings(self):
        from informatica_python.generators.mapping_gen import _emit_type_casting, _safe_name
        from informatica_python.models import FieldDef

        class FakeTgt:
            # Minimal stand-in for a target definition: only .fields is read.
            fields = [
                FieldDef(name="AGE", datatype="integer", nullable="NULL"),
                FieldDef(name="CREATED", datatype="date/time", nullable="NULL"),
            ]

        lines = []
        _emit_type_casting(lines, "TGT1", FakeTgt(), validate_casts=True)
        code = "\n".join(lines)
        # Validation mode must emit pre/post null counting and a warning path.
        assert "_cast_warnings" in code
        assert "_pre_null_" in code
        assert "_post_null_" in code
        assert "coerced to null" in code
        assert "logger.warning" in code

    def test_no_validation_by_default(self):
        from informatica_python.generators.mapping_gen import _emit_type_casting
        from informatica_python.models import FieldDef

        class FakeTgt:
            fields = [
                FieldDef(name="AGE", datatype="integer", nullable="NULL"),
            ]

        lines = []
        _emit_type_casting(lines, "TGT1", FakeTgt())
        code = "\n".join(lines)
        # Without the flag, none of the validation scaffolding should appear.
        assert "_cast_warnings" not in code
        assert "_pre_null_" not in code

    def test_validate_casts_cli_flag(self):
        # Fix: the original overwrote sys.argv and never restored it, leaking
        # a fake argv into every test that runs afterwards.
        import contextlib
        import io
        import sys

        from informatica_python.cli import main

        buf = io.StringIO()
        saved_argv = sys.argv
        try:
            with contextlib.redirect_stdout(buf):
                try:
                    sys.argv = ["informatica-python", "--help"]
                    main()
                except SystemExit:
                    # argparse exits after printing --help; that's expected.
                    pass
        finally:
            sys.argv = saved_argv
        help_text = buf.getvalue()
        assert "--validate-casts" in help_text or "validate_casts" in help_text