informatica-python 1.7.0__tar.gz → 1.8.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (30)
  1. {informatica_python-1.7.0 → informatica_python-1.8.0}/PKG-INFO +1 -1
  2. {informatica_python-1.7.0 → informatica_python-1.8.0}/informatica_python/__init__.py +1 -1
  3. {informatica_python-1.7.0 → informatica_python-1.8.0}/informatica_python/generators/helper_gen.py +6 -3
  4. {informatica_python-1.7.0 → informatica_python-1.8.0}/informatica_python/generators/mapping_gen.py +59 -3
  5. {informatica_python-1.7.0 → informatica_python-1.8.0}/informatica_python.egg-info/PKG-INFO +1 -1
  6. {informatica_python-1.7.0 → informatica_python-1.8.0}/pyproject.toml +1 -1
  7. {informatica_python-1.7.0 → informatica_python-1.8.0}/tests/test_integration.py +428 -2
  8. {informatica_python-1.7.0 → informatica_python-1.8.0}/LICENSE +0 -0
  9. {informatica_python-1.7.0 → informatica_python-1.8.0}/README.md +0 -0
  10. {informatica_python-1.7.0 → informatica_python-1.8.0}/informatica_python/cli.py +0 -0
  11. {informatica_python-1.7.0 → informatica_python-1.8.0}/informatica_python/converter.py +0 -0
  12. {informatica_python-1.7.0 → informatica_python-1.8.0}/informatica_python/generators/__init__.py +0 -0
  13. {informatica_python-1.7.0 → informatica_python-1.8.0}/informatica_python/generators/config_gen.py +0 -0
  14. {informatica_python-1.7.0 → informatica_python-1.8.0}/informatica_python/generators/error_log_gen.py +0 -0
  15. {informatica_python-1.7.0 → informatica_python-1.8.0}/informatica_python/generators/sql_gen.py +0 -0
  16. {informatica_python-1.7.0 → informatica_python-1.8.0}/informatica_python/generators/workflow_gen.py +0 -0
  17. {informatica_python-1.7.0 → informatica_python-1.8.0}/informatica_python/models.py +0 -0
  18. {informatica_python-1.7.0 → informatica_python-1.8.0}/informatica_python/parser.py +0 -0
  19. {informatica_python-1.7.0 → informatica_python-1.8.0}/informatica_python/utils/__init__.py +0 -0
  20. {informatica_python-1.7.0 → informatica_python-1.8.0}/informatica_python/utils/datatype_map.py +0 -0
  21. {informatica_python-1.7.0 → informatica_python-1.8.0}/informatica_python/utils/expression_converter.py +0 -0
  22. {informatica_python-1.7.0 → informatica_python-1.8.0}/informatica_python/utils/lib_adapters.py +0 -0
  23. {informatica_python-1.7.0 → informatica_python-1.8.0}/informatica_python/utils/sql_dialect.py +0 -0
  24. {informatica_python-1.7.0 → informatica_python-1.8.0}/informatica_python.egg-info/SOURCES.txt +0 -0
  25. {informatica_python-1.7.0 → informatica_python-1.8.0}/informatica_python.egg-info/dependency_links.txt +0 -0
  26. {informatica_python-1.7.0 → informatica_python-1.8.0}/informatica_python.egg-info/entry_points.txt +0 -0
  27. {informatica_python-1.7.0 → informatica_python-1.8.0}/informatica_python.egg-info/requires.txt +0 -0
  28. {informatica_python-1.7.0 → informatica_python-1.8.0}/informatica_python.egg-info/top_level.txt +0 -0
  29. {informatica_python-1.7.0 → informatica_python-1.8.0}/setup.cfg +0 -0
  30. {informatica_python-1.7.0 → informatica_python-1.8.0}/tests/test_converter.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: informatica-python
3
- Version: 1.7.0
3
+ Version: 1.8.0
4
4
  Summary: Convert Informatica PowerCenter workflow XML to Python/PySpark code
5
5
  Author: Nick
6
6
  License: MIT
@@ -7,7 +7,7 @@ Licensed under the MIT License.
7
7
 
8
8
  from informatica_python.converter import InformaticaConverter
9
9
 
10
- __version__ = "1.7.0"
10
+ __version__ = "1.8.0"
11
11
  __author__ = "Nick"
12
12
  __license__ = "MIT"
13
13
  __all__ = ["InformaticaConverter"]
@@ -1278,6 +1278,9 @@ def _add_update_strategy_functions(lines):
1278
1278
  lines.append(" key_columns = data_cols[:1]")
1279
1279
  lines.append(" logger.warning(f'No key columns specified for update strategy — using first column: {key_columns}')")
1280
1280
  lines.append("")
1281
+ lines.append(" db_type = conn_config.get('type', 'mssql')")
1282
+ lines.append(" ph = '?' if db_type == 'mssql' else '%s'")
1283
+ lines.append("")
1281
1284
  lines.append(" try:")
1282
1285
  lines.append(" cursor = conn.cursor()")
1283
1286
  lines.append("")
@@ -1295,8 +1298,8 @@ def _add_update_strategy_functions(lines):
1295
1298
  lines.append("")
1296
1299
  lines.append(" if len(df_update) > 0:")
1297
1300
  lines.append(" non_key_cols = [c for c in data_cols if c not in key_columns]")
1298
- lines.append(" set_clause = ', '.join(f'{c} = ?' for c in non_key_cols)")
1299
- lines.append(" where_clause = ' AND '.join(f'{c} = ?' for c in key_columns)")
1301
+ lines.append(" set_clause = ', '.join(f'{c} = {ph}' for c in non_key_cols)")
1302
+ lines.append(" where_clause = ' AND '.join(f'{c} = {ph}' for c in key_columns)")
1300
1303
  lines.append(" update_sql = f'UPDATE {qualified_table} SET {set_clause} WHERE {where_clause}'")
1301
1304
  lines.append(" logger.info(f'Updating {len(df_update)} rows in {qualified_table}')")
1302
1305
  lines.append(" for _, row in df_update.iterrows():")
@@ -1304,7 +1307,7 @@ def _add_update_strategy_functions(lines):
1304
1307
  lines.append(" cursor.execute(update_sql, values)")
1305
1308
  lines.append("")
1306
1309
  lines.append(" if len(df_delete) > 0:")
1307
- lines.append(" where_clause = ' AND '.join(f'{c} = ?' for c in key_columns)")
1310
+ lines.append(" where_clause = ' AND '.join(f'{c} = {ph}' for c in key_columns)")
1308
1311
  lines.append(" delete_sql = f'DELETE FROM {qualified_table} WHERE {where_clause}'")
1309
1312
  lines.append(" logger.info(f'Deleting {len(df_delete)} rows from {qualified_table}')")
1310
1313
  lines.append(" for _, row in df_delete.iterrows():")
@@ -225,9 +225,12 @@ def generate_mapping_code(mapping: MappingDef, folder: FolderDef,
225
225
  lines.append(f"Auto-generated by informatica-python")
226
226
  lines.append('"""')
227
227
  lines.append("")
228
+ lines.append("import logging")
228
229
  lines.append("import numpy as np")
229
230
  lines.append("from helper_functions import *")
230
231
  lines.append("")
232
+ lines.append("logger = logging.getLogger(__name__)")
233
+ lines.append("")
231
234
  lines.append("")
232
235
 
233
236
  inlined_transforms, inlined_connectors, mapplet_instance_names = _inline_mapplets(mapping, folder)
@@ -248,12 +251,41 @@ def generate_mapping_code(mapping: MappingDef, folder: FolderDef,
248
251
  instance_map = {i.name: i for i in mapping.instances}
249
252
  session_overrides = _build_session_conn_overrides(mapping, folder)
250
253
 
254
+ source_names = list(source_map.keys())
255
+ target_names = list(target_map.keys())
256
+ tx_summary = []
257
+ for tx in all_transforms:
258
+ if tx.type not in ("Source Qualifier", "Application Source Qualifier"):
259
+ in_flds = [f.name for f in tx.fields if "INPUT" in (f.porttype or "").upper()]
260
+ out_flds = [f.name for f in tx.fields if "OUTPUT" in (f.porttype or "").upper()]
261
+ tx_summary.append((tx.name, tx.type, in_flds, out_flds))
262
+
251
263
  lines.append(f"def run_{_safe_name(mapping.name)}(config):")
252
- lines.append(f' """Execute mapping: {mapping.name}"""')
264
+ lines.append(f' """')
265
+ lines.append(f" Execute mapping: {mapping.name}")
266
+ if mapping.description:
267
+ lines.append(f" Description: {mapping.description}")
268
+ lines.append(f"")
269
+ lines.append(f" Sources: {', '.join(source_names) if source_names else 'None'}")
270
+ lines.append(f" Targets: {', '.join(target_names) if target_names else 'None'}")
271
+ if tx_summary:
272
+ lines.append(f"")
273
+ lines.append(f" Transformation pipeline:")
274
+ for tx_name, tx_type, in_f, out_f in tx_summary:
275
+ lines.append(f" - {tx_name} ({tx_type}): {len(in_f)} input fields -> {len(out_f)} output fields")
276
+ lines.append(f' """')
253
277
  lines.append(f" start_time = log_mapping_start('{mapping.name}')")
254
278
  lines.append("")
255
279
 
256
- has_persistent_vars = False
280
+ has_persistent_vars = any(
281
+ getattr(v, 'is_persistent', 'NO').upper() == 'YES'
282
+ for v in (mapping.variables or [])
283
+ )
284
+ if has_persistent_vars:
285
+ lines.append(" # Load persistent state for mapping variables")
286
+ lines.append(" load_persistent_state()")
287
+ lines.append("")
288
+
257
289
  if mapping.variables:
258
290
  lines.append(" # Mapping Variables")
259
291
  for var in mapping.variables:
@@ -318,6 +350,14 @@ def generate_mapping_code(mapping: MappingDef, folder: FolderDef,
318
350
  for tgt_name, tgt_def in target_map.items():
319
351
  _generate_target_write(lines, tgt_name, tgt_def, connector_graph, source_dfs, transform_map, instance_map, session_overrides, validate_casts=validate_casts)
320
352
 
353
+ if has_persistent_vars:
354
+ lines.append(" # Save persistent mapping variables")
355
+ for var in mapping.variables:
356
+ if getattr(var, 'is_persistent', 'NO').upper() == 'YES':
357
+ safe_var = _safe_name(var.name.replace("$$", ""))
358
+ lines.append(f" set_persistent_variable('{mapping.name}', '{safe_var}', {safe_var})")
359
+ lines.append(" save_persistent_state()")
360
+
321
361
  lines.append("")
322
362
  lines.append(f" log_mapping_end('{mapping.name}', start_time)")
323
363
  lines.append(f" logger.info('Mapping {mapping.name} completed successfully')")
@@ -604,6 +644,7 @@ def _generate_source_qualifier(lines, sq, source_map, source_dfs, connector_grap
604
644
  lines.append(f" df_{sq_safe} = df_{_safe_name(next(iter(connected_sources)))}")
605
645
 
606
646
  source_dfs[sq.name] = f"df_{sq_safe}"
647
+ lines.append(f" logger.info(f'Source {sq.name}: {{len(df_{sq_safe})}} rows read')")
607
648
 
608
649
  if post_sql:
609
650
  lines.append(f" # Post-SQL")
@@ -633,7 +674,17 @@ def _generate_transformation(lines, tx, connector_graph, source_dfs, transform_m
633
674
  if not input_df:
634
675
  input_df = "df_input"
635
676
 
677
+ in_fields = [f.name for f in tx.fields if "INPUT" in (f.porttype or "").upper()]
678
+ out_fields = [f.name for f in tx.fields if "OUTPUT" in (f.porttype or "").upper()]
679
+ tx_desc = tx.description or ""
680
+ lines.append(f" # -------------------------------------------------------------------")
636
681
  lines.append(f" # Transformation: {tx.name} (Type: {tx.type})")
682
+ if tx_desc:
683
+ lines.append(f" # Description: {tx_desc}")
684
+ lines.append(f" # Input fields: {', '.join(in_fields[:10])}{' ...' if len(in_fields) > 10 else ''}")
685
+ lines.append(f" # Output fields: {', '.join(out_fields[:10])}{' ...' if len(out_fields) > 10 else ''}")
686
+ lines.append(f" # -------------------------------------------------------------------")
687
+ lines.append(f" _input_rows_{tx_safe} = len({input_df}) if hasattr({input_df}, '__len__') else 0")
637
688
 
638
689
  if tx_type == "expression":
639
690
  _gen_expression_transform(lines, tx, tx_safe, input_df, source_dfs, data_lib)
@@ -673,6 +724,8 @@ def _generate_transformation(lines, tx, connector_graph, source_dfs, transform_m
673
724
  lines.append(f" df_{tx_safe} = {copy_expr}")
674
725
  source_dfs[tx.name] = f"df_{tx_safe}"
675
726
 
727
+ lines.append(f" _output_rows_{tx_safe} = len(df_{tx_safe}) if hasattr(df_{tx_safe}, '__len__') else 0")
728
+ lines.append(f" logger.info(f'{tx.name} ({tx.type}): {{_input_rows_{tx_safe}}} input rows -> {{_output_rows_{tx_safe}}} output rows')")
676
729
  lines.append("")
677
730
 
678
731
 
@@ -1230,7 +1283,7 @@ def _gen_stored_proc(lines, tx, tx_safe, input_df, source_dfs):
1230
1283
  lines.append(f" # Stored Procedure: {proc_name or tx.name}")
1231
1284
 
1232
1285
  if input_params:
1233
- param_dict_items = ", ".join(f"'{p}': {input_df}['{p}'].iloc[0] if '{p}' in {input_df}.columns else None" for p in input_params)
1286
+ param_dict_items = ", ".join(f"'{p}': {input_df}['{p}'].iloc[0] if '{p}' in {input_df}.columns and len({input_df}) > 0 else None" for p in input_params)
1234
1287
  lines.append(f" _sp_params_{tx_safe} = {{{param_dict_items}}}")
1235
1288
  else:
1236
1289
  lines.append(f" _sp_params_{tx_safe} = {{}}")
@@ -1304,6 +1357,8 @@ def _generate_target_write(lines, tgt_name, tgt_def, connector_graph, source_dfs
1304
1357
  target_cols = [f.name for f in tgt_def.fields] if tgt_def.fields else None
1305
1358
  if target_cols:
1306
1359
  lines.append(f" available_cols = [c for c in {target_cols} if c in df_target_{tgt_safe}.columns]")
1360
+ lines.append(f" if '_update_strategy' in df_target_{tgt_safe}.columns and '_update_strategy' not in available_cols:")
1361
+ lines.append(f" available_cols.append('_update_strategy')")
1307
1362
  lines.append(f" df_target_{tgt_safe} = df_target_{tgt_safe}[available_cols]")
1308
1363
  else:
1309
1364
  lines.append(f" df_target_{tgt_safe} = {input_df}")
@@ -1337,6 +1392,7 @@ def _generate_target_write(lines, tgt_name, tgt_def, connector_graph, source_dfs
1337
1392
  else:
1338
1393
  lines.append(f" write_file(df_target_{tgt_safe}, config.get('targets', {{}}).get('{tgt_def.name}', {{}}).get('file_path', '{tgt_def.name}'),")
1339
1394
  lines.append(f" config.get('targets', {{}}).get('{tgt_def.name}', {{}}))")
1395
+ lines.append(f" logger.info(f'Target {tgt_def.name}: {{len(df_target_{tgt_safe})}} rows written')")
1340
1396
 
1341
1397
 
1342
1398
  CAST_MAP = {
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: informatica-python
3
- Version: 1.7.0
3
+ Version: 1.8.0
4
4
  Summary: Convert Informatica PowerCenter workflow XML to Python/PySpark code
5
5
  Author: Nick
6
6
  License: MIT
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
4
4
 
5
5
  [project]
6
6
  name = "informatica-python"
7
- version = "1.7.0"
7
+ version = "1.8.0"
8
8
  description = "Convert Informatica PowerCenter workflow XML to Python/PySpark code"
9
9
  readme = "README.md"
10
10
  license = {text = "MIT"}
@@ -946,6 +946,31 @@ class TestUpdateStrategy:
946
946
  assert "write_to_db" in code
947
947
  assert "_update_strategy" in code
948
948
 
949
+ def test_update_strategy_preserved_through_projection(self):
950
+ from informatica_python.models import TargetDef, FieldDef
951
+ from informatica_python.generators.mapping_gen import _generate_target_write
952
+ tgt = TargetDef(name="TGT_DB", database_type="Oracle",
953
+ fields=[FieldDef(name="ID", datatype="integer"),
954
+ FieldDef(name="VAL", datatype="string")])
955
+ lines = []
956
+ source_dfs = {"SRC": "df_src"}
957
+ from informatica_python.models import ConnectorDef
958
+ conns = [ConnectorDef(from_instance="SRC", to_instance="TGT_DB",
959
+ from_instance_type="", to_instance_type="",
960
+ from_field="ID", to_field="ID")]
961
+ connector_graph = {"to": {"TGT_DB": conns}, "from": {"SRC": conns}}
962
+ _generate_target_write(lines, "TGT_DB", tgt, connector_graph, source_dfs, {}, {})
963
+ code = "\n".join(lines)
964
+ assert "_update_strategy" in code
965
+ assert "available_cols.append('_update_strategy')" in code
966
+
967
+ def test_update_strategy_dialect_aware_placeholders(self):
968
+ from informatica_python.models import FolderDef
969
+ from informatica_python.generators.helper_gen import generate_helper_functions
970
+ folder = FolderDef(name="TestFolder")
971
+ code = generate_helper_functions(folder)
972
+ assert "ph = '?' if db_type == 'mssql' else '%s'" in code
973
+
949
974
  def test_update_strategy_helper_generated(self):
950
975
  from informatica_python.models import FolderDef
951
976
  from informatica_python.generators.helper_gen import generate_helper_functions
@@ -1064,8 +1089,8 @@ class TestStatePersistence:
1064
1089
  default_value="''", is_persistent="NO")])
1065
1090
  folder = FolderDef(name="TestFolder", workflows=[wf])
1066
1091
  code = generate_workflow_code(folder)
1067
- assert "load_persistent_state" not in code or "import" in code
1068
- assert "save_persistent_state" not in code or "import" in code
1092
+ assert "load_persistent_state()" not in code
1093
+ assert "save_persistent_state()" not in code
1069
1094
 
1070
1095
  def test_mapping_persistent_variables(self):
1071
1096
  from informatica_python.models import (
@@ -1091,6 +1116,9 @@ class TestStatePersistence:
1091
1116
  code = generate_mapping_code(mapping, folder, "pandas", 1)
1092
1117
  assert "get_persistent_variable('m_persist_test', 'last_id'" in code
1093
1118
  assert "temp = ''" in code
1119
+ assert "load_persistent_state()" in code
1120
+ assert "set_persistent_variable('m_persist_test', 'last_id'" in code
1121
+ assert "save_persistent_state()" in code
1094
1122
 
1095
1123
  def test_workflow_persistent_imports(self):
1096
1124
  from informatica_python.models import FolderDef, WorkflowDef, WorkflowVariable
@@ -1110,3 +1138,401 @@ class TestStatePersistence:
1110
1138
  assert "json.load" in code
1111
1139
  assert "json.dump" in code
1112
1140
  assert "persistent_state.json" in code
1141
+
1142
+
1143
+ class TestLoggingEnrichment:
1144
+
1145
+ def test_mapping_imports_logging(self):
1146
+ from informatica_python.models import (
1147
+ MappingDef, FolderDef, TransformationDef, ConnectorDef,
1148
+ FieldDef, SourceDef, TargetDef, InstanceDef,
1149
+ )
1150
+ from informatica_python.generators.mapping_gen import generate_mapping_code
1151
+ mapping = MappingDef(
1152
+ name="m_log_test",
1153
+ sources=[SourceDef(name="SRC1", database_type="Flat File")],
1154
+ targets=[TargetDef(name="TGT1", database_type="Flat File", fields=[FieldDef(name="ID", datatype="integer")])],
1155
+ transformations=[
1156
+ TransformationDef(name="SQ_SRC1", type="Source Qualifier",
1157
+ fields=[FieldDef(name="ID", datatype="integer", porttype="INPUT/OUTPUT")]),
1158
+ TransformationDef(name="EXP_TEST", type="Expression",
1159
+ fields=[FieldDef(name="ID", datatype="integer", porttype="INPUT/OUTPUT")]),
1160
+ ],
1161
+ connectors=[
1162
+ ConnectorDef(from_instance="SRC1", to_instance="SQ_SRC1", from_field="ID", to_field="ID", from_instance_type="", to_instance_type=""),
1163
+ ConnectorDef(from_instance="SQ_SRC1", to_instance="EXP_TEST", from_field="ID", to_field="ID", from_instance_type="", to_instance_type=""),
1164
+ ConnectorDef(from_instance="EXP_TEST", to_instance="TGT1", from_field="ID", to_field="ID", from_instance_type="", to_instance_type=""),
1165
+ ],
1166
+ instances=[
1167
+ InstanceDef(name="SRC1", type="Source Definition", transformation_name="SRC1"),
1168
+ InstanceDef(name="TGT1", type="Target Definition", transformation_name="TGT1"),
1169
+ InstanceDef(name="SQ_SRC1", type="Source Qualifier", transformation_name="SQ_SRC1"),
1170
+ InstanceDef(name="EXP_TEST", type="Expression", transformation_name="EXP_TEST"),
1171
+ ],
1172
+ )
1173
+ folder = FolderDef(name="TestFolder", mappings=[mapping])
1174
+ code = generate_mapping_code(mapping, folder, "pandas", 1)
1175
+ assert "import logging" in code
1176
+ assert "logger = logging.getLogger(__name__)" in code
1177
+
1178
+ def test_source_row_count_logging(self):
1179
+ from informatica_python.models import (
1180
+ MappingDef, FolderDef, TransformationDef, ConnectorDef,
1181
+ FieldDef, SourceDef, TargetDef, InstanceDef,
1182
+ )
1183
+ from informatica_python.generators.mapping_gen import generate_mapping_code
1184
+ mapping = MappingDef(
1185
+ name="m_log_src",
1186
+ sources=[SourceDef(name="SRC1", database_type="Flat File")],
1187
+ targets=[TargetDef(name="TGT1", database_type="Flat File", fields=[FieldDef(name="ID", datatype="integer")])],
1188
+ transformations=[
1189
+ TransformationDef(name="SQ_SRC1", type="Source Qualifier",
1190
+ fields=[FieldDef(name="ID", datatype="integer", porttype="INPUT/OUTPUT")]),
1191
+ ],
1192
+ connectors=[
1193
+ ConnectorDef(from_instance="SRC1", to_instance="SQ_SRC1", from_field="ID", to_field="ID", from_instance_type="", to_instance_type=""),
1194
+ ConnectorDef(from_instance="SQ_SRC1", to_instance="TGT1", from_field="ID", to_field="ID", from_instance_type="", to_instance_type=""),
1195
+ ],
1196
+ instances=[
1197
+ InstanceDef(name="SRC1", type="Source Definition", transformation_name="SRC1"),
1198
+ InstanceDef(name="TGT1", type="Target Definition", transformation_name="TGT1"),
1199
+ InstanceDef(name="SQ_SRC1", type="Source Qualifier", transformation_name="SQ_SRC1"),
1200
+ ],
1201
+ )
1202
+ folder = FolderDef(name="TestFolder", mappings=[mapping])
1203
+ code = generate_mapping_code(mapping, folder, "pandas", 1)
1204
+ assert "logger.info(f'Source SQ_SRC1:" in code
1205
+ assert "rows read" in code
1206
+
1207
+ def test_transform_row_count_logging(self):
1208
+ from informatica_python.models import (
1209
+ MappingDef, FolderDef, TransformationDef, ConnectorDef,
1210
+ FieldDef, SourceDef, TargetDef, InstanceDef,
1211
+ )
1212
+ from informatica_python.generators.mapping_gen import generate_mapping_code
1213
+ mapping = MappingDef(
1214
+ name="m_log_tx",
1215
+ sources=[SourceDef(name="SRC1", database_type="Flat File")],
1216
+ targets=[TargetDef(name="TGT1", database_type="Flat File", fields=[FieldDef(name="ID", datatype="integer")])],
1217
+ transformations=[
1218
+ TransformationDef(name="SQ_SRC1", type="Source Qualifier",
1219
+ fields=[FieldDef(name="ID", datatype="integer", porttype="INPUT/OUTPUT")]),
1220
+ TransformationDef(name="FIL_ACTIVE", type="Filter",
1221
+ fields=[FieldDef(name="ID", datatype="integer", porttype="INPUT/OUTPUT")],
1222
+ attributes=[]),
1223
+ ],
1224
+ connectors=[
1225
+ ConnectorDef(from_instance="SRC1", to_instance="SQ_SRC1", from_field="ID", to_field="ID", from_instance_type="", to_instance_type=""),
1226
+ ConnectorDef(from_instance="SQ_SRC1", to_instance="FIL_ACTIVE", from_field="ID", to_field="ID", from_instance_type="", to_instance_type=""),
1227
+ ConnectorDef(from_instance="FIL_ACTIVE", to_instance="TGT1", from_field="ID", to_field="ID", from_instance_type="", to_instance_type=""),
1228
+ ],
1229
+ instances=[
1230
+ InstanceDef(name="SRC1", type="Source Definition", transformation_name="SRC1"),
1231
+ InstanceDef(name="TGT1", type="Target Definition", transformation_name="TGT1"),
1232
+ InstanceDef(name="SQ_SRC1", type="Source Qualifier", transformation_name="SQ_SRC1"),
1233
+ InstanceDef(name="FIL_ACTIVE", type="Filter", transformation_name="FIL_ACTIVE"),
1234
+ ],
1235
+ )
1236
+ folder = FolderDef(name="TestFolder", mappings=[mapping])
1237
+ code = generate_mapping_code(mapping, folder, "pandas", 1)
1238
+ assert "_input_rows_fil_active = len(" in code
1239
+ assert "_output_rows_fil_active = len(df_fil_active)" in code
1240
+ assert "FIL_ACTIVE (Filter):" in code
1241
+ assert "input rows ->" in code
1242
+ assert "output rows" in code
1243
+
1244
+ def test_target_row_count_logging(self):
1245
+ from informatica_python.models import (
1246
+ MappingDef, FolderDef, TransformationDef, ConnectorDef,
1247
+ FieldDef, SourceDef, TargetDef, InstanceDef,
1248
+ )
1249
+ from informatica_python.generators.mapping_gen import generate_mapping_code
1250
+ mapping = MappingDef(
1251
+ name="m_log_tgt",
1252
+ sources=[SourceDef(name="SRC1", database_type="Flat File")],
1253
+ targets=[TargetDef(name="TGT1", database_type="Flat File", fields=[FieldDef(name="ID", datatype="integer")])],
1254
+ transformations=[
1255
+ TransformationDef(name="SQ_SRC1", type="Source Qualifier",
1256
+ fields=[FieldDef(name="ID", datatype="integer", porttype="INPUT/OUTPUT")]),
1257
+ ],
1258
+ connectors=[
1259
+ ConnectorDef(from_instance="SRC1", to_instance="SQ_SRC1", from_field="ID", to_field="ID", from_instance_type="", to_instance_type=""),
1260
+ ConnectorDef(from_instance="SQ_SRC1", to_instance="TGT1", from_field="ID", to_field="ID", from_instance_type="", to_instance_type=""),
1261
+ ],
1262
+ instances=[
1263
+ InstanceDef(name="SRC1", type="Source Definition", transformation_name="SRC1"),
1264
+ InstanceDef(name="TGT1", type="Target Definition", transformation_name="TGT1"),
1265
+ InstanceDef(name="SQ_SRC1", type="Source Qualifier", transformation_name="SQ_SRC1"),
1266
+ ],
1267
+ )
1268
+ folder = FolderDef(name="TestFolder", mappings=[mapping])
1269
+ code = generate_mapping_code(mapping, folder, "pandas", 1)
1270
+ assert "logger.info(f'Target TGT1:" in code
1271
+ assert "rows written" in code
1272
+
1273
+ def test_input_output_rows_multiple_transforms(self):
1274
+ from informatica_python.models import (
1275
+ MappingDef, FolderDef, TransformationDef, ConnectorDef,
1276
+ FieldDef, SourceDef, TargetDef, InstanceDef,
1277
+ )
1278
+ from informatica_python.generators.mapping_gen import generate_mapping_code
1279
+ mapping = MappingDef(
1280
+ name="m_log_multi",
1281
+ sources=[SourceDef(name="SRC1", database_type="Flat File")],
1282
+ targets=[TargetDef(name="TGT1", database_type="Flat File", fields=[FieldDef(name="VAL", datatype="integer")])],
1283
+ transformations=[
1284
+ TransformationDef(name="SQ_SRC1", type="Source Qualifier",
1285
+ fields=[FieldDef(name="VAL", datatype="integer", porttype="INPUT/OUTPUT")]),
1286
+ TransformationDef(name="EXP1", type="Expression",
1287
+ fields=[FieldDef(name="VAL", datatype="integer", porttype="INPUT/OUTPUT")]),
1288
+ TransformationDef(name="SRT1", type="Sorter",
1289
+ fields=[FieldDef(name="VAL", datatype="integer", porttype="INPUT/OUTPUT")]),
1290
+ ],
1291
+ connectors=[
1292
+ ConnectorDef(from_instance="SRC1", to_instance="SQ_SRC1", from_field="ID", to_field="ID", from_instance_type="", to_instance_type=""),
1293
+ ConnectorDef(from_instance="SQ_SRC1", to_instance="EXP1", from_field="VAL", to_field="VAL", from_instance_type="", to_instance_type=""),
1294
+ ConnectorDef(from_instance="EXP1", to_instance="SRT1", from_field="VAL", to_field="VAL", from_instance_type="", to_instance_type=""),
1295
+ ConnectorDef(from_instance="SRT1", to_instance="TGT1", from_field="VAL", to_field="VAL", from_instance_type="", to_instance_type=""),
1296
+ ],
1297
+ instances=[
1298
+ InstanceDef(name="SRC1", type="Source Definition", transformation_name="SRC1"),
1299
+ InstanceDef(name="TGT1", type="Target Definition", transformation_name="TGT1"),
1300
+ InstanceDef(name="SQ_SRC1", type="Source Qualifier", transformation_name="SQ_SRC1"),
1301
+ InstanceDef(name="EXP1", type="Expression", transformation_name="EXP1"),
1302
+ InstanceDef(name="SRT1", type="Sorter", transformation_name="SRT1"),
1303
+ ],
1304
+ )
1305
+ folder = FolderDef(name="TestFolder", mappings=[mapping])
1306
+ code = generate_mapping_code(mapping, folder, "pandas", 1)
1307
+ assert "_input_rows_exp1" in code
1308
+ assert "_output_rows_exp1" in code
1309
+ assert "_input_rows_srt1" in code
1310
+ assert "_output_rows_srt1" in code
1311
+ assert code.count("logger.info") >= 4
1312
+
1313
+
1314
+ class TestGeneratedCodeDocumentation:
1315
+
1316
+ def test_mapping_docstring_sources_targets(self):
1317
+ from informatica_python.models import (
1318
+ MappingDef, FolderDef, TransformationDef, ConnectorDef,
1319
+ FieldDef, SourceDef, TargetDef, InstanceDef,
1320
+ )
1321
+ from informatica_python.generators.mapping_gen import generate_mapping_code
1322
+ mapping = MappingDef(
1323
+ name="m_doc_test",
1324
+ description="Load customer data from staging to warehouse",
1325
+ sources=[SourceDef(name="SRC_CUST", database_type="Flat File")],
1326
+ targets=[TargetDef(name="TGT_CUST", database_type="Flat File", fields=[FieldDef(name="ID", datatype="integer")])],
1327
+ transformations=[
1328
+ TransformationDef(name="SQ_SRC_CUST", type="Source Qualifier",
1329
+ fields=[FieldDef(name="ID", datatype="integer", porttype="INPUT/OUTPUT")]),
1330
+ ],
1331
+ connectors=[
1332
+ ConnectorDef(from_instance="SRC_CUST", to_instance="SQ_SRC_CUST", from_field="ID", to_field="ID", from_instance_type="", to_instance_type=""),
1333
+ ConnectorDef(from_instance="SQ_SRC_CUST", to_instance="TGT_CUST", from_field="ID", to_field="ID", from_instance_type="", to_instance_type=""),
1334
+ ],
1335
+ instances=[
1336
+ InstanceDef(name="SRC_CUST", type="Source Definition", transformation_name="SRC_CUST"),
1337
+ InstanceDef(name="TGT_CUST", type="Target Definition", transformation_name="TGT_CUST"),
1338
+ InstanceDef(name="SQ_SRC_CUST", type="Source Qualifier", transformation_name="SQ_SRC_CUST"),
1339
+ ],
1340
+ )
1341
+ folder = FolderDef(name="TestFolder", mappings=[mapping])
1342
+ code = generate_mapping_code(mapping, folder, "pandas", 1)
1343
+ assert "Execute mapping: m_doc_test" in code
1344
+ assert "Load customer data from staging to warehouse" in code
1345
+ assert "Sources: SRC_CUST" in code
1346
+ assert "Targets: TGT_CUST" in code
1347
+
1348
+ def test_mapping_docstring_transform_pipeline(self):
1349
+ from informatica_python.models import (
1350
+ MappingDef, FolderDef, TransformationDef, ConnectorDef,
1351
+ FieldDef, SourceDef, TargetDef, InstanceDef,
1352
+ )
1353
+ from informatica_python.generators.mapping_gen import generate_mapping_code
1354
+ mapping = MappingDef(
1355
+ name="m_doc_pipeline",
1356
+ sources=[SourceDef(name="SRC1", database_type="Flat File")],
1357
+ targets=[TargetDef(name="TGT1", database_type="Flat File", fields=[FieldDef(name="ID", datatype="integer")])],
1358
+ transformations=[
1359
+ TransformationDef(name="SQ_SRC1", type="Source Qualifier",
1360
+ fields=[FieldDef(name="ID", datatype="integer", porttype="INPUT/OUTPUT")]),
1361
+ TransformationDef(name="EXP_CALC", type="Expression",
1362
+ fields=[
1363
+ FieldDef(name="ID", datatype="integer", porttype="INPUT/OUTPUT"),
1364
+ FieldDef(name="TOTAL", datatype="decimal", porttype="OUTPUT"),
1365
+ ]),
1366
+ TransformationDef(name="FIL_VALID", type="Filter",
1367
+ fields=[
1368
+ FieldDef(name="ID", datatype="integer", porttype="INPUT"),
1369
+ FieldDef(name="TOTAL", datatype="decimal", porttype="INPUT/OUTPUT"),
1370
+ ],
1371
+ attributes=[]),
1372
+ ],
1373
+ connectors=[
1374
+ ConnectorDef(from_instance="SRC1", to_instance="SQ_SRC1", from_field="ID", to_field="ID", from_instance_type="", to_instance_type=""),
1375
+ ConnectorDef(from_instance="SQ_SRC1", to_instance="EXP_CALC", from_field="ID", to_field="ID", from_instance_type="", to_instance_type=""),
1376
+ ConnectorDef(from_instance="EXP_CALC", to_instance="FIL_VALID", from_field="TOTAL", to_field="TOTAL", from_instance_type="", to_instance_type=""),
1377
+ ConnectorDef(from_instance="FIL_VALID", to_instance="TGT1", from_field="ID", to_field="ID", from_instance_type="", to_instance_type=""),
1378
+ ],
1379
+ instances=[
1380
+ InstanceDef(name="SRC1", type="Source Definition", transformation_name="SRC1"),
1381
+ InstanceDef(name="TGT1", type="Target Definition", transformation_name="TGT1"),
1382
+ InstanceDef(name="SQ_SRC1", type="Source Qualifier", transformation_name="SQ_SRC1"),
1383
+ InstanceDef(name="EXP_CALC", type="Expression", transformation_name="EXP_CALC"),
1384
+ InstanceDef(name="FIL_VALID", type="Filter", transformation_name="FIL_VALID"),
1385
+ ],
1386
+ )
1387
+ folder = FolderDef(name="TestFolder", mappings=[mapping])
1388
+ code = generate_mapping_code(mapping, folder, "pandas", 1)
1389
+ assert "Transformation pipeline:" in code
1390
+ assert "EXP_CALC (Expression):" in code
1391
+ assert "FIL_VALID (Filter):" in code
1392
+ assert "input fields ->" in code
1393
+ assert "output fields" in code
1394
+
1395
def test_transform_field_documentation(self):
    """Generated code documents an Expression's ports and description.

    Builds a Source -> Source Qualifier -> Expression -> Target mapping
    and checks that the emitted pandas code contains the input-field,
    output-field and description comment lines for the Expression.
    """
    from informatica_python.generators.mapping_gen import generate_mapping_code
    from informatica_python.models import (
        ConnectorDef,
        FieldDef,
        FolderDef,
        InstanceDef,
        MappingDef,
        SourceDef,
        TargetDef,
        TransformationDef,
    )

    def wire(src, dst, src_field, dst_field):
        # Every connector in this fixture leaves both instance types blank.
        return ConnectorDef(
            from_instance=src,
            to_instance=dst,
            from_field=src_field,
            to_field=dst_field,
            from_instance_type="",
            to_instance_type="",
        )

    def node(name, kind):
        # Instances in this fixture reuse their own name as transformation_name.
        return InstanceDef(name=name, type=kind, transformation_name=name)

    source = SourceDef(name="SRC1", database_type="Flat File")
    target = TargetDef(
        name="TGT1",
        database_type="Flat File",
        fields=[FieldDef(name="ID", datatype="integer")],
    )
    qualifier = TransformationDef(
        name="SQ_SRC1",
        type="Source Qualifier",
        fields=[
            FieldDef(name="ID", datatype="integer", porttype="INPUT/OUTPUT"),
            FieldDef(name="NAME", datatype="string", porttype="INPUT/OUTPUT"),
        ],
    )
    upper_expr = TransformationDef(
        name="EXP_UPPER",
        type="Expression",
        description="Uppercase the name field",
        fields=[
            FieldDef(name="ID", datatype="integer", porttype="INPUT"),
            FieldDef(name="NAME", datatype="string", porttype="INPUT"),
            FieldDef(
                name="NAME_UPPER",
                datatype="string",
                porttype="OUTPUT",
                expression="UPPER(NAME)",
            ),
        ],
    )

    mapping = MappingDef(
        name="m_doc_fields",
        sources=[source],
        targets=[target],
        transformations=[qualifier, upper_expr],
        connectors=[
            wire("SRC1", "SQ_SRC1", "ID", "ID"),
            wire("SQ_SRC1", "EXP_UPPER", "ID", "ID"),
            wire("SQ_SRC1", "EXP_UPPER", "NAME", "NAME"),
            wire("EXP_UPPER", "TGT1", "ID", "ID"),
        ],
        instances=[
            node("SRC1", "Source Definition"),
            node("TGT1", "Target Definition"),
            node("SQ_SRC1", "Source Qualifier"),
            node("EXP_UPPER", "Expression"),
        ],
    )
    folder = FolderDef(name="TestFolder", mappings=[mapping])

    code = generate_mapping_code(mapping, folder, "pandas", 1)

    assert "# Input fields: ID, NAME" in code
    assert "# Output fields: NAME_UPPER" in code
    assert "# Description: Uppercase the name field" in code
1435
+
1436
def test_transform_description_comment(self):
    """An Aggregator's description appears as a comment in generated code.

    Builds a Source -> Source Qualifier -> Aggregator -> Target mapping
    and checks that the emitted pandas code carries the description line
    and a "# ---" marker.
    """
    from informatica_python.generators.mapping_gen import generate_mapping_code
    from informatica_python.models import (
        ConnectorDef,
        FieldDef,
        FolderDef,
        InstanceDef,
        MappingDef,
        SourceDef,
        TargetDef,
        TransformationDef,
    )

    def wire(src, dst, src_field, dst_field):
        # Every connector in this fixture leaves both instance types blank.
        return ConnectorDef(
            from_instance=src,
            to_instance=dst,
            from_field=src_field,
            to_field=dst_field,
            from_instance_type="",
            to_instance_type="",
        )

    def node(name, kind):
        # Instances in this fixture reuse their own name as transformation_name.
        return InstanceDef(name=name, type=kind, transformation_name=name)

    source = SourceDef(name="SRC1", database_type="Flat File")
    target = TargetDef(
        name="TGT1",
        database_type="Flat File",
        fields=[FieldDef(name="ID", datatype="integer")],
    )
    qualifier = TransformationDef(
        name="SQ_SRC1",
        type="Source Qualifier",
        fields=[FieldDef(name="ID", datatype="integer", porttype="INPUT/OUTPUT")],
    )
    aggregator = TransformationDef(
        name="AGG_TOTALS",
        type="Aggregator",
        description="Calculate regional totals",
        fields=[
            FieldDef(name="REGION", datatype="string", porttype="INPUT/OUTPUT"),
            FieldDef(
                name="TOTAL",
                datatype="decimal",
                porttype="OUTPUT",
                expression="SUM(AMOUNT)",
            ),
        ],
    )

    mapping = MappingDef(
        name="m_doc_desc",
        sources=[source],
        targets=[target],
        transformations=[qualifier, aggregator],
        connectors=[
            wire("SRC1", "SQ_SRC1", "ID", "ID"),
            wire("SQ_SRC1", "AGG_TOTALS", "ID", "REGION"),
            wire("AGG_TOTALS", "TGT1", "REGION", "ID"),
        ],
        instances=[
            node("SRC1", "Source Definition"),
            node("TGT1", "Target Definition"),
            node("SQ_SRC1", "Source Qualifier"),
            node("AGG_TOTALS", "Aggregator"),
        ],
    )
    folder = FolderDef(name="TestFolder", mappings=[mapping])

    code = generate_mapping_code(mapping, folder, "pandas", 1)

    assert "# Description: Calculate regional totals" in code
    assert "# ---" in code
1472
+
1473
def test_no_description_no_desc_line(self):
    """No bare "# Description:" line is emitted when descriptions are absent.

    Neither transformation in this mapping sets a description. Any line
    of generated code that mentions "# Description:" must therefore carry
    text after the label rather than being an empty header.
    """
    from informatica_python.generators.mapping_gen import generate_mapping_code
    from informatica_python.models import (
        ConnectorDef,
        FieldDef,
        FolderDef,
        InstanceDef,
        MappingDef,
        SourceDef,
        TargetDef,
        TransformationDef,
    )

    def wire(src, dst, src_field, dst_field):
        # Every connector in this fixture leaves both instance types blank.
        return ConnectorDef(
            from_instance=src,
            to_instance=dst,
            from_field=src_field,
            to_field=dst_field,
            from_instance_type="",
            to_instance_type="",
        )

    def node(name, kind):
        # Instances in this fixture reuse their own name as transformation_name.
        return InstanceDef(name=name, type=kind, transformation_name=name)

    source = SourceDef(name="SRC1", database_type="Flat File")
    target = TargetDef(
        name="TGT1",
        database_type="Flat File",
        fields=[FieldDef(name="ID", datatype="integer")],
    )
    qualifier = TransformationDef(
        name="SQ_SRC1",
        type="Source Qualifier",
        fields=[FieldDef(name="ID", datatype="integer", porttype="INPUT/OUTPUT")],
    )
    passthrough = TransformationDef(
        name="EXP_PASS",
        type="Expression",
        fields=[FieldDef(name="ID", datatype="integer", porttype="INPUT/OUTPUT")],
    )

    mapping = MappingDef(
        name="m_doc_nodesc",
        sources=[source],
        targets=[target],
        transformations=[qualifier, passthrough],
        connectors=[
            wire("SRC1", "SQ_SRC1", "ID", "ID"),
            wire("SQ_SRC1", "EXP_PASS", "ID", "ID"),
            wire("EXP_PASS", "TGT1", "ID", "ID"),
        ],
        instances=[
            node("SRC1", "Source Definition"),
            node("TGT1", "Target Definition"),
            node("SQ_SRC1", "Source Qualifier"),
            node("EXP_PASS", "Expression"),
        ],
    )
    folder = FolderDef(name="TestFolder", mappings=[mapping])

    code = generate_mapping_code(mapping, folder, "pandas", 1)

    # Collect every line that mentions the label, then require that none of
    # them is the label alone once surrounding whitespace is removed.
    labelled = [entry for entry in code.split("\n") if "# Description:" in entry]
    assert all(entry.strip() != "# Description:" for entry in labelled)
1507
+
1508
def test_field_list_truncation(self):
    """Generated code contains an ellipsis when a transformation has many fields.

    The Expression under test carries fifteen INPUT/OUTPUT string ports
    named F0..F14; the emitted pandas code is expected to contain "...".
    """
    from informatica_python.generators.mapping_gen import generate_mapping_code
    from informatica_python.models import (
        ConnectorDef,
        FieldDef,
        FolderDef,
        InstanceDef,
        MappingDef,
        SourceDef,
        TargetDef,
        TransformationDef,
    )

    def wire(src, dst, src_field, dst_field):
        # Every connector in this fixture leaves both instance types blank.
        return ConnectorDef(
            from_instance=src,
            to_instance=dst,
            from_field=src_field,
            to_field=dst_field,
            from_instance_type="",
            to_instance_type="",
        )

    def node(name, kind):
        # Instances in this fixture reuse their own name as transformation_name.
        return InstanceDef(name=name, type=kind, transformation_name=name)

    many_fields = [
        FieldDef(name=f"F{i}", datatype="string", porttype="INPUT/OUTPUT")
        for i in range(15)
    ]

    source = SourceDef(name="SRC1", database_type="Flat File")
    target = TargetDef(
        name="TGT1",
        database_type="Flat File",
        fields=[FieldDef(name="ID", datatype="integer")],
    )
    qualifier = TransformationDef(
        name="SQ_SRC1",
        type="Source Qualifier",
        fields=[FieldDef(name="ID", datatype="integer", porttype="INPUT/OUTPUT")],
    )
    wide_expr = TransformationDef(name="EXP_MANY", type="Expression", fields=many_fields)

    mapping = MappingDef(
        name="m_doc_many",
        sources=[source],
        targets=[target],
        transformations=[qualifier, wide_expr],
        connectors=[
            wire("SRC1", "SQ_SRC1", "ID", "ID"),
            wire("SQ_SRC1", "EXP_MANY", "ID", "F0"),
            wire("EXP_MANY", "TGT1", "F0", "ID"),
        ],
        instances=[
            node("SRC1", "Source Definition"),
            node("TGT1", "Target Definition"),
            node("SQ_SRC1", "Source Qualifier"),
            node("EXP_MANY", "Expression"),
        ],
    )
    folder = FolderDef(name="TestFolder", mappings=[mapping])

    code = generate_mapping_code(mapping, folder, "pandas", 1)

    assert "..." in code