informatica-python 1.7.1__tar.gz → 1.8.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (30)
  1. {informatica_python-1.7.1 → informatica_python-1.8.1}/PKG-INFO +1 -1
  2. {informatica_python-1.7.1 → informatica_python-1.8.1}/informatica_python/__init__.py +1 -1
  3. {informatica_python-1.7.1 → informatica_python-1.8.1}/informatica_python/generators/mapping_gen.py +51 -1
  4. {informatica_python-1.7.1 → informatica_python-1.8.1}/informatica_python.egg-info/PKG-INFO +1 -1
  5. {informatica_python-1.7.1 → informatica_python-1.8.1}/pyproject.toml +1 -1
  6. {informatica_python-1.7.1 → informatica_python-1.8.1}/tests/test_integration.py +398 -0
  7. {informatica_python-1.7.1 → informatica_python-1.8.1}/LICENSE +0 -0
  8. {informatica_python-1.7.1 → informatica_python-1.8.1}/README.md +0 -0
  9. {informatica_python-1.7.1 → informatica_python-1.8.1}/informatica_python/cli.py +0 -0
  10. {informatica_python-1.7.1 → informatica_python-1.8.1}/informatica_python/converter.py +0 -0
  11. {informatica_python-1.7.1 → informatica_python-1.8.1}/informatica_python/generators/__init__.py +0 -0
  12. {informatica_python-1.7.1 → informatica_python-1.8.1}/informatica_python/generators/config_gen.py +0 -0
  13. {informatica_python-1.7.1 → informatica_python-1.8.1}/informatica_python/generators/error_log_gen.py +0 -0
  14. {informatica_python-1.7.1 → informatica_python-1.8.1}/informatica_python/generators/helper_gen.py +0 -0
  15. {informatica_python-1.7.1 → informatica_python-1.8.1}/informatica_python/generators/sql_gen.py +0 -0
  16. {informatica_python-1.7.1 → informatica_python-1.8.1}/informatica_python/generators/workflow_gen.py +0 -0
  17. {informatica_python-1.7.1 → informatica_python-1.8.1}/informatica_python/models.py +0 -0
  18. {informatica_python-1.7.1 → informatica_python-1.8.1}/informatica_python/parser.py +0 -0
  19. {informatica_python-1.7.1 → informatica_python-1.8.1}/informatica_python/utils/__init__.py +0 -0
  20. {informatica_python-1.7.1 → informatica_python-1.8.1}/informatica_python/utils/datatype_map.py +0 -0
  21. {informatica_python-1.7.1 → informatica_python-1.8.1}/informatica_python/utils/expression_converter.py +0 -0
  22. {informatica_python-1.7.1 → informatica_python-1.8.1}/informatica_python/utils/lib_adapters.py +0 -0
  23. {informatica_python-1.7.1 → informatica_python-1.8.1}/informatica_python/utils/sql_dialect.py +0 -0
  24. {informatica_python-1.7.1 → informatica_python-1.8.1}/informatica_python.egg-info/SOURCES.txt +0 -0
  25. {informatica_python-1.7.1 → informatica_python-1.8.1}/informatica_python.egg-info/dependency_links.txt +0 -0
  26. {informatica_python-1.7.1 → informatica_python-1.8.1}/informatica_python.egg-info/entry_points.txt +0 -0
  27. {informatica_python-1.7.1 → informatica_python-1.8.1}/informatica_python.egg-info/requires.txt +0 -0
  28. {informatica_python-1.7.1 → informatica_python-1.8.1}/informatica_python.egg-info/top_level.txt +0 -0
  29. {informatica_python-1.7.1 → informatica_python-1.8.1}/setup.cfg +0 -0
  30. {informatica_python-1.7.1 → informatica_python-1.8.1}/tests/test_converter.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: informatica-python
3
- Version: 1.7.1
3
+ Version: 1.8.1
4
4
  Summary: Convert Informatica PowerCenter workflow XML to Python/PySpark code
5
5
  Author: Nick
6
6
  License: MIT
@@ -7,7 +7,7 @@ Licensed under the MIT License.
7
7
 
8
8
  from informatica_python.converter import InformaticaConverter
9
9
 
10
- __version__ = "1.7.1"
10
+ __version__ = "1.8.1"
11
11
  __author__ = "Nick"
12
12
  __license__ = "MIT"
13
13
  __all__ = ["InformaticaConverter"]
@@ -225,9 +225,12 @@ def generate_mapping_code(mapping: MappingDef, folder: FolderDef,
225
225
  lines.append(f"Auto-generated by informatica-python")
226
226
  lines.append('"""')
227
227
  lines.append("")
228
+ lines.append("import logging")
228
229
  lines.append("import numpy as np")
229
230
  lines.append("from helper_functions import *")
230
231
  lines.append("")
232
+ lines.append("logger = logging.getLogger(__name__)")
233
+ lines.append("")
231
234
  lines.append("")
232
235
 
233
236
  inlined_transforms, inlined_connectors, mapplet_instance_names = _inline_mapplets(mapping, folder)
@@ -248,8 +251,29 @@ def generate_mapping_code(mapping: MappingDef, folder: FolderDef,
248
251
  instance_map = {i.name: i for i in mapping.instances}
249
252
  session_overrides = _build_session_conn_overrides(mapping, folder)
250
253
 
254
+ source_names = list(source_map.keys())
255
+ target_names = list(target_map.keys())
256
+ tx_summary = []
257
+ for tx in all_transforms:
258
+ if tx.type not in ("Source Qualifier", "Application Source Qualifier"):
259
+ in_flds = [f.name for f in tx.fields if "INPUT" in (f.porttype or "").upper()]
260
+ out_flds = [f.name for f in tx.fields if "OUTPUT" in (f.porttype or "").upper()]
261
+ tx_summary.append((tx.name, tx.type, in_flds, out_flds))
262
+
251
263
  lines.append(f"def run_{_safe_name(mapping.name)}(config):")
252
- lines.append(f' """Execute mapping: {mapping.name}"""')
264
+ lines.append(f' """')
265
+ lines.append(f" Execute mapping: {mapping.name}")
266
+ if mapping.description:
267
+ lines.append(f" Description: {mapping.description}")
268
+ lines.append(f"")
269
+ lines.append(f" Sources: {', '.join(source_names) if source_names else 'None'}")
270
+ lines.append(f" Targets: {', '.join(target_names) if target_names else 'None'}")
271
+ if tx_summary:
272
+ lines.append(f"")
273
+ lines.append(f" Transformation pipeline:")
274
+ for tx_name, tx_type, in_f, out_f in tx_summary:
275
+ lines.append(f" - {tx_name} ({tx_type}): {len(in_f)} input fields -> {len(out_f)} output fields")
276
+ lines.append(f' """')
253
277
  lines.append(f" start_time = log_mapping_start('{mapping.name}')")
254
278
  lines.append("")
255
279
 
@@ -620,6 +644,10 @@ def _generate_source_qualifier(lines, sq, source_map, source_dfs, connector_grap
620
644
  lines.append(f" df_{sq_safe} = df_{_safe_name(next(iter(connected_sources)))}")
621
645
 
622
646
  source_dfs[sq.name] = f"df_{sq_safe}"
647
+ lines.append(f" try:")
648
+ lines.append(f" logger.info(f'Source {sq.name}: {{len(df_{sq_safe})}} rows read')")
649
+ lines.append(f" except Exception:")
650
+ lines.append(f" logger.info('Source {sq.name}: rows read (count unavailable)')")
623
651
 
624
652
  if post_sql:
625
653
  lines.append(f" # Post-SQL")
@@ -649,7 +677,20 @@ def _generate_transformation(lines, tx, connector_graph, source_dfs, transform_m
649
677
  if not input_df:
650
678
  input_df = "df_input"
651
679
 
680
+ in_fields = [f.name for f in tx.fields if "INPUT" in (f.porttype or "").upper()]
681
+ out_fields = [f.name for f in tx.fields if "OUTPUT" in (f.porttype or "").upper()]
682
+ tx_desc = tx.description or ""
683
+ lines.append(f" # -------------------------------------------------------------------")
652
684
  lines.append(f" # Transformation: {tx.name} (Type: {tx.type})")
685
+ if tx_desc:
686
+ lines.append(f" # Description: {tx_desc}")
687
+ lines.append(f" # Input fields: {', '.join(in_fields[:10])}{' ...' if len(in_fields) > 10 else ''}")
688
+ lines.append(f" # Output fields: {', '.join(out_fields[:10])}{' ...' if len(out_fields) > 10 else ''}")
689
+ lines.append(f" # -------------------------------------------------------------------")
690
+ lines.append(f" try:")
691
+ lines.append(f" _input_rows_{tx_safe} = len({input_df})")
692
+ lines.append(f" except Exception:")
693
+ lines.append(f" _input_rows_{tx_safe} = -1")
653
694
 
654
695
  if tx_type == "expression":
655
696
  _gen_expression_transform(lines, tx, tx_safe, input_df, source_dfs, data_lib)
@@ -689,6 +730,11 @@ def _generate_transformation(lines, tx, connector_graph, source_dfs, transform_m
689
730
  lines.append(f" df_{tx_safe} = {copy_expr}")
690
731
  source_dfs[tx.name] = f"df_{tx_safe}"
691
732
 
733
+ lines.append(f" try:")
734
+ lines.append(f" _output_rows_{tx_safe} = len(df_{tx_safe})")
735
+ lines.append(f" except Exception:")
736
+ lines.append(f" _output_rows_{tx_safe} = -1")
737
+ lines.append(f" logger.info(f'{tx.name} ({tx.type}): {{_input_rows_{tx_safe}}} input rows -> {{_output_rows_{tx_safe}}} output rows')")
692
738
  lines.append("")
693
739
 
694
740
 
@@ -1355,6 +1401,10 @@ def _generate_target_write(lines, tgt_name, tgt_def, connector_graph, source_dfs
1355
1401
  else:
1356
1402
  lines.append(f" write_file(df_target_{tgt_safe}, config.get('targets', {{}}).get('{tgt_def.name}', {{}}).get('file_path', '{tgt_def.name}'),")
1357
1403
  lines.append(f" config.get('targets', {{}}).get('{tgt_def.name}', {{}}))")
1404
+ lines.append(f" try:")
1405
+ lines.append(f" logger.info(f'Target {tgt_def.name}: {{len(df_target_{tgt_safe})}} rows written')")
1406
+ lines.append(f" except Exception:")
1407
+ lines.append(f" logger.info('Target {tgt_def.name}: rows written (count unavailable)')")
1358
1408
 
1359
1409
 
1360
1410
  CAST_MAP = {
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: informatica-python
3
- Version: 1.7.1
3
+ Version: 1.8.1
4
4
  Summary: Convert Informatica PowerCenter workflow XML to Python/PySpark code
5
5
  Author: Nick
6
6
  License: MIT
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
4
4
 
5
5
  [project]
6
6
  name = "informatica-python"
7
- version = "1.7.1"
7
+ version = "1.8.1"
8
8
  description = "Convert Informatica PowerCenter workflow XML to Python/PySpark code"
9
9
  readme = "README.md"
10
10
  license = {text = "MIT"}
@@ -1138,3 +1138,401 @@ class TestStatePersistence:
1138
1138
  assert "json.load" in code
1139
1139
  assert "json.dump" in code
1140
1140
  assert "persistent_state.json" in code
1141
+
1142
+
1143
+ class TestLoggingEnrichment:
1144
+
1145
+ def test_mapping_imports_logging(self):
1146
+ from informatica_python.models import (
1147
+ MappingDef, FolderDef, TransformationDef, ConnectorDef,
1148
+ FieldDef, SourceDef, TargetDef, InstanceDef,
1149
+ )
1150
+ from informatica_python.generators.mapping_gen import generate_mapping_code
1151
+ mapping = MappingDef(
1152
+ name="m_log_test",
1153
+ sources=[SourceDef(name="SRC1", database_type="Flat File")],
1154
+ targets=[TargetDef(name="TGT1", database_type="Flat File", fields=[FieldDef(name="ID", datatype="integer")])],
1155
+ transformations=[
1156
+ TransformationDef(name="SQ_SRC1", type="Source Qualifier",
1157
+ fields=[FieldDef(name="ID", datatype="integer", porttype="INPUT/OUTPUT")]),
1158
+ TransformationDef(name="EXP_TEST", type="Expression",
1159
+ fields=[FieldDef(name="ID", datatype="integer", porttype="INPUT/OUTPUT")]),
1160
+ ],
1161
+ connectors=[
1162
+ ConnectorDef(from_instance="SRC1", to_instance="SQ_SRC1", from_field="ID", to_field="ID", from_instance_type="", to_instance_type=""),
1163
+ ConnectorDef(from_instance="SQ_SRC1", to_instance="EXP_TEST", from_field="ID", to_field="ID", from_instance_type="", to_instance_type=""),
1164
+ ConnectorDef(from_instance="EXP_TEST", to_instance="TGT1", from_field="ID", to_field="ID", from_instance_type="", to_instance_type=""),
1165
+ ],
1166
+ instances=[
1167
+ InstanceDef(name="SRC1", type="Source Definition", transformation_name="SRC1"),
1168
+ InstanceDef(name="TGT1", type="Target Definition", transformation_name="TGT1"),
1169
+ InstanceDef(name="SQ_SRC1", type="Source Qualifier", transformation_name="SQ_SRC1"),
1170
+ InstanceDef(name="EXP_TEST", type="Expression", transformation_name="EXP_TEST"),
1171
+ ],
1172
+ )
1173
+ folder = FolderDef(name="TestFolder", mappings=[mapping])
1174
+ code = generate_mapping_code(mapping, folder, "pandas", 1)
1175
+ assert "import logging" in code
1176
+ assert "logger = logging.getLogger(__name__)" in code
1177
+
1178
+ def test_source_row_count_logging(self):
1179
+ from informatica_python.models import (
1180
+ MappingDef, FolderDef, TransformationDef, ConnectorDef,
1181
+ FieldDef, SourceDef, TargetDef, InstanceDef,
1182
+ )
1183
+ from informatica_python.generators.mapping_gen import generate_mapping_code
1184
+ mapping = MappingDef(
1185
+ name="m_log_src",
1186
+ sources=[SourceDef(name="SRC1", database_type="Flat File")],
1187
+ targets=[TargetDef(name="TGT1", database_type="Flat File", fields=[FieldDef(name="ID", datatype="integer")])],
1188
+ transformations=[
1189
+ TransformationDef(name="SQ_SRC1", type="Source Qualifier",
1190
+ fields=[FieldDef(name="ID", datatype="integer", porttype="INPUT/OUTPUT")]),
1191
+ ],
1192
+ connectors=[
1193
+ ConnectorDef(from_instance="SRC1", to_instance="SQ_SRC1", from_field="ID", to_field="ID", from_instance_type="", to_instance_type=""),
1194
+ ConnectorDef(from_instance="SQ_SRC1", to_instance="TGT1", from_field="ID", to_field="ID", from_instance_type="", to_instance_type=""),
1195
+ ],
1196
+ instances=[
1197
+ InstanceDef(name="SRC1", type="Source Definition", transformation_name="SRC1"),
1198
+ InstanceDef(name="TGT1", type="Target Definition", transformation_name="TGT1"),
1199
+ InstanceDef(name="SQ_SRC1", type="Source Qualifier", transformation_name="SQ_SRC1"),
1200
+ ],
1201
+ )
1202
+ folder = FolderDef(name="TestFolder", mappings=[mapping])
1203
+ code = generate_mapping_code(mapping, folder, "pandas", 1)
1204
+ assert "logger.info(f'Source SQ_SRC1:" in code
1205
+ assert "rows read" in code
1206
+
1207
+ def test_transform_row_count_logging(self):
1208
+ from informatica_python.models import (
1209
+ MappingDef, FolderDef, TransformationDef, ConnectorDef,
1210
+ FieldDef, SourceDef, TargetDef, InstanceDef,
1211
+ )
1212
+ from informatica_python.generators.mapping_gen import generate_mapping_code
1213
+ mapping = MappingDef(
1214
+ name="m_log_tx",
1215
+ sources=[SourceDef(name="SRC1", database_type="Flat File")],
1216
+ targets=[TargetDef(name="TGT1", database_type="Flat File", fields=[FieldDef(name="ID", datatype="integer")])],
1217
+ transformations=[
1218
+ TransformationDef(name="SQ_SRC1", type="Source Qualifier",
1219
+ fields=[FieldDef(name="ID", datatype="integer", porttype="INPUT/OUTPUT")]),
1220
+ TransformationDef(name="FIL_ACTIVE", type="Filter",
1221
+ fields=[FieldDef(name="ID", datatype="integer", porttype="INPUT/OUTPUT")],
1222
+ attributes=[]),
1223
+ ],
1224
+ connectors=[
1225
+ ConnectorDef(from_instance="SRC1", to_instance="SQ_SRC1", from_field="ID", to_field="ID", from_instance_type="", to_instance_type=""),
1226
+ ConnectorDef(from_instance="SQ_SRC1", to_instance="FIL_ACTIVE", from_field="ID", to_field="ID", from_instance_type="", to_instance_type=""),
1227
+ ConnectorDef(from_instance="FIL_ACTIVE", to_instance="TGT1", from_field="ID", to_field="ID", from_instance_type="", to_instance_type=""),
1228
+ ],
1229
+ instances=[
1230
+ InstanceDef(name="SRC1", type="Source Definition", transformation_name="SRC1"),
1231
+ InstanceDef(name="TGT1", type="Target Definition", transformation_name="TGT1"),
1232
+ InstanceDef(name="SQ_SRC1", type="Source Qualifier", transformation_name="SQ_SRC1"),
1233
+ InstanceDef(name="FIL_ACTIVE", type="Filter", transformation_name="FIL_ACTIVE"),
1234
+ ],
1235
+ )
1236
+ folder = FolderDef(name="TestFolder", mappings=[mapping])
1237
+ code = generate_mapping_code(mapping, folder, "pandas", 1)
1238
+ assert "_input_rows_fil_active = len(" in code
1239
+ assert "_output_rows_fil_active = len(df_fil_active" in code
1240
+ assert "FIL_ACTIVE (Filter):" in code
1241
+ assert "input rows ->" in code
1242
+ assert "output rows" in code
1243
+
1244
+ def test_target_row_count_logging(self):
1245
+ from informatica_python.models import (
1246
+ MappingDef, FolderDef, TransformationDef, ConnectorDef,
1247
+ FieldDef, SourceDef, TargetDef, InstanceDef,
1248
+ )
1249
+ from informatica_python.generators.mapping_gen import generate_mapping_code
1250
+ mapping = MappingDef(
1251
+ name="m_log_tgt",
1252
+ sources=[SourceDef(name="SRC1", database_type="Flat File")],
1253
+ targets=[TargetDef(name="TGT1", database_type="Flat File", fields=[FieldDef(name="ID", datatype="integer")])],
1254
+ transformations=[
1255
+ TransformationDef(name="SQ_SRC1", type="Source Qualifier",
1256
+ fields=[FieldDef(name="ID", datatype="integer", porttype="INPUT/OUTPUT")]),
1257
+ ],
1258
+ connectors=[
1259
+ ConnectorDef(from_instance="SRC1", to_instance="SQ_SRC1", from_field="ID", to_field="ID", from_instance_type="", to_instance_type=""),
1260
+ ConnectorDef(from_instance="SQ_SRC1", to_instance="TGT1", from_field="ID", to_field="ID", from_instance_type="", to_instance_type=""),
1261
+ ],
1262
+ instances=[
1263
+ InstanceDef(name="SRC1", type="Source Definition", transformation_name="SRC1"),
1264
+ InstanceDef(name="TGT1", type="Target Definition", transformation_name="TGT1"),
1265
+ InstanceDef(name="SQ_SRC1", type="Source Qualifier", transformation_name="SQ_SRC1"),
1266
+ ],
1267
+ )
1268
+ folder = FolderDef(name="TestFolder", mappings=[mapping])
1269
+ code = generate_mapping_code(mapping, folder, "pandas", 1)
1270
+ assert "logger.info(f'Target TGT1:" in code
1271
+ assert "rows written" in code
1272
+
1273
+ def test_input_output_rows_multiple_transforms(self):
1274
+ from informatica_python.models import (
1275
+ MappingDef, FolderDef, TransformationDef, ConnectorDef,
1276
+ FieldDef, SourceDef, TargetDef, InstanceDef,
1277
+ )
1278
+ from informatica_python.generators.mapping_gen import generate_mapping_code
1279
+ mapping = MappingDef(
1280
+ name="m_log_multi",
1281
+ sources=[SourceDef(name="SRC1", database_type="Flat File")],
1282
+ targets=[TargetDef(name="TGT1", database_type="Flat File", fields=[FieldDef(name="VAL", datatype="integer")])],
1283
+ transformations=[
1284
+ TransformationDef(name="SQ_SRC1", type="Source Qualifier",
1285
+ fields=[FieldDef(name="VAL", datatype="integer", porttype="INPUT/OUTPUT")]),
1286
+ TransformationDef(name="EXP1", type="Expression",
1287
+ fields=[FieldDef(name="VAL", datatype="integer", porttype="INPUT/OUTPUT")]),
1288
+ TransformationDef(name="SRT1", type="Sorter",
1289
+ fields=[FieldDef(name="VAL", datatype="integer", porttype="INPUT/OUTPUT")]),
1290
+ ],
1291
+ connectors=[
1292
+ ConnectorDef(from_instance="SRC1", to_instance="SQ_SRC1", from_field="ID", to_field="ID", from_instance_type="", to_instance_type=""),
1293
+ ConnectorDef(from_instance="SQ_SRC1", to_instance="EXP1", from_field="VAL", to_field="VAL", from_instance_type="", to_instance_type=""),
1294
+ ConnectorDef(from_instance="EXP1", to_instance="SRT1", from_field="VAL", to_field="VAL", from_instance_type="", to_instance_type=""),
1295
+ ConnectorDef(from_instance="SRT1", to_instance="TGT1", from_field="VAL", to_field="VAL", from_instance_type="", to_instance_type=""),
1296
+ ],
1297
+ instances=[
1298
+ InstanceDef(name="SRC1", type="Source Definition", transformation_name="SRC1"),
1299
+ InstanceDef(name="TGT1", type="Target Definition", transformation_name="TGT1"),
1300
+ InstanceDef(name="SQ_SRC1", type="Source Qualifier", transformation_name="SQ_SRC1"),
1301
+ InstanceDef(name="EXP1", type="Expression", transformation_name="EXP1"),
1302
+ InstanceDef(name="SRT1", type="Sorter", transformation_name="SRT1"),
1303
+ ],
1304
+ )
1305
+ folder = FolderDef(name="TestFolder", mappings=[mapping])
1306
+ code = generate_mapping_code(mapping, folder, "pandas", 1)
1307
+ assert "_input_rows_exp1" in code
1308
+ assert "_output_rows_exp1" in code
1309
+ assert "_input_rows_srt1" in code
1310
+ assert "_output_rows_srt1" in code
1311
+ assert code.count("logger.info") >= 4
1312
+
1313
+
1314
+ class TestGeneratedCodeDocumentation:
1315
+
1316
+ def test_mapping_docstring_sources_targets(self):
1317
+ from informatica_python.models import (
1318
+ MappingDef, FolderDef, TransformationDef, ConnectorDef,
1319
+ FieldDef, SourceDef, TargetDef, InstanceDef,
1320
+ )
1321
+ from informatica_python.generators.mapping_gen import generate_mapping_code
1322
+ mapping = MappingDef(
1323
+ name="m_doc_test",
1324
+ description="Load customer data from staging to warehouse",
1325
+ sources=[SourceDef(name="SRC_CUST", database_type="Flat File")],
1326
+ targets=[TargetDef(name="TGT_CUST", database_type="Flat File", fields=[FieldDef(name="ID", datatype="integer")])],
1327
+ transformations=[
1328
+ TransformationDef(name="SQ_SRC_CUST", type="Source Qualifier",
1329
+ fields=[FieldDef(name="ID", datatype="integer", porttype="INPUT/OUTPUT")]),
1330
+ ],
1331
+ connectors=[
1332
+ ConnectorDef(from_instance="SRC_CUST", to_instance="SQ_SRC_CUST", from_field="ID", to_field="ID", from_instance_type="", to_instance_type=""),
1333
+ ConnectorDef(from_instance="SQ_SRC_CUST", to_instance="TGT_CUST", from_field="ID", to_field="ID", from_instance_type="", to_instance_type=""),
1334
+ ],
1335
+ instances=[
1336
+ InstanceDef(name="SRC_CUST", type="Source Definition", transformation_name="SRC_CUST"),
1337
+ InstanceDef(name="TGT_CUST", type="Target Definition", transformation_name="TGT_CUST"),
1338
+ InstanceDef(name="SQ_SRC_CUST", type="Source Qualifier", transformation_name="SQ_SRC_CUST"),
1339
+ ],
1340
+ )
1341
+ folder = FolderDef(name="TestFolder", mappings=[mapping])
1342
+ code = generate_mapping_code(mapping, folder, "pandas", 1)
1343
+ assert "Execute mapping: m_doc_test" in code
1344
+ assert "Load customer data from staging to warehouse" in code
1345
+ assert "Sources: SRC_CUST" in code
1346
+ assert "Targets: TGT_CUST" in code
1347
+
1348
+ def test_mapping_docstring_transform_pipeline(self):
1349
+ from informatica_python.models import (
1350
+ MappingDef, FolderDef, TransformationDef, ConnectorDef,
1351
+ FieldDef, SourceDef, TargetDef, InstanceDef,
1352
+ )
1353
+ from informatica_python.generators.mapping_gen import generate_mapping_code
1354
+ mapping = MappingDef(
1355
+ name="m_doc_pipeline",
1356
+ sources=[SourceDef(name="SRC1", database_type="Flat File")],
1357
+ targets=[TargetDef(name="TGT1", database_type="Flat File", fields=[FieldDef(name="ID", datatype="integer")])],
1358
+ transformations=[
1359
+ TransformationDef(name="SQ_SRC1", type="Source Qualifier",
1360
+ fields=[FieldDef(name="ID", datatype="integer", porttype="INPUT/OUTPUT")]),
1361
+ TransformationDef(name="EXP_CALC", type="Expression",
1362
+ fields=[
1363
+ FieldDef(name="ID", datatype="integer", porttype="INPUT/OUTPUT"),
1364
+ FieldDef(name="TOTAL", datatype="decimal", porttype="OUTPUT"),
1365
+ ]),
1366
+ TransformationDef(name="FIL_VALID", type="Filter",
1367
+ fields=[
1368
+ FieldDef(name="ID", datatype="integer", porttype="INPUT"),
1369
+ FieldDef(name="TOTAL", datatype="decimal", porttype="INPUT/OUTPUT"),
1370
+ ],
1371
+ attributes=[]),
1372
+ ],
1373
+ connectors=[
1374
+ ConnectorDef(from_instance="SRC1", to_instance="SQ_SRC1", from_field="ID", to_field="ID", from_instance_type="", to_instance_type=""),
1375
+ ConnectorDef(from_instance="SQ_SRC1", to_instance="EXP_CALC", from_field="ID", to_field="ID", from_instance_type="", to_instance_type=""),
1376
+ ConnectorDef(from_instance="EXP_CALC", to_instance="FIL_VALID", from_field="TOTAL", to_field="TOTAL", from_instance_type="", to_instance_type=""),
1377
+ ConnectorDef(from_instance="FIL_VALID", to_instance="TGT1", from_field="ID", to_field="ID", from_instance_type="", to_instance_type=""),
1378
+ ],
1379
+ instances=[
1380
+ InstanceDef(name="SRC1", type="Source Definition", transformation_name="SRC1"),
1381
+ InstanceDef(name="TGT1", type="Target Definition", transformation_name="TGT1"),
1382
+ InstanceDef(name="SQ_SRC1", type="Source Qualifier", transformation_name="SQ_SRC1"),
1383
+ InstanceDef(name="EXP_CALC", type="Expression", transformation_name="EXP_CALC"),
1384
+ InstanceDef(name="FIL_VALID", type="Filter", transformation_name="FIL_VALID"),
1385
+ ],
1386
+ )
1387
+ folder = FolderDef(name="TestFolder", mappings=[mapping])
1388
+ code = generate_mapping_code(mapping, folder, "pandas", 1)
1389
+ assert "Transformation pipeline:" in code
1390
+ assert "EXP_CALC (Expression):" in code
1391
+ assert "FIL_VALID (Filter):" in code
1392
+ assert "input fields ->" in code
1393
+ assert "output fields" in code
1394
+
1395
+ def test_transform_field_documentation(self):
1396
+ from informatica_python.models import (
1397
+ MappingDef, FolderDef, TransformationDef, ConnectorDef,
1398
+ FieldDef, SourceDef, TargetDef, InstanceDef,
1399
+ )
1400
+ from informatica_python.generators.mapping_gen import generate_mapping_code
1401
+ mapping = MappingDef(
1402
+ name="m_doc_fields",
1403
+ sources=[SourceDef(name="SRC1", database_type="Flat File")],
1404
+ targets=[TargetDef(name="TGT1", database_type="Flat File", fields=[FieldDef(name="ID", datatype="integer")])],
1405
+ transformations=[
1406
+ TransformationDef(name="SQ_SRC1", type="Source Qualifier",
1407
+ fields=[FieldDef(name="ID", datatype="integer", porttype="INPUT/OUTPUT"),
1408
+ FieldDef(name="NAME", datatype="string", porttype="INPUT/OUTPUT")]),
1409
+ TransformationDef(name="EXP_UPPER", type="Expression",
1410
+ description="Uppercase the name field",
1411
+ fields=[
1412
+ FieldDef(name="ID", datatype="integer", porttype="INPUT"),
1413
+ FieldDef(name="NAME", datatype="string", porttype="INPUT"),
1414
+ FieldDef(name="NAME_UPPER", datatype="string", porttype="OUTPUT", expression="UPPER(NAME)"),
1415
+ ]),
1416
+ ],
1417
+ connectors=[
1418
+ ConnectorDef(from_instance="SRC1", to_instance="SQ_SRC1", from_field="ID", to_field="ID", from_instance_type="", to_instance_type=""),
1419
+ ConnectorDef(from_instance="SQ_SRC1", to_instance="EXP_UPPER", from_field="ID", to_field="ID", from_instance_type="", to_instance_type=""),
1420
+ ConnectorDef(from_instance="SQ_SRC1", to_instance="EXP_UPPER", from_field="NAME", to_field="NAME", from_instance_type="", to_instance_type=""),
1421
+ ConnectorDef(from_instance="EXP_UPPER", to_instance="TGT1", from_field="ID", to_field="ID", from_instance_type="", to_instance_type=""),
1422
+ ],
1423
+ instances=[
1424
+ InstanceDef(name="SRC1", type="Source Definition", transformation_name="SRC1"),
1425
+ InstanceDef(name="TGT1", type="Target Definition", transformation_name="TGT1"),
1426
+ InstanceDef(name="SQ_SRC1", type="Source Qualifier", transformation_name="SQ_SRC1"),
1427
+ InstanceDef(name="EXP_UPPER", type="Expression", transformation_name="EXP_UPPER"),
1428
+ ],
1429
+ )
1430
+ folder = FolderDef(name="TestFolder", mappings=[mapping])
1431
+ code = generate_mapping_code(mapping, folder, "pandas", 1)
1432
+ assert "# Input fields: ID, NAME" in code
1433
+ assert "# Output fields: NAME_UPPER" in code
1434
+ assert "# Description: Uppercase the name field" in code
1435
+
1436
+ def test_transform_description_comment(self):
1437
+ from informatica_python.models import (
1438
+ MappingDef, FolderDef, TransformationDef, ConnectorDef,
1439
+ FieldDef, SourceDef, TargetDef, InstanceDef,
1440
+ )
1441
+ from informatica_python.generators.mapping_gen import generate_mapping_code
1442
+ mapping = MappingDef(
1443
+ name="m_doc_desc",
1444
+ sources=[SourceDef(name="SRC1", database_type="Flat File")],
1445
+ targets=[TargetDef(name="TGT1", database_type="Flat File", fields=[FieldDef(name="ID", datatype="integer")])],
1446
+ transformations=[
1447
+ TransformationDef(name="SQ_SRC1", type="Source Qualifier",
1448
+ fields=[FieldDef(name="ID", datatype="integer", porttype="INPUT/OUTPUT")]),
1449
+ TransformationDef(name="AGG_TOTALS", type="Aggregator",
1450
+ description="Calculate regional totals",
1451
+ fields=[
1452
+ FieldDef(name="REGION", datatype="string", porttype="INPUT/OUTPUT"),
1453
+ FieldDef(name="TOTAL", datatype="decimal", porttype="OUTPUT", expression="SUM(AMOUNT)"),
1454
+ ]),
1455
+ ],
1456
+ connectors=[
1457
+ ConnectorDef(from_instance="SRC1", to_instance="SQ_SRC1", from_field="ID", to_field="ID", from_instance_type="", to_instance_type=""),
1458
+ ConnectorDef(from_instance="SQ_SRC1", to_instance="AGG_TOTALS", from_field="ID", to_field="REGION", from_instance_type="", to_instance_type=""),
1459
+ ConnectorDef(from_instance="AGG_TOTALS", to_instance="TGT1", from_field="REGION", to_field="ID", from_instance_type="", to_instance_type=""),
1460
+ ],
1461
+ instances=[
1462
+ InstanceDef(name="SRC1", type="Source Definition", transformation_name="SRC1"),
1463
+ InstanceDef(name="TGT1", type="Target Definition", transformation_name="TGT1"),
1464
+ InstanceDef(name="SQ_SRC1", type="Source Qualifier", transformation_name="SQ_SRC1"),
1465
+ InstanceDef(name="AGG_TOTALS", type="Aggregator", transformation_name="AGG_TOTALS"),
1466
+ ],
1467
+ )
1468
+ folder = FolderDef(name="TestFolder", mappings=[mapping])
1469
+ code = generate_mapping_code(mapping, folder, "pandas", 1)
1470
+ assert "# Description: Calculate regional totals" in code
1471
+ assert "# ---" in code
1472
+
1473
+ def test_no_description_no_desc_line(self):
1474
+ from informatica_python.models import (
1475
+ MappingDef, FolderDef, TransformationDef, ConnectorDef,
1476
+ FieldDef, SourceDef, TargetDef, InstanceDef,
1477
+ )
1478
+ from informatica_python.generators.mapping_gen import generate_mapping_code
1479
+ mapping = MappingDef(
1480
+ name="m_doc_nodesc",
1481
+ sources=[SourceDef(name="SRC1", database_type="Flat File")],
1482
+ targets=[TargetDef(name="TGT1", database_type="Flat File", fields=[FieldDef(name="ID", datatype="integer")])],
1483
+ transformations=[
1484
+ TransformationDef(name="SQ_SRC1", type="Source Qualifier",
1485
+ fields=[FieldDef(name="ID", datatype="integer", porttype="INPUT/OUTPUT")]),
1486
+ TransformationDef(name="EXP_PASS", type="Expression",
1487
+ fields=[FieldDef(name="ID", datatype="integer", porttype="INPUT/OUTPUT")]),
1488
+ ],
1489
+ connectors=[
1490
+ ConnectorDef(from_instance="SRC1", to_instance="SQ_SRC1", from_field="ID", to_field="ID", from_instance_type="", to_instance_type=""),
1491
+ ConnectorDef(from_instance="SQ_SRC1", to_instance="EXP_PASS", from_field="ID", to_field="ID", from_instance_type="", to_instance_type=""),
1492
+ ConnectorDef(from_instance="EXP_PASS", to_instance="TGT1", from_field="ID", to_field="ID", from_instance_type="", to_instance_type=""),
1493
+ ],
1494
+ instances=[
1495
+ InstanceDef(name="SRC1", type="Source Definition", transformation_name="SRC1"),
1496
+ InstanceDef(name="TGT1", type="Target Definition", transformation_name="TGT1"),
1497
+ InstanceDef(name="SQ_SRC1", type="Source Qualifier", transformation_name="SQ_SRC1"),
1498
+ InstanceDef(name="EXP_PASS", type="Expression", transformation_name="EXP_PASS"),
1499
+ ],
1500
+ )
1501
+ folder = FolderDef(name="TestFolder", mappings=[mapping])
1502
+ code = generate_mapping_code(mapping, folder, "pandas", 1)
1503
+ lines = code.split('\n')
1504
+ desc_lines = [l for l in lines if '# Description:' in l]
1505
+ for dl in desc_lines:
1506
+ assert dl.strip() != "# Description:"
1507
+
1508
+ def test_field_list_truncation(self):
1509
+ from informatica_python.models import (
1510
+ MappingDef, FolderDef, TransformationDef, ConnectorDef,
1511
+ FieldDef, SourceDef, TargetDef, InstanceDef,
1512
+ )
1513
+ from informatica_python.generators.mapping_gen import generate_mapping_code
1514
+ many_fields = [FieldDef(name=f"F{i}", datatype="string", porttype="INPUT/OUTPUT") for i in range(15)]
1515
+ mapping = MappingDef(
1516
+ name="m_doc_many",
1517
+ sources=[SourceDef(name="SRC1", database_type="Flat File")],
1518
+ targets=[TargetDef(name="TGT1", database_type="Flat File", fields=[FieldDef(name="ID", datatype="integer")])],
1519
+ transformations=[
1520
+ TransformationDef(name="SQ_SRC1", type="Source Qualifier",
1521
+ fields=[FieldDef(name="ID", datatype="integer", porttype="INPUT/OUTPUT")]),
1522
+ TransformationDef(name="EXP_MANY", type="Expression", fields=many_fields),
1523
+ ],
1524
+ connectors=[
1525
+ ConnectorDef(from_instance="SRC1", to_instance="SQ_SRC1", from_field="ID", to_field="ID", from_instance_type="", to_instance_type=""),
1526
+ ConnectorDef(from_instance="SQ_SRC1", to_instance="EXP_MANY", from_field="ID", to_field="F0", from_instance_type="", to_instance_type=""),
1527
+ ConnectorDef(from_instance="EXP_MANY", to_instance="TGT1", from_field="F0", to_field="ID", from_instance_type="", to_instance_type=""),
1528
+ ],
1529
+ instances=[
1530
+ InstanceDef(name="SRC1", type="Source Definition", transformation_name="SRC1"),
1531
+ InstanceDef(name="TGT1", type="Target Definition", transformation_name="TGT1"),
1532
+ InstanceDef(name="SQ_SRC1", type="Source Qualifier", transformation_name="SQ_SRC1"),
1533
+ InstanceDef(name="EXP_MANY", type="Expression", transformation_name="EXP_MANY"),
1534
+ ],
1535
+ )
1536
+ folder = FolderDef(name="TestFolder", mappings=[mapping])
1537
+ code = generate_mapping_code(mapping, folder, "pandas", 1)
1538
+ assert "..." in code