informatica-python 1.6.1__tar.gz → 1.7.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (30)
  1. {informatica_python-1.6.1 → informatica_python-1.7.0}/PKG-INFO +1 -1
  2. {informatica_python-1.6.1 → informatica_python-1.7.0}/informatica_python/__init__.py +1 -1
  3. {informatica_python-1.6.1 → informatica_python-1.7.0}/informatica_python/generators/helper_gen.py +232 -4
  4. {informatica_python-1.6.1 → informatica_python-1.7.0}/informatica_python/generators/mapping_gen.py +73 -7
  5. {informatica_python-1.6.1 → informatica_python-1.7.0}/informatica_python/generators/workflow_gen.py +23 -2
  6. {informatica_python-1.6.1 → informatica_python-1.7.0}/informatica_python.egg-info/PKG-INFO +1 -1
  7. {informatica_python-1.6.1 → informatica_python-1.7.0}/pyproject.toml +1 -1
  8. {informatica_python-1.6.1 → informatica_python-1.7.0}/tests/test_integration.py +283 -0
  9. {informatica_python-1.6.1 → informatica_python-1.7.0}/LICENSE +0 -0
  10. {informatica_python-1.6.1 → informatica_python-1.7.0}/README.md +0 -0
  11. {informatica_python-1.6.1 → informatica_python-1.7.0}/informatica_python/cli.py +0 -0
  12. {informatica_python-1.6.1 → informatica_python-1.7.0}/informatica_python/converter.py +0 -0
  13. {informatica_python-1.6.1 → informatica_python-1.7.0}/informatica_python/generators/__init__.py +0 -0
  14. {informatica_python-1.6.1 → informatica_python-1.7.0}/informatica_python/generators/config_gen.py +0 -0
  15. {informatica_python-1.6.1 → informatica_python-1.7.0}/informatica_python/generators/error_log_gen.py +0 -0
  16. {informatica_python-1.6.1 → informatica_python-1.7.0}/informatica_python/generators/sql_gen.py +0 -0
  17. {informatica_python-1.6.1 → informatica_python-1.7.0}/informatica_python/models.py +0 -0
  18. {informatica_python-1.6.1 → informatica_python-1.7.0}/informatica_python/parser.py +0 -0
  19. {informatica_python-1.6.1 → informatica_python-1.7.0}/informatica_python/utils/__init__.py +0 -0
  20. {informatica_python-1.6.1 → informatica_python-1.7.0}/informatica_python/utils/datatype_map.py +0 -0
  21. {informatica_python-1.6.1 → informatica_python-1.7.0}/informatica_python/utils/expression_converter.py +0 -0
  22. {informatica_python-1.6.1 → informatica_python-1.7.0}/informatica_python/utils/lib_adapters.py +0 -0
  23. {informatica_python-1.6.1 → informatica_python-1.7.0}/informatica_python/utils/sql_dialect.py +0 -0
  24. {informatica_python-1.6.1 → informatica_python-1.7.0}/informatica_python.egg-info/SOURCES.txt +0 -0
  25. {informatica_python-1.6.1 → informatica_python-1.7.0}/informatica_python.egg-info/dependency_links.txt +0 -0
  26. {informatica_python-1.6.1 → informatica_python-1.7.0}/informatica_python.egg-info/entry_points.txt +0 -0
  27. {informatica_python-1.6.1 → informatica_python-1.7.0}/informatica_python.egg-info/requires.txt +0 -0
  28. {informatica_python-1.6.1 → informatica_python-1.7.0}/informatica_python.egg-info/top_level.txt +0 -0
  29. {informatica_python-1.6.1 → informatica_python-1.7.0}/setup.cfg +0 -0
  30. {informatica_python-1.6.1 → informatica_python-1.7.0}/tests/test_converter.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: informatica-python
3
- Version: 1.6.1
3
+ Version: 1.7.0
4
4
  Summary: Convert Informatica PowerCenter workflow XML to Python/PySpark code
5
5
  Author: Nick
6
6
  License: MIT
@@ -7,7 +7,7 @@ Licensed under the MIT License.
7
7
 
8
8
  from informatica_python.converter import InformaticaConverter
9
9
 
10
- __version__ = "1.6.1"
10
+ __version__ = "1.7.0"
11
11
  __author__ = "Nick"
12
12
  __license__ = "MIT"
13
13
  __all__ = ["InformaticaConverter"]
@@ -1063,10 +1063,15 @@ def _add_expression_helpers(lines):
1063
1063
  lines.append("")
1064
1064
  lines.append("")
1065
1065
  lines.append("def percentile_val(value, pct):")
1066
- lines.append(' """Informatica PERCENTILE equivalent."""')
1066
+ lines.append(' """Informatica PERCENTILE equivalent (row-level fallback)."""')
1067
1067
  lines.append(" return value")
1068
1068
  lines.append("")
1069
1069
  lines.append("")
1070
+ lines.append("def percentile_df(df, col, pct=0.5):")
1071
+ lines.append(' """Informatica PERCENTILE equivalent (DataFrame-level)."""')
1072
+ lines.append(" return df[col].quantile(pct)")
1073
+ lines.append("")
1074
+ lines.append("")
1070
1075
  lines.append("def first_val(*args):")
1071
1076
  lines.append(' """Informatica FIRST equivalent."""')
1072
1077
  lines.append(" for a in args:")
@@ -1085,20 +1090,38 @@ def _add_expression_helpers(lines):
1085
1090
  lines.append("")
1086
1091
  lines.append("")
1087
1092
  lines.append("def moving_avg(value, window=3):")
1088
- lines.append(' """Informatica MOVINGAVG equivalent."""')
1093
+ lines.append(' """Informatica MOVINGAVG equivalent (row-level fallback)."""')
1089
1094
  lines.append(" return value")
1090
1095
  lines.append("")
1091
1096
  lines.append("")
1097
+ lines.append("def moving_avg_df(df, col, window=3):")
1098
+ lines.append(' """Informatica MOVINGAVG equivalent (DataFrame-level).')
1099
+ lines.append(' Returns a rolling mean over the specified window size."""')
1100
+ lines.append(" return df[col].rolling(window=window, min_periods=1).mean()")
1101
+ lines.append("")
1102
+ lines.append("")
1092
1103
  lines.append("def moving_sum(value, window=3):")
1093
- lines.append(' """Informatica MOVINGSUM equivalent."""')
1104
+ lines.append(' """Informatica MOVINGSUM equivalent (row-level fallback)."""')
1094
1105
  lines.append(" return value")
1095
1106
  lines.append("")
1096
1107
  lines.append("")
1108
+ lines.append("def moving_sum_df(df, col, window=3):")
1109
+ lines.append(' """Informatica MOVINGSUM equivalent (DataFrame-level).')
1110
+ lines.append(' Returns a rolling sum over the specified window size."""')
1111
+ lines.append(" return df[col].rolling(window=window, min_periods=1).sum()")
1112
+ lines.append("")
1113
+ lines.append("")
1097
1114
  lines.append("def cume(value):")
1098
- lines.append(' """Informatica CUME equivalent."""')
1115
+ lines.append(' """Informatica CUME equivalent (row-level fallback)."""')
1099
1116
  lines.append(" return value")
1100
1117
  lines.append("")
1101
1118
  lines.append("")
1119
+ lines.append("def cume_df(df, col):")
1120
+ lines.append(' """Informatica CUME equivalent (DataFrame-level).')
1121
+ lines.append(' Returns the cumulative (expanding) sum of the column."""')
1122
+ lines.append(" return df[col].expanding(min_periods=1).sum()")
1123
+ lines.append("")
1124
+ lines.append("")
1102
1125
  lines.append("def set_count_variable(var_name, value=1):")
1103
1126
  lines.append(' """Informatica SETCOUNTVARIABLE equivalent."""')
1104
1127
  lines.append(" return set_variable(var_name, value)")
@@ -1150,6 +1173,211 @@ def _add_expression_helpers(lines):
1150
1173
  lines.append("")
1151
1174
  lines.append("")
1152
1175
 
1176
+ _add_stored_procedure_functions(lines)
1177
+ _add_update_strategy_functions(lines)
1178
+ _add_state_persistence_functions(lines)
1179
+
1180
+
1181
+ def _add_stored_procedure_functions(lines):
1182
+ lines.append("# ============================================================")
1183
+ lines.append("# Stored Procedure Execution")
1184
+ lines.append("# ============================================================")
1185
+ lines.append("")
1186
+ lines.append("")
1187
+ lines.append("def call_stored_procedure(config, proc_name, params=None, connection_name='default', output_params=None):")
1188
+ lines.append(' """')
1189
+ lines.append(" Execute a stored procedure and return results.")
1190
+ lines.append(" params: dict of {param_name: value} for input parameters")
1191
+ lines.append(" output_params: list of output parameter names to capture")
1192
+ lines.append(" Returns: (result_df, output_values) where output_values is a dict")
1193
+ lines.append(' """')
1194
+ lines.append(" if params is None:")
1195
+ lines.append(" params = {}")
1196
+ lines.append(" if output_params is None:")
1197
+ lines.append(" output_params = []")
1198
+ lines.append("")
1199
+ lines.append(" conn = get_db_connection(config, connection_name)")
1200
+ lines.append(" conn_config = config.get('connections', {}).get(connection_name, {})")
1201
+ lines.append(" db_type = conn_config.get('type', 'mssql')")
1202
+ lines.append("")
1203
+ lines.append(" try:")
1204
+ lines.append(" cursor = conn.cursor()")
1205
+ lines.append(" logger.info(f'Calling stored procedure: {proc_name} with params: {params}')")
1206
+ lines.append("")
1207
+ lines.append(" if db_type == 'oracle':")
1208
+ lines.append(" param_list = list(params.values())")
1209
+ lines.append(" for out_p in output_params:")
1210
+ lines.append(" import cx_Oracle")
1211
+ lines.append(" param_list.append(cursor.var(cx_Oracle.STRING))")
1212
+ lines.append(" result = cursor.callproc(proc_name, param_list)")
1213
+ lines.append(" out_values = {}")
1214
+ lines.append(" for i, out_p in enumerate(output_params):")
1215
+ lines.append(" out_values[out_p] = result[len(params) + i].getvalue()")
1216
+ lines.append(" elif db_type == 'mssql':")
1217
+ lines.append(" param_placeholders = ', '.join(['?'] * len(params))")
1218
+ lines.append(" if output_params:")
1219
+ lines.append(" out_decl = '; '.join(f'@{p} OUTPUT' for p in output_params)")
1220
+ lines.append(" sql = f'EXEC {proc_name} {param_placeholders}'")
1221
+ lines.append(" if out_decl:")
1222
+ lines.append(" sql += f', {out_decl}'")
1223
+ lines.append(" else:")
1224
+ lines.append(" sql = f'EXEC {proc_name} {param_placeholders}'")
1225
+ lines.append(" cursor.execute(sql, list(params.values()))")
1226
+ lines.append(" out_values = {}")
1227
+ lines.append(" else:")
1228
+ lines.append(" param_placeholders = ', '.join(['%s'] * len(params))")
1229
+ lines.append(" cursor.execute(f'CALL {proc_name}({param_placeholders})', list(params.values()))")
1230
+ lines.append(" out_values = {}")
1231
+ lines.append("")
1232
+ lines.append(" try:")
1233
+ lines.append(" columns = [desc[0] for desc in cursor.description] if cursor.description else []")
1234
+ lines.append(" rows = cursor.fetchall()")
1235
+ lines.append(" result_df = pd.DataFrame(rows, columns=columns) if columns else pd.DataFrame()")
1236
+ lines.append(" except Exception:")
1237
+ lines.append(" result_df = pd.DataFrame()")
1238
+ lines.append("")
1239
+ lines.append(" conn.commit()")
1240
+ lines.append(" logger.info(f'Stored procedure {proc_name} returned {len(result_df)} rows')")
1241
+ lines.append(" return result_df, out_values")
1242
+ lines.append("")
1243
+ lines.append(" except Exception as e:")
1244
+ lines.append(" logger.error(f'Stored procedure {proc_name} failed: {e}')")
1245
+ lines.append(" conn.rollback()")
1246
+ lines.append(" raise")
1247
+ lines.append(" finally:")
1248
+ lines.append(" conn.close()")
1249
+ lines.append("")
1250
+ lines.append("")
1251
+
1252
+
1253
+ def _add_update_strategy_functions(lines):
1254
+ lines.append("# ============================================================")
1255
+ lines.append("# Update Strategy Target Operations")
1256
+ lines.append("# ============================================================")
1257
+ lines.append("")
1258
+ lines.append("")
1259
+ lines.append("def write_with_update_strategy(config, df, table_name, connection_name='default', key_columns=None, schema=None):")
1260
+ lines.append(' """')
1261
+ lines.append(" Write DataFrame to database using row-level update strategy.")
1262
+ lines.append(" Expects a '_update_strategy' column with values: INSERT, UPDATE, DELETE, REJECT.")
1263
+ lines.append(" key_columns: list of primary key columns for UPDATE/DELETE operations.")
1264
+ lines.append(' """')
1265
+ lines.append(" if '_update_strategy' not in df.columns:")
1266
+ lines.append(" logger.warning('No _update_strategy column found — falling back to bulk INSERT')")
1267
+ lines.append(" write_to_db(config, df, table_name, connection_name, schema=schema)")
1268
+ lines.append(" return")
1269
+ lines.append("")
1270
+ lines.append(" conn = get_db_connection(config, connection_name)")
1271
+ lines.append(" conn_config = config.get('connections', {}).get(connection_name, {})")
1272
+ lines.append(" if schema is None:")
1273
+ lines.append(" schema = conn_config.get('schema', 'dbo')")
1274
+ lines.append(" qualified_table = f'{schema}.{table_name}' if schema else table_name")
1275
+ lines.append("")
1276
+ lines.append(" data_cols = [c for c in df.columns if c != '_update_strategy']")
1277
+ lines.append(" if key_columns is None:")
1278
+ lines.append(" key_columns = data_cols[:1]")
1279
+ lines.append(" logger.warning(f'No key columns specified for update strategy — using first column: {key_columns}')")
1280
+ lines.append("")
1281
+ lines.append(" try:")
1282
+ lines.append(" cursor = conn.cursor()")
1283
+ lines.append("")
1284
+ lines.append(" df_insert = df[df['_update_strategy'] == 'INSERT']")
1285
+ lines.append(" df_update = df[df['_update_strategy'] == 'UPDATE']")
1286
+ lines.append(" df_delete = df[df['_update_strategy'] == 'DELETE']")
1287
+ lines.append(" df_reject = df[df['_update_strategy'] == 'REJECT']")
1288
+ lines.append("")
1289
+ lines.append(" if len(df_reject) > 0:")
1290
+ lines.append(" logger.warning(f'Rejecting {len(df_reject)} rows for target {table_name}')")
1291
+ lines.append("")
1292
+ lines.append(" if len(df_insert) > 0:")
1293
+ lines.append(" logger.info(f'Inserting {len(df_insert)} rows into {qualified_table}')")
1294
+ lines.append(" df_insert[data_cols].to_sql(table_name, conn, schema=schema, if_exists='append', index=False)")
1295
+ lines.append("")
1296
+ lines.append(" if len(df_update) > 0:")
1297
+ lines.append(" non_key_cols = [c for c in data_cols if c not in key_columns]")
1298
+ lines.append(" set_clause = ', '.join(f'{c} = ?' for c in non_key_cols)")
1299
+ lines.append(" where_clause = ' AND '.join(f'{c} = ?' for c in key_columns)")
1300
+ lines.append(" update_sql = f'UPDATE {qualified_table} SET {set_clause} WHERE {where_clause}'")
1301
+ lines.append(" logger.info(f'Updating {len(df_update)} rows in {qualified_table}')")
1302
+ lines.append(" for _, row in df_update.iterrows():")
1303
+ lines.append(" values = [row[c] for c in non_key_cols] + [row[c] for c in key_columns]")
1304
+ lines.append(" cursor.execute(update_sql, values)")
1305
+ lines.append("")
1306
+ lines.append(" if len(df_delete) > 0:")
1307
+ lines.append(" where_clause = ' AND '.join(f'{c} = ?' for c in key_columns)")
1308
+ lines.append(" delete_sql = f'DELETE FROM {qualified_table} WHERE {where_clause}'")
1309
+ lines.append(" logger.info(f'Deleting {len(df_delete)} rows from {qualified_table}')")
1310
+ lines.append(" for _, row in df_delete.iterrows():")
1311
+ lines.append(" values = [row[c] for c in key_columns]")
1312
+ lines.append(" cursor.execute(delete_sql, values)")
1313
+ lines.append("")
1314
+ lines.append(" conn.commit()")
1315
+ lines.append(" total = len(df_insert) + len(df_update) + len(df_delete)")
1316
+ lines.append(" logger.info(f'Update strategy complete for {qualified_table}: {len(df_insert)} inserts, {len(df_update)} updates, {len(df_delete)} deletes, {len(df_reject)} rejects')")
1317
+ lines.append("")
1318
+ lines.append(" except Exception as e:")
1319
+ lines.append(" logger.error(f'Update strategy write error for {qualified_table}: {e}')")
1320
+ lines.append(" conn.rollback()")
1321
+ lines.append(" raise")
1322
+ lines.append(" finally:")
1323
+ lines.append(" conn.close()")
1324
+ lines.append("")
1325
+ lines.append("")
1326
+
1327
+
1328
+ def _add_state_persistence_functions(lines):
1329
+ lines.append("# ============================================================")
1330
+ lines.append("# State Persistence (Persistent Variables)")
1331
+ lines.append("# ============================================================")
1332
+ lines.append("")
1333
+ lines.append("")
1334
+ lines.append("_persistent_state = {}")
1335
+ lines.append("")
1336
+ lines.append("")
1337
+ lines.append("def load_persistent_state(state_file='persistent_state.json'):")
1338
+ lines.append(' """Load persistent variable state from JSON file."""')
1339
+ lines.append(" import json")
1340
+ lines.append(" global _persistent_state")
1341
+ lines.append(" if os.path.exists(state_file):")
1342
+ lines.append(" try:")
1343
+ lines.append(" with open(state_file, 'r') as f:")
1344
+ lines.append(" _persistent_state = json.load(f)")
1345
+ lines.append(" logger.info(f'Loaded persistent state from {state_file} ({len(_persistent_state)} scopes)')")
1346
+ lines.append(" except Exception as e:")
1347
+ lines.append(" logger.warning(f'Could not load persistent state from {state_file}: {e}')")
1348
+ lines.append(" _persistent_state = {}")
1349
+ lines.append(" else:")
1350
+ lines.append(" logger.info(f'No persistent state file found at {state_file} — starting fresh')")
1351
+ lines.append(" _persistent_state = {}")
1352
+ lines.append(" return _persistent_state")
1353
+ lines.append("")
1354
+ lines.append("")
1355
+ lines.append("def save_persistent_state(state_file='persistent_state.json'):")
1356
+ lines.append(' """Save persistent variable state to JSON file."""')
1357
+ lines.append(" import json")
1358
+ lines.append(" try:")
1359
+ lines.append(" with open(state_file, 'w') as f:")
1360
+ lines.append(" json.dump(_persistent_state, f, indent=2, default=str)")
1361
+ lines.append(" logger.info(f'Saved persistent state to {state_file}')")
1362
+ lines.append(" except Exception as e:")
1363
+ lines.append(" logger.warning(f'Could not save persistent state to {state_file}: {e}')")
1364
+ lines.append("")
1365
+ lines.append("")
1366
+ lines.append("def get_persistent_variable(scope, var_name, default=None):")
1367
+ lines.append(' """Get a persistent variable value from state."""')
1368
+ lines.append(" scope_state = _persistent_state.get(scope, {})")
1369
+ lines.append(" return scope_state.get(var_name, default)")
1370
+ lines.append("")
1371
+ lines.append("")
1372
+ lines.append("def set_persistent_variable(scope, var_name, value):")
1373
+ lines.append(' """Set a persistent variable value in state."""')
1374
+ lines.append(" if scope not in _persistent_state:")
1375
+ lines.append(" _persistent_state[scope] = {}")
1376
+ lines.append(" _persistent_state[scope][var_name] = value")
1377
+ lines.append(" return value")
1378
+ lines.append("")
1379
+ lines.append("")
1380
+
1153
1381
 
1154
1382
  def _add_utility_functions(lines):
1155
1383
  lines.append("# ============================================================")
@@ -253,6 +253,7 @@ def generate_mapping_code(mapping: MappingDef, folder: FolderDef,
253
253
  lines.append(f" start_time = log_mapping_start('{mapping.name}')")
254
254
  lines.append("")
255
255
 
256
+ has_persistent_vars = False
256
257
  if mapping.variables:
257
258
  lines.append(" # Mapping Variables")
258
259
  for var in mapping.variables:
@@ -260,7 +261,11 @@ def generate_mapping_code(mapping: MappingDef, folder: FolderDef,
260
261
  default = var.default_value or "''"
261
262
  if var.datatype.lower() in ("integer", "bigint", "int"):
262
263
  default = var.default_value or "0"
263
- lines.append(f" {safe_var} = {default}")
264
+ if getattr(var, 'is_persistent', 'NO').upper() == 'YES':
265
+ has_persistent_vars = True
266
+ lines.append(f" {safe_var} = get_persistent_variable('{mapping.name}', '{safe_var}', {default})")
267
+ else:
268
+ lines.append(f" {safe_var} = {default}")
264
269
  lines.append("")
265
270
 
266
271
  if session_overrides:
@@ -978,10 +983,30 @@ def _gen_update_strategy(lines, tx, tx_safe, input_df, source_dfs):
978
983
  if attr.name == "Update Strategy Expression":
979
984
  strategy_expr = attr.value
980
985
  strategy_map = {"0": "INSERT", "1": "UPDATE", "2": "DELETE", "3": "REJECT"}
981
- strategy_name = strategy_map.get(strategy_expr, f"EXPR({strategy_expr})")
982
- lines.append(f" # Update Strategy: {strategy_name}")
986
+
987
+ lines.append(f" # Update Strategy: {tx.name}")
983
988
  lines.append(f" df_{tx_safe} = {input_df}.copy()")
984
- lines.append(f" df_{tx_safe}['_update_strategy'] = '{strategy_name}'")
989
+
990
+ if strategy_expr in strategy_map:
991
+ strategy_name = strategy_map[strategy_expr]
992
+ lines.append(f" df_{tx_safe}['_update_strategy'] = '{strategy_name}'")
993
+ else:
994
+ dd_map = {
995
+ "DD_INSERT": "INSERT", "DD_UPDATE": "UPDATE",
996
+ "DD_DELETE": "DELETE", "DD_REJECT": "REJECT",
997
+ }
998
+ expr = strategy_expr
999
+ for dd_const, label in dd_map.items():
1000
+ expr = expr.replace(dd_const, f"'{label}'")
1001
+ try:
1002
+ converted = convert_expression(expr)
1003
+ lines.append(f" # Original expression: {strategy_expr}")
1004
+ lines.append(f" def _resolve_strategy(row):")
1005
+ lines.append(f" return {converted}")
1006
+ lines.append(f" df_{tx_safe}['_update_strategy'] = df_{tx_safe}.apply(_resolve_strategy, axis=1)")
1007
+ except Exception:
1008
+ lines.append(f" # Could not parse strategy expression: {strategy_expr}")
1009
+ lines.append(f" df_{tx_safe}['_update_strategy'] = 'INSERT'")
985
1010
  source_dfs[tx.name] = f"df_{tx_safe}"
986
1011
 
987
1012
 
@@ -1186,13 +1211,47 @@ def _gen_custom_transform(lines, tx, tx_safe, input_df, input_sources, source_df
1186
1211
 
1187
1212
  def _gen_stored_proc(lines, tx, tx_safe, input_df, source_dfs):
1188
1213
  proc_name = ""
1214
+ conn_name = "default"
1189
1215
  for attr in tx.attributes:
1190
1216
  if attr.name in ("Stored Procedure Name", "sp name"):
1191
1217
  proc_name = attr.value
1218
+ elif attr.name in ("Connection Name", "connection_name"):
1219
+ conn_name = attr.value or "default"
1220
+
1221
+ input_params = []
1222
+ output_params = []
1223
+ for fld in tx.fields:
1224
+ pt = (fld.porttype or "").upper()
1225
+ if "INPUT" in pt and "OUTPUT" not in pt:
1226
+ input_params.append(fld.name)
1227
+ elif "OUTPUT" in pt:
1228
+ output_params.append(fld.name)
1192
1229
 
1193
1230
  lines.append(f" # Stored Procedure: {proc_name or tx.name}")
1194
- lines.append(f" # TODO: Execute stored procedure and capture results")
1195
- lines.append(f" df_{tx_safe} = {input_df}.copy()")
1231
+
1232
+ if input_params:
1233
+ param_dict_items = ", ".join(f"'{p}': {input_df}['{p}'].iloc[0] if '{p}' in {input_df}.columns else None" for p in input_params)
1234
+ lines.append(f" _sp_params_{tx_safe} = {{{param_dict_items}}}")
1235
+ else:
1236
+ lines.append(f" _sp_params_{tx_safe} = {{}}")
1237
+
1238
+ if output_params:
1239
+ out_list = repr(output_params)
1240
+ lines.append(f" _sp_out_names_{tx_safe} = {out_list}")
1241
+ else:
1242
+ lines.append(f" _sp_out_names_{tx_safe} = []")
1243
+
1244
+ lines.append(f" df_{tx_safe}, _sp_out_vals_{tx_safe} = call_stored_procedure(")
1245
+ lines.append(f" config, '{proc_name or tx.name}', params=_sp_params_{tx_safe},")
1246
+ lines.append(f" connection_name='{conn_name}', output_params=_sp_out_names_{tx_safe})")
1247
+
1248
+ if output_params:
1249
+ lines.append(f" if df_{tx_safe}.empty and _sp_out_vals_{tx_safe}:")
1250
+ lines.append(f" df_{tx_safe} = {input_df}.copy()")
1251
+ for op in output_params:
1252
+ lines.append(f" if '{op}' in _sp_out_vals_{tx_safe}:")
1253
+ lines.append(f" df_{tx_safe}['{op}'] = _sp_out_vals_{tx_safe}['{op}']")
1254
+
1196
1255
  source_dfs[tx.name] = f"df_{tx_safe}"
1197
1256
 
1198
1257
 
@@ -1265,7 +1324,14 @@ def _generate_target_write(lines, tgt_name, tgt_def, connector_graph, source_dfs
1265
1324
  lines.append(f" write_file(df_target_{tgt_safe}, _tgt_path_{tgt_safe}, config.get('targets', {{}}).get('{tgt_def.name}', {{}}))")
1266
1325
  elif tgt_def.database_type and tgt_def.database_type != "Flat File":
1267
1326
  conn_label = tgt_conn or "target"
1268
- lines.append(f" write_to_db(config, df_target_{tgt_safe}, '{tgt_def.name}', '{conn_label}')")
1327
+ lines.append(f" if '_update_strategy' in df_target_{tgt_safe}.columns:")
1328
+ key_cols = [f.name for f in tgt_def.fields if getattr(f, 'keytype', 'NOT A KEY') == 'PRIMARY KEY'] or None
1329
+ if key_cols:
1330
+ lines.append(f" write_with_update_strategy(config, df_target_{tgt_safe}, '{tgt_def.name}', '{conn_label}', key_columns={key_cols})")
1331
+ else:
1332
+ lines.append(f" write_with_update_strategy(config, df_target_{tgt_safe}, '{tgt_def.name}', '{conn_label}')")
1333
+ lines.append(f" else:")
1334
+ lines.append(f" write_to_db(config, df_target_{tgt_safe}, '{tgt_def.name}', '{conn_label}')")
1269
1335
  elif tgt_def.flatfile:
1270
1336
  _emit_flatfile_write(lines, tgt_safe, tgt_def)
1271
1337
  else:
@@ -22,7 +22,7 @@ def generate_workflow_code(folder: FolderDef) -> str:
22
22
  lines.append("import sys")
23
23
  lines.append("import logging")
24
24
  lines.append("from datetime import datetime")
25
- lines.append("from helper_functions import load_config, logger")
25
+ lines.append("from helper_functions import load_config, logger, load_persistent_state, save_persistent_state, get_persistent_variable, set_persistent_variable")
26
26
  lines.append("")
27
27
 
28
28
  for i, mapping in enumerate(folder.mappings, 1):
@@ -124,12 +124,24 @@ def _generate_workflow_function(lines, wf: WorkflowDef, folder: FolderDef, workl
124
124
  lines.append(f" failed_tasks = []")
125
125
  lines.append("")
126
126
 
127
+ has_persistent = any(
128
+ getattr(v, 'is_persistent', 'NO').upper() == 'YES'
129
+ for v in (wf.variables or [])
130
+ )
131
+ if has_persistent:
132
+ lines.append(" # Load persistent variable state from previous runs")
133
+ lines.append(" load_persistent_state()")
134
+ lines.append("")
135
+
127
136
  if wf.variables:
128
137
  lines.append(" # Workflow Variables")
129
138
  for var in wf.variables:
130
139
  var_name = _safe_name(var.name.replace("$$", ""))
131
140
  default = var.default_value or "''"
132
- lines.append(f" {var_name} = {default}")
141
+ if getattr(var, 'is_persistent', 'NO').upper() == 'YES':
142
+ lines.append(f" {var_name} = get_persistent_variable('{wf.name}', '{var_name}', {default})")
143
+ else:
144
+ lines.append(f" {var_name} = {default}")
133
145
  lines.append("")
134
146
 
135
147
  execution_order = _get_task_execution_order(wf)
@@ -146,6 +158,15 @@ def _generate_workflow_function(lines, wf: WorkflowDef, folder: FolderDef, workl
146
158
  for task in execution_order:
147
159
  _emit_task_code(lines, task, mapping_name_map, session_to_mapping, wf, worklets or [])
148
160
 
161
+ if has_persistent:
162
+ lines.append(" # Save persistent variable state for next run")
163
+ for var in wf.variables:
164
+ if getattr(var, 'is_persistent', 'NO').upper() == 'YES':
165
+ var_name = _safe_name(var.name.replace("$$", ""))
166
+ lines.append(f" set_persistent_variable('{wf.name}', '{var_name}', {var_name})")
167
+ lines.append(" save_persistent_state()")
168
+ lines.append("")
169
+
149
170
  lines.append(f" elapsed = (datetime.now() - wf_start).total_seconds()")
150
171
  lines.append(f" if success:")
151
172
  lines.append(f" logger.info(f'=== Workflow {wf.name} completed successfully in {{elapsed:.2f}}s ===')")
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: informatica-python
3
- Version: 1.6.1
3
+ Version: 1.7.0
4
4
  Summary: Convert Informatica PowerCenter workflow XML to Python/PySpark code
5
5
  Author: Nick
6
6
  License: MIT
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
4
4
 
5
5
  [project]
6
6
  name = "informatica-python"
7
- version = "1.6.1"
7
+ version = "1.7.0"
8
8
  description = "Convert Informatica PowerCenter workflow XML to Python/PySpark code"
9
9
  readme = "README.md"
10
10
  license = {text = "MIT"}
@@ -827,3 +827,286 @@ class TestDataQualityValidation:
827
827
  pass
828
828
  help_text = f.getvalue()
829
829
  assert "--validate-casts" in help_text or "validate_casts" in help_text
830
+
831
+
832
+ class TestWindowAnalyticFunctions:
833
+ def test_moving_avg_df_generated(self):
834
+ from informatica_python.models import FolderDef
835
+ from informatica_python.generators.helper_gen import generate_helper_functions
836
+ folder = FolderDef(name="TestFolder")
837
+ code = generate_helper_functions(folder)
838
+ assert "def moving_avg_df(df, col, window=3):" in code
839
+ assert ".rolling(window=window, min_periods=1).mean()" in code
840
+
841
+ def test_moving_sum_df_generated(self):
842
+ from informatica_python.models import FolderDef
843
+ from informatica_python.generators.helper_gen import generate_helper_functions
844
+ folder = FolderDef(name="TestFolder")
845
+ code = generate_helper_functions(folder)
846
+ assert "def moving_sum_df(df, col, window=3):" in code
847
+ assert ".rolling(window=window, min_periods=1).sum()" in code
848
+
849
+ def test_cume_df_generated(self):
850
+ from informatica_python.models import FolderDef
851
+ from informatica_python.generators.helper_gen import generate_helper_functions
852
+ folder = FolderDef(name="TestFolder")
853
+ code = generate_helper_functions(folder)
854
+ assert "def cume_df(df, col):" in code
855
+ assert ".expanding(min_periods=1).sum()" in code
856
+
857
+ def test_percentile_df_generated(self):
858
+ from informatica_python.models import FolderDef
859
+ from informatica_python.generators.helper_gen import generate_helper_functions
860
+ folder = FolderDef(name="TestFolder")
861
+ code = generate_helper_functions(folder)
862
+ assert "def percentile_df(df, col, pct=0.5):" in code
863
+ assert ".quantile(pct)" in code
864
+
865
+ def test_row_level_fallbacks_still_exist(self):
866
+ from informatica_python.models import FolderDef
867
+ from informatica_python.generators.helper_gen import generate_helper_functions
868
+ folder = FolderDef(name="TestFolder")
869
+ code = generate_helper_functions(folder)
870
+ assert "def moving_avg(value, window=3):" in code
871
+ assert "def moving_sum(value, window=3):" in code
872
+ assert "def cume(value):" in code
873
+ assert "def percentile_val(value, pct):" in code
874
+
875
+ def test_window_functions_execute(self):
876
+ import pandas as pd
877
+ df = pd.DataFrame({"val": [10, 20, 30, 40, 50]})
878
+ rolling_mean = df["val"].rolling(window=3, min_periods=1).mean()
879
+ assert rolling_mean.iloc[0] == 10.0
880
+ assert rolling_mean.iloc[2] == 20.0
881
+ rolling_sum = df["val"].rolling(window=3, min_periods=1).sum()
882
+ assert rolling_sum.iloc[2] == 60.0
883
+ cume_sum = df["val"].expanding(min_periods=1).sum()
884
+ assert cume_sum.iloc[4] == 150.0
885
+
886
+
887
+ class TestUpdateStrategy:
888
+ def test_static_insert_strategy(self):
889
+ from informatica_python.models import TransformationDef, TableAttribute
890
+ from informatica_python.generators.mapping_gen import _gen_update_strategy
891
+ tx = TransformationDef(name="UPD_INSERT", type="Update Strategy",
892
+ attributes=[TableAttribute(name="Update Strategy Expression", value="0")])
893
+ lines = []
894
+ source_dfs = {}
895
+ _gen_update_strategy(lines, tx, "upd_insert", "df_input", source_dfs)
896
+ code = "\n".join(lines)
897
+ assert "'_update_strategy'] = 'INSERT'" in code
898
+ assert "upd_insert" in source_dfs.get("UPD_INSERT", "")
899
+
900
+ def test_static_update_strategy(self):
901
+ from informatica_python.models import TransformationDef, TableAttribute
902
+ from informatica_python.generators.mapping_gen import _gen_update_strategy
903
+ tx = TransformationDef(name="UPD_UPDATE", type="Update Strategy",
904
+ attributes=[TableAttribute(name="Update Strategy Expression", value="1")])
905
+ lines = []
906
+ source_dfs = {}
907
+ _gen_update_strategy(lines, tx, "upd_update", "df_input", source_dfs)
908
+ code = "\n".join(lines)
909
+ assert "'_update_strategy'] = 'UPDATE'" in code
910
+
911
+ def test_static_delete_strategy(self):
912
+ from informatica_python.models import TransformationDef, TableAttribute
913
+ from informatica_python.generators.mapping_gen import _gen_update_strategy
914
+ tx = TransformationDef(name="UPD_DEL", type="Update Strategy",
915
+ attributes=[TableAttribute(name="Update Strategy Expression", value="2")])
916
+ lines = []
917
+ source_dfs = {}
918
+ _gen_update_strategy(lines, tx, "upd_del", "df_input", source_dfs)
919
+ code = "\n".join(lines)
920
+ assert "'_update_strategy'] = 'DELETE'" in code
921
+
922
+ def test_dd_constant_expression(self):
923
+ from informatica_python.models import TransformationDef, TableAttribute
924
+ from informatica_python.generators.mapping_gen import _gen_update_strategy
925
+ tx = TransformationDef(name="UPD_EXPR", type="Update Strategy",
926
+ attributes=[TableAttribute(name="Update Strategy Expression", value="DD_UPDATE")])
927
+ lines = []
928
+ source_dfs = {}
929
+ _gen_update_strategy(lines, tx, "upd_expr", "df_input", source_dfs)
930
+ code = "\n".join(lines)
931
+ assert "_update_strategy" in code
932
+ assert "UPD_EXPR" in source_dfs
933
+
934
+ def test_target_write_routes_strategy(self):
935
+ from informatica_python.models import TargetDef, FieldDef
936
+ from informatica_python.generators.mapping_gen import _generate_target_write
937
+ tgt = TargetDef(name="TGT_DB", database_type="Oracle",
938
+ fields=[FieldDef(name="ID", datatype="integer", keytype="PRIMARY KEY"),
939
+ FieldDef(name="VAL", datatype="string")])
940
+ lines = []
941
+ source_dfs = {"SRC": "df_src"}
942
+ connector_graph = {"to": {"TGT_DB": []}, "from": {}}
943
+ _generate_target_write(lines, "TGT_DB", tgt, connector_graph, source_dfs, {}, {})
944
+ code = "\n".join(lines)
945
+ assert "write_with_update_strategy" in code
946
+ assert "write_to_db" in code
947
+ assert "_update_strategy" in code
948
+
949
+ def test_update_strategy_helper_generated(self):
950
+ from informatica_python.models import FolderDef
951
+ from informatica_python.generators.helper_gen import generate_helper_functions
952
+ folder = FolderDef(name="TestFolder")
953
+ code = generate_helper_functions(folder)
954
+ assert "def write_with_update_strategy(" in code
955
+ assert "df_insert" in code
956
+ assert "df_update" in code
957
+ assert "df_delete" in code
958
+ assert "df_reject" in code
959
+ assert "INSERT" in code
960
+ assert "UPDATE" in code
961
+ assert "DELETE" in code
962
+
963
+
964
+ class TestStoredProcedure:
965
+ def test_stored_proc_basic(self):
966
+ from informatica_python.models import TransformationDef, TableAttribute
967
+ from informatica_python.generators.mapping_gen import _gen_stored_proc
968
+ tx = TransformationDef(name="SP_GET_DATA", type="Stored Procedure",
969
+ attributes=[TableAttribute(name="Stored Procedure Name", value="usp_get_data")])
970
+ lines = []
971
+ source_dfs = {}
972
+ _gen_stored_proc(lines, tx, "sp_get_data", "df_input", source_dfs)
973
+ code = "\n".join(lines)
974
+ assert "call_stored_procedure" in code
975
+ assert "usp_get_data" in code
976
+ assert "SP_GET_DATA" in source_dfs
977
+
978
+ def test_stored_proc_with_input_params(self):
979
+ from informatica_python.models import TransformationDef, TableAttribute, FieldDef
980
+ from informatica_python.generators.mapping_gen import _gen_stored_proc
981
+ tx = TransformationDef(name="SP_LOOKUP", type="Stored Procedure",
982
+ attributes=[TableAttribute(name="Stored Procedure Name", value="usp_lookup")],
983
+ fields=[FieldDef(name="CUSTOMER_ID", datatype="integer", porttype="INPUT"),
984
+ FieldDef(name="RESULT", datatype="string", porttype="OUTPUT")])
985
+ lines = []
986
+ source_dfs = {}
987
+ _gen_stored_proc(lines, tx, "sp_lookup", "df_input", source_dfs)
988
+ code = "\n".join(lines)
989
+ assert "CUSTOMER_ID" in code
990
+ assert "RESULT" in code
991
+ assert "_sp_out_names_" in code
992
+ assert "call_stored_procedure" in code
993
+
994
+ def test_stored_proc_with_output_params(self):
995
+ from informatica_python.models import TransformationDef, TableAttribute, FieldDef
996
+ from informatica_python.generators.mapping_gen import _gen_stored_proc
997
+ tx = TransformationDef(name="SP_OUT", type="Stored Procedure",
998
+ attributes=[TableAttribute(name="Stored Procedure Name", value="usp_out")],
999
+ fields=[FieldDef(name="RET_VAL", datatype="string", porttype="OUTPUT")])
1000
+ lines = []
1001
+ source_dfs = {}
1002
+ _gen_stored_proc(lines, tx, "sp_out", "df_input", source_dfs)
1003
+ code = "\n".join(lines)
1004
+ assert "['RET_VAL']" in code
1005
+ assert "_sp_out_vals_" in code
1006
+ assert "if df_sp_out.empty" in code
1007
+
1008
+ def test_stored_proc_helper_generated(self):
1009
+ from informatica_python.models import FolderDef
1010
+ from informatica_python.generators.helper_gen import generate_helper_functions
1011
+ folder = FolderDef(name="TestFolder")
1012
+ code = generate_helper_functions(folder)
1013
+ assert "def call_stored_procedure(" in code
1014
+ assert "cursor.callproc" in code
1015
+ assert "EXEC" in code
1016
+ assert "CALL" in code
1017
+
1018
+ def test_stored_proc_connection_override(self):
1019
+ from informatica_python.models import TransformationDef, TableAttribute
1020
+ from informatica_python.generators.mapping_gen import _gen_stored_proc
1021
+ tx = TransformationDef(name="SP_CONN", type="Stored Procedure",
1022
+ attributes=[
1023
+ TableAttribute(name="Stored Procedure Name", value="usp_custom"),
1024
+ TableAttribute(name="Connection Name", value="oracle_prod"),
1025
+ ])
1026
+ lines = []
1027
+ source_dfs = {}
1028
+ _gen_stored_proc(lines, tx, "sp_conn", "df_input", source_dfs)
1029
+ code = "\n".join(lines)
1030
+ assert "oracle_prod" in code
1031
+
1032
+
1033
+ class TestStatePersistence:
1034
+ def test_persistent_state_helpers_generated(self):
1035
+ from informatica_python.models import FolderDef
1036
+ from informatica_python.generators.helper_gen import generate_helper_functions
1037
+ folder = FolderDef(name="TestFolder")
1038
+ code = generate_helper_functions(folder)
1039
+ assert "def load_persistent_state(" in code
1040
+ assert "def save_persistent_state(" in code
1041
+ assert "def get_persistent_variable(" in code
1042
+ assert "def set_persistent_variable(" in code
1043
+ assert "_persistent_state" in code
1044
+ assert "persistent_state.json" in code
1045
+
1046
+ def test_workflow_loads_persistent_state(self):
1047
+ from informatica_python.models import FolderDef, WorkflowDef, WorkflowVariable
1048
+ from informatica_python.generators.workflow_gen import generate_workflow_code
1049
+ wf = WorkflowDef(name="wf_test",
1050
+ variables=[WorkflowVariable(name="$$RUN_COUNT", datatype="integer",
1051
+ default_value="0", is_persistent="YES")])
1052
+ folder = FolderDef(name="TestFolder", workflows=[wf])
1053
+ code = generate_workflow_code(folder)
1054
+ assert "load_persistent_state()" in code
1055
+ assert "get_persistent_variable" in code
1056
+ assert "save_persistent_state()" in code
1057
+ assert "set_persistent_variable" in code
1058
+
1059
+ def test_workflow_no_persist_when_not_needed(self):
1060
+ from informatica_python.models import FolderDef, WorkflowDef, WorkflowVariable
1061
+ from informatica_python.generators.workflow_gen import generate_workflow_code
1062
+ wf = WorkflowDef(name="wf_test",
1063
+ variables=[WorkflowVariable(name="$$TEMP_VAR", datatype="string",
1064
+ default_value="''", is_persistent="NO")])
1065
+ folder = FolderDef(name="TestFolder", workflows=[wf])
1066
+ code = generate_workflow_code(folder)
1067
+ assert "load_persistent_state" not in code or "import" in code
1068
+ assert "save_persistent_state" not in code or "import" in code
1069
+
1070
+ def test_mapping_persistent_variables(self):
1071
+ from informatica_python.models import (
1072
+ FolderDef, MappingDef, MappingVariable, SourceDef, FieldDef,
1073
+ TransformationDef, ConnectorDef, InstanceDef
1074
+ )
1075
+ from informatica_python.generators.mapping_gen import generate_mapping_code
1076
+ mapping = MappingDef(
1077
+ name="m_persist_test",
1078
+ variables=[
1079
+ MappingVariable(name="$$LAST_ID", datatype="integer",
1080
+ default_value="0", is_persistent="YES"),
1081
+ MappingVariable(name="$$TEMP", datatype="string",
1082
+ default_value="''", is_persistent="NO"),
1083
+ ],
1084
+ sources=[],
1085
+ targets=[],
1086
+ transformations=[],
1087
+ connectors=[],
1088
+ instances=[],
1089
+ )
1090
+ folder = FolderDef(name="TestFolder", mappings=[mapping])
1091
+ code = generate_mapping_code(mapping, folder, "pandas", 1)
1092
+ assert "get_persistent_variable('m_persist_test', 'last_id'" in code
1093
+ assert "temp = ''" in code
1094
+
1095
+ def test_workflow_persistent_imports(self):
1096
+ from informatica_python.models import FolderDef, WorkflowDef, WorkflowVariable
1097
+ from informatica_python.generators.workflow_gen import generate_workflow_code
1098
+ wf = WorkflowDef(name="wf_test",
1099
+ variables=[WorkflowVariable(name="$$COUNT", is_persistent="YES")])
1100
+ folder = FolderDef(name="TestFolder", workflows=[wf])
1101
+ code = generate_workflow_code(folder)
1102
+ assert "load_persistent_state" in code
1103
+ assert "save_persistent_state" in code
1104
+
1105
+ def test_state_file_json_format(self):
1106
+ from informatica_python.models import FolderDef
1107
+ from informatica_python.generators.helper_gen import generate_helper_functions
1108
+ folder = FolderDef(name="TestFolder")
1109
+ code = generate_helper_functions(folder)
1110
+ assert "json.load" in code
1111
+ assert "json.dump" in code
1112
+ assert "persistent_state.json" in code