informatica-python 1.9.4__tar.gz → 1.9.5__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (31)
  1. {informatica_python-1.9.4 → informatica_python-1.9.5}/PKG-INFO +1 -1
  2. {informatica_python-1.9.4 → informatica_python-1.9.5}/informatica_python/__init__.py +1 -1
  3. {informatica_python-1.9.4 → informatica_python-1.9.5}/informatica_python/generators/helper_gen.py +218 -49
  4. {informatica_python-1.9.4 → informatica_python-1.9.5}/informatica_python/generators/mapping_gen.py +10 -2
  5. {informatica_python-1.9.4 → informatica_python-1.9.5}/informatica_python/utils/expression_converter.py +1 -1
  6. {informatica_python-1.9.4 → informatica_python-1.9.5}/informatica_python.egg-info/PKG-INFO +1 -1
  7. {informatica_python-1.9.4 → informatica_python-1.9.5}/pyproject.toml +1 -1
  8. {informatica_python-1.9.4 → informatica_python-1.9.5}/tests/test_integration.py +223 -0
  9. {informatica_python-1.9.4 → informatica_python-1.9.5}/LICENSE +0 -0
  10. {informatica_python-1.9.4 → informatica_python-1.9.5}/README.md +0 -0
  11. {informatica_python-1.9.4 → informatica_python-1.9.5}/informatica_python/cli.py +0 -0
  12. {informatica_python-1.9.4 → informatica_python-1.9.5}/informatica_python/converter.py +0 -0
  13. {informatica_python-1.9.4 → informatica_python-1.9.5}/informatica_python/generators/__init__.py +0 -0
  14. {informatica_python-1.9.4 → informatica_python-1.9.5}/informatica_python/generators/config_gen.py +0 -0
  15. {informatica_python-1.9.4 → informatica_python-1.9.5}/informatica_python/generators/error_log_gen.py +0 -0
  16. {informatica_python-1.9.4 → informatica_python-1.9.5}/informatica_python/generators/sql_gen.py +0 -0
  17. {informatica_python-1.9.4 → informatica_python-1.9.5}/informatica_python/generators/workflow_gen.py +0 -0
  18. {informatica_python-1.9.4 → informatica_python-1.9.5}/informatica_python/models.py +0 -0
  19. {informatica_python-1.9.4 → informatica_python-1.9.5}/informatica_python/parser.py +0 -0
  20. {informatica_python-1.9.4 → informatica_python-1.9.5}/informatica_python/utils/__init__.py +0 -0
  21. {informatica_python-1.9.4 → informatica_python-1.9.5}/informatica_python/utils/datatype_map.py +0 -0
  22. {informatica_python-1.9.4 → informatica_python-1.9.5}/informatica_python/utils/lib_adapters.py +0 -0
  23. {informatica_python-1.9.4 → informatica_python-1.9.5}/informatica_python/utils/sql_dialect.py +0 -0
  24. {informatica_python-1.9.4 → informatica_python-1.9.5}/informatica_python.egg-info/SOURCES.txt +0 -0
  25. {informatica_python-1.9.4 → informatica_python-1.9.5}/informatica_python.egg-info/dependency_links.txt +0 -0
  26. {informatica_python-1.9.4 → informatica_python-1.9.5}/informatica_python.egg-info/entry_points.txt +0 -0
  27. {informatica_python-1.9.4 → informatica_python-1.9.5}/informatica_python.egg-info/requires.txt +0 -0
  28. {informatica_python-1.9.4 → informatica_python-1.9.5}/informatica_python.egg-info/top_level.txt +0 -0
  29. {informatica_python-1.9.4 → informatica_python-1.9.5}/setup.cfg +0 -0
  30. {informatica_python-1.9.4 → informatica_python-1.9.5}/tests/test_converter.py +0 -0
  31. {informatica_python-1.9.4 → informatica_python-1.9.5}/tests/test_expressions.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: informatica-python
3
- Version: 1.9.4
3
+ Version: 1.9.5
4
4
  Summary: Convert Informatica PowerCenter workflow XML to Python/PySpark code
5
5
  Author: Nick
6
6
  License: MIT
@@ -7,7 +7,7 @@ Licensed under the MIT License.
7
7
 
8
8
  from informatica_python.converter import InformaticaConverter
9
9
 
10
- __version__ = "1.9.4"
10
+ __version__ = "1.9.5"
11
11
  __author__ = "Nick"
12
12
  __license__ = "MIT"
13
13
  __all__ = ["InformaticaConverter"]
@@ -11,6 +11,7 @@ def generate_helper_functions(folder: FolderDef, data_lib: str = "pandas") -> st
11
11
  lines.append("")
12
12
 
13
13
  lines.append("import os")
14
+ lines.append("import re")
14
15
  lines.append("import sys")
15
16
  lines.append("import logging")
16
17
  lines.append("import yaml")
@@ -58,6 +59,7 @@ def generate_helper_functions(folder: FolderDef, data_lib: str = "pandas") -> st
58
59
  lines.append("")
59
60
 
60
61
  _add_param_file_functions(lines)
62
+ _add_env_resolution(lines)
61
63
  _add_db_functions(lines, data_lib)
62
64
  _add_file_functions(lines, data_lib)
63
65
  _add_expression_helpers(lines)
@@ -121,23 +123,143 @@ def _add_param_file_functions(lines):
121
123
  lines.append("")
122
124
 
123
125
 
126
+ def _add_env_resolution(lines):
127
+ lines.append("# ============================================================")
128
+ lines.append("# Environment Variable Resolution")
129
+ lines.append("# ============================================================")
130
+ lines.append("")
131
+ lines.append("")
132
+ lines.append("def resolve_env(value, config=None):")
133
+ lines.append(' """')
134
+ lines.append(" Resolve ${VAR} placeholders in a string.")
135
+ lines.append(" Lookup order: OS environment variable -> config connections/variables -> literal.")
136
+ lines.append(' """')
137
+ lines.append(" if not isinstance(value, str):")
138
+ lines.append(" return value")
139
+ lines.append(" def _replace(m):")
140
+ lines.append(" var = m.group(1)")
141
+ lines.append(" env_val = os.environ.get(var)")
142
+ lines.append(" if env_val is not None:")
143
+ lines.append(" return env_val")
144
+ lines.append(" if config:")
145
+ lines.append(" for section in ('variables', 'connections', 'params'):")
146
+ lines.append(" sect = config.get(section, {})")
147
+ lines.append(" if isinstance(sect, dict) and var in sect:")
148
+ lines.append(" v = sect[var]")
149
+ lines.append(" return str(v) if not isinstance(v, dict) else str(v.get('default_value', ''))")
150
+ lines.append(" return m.group(0)")
151
+ lines.append(r" return re.sub(r'\$\{(\w+)\}', _replace, value)")
152
+ lines.append("")
153
+ lines.append("")
154
+ lines.append("def rename_with_duplicates(df, col_mapping):")
155
+ lines.append(' """')
156
+ lines.append(" Rename DataFrame columns supporting one-source-to-many-target mapping.")
157
+ lines.append(" col_mapping is {target_col: source_col}.")
158
+ lines.append(" When multiple target cols map to the same source col, we duplicate the column.")
159
+ lines.append(' """')
160
+ lines.append(" result = df.copy()")
161
+ lines.append(" from collections import Counter")
162
+ lines.append(" src_counts = Counter(col_mapping.values())")
163
+ lines.append(" simple_rename = {}")
164
+ lines.append(" for tgt, src in col_mapping.items():")
165
+ lines.append(" if src_counts[src] == 1 and src in result.columns:")
166
+ lines.append(" simple_rename[src] = tgt")
167
+ lines.append(" elif src in result.columns:")
168
+ lines.append(" result[tgt] = result[src].copy()")
169
+ lines.append(" if simple_rename:")
170
+ lines.append(" result = result.rename(columns=simple_rename)")
171
+ lines.append(" return result")
172
+ lines.append("")
173
+ lines.append("")
174
+
175
+ lines.append("def resolve_builtin_variable(var_name, mapping_name='', session_name='', folder_name=''):")
176
+ lines.append(' """Resolve Informatica built-in variables like $PMMappingName, $PMSessionName."""')
177
+ lines.append(" builtins = {")
178
+ lines.append(" 'PMMappingName': mapping_name,")
179
+ lines.append(" 'PMSessionName': session_name,")
180
+ lines.append(" 'PMFolderName': folder_name,")
181
+ lines.append(" 'PMWorkflowName': os.environ.get('INFA_VAR_PMWorkflowName', ''),")
182
+ lines.append(" 'PMWorkflowRunId': os.environ.get('INFA_VAR_PMWorkflowRunId', '0'),")
183
+ lines.append(" 'PMSessionRunId': os.environ.get('INFA_VAR_PMSessionRunId', '0'),")
184
+ lines.append(" 'PMIntegrationServiceName': os.environ.get('INFA_VAR_PMIntegrationServiceName', ''),")
185
+ lines.append(" 'PMRepositoryServiceName': os.environ.get('INFA_VAR_PMRepositoryServiceName', ''),")
186
+ lines.append(" 'PMSourceDBConnection': os.environ.get('INFA_VAR_PMSourceDBConnection', ''),")
187
+ lines.append(" 'PMTargetDBConnection': os.environ.get('INFA_VAR_PMTargetDBConnection', ''),")
188
+ lines.append(" }")
189
+ lines.append(" clean = var_name.lstrip('$').lstrip('PM')")
190
+ lines.append(" for key, val in builtins.items():")
191
+ lines.append(" if key.lower() == ('PM' + clean).lower() or key.lower() == var_name.lstrip('$').lower():")
192
+ lines.append(" return val")
193
+ lines.append(" return os.environ.get(f'INFA_VAR_{var_name.lstrip(\"$\")}', '')")
194
+ lines.append("")
195
+ lines.append("")
196
+
197
+
124
198
  def _add_db_functions(lines, data_lib):
125
199
  lines.append("# ============================================================")
126
200
  lines.append("# Database Operations")
127
201
  lines.append("# ============================================================")
128
202
  lines.append("")
129
203
  lines.append("")
204
+ lines.append("_engine_cache = {}")
205
+ lines.append("")
206
+ lines.append("")
130
207
  lines.append("def get_db_connection(config, connection_name='default'):")
131
- lines.append(' """Create database connection from config."""')
208
+ lines.append(' """')
209
+ lines.append(" Create database connection from config.")
210
+ lines.append(" Prefers SQLAlchemy engine (with connection pooling) over raw drivers.")
211
+ lines.append(" Config values support ${VAR} env-var placeholders via resolve_env().")
212
+ lines.append(' """')
132
213
  lines.append(" conn_config = config.get('connections', {}).get(connection_name, {})")
133
- lines.append(" db_type = conn_config.get('type', 'mssql')")
134
- lines.append(" host = conn_config.get('host', 'localhost')")
135
- lines.append(" port = conn_config.get('port', 1433)")
136
- lines.append(" database = conn_config.get('database', '')")
137
- lines.append(" username = conn_config.get('username', '')")
138
- lines.append(" password = conn_config.get('password', '')")
139
- lines.append(" schema = conn_config.get('schema', 'dbo')")
214
+ lines.append(" db_type = resolve_env(conn_config.get('type', 'mssql'), config)")
215
+ lines.append(" host = resolve_env(conn_config.get('host', 'localhost'), config)")
216
+ lines.append(" port = resolve_env(conn_config.get('port', 1433), config)")
217
+ lines.append(" database = resolve_env(conn_config.get('database', ''), config)")
218
+ lines.append(" username = resolve_env(conn_config.get('username', ''), config)")
219
+ lines.append(" password = resolve_env(conn_config.get('password', ''), config)")
220
+ lines.append(" schema = resolve_env(conn_config.get('schema', 'dbo'), config)")
221
+ lines.append("")
222
+ lines.append(" cache_key = f'{db_type}://{username}@{host}:{port}/{database}'")
223
+ lines.append(" if cache_key in _engine_cache:")
224
+ lines.append(" return _engine_cache[cache_key].connect()")
225
+ lines.append("")
226
+ lines.append(" try:")
227
+ lines.append(" from sqlalchemy import create_engine")
228
+ lines.append(" url = _build_sqlalchemy_url(db_type, host, port, database, username, password)")
229
+ lines.append(" if url:")
230
+ lines.append(" engine = create_engine(url, pool_pre_ping=True, pool_size=5)")
231
+ lines.append(" _engine_cache[cache_key] = engine")
232
+ lines.append(" return engine.connect()")
233
+ lines.append(" except ImportError:")
234
+ lines.append(" logger.info('SQLAlchemy not available, falling back to raw drivers')")
235
+ lines.append(" except Exception as e:")
236
+ lines.append(" logger.warning(f'SQLAlchemy connection failed: {e}, falling back to raw drivers')")
237
+ lines.append("")
238
+ lines.append(" return _get_raw_connection(db_type, host, port, database, username, password, conn_config)")
140
239
  lines.append("")
240
+ lines.append("")
241
+ lines.append("def _build_sqlalchemy_url(db_type, host, port, database, username, password):")
242
+ lines.append(" from urllib.parse import quote_plus")
243
+ lines.append(" pw = quote_plus(str(password)) if password else ''")
244
+ lines.append(" if db_type == 'mssql':")
245
+ lines.append(" try:")
246
+ lines.append(" import pyodbc")
247
+ lines.append(" conn_str = quote_plus(")
248
+ lines.append(" f'DRIVER={{ODBC Driver 17 for SQL Server}};SERVER={host},{port};DATABASE={database};UID={username};PWD={password}'")
249
+ lines.append(" )")
250
+ lines.append(" return f'mssql+pyodbc:///?odbc_connect={conn_str}'")
251
+ lines.append(" except ImportError:")
252
+ lines.append(" return f'mssql+pymssql://{username}:{pw}@{host}:{port}/{database}'")
253
+ lines.append(" elif db_type == 'postgresql':")
254
+ lines.append(" return f'postgresql://{username}:{pw}@{host}:{port}/{database}'")
255
+ lines.append(" elif db_type == 'oracle':")
256
+ lines.append(" return f'oracle+cx_oracle://{username}:{pw}@{host}:{port}/{database}'")
257
+ lines.append(" elif db_type == 'mysql':")
258
+ lines.append(" return f'mysql+pymysql://{username}:{pw}@{host}:{port}/{database}'")
259
+ lines.append(" return None")
260
+ lines.append("")
261
+ lines.append("")
262
+ lines.append("def _get_raw_connection(db_type, host, port, database, username, password, conn_config):")
141
263
  lines.append(" if db_type == 'mssql':")
142
264
  lines.append(" try:")
143
265
  lines.append(" import pyodbc")
@@ -156,39 +278,19 @@ def _add_db_functions(lines, data_lib):
156
278
  lines.append(" return pymssql.connect(server=host, port=int(port), database=database, user=username, password=password)")
157
279
  lines.append(" except ImportError:")
158
280
  lines.append(" pass")
159
- lines.append(" try:")
160
- lines.append(" from sqlalchemy import create_engine")
161
- lines.append(" engine = create_engine(f'mssql+pymssql://{username}:{password}@{host}:{port}/{database}')")
162
- lines.append(" return engine.connect()")
163
- lines.append(" except ImportError:")
164
- lines.append(" pass")
165
- lines.append("")
166
- lines.append(" if db_type == 'postgresql':")
167
- lines.append(" try:")
168
- lines.append(" import psycopg2")
169
- lines.append(" return psycopg2.connect(")
170
- lines.append(" host=host, port=port, dbname=database,")
171
- lines.append(" user=username, password=password")
172
- lines.append(" )")
173
- lines.append(" except ImportError:")
174
- lines.append(" pass")
175
- lines.append("")
176
- lines.append(" if db_type == 'oracle':")
177
- lines.append(" try:")
178
- lines.append(" import cx_Oracle")
179
- lines.append(" dsn = cx_Oracle.makedsn(host, port, service_name=database)")
180
- lines.append(" return cx_Oracle.connect(username, password, dsn)")
181
- lines.append(" except ImportError:")
182
- lines.append(" pass")
281
+ lines.append(" elif db_type == 'postgresql':")
282
+ lines.append(" import psycopg2")
283
+ lines.append(" return psycopg2.connect(host=host, port=port, dbname=database, user=username, password=password)")
284
+ lines.append(" elif db_type == 'oracle':")
285
+ lines.append(" import cx_Oracle")
286
+ lines.append(" dsn = cx_Oracle.makedsn(host, port, service_name=database)")
287
+ lines.append(" return cx_Oracle.connect(username, password, dsn)")
183
288
  lines.append("")
184
289
  lines.append(" jdbc_url = conn_config.get('jdbc_url', '')")
185
290
  lines.append(" if jdbc_url:")
186
- lines.append(" try:")
187
- lines.append(" import jaydebeapi")
188
- lines.append(" driver = conn_config.get('jdbc_driver', '')")
189
- lines.append(" return jaydebeapi.connect(driver, jdbc_url, [username, password])")
190
- lines.append(" except ImportError:")
191
- lines.append(" pass")
291
+ lines.append(" import jaydebeapi")
292
+ lines.append(" driver = conn_config.get('jdbc_driver', '')")
293
+ lines.append(" return jaydebeapi.connect(driver, jdbc_url, [username, password])")
192
294
  lines.append("")
193
295
  lines.append(" raise ConnectionError(f'Cannot create connection for type: {db_type}')")
194
296
  lines.append("")
@@ -203,6 +305,15 @@ def _add_db_functions(lines, data_lib):
203
305
  else:
204
306
  read_func = "pd.read_sql"
205
307
 
308
+ lines.append("def _safe_close(conn):")
309
+ lines.append(' """Close connection safely — handles both SQLAlchemy and raw connections."""')
310
+ lines.append(" try:")
311
+ lines.append(" if hasattr(conn, 'close'):")
312
+ lines.append(" conn.close()")
313
+ lines.append(" except Exception:")
314
+ lines.append(" pass")
315
+ lines.append("")
316
+ lines.append("")
206
317
  lines.append("def read_from_db(config, query, connection_name='default'):")
207
318
  lines.append(' """Read data from database using SQL query."""')
208
319
  lines.append(" conn = get_db_connection(config, connection_name)")
@@ -220,7 +331,7 @@ def _add_db_functions(lines, data_lib):
220
331
  lines.append(" logger.error(f'DB read error on {{connection_name}}: {{e}}')")
221
332
  lines.append(" raise")
222
333
  lines.append(" finally:")
223
- lines.append(" conn.close()")
334
+ lines.append(" _safe_close(conn)")
224
335
  lines.append("")
225
336
  lines.append("")
226
337
  lines.append("def write_to_db(config, df, table_name, connection_name='default', if_exists='append', schema=None):")
@@ -242,23 +353,31 @@ def _add_db_functions(lines, data_lib):
242
353
  lines.append(" logger.error(f'DB write error to {{schema}}.{{table_name}}: {{e}}')")
243
354
  lines.append(" raise")
244
355
  lines.append(" finally:")
245
- lines.append(" conn.close()")
356
+ lines.append(" _safe_close(conn)")
246
357
  lines.append("")
247
358
  lines.append("")
248
359
  lines.append("def execute_sql(config, sql, connection_name='default'):")
249
360
  lines.append(' """Execute a SQL statement (INSERT, UPDATE, DELETE, DDL)."""')
250
361
  lines.append(" conn = get_db_connection(config, connection_name)")
251
362
  lines.append(" try:")
252
- lines.append(" cursor = conn.cursor()")
253
- lines.append(" cursor.execute(sql)")
254
- lines.append(" conn.commit()")
363
+ lines.append(" if hasattr(conn, 'execute'):")
364
+ lines.append(" from sqlalchemy import text")
365
+ lines.append(" conn.execute(text(sql))")
366
+ lines.append(" conn.commit()")
367
+ lines.append(" else:")
368
+ lines.append(" cursor = conn.cursor()")
369
+ lines.append(" cursor.execute(sql)")
370
+ lines.append(" conn.commit()")
255
371
  lines.append(" logger.info(f'Executed SQL on {{connection_name}}')")
256
372
  lines.append(" except Exception as e:")
257
373
  lines.append(" logger.error(f'SQL execution error: {{e}}')")
258
- lines.append(" conn.rollback()")
374
+ lines.append(" try:")
375
+ lines.append(" conn.rollback()")
376
+ lines.append(" except Exception:")
377
+ lines.append(" pass")
259
378
  lines.append(" raise")
260
379
  lines.append(" finally:")
261
- lines.append(" conn.close()")
380
+ lines.append(" _safe_close(conn)")
262
381
  lines.append("")
263
382
  lines.append("")
264
383
 
@@ -1150,10 +1269,60 @@ def _add_expression_helpers(lines):
1150
1269
  lines.append(" raise SystemExit(message)")
1151
1270
  lines.append("")
1152
1271
  lines.append("")
1153
- lines.append("def lookup_func(table, condition, *fields):")
1154
- lines.append(' """Placeholder for Informatica LOOKUP function."""')
1155
- lines.append(" logger.warning(f'LOOKUP called for table {table} - implement in mapping-specific code')")
1156
- lines.append(" return None")
1272
+ lines.append("_lookup_cache = {}")
1273
+ lines.append("")
1274
+ lines.append("")
1275
+ lines.append("def lookup_func(table, condition, *fields, config=None, connection_name='default'):")
1276
+ lines.append(' """')
1277
+ lines.append(" Informatica unconnected LOOKUP function.")
1278
+ lines.append(" Loads and caches the lookup table, then filters by condition.")
1279
+ lines.append(" Returns the first matching value of the first return field, or None.")
1280
+ lines.append(' """')
1281
+ lines.append(" global _lookup_cache")
1282
+ lines.append(" if table not in _lookup_cache:")
1283
+ lines.append(" if config is not None:")
1284
+ lines.append(" try:")
1285
+ lines.append(" lkp_conn = connection_name")
1286
+ lines.append(" conns = config.get('connections', {})")
1287
+ lines.append(" for cname, cval in conns.items():")
1288
+ lines.append(" if isinstance(cval, dict) and cval.get('connection_name', '') == table:")
1289
+ lines.append(" lkp_conn = cname")
1290
+ lines.append(" break")
1291
+ lines.append(" df_lkp = read_from_db(config, f'SELECT * FROM {table}', lkp_conn)")
1292
+ lines.append(" _lookup_cache[table] = df_lkp")
1293
+ lines.append(" logger.info(f'Cached lookup table {table}: {len(df_lkp)} rows')")
1294
+ lines.append(" except Exception as e:")
1295
+ lines.append(" logger.warning(f'Could not load lookup table {table}: {e}')")
1296
+ lines.append(" _lookup_cache[table] = None")
1297
+ lines.append(" else:")
1298
+ lines.append(" logger.warning(f'LOOKUP called for {table} without config - returning None')")
1299
+ lines.append(" return None")
1300
+ lines.append(" df_lkp = _lookup_cache.get(table)")
1301
+ lines.append(" if df_lkp is None or df_lkp.empty:")
1302
+ lines.append(" return None")
1303
+ lines.append(" try:")
1304
+ lines.append(" if callable(condition):")
1305
+ lines.append(" matches = df_lkp[condition(df_lkp)]")
1306
+ lines.append(" elif isinstance(condition, str) and '=' in condition:")
1307
+ lines.append(" col, _, val = condition.partition('=')")
1308
+ lines.append(" col = col.strip()")
1309
+ lines.append(" val = val.strip().strip(\"'\")")
1310
+ lines.append(" if col in df_lkp.columns:")
1311
+ lines.append(" matches = df_lkp[df_lkp[col].astype(str) == str(val)]")
1312
+ lines.append(" else:")
1313
+ lines.append(" return None")
1314
+ lines.append(" else:")
1315
+ lines.append(" return None")
1316
+ lines.append(" if matches.empty:")
1317
+ lines.append(" return None")
1318
+ lines.append(" if fields:")
1319
+ lines.append(" field = str(fields[0]).strip()")
1320
+ lines.append(" if field in matches.columns:")
1321
+ lines.append(" return matches.iloc[0][field]")
1322
+ lines.append(" return matches.iloc[0].to_dict()")
1323
+ lines.append(" except Exception as e:")
1324
+ lines.append(" logger.warning(f'LOOKUP error on {table}: {e}')")
1325
+ lines.append(" return None")
1157
1326
  lines.append("")
1158
1327
  lines.append("")
1159
1328
  lines.append("_param_store = {}")
@@ -419,9 +419,10 @@ def _safe_name(name):
419
419
  return safe.lower()
420
420
 
421
421
 
422
- def _emit_sql_with_params(lines, sql_var_name, sql_text, indent=" "):
422
+ def _emit_sql_with_params(lines, sql_var_name, sql_text, indent=" ", mapping_name="", session_name="", folder_name=""):
423
423
  import re
424
424
  params = re.findall(r'\$\$(\w+)', sql_text)
425
+ pm_vars = re.findall(r'\$(PM\w+)', sql_text)
425
426
  lines.append(f"{indent}{sql_var_name} = '''")
426
427
  for sql_line in sql_text.strip().split("\n"):
427
428
  lines.append(f"{indent}{sql_line}")
@@ -433,6 +434,13 @@ def _emit_sql_with_params(lines, sql_var_name, sql_text, indent=" "):
433
434
  continue
434
435
  seen.add(p)
435
436
  lines.append(f"{indent}{sql_var_name} = {sql_var_name}.replace('$${p}', str(get_param(config, '{p}')))")
437
+ if pm_vars:
438
+ seen_pm = set()
439
+ for pm in pm_vars:
440
+ if pm in seen_pm:
441
+ continue
442
+ seen_pm.add(pm)
443
+ lines.append(f"{indent}{sql_var_name} = {sql_var_name}.replace('${pm}', str(resolve_builtin_variable('{pm}', mapping_name='{mapping_name}', session_name='{session_name}', folder_name='{folder_name}')))")
436
444
 
437
445
 
438
446
  def _flatfile_config_dict(ff):
@@ -1461,7 +1469,7 @@ def _generate_target_write(lines, tgt_name, tgt_def, connector_graph, source_dfs
1461
1469
  if col_mapping:
1462
1470
  lines.append(f" # Column mapping: source -> target")
1463
1471
  lines.append(f" target_columns_{tgt_safe} = {col_mapping}")
1464
- lines.append(f" df_target_{tgt_safe} = {input_df}.rename(columns={{v: k for k, v in target_columns_{tgt_safe}.items()}})")
1472
+ lines.append(f" df_target_{tgt_safe} = rename_with_duplicates({input_df}, target_columns_{tgt_safe})")
1465
1473
  target_cols = [f.name for f in tgt_def.fields] if tgt_def.fields else None
1466
1474
  if target_cols:
1467
1475
  lines.append(f" # Select only target columns")
@@ -867,7 +867,7 @@ def _vec_recursive(expr, df_var):
867
867
  if v.startswith("'") and v.endswith("'"):
868
868
  vec_parts.append(v)
869
869
  else:
870
- vec_parts.append(f'{v}.astype(str)')
870
+ vec_parts.append(f'{v}.fillna(\'\').astype(str)')
871
871
  return " + ".join(vec_parts)
872
872
 
873
873
  for func_name in sorted(INFA_FUNC_MAP.keys(), key=lambda x: -len(x)):
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: informatica-python
3
- Version: 1.9.4
3
+ Version: 1.9.5
4
4
  Summary: Convert Informatica PowerCenter workflow XML to Python/PySpark code
5
5
  Author: Nick
6
6
  License: MIT
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
4
4
 
5
5
  [project]
6
6
  name = "informatica-python"
7
- version = "1.9.4"
7
+ version = "1.9.5"
8
8
  description = "Convert Informatica PowerCenter workflow XML to Python/PySpark code"
9
9
  readme = "README.md"
10
10
  license = {text = "MIT"}
@@ -2485,3 +2485,226 @@ class TestLookupWarning(unittest.TestCase):
2485
2485
  break
2486
2486
  finally:
2487
2487
  shutil.rmtree(tmpdir)
2488
+
2489
+
2490
+ class TestRenameWithDuplicates(unittest.TestCase):
2491
+
2492
+ def test_helper_contains_rename_with_duplicates(self):
2493
+ converter = InformaticaConverter()
2494
+ tmpdir = tempfile.mkdtemp()
2495
+ try:
2496
+ converter.convert_string(MINIMAL_XML, output_dir=tmpdir)
2497
+ with open(os.path.join(tmpdir, "helper_functions.py")) as f:
2498
+ code = f.read()
2499
+ assert "def rename_with_duplicates(" in code
2500
+ finally:
2501
+ shutil.rmtree(tmpdir)
2502
+
2503
+ def test_target_uses_rename_with_duplicates(self):
2504
+ converter = InformaticaConverter()
2505
+ tmpdir = tempfile.mkdtemp()
2506
+ try:
2507
+ converter.convert_string(MINIMAL_XML, output_dir=tmpdir)
2508
+ for fn in os.listdir(tmpdir):
2509
+ if fn.startswith("mapping_") and fn.endswith(".py"):
2510
+ with open(os.path.join(tmpdir, fn)) as f:
2511
+ code = f.read()
2512
+ if "target_columns_" in code:
2513
+ assert "rename_with_duplicates(" in code, \
2514
+ "Target rename should use rename_with_duplicates"
2515
+ finally:
2516
+ shutil.rmtree(tmpdir)
2517
+
2518
+
2519
+ class TestResolveEnv(unittest.TestCase):
2520
+
2521
+ def test_helper_contains_resolve_env(self):
2522
+ converter = InformaticaConverter()
2523
+ tmpdir = tempfile.mkdtemp()
2524
+ try:
2525
+ converter.convert_string(MINIMAL_XML, output_dir=tmpdir)
2526
+ with open(os.path.join(tmpdir, "helper_functions.py")) as f:
2527
+ code = f.read()
2528
+ assert "def resolve_env(" in code
2529
+ finally:
2530
+ shutil.rmtree(tmpdir)
2531
+
2532
+ def test_helper_contains_resolve_builtin_variable(self):
2533
+ converter = InformaticaConverter()
2534
+ tmpdir = tempfile.mkdtemp()
2535
+ try:
2536
+ converter.convert_string(MINIMAL_XML, output_dir=tmpdir)
2537
+ with open(os.path.join(tmpdir, "helper_functions.py")) as f:
2538
+ code = f.read()
2539
+ assert "def resolve_builtin_variable(" in code
2540
+ assert "PMMappingName" in code
2541
+ finally:
2542
+ shutil.rmtree(tmpdir)
2543
+
2544
+
2545
+ class TestGetDbConnectionSQLAlchemy(unittest.TestCase):
2546
+
2547
+ def test_helper_sqlalchemy_primary(self):
2548
+ converter = InformaticaConverter()
2549
+ tmpdir = tempfile.mkdtemp()
2550
+ try:
2551
+ converter.convert_string(MINIMAL_XML, output_dir=tmpdir)
2552
+ with open(os.path.join(tmpdir, "helper_functions.py")) as f:
2553
+ code = f.read()
2554
+ sa_pos = code.index("create_engine")
2555
+ pyodbc_pos = code.index("pyodbc")
2556
+ assert sa_pos < pyodbc_pos, "SQLAlchemy should be tried before raw pyodbc"
2557
+ finally:
2558
+ shutil.rmtree(tmpdir)
2559
+
2560
+ def test_helper_engine_cache(self):
2561
+ converter = InformaticaConverter()
2562
+ tmpdir = tempfile.mkdtemp()
2563
+ try:
2564
+ converter.convert_string(MINIMAL_XML, output_dir=tmpdir)
2565
+ with open(os.path.join(tmpdir, "helper_functions.py")) as f:
2566
+ code = f.read()
2567
+ assert "_engine_cache" in code
2568
+ assert "pool_pre_ping=True" in code
2569
+ finally:
2570
+ shutil.rmtree(tmpdir)
2571
+
2572
+ def test_helper_safe_close(self):
2573
+ converter = InformaticaConverter()
2574
+ tmpdir = tempfile.mkdtemp()
2575
+ try:
2576
+ converter.convert_string(MINIMAL_XML, output_dir=tmpdir)
2577
+ with open(os.path.join(tmpdir, "helper_functions.py")) as f:
2578
+ code = f.read()
2579
+ assert "def _safe_close(" in code
2580
+ assert "_safe_close(conn)" in code
2581
+ finally:
2582
+ shutil.rmtree(tmpdir)
2583
+
2584
+ def test_helper_resolve_env_in_db(self):
2585
+ converter = InformaticaConverter()
2586
+ tmpdir = tempfile.mkdtemp()
2587
+ try:
2588
+ converter.convert_string(MINIMAL_XML, output_dir=tmpdir)
2589
+ with open(os.path.join(tmpdir, "helper_functions.py")) as f:
2590
+ code = f.read()
2591
+ assert "resolve_env(" in code
2592
+ finally:
2593
+ shutil.rmtree(tmpdir)
2594
+
2595
+
2596
+ class TestLookupFuncImpl(unittest.TestCase):
2597
+
2598
+ def test_helper_lookup_func_full_impl(self):
2599
+ converter = InformaticaConverter()
2600
+ tmpdir = tempfile.mkdtemp()
2601
+ try:
2602
+ converter.convert_string(MINIMAL_XML, output_dir=tmpdir)
2603
+ with open(os.path.join(tmpdir, "helper_functions.py")) as f:
2604
+ code = f.read()
2605
+ assert "_lookup_cache" in code
2606
+ assert "def lookup_func(" in code
2607
+ assert "config=None" in code
2608
+ assert "read_from_db" in code.split("def lookup_func")[1]
2609
+ finally:
2610
+ shutil.rmtree(tmpdir)
2611
+
2612
+
2613
+ class TestNullSafeConcat(unittest.TestCase):
2614
+
2615
+ def test_concat_fillna(self):
2616
+ result = convert_expression_vectorized("A || B", "df")
2617
+ assert ".fillna('')" in result, f"Concat should use fillna, got: {result}"
2618
+ assert ".astype(str)" in result
2619
+
2620
+ def test_concat_literal_no_fillna(self):
2621
+ result = convert_expression_vectorized("A || '-' || B", "df")
2622
+ assert "'-'" in result
2623
+ parts = result.split(" + ")
2624
+ for part in parts:
2625
+ if part.strip().startswith("'") and part.strip().endswith("'"):
2626
+ assert ".fillna" not in part
2627
+ else:
2628
+ assert ".fillna('')" in part
2629
+
2630
+ def test_concat_three_fields_all_fillna(self):
2631
+ result = convert_expression_vectorized("X || Y || Z", "df")
2632
+ assert result.count(".fillna('')") == 3
2633
+
2634
+
2635
+ class TestPMVariableHandling(unittest.TestCase):
2636
+
2637
+ PM_VAR_XML = '''<?xml version="1.0" encoding="UTF-8"?>
2638
+ <!DOCTYPE POWERMART SYSTEM "powrmart.dtd">
2639
+ <POWERMART CREATION_DATE="01/01/2025" REPOSITORY_VERSION="1">
2640
+ <REPOSITORY NAME="repo" VERSION="1" CODEPAGE="UTF-8" DATABASETYPE="Oracle">
2641
+ <FOLDER NAME="TEST_FOLDER" OWNER="admin">
2642
+ <SOURCE NAME="SRC_PM" DATABASETYPE="Microsoft SQL Server" DBDNAME="TestDB" OWNERNAME="dbo">
2643
+ <SOURCEFIELD NAME="ID" DATATYPE="integer" PRECISION="10" SCALE="0" NULLABLE="NOTNULL" KEYTYPE="PRIMARY KEY" FIELDNUMBER="1"/>
2644
+ </SOURCE>
2645
+ <TARGET NAME="TGT_PM" DATABASETYPE="Microsoft SQL Server">
2646
+ <TARGETFIELD NAME="ID" DATATYPE="integer" PRECISION="10" SCALE="0" NULLABLE="NOTNULL" KEYTYPE="PRIMARY KEY" FIELDNUMBER="1"/>
2647
+ </TARGET>
2648
+ <MAPPING NAME="m_pm_vars" ISVALID="YES">
2649
+ <TRANSFORMATION NAME="SQ_SRC_PM" TYPE="Source Qualifier" REUSABLE="NO">
2650
+ <TRANSFORMFIELD NAME="ID" DATATYPE="integer" PORTTYPE="INPUT/OUTPUT" PRECISION="10" SCALE="0"/>
2651
+ <TABLEATTRIBUTE NAME="Sql Query" VALUE="SELECT ID FROM dbo.SRC_PM WHERE mapping_name = &apos;$PMMappingName&apos;"/>
2652
+ </TRANSFORMATION>
2653
+ <INSTANCE NAME="SQ_SRC_PM" TRANSFORMATION_NAME="SQ_SRC_PM" TYPE="Source Qualifier"/>
2654
+ <INSTANCE NAME="SRC_PM" TRANSFORMATION_NAME="SRC_PM" TYPE="Source Definition"/>
2655
+ <INSTANCE NAME="TGT_PM" TRANSFORMATION_NAME="TGT_PM" TYPE="Target Definition"/>
2656
+ <CONNECTOR FROMINSTANCE="SRC_PM" FROMFIELD="ID" TOINSTANCE="SQ_SRC_PM" TOFIELD="ID" FROMINSTANCETYPE="Source Definition" TOINSTANCETYPE="Source Qualifier"/>
2657
+ <CONNECTOR FROMINSTANCE="SQ_SRC_PM" FROMFIELD="ID" TOINSTANCE="TGT_PM" TOFIELD="ID" FROMINSTANCETYPE="Source Qualifier" TOINSTANCETYPE="Target Definition"/>
2658
+ </MAPPING>
2659
+ <CONFIG NAME="default_session_config"/>
2660
+ <WORKFLOW NAME="wf_pm_vars" ISVALID="YES">
2661
+ <SESSION NAME="s_pm_vars" ISVALID="YES" MAPPINGNAME="m_pm_vars"/>
2662
+ </WORKFLOW>
2663
+ </FOLDER>
2664
+ </REPOSITORY>
2665
+ </POWERMART>'''
2666
+
2667
+ def test_pm_variable_resolved_in_sql(self):
2668
+ converter = InformaticaConverter()
2669
+ tmpdir = tempfile.mkdtemp()
2670
+ try:
2671
+ converter.convert_string(self.PM_VAR_XML, output_dir=tmpdir)
2672
+ for fn in os.listdir(tmpdir):
2673
+ if fn.startswith("mapping_") and fn.endswith(".py"):
2674
+ with open(os.path.join(tmpdir, fn)) as f:
2675
+ code = f.read()
2676
+ if "$PMMappingName" in code:
2677
+ assert "resolve_builtin_variable" in code, \
2678
+ "SQL with $PMMappingName should call resolve_builtin_variable"
2679
+ break
2680
+ finally:
2681
+ shutil.rmtree(tmpdir)
2682
+
2683
+
2684
+ class TestExecuteSqlAlchemy(unittest.TestCase):
2685
+
2686
+ def test_execute_sql_handles_sqlalchemy(self):
2687
+ converter = InformaticaConverter()
2688
+ tmpdir = tempfile.mkdtemp()
2689
+ try:
2690
+ converter.convert_string(MINIMAL_XML, output_dir=tmpdir)
2691
+ with open(os.path.join(tmpdir, "helper_functions.py")) as f:
2692
+ code = f.read()
2693
+ exec_block = code.split("def execute_sql(")[1]
2694
+ assert "sqlalchemy" in exec_block or "text(sql)" in exec_block
2695
+ finally:
2696
+ shutil.rmtree(tmpdir)
2697
+
2698
+
2699
+ class TestImportRe(unittest.TestCase):
2700
+
2701
+ def test_helper_imports_re(self):
2702
+ converter = InformaticaConverter()
2703
+ tmpdir = tempfile.mkdtemp()
2704
+ try:
2705
+ converter.convert_string(MINIMAL_XML, output_dir=tmpdir)
2706
+ with open(os.path.join(tmpdir, "helper_functions.py")) as f:
2707
+ code = f.read()
2708
+ assert "import re" in code
2709
+ finally:
2710
+ shutil.rmtree(tmpdir)