informatica-python 1.9.3__py3-none-any.whl → 1.9.5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -7,7 +7,7 @@ Licensed under the MIT License.
7
7
 
8
8
  from informatica_python.converter import InformaticaConverter
9
9
 
10
- __version__ = "1.9.3"
10
+ __version__ = "1.9.5"
11
11
  __author__ = "Nick"
12
12
  __license__ = "MIT"
13
13
  __all__ = ["InformaticaConverter"]
@@ -11,6 +11,7 @@ def generate_helper_functions(folder: FolderDef, data_lib: str = "pandas") -> st
11
11
  lines.append("")
12
12
 
13
13
  lines.append("import os")
14
+ lines.append("import re")
14
15
  lines.append("import sys")
15
16
  lines.append("import logging")
16
17
  lines.append("import yaml")
@@ -58,6 +59,7 @@ def generate_helper_functions(folder: FolderDef, data_lib: str = "pandas") -> st
58
59
  lines.append("")
59
60
 
60
61
  _add_param_file_functions(lines)
62
+ _add_env_resolution(lines)
61
63
  _add_db_functions(lines, data_lib)
62
64
  _add_file_functions(lines, data_lib)
63
65
  _add_expression_helpers(lines)
@@ -121,23 +123,143 @@ def _add_param_file_functions(lines):
121
123
  lines.append("")
122
124
 
123
125
 
126
+ def _add_env_resolution(lines):
127
+ lines.append("# ============================================================")
128
+ lines.append("# Environment Variable Resolution")
129
+ lines.append("# ============================================================")
130
+ lines.append("")
131
+ lines.append("")
132
+ lines.append("def resolve_env(value, config=None):")
133
+ lines.append(' """')
134
+ lines.append(" Resolve ${VAR} placeholders in a string.")
135
+ lines.append(" Lookup order: OS environment variable -> config connections/variables -> literal.")
136
+ lines.append(' """')
137
+ lines.append(" if not isinstance(value, str):")
138
+ lines.append(" return value")
139
+ lines.append(" def _replace(m):")
140
+ lines.append(" var = m.group(1)")
141
+ lines.append(" env_val = os.environ.get(var)")
142
+ lines.append(" if env_val is not None:")
143
+ lines.append(" return env_val")
144
+ lines.append(" if config:")
145
+ lines.append(" for section in ('variables', 'connections', 'params'):")
146
+ lines.append(" sect = config.get(section, {})")
147
+ lines.append(" if isinstance(sect, dict) and var in sect:")
148
+ lines.append(" v = sect[var]")
149
+ lines.append(" return str(v) if not isinstance(v, dict) else str(v.get('default_value', ''))")
150
+ lines.append(" return m.group(0)")
151
+ lines.append(r" return re.sub(r'\$\{(\w+)\}', _replace, value)")
152
+ lines.append("")
153
+ lines.append("")
154
+ lines.append("def rename_with_duplicates(df, col_mapping):")
155
+ lines.append(' """')
156
+ lines.append(" Rename DataFrame columns supporting one-source-to-many-target mapping.")
157
+ lines.append(" col_mapping is {target_col: source_col}.")
158
+ lines.append(" When multiple target cols map to the same source col, we duplicate the column.")
159
+ lines.append(' """')
160
+ lines.append(" result = df.copy()")
161
+ lines.append(" from collections import Counter")
162
+ lines.append(" src_counts = Counter(col_mapping.values())")
163
+ lines.append(" simple_rename = {}")
164
+ lines.append(" for tgt, src in col_mapping.items():")
165
+ lines.append(" if src_counts[src] == 1 and src in result.columns:")
166
+ lines.append(" simple_rename[src] = tgt")
167
+ lines.append(" elif src in result.columns:")
168
+ lines.append(" result[tgt] = result[src].copy()")
169
+ lines.append(" if simple_rename:")
170
+ lines.append(" result = result.rename(columns=simple_rename)")
171
+ lines.append(" return result")
172
+ lines.append("")
173
+ lines.append("")
174
+
175
+ lines.append("def resolve_builtin_variable(var_name, mapping_name='', session_name='', folder_name=''):")
176
+ lines.append(' """Resolve Informatica built-in variables like $PMMappingName, $PMSessionName."""')
177
+ lines.append(" builtins = {")
178
+ lines.append(" 'PMMappingName': mapping_name,")
179
+ lines.append(" 'PMSessionName': session_name,")
180
+ lines.append(" 'PMFolderName': folder_name,")
181
+ lines.append(" 'PMWorkflowName': os.environ.get('INFA_VAR_PMWorkflowName', ''),")
182
+ lines.append(" 'PMWorkflowRunId': os.environ.get('INFA_VAR_PMWorkflowRunId', '0'),")
183
+ lines.append(" 'PMSessionRunId': os.environ.get('INFA_VAR_PMSessionRunId', '0'),")
184
+ lines.append(" 'PMIntegrationServiceName': os.environ.get('INFA_VAR_PMIntegrationServiceName', ''),")
185
+ lines.append(" 'PMRepositoryServiceName': os.environ.get('INFA_VAR_PMRepositoryServiceName', ''),")
186
+ lines.append(" 'PMSourceDBConnection': os.environ.get('INFA_VAR_PMSourceDBConnection', ''),")
187
+ lines.append(" 'PMTargetDBConnection': os.environ.get('INFA_VAR_PMTargetDBConnection', ''),")
188
+ lines.append(" }")
189
+ lines.append(" clean = var_name.lstrip('$').lstrip('PM')")
190
+ lines.append(" for key, val in builtins.items():")
191
+ lines.append(" if key.lower() == ('PM' + clean).lower() or key.lower() == var_name.lstrip('$').lower():")
192
+ lines.append(" return val")
193
+ lines.append(" return os.environ.get(f'INFA_VAR_{var_name.lstrip(\"$\")}', '')")
194
+ lines.append("")
195
+ lines.append("")
196
+
197
+
124
198
  def _add_db_functions(lines, data_lib):
125
199
  lines.append("# ============================================================")
126
200
  lines.append("# Database Operations")
127
201
  lines.append("# ============================================================")
128
202
  lines.append("")
129
203
  lines.append("")
204
+ lines.append("_engine_cache = {}")
205
+ lines.append("")
206
+ lines.append("")
130
207
  lines.append("def get_db_connection(config, connection_name='default'):")
131
- lines.append(' """Create database connection from config."""')
208
+ lines.append(' """')
209
+ lines.append(" Create database connection from config.")
210
+ lines.append(" Prefers SQLAlchemy engine (with connection pooling) over raw drivers.")
211
+ lines.append(" Config values support ${VAR} env-var placeholders via resolve_env().")
212
+ lines.append(' """')
132
213
  lines.append(" conn_config = config.get('connections', {}).get(connection_name, {})")
133
- lines.append(" db_type = conn_config.get('type', 'mssql')")
134
- lines.append(" host = conn_config.get('host', 'localhost')")
135
- lines.append(" port = conn_config.get('port', 1433)")
136
- lines.append(" database = conn_config.get('database', '')")
137
- lines.append(" username = conn_config.get('username', '')")
138
- lines.append(" password = conn_config.get('password', '')")
139
- lines.append(" schema = conn_config.get('schema', 'dbo')")
214
+ lines.append(" db_type = resolve_env(conn_config.get('type', 'mssql'), config)")
215
+ lines.append(" host = resolve_env(conn_config.get('host', 'localhost'), config)")
216
+ lines.append(" port = resolve_env(conn_config.get('port', 1433), config)")
217
+ lines.append(" database = resolve_env(conn_config.get('database', ''), config)")
218
+ lines.append(" username = resolve_env(conn_config.get('username', ''), config)")
219
+ lines.append(" password = resolve_env(conn_config.get('password', ''), config)")
220
+ lines.append(" schema = resolve_env(conn_config.get('schema', 'dbo'), config)")
221
+ lines.append("")
222
+ lines.append(" cache_key = f'{db_type}://{username}@{host}:{port}/{database}'")
223
+ lines.append(" if cache_key in _engine_cache:")
224
+ lines.append(" return _engine_cache[cache_key].connect()")
225
+ lines.append("")
226
+ lines.append(" try:")
227
+ lines.append(" from sqlalchemy import create_engine")
228
+ lines.append(" url = _build_sqlalchemy_url(db_type, host, port, database, username, password)")
229
+ lines.append(" if url:")
230
+ lines.append(" engine = create_engine(url, pool_pre_ping=True, pool_size=5)")
231
+ lines.append(" _engine_cache[cache_key] = engine")
232
+ lines.append(" return engine.connect()")
233
+ lines.append(" except ImportError:")
234
+ lines.append(" logger.info('SQLAlchemy not available, falling back to raw drivers')")
235
+ lines.append(" except Exception as e:")
236
+ lines.append(" logger.warning(f'SQLAlchemy connection failed: {e}, falling back to raw drivers')")
237
+ lines.append("")
238
+ lines.append(" return _get_raw_connection(db_type, host, port, database, username, password, conn_config)")
140
239
  lines.append("")
240
+ lines.append("")
241
+ lines.append("def _build_sqlalchemy_url(db_type, host, port, database, username, password):")
242
+ lines.append(" from urllib.parse import quote_plus")
243
+ lines.append(" pw = quote_plus(str(password)) if password else ''")
244
+ lines.append(" if db_type == 'mssql':")
245
+ lines.append(" try:")
246
+ lines.append(" import pyodbc")
247
+ lines.append(" conn_str = quote_plus(")
248
+ lines.append(" f'DRIVER={{ODBC Driver 17 for SQL Server}};SERVER={host},{port};DATABASE={database};UID={username};PWD={password}'")
249
+ lines.append(" )")
250
+ lines.append(" return f'mssql+pyodbc:///?odbc_connect={conn_str}'")
251
+ lines.append(" except ImportError:")
252
+ lines.append(" return f'mssql+pymssql://{username}:{pw}@{host}:{port}/{database}'")
253
+ lines.append(" elif db_type == 'postgresql':")
254
+ lines.append(" return f'postgresql://{username}:{pw}@{host}:{port}/{database}'")
255
+ lines.append(" elif db_type == 'oracle':")
256
+ lines.append(" return f'oracle+cx_oracle://{username}:{pw}@{host}:{port}/{database}'")
257
+ lines.append(" elif db_type == 'mysql':")
258
+ lines.append(" return f'mysql+pymysql://{username}:{pw}@{host}:{port}/{database}'")
259
+ lines.append(" return None")
260
+ lines.append("")
261
+ lines.append("")
262
+ lines.append("def _get_raw_connection(db_type, host, port, database, username, password, conn_config):")
141
263
  lines.append(" if db_type == 'mssql':")
142
264
  lines.append(" try:")
143
265
  lines.append(" import pyodbc")
@@ -156,39 +278,19 @@ def _add_db_functions(lines, data_lib):
156
278
  lines.append(" return pymssql.connect(server=host, port=int(port), database=database, user=username, password=password)")
157
279
  lines.append(" except ImportError:")
158
280
  lines.append(" pass")
159
- lines.append(" try:")
160
- lines.append(" from sqlalchemy import create_engine")
161
- lines.append(" engine = create_engine(f'mssql+pymssql://{username}:{password}@{host}:{port}/{database}')")
162
- lines.append(" return engine.connect()")
163
- lines.append(" except ImportError:")
164
- lines.append(" pass")
165
- lines.append("")
166
- lines.append(" if db_type == 'postgresql':")
167
- lines.append(" try:")
168
- lines.append(" import psycopg2")
169
- lines.append(" return psycopg2.connect(")
170
- lines.append(" host=host, port=port, dbname=database,")
171
- lines.append(" user=username, password=password")
172
- lines.append(" )")
173
- lines.append(" except ImportError:")
174
- lines.append(" pass")
175
- lines.append("")
176
- lines.append(" if db_type == 'oracle':")
177
- lines.append(" try:")
178
- lines.append(" import cx_Oracle")
179
- lines.append(" dsn = cx_Oracle.makedsn(host, port, service_name=database)")
180
- lines.append(" return cx_Oracle.connect(username, password, dsn)")
181
- lines.append(" except ImportError:")
182
- lines.append(" pass")
281
+ lines.append(" elif db_type == 'postgresql':")
282
+ lines.append(" import psycopg2")
283
+ lines.append(" return psycopg2.connect(host=host, port=port, dbname=database, user=username, password=password)")
284
+ lines.append(" elif db_type == 'oracle':")
285
+ lines.append(" import cx_Oracle")
286
+ lines.append(" dsn = cx_Oracle.makedsn(host, port, service_name=database)")
287
+ lines.append(" return cx_Oracle.connect(username, password, dsn)")
183
288
  lines.append("")
184
289
  lines.append(" jdbc_url = conn_config.get('jdbc_url', '')")
185
290
  lines.append(" if jdbc_url:")
186
- lines.append(" try:")
187
- lines.append(" import jaydebeapi")
188
- lines.append(" driver = conn_config.get('jdbc_driver', '')")
189
- lines.append(" return jaydebeapi.connect(driver, jdbc_url, [username, password])")
190
- lines.append(" except ImportError:")
191
- lines.append(" pass")
291
+ lines.append(" import jaydebeapi")
292
+ lines.append(" driver = conn_config.get('jdbc_driver', '')")
293
+ lines.append(" return jaydebeapi.connect(driver, jdbc_url, [username, password])")
192
294
  lines.append("")
193
295
  lines.append(" raise ConnectionError(f'Cannot create connection for type: {db_type}')")
194
296
  lines.append("")
@@ -203,6 +305,15 @@ def _add_db_functions(lines, data_lib):
203
305
  else:
204
306
  read_func = "pd.read_sql"
205
307
 
308
+ lines.append("def _safe_close(conn):")
309
+ lines.append(' """Close connection safely — handles both SQLAlchemy and raw connections."""')
310
+ lines.append(" try:")
311
+ lines.append(" if hasattr(conn, 'close'):")
312
+ lines.append(" conn.close()")
313
+ lines.append(" except Exception:")
314
+ lines.append(" pass")
315
+ lines.append("")
316
+ lines.append("")
206
317
  lines.append("def read_from_db(config, query, connection_name='default'):")
207
318
  lines.append(' """Read data from database using SQL query."""')
208
319
  lines.append(" conn = get_db_connection(config, connection_name)")
@@ -220,7 +331,7 @@ def _add_db_functions(lines, data_lib):
220
331
  lines.append(" logger.error(f'DB read error on {{connection_name}}: {{e}}')")
221
332
  lines.append(" raise")
222
333
  lines.append(" finally:")
223
- lines.append(" conn.close()")
334
+ lines.append(" _safe_close(conn)")
224
335
  lines.append("")
225
336
  lines.append("")
226
337
  lines.append("def write_to_db(config, df, table_name, connection_name='default', if_exists='append', schema=None):")
@@ -242,23 +353,31 @@ def _add_db_functions(lines, data_lib):
242
353
  lines.append(" logger.error(f'DB write error to {{schema}}.{{table_name}}: {{e}}')")
243
354
  lines.append(" raise")
244
355
  lines.append(" finally:")
245
- lines.append(" conn.close()")
356
+ lines.append(" _safe_close(conn)")
246
357
  lines.append("")
247
358
  lines.append("")
248
359
  lines.append("def execute_sql(config, sql, connection_name='default'):")
249
360
  lines.append(' """Execute a SQL statement (INSERT, UPDATE, DELETE, DDL)."""')
250
361
  lines.append(" conn = get_db_connection(config, connection_name)")
251
362
  lines.append(" try:")
252
- lines.append(" cursor = conn.cursor()")
253
- lines.append(" cursor.execute(sql)")
254
- lines.append(" conn.commit()")
363
+ lines.append(" if hasattr(conn, 'execute'):")
364
+ lines.append(" from sqlalchemy import text")
365
+ lines.append(" conn.execute(text(sql))")
366
+ lines.append(" conn.commit()")
367
+ lines.append(" else:")
368
+ lines.append(" cursor = conn.cursor()")
369
+ lines.append(" cursor.execute(sql)")
370
+ lines.append(" conn.commit()")
255
371
  lines.append(" logger.info(f'Executed SQL on {{connection_name}}')")
256
372
  lines.append(" except Exception as e:")
257
373
  lines.append(" logger.error(f'SQL execution error: {{e}}')")
258
- lines.append(" conn.rollback()")
374
+ lines.append(" try:")
375
+ lines.append(" conn.rollback()")
376
+ lines.append(" except Exception:")
377
+ lines.append(" pass")
259
378
  lines.append(" raise")
260
379
  lines.append(" finally:")
261
- lines.append(" conn.close()")
380
+ lines.append(" _safe_close(conn)")
262
381
  lines.append("")
263
382
  lines.append("")
264
383
 
@@ -1150,10 +1269,60 @@ def _add_expression_helpers(lines):
1150
1269
  lines.append(" raise SystemExit(message)")
1151
1270
  lines.append("")
1152
1271
  lines.append("")
1153
- lines.append("def lookup_func(table, condition, *fields):")
1154
- lines.append(' """Placeholder for Informatica LOOKUP function."""')
1155
- lines.append(" logger.warning(f'LOOKUP called for table {table} - implement in mapping-specific code')")
1156
- lines.append(" return None")
1272
+ lines.append("_lookup_cache = {}")
1273
+ lines.append("")
1274
+ lines.append("")
1275
+ lines.append("def lookup_func(table, condition, *fields, config=None, connection_name='default'):")
1276
+ lines.append(' """')
1277
+ lines.append(" Informatica unconnected LOOKUP function.")
1278
+ lines.append(" Loads and caches the lookup table, then filters by condition.")
1279
+ lines.append(" Returns the first matching value of the first return field, or None.")
1280
+ lines.append(' """')
1281
+ lines.append(" global _lookup_cache")
1282
+ lines.append(" if table not in _lookup_cache:")
1283
+ lines.append(" if config is not None:")
1284
+ lines.append(" try:")
1285
+ lines.append(" lkp_conn = connection_name")
1286
+ lines.append(" conns = config.get('connections', {})")
1287
+ lines.append(" for cname, cval in conns.items():")
1288
+ lines.append(" if isinstance(cval, dict) and cval.get('connection_name', '') == table:")
1289
+ lines.append(" lkp_conn = cname")
1290
+ lines.append(" break")
1291
+ lines.append(" df_lkp = read_from_db(config, f'SELECT * FROM {table}', lkp_conn)")
1292
+ lines.append(" _lookup_cache[table] = df_lkp")
1293
+ lines.append(" logger.info(f'Cached lookup table {table}: {len(df_lkp)} rows')")
1294
+ lines.append(" except Exception as e:")
1295
+ lines.append(" logger.warning(f'Could not load lookup table {table}: {e}')")
1296
+ lines.append(" _lookup_cache[table] = None")
1297
+ lines.append(" else:")
1298
+ lines.append(" logger.warning(f'LOOKUP called for {table} without config - returning None')")
1299
+ lines.append(" return None")
1300
+ lines.append(" df_lkp = _lookup_cache.get(table)")
1301
+ lines.append(" if df_lkp is None or df_lkp.empty:")
1302
+ lines.append(" return None")
1303
+ lines.append(" try:")
1304
+ lines.append(" if callable(condition):")
1305
+ lines.append(" matches = df_lkp[condition(df_lkp)]")
1306
+ lines.append(" elif isinstance(condition, str) and '=' in condition:")
1307
+ lines.append(" col, _, val = condition.partition('=')")
1308
+ lines.append(" col = col.strip()")
1309
+ lines.append(" val = val.strip().strip(\"'\")")
1310
+ lines.append(" if col in df_lkp.columns:")
1311
+ lines.append(" matches = df_lkp[df_lkp[col].astype(str) == str(val)]")
1312
+ lines.append(" else:")
1313
+ lines.append(" return None")
1314
+ lines.append(" else:")
1315
+ lines.append(" return None")
1316
+ lines.append(" if matches.empty:")
1317
+ lines.append(" return None")
1318
+ lines.append(" if fields:")
1319
+ lines.append(" field = str(fields[0]).strip()")
1320
+ lines.append(" if field in matches.columns:")
1321
+ lines.append(" return matches.iloc[0][field]")
1322
+ lines.append(" return matches.iloc[0].to_dict()")
1323
+ lines.append(" except Exception as e:")
1324
+ lines.append(" logger.warning(f'LOOKUP error on {table}: {e}')")
1325
+ lines.append(" return None")
1157
1326
  lines.append("")
1158
1327
  lines.append("")
1159
1328
  lines.append("_param_store = {}")
@@ -419,9 +419,10 @@ def _safe_name(name):
419
419
  return safe.lower()
420
420
 
421
421
 
422
- def _emit_sql_with_params(lines, sql_var_name, sql_text, indent=" "):
422
+ def _emit_sql_with_params(lines, sql_var_name, sql_text, indent=" ", mapping_name="", session_name="", folder_name=""):
423
423
  import re
424
424
  params = re.findall(r'\$\$(\w+)', sql_text)
425
+ pm_vars = re.findall(r'\$(PM\w+)', sql_text)
425
426
  lines.append(f"{indent}{sql_var_name} = '''")
426
427
  for sql_line in sql_text.strip().split("\n"):
427
428
  lines.append(f"{indent}{sql_line}")
@@ -433,6 +434,13 @@ def _emit_sql_with_params(lines, sql_var_name, sql_text, indent=" "):
433
434
  continue
434
435
  seen.add(p)
435
436
  lines.append(f"{indent}{sql_var_name} = {sql_var_name}.replace('$${p}', str(get_param(config, '{p}')))")
437
+ if pm_vars:
438
+ seen_pm = set()
439
+ for pm in pm_vars:
440
+ if pm in seen_pm:
441
+ continue
442
+ seen_pm.add(pm)
443
+ lines.append(f"{indent}{sql_var_name} = {sql_var_name}.replace('${pm}', str(resolve_builtin_variable('{pm}', mapping_name='{mapping_name}', session_name='{session_name}', folder_name='{folder_name}')))")
436
444
 
437
445
 
438
446
  def _flatfile_config_dict(ff):
@@ -757,7 +765,7 @@ def _generate_transformation(lines, tx, connector_graph, source_dfs, transform_m
757
765
  elif tx_type in ("joiner",):
758
766
  _gen_joiner_transform(lines, tx, tx_safe, input_df, input_sources, source_dfs, connector_graph, data_lib)
759
767
  elif tx_type in ("lookup procedure", "lookup"):
760
- _gen_lookup_transform(lines, tx, tx_safe, input_df, source_dfs, data_lib)
768
+ _gen_lookup_transform(lines, tx, tx_safe, input_df, source_dfs, connector_graph, data_lib)
761
769
  elif tx_type == "router":
762
770
  _gen_router_transform(lines, tx, tx_safe, input_df, source_dfs)
763
771
  elif tx_type in ("union",):
@@ -982,7 +990,7 @@ def _gen_joiner_transform(lines, tx, tx_safe, input_df, input_sources, source_df
982
990
  source_dfs[tx.name] = f"df_{tx_safe}"
983
991
 
984
992
 
985
- def _gen_lookup_transform(lines, tx, tx_safe, input_df, source_dfs, data_lib="pandas"):
993
+ def _gen_lookup_transform(lines, tx, tx_safe, input_df, source_dfs, connector_graph=None, data_lib="pandas"):
986
994
  lookup_table = ""
987
995
  lookup_sql = ""
988
996
  lookup_condition = ""
@@ -1012,6 +1020,11 @@ def _gen_lookup_transform(lines, tx, tx_safe, input_df, source_dfs, data_lib="pa
1012
1020
 
1013
1021
  all_output_fields = return_fields + lookup_output_fields
1014
1022
 
1023
+ port_to_col = {}
1024
+ if connector_graph and tx.name in connector_graph.get("to", {}):
1025
+ for conn in connector_graph["to"][tx.name]:
1026
+ port_to_col[conn.to_field.lower()] = conn.from_field
1027
+
1015
1028
  lines.append(f" # Lookup: {lookup_table or tx.name}")
1016
1029
  if lookup_sql:
1017
1030
  _emit_sql_with_params(lines, f"lkp_sql_{tx_safe}", lookup_sql)
@@ -1020,10 +1033,13 @@ def _gen_lookup_transform(lines, tx, tx_safe, input_df, source_dfs, data_lib="pa
1020
1033
  lines.append(f" df_lkp_{tx_safe} = read_from_db(config, 'SELECT * FROM {lookup_table}', 'default')")
1021
1034
  else:
1022
1035
  empty_expr = lib_empty_df(data_lib)
1023
- lines.append(f" df_lkp_{tx_safe} = {empty_expr}")
1036
+ lines.append(f" df_lkp_{tx_safe} = {empty_expr} # WARNING: no lookup table/SQL override found")
1024
1037
 
1025
1038
  input_keys, lookup_keys = parse_lookup_condition(lookup_condition)
1026
1039
 
1040
+ if input_keys and port_to_col:
1041
+ input_keys = [port_to_col.get(k.lower(), k) for k in input_keys]
1042
+
1027
1043
  if input_keys and lookup_keys:
1028
1044
  lines.append(f" # Lookup condition: {lookup_condition}")
1029
1045
 
@@ -1078,12 +1094,23 @@ def _gen_router_transform(lines, tx, tx_safe, input_df, source_dfs):
1078
1094
  if "Group Filter Condition" in attr.name:
1079
1095
  group_conditions[attr.name] = attr.value
1080
1096
 
1097
+ remaining_mask_parts = []
1081
1098
  if group_conditions:
1082
1099
  for i, (gname, cond) in enumerate(group_conditions.items()):
1083
- expr_py = convert_expression(cond) if cond else "True"
1084
- lines.append(f" df_{tx_safe}_group{i} = {input_df}[{expr_py}].copy() # {gname}")
1100
+ if cond and cond.strip():
1101
+ expr_py = convert_filter_vectorized(cond, input_df)
1102
+ else:
1103
+ expr_py = f"pd.Series(True, index={input_df}.index)"
1104
+ mask_var = f"_router_mask_{tx_safe}_{i}"
1105
+ lines.append(f" {mask_var} = {expr_py} # {gname}")
1106
+ lines.append(f" df_{tx_safe}_group{i} = {input_df}[{mask_var}].copy()")
1085
1107
  source_dfs[f"{tx.name}_group{i}"] = f"df_{tx_safe}_group{i}"
1086
- lines.append(f" df_{tx_safe} = {input_df}.copy() # Default group")
1108
+ remaining_mask_parts.append(f"~{mask_var}")
1109
+ if remaining_mask_parts:
1110
+ lines.append(f" _router_default_mask = {' & '.join(remaining_mask_parts)}")
1111
+ lines.append(f" df_{tx_safe} = {input_df}[_router_default_mask].copy() # Default group")
1112
+ else:
1113
+ lines.append(f" df_{tx_safe} = {input_df}.copy() # Default group")
1087
1114
  source_dfs[tx.name] = f"df_{tx_safe}"
1088
1115
 
1089
1116
 
@@ -1442,7 +1469,7 @@ def _generate_target_write(lines, tgt_name, tgt_def, connector_graph, source_dfs
1442
1469
  if col_mapping:
1443
1470
  lines.append(f" # Column mapping: source -> target")
1444
1471
  lines.append(f" target_columns_{tgt_safe} = {col_mapping}")
1445
- lines.append(f" df_target_{tgt_safe} = {input_df}.rename(columns={{v: k for k, v in target_columns_{tgt_safe}.items()}})")
1472
+ lines.append(f" df_target_{tgt_safe} = rename_with_duplicates({input_df}, target_columns_{tgt_safe})")
1446
1473
  target_cols = [f.name for f in tgt_def.fields] if tgt_def.fields else None
1447
1474
  if target_cols:
1448
1475
  lines.append(f" # Select only target columns")
@@ -248,6 +248,7 @@ def _convert_infa_date_format(fmt_str):
248
248
  fmt = fmt.replace("Mon", "%b").replace("MON", "%b")
249
249
  fmt = fmt.replace("HH24", "%H").replace("HH12", "%I").replace("HH", "%H")
250
250
  fmt = fmt.replace("MI", "%M").replace("SS", "%S")
251
+ fmt = fmt.replace("US", "%f").replace("NS", "%f").replace("MS", "%f")
251
252
  return fmt
252
253
 
253
254
 
@@ -548,7 +549,7 @@ def _vec_recursive(expr, df_var):
548
549
  'RTRIM': f'.str.rstrip("{char_arg}")',
549
550
  'TRIM': f'.str.strip("{char_arg}")',
550
551
  }
551
- return f'{inner_val}{method_map[func_name.upper()]}'
552
+ return f'{inner_val}.astype(str){method_map[func_name.upper()]}'
552
553
 
553
554
  upper_result = _find_func_call(cleaned, 'UPPER')
554
555
  if upper_result and upper_result[0] == 0 and upper_result[1] == len(cleaned):
@@ -584,7 +585,7 @@ def _vec_recursive(expr, df_var):
584
585
  if len(args) >= 2:
585
586
  field_val = _vec_recursive(args[0], df_var)
586
587
  try:
587
- start = int(args[1].strip()) - 1
588
+ start = max(int(args[1].strip()) - 1, 0)
588
589
  except ValueError:
589
590
  start_val = _vec_recursive(args[1], df_var)
590
591
  if len(args) >= 3:
@@ -722,7 +723,11 @@ def _vec_recursive(expr, df_var):
722
723
  field_val = _vec_recursive(args[0], df_var)
723
724
  pattern_val = args[1].strip().strip("'\"")
724
725
  if func_name == 'REG_EXTRACT':
725
- return f'{field_val}.str.extract(r"({pattern_val})", expand=False)'
726
+ if re.search(r'(?<!\\)\((?!\?)', pattern_val):
727
+ extract_pat = pattern_val
728
+ else:
729
+ extract_pat = f'({pattern_val})'
730
+ return f'{field_val}.str.extract(r"{extract_pat}", expand=False)'
726
731
  elif func_name == 'REG_REPLACE':
727
732
  replace_val = args[2].strip().strip("'\"") if len(args) >= 3 else ''
728
733
  return f'{field_val}.str.replace(r"{pattern_val}", "{replace_val}", regex=True)'
@@ -862,7 +867,7 @@ def _vec_recursive(expr, df_var):
862
867
  if v.startswith("'") and v.endswith("'"):
863
868
  vec_parts.append(v)
864
869
  else:
865
- vec_parts.append(f'{v}.astype(str)')
870
+ vec_parts.append(f'{v}.fillna(\'\').astype(str)')
866
871
  return " + ".join(vec_parts)
867
872
 
868
873
  for func_name in sorted(INFA_FUNC_MAP.keys(), key=lambda x: -len(x)):
@@ -894,7 +899,8 @@ def _vec_recursive(expr, df_var):
894
899
  'True', 'False', 'None', 'and', 'or', 'not', 'np', 'pd', 'get_variable',
895
900
  'str', 'int', 'float', 'bool', 'len', 'abs', 'round',
896
901
  'fillna', 'astype', 'isna', 'notna', 'where', 'errors', 'coerce',
897
- 'lookup_func',
902
+ 'lookup_func', 'expand', 'extract', 'regex', 'contains', 'replace',
903
+ 'upper', 'lower', 'strip', 'lstrip', 'rstrip', 'dt', 'copy',
898
904
  }
899
905
  converted = _substitute_fields(converted, df_var, skip_words)
900
906
 
@@ -904,6 +910,8 @@ def _vec_recursive(expr, df_var):
904
910
  converted = re.sub(r'<>', '!=', converted)
905
911
  converted = re.sub(r'(?<![<>!=])=(?!=)', '==', converted)
906
912
  converted = re.sub(r'\berrors\s*==\s*(["\'])', r'errors=\1', converted)
913
+ converted = re.sub(r'\bexpand\s*==\s*', 'expand=', converted)
914
+ converted = re.sub(r'\bregex\s*==\s*', 'regex=', converted)
907
915
 
908
916
  converted = re.sub(r'\s+', ' ', converted).strip()
909
917
 
@@ -1044,8 +1052,14 @@ def _vectorize_simple(part, df_var):
1044
1052
  'True', 'False', 'None', 'and', 'or', 'not', 'np', 'pd',
1045
1053
  'str', 'int', 'float', 'isna', 'notna', 'fillna',
1046
1054
  'get_variable', 'lookup_func', 'isin', 'eq',
1055
+ 'expand', 'extract', 'astype', 'errors', 'coerce', 'regex',
1056
+ 'contains', 'replace', 'upper', 'lower', 'strip', 'lstrip', 'rstrip',
1057
+ 'dt', 'len', 'copy', 'abs', 'round', 'where', 'bool',
1047
1058
  }
1048
1059
  c = _substitute_fields(c, df_var, skip_words)
1060
+ c = re.sub(r'\bexpand\s*==\s*', 'expand=', c)
1061
+ c = re.sub(r'\berrors\s*==\s*', 'errors=', c)
1062
+ c = re.sub(r'\bregex\s*==\s*', 'regex=', c)
1049
1063
 
1050
1064
  return c
1051
1065
 
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: informatica-python
3
- Version: 1.9.3
3
+ Version: 1.9.5
4
4
  Summary: Convert Informatica PowerCenter workflow XML to Python/PySpark code
5
5
  Author: Nick
6
6
  License: MIT
@@ -430,7 +430,7 @@ The generated `helper_functions.py` provides a complete runtime library:
430
430
  - **Generated code formatting**: Consistent `# ---` section headers for Source Qualifiers, Transforms, and Target Writes; metadata comments (database type, field lists); column mapping and write operation comments; clean blank line handling
431
431
  - **Source/target detection**: Case-insensitive instance type matching
432
432
  - **Session→mapping inference**: Longest-suffix-match strategy for ambiguous mapping names
433
- - **646 tests** across unit, integration, expression, and formatting test suites
433
+ - **663 tests** across unit, integration, expression, and formatting test suites
434
434
 
435
435
  ### v1.9.2 (Phase 8)
436
436
  - Mapping output files now use real mapping names (e.g., `mapping_m_customer_load.py`) instead of generic numeric indices (`mapping_1.py`)
@@ -495,7 +495,7 @@ The generated `helper_functions.py` provides a complete runtime library:
495
495
  cd informatica_python
496
496
  pip install -e ".[dev]"
497
497
 
498
- # Run tests (646 tests)
498
+ # Run tests (663 tests)
499
499
  pytest tests/ -v
500
500
  ```
501
501
 
@@ -1,4 +1,4 @@
1
- informatica_python/__init__.py,sha256=o9kEVkHnEwXAD7hhY8YbN6G8RP4Mqby_q8CpjfbiknQ,337
1
+ informatica_python/__init__.py,sha256=sbOT0MiP4hfEvssyO1fr_bI8Mxs6ZN6SO7rBglvkJFU,337
2
2
  informatica_python/cli.py,sha256=gFwg0O99vKM-OLO0HoHA4emd-6qrgjMNqa9T59e4e_s,2905
3
3
  informatica_python/converter.py,sha256=xCuWrYzDji0yN72D3QqOgZCVVM2j3k2_CvlGplCWxLU,22779
4
4
  informatica_python/models.py,sha256=G_C2WfQL-ykKjNj23m8vKFtLZYrQozp99HJzrLTKG1Y,17293
@@ -6,18 +6,18 @@ informatica_python/parser.py,sha256=v0qoTlAi3RZ3IHN_5g5t6f66XzRpJIjpAfpyMzZ5cuA,
6
6
  informatica_python/generators/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
7
7
  informatica_python/generators/config_gen.py,sha256=4tqcNKTB06kyGZIiM4yl0q97q_i3zeCHXTjuE1dNFKY,5726
8
8
  informatica_python/generators/error_log_gen.py,sha256=2cc0rEcblydHkb9VAMXlrH7WdSQ-CNqAXcwVk3FYZeM,21319
9
- informatica_python/generators/helper_gen.py,sha256=D6-UqNh09Qy2V7RimNgP-SzK_uB9YqAlsa0-cgLhf5o,72209
10
- informatica_python/generators/mapping_gen.py,sha256=gBVArcb8uODbgY3epdsldCbUywS-qo8CiKr7hcNjMnc,70654
9
+ informatica_python/generators/helper_gen.py,sha256=ylhZnZb5yQ23kQswa9jrf45DvCcHq-3K7KCT2eCwCVM,82376
10
+ informatica_python/generators/mapping_gen.py,sha256=Aic4srfI6SfbtB1ggx7Rn27STVOV2olo3YKiRysmFps,72055
11
11
  informatica_python/generators/sql_gen.py,sha256=O8Y-aJz9EyFJ0DXeuISRt5yKwC3wlp2K3B0BHrmxrXw,4872
12
12
  informatica_python/generators/workflow_gen.py,sha256=_uSlBg31ZRMhMlCYk4hWDRBPaBROrepD8_v3QGEWJxE,18089
13
13
  informatica_python/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
14
14
  informatica_python/utils/datatype_map.py,sha256=iLOYg-iBKT4rMecGbrFkTpJj4yqs5S9HeBOTLUIWhX0,2809
15
- informatica_python/utils/expression_converter.py,sha256=CqkkTESMKxcYmVsDpNfn7VcZZe771uCIMy_0YQYq6pc,45946
15
+ informatica_python/utils/expression_converter.py,sha256=SkkT2CyhIZzUms9TT4cEimZlxjOoVq96AQgGTrO_Lmc,46859
16
16
  informatica_python/utils/lib_adapters.py,sha256=1ZtuMbgDg9Ukf-OF_EG1L_BeeR-6JQk8Kx3WwMfvNRU,6516
17
17
  informatica_python/utils/sql_dialect.py,sha256=_IHJbfu8a3mT_OvHpybgSfZKqz6mwVy5ItTKDRChqnU,5461
18
- informatica_python-1.9.3.dist-info/licenses/LICENSE,sha256=77RaRDdXgey1D90YZAjXqEQdBxWfvUQqLQX3pC1qjUE,1061
19
- informatica_python-1.9.3.dist-info/METADATA,sha256=VbfZWdzKE382RnkR7F2rs7PNL397g3PfglvugN4XVTw,26097
20
- informatica_python-1.9.3.dist-info/WHEEL,sha256=PovZm1ExVWmrRefZoXCfejlbKLnQI5SVIf1SWRV4QQI,97
21
- informatica_python-1.9.3.dist-info/entry_points.txt,sha256=030jjTrx-1oRRQ16HZz52rdcKS8R8_llnymsTUtn_Xc,67
22
- informatica_python-1.9.3.dist-info/top_level.txt,sha256=Dngg-WNteYi22XAJU2XKAQS8aZ52yM2LYC0tzxrlbVQ,19
23
- informatica_python-1.9.3.dist-info/RECORD,,
18
+ informatica_python-1.9.5.dist-info/licenses/LICENSE,sha256=77RaRDdXgey1D90YZAjXqEQdBxWfvUQqLQX3pC1qjUE,1061
19
+ informatica_python-1.9.5.dist-info/METADATA,sha256=I5YvXFM4ctZFRE_RNNi6aeZb7vLMlZM3Az-QTuXypYU,26097
20
+ informatica_python-1.9.5.dist-info/WHEEL,sha256=PovZm1ExVWmrRefZoXCfejlbKLnQI5SVIf1SWRV4QQI,97
21
+ informatica_python-1.9.5.dist-info/entry_points.txt,sha256=030jjTrx-1oRRQ16HZz52rdcKS8R8_llnymsTUtn_Xc,67
22
+ informatica_python-1.9.5.dist-info/top_level.txt,sha256=Dngg-WNteYi22XAJU2XKAQS8aZ52yM2LYC0tzxrlbVQ,19
23
+ informatica_python-1.9.5.dist-info/RECORD,,