informatica-python 1.9.3__py3-none-any.whl → 1.9.5__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- informatica_python/__init__.py +1 -1
- informatica_python/generators/helper_gen.py +218 -49
- informatica_python/generators/mapping_gen.py +35 -8
- informatica_python/utils/expression_converter.py +19 -5
- {informatica_python-1.9.3.dist-info → informatica_python-1.9.5.dist-info}/METADATA +3 -3
- {informatica_python-1.9.3.dist-info → informatica_python-1.9.5.dist-info}/RECORD +10 -10
- {informatica_python-1.9.3.dist-info → informatica_python-1.9.5.dist-info}/WHEEL +0 -0
- {informatica_python-1.9.3.dist-info → informatica_python-1.9.5.dist-info}/entry_points.txt +0 -0
- {informatica_python-1.9.3.dist-info → informatica_python-1.9.5.dist-info}/licenses/LICENSE +0 -0
- {informatica_python-1.9.3.dist-info → informatica_python-1.9.5.dist-info}/top_level.txt +0 -0
informatica_python/__init__.py
CHANGED
|
@@ -11,6 +11,7 @@ def generate_helper_functions(folder: FolderDef, data_lib: str = "pandas") -> st
|
|
|
11
11
|
lines.append("")
|
|
12
12
|
|
|
13
13
|
lines.append("import os")
|
|
14
|
+
lines.append("import re")
|
|
14
15
|
lines.append("import sys")
|
|
15
16
|
lines.append("import logging")
|
|
16
17
|
lines.append("import yaml")
|
|
@@ -58,6 +59,7 @@ def generate_helper_functions(folder: FolderDef, data_lib: str = "pandas") -> st
|
|
|
58
59
|
lines.append("")
|
|
59
60
|
|
|
60
61
|
_add_param_file_functions(lines)
|
|
62
|
+
_add_env_resolution(lines)
|
|
61
63
|
_add_db_functions(lines, data_lib)
|
|
62
64
|
_add_file_functions(lines, data_lib)
|
|
63
65
|
_add_expression_helpers(lines)
|
|
@@ -121,23 +123,143 @@ def _add_param_file_functions(lines):
|
|
|
121
123
|
lines.append("")
|
|
122
124
|
|
|
123
125
|
|
|
126
|
+
def _add_env_resolution(lines):
|
|
127
|
+
lines.append("# ============================================================")
|
|
128
|
+
lines.append("# Environment Variable Resolution")
|
|
129
|
+
lines.append("# ============================================================")
|
|
130
|
+
lines.append("")
|
|
131
|
+
lines.append("")
|
|
132
|
+
lines.append("def resolve_env(value, config=None):")
|
|
133
|
+
lines.append(' """')
|
|
134
|
+
lines.append(" Resolve ${VAR} placeholders in a string.")
|
|
135
|
+
lines.append(" Lookup order: OS environment variable -> config connections/variables -> literal.")
|
|
136
|
+
lines.append(' """')
|
|
137
|
+
lines.append(" if not isinstance(value, str):")
|
|
138
|
+
lines.append(" return value")
|
|
139
|
+
lines.append(" def _replace(m):")
|
|
140
|
+
lines.append(" var = m.group(1)")
|
|
141
|
+
lines.append(" env_val = os.environ.get(var)")
|
|
142
|
+
lines.append(" if env_val is not None:")
|
|
143
|
+
lines.append(" return env_val")
|
|
144
|
+
lines.append(" if config:")
|
|
145
|
+
lines.append(" for section in ('variables', 'connections', 'params'):")
|
|
146
|
+
lines.append(" sect = config.get(section, {})")
|
|
147
|
+
lines.append(" if isinstance(sect, dict) and var in sect:")
|
|
148
|
+
lines.append(" v = sect[var]")
|
|
149
|
+
lines.append(" return str(v) if not isinstance(v, dict) else str(v.get('default_value', ''))")
|
|
150
|
+
lines.append(" return m.group(0)")
|
|
151
|
+
lines.append(r" return re.sub(r'\$\{(\w+)\}', _replace, value)")
|
|
152
|
+
lines.append("")
|
|
153
|
+
lines.append("")
|
|
154
|
+
lines.append("def rename_with_duplicates(df, col_mapping):")
|
|
155
|
+
lines.append(' """')
|
|
156
|
+
lines.append(" Rename DataFrame columns supporting one-source-to-many-target mapping.")
|
|
157
|
+
lines.append(" col_mapping is {target_col: source_col}.")
|
|
158
|
+
lines.append(" When multiple target cols map to the same source col, we duplicate the column.")
|
|
159
|
+
lines.append(' """')
|
|
160
|
+
lines.append(" result = df.copy()")
|
|
161
|
+
lines.append(" from collections import Counter")
|
|
162
|
+
lines.append(" src_counts = Counter(col_mapping.values())")
|
|
163
|
+
lines.append(" simple_rename = {}")
|
|
164
|
+
lines.append(" for tgt, src in col_mapping.items():")
|
|
165
|
+
lines.append(" if src_counts[src] == 1 and src in result.columns:")
|
|
166
|
+
lines.append(" simple_rename[src] = tgt")
|
|
167
|
+
lines.append(" elif src in result.columns:")
|
|
168
|
+
lines.append(" result[tgt] = result[src].copy()")
|
|
169
|
+
lines.append(" if simple_rename:")
|
|
170
|
+
lines.append(" result = result.rename(columns=simple_rename)")
|
|
171
|
+
lines.append(" return result")
|
|
172
|
+
lines.append("")
|
|
173
|
+
lines.append("")
|
|
174
|
+
|
|
175
|
+
lines.append("def resolve_builtin_variable(var_name, mapping_name='', session_name='', folder_name=''):")
|
|
176
|
+
lines.append(' """Resolve Informatica built-in variables like $PMMappingName, $PMSessionName."""')
|
|
177
|
+
lines.append(" builtins = {")
|
|
178
|
+
lines.append(" 'PMMappingName': mapping_name,")
|
|
179
|
+
lines.append(" 'PMSessionName': session_name,")
|
|
180
|
+
lines.append(" 'PMFolderName': folder_name,")
|
|
181
|
+
lines.append(" 'PMWorkflowName': os.environ.get('INFA_VAR_PMWorkflowName', ''),")
|
|
182
|
+
lines.append(" 'PMWorkflowRunId': os.environ.get('INFA_VAR_PMWorkflowRunId', '0'),")
|
|
183
|
+
lines.append(" 'PMSessionRunId': os.environ.get('INFA_VAR_PMSessionRunId', '0'),")
|
|
184
|
+
lines.append(" 'PMIntegrationServiceName': os.environ.get('INFA_VAR_PMIntegrationServiceName', ''),")
|
|
185
|
+
lines.append(" 'PMRepositoryServiceName': os.environ.get('INFA_VAR_PMRepositoryServiceName', ''),")
|
|
186
|
+
lines.append(" 'PMSourceDBConnection': os.environ.get('INFA_VAR_PMSourceDBConnection', ''),")
|
|
187
|
+
lines.append(" 'PMTargetDBConnection': os.environ.get('INFA_VAR_PMTargetDBConnection', ''),")
|
|
188
|
+
lines.append(" }")
|
|
189
|
+
lines.append(" clean = var_name.lstrip('$').lstrip('PM')")
|
|
190
|
+
lines.append(" for key, val in builtins.items():")
|
|
191
|
+
lines.append(" if key.lower() == ('PM' + clean).lower() or key.lower() == var_name.lstrip('$').lower():")
|
|
192
|
+
lines.append(" return val")
|
|
193
|
+
lines.append(" return os.environ.get(f'INFA_VAR_{var_name.lstrip(\"$\")}', '')")
|
|
194
|
+
lines.append("")
|
|
195
|
+
lines.append("")
|
|
196
|
+
|
|
197
|
+
|
|
124
198
|
def _add_db_functions(lines, data_lib):
|
|
125
199
|
lines.append("# ============================================================")
|
|
126
200
|
lines.append("# Database Operations")
|
|
127
201
|
lines.append("# ============================================================")
|
|
128
202
|
lines.append("")
|
|
129
203
|
lines.append("")
|
|
204
|
+
lines.append("_engine_cache = {}")
|
|
205
|
+
lines.append("")
|
|
206
|
+
lines.append("")
|
|
130
207
|
lines.append("def get_db_connection(config, connection_name='default'):")
|
|
131
|
-
lines.append(' """
|
|
208
|
+
lines.append(' """')
|
|
209
|
+
lines.append(" Create database connection from config.")
|
|
210
|
+
lines.append(" Prefers SQLAlchemy engine (with connection pooling) over raw drivers.")
|
|
211
|
+
lines.append(" Config values support ${VAR} env-var placeholders via resolve_env().")
|
|
212
|
+
lines.append(' """')
|
|
132
213
|
lines.append(" conn_config = config.get('connections', {}).get(connection_name, {})")
|
|
133
|
-
lines.append(" db_type = conn_config.get('type', 'mssql')")
|
|
134
|
-
lines.append(" host = conn_config.get('host', 'localhost')")
|
|
135
|
-
lines.append(" port = conn_config.get('port', 1433)")
|
|
136
|
-
lines.append(" database = conn_config.get('database', '')")
|
|
137
|
-
lines.append(" username = conn_config.get('username', '')")
|
|
138
|
-
lines.append(" password = conn_config.get('password', '')")
|
|
139
|
-
lines.append(" schema = conn_config.get('schema', 'dbo')")
|
|
214
|
+
lines.append(" db_type = resolve_env(conn_config.get('type', 'mssql'), config)")
|
|
215
|
+
lines.append(" host = resolve_env(conn_config.get('host', 'localhost'), config)")
|
|
216
|
+
lines.append(" port = resolve_env(conn_config.get('port', 1433), config)")
|
|
217
|
+
lines.append(" database = resolve_env(conn_config.get('database', ''), config)")
|
|
218
|
+
lines.append(" username = resolve_env(conn_config.get('username', ''), config)")
|
|
219
|
+
lines.append(" password = resolve_env(conn_config.get('password', ''), config)")
|
|
220
|
+
lines.append(" schema = resolve_env(conn_config.get('schema', 'dbo'), config)")
|
|
221
|
+
lines.append("")
|
|
222
|
+
lines.append(" cache_key = f'{db_type}://{username}@{host}:{port}/{database}'")
|
|
223
|
+
lines.append(" if cache_key in _engine_cache:")
|
|
224
|
+
lines.append(" return _engine_cache[cache_key].connect()")
|
|
225
|
+
lines.append("")
|
|
226
|
+
lines.append(" try:")
|
|
227
|
+
lines.append(" from sqlalchemy import create_engine")
|
|
228
|
+
lines.append(" url = _build_sqlalchemy_url(db_type, host, port, database, username, password)")
|
|
229
|
+
lines.append(" if url:")
|
|
230
|
+
lines.append(" engine = create_engine(url, pool_pre_ping=True, pool_size=5)")
|
|
231
|
+
lines.append(" _engine_cache[cache_key] = engine")
|
|
232
|
+
lines.append(" return engine.connect()")
|
|
233
|
+
lines.append(" except ImportError:")
|
|
234
|
+
lines.append(" logger.info('SQLAlchemy not available, falling back to raw drivers')")
|
|
235
|
+
lines.append(" except Exception as e:")
|
|
236
|
+
lines.append(" logger.warning(f'SQLAlchemy connection failed: {e}, falling back to raw drivers')")
|
|
237
|
+
lines.append("")
|
|
238
|
+
lines.append(" return _get_raw_connection(db_type, host, port, database, username, password, conn_config)")
|
|
140
239
|
lines.append("")
|
|
240
|
+
lines.append("")
|
|
241
|
+
lines.append("def _build_sqlalchemy_url(db_type, host, port, database, username, password):")
|
|
242
|
+
lines.append(" from urllib.parse import quote_plus")
|
|
243
|
+
lines.append(" pw = quote_plus(str(password)) if password else ''")
|
|
244
|
+
lines.append(" if db_type == 'mssql':")
|
|
245
|
+
lines.append(" try:")
|
|
246
|
+
lines.append(" import pyodbc")
|
|
247
|
+
lines.append(" conn_str = quote_plus(")
|
|
248
|
+
lines.append(" f'DRIVER={{ODBC Driver 17 for SQL Server}};SERVER={host},{port};DATABASE={database};UID={username};PWD={password}'")
|
|
249
|
+
lines.append(" )")
|
|
250
|
+
lines.append(" return f'mssql+pyodbc:///?odbc_connect={conn_str}'")
|
|
251
|
+
lines.append(" except ImportError:")
|
|
252
|
+
lines.append(" return f'mssql+pymssql://{username}:{pw}@{host}:{port}/{database}'")
|
|
253
|
+
lines.append(" elif db_type == 'postgresql':")
|
|
254
|
+
lines.append(" return f'postgresql://{username}:{pw}@{host}:{port}/{database}'")
|
|
255
|
+
lines.append(" elif db_type == 'oracle':")
|
|
256
|
+
lines.append(" return f'oracle+cx_oracle://{username}:{pw}@{host}:{port}/{database}'")
|
|
257
|
+
lines.append(" elif db_type == 'mysql':")
|
|
258
|
+
lines.append(" return f'mysql+pymysql://{username}:{pw}@{host}:{port}/{database}'")
|
|
259
|
+
lines.append(" return None")
|
|
260
|
+
lines.append("")
|
|
261
|
+
lines.append("")
|
|
262
|
+
lines.append("def _get_raw_connection(db_type, host, port, database, username, password, conn_config):")
|
|
141
263
|
lines.append(" if db_type == 'mssql':")
|
|
142
264
|
lines.append(" try:")
|
|
143
265
|
lines.append(" import pyodbc")
|
|
@@ -156,39 +278,19 @@ def _add_db_functions(lines, data_lib):
|
|
|
156
278
|
lines.append(" return pymssql.connect(server=host, port=int(port), database=database, user=username, password=password)")
|
|
157
279
|
lines.append(" except ImportError:")
|
|
158
280
|
lines.append(" pass")
|
|
159
|
-
lines.append("
|
|
160
|
-
lines.append("
|
|
161
|
-
lines.append("
|
|
162
|
-
lines.append("
|
|
163
|
-
lines.append("
|
|
164
|
-
lines.append("
|
|
165
|
-
lines.append("")
|
|
166
|
-
lines.append(" if db_type == 'postgresql':")
|
|
167
|
-
lines.append(" try:")
|
|
168
|
-
lines.append(" import psycopg2")
|
|
169
|
-
lines.append(" return psycopg2.connect(")
|
|
170
|
-
lines.append(" host=host, port=port, dbname=database,")
|
|
171
|
-
lines.append(" user=username, password=password")
|
|
172
|
-
lines.append(" )")
|
|
173
|
-
lines.append(" except ImportError:")
|
|
174
|
-
lines.append(" pass")
|
|
175
|
-
lines.append("")
|
|
176
|
-
lines.append(" if db_type == 'oracle':")
|
|
177
|
-
lines.append(" try:")
|
|
178
|
-
lines.append(" import cx_Oracle")
|
|
179
|
-
lines.append(" dsn = cx_Oracle.makedsn(host, port, service_name=database)")
|
|
180
|
-
lines.append(" return cx_Oracle.connect(username, password, dsn)")
|
|
181
|
-
lines.append(" except ImportError:")
|
|
182
|
-
lines.append(" pass")
|
|
281
|
+
lines.append(" elif db_type == 'postgresql':")
|
|
282
|
+
lines.append(" import psycopg2")
|
|
283
|
+
lines.append(" return psycopg2.connect(host=host, port=port, dbname=database, user=username, password=password)")
|
|
284
|
+
lines.append(" elif db_type == 'oracle':")
|
|
285
|
+
lines.append(" import cx_Oracle")
|
|
286
|
+
lines.append(" dsn = cx_Oracle.makedsn(host, port, service_name=database)")
|
|
287
|
+
lines.append(" return cx_Oracle.connect(username, password, dsn)")
|
|
183
288
|
lines.append("")
|
|
184
289
|
lines.append(" jdbc_url = conn_config.get('jdbc_url', '')")
|
|
185
290
|
lines.append(" if jdbc_url:")
|
|
186
|
-
lines.append("
|
|
187
|
-
lines.append("
|
|
188
|
-
lines.append("
|
|
189
|
-
lines.append(" return jaydebeapi.connect(driver, jdbc_url, [username, password])")
|
|
190
|
-
lines.append(" except ImportError:")
|
|
191
|
-
lines.append(" pass")
|
|
291
|
+
lines.append(" import jaydebeapi")
|
|
292
|
+
lines.append(" driver = conn_config.get('jdbc_driver', '')")
|
|
293
|
+
lines.append(" return jaydebeapi.connect(driver, jdbc_url, [username, password])")
|
|
192
294
|
lines.append("")
|
|
193
295
|
lines.append(" raise ConnectionError(f'Cannot create connection for type: {db_type}')")
|
|
194
296
|
lines.append("")
|
|
@@ -203,6 +305,15 @@ def _add_db_functions(lines, data_lib):
|
|
|
203
305
|
else:
|
|
204
306
|
read_func = "pd.read_sql"
|
|
205
307
|
|
|
308
|
+
lines.append("def _safe_close(conn):")
|
|
309
|
+
lines.append(' """Close connection safely — handles both SQLAlchemy and raw connections."""')
|
|
310
|
+
lines.append(" try:")
|
|
311
|
+
lines.append(" if hasattr(conn, 'close'):")
|
|
312
|
+
lines.append(" conn.close()")
|
|
313
|
+
lines.append(" except Exception:")
|
|
314
|
+
lines.append(" pass")
|
|
315
|
+
lines.append("")
|
|
316
|
+
lines.append("")
|
|
206
317
|
lines.append("def read_from_db(config, query, connection_name='default'):")
|
|
207
318
|
lines.append(' """Read data from database using SQL query."""')
|
|
208
319
|
lines.append(" conn = get_db_connection(config, connection_name)")
|
|
@@ -220,7 +331,7 @@ def _add_db_functions(lines, data_lib):
|
|
|
220
331
|
lines.append(" logger.error(f'DB read error on {{connection_name}}: {{e}}')")
|
|
221
332
|
lines.append(" raise")
|
|
222
333
|
lines.append(" finally:")
|
|
223
|
-
lines.append(" conn
|
|
334
|
+
lines.append(" _safe_close(conn)")
|
|
224
335
|
lines.append("")
|
|
225
336
|
lines.append("")
|
|
226
337
|
lines.append("def write_to_db(config, df, table_name, connection_name='default', if_exists='append', schema=None):")
|
|
@@ -242,23 +353,31 @@ def _add_db_functions(lines, data_lib):
|
|
|
242
353
|
lines.append(" logger.error(f'DB write error to {{schema}}.{{table_name}}: {{e}}')")
|
|
243
354
|
lines.append(" raise")
|
|
244
355
|
lines.append(" finally:")
|
|
245
|
-
lines.append(" conn
|
|
356
|
+
lines.append(" _safe_close(conn)")
|
|
246
357
|
lines.append("")
|
|
247
358
|
lines.append("")
|
|
248
359
|
lines.append("def execute_sql(config, sql, connection_name='default'):")
|
|
249
360
|
lines.append(' """Execute a SQL statement (INSERT, UPDATE, DELETE, DDL)."""')
|
|
250
361
|
lines.append(" conn = get_db_connection(config, connection_name)")
|
|
251
362
|
lines.append(" try:")
|
|
252
|
-
lines.append("
|
|
253
|
-
lines.append("
|
|
254
|
-
lines.append("
|
|
363
|
+
lines.append(" if hasattr(conn, 'execute'):")
|
|
364
|
+
lines.append(" from sqlalchemy import text")
|
|
365
|
+
lines.append(" conn.execute(text(sql))")
|
|
366
|
+
lines.append(" conn.commit()")
|
|
367
|
+
lines.append(" else:")
|
|
368
|
+
lines.append(" cursor = conn.cursor()")
|
|
369
|
+
lines.append(" cursor.execute(sql)")
|
|
370
|
+
lines.append(" conn.commit()")
|
|
255
371
|
lines.append(" logger.info(f'Executed SQL on {{connection_name}}')")
|
|
256
372
|
lines.append(" except Exception as e:")
|
|
257
373
|
lines.append(" logger.error(f'SQL execution error: {{e}}')")
|
|
258
|
-
lines.append("
|
|
374
|
+
lines.append(" try:")
|
|
375
|
+
lines.append(" conn.rollback()")
|
|
376
|
+
lines.append(" except Exception:")
|
|
377
|
+
lines.append(" pass")
|
|
259
378
|
lines.append(" raise")
|
|
260
379
|
lines.append(" finally:")
|
|
261
|
-
lines.append(" conn
|
|
380
|
+
lines.append(" _safe_close(conn)")
|
|
262
381
|
lines.append("")
|
|
263
382
|
lines.append("")
|
|
264
383
|
|
|
@@ -1150,10 +1269,60 @@ def _add_expression_helpers(lines):
|
|
|
1150
1269
|
lines.append(" raise SystemExit(message)")
|
|
1151
1270
|
lines.append("")
|
|
1152
1271
|
lines.append("")
|
|
1153
|
-
lines.append("
|
|
1154
|
-
lines.append(
|
|
1155
|
-
lines.append("
|
|
1156
|
-
lines.append("
|
|
1272
|
+
lines.append("_lookup_cache = {}")
|
|
1273
|
+
lines.append("")
|
|
1274
|
+
lines.append("")
|
|
1275
|
+
lines.append("def lookup_func(table, condition, *fields, config=None, connection_name='default'):")
|
|
1276
|
+
lines.append(' """')
|
|
1277
|
+
lines.append(" Informatica unconnected LOOKUP function.")
|
|
1278
|
+
lines.append(" Loads and caches the lookup table, then filters by condition.")
|
|
1279
|
+
lines.append(" Returns the first matching value of the first return field, or None.")
|
|
1280
|
+
lines.append(' """')
|
|
1281
|
+
lines.append(" global _lookup_cache")
|
|
1282
|
+
lines.append(" if table not in _lookup_cache:")
|
|
1283
|
+
lines.append(" if config is not None:")
|
|
1284
|
+
lines.append(" try:")
|
|
1285
|
+
lines.append(" lkp_conn = connection_name")
|
|
1286
|
+
lines.append(" conns = config.get('connections', {})")
|
|
1287
|
+
lines.append(" for cname, cval in conns.items():")
|
|
1288
|
+
lines.append(" if isinstance(cval, dict) and cval.get('connection_name', '') == table:")
|
|
1289
|
+
lines.append(" lkp_conn = cname")
|
|
1290
|
+
lines.append(" break")
|
|
1291
|
+
lines.append(" df_lkp = read_from_db(config, f'SELECT * FROM {table}', lkp_conn)")
|
|
1292
|
+
lines.append(" _lookup_cache[table] = df_lkp")
|
|
1293
|
+
lines.append(" logger.info(f'Cached lookup table {table}: {len(df_lkp)} rows')")
|
|
1294
|
+
lines.append(" except Exception as e:")
|
|
1295
|
+
lines.append(" logger.warning(f'Could not load lookup table {table}: {e}')")
|
|
1296
|
+
lines.append(" _lookup_cache[table] = None")
|
|
1297
|
+
lines.append(" else:")
|
|
1298
|
+
lines.append(" logger.warning(f'LOOKUP called for {table} without config - returning None')")
|
|
1299
|
+
lines.append(" return None")
|
|
1300
|
+
lines.append(" df_lkp = _lookup_cache.get(table)")
|
|
1301
|
+
lines.append(" if df_lkp is None or df_lkp.empty:")
|
|
1302
|
+
lines.append(" return None")
|
|
1303
|
+
lines.append(" try:")
|
|
1304
|
+
lines.append(" if callable(condition):")
|
|
1305
|
+
lines.append(" matches = df_lkp[condition(df_lkp)]")
|
|
1306
|
+
lines.append(" elif isinstance(condition, str) and '=' in condition:")
|
|
1307
|
+
lines.append(" col, _, val = condition.partition('=')")
|
|
1308
|
+
lines.append(" col = col.strip()")
|
|
1309
|
+
lines.append(" val = val.strip().strip(\"'\")")
|
|
1310
|
+
lines.append(" if col in df_lkp.columns:")
|
|
1311
|
+
lines.append(" matches = df_lkp[df_lkp[col].astype(str) == str(val)]")
|
|
1312
|
+
lines.append(" else:")
|
|
1313
|
+
lines.append(" return None")
|
|
1314
|
+
lines.append(" else:")
|
|
1315
|
+
lines.append(" return None")
|
|
1316
|
+
lines.append(" if matches.empty:")
|
|
1317
|
+
lines.append(" return None")
|
|
1318
|
+
lines.append(" if fields:")
|
|
1319
|
+
lines.append(" field = str(fields[0]).strip()")
|
|
1320
|
+
lines.append(" if field in matches.columns:")
|
|
1321
|
+
lines.append(" return matches.iloc[0][field]")
|
|
1322
|
+
lines.append(" return matches.iloc[0].to_dict()")
|
|
1323
|
+
lines.append(" except Exception as e:")
|
|
1324
|
+
lines.append(" logger.warning(f'LOOKUP error on {table}: {e}')")
|
|
1325
|
+
lines.append(" return None")
|
|
1157
1326
|
lines.append("")
|
|
1158
1327
|
lines.append("")
|
|
1159
1328
|
lines.append("_param_store = {}")
|
|
@@ -419,9 +419,10 @@ def _safe_name(name):
|
|
|
419
419
|
return safe.lower()
|
|
420
420
|
|
|
421
421
|
|
|
422
|
-
def _emit_sql_with_params(lines, sql_var_name, sql_text, indent=" "):
|
|
422
|
+
def _emit_sql_with_params(lines, sql_var_name, sql_text, indent=" ", mapping_name="", session_name="", folder_name=""):
|
|
423
423
|
import re
|
|
424
424
|
params = re.findall(r'\$\$(\w+)', sql_text)
|
|
425
|
+
pm_vars = re.findall(r'\$(PM\w+)', sql_text)
|
|
425
426
|
lines.append(f"{indent}{sql_var_name} = '''")
|
|
426
427
|
for sql_line in sql_text.strip().split("\n"):
|
|
427
428
|
lines.append(f"{indent}{sql_line}")
|
|
@@ -433,6 +434,13 @@ def _emit_sql_with_params(lines, sql_var_name, sql_text, indent=" "):
|
|
|
433
434
|
continue
|
|
434
435
|
seen.add(p)
|
|
435
436
|
lines.append(f"{indent}{sql_var_name} = {sql_var_name}.replace('$${p}', str(get_param(config, '{p}')))")
|
|
437
|
+
if pm_vars:
|
|
438
|
+
seen_pm = set()
|
|
439
|
+
for pm in pm_vars:
|
|
440
|
+
if pm in seen_pm:
|
|
441
|
+
continue
|
|
442
|
+
seen_pm.add(pm)
|
|
443
|
+
lines.append(f"{indent}{sql_var_name} = {sql_var_name}.replace('${pm}', str(resolve_builtin_variable('{pm}', mapping_name='{mapping_name}', session_name='{session_name}', folder_name='{folder_name}')))")
|
|
436
444
|
|
|
437
445
|
|
|
438
446
|
def _flatfile_config_dict(ff):
|
|
@@ -757,7 +765,7 @@ def _generate_transformation(lines, tx, connector_graph, source_dfs, transform_m
|
|
|
757
765
|
elif tx_type in ("joiner",):
|
|
758
766
|
_gen_joiner_transform(lines, tx, tx_safe, input_df, input_sources, source_dfs, connector_graph, data_lib)
|
|
759
767
|
elif tx_type in ("lookup procedure", "lookup"):
|
|
760
|
-
_gen_lookup_transform(lines, tx, tx_safe, input_df, source_dfs, data_lib)
|
|
768
|
+
_gen_lookup_transform(lines, tx, tx_safe, input_df, source_dfs, connector_graph, data_lib)
|
|
761
769
|
elif tx_type == "router":
|
|
762
770
|
_gen_router_transform(lines, tx, tx_safe, input_df, source_dfs)
|
|
763
771
|
elif tx_type in ("union",):
|
|
@@ -982,7 +990,7 @@ def _gen_joiner_transform(lines, tx, tx_safe, input_df, input_sources, source_df
|
|
|
982
990
|
source_dfs[tx.name] = f"df_{tx_safe}"
|
|
983
991
|
|
|
984
992
|
|
|
985
|
-
def _gen_lookup_transform(lines, tx, tx_safe, input_df, source_dfs, data_lib="pandas"):
|
|
993
|
+
def _gen_lookup_transform(lines, tx, tx_safe, input_df, source_dfs, connector_graph=None, data_lib="pandas"):
|
|
986
994
|
lookup_table = ""
|
|
987
995
|
lookup_sql = ""
|
|
988
996
|
lookup_condition = ""
|
|
@@ -1012,6 +1020,11 @@ def _gen_lookup_transform(lines, tx, tx_safe, input_df, source_dfs, data_lib="pa
|
|
|
1012
1020
|
|
|
1013
1021
|
all_output_fields = return_fields + lookup_output_fields
|
|
1014
1022
|
|
|
1023
|
+
port_to_col = {}
|
|
1024
|
+
if connector_graph and tx.name in connector_graph.get("to", {}):
|
|
1025
|
+
for conn in connector_graph["to"][tx.name]:
|
|
1026
|
+
port_to_col[conn.to_field.lower()] = conn.from_field
|
|
1027
|
+
|
|
1015
1028
|
lines.append(f" # Lookup: {lookup_table or tx.name}")
|
|
1016
1029
|
if lookup_sql:
|
|
1017
1030
|
_emit_sql_with_params(lines, f"lkp_sql_{tx_safe}", lookup_sql)
|
|
@@ -1020,10 +1033,13 @@ def _gen_lookup_transform(lines, tx, tx_safe, input_df, source_dfs, data_lib="pa
|
|
|
1020
1033
|
lines.append(f" df_lkp_{tx_safe} = read_from_db(config, 'SELECT * FROM {lookup_table}', 'default')")
|
|
1021
1034
|
else:
|
|
1022
1035
|
empty_expr = lib_empty_df(data_lib)
|
|
1023
|
-
lines.append(f" df_lkp_{tx_safe} = {empty_expr}")
|
|
1036
|
+
lines.append(f" df_lkp_{tx_safe} = {empty_expr} # WARNING: no lookup table/SQL override found")
|
|
1024
1037
|
|
|
1025
1038
|
input_keys, lookup_keys = parse_lookup_condition(lookup_condition)
|
|
1026
1039
|
|
|
1040
|
+
if input_keys and port_to_col:
|
|
1041
|
+
input_keys = [port_to_col.get(k.lower(), k) for k in input_keys]
|
|
1042
|
+
|
|
1027
1043
|
if input_keys and lookup_keys:
|
|
1028
1044
|
lines.append(f" # Lookup condition: {lookup_condition}")
|
|
1029
1045
|
|
|
@@ -1078,12 +1094,23 @@ def _gen_router_transform(lines, tx, tx_safe, input_df, source_dfs):
|
|
|
1078
1094
|
if "Group Filter Condition" in attr.name:
|
|
1079
1095
|
group_conditions[attr.name] = attr.value
|
|
1080
1096
|
|
|
1097
|
+
remaining_mask_parts = []
|
|
1081
1098
|
if group_conditions:
|
|
1082
1099
|
for i, (gname, cond) in enumerate(group_conditions.items()):
|
|
1083
|
-
|
|
1084
|
-
|
|
1100
|
+
if cond and cond.strip():
|
|
1101
|
+
expr_py = convert_filter_vectorized(cond, input_df)
|
|
1102
|
+
else:
|
|
1103
|
+
expr_py = f"pd.Series(True, index={input_df}.index)"
|
|
1104
|
+
mask_var = f"_router_mask_{tx_safe}_{i}"
|
|
1105
|
+
lines.append(f" {mask_var} = {expr_py} # {gname}")
|
|
1106
|
+
lines.append(f" df_{tx_safe}_group{i} = {input_df}[{mask_var}].copy()")
|
|
1085
1107
|
source_dfs[f"{tx.name}_group{i}"] = f"df_{tx_safe}_group{i}"
|
|
1086
|
-
|
|
1108
|
+
remaining_mask_parts.append(f"~{mask_var}")
|
|
1109
|
+
if remaining_mask_parts:
|
|
1110
|
+
lines.append(f" _router_default_mask = {' & '.join(remaining_mask_parts)}")
|
|
1111
|
+
lines.append(f" df_{tx_safe} = {input_df}[_router_default_mask].copy() # Default group")
|
|
1112
|
+
else:
|
|
1113
|
+
lines.append(f" df_{tx_safe} = {input_df}.copy() # Default group")
|
|
1087
1114
|
source_dfs[tx.name] = f"df_{tx_safe}"
|
|
1088
1115
|
|
|
1089
1116
|
|
|
@@ -1442,7 +1469,7 @@ def _generate_target_write(lines, tgt_name, tgt_def, connector_graph, source_dfs
|
|
|
1442
1469
|
if col_mapping:
|
|
1443
1470
|
lines.append(f" # Column mapping: source -> target")
|
|
1444
1471
|
lines.append(f" target_columns_{tgt_safe} = {col_mapping}")
|
|
1445
|
-
lines.append(f" df_target_{tgt_safe} = {input_df}
|
|
1472
|
+
lines.append(f" df_target_{tgt_safe} = rename_with_duplicates({input_df}, target_columns_{tgt_safe})")
|
|
1446
1473
|
target_cols = [f.name for f in tgt_def.fields] if tgt_def.fields else None
|
|
1447
1474
|
if target_cols:
|
|
1448
1475
|
lines.append(f" # Select only target columns")
|
|
@@ -248,6 +248,7 @@ def _convert_infa_date_format(fmt_str):
|
|
|
248
248
|
fmt = fmt.replace("Mon", "%b").replace("MON", "%b")
|
|
249
249
|
fmt = fmt.replace("HH24", "%H").replace("HH12", "%I").replace("HH", "%H")
|
|
250
250
|
fmt = fmt.replace("MI", "%M").replace("SS", "%S")
|
|
251
|
+
fmt = fmt.replace("US", "%f").replace("NS", "%f").replace("MS", "%f")
|
|
251
252
|
return fmt
|
|
252
253
|
|
|
253
254
|
|
|
@@ -548,7 +549,7 @@ def _vec_recursive(expr, df_var):
|
|
|
548
549
|
'RTRIM': f'.str.rstrip("{char_arg}")',
|
|
549
550
|
'TRIM': f'.str.strip("{char_arg}")',
|
|
550
551
|
}
|
|
551
|
-
return f'{inner_val}{method_map[func_name.upper()]}'
|
|
552
|
+
return f'{inner_val}.astype(str){method_map[func_name.upper()]}'
|
|
552
553
|
|
|
553
554
|
upper_result = _find_func_call(cleaned, 'UPPER')
|
|
554
555
|
if upper_result and upper_result[0] == 0 and upper_result[1] == len(cleaned):
|
|
@@ -584,7 +585,7 @@ def _vec_recursive(expr, df_var):
|
|
|
584
585
|
if len(args) >= 2:
|
|
585
586
|
field_val = _vec_recursive(args[0], df_var)
|
|
586
587
|
try:
|
|
587
|
-
start = int(args[1].strip()) - 1
|
|
588
|
+
start = max(int(args[1].strip()) - 1, 0)
|
|
588
589
|
except ValueError:
|
|
589
590
|
start_val = _vec_recursive(args[1], df_var)
|
|
590
591
|
if len(args) >= 3:
|
|
@@ -722,7 +723,11 @@ def _vec_recursive(expr, df_var):
|
|
|
722
723
|
field_val = _vec_recursive(args[0], df_var)
|
|
723
724
|
pattern_val = args[1].strip().strip("'\"")
|
|
724
725
|
if func_name == 'REG_EXTRACT':
|
|
725
|
-
|
|
726
|
+
if re.search(r'(?<!\\)\((?!\?)', pattern_val):
|
|
727
|
+
extract_pat = pattern_val
|
|
728
|
+
else:
|
|
729
|
+
extract_pat = f'({pattern_val})'
|
|
730
|
+
return f'{field_val}.str.extract(r"{extract_pat}", expand=False)'
|
|
726
731
|
elif func_name == 'REG_REPLACE':
|
|
727
732
|
replace_val = args[2].strip().strip("'\"") if len(args) >= 3 else ''
|
|
728
733
|
return f'{field_val}.str.replace(r"{pattern_val}", "{replace_val}", regex=True)'
|
|
@@ -862,7 +867,7 @@ def _vec_recursive(expr, df_var):
|
|
|
862
867
|
if v.startswith("'") and v.endswith("'"):
|
|
863
868
|
vec_parts.append(v)
|
|
864
869
|
else:
|
|
865
|
-
vec_parts.append(f'{v}.astype(str)')
|
|
870
|
+
vec_parts.append(f'{v}.fillna(\'\').astype(str)')
|
|
866
871
|
return " + ".join(vec_parts)
|
|
867
872
|
|
|
868
873
|
for func_name in sorted(INFA_FUNC_MAP.keys(), key=lambda x: -len(x)):
|
|
@@ -894,7 +899,8 @@ def _vec_recursive(expr, df_var):
|
|
|
894
899
|
'True', 'False', 'None', 'and', 'or', 'not', 'np', 'pd', 'get_variable',
|
|
895
900
|
'str', 'int', 'float', 'bool', 'len', 'abs', 'round',
|
|
896
901
|
'fillna', 'astype', 'isna', 'notna', 'where', 'errors', 'coerce',
|
|
897
|
-
'lookup_func',
|
|
902
|
+
'lookup_func', 'expand', 'extract', 'regex', 'contains', 'replace',
|
|
903
|
+
'upper', 'lower', 'strip', 'lstrip', 'rstrip', 'dt', 'copy',
|
|
898
904
|
}
|
|
899
905
|
converted = _substitute_fields(converted, df_var, skip_words)
|
|
900
906
|
|
|
@@ -904,6 +910,8 @@ def _vec_recursive(expr, df_var):
|
|
|
904
910
|
converted = re.sub(r'<>', '!=', converted)
|
|
905
911
|
converted = re.sub(r'(?<![<>!=])=(?!=)', '==', converted)
|
|
906
912
|
converted = re.sub(r'\berrors\s*==\s*(["\'])', r'errors=\1', converted)
|
|
913
|
+
converted = re.sub(r'\bexpand\s*==\s*', 'expand=', converted)
|
|
914
|
+
converted = re.sub(r'\bregex\s*==\s*', 'regex=', converted)
|
|
907
915
|
|
|
908
916
|
converted = re.sub(r'\s+', ' ', converted).strip()
|
|
909
917
|
|
|
@@ -1044,8 +1052,14 @@ def _vectorize_simple(part, df_var):
|
|
|
1044
1052
|
'True', 'False', 'None', 'and', 'or', 'not', 'np', 'pd',
|
|
1045
1053
|
'str', 'int', 'float', 'isna', 'notna', 'fillna',
|
|
1046
1054
|
'get_variable', 'lookup_func', 'isin', 'eq',
|
|
1055
|
+
'expand', 'extract', 'astype', 'errors', 'coerce', 'regex',
|
|
1056
|
+
'contains', 'replace', 'upper', 'lower', 'strip', 'lstrip', 'rstrip',
|
|
1057
|
+
'dt', 'len', 'copy', 'abs', 'round', 'where', 'bool',
|
|
1047
1058
|
}
|
|
1048
1059
|
c = _substitute_fields(c, df_var, skip_words)
|
|
1060
|
+
c = re.sub(r'\bexpand\s*==\s*', 'expand=', c)
|
|
1061
|
+
c = re.sub(r'\berrors\s*==\s*', 'errors=', c)
|
|
1062
|
+
c = re.sub(r'\bregex\s*==\s*', 'regex=', c)
|
|
1049
1063
|
|
|
1050
1064
|
return c
|
|
1051
1065
|
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: informatica-python
|
|
3
|
-
Version: 1.9.
|
|
3
|
+
Version: 1.9.5
|
|
4
4
|
Summary: Convert Informatica PowerCenter workflow XML to Python/PySpark code
|
|
5
5
|
Author: Nick
|
|
6
6
|
License: MIT
|
|
@@ -430,7 +430,7 @@ The generated `helper_functions.py` provides a complete runtime library:
|
|
|
430
430
|
- **Generated code formatting**: Consistent `# ---` section headers for Source Qualifiers, Transforms, and Target Writes; metadata comments (database type, field lists); column mapping and write operation comments; clean blank line handling
|
|
431
431
|
- **Source/target detection**: Case-insensitive instance type matching
|
|
432
432
|
- **Session→mapping inference**: Longest-suffix-match strategy for ambiguous mapping names
|
|
433
|
-
- **
|
|
433
|
+
- **663 tests** across unit, integration, expression, and formatting test suites
|
|
434
434
|
|
|
435
435
|
### v1.9.2 (Phase 8)
|
|
436
436
|
- Mapping output files now use real mapping names (e.g., `mapping_m_customer_load.py`) instead of generic numeric indices (`mapping_1.py`)
|
|
@@ -495,7 +495,7 @@ The generated `helper_functions.py` provides a complete runtime library:
|
|
|
495
495
|
cd informatica_python
|
|
496
496
|
pip install -e ".[dev]"
|
|
497
497
|
|
|
498
|
-
# Run tests (
|
|
498
|
+
# Run tests (663 tests)
|
|
499
499
|
pytest tests/ -v
|
|
500
500
|
```
|
|
501
501
|
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
informatica_python/__init__.py,sha256=
|
|
1
|
+
informatica_python/__init__.py,sha256=sbOT0MiP4hfEvssyO1fr_bI8Mxs6ZN6SO7rBglvkJFU,337
|
|
2
2
|
informatica_python/cli.py,sha256=gFwg0O99vKM-OLO0HoHA4emd-6qrgjMNqa9T59e4e_s,2905
|
|
3
3
|
informatica_python/converter.py,sha256=xCuWrYzDji0yN72D3QqOgZCVVM2j3k2_CvlGplCWxLU,22779
|
|
4
4
|
informatica_python/models.py,sha256=G_C2WfQL-ykKjNj23m8vKFtLZYrQozp99HJzrLTKG1Y,17293
|
|
@@ -6,18 +6,18 @@ informatica_python/parser.py,sha256=v0qoTlAi3RZ3IHN_5g5t6f66XzRpJIjpAfpyMzZ5cuA,
|
|
|
6
6
|
informatica_python/generators/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
7
7
|
informatica_python/generators/config_gen.py,sha256=4tqcNKTB06kyGZIiM4yl0q97q_i3zeCHXTjuE1dNFKY,5726
|
|
8
8
|
informatica_python/generators/error_log_gen.py,sha256=2cc0rEcblydHkb9VAMXlrH7WdSQ-CNqAXcwVk3FYZeM,21319
|
|
9
|
-
informatica_python/generators/helper_gen.py,sha256=
|
|
10
|
-
informatica_python/generators/mapping_gen.py,sha256=
|
|
9
|
+
informatica_python/generators/helper_gen.py,sha256=ylhZnZb5yQ23kQswa9jrf45DvCcHq-3K7KCT2eCwCVM,82376
|
|
10
|
+
informatica_python/generators/mapping_gen.py,sha256=Aic4srfI6SfbtB1ggx7Rn27STVOV2olo3YKiRysmFps,72055
|
|
11
11
|
informatica_python/generators/sql_gen.py,sha256=O8Y-aJz9EyFJ0DXeuISRt5yKwC3wlp2K3B0BHrmxrXw,4872
|
|
12
12
|
informatica_python/generators/workflow_gen.py,sha256=_uSlBg31ZRMhMlCYk4hWDRBPaBROrepD8_v3QGEWJxE,18089
|
|
13
13
|
informatica_python/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
14
14
|
informatica_python/utils/datatype_map.py,sha256=iLOYg-iBKT4rMecGbrFkTpJj4yqs5S9HeBOTLUIWhX0,2809
|
|
15
|
-
informatica_python/utils/expression_converter.py,sha256=
|
|
15
|
+
informatica_python/utils/expression_converter.py,sha256=SkkT2CyhIZzUms9TT4cEimZlxjOoVq96AQgGTrO_Lmc,46859
|
|
16
16
|
informatica_python/utils/lib_adapters.py,sha256=1ZtuMbgDg9Ukf-OF_EG1L_BeeR-6JQk8Kx3WwMfvNRU,6516
|
|
17
17
|
informatica_python/utils/sql_dialect.py,sha256=_IHJbfu8a3mT_OvHpybgSfZKqz6mwVy5ItTKDRChqnU,5461
|
|
18
|
-
informatica_python-1.9.
|
|
19
|
-
informatica_python-1.9.
|
|
20
|
-
informatica_python-1.9.
|
|
21
|
-
informatica_python-1.9.
|
|
22
|
-
informatica_python-1.9.
|
|
23
|
-
informatica_python-1.9.
|
|
18
|
+
informatica_python-1.9.5.dist-info/licenses/LICENSE,sha256=77RaRDdXgey1D90YZAjXqEQdBxWfvUQqLQX3pC1qjUE,1061
|
|
19
|
+
informatica_python-1.9.5.dist-info/METADATA,sha256=I5YvXFM4ctZFRE_RNNi6aeZb7vLMlZM3Az-QTuXypYU,26097
|
|
20
|
+
informatica_python-1.9.5.dist-info/WHEEL,sha256=PovZm1ExVWmrRefZoXCfejlbKLnQI5SVIf1SWRV4QQI,97
|
|
21
|
+
informatica_python-1.9.5.dist-info/entry_points.txt,sha256=030jjTrx-1oRRQ16HZz52rdcKS8R8_llnymsTUtn_Xc,67
|
|
22
|
+
informatica_python-1.9.5.dist-info/top_level.txt,sha256=Dngg-WNteYi22XAJU2XKAQS8aZ52yM2LYC0tzxrlbVQ,19
|
|
23
|
+
informatica_python-1.9.5.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|