informatica_python-1.0.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,693 @@
1
+ from informatica_python.models import FolderDef
2
+
3
+
4
+ def generate_helper_functions(folder: FolderDef, data_lib: str = "pandas") -> str:
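+ # Quoting convention used throughout this generator: plain-string appends are
+ # copied verbatim into the generated module, so runtime placeholders such as
+ # {connection_name} are written with single braces; doubled braces are only
+ # needed when the append itself is an f-string (interpolating generator-side
+ # values like folder.name or read_func) or when the generated line must keep
+ # literal braces (e.g. the ODBC DRIVER={{...}} spec).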
5
+ lines = []
6
+ lines.append('"""')
7
+ lines.append(f"Helper functions and core logic for folder: {folder.name}")
8
+ lines.append(f"Data manipulation library: {data_lib}")
9
+ lines.append('Auto-generated by informatica-python')
10
+ lines.append('"""')
11
+ lines.append("")
12
+
13
+ lines.append("import os")
14
+ lines.append("import sys")
15
+ lines.append("import logging")
16
+ lines.append("import yaml")
17
+ lines.append("from datetime import datetime")
18
+ lines.append("")
19
+
20
+ if data_lib == "pandas":
21
+ lines.append("import pandas as pd")
22
+ elif data_lib == "dask":
23
+ lines.append("import dask.dataframe as dd")
24
+ lines.append("import pandas as pd")
25
+ elif data_lib == "polars":
26
+ lines.append("import polars as pl")
27
+ elif data_lib == "vaex":
28
+ lines.append("import vaex")
29
+ elif data_lib == "modin":
30
+ lines.append("import modin.pandas as pd")
31
+ else:
32
+ lines.append("import pandas as pd")
33
+
34
+ lines.append("")
35
+ lines.append("")
36
+ lines.append("logging.basicConfig(")
37
+ lines.append(' level=logging.INFO,')
38
+ lines.append(' format="%(asctime)s - %(name)s - %(levelname)s - %(message)s",')
39
+ lines.append(' handlers=[')
40
+ lines.append(' logging.StreamHandler(sys.stdout),')
41
+ lines.append(' logging.FileHandler("error_log.txt", mode="a"),')
42
+ lines.append(' ]')
43
+ lines.append(")")
44
+ lines.append('logger = logging.getLogger("informatica_converter")')
45
+ lines.append("")
46
+ lines.append("")
47
+ lines.append("def load_config(config_path='config.yml'):")
48
+ lines.append(' """Load configuration from YAML file."""')
49
+ lines.append(" with open(config_path, 'r') as f:")
50
+ lines.append(" return yaml.safe_load(f)")
51
+ lines.append("")
52
+ lines.append("")
53
+
54
+ _add_db_functions(lines, data_lib)
55
+ _add_file_functions(lines, data_lib)
56
+ _add_expression_helpers(lines)
57
+ _add_utility_functions(lines)
58
+
59
+ return "\n".join(lines)
60
+
61
+
62
+ def _add_db_functions(lines, data_lib):
63
+ lines.append("# ============================================================")
64
+ lines.append("# Database Operations")
65
+ lines.append("# ============================================================")
66
+ lines.append("")
67
+ lines.append("")
68
+ lines.append("def get_db_connection(config, connection_name='default'):")
69
+ lines.append(' """Create database connection from config."""')
70
+ lines.append(" conn_config = config.get('connections', {}).get(connection_name, {})")
71
+ lines.append(" db_type = conn_config.get('type', 'mssql')")
72
+ lines.append(" host = conn_config.get('host', 'localhost')")
73
+ lines.append(" port = conn_config.get('port', 1433)")
74
+ lines.append(" database = conn_config.get('database', '')")
75
+ lines.append(" username = conn_config.get('username', '')")
76
+ lines.append(" password = conn_config.get('password', '')")
77
+ lines.append(" schema = conn_config.get('schema', 'dbo')")
78
+ lines.append("")
79
+ lines.append(" if db_type == 'mssql':")
80
+ lines.append(" try:")
81
+ lines.append(" import pyodbc")
82
+ lines.append(" conn_str = (")
83
+ lines.append(" f'DRIVER={{ODBC Driver 17 for SQL Server}};'")
84
+ lines.append(" f'SERVER={host},{port};'")
85
+ lines.append(" f'DATABASE={database};'")
86
+ lines.append(" f'UID={username};'")
87
+ lines.append(" f'PWD={password}'")
88
+ lines.append(" )")
89
+ lines.append(" return pyodbc.connect(conn_str)")
90
+ lines.append(" except ImportError:")
91
+ lines.append(" pass")
92
+ lines.append("")
93
+ lines.append(" if db_type == 'postgresql':")
94
+ lines.append(" try:")
95
+ lines.append(" import psycopg2")
96
+ lines.append(" return psycopg2.connect(")
97
+ lines.append(" host=host, port=port, dbname=database,")
98
+ lines.append(" user=username, password=password")
99
+ lines.append(" )")
100
+ lines.append(" except ImportError:")
101
+ lines.append(" pass")
102
+ lines.append("")
103
+ lines.append(" if db_type == 'oracle':")
104
+ lines.append(" try:")
105
+ lines.append(" import cx_Oracle")
106
+ lines.append(" dsn = cx_Oracle.makedsn(host, port, service_name=database)")
107
+ lines.append(" return cx_Oracle.connect(username, password, dsn)")
108
+ lines.append(" except ImportError:")
109
+ lines.append(" pass")
110
+ lines.append("")
111
+ lines.append(" jdbc_url = conn_config.get('jdbc_url', '')")
112
+ lines.append(" if jdbc_url:")
113
+ lines.append(" try:")
114
+ lines.append(" import jaydebeapi")
115
+ lines.append(" driver = conn_config.get('jdbc_driver', '')")
116
+ lines.append(" return jaydebeapi.connect(driver, jdbc_url, [username, password])")
117
+ lines.append(" except ImportError:")
118
+ lines.append(" pass")
119
+ lines.append("")
120
+ lines.append(" raise ConnectionError(f'Cannot create connection for type: {db_type}')")
121
+ lines.append("")
122
+ lines.append("")
123
+
124
+ if data_lib in ("pandas", "modin"):
125
+ read_func = "pd.read_sql"
126
+ elif data_lib == "dask":
127
+ read_func = "dd.read_sql_table"
128
+ elif data_lib == "polars":
129
+ read_func = "pl.read_database"
130
+ else:
131
+ read_func = "pd.read_sql"
132
+
133
+ lines.append("def read_from_db(config, query, connection_name='default'):")
134
+ lines.append(' """Read data from database using SQL query."""')
135
+ lines.append(" conn = get_db_connection(config, connection_name)")
136
+ lines.append(" try:")
137
+ lines.append(f" logger.info(f'Executing query on {{connection_name}}')")
138
+ if data_lib == "polars":
139
+ lines.append(" df = pl.read_database(query, conn)")
140
+ elif data_lib == "dask":
141
+ lines.append(" df = dd.read_sql_query(query, conn)")
142
+ else:
143
+ lines.append(f" df = {read_func}(query, conn)")
144
+ lines.append(" logger.info(f'Read {{len(df)}} rows from {{connection_name}}')")
145
+ lines.append(" return df")
146
+ lines.append(" except Exception as e:")
147
+ lines.append(" logger.error(f'DB read error on {{connection_name}}: {{e}}')")
148
+ lines.append(" raise")
149
+ lines.append(" finally:")
150
+ lines.append(" conn.close()")
151
+ lines.append("")
152
+ lines.append("")
153
+ lines.append("def write_to_db(config, df, table_name, connection_name='default', if_exists='append', schema=None):")
154
+ lines.append(' """Write dataframe to database table."""')
155
+ lines.append(" conn = get_db_connection(config, connection_name)")
156
+ lines.append(" conn_config = config.get('connections', {}).get(connection_name, {})")
157
+ lines.append(" if schema is None:")
158
+ lines.append(" schema = conn_config.get('schema', 'dbo')")
159
+ lines.append(" try:")
160
+ lines.append(" logger.info(f'Writing {{len(df)}} rows to {{schema}}.{{table_name}}')")
161
+ if data_lib == "polars":
162
+ lines.append(" df.to_pandas().to_sql(table_name, conn, schema=schema, if_exists=if_exists, index=False)")
163
+ elif data_lib == "dask":
164
+ lines.append(" df.compute().to_sql(table_name, conn, schema=schema, if_exists=if_exists, index=False)")
165
+ else:
166
+ lines.append(" df.to_sql(table_name, conn, schema=schema, if_exists=if_exists, index=False)")
167
+ lines.append(" logger.info(f'Successfully wrote to {{schema}}.{{table_name}}')")
168
+ lines.append(" except Exception as e:")
169
+ lines.append(" logger.error(f'DB write error to {{schema}}.{{table_name}}: {{e}}')")
170
+ lines.append(" raise")
171
+ lines.append(" finally:")
172
+ lines.append(" conn.close()")
173
+ lines.append("")
174
+ lines.append("")
175
+ lines.append("def execute_sql(config, sql, connection_name='default'):")
176
+ lines.append(' """Execute a SQL statement (INSERT, UPDATE, DELETE, DDL)."""')
177
+ lines.append(" conn = get_db_connection(config, connection_name)")
178
+ lines.append(" try:")
179
+ lines.append(" cursor = conn.cursor()")
180
+ lines.append(" cursor.execute(sql)")
181
+ lines.append(" conn.commit()")
182
+ lines.append(" logger.info(f'Executed SQL on {{connection_name}}')")
183
+ lines.append(" except Exception as e:")
184
+ lines.append(" logger.error(f'SQL execution error: {{e}}')")
185
+ lines.append(" conn.rollback()")
186
+ lines.append(" raise")
187
+ lines.append(" finally:")
188
+ lines.append(" conn.close()")
189
+ lines.append("")
190
+ lines.append("")
191
+
192
+
193
+ def _add_file_functions(lines, data_lib):
194
+ lines.append("# ============================================================")
195
+ lines.append("# File Operations")
196
+ lines.append("# ============================================================")
197
+ lines.append("")
198
+ lines.append("")
199
+ lines.append("def read_file(file_path, file_config=None):")
200
+ lines.append(' """')
201
+ lines.append(' Read data from file based on extension and configuration.')
202
+ lines.append(' Supports: .csv, .dat, .txt, .xml, .xlsx, .xls, .json, .parquet, and files with no extension.')
203
+ lines.append(' """')
204
+ lines.append(" if file_config is None:")
205
+ lines.append(" file_config = {}")
206
+ lines.append("")
207
+ lines.append(" ext = os.path.splitext(file_path)[1].lower()")
208
+ lines.append(" delimiter = file_config.get('delimiter', ',')")
209
+ lines.append(" header = file_config.get('header', True)")
210
+ lines.append(" encoding = file_config.get('encoding', 'utf-8')")
211
+ lines.append(" header_row = 0 if header else None")
212
+ lines.append("")
213
+ lines.append(" logger.info(f'Reading file: {file_path} (ext={ext})')")
214
+ lines.append("")
215
+ lines.append(" try:")
216
+
217
+ if data_lib == "polars":
218
+ lines.append(" if ext in ('.csv', '.dat', '.txt', ''):")
219
+ lines.append(" return pl.read_csv(file_path, separator=delimiter, has_header=header, encoding=encoding)")
220
+ lines.append(" elif ext in ('.xlsx', '.xls'):")
221
+ lines.append(" return pl.read_excel(file_path)")
222
+ lines.append(" elif ext == '.xml':")
223
+ lines.append(" import xml.etree.ElementTree as ET")
224
+ lines.append(" tree = ET.parse(file_path)")
225
+ lines.append(" root = tree.getroot()")
226
+ lines.append(" return _xml_to_polars(root, file_config)")
227
+ lines.append(" elif ext == '.json':")
228
+ lines.append(" return pl.read_json(file_path)")
229
+ lines.append(" elif ext == '.parquet':")
230
+ lines.append(" return pl.read_parquet(file_path)")
231
+ lines.append(" else:")
232
+ lines.append(" return pl.read_csv(file_path, separator=delimiter, has_header=header)")
233
+ elif data_lib == "dask":
234
+ lines.append(" if ext in ('.csv', '.dat', '.txt', ''):")
235
+ lines.append(" return dd.read_csv(file_path, sep=delimiter, header=header_row, encoding=encoding)")
236
+ lines.append(" elif ext in ('.xlsx', '.xls'):")
237
+ lines.append(" return dd.from_pandas(pd.read_excel(file_path, header=header_row), npartitions=1)")
238
+ lines.append(" elif ext == '.xml':")
239
+ lines.append(" return dd.from_pandas(_read_xml_to_pandas(file_path, file_config), npartitions=1)")
240
+ lines.append(" elif ext == '.json':")
241
+ lines.append(" return dd.read_json(file_path)")
242
+ lines.append(" elif ext == '.parquet':")
243
+ lines.append(" return dd.read_parquet(file_path)")
244
+ lines.append(" else:")
245
+ lines.append(" return dd.read_csv(file_path, sep=delimiter, header=header_row)")
246
+ else:
247
+ lines.append(" if ext in ('.csv', '.dat', '.txt', ''):")
248
+ lines.append(" return pd.read_csv(file_path, sep=delimiter, header=header_row, encoding=encoding)")
249
+ lines.append(" elif ext in ('.xlsx', '.xls'):")
250
+ lines.append(" return pd.read_excel(file_path, header=header_row)")
251
+ lines.append(" elif ext == '.xml':")
252
+ lines.append(" return _read_xml_to_pandas(file_path, file_config)")
253
+ lines.append(" elif ext == '.json':")
254
+ lines.append(" return pd.read_json(file_path)")
255
+ lines.append(" elif ext == '.parquet':")
256
+ lines.append(" return pd.read_parquet(file_path)")
257
+ lines.append(" else:")
258
+ lines.append(" return pd.read_csv(file_path, sep=delimiter, header=header_row)")
259
+
260
+ lines.append(" except Exception as e:")
261
+ lines.append(" logger.error(f'File read error for {file_path}: {e}')")
262
+ lines.append(" raise")
263
+ lines.append("")
264
+ lines.append("")
265
+ lines.append("def write_file(df, file_path, file_config=None):")
266
+ lines.append(' """Write dataframe to file based on extension."""')
267
+ lines.append(" if file_config is None:")
268
+ lines.append(" file_config = {}")
269
+ lines.append("")
270
+ lines.append(" ext = os.path.splitext(file_path)[1].lower()")
271
+ lines.append(" delimiter = file_config.get('delimiter', ',')")
272
+ lines.append(" header = file_config.get('header', True)")
273
+ lines.append(" encoding = file_config.get('encoding', 'utf-8')")
274
+ lines.append("")
275
+ lines.append(" os.makedirs(os.path.dirname(file_path) or '.', exist_ok=True)")
276
+ lines.append(" logger.info(f'Writing file: {file_path}')")
277
+ lines.append("")
278
+ lines.append(" try:")
279
+
280
+ if data_lib == "polars":
281
+ lines.append(" if ext in ('.csv', '.dat', '.txt', ''):")
282
+ lines.append(" df.write_csv(file_path, separator=delimiter, has_header=header)")
283
+ lines.append(" elif ext in ('.xlsx', '.xls'):")
284
+ lines.append(" df.write_excel(file_path)")
285
+ lines.append(" elif ext == '.json':")
286
+ lines.append(" df.write_json(file_path)")
287
+ lines.append(" elif ext == '.parquet':")
288
+ lines.append(" df.write_parquet(file_path)")
289
+ lines.append(" else:")
290
+ lines.append(" df.write_csv(file_path, separator=delimiter, has_header=header)")
291
+ elif data_lib == "dask":
292
+ lines.append(" if ext in ('.csv', '.dat', '.txt', ''):")
293
+ lines.append(" df.compute().to_csv(file_path, sep=delimiter, header=header, index=False, encoding=encoding)")
294
+ lines.append(" elif ext in ('.xlsx', '.xls'):")
295
+ lines.append(" df.compute().to_excel(file_path, header=header, index=False)")
296
+ lines.append(" elif ext == '.json':")
297
+ lines.append(" df.compute().to_json(file_path)")
298
+ lines.append(" elif ext == '.parquet':")
299
+ lines.append(" df.to_parquet(file_path)")
300
+ lines.append(" else:")
301
+ lines.append(" df.compute().to_csv(file_path, sep=delimiter, header=header, index=False)")
302
+ else:
303
+ lines.append(" if ext in ('.csv', '.dat', '.txt', ''):")
304
+ lines.append(" df.to_csv(file_path, sep=delimiter, header=header, index=False, encoding=encoding)")
305
+ lines.append(" elif ext in ('.xlsx', '.xls'):")
306
+ lines.append(" df.to_excel(file_path, header=header, index=False)")
307
+ lines.append(" elif ext == '.json':")
308
+ lines.append(" df.to_json(file_path)")
309
+ lines.append(" elif ext == '.parquet':")
310
+ lines.append(" df.to_parquet(file_path, index=False)")
311
+ lines.append(" else:")
312
+ lines.append(" df.to_csv(file_path, sep=delimiter, header=header, index=False)")
313
+
314
+ lines.append(" except Exception as e:")
315
+ lines.append(" logger.error(f'File write error for {file_path}: {e}')")
316
+ lines.append(" raise")
317
+ lines.append("")
318
+ lines.append("")
319
+
320
+ if data_lib != "polars":
321
+ lines.append("def _read_xml_to_pandas(file_path, file_config):")
322
+ lines.append(' """Parse XML file into a pandas DataFrame."""')
323
+ lines.append(" import xml.etree.ElementTree as ET")
324
+ lines.append(" nested = file_config.get('nested', False)")
325
+ lines.append(" row_tag = file_config.get('row_tag', None)")
326
+ lines.append(" tree = ET.parse(file_path)")
327
+ lines.append(" root = tree.getroot()")
328
+ lines.append(" if row_tag:")
329
+ lines.append(" rows = root.findall(f'.//{row_tag}')")
330
+ lines.append(" else:")
331
+ lines.append(" rows = list(root)")
332
+ lines.append(" records = []")
333
+ lines.append(" for row in rows:")
334
+ lines.append(" record = dict(row.attrib)")
335
+ lines.append(" for child in row:")
336
+ lines.append(" tag = child.tag.split('}')[-1] if '}' in child.tag else child.tag")
337
+ lines.append(" if nested and len(child) > 0:")
338
+ lines.append(" record[tag] = ET.tostring(child, encoding='unicode')")
339
+ lines.append(" else:")
340
+ lines.append(" record[tag] = child.text")
341
+ lines.append(" records.append(record)")
342
+ lines.append(" return pd.DataFrame(records)")
343
+ lines.append("")
344
+ lines.append("")
345
+ else:
346
+ lines.append("def _xml_to_polars(root, file_config):")
347
+ lines.append(' """Parse XML element tree into a Polars DataFrame."""')
348
+ lines.append(" import xml.etree.ElementTree as ET")
349
+ lines.append(" row_tag = file_config.get('row_tag', None)")
350
+ lines.append(" if row_tag:")
351
+ lines.append(" rows = root.findall(f'.//{row_tag}')")
352
+ lines.append(" else:")
353
+ lines.append(" rows = list(root)")
354
+ lines.append(" records = []")
355
+ lines.append(" for row in rows:")
356
+ lines.append(" record = dict(row.attrib)")
357
+ lines.append(" for child in row:")
358
+ lines.append(" tag = child.tag.split('}')[-1] if '}' in child.tag else child.tag")
359
+ lines.append(" record[tag] = child.text")
360
+ lines.append(" records.append(record)")
361
+ lines.append(" return pl.DataFrame(records)")
362
+ lines.append("")
363
+ lines.append("")
364
+
365
+
366
+ def _add_expression_helpers(lines):
367
+ lines.append("# ============================================================")
368
+ lines.append("# Informatica Expression Helper Functions")
369
+ lines.append("# ============================================================")
370
+ lines.append("")
371
+ lines.append("")
372
+ lines.append("def iif_expr(condition, true_val, false_val=None):")
373
+ lines.append(' """Informatica IIF equivalent."""')
374
+ lines.append(" return true_val if condition else false_val")
375
+ lines.append("")
376
+ lines.append("")
377
+ lines.append("def decode_expr(value, *args):")
378
+ lines.append(' """Informatica DECODE equivalent."""')
379
+ lines.append(" pairs = list(args)")
380
+ lines.append(" default = pairs.pop() if len(pairs) % 2 != 0 else None")
381
+ lines.append(" for i in range(0, len(pairs), 2):")
382
+ lines.append(" if value == pairs[i]:")
383
+ lines.append(" return pairs[i + 1]")
384
+ lines.append(" return default")
385
+ lines.append("")
386
+ lines.append("")
387
+ lines.append("def nvl(value, default):")
388
+ lines.append(' """Informatica NVL equivalent."""')
389
+ lines.append(" return default if value is None else value")
390
+ lines.append("")
391
+ lines.append("")
392
+ lines.append("def nvl2(value, not_null_val, null_val):")
393
+ lines.append(' """Informatica NVL2 equivalent."""')
394
+ lines.append(" return not_null_val if value is not None else null_val")
395
+ lines.append("")
396
+ lines.append("")
397
+ lines.append("def isnull(value):")
398
+ lines.append(' """Informatica ISNULL equivalent."""')
399
+ lines.append(" return value is None")
400
+ lines.append("")
401
+ lines.append("")
402
+ lines.append("def ltrim(value, trim_str=None):")
403
+ lines.append(' """Informatica LTRIM equivalent."""')
404
+ lines.append(" if value is None:")
405
+ lines.append(" return None")
406
+ lines.append(" return str(value).lstrip(trim_str)")
407
+ lines.append("")
408
+ lines.append("")
409
+ lines.append("def rtrim(value, trim_str=None):")
410
+ lines.append(' """Informatica RTRIM equivalent."""')
411
+ lines.append(" if value is None:")
412
+ lines.append(" return None")
413
+ lines.append(" return str(value).rstrip(trim_str)")
414
+ lines.append("")
415
+ lines.append("")
416
+ lines.append("def substr(value, start, length=None):")
417
+ lines.append(' """Informatica SUBSTR equivalent (1-based index)."""')
418
+ lines.append(" if value is None:")
419
+ lines.append(" return None")
420
+ lines.append(" s = str(value)")
421
+ lines.append(" start_idx = max(start - 1, 0)")
422
+ lines.append(" if length is not None:")
423
+ lines.append(" return s[start_idx:start_idx + length]")
424
+ lines.append(" return s[start_idx:]")
425
+ lines.append("")
426
+ lines.append("")
427
+ lines.append("def to_char(value, fmt=None):")
428
+ lines.append(' """Informatica TO_CHAR equivalent."""')
429
+ lines.append(" if value is None:")
430
+ lines.append(" return None")
431
+ lines.append(" if fmt and hasattr(value, 'strftime'):")
432
+ lines.append(" py_fmt = fmt.replace('YYYY', '%Y').replace('MM', '%m').replace('DD', '%d')")
433
+ lines.append(" py_fmt = py_fmt.replace('HH24', '%H').replace('MI', '%M').replace('SS', '%S')")
434
+ lines.append(" return value.strftime(py_fmt)")
435
+ lines.append(" return str(value)")
436
+ lines.append("")
437
+ lines.append("")
438
+ lines.append("def to_date(value, fmt=None):")
439
+ lines.append(' """Informatica TO_DATE equivalent."""')
440
+ lines.append(" if value is None:")
441
+ lines.append(" return None")
442
+ lines.append(" if fmt:")
443
+ lines.append(" py_fmt = fmt.replace('YYYY', '%Y').replace('MM', '%m').replace('DD', '%d')")
444
+ lines.append(" py_fmt = py_fmt.replace('HH24', '%H').replace('MI', '%M').replace('SS', '%S')")
445
+ lines.append(" return datetime.strptime(str(value), py_fmt)")
446
+ lines.append(" return datetime.fromisoformat(str(value))")
447
+ lines.append("")
448
+ lines.append("")
449
+ lines.append("def to_integer(value):")
450
+ lines.append(' """Informatica TO_INTEGER equivalent."""')
451
+ lines.append(" if value is None:")
452
+ lines.append(" return None")
453
+ lines.append(" return int(float(str(value)))")
454
+ lines.append("")
455
+ lines.append("")
456
+ lines.append("def to_bigint(value):")
457
+ lines.append(' """Informatica TO_BIGINT equivalent."""')
458
+ lines.append(" return to_integer(value)")
459
+ lines.append("")
460
+ lines.append("")
461
+ lines.append("def to_float(value):")
462
+ lines.append(' """Informatica TO_FLOAT equivalent."""')
463
+ lines.append(" if value is None:")
464
+ lines.append(" return None")
465
+ lines.append(" return float(str(value))")
466
+ lines.append("")
467
+ lines.append("")
468
+ lines.append("def to_decimal(value, scale=0):")
469
+ lines.append(' """Informatica TO_DECIMAL equivalent."""')
470
+ lines.append(" if value is None:")
471
+ lines.append(" return None")
472
+ lines.append(" return round(float(str(value)), scale)")
473
+ lines.append("")
474
+ lines.append("")
475
+ lines.append("def replacechr(search_type, source, search_chars, replace_char):")
476
+ lines.append(' """Informatica REPLACECHR equivalent."""')
477
+ lines.append(" if source is None:")
478
+ lines.append(" return None")
479
+ lines.append(" result = str(source)")
480
+ lines.append(" for ch in str(search_chars):")
481
+ lines.append(" result = result.replace(ch, str(replace_char) if replace_char else '')")
482
+ lines.append(" return result")
483
+ lines.append("")
484
+ lines.append("")
485
+ lines.append("def replacestr(search_type, source, search_str, replace_str):")
486
+ lines.append(' """Informatica REPLACESTR equivalent."""')
487
+ lines.append(" if source is None:")
488
+ lines.append(" return None")
489
+ lines.append(" return str(source).replace(str(search_str), str(replace_str) if replace_str else '')")
490
+ lines.append("")
491
+ lines.append("")
492
+ lines.append("def instr(value, search, start=1, occurrence=1):")
493
+ lines.append(' """Informatica INSTR equivalent (1-based)."""')
494
+ lines.append(" if value is None or search is None:")
495
+ lines.append(" return 0")
496
+ lines.append(" s = str(value)")
497
+ lines.append(" needle = str(search)")
498
+ lines.append(" idx = max(start - 1, 0)")
499
+ lines.append(" for _ in range(occurrence):")
500
+ lines.append(" found = s.find(needle, idx)")
501
+ lines.append(" if found == -1:")
502
+ lines.append(" return 0")
503
+ lines.append(" idx = found + 1")
504
+ lines.append(" return idx")
505
+ lines.append("")
506
+ lines.append("")
507
+ lines.append("def lpad(value, length, pad_char=' '):")
508
+ lines.append(' """Informatica LPAD equivalent."""')
509
+ lines.append(" if value is None:")
510
+ lines.append(" return None")
511
+ lines.append(" return str(value).rjust(length, str(pad_char)[0] if pad_char else ' ')")
512
+ lines.append("")
513
+ lines.append("")
514
+ lines.append("def rpad(value, length, pad_char=' '):")
515
+ lines.append(' """Informatica RPAD equivalent."""')
516
+ lines.append(" if value is None:")
517
+ lines.append(" return None")
518
+ lines.append(" return str(value).ljust(length, str(pad_char)[0] if pad_char else ' ')")
519
+ lines.append("")
520
+ lines.append("")
521
+ lines.append("def length(value):")
522
+ lines.append(' """Informatica LENGTH equivalent."""')
523
+ lines.append(" if value is None:")
524
+ lines.append(" return 0")
525
+ lines.append(" return len(str(value))")
526
+ lines.append("")
527
+ lines.append("")
528
+ lines.append("def upper(value):")
529
+ lines.append(' """Informatica UPPER equivalent."""')
530
+ lines.append(" return str(value).upper() if value is not None else None")
531
+ lines.append("")
532
+ lines.append("")
533
+ lines.append("def lower(value):")
534
+ lines.append(' """Informatica LOWER equivalent."""')
535
+ lines.append(" return str(value).lower() if value is not None else None")
536
+ lines.append("")
537
+ lines.append("")
538
+ lines.append("def concat(*args):")
539
+ lines.append(' """Informatica CONCAT equivalent."""')
540
+ lines.append(" return ''.join(str(a) if a is not None else '' for a in args)")
541
+ lines.append("")
542
+ lines.append("")
543
+ lines.append("def is_date(value, fmt=None):")
544
+ lines.append(' """Informatica IS_DATE equivalent."""')
545
+ lines.append(" try:")
546
+ lines.append(" if fmt:")
547
+ lines.append(" py_fmt = fmt.replace('YYYY', '%Y').replace('MM', '%m').replace('DD', '%d')")
548
+ lines.append(" datetime.strptime(str(value), py_fmt)")
549
+ lines.append(" else:")
550
+ lines.append(" datetime.fromisoformat(str(value))")
551
+ lines.append(" return True")
552
+ lines.append(" except (ValueError, TypeError):")
553
+ lines.append(" return False")
554
+ lines.append("")
555
+ lines.append("")
556
+ lines.append("def is_number(value):")
557
+ lines.append(' """Informatica IS_NUMBER equivalent."""')
558
+ lines.append(" try:")
559
+ lines.append(" float(str(value))")
560
+ lines.append(" return True")
561
+ lines.append(" except (ValueError, TypeError):")
562
+ lines.append(" return False")
563
+ lines.append("")
564
+ lines.append("")
565
+ lines.append("def is_spaces(value):")
566
+ lines.append(' """Informatica IS_SPACES equivalent."""')
567
+ lines.append(" return value is None or str(value).strip() == ''")
568
+ lines.append("")
569
+ lines.append("")
570
+ lines.append("def reg_extract(value, pattern, group_num=0):")
571
+ lines.append(' """Informatica REG_EXTRACT equivalent."""')
572
+ lines.append(" import re")
573
+ lines.append(" if value is None:")
574
+ lines.append(" return None")
575
+ lines.append(" match = re.search(pattern, str(value))")
576
+ lines.append(" if match:")
577
+ lines.append(" return match.group(group_num)")
578
+ lines.append(" return None")
579
+ lines.append("")
580
+ lines.append("")
581
+ lines.append("def reg_match(value, pattern):")
582
+ lines.append(' """Informatica REG_MATCH equivalent."""')
583
+ lines.append(" import re")
584
+ lines.append(" if value is None:")
585
+ lines.append(" return False")
586
+ lines.append(" return bool(re.search(pattern, str(value)))")
587
+ lines.append("")
588
+ lines.append("")
589
+ lines.append("def reg_replace(value, pattern, replacement):")
590
+ lines.append(' """Informatica REG_REPLACE equivalent."""')
591
+ lines.append(" import re")
592
+ lines.append(" if value is None:")
593
+ lines.append(" return None")
594
+ lines.append(" return re.sub(pattern, replacement, str(value))")
595
+ lines.append("")
596
+ lines.append("")
597
+ lines.append("def get_date_part(part, date_val):")
598
+ lines.append(' """Informatica GET_DATE_PART equivalent."""')
599
+ lines.append(" if date_val is None:")
600
+ lines.append(" return None")
601
+ lines.append(" if isinstance(date_val, str):")
602
+ lines.append(" date_val = datetime.fromisoformat(date_val)")
603
+ lines.append(" part_map = {'YYYY': date_val.year, 'YY': date_val.year % 100,")
604
+ lines.append(" 'MM': date_val.month, 'DD': date_val.day,")
605
+ lines.append(" 'HH': date_val.hour, 'MI': date_val.minute, 'SS': date_val.second}")
606
+ lines.append(" return part_map.get(part.upper(), None)")
607
+ lines.append("")
608
+ lines.append("")
609
+ lines.append("def add_to_date(date_val, amount, part='DD'):")
610
+ lines.append(' """Informatica ADD_TO_DATE equivalent."""')
611
+ lines.append(" from datetime import timedelta")
612
+ lines.append(" if date_val is None:")
613
+ lines.append(" return None")
614
+ lines.append(" if isinstance(date_val, str):")
615
+ lines.append(" date_val = datetime.fromisoformat(date_val)")
616
+ lines.append(" if part.upper() in ('DD', 'D', 'DAY', 'DDD'):")
617
+ lines.append(" return date_val + timedelta(days=amount)")
618
+ lines.append(" elif part.upper() in ('HH', 'HH24', 'HOUR'):")
619
+ lines.append(" return date_val + timedelta(hours=amount)")
620
+ lines.append(" elif part.upper() in ('MI', 'MIN', 'MINUTE'):")
621
+ lines.append(" return date_val + timedelta(minutes=amount)")
622
+ lines.append(" elif part.upper() in ('SS', 'SEC', 'SECOND'):")
623
+ lines.append(" return date_val + timedelta(seconds=amount)")
624
+ lines.append(" elif part.upper() in ('MM', 'MON', 'MONTH'):")
625
+ lines.append(" month = date_val.month + amount")
626
+ lines.append(" year = date_val.year + (month - 1) // 12")
627
+ lines.append(" month = (month - 1) % 12 + 1")
628
+ lines.append(" return date_val.replace(year=year, month=month)")
629
+ lines.append(" elif part.upper() in ('YYYY', 'YY', 'YEAR'):")
630
+ lines.append(" return date_val.replace(year=date_val.year + amount)")
631
+ lines.append(" return date_val")
632
+ lines.append("")
633
+ lines.append("")
634
+ lines.append("def raise_error(message):")
635
+ lines.append(' """Informatica ERROR function equivalent."""')
636
+ lines.append(" logger.error(f'INFORMATICA ERROR: {message}')")
637
+ lines.append(" raise RuntimeError(message)")
638
+ lines.append("")
639
+ lines.append("")
640
+ lines.append("def abort_func(message):")
641
+ lines.append(' """Informatica ABORT function equivalent."""')
642
+ lines.append(" logger.critical(f'INFORMATICA ABORT: {message}')")
643
+ lines.append(" raise SystemExit(message)")
644
+ lines.append("")
645
+ lines.append("")
646
+ lines.append("def lookup_func(table, condition, *fields):")
647
+ lines.append(' """Placeholder for Informatica LOOKUP function."""')
648
+ lines.append(" logger.warning(f'LOOKUP called for table {table} - implement in mapping-specific code')")
649
+ lines.append(" return None")
650
+ lines.append("")
651
+ lines.append("")
652
+ lines.append("def get_variable(var_name):")
653
+ lines.append(' """Get workflow/mapping variable value."""')
654
+ lines.append(" return os.environ.get(f'INFA_VAR_{var_name}', '')")
655
+ lines.append("")
656
+ lines.append("")
657
+ lines.append("def set_variable(var_name, value):")
658
+ lines.append(' """Set workflow/mapping variable value."""')
659
+ lines.append(" os.environ[f'INFA_VAR_{var_name}'] = str(value)")
660
+ lines.append(" return value")
661
+ lines.append("")
662
+ lines.append("")
663
+ lines.append("current_timestamp = datetime.now")
664
+ lines.append("session_start_time = datetime.now")
665
+ lines.append("")
666
+ lines.append("")
667
+
668
+
669
+ def _add_utility_functions(lines):
670
+ lines.append("# ============================================================")
671
+ lines.append("# Utility Functions")
672
+ lines.append("# ============================================================")
673
+ lines.append("")
674
+ lines.append("")
675
+ lines.append("def log_mapping_start(mapping_name):")
676
+ lines.append(' """Log the start of a mapping execution."""')
677
+ lines.append(" logger.info(f'=== Starting mapping: {mapping_name} ===')")
678
+ lines.append(" return datetime.now()")
679
+ lines.append("")
680
+ lines.append("")
681
+ lines.append("def log_mapping_end(mapping_name, start_time, row_count=0):")
682
+ lines.append(' """Log the end of a mapping execution."""')
683
+ lines.append(" elapsed = (datetime.now() - start_time).total_seconds()")
684
+ lines.append(" logger.info(f'=== Completed mapping: {mapping_name} in {elapsed:.2f}s ({row_count} rows) ===')")
685
+ lines.append("")
686
+ lines.append("")
687
+ lines.append("def validate_row_count(df, mapping_name, min_rows=0):")
688
+ lines.append(' """Validate row count after transformation."""')
689
+ lines.append(" count = len(df) if hasattr(df, '__len__') else 0")
690
+ lines.append(" if count < min_rows:")
691
+ lines.append(" logger.warning(f'{mapping_name}: Expected at least {min_rows} rows, got {count}')")
692
+ lines.append(" return count")
693
+ lines.append("")