informatica-python 1.3.1__tar.gz → 1.4.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (27)
  1. {informatica_python-1.3.1 → informatica_python-1.4.1}/PKG-INFO +1 -1
  2. {informatica_python-1.3.1 → informatica_python-1.4.1}/informatica_python/generators/mapping_gen.py +173 -19
  3. informatica_python-1.4.1/informatica_python/generators/workflow_gen.py +388 -0
  4. {informatica_python-1.3.1 → informatica_python-1.4.1}/informatica_python.egg-info/PKG-INFO +1 -1
  5. {informatica_python-1.3.1 → informatica_python-1.4.1}/pyproject.toml +1 -1
  6. {informatica_python-1.3.1 → informatica_python-1.4.1}/tests/test_converter.py +197 -0
  7. informatica_python-1.3.1/informatica_python/generators/workflow_gen.py +0 -288
  8. {informatica_python-1.3.1 → informatica_python-1.4.1}/README.md +0 -0
  9. {informatica_python-1.3.1 → informatica_python-1.4.1}/informatica_python/__init__.py +0 -0
  10. {informatica_python-1.3.1 → informatica_python-1.4.1}/informatica_python/cli.py +0 -0
  11. {informatica_python-1.3.1 → informatica_python-1.4.1}/informatica_python/converter.py +0 -0
  12. {informatica_python-1.3.1 → informatica_python-1.4.1}/informatica_python/generators/__init__.py +0 -0
  13. {informatica_python-1.3.1 → informatica_python-1.4.1}/informatica_python/generators/config_gen.py +0 -0
  14. {informatica_python-1.3.1 → informatica_python-1.4.1}/informatica_python/generators/error_log_gen.py +0 -0
  15. {informatica_python-1.3.1 → informatica_python-1.4.1}/informatica_python/generators/helper_gen.py +0 -0
  16. {informatica_python-1.3.1 → informatica_python-1.4.1}/informatica_python/generators/sql_gen.py +0 -0
  17. {informatica_python-1.3.1 → informatica_python-1.4.1}/informatica_python/models.py +0 -0
  18. {informatica_python-1.3.1 → informatica_python-1.4.1}/informatica_python/parser.py +0 -0
  19. {informatica_python-1.3.1 → informatica_python-1.4.1}/informatica_python/utils/__init__.py +0 -0
  20. {informatica_python-1.3.1 → informatica_python-1.4.1}/informatica_python/utils/datatype_map.py +0 -0
  21. {informatica_python-1.3.1 → informatica_python-1.4.1}/informatica_python/utils/expression_converter.py +0 -0
  22. {informatica_python-1.3.1 → informatica_python-1.4.1}/informatica_python.egg-info/SOURCES.txt +0 -0
  23. {informatica_python-1.3.1 → informatica_python-1.4.1}/informatica_python.egg-info/dependency_links.txt +0 -0
  24. {informatica_python-1.3.1 → informatica_python-1.4.1}/informatica_python.egg-info/entry_points.txt +0 -0
  25. {informatica_python-1.3.1 → informatica_python-1.4.1}/informatica_python.egg-info/requires.txt +0 -0
  26. {informatica_python-1.3.1 → informatica_python-1.4.1}/informatica_python.egg-info/top_level.txt +0 -0
  27. {informatica_python-1.3.1 → informatica_python-1.4.1}/setup.cfg +0 -0
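What changed, in brief: 1.4.1 teaches the mapping generator to honor session-level overrides (relational connections plus source/target file locations), rewrites the workflow generator with worklet support, and adds pandas type casting at target writes. As a quick orientation before the hunks, here is a minimal sketch of the override dict that the new `_build_session_conn_overrides` helper collects and that generated mappings embed as `_sess_overrides`; the instance names and values are hypothetical, borrowed from the new tests:

# Hypothetical shape of the session-override dict (keys are mapping
# instance names; values merge connection refs and file attributes):
_sess_overrides = {
    "SQ_SRC": {                       # Source Qualifier reads via this connection
        "connection_name": "PROD_ORA",
        "connection_type": "Oracle",
        "connection_subtype": None,
        "variable": None,
    },
    "MY_TGT": {                       # target writes to this file instead
        "output_file_directory": "/data/output",
        "output_filename": "result.csv",
    },
}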
@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: informatica-python
- Version: 1.3.1
+ Version: 1.4.1
  Summary: Convert Informatica PowerCenter workflow XML to Python/PySpark code
  License-Expression: MIT
  Requires-Python: >=3.8
@@ -135,6 +135,46 @@ def _inline_mapplets(mapping, folder):
      return extra_transforms, extra_connectors + rewired_connectors, mapplet_instances
 
 
+ def _build_session_conn_overrides(mapping, folder):
+     overrides = {}
+     for session in folder.sessions:
+         if session.mapping_name != mapping.name:
+             continue
+         for sti in session.transform_instances:
+             inst_name = sti.instance_name or sti.transformation_name
+             for conn_ref in sti.connections:
+                 conn_key = conn_ref.connection_name or conn_ref.variable
+                 if conn_key:
+                     overrides[inst_name] = {
+                         "connection_name": conn_ref.connection_name,
+                         "connection_type": conn_ref.connection_type,
+                         "connection_subtype": conn_ref.connection_subtype,
+                         "variable": conn_ref.variable,
+                     }
+             for attr in sti.attributes:
+                 if attr.name == "Connection Information" and attr.value:
+                     if inst_name not in overrides:
+                         overrides[inst_name] = {}
+                     overrides[inst_name]["connection_info"] = attr.value
+                 elif attr.name == "Source File Directory" and attr.value:
+                     if inst_name not in overrides:
+                         overrides[inst_name] = {}
+                     overrides[inst_name]["source_file_directory"] = attr.value
+                 elif attr.name == "Source filename" and attr.value:
+                     if inst_name not in overrides:
+                         overrides[inst_name] = {}
+                     overrides[inst_name]["source_filename"] = attr.value
+                 elif attr.name == "Output File Directory" and attr.value:
+                     if inst_name not in overrides:
+                         overrides[inst_name] = {}
+                     overrides[inst_name]["output_file_directory"] = attr.value
+                 elif attr.name == "Output filename" and attr.value:
+                     if inst_name not in overrides:
+                         overrides[inst_name] = {}
+                     overrides[inst_name]["output_filename"] = attr.value
+     return overrides
+
+
  def generate_mapping_code(mapping: MappingDef, folder: FolderDef,
                            data_lib: str = "pandas", mapping_index: int = 1) -> str:
      lines = []
@@ -164,6 +204,7 @@ def generate_mapping_code(mapping: MappingDef, folder: FolderDef,
      transform_map = {t.name: t for t in all_transforms}
      connector_graph = _build_connector_graph(all_connectors)
      instance_map = {i.name: i for i in mapping.instances}
+     session_overrides = _build_session_conn_overrides(mapping, folder)
 
      lines.append(f"def run_{_safe_name(mapping.name)}(config):")
      lines.append(f'    """Execute mapping: {mapping.name}"""')
@@ -180,6 +221,11 @@ def generate_mapping_code(mapping: MappingDef, folder: FolderDef,
              lines.append(f"    {safe_var} = {default}")
          lines.append("")
 
+     if session_overrides:
+         lines.append("    # Session connection overrides")
+         lines.append(f"    _sess_overrides = {repr(session_overrides)}")
+         lines.append("")
+
      source_dfs = {}
      for src_name, src_def in source_map.items():
          safe = _safe_name(src_name)
@@ -189,13 +235,23 @@ def generate_mapping_code(mapping: MappingDef, folder: FolderDef,
                       if t.type in ("Source Qualifier", "Application Source Qualifier")]
      if sq_transforms:
          for sq in sq_transforms:
-             _generate_source_qualifier(lines, sq, source_map, source_dfs, connector_graph, instance_map)
+             _generate_source_qualifier(lines, sq, source_map, source_dfs, connector_graph, instance_map, session_overrides)
      else:
          for src_name, src_def in source_map.items():
              safe = _safe_name(src_name)
+             override = session_overrides.get(src_name, {})
              lines.append(f"    # Read source: {src_name}")
-             if src_def.database_type and src_def.database_type != "Flat File":
-                 conn_name = _safe_name(src_def.db_name) if src_def.db_name else "default"
+             if override.get("source_file_directory") or override.get("source_filename"):
+                 src_dir = override.get("source_file_directory", ".")
+                 src_file = override.get("source_filename", src_def.name)
+                 lines.append(f"    _src_path_{safe} = config.get('sources', {{}}).get('{src_def.name}', {{}}).get('file_path',")
+                 lines.append(f"        os.path.join('{src_dir}', '{src_file}'))")
+                 if src_def.flatfile:
+                     _emit_flatfile_read(lines, safe, src_def, file_path_override=True)
+                 else:
+                     lines.append(f"    df_{safe} = read_file(_src_path_{safe}, config.get('sources', {{}}).get('{src_def.name}', {{}}))")
+             elif src_def.database_type and src_def.database_type != "Flat File":
+                 conn_name = override.get("connection_name") or (_safe_name(src_def.db_name) if src_def.db_name else "default")
                  schema = src_def.owner_name or "dbo"
                  lines.append(f"    df_{safe} = read_from_db(config, 'SELECT * FROM {schema}.{src_name}', '{conn_name}')")
              elif src_def.flatfile:
@@ -213,7 +269,7 @@ def generate_mapping_code(mapping: MappingDef, folder: FolderDef,
          _generate_transformation(lines, tx, connector_graph, source_dfs, transform_map, instance_map)
 
      for tgt_name, tgt_def in target_map.items():
-         _generate_target_write(lines, tgt_name, tgt_def, connector_graph, source_dfs, transform_map, instance_map)
+         _generate_target_write(lines, tgt_name, tgt_def, connector_graph, source_dfs, transform_map, instance_map, session_overrides)
 
      lines.append("")
      lines.append(f"    log_mapping_end('{mapping.name}', start_time)")
@@ -267,15 +323,16 @@ def _flatfile_config_dict(ff):
      return cfg
 
 
- def _emit_flatfile_read(lines, var_name, src_def, indent="    "):
+ def _emit_flatfile_read(lines, var_name, src_def, indent="    ", file_path_override=None):
      ff = src_def.flatfile
      fc = _flatfile_config_dict(ff)
+     default_path = f"_src_path_{var_name}" if file_path_override else f"config.get('sources', {{}}).get('{src_def.name}', {{}}).get('file_path', '{src_def.name}')"
      if fc.get("fixed_width"):
          widths = []
          for fld in src_def.fields:
              widths.append(fld.precision if fld.precision else 10)
          lines.append(f"{indent}df_{var_name} = pd.read_fwf(")
-         lines.append(f"{indent}    config.get('sources', {{}}).get('{src_def.name}', {{}}).get('file_path', '{src_def.name}'),")
+         lines.append(f"{indent}    {default_path},")
          lines.append(f"{indent}    widths={widths},")
          hdr = fc.get("header_lines", 0)
          if hdr:
@@ -311,15 +368,22 @@ def _emit_flatfile_read(lines, var_name, src_def, indent="    "):
      if file_cfg:
          lines.append(f"{indent}ff_cfg_{var_name} = {repr(file_cfg)}")
          lines.append(f"{indent}ff_cfg_{var_name}.update(config.get('sources', {{}}).get('{src_def.name}', {{}}))")
-         lines.append(f"{indent}df_{var_name} = read_file(ff_cfg_{var_name}.get('file_path', '{src_def.name}'), ff_cfg_{var_name})")
+         if file_path_override:
+             lines.append(f"{indent}df_{var_name} = read_file({default_path}, ff_cfg_{var_name})")
+         else:
+             lines.append(f"{indent}df_{var_name} = read_file(ff_cfg_{var_name}.get('file_path', '{src_def.name}'), ff_cfg_{var_name})")
      else:
-         lines.append(f"{indent}df_{var_name} = read_file(config.get('sources', {{}}).get('{src_def.name}', {{}}).get('file_path', '{src_def.name}'),")
-         lines.append(f"{indent}    config.get('sources', {{}}).get('{src_def.name}', {{}}))")
+         if file_path_override:
+             lines.append(f"{indent}df_{var_name} = read_file({default_path}, config.get('sources', {{}}).get('{src_def.name}', {{}}))")
+         else:
+             lines.append(f"{indent}df_{var_name} = read_file(config.get('sources', {{}}).get('{src_def.name}', {{}}).get('file_path', '{src_def.name}'),")
+             lines.append(f"{indent}    config.get('sources', {{}}).get('{src_def.name}', {{}}))")
 
 
- def _emit_flatfile_write(lines, var_name, tgt_def, indent="    "):
+ def _emit_flatfile_write(lines, var_name, tgt_def, indent="    ", file_path_override=None):
      ff = tgt_def.flatfile
      fc = _flatfile_config_dict(ff)
+     default_path = f"_tgt_path_{var_name}" if file_path_override else f"config.get('targets', {{}}).get('{tgt_def.name}', {{}}).get('file_path', '{tgt_def.name}')"
      file_cfg = {}
      if "delimiter" in fc:
          file_cfg["delimiter"] = fc["delimiter"]
@@ -331,10 +395,16 @@ def _emit_flatfile_write(lines, var_name, tgt_def, indent="    "):
      if file_cfg:
          lines.append(f"{indent}ff_cfg_{var_name} = {repr(file_cfg)}")
          lines.append(f"{indent}ff_cfg_{var_name}.update(config.get('targets', {{}}).get('{tgt_def.name}', {{}}))")
-         lines.append(f"{indent}write_file(df_target_{var_name}, ff_cfg_{var_name}.get('file_path', '{tgt_def.name}'), ff_cfg_{var_name})")
+         if file_path_override:
+             lines.append(f"{indent}write_file(df_target_{var_name}, {default_path}, ff_cfg_{var_name})")
+         else:
+             lines.append(f"{indent}write_file(df_target_{var_name}, ff_cfg_{var_name}.get('file_path', '{tgt_def.name}'), ff_cfg_{var_name})")
      else:
-         lines.append(f"{indent}write_file(df_target_{var_name}, config.get('targets', {{}}).get('{tgt_def.name}', {{}}).get('file_path', '{tgt_def.name}'),")
-         lines.append(f"{indent}    config.get('targets', {{}}).get('{tgt_def.name}', {{}}))")
+         if file_path_override:
+             lines.append(f"{indent}write_file(df_target_{var_name}, {default_path}, config.get('targets', {{}}).get('{tgt_def.name}', {{}}))")
+         else:
+             lines.append(f"{indent}write_file(df_target_{var_name}, config.get('targets', {{}}).get('{tgt_def.name}', {{}}).get('file_path', '{tgt_def.name}'),")
+             lines.append(f"{indent}    config.get('targets', {{}}).get('{tgt_def.name}', {{}}))")
 
 
  def _build_source_map(mapping, folder):
@@ -405,7 +475,7 @@ def _get_processing_order(transformations, connector_graph, sq_transforms):
      return ordered
 
 
- def _generate_source_qualifier(lines, sq, source_map, source_dfs, connector_graph, instance_map):
+ def _generate_source_qualifier(lines, sq, source_map, source_dfs, connector_graph, instance_map, session_overrides=None):
      sq_safe = _safe_name(sq.name)
      sql_override = ""
      pre_sql = ""
@@ -442,7 +512,8 @@ def _generate_source_qualifier(lines, sq, source_map, source_dfs, connector_grap
      if sql_override:
          src_name = next(iter(connected_sources)) if connected_sources else "source"
          src_def = source_map.get(src_name, SourceDef(name=src_name))
-         conn_name = _safe_name(src_def.db_name) if src_def.db_name else "default"
+         sq_override = (session_overrides or {}).get(sq.name, {}) or (session_overrides or {}).get(src_name, {})
+         conn_name = sq_override.get("connection_name") or (_safe_name(src_def.db_name) if src_def.db_name else "default")
 
          lines.append(f"    sql_{sq_safe} = '''")
          for sql_line in sql_override.strip().split("\n"):
@@ -454,8 +525,9 @@ def _generate_source_qualifier(lines, sq, source_map, source_dfs, connector_grap
          src_name = next(iter(connected_sources))
          src_def = source_map.get(src_name, SourceDef(name=src_name))
          safe_src = _safe_name(src_name)
+         src_override = (session_overrides or {}).get(sq.name, {}) or (session_overrides or {}).get(src_name, {})
          if src_def.database_type and src_def.database_type != "Flat File":
-             conn_name = _safe_name(src_def.db_name) if src_def.db_name else "default"
+             conn_name = src_override.get("connection_name") or (_safe_name(src_def.db_name) if src_def.db_name else "default")
              schema = src_def.owner_name or "dbo"
              cols = ", ".join(f.name for f in src_def.fields) if src_def.fields else "*"
              lines.append(f"    df_{sq_safe} = read_from_db(config, 'SELECT {cols} FROM {schema}.{src_def.name}', '{conn_name}')")
@@ -1101,7 +1173,7 @@ def _gen_sql_transform(lines, tx, tx_safe, input_df, source_dfs):
      source_dfs[tx.name] = f"df_{tx_safe}"
 
 
- def _generate_target_write(lines, tgt_name, tgt_def, connector_graph, source_dfs, transform_map, instance_map):
+ def _generate_target_write(lines, tgt_name, tgt_def, connector_graph, source_dfs, transform_map, instance_map, session_overrides=None):
      tgt_safe = _safe_name(tgt_name)
 
      to_conns = connector_graph.get("to", {}).get(tgt_name, [])
@@ -1133,10 +1205,92 @@ def _generate_target_write(lines, tgt_name, tgt_def, connector_graph, source_dfs
      else:
          lines.append(f"    df_target_{tgt_safe} = {input_df}")
 
-     if tgt_def.database_type and tgt_def.database_type != "Flat File":
-         lines.append(f"    write_to_db(config, df_target_{tgt_safe}, '{tgt_def.name}', 'target')")
+     _emit_type_casting(lines, tgt_safe, tgt_def)
+
+     tgt_override = (session_overrides or {}).get(tgt_name, {})
+     tgt_conn = tgt_override.get("connection_name")
+
+     if tgt_override.get("output_file_directory") or tgt_override.get("output_filename"):
+         out_dir = tgt_override.get("output_file_directory", ".")
+         out_file = tgt_override.get("output_filename", tgt_def.name)
+         lines.append(f"    _tgt_path_{tgt_safe} = config.get('targets', {{}}).get('{tgt_def.name}', {{}}).get('file_path',")
+         lines.append(f"        os.path.join('{out_dir}', '{out_file}'))")
+         if tgt_def.flatfile:
+             _emit_flatfile_write(lines, tgt_safe, tgt_def, file_path_override=True)
+         else:
+             lines.append(f"    write_file(df_target_{tgt_safe}, _tgt_path_{tgt_safe}, config.get('targets', {{}}).get('{tgt_def.name}', {{}}))")
+     elif tgt_def.database_type and tgt_def.database_type != "Flat File":
+         conn_label = tgt_conn or "target"
+         lines.append(f"    write_to_db(config, df_target_{tgt_safe}, '{tgt_def.name}', '{conn_label}')")
      elif tgt_def.flatfile:
          _emit_flatfile_write(lines, tgt_safe, tgt_def)
      else:
          lines.append(f"    write_file(df_target_{tgt_safe}, config.get('targets', {{}}).get('{tgt_def.name}', {{}}).get('file_path', '{tgt_def.name}'),")
          lines.append(f"        config.get('targets', {{}}).get('{tgt_def.name}', {{}}))")
+
+
+ CAST_MAP = {
+     "bigint": ("int", "Int64"),
+     "integer": ("int", "Int32"),
+     "int": ("int", "Int32"),
+     "small integer": ("int", "Int16"),
+     "smallint": ("int", "Int16"),
+     "tinyint": ("int", "Int8"),
+     "numeric": ("float", "float64"),
+     "decimal": ("float", "float64"),
+     "float": ("float", "float64"),
+     "double": ("float", "float64"),
+     "real": ("float", "float32"),
+     "money": ("float", "float64"),
+     "smallmoney": ("float", "float64"),
+     "string": ("str", "object"),
+     "nstring": ("str", "object"),
+     "text": ("str", "object"),
+     "ntext": ("str", "object"),
+     "varchar": ("str", "object"),
+     "nvarchar": ("str", "object"),
+     "char": ("str", "object"),
+     "nchar": ("str", "object"),
+     "date/time": ("str", "datetime64[ns]"),
+     "datetime": ("str", "datetime64[ns]"),
+     "datetime2": ("str", "datetime64[ns]"),
+     "date": ("str", "datetime64[ns]"),
+     "timestamp": ("str", "datetime64[ns]"),
+     "bit": ("bool", "boolean"),
+     "boolean": ("bool", "boolean"),
+ }
+
+
+ def _emit_type_casting(lines, tgt_safe, tgt_def):
+     cast_ops = []
+     for fld in tgt_def.fields:
+         dt_key = fld.datatype.lower().strip()
+         if dt_key not in CAST_MAP:
+             continue
+         py_type, pd_dtype = CAST_MAP[dt_key]
+         if pd_dtype in ("datetime64[ns]",):
+             cast_ops.append((fld.name, "datetime", pd_dtype, fld.nullable == "NULL"))
+         elif pd_dtype in ("Int64", "Int32", "Int16", "Int8"):
+             cast_ops.append((fld.name, "int", pd_dtype, fld.nullable == "NULL"))
+         elif pd_dtype in ("float64", "float32"):
+             cast_ops.append((fld.name, "float", pd_dtype, fld.nullable == "NULL"))
+         elif pd_dtype == "boolean":
+             cast_ops.append((fld.name, "bool", pd_dtype, fld.nullable == "NULL"))
+
+     if not cast_ops:
+         return
+
+     lines.append(f"    # Type casting for target fields")
+     for col_name, cast_type, pd_dtype, nullable in cast_ops:
+         lines.append(f"    if '{col_name}' in df_target_{tgt_safe}.columns:")
+         if cast_type == "datetime":
+             lines.append(f"        df_target_{tgt_safe}['{col_name}'] = pd.to_datetime(df_target_{tgt_safe}['{col_name}'], errors='coerce')")
+         elif cast_type == "int":
+             if nullable:
+                 lines.append(f"        df_target_{tgt_safe}['{col_name}'] = pd.to_numeric(df_target_{tgt_safe}['{col_name}'], errors='coerce').astype('{pd_dtype}')")
+             else:
+                 lines.append(f"        df_target_{tgt_safe}['{col_name}'] = pd.to_numeric(df_target_{tgt_safe}['{col_name}'], errors='coerce').fillna(0).astype(int)")
+         elif cast_type == "float":
+             lines.append(f"        df_target_{tgt_safe}['{col_name}'] = pd.to_numeric(df_target_{tgt_safe}['{col_name}'], errors='coerce')")
+         elif cast_type == "bool":
+             lines.append(f"        df_target_{tgt_safe}['{col_name}'] = df_target_{tgt_safe}['{col_name}'].astype('{pd_dtype}')")
@@ -0,0 +1,388 @@
+ from informatica_python.models import FolderDef, WorkflowDef, TaskInstanceDef
+ from informatica_python.utils.expression_converter import convert_expression
+
+
+ def _is_worklet(wf: WorkflowDef) -> bool:
+     for ext in wf.metadata_extensions:
+         if ext.name == "is_worklet" and ext.value and ext.value.upper() == "YES":
+             return True
+     md = getattr(wf, "metadata", {})
+     if isinstance(md, dict) and md.get("is_worklet", "").upper() == "YES":
+         return True
+     return False
+
+
+ def generate_workflow_code(folder: FolderDef) -> str:
+     lines = []
+     lines.append('"""')
+     lines.append(f"Workflow orchestration for folder: {folder.name}")
+     lines.append("Auto-generated by informatica-python")
+     lines.append('"""')
+     lines.append("")
+     lines.append("import sys")
+     lines.append("import logging")
+     lines.append("from datetime import datetime")
+     lines.append("from helper_functions import load_config, logger")
+     lines.append("")
+
+     for i, mapping in enumerate(folder.mappings, 1):
+         safe_name = _safe_name(mapping.name)
+         lines.append(f"from mapping_{i} import run_{safe_name}")
+     lines.append("")
+     lines.append("")
+
+     worklets = []
+     main_workflows = []
+     if folder.workflows:
+         for wf in folder.workflows:
+             if _is_worklet(wf):
+                 worklets.append(wf)
+             else:
+                 main_workflows.append(wf)
+
+     for wkl in worklets:
+         _generate_worklet_function(lines, wkl, folder)
+
+     if main_workflows:
+         for wf in main_workflows:
+             _generate_workflow_function(lines, wf, folder, worklets)
+     else:
+         _generate_default_workflow(lines, folder)
+
+     lines.append("")
+     lines.append("if __name__ == '__main__':")
+     lines.append("    config = load_config()")
+     lines.append("    success = run_workflow(config)")
+     lines.append("    sys.exit(0 if success else 1)")
+     lines.append("")
+
+     return "\n".join(lines)
+
+
+ def _generate_worklet_function(lines, wkl: WorkflowDef, folder: FolderDef):
+     wkl_safe = _safe_name(wkl.name)
+     lines.append(f"def run_worklet_{wkl_safe}(config):")
+     lines.append(f'    """')
+     lines.append(f"    Execute worklet: {wkl.name}")
+     if wkl.description:
+         lines.append(f"    Description: {wkl.description}")
+     lines.append(f'    """')
+     lines.append(f"    logger.info(f'--- Starting Worklet: {wkl.name} ---')")
+     lines.append(f"    wkl_start = datetime.now()")
+     lines.append(f"    success = True")
+     lines.append(f"    failed_tasks = []")
+     lines.append("")
+
+     if wkl.variables:
+         lines.append("    # Worklet Variables")
+         for var in wkl.variables:
+             var_name = _safe_name(var.name.replace("$$", ""))
+             default = var.default_value or "''"
+             lines.append(f"    {var_name} = {default}")
+         lines.append("")
+
+     execution_order = _get_task_execution_order(wkl)
+
+     mapping_name_map = {}
+     for i, mapping in enumerate(folder.mappings, 1):
+         mapping_name_map[mapping.name] = f"run_{_safe_name(mapping.name)}"
+
+     session_to_mapping = {}
+     for session in folder.sessions:
+         if session.mapping_name:
+             session_to_mapping[session.name] = session.mapping_name
+
+     for task in execution_order:
+         _emit_task_code(lines, task, mapping_name_map, session_to_mapping, wkl, [])
+
+     lines.append(f"    elapsed = (datetime.now() - wkl_start).total_seconds()")
+     lines.append(f"    if success:")
+     lines.append(f"        logger.info(f'--- Worklet {wkl.name} completed successfully in {{elapsed:.2f}}s ---')")
+     lines.append(f"    else:")
+     lines.append(f"        logger.error(f'--- Worklet {wkl.name} failed in {{elapsed:.2f}}s. Failed tasks: {{failed_tasks}} ---')")
+     lines.append(f"    return success")
+     lines.append("")
+     lines.append("")
+
+
+ def _generate_workflow_function(lines, wf: WorkflowDef, folder: FolderDef, worklets=None):
+     wf_safe = _safe_name(wf.name)
+     lines.append(f"def run_workflow(config, workflow_name='{wf.name}'):")
+     lines.append(f'    """')
+     lines.append(f"    Execute workflow: {wf.name}")
+     if wf.description:
+         lines.append(f"    Description: {wf.description}")
+     lines.append(f'    """')
+     lines.append(f"    logger.info(f'=== Starting Workflow: {wf.name} ===')")
+     lines.append(f"    wf_start = datetime.now()")
+     lines.append(f"    success = True")
+     lines.append(f"    failed_tasks = []")
+     lines.append("")
+
+     if wf.variables:
+         lines.append("    # Workflow Variables")
+         for var in wf.variables:
+             var_name = _safe_name(var.name.replace("$$", ""))
+             default = var.default_value or "''"
+             lines.append(f"    {var_name} = {default}")
+         lines.append("")
+
+     execution_order = _get_task_execution_order(wf)
+
+     mapping_name_map = {}
+     for i, mapping in enumerate(folder.mappings, 1):
+         mapping_name_map[mapping.name] = f"run_{_safe_name(mapping.name)}"
+
+     session_to_mapping = {}
+     for session in folder.sessions:
+         if session.mapping_name:
+             session_to_mapping[session.name] = session.mapping_name
+
+     for task in execution_order:
+         _emit_task_code(lines, task, mapping_name_map, session_to_mapping, wf, worklets or [])
+
+     lines.append(f"    elapsed = (datetime.now() - wf_start).total_seconds()")
+     lines.append(f"    if success:")
+     lines.append(f"        logger.info(f'=== Workflow {wf.name} completed successfully in {{elapsed:.2f}}s ===')")
+     lines.append(f"    else:")
+     lines.append(f"        logger.error(f'=== Workflow {wf.name} failed in {{elapsed:.2f}}s. Failed tasks: {{failed_tasks}} ===')")
+     lines.append(f"    return success")
+     lines.append("")
+
+
+ def _emit_task_code(lines, task, mapping_name_map, session_to_mapping, wf, worklets):
+     task_safe = _safe_name(task.name)
+
+     if task.task_type == "Start Task":
+         lines.append(f"    # Start Task: {task.name}")
+         lines.append(f"    logger.info('Workflow started')")
+         lines.append("")
+         return
+
+     if task.task_type == "Session":
+         mapping_name = session_to_mapping.get(task.task_name or task.name, "")
+         run_func = mapping_name_map.get(mapping_name, None)
+
+         lines.append(f"    # Session: {task.name}")
+         lines.append(f"    try:")
+         lines.append(f"        logger.info('Executing session: {task.name}')")
+         if run_func:
+             lines.append(f"        {run_func}(config)")
+         else:
+             lines.append(f"        # TODO: Map session '{task.name}' to corresponding mapping function")
+             lines.append(f"        logger.warning('Session {task.name} has no mapped function')")
+         lines.append(f"    except Exception as e:")
+         lines.append(f"        logger.error(f'Session {task.name} failed: {{e}}')")
+
+         if task.fail_parent_if_instance_fails == "YES":
+             lines.append(f"        success = False")
+             lines.append(f"        failed_tasks.append('{task.name}')")
+         else:
+             lines.append(f"        logger.warning('Continuing despite failure (fail_parent=NO)')")
+         lines.append("")
+
+     elif task.task_type == "Worklet":
+         worklet_name = task.task_name or task.name
+         worklet_safe = _safe_name(worklet_name)
+         matched_worklet = None
+         for wkl in worklets:
+             if wkl.name == worklet_name:
+                 matched_worklet = wkl
+                 break
+
+         lines.append(f"    # Worklet: {task.name}")
+         lines.append(f"    try:")
+         lines.append(f"        logger.info('Executing worklet: {task.name}')")
+         if matched_worklet:
+             lines.append(f"        worklet_result_{task_safe} = run_worklet_{worklet_safe}(config)")
+             lines.append(f"        if not worklet_result_{task_safe}:")
+             lines.append(f"            raise RuntimeError('Worklet {worklet_name} returned failure')")
+         else:
+             lines.append(f"        # WARNING: Worklet '{worklet_name}' definition not found in folder")
+             lines.append(f"        logger.warning('Worklet {worklet_name} not found — skipping')")
+         lines.append(f"    except Exception as e:")
+         lines.append(f"        logger.error(f'Worklet {task.name} failed: {{e}}')")
+         if task.fail_parent_if_instance_fails == "YES":
+             lines.append(f"        success = False")
+             lines.append(f"        failed_tasks.append('{task.name}')")
+         else:
+             lines.append(f"        logger.warning('Continuing despite worklet failure (fail_parent=NO)')")
+         lines.append("")
+
+     elif task.task_type == "Command":
+         cmd = ""
+         for attr in task.attributes:
+             if attr.name in ("Command", "CmdLine"):
+                 cmd = attr.value
+         lines.append(f"    # Command Task: {task.name}")
+         lines.append(f"    try:")
+         lines.append(f"        import subprocess, shlex")
+         if cmd:
+             lines.append(f"        subprocess.run(shlex.split({repr(cmd)}), check=True)")
+         else:
+             lines.append(f"        # TODO: Configure command for task '{task.name}'")
+             lines.append(f"        pass")
+         lines.append(f"    except Exception as e:")
+         lines.append(f"        logger.error(f'Command task {task.name} failed: {{e}}')")
+         if task.fail_parent_if_instance_fails == "YES":
+             lines.append(f"        success = False")
+             lines.append(f"        failed_tasks.append('{task.name}')")
+         lines.append("")
+
+     elif task.task_type == "Email Task":
+         lines.append(f"    # Email Task: {task.name}")
+         lines.append(f"    # TODO: Implement email notification logic")
+         lines.append(f"    logger.info('Email task: {task.name} - skipped (implement email logic)')")
+         lines.append("")
+
+     elif task.task_type == "Decision":
+         decision_cond = ""
+         decision_name = ""
+         for attr in task.attributes:
+             if attr.name == "Decision Condition":
+                 decision_cond = attr.value
+             elif attr.name == "Decision Name":
+                 decision_name = attr.value
+
+         lines.append(f"    # Decision Task: {task.name}")
+         if decision_cond:
+             py_cond = _convert_decision_condition(decision_cond)
+             lines.append(f"    # Original condition: {decision_cond}")
+             lines.append(f"    decision_{task_safe} = {py_cond}")
+             lines.append(f"    logger.info(f'Decision {task.name}: {{decision_{task_safe}}}')")
+
+             succ_targets = []
+             fail_targets = []
+             for link in wf.links:
+                 if link.from_instance == task.name:
+                     cond_text = (link.condition or "").strip().upper()
+                     if "FAILED" in cond_text or "FALSE" in cond_text:
+                         fail_targets.append(link.to_instance)
+                     elif "SUCCEEDED" in cond_text or "TRUE" in cond_text or cond_text:
+                         succ_targets.append(link.to_instance)
+                     else:
+                         succ_targets.append(link.to_instance)
+
+             if succ_targets or fail_targets:
+                 lines.append(f"    if decision_{task_safe}:")
+                 if succ_targets:
+                     for t in succ_targets:
+                         lines.append(f"        logger.info('Decision {task.name} => proceeding to {t}')")
+                 else:
+                     lines.append(f"        pass")
+                 if fail_targets:
+                     lines.append(f"    else:")
+                     for t in fail_targets:
+                         lines.append(f"        logger.info('Decision {task.name} => proceeding to {t}')")
+             else:
+                 lines.append(f"    if not decision_{task_safe}:")
+                 lines.append(f"        logger.warning('Decision {task.name} evaluated to False')")
+         else:
+             lines.append(f"    logger.info('Decision task: {task.name} (no condition specified)')")
+         lines.append("")
+
+     elif task.task_type == "Timer":
+         lines.append(f"    # Timer Task: {task.name}")
+         lines.append(f"    import time")
+         lines.append(f"    # TODO: Configure timer delay")
+         lines.append(f"    logger.info('Timer task: {task.name}')")
+         lines.append("")
+
+     elif task.task_type == "Assignment":
+         lines.append(f"    # Assignment Task: {task.name}")
+         for attr in task.attributes:
+             if attr.name and attr.value:
+                 lines.append(f"    # {attr.name} = {attr.value}")
+         lines.append(f"    logger.info('Assignment task: {task.name}')")
+         lines.append("")
+
+     else:
+         lines.append(f"    # Task: {task.name} (Type: {task.task_type})")
+         lines.append(f"    logger.info('Executing task: {task.name} ({task.task_type})')")
+         lines.append(f"    # TODO: Implement task logic")
+         lines.append("")
+
+
+ def _generate_default_workflow(lines, folder):
+     lines.append("def run_workflow(config, workflow_name='default'):")
+     lines.append('    """Execute all mappings in order."""')
+     lines.append(f"    logger.info('=== Starting Default Workflow ===')")
+     lines.append(f"    wf_start = datetime.now()")
+     lines.append(f"    success = True")
+     lines.append("")
+
+     for i, mapping in enumerate(folder.mappings, 1):
+         safe_name = _safe_name(mapping.name)
+         lines.append(f"    try:")
+         lines.append(f"        logger.info('Executing mapping {i}: {mapping.name}')")
+         lines.append(f"        run_{safe_name}(config)")
+         lines.append(f"    except Exception as e:")
+         lines.append(f"        logger.error(f'Mapping {mapping.name} failed: {{e}}')")
+         lines.append(f"        success = False")
+         lines.append("")
+
+     lines.append(f"    elapsed = (datetime.now() - wf_start).total_seconds()")
+     lines.append(f"    logger.info(f'=== Workflow completed in {{elapsed:.2f}}s (success={{success}}) ===')")
+     lines.append(f"    return success")
+     lines.append("")
+
+
+ def _get_task_execution_order(wf: WorkflowDef):
+     if not wf.task_instances:
+         return []
+
+     task_map = {t.name: t for t in wf.task_instances}
+     adj = {t.name: [] for t in wf.task_instances}
+     in_degree = {t.name: 0 for t in wf.task_instances}
+
+     for link in wf.links:
+         if link.from_instance in adj and link.to_instance in adj:
+             adj[link.from_instance].append(link.to_instance)
+             in_degree[link.to_instance] += 1
+
+     queue = []
+     for name, deg in in_degree.items():
+         if deg == 0:
+             queue.append(name)
+
+     ordered = []
+     while queue:
+         queue.sort()
+         node = queue.pop(0)
+         ordered.append(task_map[node])
+         for neighbor in adj.get(node, []):
+             in_degree[neighbor] -= 1
+             if in_degree[neighbor] == 0:
+                 queue.append(neighbor)
+
+     for t in wf.task_instances:
+         if t not in ordered:
+             ordered.append(t)
+
+     return ordered
+
+
+ def _convert_decision_condition(condition):
+     import re
+     cond = condition.strip()
+     cond = re.sub(r'\$\$(\w+)', r'\1', cond)
+     cond = re.sub(r'\$(\w+)\.(\w+)\.(Status|PrevTaskStatus)', r"'\2_status'", cond)
+     cond = re.sub(r'\bSUCCEEDED\b', "'SUCCEEDED'", cond, flags=re.IGNORECASE)
+     cond = re.sub(r'\bFAILED\b', "'FAILED'", cond, flags=re.IGNORECASE)
+     cond = re.sub(r'\bABORTED\b', "'ABORTED'", cond, flags=re.IGNORECASE)
+     cond = re.sub(r'\bAND\b', 'and', cond, flags=re.IGNORECASE)
+     cond = re.sub(r'\bOR\b', 'or', cond, flags=re.IGNORECASE)
+     cond = re.sub(r'\bNOT\b', 'not', cond, flags=re.IGNORECASE)
+     cond = re.sub(r'\bTRUE\b', 'True', cond, flags=re.IGNORECASE)
+     cond = re.sub(r'\bFALSE\b', 'False', cond, flags=re.IGNORECASE)
+     cond = re.sub(r'(?<!=)=(?!=)', '==', cond)
+     cond = cond.replace('<>', '!=')
+     return cond
+
+
+ def _safe_name(name):
+     import re
+     safe = re.sub(r'[^a-zA-Z0-9_]', '_', name)
+     if safe and safe[0].isdigit():
+         safe = '_' + safe
+     return safe.lower()
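To make the condition rewriting above concrete, here is a small sketch of `_convert_decision_condition` in action; the task and variable names are hypothetical, and the output follows the substitution order in the function ($$vars, $task.Status references, status literals, boolean keywords, then = becomes == and <> becomes !=):

# Hypothetical PowerCenter decision condition and its Python rewrite:
cond = "$wklt.s_m_load.Status = SUCCEEDED AND $$RunMode <> FALSE"
print(_convert_decision_condition(cond))
# -> 's_m_load_status' == 'SUCCEEDED' and RunMode != False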
@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: informatica-python
- Version: 1.3.1
+ Version: 1.4.1
  Summary: Convert Informatica PowerCenter workflow XML to Python/PySpark code
  License-Expression: MIT
  Requires-Python: >=3.8
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
 
  [project]
  name = "informatica-python"
- version = "1.3.1"
+ version = "1.4.1"
  description = "Convert Informatica PowerCenter workflow XML to Python/PySpark code"
  readme = "README.md"
  license = "MIT"
@@ -776,6 +776,200 @@ def test_inline_mapplet():
      print("PASS: test_inline_mapplet")
 
 
+ def test_session_connection_overrides():
+     from informatica_python.models import (
+         MappingDef, FolderDef, SourceDef, TargetDef,
+         TransformationDef, ConnectorDef, InstanceDef, FieldDef,
+         SessionDef, SessionTransformInst, ConnectionRef, TableAttribute,
+     )
+     from informatica_python.generators.mapping_gen import generate_mapping_code
+
+     mapping = MappingDef(
+         name="m_sess_test",
+         transformations=[
+             TransformationDef(name="SQ_SRC", type="Source Qualifier",
+                               fields=[FieldDef(name="ID", datatype="integer", porttype="INPUT/OUTPUT")]),
+         ],
+         connectors=[
+             ConnectorDef(from_instance="MY_SRC", from_field="ID",
+                          from_instance_type="Source Definition",
+                          to_instance="SQ_SRC", to_field="ID",
+                          to_instance_type="Source Qualifier"),
+             ConnectorDef(from_instance="SQ_SRC", from_field="ID",
+                          from_instance_type="Source Qualifier",
+                          to_instance="MY_TGT", to_field="ID",
+                          to_instance_type="Target Definition"),
+         ],
+         instances=[
+             InstanceDef(name="MY_SRC", type="Source Definition", transformation_name="MY_SRC"),
+             InstanceDef(name="SQ_SRC", type="Source Qualifier"),
+             InstanceDef(name="MY_TGT", type="Target Definition", transformation_name="MY_TGT"),
+         ],
+     )
+     src = SourceDef(name="MY_SRC", database_type="Oracle", db_name="SRC_DB",
+                     owner_name="EDW",
+                     fields=[FieldDef(name="ID", datatype="integer")])
+     tgt = TargetDef(name="MY_TGT", database_type="Oracle",
+                     fields=[FieldDef(name="ID", datatype="integer")])
+
+     session = SessionDef(
+         name="s_m_sess_test",
+         mapping_name="m_sess_test",
+         transform_instances=[
+             SessionTransformInst(
+                 instance_name="SQ_SRC",
+                 transformation_name="SQ_SRC",
+                 connections=[
+                     ConnectionRef(connection_name="PROD_ORA", connection_type="Oracle"),
+                 ],
+             ),
+             SessionTransformInst(
+                 instance_name="MY_TGT",
+                 transformation_name="MY_TGT",
+                 attributes=[
+                     TableAttribute(name="Output File Directory", value="/data/output"),
+                     TableAttribute(name="Output filename", value="result.csv"),
+                 ],
+             ),
+         ],
+     )
+
+     folder = FolderDef(
+         name="test",
+         sources=[src],
+         targets=[tgt],
+         mappings=[mapping],
+         sessions=[session],
+     )
+     code = generate_mapping_code(mapping, folder)
+
+     assert "PROD_ORA" in code, "Session connection override should use PROD_ORA"
+     assert "_sess_overrides" in code, "Session overrides dict should be emitted"
+     assert "/data/output" in code, "Output file directory override should appear"
+     assert "result.csv" in code, "Output filename override should appear"
+     print("PASS: test_session_connection_overrides")
+
+
+ def test_worklet_generation():
+     from informatica_python.models import (
+         FolderDef, WorkflowDef, TaskInstanceDef, WorkflowLink,
+         MetadataExtension,
+     )
+     from informatica_python.generators.workflow_gen import generate_workflow_code
+
+     worklet = WorkflowDef(
+         name="wklt_error_handler",
+         description="Error handling worklet",
+         metadata_extensions=[
+             MetadataExtension(name="is_worklet", value="YES"),
+         ],
+         task_instances=[
+             TaskInstanceDef(name="Start_wklt", task_type="Start Task"),
+             TaskInstanceDef(name="CMD_LOG", task_type="Command",
+                             attributes=[]),
+         ],
+         links=[
+             WorkflowLink(from_instance="Start_wklt", to_instance="CMD_LOG", condition=""),
+         ],
+     )
+
+     main_wf = WorkflowDef(
+         name="wf_main",
+         task_instances=[
+             TaskInstanceDef(name="Start_main", task_type="Start Task"),
+             TaskInstanceDef(name="WK_ERR", task_type="Worklet",
+                             task_name="wklt_error_handler"),
+         ],
+         links=[
+             WorkflowLink(from_instance="Start_main", to_instance="WK_ERR", condition=""),
+         ],
+     )
+
+     folder = FolderDef(
+         name="test",
+         workflows=[worklet, main_wf],
+     )
+     code = generate_workflow_code(folder)
+
+     assert "def run_worklet_wklt_error_handler(config):" in code, \
+         "Worklet function should be generated"
+     assert "run_worklet_wklt_error_handler(config)" in code, \
+         "Main workflow should call the worklet function"
+     assert "Worklet: WK_ERR" in code, \
+         "Worklet task should appear in workflow"
+     print("PASS: test_worklet_generation")
+
+
+ def test_type_casting_at_target():
+     from informatica_python.models import (
+         MappingDef, FolderDef, SourceDef, TargetDef,
+         TransformationDef, ConnectorDef, InstanceDef, FieldDef,
+     )
+     from informatica_python.generators.mapping_gen import generate_mapping_code
+
+     mapping = MappingDef(
+         name="m_cast_test",
+         transformations=[
+             TransformationDef(name="SQ_DATA", type="Source Qualifier",
+                               fields=[
+                                   FieldDef(name="AMOUNT", datatype="decimal", porttype="INPUT/OUTPUT"),
+                                   FieldDef(name="REC_ID", datatype="bigint", porttype="INPUT/OUTPUT"),
+                                   FieldDef(name="CREATED", datatype="date/time", porttype="INPUT/OUTPUT"),
+                                   FieldDef(name="IS_ACTIVE", datatype="bit", porttype="INPUT/OUTPUT"),
+                               ]),
+         ],
+         connectors=[
+             ConnectorDef(from_instance="SRC", from_field="AMOUNT",
+                          from_instance_type="Source Definition",
+                          to_instance="SQ_DATA", to_field="AMOUNT",
+                          to_instance_type="Source Qualifier"),
+             ConnectorDef(from_instance="SQ_DATA", from_field="AMOUNT",
+                          from_instance_type="Source Qualifier",
+                          to_instance="TGT", to_field="AMOUNT",
+                          to_instance_type="Target Definition"),
+             ConnectorDef(from_instance="SQ_DATA", from_field="REC_ID",
+                          from_instance_type="Source Qualifier",
+                          to_instance="TGT", to_field="REC_ID",
+                          to_instance_type="Target Definition"),
+             ConnectorDef(from_instance="SQ_DATA", from_field="CREATED",
+                          from_instance_type="Source Qualifier",
+                          to_instance="TGT", to_field="CREATED",
+                          to_instance_type="Target Definition"),
+             ConnectorDef(from_instance="SQ_DATA", from_field="IS_ACTIVE",
+                          from_instance_type="Source Qualifier",
+                          to_instance="TGT", to_field="IS_ACTIVE",
+                          to_instance_type="Target Definition"),
+         ],
+         instances=[
+             InstanceDef(name="SRC", type="Source Definition", transformation_name="SRC"),
+             InstanceDef(name="SQ_DATA", type="Source Qualifier"),
+             InstanceDef(name="TGT", type="Target Definition", transformation_name="TGT"),
+         ],
+     )
+     src = SourceDef(name="SRC", fields=[
+         FieldDef(name="AMOUNT", datatype="decimal"),
+         FieldDef(name="REC_ID", datatype="bigint"),
+         FieldDef(name="CREATED", datatype="date/time"),
+         FieldDef(name="IS_ACTIVE", datatype="bit"),
+     ])
+     tgt = TargetDef(name="TGT", fields=[
+         FieldDef(name="AMOUNT", datatype="decimal"),
+         FieldDef(name="REC_ID", datatype="bigint", nullable="NOT NULL"),
+         FieldDef(name="PARENT_ID", datatype="bigint", nullable="NULL"),
+         FieldDef(name="CREATED", datatype="date/time"),
+         FieldDef(name="IS_ACTIVE", datatype="bit"),
+     ])
+     folder = FolderDef(name="test", sources=[src], targets=[tgt], mappings=[mapping])
+     code = generate_mapping_code(mapping, folder)
+
+     assert "pd.to_numeric" in code, "Numeric casting should use pd.to_numeric"
+     assert "Int64" in code, "Bigint should cast to Int64 (nullable)"
+     assert "pd.to_datetime" in code, "Date/time should use pd.to_datetime"
+     assert ".fillna(0).astype(int)" in code, "NOT NULL bigint should use fillna(0).astype(int)"
+     assert "Type casting for target fields" in code, "Type casting comment should appear"
+     print("PASS: test_type_casting_at_target")
+
+
  if __name__ == "__main__":
      print("=" * 60)
      print("Running informatica-python tests")
@@ -805,6 +999,9 @@ if __name__ == "__main__":
          test_rank_with_groupby,
          test_decision_task_if_else,
          test_inline_mapplet,
+         test_session_connection_overrides,
+         test_worklet_generation,
+         test_type_casting_at_target,
      ]
 
      passed = 0
@@ -1,288 +0,0 @@
- from informatica_python.models import FolderDef, WorkflowDef, TaskInstanceDef
- from informatica_python.utils.expression_converter import convert_expression
-
-
- def generate_workflow_code(folder: FolderDef) -> str:
-     lines = []
-     lines.append('"""')
-     lines.append(f"Workflow orchestration for folder: {folder.name}")
-     lines.append("Auto-generated by informatica-python")
-     lines.append('"""')
-     lines.append("")
-     lines.append("import sys")
-     lines.append("import logging")
-     lines.append("from datetime import datetime")
-     lines.append("from helper_functions import load_config, logger")
-     lines.append("")
-
-     for i, mapping in enumerate(folder.mappings, 1):
-         safe_name = _safe_name(mapping.name)
-         lines.append(f"from mapping_{i} import run_{safe_name}")
-     lines.append("")
-     lines.append("")
-
-     if folder.workflows:
-         for wf in folder.workflows:
-             _generate_workflow_function(lines, wf, folder)
-     else:
-         _generate_default_workflow(lines, folder)
-
-     lines.append("")
-     lines.append("if __name__ == '__main__':")
-     lines.append("    config = load_config()")
-     lines.append("    success = run_workflow(config)")
-     lines.append("    sys.exit(0 if success else 1)")
-     lines.append("")
-
-     return "\n".join(lines)
-
-
- def _generate_workflow_function(lines, wf: WorkflowDef, folder: FolderDef):
-     wf_safe = _safe_name(wf.name)
-     lines.append(f"def run_workflow(config, workflow_name='{wf.name}'):")
-     lines.append(f'    """')
-     lines.append(f"    Execute workflow: {wf.name}")
-     if wf.description:
-         lines.append(f"    Description: {wf.description}")
-     lines.append(f'    """')
-     lines.append(f"    logger.info(f'=== Starting Workflow: {wf.name} ===')")
-     lines.append(f"    wf_start = datetime.now()")
-     lines.append(f"    success = True")
-     lines.append(f"    failed_tasks = []")
-     lines.append("")
-
-     if wf.variables:
-         lines.append("    # Workflow Variables")
-         for var in wf.variables:
-             var_name = _safe_name(var.name.replace("$$", ""))
-             default = var.default_value or "''"
-             lines.append(f"    {var_name} = {default}")
-         lines.append("")
-
-     execution_order = _get_task_execution_order(wf)
-
-     mapping_name_map = {}
-     for i, mapping in enumerate(folder.mappings, 1):
-         mapping_name_map[mapping.name] = f"run_{_safe_name(mapping.name)}"
-
-     session_to_mapping = {}
-     for session in folder.sessions:
-         if session.mapping_name:
-             session_to_mapping[session.name] = session.mapping_name
-
-     for task in execution_order:
-         task_safe = _safe_name(task.name)
-
-         if task.task_type == "Start Task":
-             lines.append(f"    # Start Task: {task.name}")
-             lines.append(f"    logger.info('Workflow started')")
-             lines.append("")
-             continue
-
-         if task.task_type == "Session":
-             mapping_name = session_to_mapping.get(task.task_name or task.name, "")
-             run_func = mapping_name_map.get(mapping_name, None)
-
-             lines.append(f"    # Session: {task.name}")
-             lines.append(f"    try:")
-             lines.append(f"        logger.info('Executing session: {task.name}')")
-             if run_func:
-                 lines.append(f"        {run_func}(config)")
-             else:
-                 lines.append(f"        # TODO: Map session '{task.name}' to corresponding mapping function")
-                 lines.append(f"        logger.warning('Session {task.name} has no mapped function')")
-             lines.append(f"    except Exception as e:")
-             lines.append(f"        logger.error(f'Session {task.name} failed: {{e}}')")
-
-             if task.fail_parent_if_instance_fails == "YES":
-                 lines.append(f"        success = False")
-                 lines.append(f"        failed_tasks.append('{task.name}')")
-             else:
-                 lines.append(f"        logger.warning('Continuing despite failure (fail_parent=NO)')")
-             lines.append("")
-
-         elif task.task_type == "Command":
-             cmd = ""
-             for attr in task.attributes:
-                 if attr.name in ("Command", "CmdLine"):
-                     cmd = attr.value
-             lines.append(f"    # Command Task: {task.name}")
-             lines.append(f"    try:")
-             lines.append(f"        import subprocess, shlex")
-             if cmd:
-                 lines.append(f"        subprocess.run(shlex.split({repr(cmd)}), check=True)")
-             else:
-                 lines.append(f"        # TODO: Configure command for task '{task.name}'")
-                 lines.append(f"        pass")
-             lines.append(f"    except Exception as e:")
-             lines.append(f"        logger.error(f'Command task {task.name} failed: {{e}}')")
-             if task.fail_parent_if_instance_fails == "YES":
-                 lines.append(f"        success = False")
-                 lines.append(f"        failed_tasks.append('{task.name}')")
-             lines.append("")
-
-         elif task.task_type == "Email Task":
-             lines.append(f"    # Email Task: {task.name}")
-             lines.append(f"    # TODO: Implement email notification logic")
-             lines.append(f"    logger.info('Email task: {task.name} - skipped (implement email logic)')")
-             lines.append("")
-
-         elif task.task_type == "Decision":
-             decision_cond = ""
-             decision_name = ""
-             for attr in task.attributes:
-                 if attr.name == "Decision Condition":
-                     decision_cond = attr.value
-                 elif attr.name == "Decision Name":
-                     decision_name = attr.value
-
-             lines.append(f"    # Decision Task: {task.name}")
-             if decision_cond:
-                 py_cond = _convert_decision_condition(decision_cond)
-                 lines.append(f"    # Original condition: {decision_cond}")
-                 lines.append(f"    decision_{task_safe} = {py_cond}")
-                 lines.append(f"    logger.info(f'Decision {task.name}: {{decision_{task_safe}}}')")
-
-                 succ_targets = []
-                 fail_targets = []
-                 for link in wf.links:
-                     if link.from_instance == task.name:
-                         cond_text = (link.condition or "").strip().upper()
-                         if "FAILED" in cond_text or "FALSE" in cond_text:
-                             fail_targets.append(link.to_instance)
-                         elif "SUCCEEDED" in cond_text or "TRUE" in cond_text or cond_text:
-                             succ_targets.append(link.to_instance)
-                         else:
-                             succ_targets.append(link.to_instance)
-
-                 if succ_targets or fail_targets:
-                     lines.append(f"    if decision_{task_safe}:")
-                     if succ_targets:
-                         for t in succ_targets:
-                             lines.append(f"        logger.info('Decision {task.name} => proceeding to {t}')")
-                     else:
-                         lines.append(f"        pass")
-                     if fail_targets:
-                         lines.append(f"    else:")
-                         for t in fail_targets:
-                             lines.append(f"        logger.info('Decision {task.name} => proceeding to {t}')")
-                 else:
-                     lines.append(f"    if not decision_{task_safe}:")
-                     lines.append(f"        logger.warning('Decision {task.name} evaluated to False')")
-             else:
-                 lines.append(f"    logger.info('Decision task: {task.name} (no condition specified)')")
-             lines.append("")
-
-         elif task.task_type == "Timer":
-             lines.append(f"    # Timer Task: {task.name}")
-             lines.append(f"    import time")
-             lines.append(f"    # TODO: Configure timer delay")
-             lines.append(f"    logger.info('Timer task: {task.name}')")
-             lines.append("")
-
-         elif task.task_type == "Assignment":
-             lines.append(f"    # Assignment Task: {task.name}")
-             for attr in task.attributes:
-                 if attr.name and attr.value:
-                     lines.append(f"    # {attr.name} = {attr.value}")
-             lines.append(f"    logger.info('Assignment task: {task.name}')")
-             lines.append("")
-
-         else:
-             lines.append(f"    # Task: {task.name} (Type: {task.task_type})")
-             lines.append(f"    logger.info('Executing task: {task.name} ({task.task_type})')")
-             lines.append(f"    # TODO: Implement task logic")
-             lines.append("")
-
-     lines.append(f"    elapsed = (datetime.now() - wf_start).total_seconds()")
-     lines.append(f"    if success:")
-     lines.append(f"        logger.info(f'=== Workflow {wf.name} completed successfully in {{elapsed:.2f}}s ===')")
-     lines.append(f"    else:")
-     lines.append(f"        logger.error(f'=== Workflow {wf.name} failed in {{elapsed:.2f}}s. Failed tasks: {{failed_tasks}} ===')")
-     lines.append(f"    return success")
-     lines.append("")
-
-
- def _generate_default_workflow(lines, folder):
-     lines.append("def run_workflow(config, workflow_name='default'):")
-     lines.append('    """Execute all mappings in order."""')
-     lines.append(f"    logger.info('=== Starting Default Workflow ===')")
-     lines.append(f"    wf_start = datetime.now()")
-     lines.append(f"    success = True")
-     lines.append("")
-
-     for i, mapping in enumerate(folder.mappings, 1):
-         safe_name = _safe_name(mapping.name)
-         lines.append(f"    try:")
-         lines.append(f"        logger.info('Executing mapping {i}: {mapping.name}')")
-         lines.append(f"        run_{safe_name}(config)")
-         lines.append(f"    except Exception as e:")
-         lines.append(f"        logger.error(f'Mapping {mapping.name} failed: {{e}}')")
-         lines.append(f"        success = False")
-         lines.append("")
-
-     lines.append(f"    elapsed = (datetime.now() - wf_start).total_seconds()")
-     lines.append(f"    logger.info(f'=== Workflow completed in {{elapsed:.2f}}s (success={{success}}) ===')")
-     lines.append(f"    return success")
-     lines.append("")
-
-
- def _get_task_execution_order(wf: WorkflowDef):
-     if not wf.task_instances:
-         return []
-
-     task_map = {t.name: t for t in wf.task_instances}
-     adj = {t.name: [] for t in wf.task_instances}
-     in_degree = {t.name: 0 for t in wf.task_instances}
-
-     for link in wf.links:
-         if link.from_instance in adj and link.to_instance in adj:
-             adj[link.from_instance].append(link.to_instance)
-             in_degree[link.to_instance] += 1
-
-     queue = []
-     for name, deg in in_degree.items():
-         if deg == 0:
-             queue.append(name)
-
-     ordered = []
-     while queue:
-         queue.sort()
-         node = queue.pop(0)
-         ordered.append(task_map[node])
-         for neighbor in adj.get(node, []):
-             in_degree[neighbor] -= 1
-             if in_degree[neighbor] == 0:
-                 queue.append(neighbor)
-
-     for t in wf.task_instances:
-         if t not in ordered:
-             ordered.append(t)
-
-     return ordered
-
-
- def _convert_decision_condition(condition):
-     import re
-     cond = condition.strip()
-     cond = re.sub(r'\$\$(\w+)', r'\1', cond)
-     cond = re.sub(r'\$(\w+)\.(\w+)\.(Status|PrevTaskStatus)', r"'\2_status'", cond)
-     cond = re.sub(r'\bSUCCEEDED\b', "'SUCCEEDED'", cond, flags=re.IGNORECASE)
-     cond = re.sub(r'\bFAILED\b', "'FAILED'", cond, flags=re.IGNORECASE)
-     cond = re.sub(r'\bABORTED\b', "'ABORTED'", cond, flags=re.IGNORECASE)
-     cond = re.sub(r'\bAND\b', 'and', cond, flags=re.IGNORECASE)
-     cond = re.sub(r'\bOR\b', 'or', cond, flags=re.IGNORECASE)
-     cond = re.sub(r'\bNOT\b', 'not', cond, flags=re.IGNORECASE)
-     cond = re.sub(r'\bTRUE\b', 'True', cond, flags=re.IGNORECASE)
-     cond = re.sub(r'\bFALSE\b', 'False', cond, flags=re.IGNORECASE)
-     cond = re.sub(r'(?<!=)=(?!=)', '==', cond)
-     cond = cond.replace('<>', '!=')
-     return cond
-
-
- def _safe_name(name):
-     import re
-     safe = re.sub(r'[^a-zA-Z0-9_]', '_', name)
-     if safe and safe[0].isdigit():
-         safe = '_' + safe
-     return safe.lower()