informatica-python 1.2.0__tar.gz → 1.3.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (26) hide show
  1. {informatica_python-1.2.0 → informatica_python-1.3.0}/PKG-INFO +1 -1
  2. {informatica_python-1.2.0 → informatica_python-1.3.0}/informatica_python/generators/mapping_gen.py +386 -22
  3. {informatica_python-1.2.0 → informatica_python-1.3.0}/informatica_python/generators/workflow_gen.py +59 -3
  4. {informatica_python-1.2.0 → informatica_python-1.3.0}/informatica_python/utils/expression_converter.py +12 -7
  5. {informatica_python-1.2.0 → informatica_python-1.3.0}/informatica_python.egg-info/PKG-INFO +1 -1
  6. {informatica_python-1.2.0 → informatica_python-1.3.0}/pyproject.toml +1 -1
  7. {informatica_python-1.2.0 → informatica_python-1.3.0}/tests/test_converter.py +275 -1
  8. {informatica_python-1.2.0 → informatica_python-1.3.0}/README.md +0 -0
  9. {informatica_python-1.2.0 → informatica_python-1.3.0}/informatica_python/__init__.py +0 -0
  10. {informatica_python-1.2.0 → informatica_python-1.3.0}/informatica_python/cli.py +0 -0
  11. {informatica_python-1.2.0 → informatica_python-1.3.0}/informatica_python/converter.py +0 -0
  12. {informatica_python-1.2.0 → informatica_python-1.3.0}/informatica_python/generators/__init__.py +0 -0
  13. {informatica_python-1.2.0 → informatica_python-1.3.0}/informatica_python/generators/config_gen.py +0 -0
  14. {informatica_python-1.2.0 → informatica_python-1.3.0}/informatica_python/generators/error_log_gen.py +0 -0
  15. {informatica_python-1.2.0 → informatica_python-1.3.0}/informatica_python/generators/helper_gen.py +0 -0
  16. {informatica_python-1.2.0 → informatica_python-1.3.0}/informatica_python/generators/sql_gen.py +0 -0
  17. {informatica_python-1.2.0 → informatica_python-1.3.0}/informatica_python/models.py +0 -0
  18. {informatica_python-1.2.0 → informatica_python-1.3.0}/informatica_python/parser.py +0 -0
  19. {informatica_python-1.2.0 → informatica_python-1.3.0}/informatica_python/utils/__init__.py +0 -0
  20. {informatica_python-1.2.0 → informatica_python-1.3.0}/informatica_python/utils/datatype_map.py +0 -0
  21. {informatica_python-1.2.0 → informatica_python-1.3.0}/informatica_python.egg-info/SOURCES.txt +0 -0
  22. {informatica_python-1.2.0 → informatica_python-1.3.0}/informatica_python.egg-info/dependency_links.txt +0 -0
  23. {informatica_python-1.2.0 → informatica_python-1.3.0}/informatica_python.egg-info/entry_points.txt +0 -0
  24. {informatica_python-1.2.0 → informatica_python-1.3.0}/informatica_python.egg-info/requires.txt +0 -0
  25. {informatica_python-1.2.0 → informatica_python-1.3.0}/informatica_python.egg-info/top_level.txt +0 -0
  26. {informatica_python-1.2.0 → informatica_python-1.3.0}/setup.cfg +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: informatica-python
3
- Version: 1.2.0
3
+ Version: 1.3.0
4
4
  Summary: Convert Informatica PowerCenter workflow XML to Python/PySpark code
5
5
  License-Expression: MIT
6
6
  Requires-Python: >=3.8
@@ -1,7 +1,7 @@
1
1
  from typing import List, Dict
2
2
  from informatica_python.models import (
3
3
  MappingDef, FolderDef, SourceDef, TargetDef,
4
- TransformationDef, ConnectorDef, InstanceDef,
4
+ TransformationDef, ConnectorDef, InstanceDef, MappletDef,
5
5
  )
6
6
  from informatica_python.utils.expression_converter import (
7
7
  convert_expression, convert_sql_expression,
@@ -11,6 +11,130 @@ from informatica_python.utils.expression_converter import (
11
11
  from informatica_python.utils.datatype_map import get_python_type
12
12
 
13
13
 
14
+ def _inline_mapplets(mapping, folder):
15
+ mapplet_map = {m.name: m for m in folder.mapplets}
16
+ extra_transforms = []
17
+ extra_connectors = []
18
+ mapplet_instances = set()
19
+
20
+ for inst in mapping.instances:
21
+ if inst.type == "Mapplet" or (inst.transformation_type or "").lower() == "mapplet":
22
+ mapplet_name = inst.transformation_name or inst.name
23
+ mapplet = mapplet_map.get(mapplet_name)
24
+ if not mapplet:
25
+ continue
26
+ mapplet_instances.add(inst.name)
27
+ prefix = inst.name
28
+
29
+ for tx in mapplet.transformations:
30
+ inlined = TransformationDef(
31
+ name=f"{prefix}__{tx.name}",
32
+ type=tx.type,
33
+ description=tx.description,
34
+ reusable=tx.reusable,
35
+ fields=list(tx.fields),
36
+ attributes=list(tx.attributes),
37
+ groups=list(tx.groups),
38
+ metadata_extensions=list(tx.metadata_extensions),
39
+ )
40
+ extra_transforms.append(inlined)
41
+
42
+ for conn in mapplet.connectors:
43
+ from informatica_python.models import ConnectorDef
44
+ new_from = f"{prefix}__{conn.from_instance}" if conn.from_instance in {t.name for t in mapplet.transformations} else conn.from_instance
45
+ new_to = f"{prefix}__{conn.to_instance}" if conn.to_instance in {t.name for t in mapplet.transformations} else conn.to_instance
46
+ inlined_conn = ConnectorDef(
47
+ from_instance=new_from,
48
+ from_field=conn.from_field,
49
+ from_instance_type=conn.from_instance_type,
50
+ to_instance=new_to,
51
+ to_field=conn.to_field,
52
+ to_instance_type=conn.to_instance_type,
53
+ )
54
+ extra_connectors.append(inlined_conn)
55
+
56
+ rewired_connectors = []
57
+ mapplet_internal_names = set()
58
+ for inst_name in mapplet_instances:
59
+ mapplet_name = None
60
+ for inst in mapping.instances:
61
+ if inst.name == inst_name:
62
+ mapplet_name = inst.transformation_name or inst.name
63
+ break
64
+ mapplet = mapplet_map.get(mapplet_name) if mapplet_name else None
65
+ if mapplet:
66
+ for tx in mapplet.transformations:
67
+ mapplet_internal_names.add(f"{inst_name}__{tx.name}")
68
+
69
+ for conn in mapping.connectors:
70
+ if conn.to_instance in mapplet_instances:
71
+ first_tx = None
72
+ for ec in extra_connectors:
73
+ if ec.from_instance == conn.to_instance or ec.to_instance.startswith(f"{conn.to_instance}__"):
74
+ for et in extra_transforms:
75
+ if et.name.startswith(f"{conn.to_instance}__"):
76
+ has_input = any(
77
+ "INPUT" in (f.porttype or "").upper()
78
+ for f in et.fields
79
+ if f.name == conn.to_field
80
+ )
81
+ if has_input:
82
+ first_tx = et.name
83
+ break
84
+ if first_tx:
85
+ break
86
+ if not first_tx:
87
+ for et in extra_transforms:
88
+ if et.name.startswith(f"{conn.to_instance}__"):
89
+ first_tx = et.name
90
+ break
91
+ if first_tx:
92
+ from informatica_python.models import ConnectorDef
93
+ rewired_connectors.append(ConnectorDef(
94
+ from_instance=conn.from_instance,
95
+ from_field=conn.from_field,
96
+ from_instance_type=conn.from_instance_type,
97
+ to_instance=first_tx,
98
+ to_field=conn.to_field,
99
+ to_instance_type=conn.to_instance_type,
100
+ ))
101
+ else:
102
+ rewired_connectors.append(conn)
103
+ elif conn.from_instance in mapplet_instances:
104
+ last_tx = None
105
+ for et in reversed(extra_transforms):
106
+ if et.name.startswith(f"{conn.from_instance}__"):
107
+ has_output = any(
108
+ "OUTPUT" in (f.porttype or "").upper()
109
+ for f in et.fields
110
+ if f.name == conn.from_field
111
+ )
112
+ if has_output:
113
+ last_tx = et.name
114
+ break
115
+ if not last_tx:
116
+ for et in reversed(extra_transforms):
117
+ if et.name.startswith(f"{conn.from_instance}__"):
118
+ last_tx = et.name
119
+ break
120
+ if last_tx:
121
+ from informatica_python.models import ConnectorDef
122
+ rewired_connectors.append(ConnectorDef(
123
+ from_instance=last_tx,
124
+ from_field=conn.from_field,
125
+ from_instance_type=conn.from_instance_type,
126
+ to_instance=conn.to_instance,
127
+ to_field=conn.to_field,
128
+ to_instance_type=conn.to_instance_type,
129
+ ))
130
+ else:
131
+ rewired_connectors.append(conn)
132
+ else:
133
+ rewired_connectors.append(conn)
134
+
135
+ return extra_transforms, extra_connectors + rewired_connectors, mapplet_instances
136
+
137
+
14
138
  def generate_mapping_code(mapping: MappingDef, folder: FolderDef,
15
139
  data_lib: str = "pandas", mapping_index: int = 1) -> str:
16
140
  lines = []
@@ -24,10 +148,17 @@ def generate_mapping_code(mapping: MappingDef, folder: FolderDef,
24
148
  lines.append("")
25
149
  lines.append("")
26
150
 
151
+ inlined_transforms, inlined_connectors, mapplet_instance_names = _inline_mapplets(mapping, folder)
152
+
153
+ all_transforms = list(mapping.transformations) + inlined_transforms
154
+ all_connectors = [c for c in mapping.connectors
155
+ if c.from_instance not in mapplet_instance_names
156
+ and c.to_instance not in mapplet_instance_names] + inlined_connectors
157
+
27
158
  source_map = _build_source_map(mapping, folder)
28
159
  target_map = _build_target_map(mapping, folder)
29
- transform_map = {t.name: t for t in mapping.transformations}
30
- connector_graph = _build_connector_graph(mapping.connectors)
160
+ transform_map = {t.name: t for t in all_transforms}
161
+ connector_graph = _build_connector_graph(all_connectors)
31
162
  instance_map = {i.name: i for i in mapping.instances}
32
163
 
33
164
  lines.append(f"def run_{_safe_name(mapping.name)}(config):")
@@ -50,7 +181,7 @@ def generate_mapping_code(mapping: MappingDef, folder: FolderDef,
50
181
  safe = _safe_name(src_name)
51
182
  source_dfs[src_name] = f"df_{safe}"
52
183
 
53
- sq_transforms = [t for t in mapping.transformations
184
+ sq_transforms = [t for t in all_transforms
54
185
  if t.type in ("Source Qualifier", "Application Source Qualifier")]
55
186
  if sq_transforms:
56
187
  for sq in sq_transforms:
@@ -63,12 +194,14 @@ def generate_mapping_code(mapping: MappingDef, folder: FolderDef,
63
194
  conn_name = _safe_name(src_def.db_name) if src_def.db_name else "default"
64
195
  schema = src_def.owner_name or "dbo"
65
196
  lines.append(f" df_{safe} = read_from_db(config, 'SELECT * FROM {schema}.{src_name}', '{conn_name}')")
197
+ elif src_def.flatfile:
198
+ _emit_flatfile_read(lines, safe, src_def)
66
199
  else:
67
200
  lines.append(f" df_{safe} = read_file(config.get('sources', {{}}).get('{src_name}', {{}}).get('file_path', '{src_name}'),")
68
201
  lines.append(f" config.get('sources', {{}}).get('{src_name}', {{}}))")
69
202
  lines.append("")
70
203
 
71
- processing_order = _get_processing_order(mapping.transformations, connector_graph, sq_transforms)
204
+ processing_order = _get_processing_order(all_transforms, connector_graph, sq_transforms)
72
205
 
73
206
  for tx in processing_order:
74
207
  if tx.type in ("Source Qualifier", "Application Source Qualifier"):
@@ -99,6 +232,107 @@ def _safe_name(name):
99
232
  return safe.lower()
100
233
 
101
234
 
235
+ def _flatfile_config_dict(ff):
236
+ cfg = {}
237
+ if not ff:
238
+ return cfg
239
+ if ff.delimiter and ff.delimiter != ",":
240
+ d = ff.delimiter
241
+ DELIMITER_MAP = {
242
+ "COMMA": ",", "TAB": "\\t", "PIPE": "|", "SEMICOLON": ";",
243
+ "SPACE": " ", "TILDE": "~", "CARET": "^",
244
+ }
245
+ d = DELIMITER_MAP.get(d.upper(), d)
246
+ cfg["delimiter"] = d
247
+ if ff.is_fixed_width == "YES":
248
+ cfg["fixed_width"] = True
249
+ if ff.header_lines:
250
+ cfg["header_lines"] = ff.header_lines
251
+ if ff.skip_rows:
252
+ cfg["skip_rows"] = ff.skip_rows
253
+ if ff.text_qualifier:
254
+ cfg["quotechar"] = ff.text_qualifier
255
+ if ff.escape_character:
256
+ cfg["escapechar"] = ff.escape_character
257
+ if ff.strip_trailing_blanks == "YES":
258
+ cfg["strip_trailing_blanks"] = True
259
+ if ff.code_page:
260
+ cfg["encoding"] = ff.code_page
261
+ if ff.row_delimiter:
262
+ cfg["lineterminator"] = ff.row_delimiter
263
+ return cfg
264
+
265
+
266
+ def _emit_flatfile_read(lines, var_name, src_def, indent=" "):
267
+ ff = src_def.flatfile
268
+ fc = _flatfile_config_dict(ff)
269
+ if fc.get("fixed_width"):
270
+ widths = []
271
+ for fld in src_def.fields:
272
+ widths.append(fld.precision if fld.precision else 10)
273
+ lines.append(f"{indent}df_{var_name} = pd.read_fwf(")
274
+ lines.append(f"{indent} config.get('sources', {{}}).get('{src_def.name}', {{}}).get('file_path', '{src_def.name}'),")
275
+ lines.append(f"{indent} widths={widths},")
276
+ hdr = fc.get("header_lines", 0)
277
+ if hdr:
278
+ lines.append(f"{indent} header={hdr - 1},")
279
+ else:
280
+ lines.append(f"{indent} header=None,")
281
+ skip = fc.get("skip_rows", 0)
282
+ if skip:
283
+ lines.append(f"{indent} skiprows={skip},")
284
+ lines.append(f"{indent})")
285
+ return
286
+
287
+ file_cfg = {}
288
+ if "delimiter" in fc:
289
+ file_cfg["delimiter"] = fc["delimiter"]
290
+ if "quotechar" in fc:
291
+ file_cfg["quotechar"] = fc["quotechar"]
292
+ if "escapechar" in fc:
293
+ file_cfg["escapechar"] = fc["escapechar"]
294
+ if "encoding" in fc:
295
+ file_cfg["encoding"] = fc["encoding"]
296
+ if "lineterminator" in fc:
297
+ file_cfg["lineterminator"] = fc["lineterminator"]
298
+ hdr = fc.get("header_lines", 0)
299
+ if hdr:
300
+ file_cfg["header"] = True
301
+ file_cfg["header_row"] = hdr - 1
302
+ if fc.get("skip_rows"):
303
+ file_cfg["skip_rows"] = fc["skip_rows"]
304
+ if fc.get("strip_trailing_blanks"):
305
+ file_cfg["strip_trailing_blanks"] = True
306
+
307
+ if file_cfg:
308
+ lines.append(f"{indent}ff_cfg_{var_name} = {repr(file_cfg)}")
309
+ lines.append(f"{indent}ff_cfg_{var_name}.update(config.get('sources', {{}}).get('{src_def.name}', {{}}))")
310
+ lines.append(f"{indent}df_{var_name} = read_file(ff_cfg_{var_name}.get('file_path', '{src_def.name}'), ff_cfg_{var_name})")
311
+ else:
312
+ lines.append(f"{indent}df_{var_name} = read_file(config.get('sources', {{}}).get('{src_def.name}', {{}}).get('file_path', '{src_def.name}'),")
313
+ lines.append(f"{indent} config.get('sources', {{}}).get('{src_def.name}', {{}}))")
314
+
315
+
316
+ def _emit_flatfile_write(lines, var_name, tgt_def, indent=" "):
317
+ ff = tgt_def.flatfile
318
+ fc = _flatfile_config_dict(ff)
319
+ file_cfg = {}
320
+ if "delimiter" in fc:
321
+ file_cfg["delimiter"] = fc["delimiter"]
322
+ if "quotechar" in fc:
323
+ file_cfg["quotechar"] = fc["quotechar"]
324
+ if "encoding" in fc:
325
+ file_cfg["encoding"] = fc["encoding"]
326
+
327
+ if file_cfg:
328
+ lines.append(f"{indent}ff_cfg_{var_name} = {repr(file_cfg)}")
329
+ lines.append(f"{indent}ff_cfg_{var_name}.update(config.get('targets', {{}}).get('{tgt_def.name}', {{}}))")
330
+ lines.append(f"{indent}write_file(df_target_{var_name}, ff_cfg_{var_name}.get('file_path', '{tgt_def.name}'), ff_cfg_{var_name})")
331
+ else:
332
+ lines.append(f"{indent}write_file(df_target_{var_name}, config.get('targets', {{}}).get('{tgt_def.name}', {{}}).get('file_path', '{tgt_def.name}'),")
333
+ lines.append(f"{indent} config.get('targets', {{}}).get('{tgt_def.name}', {{}}))")
334
+
335
+
102
336
  def _build_source_map(mapping, folder):
103
337
  source_map = {}
104
338
  for inst in mapping.instances:
@@ -221,6 +455,8 @@ def _generate_source_qualifier(lines, sq, source_map, source_dfs, connector_grap
221
455
  schema = src_def.owner_name or "dbo"
222
456
  cols = ", ".join(f.name for f in src_def.fields) if src_def.fields else "*"
223
457
  lines.append(f" df_{sq_safe} = read_from_db(config, 'SELECT {cols} FROM {schema}.{src_def.name}', '{conn_name}')")
458
+ elif src_def.flatfile:
459
+ _emit_flatfile_read(lines, sq_safe, src_def)
224
460
  else:
225
461
  lines.append(f" df_{sq_safe} = read_file(config.get('sources', {{}}).get('{src_def.name}', {{}}).get('file_path', '{src_def.name}'),")
226
462
  lines.append(f" config.get('sources', {{}}).get('{src_def.name}', {{}}))")
@@ -232,6 +468,8 @@ def _generate_source_qualifier(lines, sq, source_map, source_dfs, connector_grap
232
468
  conn_name = _safe_name(src_def.db_name) if src_def.db_name else "default"
233
469
  schema = src_def.owner_name or "dbo"
234
470
  lines.append(f" df_{safe_src} = read_from_db(config, 'SELECT * FROM {schema}.{src_def.name}', '{conn_name}')")
471
+ elif src_def.flatfile:
472
+ _emit_flatfile_read(lines, safe_src, src_def)
235
473
  else:
236
474
  lines.append(f" df_{safe_src} = read_file(config.get('sources', {{}}).get('{src_def.name}', {{}}).get('file_path', '{src_def.name}'),")
237
475
  lines.append(f" config.get('sources', {{}}).get('{src_def.name}', {{}}))")
@@ -278,7 +516,7 @@ def _generate_transformation(lines, tx, connector_graph, source_dfs, transform_m
278
516
  elif tx_type == "sorter":
279
517
  _gen_sorter_transform(lines, tx, tx_safe, input_df, source_dfs)
280
518
  elif tx_type in ("joiner",):
281
- _gen_joiner_transform(lines, tx, tx_safe, input_df, input_sources, source_dfs)
519
+ _gen_joiner_transform(lines, tx, tx_safe, input_df, input_sources, source_dfs, connector_graph)
282
520
  elif tx_type in ("lookup procedure", "lookup"):
283
521
  _gen_lookup_transform(lines, tx, tx_safe, input_df, source_dfs)
284
522
  elif tx_type == "router":
@@ -410,7 +648,7 @@ def _gen_sorter_transform(lines, tx, tx_safe, input_df, source_dfs):
410
648
  source_dfs[tx.name] = f"df_{tx_safe}"
411
649
 
412
650
 
413
- def _gen_joiner_transform(lines, tx, tx_safe, input_df, input_sources, source_dfs):
651
+ def _gen_joiner_transform(lines, tx, tx_safe, input_df, input_sources, source_dfs, connector_graph=None):
414
652
  join_type = "inner"
415
653
  join_condition = ""
416
654
  for attr in tx.attributes:
@@ -436,10 +674,31 @@ def _gen_joiner_transform(lines, tx, tx_safe, input_df, input_sources, source_df
436
674
 
437
675
  left_keys, right_keys = parse_join_condition(join_condition)
438
676
 
677
+ master_src = None
678
+ detail_src = None
679
+ input_conns = connector_graph.get("to", {}).get(tx.name, []) if connector_graph else []
680
+ for conn in input_conns:
681
+ to_field = conn.to_field
682
+ if to_field in master_fields:
683
+ master_src = conn.from_instance
684
+ elif to_field in detail_fields:
685
+ detail_src = conn.from_instance
686
+
439
687
  src_list = list(input_sources)
440
- if len(src_list) >= 2:
441
- df_master = source_dfs.get(src_list[0], f"df_{_safe_name(src_list[0])}")
442
- df_detail = source_dfs.get(src_list[1], f"df_{_safe_name(src_list[1])}")
688
+ if not master_src and not detail_src and len(src_list) >= 2:
689
+ master_src = src_list[0]
690
+ detail_src = src_list[1]
691
+ elif not master_src and len(src_list) >= 1:
692
+ master_src = src_list[0]
693
+ if not detail_src:
694
+ for s in src_list:
695
+ if s != master_src:
696
+ detail_src = s
697
+ break
698
+
699
+ if master_src and detail_src:
700
+ df_master = source_dfs.get(master_src, f"df_{_safe_name(master_src)}")
701
+ df_detail = source_dfs.get(detail_src, f"df_{_safe_name(detail_src)}")
443
702
 
444
703
  lines.append(f" # Join ({join_type}): {join_condition or 'auto'}")
445
704
  if left_keys and right_keys:
@@ -451,9 +710,7 @@ def _gen_joiner_transform(lines, tx, tx_safe, input_df, input_sources, source_df
451
710
  lines.append(f" suffixes=('', '_master')")
452
711
  lines.append(f" )")
453
712
  else:
454
- common_cols = []
455
- if master_fields and detail_fields:
456
- common_cols = [f for f in detail_fields if f in master_fields]
713
+ common_cols = [f for f in detail_fields if f in master_fields]
457
714
  if common_cols:
458
715
  lines.append(f" df_{tx_safe} = {df_detail}.merge(")
459
716
  lines.append(f" {df_master},")
@@ -539,9 +796,13 @@ def _gen_lookup_transform(lines, tx, tx_safe, input_df, source_dfs):
539
796
 
540
797
  drop_cols = [k for k in lookup_keys if k not in input_keys]
541
798
  if drop_cols:
542
- lines.append(f" lkp_drop = [c for c in {drop_cols} if c in df_{tx_safe}.columns and c + '_lkp' not in df_{tx_safe}.columns]")
799
+ lines.append(f" _lkp_drop = [c for c in {drop_cols} if c in df_{tx_safe}.columns]")
800
+ lines.append(f" if _lkp_drop:")
801
+ lines.append(f" df_{tx_safe} = df_{tx_safe}.drop(columns=_lkp_drop)")
543
802
 
544
803
  for rf in all_output_fields:
804
+ lines.append(f" if '{rf.name}' not in df_{tx_safe}.columns:")
805
+ lines.append(f" df_{tx_safe}['{rf.name}'] = None")
545
806
  if rf.default_value:
546
807
  lines.append(f" df_{tx_safe}['{rf.name}'] = df_{tx_safe}['{rf.name}'].fillna({repr(rf.default_value)})")
547
808
  else:
@@ -629,26 +890,127 @@ def _gen_sequence_generator(lines, tx, tx_safe, input_df, source_dfs):
629
890
 
630
891
 
631
892
  def _gen_normalizer_transform(lines, tx, tx_safe, input_df, source_dfs):
632
- lines.append(f" # TODO: Normalizer transformation - implement based on specific normalization logic")
633
- lines.append(f" df_{tx_safe} = {input_df}.copy()")
893
+ input_ports = []
894
+ output_ports = []
895
+ occurs_cols = []
896
+ id_cols = []
897
+
898
+ for fld in tx.fields:
899
+ pt = (fld.porttype or "").upper()
900
+ if "INPUT" in pt:
901
+ input_ports.append(fld)
902
+ if "OUTPUT" in pt:
903
+ output_ports.append(fld)
904
+
905
+ for fld in tx.fields:
906
+ if fld.field_number > 0:
907
+ occurs_cols.append(fld.name)
908
+
909
+ if not occurs_cols:
910
+ import re
911
+ base_groups = {}
912
+ for fld in input_ports:
913
+ m = re.match(r'^(.+?)(\d+)$', fld.name)
914
+ if m:
915
+ base = m.group(1)
916
+ idx = int(m.group(2))
917
+ if base not in base_groups:
918
+ base_groups[base] = []
919
+ base_groups[base].append(fld.name)
920
+ else:
921
+ id_cols.append(fld.name)
922
+
923
+ if base_groups:
924
+ longest_group = max(base_groups.values(), key=len)
925
+ occurs_cols = longest_group
926
+ id_cols = [f.name for f in input_ports if f.name not in occurs_cols]
927
+ else:
928
+ for fld in input_ports:
929
+ pt = (fld.porttype or "").upper()
930
+ if "INPUT" in pt and "OUTPUT" in pt:
931
+ id_cols.append(fld.name)
932
+ elif "INPUT" in pt and "OUTPUT" not in pt:
933
+ occurs_cols.append(fld.name)
934
+
935
+ if not id_cols:
936
+ id_cols = [f.name for f in input_ports if f.name not in occurs_cols]
937
+
938
+ gk_field = None
939
+ for fld in output_ports:
940
+ if "GK" in fld.name.upper() or "GENERATED" in fld.name.upper() or "KEY" in fld.name.upper():
941
+ gk_field = fld.name
942
+ break
943
+
944
+ lines.append(f" # Normalizer: unpivot repeated columns into rows")
945
+ if occurs_cols and id_cols:
946
+ lines.append(f" df_{tx_safe} = {input_df}.melt(")
947
+ lines.append(f" id_vars={id_cols},")
948
+ lines.append(f" value_vars={occurs_cols},")
949
+ lines.append(f" var_name='_norm_variable',")
950
+ lines.append(f" value_name='_norm_value'")
951
+ lines.append(f" )")
952
+ lines.append(f" df_{tx_safe} = df_{tx_safe}.dropna(subset=['_norm_value']).reset_index(drop=True)")
953
+ elif occurs_cols:
954
+ lines.append(f" df_{tx_safe} = {input_df}[{occurs_cols}].stack().reset_index(drop=True).to_frame('_norm_value')")
955
+ else:
956
+ lines.append(f" df_{tx_safe} = {input_df}.copy()")
957
+
958
+ if gk_field:
959
+ lines.append(f" df_{tx_safe}['{gk_field}'] = range(1, len(df_{tx_safe}) + 1)")
960
+
634
961
  source_dfs[tx.name] = f"df_{tx_safe}"
635
962
 
636
963
 
637
964
  def _gen_rank_transform(lines, tx, tx_safe, input_df, source_dfs):
638
965
  rank_port = None
639
- group_by = []
966
+ group_by_ports = []
967
+ top_bottom = "TOP"
968
+ top_n = 0
969
+
640
970
  for fld in tx.fields:
641
- if "RANK" in fld.name.upper():
971
+ pt = (fld.porttype or "").upper()
972
+ if "INPUT" in pt and "OUTPUT" in pt:
973
+ group_by_ports.append(fld.name)
974
+
975
+ for fld in tx.fields:
976
+ if fld.expression and fld.expression.strip() and fld.name.upper() not in ("RANKINDEX",):
642
977
  rank_port = fld.name
643
- top_bottom = "TOP"
978
+ break
979
+ if not rank_port:
980
+ for fld in tx.fields:
981
+ if fld.name.upper() == "RANKINDEX":
982
+ continue
983
+ pt = (fld.porttype or "").upper()
984
+ if "INPUT" in pt and "OUTPUT" not in pt:
985
+ rank_port = fld.name
986
+ break
987
+
644
988
  for attr in tx.attributes:
645
989
  if attr.name == "Top/Bottom":
646
990
  top_bottom = attr.value
991
+ elif attr.name == "Number Of Ranks":
992
+ try:
993
+ top_n = int(attr.value)
994
+ except (ValueError, TypeError):
995
+ top_n = 0
996
+
997
+ ascending = top_bottom.upper() != "TOP"
647
998
 
648
999
  lines.append(f" df_{tx_safe} = {input_df}.copy()")
649
- if rank_port:
650
- ascending = top_bottom.upper() != "TOP"
651
- lines.append(f" df_{tx_safe}['RANK_INDEX'] = df_{tx_safe}['{rank_port}'].rank(ascending={ascending})")
1000
+ if rank_port and group_by_ports:
1001
+ lines.append(f" # Rank by '{rank_port}' within groups {group_by_ports}")
1002
+ lines.append(f" df_{tx_safe}['RANKINDEX'] = df_{tx_safe}.groupby({group_by_ports})['{rank_port}'].rank(")
1003
+ lines.append(f" method='min', ascending={ascending}")
1004
+ lines.append(f" ).astype(int)")
1005
+ if top_n:
1006
+ lines.append(f" df_{tx_safe} = df_{tx_safe}[df_{tx_safe}['RANKINDEX'] <= {top_n}].reset_index(drop=True)")
1007
+ elif rank_port:
1008
+ lines.append(f" # Rank by '{rank_port}' (no group-by)")
1009
+ lines.append(f" df_{tx_safe}['RANKINDEX'] = df_{tx_safe}['{rank_port}'].rank(method='min', ascending={ascending}).astype(int)")
1010
+ if top_n:
1011
+ lines.append(f" df_{tx_safe} = df_{tx_safe}[df_{tx_safe}['RANKINDEX'] <= {top_n}].reset_index(drop=True)")
1012
+ else:
1013
+ lines.append(f" df_{tx_safe}['RANKINDEX'] = range(1, len(df_{tx_safe}) + 1)")
652
1014
  source_dfs[tx.name] = f"df_{tx_safe}"
653
1015
 
654
1016
 
@@ -759,6 +1121,8 @@ def _generate_target_write(lines, tgt_name, tgt_def, connector_graph, source_dfs
759
1121
 
760
1122
  if tgt_def.database_type and tgt_def.database_type != "Flat File":
761
1123
  lines.append(f" write_to_db(config, df_target_{tgt_safe}, '{tgt_def.name}', 'target')")
1124
+ elif tgt_def.flatfile:
1125
+ _emit_flatfile_write(lines, tgt_safe, tgt_def)
762
1126
  else:
763
1127
  lines.append(f" write_file(df_target_{tgt_safe}, config.get('targets', {{}}).get('{tgt_def.name}', {{}}).get('file_path', '{tgt_def.name}'),")
764
1128
  lines.append(f" config.get('targets', {{}}).get('{tgt_def.name}', {{}}))")
@@ -1,4 +1,5 @@
1
1
  from informatica_python.models import FolderDef, WorkflowDef, TaskInstanceDef
2
+ from informatica_python.utils.expression_converter import convert_expression
2
3
 
3
4
 
4
5
  def generate_workflow_code(folder: FolderDef) -> str:
@@ -127,14 +128,51 @@ def _generate_workflow_function(lines, wf: WorkflowDef, folder: FolderDef):
127
128
  lines.append("")
128
129
 
129
130
  elif task.task_type == "Decision":
130
- lines.append(f" # Decision Task: {task.name}")
131
131
  decision_cond = ""
132
+ decision_name = ""
132
133
  for attr in task.attributes:
133
134
  if attr.name == "Decision Condition":
134
135
  decision_cond = attr.value
136
+ elif attr.name == "Decision Name":
137
+ decision_name = attr.value
138
+
139
+ lines.append(f" # Decision Task: {task.name}")
135
140
  if decision_cond:
136
- lines.append(f" # Condition: {decision_cond}")
137
- lines.append(f" logger.info('Decision task: {task.name}')")
141
+ py_cond = _convert_decision_condition(decision_cond)
142
+ lines.append(f" # Original condition: {decision_cond}")
143
+ lines.append(f" decision_{task_safe} = {py_cond}")
144
+ lines.append(f" logger.info(f'Decision {task.name}: {{decision_{task_safe}}}')")
145
+
146
+ succ_targets = []
147
+ fail_targets = []
148
+ for link in wf.links:
149
+ if link.from_instance == task.name:
150
+ cond_text = (link.condition or "").strip()
151
+ if cond_text and ("$" in cond_text or "SUCCEEDED" in cond_text.upper()
152
+ or "TRUE" in cond_text.upper()):
153
+ succ_targets.append(link.to_instance)
154
+ elif cond_text and ("FAILED" in cond_text.upper()
155
+ or "FALSE" in cond_text.upper()):
156
+ fail_targets.append(link.to_instance)
157
+ else:
158
+ succ_targets.append(link.to_instance)
159
+
160
+ if succ_targets or fail_targets:
161
+ lines.append(f" if decision_{task_safe}:")
162
+ if succ_targets:
163
+ for t in succ_targets:
164
+ lines.append(f" logger.info('Decision {task.name} => proceeding to {t}')")
165
+ else:
166
+ lines.append(f" pass")
167
+ if fail_targets:
168
+ lines.append(f" else:")
169
+ for t in fail_targets:
170
+ lines.append(f" logger.info('Decision {task.name} => proceeding to {t}')")
171
+ else:
172
+ lines.append(f" if not decision_{task_safe}:")
173
+ lines.append(f" logger.warning('Decision {task.name} evaluated to False')")
174
+ else:
175
+ lines.append(f" logger.info('Decision task: {task.name} (no condition specified)')")
138
176
  lines.append("")
139
177
 
140
178
  elif task.task_type == "Timer":
@@ -226,6 +264,24 @@ def _get_task_execution_order(wf: WorkflowDef):
226
264
  return ordered
227
265
 
228
266
 
267
+ def _convert_decision_condition(condition):
268
+ import re
269
+ cond = condition.strip()
270
+ cond = re.sub(r'\$\$(\w+)', r'\1', cond)
271
+ cond = re.sub(r'\$(\w+)\.(\w+)\.(Status|PrevTaskStatus)', r"'\2_status'", cond)
272
+ cond = re.sub(r'\bSUCCEEDED\b', "'SUCCEEDED'", cond, flags=re.IGNORECASE)
273
+ cond = re.sub(r'\bFAILED\b', "'FAILED'", cond, flags=re.IGNORECASE)
274
+ cond = re.sub(r'\bABORTED\b', "'ABORTED'", cond, flags=re.IGNORECASE)
275
+ cond = re.sub(r'\bAND\b', 'and', cond, flags=re.IGNORECASE)
276
+ cond = re.sub(r'\bOR\b', 'or', cond, flags=re.IGNORECASE)
277
+ cond = re.sub(r'\bNOT\b', 'not', cond, flags=re.IGNORECASE)
278
+ cond = re.sub(r'\bTRUE\b', 'True', cond, flags=re.IGNORECASE)
279
+ cond = re.sub(r'\bFALSE\b', 'False', cond, flags=re.IGNORECASE)
280
+ cond = re.sub(r'(?<!=)=(?!=)', '==', cond)
281
+ cond = cond.replace('<>', '!=')
282
+ return cond
283
+
284
+
229
285
  def _safe_name(name):
230
286
  import re
231
287
  safe = re.sub(r'[^a-zA-Z0-9_]', '_', name)
@@ -95,11 +95,11 @@ INFA_FUNC_MAP = {
95
95
  }
96
96
 
97
97
 
98
- AGG_FUNC_NAMES = {
99
- "SUM", "COUNT", "AVG", "MAX", "MIN", "MEDIAN",
100
- "STDDEV", "VARIANCE", "PERCENTILE", "FIRST", "LAST",
101
- "MOVINGAVG", "MOVINGSUM", "CUME",
102
- }
98
+ AGG_FUNC_NAMES = [
99
+ "MOVINGAVG", "MOVINGSUM", "PERCENTILE", "VARIANCE",
100
+ "STDDEV", "MEDIAN", "COUNT", "FIRST", "LAST",
101
+ "CUME", "SUM", "AVG", "MAX", "MIN",
102
+ ]
103
103
 
104
104
 
105
105
  def convert_expression(expr):
@@ -131,6 +131,8 @@ def convert_expression(expr):
131
131
 
132
132
  converted = re.sub(r'<>', '!=', converted)
133
133
 
134
+ converted = re.sub(r'(?<![<>!])=(?!=)', '==', converted)
135
+
134
136
  converted = re.sub(r':LKP\.(\w+)\(', r'lookup_func("\1", ', converted)
135
137
 
136
138
  converted = re.sub(r'\$\$(\w+)', r'get_variable("\1")', converted)
@@ -202,8 +204,11 @@ def parse_aggregate_expression(expr):
202
204
  cleaned = expr.strip()
203
205
 
204
206
  for func_name in AGG_FUNC_NAMES:
205
- pattern = re.compile(r'\b' + func_name + r'\s*\(\s*([^)]*)\s*\)', re.IGNORECASE)
206
- match = pattern.search(cleaned)
207
+ pattern = re.compile(
208
+ r'^\s*' + func_name + r'\s*\(\s*([A-Za-z_][A-Za-z0-9_]*|\*)\s*\)\s*$',
209
+ re.IGNORECASE
210
+ )
211
+ match = pattern.match(cleaned)
207
212
  if match:
208
213
  col = match.group(1).strip()
209
214
  return func_name.lower(), col
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: informatica-python
3
- Version: 1.2.0
3
+ Version: 1.3.0
4
4
  Summary: Convert Informatica PowerCenter workflow XML to Python/PySpark code
5
5
  License-Expression: MIT
6
6
  Requires-Python: >=3.8
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
4
4
 
5
5
  [project]
6
6
  name = "informatica-python"
7
- version = "1.2.0"
7
+ version = "1.3.0"
8
8
  description = "Convert Informatica PowerCenter workflow XML to Python/PySpark code"
9
9
  readme = "README.md"
10
10
  license = "MIT"
@@ -239,6 +239,8 @@ def test_expression_converter_expanded():
239
239
 
240
240
  result = convert_expression("IIF(STATUS = 'A', 'Active', 'Inactive')")
241
241
  assert "iif_expr" in result
242
+ assert "==" in result, f"Expected == in result, got: {result}"
243
+ assert "= =" not in result
242
244
 
243
245
  result = convert_expression("DECODE(TYPE, 1, 'One', 2, 'Two', 'Other')")
244
246
  assert "decode_expr" in result
@@ -301,6 +303,17 @@ def test_expression_converter_expanded():
301
303
  result = convert_expression("STATUS <> 'X'")
302
304
  assert "!=" in result
303
305
 
306
+ result = convert_expression("AMOUNT >= 100")
307
+ assert ">=" in result
308
+ assert ">==" not in result
309
+
310
+ result = convert_expression("AMOUNT <= 100")
311
+ assert "<=" in result
312
+ assert "<==" not in result
313
+
314
+ result = convert_expression("SUM(A)/COUNT(*)")
315
+ assert "sum_val" in result or "count_val" in result
316
+
304
317
  result = convert_expression("$$MY_VARIABLE")
305
318
  assert 'get_variable("MY_VARIABLE")' in result
306
319
 
@@ -405,6 +418,13 @@ def test_parse_aggregate_expression():
405
418
  assert func is None
406
419
  assert col is None
407
420
 
421
+ func, col = parse_aggregate_expression("SUM(A)/COUNT(*)")
422
+ assert func is None, f"Compound expression should not match, got func={func}"
423
+ assert col is None
424
+
425
+ func, col = parse_aggregate_expression("AVG(A+B)")
426
+ assert func is None, f"Expression with operators should not match, got func={func}"
427
+
408
428
  print("PASS: test_parse_aggregate_expression")
409
429
 
410
430
 
@@ -459,7 +479,7 @@ def test_generated_joiner_code():
459
479
  lines = []
460
480
  source_dfs = {"SRC_CUST": "df_src_cust", "SRC_ORDER": "df_src_order"}
461
481
  input_sources = {"SRC_CUST", "SRC_ORDER"}
462
- _gen_joiner_transform(lines, tx, "jnr_cust_order", "df_src_cust", input_sources, source_dfs)
482
+ _gen_joiner_transform(lines, tx, "jnr_cust_order", "df_src_cust", input_sources, source_dfs, connector_graph=None)
463
483
  code = "\n".join(lines)
464
484
 
465
485
  assert "merge" in code
@@ -508,6 +528,254 @@ def test_generated_lookup_code():
508
528
  print(f"PASS: test_generated_lookup_code")
509
529
 
510
530
 
531
def test_flatfile_metadata_read():
    """Flat-file source/target metadata (delimiter, skip rows) must appear in generated mapping code.

    Builds a pipe-delimited source and a tilde-delimited target, then checks that
    generate_mapping_code emits a flat-file config dict carrying both settings.
    """
    # Only the model classes actually used below are imported
    # (TransformationDef / TableAttribute were previously imported but unused).
    from informatica_python.models import (
        MappingDef, FolderDef, SourceDef, TargetDef, FlatFileDef,
        FieldDef, ConnectorDef, InstanceDef,
    )
    from informatica_python.generators.mapping_gen import generate_mapping_code

    ff = FlatFileDef(
        name="test_file",
        delimiter="|",
        header_lines=1,
        text_qualifier='"',
        skip_rows=2,
        code_page="UTF-8",
    )
    src = SourceDef(
        name="PIPE_SOURCE",
        database_type="Flat File",
        flatfile=ff,
        fields=[FieldDef(name="COL_A", datatype="string"), FieldDef(name="COL_B", datatype="integer")],
    )
    tgt_ff = FlatFileDef(name="tgt_file", delimiter="~")
    tgt = TargetDef(
        name="TILDE_TARGET",
        database_type="Flat File",
        flatfile=tgt_ff,
        fields=[FieldDef(name="COL_A", datatype="string")],
    )
    mapping = MappingDef(
        name="m_flatfile_test",
        transformations=[],
        connectors=[ConnectorDef(from_instance="PIPE_SOURCE", from_field="COL_A",
                                 from_instance_type="Source Definition",
                                 to_instance="TILDE_TARGET", to_field="COL_A",
                                 to_instance_type="Target Definition")],
        instances=[
            InstanceDef(name="PIPE_SOURCE", type="Source Definition", transformation_name="PIPE_SOURCE"),
            InstanceDef(name="TILDE_TARGET", type="Target Definition", transformation_name="TILDE_TARGET"),
        ],
    )
    folder = FolderDef(name="test", sources=[src], targets=[tgt], mappings=[mapping])
    code = generate_mapping_code(mapping, folder)

    assert "ff_cfg_" in code, "Should emit flatfile config dict"
    assert "'delimiter': '|'" in code, "Pipe delimiter should appear"
    assert "'skip_rows': 2" in code, "Skip rows should appear"
    assert "'~'" in code, "Tilde delimiter should appear for target"
    print("PASS: test_flatfile_metadata_read")
579
+
580
+
581
def test_flatfile_fixed_width():
    """Fixed-width flat files must be read via pd.read_fwf with widths taken from field precision.

    Two string fields of precision 10 and 20 should surface as widths [10, 20].
    """
    # TargetDef / ConnectorDef were previously imported here but never used.
    from informatica_python.models import (
        MappingDef, FolderDef, SourceDef, FlatFileDef,
        FieldDef, InstanceDef,
    )
    from informatica_python.generators.mapping_gen import generate_mapping_code

    ff = FlatFileDef(name="fw_file", is_fixed_width="YES", header_lines=0)
    src = SourceDef(
        name="FW_SOURCE",
        database_type="Flat File",
        flatfile=ff,
        fields=[FieldDef(name="F1", datatype="string", precision=10),
                FieldDef(name="F2", datatype="string", precision=20)],
    )
    mapping = MappingDef(
        name="m_fw_test",
        transformations=[],
        connectors=[],
        instances=[InstanceDef(name="FW_SOURCE", type="Source Definition", transformation_name="FW_SOURCE")],
    )
    folder = FolderDef(name="test", sources=[src], targets=[], mappings=[mapping])
    code = generate_mapping_code(mapping, folder)

    assert "read_fwf" in code, "Fixed-width should use pd.read_fwf"
    assert "[10, 20]" in code, "Widths should be derived from field precision"
    print("PASS: test_flatfile_fixed_width")
608
+
609
+
610
def test_normalizer_transform():
    """Normalizer transform should pivot repeated occurrence columns into rows via pd.melt.

    PHONE1..PHONE3 are the occurring ports; CUST_ID is the pass-through key and
    GK the generated-key output. Also verifies the output df name is registered.
    """
    # Trimmed to the two model classes actually used (the rest were unused imports).
    from informatica_python.models import FieldDef, TransformationDef
    from informatica_python.generators.mapping_gen import _gen_normalizer_transform

    tx = TransformationDef(
        name="NRM_PHONES",
        type="Normalizer",
        fields=[
            FieldDef(name="CUST_ID", datatype="integer", porttype="INPUT/OUTPUT"),
            FieldDef(name="PHONE1", datatype="string", porttype="INPUT"),
            FieldDef(name="PHONE2", datatype="string", porttype="INPUT"),
            FieldDef(name="PHONE3", datatype="string", porttype="INPUT"),
            FieldDef(name="GK", datatype="integer", porttype="OUTPUT"),
        ],
    )
    lines = []
    source_dfs = {}
    _gen_normalizer_transform(lines, tx, "nrm_phones", "df_input", source_dfs)
    code = "\n".join(lines)

    assert "melt(" in code, "Normalizer should use pd.melt()"
    assert "PHONE1" in code, "Should reference PHONE columns"
    assert "CUST_ID" in code, "Should reference ID column"
    assert "GK" in code, "Should generate GK sequence"
    assert source_dfs["NRM_PHONES"] == "df_nrm_phones"
    print("PASS: test_normalizer_transform")
639
+
640
+
641
def test_rank_with_groupby():
    """A Rank transform with a group-by port should emit grouped top-N ranking code.

    REGION is the group-by port, AMOUNT the rank port, and the Top/Bottom=TOP,
    Number Of Ranks=5 attributes should yield a top-5-per-group filter.
    """
    from informatica_python.models import (
        FieldDef, TransformationDef, TableAttribute,
    )
    from informatica_python.generators.mapping_gen import _gen_rank_transform

    rank_tx = TransformationDef(
        name="RNK_SALES",
        type="Rank",
        fields=[
            FieldDef(name="REGION", datatype="string", porttype="INPUT/OUTPUT"),
            FieldDef(name="AMOUNT", datatype="decimal", porttype="INPUT", expression="AMOUNT"),
            FieldDef(name="RANKINDEX", datatype="integer", porttype="OUTPUT"),
        ],
        attributes=[
            TableAttribute(name="Top/Bottom", value="TOP"),
            TableAttribute(name="Number Of Ranks", value="5"),
        ],
    )

    emitted = []
    df_registry = {}
    _gen_rank_transform(emitted, rank_tx, "rnk_sales", "df_input", df_registry)
    generated = "\n".join(emitted)

    assert "groupby" in generated, "Should use groupby for group-by rank"
    assert "REGION" in generated, "Should group by REGION"
    assert "AMOUNT" in generated, "Should rank by AMOUNT"
    assert "RANKINDEX" in generated, "Should produce RANKINDEX column"
    assert "<= 5" in generated, "Should filter top 5"
    assert df_registry["RNK_SALES"] == "df_rnk_sales"
    print("PASS: test_rank_with_groupby")
672
+
673
+
674
def test_decision_task_if_else():
    """Decision tasks should become an if/else branch driven by the converted condition.

    The $$LOAD_FLAG = TRUE condition must be translated to Python (LOAD_FLAG / True),
    and the SUCCEEDED / FAILED link conditions must map onto the if branches.
    """
    # MappingDef was previously imported here but never used.
    from informatica_python.models import (
        FolderDef, WorkflowDef, TaskInstanceDef, WorkflowLink,
        TableAttribute,
    )
    from informatica_python.generators.workflow_gen import generate_workflow_code

    wf = WorkflowDef(
        name="wf_test_decision",
        task_instances=[
            TaskInstanceDef(name="Start", task_name="Start", task_type="Start Task"),
            TaskInstanceDef(
                name="dec_check_status",
                task_name="dec_check_status",
                task_type="Decision",
                attributes=[TableAttribute(name="Decision Condition", value="$$LOAD_FLAG = TRUE")],
            ),
            TaskInstanceDef(name="s_load_data", task_name="s_load_data", task_type="Session"),
            TaskInstanceDef(name="s_skip_load", task_name="s_skip_load", task_type="Session"),
        ],
        links=[
            WorkflowLink(from_instance="Start", to_instance="dec_check_status"),
            WorkflowLink(from_instance="dec_check_status", to_instance="s_load_data", condition="$dec_check_status.SUCCEEDED"),
            WorkflowLink(from_instance="dec_check_status", to_instance="s_skip_load", condition="$dec_check_status.FAILED"),
        ],
    )
    folder = FolderDef(name="test", workflows=[wf], mappings=[])
    code = generate_workflow_code(folder)

    assert "decision_dec_check_status" in code, "Should create decision variable"
    assert "if decision_dec_check_status" in code, "Should generate if branch"
    assert "LOAD_FLAG" in code, "Should convert $$LOAD_FLAG"
    assert "True" in code, "Should convert TRUE to Python True"
    print("PASS: test_decision_task_if_else")
708
+
709
+
710
def test_inline_mapplet():
    """A Mapplet instance should be inlined into the mapping with prefixed transform names.

    The mapplet's EXP_UPPER expression transform must appear in the generated code
    under an MPLT_INST__EXP_UPPER-style name, carrying its UPPER() expression.
    """
    # TableAttribute was previously imported here but never used.
    from informatica_python.models import (
        MappingDef, FolderDef, SourceDef, TargetDef, MappletDef,
        TransformationDef, ConnectorDef, InstanceDef, FieldDef,
    )
    from informatica_python.generators.mapping_gen import generate_mapping_code

    mplt = MappletDef(
        name="mplt_clean_name",
        transformations=[
            TransformationDef(
                name="EXP_UPPER",
                type="Expression",
                fields=[
                    FieldDef(name="FULL_NAME", datatype="string", porttype="INPUT/OUTPUT",
                             expression="UPPER(FULL_NAME)"),
                ],
            ),
        ],
        connectors=[],
    )

    mapping = MappingDef(
        name="m_with_mapplet",
        transformations=[
            TransformationDef(name="SQ_INPUT", type="Source Qualifier",
                              fields=[FieldDef(name="FULL_NAME", datatype="string", porttype="INPUT/OUTPUT")]),
        ],
        connectors=[
            ConnectorDef(from_instance="SRC", from_field="FULL_NAME",
                         from_instance_type="Source Definition",
                         to_instance="SQ_INPUT", to_field="FULL_NAME",
                         to_instance_type="Source Qualifier"),
            ConnectorDef(from_instance="SQ_INPUT", from_field="FULL_NAME",
                         from_instance_type="Source Qualifier",
                         to_instance="MPLT_INST", to_field="FULL_NAME",
                         to_instance_type="Mapplet"),
            ConnectorDef(from_instance="MPLT_INST", from_field="FULL_NAME",
                         from_instance_type="Mapplet",
                         to_instance="TGT", to_field="FULL_NAME",
                         to_instance_type="Target Definition"),
        ],
        instances=[
            InstanceDef(name="SRC", type="Source Definition", transformation_name="SRC"),
            InstanceDef(name="SQ_INPUT", type="Source Qualifier"),
            InstanceDef(name="MPLT_INST", type="Mapplet", transformation_name="mplt_clean_name",
                        transformation_type="Mapplet"),
            InstanceDef(name="TGT", type="Target Definition", transformation_name="TGT"),
        ],
    )

    src = SourceDef(name="SRC", fields=[FieldDef(name="FULL_NAME", datatype="string")])
    tgt = TargetDef(name="TGT", fields=[FieldDef(name="FULL_NAME", datatype="string")])
    folder = FolderDef(
        name="test",
        sources=[src],
        targets=[tgt],
        mappings=[mapping],
        mapplets=[mplt],
    )
    code = generate_mapping_code(mapping, folder)

    assert "MPLT_INST__EXP_UPPER" in code or "mplt_inst__exp_upper" in code, \
        "Inlined mapplet transform should appear with prefix"
    assert "UPPER" in code, "UPPER expression from mapplet should be present"
    print("PASS: test_inline_mapplet")
777
+
778
+
511
779
  if __name__ == "__main__":
512
780
  print("=" * 60)
513
781
  print("Running informatica-python tests")
@@ -531,6 +799,12 @@ if __name__ == "__main__":
531
799
  test_generated_aggregator_code,
532
800
  test_generated_joiner_code,
533
801
  test_generated_lookup_code,
802
+ test_flatfile_metadata_read,
803
+ test_flatfile_fixed_width,
804
+ test_normalizer_transform,
805
+ test_rank_with_groupby,
806
+ test_decision_task_if_else,
807
+ test_inline_mapplet,
534
808
  ]
535
809
 
536
810
  passed = 0