informatica-python 1.2.1__tar.gz → 1.3.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (26) hide show
  1. {informatica_python-1.2.1 → informatica_python-1.3.0}/PKG-INFO +1 -1
  2. {informatica_python-1.2.1 → informatica_python-1.3.0}/informatica_python/generators/mapping_gen.py +354 -13
  3. {informatica_python-1.2.1 → informatica_python-1.3.0}/informatica_python/generators/workflow_gen.py +59 -3
  4. {informatica_python-1.2.1 → informatica_python-1.3.0}/informatica_python.egg-info/PKG-INFO +1 -1
  5. {informatica_python-1.2.1 → informatica_python-1.3.0}/pyproject.toml +1 -1
  6. {informatica_python-1.2.1 → informatica_python-1.3.0}/tests/test_converter.py +254 -0
  7. {informatica_python-1.2.1 → informatica_python-1.3.0}/README.md +0 -0
  8. {informatica_python-1.2.1 → informatica_python-1.3.0}/informatica_python/__init__.py +0 -0
  9. {informatica_python-1.2.1 → informatica_python-1.3.0}/informatica_python/cli.py +0 -0
  10. {informatica_python-1.2.1 → informatica_python-1.3.0}/informatica_python/converter.py +0 -0
  11. {informatica_python-1.2.1 → informatica_python-1.3.0}/informatica_python/generators/__init__.py +0 -0
  12. {informatica_python-1.2.1 → informatica_python-1.3.0}/informatica_python/generators/config_gen.py +0 -0
  13. {informatica_python-1.2.1 → informatica_python-1.3.0}/informatica_python/generators/error_log_gen.py +0 -0
  14. {informatica_python-1.2.1 → informatica_python-1.3.0}/informatica_python/generators/helper_gen.py +0 -0
  15. {informatica_python-1.2.1 → informatica_python-1.3.0}/informatica_python/generators/sql_gen.py +0 -0
  16. {informatica_python-1.2.1 → informatica_python-1.3.0}/informatica_python/models.py +0 -0
  17. {informatica_python-1.2.1 → informatica_python-1.3.0}/informatica_python/parser.py +0 -0
  18. {informatica_python-1.2.1 → informatica_python-1.3.0}/informatica_python/utils/__init__.py +0 -0
  19. {informatica_python-1.2.1 → informatica_python-1.3.0}/informatica_python/utils/datatype_map.py +0 -0
  20. {informatica_python-1.2.1 → informatica_python-1.3.0}/informatica_python/utils/expression_converter.py +0 -0
  21. {informatica_python-1.2.1 → informatica_python-1.3.0}/informatica_python.egg-info/SOURCES.txt +0 -0
  22. {informatica_python-1.2.1 → informatica_python-1.3.0}/informatica_python.egg-info/dependency_links.txt +0 -0
  23. {informatica_python-1.2.1 → informatica_python-1.3.0}/informatica_python.egg-info/entry_points.txt +0 -0
  24. {informatica_python-1.2.1 → informatica_python-1.3.0}/informatica_python.egg-info/requires.txt +0 -0
  25. {informatica_python-1.2.1 → informatica_python-1.3.0}/informatica_python.egg-info/top_level.txt +0 -0
  26. {informatica_python-1.2.1 → informatica_python-1.3.0}/setup.cfg +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: informatica-python
3
- Version: 1.2.1
3
+ Version: 1.3.0
4
4
  Summary: Convert Informatica PowerCenter workflow XML to Python/PySpark code
5
5
  License-Expression: MIT
6
6
  Requires-Python: >=3.8
@@ -1,7 +1,7 @@
1
1
  from typing import List, Dict
2
2
  from informatica_python.models import (
3
3
  MappingDef, FolderDef, SourceDef, TargetDef,
4
- TransformationDef, ConnectorDef, InstanceDef,
4
+ TransformationDef, ConnectorDef, InstanceDef, MappletDef,
5
5
  )
6
6
  from informatica_python.utils.expression_converter import (
7
7
  convert_expression, convert_sql_expression,
@@ -11,6 +11,130 @@ from informatica_python.utils.expression_converter import (
11
11
  from informatica_python.utils.datatype_map import get_python_type
12
12
 
13
13
 
14
+ def _inline_mapplets(mapping, folder):
15
+ mapplet_map = {m.name: m for m in folder.mapplets}
16
+ extra_transforms = []
17
+ extra_connectors = []
18
+ mapplet_instances = set()
19
+
20
+ for inst in mapping.instances:
21
+ if inst.type == "Mapplet" or (inst.transformation_type or "").lower() == "mapplet":
22
+ mapplet_name = inst.transformation_name or inst.name
23
+ mapplet = mapplet_map.get(mapplet_name)
24
+ if not mapplet:
25
+ continue
26
+ mapplet_instances.add(inst.name)
27
+ prefix = inst.name
28
+
29
+ for tx in mapplet.transformations:
30
+ inlined = TransformationDef(
31
+ name=f"{prefix}__{tx.name}",
32
+ type=tx.type,
33
+ description=tx.description,
34
+ reusable=tx.reusable,
35
+ fields=list(tx.fields),
36
+ attributes=list(tx.attributes),
37
+ groups=list(tx.groups),
38
+ metadata_extensions=list(tx.metadata_extensions),
39
+ )
40
+ extra_transforms.append(inlined)
41
+
42
+ for conn in mapplet.connectors:
43
+ from informatica_python.models import ConnectorDef
44
+ new_from = f"{prefix}__{conn.from_instance}" if conn.from_instance in {t.name for t in mapplet.transformations} else conn.from_instance
45
+ new_to = f"{prefix}__{conn.to_instance}" if conn.to_instance in {t.name for t in mapplet.transformations} else conn.to_instance
46
+ inlined_conn = ConnectorDef(
47
+ from_instance=new_from,
48
+ from_field=conn.from_field,
49
+ from_instance_type=conn.from_instance_type,
50
+ to_instance=new_to,
51
+ to_field=conn.to_field,
52
+ to_instance_type=conn.to_instance_type,
53
+ )
54
+ extra_connectors.append(inlined_conn)
55
+
56
+ rewired_connectors = []
57
+ mapplet_internal_names = set()
58
+ for inst_name in mapplet_instances:
59
+ mapplet_name = None
60
+ for inst in mapping.instances:
61
+ if inst.name == inst_name:
62
+ mapplet_name = inst.transformation_name or inst.name
63
+ break
64
+ mapplet = mapplet_map.get(mapplet_name) if mapplet_name else None
65
+ if mapplet:
66
+ for tx in mapplet.transformations:
67
+ mapplet_internal_names.add(f"{inst_name}__{tx.name}")
68
+
69
+ for conn in mapping.connectors:
70
+ if conn.to_instance in mapplet_instances:
71
+ first_tx = None
72
+ for ec in extra_connectors:
73
+ if ec.from_instance == conn.to_instance or ec.to_instance.startswith(f"{conn.to_instance}__"):
74
+ for et in extra_transforms:
75
+ if et.name.startswith(f"{conn.to_instance}__"):
76
+ has_input = any(
77
+ "INPUT" in (f.porttype or "").upper()
78
+ for f in et.fields
79
+ if f.name == conn.to_field
80
+ )
81
+ if has_input:
82
+ first_tx = et.name
83
+ break
84
+ if first_tx:
85
+ break
86
+ if not first_tx:
87
+ for et in extra_transforms:
88
+ if et.name.startswith(f"{conn.to_instance}__"):
89
+ first_tx = et.name
90
+ break
91
+ if first_tx:
92
+ from informatica_python.models import ConnectorDef
93
+ rewired_connectors.append(ConnectorDef(
94
+ from_instance=conn.from_instance,
95
+ from_field=conn.from_field,
96
+ from_instance_type=conn.from_instance_type,
97
+ to_instance=first_tx,
98
+ to_field=conn.to_field,
99
+ to_instance_type=conn.to_instance_type,
100
+ ))
101
+ else:
102
+ rewired_connectors.append(conn)
103
+ elif conn.from_instance in mapplet_instances:
104
+ last_tx = None
105
+ for et in reversed(extra_transforms):
106
+ if et.name.startswith(f"{conn.from_instance}__"):
107
+ has_output = any(
108
+ "OUTPUT" in (f.porttype or "").upper()
109
+ for f in et.fields
110
+ if f.name == conn.from_field
111
+ )
112
+ if has_output:
113
+ last_tx = et.name
114
+ break
115
+ if not last_tx:
116
+ for et in reversed(extra_transforms):
117
+ if et.name.startswith(f"{conn.from_instance}__"):
118
+ last_tx = et.name
119
+ break
120
+ if last_tx:
121
+ from informatica_python.models import ConnectorDef
122
+ rewired_connectors.append(ConnectorDef(
123
+ from_instance=last_tx,
124
+ from_field=conn.from_field,
125
+ from_instance_type=conn.from_instance_type,
126
+ to_instance=conn.to_instance,
127
+ to_field=conn.to_field,
128
+ to_instance_type=conn.to_instance_type,
129
+ ))
130
+ else:
131
+ rewired_connectors.append(conn)
132
+ else:
133
+ rewired_connectors.append(conn)
134
+
135
+ return extra_transforms, extra_connectors + rewired_connectors, mapplet_instances
136
+
137
+
14
138
  def generate_mapping_code(mapping: MappingDef, folder: FolderDef,
15
139
  data_lib: str = "pandas", mapping_index: int = 1) -> str:
16
140
  lines = []
@@ -24,10 +148,17 @@ def generate_mapping_code(mapping: MappingDef, folder: FolderDef,
24
148
  lines.append("")
25
149
  lines.append("")
26
150
 
151
+ inlined_transforms, inlined_connectors, mapplet_instance_names = _inline_mapplets(mapping, folder)
152
+
153
+ all_transforms = list(mapping.transformations) + inlined_transforms
154
+ all_connectors = [c for c in mapping.connectors
155
+ if c.from_instance not in mapplet_instance_names
156
+ and c.to_instance not in mapplet_instance_names] + inlined_connectors
157
+
27
158
  source_map = _build_source_map(mapping, folder)
28
159
  target_map = _build_target_map(mapping, folder)
29
- transform_map = {t.name: t for t in mapping.transformations}
30
- connector_graph = _build_connector_graph(mapping.connectors)
160
+ transform_map = {t.name: t for t in all_transforms}
161
+ connector_graph = _build_connector_graph(all_connectors)
31
162
  instance_map = {i.name: i for i in mapping.instances}
32
163
 
33
164
  lines.append(f"def run_{_safe_name(mapping.name)}(config):")
@@ -50,7 +181,7 @@ def generate_mapping_code(mapping: MappingDef, folder: FolderDef,
50
181
  safe = _safe_name(src_name)
51
182
  source_dfs[src_name] = f"df_{safe}"
52
183
 
53
- sq_transforms = [t for t in mapping.transformations
184
+ sq_transforms = [t for t in all_transforms
54
185
  if t.type in ("Source Qualifier", "Application Source Qualifier")]
55
186
  if sq_transforms:
56
187
  for sq in sq_transforms:
@@ -63,12 +194,14 @@ def generate_mapping_code(mapping: MappingDef, folder: FolderDef,
63
194
  conn_name = _safe_name(src_def.db_name) if src_def.db_name else "default"
64
195
  schema = src_def.owner_name or "dbo"
65
196
  lines.append(f" df_{safe} = read_from_db(config, 'SELECT * FROM {schema}.{src_name}', '{conn_name}')")
197
+ elif src_def.flatfile:
198
+ _emit_flatfile_read(lines, safe, src_def)
66
199
  else:
67
200
  lines.append(f" df_{safe} = read_file(config.get('sources', {{}}).get('{src_name}', {{}}).get('file_path', '{src_name}'),")
68
201
  lines.append(f" config.get('sources', {{}}).get('{src_name}', {{}}))")
69
202
  lines.append("")
70
203
 
71
- processing_order = _get_processing_order(mapping.transformations, connector_graph, sq_transforms)
204
+ processing_order = _get_processing_order(all_transforms, connector_graph, sq_transforms)
72
205
 
73
206
  for tx in processing_order:
74
207
  if tx.type in ("Source Qualifier", "Application Source Qualifier"):
@@ -99,6 +232,107 @@ def _safe_name(name):
99
232
  return safe.lower()
100
233
 
101
234
 
235
+ def _flatfile_config_dict(ff):
236
+ cfg = {}
237
+ if not ff:
238
+ return cfg
239
+ if ff.delimiter and ff.delimiter != ",":
240
+ d = ff.delimiter
241
+ DELIMITER_MAP = {
242
+ "COMMA": ",", "TAB": "\\t", "PIPE": "|", "SEMICOLON": ";",
243
+ "SPACE": " ", "TILDE": "~", "CARET": "^",
244
+ }
245
+ d = DELIMITER_MAP.get(d.upper(), d)
246
+ cfg["delimiter"] = d
247
+ if ff.is_fixed_width == "YES":
248
+ cfg["fixed_width"] = True
249
+ if ff.header_lines:
250
+ cfg["header_lines"] = ff.header_lines
251
+ if ff.skip_rows:
252
+ cfg["skip_rows"] = ff.skip_rows
253
+ if ff.text_qualifier:
254
+ cfg["quotechar"] = ff.text_qualifier
255
+ if ff.escape_character:
256
+ cfg["escapechar"] = ff.escape_character
257
+ if ff.strip_trailing_blanks == "YES":
258
+ cfg["strip_trailing_blanks"] = True
259
+ if ff.code_page:
260
+ cfg["encoding"] = ff.code_page
261
+ if ff.row_delimiter:
262
+ cfg["lineterminator"] = ff.row_delimiter
263
+ return cfg
264
+
265
+
266
+ def _emit_flatfile_read(lines, var_name, src_def, indent=" "):
267
+ ff = src_def.flatfile
268
+ fc = _flatfile_config_dict(ff)
269
+ if fc.get("fixed_width"):
270
+ widths = []
271
+ for fld in src_def.fields:
272
+ widths.append(fld.precision if fld.precision else 10)
273
+ lines.append(f"{indent}df_{var_name} = pd.read_fwf(")
274
+ lines.append(f"{indent} config.get('sources', {{}}).get('{src_def.name}', {{}}).get('file_path', '{src_def.name}'),")
275
+ lines.append(f"{indent} widths={widths},")
276
+ hdr = fc.get("header_lines", 0)
277
+ if hdr:
278
+ lines.append(f"{indent} header={hdr - 1},")
279
+ else:
280
+ lines.append(f"{indent} header=None,")
281
+ skip = fc.get("skip_rows", 0)
282
+ if skip:
283
+ lines.append(f"{indent} skiprows={skip},")
284
+ lines.append(f"{indent})")
285
+ return
286
+
287
+ file_cfg = {}
288
+ if "delimiter" in fc:
289
+ file_cfg["delimiter"] = fc["delimiter"]
290
+ if "quotechar" in fc:
291
+ file_cfg["quotechar"] = fc["quotechar"]
292
+ if "escapechar" in fc:
293
+ file_cfg["escapechar"] = fc["escapechar"]
294
+ if "encoding" in fc:
295
+ file_cfg["encoding"] = fc["encoding"]
296
+ if "lineterminator" in fc:
297
+ file_cfg["lineterminator"] = fc["lineterminator"]
298
+ hdr = fc.get("header_lines", 0)
299
+ if hdr:
300
+ file_cfg["header"] = True
301
+ file_cfg["header_row"] = hdr - 1
302
+ if fc.get("skip_rows"):
303
+ file_cfg["skip_rows"] = fc["skip_rows"]
304
+ if fc.get("strip_trailing_blanks"):
305
+ file_cfg["strip_trailing_blanks"] = True
306
+
307
+ if file_cfg:
308
+ lines.append(f"{indent}ff_cfg_{var_name} = {repr(file_cfg)}")
309
+ lines.append(f"{indent}ff_cfg_{var_name}.update(config.get('sources', {{}}).get('{src_def.name}', {{}}))")
310
+ lines.append(f"{indent}df_{var_name} = read_file(ff_cfg_{var_name}.get('file_path', '{src_def.name}'), ff_cfg_{var_name})")
311
+ else:
312
+ lines.append(f"{indent}df_{var_name} = read_file(config.get('sources', {{}}).get('{src_def.name}', {{}}).get('file_path', '{src_def.name}'),")
313
+ lines.append(f"{indent} config.get('sources', {{}}).get('{src_def.name}', {{}}))")
314
+
315
+
316
+ def _emit_flatfile_write(lines, var_name, tgt_def, indent=" "):
317
+ ff = tgt_def.flatfile
318
+ fc = _flatfile_config_dict(ff)
319
+ file_cfg = {}
320
+ if "delimiter" in fc:
321
+ file_cfg["delimiter"] = fc["delimiter"]
322
+ if "quotechar" in fc:
323
+ file_cfg["quotechar"] = fc["quotechar"]
324
+ if "encoding" in fc:
325
+ file_cfg["encoding"] = fc["encoding"]
326
+
327
+ if file_cfg:
328
+ lines.append(f"{indent}ff_cfg_{var_name} = {repr(file_cfg)}")
329
+ lines.append(f"{indent}ff_cfg_{var_name}.update(config.get('targets', {{}}).get('{tgt_def.name}', {{}}))")
330
+ lines.append(f"{indent}write_file(df_target_{var_name}, ff_cfg_{var_name}.get('file_path', '{tgt_def.name}'), ff_cfg_{var_name})")
331
+ else:
332
+ lines.append(f"{indent}write_file(df_target_{var_name}, config.get('targets', {{}}).get('{tgt_def.name}', {{}}).get('file_path', '{tgt_def.name}'),")
333
+ lines.append(f"{indent} config.get('targets', {{}}).get('{tgt_def.name}', {{}}))")
334
+
335
+
102
336
  def _build_source_map(mapping, folder):
103
337
  source_map = {}
104
338
  for inst in mapping.instances:
@@ -221,6 +455,8 @@ def _generate_source_qualifier(lines, sq, source_map, source_dfs, connector_grap
221
455
  schema = src_def.owner_name or "dbo"
222
456
  cols = ", ".join(f.name for f in src_def.fields) if src_def.fields else "*"
223
457
  lines.append(f" df_{sq_safe} = read_from_db(config, 'SELECT {cols} FROM {schema}.{src_def.name}', '{conn_name}')")
458
+ elif src_def.flatfile:
459
+ _emit_flatfile_read(lines, sq_safe, src_def)
224
460
  else:
225
461
  lines.append(f" df_{sq_safe} = read_file(config.get('sources', {{}}).get('{src_def.name}', {{}}).get('file_path', '{src_def.name}'),")
226
462
  lines.append(f" config.get('sources', {{}}).get('{src_def.name}', {{}}))")
@@ -232,6 +468,8 @@ def _generate_source_qualifier(lines, sq, source_map, source_dfs, connector_grap
232
468
  conn_name = _safe_name(src_def.db_name) if src_def.db_name else "default"
233
469
  schema = src_def.owner_name or "dbo"
234
470
  lines.append(f" df_{safe_src} = read_from_db(config, 'SELECT * FROM {schema}.{src_def.name}', '{conn_name}')")
471
+ elif src_def.flatfile:
472
+ _emit_flatfile_read(lines, safe_src, src_def)
235
473
  else:
236
474
  lines.append(f" df_{safe_src} = read_file(config.get('sources', {{}}).get('{src_def.name}', {{}}).get('file_path', '{src_def.name}'),")
237
475
  lines.append(f" config.get('sources', {{}}).get('{src_def.name}', {{}}))")
@@ -652,26 +890,127 @@ def _gen_sequence_generator(lines, tx, tx_safe, input_df, source_dfs):
652
890
 
653
891
 
654
892
  def _gen_normalizer_transform(lines, tx, tx_safe, input_df, source_dfs):
655
- lines.append(f" # TODO: Normalizer transformation - implement based on specific normalization logic")
656
- lines.append(f" df_{tx_safe} = {input_df}.copy()")
893
+ input_ports = []
894
+ output_ports = []
895
+ occurs_cols = []
896
+ id_cols = []
897
+
898
+ for fld in tx.fields:
899
+ pt = (fld.porttype or "").upper()
900
+ if "INPUT" in pt:
901
+ input_ports.append(fld)
902
+ if "OUTPUT" in pt:
903
+ output_ports.append(fld)
904
+
905
+ for fld in tx.fields:
906
+ if fld.field_number > 0:
907
+ occurs_cols.append(fld.name)
908
+
909
+ if not occurs_cols:
910
+ import re
911
+ base_groups = {}
912
+ for fld in input_ports:
913
+ m = re.match(r'^(.+?)(\d+)$', fld.name)
914
+ if m:
915
+ base = m.group(1)
916
+ idx = int(m.group(2))
917
+ if base not in base_groups:
918
+ base_groups[base] = []
919
+ base_groups[base].append(fld.name)
920
+ else:
921
+ id_cols.append(fld.name)
922
+
923
+ if base_groups:
924
+ longest_group = max(base_groups.values(), key=len)
925
+ occurs_cols = longest_group
926
+ id_cols = [f.name for f in input_ports if f.name not in occurs_cols]
927
+ else:
928
+ for fld in input_ports:
929
+ pt = (fld.porttype or "").upper()
930
+ if "INPUT" in pt and "OUTPUT" in pt:
931
+ id_cols.append(fld.name)
932
+ elif "INPUT" in pt and "OUTPUT" not in pt:
933
+ occurs_cols.append(fld.name)
934
+
935
+ if not id_cols:
936
+ id_cols = [f.name for f in input_ports if f.name not in occurs_cols]
937
+
938
+ gk_field = None
939
+ for fld in output_ports:
940
+ if "GK" in fld.name.upper() or "GENERATED" in fld.name.upper() or "KEY" in fld.name.upper():
941
+ gk_field = fld.name
942
+ break
943
+
944
+ lines.append(f" # Normalizer: unpivot repeated columns into rows")
945
+ if occurs_cols and id_cols:
946
+ lines.append(f" df_{tx_safe} = {input_df}.melt(")
947
+ lines.append(f" id_vars={id_cols},")
948
+ lines.append(f" value_vars={occurs_cols},")
949
+ lines.append(f" var_name='_norm_variable',")
950
+ lines.append(f" value_name='_norm_value'")
951
+ lines.append(f" )")
952
+ lines.append(f" df_{tx_safe} = df_{tx_safe}.dropna(subset=['_norm_value']).reset_index(drop=True)")
953
+ elif occurs_cols:
954
+ lines.append(f" df_{tx_safe} = {input_df}[{occurs_cols}].stack().reset_index(drop=True).to_frame('_norm_value')")
955
+ else:
956
+ lines.append(f" df_{tx_safe} = {input_df}.copy()")
957
+
958
+ if gk_field:
959
+ lines.append(f" df_{tx_safe}['{gk_field}'] = range(1, len(df_{tx_safe}) + 1)")
960
+
657
961
  source_dfs[tx.name] = f"df_{tx_safe}"
658
962
 
659
963
 
660
964
  def _gen_rank_transform(lines, tx, tx_safe, input_df, source_dfs):
661
965
  rank_port = None
662
- group_by = []
966
+ group_by_ports = []
967
+ top_bottom = "TOP"
968
+ top_n = 0
969
+
663
970
  for fld in tx.fields:
664
- if "RANK" in fld.name.upper():
971
+ pt = (fld.porttype or "").upper()
972
+ if "INPUT" in pt and "OUTPUT" in pt:
973
+ group_by_ports.append(fld.name)
974
+
975
+ for fld in tx.fields:
976
+ if fld.expression and fld.expression.strip() and fld.name.upper() not in ("RANKINDEX",):
665
977
  rank_port = fld.name
666
- top_bottom = "TOP"
978
+ break
979
+ if not rank_port:
980
+ for fld in tx.fields:
981
+ if fld.name.upper() == "RANKINDEX":
982
+ continue
983
+ pt = (fld.porttype or "").upper()
984
+ if "INPUT" in pt and "OUTPUT" not in pt:
985
+ rank_port = fld.name
986
+ break
987
+
667
988
  for attr in tx.attributes:
668
989
  if attr.name == "Top/Bottom":
669
990
  top_bottom = attr.value
991
+ elif attr.name == "Number Of Ranks":
992
+ try:
993
+ top_n = int(attr.value)
994
+ except (ValueError, TypeError):
995
+ top_n = 0
996
+
997
+ ascending = top_bottom.upper() != "TOP"
670
998
 
671
999
  lines.append(f" df_{tx_safe} = {input_df}.copy()")
672
- if rank_port:
673
- ascending = top_bottom.upper() != "TOP"
674
- lines.append(f" df_{tx_safe}['RANK_INDEX'] = df_{tx_safe}['{rank_port}'].rank(ascending={ascending})")
1000
+ if rank_port and group_by_ports:
1001
+ lines.append(f" # Rank by '{rank_port}' within groups {group_by_ports}")
1002
+ lines.append(f" df_{tx_safe}['RANKINDEX'] = df_{tx_safe}.groupby({group_by_ports})['{rank_port}'].rank(")
1003
+ lines.append(f" method='min', ascending={ascending}")
1004
+ lines.append(f" ).astype(int)")
1005
+ if top_n:
1006
+ lines.append(f" df_{tx_safe} = df_{tx_safe}[df_{tx_safe}['RANKINDEX'] <= {top_n}].reset_index(drop=True)")
1007
+ elif rank_port:
1008
+ lines.append(f" # Rank by '{rank_port}' (no group-by)")
1009
+ lines.append(f" df_{tx_safe}['RANKINDEX'] = df_{tx_safe}['{rank_port}'].rank(method='min', ascending={ascending}).astype(int)")
1010
+ if top_n:
1011
+ lines.append(f" df_{tx_safe} = df_{tx_safe}[df_{tx_safe}['RANKINDEX'] <= {top_n}].reset_index(drop=True)")
1012
+ else:
1013
+ lines.append(f" df_{tx_safe}['RANKINDEX'] = range(1, len(df_{tx_safe}) + 1)")
675
1014
  source_dfs[tx.name] = f"df_{tx_safe}"
676
1015
 
677
1016
 
@@ -782,6 +1121,8 @@ def _generate_target_write(lines, tgt_name, tgt_def, connector_graph, source_dfs
782
1121
 
783
1122
  if tgt_def.database_type and tgt_def.database_type != "Flat File":
784
1123
  lines.append(f" write_to_db(config, df_target_{tgt_safe}, '{tgt_def.name}', 'target')")
1124
+ elif tgt_def.flatfile:
1125
+ _emit_flatfile_write(lines, tgt_safe, tgt_def)
785
1126
  else:
786
1127
  lines.append(f" write_file(df_target_{tgt_safe}, config.get('targets', {{}}).get('{tgt_def.name}', {{}}).get('file_path', '{tgt_def.name}'),")
787
1128
  lines.append(f" config.get('targets', {{}}).get('{tgt_def.name}', {{}}))")
@@ -1,4 +1,5 @@
1
1
  from informatica_python.models import FolderDef, WorkflowDef, TaskInstanceDef
2
+ from informatica_python.utils.expression_converter import convert_expression
2
3
 
3
4
 
4
5
  def generate_workflow_code(folder: FolderDef) -> str:
@@ -127,14 +128,51 @@ def _generate_workflow_function(lines, wf: WorkflowDef, folder: FolderDef):
127
128
  lines.append("")
128
129
 
129
130
  elif task.task_type == "Decision":
130
- lines.append(f" # Decision Task: {task.name}")
131
131
  decision_cond = ""
132
+ decision_name = ""
132
133
  for attr in task.attributes:
133
134
  if attr.name == "Decision Condition":
134
135
  decision_cond = attr.value
136
+ elif attr.name == "Decision Name":
137
+ decision_name = attr.value
138
+
139
+ lines.append(f" # Decision Task: {task.name}")
135
140
  if decision_cond:
136
- lines.append(f" # Condition: {decision_cond}")
137
- lines.append(f" logger.info('Decision task: {task.name}')")
141
+ py_cond = _convert_decision_condition(decision_cond)
142
+ lines.append(f" # Original condition: {decision_cond}")
143
+ lines.append(f" decision_{task_safe} = {py_cond}")
144
+ lines.append(f" logger.info(f'Decision {task.name}: {{decision_{task_safe}}}')")
145
+
146
+ succ_targets = []
147
+ fail_targets = []
148
+ for link in wf.links:
149
+ if link.from_instance == task.name:
150
+ cond_text = (link.condition or "").strip()
151
+ if cond_text and ("$" in cond_text or "SUCCEEDED" in cond_text.upper()
152
+ or "TRUE" in cond_text.upper()):
153
+ succ_targets.append(link.to_instance)
154
+ elif cond_text and ("FAILED" in cond_text.upper()
155
+ or "FALSE" in cond_text.upper()):
156
+ fail_targets.append(link.to_instance)
157
+ else:
158
+ succ_targets.append(link.to_instance)
159
+
160
+ if succ_targets or fail_targets:
161
+ lines.append(f" if decision_{task_safe}:")
162
+ if succ_targets:
163
+ for t in succ_targets:
164
+ lines.append(f" logger.info('Decision {task.name} => proceeding to {t}')")
165
+ else:
166
+ lines.append(f" pass")
167
+ if fail_targets:
168
+ lines.append(f" else:")
169
+ for t in fail_targets:
170
+ lines.append(f" logger.info('Decision {task.name} => proceeding to {t}')")
171
+ else:
172
+ lines.append(f" if not decision_{task_safe}:")
173
+ lines.append(f" logger.warning('Decision {task.name} evaluated to False')")
174
+ else:
175
+ lines.append(f" logger.info('Decision task: {task.name} (no condition specified)')")
138
176
  lines.append("")
139
177
 
140
178
  elif task.task_type == "Timer":
@@ -226,6 +264,24 @@ def _get_task_execution_order(wf: WorkflowDef):
226
264
  return ordered
227
265
 
228
266
 
267
+ def _convert_decision_condition(condition):
268
+ import re
269
+ cond = condition.strip()
270
+ cond = re.sub(r'\$\$(\w+)', r'\1', cond)
271
+ cond = re.sub(r'\$(\w+)\.(\w+)\.(Status|PrevTaskStatus)', r"'\2_status'", cond)
272
+ cond = re.sub(r'\bSUCCEEDED\b', "'SUCCEEDED'", cond, flags=re.IGNORECASE)
273
+ cond = re.sub(r'\bFAILED\b', "'FAILED'", cond, flags=re.IGNORECASE)
274
+ cond = re.sub(r'\bABORTED\b', "'ABORTED'", cond, flags=re.IGNORECASE)
275
+ cond = re.sub(r'\bAND\b', 'and', cond, flags=re.IGNORECASE)
276
+ cond = re.sub(r'\bOR\b', 'or', cond, flags=re.IGNORECASE)
277
+ cond = re.sub(r'\bNOT\b', 'not', cond, flags=re.IGNORECASE)
278
+ cond = re.sub(r'\bTRUE\b', 'True', cond, flags=re.IGNORECASE)
279
+ cond = re.sub(r'\bFALSE\b', 'False', cond, flags=re.IGNORECASE)
280
+ cond = re.sub(r'(?<!=)=(?!=)', '==', cond)
281
+ cond = cond.replace('<>', '!=')
282
+ return cond
283
+
284
+
229
285
  def _safe_name(name):
230
286
  import re
231
287
  safe = re.sub(r'[^a-zA-Z0-9_]', '_', name)
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: informatica-python
3
- Version: 1.2.1
3
+ Version: 1.3.0
4
4
  Summary: Convert Informatica PowerCenter workflow XML to Python/PySpark code
5
5
  License-Expression: MIT
6
6
  Requires-Python: >=3.8
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
4
4
 
5
5
  [project]
6
6
  name = "informatica-python"
7
- version = "1.2.1"
7
+ version = "1.3.0"
8
8
  description = "Convert Informatica PowerCenter workflow XML to Python/PySpark code"
9
9
  readme = "README.md"
10
10
  license = "MIT"
@@ -528,6 +528,254 @@ def test_generated_lookup_code():
528
528
  print(f"PASS: test_generated_lookup_code")
529
529
 
530
530
 
531
+ def test_flatfile_metadata_read():
532
+ from informatica_python.models import (
533
+ MappingDef, FolderDef, SourceDef, TargetDef, FlatFileDef,
534
+ FieldDef, TransformationDef, ConnectorDef, InstanceDef, TableAttribute,
535
+ )
536
+ from informatica_python.generators.mapping_gen import generate_mapping_code
537
+
538
+ ff = FlatFileDef(
539
+ name="test_file",
540
+ delimiter="|",
541
+ header_lines=1,
542
+ text_qualifier='"',
543
+ skip_rows=2,
544
+ code_page="UTF-8",
545
+ )
546
+ src = SourceDef(
547
+ name="PIPE_SOURCE",
548
+ database_type="Flat File",
549
+ flatfile=ff,
550
+ fields=[FieldDef(name="COL_A", datatype="string"), FieldDef(name="COL_B", datatype="integer")],
551
+ )
552
+ tgt_ff = FlatFileDef(name="tgt_file", delimiter="~")
553
+ tgt = TargetDef(
554
+ name="TILDE_TARGET",
555
+ database_type="Flat File",
556
+ flatfile=tgt_ff,
557
+ fields=[FieldDef(name="COL_A", datatype="string")],
558
+ )
559
+ mapping = MappingDef(
560
+ name="m_flatfile_test",
561
+ transformations=[],
562
+ connectors=[ConnectorDef(from_instance="PIPE_SOURCE", from_field="COL_A",
563
+ from_instance_type="Source Definition",
564
+ to_instance="TILDE_TARGET", to_field="COL_A",
565
+ to_instance_type="Target Definition")],
566
+ instances=[
567
+ InstanceDef(name="PIPE_SOURCE", type="Source Definition", transformation_name="PIPE_SOURCE"),
568
+ InstanceDef(name="TILDE_TARGET", type="Target Definition", transformation_name="TILDE_TARGET"),
569
+ ],
570
+ )
571
+ folder = FolderDef(name="test", sources=[src], targets=[tgt], mappings=[mapping])
572
+ code = generate_mapping_code(mapping, folder)
573
+
574
+ assert "ff_cfg_" in code, "Should emit flatfile config dict"
575
+ assert "'delimiter': '|'" in code, "Pipe delimiter should appear"
576
+ assert "'skip_rows': 2" in code, "Skip rows should appear"
577
+ assert "'~'" in code, "Tilde delimiter should appear for target"
578
+ print("PASS: test_flatfile_metadata_read")
579
+
580
+
581
+ def test_flatfile_fixed_width():
582
+ from informatica_python.models import (
583
+ MappingDef, FolderDef, SourceDef, FlatFileDef,
584
+ FieldDef, InstanceDef, TargetDef, ConnectorDef,
585
+ )
586
+ from informatica_python.generators.mapping_gen import generate_mapping_code
587
+
588
+ ff = FlatFileDef(name="fw_file", is_fixed_width="YES", header_lines=0)
589
+ src = SourceDef(
590
+ name="FW_SOURCE",
591
+ database_type="Flat File",
592
+ flatfile=ff,
593
+ fields=[FieldDef(name="F1", datatype="string", precision=10),
594
+ FieldDef(name="F2", datatype="string", precision=20)],
595
+ )
596
+ mapping = MappingDef(
597
+ name="m_fw_test",
598
+ transformations=[],
599
+ connectors=[],
600
+ instances=[InstanceDef(name="FW_SOURCE", type="Source Definition", transformation_name="FW_SOURCE")],
601
+ )
602
+ folder = FolderDef(name="test", sources=[src], targets=[], mappings=[mapping])
603
+ code = generate_mapping_code(mapping, folder)
604
+
605
+ assert "read_fwf" in code, "Fixed-width should use pd.read_fwf"
606
+ assert "[10, 20]" in code, "Widths should be derived from field precision"
607
+ print("PASS: test_flatfile_fixed_width")
608
+
609
+
610
+ def test_normalizer_transform():
611
+ from informatica_python.models import (
612
+ MappingDef, FolderDef, SourceDef, FieldDef,
613
+ TransformationDef, ConnectorDef, InstanceDef, TableAttribute, TargetDef,
614
+ )
615
+ from informatica_python.generators.mapping_gen import _gen_normalizer_transform
616
+
617
+ tx = TransformationDef(
618
+ name="NRM_PHONES",
619
+ type="Normalizer",
620
+ fields=[
621
+ FieldDef(name="CUST_ID", datatype="integer", porttype="INPUT/OUTPUT"),
622
+ FieldDef(name="PHONE1", datatype="string", porttype="INPUT"),
623
+ FieldDef(name="PHONE2", datatype="string", porttype="INPUT"),
624
+ FieldDef(name="PHONE3", datatype="string", porttype="INPUT"),
625
+ FieldDef(name="GK", datatype="integer", porttype="OUTPUT"),
626
+ ],
627
+ )
628
+ lines = []
629
+ source_dfs = {}
630
+ _gen_normalizer_transform(lines, tx, "nrm_phones", "df_input", source_dfs)
631
+ code = "\n".join(lines)
632
+
633
+ assert "melt(" in code, "Normalizer should use pd.melt()"
634
+ assert "PHONE1" in code, "Should reference PHONE columns"
635
+ assert "CUST_ID" in code, "Should reference ID column"
636
+ assert "GK" in code, "Should generate GK sequence"
637
+ assert source_dfs["NRM_PHONES"] == "df_nrm_phones"
638
+ print("PASS: test_normalizer_transform")
639
+
640
+
641
+ def test_rank_with_groupby():
642
+ from informatica_python.models import (
643
+ FieldDef, TransformationDef, TableAttribute,
644
+ )
645
+ from informatica_python.generators.mapping_gen import _gen_rank_transform
646
+
647
+ tx = TransformationDef(
648
+ name="RNK_SALES",
649
+ type="Rank",
650
+ fields=[
651
+ FieldDef(name="REGION", datatype="string", porttype="INPUT/OUTPUT"),
652
+ FieldDef(name="AMOUNT", datatype="decimal", porttype="INPUT", expression="AMOUNT"),
653
+ FieldDef(name="RANKINDEX", datatype="integer", porttype="OUTPUT"),
654
+ ],
655
+ attributes=[
656
+ TableAttribute(name="Top/Bottom", value="TOP"),
657
+ TableAttribute(name="Number Of Ranks", value="5"),
658
+ ],
659
+ )
660
+ lines = []
661
+ source_dfs = {}
662
+ _gen_rank_transform(lines, tx, "rnk_sales", "df_input", source_dfs)
663
+ code = "\n".join(lines)
664
+
665
+ assert "groupby" in code, "Should use groupby for group-by rank"
666
+ assert "REGION" in code, "Should group by REGION"
667
+ assert "AMOUNT" in code, "Should rank by AMOUNT"
668
+ assert "RANKINDEX" in code, "Should produce RANKINDEX column"
669
+ assert "<= 5" in code, "Should filter top 5"
670
+ assert source_dfs["RNK_SALES"] == "df_rnk_sales"
671
+ print("PASS: test_rank_with_groupby")
672
+
673
+
674
def test_decision_task_if_else():
    """A Decision task must generate an if/else branch on a Python boolean.

    Builds a workflow whose Decision task has the condition
    ``$$LOAD_FLAG = TRUE`` and two conditional outgoing links, then checks
    that the generated code defines a decision variable, branches on it,
    and translates the Informatica parameter/literal into Python.
    """
    # NOTE: dropped unused `MappingDef` from the original import list.
    from informatica_python.models import (
        FolderDef, WorkflowDef, TaskInstanceDef, WorkflowLink,
        TableAttribute,
    )
    from informatica_python.generators.workflow_gen import generate_workflow_code

    tasks = [
        TaskInstanceDef(name="Start", task_name="Start", task_type="Start Task"),
        TaskInstanceDef(
            name="dec_check_status",
            task_name="dec_check_status",
            task_type="Decision",
            attributes=[TableAttribute(name="Decision Condition", value="$$LOAD_FLAG = TRUE")],
        ),
        TaskInstanceDef(name="s_load_data", task_name="s_load_data", task_type="Session"),
        TaskInstanceDef(name="s_skip_load", task_name="s_skip_load", task_type="Session"),
    ]
    # SUCCEEDED routes to the load session, FAILED to the skip session.
    links = [
        WorkflowLink(from_instance="Start", to_instance="dec_check_status"),
        WorkflowLink(from_instance="dec_check_status", to_instance="s_load_data",
                     condition="$dec_check_status.SUCCEEDED"),
        WorkflowLink(from_instance="dec_check_status", to_instance="s_skip_load",
                     condition="$dec_check_status.FAILED"),
    ]
    wf = WorkflowDef(name="wf_test_decision", task_instances=tasks, links=links)
    folder = FolderDef(name="test", workflows=[wf], mappings=[])
    code = generate_workflow_code(folder)

    assert "decision_dec_check_status" in code, "Should create decision variable"
    assert "if decision_dec_check_status" in code, "Should generate if branch"
    assert "LOAD_FLAG" in code, "Should convert $$LOAD_FLAG"
    assert "True" in code, "Should convert TRUE to Python True"
    print("PASS: test_decision_task_if_else")
708
+
709
+
710
def test_inline_mapplet():
    """A Mapplet instance must be inlined into the generated mapping code.

    Builds a mapping that routes SRC -> SQ_INPUT -> mapplet instance -> TGT,
    where the mapplet contains an Expression transform applying
    ``UPPER(FULL_NAME)``, then checks that the mapplet's transform appears
    in the output with an instance-name prefix and that its expression
    survives inlining.
    """
    # NOTE: dropped unused `TableAttribute` from the original import list.
    from informatica_python.models import (
        MappingDef, FolderDef, SourceDef, TargetDef, MappletDef,
        TransformationDef, ConnectorDef, InstanceDef, FieldDef,
    )
    from informatica_python.generators.mapping_gen import generate_mapping_code

    # Mapplet with a single Expression transform upper-casing FULL_NAME.
    mplt = MappletDef(
        name="mplt_clean_name",
        transformations=[
            TransformationDef(
                name="EXP_UPPER",
                type="Expression",
                fields=[
                    FieldDef(name="FULL_NAME", datatype="string", porttype="INPUT/OUTPUT",
                             expression="UPPER(FULL_NAME)"),
                ],
            ),
        ],
        connectors=[],
    )

    mapping = MappingDef(
        name="m_with_mapplet",
        transformations=[
            TransformationDef(name="SQ_INPUT", type="Source Qualifier",
                              fields=[FieldDef(name="FULL_NAME", datatype="string", porttype="INPUT/OUTPUT")]),
        ],
        # Wire FULL_NAME straight through: source -> qualifier -> mapplet -> target.
        connectors=[
            ConnectorDef(from_instance="SRC", from_field="FULL_NAME",
                         from_instance_type="Source Definition",
                         to_instance="SQ_INPUT", to_field="FULL_NAME",
                         to_instance_type="Source Qualifier"),
            ConnectorDef(from_instance="SQ_INPUT", from_field="FULL_NAME",
                         from_instance_type="Source Qualifier",
                         to_instance="MPLT_INST", to_field="FULL_NAME",
                         to_instance_type="Mapplet"),
            ConnectorDef(from_instance="MPLT_INST", from_field="FULL_NAME",
                         from_instance_type="Mapplet",
                         to_instance="TGT", to_field="FULL_NAME",
                         to_instance_type="Target Definition"),
        ],
        instances=[
            InstanceDef(name="SRC", type="Source Definition", transformation_name="SRC"),
            InstanceDef(name="SQ_INPUT", type="Source Qualifier"),
            InstanceDef(name="MPLT_INST", type="Mapplet", transformation_name="mplt_clean_name",
                        transformation_type="Mapplet"),
            InstanceDef(name="TGT", type="Target Definition", transformation_name="TGT"),
        ],
    )

    src = SourceDef(name="SRC", fields=[FieldDef(name="FULL_NAME", datatype="string")])
    tgt = TargetDef(name="TGT", fields=[FieldDef(name="FULL_NAME", datatype="string")])
    folder = FolderDef(
        name="test",
        sources=[src],
        targets=[tgt],
        mappings=[mapping],
        mapplets=[mplt],
    )
    code = generate_mapping_code(mapping, folder)

    assert "MPLT_INST__EXP_UPPER" in code or "mplt_inst__exp_upper" in code, \
        "Inlined mapplet transform should appear with prefix"
    assert "UPPER" in code, "UPPER expression from mapplet should be present"
    print("PASS: test_inline_mapplet")
777
+
778
+
531
779
  if __name__ == "__main__":
532
780
  print("=" * 60)
533
781
  print("Running informatica-python tests")
@@ -551,6 +799,12 @@ if __name__ == "__main__":
551
799
  test_generated_aggregator_code,
552
800
  test_generated_joiner_code,
553
801
  test_generated_lookup_code,
802
+ test_flatfile_metadata_read,
803
+ test_flatfile_fixed_width,
804
+ test_normalizer_transform,
805
+ test_rank_with_groupby,
806
+ test_decision_task_if_else,
807
+ test_inline_mapplet,
554
808
  ]
555
809
 
556
810
  passed = 0