informatica-python 1.2.0__py3-none-any.whl → 1.2.1__py3-none-any.whl

This diff shows the differences between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those versions as they appear in their respective public registries.
@@ -278,7 +278,7 @@ def _generate_transformation(lines, tx, connector_graph, source_dfs, transform_m
278
278
  elif tx_type == "sorter":
279
279
  _gen_sorter_transform(lines, tx, tx_safe, input_df, source_dfs)
280
280
  elif tx_type in ("joiner",):
281
- _gen_joiner_transform(lines, tx, tx_safe, input_df, input_sources, source_dfs)
281
+ _gen_joiner_transform(lines, tx, tx_safe, input_df, input_sources, source_dfs, connector_graph)
282
282
  elif tx_type in ("lookup procedure", "lookup"):
283
283
  _gen_lookup_transform(lines, tx, tx_safe, input_df, source_dfs)
284
284
  elif tx_type == "router":
@@ -410,7 +410,7 @@ def _gen_sorter_transform(lines, tx, tx_safe, input_df, source_dfs):
410
410
  source_dfs[tx.name] = f"df_{tx_safe}"
411
411
 
412
412
 
413
- def _gen_joiner_transform(lines, tx, tx_safe, input_df, input_sources, source_dfs):
413
+ def _gen_joiner_transform(lines, tx, tx_safe, input_df, input_sources, source_dfs, connector_graph=None):
414
414
  join_type = "inner"
415
415
  join_condition = ""
416
416
  for attr in tx.attributes:
@@ -436,10 +436,31 @@ def _gen_joiner_transform(lines, tx, tx_safe, input_df, input_sources, source_df
436
436
 
437
437
  left_keys, right_keys = parse_join_condition(join_condition)
438
438
 
439
+ master_src = None
440
+ detail_src = None
441
+ input_conns = connector_graph.get("to", {}).get(tx.name, []) if connector_graph else []
442
+ for conn in input_conns:
443
+ to_field = conn.to_field
444
+ if to_field in master_fields:
445
+ master_src = conn.from_instance
446
+ elif to_field in detail_fields:
447
+ detail_src = conn.from_instance
448
+
439
449
  src_list = list(input_sources)
440
- if len(src_list) >= 2:
441
- df_master = source_dfs.get(src_list[0], f"df_{_safe_name(src_list[0])}")
442
- df_detail = source_dfs.get(src_list[1], f"df_{_safe_name(src_list[1])}")
450
+ if not master_src and not detail_src and len(src_list) >= 2:
451
+ master_src = src_list[0]
452
+ detail_src = src_list[1]
453
+ elif not master_src and len(src_list) >= 1:
454
+ master_src = src_list[0]
455
+ if not detail_src:
456
+ for s in src_list:
457
+ if s != master_src:
458
+ detail_src = s
459
+ break
460
+
461
+ if master_src and detail_src:
462
+ df_master = source_dfs.get(master_src, f"df_{_safe_name(master_src)}")
463
+ df_detail = source_dfs.get(detail_src, f"df_{_safe_name(detail_src)}")
443
464
 
444
465
  lines.append(f" # Join ({join_type}): {join_condition or 'auto'}")
445
466
  if left_keys and right_keys:
@@ -451,9 +472,7 @@ def _gen_joiner_transform(lines, tx, tx_safe, input_df, input_sources, source_df
451
472
  lines.append(f" suffixes=('', '_master')")
452
473
  lines.append(f" )")
453
474
  else:
454
- common_cols = []
455
- if master_fields and detail_fields:
456
- common_cols = [f for f in detail_fields if f in master_fields]
475
+ common_cols = [f for f in detail_fields if f in master_fields]
457
476
  if common_cols:
458
477
  lines.append(f" df_{tx_safe} = {df_detail}.merge(")
459
478
  lines.append(f" {df_master},")
@@ -539,9 +558,13 @@ def _gen_lookup_transform(lines, tx, tx_safe, input_df, source_dfs):
539
558
 
540
559
  drop_cols = [k for k in lookup_keys if k not in input_keys]
541
560
  if drop_cols:
542
- lines.append(f" lkp_drop = [c for c in {drop_cols} if c in df_{tx_safe}.columns and c + '_lkp' not in df_{tx_safe}.columns]")
561
+ lines.append(f" _lkp_drop = [c for c in {drop_cols} if c in df_{tx_safe}.columns]")
562
+ lines.append(f" if _lkp_drop:")
563
+ lines.append(f" df_{tx_safe} = df_{tx_safe}.drop(columns=_lkp_drop)")
543
564
 
544
565
  for rf in all_output_fields:
566
+ lines.append(f" if '{rf.name}' not in df_{tx_safe}.columns:")
567
+ lines.append(f" df_{tx_safe}['{rf.name}'] = None")
545
568
  if rf.default_value:
546
569
  lines.append(f" df_{tx_safe}['{rf.name}'] = df_{tx_safe}['{rf.name}'].fillna({repr(rf.default_value)})")
547
570
  else:
@@ -95,11 +95,11 @@ INFA_FUNC_MAP = {
95
95
  }
96
96
 
97
97
 
98
- AGG_FUNC_NAMES = {
99
- "SUM", "COUNT", "AVG", "MAX", "MIN", "MEDIAN",
100
- "STDDEV", "VARIANCE", "PERCENTILE", "FIRST", "LAST",
101
- "MOVINGAVG", "MOVINGSUM", "CUME",
102
- }
98
+ AGG_FUNC_NAMES = [
99
+ "MOVINGAVG", "MOVINGSUM", "PERCENTILE", "VARIANCE",
100
+ "STDDEV", "MEDIAN", "COUNT", "FIRST", "LAST",
101
+ "CUME", "SUM", "AVG", "MAX", "MIN",
102
+ ]
103
103
 
104
104
 
105
105
  def convert_expression(expr):
@@ -131,6 +131,8 @@ def convert_expression(expr):
131
131
 
132
132
  converted = re.sub(r'<>', '!=', converted)
133
133
 
134
+ converted = re.sub(r'(?<![<>!])=(?!=)', '==', converted)
135
+
134
136
  converted = re.sub(r':LKP\.(\w+)\(', r'lookup_func("\1", ', converted)
135
137
 
136
138
  converted = re.sub(r'\$\$(\w+)', r'get_variable("\1")', converted)
@@ -202,8 +204,11 @@ def parse_aggregate_expression(expr):
202
204
  cleaned = expr.strip()
203
205
 
204
206
  for func_name in AGG_FUNC_NAMES:
205
- pattern = re.compile(r'\b' + func_name + r'\s*\(\s*([^)]*)\s*\)', re.IGNORECASE)
206
- match = pattern.search(cleaned)
207
+ pattern = re.compile(
208
+ r'^\s*' + func_name + r'\s*\(\s*([A-Za-z_][A-Za-z0-9_]*|\*)\s*\)\s*$',
209
+ re.IGNORECASE
210
+ )
211
+ match = pattern.match(cleaned)
207
212
  if match:
208
213
  col = match.group(1).strip()
209
214
  return func_name.lower(), col
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: informatica-python
3
- Version: 1.2.0
3
+ Version: 1.2.1
4
4
  Summary: Convert Informatica PowerCenter workflow XML to Python/PySpark code
5
5
  License-Expression: MIT
6
6
  Requires-Python: >=3.8
@@ -7,14 +7,14 @@ informatica_python/generators/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5N
7
7
  informatica_python/generators/config_gen.py,sha256=4tqcNKTB06kyGZIiM4yl0q97q_i3zeCHXTjuE1dNFKY,5726
8
8
  informatica_python/generators/error_log_gen.py,sha256=gCYoyIZYsxGtegq-QHBhyvYngckSwZY71_662aAzlZw,16314
9
9
  informatica_python/generators/helper_gen.py,sha256=MbK5HxB7ENsEdXWsosyoK7WojZQDeEsjconlad9BYKk,53039
10
- informatica_python/generators/mapping_gen.py,sha256=jOJIBSLpqGylWm0Vak-FfaXQ2ruvMpT5hGSNfib6UMQ,32493
10
+ informatica_python/generators/mapping_gen.py,sha256=27mxx9tScKmYaaoq1bEZMQ32inAvTUMgBW3GssqPd0g,33408
11
11
  informatica_python/generators/sql_gen.py,sha256=rwDy-sFpcPZoetUSppK7iF02aFxYIX8PLICnK021o6E,5711
12
12
  informatica_python/generators/workflow_gen.py,sha256=ltpDgQELPsERfqSIz1LQUFw_gs-wKqDTOMwb0IDxJpI,9402
13
13
  informatica_python/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
14
14
  informatica_python/utils/datatype_map.py,sha256=iLOYg-iBKT4rMecGbrFkTpJj4yqs5S9HeBOTLUIWhX0,2809
15
- informatica_python/utils/expression_converter.py,sha256=9Hovm6qS5oDDsjvNlhj273AK0xlizkfGYXAWoRBKwQ8,7080
16
- informatica_python-1.2.0.dist-info/METADATA,sha256=JazvGhNdiVCi1oJYaP8Ggq4v8dC42tLPR_IOUjMjBn8,3355
17
- informatica_python-1.2.0.dist-info/WHEEL,sha256=PovZm1ExVWmrRefZoXCfejlbKLnQI5SVIf1SWRV4QQI,97
18
- informatica_python-1.2.0.dist-info/entry_points.txt,sha256=030jjTrx-1oRRQ16HZz52rdcKS8R8_llnymsTUtn_Xc,67
19
- informatica_python-1.2.0.dist-info/top_level.txt,sha256=Dngg-WNteYi22XAJU2XKAQS8aZ52yM2LYC0tzxrlbVQ,19
20
- informatica_python-1.2.0.dist-info/RECORD,,
15
+ informatica_python/utils/expression_converter.py,sha256=unO4xRl7LQP-eCgG1N9x_Zb1GiIiVXjPgxTUHGdYz7U,7201
16
+ informatica_python-1.2.1.dist-info/METADATA,sha256=D4Q-RACXeqHEAKaUrfySVGNb0tbG1fxT51MxK5nQ2gQ,3355
17
+ informatica_python-1.2.1.dist-info/WHEEL,sha256=PovZm1ExVWmrRefZoXCfejlbKLnQI5SVIf1SWRV4QQI,97
18
+ informatica_python-1.2.1.dist-info/entry_points.txt,sha256=030jjTrx-1oRRQ16HZz52rdcKS8R8_llnymsTUtn_Xc,67
19
+ informatica_python-1.2.1.dist-info/top_level.txt,sha256=Dngg-WNteYi22XAJU2XKAQS8aZ52yM2LYC0tzxrlbVQ,19
20
+ informatica_python-1.2.1.dist-info/RECORD,,