informatica-python 1.2.0__tar.gz → 1.2.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (26)
  1. {informatica_python-1.2.0 → informatica_python-1.2.1}/PKG-INFO +1 -1
  2. {informatica_python-1.2.0 → informatica_python-1.2.1}/informatica_python/generators/mapping_gen.py +32 -9
  3. {informatica_python-1.2.0 → informatica_python-1.2.1}/informatica_python/utils/expression_converter.py +12 -7
  4. {informatica_python-1.2.0 → informatica_python-1.2.1}/informatica_python.egg-info/PKG-INFO +1 -1
  5. {informatica_python-1.2.0 → informatica_python-1.2.1}/pyproject.toml +1 -1
  6. {informatica_python-1.2.0 → informatica_python-1.2.1}/tests/test_converter.py +21 -1
  7. {informatica_python-1.2.0 → informatica_python-1.2.1}/README.md +0 -0
  8. {informatica_python-1.2.0 → informatica_python-1.2.1}/informatica_python/__init__.py +0 -0
  9. {informatica_python-1.2.0 → informatica_python-1.2.1}/informatica_python/cli.py +0 -0
  10. {informatica_python-1.2.0 → informatica_python-1.2.1}/informatica_python/converter.py +0 -0
  11. {informatica_python-1.2.0 → informatica_python-1.2.1}/informatica_python/generators/__init__.py +0 -0
  12. {informatica_python-1.2.0 → informatica_python-1.2.1}/informatica_python/generators/config_gen.py +0 -0
  13. {informatica_python-1.2.0 → informatica_python-1.2.1}/informatica_python/generators/error_log_gen.py +0 -0
  14. {informatica_python-1.2.0 → informatica_python-1.2.1}/informatica_python/generators/helper_gen.py +0 -0
  15. {informatica_python-1.2.0 → informatica_python-1.2.1}/informatica_python/generators/sql_gen.py +0 -0
  16. {informatica_python-1.2.0 → informatica_python-1.2.1}/informatica_python/generators/workflow_gen.py +0 -0
  17. {informatica_python-1.2.0 → informatica_python-1.2.1}/informatica_python/models.py +0 -0
  18. {informatica_python-1.2.0 → informatica_python-1.2.1}/informatica_python/parser.py +0 -0
  19. {informatica_python-1.2.0 → informatica_python-1.2.1}/informatica_python/utils/__init__.py +0 -0
  20. {informatica_python-1.2.0 → informatica_python-1.2.1}/informatica_python/utils/datatype_map.py +0 -0
  21. {informatica_python-1.2.0 → informatica_python-1.2.1}/informatica_python.egg-info/SOURCES.txt +0 -0
  22. {informatica_python-1.2.0 → informatica_python-1.2.1}/informatica_python.egg-info/dependency_links.txt +0 -0
  23. {informatica_python-1.2.0 → informatica_python-1.2.1}/informatica_python.egg-info/entry_points.txt +0 -0
  24. {informatica_python-1.2.0 → informatica_python-1.2.1}/informatica_python.egg-info/requires.txt +0 -0
  25. {informatica_python-1.2.0 → informatica_python-1.2.1}/informatica_python.egg-info/top_level.txt +0 -0
  26. {informatica_python-1.2.0 → informatica_python-1.2.1}/setup.cfg +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: informatica-python
3
- Version: 1.2.0
3
+ Version: 1.2.1
4
4
  Summary: Convert Informatica PowerCenter workflow XML to Python/PySpark code
5
5
  License-Expression: MIT
6
6
  Requires-Python: >=3.8
@@ -278,7 +278,7 @@ def _generate_transformation(lines, tx, connector_graph, source_dfs, transform_m
278
278
  elif tx_type == "sorter":
279
279
  _gen_sorter_transform(lines, tx, tx_safe, input_df, source_dfs)
280
280
  elif tx_type in ("joiner",):
281
- _gen_joiner_transform(lines, tx, tx_safe, input_df, input_sources, source_dfs)
281
+ _gen_joiner_transform(lines, tx, tx_safe, input_df, input_sources, source_dfs, connector_graph)
282
282
  elif tx_type in ("lookup procedure", "lookup"):
283
283
  _gen_lookup_transform(lines, tx, tx_safe, input_df, source_dfs)
284
284
  elif tx_type == "router":
@@ -410,7 +410,7 @@ def _gen_sorter_transform(lines, tx, tx_safe, input_df, source_dfs):
410
410
  source_dfs[tx.name] = f"df_{tx_safe}"
411
411
 
412
412
 
413
- def _gen_joiner_transform(lines, tx, tx_safe, input_df, input_sources, source_dfs):
413
+ def _gen_joiner_transform(lines, tx, tx_safe, input_df, input_sources, source_dfs, connector_graph=None):
414
414
  join_type = "inner"
415
415
  join_condition = ""
416
416
  for attr in tx.attributes:
@@ -436,10 +436,31 @@ def _gen_joiner_transform(lines, tx, tx_safe, input_df, input_sources, source_df
436
436
 
437
437
  left_keys, right_keys = parse_join_condition(join_condition)
438
438
 
439
+ master_src = None
440
+ detail_src = None
441
+ input_conns = connector_graph.get("to", {}).get(tx.name, []) if connector_graph else []
442
+ for conn in input_conns:
443
+ to_field = conn.to_field
444
+ if to_field in master_fields:
445
+ master_src = conn.from_instance
446
+ elif to_field in detail_fields:
447
+ detail_src = conn.from_instance
448
+
439
449
  src_list = list(input_sources)
440
- if len(src_list) >= 2:
441
- df_master = source_dfs.get(src_list[0], f"df_{_safe_name(src_list[0])}")
442
- df_detail = source_dfs.get(src_list[1], f"df_{_safe_name(src_list[1])}")
450
+ if not master_src and not detail_src and len(src_list) >= 2:
451
+ master_src = src_list[0]
452
+ detail_src = src_list[1]
453
+ elif not master_src and len(src_list) >= 1:
454
+ master_src = src_list[0]
455
+ if not detail_src:
456
+ for s in src_list:
457
+ if s != master_src:
458
+ detail_src = s
459
+ break
460
+
461
+ if master_src and detail_src:
462
+ df_master = source_dfs.get(master_src, f"df_{_safe_name(master_src)}")
463
+ df_detail = source_dfs.get(detail_src, f"df_{_safe_name(detail_src)}")
443
464
 
444
465
  lines.append(f" # Join ({join_type}): {join_condition or 'auto'}")
445
466
  if left_keys and right_keys:
@@ -451,9 +472,7 @@ def _gen_joiner_transform(lines, tx, tx_safe, input_df, input_sources, source_df
451
472
  lines.append(f" suffixes=('', '_master')")
452
473
  lines.append(f" )")
453
474
  else:
454
- common_cols = []
455
- if master_fields and detail_fields:
456
- common_cols = [f for f in detail_fields if f in master_fields]
475
+ common_cols = [f for f in detail_fields if f in master_fields]
457
476
  if common_cols:
458
477
  lines.append(f" df_{tx_safe} = {df_detail}.merge(")
459
478
  lines.append(f" {df_master},")
@@ -539,9 +558,13 @@ def _gen_lookup_transform(lines, tx, tx_safe, input_df, source_dfs):
539
558
 
540
559
  drop_cols = [k for k in lookup_keys if k not in input_keys]
541
560
  if drop_cols:
542
- lines.append(f" lkp_drop = [c for c in {drop_cols} if c in df_{tx_safe}.columns and c + '_lkp' not in df_{tx_safe}.columns]")
561
+ lines.append(f" _lkp_drop = [c for c in {drop_cols} if c in df_{tx_safe}.columns]")
562
+ lines.append(f" if _lkp_drop:")
563
+ lines.append(f" df_{tx_safe} = df_{tx_safe}.drop(columns=_lkp_drop)")
543
564
 
544
565
  for rf in all_output_fields:
566
+ lines.append(f" if '{rf.name}' not in df_{tx_safe}.columns:")
567
+ lines.append(f" df_{tx_safe}['{rf.name}'] = None")
545
568
  if rf.default_value:
546
569
  lines.append(f" df_{tx_safe}['{rf.name}'] = df_{tx_safe}['{rf.name}'].fillna({repr(rf.default_value)})")
547
570
  else:
@@ -95,11 +95,11 @@ INFA_FUNC_MAP = {
95
95
  }
96
96
 
97
97
 
98
- AGG_FUNC_NAMES = {
99
- "SUM", "COUNT", "AVG", "MAX", "MIN", "MEDIAN",
100
- "STDDEV", "VARIANCE", "PERCENTILE", "FIRST", "LAST",
101
- "MOVINGAVG", "MOVINGSUM", "CUME",
102
- }
98
+ AGG_FUNC_NAMES = [
99
+ "MOVINGAVG", "MOVINGSUM", "PERCENTILE", "VARIANCE",
100
+ "STDDEV", "MEDIAN", "COUNT", "FIRST", "LAST",
101
+ "CUME", "SUM", "AVG", "MAX", "MIN",
102
+ ]
103
103
 
104
104
 
105
105
  def convert_expression(expr):
@@ -131,6 +131,8 @@ def convert_expression(expr):
131
131
 
132
132
  converted = re.sub(r'<>', '!=', converted)
133
133
 
134
+ converted = re.sub(r'(?<![<>!])=(?!=)', '==', converted)
135
+
134
136
  converted = re.sub(r':LKP\.(\w+)\(', r'lookup_func("\1", ', converted)
135
137
 
136
138
  converted = re.sub(r'\$\$(\w+)', r'get_variable("\1")', converted)
@@ -202,8 +204,11 @@ def parse_aggregate_expression(expr):
202
204
  cleaned = expr.strip()
203
205
 
204
206
  for func_name in AGG_FUNC_NAMES:
205
- pattern = re.compile(r'\b' + func_name + r'\s*\(\s*([^)]*)\s*\)', re.IGNORECASE)
206
- match = pattern.search(cleaned)
207
+ pattern = re.compile(
208
+ r'^\s*' + func_name + r'\s*\(\s*([A-Za-z_][A-Za-z0-9_]*|\*)\s*\)\s*$',
209
+ re.IGNORECASE
210
+ )
211
+ match = pattern.match(cleaned)
207
212
  if match:
208
213
  col = match.group(1).strip()
209
214
  return func_name.lower(), col
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: informatica-python
3
- Version: 1.2.0
3
+ Version: 1.2.1
4
4
  Summary: Convert Informatica PowerCenter workflow XML to Python/PySpark code
5
5
  License-Expression: MIT
6
6
  Requires-Python: >=3.8
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
4
4
 
5
5
  [project]
6
6
  name = "informatica-python"
7
- version = "1.2.0"
7
+ version = "1.2.1"
8
8
  description = "Convert Informatica PowerCenter workflow XML to Python/PySpark code"
9
9
  readme = "README.md"
10
10
  license = "MIT"
@@ -239,6 +239,8 @@ def test_expression_converter_expanded():
239
239
 
240
240
  result = convert_expression("IIF(STATUS = 'A', 'Active', 'Inactive')")
241
241
  assert "iif_expr" in result
242
+ assert "==" in result, f"Expected == in result, got: {result}"
243
+ assert "= =" not in result
242
244
 
243
245
  result = convert_expression("DECODE(TYPE, 1, 'One', 2, 'Two', 'Other')")
244
246
  assert "decode_expr" in result
@@ -301,6 +303,17 @@ def test_expression_converter_expanded():
301
303
  result = convert_expression("STATUS <> 'X'")
302
304
  assert "!=" in result
303
305
 
306
+ result = convert_expression("AMOUNT >= 100")
307
+ assert ">=" in result
308
+ assert ">==" not in result
309
+
310
+ result = convert_expression("AMOUNT <= 100")
311
+ assert "<=" in result
312
+ assert "<==" not in result
313
+
314
+ result = convert_expression("SUM(A)/COUNT(*)")
315
+ assert "sum_val" in result or "count_val" in result
316
+
304
317
  result = convert_expression("$$MY_VARIABLE")
305
318
  assert 'get_variable("MY_VARIABLE")' in result
306
319
 
@@ -405,6 +418,13 @@ def test_parse_aggregate_expression():
405
418
  assert func is None
406
419
  assert col is None
407
420
 
421
+ func, col = parse_aggregate_expression("SUM(A)/COUNT(*)")
422
+ assert func is None, f"Compound expression should not match, got func={func}"
423
+ assert col is None
424
+
425
+ func, col = parse_aggregate_expression("AVG(A+B)")
426
+ assert func is None, f"Expression with operators should not match, got func={func}"
427
+
408
428
  print("PASS: test_parse_aggregate_expression")
409
429
 
410
430
 
@@ -459,7 +479,7 @@ def test_generated_joiner_code():
459
479
  lines = []
460
480
  source_dfs = {"SRC_CUST": "df_src_cust", "SRC_ORDER": "df_src_order"}
461
481
  input_sources = {"SRC_CUST", "SRC_ORDER"}
462
- _gen_joiner_transform(lines, tx, "jnr_cust_order", "df_src_cust", input_sources, source_dfs)
482
+ _gen_joiner_transform(lines, tx, "jnr_cust_order", "df_src_cust", input_sources, source_dfs, connector_graph=None)
463
483
  code = "\n".join(lines)
464
484
 
465
485
  assert "merge" in code