PyPI - informatica-python - Versions diffs - 1.2.0__tar.gz → 1.2.1__tar.gz - Mend

informatica-python 1.2.0 (tar.gz) → 1.2.1 (tar.gz)

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Files changed (26)

{informatica_python-1.2.0 → informatica_python-1.2.1}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: informatica-python
-Version: 1.2.0
+Version: 1.2.1
 Summary: Convert Informatica PowerCenter workflow XML to Python/PySpark code
 License-Expression: MIT
 Requires-Python: >=3.8

{informatica_python-1.2.0 → informatica_python-1.2.1}/informatica_python/generators/mapping_gen.py RENAMED Viewed

@@ -278,7 +278,7 @@ def _generate_transformation(lines, tx, connector_graph, source_dfs, transform_m
     elif tx_type == "sorter":
         _gen_sorter_transform(lines, tx, tx_safe, input_df, source_dfs)
     elif tx_type in ("joiner",):
-        _gen_joiner_transform(lines, tx, tx_safe, input_df, input_sources, source_dfs)
+        _gen_joiner_transform(lines, tx, tx_safe, input_df, input_sources, source_dfs, connector_graph)
     elif tx_type in ("lookup procedure", "lookup"):
         _gen_lookup_transform(lines, tx, tx_safe, input_df, source_dfs)
     elif tx_type == "router":
@@ -410,7 +410,7 @@ def _gen_sorter_transform(lines, tx, tx_safe, input_df, source_dfs):
     source_dfs[tx.name] = f"df_{tx_safe}"
-def _gen_joiner_transform(lines, tx, tx_safe, input_df, input_sources, source_dfs):
+def _gen_joiner_transform(lines, tx, tx_safe, input_df, input_sources, source_dfs, connector_graph=None):
     join_type = "inner"
     join_condition = ""
     for attr in tx.attributes:
@@ -436,10 +436,31 @@ def _gen_joiner_transform(lines, tx, tx_safe, input_df, input_sources, source_df
     left_keys, right_keys = parse_join_condition(join_condition)
+    master_src = None
+    detail_src = None
+    input_conns = connector_graph.get("to", {}).get(tx.name, []) if connector_graph else []
+    for conn in input_conns:
+        to_field = conn.to_field
+        if to_field in master_fields:
+            master_src = conn.from_instance
+        elif to_field in detail_fields:
+            detail_src = conn.from_instance
     src_list = list(input_sources)
-    if len(src_list) >= 2:
-        df_master = source_dfs.get(src_list[0], f"df_{_safe_name(src_list[0])}")
-        df_detail = source_dfs.get(src_list[1], f"df_{_safe_name(src_list[1])}")
+    if not master_src and not detail_src and len(src_list) >= 2:
+        master_src = src_list[0]
+        detail_src = src_list[1]
+    elif not master_src and len(src_list) >= 1:
+        master_src = src_list[0]
+    if not detail_src:
+        for s in src_list:
+            if s != master_src:
+                detail_src = s
+                break
+    if master_src and detail_src:
+        df_master = source_dfs.get(master_src, f"df_{_safe_name(master_src)}")
+        df_detail = source_dfs.get(detail_src, f"df_{_safe_name(detail_src)}")
         lines.append(f"    # Join ({join_type}): {join_condition or 'auto'}")
         if left_keys and right_keys:
@@ -451,9 +472,7 @@ def _gen_joiner_transform(lines, tx, tx_safe, input_df, input_sources, source_df
             lines.append(f"        suffixes=('', '_master')")
             lines.append(f"    )")
         else:
-            common_cols = []
-            if master_fields and detail_fields:
-                common_cols = [f for f in detail_fields if f in master_fields]
+            common_cols = [f for f in detail_fields if f in master_fields]
             if common_cols:
                 lines.append(f"    df_{tx_safe} = {df_detail}.merge(")
                 lines.append(f"        {df_master},")
@@ -539,9 +558,13 @@ def _gen_lookup_transform(lines, tx, tx_safe, input_df, source_dfs):
         drop_cols = [k for k in lookup_keys if k not in input_keys]
         if drop_cols:
-            lines.append(f"    lkp_drop = [c for c in {drop_cols} if c in df_{tx_safe}.columns and c + '_lkp' not in df_{tx_safe}.columns]")
+            lines.append(f"    _lkp_drop = [c for c in {drop_cols} if c in df_{tx_safe}.columns]")
+            lines.append(f"    if _lkp_drop:")
+            lines.append(f"        df_{tx_safe} = df_{tx_safe}.drop(columns=_lkp_drop)")
         for rf in all_output_fields:
+            lines.append(f"    if '{rf.name}' not in df_{tx_safe}.columns:")
+            lines.append(f"        df_{tx_safe}['{rf.name}'] = None")
             if rf.default_value:
                 lines.append(f"    df_{tx_safe}['{rf.name}'] = df_{tx_safe}['{rf.name}'].fillna({repr(rf.default_value)})")
     else:

{informatica_python-1.2.0 → informatica_python-1.2.1}/informatica_python/utils/expression_converter.py RENAMED Viewed

@@ -95,11 +95,11 @@ INFA_FUNC_MAP = {
 }
-AGG_FUNC_NAMES = {
-    "SUM", "COUNT", "AVG", "MAX", "MIN", "MEDIAN",
-    "STDDEV", "VARIANCE", "PERCENTILE", "FIRST", "LAST",
-    "MOVINGAVG", "MOVINGSUM", "CUME",
-}
+AGG_FUNC_NAMES = [
+    "MOVINGAVG", "MOVINGSUM", "PERCENTILE", "VARIANCE",
+    "STDDEV", "MEDIAN", "COUNT", "FIRST", "LAST",
+    "CUME", "SUM", "AVG", "MAX", "MIN",
+]
 def convert_expression(expr):
@@ -131,6 +131,8 @@ def convert_expression(expr):
     converted = re.sub(r'<>', '!=', converted)
+    converted = re.sub(r'(?<![<>!])=(?!=)', '==', converted)
     converted = re.sub(r':LKP\.(\w+)\(', r'lookup_func("\1", ', converted)
     converted = re.sub(r'\$\$(\w+)', r'get_variable("\1")', converted)
@@ -202,8 +204,11 @@ def parse_aggregate_expression(expr):
     cleaned = expr.strip()
     for func_name in AGG_FUNC_NAMES:
-        pattern = re.compile(r'\b' + func_name + r'\s*\(\s*([^)]*)\s*\)', re.IGNORECASE)
-        match = pattern.search(cleaned)
+        pattern = re.compile(
+            r'^\s*' + func_name + r'\s*\(\s*([A-Za-z_][A-Za-z0-9_]*|\*)\s*\)\s*$',
+            re.IGNORECASE
+        )
+        match = pattern.match(cleaned)
         if match:
             col = match.group(1).strip()
             return func_name.lower(), col

{informatica_python-1.2.0 → informatica_python-1.2.1}/informatica_python.egg-info/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: informatica-python
-Version: 1.2.0
+Version: 1.2.1
 Summary: Convert Informatica PowerCenter workflow XML to Python/PySpark code
 License-Expression: MIT
 Requires-Python: >=3.8

{informatica_python-1.2.0 → informatica_python-1.2.1}/pyproject.toml RENAMED Viewed

@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
 [project]
 name = "informatica-python"
-version = "1.2.0"
+version = "1.2.1"
 description = "Convert Informatica PowerCenter workflow XML to Python/PySpark code"
 readme = "README.md"
 license = "MIT"

{informatica_python-1.2.0 → informatica_python-1.2.1}/tests/test_converter.py RENAMED Viewed

@@ -239,6 +239,8 @@ def test_expression_converter_expanded():
     result = convert_expression("IIF(STATUS = 'A', 'Active', 'Inactive')")
     assert "iif_expr" in result
+    assert "==" in result, f"Expected == in result, got: {result}"
+    assert "= =" not in result
     result = convert_expression("DECODE(TYPE, 1, 'One', 2, 'Two', 'Other')")
     assert "decode_expr" in result
@@ -301,6 +303,17 @@ def test_expression_converter_expanded():
     result = convert_expression("STATUS <> 'X'")
     assert "!=" in result
+    result = convert_expression("AMOUNT >= 100")
+    assert ">=" in result
+    assert ">==" not in result
+    result = convert_expression("AMOUNT <= 100")
+    assert "<=" in result
+    assert "<==" not in result
+    result = convert_expression("SUM(A)/COUNT(*)")
+    assert "sum_val" in result or "count_val" in result
     result = convert_expression("$$MY_VARIABLE")
     assert 'get_variable("MY_VARIABLE")' in result
@@ -405,6 +418,13 @@ def test_parse_aggregate_expression():
     assert func is None
     assert col is None
+    func, col = parse_aggregate_expression("SUM(A)/COUNT(*)")
+    assert func is None, f"Compound expression should not match, got func={func}"
+    assert col is None
+    func, col = parse_aggregate_expression("AVG(A+B)")
+    assert func is None, f"Expression with operators should not match, got func={func}"
     print("PASS: test_parse_aggregate_expression")
@@ -459,7 +479,7 @@ def test_generated_joiner_code():
     lines = []
     source_dfs = {"SRC_CUST": "df_src_cust", "SRC_ORDER": "df_src_order"}
     input_sources = {"SRC_CUST", "SRC_ORDER"}
-    _gen_joiner_transform(lines, tx, "jnr_cust_order", "df_src_cust", input_sources, source_dfs)
+    _gen_joiner_transform(lines, tx, "jnr_cust_order", "df_src_cust", input_sources, source_dfs, connector_graph=None)
     code = "\n".join(lines)
     assert "merge" in code