informatica-python 1.1.1__tar.gz → 1.2.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (28)
  1. {informatica_python-1.1.1 → informatica_python-1.2.0}/PKG-INFO +1 -1
  2. {informatica_python-1.1.1 → informatica_python-1.2.0}/informatica_python/generators/helper_gen.py +396 -0
  3. {informatica_python-1.1.1 → informatica_python-1.2.0}/informatica_python/generators/mapping_gen.py +137 -22
  4. informatica_python-1.2.0/informatica_python/utils/expression_converter.py +259 -0
  5. {informatica_python-1.1.1 → informatica_python-1.2.0}/informatica_python.egg-info/PKG-INFO +1 -1
  6. {informatica_python-1.1.1 → informatica_python-1.2.0}/pyproject.toml +1 -1
  7. informatica_python-1.2.0/tests/test_converter.py +551 -0
  8. informatica_python-1.1.1/informatica_python/utils/expression_converter.py +0 -128
  9. informatica_python-1.1.1/tests/test_converter.py +0 -260
  10. {informatica_python-1.1.1 → informatica_python-1.2.0}/README.md +0 -0
  11. {informatica_python-1.1.1 → informatica_python-1.2.0}/informatica_python/__init__.py +0 -0
  12. {informatica_python-1.1.1 → informatica_python-1.2.0}/informatica_python/cli.py +0 -0
  13. {informatica_python-1.1.1 → informatica_python-1.2.0}/informatica_python/converter.py +0 -0
  14. {informatica_python-1.1.1 → informatica_python-1.2.0}/informatica_python/generators/__init__.py +0 -0
  15. {informatica_python-1.1.1 → informatica_python-1.2.0}/informatica_python/generators/config_gen.py +0 -0
  16. {informatica_python-1.1.1 → informatica_python-1.2.0}/informatica_python/generators/error_log_gen.py +0 -0
  17. {informatica_python-1.1.1 → informatica_python-1.2.0}/informatica_python/generators/sql_gen.py +0 -0
  18. {informatica_python-1.1.1 → informatica_python-1.2.0}/informatica_python/generators/workflow_gen.py +0 -0
  19. {informatica_python-1.1.1 → informatica_python-1.2.0}/informatica_python/models.py +0 -0
  20. {informatica_python-1.1.1 → informatica_python-1.2.0}/informatica_python/parser.py +0 -0
  21. {informatica_python-1.1.1 → informatica_python-1.2.0}/informatica_python/utils/__init__.py +0 -0
  22. {informatica_python-1.1.1 → informatica_python-1.2.0}/informatica_python/utils/datatype_map.py +0 -0
  23. {informatica_python-1.1.1 → informatica_python-1.2.0}/informatica_python.egg-info/SOURCES.txt +0 -0
  24. {informatica_python-1.1.1 → informatica_python-1.2.0}/informatica_python.egg-info/dependency_links.txt +0 -0
  25. {informatica_python-1.1.1 → informatica_python-1.2.0}/informatica_python.egg-info/entry_points.txt +0 -0
  26. {informatica_python-1.1.1 → informatica_python-1.2.0}/informatica_python.egg-info/requires.txt +0 -0
  27. {informatica_python-1.1.1 → informatica_python-1.2.0}/informatica_python.egg-info/top_level.txt +0 -0
  28. {informatica_python-1.1.1 → informatica_python-1.2.0}/setup.cfg +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: informatica-python
3
- Version: 1.1.1
3
+ Version: 1.2.0
4
4
  Summary: Convert Informatica PowerCenter workflow XML to Python/PySpark code
5
5
  License-Expression: MIT
6
6
  Requires-Python: >=3.8
@@ -631,6 +631,402 @@ def _add_expression_helpers(lines):
631
631
  lines.append(" return date_val")
632
632
  lines.append("")
633
633
  lines.append("")
634
+ lines.append("def initcap(value):")
635
+ lines.append(' """Informatica INITCAP equivalent."""')
636
+ lines.append(" return str(value).title() if value is not None else None")
637
+ lines.append("")
638
+ lines.append("")
639
+ lines.append("def reverse_str(value):")
640
+ lines.append(' """Informatica REVERSE equivalent."""')
641
+ lines.append(" return str(value)[::-1] if value is not None else None")
642
+ lines.append("")
643
+ lines.append("")
644
+ lines.append("def chr_func(code):")
645
+ lines.append(' """Informatica CHR equivalent."""')
646
+ lines.append(" return chr(int(code)) if code is not None else None")
647
+ lines.append("")
648
+ lines.append("")
649
+ lines.append("def ascii_func(value):")
650
+ lines.append(' """Informatica ASCII equivalent."""')
651
+ lines.append(" if value is None or str(value) == '':")
652
+ lines.append(" return None")
653
+ lines.append(" return ord(str(value)[0])")
654
+ lines.append("")
655
+ lines.append("")
656
+ lines.append("def left_str(value, n):")
657
+ lines.append(' """Return leftmost n characters."""')
658
+ lines.append(" return str(value)[:int(n)] if value is not None else None")
659
+ lines.append("")
660
+ lines.append("")
661
+ lines.append("def right_str(value, n):")
662
+ lines.append(' """Return rightmost n characters."""')
663
+ lines.append(" return str(value)[-int(n):] if value is not None else None")
664
+ lines.append("")
665
+ lines.append("")
666
+ lines.append("def trim_func(value):")
667
+ lines.append(' """Informatica TRIM equivalent."""')
668
+ lines.append(" return str(value).strip() if value is not None else None")
669
+ lines.append("")
670
+ lines.append("")
671
+ lines.append("def indexof(value, search, start=1):")
672
+ lines.append(' """Informatica INDEXOF equivalent (1-based)."""')
673
+ lines.append(" if value is None or search is None:")
674
+ lines.append(" return 0")
675
+ lines.append(" idx = str(value).find(str(search), max(start - 1, 0))")
676
+ lines.append(" return idx + 1 if idx >= 0 else 0")
677
+ lines.append("")
678
+ lines.append("")
679
+ lines.append("def metaphone_func(value):")
680
+ lines.append(' """Informatica METAPHONE equivalent (simplified)."""')
681
+ lines.append(" if value is None:")
682
+ lines.append(" return None")
683
+ lines.append(" try:")
684
+ lines.append(" import jellyfish")
685
+ lines.append(" return jellyfish.metaphone(str(value))")
686
+ lines.append(" except ImportError:")
687
+ lines.append(" return str(value).upper()[:4]")
688
+ lines.append("")
689
+ lines.append("")
690
+ lines.append("def soundex_func(value):")
691
+ lines.append(' """Informatica SOUNDEX equivalent (simplified)."""')
692
+ lines.append(" if value is None:")
693
+ lines.append(" return None")
694
+ lines.append(" try:")
695
+ lines.append(" import jellyfish")
696
+ lines.append(" return jellyfish.soundex(str(value))")
697
+ lines.append(" except ImportError:")
698
+ lines.append(" s = str(value).upper()")
699
+ lines.append(" if not s:")
700
+ lines.append(" return '0000'")
701
+ lines.append(" codes = {'B':'1','F':'1','P':'1','V':'1','C':'2','G':'2','J':'2','K':'2','Q':'2','S':'2','X':'2','Z':'2','D':'3','T':'3','L':'4','M':'5','N':'5','R':'6'}")
702
+ lines.append(" result = s[0]")
703
+ lines.append(" for ch in s[1:]:")
704
+ lines.append(" code = codes.get(ch, '0')")
705
+ lines.append(" if code != '0' and code != result[-1]:")
706
+ lines.append(" result += code")
707
+ lines.append(" return (result + '0000')[:4]")
708
+ lines.append("")
709
+ lines.append("")
710
+ lines.append("def compress_func(value):")
711
+ lines.append(' """Informatica COMPRESS equivalent - removes spaces."""')
712
+ lines.append(" return str(value).replace(' ', '') if value is not None else None")
713
+ lines.append("")
714
+ lines.append("")
715
+ lines.append("def decompress_func(value):")
716
+ lines.append(' """Informatica DECOMPRESS equivalent."""')
717
+ lines.append(" return value")
718
+ lines.append("")
719
+ lines.append("")
720
+ lines.append("def to_timestamp_func(value, fmt=None):")
721
+ lines.append(' """Informatica TO_TIMESTAMP equivalent."""')
722
+ lines.append(" return to_date(value, fmt)")
723
+ lines.append("")
724
+ lines.append("")
725
+ lines.append("def cast_func(value, datatype):")
726
+ lines.append(' """Informatica CAST equivalent."""')
727
+ lines.append(" if value is None:")
728
+ lines.append(" return None")
729
+ lines.append(" dt = str(datatype).upper()")
730
+ lines.append(" if 'INT' in dt:")
731
+ lines.append(" return int(float(str(value)))")
732
+ lines.append(" elif 'FLOAT' in dt or 'DOUBLE' in dt or 'DECIMAL' in dt or 'NUMBER' in dt:")
733
+ lines.append(" return float(str(value))")
734
+ lines.append(" elif 'CHAR' in dt or 'STRING' in dt or 'VARCHAR' in dt:")
735
+ lines.append(" return str(value)")
736
+ lines.append(" elif 'DATE' in dt or 'TIMESTAMP' in dt:")
737
+ lines.append(" return to_date(value)")
738
+ lines.append(" return value")
739
+ lines.append("")
740
+ lines.append("")
741
+ lines.append("def set_date_part(part, date_val, value):")
742
+ lines.append(' """Informatica SET_DATE_PART equivalent."""')
743
+ lines.append(" if date_val is None:")
744
+ lines.append(" return None")
745
+ lines.append(" if isinstance(date_val, str):")
746
+ lines.append(" date_val = datetime.fromisoformat(date_val)")
747
+ lines.append(" p = part.upper()")
748
+ lines.append(" if p in ('YYYY', 'YY', 'YEAR'):")
749
+ lines.append(" return date_val.replace(year=int(value))")
750
+ lines.append(" elif p in ('MM', 'MON', 'MONTH'):")
751
+ lines.append(" return date_val.replace(month=int(value))")
752
+ lines.append(" elif p in ('DD', 'DAY'):")
753
+ lines.append(" return date_val.replace(day=int(value))")
754
+ lines.append(" elif p in ('HH', 'HH24', 'HOUR'):")
755
+ lines.append(" return date_val.replace(hour=int(value))")
756
+ lines.append(" elif p in ('MI', 'MINUTE'):")
757
+ lines.append(" return date_val.replace(minute=int(value))")
758
+ lines.append(" elif p in ('SS', 'SECOND'):")
759
+ lines.append(" return date_val.replace(second=int(value))")
760
+ lines.append(" return date_val")
761
+ lines.append("")
762
+ lines.append("")
763
+ lines.append("def date_diff(date1, date2, part='DD'):")
764
+ lines.append(' """Informatica DATE_DIFF equivalent."""')
765
+ lines.append(" if date1 is None or date2 is None:")
766
+ lines.append(" return None")
767
+ lines.append(" if isinstance(date1, str):")
768
+ lines.append(" date1 = datetime.fromisoformat(date1)")
769
+ lines.append(" if isinstance(date2, str):")
770
+ lines.append(" date2 = datetime.fromisoformat(date2)")
771
+ lines.append(" delta = date1 - date2")
772
+ lines.append(" p = part.upper()")
773
+ lines.append(" if p in ('DD', 'DAY', 'D'):")
774
+ lines.append(" return delta.days")
775
+ lines.append(" elif p in ('HH', 'HOUR'):")
776
+ lines.append(" return int(delta.total_seconds() / 3600)")
777
+ lines.append(" elif p in ('MI', 'MINUTE'):")
778
+ lines.append(" return int(delta.total_seconds() / 60)")
779
+ lines.append(" elif p in ('SS', 'SECOND'):")
780
+ lines.append(" return int(delta.total_seconds())")
781
+ lines.append(" elif p in ('MM', 'MONTH'):")
782
+ lines.append(" return (date1.year - date2.year) * 12 + (date1.month - date2.month)")
783
+ lines.append(" elif p in ('YYYY', 'YEAR'):")
784
+ lines.append(" return date1.year - date2.year")
785
+ lines.append(" return delta.days")
786
+ lines.append("")
787
+ lines.append("")
788
+ lines.append("def date_compare(date1, date2):")
789
+ lines.append(' """Informatica DATE_COMPARE equivalent. Returns -1, 0, or 1."""')
790
+ lines.append(" if date1 is None and date2 is None:")
791
+ lines.append(" return 0")
792
+ lines.append(" if date1 is None:")
793
+ lines.append(" return -1")
794
+ lines.append(" if date2 is None:")
795
+ lines.append(" return 1")
796
+ lines.append(" if isinstance(date1, str):")
797
+ lines.append(" date1 = datetime.fromisoformat(date1)")
798
+ lines.append(" if isinstance(date2, str):")
799
+ lines.append(" date2 = datetime.fromisoformat(date2)")
800
+ lines.append(" if date1 < date2:")
801
+ lines.append(" return -1")
802
+ lines.append(" elif date1 > date2:")
803
+ lines.append(" return 1")
804
+ lines.append(" return 0")
805
+ lines.append("")
806
+ lines.append("")
807
+ lines.append("def last_day(date_val):")
808
+ lines.append(' """Informatica LAST_DAY equivalent."""')
809
+ lines.append(" if date_val is None:")
810
+ lines.append(" return None")
811
+ lines.append(" if isinstance(date_val, str):")
812
+ lines.append(" date_val = datetime.fromisoformat(date_val)")
813
+ lines.append(" import calendar")
814
+ lines.append(" last = calendar.monthrange(date_val.year, date_val.month)[1]")
815
+ lines.append(" return date_val.replace(day=last)")
816
+ lines.append("")
817
+ lines.append("")
818
+ lines.append("def make_date_time(year, month, day, hour=0, minute=0, second=0):")
819
+ lines.append(' """Informatica MAKE_DATE_TIME equivalent."""')
820
+ lines.append(" return datetime(int(year), int(month), int(day), int(hour), int(minute), int(second))")
821
+ lines.append("")
822
+ lines.append("")
823
+ lines.append("def trunc(value, precision=0):")
824
+ lines.append(' """Informatica TRUNC equivalent (numeric or date)."""')
825
+ lines.append(" if value is None:")
826
+ lines.append(" return None")
827
+ lines.append(" if hasattr(value, 'replace') and hasattr(value, 'year'):")
828
+ lines.append(" return value.replace(hour=0, minute=0, second=0, microsecond=0)")
829
+ lines.append(" import math")
830
+ lines.append(" factor = 10 ** int(precision)")
831
+ lines.append(" return math.trunc(float(value) * factor) / factor")
832
+ lines.append("")
833
+ lines.append("")
834
+ lines.append("def round_val(value, precision=0):")
835
+ lines.append(' """Informatica ROUND equivalent."""')
836
+ lines.append(" if value is None:")
837
+ lines.append(" return None")
838
+ lines.append(" return round(float(value), int(precision))")
839
+ lines.append("")
840
+ lines.append("")
841
+ lines.append("def abs_val(value):")
842
+ lines.append(' """Informatica ABS equivalent."""')
843
+ lines.append(" return abs(float(value)) if value is not None else None")
844
+ lines.append("")
845
+ lines.append("")
846
+ lines.append("def ceil_val(value):")
847
+ lines.append(' """Informatica CEIL equivalent."""')
848
+ lines.append(" import math")
849
+ lines.append(" return math.ceil(float(value)) if value is not None else None")
850
+ lines.append("")
851
+ lines.append("")
852
+ lines.append("def floor_val(value):")
853
+ lines.append(' """Informatica FLOOR equivalent."""')
854
+ lines.append(" import math")
855
+ lines.append(" return math.floor(float(value)) if value is not None else None")
856
+ lines.append("")
857
+ lines.append("")
858
+ lines.append("def mod_val(a, b):")
859
+ lines.append(' """Informatica MOD equivalent."""')
860
+ lines.append(" if a is None or b is None or float(b) == 0:")
861
+ lines.append(" return None")
862
+ lines.append(" return float(a) % float(b)")
863
+ lines.append("")
864
+ lines.append("")
865
+ lines.append("def power_val(base, exp):")
866
+ lines.append(' """Informatica POWER equivalent."""')
867
+ lines.append(" if base is None or exp is None:")
868
+ lines.append(" return None")
869
+ lines.append(" return float(base) ** float(exp)")
870
+ lines.append("")
871
+ lines.append("")
872
+ lines.append("def sqrt_val(value):")
873
+ lines.append(' """Informatica SQRT equivalent."""')
874
+ lines.append(" import math")
875
+ lines.append(" return math.sqrt(float(value)) if value is not None else None")
876
+ lines.append("")
877
+ lines.append("")
878
+ lines.append("def log_val(base, value):")
879
+ lines.append(' """Informatica LOG equivalent."""')
880
+ lines.append(" import math")
881
+ lines.append(" if value is None or base is None:")
882
+ lines.append(" return None")
883
+ lines.append(" return math.log(float(value), float(base))")
884
+ lines.append("")
885
+ lines.append("")
886
+ lines.append("def ln_val(value):")
887
+ lines.append(' """Informatica LN (natural log) equivalent."""')
888
+ lines.append(" import math")
889
+ lines.append(" return math.log(float(value)) if value is not None else None")
890
+ lines.append("")
891
+ lines.append("")
892
+ lines.append("def exp_val(value):")
893
+ lines.append(' """Informatica EXP equivalent."""')
894
+ lines.append(" import math")
895
+ lines.append(" return math.exp(float(value)) if value is not None else None")
896
+ lines.append("")
897
+ lines.append("")
898
+ lines.append("def sign_val(value):")
899
+ lines.append(' """Informatica SIGN equivalent."""')
900
+ lines.append(" if value is None:")
901
+ lines.append(" return None")
902
+ lines.append(" v = float(value)")
903
+ lines.append(" return 1 if v > 0 else (-1 if v < 0 else 0)")
904
+ lines.append("")
905
+ lines.append("")
906
+ lines.append("def rand_val(seed=None):")
907
+ lines.append(' """Informatica RAND equivalent."""')
908
+ lines.append(" import random")
909
+ lines.append(" if seed is not None:")
910
+ lines.append(" random.seed(seed)")
911
+ lines.append(" return random.random()")
912
+ lines.append("")
913
+ lines.append("")
914
+ lines.append("def greatest_val(*args):")
915
+ lines.append(' """Informatica GREATEST equivalent."""')
916
+ lines.append(" filtered = [a for a in args if a is not None]")
917
+ lines.append(" return max(filtered) if filtered else None")
918
+ lines.append("")
919
+ lines.append("")
920
+ lines.append("def least_val(*args):")
921
+ lines.append(' """Informatica LEAST equivalent."""')
922
+ lines.append(" filtered = [a for a in args if a is not None]")
923
+ lines.append(" return min(filtered) if filtered else None")
924
+ lines.append("")
925
+ lines.append("")
926
+ lines.append("def choose_expr(index, *values):")
927
+ lines.append(' """Informatica CHOOSE equivalent."""')
928
+ lines.append(" idx = int(index)")
929
+ lines.append(" if 1 <= idx <= len(values):")
930
+ lines.append(" return values[idx - 1]")
931
+ lines.append(" return None")
932
+ lines.append("")
933
+ lines.append("")
934
+ lines.append("def in_expr(value, *candidates):")
935
+ lines.append(' """Informatica IN equivalent."""')
936
+ lines.append(" return value in candidates")
937
+ lines.append("")
938
+ lines.append("")
939
+ lines.append("def max_val(*args):")
940
+ lines.append(' """Informatica MAX equivalent (row-level)."""')
941
+ lines.append(" filtered = [a for a in args if a is not None]")
942
+ lines.append(" return max(filtered) if filtered else None")
943
+ lines.append("")
944
+ lines.append("")
945
+ lines.append("def min_val(*args):")
946
+ lines.append(' """Informatica MIN equivalent (row-level)."""')
947
+ lines.append(" filtered = [a for a in args if a is not None]")
948
+ lines.append(" return min(filtered) if filtered else None")
949
+ lines.append("")
950
+ lines.append("")
951
+ lines.append("def sum_val(*args):")
952
+ lines.append(' """Informatica SUM equivalent (row-level)."""')
953
+ lines.append(" return sum(float(a) for a in args if a is not None)")
954
+ lines.append("")
955
+ lines.append("")
956
+ lines.append("def count_val(*args):")
957
+ lines.append(' """Informatica COUNT equivalent (row-level)."""')
958
+ lines.append(" return sum(1 for a in args if a is not None)")
959
+ lines.append("")
960
+ lines.append("")
961
+ lines.append("def avg_val(*args):")
962
+ lines.append(' """Informatica AVG equivalent (row-level)."""')
963
+ lines.append(" filtered = [float(a) for a in args if a is not None]")
964
+ lines.append(" return sum(filtered) / len(filtered) if filtered else None")
965
+ lines.append("")
966
+ lines.append("")
967
+ lines.append("def median_val(*args):")
968
+ lines.append(' """Informatica MEDIAN equivalent (row-level)."""')
969
+ lines.append(" import statistics")
970
+ lines.append(" filtered = [float(a) for a in args if a is not None]")
971
+ lines.append(" return statistics.median(filtered) if filtered else None")
972
+ lines.append("")
973
+ lines.append("")
974
+ lines.append("def stddev_val(*args):")
975
+ lines.append(' """Informatica STDDEV equivalent (row-level)."""')
976
+ lines.append(" import statistics")
977
+ lines.append(" filtered = [float(a) for a in args if a is not None]")
978
+ lines.append(" return statistics.stdev(filtered) if len(filtered) > 1 else 0")
979
+ lines.append("")
980
+ lines.append("")
981
+ lines.append("def variance_val(*args):")
982
+ lines.append(' """Informatica VARIANCE equivalent (row-level)."""')
983
+ lines.append(" import statistics")
984
+ lines.append(" filtered = [float(a) for a in args if a is not None]")
985
+ lines.append(" return statistics.variance(filtered) if len(filtered) > 1 else 0")
986
+ lines.append("")
987
+ lines.append("")
988
+ lines.append("def percentile_val(value, pct):")
989
+ lines.append(' """Informatica PERCENTILE equivalent."""')
990
+ lines.append(" return value")
991
+ lines.append("")
992
+ lines.append("")
993
+ lines.append("def first_val(*args):")
994
+ lines.append(' """Informatica FIRST equivalent."""')
995
+ lines.append(" for a in args:")
996
+ lines.append(" if a is not None:")
997
+ lines.append(" return a")
998
+ lines.append(" return None")
999
+ lines.append("")
1000
+ lines.append("")
1001
+ lines.append("def last_val(*args):")
1002
+ lines.append(' """Informatica LAST equivalent."""')
1003
+ lines.append(" result = None")
1004
+ lines.append(" for a in args:")
1005
+ lines.append(" if a is not None:")
1006
+ lines.append(" result = a")
1007
+ lines.append(" return result")
1008
+ lines.append("")
1009
+ lines.append("")
1010
+ lines.append("def moving_avg(value, window=3):")
1011
+ lines.append(' """Informatica MOVINGAVG equivalent."""')
1012
+ lines.append(" return value")
1013
+ lines.append("")
1014
+ lines.append("")
1015
+ lines.append("def moving_sum(value, window=3):")
1016
+ lines.append(' """Informatica MOVINGSUM equivalent."""')
1017
+ lines.append(" return value")
1018
+ lines.append("")
1019
+ lines.append("")
1020
+ lines.append("def cume(value):")
1021
+ lines.append(' """Informatica CUME equivalent."""')
1022
+ lines.append(" return value")
1023
+ lines.append("")
1024
+ lines.append("")
1025
+ lines.append("def set_count_variable(var_name, value=1):")
1026
+ lines.append(' """Informatica SETCOUNTVARIABLE equivalent."""')
1027
+ lines.append(" return set_variable(var_name, value)")
1028
+ lines.append("")
1029
+ lines.append("")
634
1030
  lines.append("def raise_error(message):")
635
1031
  lines.append(' """Informatica ERROR function equivalent."""')
636
1032
  lines.append(" logger.error(f'INFORMATICA ERROR: {message}')")
@@ -3,7 +3,11 @@ from informatica_python.models import (
3
3
  MappingDef, FolderDef, SourceDef, TargetDef,
4
4
  TransformationDef, ConnectorDef, InstanceDef,
5
5
  )
6
- from informatica_python.utils.expression_converter import convert_expression, convert_sql_expression
6
+ from informatica_python.utils.expression_converter import (
7
+ convert_expression, convert_sql_expression,
8
+ parse_join_condition, parse_lookup_condition,
9
+ parse_aggregate_expression, PANDAS_AGG_MAP,
10
+ )
7
11
  from informatica_python.utils.datatype_map import get_python_type
8
12
 
9
13
 
@@ -340,19 +344,56 @@ def _gen_aggregator_transform(lines, tx, tx_safe, input_df, source_dfs):
340
344
  group_by_ports = []
341
345
  agg_ports = []
342
346
  for fld in tx.fields:
343
- if "INPUT" in (fld.porttype or "").upper() and "OUTPUT" in (fld.porttype or "").upper():
347
+ pt = (fld.porttype or "").upper()
348
+ if "INPUT" in pt and "OUTPUT" in pt:
344
349
  group_by_ports.append(fld.name)
345
- elif "OUTPUT" in (fld.porttype or "").upper():
350
+ elif "OUTPUT" in pt and "INPUT" not in pt:
346
351
  agg_ports.append(fld)
347
352
 
348
- if group_by_ports:
349
- lines.append(f" df_{tx_safe} = {input_df}.groupby({group_by_ports}).agg(")
350
- for ap in agg_ports:
351
- expr = ap.expression or ap.name
352
- lines.append(f" # {ap.name}: {expr}")
353
- lines.append(f" ).reset_index()")
353
+ agg_dict = {}
354
+ rename_map = {}
355
+ computed_aggs = []
356
+ for ap in agg_ports:
357
+ expr_text = ap.expression or ap.name
358
+ agg_func, agg_col = parse_aggregate_expression(expr_text)
359
+ if agg_func and agg_col:
360
+ pandas_func = PANDAS_AGG_MAP.get(agg_func, agg_func)
361
+ if agg_col == "*":
362
+ agg_col = group_by_ports[0] if group_by_ports else ap.name
363
+ pandas_func = "count"
364
+ if agg_col in agg_dict:
365
+ temp_name = f"{agg_col}__{ap.name}"
366
+ agg_dict[temp_name] = (agg_col, pandas_func)
367
+ rename_map[temp_name] = ap.name
368
+ else:
369
+ agg_dict[ap.name] = (agg_col, pandas_func)
370
+ else:
371
+ computed_aggs.append((ap.name, expr_text))
372
+
373
+ if group_by_ports and agg_dict:
374
+ lines.append(f" # Aggregator: group by {group_by_ports}")
375
+ agg_spec = {}
376
+ for out_name, (col, func) in agg_dict.items():
377
+ agg_spec[out_name] = f"pd.NamedAgg(column='{col}', aggfunc='{func}')"
378
+
379
+ lines.append(f" df_{tx_safe} = {input_df}.groupby({group_by_ports}, as_index=False).agg(")
380
+ for out_name, spec in agg_spec.items():
381
+ lines.append(f" {out_name}={spec},")
382
+ lines.append(f" )")
383
+
384
+ if rename_map:
385
+ lines.append(f" df_{tx_safe} = df_{tx_safe}.rename(columns={rename_map})")
386
+ elif group_by_ports:
387
+ lines.append(f" # Aggregator: group by {group_by_ports}")
388
+ lines.append(f" df_{tx_safe} = {input_df}.groupby({group_by_ports}, as_index=False).agg('first')")
354
389
  else:
355
390
  lines.append(f" df_{tx_safe} = {input_df}.copy()")
391
+
392
+ for col_name, expr_text in computed_aggs:
393
+ expr_py = convert_expression(expr_text)
394
+ lines.append(f" # Computed aggregate: {col_name} = {expr_text}")
395
+ lines.append(f" df_{tx_safe}['{col_name}'] = {expr_py}")
396
+
356
397
  source_dfs[tx.name] = f"df_{tx_safe}"
357
398
 
358
399
 
@@ -384,12 +425,47 @@ def _gen_joiner_transform(lines, tx, tx_safe, input_df, input_sources, source_df
384
425
  elif attr.name == "Join Condition":
385
426
  join_condition = attr.value
386
427
 
428
+ master_fields = []
429
+ detail_fields = []
430
+ for fld in tx.fields:
431
+ pt = (fld.porttype or "").upper()
432
+ if "MASTER" in pt:
433
+ master_fields.append(fld.name)
434
+ elif "DETAIL" in pt:
435
+ detail_fields.append(fld.name)
436
+
437
+ left_keys, right_keys = parse_join_condition(join_condition)
438
+
387
439
  src_list = list(input_sources)
388
440
  if len(src_list) >= 2:
441
+ df_master = source_dfs.get(src_list[0], f"df_{_safe_name(src_list[0])}")
442
+ df_detail = source_dfs.get(src_list[1], f"df_{_safe_name(src_list[1])}")
443
+
444
+ lines.append(f" # Join ({join_type}): {join_condition or 'auto'}")
445
+ if left_keys and right_keys:
446
+ lines.append(f" df_{tx_safe} = {df_detail}.merge(")
447
+ lines.append(f" {df_master},")
448
+ lines.append(f" left_on={left_keys},")
449
+ lines.append(f" right_on={right_keys},")
450
+ lines.append(f" how='{join_type}',")
451
+ lines.append(f" suffixes=('', '_master')")
452
+ lines.append(f" )")
453
+ else:
454
+ common_cols = []
455
+ if master_fields and detail_fields:
456
+ common_cols = [f for f in detail_fields if f in master_fields]
457
+ if common_cols:
458
+ lines.append(f" df_{tx_safe} = {df_detail}.merge(")
459
+ lines.append(f" {df_master},")
460
+ lines.append(f" on={common_cols},")
461
+ lines.append(f" how='{join_type}',")
462
+ lines.append(f" suffixes=('', '_master')")
463
+ lines.append(f" )")
464
+ else:
465
+ lines.append(f" df_{tx_safe} = {df_detail}.merge({df_master}, how='{join_type}', suffixes=('', '_master'))")
466
+ elif len(src_list) == 1:
389
467
  df1 = source_dfs.get(src_list[0], f"df_{_safe_name(src_list[0])}")
390
- df2 = source_dfs.get(src_list[1], f"df_{_safe_name(src_list[1])}")
391
- lines.append(f" # Join: {join_condition or 'auto'}")
392
- lines.append(f" df_{tx_safe} = {df1}.merge({df2}, how='{join_type}')")
468
+ lines.append(f" df_{tx_safe} = {df1}.copy()")
393
469
  else:
394
470
  lines.append(f" df_{tx_safe} = {input_df}.copy()")
395
471
  source_dfs[tx.name] = f"df_{tx_safe}"
@@ -400,6 +476,8 @@ def _gen_lookup_transform(lines, tx, tx_safe, input_df, source_dfs):
400
476
  lookup_sql = ""
401
477
  lookup_condition = ""
402
478
  lookup_cache = "YES"
479
+ lookup_policy = "ERROR"
480
+ default_values = {}
403
481
  for attr in tx.attributes:
404
482
  if attr.name == "Lookup table name":
405
483
  lookup_table = attr.value
@@ -409,11 +487,19 @@ def _gen_lookup_transform(lines, tx, tx_safe, input_df, source_dfs):
409
487
  lookup_condition = attr.value
410
488
  elif attr.name == "Lookup caching enabled":
411
489
  lookup_cache = attr.value
490
+ elif attr.name == "Lookup policy on multiple match":
491
+ lookup_policy = attr.value
412
492
 
413
493
  return_fields = [f for f in tx.fields if "RETURN" in (f.porttype or "").upper() or
414
494
  ("LOOKUP" in (f.porttype or "").upper() and "OUTPUT" in (f.porttype or "").upper()
415
495
  and "INPUT" not in (f.porttype or "").upper())]
416
496
  input_fields = [f for f in tx.fields if "INPUT" in (f.porttype or "").upper()]
497
+ lookup_output_fields = [f for f in tx.fields if
498
+ "OUTPUT" in (f.porttype or "").upper()
499
+ and "INPUT" not in (f.porttype or "").upper()
500
+ and "RETURN" not in (f.porttype or "").upper()]
501
+
502
+ all_output_fields = return_fields + lookup_output_fields
417
503
 
418
504
  lines.append(f" # Lookup: {lookup_table or tx.name}")
419
505
  if lookup_sql:
@@ -425,19 +511,48 @@ def _gen_lookup_transform(lines, tx, tx_safe, input_df, source_dfs):
425
511
  elif lookup_table:
426
512
  lines.append(f" df_lkp_{tx_safe} = read_from_db(config, 'SELECT * FROM {lookup_table}', 'default')")
427
513
  else:
428
- lines.append(f" df_lkp_{tx_safe} = pd.DataFrame() # TODO: Configure lookup source")
514
+ lines.append(f" df_lkp_{tx_safe} = pd.DataFrame()")
429
515
 
430
- if lookup_condition:
431
- lines.append(f" # Condition: {lookup_condition}")
516
+ input_keys, lookup_keys = parse_lookup_condition(lookup_condition)
432
517
 
433
- lines.append(f" df_{tx_safe} = {input_df}.copy()")
518
+ if input_keys and lookup_keys:
519
+ lines.append(f" # Lookup condition: {lookup_condition}")
434
520
 
435
- if return_fields:
436
- ret_names = [f.name for f in return_fields]
437
- lines.append(f" # Lookup returns: {ret_names}")
438
- lines.append(f" # TODO: Implement lookup merge logic based on condition: {lookup_condition}")
439
- for rf in return_fields:
440
- lines.append(f" df_{tx_safe}['{rf.name}'] = None # From lookup")
521
+ lkp_cols = [f.name for f in all_output_fields]
522
+ select_cols = list(set(lookup_keys + lkp_cols))
523
+ lines.append(f" lkp_select_cols_{tx_safe} = [c for c in {select_cols} if c in df_lkp_{tx_safe}.columns]")
524
+
525
+ if lookup_policy and "FIRST" in lookup_policy.upper():
526
+ lines.append(f" df_lkp_{tx_safe} = df_lkp_{tx_safe}[lkp_select_cols_{tx_safe}].drop_duplicates(subset={lookup_keys}, keep='first')")
527
+ elif lookup_policy and "LAST" in lookup_policy.upper():
528
+ lines.append(f" df_lkp_{tx_safe} = df_lkp_{tx_safe}[lkp_select_cols_{tx_safe}].drop_duplicates(subset={lookup_keys}, keep='last')")
529
+ else:
530
+ lines.append(f" df_lkp_{tx_safe} = df_lkp_{tx_safe}[lkp_select_cols_{tx_safe}].drop_duplicates(subset={lookup_keys}, keep='first')")
531
+
532
+ lines.append(f" df_{tx_safe} = {input_df}.merge(")
533
+ lines.append(f" df_lkp_{tx_safe},")
534
+ lines.append(f" left_on={input_keys},")
535
+ lines.append(f" right_on={lookup_keys},")
536
+ lines.append(f" how='left',")
537
+ lines.append(f" suffixes=('', '_lkp')")
538
+ lines.append(f" )")
539
+
540
+ drop_cols = [k for k in lookup_keys if k not in input_keys]
541
+ if drop_cols:
542
+ lines.append(f" lkp_drop = [c for c in {drop_cols} if c in df_{tx_safe}.columns and c + '_lkp' not in df_{tx_safe}.columns]")
543
+
544
+ for rf in all_output_fields:
545
+ if rf.default_value:
546
+ lines.append(f" df_{tx_safe}['{rf.name}'] = df_{tx_safe}['{rf.name}'].fillna({repr(rf.default_value)})")
547
+ else:
548
+ lines.append(f" df_{tx_safe} = {input_df}.copy()")
549
+ if all_output_fields:
550
+ ret_names = [f.name for f in all_output_fields]
551
+ lines.append(f" # Lookup returns: {ret_names}")
552
+ lines.append(f" # Could not auto-parse lookup condition: {lookup_condition}")
553
+ for rf in all_output_fields:
554
+ default = repr(rf.default_value) if rf.default_value else "None"
555
+ lines.append(f" df_{tx_safe}['{rf.name}'] = {default}")
441
556
 
442
557
  source_dfs[tx.name] = f"df_{tx_safe}"
443
558