PyPI - informatica-python - Versions diffs - 1.9.2__py3-none-any.whl → 1.9.3__py3-none-any.whl - Mend

informatica-python 1.9.2 (py3-none-any.whl) → 1.9.3 (py3-none-any.whl)

This diff shows the changes between two publicly available versions of the package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.

Files changed (10) hide show

informatica_python/__init__.py CHANGED Viewed

@@ -7,7 +7,7 @@ Licensed under the MIT License.
 from informatica_python.converter import InformaticaConverter
-__version__ = "1.9.2"
+__version__ = "1.9.3"
 __author__ = "Nick"
 __license__ = "MIT"
 __all__ = ["InformaticaConverter"]

informatica_python/generators/mapping_gen.py CHANGED Viewed

@@ -1,3 +1,4 @@
+import os
 from typing import List, Dict
 from informatica_python.models import (
     MappingDef, FolderDef, SourceDef, TargetDef,
@@ -228,7 +229,6 @@ def generate_mapping_code(mapping: MappingDef, folder: FolderDef,
     lines.append("import logging")
     lines.append("import numpy as np")
     lines.append("import pandas as pd")
-    lines.append("from datetime import datetime")
     lines.append("from helper_functions import *")
     lines.append("")
     lines.append("logger = logging.getLogger(__name__)")
@@ -375,7 +375,40 @@ def generate_mapping_code(mapping: MappingDef, folder: FolderDef,
     lines.append(f"    run_{_safe_name(mapping.name)}(config)")
     lines.append("")
-    return "\n".join(lines)
+    code = "\n".join(lines)
+    func_sig = f"def run_{_safe_name(mapping.name)}(config):"
+    sig_idx = code.index(func_sig) + len(func_sig)
+    docstring_end = code.index('"""', code.index('"""', sig_idx) + 3) + 3
+    before_body = code[:docstring_end]
+    after_docstring = code[docstring_end:]
+    main_sentinel = "\n\nif __name__"
+    body_end_idx = after_docstring.index(main_sentinel)
+    body = after_docstring[:body_end_idx]
+    rest = after_docstring[body_end_idx:]
+    body_lines = body.split("\n")
+    while body_lines and body_lines[0].strip() == "":
+        body_lines.pop(0)
+    while body_lines and body_lines[-1].strip() == "":
+        body_lines.pop()
+    wrapped = []
+    wrapped.append("")
+    wrapped.append("    try:")
+    prev_blank = False
+    for bl in body_lines:
+        if bl.strip() == "":
+            if not prev_blank:
+                wrapped.append("")
+            prev_blank = True
+        else:
+            wrapped.append("    " + bl)
+            prev_blank = False
+    wrapped.append("")
+    wrapped.append("    except Exception as _exc:")
+    wrapped.append(f"        logger.error(f'Mapping {mapping.name} failed: {{_exc}}')")
+    wrapped.append("        raise")
+    wrapped.append("")
+    return before_body + "\n".join(wrapped) + rest
 def _safe_name(name):
@@ -386,6 +419,22 @@ def _safe_name(name):
     return safe.lower()
+def _emit_sql_with_params(lines, sql_var_name, sql_text, indent="    "):
+    import re
+    params = re.findall(r'\$\$(\w+)', sql_text)
+    lines.append(f"{indent}{sql_var_name} = '''")
+    for sql_line in sql_text.strip().split("\n"):
+        lines.append(f"{indent}{sql_line}")
+    lines.append(f"{indent}'''")
+    if params:
+        seen = set()
+        for p in params:
+            if p in seen:
+                continue
+            seen.add(p)
+            lines.append(f"{indent}{sql_var_name} = {sql_var_name}.replace('$${p}', str(get_param(config, '{p}')))")
 def _flatfile_config_dict(ff):
     cfg = {}
     if not ff:
@@ -504,7 +553,7 @@ def _emit_flatfile_write(lines, var_name, tgt_def, indent="    ", file_path_over
 def _build_source_map(mapping, folder):
     source_map = {}
     for inst in mapping.instances:
-        if inst.type == "Source Definition":
+        if inst.type.upper() in ("SOURCE DEFINITION", "SOURCE"):
             tx_name = inst.transformation_name or inst.name
             for src in folder.sources:
                 if src.name == tx_name:
@@ -518,7 +567,7 @@ def _build_source_map(mapping, folder):
 def _build_target_map(mapping, folder):
     target_map = {}
     for inst in mapping.instances:
-        if inst.type == "Target Definition":
+        if inst.type.upper() in ("TARGET DEFINITION", "TARGET"):
             tx_name = inst.transformation_name or inst.name
             for tgt in folder.targets:
                 if tgt.name == tx_name:
@@ -594,7 +643,9 @@ def _generate_source_qualifier(lines, sq, source_map, source_dfs, connector_grap
         if not connected_sources and source_map:
             connected_sources.add(next(iter(source_map)))
+    lines.append(f"    # -------------------------------------------------------------------")
     lines.append(f"    # Source Qualifier: {sq.name}")
+    lines.append(f"    # -------------------------------------------------------------------")
     if pre_sql:
         lines.append(f"    # Pre-SQL")
@@ -606,10 +657,7 @@ def _generate_source_qualifier(lines, sq, source_map, source_dfs, connector_grap
     if not connected_sources:
         sq_src_name = sq.name[3:] if sq.name.upper().startswith("SQ_") else sq.name
         if sql_override:
-            lines.append(f"    sql_{sq_safe} = '''")
-            for sql_line in sql_override.strip().split("\n"):
-                lines.append(f"    {sql_line}")
-            lines.append(f"    '''")
+            _emit_sql_with_params(lines, f"sql_{sq_safe}", sql_override)
             lines.append(f"    df_{sq_safe} = read_from_db(config, sql_{sq_safe}, 'default')")
         else:
             lines.append(f"    df_{sq_safe} = read_file(config.get('sources', {{}}).get('{sq_src_name}', {{}}).get('file_path', '{sq_src_name}'),")
@@ -620,10 +668,7 @@ def _generate_source_qualifier(lines, sq, source_map, source_dfs, connector_grap
         sq_override = (session_overrides or {}).get(sq.name, {}) or (session_overrides or {}).get(src_name, {})
         conn_name = sq_override.get("connection_name") or (_safe_name(src_def.db_name) if src_def.db_name else "default")
-        lines.append(f"    sql_{sq_safe} = '''")
-        for sql_line in sql_override.strip().split("\n"):
-            lines.append(f"    {sql_line}")
-        lines.append(f"    '''")
+        _emit_sql_with_params(lines, f"sql_{sq_safe}", sql_override)
         lines.append(f"    df_{sq_safe} = read_from_db(config, sql_{sq_safe}, '{conn_name}')")
     elif len(connected_sources) == 1:
         src_name = next(iter(connected_sources))
@@ -656,10 +701,7 @@ def _generate_source_qualifier(lines, sq, source_map, source_dfs, connector_grap
         lines.append(f"    df_{sq_safe} = df_{_safe_name(next(iter(connected_sources)))}")
     source_dfs[sq.name] = f"df_{sq_safe}"
-    lines.append(f"    try:")
-    lines.append(f"        logger.info(f'Source {sq.name}: {{len(df_{sq_safe})}} rows read')")
-    lines.append(f"    except Exception:")
-    lines.append(f"        logger.info('Source {sq.name}: rows read (count unavailable)')")
+    lines.append(f"    logger.info(f'Source {sq.name}: {{len(df_{sq_safe})}} rows read')")
     if post_sql:
         lines.append(f"    # Post-SQL")
@@ -699,10 +741,10 @@ def _generate_transformation(lines, tx, connector_graph, source_dfs, transform_m
     lines.append(f"    # Input fields: {', '.join(in_fields[:10])}{' ...' if len(in_fields) > 10 else ''}")
     lines.append(f"    # Output fields: {', '.join(out_fields[:10])}{' ...' if len(out_fields) > 10 else ''}")
     lines.append(f"    # -------------------------------------------------------------------")
-    lines.append(f"    try:")
-    lines.append(f"        _input_rows_{tx_safe} = len({input_df})")
-    lines.append(f"    except Exception:")
-    lines.append(f"        _input_rows_{tx_safe} = -1")
+    if input_df == "df_input":
+        lines.append(f"    _input_rows_{tx_safe} = -1")
+    else:
+        lines.append(f"    _input_rows_{tx_safe} = len({input_df})")
     if tx_type == "expression":
         _gen_expression_transform(lines, tx, tx_safe, input_df, source_dfs, data_lib)
@@ -742,28 +784,26 @@ def _generate_transformation(lines, tx, connector_graph, source_dfs, transform_m
         lines.append(f"    df_{tx_safe} = {copy_expr}")
         source_dfs[tx.name] = f"df_{tx_safe}"
-    lines.append(f"    try:")
-    lines.append(f"        _output_rows_{tx_safe} = len(df_{tx_safe})")
-    lines.append(f"    except Exception:")
-    lines.append(f"        _output_rows_{tx_safe} = -1")
+    lines.append(f"    _output_rows_{tx_safe} = len(df_{tx_safe})")
     lines.append(f"    logger.info(f'{tx.name} ({tx.type}): {{_input_rows_{tx_safe}}} input rows -> {{_output_rows_{tx_safe}}} output rows')")
     lines.append("")
 def _gen_expression_transform(lines, tx, tx_safe, input_df, source_dfs, data_lib="pandas"):
-    copy_expr = lib_copy(data_lib, input_df)
-    lines.append(f"    df_{tx_safe} = {copy_expr}")
-    has_expressions = False
+    active_fields = []
     for fld in tx.fields:
-        if fld.expression and fld.expression.strip() and fld.expression.strip() != fld.name:
-            has_expressions = True
+        if fld.expression and fld.expression.strip() and fld.expression.strip().lower() != fld.name.lower():
+            active_fields.append(fld)
+    if active_fields:
+        copy_expr = lib_copy(data_lib, input_df)
+        lines.append(f"    df_{tx_safe} = {copy_expr}")
+        for fld in active_fields:
             expr_vec = convert_expression_vectorized(fld.expression, f"df_{tx_safe}")
             lines.append(f"    # {fld.name} = {fld.expression}")
-            if fld.porttype and "OUTPUT" in fld.porttype.upper() and "INPUT" not in fld.porttype.upper():
-                lines.append(f"    df_{tx_safe}['{fld.name}'] = {expr_vec}")
-            else:
-                lines.append(f"    df_{tx_safe}['{fld.name}'] = {expr_vec}")
-    if not has_expressions:
+            lines.append(f"    df_{tx_safe}['{fld.name}'] = {expr_vec}")
+    else:
+        lines.append(f"    df_{tx_safe} = {input_df}")
         lines.append(f"    # Pass-through expression (no transformations)")
     source_dfs[tx.name] = f"df_{tx_safe}"
@@ -842,7 +882,11 @@ def _gen_sorter_transform(lines, tx, tx_safe, input_df, source_dfs, data_lib="pa
     sort_dirs = []
     for fld in tx.fields:
         sort_keys.append(fld.name)
-        sort_dirs.append(True)
+        direction = 'ASCENDING'
+        for fa in getattr(fld, 'field_attributes', []):
+            if isinstance(fa, dict) and fa.get('name', '').upper() == 'SORTDIRECTION':
+                direction = fa.get('value', 'ASCENDING') or 'ASCENDING'
+        sort_dirs.append(direction.upper() != 'DESCENDING')
     if sort_keys:
         sort_expr = lib_sort(data_lib, input_df, sort_keys, sort_dirs)
         lines.append(f"    df_{tx_safe} = {sort_expr}")
@@ -881,13 +925,23 @@ def _gen_joiner_transform(lines, tx, tx_safe, input_df, input_sources, source_df
     master_src = None
     detail_src = None
     input_conns = connector_graph.get("to", {}).get(tx.name, []) if connector_graph else []
+    port_to_col = {}
+    master_fields_lower = {f.lower() for f in master_fields}
+    detail_fields_lower = {f.lower() for f in detail_fields}
     for conn in input_conns:
         to_field = conn.to_field
-        if to_field in master_fields:
+        port_to_col[to_field] = conn.from_field
+        port_to_col[to_field.lower()] = conn.from_field
+        if to_field in master_fields or to_field.lower() in master_fields_lower:
             master_src = conn.from_instance
-        elif to_field in detail_fields:
+        elif to_field in detail_fields or to_field.lower() in detail_fields_lower:
             detail_src = conn.from_instance
+    if left_keys and right_keys and port_to_col:
+        left_keys = [port_to_col.get(k, port_to_col.get(k.lower(), k)) for k in left_keys]
+        right_keys = [port_to_col.get(k, port_to_col.get(k.lower(), k)) for k in right_keys]
     src_list = list(input_sources)
     if not master_src and not detail_src and len(src_list) >= 2:
         master_src = src_list[0]
@@ -960,10 +1014,7 @@ def _gen_lookup_transform(lines, tx, tx_safe, input_df, source_dfs, data_lib="pa
     lines.append(f"    # Lookup: {lookup_table or tx.name}")
     if lookup_sql:
-        lines.append(f"    lkp_sql_{tx_safe} = '''")
-        for sql_line in lookup_sql.strip().split("\n"):
-            lines.append(f"    {sql_line}")
-        lines.append(f"    '''")
+        _emit_sql_with_params(lines, f"lkp_sql_{tx_safe}", lookup_sql)
         lines.append(f"    df_lkp_{tx_safe} = read_from_db(config, lkp_sql_{tx_safe}, 'default')")
     elif lookup_table:
         lines.append(f"    df_lkp_{tx_safe} = read_from_db(config, 'SELECT * FROM {lookup_table}', 'default')")
@@ -998,7 +1049,11 @@ def _gen_lookup_transform(lines, tx, tx_safe, input_df, source_dfs, data_lib="pa
             lines.append(f"    if _lkp_drop:")
             lines.append(f"        df_{tx_safe} = df_{tx_safe}.drop(columns=_lkp_drop)")
+        seen_output_cols = set()
         for rf in all_output_fields:
+            if rf.name in seen_output_cols:
+                continue
+            seen_output_cols.add(rf.name)
             lines.append(f"    if '{rf.name}' not in df_{tx_safe}.columns:")
             lines.append(f"        df_{tx_safe}['{rf.name}'] = None")
             if rf.default_value:
@@ -1073,14 +1128,19 @@ def _gen_update_strategy(lines, tx, tx_safe, input_df, source_dfs):
         for dd_const, label in dd_map.items():
             expr = expr.replace(dd_const, f"'{label}'")
         try:
-            converted = convert_expression(expr)
+            expr_vec = convert_expression_vectorized(expr, f"df_{tx_safe}")
             lines.append(f"    # Original expression: {strategy_expr}")
-            lines.append(f"    def _resolve_strategy(row):")
-            lines.append(f"        return {converted}")
-            lines.append(f"    df_{tx_safe}['_update_strategy'] = df_{tx_safe}.apply(_resolve_strategy, axis=1)")
+            lines.append(f"    df_{tx_safe}['_update_strategy'] = {expr_vec}")
         except Exception:
-            lines.append(f"    # Could not parse strategy expression: {strategy_expr}")
-            lines.append(f"    df_{tx_safe}['_update_strategy'] = 'INSERT'")
+            try:
+                converted = convert_expression(expr)
+                lines.append(f"    # Original expression: {strategy_expr}")
+                lines.append(f"    def _resolve_strategy(row):")
+                lines.append(f"        return {converted}")
+                lines.append(f"    df_{tx_safe}['_update_strategy'] = df_{tx_safe}.apply(_resolve_strategy, axis=1)")
+            except Exception:
+                lines.append(f"    # Could not parse strategy expression: {strategy_expr}")
+                lines.append(f"    df_{tx_safe}['_update_strategy'] = 'INSERT'")
     source_dfs[tx.name] = f"df_{tx_safe}"
@@ -1343,7 +1403,7 @@ def _gen_sql_transform(lines, tx, tx_safe, input_df, source_dfs):
             sql_query = convert_sql_expression(attr.value)
     lines.append(f"    # SQL Transformation: {tx.name}")
     if sql_query:
-        lines.append(f"    sql_{tx_safe} = '''{sql_query}'''")
+        _emit_sql_with_params(lines, f"sql_{tx_safe}", sql_query)
         lines.append(f"    df_{tx_safe} = read_from_db(config, sql_{tx_safe}, 'default')")
     else:
         lines.append(f"    df_{tx_safe} = {input_df}.copy()")
@@ -1371,12 +1431,21 @@ def _generate_target_write(lines, tgt_name, tgt_def, connector_graph, source_dfs
     for c in to_conns:
         col_mapping[c.to_field] = c.from_field
+    lines.append(f"    # -------------------------------------------------------------------")
     lines.append(f"    # Write to target: {tgt_def.name}")
+    if tgt_def.database_type:
+        lines.append(f"    # Database type: {tgt_def.database_type}")
+    target_field_names = [f.name for f in tgt_def.fields] if tgt_def.fields else []
+    if target_field_names:
+        lines.append(f"    # Target fields: {', '.join(target_field_names[:10])}{' ...' if len(target_field_names) > 10 else ''}")
+    lines.append(f"    # -------------------------------------------------------------------")
     if col_mapping:
+        lines.append(f"    # Column mapping: source -> target")
         lines.append(f"    target_columns_{tgt_safe} = {col_mapping}")
         lines.append(f"    df_target_{tgt_safe} = {input_df}.rename(columns={{v: k for k, v in target_columns_{tgt_safe}.items()}})")
         target_cols = [f.name for f in tgt_def.fields] if tgt_def.fields else None
         if target_cols:
+            lines.append(f"    # Select only target columns")
             lines.append(f"    available_cols = [c for c in {target_cols} if c in df_target_{tgt_safe}.columns]")
             lines.append(f"    if '_update_strategy' in df_target_{tgt_safe}.columns and '_update_strategy' not in available_cols:")
             lines.append(f"        available_cols.append('_update_strategy')")
@@ -1389,17 +1458,37 @@ def _generate_target_write(lines, tgt_name, tgt_def, connector_graph, source_dfs
     tgt_override = (session_overrides or {}).get(tgt_name, {})
     tgt_conn = tgt_override.get("connection_name")
+    _FILE_EXTENSIONS = {".csv", ".dat", ".txt", ".xml", ".json", ".parquet", ".xlsx", ".xls", ".tsv", ".avro"}
+    _is_file_target = bool(
+        tgt_override.get("output_file_directory") or tgt_override.get("output_filename")
+        or tgt_def.flatfile
+        or (tgt_def.database_type and tgt_def.database_type == "Flat File")
+        or os.path.splitext(tgt_def.name)[1].lower() in _FILE_EXTENSIONS
+    )
+    _is_db_target = bool(
+        tgt_def.database_type and tgt_def.database_type != "Flat File"
+    )
     if tgt_override.get("output_file_directory") or tgt_override.get("output_filename"):
         out_dir = tgt_override.get("output_file_directory", ".")
         out_file = tgt_override.get("output_filename", tgt_def.name)
+        lines.append(f"    # Write to file (session override path)")
         lines.append(f"    _tgt_path_{tgt_safe} = config.get('targets', {{}}).get('{tgt_def.name}', {{}}).get('file_path',")
         lines.append(f"        os.path.join('{out_dir}', '{out_file}'))")
         if tgt_def.flatfile:
             _emit_flatfile_write(lines, tgt_safe, tgt_def, file_path_override=True)
         else:
             lines.append(f"    write_file(df_target_{tgt_safe}, _tgt_path_{tgt_safe}, config.get('targets', {{}}).get('{tgt_def.name}', {{}}))")
-    elif tgt_def.database_type and tgt_def.database_type != "Flat File":
+    elif tgt_def.flatfile:
+        lines.append(f"    # Write to flat file")
+        _emit_flatfile_write(lines, tgt_safe, tgt_def)
+    elif _is_file_target and not _is_db_target:
+        lines.append(f"    # Write to file")
+        lines.append(f"    write_file(df_target_{tgt_safe}, config.get('targets', {{}}).get('{tgt_def.name}', {{}}).get('file_path', '{tgt_def.name}'),")
+        lines.append(f"              config.get('targets', {{}}).get('{tgt_def.name}', {{}}))")
+    else:
         conn_label = tgt_conn or "target"
+        lines.append(f"    # Write to database table")
         lines.append(f"    if '_update_strategy' in df_target_{tgt_safe}.columns:")
         key_cols = [f.name for f in tgt_def.fields if getattr(f, 'keytype', 'NOT A KEY') == 'PRIMARY KEY'] or None
         if key_cols:
@@ -1408,15 +1497,8 @@ def _generate_target_write(lines, tgt_name, tgt_def, connector_graph, source_dfs
             lines.append(f"        write_with_update_strategy(config, df_target_{tgt_safe}, '{tgt_def.name}', '{conn_label}')")
         lines.append(f"    else:")
         lines.append(f"        write_to_db(config, df_target_{tgt_safe}, '{tgt_def.name}', '{conn_label}')")
-    elif tgt_def.flatfile:
-        _emit_flatfile_write(lines, tgt_safe, tgt_def)
-    else:
-        lines.append(f"    write_file(df_target_{tgt_safe}, config.get('targets', {{}}).get('{tgt_def.name}', {{}}).get('file_path', '{tgt_def.name}'),")
-        lines.append(f"              config.get('targets', {{}}).get('{tgt_def.name}', {{}}))")
-    lines.append(f"    try:")
-    lines.append(f"        logger.info(f'Target {tgt_def.name}: {{len(df_target_{tgt_safe})}} rows written')")
-    lines.append(f"    except Exception:")
-    lines.append(f"        logger.info('Target {tgt_def.name}: rows written (count unavailable)')")
+    lines.append(f"    logger.info(f'Target {tgt_def.name}: {{len(df_target_{tgt_safe})}} rows written')")
+    lines.append("")
 CAST_MAP = {

informatica_python/generators/workflow_gen.py CHANGED Viewed

@@ -179,24 +179,41 @@ def _generate_workflow_function(lines, wf: WorkflowDef, folder: FolderDef, workl
 def _emit_task_code(lines, task, mapping_name_map, session_to_mapping, wf, worklets):
     task_safe = _safe_name(task.name)
-    if task.task_type == "Start Task":
+    if task.task_type in ("Start Task", "Start"):
         lines.append(f"    # Start Task: {task.name}")
         lines.append(f"    logger.info('Workflow started')")
         lines.append("")
         return
     if task.task_type == "Session":
-        mapping_name = session_to_mapping.get(task.task_name or task.name, "")
+        session_key = task.task_name or task.name
+        mapping_name = session_to_mapping.get(session_key, "")
         run_func = mapping_name_map.get(mapping_name, None)
+        if not run_func:
+            best_match = None
+            best_len = 0
+            session_lower = session_key.lower()
+            for mname, rfunc in mapping_name_map.items():
+                safe_mname = _safe_name(mname)
+                if session_lower.endswith(safe_mname) and len(safe_mname) > best_len:
+                    best_match = rfunc
+                    best_len = len(safe_mname)
+            if not best_match:
+                for mname, rfunc in mapping_name_map.items():
+                    safe_mname = _safe_name(mname)
+                    if safe_mname in session_lower and len(safe_mname) > best_len:
+                        best_match = rfunc
+                        best_len = len(safe_mname)
+            run_func = best_match
         lines.append(f"    # Session: {task.name}")
         lines.append(f"    try:")
         lines.append(f"        logger.info('Executing session: {task.name}')")
         if run_func:
             lines.append(f"        {run_func}(config)")
         else:
-            lines.append(f"        # TODO: Map session '{task.name}' to corresponding mapping function")
-            lines.append(f"        logger.warning('Session {task.name} has no mapped function')")
+            lines.append(f"        logger.warning('Session {task.name}: no mapped function found — verify mapping linkage')")
         lines.append(f"    except Exception as e:")
         lines.append(f"        logger.error(f'Session {task.name} failed: {{e}}')")

informatica_python/utils/expression_converter.py CHANGED Viewed

@@ -297,12 +297,132 @@ def _resolve_char_arg(arg, df_var):
     return arg
+def _strip_inline_comments(text):
+    result = []
+    i = 0
+    in_string = False
+    str_char = None
+    depth = 0
+    while i < len(text):
+        ch = text[i]
+        if in_string:
+            result.append(ch)
+            if ch == str_char and (i == 0 or text[i - 1] != '\\'):
+                in_string = False
+        elif ch in ("'", '"'):
+            in_string = True
+            str_char = ch
+            result.append(ch)
+        elif ch == '(':
+            depth += 1
+            result.append(ch)
+        elif ch == ')':
+            depth -= 1
+            result.append(ch)
+        elif ch == '-' and i + 1 < len(text) and text[i + 1] == '-' and depth == 0:
+            break
+        else:
+            result.append(ch)
+        i += 1
+    return ''.join(result).strip()
+def _trunc_vec(va):
+    if len(va) == 1:
+        return f'np.trunc({va[0]})'
+    raw2 = va[1].strip()
+    if re.match(r'^-?\d+$', raw2):
+        n = int(raw2)
+        if n == 0:
+            return f'np.trunc({va[0]})'
+        return f'(np.trunc({va[0]} * 10**{n}) / 10**{n})'
+    fmt = raw2.strip("'\"").upper()
+    if fmt in ('DD', 'D'):
+        return f'{va[0]}.dt.floor("D")'
+    elif fmt in ('MM', 'MON', 'MONTH'):
+        return f'{va[0]}.dt.to_period("M").dt.to_timestamp()'
+    elif fmt in ('YY', 'YYYY', 'YEAR'):
+        return f'{va[0]}.dt.to_period("Y").dt.to_timestamp()'
+    elif fmt in ('HH', 'HH24'):
+        return f'{va[0]}.dt.floor("H")'
+    return f'{va[0]}.dt.floor("{fmt}")'
+_VEC_INLINE = {
+    'TO_INTEGER': lambda va: f'pd.to_numeric({va[0]}, errors="coerce").fillna(0).astype(int)',
+    'TO_BIGINT': lambda va: f'pd.to_numeric({va[0]}, errors="coerce").astype("Int64")',
+    'TO_FLOAT': lambda va: f'pd.to_numeric({va[0]}, errors="coerce")',
+    'TO_DECIMAL': lambda va: f'pd.to_numeric({va[0]}, errors="coerce")',
+    'LENGTH': lambda va: f'{va[0]}.str.len()',
+    'ROUND': lambda va: (f'np.round({va[0]}, {va[1]})' if len(va) >= 2 else f'np.round({va[0]})'),
+    'ABS': lambda va: f'np.abs({va[0]})',
+    'CEIL': lambda va: f'np.ceil({va[0]})',
+    'CEILING': lambda va: f'np.ceil({va[0]})',
+    'FLOOR': lambda va: f'np.floor({va[0]})',
+    'MOD': lambda va: (f'({va[0]} % {va[1]})' if len(va) >= 2 else va[0]),
+    'POWER': lambda va: (f'np.power({va[0]}, {va[1]})' if len(va) >= 2 else va[0]),
+    'SQRT': lambda va: f'np.sqrt({va[0]})',
+    'LOG': lambda va: f'np.log10({va[0]})',
+    'LN': lambda va: f'np.log({va[0]})',
+    'EXP': lambda va: f'np.exp({va[0]})',
+    'SIGN': lambda va: f'np.sign({va[0]})',
+    'NVL': lambda va: (f'{va[0]}.fillna({va[1]})' if len(va) >= 2 else va[0]),
+    'ISNULL': lambda va: f'{va[0]}.isna()',
+    'IIF': lambda va: (f'np.where({va[0]}, {va[1]}, {va[2]})' if len(va) >= 3
+                        else (f'np.where({va[0]}, {va[1]}, None)' if len(va) >= 2 else va[0])),
+    'IS_NUMBER': lambda va: f'pd.to_numeric({va[0]}, errors="coerce").notna()',
+    'IS_SPACES': lambda va: f'{va[0]}.str.strip().eq("")',
+    'UPPER': lambda va: f'{va[0]}.str.upper()',
+    'LOWER': lambda va: f'{va[0]}.str.lower()',
+    'TRUNC': _trunc_vec,
+}
+_VEC_FUNC_ORDER = sorted(
+    set(list(_VEC_INLINE.keys()) + list(INFA_FUNC_MAP.keys())),
+    key=lambda x: -len(x),
+)
+def _convert_remaining_funcs(text, df_var):
+    converted = text
+    for fn in _VEC_FUNC_ORDER:
+        safety = 10
+        offset = 0
+        while safety > 0:
+            safety -= 1
+            fr = _find_func_call(converted[offset:], fn)
+            if not fr:
+                break
+            rel_start, rel_end, raw_args = fr
+            abs_start = offset + rel_start
+            abs_end = offset + rel_end
+            if abs_start > 0 and converted[abs_start - 1] == '.':
+                offset = abs_end
+                continue
+            va = [_vec_recursive(a.strip(), df_var) for a in raw_args] if raw_args else []
+            if fn in _VEC_INLINE and va:
+                repl = _VEC_INLINE[fn](va)
+            elif fn in INFA_FUNC_MAP:
+                py_func = INFA_FUNC_MAP[fn]
+                repl = f'{py_func}({", ".join(va)})'
+            else:
+                break
+            converted = converted[:abs_start] + repl + converted[abs_end:]
+            offset = abs_start + len(repl)
+    return converted
 def _vec_recursive(expr, df_var):
     if not expr or not expr.strip():
         return "None"
     cleaned = expr.strip()
+    if '--' in cleaned:
+        cleaned = _strip_inline_comments(cleaned)
+        if not cleaned:
+            return "None"
     if re.match(r'^-?\d+(\.\d+)?$', cleaned):
         return cleaned
@@ -317,6 +437,12 @@ def _vec_recursive(expr, df_var):
         return 'True'
     if upper == 'FALSE':
         return 'False'
+    if upper == 'SYSDATE' or upper == 'SYSTIMESTAMP':
+        return 'pd.Timestamp.now()'
+    if re.match(r'^SYSTIMESTAMP\s*\(\s*\)$', cleaned, re.IGNORECASE):
+        return 'pd.Timestamp.now()'
+    if re.match(r'^SYSDATE\s*\(\s*\)$', cleaned, re.IGNORECASE):
+        return 'pd.Timestamp.now()'
     if re.match(r'^[A-Za-z_][A-Za-z0-9_]*$', cleaned):
         return f'{df_var}["{cleaned}"]'
@@ -338,6 +464,22 @@ def _vec_recursive(expr, df_var):
                 vec_args = ', '.join(_vec_recursive(a, df_var) for a in args)
                 return f'lookup_func("{lkp_name}", {vec_args})'
+    unconnected_lkp = re.match(r'^:(\w+)\.(\w+)\s*\(', cleaned, re.IGNORECASE)
+    if unconnected_lkp:
+        port_group = unconnected_lkp.group(1)
+        lkp_name = unconnected_lkp.group(2)
+        paren_start = cleaned.index('(')
+        paren_end = _find_matching_paren(cleaned, paren_start)
+        if paren_end != -1:
+            rest = cleaned[paren_end + 1:].strip()
+            inner = cleaned[paren_start + 1:paren_end]
+            args = _split_args(inner)
+            vec_args = ', '.join(_vec_recursive(a, df_var) for a in args)
+            result = f'lookup_func("{lkp_name}", {vec_args})'
+            if rest:
+                result = result + ' ' + _vec_recursive(rest, df_var)
+            return result
     iif_result = _find_func_call(cleaned, 'IIF')
     if iif_result and iif_result[0] == 0 and iif_result[1] == len(cleaned):
         _, _, args = iif_result
@@ -346,6 +488,43 @@ def _vec_recursive(expr, df_var):
             true_val = _vec_recursive(args[1], df_var)
             false_val = _vec_recursive(args[2], df_var)
             return f"np.where({cond}, {true_val}, {false_val})"
+        elif len(args) == 2:
+            cond = _vectorize_condition(args[0], df_var)
+            true_val = _vec_recursive(args[1], df_var)
+            return f"np.where({cond}, {true_val}, None)"
+    decode_result = _find_func_call(cleaned, 'DECODE')
+    if decode_result and decode_result[0] == 0 and decode_result[1] == len(cleaned):
+        _, _, args = decode_result
+        if len(args) >= 3:
+            first_arg = args[0].strip().upper()
+            if first_arg == 'TRUE':
+                pairs = args[1:]
+                if len(pairs) % 2 == 1:
+                    default_val = _vec_recursive(pairs[-1], df_var)
+                    pairs = pairs[:-1]
+                else:
+                    default_val = 'None'
+                result = default_val
+                for i in range(len(pairs) - 2, -1, -2):
+                    cond = _vectorize_condition(pairs[i], df_var)
+                    val = _vec_recursive(pairs[i + 1], df_var)
+                    result = f"np.where({cond}, {val}, {result})"
+                return result
+            else:
+                switch_val = _vec_recursive(args[0], df_var)
+                pairs = args[1:]
+                if len(pairs) % 2 == 1:
+                    default_val = _vec_recursive(pairs[-1], df_var)
+                    pairs = pairs[:-1]
+                else:
+                    default_val = 'None'
+                result = default_val
+                for i in range(len(pairs) - 2, -1, -2):
+                    case_val = _vec_recursive(pairs[i], df_var)
+                    then_val = _vec_recursive(pairs[i + 1], df_var)
+                    result = f"np.where({switch_val} == {case_val}, {then_val}, {result})"
+                return result
     nvl_result = _find_func_call(cleaned, 'NVL')
     if nvl_result and nvl_result[0] == 0 and nvl_result[1] == len(cleaned):
@@ -581,6 +760,99 @@ def _vec_recursive(expr, df_var):
                     parts.append(f'{v}.astype(str)')
             return ' + '.join(parts)
+    reverse_result = _find_func_call(cleaned, 'REVERSE')
+    if reverse_result and reverse_result[0] == 0 and reverse_result[1] == len(cleaned):
+        _, _, args = reverse_result
+        if len(args) >= 1:
+            inner_val = _vec_recursive(args[0], df_var)
+            return f'{inner_val}.str[::-1]'
+    is_spaces_result = _find_func_call(cleaned, 'IS_SPACES')
+    if is_spaces_result and is_spaces_result[0] == 0 and is_spaces_result[1] == len(cleaned):
+        _, _, args = is_spaces_result
+        if len(args) >= 1:
+            inner_val = _vec_recursive(args[0], df_var)
+            return f'{inner_val}.str.strip().eq("")'
+    is_number_result = _find_func_call(cleaned, 'IS_NUMBER')
+    if is_number_result and is_number_result[0] == 0 and is_number_result[1] == len(cleaned):
+        _, _, args = is_number_result
+        if len(args) >= 1:
+            inner_val = _vec_recursive(args[0], df_var)
+            return f'pd.to_numeric({inner_val}, errors="coerce").notna()'
+    trunc_result = _find_func_call(cleaned, 'TRUNC')
+    if trunc_result and trunc_result[0] == 0 and trunc_result[1] == len(cleaned):
+        _, _, args = trunc_result
+        if len(args) >= 1:
+            field_val = _vec_recursive(args[0], df_var)
+            if len(args) >= 2:
+                raw_arg2 = args[1].strip().strip("'\"")
+                try:
+                    precision = int(raw_arg2)
+                    if precision == 0:
+                        return f'np.trunc({field_val})'
+                    return f'(np.trunc({field_val} * 10**{precision}) / 10**{precision})'
+                except ValueError:
+                    pass
+                fmt = raw_arg2.upper()
+                if fmt in ('DD', 'D'):
+                    return f'{field_val}.dt.floor("D")'
+                elif fmt in ('MM', 'MON', 'MONTH'):
+                    return f'{field_val}.dt.to_period("M").dt.to_timestamp()'
+                elif fmt in ('YY', 'YYYY', 'YEAR'):
+                    return f'{field_val}.dt.to_period("Y").dt.to_timestamp()'
+                elif fmt in ('HH', 'HH24'):
+                    return f'{field_val}.dt.floor("H")'
+                return f'{field_val}.dt.floor("{fmt}")'
+            return f'np.trunc({field_val})'
+    add_to_date_result = _find_func_call(cleaned, 'ADD_TO_DATE')
+    if add_to_date_result and add_to_date_result[0] == 0 and add_to_date_result[1] == len(cleaned):
+        _, _, args = add_to_date_result
+        if len(args) >= 3:
+            date_val = _vec_recursive(args[0], df_var)
+            part = args[1].strip().strip("'\"").upper()
+            amount = _vec_recursive(args[2], df_var)
+            if part in ('YY', 'YYYY', 'YEAR'):
+                return f'{date_val} + pd.DateOffset(years={amount})'
+            elif part in ('MM', 'MON', 'MONTH'):
+                return f'{date_val} + pd.DateOffset(months={amount})'
+            else:
+                unit_map = {
+                    'DD': 'D', 'DAY': 'D', 'D': 'D', 'DDD': 'D',
+                    'HH': 'h', 'HH24': 'h', 'HOUR': 'h',
+                    'MI': 'min', 'MIN': 'min', 'MINUTE': 'min',
+                    'SS': 's', 'SEC': 's', 'SECOND': 's',
+                }
+                pd_unit = unit_map.get(part, 'D')
+                return f'{date_val} + pd.to_timedelta({amount}, unit="{pd_unit}")'
+    date_diff_result = _find_func_call(cleaned, 'DATE_DIFF')
+    if date_diff_result and date_diff_result[0] == 0 and date_diff_result[1] == len(cleaned):
+        _, _, args = date_diff_result
+        if len(args) >= 3:
+            date1 = _vec_recursive(args[0], df_var)
+            date2 = _vec_recursive(args[1], df_var)
+            part = args[2].strip().strip("'\"").upper()
+            if part in ('DD', 'DAY', 'D', 'DDD'):
+                return f'({date1} - {date2}).dt.days'
+            elif part in ('HH', 'HH24', 'HOUR'):
+                return f'({date1} - {date2}).dt.total_seconds() / 3600'
+            elif part in ('MI', 'MIN', 'MINUTE'):
+                return f'({date1} - {date2}).dt.total_seconds() / 60'
+            elif part in ('SS', 'SEC', 'SECOND'):
+                return f'({date1} - {date2}).dt.total_seconds()'
+            return f'({date1} - {date2}).dt.days'
+    in_result = _find_func_call(cleaned, 'IN')
+    if in_result and in_result[0] == 0 and in_result[1] == len(cleaned):
+        _, _, args = in_result
+        if len(args) >= 2:
+            field_val = _vec_recursive(args[0], df_var)
+            vals = ', '.join(_vec_recursive(a, df_var) for a in args[1:])
+            return f'{field_val}.isin([{vals}])'
     if "||" in cleaned:
         parts = _split_concat_parts(cleaned)
         vec_parts = []
@@ -616,6 +888,8 @@ def _vec_recursive(expr, df_var):
     converted = re.sub(r'\bFALSE\b', 'False', converted, flags=re.IGNORECASE)
     converted = re.sub(r'\bNULL\b', 'None', converted, flags=re.IGNORECASE)
+    converted = _convert_remaining_funcs(converted, df_var)
     skip_words = {
         'True', 'False', 'None', 'and', 'or', 'not', 'np', 'pd', 'get_variable',
         'str', 'int', 'float', 'bool', 'len', 'abs', 'round',
@@ -629,6 +903,7 @@ def _vec_recursive(expr, df_var):
     converted = re.sub(r'\bNOT\b', ' ~', converted, flags=re.IGNORECASE)
     converted = re.sub(r'<>', '!=', converted)
     converted = re.sub(r'(?<![<>!=])=(?!=)', '==', converted)
+    converted = re.sub(r'\berrors\s*==\s*(["\'])', r'errors=\1', converted)
     converted = re.sub(r'\s+', ' ', converted).strip()
@@ -691,6 +966,39 @@ def _vectorize_value(val, df_var="df"):
 def _vectorize_simple(part, df_var):
     c = part.strip()
+    lkp_match = re.search(r':(\w+)\.(\w+)\s*\(', c)
+    if lkp_match:
+        start = lkp_match.start()
+        paren_start = c.index('(', start)
+        paren_end = _find_matching_paren(c, paren_start)
+        if paren_end != -1:
+            before = c[:start].strip()
+            lkp_expr = c[start:paren_end + 1]
+            after = c[paren_end + 1:].strip()
+            vec_lkp = _vec_recursive(lkp_expr, df_var)
+            c = f'{before}{vec_lkp}{after}'.strip()
+    in_result = _find_func_call(c, 'IN')
+    if in_result:
+        start, end, args = in_result
+        if len(args) >= 2:
+            before = c[:start].strip()
+            after = c[end:].strip()
+            vec = _vec_recursive(c[start:end], df_var)
+            c = f'{before}{vec}{after}'.strip()
+            if not before and not after:
+                return c
+    is_spaces_result = _find_func_call(c, 'IS_SPACES')
+    if is_spaces_result:
+        start, end, args = is_spaces_result
+        before = c[:start].strip()
+        after = c[end:].strip()
+        vec = _vec_recursive(c[start:end], df_var)
+        c = f'{before}{vec}{after}'.strip()
+        if not before and not after:
+            return c
     for func_name in sorted(INFA_FUNC_MAP.keys(), key=lambda x: -len(x)):
         result = _find_func_call(c, func_name)
         if result:
@@ -701,7 +1009,7 @@ def _vectorize_simple(part, df_var):
             c = f'{before}{vec_inner}{after}'
             break
-    for func_name in ('UPPER', 'LOWER', 'LTRIM', 'RTRIM', 'TRIM', 'SUBSTR', 'INSTR', 'LENGTH', 'INITCAP'):
+    for func_name in ('UPPER', 'LOWER', 'LTRIM', 'RTRIM', 'TRIM', 'SUBSTR', 'INSTR', 'LENGTH', 'INITCAP', 'REVERSE', 'IS_NUMBER'):
         result = _find_func_call(c, func_name)
         if result:
             start, end, _ = result
@@ -710,8 +1018,16 @@ def _vectorize_simple(part, df_var):
             vec_inner = _vec_recursive(c[start:end], df_var)
             c = f'{before}{vec_inner}{after}'
-    c = re.sub(r'\bISNULL\s*\(\s*([A-Za-z_]\w*)\s*\)',
-               lambda m: f'{df_var}["{m.group(1)}"].isna()', c, flags=re.IGNORECASE)
+    isnull_result = _find_func_call(c, 'ISNULL')
+    if isnull_result:
+        start, end, args = isnull_result
+        before = c[:start]
+        after = c[end:]
+        vec_inner = _vec_recursive(c[start:end], df_var)
+        c = f'{before}{vec_inner}{after}'
+    else:
+        c = re.sub(r'\bISNULL\s*\(\s*([A-Za-z_]\w*)\s*\)',
+                   lambda m: f'{df_var}["{m.group(1)}"].isna()', c, flags=re.IGNORECASE)
     c = re.sub(r'\b([A-Za-z_]\w*)\s*IS\s+NOT\s+NULL\b',
                lambda m: f'{df_var}["{m.group(1)}"].notna()', c, flags=re.IGNORECASE)
     c = re.sub(r'\b([A-Za-z_]\w*)\s*IS\s+NULL\b',
@@ -727,7 +1043,7 @@ def _vectorize_simple(part, df_var):
     skip_words = {
         'True', 'False', 'None', 'and', 'or', 'not', 'np', 'pd',
         'str', 'int', 'float', 'isna', 'notna', 'fillna',
-        'get_variable', 'lookup_func',
+        'get_variable', 'lookup_func', 'isin', 'eq',
     }
     c = _substitute_fields(c, df_var, skip_words)

{informatica_python-1.9.2.dist-info → informatica_python-1.9.3.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: informatica-python
-Version: 1.9.2
+Version: 1.9.3
 Summary: Convert Informatica PowerCenter workflow XML to Python/PySpark code
 Author: Nick
 License: MIT
@@ -79,25 +79,26 @@ from informatica_python import InformaticaConverter
 converter = InformaticaConverter()
-# Parse and generate files
-converter.convert_to_files("workflow_export.xml", "output_dir")
+# Parse and generate files to a directory
+converter.convert("workflow_export.xml", output_dir="output_dir")
-# Parse and generate zip
-converter.convert_to_zip("workflow_export.xml", "output.zip")
+# Parse and generate zip archive
+converter.convert("workflow_export.xml", output_zip="output.zip")
-# Parse to structured dict
+# Parse to structured dict (no code generation)
 result = converter.parse_file("workflow_export.xml")
 # Use a different data library
-converter.convert_to_files("workflow_export.xml", "output_dir", data_lib="polars")
+converter = InformaticaConverter(data_lib="polars")
+converter.convert("workflow_export.xml", output_dir="output_dir")
 ```
 ## Generated Output Files
 | File | Description |
 |------|-------------|
-| `helper_functions.py` | Database/file I/O helpers, Informatica expression equivalents (80+ functions), window/analytic functions, stored procedure execution, state persistence |
-| `mapping_{name}.py` | One per mapping, named after the real Informatica mapping name — transformation logic with row-count logging, source reads, target writes, inline documentation |
+| `helper_functions.py` | Database/file I/O helpers, 90+ Informatica expression equivalents, window/analytic functions, stored procedure execution, state persistence |
+| `mapping_{name}.py` | One per mapping, named after the real Informatica mapping name — transformation logic with vectorized expressions, row-count logging, type casting, inline documentation |
 | `workflow.py` | Task orchestration with topological ordering, decision branching, worklet calls, and error handling |
 | `config.yml` | Connection configs, source/target metadata, runtime parameters |
 | `all_sql_queries.sql` | All SQL extracted from Source Qualifiers, Lookups, SQL transforms (with ANSI-translated variants) |
@@ -119,23 +120,22 @@ Select via `--data-lib` CLI flag or `data_lib` parameter:
 The code generator produces real, runnable Python for these transformation types:
-- **Source Qualifier** — SQL override, pre/post SQL, column selection, session connection overrides
-- **Expression** — Field-level expressions converted to vectorized pandas operations (`df["COL"]` style)
+- **Source Qualifier** — SQL override, pre/post SQL, column selection, session connection overrides, `$$PARAM` substitution in SQL
+- **Expression** — Field-level expressions converted to vectorized pandas operations (`df["COL"]` style) with 40+ vectorized function handlers
 - **Filter** — Row filtering with vectorized converted conditions
 - **Joiner** — `pd.merge()` with join type and condition parsing (inner/left/right/outer)
-- **Lookup** — `pd.merge()` lookups with connection-aware DB/file reads, multiple match policies, default values
+- **Lookup** — `pd.merge()` lookups with connection-aware DB reads, multiple match policies, default values, `$$PARAM` substitution
 - **Aggregator** — `groupby().agg()` with SUM/COUNT/AVG/MIN/MAX/FIRST/LAST, computed aggregates
-- **Sorter** — `sort_values()` with multi-key ascending/descending
+- **Sorter** — `sort_values()` with multi-key sorting; the per-field ascending/descending direction is read from the `SORTDIRECTION` attribute
 - **Router** — Multi-group conditional routing with named groups
 - **Union** — `pd.concat()` across multiple input groups
-- **Update Strategy** — DD_INSERT/DD_UPDATE/DD_DELETE/DD_REJECT routing with actual target INSERT/UPDATE/DELETE operations, dialect-aware SQL placeholders, auto-detected primary keys
+- **Update Strategy** — DD_INSERT/DD_UPDATE/DD_DELETE/DD_REJECT routing with actual target INSERT/UPDATE/DELETE operations, dialect-aware SQL placeholders, auto-detected primary keys; vectorized expression parsing with row-level fallback
 - **Sequence Generator** — Auto-incrementing ID columns
 - **Normalizer** — `pd.melt()` with auto-detected id/value vars
 - **Rank** — `groupby().rank()` with Top-N filtering
 - **Stored Procedure** — Full code generation with Oracle/MSSQL/generic support, input/output parameter mapping
-- **Transaction Control** — Commit/rollback logic
 - **Custom / Java** — Placeholder stubs with TODO markers
-- **SQL Transform** — Direct SQL execution pass-through
+- **SQL Transform** — Direct SQL execution pass-through with `$$PARAM` substitution
 ## Supported XML Tags (72 Tags)
@@ -153,6 +153,86 @@ The code generator produces real, runnable Python for these transformation types
 ## Key Features
+### Generated Code Quality (v1.9.3+)
+Generated code follows clean formatting and commenting standards:
+- Consistent section headers (`# ---`) for Source Qualifiers, Transformations, and Target Writes
+- Each section includes metadata: database type, field lists, descriptions
+- Column mapping comments (`# Column mapping: source -> target`) and write operation type comments (`# Write to database table` / `# Write to file`)
+- Expression inline comments showing original Informatica expression (e.g., `# FULL_NAME = UPPER(FIRST_NAME) || ' ' || UPPER(LAST_NAME)`)
+- Clean indentation: no blank line after `try:`, no consecutive blank lines inside function body
+- Mapping-level `try:/except` wrapper with `logger.error()` for runtime visibility
+### Smart Target Write Detection (v1.9.3+)
+Targets are automatically classified as database or file writes:
+- Targets with `database_type` set (Oracle, SQL Server, etc.) generate `write_to_db()` calls
+- Targets with flatfile metadata or file extensions (`.csv`, `.dat`, `.txt`, `.xml`, `.json`, `.parquet`, `.xlsx`, `.xls`, `.tsv`, `.avro`) generate `write_file()` calls
+- Bare targets (no metadata) default to `write_to_db()` since Informatica targets are typically database tables
+- Schema-qualified names (e.g., `dbo.MY_TABLE`) correctly route to database writes
+- Session file path overrides take priority when present
+### Vectorized Expression Engine (v1.9.2+)
+Column-level pandas operations instead of row-level iteration. The expression converter uses a recursive parenthesis-aware parser that handles:
+**Conditional / Null:**
+- `IIF(cond, val, else_val)` → `np.where()` — supports 2-arg form (missing else defaults to `None`)
+- `DECODE(TRUE, cond1, val1, ..., default)` → nested `np.where()` chains
+- `DECODE(field, val1, res1, ..., default)` → value-matching `np.where()`
+- `NVL(val, default)` → `.fillna()`
+- `IS_SPACES(field)` → `field.str.strip().eq("")`
+- `IS_NUMBER(field)` → `pd.to_numeric(field, errors="coerce").notna()`
+- `IN(field, val1, val2, ...)` → `field.isin([...])`
+**String:**
+- `UPPER/LOWER` → `.str.upper()/.str.lower()`
+- `LTRIM/RTRIM/TRIM` → `.str.lstrip()/.str.rstrip()/.str.strip()` with custom char support
+- `SUBSTR(val, start, len)` → `.str[start:end]`
+- `INSTR(val, search)` → `.str.find()`
+- `LPAD/RPAD` → `.str.pad()`
+- `REVERSE(val)` → `.str[::-1]`
+- `INITCAP(val)` → `.str.title()`
+- `REPLACECHR/REPLACESTR` → `.str.replace()`
+- `REG_EXTRACT/REG_REPLACE` → `.str.extract()/.str.replace(regex=True)`
+- `CHR(code)` → `chr(int(code))`
+- `||` concatenation → `+` with `.astype(str)` on non-literals
+**Date/Time:**
+- `TO_DATE(val, fmt)` → `pd.to_datetime()` with Informatica→Python format conversion
+- `TO_CHAR(val, fmt)` → `.dt.strftime()`
+- `ADD_TO_DATE(date, part, amount)` → `date + pd.DateOffset()` for year/month parts (YY/MM) and `date + pd.to_timedelta()` for day/time parts (DD/HH/MI/SS)
+- `DATE_DIFF(date1, date2, part)` → `(date1 - date2).dt.days` / `.dt.total_seconds() / 3600` etc.
+- `SYSDATE/SYSTIMESTAMP` → `pd.Timestamp.now()`
+- `TRUNC(date, 'DD')` → date truncation via `.dt.floor()/.dt.to_period()`
+- `MAKE_DATE_TIME(y, m, d, h, mi, s)` → `pd.Timestamp()`
+**Numeric:**
+- `TO_INTEGER/TO_BIGINT/TO_FLOAT/TO_DECIMAL` → `pd.to_numeric()`
+- `TRUNC(val)` → `np.trunc()` for numeric truncation
+- `ROUND/ABS/CEIL/FLOOR/POWER/SQRT/MOD/LOG/SIGN` → `np.*` equivalents
+**Special:**
+- `:LKP.TABLE(args)` — Connected lookup references → `df_lkp_table` merge
+- `:PORT.FUNC(args)` — Unconnected lookups → `lookup_func("FUNC", args)` calls
+- Inline `--` comment stripping (respects string literals)
+- String-literal-aware field substitution
+### Expression Converter (90+ Row-Level Functions)
+All Informatica expression functions are available as row-level Python equivalents in `helper_functions.py`:
+- **String:** `substr`, `ltrim`, `rtrim`, `upper`, `lower`, `lpad`, `rpad`, `instr`, `length`, `concat`, `replacechr`, `replacestr`, `reg_extract`, `reg_replace`, `reg_match`, `reverse_str`, `initcap`, `chr_func`, `ascii_func`, `left_str`, `right_str`, `trim_func`, `indexof`, `metaphone_func`, `soundex_func`, `compress_func`, `decompress_func`
+- **Date:** `add_to_date`, `date_diff`, `date_compare`, `get_date_part`, `set_date_part`, `last_day`, `make_date_time`, `to_date`, `to_char`, `to_timestamp_func`, `current_timestamp`, `session_start_time`
+- **Numeric:** `round_val`, `trunc`, `mod_val`, `abs_val`, `ceil_val`, `floor_val`, `power_val`, `sqrt_val`, `log_val`, `ln_val`, `exp_val`, `sign_val`, `rand_val`, `greatest_val`, `least_val`
+- **Conversion:** `to_integer`, `to_bigint`, `to_float`, `to_decimal`, `cast_func`
+- **Null/Conditional:** `iif_expr`, `decode_expr`, `nvl`, `nvl2`, `isnull`, `is_spaces`, `is_number`, `is_date`, `in_expr`, `choose_expr`
+- **Aggregate:** `sum_val`, `avg_val`, `count_val`, `min_val`, `max_val`, `first_val`, `last_val`, `median_val`, `stddev_val`, `variance_val`, `percentile_val`
+- **Window/Analytic:** `moving_avg`, `moving_avg_df`, `moving_sum`, `moving_sum_df`, `cume`, `cume_df`, `percentile_df`
+- **Lookup:** `lookup_func` — Placeholder for runtime lookup resolution
+- **Variable:** `get_variable`, `set_variable`, `set_count_variable`
+- **Control:** `raise_error`, `abort_func`
 ### Row-Count Logging (v1.8+)
 Generated code automatically logs row counts at every step of the data pipeline:
@@ -165,8 +245,6 @@ AGG_TOTALS (Aggregator): 8542 input rows -> 150 output rows
 Target TGT_SUMMARY: 150 rows written
 ```
-All row-count operations are backend-safe (wrapped in try/except), so Dask and other lazy-evaluation backends won't fail.
 ### Generated Code Documentation (v1.8+)
 Every generated mapping function includes a rich docstring describing:
@@ -179,14 +257,6 @@ Each transformation block is annotated with:
 - Transform type and description (from Informatica XML)
 - Input and output field lists (truncated at 10 for readability)
-### Window / Analytic Functions (v1.7+)
-DataFrame-level analytic functions for aggregation transforms:
-- `moving_avg_df(df, col, window)` — rolling mean via `.rolling().mean()`
-- `moving_sum_df(df, col, window)` — rolling sum via `.rolling().sum()`
-- `cume_df(df, col)` — cumulative sum via `.expanding().sum()`
-- `percentile_df(df, col, pct)` — quantile via `.quantile()`
 ### Update Strategy with Target Operations (v1.7+)
 Update Strategy transforms now generate real INSERT/UPDATE/DELETE operations:
@@ -196,6 +266,14 @@ Update Strategy transforms now generate real INSERT/UPDATE/DELETE operations:
 - Dialect-aware SQL placeholders (`?` for MSSQL, `%s` for PostgreSQL/Oracle)
 - Primary key columns auto-detected from target field definitions
+### Window / Analytic Functions (v1.7+)
+DataFrame-level analytic functions for aggregation transforms:
+- `moving_avg_df(df, col, window)` — rolling mean via `.rolling().mean()`
+- `moving_sum_df(df, col, window)` — rolling sum via `.rolling().sum()`
+- `cume_df(df, col)` — cumulative sum via `.expanding().sum()`
+- `percentile_df(df, col, pct)` — quantile via `.quantile()`
 ### Stored Procedure Execution (v1.7+)
 Full stored procedure code generation (not just stubs):
@@ -241,19 +319,13 @@ Optional `--validate-casts` flag generates null-count checks before/after type c
 - Logs warnings when coercion introduces new nulls
 - Helps identify data quality issues during test runs
-### Vectorized Expression Generation (v1.5+)
-Column-level pandas operations instead of row-level iteration:
-- IIF → `np.where()`, NVL → `.fillna()`, UPPER/LOWER → `.str.upper()/.str.lower()`
-- SUBSTR → `.str[start:end]`, TO_INTEGER → `pd.to_numeric()`, TO_DATE → `pd.to_datetime()`
-- IS NULL/IS NOT NULL → `.isna()`/`.notna()`
 ### Parameter File Support (v1.5+)
 Standard Informatica `.param` file parsing:
 - `[Global]` and `[folder.WF:workflow.ST:session]` section support
 - `get_param(config, var_name)` resolution chain: config → env vars → defaults
 - CLI `--param-file` flag for specifying parameter files
+- `$$PARAM` variables in SQL are automatically substituted with `.replace()` calls
 ### Session Connection Overrides (v1.4+)
@@ -283,18 +355,49 @@ Expands Mapplet instances into prefixed transforms, rewires connectors, and elim
 Converts Informatica decision conditions to Python if/else branches with proper variable substitution.
-### Expression Converter (80+ Functions)
-Converts Informatica expressions to Python equivalents:
-- **String:** SUBSTR, LTRIM, RTRIM, UPPER, LOWER, LPAD, RPAD, INSTR, LENGTH, CONCAT, REPLACE, REG_EXTRACT, REG_REPLACE, REVERSE, INITCAP, CHR, ASCII
-- **Date:** ADD_TO_DATE, DATE_DIFF, GET_DATE_PART, SYSDATE, SYSTIMESTAMP, TO_DATE, TO_CHAR, TRUNC (date)
-- **Numeric:** ROUND, TRUNC, MOD, ABS, CEIL, FLOOR, POWER, SQRT, LOG, EXP, SIGN
-- **Conversion:** TO_INTEGER, TO_BIGINT, TO_FLOAT, TO_DECIMAL, TO_CHAR, TO_DATE
-- **Null handling:** IIF, DECODE, NVL, NVL2, ISNULL, IS_SPACES, IS_NUMBER
-- **Aggregate:** SUM, AVG, COUNT, MIN, MAX, FIRST, LAST, MEDIAN, STDDEV, VARIANCE
-- **Lookup:** :LKP expressions with dynamic lookup references
-- **Variable:** SETVARIABLE / mapping variable assignment
+## Helper Functions Library
+The generated `helper_functions.py` provides a complete runtime library:
+### Configuration & Parameters
+| Function | Description |
+|----------|-------------|
+| `load_config(path, param_file)` | Load YAML config with optional `.param` file merge |
+| `parse_param_file(path)` | Parse Informatica `.param` files (`[Global]`, `[folder.WF:...]` sections) |
+| `get_param(config, var_name, default)` | Resolve parameter: config → env vars → default |
+| `get_variable(var_name, config)` | Get workflow/mapping variable from params, env vars, or param store |
+| `set_variable(var_name, value)` | Set workflow/mapping variable in param store and env |
+### Database Operations
+| Function | Description |
+|----------|-------------|
+| `get_db_connection(config, conn_name)` | Create DB connection (pyodbc/pymssql/sqlalchemy fallback for MSSQL) |
+| `read_from_db(config, query, conn_name)` | Execute SQL query and return DataFrame |
+| `write_to_db(config, df, table, conn_name)` | Write DataFrame to database table via `.to_sql()` |
+| `execute_sql(config, sql, conn_name)` | Execute DDL/DML statement (INSERT, UPDATE, DELETE) |
+| `write_with_update_strategy(config, df, table, ...)` | Split rows by `_update_strategy` column into INSERT/UPDATE/DELETE/REJECT operations |
+| `call_stored_procedure(config, proc, params, ...)` | Execute stored procedure with input/output parameter mapping (Oracle/MSSQL/generic) |
+### File Operations
+| Function | Description |
+|----------|-------------|
+| `read_file(path, file_config)` | Read CSV/DAT/TXT/XML/XLSX/JSON/Parquet with auto-detection |
+| `write_file(df, path, file_config)` | Write DataFrame to file with format auto-detection |
+### State Persistence
+| Function | Description |
+|----------|-------------|
+| `load_persistent_state(file)` | Load JSON state file for persistent variables |
+| `save_persistent_state(file)` | Save persistent variables to JSON state file |
+| `get_persistent_variable(scope, var, default)` | Get scoped persistent variable |
+| `set_persistent_variable(scope, var, value)` | Set scoped persistent variable |
+### Logging & Monitoring
+| Function | Description |
+|----------|-------------|
+| `log_mapping_start(name)` | Log mapping start with timestamp |
+| `log_mapping_end(name, start_time, row_count)` | Log mapping completion with elapsed time |
+| `validate_row_count(df, name, min_rows)` | Validate minimum row count threshold |
 ## Requirements
@@ -304,7 +407,32 @@ Converts Informatica expressions to Python equivalents:
 ## Changelog
-### v1.9.x (Phase 8)
+### v1.9.3 (Current)
+- **Smart target write detection**: Bare targets default to `write_to_db()` instead of `write_file()`; file extension allowlist (`.csv`, `.dat`, `.txt`, `.xml`, `.json`, `.parquet`, `.xlsx`, `.xls`, `.tsv`, `.avro`) for file targets; schema-qualified names (`dbo.TABLE`) correctly route to database
+- **DECODE vectorization**: `DECODE(TRUE, cond1, val1, ..., default)` → nested `np.where()` chains; value-matching DECODE; handles IN() conditions and complex boolean nesting
+- **IS_SPACES vectorization**: `IS_SPACES(field)` → `field.str.strip().eq("")`
+- **2-arg IIF**: `IIF(cond, val)` without else clause defaults to `None`
+- **REVERSE vectorization**: `REVERSE(field)` → `field.str[::-1]`
+- **IN() vectorization**: `IN(field, val1, val2, ...)` → `field.isin([...])`
+- **IS_NUMBER vectorization**: `IS_NUMBER(field)` → `pd.to_numeric(field, errors="coerce").notna()`
+- **SYSDATE/SYSTIMESTAMP**: Bare `SYSDATE`/`SYSTIMESTAMP` → `pd.Timestamp.now()` in vectorized mode
+- **TRUNC vectorization**: Numeric `TRUNC(field)` → `np.trunc()`; date `TRUNC(field, 'DD')` → `.dt.floor()`
+- **ADD_TO_DATE vectorization**: `ADD_TO_DATE(date, part, amount)` → `pd.DateOffset()` for YY/MM parts and `pd.to_timedelta()` for DD/HH/MI/SS units
+- **DATE_DIFF vectorization**: `DATE_DIFF(date1, date2, part)` → arithmetic on timedelta components
+- **Unconnected lookup support**: `:PORT.FUNC_NAME(args)` → `lookup_func("FUNC_NAME", args)`
+- **Inline comment stripping**: `--` comments removed from expressions (respects string literals)
+- **`$$PARAM` SQL substitution**: Source Qualifier, Lookup, and SQL Transform SQL strings auto-substitute `$$VAR` with `get_param(config, 'VAR')` calls
+- **Sorter direction**: Reads `SORTDIRECTION` from field attributes, generates per-field `ascending=[True, False, ...]`
+- **Pass-through optimization**: Identity expressions skip `.copy()` and use direct reference
+- **Lookup column deduplication**: `_gen_lookup_transform` uses a `seen_output_cols` set to avoid duplicate column checks
+- **Mapping-level error handling**: Generated function body wrapped in `try:/except` with `logger.error()`
+- **Update strategy vectorized**: Tries vectorized expression first, falls back to row-level `apply()`
+- **Generated code formatting**: Consistent `# ---` section headers for Source Qualifiers, Transforms, and Target Writes; metadata comments (database type, field lists); column mapping and write operation comments; clean blank line handling
+- **Source/target detection**: Case-insensitive instance type matching
+- **Session→mapping inference**: Longest-suffix-match strategy for ambiguous mapping names
+- **646 tests** across unit, integration, expression, and formatting test suites
+### v1.9.2 (Phase 8)
 - Mapping output files now use real mapping names (e.g., `mapping_m_customer_load.py`) instead of generic numeric indices (`mapping_1.py`)
 - Workflow imports automatically match the named mapping files
 - **Expression converter rewrite**: Recursive parenthesis-aware parser replacing simple regex; fixes nested IIF/INSTR/LTRIM/RTRIM/REPLACECHR/REPLACESTR/SUBSTR/TO_CHAR/CHR/MAKE_DATE_TIME
@@ -367,7 +495,7 @@ Converts Informatica expressions to Python equivalents:
 cd informatica_python
 pip install -e ".[dev]"
-# Run tests (136 tests)
+# Run tests (646 tests)
 pytest tests/ -v
 ```

{informatica_python-1.9.2.dist-info → informatica_python-1.9.3.dist-info}/RECORD RENAMED Viewed

@@ -1,4 +1,4 @@
-informatica_python/__init__.py,sha256=CeP4ZnqSH1TDyk6qec59cSv3YGDiZY5kdKWPzI5J_SM,337
+informatica_python/__init__.py,sha256=o9kEVkHnEwXAD7hhY8YbN6G8RP4Mqby_q8CpjfbiknQ,337
 informatica_python/cli.py,sha256=gFwg0O99vKM-OLO0HoHA4emd-6qrgjMNqa9T59e4e_s,2905
 informatica_python/converter.py,sha256=xCuWrYzDji0yN72D3QqOgZCVVM2j3k2_CvlGplCWxLU,22779
 informatica_python/models.py,sha256=G_C2WfQL-ykKjNj23m8vKFtLZYrQozp99HJzrLTKG1Y,17293
@@ -7,17 +7,17 @@ informatica_python/generators/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5N
 informatica_python/generators/config_gen.py,sha256=4tqcNKTB06kyGZIiM4yl0q97q_i3zeCHXTjuE1dNFKY,5726
 informatica_python/generators/error_log_gen.py,sha256=2cc0rEcblydHkb9VAMXlrH7WdSQ-CNqAXcwVk3FYZeM,21319
 informatica_python/generators/helper_gen.py,sha256=D6-UqNh09Qy2V7RimNgP-SzK_uB9YqAlsa0-cgLhf5o,72209
-informatica_python/generators/mapping_gen.py,sha256=9hWyvdx_qDG3kwUVlrIYdm9uHFQk_IWe8kpVJdJLbrU,66929
+informatica_python/generators/mapping_gen.py,sha256=gBVArcb8uODbgY3epdsldCbUywS-qo8CiKr7hcNjMnc,70654
 informatica_python/generators/sql_gen.py,sha256=O8Y-aJz9EyFJ0DXeuISRt5yKwC3wlp2K3B0BHrmxrXw,4872
-informatica_python/generators/workflow_gen.py,sha256=AKAyJjqRrcMYfuRs3zMf5UcQsH_vU3p-b1O3Y2C1Kp4,17358
+informatica_python/generators/workflow_gen.py,sha256=_uSlBg31ZRMhMlCYk4hWDRBPaBROrepD8_v3QGEWJxE,18089
 informatica_python/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 informatica_python/utils/datatype_map.py,sha256=iLOYg-iBKT4rMecGbrFkTpJj4yqs5S9HeBOTLUIWhX0,2809
-informatica_python/utils/expression_converter.py,sha256=qouWnHBxE-QyaHR_W93TCOutmQNyFodTFgMw-xvBoG8,32658
+informatica_python/utils/expression_converter.py,sha256=CqkkTESMKxcYmVsDpNfn7VcZZe771uCIMy_0YQYq6pc,45946
 informatica_python/utils/lib_adapters.py,sha256=1ZtuMbgDg9Ukf-OF_EG1L_BeeR-6JQk8Kx3WwMfvNRU,6516
 informatica_python/utils/sql_dialect.py,sha256=_IHJbfu8a3mT_OvHpybgSfZKqz6mwVy5ItTKDRChqnU,5461
-informatica_python-1.9.2.dist-info/licenses/LICENSE,sha256=77RaRDdXgey1D90YZAjXqEQdBxWfvUQqLQX3pC1qjUE,1061
-informatica_python-1.9.2.dist-info/METADATA,sha256=x2cwuI9gBv3DiJCFELJADlt72McEmIYm-hGIZC6fdR8,16638
-informatica_python-1.9.2.dist-info/WHEEL,sha256=PovZm1ExVWmrRefZoXCfejlbKLnQI5SVIf1SWRV4QQI,97
-informatica_python-1.9.2.dist-info/entry_points.txt,sha256=030jjTrx-1oRRQ16HZz52rdcKS8R8_llnymsTUtn_Xc,67
-informatica_python-1.9.2.dist-info/top_level.txt,sha256=Dngg-WNteYi22XAJU2XKAQS8aZ52yM2LYC0tzxrlbVQ,19
-informatica_python-1.9.2.dist-info/RECORD,,
+informatica_python-1.9.3.dist-info/licenses/LICENSE,sha256=77RaRDdXgey1D90YZAjXqEQdBxWfvUQqLQX3pC1qjUE,1061
+informatica_python-1.9.3.dist-info/METADATA,sha256=VbfZWdzKE382RnkR7F2rs7PNL397g3PfglvugN4XVTw,26097
+informatica_python-1.9.3.dist-info/WHEEL,sha256=PovZm1ExVWmrRefZoXCfejlbKLnQI5SVIf1SWRV4QQI,97
+informatica_python-1.9.3.dist-info/entry_points.txt,sha256=030jjTrx-1oRRQ16HZz52rdcKS8R8_llnymsTUtn_Xc,67
+informatica_python-1.9.3.dist-info/top_level.txt,sha256=Dngg-WNteYi22XAJU2XKAQS8aZ52yM2LYC0tzxrlbVQ,19
+informatica_python-1.9.3.dist-info/RECORD,,

{informatica_python-1.9.2.dist-info → informatica_python-1.9.3.dist-info}/WHEEL RENAMED Viewed

File without changes

{informatica_python-1.9.2.dist-info → informatica_python-1.9.3.dist-info}/entry_points.txt RENAMED Viewed

File without changes

{informatica_python-1.9.2.dist-info → informatica_python-1.9.3.dist-info}/licenses/LICENSE RENAMED Viewed

File without changes

{informatica_python-1.9.2.dist-info → informatica_python-1.9.3.dist-info}/top_level.txt RENAMED Viewed

File without changes

informatica-python 1.9.2__py3-none-any.whl → 1.9.3__py3-none-any.whl

informatica-python 1.9.2__py3-none-any.whl → 1.9.3__py3-none-any.whl