PyPI - informatica-python - Versions diffs - 1.9.5__tar.gz → 1.9.7__tar.gz - Mend

informatica-python 1.9.5 (tar.gz) → 1.9.7 (tar.gz)

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (31) [hide / show]

{informatica_python-1.9.5 → informatica_python-1.9.7}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: informatica-python
-Version: 1.9.5
+Version: 1.9.7
 Summary: Convert Informatica PowerCenter workflow XML to Python/PySpark code
 Author: Nick
 License: MIT

{informatica_python-1.9.5 → informatica_python-1.9.7}/informatica_python/__init__.py RENAMED Viewed

@@ -7,7 +7,7 @@ Licensed under the MIT License.
 from informatica_python.converter import InformaticaConverter
-__version__ = "1.9.5"
+__version__ = "1.9.6"
 __author__ = "Nick"
 __license__ = "MIT"
 __all__ = ["InformaticaConverter"]

{informatica_python-1.9.5 → informatica_python-1.9.7}/informatica_python/generators/helper_gen.py RENAMED Viewed

@@ -360,7 +360,7 @@ def _add_db_functions(lines, data_lib):
     lines.append('    """Execute a SQL statement (INSERT, UPDATE, DELETE, DDL)."""')
     lines.append("    conn = get_db_connection(config, connection_name)")
     lines.append("    try:")
-    lines.append("        if hasattr(conn, 'execute'):")
+    lines.append("        if hasattr(conn, 'dialect'):")
     lines.append("            from sqlalchemy import text")
     lines.append("            conn.execute(text(sql))")
     lines.append("            conn.commit()")

{informatica_python-1.9.5 → informatica_python-1.9.7}/informatica_python/generators/mapping_gen.py RENAMED Viewed

@@ -316,7 +316,7 @@ def generate_mapping_code(mapping: MappingDef, folder: FolderDef,
                      if t.type in ("Source Qualifier", "Application Source Qualifier")]
     if sq_transforms:
         for sq in sq_transforms:
-            _generate_source_qualifier(lines, sq, source_map, source_dfs, connector_graph, instance_map, session_overrides)
+            _generate_source_qualifier(lines, sq, source_map, source_dfs, connector_graph, instance_map, session_overrides, mapping_name=mapping.name, folder_name=folder.name)
     else:
         for src_name, src_def in source_map.items():
             safe = _safe_name(src_name)
@@ -347,7 +347,7 @@ def generate_mapping_code(mapping: MappingDef, folder: FolderDef,
     for tx in processing_order:
         if tx.type in ("Source Qualifier", "Application Source Qualifier"):
             continue
-        _generate_transformation(lines, tx, connector_graph, source_dfs, transform_map, instance_map, data_lib)
+        _generate_transformation(lines, tx, connector_graph, source_dfs, transform_map, instance_map, data_lib, mapping_name=mapping.name, folder_name=folder.name)
     for tgt_name, tgt_def in target_map.items():
         _generate_target_write(lines, tgt_name, tgt_def, connector_graph, source_dfs, transform_map, instance_map, session_overrides, validate_casts=validate_casts)
@@ -481,7 +481,12 @@ def _emit_flatfile_read(lines, var_name, src_def, indent="    ", file_path_overr
     if fc.get("fixed_width"):
         widths = []
         for fld in src_def.fields:
-            widths.append(fld.precision if fld.precision else 10)
+            if fld.physical_length and fld.physical_length > 0:
+                widths.append(fld.physical_length)
+            elif fld.precision:
+                widths.append(fld.precision)
+            else:
+                widths.append(10)
         lines.append(f"{indent}df_{var_name} = pd.read_fwf(")
         lines.append(f"{indent}    {default_path},")
         lines.append(f"{indent}    widths={widths},")
@@ -626,7 +631,7 @@ def _get_processing_order(transformations, connector_graph, sq_transforms):
     return ordered
-def _generate_source_qualifier(lines, sq, source_map, source_dfs, connector_graph, instance_map, session_overrides=None):
+def _generate_source_qualifier(lines, sq, source_map, source_dfs, connector_graph, instance_map, session_overrides=None, mapping_name="", folder_name=""):
     sq_safe = _safe_name(sq.name)
     sql_override = ""
     pre_sql = ""
@@ -665,7 +670,7 @@ def _generate_source_qualifier(lines, sq, source_map, source_dfs, connector_grap
     if not connected_sources:
         sq_src_name = sq.name[3:] if sq.name.upper().startswith("SQ_") else sq.name
         if sql_override:
-            _emit_sql_with_params(lines, f"sql_{sq_safe}", sql_override)
+            _emit_sql_with_params(lines, f"sql_{sq_safe}", sql_override, mapping_name=mapping_name, folder_name=folder_name)
             lines.append(f"    df_{sq_safe} = read_from_db(config, sql_{sq_safe}, 'default')")
         else:
             lines.append(f"    df_{sq_safe} = read_file(config.get('sources', {{}}).get('{sq_src_name}', {{}}).get('file_path', '{sq_src_name}'),")
@@ -676,7 +681,7 @@ def _generate_source_qualifier(lines, sq, source_map, source_dfs, connector_grap
         sq_override = (session_overrides or {}).get(sq.name, {}) or (session_overrides or {}).get(src_name, {})
         conn_name = sq_override.get("connection_name") or (_safe_name(src_def.db_name) if src_def.db_name else "default")
-        _emit_sql_with_params(lines, f"sql_{sq_safe}", sql_override)
+        _emit_sql_with_params(lines, f"sql_{sq_safe}", sql_override, mapping_name=mapping_name, folder_name=folder_name)
         lines.append(f"    df_{sq_safe} = read_from_db(config, sql_{sq_safe}, '{conn_name}')")
     elif len(connected_sources) == 1:
         src_name = next(iter(connected_sources))
@@ -718,7 +723,7 @@ def _generate_source_qualifier(lines, sq, source_map, source_dfs, connector_grap
     lines.append("")
-def _generate_transformation(lines, tx, connector_graph, source_dfs, transform_map, instance_map, data_lib="pandas"):
+def _generate_transformation(lines, tx, connector_graph, source_dfs, transform_map, instance_map, data_lib="pandas", mapping_name="", folder_name=""):
     tx_safe = _safe_name(tx.name)
     tx_type = tx.type.lower().strip()
@@ -765,7 +770,7 @@ def _generate_transformation(lines, tx, connector_graph, source_dfs, transform_m
     elif tx_type in ("joiner",):
         _gen_joiner_transform(lines, tx, tx_safe, input_df, input_sources, source_dfs, connector_graph, data_lib)
     elif tx_type in ("lookup procedure", "lookup"):
-        _gen_lookup_transform(lines, tx, tx_safe, input_df, source_dfs, connector_graph, data_lib)
+        _gen_lookup_transform(lines, tx, tx_safe, input_df, source_dfs, connector_graph, data_lib, mapping_name=mapping_name, folder_name=folder_name)
     elif tx_type == "router":
         _gen_router_transform(lines, tx, tx_safe, input_df, source_dfs)
     elif tx_type in ("union",):
@@ -785,7 +790,7 @@ def _generate_transformation(lines, tx, connector_graph, source_dfs, transform_m
     elif tx_type in ("java",):
         _gen_java_transform(lines, tx, tx_safe, input_df, source_dfs)
     elif tx_type in ("sql",):
-        _gen_sql_transform(lines, tx, tx_safe, input_df, source_dfs)
+        _gen_sql_transform(lines, tx, tx_safe, input_df, source_dfs, mapping_name=mapping_name, folder_name=folder_name)
     else:
         lines.append(f"    # TODO: Unsupported transformation type '{tx.type}' - passing through")
         copy_expr = lib_copy(data_lib, input_df)
@@ -990,7 +995,7 @@ def _gen_joiner_transform(lines, tx, tx_safe, input_df, input_sources, source_df
     source_dfs[tx.name] = f"df_{tx_safe}"
-def _gen_lookup_transform(lines, tx, tx_safe, input_df, source_dfs, connector_graph=None, data_lib="pandas"):
+def _gen_lookup_transform(lines, tx, tx_safe, input_df, source_dfs, connector_graph=None, data_lib="pandas", mapping_name="", folder_name=""):
     lookup_table = ""
     lookup_sql = ""
     lookup_condition = ""
@@ -1027,7 +1032,7 @@ def _gen_lookup_transform(lines, tx, tx_safe, input_df, source_dfs, connector_gr
     lines.append(f"    # Lookup: {lookup_table or tx.name}")
     if lookup_sql:
-        _emit_sql_with_params(lines, f"lkp_sql_{tx_safe}", lookup_sql)
+        _emit_sql_with_params(lines, f"lkp_sql_{tx_safe}", lookup_sql, mapping_name=mapping_name, folder_name=folder_name)
         lines.append(f"    df_lkp_{tx_safe} = read_from_db(config, lkp_sql_{tx_safe}, 'default')")
     elif lookup_table:
         lines.append(f"    df_lkp_{tx_safe} = read_from_db(config, 'SELECT * FROM {lookup_table}', 'default')")
@@ -1423,14 +1428,14 @@ def _gen_java_transform(lines, tx, tx_safe, input_df, source_dfs):
     source_dfs[tx.name] = f"df_{tx_safe}"
-def _gen_sql_transform(lines, tx, tx_safe, input_df, source_dfs):
+def _gen_sql_transform(lines, tx, tx_safe, input_df, source_dfs, mapping_name="", folder_name=""):
     sql_query = ""
     for attr in tx.attributes:
         if attr.name == "Sql Query" and attr.value:
             sql_query = convert_sql_expression(attr.value)
     lines.append(f"    # SQL Transformation: {tx.name}")
     if sql_query:
-        _emit_sql_with_params(lines, f"sql_{tx_safe}", sql_query)
+        _emit_sql_with_params(lines, f"sql_{tx_safe}", sql_query, mapping_name=mapping_name, folder_name=folder_name)
         lines.append(f"    df_{tx_safe} = read_from_db(config, sql_{tx_safe}, 'default')")
     else:
         lines.append(f"    df_{tx_safe} = {input_df}.copy()")

{informatica_python-1.9.5 → informatica_python-1.9.7}/informatica_python/models.py RENAMED Viewed

@@ -18,6 +18,9 @@ class FieldDef:
     field_number: int = 0
     hidden: str = "NO"
     business_name: str = ""
+    offset: int = 0
+    physical_offset: int = 0
+    physical_length: int = 0
     field_attributes: List[Dict[str, str]] = field(default_factory=list)

{informatica_python-1.9.5 → informatica_python-1.9.7}/informatica_python/parser.py RENAMED Viewed

@@ -417,6 +417,9 @@ class InformaticaParser:
             hidden=self._attr(elem, "HIDDEN", "NO"),
             business_name=self._attr(elem, "BUSINESSNAME"),
             description=self._attr(elem, "DESCRIPTION"),
+            offset=self._int_attr(elem, "OFFSET"),
+            physical_offset=self._int_attr(elem, "PHYSICALOFFSET"),
+            physical_length=self._int_attr(elem, "PHYSICALLENGTH"),
         )
         for fa in elem.findall("FIELDATTRIBUTE"):
             fld.field_attributes.append({

{informatica_python-1.9.5 → informatica_python-1.9.7}/informatica_python/utils/expression_converter.py RENAMED Viewed

@@ -184,7 +184,9 @@ def convert_expression(expr):
         return cleaned
     if cleaned.startswith("'") and cleaned.endswith("'"):
-        return cleaned
+        close_pos = cleaned.find("'", 1)
+        if close_pos == len(cleaned) - 1:
+            return cleaned
     converted = cleaned
@@ -428,7 +430,9 @@ def _vec_recursive(expr, df_var):
         return cleaned
     if cleaned.startswith("'") and cleaned.endswith("'"):
-        return cleaned
+        close_pos = cleaned.find("'", 1)
+        if close_pos == len(cleaned) - 1:
+            return cleaned
     upper = cleaned.upper()
@@ -452,6 +456,17 @@ def _vec_recursive(expr, df_var):
         var_name = cleaned[2:]
         return f'get_variable("{var_name}")'
+    if re.match(r'^\$PM\w+$', cleaned):
+        var_name = cleaned[1:]
+        return f'resolve_builtin_variable("{var_name}")'
+    not_result = _find_func_call(cleaned, 'NOT')
+    if not_result and not_result[0] == 0 and not_result[1] == len(cleaned):
+        _, _, args = not_result
+        if len(args) >= 1:
+            inner = _vec_recursive(args[0], df_var)
+            return f'~({inner})'
     lkp_result = _find_func_call(cleaned, 'LKP')
     if lkp_result is None:
         lkp_match = re.match(r'^:LKP\.(\w+)\s*\(', cleaned, re.IGNORECASE)
@@ -666,6 +681,8 @@ def _vec_recursive(expr, df_var):
             if len(args) >= 2:
                 fmt = _convert_infa_date_format(args[1])
                 return f'{field_val}.dt.strftime("{fmt}")'
+            if any(op in field_val for op in (' + ', ' - ', ' * ', ' / ', ' % ')):
+                return f'({field_val}).astype(str)'
             return f'{field_val}.astype(str)'
     make_dt_result = _find_func_call(cleaned, 'MAKE_DATE_TIME')
@@ -883,6 +900,7 @@ def _vec_recursive(expr, df_var):
     converted = re.sub(r':LKP\.(\w+)\s*\(', r'lookup_func("\1", ', converted)
     converted = re.sub(r'\$\$(\w+)', r'get_variable("\1")', converted)
+    converted = re.sub(r'\$(PM\w+)', r'resolve_builtin_variable("\1")', converted)
     converted = re.sub(r'\b([A-Za-z_][A-Za-z0-9_]*)\s*IS\s+NOT\s+NULL\b',
                        lambda m: f'{df_var}["{m.group(1)}"].notna()', converted, flags=re.IGNORECASE)
@@ -895,8 +913,15 @@ def _vec_recursive(expr, df_var):
     converted = _convert_remaining_funcs(converted, df_var)
+    converted = re.sub(r'\bAND\b', ' & ', converted, flags=re.IGNORECASE)
+    converted = re.sub(r'\bOR\b', ' | ', converted, flags=re.IGNORECASE)
+    converted = re.sub(r'\bNOT\b', ' ~ ', converted, flags=re.IGNORECASE)
+    converted = re.sub(r'<>', '!=', converted)
+    converted = re.sub(r'(?<![<>!=])=(?!=)', '==', converted)
     skip_words = {
         'True', 'False', 'None', 'and', 'or', 'not', 'np', 'pd', 'get_variable',
+        'resolve_builtin_variable',
         'str', 'int', 'float', 'bool', 'len', 'abs', 'round',
         'fillna', 'astype', 'isna', 'notna', 'where', 'errors', 'coerce',
         'lookup_func', 'expand', 'extract', 'regex', 'contains', 'replace',
@@ -904,11 +929,6 @@ def _vec_recursive(expr, df_var):
     }
     converted = _substitute_fields(converted, df_var, skip_words)
-    converted = re.sub(r'\bAND\b', ' & ', converted, flags=re.IGNORECASE)
-    converted = re.sub(r'\bOR\b', ' | ', converted, flags=re.IGNORECASE)
-    converted = re.sub(r'\bNOT\b', ' ~', converted, flags=re.IGNORECASE)
-    converted = re.sub(r'<>', '!=', converted)
-    converted = re.sub(r'(?<![<>!=])=(?!=)', '==', converted)
     converted = re.sub(r'\berrors\s*==\s*(["\'])', r'errors=\1', converted)
     converted = re.sub(r'\bexpand\s*==\s*', 'expand=', converted)
     converted = re.sub(r'\bregex\s*==\s*', 'regex=', converted)
@@ -1041,6 +1061,8 @@ def _vectorize_simple(part, df_var):
     c = re.sub(r'\b([A-Za-z_]\w*)\s*IS\s+NULL\b',
                lambda m: f'{df_var}["{m.group(1)}"].isna()', c, flags=re.IGNORECASE)
+    c = re.sub(r'\$(PM\w+)', r'resolve_builtin_variable("\1")', c)
     c = re.sub(r'<>', '!=', c)
     c = re.sub(r'(?<![<>!=])=(?!=)', '==', c)
@@ -1048,8 +1070,13 @@ def _vectorize_simple(part, df_var):
     c = re.sub(r'\bTRUE\b', 'True', c, flags=re.IGNORECASE)
     c = re.sub(r'\bFALSE\b', 'False', c, flags=re.IGNORECASE)
+    c = re.sub(r'\bAND\b', ' & ', c, flags=re.IGNORECASE)
+    c = re.sub(r'\bOR\b', ' | ', c, flags=re.IGNORECASE)
+    c = re.sub(r'\bNOT\b', ' ~ ', c, flags=re.IGNORECASE)
     skip_words = {
         'True', 'False', 'None', 'and', 'or', 'not', 'np', 'pd',
+        'resolve_builtin_variable',
         'str', 'int', 'float', 'isna', 'notna', 'fillna',
         'get_variable', 'lookup_func', 'isin', 'eq',
         'expand', 'extract', 'astype', 'errors', 'coerce', 'regex',
@@ -1089,8 +1116,9 @@ def _split_condition_tokens(text):
             current.append(ch)
         elif depth == 0:
             rest = text[i:]
-            and_match = re.match(r'\bAND\b', rest, re.IGNORECASE)
-            or_match = re.match(r'\bOR\b', rest, re.IGNORECASE)
+            prev_is_word = i > 0 and (text[i - 1].isalnum() or text[i - 1] == '_')
+            and_match = re.match(r'\bAND\b', rest, re.IGNORECASE) if not prev_is_word else None
+            or_match = re.match(r'\bOR\b', rest, re.IGNORECASE) if not prev_is_word else None
             if and_match:
                 tokens.append(''.join(current).strip())
                 current = []
@@ -1134,9 +1162,10 @@ def _vectorize_condition(cond, df_var="df"):
     for part in parts:
         negate = False
         inner = part.strip()
-        if re.match(r'^NOT\s+', inner, flags=re.IGNORECASE):
+        not_match = re.match(r'^NOT\b\s*', inner, flags=re.IGNORECASE)
+        if not_match:
             negate = True
-            inner = re.sub(r'^NOT\s+', '', inner, flags=re.IGNORECASE).strip()
+            inner = inner[not_match.end():].strip()
         v = _vectorize_simple(inner, df_var)
         if negate:

{informatica_python-1.9.5 → informatica_python-1.9.7}/informatica_python.egg-info/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: informatica-python
-Version: 1.9.5
+Version: 1.9.7
 Summary: Convert Informatica PowerCenter workflow XML to Python/PySpark code
 Author: Nick
 License: MIT

{informatica_python-1.9.5 → informatica_python-1.9.7}/pyproject.toml RENAMED Viewed

@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
 [project]
 name = "informatica-python"
-version = "1.9.5"
+version = "1.9.7"
 description = "Convert Informatica PowerCenter workflow XML to Python/PySpark code"
 readme = "README.md"
 license = {text = "MIT"}

{informatica_python-1.9.5 → informatica_python-1.9.7}/tests/test_integration.py RENAMED Viewed

@@ -2676,6 +2676,8 @@ class TestPMVariableHandling(unittest.TestCase):
                     if "$PMMappingName" in code:
                         assert "resolve_builtin_variable" in code, \
                             "SQL with $PMMappingName should call resolve_builtin_variable"
+                        assert "mapping_name='m_pm_vars'" in code, \
+                            "resolve_builtin_variable should receive actual mapping name"
                         break
         finally:
             shutil.rmtree(tmpdir)
@@ -2691,7 +2693,8 @@ class TestExecuteSqlAlchemy(unittest.TestCase):
             with open(os.path.join(tmpdir, "helper_functions.py")) as f:
                 code = f.read()
             exec_block = code.split("def execute_sql(")[1]
-            assert "sqlalchemy" in exec_block or "text(sql)" in exec_block
+            assert "text(sql)" in exec_block
+            assert "dialect" in exec_block, "Should check for dialect attribute to detect SQLAlchemy"
         finally:
             shutil.rmtree(tmpdir)
@@ -2708,3 +2711,203 @@ class TestImportRe(unittest.TestCase):
             assert "import re" in code
         finally:
             shutil.rmtree(tmpdir)
+class TestNotFunctionCallForm(unittest.TestCase):
+    def test_not_without_space_isnull(self):
+        result = convert_expression_vectorized("NOT(ISNULL(Postal_Code))")
+        assert "~" in result
+        assert "isna" in result
+        assert "NOT(" not in result
+    def test_not_with_space_isnull(self):
+        result = convert_expression_vectorized("NOT ISNULL(Postal_Code)")
+        assert "~" in result
+        assert "isna" in result
+    def test_not_in_iif_condition(self):
+        result = convert_expression_vectorized("IIF(NOT(ISNULL(X)), X, 'default')")
+        assert "np.where" in result
+        assert "~" in result
+        assert "isna" in result
+    def test_not_vectorize_condition_no_space(self):
+        result = convert_filter_vectorized("NOT(ISNULL(field1))")
+        assert "~" in result
+        assert "isna" in result
+        assert "NOT(" not in result
+    def test_not_vectorize_condition_with_space(self):
+        result = convert_filter_vectorized("NOT ISNULL(field1)")
+        assert "~" in result
+        assert "isna" in result
+class TestAndOrNotAsFieldNames(unittest.TestCase):
+    def test_and_not_treated_as_field(self):
+        result = convert_filter_vectorized("A = 1 AND B = 2")
+        assert 'df["AND"]' not in result
+        assert "&" in result
+    def test_or_not_treated_as_field(self):
+        result = convert_filter_vectorized("A = 'TRUE' OR B = 'FALSE'")
+        assert 'df["OR"]' not in result
+        assert "|" in result
+    def test_complex_and_or_filter(self):
+        expr = "FILTER_FLAG = 'TRUE' OR (FILTER_FLAG='FALSE' AND ACCBALANCE='Y')"
+        result = convert_filter_vectorized(expr)
+        assert 'df["AND"]' not in result
+        assert 'df["OR"]' not in result
+        assert "&" in result
+        assert "|" in result
+    def test_nested_and_in_iif(self):
+        expr = "IIF(UPPER(X) = 'A' AND UPPER(Y) = 'B', 1, 0)"
+        result = convert_expression_vectorized(expr)
+        assert "np.where" in result
+        assert 'df["AND"]' not in result
+        assert "&" in result
+    def test_and_or_in_vectorize_simple(self):
+        result = convert_filter_vectorized("(X = 1 AND Y = 2)")
+        assert 'df["AND"]' not in result
+        assert "&" in result
+class TestPMBuiltinVariableInExpression(unittest.TestCase):
+    def test_pm_mapping_name_standalone(self):
+        result = convert_expression_vectorized("$PMMappingName")
+        assert "resolve_builtin_variable" in result
+        assert "PMMappingName" in result
+        assert '$df[' not in result
+    def test_pm_in_concat(self):
+        result = convert_expression_vectorized("'prefix_' || $PMSessionName || '_suffix'")
+        assert "resolve_builtin_variable" in result
+        assert "PMSessionName" in result
+        assert '$df[' not in result
+    def test_pm_variable_not_mangled(self):
+        result = convert_expression_vectorized("IIF($PMMappingName = 'test', 1, 0)")
+        assert "resolve_builtin_variable" in result
+        assert '$df[' not in result
+class TestToCharParenthesization(unittest.TestCase):
+    def test_to_char_with_arithmetic(self):
+        result = convert_expression_vectorized("TO_CHAR(TO_INTEGER(x) - 1)")
+        assert ".astype(str)" in result
+        assert result.count("(") >= result.count(")")
+        assert "- 1.astype(str)" not in result
+        assert "- 1).astype(str)" in result
+    def test_to_char_simple_field(self):
+        result = convert_expression_vectorized("TO_CHAR(x)")
+        assert ".astype(str)" in result
+    def test_to_char_with_addition(self):
+        result = convert_expression_vectorized("TO_CHAR(x + y)")
+        assert "- 1.astype" not in result or "+ " not in result
+        if " + " in result:
+            assert ").astype(str)" in result
+class TestIifFieldEqualsNumeric(unittest.TestCase):
+    def test_iif_field_equals_zero(self):
+        result = convert_expression_vectorized("IIF(DeletedIndicator=0,'N','Y')")
+        assert "np.where" in result
+        assert "==" in result
+        assert 'DeletedIndicator' in result.replace('"', '')
+        assert "| (" not in result
+    def test_iif_field_equals_string(self):
+        result = convert_expression_vectorized("IIF(Status='A','Active','Inactive')")
+        assert "np.where" in result
+        assert "==" in result
+class TestFixedWidthPhysicalLength(unittest.TestCase):
+    def test_field_def_has_physical_length(self):
+        from informatica_python.models import FieldDef
+        fld = FieldDef(name="test", datatype="string", physical_length=20, offset=5)
+        assert fld.physical_length == 20
+        assert fld.offset == 5
+    def test_fixed_width_xml(self):
+        xml = '''<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE POWERMART SYSTEM "powrmart.dtd">
+<POWERMART CREATION_DATE="01/01/2025" REPOSITORY_VERSION="1">
+<REPOSITORY NAME="repo" VERSION="1" CODEPAGE="UTF-8" DATABASETYPE="Oracle">
+<FOLDER NAME="TEST_FOLDER" OWNER="admin">
+  <SOURCE NAME="SRC_FW" DATABASETYPE="Flat File" DBDNAME="SRC_FW">
+    <FLATFILE ISFIXEDWIDTH="YES" PADBYTES="NO"/>
+    <SOURCEFIELD NAME="FIELD1" DATATYPE="string" PRECISION="10" SCALE="0" FIELDNUMBER="1" PHYSICALLENGTH="15" OFFSET="0"/>
+    <SOURCEFIELD NAME="FIELD2" DATATYPE="string" PRECISION="20" SCALE="0" FIELDNUMBER="2" PHYSICALLENGTH="25" OFFSET="15"/>
+  </SOURCE>
+  <TARGET NAME="TGT_FW" DATABASETYPE="Flat File">
+    <TARGETFIELD NAME="FIELD1" DATATYPE="string" PRECISION="10" SCALE="0" FIELDNUMBER="1"/>
+    <TARGETFIELD NAME="FIELD2" DATATYPE="string" PRECISION="20" SCALE="0" FIELDNUMBER="2"/>
+  </TARGET>
+  <MAPPING NAME="m_test_fw" ISVALID="YES">
+    <TRANSFORMATION NAME="SQ_SRC_FW" TYPE="Source Qualifier" REUSABLE="NO">
+      <TRANSFORMFIELD NAME="FIELD1" DATATYPE="string" PRECISION="10" PORTTYPE="INPUT/OUTPUT"/>
+      <TRANSFORMFIELD NAME="FIELD2" DATATYPE="string" PRECISION="20" PORTTYPE="INPUT/OUTPUT"/>
+      <TABLEATTRIBUTE NAME="Sql Query" VALUE=""/>
+      <TABLEATTRIBUTE NAME="User Defined Join" VALUE=""/>
+      <TABLEATTRIBUTE NAME="Source Filter" VALUE=""/>
+    </TRANSFORMATION>
+    <CONNECTOR FROMINSTANCE="SQ_SRC_FW" FROMFIELD="FIELD1" TOINSTANCE="TGT_FW" TOFIELD="FIELD1"/>
+    <CONNECTOR FROMINSTANCE="SQ_SRC_FW" FROMFIELD="FIELD2" TOINSTANCE="TGT_FW" TOFIELD="FIELD2"/>
+    <INSTANCE NAME="SQ_SRC_FW" TRANSFORMATION_NAME="SQ_SRC_FW" TYPE="Source Qualifier">
+      <ASSOCIATED_SOURCE_INSTANCE NAME="SRC_FW"/>
+    </INSTANCE>
+    <INSTANCE NAME="SRC_FW" TRANSFORMATION_NAME="SRC_FW" TYPE="Source Definition"/>
+    <INSTANCE NAME="TGT_FW" TRANSFORMATION_NAME="TGT_FW" TYPE="Target Definition"/>
+  </MAPPING>
+  <SESSION NAME="s_test_fw" MAPPINGNAME="m_test_fw" ISVALID="YES">
+    <SESSTRANSFORMATIONINST TRANSFORMATIONNAME="SQ_SRC_FW" SINSTANCENAME="SQ_SRC_FW"/>
+    <CONFIGREFERENCE REFOBJECTNAME="default_session_config" TYPE="Session Config"/>
+  </SESSION>
+  <WORKFLOW NAME="wf_test_fw" ISVALID="YES">
+    <TASKINSTANCE NAME="s_test_fw" TASKNAME="s_test_fw" TASKTYPE="Session"/>
+  </WORKFLOW>
+</FOLDER>
+</REPOSITORY>
+</POWERMART>'''
+        converter = InformaticaConverter()
+        tmpdir = tempfile.mkdtemp()
+        try:
+            converter.convert_string(xml, output_dir=tmpdir)
+            mapping_file = os.path.join(tmpdir, "mapping_m_test_fw.py")
+            assert os.path.exists(mapping_file), "mapping file not created"
+            with open(mapping_file) as f:
+                code = f.read()
+            assert "read_fwf" in code
+            assert "15" in code
+            assert "25" in code
+        finally:
+            shutil.rmtree(tmpdir)
+class TestConcatWithLtrimRtrim(unittest.TestCase):
+    def test_concat_ltrim_rtrim(self):
+        expr = "'PER_' || ltrim(rtrim(X)) || '_suffix'"
+        result = convert_expression_vectorized(expr)
+        assert "+" in result
+        assert "||" not in result
+        assert "lstrip" in result or "strip" in result
+        assert "rstrip" in result or "strip" in result
+    def test_concat_simple_fields(self):
+        expr = "A || '_' || B"
+        result = convert_expression_vectorized(expr)
+        assert "+" in result
+        assert "||" not in result