PyPI - informatica-python - Versions diffs - 1.9.6__tar.gz → 1.9.8__tar.gz - Mend

informatica-python 1.9.6 (tar.gz) → 1.9.8 (tar.gz)

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (31)

{informatica_python-1.9.6 → informatica_python-1.9.8}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: informatica-python
-Version: 1.9.6
+Version: 1.9.8
 Summary: Convert Informatica PowerCenter workflow XML to Python/PySpark code
 Author: Nick
 License: MIT

{informatica_python-1.9.6 → informatica_python-1.9.8}/informatica_python/generators/mapping_gen.py RENAMED Viewed

@@ -481,7 +481,12 @@ def _emit_flatfile_read(lines, var_name, src_def, indent="    ", file_path_overr
     if fc.get("fixed_width"):
         widths = []
         for fld in src_def.fields:
-            widths.append(fld.precision if fld.precision else 10)
+            if fld.physical_length and fld.physical_length > 0:
+                widths.append(fld.physical_length)
+            elif fld.precision:
+                widths.append(fld.precision)
+            else:
+                widths.append(10)
         lines.append(f"{indent}df_{var_name} = pd.read_fwf(")
         lines.append(f"{indent}    {default_path},")
         lines.append(f"{indent}    widths={widths},")

{informatica_python-1.9.6 → informatica_python-1.9.8}/informatica_python/models.py RENAMED Viewed

@@ -18,6 +18,9 @@ class FieldDef:
     field_number: int = 0
     hidden: str = "NO"
     business_name: str = ""
+    offset: int = 0
+    physical_offset: int = 0
+    physical_length: int = 0
     field_attributes: List[Dict[str, str]] = field(default_factory=list)

{informatica_python-1.9.6 → informatica_python-1.9.8}/informatica_python/parser.py RENAMED Viewed

@@ -417,6 +417,9 @@ class InformaticaParser:
             hidden=self._attr(elem, "HIDDEN", "NO"),
             business_name=self._attr(elem, "BUSINESSNAME"),
             description=self._attr(elem, "DESCRIPTION"),
+            offset=self._int_attr(elem, "OFFSET"),
+            physical_offset=self._int_attr(elem, "PHYSICALOFFSET"),
+            physical_length=self._int_attr(elem, "PHYSICALLENGTH"),
         )
         for fa in elem.findall("FIELDATTRIBUTE"):
             fld.field_attributes.append({

{informatica_python-1.9.6 → informatica_python-1.9.8}/informatica_python/utils/expression_converter.py RENAMED Viewed

@@ -184,7 +184,9 @@ def convert_expression(expr):
         return cleaned
     if cleaned.startswith("'") and cleaned.endswith("'"):
-        return cleaned
+        close_pos = cleaned.find("'", 1)
+        if close_pos == len(cleaned) - 1:
+            return cleaned
     converted = cleaned
@@ -428,7 +430,9 @@ def _vec_recursive(expr, df_var):
         return cleaned
     if cleaned.startswith("'") and cleaned.endswith("'"):
-        return cleaned
+        close_pos = cleaned.find("'", 1)
+        if close_pos == len(cleaned) - 1:
+            return cleaned
     upper = cleaned.upper()
@@ -452,6 +456,17 @@ def _vec_recursive(expr, df_var):
         var_name = cleaned[2:]
         return f'get_variable("{var_name}")'
+    if re.match(r'^\$PM\w+$', cleaned):
+        var_name = cleaned[1:]
+        return f'resolve_builtin_variable("{var_name}")'
+    not_result = _find_func_call(cleaned, 'NOT')
+    if not_result and not_result[0] == 0 and not_result[1] == len(cleaned):
+        _, _, args = not_result
+        if len(args) >= 1:
+            inner = _vec_recursive(args[0], df_var)
+            return f'~({inner})'
     lkp_result = _find_func_call(cleaned, 'LKP')
     if lkp_result is None:
         lkp_match = re.match(r'^:LKP\.(\w+)\s*\(', cleaned, re.IGNORECASE)
@@ -666,6 +681,8 @@ def _vec_recursive(expr, df_var):
             if len(args) >= 2:
                 fmt = _convert_infa_date_format(args[1])
                 return f'{field_val}.dt.strftime("{fmt}")'
+            if any(op in field_val for op in (' + ', ' - ', ' * ', ' / ', ' % ')):
+                return f'({field_val}).astype(str)'
             return f'{field_val}.astype(str)'
     make_dt_result = _find_func_call(cleaned, 'MAKE_DATE_TIME')
@@ -866,8 +883,10 @@ def _vec_recursive(expr, df_var):
             v = _vec_recursive(p, df_var)
             if v.startswith("'") and v.endswith("'"):
                 vec_parts.append(v)
-            else:
+            elif v.startswith(df_var + '[') or v.startswith('pd.') or '.str.' in v:
                 vec_parts.append(f'{v}.fillna(\'\').astype(str)')
+            else:
+                vec_parts.append(f'str({v})')
         return " + ".join(vec_parts)
     for func_name in sorted(INFA_FUNC_MAP.keys(), key=lambda x: -len(x)):
@@ -883,6 +902,7 @@ def _vec_recursive(expr, df_var):
     converted = re.sub(r':LKP\.(\w+)\s*\(', r'lookup_func("\1", ', converted)
     converted = re.sub(r'\$\$(\w+)', r'get_variable("\1")', converted)
+    converted = re.sub(r'\$(PM\w+)', r'resolve_builtin_variable("\1")', converted)
     converted = re.sub(r'\b([A-Za-z_][A-Za-z0-9_]*)\s*IS\s+NOT\s+NULL\b',
                        lambda m: f'{df_var}["{m.group(1)}"].notna()', converted, flags=re.IGNORECASE)
@@ -895,8 +915,15 @@ def _vec_recursive(expr, df_var):
     converted = _convert_remaining_funcs(converted, df_var)
+    converted = re.sub(r'\bAND\b', ' & ', converted, flags=re.IGNORECASE)
+    converted = re.sub(r'\bOR\b', ' | ', converted, flags=re.IGNORECASE)
+    converted = re.sub(r'\bNOT\b', ' ~ ', converted, flags=re.IGNORECASE)
+    converted = re.sub(r'<>', '!=', converted)
+    converted = re.sub(r'(?<![<>!=])=(?!=)', '==', converted)
     skip_words = {
         'True', 'False', 'None', 'and', 'or', 'not', 'np', 'pd', 'get_variable',
+        'resolve_builtin_variable',
         'str', 'int', 'float', 'bool', 'len', 'abs', 'round',
         'fillna', 'astype', 'isna', 'notna', 'where', 'errors', 'coerce',
         'lookup_func', 'expand', 'extract', 'regex', 'contains', 'replace',
@@ -904,11 +931,6 @@ def _vec_recursive(expr, df_var):
     }
     converted = _substitute_fields(converted, df_var, skip_words)
-    converted = re.sub(r'\bAND\b', ' & ', converted, flags=re.IGNORECASE)
-    converted = re.sub(r'\bOR\b', ' | ', converted, flags=re.IGNORECASE)
-    converted = re.sub(r'\bNOT\b', ' ~', converted, flags=re.IGNORECASE)
-    converted = re.sub(r'<>', '!=', converted)
-    converted = re.sub(r'(?<![<>!=])=(?!=)', '==', converted)
     converted = re.sub(r'\berrors\s*==\s*(["\'])', r'errors=\1', converted)
     converted = re.sub(r'\bexpand\s*==\s*', 'expand=', converted)
     converted = re.sub(r'\bregex\s*==\s*', 'regex=', converted)
@@ -1041,6 +1063,8 @@ def _vectorize_simple(part, df_var):
     c = re.sub(r'\b([A-Za-z_]\w*)\s*IS\s+NULL\b',
                lambda m: f'{df_var}["{m.group(1)}"].isna()', c, flags=re.IGNORECASE)
+    c = re.sub(r'\$(PM\w+)', r'resolve_builtin_variable("\1")', c)
     c = re.sub(r'<>', '!=', c)
     c = re.sub(r'(?<![<>!=])=(?!=)', '==', c)
@@ -1048,8 +1072,13 @@ def _vectorize_simple(part, df_var):
     c = re.sub(r'\bTRUE\b', 'True', c, flags=re.IGNORECASE)
     c = re.sub(r'\bFALSE\b', 'False', c, flags=re.IGNORECASE)
+    c = re.sub(r'\bAND\b', ' & ', c, flags=re.IGNORECASE)
+    c = re.sub(r'\bOR\b', ' | ', c, flags=re.IGNORECASE)
+    c = re.sub(r'\bNOT\b', ' ~ ', c, flags=re.IGNORECASE)
     skip_words = {
         'True', 'False', 'None', 'and', 'or', 'not', 'np', 'pd',
+        'resolve_builtin_variable',
         'str', 'int', 'float', 'isna', 'notna', 'fillna',
         'get_variable', 'lookup_func', 'isin', 'eq',
         'expand', 'extract', 'astype', 'errors', 'coerce', 'regex',
@@ -1089,8 +1118,9 @@ def _split_condition_tokens(text):
             current.append(ch)
         elif depth == 0:
             rest = text[i:]
-            and_match = re.match(r'\bAND\b', rest, re.IGNORECASE)
-            or_match = re.match(r'\bOR\b', rest, re.IGNORECASE)
+            prev_is_word = i > 0 and (text[i - 1].isalnum() or text[i - 1] == '_')
+            and_match = re.match(r'\bAND\b', rest, re.IGNORECASE) if not prev_is_word else None
+            or_match = re.match(r'\bOR\b', rest, re.IGNORECASE) if not prev_is_word else None
             if and_match:
                 tokens.append(''.join(current).strip())
                 current = []
@@ -1134,9 +1164,10 @@ def _vectorize_condition(cond, df_var="df"):
     for part in parts:
         negate = False
         inner = part.strip()
-        if re.match(r'^NOT\s+', inner, flags=re.IGNORECASE):
+        not_match = re.match(r'^NOT\b\s*', inner, flags=re.IGNORECASE)
+        if not_match:
             negate = True
-            inner = re.sub(r'^NOT\s+', '', inner, flags=re.IGNORECASE).strip()
+            inner = inner[not_match.end():].strip()
         v = _vectorize_simple(inner, df_var)
         if negate:

{informatica_python-1.9.6 → informatica_python-1.9.8}/informatica_python.egg-info/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: informatica-python
-Version: 1.9.6
+Version: 1.9.8
 Summary: Convert Informatica PowerCenter workflow XML to Python/PySpark code
 Author: Nick
 License: MIT

{informatica_python-1.9.6 → informatica_python-1.9.8}/pyproject.toml RENAMED Viewed

@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
 [project]
 name = "informatica-python"
-version = "1.9.6"
+version = "1.9.8"
 description = "Convert Informatica PowerCenter workflow XML to Python/PySpark code"
 readme = "README.md"
 license = {text = "MIT"}

{informatica_python-1.9.6 → informatica_python-1.9.8}/tests/test_integration.py RENAMED Viewed

@@ -2711,3 +2711,203 @@ class TestImportRe(unittest.TestCase):
             assert "import re" in code
         finally:
             shutil.rmtree(tmpdir)
+class TestNotFunctionCallForm(unittest.TestCase):
+    def test_not_without_space_isnull(self):
+        result = convert_expression_vectorized("NOT(ISNULL(Postal_Code))")
+        assert "~" in result
+        assert "isna" in result
+        assert "NOT(" not in result
+    def test_not_with_space_isnull(self):
+        result = convert_expression_vectorized("NOT ISNULL(Postal_Code)")
+        assert "~" in result
+        assert "isna" in result
+    def test_not_in_iif_condition(self):
+        result = convert_expression_vectorized("IIF(NOT(ISNULL(X)), X, 'default')")
+        assert "np.where" in result
+        assert "~" in result
+        assert "isna" in result
+    def test_not_vectorize_condition_no_space(self):
+        result = convert_filter_vectorized("NOT(ISNULL(field1))")
+        assert "~" in result
+        assert "isna" in result
+        assert "NOT(" not in result
+    def test_not_vectorize_condition_with_space(self):
+        result = convert_filter_vectorized("NOT ISNULL(field1)")
+        assert "~" in result
+        assert "isna" in result
+class TestAndOrNotAsFieldNames(unittest.TestCase):
+    def test_and_not_treated_as_field(self):
+        result = convert_filter_vectorized("A = 1 AND B = 2")
+        assert 'df["AND"]' not in result
+        assert "&" in result
+    def test_or_not_treated_as_field(self):
+        result = convert_filter_vectorized("A = 'TRUE' OR B = 'FALSE'")
+        assert 'df["OR"]' not in result
+        assert "|" in result
+    def test_complex_and_or_filter(self):
+        expr = "FILTER_FLAG = 'TRUE' OR (FILTER_FLAG='FALSE' AND ACCBALANCE='Y')"
+        result = convert_filter_vectorized(expr)
+        assert 'df["AND"]' not in result
+        assert 'df["OR"]' not in result
+        assert "&" in result
+        assert "|" in result
+    def test_nested_and_in_iif(self):
+        expr = "IIF(UPPER(X) = 'A' AND UPPER(Y) = 'B', 1, 0)"
+        result = convert_expression_vectorized(expr)
+        assert "np.where" in result
+        assert 'df["AND"]' not in result
+        assert "&" in result
+    def test_and_or_in_vectorize_simple(self):
+        result = convert_filter_vectorized("(X = 1 AND Y = 2)")
+        assert 'df["AND"]' not in result
+        assert "&" in result
+class TestPMBuiltinVariableInExpression(unittest.TestCase):
+    def test_pm_mapping_name_standalone(self):
+        result = convert_expression_vectorized("$PMMappingName")
+        assert "resolve_builtin_variable" in result
+        assert "PMMappingName" in result
+        assert '$df[' not in result
+    def test_pm_in_concat(self):
+        result = convert_expression_vectorized("'prefix_' || $PMSessionName || '_suffix'")
+        assert "resolve_builtin_variable" in result
+        assert "PMSessionName" in result
+        assert '$df[' not in result
+    def test_pm_variable_not_mangled(self):
+        result = convert_expression_vectorized("IIF($PMMappingName = 'test', 1, 0)")
+        assert "resolve_builtin_variable" in result
+        assert '$df[' not in result
+class TestToCharParenthesization(unittest.TestCase):
+    def test_to_char_with_arithmetic(self):
+        result = convert_expression_vectorized("TO_CHAR(TO_INTEGER(x) - 1)")
+        assert ".astype(str)" in result
+        assert result.count("(") >= result.count(")")
+        assert "- 1.astype(str)" not in result
+        assert "- 1).astype(str)" in result
+    def test_to_char_simple_field(self):
+        result = convert_expression_vectorized("TO_CHAR(x)")
+        assert ".astype(str)" in result
+    def test_to_char_with_addition(self):
+        result = convert_expression_vectorized("TO_CHAR(x + y)")
+        assert "- 1.astype" not in result or "+ " not in result
+        if " + " in result:
+            assert ").astype(str)" in result
+class TestIifFieldEqualsNumeric(unittest.TestCase):
+    def test_iif_field_equals_zero(self):
+        result = convert_expression_vectorized("IIF(DeletedIndicator=0,'N','Y')")
+        assert "np.where" in result
+        assert "==" in result
+        assert 'DeletedIndicator' in result.replace('"', '')
+        assert "| (" not in result
+    def test_iif_field_equals_string(self):
+        result = convert_expression_vectorized("IIF(Status='A','Active','Inactive')")
+        assert "np.where" in result
+        assert "==" in result
+class TestFixedWidthPhysicalLength(unittest.TestCase):
+    def test_field_def_has_physical_length(self):
+        from informatica_python.models import FieldDef
+        fld = FieldDef(name="test", datatype="string", physical_length=20, offset=5)
+        assert fld.physical_length == 20
+        assert fld.offset == 5
+    def test_fixed_width_xml(self):
+        xml = '''<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE POWERMART SYSTEM "powrmart.dtd">
+<POWERMART CREATION_DATE="01/01/2025" REPOSITORY_VERSION="1">
+<REPOSITORY NAME="repo" VERSION="1" CODEPAGE="UTF-8" DATABASETYPE="Oracle">
+<FOLDER NAME="TEST_FOLDER" OWNER="admin">
+  <SOURCE NAME="SRC_FW" DATABASETYPE="Flat File" DBDNAME="SRC_FW">
+    <FLATFILE ISFIXEDWIDTH="YES" PADBYTES="NO"/>
+    <SOURCEFIELD NAME="FIELD1" DATATYPE="string" PRECISION="10" SCALE="0" FIELDNUMBER="1" PHYSICALLENGTH="15" OFFSET="0"/>
+    <SOURCEFIELD NAME="FIELD2" DATATYPE="string" PRECISION="20" SCALE="0" FIELDNUMBER="2" PHYSICALLENGTH="25" OFFSET="15"/>
+  </SOURCE>
+  <TARGET NAME="TGT_FW" DATABASETYPE="Flat File">
+    <TARGETFIELD NAME="FIELD1" DATATYPE="string" PRECISION="10" SCALE="0" FIELDNUMBER="1"/>
+    <TARGETFIELD NAME="FIELD2" DATATYPE="string" PRECISION="20" SCALE="0" FIELDNUMBER="2"/>
+  </TARGET>
+  <MAPPING NAME="m_test_fw" ISVALID="YES">
+    <TRANSFORMATION NAME="SQ_SRC_FW" TYPE="Source Qualifier" REUSABLE="NO">
+      <TRANSFORMFIELD NAME="FIELD1" DATATYPE="string" PRECISION="10" PORTTYPE="INPUT/OUTPUT"/>
+      <TRANSFORMFIELD NAME="FIELD2" DATATYPE="string" PRECISION="20" PORTTYPE="INPUT/OUTPUT"/>
+      <TABLEATTRIBUTE NAME="Sql Query" VALUE=""/>
+      <TABLEATTRIBUTE NAME="User Defined Join" VALUE=""/>
+      <TABLEATTRIBUTE NAME="Source Filter" VALUE=""/>
+    </TRANSFORMATION>
+    <CONNECTOR FROMINSTANCE="SQ_SRC_FW" FROMFIELD="FIELD1" TOINSTANCE="TGT_FW" TOFIELD="FIELD1"/>
+    <CONNECTOR FROMINSTANCE="SQ_SRC_FW" FROMFIELD="FIELD2" TOINSTANCE="TGT_FW" TOFIELD="FIELD2"/>
+    <INSTANCE NAME="SQ_SRC_FW" TRANSFORMATION_NAME="SQ_SRC_FW" TYPE="Source Qualifier">
+      <ASSOCIATED_SOURCE_INSTANCE NAME="SRC_FW"/>
+    </INSTANCE>
+    <INSTANCE NAME="SRC_FW" TRANSFORMATION_NAME="SRC_FW" TYPE="Source Definition"/>
+    <INSTANCE NAME="TGT_FW" TRANSFORMATION_NAME="TGT_FW" TYPE="Target Definition"/>
+  </MAPPING>
+  <SESSION NAME="s_test_fw" MAPPINGNAME="m_test_fw" ISVALID="YES">
+    <SESSTRANSFORMATIONINST TRANSFORMATIONNAME="SQ_SRC_FW" SINSTANCENAME="SQ_SRC_FW"/>
+    <CONFIGREFERENCE REFOBJECTNAME="default_session_config" TYPE="Session Config"/>
+  </SESSION>
+  <WORKFLOW NAME="wf_test_fw" ISVALID="YES">
+    <TASKINSTANCE NAME="s_test_fw" TASKNAME="s_test_fw" TASKTYPE="Session"/>
+  </WORKFLOW>
+</FOLDER>
+</REPOSITORY>
+</POWERMART>'''
+        converter = InformaticaConverter()
+        tmpdir = tempfile.mkdtemp()
+        try:
+            converter.convert_string(xml, output_dir=tmpdir)
+            mapping_file = os.path.join(tmpdir, "mapping_m_test_fw.py")
+            assert os.path.exists(mapping_file), "mapping file not created"
+            with open(mapping_file) as f:
+                code = f.read()
+            assert "read_fwf" in code
+            assert "15" in code
+            assert "25" in code
+        finally:
+            shutil.rmtree(tmpdir)
+class TestConcatWithLtrimRtrim(unittest.TestCase):
+    def test_concat_ltrim_rtrim(self):
+        expr = "'PER_' || ltrim(rtrim(X)) || '_suffix'"
+        result = convert_expression_vectorized(expr)
+        assert "+" in result
+        assert "||" not in result
+        assert "lstrip" in result or "strip" in result
+        assert "rstrip" in result or "strip" in result
+    def test_concat_simple_fields(self):
+        expr = "A || '_' || B"
+        result = convert_expression_vectorized(expr)
+        assert "+" in result
+        assert "||" not in result