informatica-python 1.5.0__tar.gz → 1.5.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (29)
  1. {informatica_python-1.5.0 → informatica_python-1.5.1}/PKG-INFO +1 -1
  2. {informatica_python-1.5.0 → informatica_python-1.5.1}/informatica_python/__init__.py +1 -1
  3. {informatica_python-1.5.0 → informatica_python-1.5.1}/informatica_python/cli.py +1 -0
  4. {informatica_python-1.5.0 → informatica_python-1.5.1}/informatica_python/converter.py +6 -4
  5. {informatica_python-1.5.0 → informatica_python-1.5.1}/informatica_python/generators/mapping_gen.py +18 -9
  6. {informatica_python-1.5.0 → informatica_python-1.5.1}/informatica_python/generators/workflow_gen.py +6 -1
  7. {informatica_python-1.5.0 → informatica_python-1.5.1}/informatica_python/utils/expression_converter.py +16 -9
  8. {informatica_python-1.5.0 → informatica_python-1.5.1}/informatica_python.egg-info/PKG-INFO +1 -1
  9. {informatica_python-1.5.0 → informatica_python-1.5.1}/pyproject.toml +1 -1
  10. {informatica_python-1.5.0 → informatica_python-1.5.1}/tests/test_integration.py +22 -0
  11. {informatica_python-1.5.0 → informatica_python-1.5.1}/LICENSE +0 -0
  12. {informatica_python-1.5.0 → informatica_python-1.5.1}/README.md +0 -0
  13. {informatica_python-1.5.0 → informatica_python-1.5.1}/informatica_python/generators/__init__.py +0 -0
  14. {informatica_python-1.5.0 → informatica_python-1.5.1}/informatica_python/generators/config_gen.py +0 -0
  15. {informatica_python-1.5.0 → informatica_python-1.5.1}/informatica_python/generators/error_log_gen.py +0 -0
  16. {informatica_python-1.5.0 → informatica_python-1.5.1}/informatica_python/generators/helper_gen.py +0 -0
  17. {informatica_python-1.5.0 → informatica_python-1.5.1}/informatica_python/generators/sql_gen.py +0 -0
  18. {informatica_python-1.5.0 → informatica_python-1.5.1}/informatica_python/models.py +0 -0
  19. {informatica_python-1.5.0 → informatica_python-1.5.1}/informatica_python/parser.py +0 -0
  20. {informatica_python-1.5.0 → informatica_python-1.5.1}/informatica_python/utils/__init__.py +0 -0
  21. {informatica_python-1.5.0 → informatica_python-1.5.1}/informatica_python/utils/datatype_map.py +0 -0
  22. {informatica_python-1.5.0 → informatica_python-1.5.1}/informatica_python/utils/lib_adapters.py +0 -0
  23. {informatica_python-1.5.0 → informatica_python-1.5.1}/informatica_python.egg-info/SOURCES.txt +0 -0
  24. {informatica_python-1.5.0 → informatica_python-1.5.1}/informatica_python.egg-info/dependency_links.txt +0 -0
  25. {informatica_python-1.5.0 → informatica_python-1.5.1}/informatica_python.egg-info/entry_points.txt +0 -0
  26. {informatica_python-1.5.0 → informatica_python-1.5.1}/informatica_python.egg-info/requires.txt +0 -0
  27. {informatica_python-1.5.0 → informatica_python-1.5.1}/informatica_python.egg-info/top_level.txt +0 -0
  28. {informatica_python-1.5.0 → informatica_python-1.5.1}/setup.cfg +0 -0
  29. {informatica_python-1.5.0 → informatica_python-1.5.1}/tests/test_converter.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: informatica-python
3
- Version: 1.5.0
3
+ Version: 1.5.1
4
4
  Summary: Convert Informatica PowerCenter workflow XML to Python/PySpark code
5
5
  Author: Nick
6
6
  License: MIT
@@ -7,7 +7,7 @@ Licensed under the MIT License.
7
7
 
8
8
  from informatica_python.converter import InformaticaConverter
9
9
 
10
- __version__ = "1.5.0"
10
+ __version__ = "1.5.1"
11
11
  __author__ = "Nick"
12
12
  __license__ = "MIT"
13
13
  __all__ = ["InformaticaConverter"]
@@ -66,6 +66,7 @@ def main():
66
66
  args.input_file,
67
67
  output_dir=args.output,
68
68
  output_zip=args.zip,
69
+ param_file=args.param_file,
69
70
  )
70
71
  print(f"Conversion complete! Output: {output_path}")
71
72
  print(f"Files generated:")
@@ -33,7 +33,8 @@ class InformaticaConverter:
33
33
  return self._powermart_to_dict(self.powermart)
34
34
 
35
35
  def convert(self, file_path: str, output_dir: str = "output",
36
- output_zip: Optional[str] = None) -> str:
36
+ output_zip: Optional[str] = None,
37
+ param_file: Optional[str] = None) -> str:
37
38
  self.powermart = self.parser.parse_file(file_path)
38
39
 
39
40
  if not self.powermart.repositories:
@@ -47,7 +48,7 @@ class InformaticaConverter:
47
48
  raise ValueError("No folder found in XML file")
48
49
 
49
50
  if len(all_folders) == 1:
50
- return self._convert_folder(all_folders[0], output_dir, output_zip)
51
+ return self._convert_folder(all_folders[0], output_dir, output_zip, param_file)
51
52
 
52
53
  result_path = output_dir if not output_zip else os.path.dirname(output_zip) or "."
53
54
  for folder in all_folders:
@@ -56,7 +57,7 @@ class InformaticaConverter:
56
57
  if output_zip:
57
58
  base, ext = os.path.splitext(output_zip)
58
59
  folder_zip = f"{base}_{folder.name}{ext}"
59
- self._convert_folder(folder, folder_dir, folder_zip)
60
+ self._convert_folder(folder, folder_dir, folder_zip, param_file)
60
61
  return result_path
61
62
 
62
63
  def convert_string(self, xml_string: str, output_dir: str = "output",
@@ -87,7 +88,8 @@ class InformaticaConverter:
87
88
  return result_path
88
89
 
89
90
  def _convert_folder(self, folder: FolderDef, output_dir: str,
90
- output_zip: Optional[str] = None) -> str:
91
+ output_zip: Optional[str] = None,
92
+ param_file: Optional[str] = None) -> str:
91
93
  files = {}
92
94
 
93
95
  files["helper_functions.py"] = generate_helper_functions(folder, self.data_lib)
@@ -283,7 +283,12 @@ def generate_mapping_code(mapping: MappingDef, folder: FolderDef,
283
283
  lines.append("")
284
284
  lines.append("")
285
285
  lines.append("if __name__ == '__main__':")
286
- lines.append(" config = load_config()")
286
+ lines.append(" import argparse as _ap")
287
+ lines.append(" _parser = _ap.ArgumentParser()")
288
+ lines.append(" _parser.add_argument('--param-file', default=None)")
289
+ lines.append(" _parser.add_argument('--config', default='config.yml')")
290
+ lines.append(" _args = _parser.parse_args()")
291
+ lines.append(" config = load_config(_args.config, param_file=_args.param_file)")
287
292
  lines.append(f" run_{_safe_name(mapping.name)}(config)")
288
293
  lines.append("")
289
294
 
@@ -590,9 +595,9 @@ def _generate_transformation(lines, tx, connector_graph, source_dfs, transform_m
590
595
  lines.append(f" # Transformation: {tx.name} (Type: {tx.type})")
591
596
 
592
597
  if tx_type == "expression":
593
- _gen_expression_transform(lines, tx, tx_safe, input_df, source_dfs)
598
+ _gen_expression_transform(lines, tx, tx_safe, input_df, source_dfs, data_lib)
594
599
  elif tx_type == "filter":
595
- _gen_filter_transform(lines, tx, tx_safe, input_df, source_dfs)
600
+ _gen_filter_transform(lines, tx, tx_safe, input_df, source_dfs, data_lib)
596
601
  elif tx_type in ("aggregator",):
597
602
  _gen_aggregator_transform(lines, tx, tx_safe, input_df, source_dfs, data_lib)
598
603
  elif tx_type == "sorter":
@@ -630,8 +635,9 @@ def _generate_transformation(lines, tx, connector_graph, source_dfs, transform_m
630
635
  lines.append("")
631
636
 
632
637
 
633
- def _gen_expression_transform(lines, tx, tx_safe, input_df, source_dfs):
634
- lines.append(f" df_{tx_safe} = {input_df}.copy()")
638
+ def _gen_expression_transform(lines, tx, tx_safe, input_df, source_dfs, data_lib="pandas"):
639
+ copy_expr = lib_copy(data_lib, input_df)
640
+ lines.append(f" df_{tx_safe} = {copy_expr}")
635
641
  has_expressions = False
636
642
  for fld in tx.fields:
637
643
  if fld.expression and fld.expression.strip() and fld.expression.strip() != fld.name:
@@ -647,7 +653,7 @@ def _gen_expression_transform(lines, tx, tx_safe, input_df, source_dfs):
647
653
  source_dfs[tx.name] = f"df_{tx_safe}"
648
654
 
649
655
 
650
- def _gen_filter_transform(lines, tx, tx_safe, input_df, source_dfs):
656
+ def _gen_filter_transform(lines, tx, tx_safe, input_df, source_dfs, data_lib="pandas"):
651
657
  filter_condition = ""
652
658
  for attr in tx.attributes:
653
659
  if attr.name == "Filter Condition":
@@ -655,9 +661,11 @@ def _gen_filter_transform(lines, tx, tx_safe, input_df, source_dfs):
655
661
  if filter_condition:
656
662
  expr_vec = convert_filter_vectorized(filter_condition, input_df)
657
663
  lines.append(f" # Filter: {filter_condition}")
658
- lines.append(f" df_{tx_safe} = {input_df}[{expr_vec}].copy()")
664
+ copy_expr = lib_copy(data_lib, f"{input_df}[{expr_vec}]")
665
+ lines.append(f" df_{tx_safe} = {copy_expr}")
659
666
  else:
660
- lines.append(f" df_{tx_safe} = {input_df}.copy()")
667
+ copy_expr = lib_copy(data_lib, input_df)
668
+ lines.append(f" df_{tx_safe} = {copy_expr}")
661
669
  source_dfs[tx.name] = f"df_{tx_safe}"
662
670
 
663
671
 
@@ -845,7 +853,8 @@ def _gen_lookup_transform(lines, tx, tx_safe, input_df, source_dfs, data_lib="pa
845
853
  elif lookup_table:
846
854
  lines.append(f" df_lkp_{tx_safe} = read_from_db(config, 'SELECT * FROM {lookup_table}', 'default')")
847
855
  else:
848
- lines.append(f" df_lkp_{tx_safe} = pd.DataFrame()")
856
+ empty_expr = lib_empty_df(data_lib)
857
+ lines.append(f" df_lkp_{tx_safe} = {empty_expr}")
849
858
 
850
859
  input_keys, lookup_keys = parse_lookup_condition(lookup_condition)
851
860
 
@@ -51,7 +51,12 @@ def generate_workflow_code(folder: FolderDef) -> str:
51
51
 
52
52
  lines.append("")
53
53
  lines.append("if __name__ == '__main__':")
54
- lines.append(" config = load_config()")
54
+ lines.append(" import argparse as _ap")
55
+ lines.append(" _parser = _ap.ArgumentParser()")
56
+ lines.append(" _parser.add_argument('--param-file', default=None)")
57
+ lines.append(" _parser.add_argument('--config', default='config.yml')")
58
+ lines.append(" _args = _parser.parse_args()")
59
+ lines.append(" config = load_config(_args.config, param_file=_args.param_file)")
55
60
  lines.append(" success = run_workflow(config)")
56
61
  lines.append(" sys.exit(0 if success else 1)")
57
62
  lines.append("")
@@ -297,21 +297,28 @@ def _vectorize_value(val, df_var="df"):
297
297
 
298
298
  def _vectorize_condition(cond, df_var="df"):
299
299
  c = cond.strip()
300
+
300
301
  c = re.sub(r'\bISNULL\s*\(\s*([A-Za-z_]\w*)\s*\)',
301
302
  lambda m: f'{df_var}["{m.group(1)}"].isna()', c, flags=re.IGNORECASE)
302
- c = re.sub(r'\b([A-Za-z_]\w*)\s*IS\s+NULL\b',
303
- lambda m: f'{df_var}["{m.group(1)}"].isna()', c, flags=re.IGNORECASE)
304
303
  c = re.sub(r'\b([A-Za-z_]\w*)\s*IS\s+NOT\s+NULL\b',
305
304
  lambda m: f'{df_var}["{m.group(1)}"].notna()', c, flags=re.IGNORECASE)
306
- c = re.sub(r'(?<!["\w])([A-Za-z_][A-Za-z0-9_]*)(?!["\w(.])',
307
- lambda m: f'{df_var}["{m.group(1)}"]' if m.group(1) not in (
308
- 'True', 'False', 'None', 'and', 'or', 'not', 'np', 'pd',
309
- 'str', 'int', 'float',
310
- ) else m.group(1), c)
311
- c = re.sub(r'<>', '!=', c)
312
- c = re.sub(r'(?<![<>!=])=(?!=)', '==', c)
305
+ c = re.sub(r'\b([A-Za-z_]\w*)\s*IS\s+NULL\b',
306
+ lambda m: f'{df_var}["{m.group(1)}"].isna()', c, flags=re.IGNORECASE)
307
+
313
308
  c = re.sub(r'\bAND\b', ' & ', c, flags=re.IGNORECASE)
314
309
  c = re.sub(r'\bOR\b', ' | ', c, flags=re.IGNORECASE)
310
+ c = re.sub(r'\bNOT\s+', ' ~', c, flags=re.IGNORECASE)
311
+ c = re.sub(r'<>', '!=', c)
312
+ c = re.sub(r'(?<![<>!=])=(?!=)', '==', c)
313
+
314
+ skip_words = {
315
+ 'True', 'False', 'None', 'and', 'or', 'not', 'np', 'pd',
316
+ 'str', 'int', 'float', 'isna', 'notna', 'fillna',
317
+ }
318
+ df_base = re.escape(df_var)
319
+ c = re.sub(r'(?<!["\w])(?!' + df_base + r'\b)([A-Za-z_][A-Za-z0-9_]*)(?!["\w(.\[])',
320
+ lambda m: f'{df_var}["{m.group(1)}"]' if m.group(1) not in skip_words else m.group(1),
321
+ c)
315
322
  return c
316
323
 
317
324
 
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: informatica-python
3
- Version: 1.5.0
3
+ Version: 1.5.1
4
4
  Summary: Convert Informatica PowerCenter workflow XML to Python/PySpark code
5
5
  Author: Nick
6
6
  License: MIT
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
4
4
 
5
5
  [project]
6
6
  name = "informatica-python"
7
- version = "1.5.0"
7
+ version = "1.5.1"
8
8
  description = "Convert Informatica PowerCenter workflow XML to Python/PySpark code"
9
9
  readme = "README.md"
10
10
  license = {text = "MIT"}
@@ -202,15 +202,37 @@ class TestFilterVectorized:
202
202
  result = convert_filter_vectorized("VALUE > 50", "df_src")
203
203
  assert ">" in result
204
204
  assert "50" in result
205
+ assert 'df_src["VALUE"]' in result
206
+
207
+ def test_and_condition(self):
208
+ result = convert_filter_vectorized("A > 1 AND B < 2", "df")
209
+ assert "&" in result
210
+ assert 'df["A"]' in result
211
+ assert 'df["B"]' in result
212
+ assert "AND" not in result
213
+
214
+ def test_or_condition(self):
215
+ result = convert_filter_vectorized("STATUS = 'A' OR STATUS = 'B'", "df")
216
+ assert "|" in result
205
217
 
206
218
  def test_is_null_filter(self):
207
219
  result = convert_filter_vectorized("NAME IS NULL", "df_src")
208
220
  assert ".isna()" in result
221
+ assert 'df_src["NAME"]' in result
222
+
223
+ def test_is_not_null_filter(self):
224
+ result = convert_filter_vectorized("NAME IS NOT NULL", "df_src")
225
+ assert ".notna()" in result
209
226
 
210
227
  def test_empty_filter(self):
211
228
  assert convert_filter_vectorized("") == "True"
212
229
  assert convert_filter_vectorized(None) == "True"
213
230
 
231
+ def test_compound_iif(self):
232
+ result = convert_expression_vectorized("IIF(A > 1 AND B < 2, 1, 0)", "df")
233
+ assert "np.where" in result
234
+ assert "&" in result
235
+
214
236
 
215
237
  class TestLibAdapters:
216
238