PyPI - informatica-python - Versions diffs - 1.9.8__py3-none-any.whl → 1.10.0__py3-none-any.whl - Mend

informatica-python 1.9.8__py3-none-any.whl → 1.10.0__py3-none-any.whl

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Files changed (9) hide show

informatica_python/generators/mapping_gen.py CHANGED Viewed

@@ -54,6 +54,8 @@ def _expand_mapplet_recursive(mapplet, mapplet_map, prefix, depth=0, max_depth=1
             to_instance=new_to,
             to_field=conn.to_field,
             to_instance_type=conn.to_instance_type,
+            from_instance_group=conn.from_instance_group,
+            to_instance_group=conn.to_instance_group,
         ))
     for inst in getattr(mapplet, 'instances', []):
@@ -138,6 +140,8 @@ def _inline_mapplets(mapping, folder):
                     to_instance=first_tx,
                     to_field=conn.to_field,
                     to_instance_type=conn.to_instance_type,
+                    from_instance_group=conn.from_instance_group,
+                    to_instance_group=conn.to_instance_group,
                 ))
             else:
                 rewired_connectors.append(conn)
@@ -167,6 +171,8 @@ def _inline_mapplets(mapping, folder):
                     to_instance=conn.to_instance,
                     to_field=conn.to_field,
                     to_instance_type=conn.to_instance_type,
+                    from_instance_group=conn.from_instance_group,
+                    to_instance_group=conn.to_instance_group,
                 ))
             else:
                 rewired_connectors.append(conn)
@@ -730,7 +736,11 @@ def _generate_transformation(lines, tx, connector_graph, source_dfs, transform_m
     input_conns = connector_graph.get("to", {}).get(tx.name, [])
     input_sources = set()
     for c in input_conns:
-        input_sources.add(c.from_instance)
+        if c.from_instance_group:
+            group_key = f"{c.from_instance}:{c.from_instance_group}"
+            input_sources.add(group_key)
+        else:
+            input_sources.add(c.from_instance)
     input_df = None
     for src in input_sources:
@@ -739,7 +749,14 @@ def _generate_transformation(lines, tx, connector_graph, source_dfs, transform_m
             break
     if not input_df:
         for src in input_sources:
-            input_df = f"df_{_safe_name(src)}"
+            base = src.split(":")[0] if ":" in src else src
+            if base in source_dfs:
+                input_df = source_dfs[base]
+                break
+    if not input_df:
+        for src in input_sources:
+            base = src.split(":")[0] if ":" in src else src
+            input_df = f"df_{_safe_name(base)}"
             break
     if not input_df:
         input_df = "df_input"
@@ -1095,9 +1112,24 @@ def _gen_lookup_transform(lines, tx, tx_safe, input_df, source_dfs, connector_gr
 def _gen_router_transform(lines, tx, tx_safe, input_df, source_dfs):
     lines.append(f"    # Router groups:")
     group_conditions = {}
-    for attr in tx.attributes:
-        if "Group Filter Condition" in attr.name:
-            group_conditions[attr.name] = attr.value
+    output_groups = [
+        g for g in tx.groups
+        if g.type.upper() not in ("INPUT", "") and "DEFAULT" not in g.type.upper()
+    ]
+    output_groups.sort(key=lambda g: g.order)
+    if output_groups:
+        for g in output_groups:
+            if g.expression and g.expression.strip():
+                group_conditions[g.name] = g.expression
+            else:
+                group_conditions[g.name] = ""
+    if not group_conditions:
+        for attr in tx.attributes:
+            if "Group Filter Condition" in attr.name:
+                group_conditions[attr.name] = attr.value
     remaining_mask_parts = []
     if group_conditions:
@@ -1108,15 +1140,21 @@ def _gen_router_transform(lines, tx, tx_safe, input_df, source_dfs):
                 expr_py = f"pd.Series(True, index={input_df}.index)"
             mask_var = f"_router_mask_{tx_safe}_{i}"
             lines.append(f"    {mask_var} = {expr_py}  # {gname}")
-            lines.append(f"    df_{tx_safe}_group{i} = {input_df}[{mask_var}].copy()")
-            source_dfs[f"{tx.name}_group{i}"] = f"df_{tx_safe}_group{i}"
+            group_df_name = f"df_{tx_safe}_{_safe_name(gname)}"
+            lines.append(f"    {group_df_name} = {input_df}[{mask_var}].copy()")
+            source_dfs[f"{tx.name}:{gname}"] = group_df_name
             remaining_mask_parts.append(f"~{mask_var}")
+    default_groups = [g for g in tx.groups if "DEFAULT" in g.type.upper()]
+    default_name = default_groups[0].name if default_groups else "DEFAULT"
     if remaining_mask_parts:
         lines.append(f"    _router_default_mask = {' & '.join(remaining_mask_parts)}")
-        lines.append(f"    df_{tx_safe} = {input_df}[_router_default_mask].copy()  # Default group")
+        lines.append(f"    df_{tx_safe} = {input_df}[_router_default_mask].copy()  # Default group ({default_name})")
     else:
-        lines.append(f"    df_{tx_safe} = {input_df}.copy()  # Default group")
+        lines.append(f"    df_{tx_safe} = {input_df}.copy()  # Default group ({default_name})")
     source_dfs[tx.name] = f"df_{tx_safe}"
+    source_dfs[f"{tx.name}:{default_name}"] = f"df_{tx_safe}"
 def _gen_union_transform(lines, tx, tx_safe, input_sources, source_dfs, data_lib="pandas"):
@@ -1448,6 +1486,11 @@ def _generate_target_write(lines, tgt_name, tgt_def, connector_graph, source_dfs
     to_conns = connector_graph.get("to", {}).get(tgt_name, [])
     input_df = None
     for c in to_conns:
+        if c.from_instance_group:
+            group_key = f"{c.from_instance}:{c.from_instance_group}"
+            if group_key in source_dfs:
+                input_df = source_dfs[group_key]
+                break
         if c.from_instance in source_dfs:
             input_df = source_dfs[c.from_instance]
             break

informatica_python/models.py CHANGED Viewed

@@ -80,6 +80,7 @@ class GroupDef:
     type: str = ""
     description: str = ""
     order: int = 0
+    expression: str = ""
     fields: List[FieldDef] = field(default_factory=list)
@@ -274,6 +275,8 @@ class ConnectorDef:
     to_field: str
     to_instance: str
     to_instance_type: str
+    from_instance_group: str = ""
+    to_instance_group: str = ""
 @dataclass

informatica_python/parser.py CHANGED Viewed

@@ -240,6 +240,7 @@ class InformaticaParser:
             type=self._attr(elem, "TYPE"),
             description=self._attr(elem, "DESCRIPTION"),
             order=self._int_attr(elem, "ORDER"),
+            expression=self._attr(elem, "EXPRESSION"),
         )
         for fld in elem.findall("SOURCEFIELD"):
             grp.fields.append(self._parse_source_field(fld))
@@ -580,6 +581,8 @@ class InformaticaParser:
             to_field=self._attr(elem, "TOFIELD"),
             to_instance=self._attr(elem, "TOINSTANCE"),
             to_instance_type=self._attr(elem, "TOINSTANCETYPE"),
+            from_instance_group=self._attr(elem, "FROMINSTANCEGROUP"),
+            to_instance_group=self._attr(elem, "TOINSTANCEGROUP"),
         )
     def _parse_instance(self, elem) -> InstanceDef:

{informatica_python-1.9.8.dist-info → informatica_python-1.10.0.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: informatica-python
-Version: 1.9.8
+Version: 1.10.0
 Summary: Convert Informatica PowerCenter workflow XML to Python/PySpark code
 Author: Nick
 License: MIT
@@ -124,7 +124,7 @@ The code generator produces real, runnable Python for these transformation types
 - **Expression** — Field-level expressions converted to vectorized pandas operations (`df["COL"]` style) with 40+ vectorized function handlers
 - **Filter** — Row filtering with vectorized converted conditions
 - **Joiner** — `pd.merge()` with join type and condition parsing (inner/left/right/outer)
-- **Lookup** — `pd.merge()` lookups with connection-aware DB reads, multiple match policies, default values, `$$PARAM` substitution
+- **Lookup** — `pd.merge()` lookups with connection-aware DB reads, multiple match policies, default values, `$$PARAM` substitution, SQL override support, table caching via `lookup_func()`
 - **Aggregator** — `groupby().agg()` with SUM/COUNT/AVG/MIN/MAX/FIRST/LAST, computed aggregates
 - **Sorter** — `sort_values()` with multi-key ascending/descending per-field direction from SORTDIRECTION attribute
 - **Router** — Multi-group conditional routing with named groups
@@ -196,7 +196,7 @@ Column-level pandas operations instead of row-level iteration. The expression co
 - `REPLACECHR/REPLACESTR` → `.str.replace()`
 - `REG_EXTRACT/REG_REPLACE` → `.str.extract()/.str.replace(regex=True)`
 - `CHR(code)` → `chr(int(code))`
-- `||` concatenation → `+` with `.astype(str)` on non-literals
+- `||` concatenation → `+` with smart coercion: `.fillna('').astype(str)` for Series, `str()` for scalars
 **Date/Time:**
 - `TO_DATE(val, fmt)` → `pd.to_datetime()` with Informatica→Python format conversion
@@ -343,10 +343,12 @@ Target field datatypes are mapped to pandas types and generate proper casting co
 - Decimals/Floats: `pd.to_numeric(errors='coerce')`
 - Booleans: `.astype('boolean')`
-### Flat File Handling (v1.3+)
+### Flat File Handling (v1.3+, enhanced v1.9.8)
 Parses FLATFILE metadata for delimiter, fixed-width, header lines, skip rows, quote/escape chars. Generates `pd.read_fwf()` for fixed-width or enriched `read_file()` for delimited.
+**Fixed-width enhancements (v1.9.8):** `OFFSET`, `PHYSICALLENGTH`, and `PHYSICALOFFSET` are parsed from `SOURCEFIELD` attributes. `physical_length` is preferred over `precision` for accurate column width calculations in `pd.read_fwf()`.
 ### Mapplet Inlining (v1.3+)
 Expands Mapplet instances into prefixed transforms, rewires connectors, and eliminates duplication.
@@ -371,12 +373,17 @@ The generated `helper_functions.py` provides a complete runtime library:
 ### Database Operations
 | Function | Description |
 |----------|-------------|
-| `get_db_connection(config, conn_name)` | Create DB connection (pyodbc/pymssql/sqlalchemy fallback for MSSQL) |
+| `get_db_connection(config, conn_name)` | SQLAlchemy-first DB connection with engine caching and connection pooling; DBAPI fallback for pyodbc/pymssql |
 | `read_from_db(config, query, conn_name)` | Execute SQL query and return DataFrame |
 | `write_to_db(config, df, table, conn_name)` | Write DataFrame to database table via `.to_sql()` |
-| `execute_sql(config, sql, conn_name)` | Execute DDL/DML statement (INSERT, UPDATE, DELETE) |
+| `execute_sql(config, sql, conn_name)` | Execute DDL/DML statement; auto-detects SQLAlchemy vs DBAPI via `dialect` attribute |
 | `write_with_update_strategy(config, df, table, ...)` | Split rows by `_update_strategy` column into INSERT/UPDATE/DELETE/REJECT operations |
 | `call_stored_procedure(config, proc, params, ...)` | Execute stored procedure with input/output parameter mapping (Oracle/MSSQL/generic) |
+| `lookup_func(table, *args)` | Full lookup implementation with table caching, condition parsing, and default value support |
+| `resolve_env(value)` | Resolve `${VAR}` placeholders from environment variables with config fallback |
+| `resolve_builtin_variable(var_name, ...)` | Resolve `$PMMappingName`, `$PMSessionName`, `$PMFolderName`, etc. |
+| `rename_with_duplicates(df, col_map)` | Safe column rename supporting one-source-to-many-target mapping |
+| `_safe_close(conn)` | Safe connection cleanup handling both SQLAlchemy and raw DBAPI connections |
 ### File Operations
 | Function | Description |
@@ -407,7 +414,41 @@ The generated `helper_functions.py` provides a complete runtime library:
 ## Changelog
-### v1.9.3 (Current)
+### v1.10.0 (Current)
+- **Router multi-group output support**: Router transformations now properly handle `<GROUP>` elements with `EXPRESSION` attributes — generates separate filtered DataFrames for each named output group (e.g., `df_rtr_rest_type_per`, `df_rtr_rest_value_per`), not just the DEFAULT group
+- **Connector group routing**: `FROMINSTANCEGROUP` / `TOINSTANCEGROUP` attributes on `CONNECTOR` elements are now parsed and used to wire downstream transforms/targets to the correct Router output group
+- **GroupDef expression field**: `GroupDef` model now stores the `EXPRESSION` attribute from `<GROUP>` XML elements
+- **Backward-compatible Router fallback**: Existing `TABLEATTRIBUTE`-based Router group conditions (older XML format) continue to work — the code checks `<GROUP>` elements first, then falls back to `TABLEATTRIBUTE` entries
+- **223 tests** passing
+### v1.9.8
+- **NOT(expr) function-call form**: `NOT(ISNULL(x))` now correctly converts to `~(df["x"].isna())` — handles both `NOT ` (with space) and `NOT(` (without space) forms
+- **AND/OR/NOT as field names fix**: Logical operators no longer mangled into `df["AND"]` / `df["OR"]` — conversion moved before field substitution in both `_vec_recursive` fallback and `_vectorize_simple`
+- **Condition tokenizer word-boundary fix**: `_split_condition_tokens` no longer splits on `OR` inside field names like `DeletedIndicator` — verifies preceding character is a real word boundary
+- **`$PMMappingName` in expressions**: `$PM*` built-in variables in expression context properly convert to `resolve_builtin_variable("PMMappingName")` instead of being mangled to `$df["PMMappingName"]`
+- **TO_CHAR arithmetic parenthesization**: `TO_CHAR(TO_INTEGER(x) - 1)` now produces `(pd.to_numeric(...) - 1).astype(str)` instead of incorrect `- 1.astype(str)` binding
+- **String literal early-return fix**: Expressions like `'PER_' || X || '_suffix'` no longer short-circuit as a single string literal
+- **Fixed-width file enhancements**: `OFFSET`, `PHYSICALLENGTH`, `PHYSICALOFFSET` parsed from SOURCEFIELD XML; `physical_length` preferred over `precision` for `read_fwf` column widths
+- **Smart concat coercion**: Scalar returns (e.g. `resolve_builtin_variable()`, `get_variable()`) use `str()` wrapping; Series use `.fillna('').astype(str)`
+- **700 tests** passing
+### v1.9.5 / v1.9.6
+- **`rename_with_duplicates`** helper for one-source-to-many-target column mapping
+- **`resolve_env()`** for `${VAR}` placeholder resolution (env → config fallback)
+- **`resolve_builtin_variable()`** for `$PMMappingName`, `$PMSessionName`, `$PMFolderName`, etc.
+- **SQLAlchemy-first `get_db_connection`**: Engine caching and connection pooling; DBAPI fallback for pyodbc/pymssql
+- **`_safe_close()`**: Safe connection cleanup handling both SQLAlchemy and raw DBAPI connections
+- **Full `lookup_func()` implementation**: Table caching, condition parsing, default value support
+- **Null-safe `||` concatenation**: `.fillna('').astype(str)` prevents "nan" strings in concatenation
+- **`$PM*` variable substitution in SQL Override queries**
+- **`execute_sql` dialect detection**: Uses `dialect` attribute to choose SQLAlchemy `text()` vs DBAPI `cursor.execute()`
+- **678 tests** passing
+### v1.9.4
+- Extended expression function coverage and edge-case fixes
+- Improved mapplet and connector handling
+### v1.9.3
 - **Smart target write detection**: Bare targets default to `write_to_db()` instead of `write_file()`; file extension allowlist (`.csv`, `.dat`, `.txt`, `.xml`, `.json`, `.parquet`, `.xlsx`, `.xls`, `.tsv`, `.avro`) for file targets; schema-qualified names (`dbo.TABLE`) correctly route to database
 - **DECODE vectorization**: `DECODE(TRUE, cond1, val1, ..., default)` → nested `np.where()` chains; value-matching DECODE; handles IN() conditions and complex boolean nesting
 - **IS_SPACES vectorization**: `IS_SPACES(field)` → `field.str.strip().eq("")`
@@ -495,7 +536,7 @@ The generated `helper_functions.py` provides a complete runtime library:
 cd informatica_python
 pip install -e ".[dev]"
-# Run tests (663 tests)
+# Run tests (700 tests)
 pytest tests/ -v
 ```

{informatica_python-1.9.8.dist-info → informatica_python-1.10.0.dist-info}/RECORD RENAMED Viewed

@@ -1,13 +1,13 @@
 informatica_python/__init__.py,sha256=JFO8fVMClSWe0SR-CBseX4RaPyyC3rZBdxxjy47ZT5E,337
 informatica_python/cli.py,sha256=gFwg0O99vKM-OLO0HoHA4emd-6qrgjMNqa9T59e4e_s,2905
 informatica_python/converter.py,sha256=xCuWrYzDji0yN72D3QqOgZCVVM2j3k2_CvlGplCWxLU,22779
-informatica_python/models.py,sha256=sZvVzYrEIRAfzV_HduN-qCeOAt5KZ_z7jzNTmPP3Oxs,17371
-informatica_python/parser.py,sha256=RVxoT1j6QTer2RyeG-PCEyKaoZAQhFepRcrRdsEm6OM,45410
+informatica_python/models.py,sha256=lBlxJBVSyW050KAGj8OyUd11Wyfk-MA2XWBT3qEQSK4,17462
+informatica_python/parser.py,sha256=LpX6Cg5jlqqZyLMr_VRUVvNaLdqls4aB-PdGCLbehZo,45603
 informatica_python/generators/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 informatica_python/generators/config_gen.py,sha256=4tqcNKTB06kyGZIiM4yl0q97q_i3zeCHXTjuE1dNFKY,5726
 informatica_python/generators/error_log_gen.py,sha256=2cc0rEcblydHkb9VAMXlrH7WdSQ-CNqAXcwVk3FYZeM,21319
 informatica_python/generators/helper_gen.py,sha256=lC30hyZn6RIkbo4e_6sbqdrCfmZHWaXdr-p0tmtfILc,82376
-informatica_python/generators/mapping_gen.py,sha256=5wPS9t3OLpbo89gYsHMbVqCg9Jgfzmt13IqK4diOS2g,72781
+informatica_python/generators/mapping_gen.py,sha256=eBYJEGWKUPeRrdnWpJRFl6VL9N7qj5ybmKCgG7rFBDI,74554
 informatica_python/generators/sql_gen.py,sha256=O8Y-aJz9EyFJ0DXeuISRt5yKwC3wlp2K3B0BHrmxrXw,4872
 informatica_python/generators/workflow_gen.py,sha256=_uSlBg31ZRMhMlCYk4hWDRBPaBROrepD8_v3QGEWJxE,18089
 informatica_python/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -15,9 +15,9 @@ informatica_python/utils/datatype_map.py,sha256=iLOYg-iBKT4rMecGbrFkTpJj4yqs5S9H
 informatica_python/utils/expression_converter.py,sha256=ynprsvZGvavML3Y8C485GyjaoqQ-k67OESXHShafeTo,48244
 informatica_python/utils/lib_adapters.py,sha256=1ZtuMbgDg9Ukf-OF_EG1L_BeeR-6JQk8Kx3WwMfvNRU,6516
 informatica_python/utils/sql_dialect.py,sha256=_IHJbfu8a3mT_OvHpybgSfZKqz6mwVy5ItTKDRChqnU,5461
-informatica_python-1.9.8.dist-info/licenses/LICENSE,sha256=77RaRDdXgey1D90YZAjXqEQdBxWfvUQqLQX3pC1qjUE,1061
-informatica_python-1.9.8.dist-info/METADATA,sha256=YErpXHS5T-sSTEUwENMiaCWaYhj6xiQALyyLZACrc2g,26097
-informatica_python-1.9.8.dist-info/WHEEL,sha256=PovZm1ExVWmrRefZoXCfejlbKLnQI5SVIf1SWRV4QQI,97
-informatica_python-1.9.8.dist-info/entry_points.txt,sha256=030jjTrx-1oRRQ16HZz52rdcKS8R8_llnymsTUtn_Xc,67
-informatica_python-1.9.8.dist-info/top_level.txt,sha256=Dngg-WNteYi22XAJU2XKAQS8aZ52yM2LYC0tzxrlbVQ,19
-informatica_python-1.9.8.dist-info/RECORD,,
+informatica_python-1.10.0.dist-info/licenses/LICENSE,sha256=77RaRDdXgey1D90YZAjXqEQdBxWfvUQqLQX3pC1qjUE,1061
+informatica_python-1.10.0.dist-info/METADATA,sha256=eL7mk2t5wXyw9J1g6MjSXuUzByXzyX0BxPtXXX_wTAs,30427
+informatica_python-1.10.0.dist-info/WHEEL,sha256=PovZm1ExVWmrRefZoXCfejlbKLnQI5SVIf1SWRV4QQI,97
+informatica_python-1.10.0.dist-info/entry_points.txt,sha256=030jjTrx-1oRRQ16HZz52rdcKS8R8_llnymsTUtn_Xc,67
+informatica_python-1.10.0.dist-info/top_level.txt,sha256=Dngg-WNteYi22XAJU2XKAQS8aZ52yM2LYC0tzxrlbVQ,19
+informatica_python-1.10.0.dist-info/RECORD,,

{informatica_python-1.9.8.dist-info → informatica_python-1.10.0.dist-info}/WHEEL RENAMED Viewed

File without changes

{informatica_python-1.9.8.dist-info → informatica_python-1.10.0.dist-info}/entry_points.txt RENAMED Viewed

File without changes

{informatica_python-1.9.8.dist-info → informatica_python-1.10.0.dist-info}/licenses/LICENSE RENAMED Viewed

File without changes

{informatica_python-1.9.8.dist-info → informatica_python-1.10.0.dist-info}/top_level.txt RENAMED Viewed

File without changes

informatica-python 1.9.8__py3-none-any.whl → 1.10.0__py3-none-any.whl

informatica-python 1.9.8__py3-none-any.whl → 1.10.0__py3-none-any.whl