informatica-python 1.4.1__tar.gz → 1.5.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- informatica_python-1.5.0/LICENSE +21 -0
- informatica_python-1.5.0/PKG-INFO +228 -0
- informatica_python-1.5.0/README.md +201 -0
- informatica_python-1.5.0/informatica_python/__init__.py +13 -0
- {informatica_python-1.4.1 → informatica_python-1.5.0}/informatica_python/cli.py +5 -0
- {informatica_python-1.4.1 → informatica_python-1.5.0}/informatica_python/generators/helper_gen.py +81 -7
- {informatica_python-1.4.1 → informatica_python-1.5.0}/informatica_python/generators/mapping_gen.py +79 -80
- informatica_python-1.5.0/informatica_python/utils/expression_converter.py +437 -0
- informatica_python-1.5.0/informatica_python/utils/lib_adapters.py +164 -0
- informatica_python-1.5.0/informatica_python.egg-info/PKG-INFO +228 -0
- {informatica_python-1.4.1 → informatica_python-1.5.0}/informatica_python.egg-info/SOURCES.txt +4 -1
- informatica_python-1.5.0/pyproject.toml +41 -0
- informatica_python-1.5.0/tests/test_integration.py +518 -0
- informatica_python-1.4.1/PKG-INFO +0 -118
- informatica_python-1.4.1/README.md +0 -106
- informatica_python-1.4.1/informatica_python/__init__.py +0 -4
- informatica_python-1.4.1/informatica_python/utils/expression_converter.py +0 -264
- informatica_python-1.4.1/informatica_python.egg-info/PKG-INFO +0 -118
- informatica_python-1.4.1/pyproject.toml +0 -24
- {informatica_python-1.4.1 → informatica_python-1.5.0}/informatica_python/converter.py +0 -0
- {informatica_python-1.4.1 → informatica_python-1.5.0}/informatica_python/generators/__init__.py +0 -0
- {informatica_python-1.4.1 → informatica_python-1.5.0}/informatica_python/generators/config_gen.py +0 -0
- {informatica_python-1.4.1 → informatica_python-1.5.0}/informatica_python/generators/error_log_gen.py +0 -0
- {informatica_python-1.4.1 → informatica_python-1.5.0}/informatica_python/generators/sql_gen.py +0 -0
- {informatica_python-1.4.1 → informatica_python-1.5.0}/informatica_python/generators/workflow_gen.py +0 -0
- {informatica_python-1.4.1 → informatica_python-1.5.0}/informatica_python/models.py +0 -0
- {informatica_python-1.4.1 → informatica_python-1.5.0}/informatica_python/parser.py +0 -0
- {informatica_python-1.4.1 → informatica_python-1.5.0}/informatica_python/utils/__init__.py +0 -0
- {informatica_python-1.4.1 → informatica_python-1.5.0}/informatica_python/utils/datatype_map.py +0 -0
- {informatica_python-1.4.1 → informatica_python-1.5.0}/informatica_python.egg-info/dependency_links.txt +0 -0
- {informatica_python-1.4.1 → informatica_python-1.5.0}/informatica_python.egg-info/entry_points.txt +0 -0
- {informatica_python-1.4.1 → informatica_python-1.5.0}/informatica_python.egg-info/requires.txt +0 -0
- {informatica_python-1.4.1 → informatica_python-1.5.0}/informatica_python.egg-info/top_level.txt +0 -0
- {informatica_python-1.4.1 → informatica_python-1.5.0}/setup.cfg +0 -0
- {informatica_python-1.4.1 → informatica_python-1.5.0}/tests/test_converter.py +0 -0
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2025 Nick
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
|
@@ -0,0 +1,228 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: informatica-python
|
|
3
|
+
Version: 1.5.0
|
|
4
|
+
Summary: Convert Informatica PowerCenter workflow XML to Python/PySpark code
|
|
5
|
+
Author: Nick
|
|
6
|
+
License: MIT
|
|
7
|
+
Keywords: informatica,powercenter,etl,code-generator,pandas,pyspark,data-engineering
|
|
8
|
+
Classifier: Development Status :: 4 - Beta
|
|
9
|
+
Classifier: Intended Audience :: Developers
|
|
10
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
11
|
+
Classifier: Programming Language :: Python :: 3
|
|
12
|
+
Classifier: Programming Language :: Python :: 3.8
|
|
13
|
+
Classifier: Programming Language :: Python :: 3.9
|
|
14
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
15
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
16
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
17
|
+
Classifier: Topic :: Software Development :: Code Generators
|
|
18
|
+
Classifier: Topic :: Database :: Database Engines/Servers
|
|
19
|
+
Requires-Python: >=3.8
|
|
20
|
+
Description-Content-Type: text/markdown
|
|
21
|
+
License-File: LICENSE
|
|
22
|
+
Requires-Dist: lxml>=4.9.0
|
|
23
|
+
Requires-Dist: pyyaml>=6.0
|
|
24
|
+
Provides-Extra: dev
|
|
25
|
+
Requires-Dist: pytest>=7.0; extra == "dev"
|
|
26
|
+
Dynamic: license-file
|
|
27
|
+
|
|
28
|
+
# informatica-python
|
|
29
|
+
|
|
30
|
+
Convert Informatica PowerCenter workflow XML exports into clean, runnable Python/PySpark code.
|
|
31
|
+
|
|
32
|
+
**Author:** Nick
|
|
33
|
+
**License:** MIT
|
|
34
|
+
**PyPI:** [informatica-python](https://pypi.org/project/informatica-python/)
|
|
35
|
+
|
|
36
|
+
---
|
|
37
|
+
|
|
38
|
+
## Overview
|
|
39
|
+
|
|
40
|
+
`informatica-python` parses Informatica PowerCenter XML export files and generates equivalent Python code using your choice of data library. It handles all 72 tags defined in the PowerCenter XML DTD and produces a complete, ready-to-run Python project.
|
|
41
|
+
|
|
42
|
+
## Installation
|
|
43
|
+
|
|
44
|
+
```bash
|
|
45
|
+
pip install informatica-python
|
|
46
|
+
```
|
|
47
|
+
|
|
48
|
+
## Quick Start
|
|
49
|
+
|
|
50
|
+
### Command Line
|
|
51
|
+
|
|
52
|
+
```bash
|
|
53
|
+
# Generate Python files to a directory
|
|
54
|
+
informatica-python workflow_export.xml -o output_dir
|
|
55
|
+
|
|
56
|
+
# Generate as a zip archive
|
|
57
|
+
informatica-python workflow_export.xml -z output.zip
|
|
58
|
+
|
|
59
|
+
# Use a different data library
|
|
60
|
+
informatica-python workflow_export.xml -o output_dir --data-lib polars
|
|
61
|
+
|
|
62
|
+
# Parse to JSON only (no code generation)
|
|
63
|
+
informatica-python workflow_export.xml --json
|
|
64
|
+
|
|
65
|
+
# Save parsed JSON to file
|
|
66
|
+
informatica-python workflow_export.xml --json-file parsed.json
|
|
67
|
+
```
|
|
68
|
+
|
|
69
|
+
### Python API
|
|
70
|
+
|
|
71
|
+
```python
|
|
72
|
+
from informatica_python import InformaticaConverter
|
|
73
|
+
|
|
74
|
+
converter = InformaticaConverter()
|
|
75
|
+
|
|
76
|
+
# Parse and generate files
|
|
77
|
+
converter.convert_to_files("workflow_export.xml", "output_dir")
|
|
78
|
+
|
|
79
|
+
# Parse and generate zip
|
|
80
|
+
converter.convert_to_zip("workflow_export.xml", "output.zip")
|
|
81
|
+
|
|
82
|
+
# Parse to structured dict
|
|
83
|
+
result = converter.parse_file("workflow_export.xml")
|
|
84
|
+
|
|
85
|
+
# Use a different data library
|
|
86
|
+
converter.convert_to_files("workflow_export.xml", "output_dir", data_lib="polars")
|
|
87
|
+
```
|
|
88
|
+
|
|
89
|
+
## Generated Output Files
|
|
90
|
+
|
|
91
|
+
| File | Description |
|
|
92
|
+
|------|-------------|
|
|
93
|
+
| `helper_functions.py` | Database/file I/O helpers, Informatica expression equivalents (80+ functions) |
|
|
94
|
+
| `mapping_N.py` | One per mapping — transformation logic, source reads, target writes |
|
|
95
|
+
| `workflow.py` | Task orchestration with topological ordering and error handling |
|
|
96
|
+
| `config.yml` | Connection configs, source/target metadata, runtime parameters |
|
|
97
|
+
| `all_sql_queries.sql` | All SQL extracted from Source Qualifiers, Lookups, SQL transforms |
|
|
98
|
+
| `error_log.txt` | Conversion summary, warnings, and unsupported feature notes |
|
|
99
|
+
|
|
100
|
+
## Supported Data Libraries
|
|
101
|
+
|
|
102
|
+
Select via `--data-lib` CLI flag or `data_lib` parameter:
|
|
103
|
+
|
|
104
|
+
| Library | Flag | Best For |
|
|
105
|
+
|---------|------|----------|
|
|
106
|
+
| **pandas** | `pandas` (default) | General-purpose, most compatible |
|
|
107
|
+
| **dask** | `dask` | Large datasets, parallel processing |
|
|
108
|
+
| **polars** | `polars` | High performance, Rust-backed |
|
|
109
|
+
| **vaex** | `vaex` | Out-of-core, billion-row datasets |
|
|
110
|
+
| **modin** | `modin` | Drop-in pandas replacement, multi-core |
|
|
111
|
+
|
|
112
|
+
## Supported Transformations
|
|
113
|
+
|
|
114
|
+
The code generator produces real, runnable Python for these transformation types:
|
|
115
|
+
|
|
116
|
+
- **Source Qualifier** — SQL override, pre/post SQL, column selection
|
|
117
|
+
- **Expression** — Field-level expressions converted to pandas operations
|
|
118
|
+
- **Filter** — Row filtering with converted conditions
|
|
119
|
+
- **Joiner** — `pd.merge()` with join type and condition parsing
|
|
120
|
+
- **Lookup** — `pd.merge()` lookups with connection-aware DB/file reads
|
|
121
|
+
- **Aggregator** — `groupby().agg()` with SUM/COUNT/AVG/MIN/MAX/FIRST/LAST
|
|
122
|
+
- **Sorter** — `sort_values()` with multi-key ascending/descending
|
|
123
|
+
- **Router** — Multi-group conditional routing with if/elif/else
|
|
124
|
+
- **Union** — `pd.concat()` across multiple input groups
|
|
125
|
+
- **Update Strategy** — Insert/Update/Delete/Reject flag generation
|
|
126
|
+
- **Sequence Generator** — Auto-incrementing ID columns
|
|
127
|
+
- **Normalizer** — `pd.melt()` with auto-detected id/value vars
|
|
128
|
+
- **Rank** — `groupby().rank()` with Top-N filtering
|
|
129
|
+
- **Stored Procedure** — Stub generation with SP name and parameters
|
|
130
|
+
- **Transaction Control** — Commit/rollback logic stubs
|
|
131
|
+
- **Custom / Java** — Placeholder stubs with TODO markers
|
|
132
|
+
- **SQL Transform** — Direct SQL execution pass-through
|
|
133
|
+
|
|
134
|
+
## Supported XML Tags (72 Tags)
|
|
135
|
+
|
|
136
|
+
**Top-level:** POWERMART, REPOSITORY, FOLDER, FOLDERVERSION
|
|
137
|
+
|
|
138
|
+
**Source/Target:** SOURCE, SOURCEFIELD, TARGET, TARGETFIELD, TARGETINDEX, TARGETINDEXFIELD, FLATFILE, XMLINFO, XMLTEXT, GROUP, TABLEATTRIBUTE, FIELDATTRIBUTE, METADATAEXTENSION, KEYWORD, ERPSRCINFO
|
|
139
|
+
|
|
140
|
+
**Mapping/Mapplet:** MAPPING, MAPPLET, TRANSFORMATION, TRANSFORMFIELD, TRANSFORMFIELDATTR, TRANSFORMFIELDATTRDEF, INSTANCE, ASSOCIATED_SOURCE_INSTANCE, CONNECTOR, MAPDEPENDENCY, TARGETLOADORDER, MAPPINGVARIABLE, FIELDDEPENDENCY, INITPROP, ERPINFO
|
|
141
|
+
|
|
142
|
+
**Task/Session/Workflow:** TASK, TIMER, VALUEPAIR, SCHEDULER, SCHEDULEINFO, STARTOPTIONS, ENDOPTIONS, SCHEDULEOPTIONS, RECURRING, CUSTOM, DAILYFREQUENCY, REPEAT, FILTER, SESSION, CONFIGREFERENCE, SESSTRANSFORMATIONINST, SESSTRANSFORMATIONGROUP, PARTITION, HASHKEY, KEYRANGE, CONFIG, SESSIONCOMPONENT, CONNECTIONREFERENCE, TASKINSTANCE, WORKFLOWLINK, WORKFLOWVARIABLE, WORKFLOWEVENT, WORKLET, WORKFLOW, ATTRIBUTE
|
|
143
|
+
|
|
144
|
+
**Shortcut:** SHORTCUT
|
|
145
|
+
|
|
146
|
+
**SAP:** SAPFUNCTION, SAPSTRUCTURE, SAPPROGRAM, SAPOUTPUTPORT, SAPVARIABLE, SAPPROGRAMFLOWOBJECT, SAPTABLEPARAM
|
|
147
|
+
|
|
148
|
+
## Key Features
|
|
149
|
+
|
|
150
|
+
### Session Connection Overrides (v1.4+)
|
|
151
|
+
When sessions define per-transform connection overrides (different database, file directory, or filename), the generated code uses those overrides instead of source/target defaults.
|
|
152
|
+
|
|
153
|
+
### Worklet Support (v1.4+)
|
|
154
|
+
Worklets are detected, and a separate `run_worklet_NAME(config)` function is generated for each one. The main workflow calls these functions automatically for tasks of type Worklet.
|
|
155
|
+
|
|
156
|
+
### Type Casting at Target Writes (v1.4+)
|
|
157
|
+
Target field datatypes are mapped to pandas types, and the generator emits the corresponding casting code:
|
|
158
|
+
- Integers: nullable `Int64`/`Int32` or `fillna(0).astype(int)` for NOT NULL
|
|
159
|
+
- Dates: `pd.to_datetime(errors='coerce')`
|
|
160
|
+
- Decimals/Floats: `pd.to_numeric(errors='coerce')`
|
|
161
|
+
- Booleans: `.astype('boolean')`
|
|
162
|
+
|
|
163
|
+
### Flat File Handling (v1.3+)
|
|
164
|
+
Parses FLATFILE metadata for delimiter, fixed-width, header lines, skip rows, quote/escape chars. Generates `pd.read_fwf()` for fixed-width or enriched `read_file()` for delimited.
|
|
165
|
+
|
|
166
|
+
### Mapplet Inlining (v1.3+)
|
|
167
|
+
Expands Mapplet instances into prefixed transforms, rewires connectors, and eliminates duplication.
|
|
168
|
+
|
|
169
|
+
### Decision Tasks (v1.3+)
|
|
170
|
+
Converts Informatica decision conditions to Python if/else branches with proper variable substitution.
|
|
171
|
+
|
|
172
|
+
### Expression Converter (80+ Functions)
|
|
173
|
+
|
|
174
|
+
Converts Informatica expressions to Python equivalents:
|
|
175
|
+
|
|
176
|
+
- **String:** SUBSTR, LTRIM, RTRIM, UPPER, LOWER, LPAD, RPAD, INSTR, LENGTH, CONCAT, REPLACE, REG_EXTRACT, REG_REPLACE, REVERSE, INITCAP, CHR, ASCII
|
|
177
|
+
- **Date:** ADD_TO_DATE, DATE_DIFF, GET_DATE_PART, SYSDATE, SYSTIMESTAMP, TO_DATE, TO_CHAR, TRUNC (date)
|
|
178
|
+
- **Numeric:** ROUND, TRUNC, MOD, ABS, CEIL, FLOOR, POWER, SQRT, LOG, EXP, SIGN
|
|
179
|
+
- **Conversion:** TO_INTEGER, TO_BIGINT, TO_FLOAT, TO_DECIMAL, TO_CHAR, TO_DATE
|
|
180
|
+
- **Null handling:** IIF, DECODE, NVL, NVL2, ISNULL, IS_SPACES, IS_NUMBER
|
|
181
|
+
- **Aggregate:** SUM, AVG, COUNT, MIN, MAX, FIRST, LAST, MEDIAN, STDDEV, VARIANCE
|
|
182
|
+
- **Lookup:** :LKP expressions with dynamic lookup references
|
|
183
|
+
- **Variable:** SETVARIABLE / mapping variable assignment
|
|
184
|
+
|
|
185
|
+
## Requirements
|
|
186
|
+
|
|
187
|
+
- Python >= 3.8
|
|
188
|
+
- lxml >= 4.9.0
|
|
189
|
+
- PyYAML >= 6.0
|
|
190
|
+
|
|
191
|
+
## Changelog
|
|
192
|
+
|
|
193
|
+
### v1.4.x (Phase 3)
|
|
194
|
+
- Session connection overrides for sources and targets
|
|
195
|
+
- Worklet function generation with safe invocation
|
|
196
|
+
- Type casting at target writes based on TARGETFIELD datatypes
|
|
197
|
+
- Flat-file session path overrides properly wired
|
|
198
|
+
|
|
199
|
+
### v1.3.x (Phase 2)
|
|
200
|
+
- FLATFILE metadata in source reads and target writes
|
|
201
|
+
- Normalizer with `pd.melt()`
|
|
202
|
+
- Rank with group-by and Top-N filtering
|
|
203
|
+
- Decision tasks with real if/else branches
|
|
204
|
+
- Mapplet instance inlining
|
|
205
|
+
|
|
206
|
+
### v1.2.x (Phase 1)
|
|
207
|
+
- Core parser for all 72 XML tags
|
|
208
|
+
- Expression converter with 80+ functions
|
|
209
|
+
- Aggregator, Joiner, Lookup code generation
|
|
210
|
+
- Workflow orchestration with topological task ordering
|
|
211
|
+
- Multi-library support (pandas, dask, polars, vaex, modin)
|
|
212
|
+
|
|
213
|
+
## Development
|
|
214
|
+
|
|
215
|
+
```bash
|
|
216
|
+
# From a local clone, enter the project directory and install in development mode
|
|
217
|
+
cd informatica_python
|
|
218
|
+
pip install -e ".[dev]"
|
|
219
|
+
|
|
220
|
+
# Run tests (25 tests)
|
|
221
|
+
pytest tests/test_converter.py -v
|
|
222
|
+
```
|
|
223
|
+
|
|
224
|
+
## License
|
|
225
|
+
|
|
226
|
+
MIT License - Copyright (c) 2025 Nick
|
|
227
|
+
|
|
228
|
+
See [LICENSE](LICENSE) for details.
|
|
@@ -0,0 +1,201 @@
|
|
|
1
|
+
# informatica-python
|
|
2
|
+
|
|
3
|
+
Convert Informatica PowerCenter workflow XML exports into clean, runnable Python/PySpark code.
|
|
4
|
+
|
|
5
|
+
**Author:** Nick
|
|
6
|
+
**License:** MIT
|
|
7
|
+
**PyPI:** [informatica-python](https://pypi.org/project/informatica-python/)
|
|
8
|
+
|
|
9
|
+
---
|
|
10
|
+
|
|
11
|
+
## Overview
|
|
12
|
+
|
|
13
|
+
`informatica-python` parses Informatica PowerCenter XML export files and generates equivalent Python code using your choice of data library. It handles all 72 tags defined in the PowerCenter XML DTD and produces a complete, ready-to-run Python project.
|
|
14
|
+
|
|
15
|
+
## Installation
|
|
16
|
+
|
|
17
|
+
```bash
|
|
18
|
+
pip install informatica-python
|
|
19
|
+
```
|
|
20
|
+
|
|
21
|
+
## Quick Start
|
|
22
|
+
|
|
23
|
+
### Command Line
|
|
24
|
+
|
|
25
|
+
```bash
|
|
26
|
+
# Generate Python files to a directory
|
|
27
|
+
informatica-python workflow_export.xml -o output_dir
|
|
28
|
+
|
|
29
|
+
# Generate as a zip archive
|
|
30
|
+
informatica-python workflow_export.xml -z output.zip
|
|
31
|
+
|
|
32
|
+
# Use a different data library
|
|
33
|
+
informatica-python workflow_export.xml -o output_dir --data-lib polars
|
|
34
|
+
|
|
35
|
+
# Parse to JSON only (no code generation)
|
|
36
|
+
informatica-python workflow_export.xml --json
|
|
37
|
+
|
|
38
|
+
# Save parsed JSON to file
|
|
39
|
+
informatica-python workflow_export.xml --json-file parsed.json
|
|
40
|
+
```
|
|
41
|
+
|
|
42
|
+
### Python API
|
|
43
|
+
|
|
44
|
+
```python
|
|
45
|
+
from informatica_python import InformaticaConverter
|
|
46
|
+
|
|
47
|
+
converter = InformaticaConverter()
|
|
48
|
+
|
|
49
|
+
# Parse and generate files
|
|
50
|
+
converter.convert_to_files("workflow_export.xml", "output_dir")
|
|
51
|
+
|
|
52
|
+
# Parse and generate zip
|
|
53
|
+
converter.convert_to_zip("workflow_export.xml", "output.zip")
|
|
54
|
+
|
|
55
|
+
# Parse to structured dict
|
|
56
|
+
result = converter.parse_file("workflow_export.xml")
|
|
57
|
+
|
|
58
|
+
# Use a different data library
|
|
59
|
+
converter.convert_to_files("workflow_export.xml", "output_dir", data_lib="polars")
|
|
60
|
+
```
|
|
61
|
+
|
|
62
|
+
## Generated Output Files
|
|
63
|
+
|
|
64
|
+
| File | Description |
|
|
65
|
+
|------|-------------|
|
|
66
|
+
| `helper_functions.py` | Database/file I/O helpers, Informatica expression equivalents (80+ functions) |
|
|
67
|
+
| `mapping_N.py` | One per mapping — transformation logic, source reads, target writes |
|
|
68
|
+
| `workflow.py` | Task orchestration with topological ordering and error handling |
|
|
69
|
+
| `config.yml` | Connection configs, source/target metadata, runtime parameters |
|
|
70
|
+
| `all_sql_queries.sql` | All SQL extracted from Source Qualifiers, Lookups, SQL transforms |
|
|
71
|
+
| `error_log.txt` | Conversion summary, warnings, and unsupported feature notes |
|
|
72
|
+
|
|
73
|
+
## Supported Data Libraries
|
|
74
|
+
|
|
75
|
+
Select via `--data-lib` CLI flag or `data_lib` parameter:
|
|
76
|
+
|
|
77
|
+
| Library | Flag | Best For |
|
|
78
|
+
|---------|------|----------|
|
|
79
|
+
| **pandas** | `pandas` (default) | General-purpose, most compatible |
|
|
80
|
+
| **dask** | `dask` | Large datasets, parallel processing |
|
|
81
|
+
| **polars** | `polars` | High performance, Rust-backed |
|
|
82
|
+
| **vaex** | `vaex` | Out-of-core, billion-row datasets |
|
|
83
|
+
| **modin** | `modin` | Drop-in pandas replacement, multi-core |
|
|
84
|
+
|
|
85
|
+
## Supported Transformations
|
|
86
|
+
|
|
87
|
+
The code generator produces real, runnable Python for these transformation types:
|
|
88
|
+
|
|
89
|
+
- **Source Qualifier** — SQL override, pre/post SQL, column selection
|
|
90
|
+
- **Expression** — Field-level expressions converted to pandas operations
|
|
91
|
+
- **Filter** — Row filtering with converted conditions
|
|
92
|
+
- **Joiner** — `pd.merge()` with join type and condition parsing
|
|
93
|
+
- **Lookup** — `pd.merge()` lookups with connection-aware DB/file reads
|
|
94
|
+
- **Aggregator** — `groupby().agg()` with SUM/COUNT/AVG/MIN/MAX/FIRST/LAST
|
|
95
|
+
- **Sorter** — `sort_values()` with multi-key ascending/descending
|
|
96
|
+
- **Router** — Multi-group conditional routing with if/elif/else
|
|
97
|
+
- **Union** — `pd.concat()` across multiple input groups
|
|
98
|
+
- **Update Strategy** — Insert/Update/Delete/Reject flag generation
|
|
99
|
+
- **Sequence Generator** — Auto-incrementing ID columns
|
|
100
|
+
- **Normalizer** — `pd.melt()` with auto-detected id/value vars
|
|
101
|
+
- **Rank** — `groupby().rank()` with Top-N filtering
|
|
102
|
+
- **Stored Procedure** — Stub generation with SP name and parameters
|
|
103
|
+
- **Transaction Control** — Commit/rollback logic stubs
|
|
104
|
+
- **Custom / Java** — Placeholder stubs with TODO markers
|
|
105
|
+
- **SQL Transform** — Direct SQL execution pass-through
|
|
106
|
+
|
|
107
|
+
## Supported XML Tags (72 Tags)
|
|
108
|
+
|
|
109
|
+
**Top-level:** POWERMART, REPOSITORY, FOLDER, FOLDERVERSION
|
|
110
|
+
|
|
111
|
+
**Source/Target:** SOURCE, SOURCEFIELD, TARGET, TARGETFIELD, TARGETINDEX, TARGETINDEXFIELD, FLATFILE, XMLINFO, XMLTEXT, GROUP, TABLEATTRIBUTE, FIELDATTRIBUTE, METADATAEXTENSION, KEYWORD, ERPSRCINFO
|
|
112
|
+
|
|
113
|
+
**Mapping/Mapplet:** MAPPING, MAPPLET, TRANSFORMATION, TRANSFORMFIELD, TRANSFORMFIELDATTR, TRANSFORMFIELDATTRDEF, INSTANCE, ASSOCIATED_SOURCE_INSTANCE, CONNECTOR, MAPDEPENDENCY, TARGETLOADORDER, MAPPINGVARIABLE, FIELDDEPENDENCY, INITPROP, ERPINFO
|
|
114
|
+
|
|
115
|
+
**Task/Session/Workflow:** TASK, TIMER, VALUEPAIR, SCHEDULER, SCHEDULEINFO, STARTOPTIONS, ENDOPTIONS, SCHEDULEOPTIONS, RECURRING, CUSTOM, DAILYFREQUENCY, REPEAT, FILTER, SESSION, CONFIGREFERENCE, SESSTRANSFORMATIONINST, SESSTRANSFORMATIONGROUP, PARTITION, HASHKEY, KEYRANGE, CONFIG, SESSIONCOMPONENT, CONNECTIONREFERENCE, TASKINSTANCE, WORKFLOWLINK, WORKFLOWVARIABLE, WORKFLOWEVENT, WORKLET, WORKFLOW, ATTRIBUTE
|
|
116
|
+
|
|
117
|
+
**Shortcut:** SHORTCUT
|
|
118
|
+
|
|
119
|
+
**SAP:** SAPFUNCTION, SAPSTRUCTURE, SAPPROGRAM, SAPOUTPUTPORT, SAPVARIABLE, SAPPROGRAMFLOWOBJECT, SAPTABLEPARAM
|
|
120
|
+
|
|
121
|
+
## Key Features
|
|
122
|
+
|
|
123
|
+
### Session Connection Overrides (v1.4+)
|
|
124
|
+
When sessions define per-transform connection overrides (different database, file directory, or filename), the generated code uses those overrides instead of source/target defaults.
|
|
125
|
+
|
|
126
|
+
### Worklet Support (v1.4+)
|
|
127
|
+
Worklets are detected, and a separate `run_worklet_NAME(config)` function is generated for each one. The main workflow calls these functions automatically for tasks of type Worklet.
|
|
128
|
+
|
|
129
|
+
### Type Casting at Target Writes (v1.4+)
|
|
130
|
+
Target field datatypes are mapped to pandas types, and the generator emits the corresponding casting code:
|
|
131
|
+
- Integers: nullable `Int64`/`Int32` or `fillna(0).astype(int)` for NOT NULL
|
|
132
|
+
- Dates: `pd.to_datetime(errors='coerce')`
|
|
133
|
+
- Decimals/Floats: `pd.to_numeric(errors='coerce')`
|
|
134
|
+
- Booleans: `.astype('boolean')`
|
|
135
|
+
|
|
136
|
+
### Flat File Handling (v1.3+)
|
|
137
|
+
Parses FLATFILE metadata for delimiter, fixed-width, header lines, skip rows, quote/escape chars. Generates `pd.read_fwf()` for fixed-width or enriched `read_file()` for delimited.
|
|
138
|
+
|
|
139
|
+
### Mapplet Inlining (v1.3+)
|
|
140
|
+
Expands Mapplet instances into prefixed transforms, rewires connectors, and eliminates duplication.
|
|
141
|
+
|
|
142
|
+
### Decision Tasks (v1.3+)
|
|
143
|
+
Converts Informatica decision conditions to Python if/else branches with proper variable substitution.
|
|
144
|
+
|
|
145
|
+
### Expression Converter (80+ Functions)
|
|
146
|
+
|
|
147
|
+
Converts Informatica expressions to Python equivalents:
|
|
148
|
+
|
|
149
|
+
- **String:** SUBSTR, LTRIM, RTRIM, UPPER, LOWER, LPAD, RPAD, INSTR, LENGTH, CONCAT, REPLACE, REG_EXTRACT, REG_REPLACE, REVERSE, INITCAP, CHR, ASCII
|
|
150
|
+
- **Date:** ADD_TO_DATE, DATE_DIFF, GET_DATE_PART, SYSDATE, SYSTIMESTAMP, TO_DATE, TO_CHAR, TRUNC (date)
|
|
151
|
+
- **Numeric:** ROUND, TRUNC, MOD, ABS, CEIL, FLOOR, POWER, SQRT, LOG, EXP, SIGN
|
|
152
|
+
- **Conversion:** TO_INTEGER, TO_BIGINT, TO_FLOAT, TO_DECIMAL, TO_CHAR, TO_DATE
|
|
153
|
+
- **Null handling:** IIF, DECODE, NVL, NVL2, ISNULL, IS_SPACES, IS_NUMBER
|
|
154
|
+
- **Aggregate:** SUM, AVG, COUNT, MIN, MAX, FIRST, LAST, MEDIAN, STDDEV, VARIANCE
|
|
155
|
+
- **Lookup:** :LKP expressions with dynamic lookup references
|
|
156
|
+
- **Variable:** SETVARIABLE / mapping variable assignment
|
|
157
|
+
|
|
158
|
+
## Requirements
|
|
159
|
+
|
|
160
|
+
- Python >= 3.8
|
|
161
|
+
- lxml >= 4.9.0
|
|
162
|
+
- PyYAML >= 6.0
|
|
163
|
+
|
|
164
|
+
## Changelog
|
|
165
|
+
|
|
166
|
+
### v1.4.x (Phase 3)
|
|
167
|
+
- Session connection overrides for sources and targets
|
|
168
|
+
- Worklet function generation with safe invocation
|
|
169
|
+
- Type casting at target writes based on TARGETFIELD datatypes
|
|
170
|
+
- Flat-file session path overrides properly wired
|
|
171
|
+
|
|
172
|
+
### v1.3.x (Phase 2)
|
|
173
|
+
- FLATFILE metadata in source reads and target writes
|
|
174
|
+
- Normalizer with `pd.melt()`
|
|
175
|
+
- Rank with group-by and Top-N filtering
|
|
176
|
+
- Decision tasks with real if/else branches
|
|
177
|
+
- Mapplet instance inlining
|
|
178
|
+
|
|
179
|
+
### v1.2.x (Phase 1)
|
|
180
|
+
- Core parser for all 72 XML tags
|
|
181
|
+
- Expression converter with 80+ functions
|
|
182
|
+
- Aggregator, Joiner, Lookup code generation
|
|
183
|
+
- Workflow orchestration with topological task ordering
|
|
184
|
+
- Multi-library support (pandas, dask, polars, vaex, modin)
|
|
185
|
+
|
|
186
|
+
## Development
|
|
187
|
+
|
|
188
|
+
```bash
|
|
189
|
+
# From a local clone, enter the project directory and install in development mode
|
|
190
|
+
cd informatica_python
|
|
191
|
+
pip install -e ".[dev]"
|
|
192
|
+
|
|
193
|
+
# Run tests (25 tests)
|
|
194
|
+
pytest tests/test_converter.py -v
|
|
195
|
+
```
|
|
196
|
+
|
|
197
|
+
## License
|
|
198
|
+
|
|
199
|
+
MIT License - Copyright (c) 2025 Nick
|
|
200
|
+
|
|
201
|
+
See [LICENSE](LICENSE) for details.
|
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
"""
|
|
2
|
+
informatica-python: Convert Informatica PowerCenter workflow XML to Python/PySpark code.
|
|
3
|
+
|
|
4
|
+
Copyright (c) 2025 Nick. All rights reserved.
|
|
5
|
+
Licensed under the MIT License.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
from informatica_python.converter import InformaticaConverter
|
|
9
|
+
|
|
10
|
+
__version__ = "1.5.0"
|
|
11
|
+
__author__ = "Nick"
|
|
12
|
+
__license__ = "MIT"
|
|
13
|
+
__all__ = ["InformaticaConverter"]
|
{informatica_python-1.4.1 → informatica_python-1.5.0}/informatica_python/generators/helper_gen.py
RENAMED
|
@@ -44,13 +44,20 @@ def generate_helper_functions(folder: FolderDef, data_lib: str = "pandas") -> st
|
|
|
44
44
|
lines.append('logger = logging.getLogger("informatica_converter")')
|
|
45
45
|
lines.append("")
|
|
46
46
|
lines.append("")
|
|
47
|
-
lines.append("def load_config(config_path='config.yml'):")
|
|
48
|
-
lines.append(' """Load configuration from YAML file."""')
|
|
47
|
+
lines.append("def load_config(config_path='config.yml', param_file=None):")
|
|
48
|
+
lines.append(' """Load configuration from YAML file, optionally merging Informatica .param file."""')
|
|
49
49
|
lines.append(" with open(config_path, 'r') as f:")
|
|
50
|
-
lines.append("
|
|
50
|
+
lines.append(" config = yaml.safe_load(f) or {}")
|
|
51
|
+
lines.append(" if param_file:")
|
|
52
|
+
lines.append(" params = parse_param_file(param_file)")
|
|
53
|
+
lines.append(" config['params'] = params")
|
|
54
|
+
lines.append(" for key, val in params.items():")
|
|
55
|
+
lines.append(" os.environ[f'INFA_VAR_{key}'] = str(val)")
|
|
56
|
+
lines.append(" return config")
|
|
51
57
|
lines.append("")
|
|
52
58
|
lines.append("")
|
|
53
59
|
|
|
60
|
+
_add_param_file_functions(lines)
|
|
54
61
|
_add_db_functions(lines, data_lib)
|
|
55
62
|
_add_file_functions(lines, data_lib)
|
|
56
63
|
_add_expression_helpers(lines)
|
|
@@ -59,6 +66,61 @@ def generate_helper_functions(folder: FolderDef, data_lib: str = "pandas") -> st
|
|
|
59
66
|
return "\n".join(lines)
|
|
60
67
|
|
|
61
68
|
|
|
69
|
+
def _add_param_file_functions(lines):
|
|
70
|
+
lines.append("# ============================================================")
|
|
71
|
+
lines.append("# Informatica Parameter File Support")
|
|
72
|
+
lines.append("# ============================================================")
|
|
73
|
+
lines.append("")
|
|
74
|
+
lines.append("")
|
|
75
|
+
lines.append("def parse_param_file(param_path):")
|
|
76
|
+
lines.append(' """')
|
|
77
|
+
lines.append(" Parse an Informatica .param file into a flat dict of variable names to values.")
|
|
78
|
+
lines.append(" Supports standard Informatica parameter file format:")
|
|
79
|
+
lines.append(" [Global]")
|
|
80
|
+
lines.append(" $$VAR_NAME=value")
|
|
81
|
+
lines.append(" [folder_name.WF:workflow_name.ST:session_name]")
|
|
82
|
+
lines.append(" $$CONN_NAME=value")
|
|
83
|
+
lines.append(' """')
|
|
84
|
+
lines.append(" params = {}")
|
|
85
|
+
lines.append(" if not os.path.exists(param_path):")
|
|
86
|
+
lines.append(" logger.warning(f'Parameter file not found: {param_path}')")
|
|
87
|
+
lines.append(" return params")
|
|
88
|
+
lines.append("")
|
|
89
|
+
lines.append(" current_section = 'Global'")
|
|
90
|
+
lines.append(" with open(param_path, 'r') as f:")
|
|
91
|
+
lines.append(" for line_num, line in enumerate(f, 1):")
|
|
92
|
+
lines.append(" line = line.strip()")
|
|
93
|
+
lines.append(" if not line or line.startswith('#'):")
|
|
94
|
+
lines.append(" continue")
|
|
95
|
+
lines.append(" if line.startswith('[') and line.endswith(']'):")
|
|
96
|
+
lines.append(" current_section = line[1:-1].strip()")
|
|
97
|
+
lines.append(" continue")
|
|
98
|
+
lines.append(" if '=' in line:")
|
|
99
|
+
lines.append(" key, _, value = line.partition('=')")
|
|
100
|
+
lines.append(" key = key.strip()")
|
|
101
|
+
lines.append(" value = value.strip()")
|
|
102
|
+
lines.append(" clean_key = key.lstrip('$')")
|
|
103
|
+
lines.append(" params[clean_key] = value")
|
|
104
|
+
lines.append(" if current_section != 'Global':")
|
|
105
|
+
lines.append(" params[f'{current_section}.{clean_key}'] = value")
|
|
106
|
+
lines.append(" logger.info(f'Loaded {len(params)} parameters from {param_path}')")
|
|
107
|
+
lines.append(" return params")
|
|
108
|
+
lines.append("")
|
|
109
|
+
lines.append("")
|
|
110
|
+
lines.append("def get_param(config, var_name, default=''):")
|
|
111
|
+
lines.append(' """Get a parameter value from config params, then env vars, then default."""')
|
|
112
|
+
lines.append(" clean = var_name.lstrip('$')")
|
|
113
|
+
lines.append(" params = config.get('params', {})")
|
|
114
|
+
lines.append(" if clean in params:")
|
|
115
|
+
lines.append(" return params[clean]")
|
|
116
|
+
lines.append(" env_val = os.environ.get(f'INFA_VAR_{clean}')")
|
|
117
|
+
lines.append(" if env_val is not None:")
|
|
118
|
+
lines.append(" return env_val")
|
|
119
|
+
lines.append(" return default")
|
|
120
|
+
lines.append("")
|
|
121
|
+
lines.append("")
|
|
122
|
+
|
|
123
|
+
|
|
62
124
|
def _add_db_functions(lines, data_lib):
|
|
63
125
|
lines.append("# ============================================================")
|
|
64
126
|
lines.append("# Database Operations")
|
|
@@ -1060,14 +1122,26 @@ def _add_expression_helpers(lines):
|
|
|
1060
1122
|
lines.append(" return None")
|
|
1061
1123
|
lines.append("")
|
|
1062
1124
|
lines.append("")
|
|
1063
|
-
lines.append("
|
|
1064
|
-
lines.append(
|
|
1065
|
-
lines.append("
|
|
1125
|
+
lines.append("_param_store = {}")
|
|
1126
|
+
lines.append("")
|
|
1127
|
+
lines.append("")
|
|
1128
|
+
lines.append("def get_variable(var_name, config=None):")
|
|
1129
|
+
lines.append(' """Get workflow/mapping variable value from params, env vars, or param store."""')
|
|
1130
|
+
lines.append(" clean = var_name.lstrip('$')")
|
|
1131
|
+
lines.append(" if config and 'params' in config:")
|
|
1132
|
+
lines.append(" val = config['params'].get(clean)")
|
|
1133
|
+
lines.append(" if val is not None:")
|
|
1134
|
+
lines.append(" return val")
|
|
1135
|
+
lines.append(" if clean in _param_store:")
|
|
1136
|
+
lines.append(" return _param_store[clean]")
|
|
1137
|
+
lines.append(" return os.environ.get(f'INFA_VAR_{clean}', '')")
|
|
1066
1138
|
lines.append("")
|
|
1067
1139
|
lines.append("")
|
|
1068
1140
|
lines.append("def set_variable(var_name, value):")
|
|
1069
1141
|
lines.append(' """Set workflow/mapping variable value."""')
|
|
1070
|
-
lines.append("
|
|
1142
|
+
lines.append(" clean = var_name.lstrip('$')")
|
|
1143
|
+
lines.append(" _param_store[clean] = value")
|
|
1144
|
+
lines.append(" os.environ[f'INFA_VAR_{clean}'] = str(value)")
|
|
1071
1145
|
lines.append(" return value")
|
|
1072
1146
|
lines.append("")
|
|
1073
1147
|
lines.append("")
|