informatica-python 1.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- informatica_python/__init__.py +4 -0
- informatica_python/cli.py +83 -0
- informatica_python/converter.py +285 -0
- informatica_python/generators/__init__.py +0 -0
- informatica_python/generators/config_gen.py +159 -0
- informatica_python/generators/error_log_gen.py +140 -0
- informatica_python/generators/helper_gen.py +693 -0
- informatica_python/generators/mapping_gen.py +649 -0
- informatica_python/generators/sql_gen.py +132 -0
- informatica_python/generators/workflow_gen.py +234 -0
- informatica_python/models.py +281 -0
- informatica_python/parser.py +468 -0
- informatica_python/utils/__init__.py +0 -0
- informatica_python/utils/datatype_map.py +105 -0
- informatica_python/utils/expression_converter.py +128 -0
- informatica_python-1.0.0.dist-info/METADATA +118 -0
- informatica_python-1.0.0.dist-info/RECORD +20 -0
- informatica_python-1.0.0.dist-info/WHEEL +5 -0
- informatica_python-1.0.0.dist-info/entry_points.txt +2 -0
- informatica_python-1.0.0.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,128 @@
|
|
|
1
|
+
import re
|
|
2
|
+
|
|
3
|
+
|
|
4
|
+
# Maps Informatica function names (upper case) to the names of the Python
# helper functions that convert_expression() emits in their place.
INFA_FUNC_MAP = {
    "IIF": "iif_expr",
    "DECODE": "decode_expr",
    "LTRIM": "ltrim",
    "RTRIM": "rtrim",
    "UPPER": "upper",
    "LOWER": "lower",
    "SUBSTR": "substr",
    "LPAD": "lpad",
    "RPAD": "rpad",
    "TO_CHAR": "to_char",
    "TO_DATE": "to_date",
    "TO_INTEGER": "to_integer",
    "TO_BIGINT": "to_bigint",
    "TO_FLOAT": "to_float",
    "TO_DECIMAL": "to_decimal",
    "SYSDATE": "current_timestamp",
    "SYSTIMESTAMP": "current_timestamp",
    "SESSSTARTTIME": "session_start_time",
    "GET_DATE_PART": "get_date_part",
    "ADD_TO_DATE": "add_to_date",
    "TRUNC": "trunc",
    "ROUND": "round",
    "ABS": "abs",
    "CEIL": "ceil",
    "FLOOR": "floor",
    "MOD": "mod",
    "POWER": "power",
    "SQRT": "sqrt",
    "LENGTH": "length",
    "CONCAT": "concat",
    "INSTR": "instr",
    "REPLACECHR": "replacechr",
    "REPLACESTR": "replacestr",
    "REG_EXTRACT": "reg_extract",
    "REG_MATCH": "reg_match",
    "REG_REPLACE": "reg_replace",
    "IS_DATE": "is_date",
    "IS_NUMBER": "is_number",
    "IS_SPACES": "is_spaces",
    "NVL": "nvl",
    "NVL2": "nvl2",
    "ISNULL": "isnull",
    "ERROR": "raise_error",
    "ABORT": "abort_func",
    "LOOKUP": "lookup_func",
    "MAX": "max_val",
    "MIN": "min_val",
    "SUM": "sum_val",
    "COUNT": "count_val",
    "AVG": "avg_val",
    "FIRST": "first_val",
    "LAST": "last_val",
    "MOVINGAVG": "moving_avg",
    "MOVINGSUM": "moving_sum",
    "CUME": "cume",
    "SETCOUNTVARIABLE": "set_count_variable",
    "SETVARIABLE": "set_variable",
}

# One complete single-quoted string literal. The capturing group makes
# re.split() return the literals alongside the text between them.
_STRING_LITERAL_RE = re.compile(r"('[^']*')")


def _convert_code_segment(segment):
    """Rewrite Informatica syntax in text that contains no string literal."""
    converted = segment

    # FUNC( -> helper( for every mapped function name, case-insensitively.
    # The map is read on every call so entries added at runtime are honoured.
    # NOTE(review): a name is only rewritten when followed by "(", so mapped
    # keywords written without parentheses (e.g. SYSDATE) pass through
    # unchanged -- confirm whether that is intended.
    for infa_func, py_func in INFA_FUNC_MAP.items():
        pattern = re.compile(
            r'\b' + re.escape(infa_func) + r'\s*\(', re.IGNORECASE
        )
        converted = pattern.sub(f'{py_func}(', converted)

    # String concatenation operator.
    converted = converted.replace("||", " + ")

    # The order below matters: the generic ":name" rule must run last so it
    # does not consume the ":LKP." prefix of unconnected lookup calls.
    converted = re.sub(r':LKP\.(\w+)\(', r'lookup_func("\1", ', converted)
    converted = re.sub(r'\$\$(\w+)', r'get_variable("\1")', converted)
    converted = re.sub(r':(\w+)', r'row["\1"]', converted)
    return converted


def convert_expression(expr):
    """Translate an Informatica expression into Python expression source text.

    Args:
        expr: The Informatica expression text, or None.

    Returns:
        A string of Python source:
        * "None" when expr is None, empty or only whitespace;
        * row["NAME"] when the whole expression is a single identifier;
        * the text unchanged when it is a plain integer/decimal number;
        * otherwise the expression with mapped function names, "||",
          ":LKP.name(", "$$name" and ":name" rewritten.

    Text inside single-quoted string literals is never rewritten, so a
    literal such as '12:30' or 'MAX(x)' is passed through verbatim while the
    code around it is still translated.
    """
    if not expr or not expr.strip():
        return "None"

    cleaned = expr.strip()

    # A lone identifier is treated as a column reference on the current row.
    if re.fullmatch(r'[A-Za-z_][A-Za-z0-9_]*', cleaned):
        return f'row["{cleaned}"]'

    # Numeric literals are already valid Python.
    if re.fullmatch(r'-?\d+(\.\d+)?', cleaned):
        return cleaned

    # re.split() with a capturing group alternates code / literal / code ...;
    # odd positions are the quoted literals and must be kept byte-for-byte.
    pieces = _STRING_LITERAL_RE.split(cleaned)
    return "".join(
        piece if index % 2 else _convert_code_segment(piece)
        for index, piece in enumerate(pieces)
    )
|
|
95
|
+
|
|
96
|
+
|
|
97
|
+
# XML character/entity references and the text each one stands for.
# NOTE(review): these spellings were reconstructed from the replacement
# targets (newline, carriage return, tab, quote characters, &, <, >);
# confirm them against the XML text this function actually receives.
_SQL_ENTITY_TEXT = {
    "&#xd;&#xa;": "\n",  # CR+LF pair collapses to a single newline
    "&#xd;": "\r",
    "&#xa;": "\n",
    "&#x9;": "\t",
    "&apos;": "'",
    "&quot;": '"',
    "&amp;": "&",
    "&lt;": "<",
    "&gt;": ">",
}

# The CR+LF alternative is listed first so the pair is consumed as one unit.
# Hex digits are accepted in either case.
_SQL_ENTITY_RE = re.compile(
    r"&#x[dD];&#x[aA];|&#x[dD];|&#x[aA];|&#x9;"
    r"|&apos;|&quot;|&amp;|&lt;|&gt;"
)


def convert_sql_expression(sql_expr):
    """Return sql_expr stripped, with XML entity references decoded.

    Args:
        sql_expr: SQL text that may still contain XML entity references,
            or None.

    Returns:
        "" when sql_expr is None, empty or only whitespace; otherwise the
        stripped text with each recognised reference replaced by the
        character it represents.

    Decoding is done in a single pass, so text produced by one replacement
    is never decoded again ("&amp;lt;" becomes "&lt;", not "<").
    """
    if not sql_expr or not sql_expr.strip():
        return ""

    # Strip first: whitespace produced by decoding (e.g. a trailing newline
    # reference) is part of the SQL text and is kept.
    stripped = sql_expr.strip()
    return _SQL_ENTITY_RE.sub(
        lambda match: _SQL_ENTITY_TEXT[match.group(0).lower()], stripped
    )
|
|
113
|
+
|
|
114
|
+
|
|
115
|
+
# Dialect markers, checked in this order; the first dialect with a hit wins.
# Word boundaries stop keywords matching inside longer identifiers
# (e.g. "TOP " inside "DESKTOP ", "SYSDATE" inside "SYSDATETIME").
_SQL_DIALECT_MARKERS = (
    ("mssql", re.compile(
        r"\bGETDATE\s*\(\s*\)|\bISNULL\s*\(|\bTOP\s", re.IGNORECASE)),
    ("oracle", re.compile(
        r"\bNVL\s*\(|\bSYSDATE\b|\bROWNUM\b", re.IGNORECASE)),
    ("postgresql", re.compile(
        r"\bNOW\s*\(\s*\)|\bCOALESCE\s*\(", re.IGNORECASE)),
)


def detect_sql_dialect(sql_text):
    """Guess the SQL dialect of sql_text from dialect-specific keywords.

    Args:
        sql_text: SQL text to inspect, or None.

    Returns:
        "mssql", "oracle" or "postgresql" for the first dialect whose marker
        keywords appear in the text (tested in that order), or "generic"
        when the text is empty/None or contains none of the markers.

    Matching is case-insensitive and keyword-based only; keywords inside
    string literals or comments are not distinguished from real SQL.
    """
    if not sql_text:
        return "generic"

    for dialect, marker in _SQL_DIALECT_MARKERS:
        if marker.search(sql_text):
            return dialect

    return "generic"
|
|
@@ -0,0 +1,118 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: informatica-python
|
|
3
|
+
Version: 1.0.0
|
|
4
|
+
Summary: Convert Informatica PowerCenter workflow XML to Python/PySpark code
|
|
5
|
+
License: MIT
|
|
6
|
+
Requires-Python: >=3.8
|
|
7
|
+
Description-Content-Type: text/markdown
|
|
8
|
+
Requires-Dist: lxml>=4.9.0
|
|
9
|
+
Requires-Dist: pyyaml>=6.0
|
|
10
|
+
Provides-Extra: dev
|
|
11
|
+
Requires-Dist: pytest>=7.0; extra == "dev"
|
|
12
|
+
|
|
13
|
+
# informatica-python
|
|
14
|
+
|
|
15
|
+
Convert Informatica PowerCenter workflow XML files to Python/PySpark code.
|
|
16
|
+
|
|
17
|
+
## Installation
|
|
18
|
+
|
|
19
|
+
```bash
|
|
20
|
+
pip install informatica-python
|
|
21
|
+
```
|
|
22
|
+
|
|
23
|
+
## Quick Start
|
|
24
|
+
|
|
25
|
+
### Command Line
|
|
26
|
+
|
|
27
|
+
```bash
|
|
28
|
+
# Convert XML to Python files in a directory
|
|
29
|
+
informatica-python workflow.xml -o output_dir
|
|
30
|
+
|
|
31
|
+
# Convert XML to a zip file
|
|
32
|
+
informatica-python workflow.xml -z output.zip
|
|
33
|
+
|
|
34
|
+
# Use a different data library (pandas, dask, polars, vaex, modin)
|
|
35
|
+
informatica-python workflow.xml -o output_dir --data-lib polars
|
|
36
|
+
|
|
37
|
+
# Parse XML to JSON (no code generation)
|
|
38
|
+
informatica-python workflow.xml --json
|
|
39
|
+
|
|
40
|
+
# Save parsed JSON to file
|
|
41
|
+
informatica-python workflow.xml --json-file parsed.json
|
|
42
|
+
```
|
|
43
|
+
|
|
44
|
+
### Python API
|
|
45
|
+
|
|
46
|
+
```python
|
|
47
|
+
from informatica_python import InformaticaConverter
|
|
48
|
+
|
|
49
|
+
# Convert XML to Python files
|
|
50
|
+
converter = InformaticaConverter(data_lib="pandas")
|
|
51
|
+
converter.convert("workflow.xml", output_dir="output")
|
|
52
|
+
|
|
53
|
+
# Convert to zip
|
|
54
|
+
converter.convert("workflow.xml", output_zip="output.zip")
|
|
55
|
+
|
|
56
|
+
# Parse XML to JSON dict
|
|
57
|
+
result = converter.parse_file("workflow.xml")
|
|
58
|
+
|
|
59
|
+
# Parse XML string
|
|
60
|
+
result = converter.parse_string(xml_string)
|
|
61
|
+
```
|
|
62
|
+
|
|
63
|
+
## Generated Output Files
|
|
64
|
+
|
|
65
|
+
| File | Description |
|
|
66
|
+
|------|-------------|
|
|
67
|
+
| `helper_functions.py` | Database/file I/O functions plus Python equivalents for 50+ Informatica expression functions |
|
|
68
|
+
| `mapping_N.py` | One file per mapping with full transformation logic |
|
|
69
|
+
| `workflow.py` | Task orchestration with topological ordering |
|
|
70
|
+
| `config.yml` | Connection configs, source/target metadata, variables |
|
|
71
|
+
| `all_sql_queries.sql` | All extracted SQL queries (source qualifiers, lookups, pre/post SQL) |
|
|
72
|
+
| `error_log.txt` | Conversion summary, warnings, and coverage statistics |
|
|
73
|
+
|
|
74
|
+
## Supported Transformation Types
|
|
75
|
+
|
|
76
|
+
- Source Qualifier / Application Source Qualifier
|
|
77
|
+
- Expression
|
|
78
|
+
- Filter
|
|
79
|
+
- Aggregator
|
|
80
|
+
- Sorter
|
|
81
|
+
- Joiner
|
|
82
|
+
- Lookup Procedure
|
|
83
|
+
- Router
|
|
84
|
+
- Union
|
|
85
|
+
- Update Strategy
|
|
86
|
+
- Sequence Generator
|
|
87
|
+
- Normalizer
|
|
88
|
+
- Rank
|
|
89
|
+
- Stored Procedure (placeholder)
|
|
90
|
+
- Custom Transformation (placeholder)
|
|
91
|
+
- Java Transformation (placeholder)
|
|
92
|
+
- SQL Transformation
|
|
93
|
+
|
|
94
|
+
## Supported Data Libraries
|
|
95
|
+
|
|
96
|
+
Choose your preferred data manipulation library with `--data-lib`:
|
|
97
|
+
|
|
98
|
+
- **pandas** (default) — Standard Python data analysis
|
|
99
|
+
- **dask** — Parallel computing with pandas-like API
|
|
100
|
+
- **polars** — Fast DataFrame library written in Rust
|
|
101
|
+
- **vaex** — Out-of-core DataFrames for large datasets
|
|
102
|
+
- **modin** — Drop-in pandas replacement with parallel execution
|
|
103
|
+
|
|
104
|
+
## Informatica Expression Functions
|
|
105
|
+
|
|
106
|
+
The generated `helper_functions.py` includes Python equivalents for:
|
|
107
|
+
|
|
108
|
+
`IIF`, `DECODE`, `NVL`, `NVL2`, `ISNULL`, `LTRIM`, `RTRIM`, `UPPER`, `LOWER`, `SUBSTR`, `LPAD`, `RPAD`, `TO_CHAR`, `TO_DATE`, `TO_INTEGER`, `TO_BIGINT`, `TO_FLOAT`, `TO_DECIMAL`, `REPLACECHR`, `REPLACESTR`, `INSTR`, `LENGTH`, `CONCAT`, `REG_EXTRACT`, `REG_MATCH`, `REG_REPLACE`, `GET_DATE_PART`, `ADD_TO_DATE`, `IS_DATE`, `IS_NUMBER`, `IS_SPACES`, `SYSDATE`, `ERROR`, `ABORT`, and more.
|
|
109
|
+
|
|
110
|
+
## Requirements
|
|
111
|
+
|
|
112
|
+
- Python >= 3.8
|
|
113
|
+
- lxml >= 4.9.0
|
|
114
|
+
- PyYAML >= 6.0
|
|
115
|
+
|
|
116
|
+
## License
|
|
117
|
+
|
|
118
|
+
MIT
|
|
@@ -0,0 +1,20 @@
|
|
|
1
|
+
informatica_python/__init__.py,sha256=r3DaPWzFarm6s91zz_VbU1rT20KK-XqXs1a1p6hpk-s,120
|
|
2
|
+
informatica_python/cli.py,sha256=79KpI8p29aATLzi5qH7H2YhMl9I8rzjwMEGzbV48UjM,2480
|
|
3
|
+
informatica_python/converter.py,sha256=6wFPmLc3JyZCgKRYFGYtdD-9SQ4_brkYdB9KIxS1GfY,10506
|
|
4
|
+
informatica_python/models.py,sha256=vZhWYVTKhYC9aqaO3YJFcczFS_mHbpncRs0ME2iLUoI,7190
|
|
5
|
+
informatica_python/parser.py,sha256=ZRcf9x0GV7WbDh5OK_CC2DpswBLIjUcvOMxlYEDcFU0,19756
|
|
6
|
+
informatica_python/generators/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
7
|
+
informatica_python/generators/config_gen.py,sha256=4tqcNKTB06kyGZIiM4yl0q97q_i3zeCHXTjuE1dNFKY,5726
|
|
8
|
+
informatica_python/generators/error_log_gen.py,sha256=98cj2T0Hi3tdH6wQfFI5956QXpqeK11_eND9hzAKm2U,5685
|
|
9
|
+
informatica_python/generators/helper_gen.py,sha256=0i1VwAMZJwzEQWu_AXpWFVbGiGks7jMqp5eTGDZiYv8,34447
|
|
10
|
+
informatica_python/generators/mapping_gen.py,sha256=AEvA_AvoD9iQl9XPE6FuqobwUI5XjEXAboVEnegLfcY,27054
|
|
11
|
+
informatica_python/generators/sql_gen.py,sha256=rwDy-sFpcPZoetUSppK7iF02aFxYIX8PLICnK021o6E,5711
|
|
12
|
+
informatica_python/generators/workflow_gen.py,sha256=ltpDgQELPsERfqSIz1LQUFw_gs-wKqDTOMwb0IDxJpI,9402
|
|
13
|
+
informatica_python/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
14
|
+
informatica_python/utils/datatype_map.py,sha256=iLOYg-iBKT4rMecGbrFkTpJj4yqs5S9HeBOTLUIWhX0,2809
|
|
15
|
+
informatica_python/utils/expression_converter.py,sha256=pmTaY1R5KDVWtJiVD-8PgYIwas_rjKZ_SFhGvn91q2I,3451
|
|
16
|
+
informatica_python-1.0.0.dist-info/METADATA,sha256=LzB1RkDDa1uHI-slVt9mhlxDE82lyWDl-sy93KPP1PE,3344
|
|
17
|
+
informatica_python-1.0.0.dist-info/WHEEL,sha256=PovZm1ExVWmrRefZoXCfejlbKLnQI5SVIf1SWRV4QQI,97
|
|
18
|
+
informatica_python-1.0.0.dist-info/entry_points.txt,sha256=030jjTrx-1oRRQ16HZz52rdcKS8R8_llnymsTUtn_Xc,67
|
|
19
|
+
informatica_python-1.0.0.dist-info/top_level.txt,sha256=Dngg-WNteYi22XAJU2XKAQS8aZ52yM2LYC0tzxrlbVQ,19
|
|
20
|
+
informatica_python-1.0.0.dist-info/RECORD,,
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
informatica_python
|