informatica-python 1.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,128 @@
1
+ import re
2
+
3
+
4
# Maps an Informatica expression function name (upper case) to the name that
# replaces it in the converted Python expression.  The replacement names are
# presumably defined in the generated helper_functions.py -- confirm against
# the helper generator.
INFA_FUNC_MAP = {
    "IIF": "iif_expr",
    "DECODE": "decode_expr",
    "LTRIM": "ltrim",
    "RTRIM": "rtrim",
    "UPPER": "upper",
    "LOWER": "lower",
    "SUBSTR": "substr",
    "LPAD": "lpad",
    "RPAD": "rpad",
    "TO_CHAR": "to_char",
    "TO_DATE": "to_date",
    "TO_INTEGER": "to_integer",
    "TO_BIGINT": "to_bigint",
    "TO_FLOAT": "to_float",
    "TO_DECIMAL": "to_decimal",
    "SYSDATE": "current_timestamp",
    "SYSTIMESTAMP": "current_timestamp",
    "SESSSTARTTIME": "session_start_time",
    "GET_DATE_PART": "get_date_part",
    "ADD_TO_DATE": "add_to_date",
    "TRUNC": "trunc",
    "ROUND": "round",
    "ABS": "abs",
    "CEIL": "ceil",
    "FLOOR": "floor",
    "MOD": "mod",
    "POWER": "power",
    "SQRT": "sqrt",
    "LENGTH": "length",
    "CONCAT": "concat",
    "INSTR": "instr",
    "REPLACECHR": "replacechr",
    "REPLACESTR": "replacestr",
    "REG_EXTRACT": "reg_extract",
    "REG_MATCH": "reg_match",
    "REG_REPLACE": "reg_replace",
    "IS_DATE": "is_date",
    "IS_NUMBER": "is_number",
    "IS_SPACES": "is_spaces",
    "NVL": "nvl",
    "NVL2": "nvl2",
    "ISNULL": "isnull",
    "ERROR": "raise_error",
    "ABORT": "abort_func",
    "LOOKUP": "lookup_func",
    "MAX": "max_val",
    "MIN": "min_val",
    "SUM": "sum_val",
    "COUNT": "count_val",
    "AVG": "avg_val",
    "FIRST": "first_val",
    "LAST": "last_val",
    "MOVINGAVG": "moving_avg",
    "MOVINGSUM": "moving_sum",
    "CUME": "cume",
    "SETCOUNTVARIABLE": "set_count_variable",
    "SETVARIABLE": "set_variable",
}

# A single-quoted string literal.  The capture group makes re.split() keep
# the literals in its result, alternating with the code between them.
_STRING_LITERAL_RE = re.compile(r"('[^']*')")
_IDENTIFIER_RE = re.compile(r"[A-Za-z_][A-Za-z0-9_]*")
_NUMBER_RE = re.compile(r"-?\d+(\.\d+)?")
_LOOKUP_CALL_RE = re.compile(r":LKP\.(\w+)\(")
_MAPPING_VARIABLE_RE = re.compile(r"\$\$(\w+)")
_PORT_REFERENCE_RE = re.compile(r":(\w+)")


def _convert_code_segment(segment):
    """Rewrite one piece of expression text that contains no string literal."""
    # Lookup calls go first so that a lookup whose name equals a built-in
    # function (":LKP.COUNT(") keeps its own name instead of being renamed.
    converted = _LOOKUP_CALL_RE.sub(r'lookup_func("\1", ', segment)

    # Only names followed by "(" are renamed.
    # NOTE(review): SYSDATE, SYSTIMESTAMP and SESSSTARTTIME are therefore
    # rewritten only when written with parentheses; a bare SYSDATE is treated
    # as a port name by convert_expression -- confirm this is intended.
    for infa_func, py_func in INFA_FUNC_MAP.items():
        pattern = re.compile(
            r"\b" + re.escape(infa_func) + r"\s*\(", re.IGNORECASE
        )
        converted = pattern.sub(f"{py_func}(", converted)

    converted = converted.replace("||", " + ")
    converted = _MAPPING_VARIABLE_RE.sub(r'get_variable("\1")', converted)
    converted = _PORT_REFERENCE_RE.sub(r'row["\1"]', converted)
    return converted


def convert_expression(expr):
    """Convert an Informatica expression string into Python expression text.

    Args:
        expr: The expression text; may be None or blank.

    Returns:
        The string "None" for a missing or blank expression, 'row["NAME"]'
        for a bare identifier, the text unchanged for a plain number or a
        single quoted literal, and otherwise the expression with known
        function names, "||", ":LKP.name(", "$$NAME" and ":NAME" rewritten.
        Text inside single-quoted literals is never rewritten.
    """
    if not expr or not expr.strip():
        return "None"

    cleaned = expr.strip()

    if _IDENTIFIER_RE.fullmatch(cleaned):
        return f'row["{cleaned}"]'

    if _NUMBER_RE.fullmatch(cleaned):
        return cleaned

    # Only a lone literal is returned as-is; "'a' || 'b'" also starts and
    # ends with a quote but still needs its operator converted.
    if _STRING_LITERAL_RE.fullmatch(cleaned):
        return cleaned

    # re.split() yields code at even indexes and quoted literals at odd ones.
    # Literals are copied verbatim so that format strings such as
    # 'HH24:MI:SS' are not mistaken for ":PORT" references.
    parts = _STRING_LITERAL_RE.split(cleaned)
    return "".join(
        part if index % 2 else _convert_code_segment(part)
        for index, part in enumerate(parts)
    )
95
+
96
+
97
# The five entities predefined by XML, keyed by the name between "&" and ";".
_XML_NAMED_ENTITIES = {
    "amp": "&",
    "lt": "<",
    "gt": ">",
    "apos": "'",
    "quot": '"',
}

# Matches a decimal reference, a hexadecimal reference, or a named entity.
# Digit counts are bounded so that int() never sees an oversized string.
_XML_ENTITY_RE = re.compile(
    r"&(#[0-9]{1,7}|#[xX][0-9A-Fa-f]{1,6}|[A-Za-z]+);"
)


def _decode_xml_entity(match):
    """Return the character for one matched entity, or the text unchanged."""
    body = match.group(1)
    if not body.startswith("#"):
        # Unknown names are left alone rather than guessed at.
        return _XML_NAMED_ENTITIES.get(body, match.group(0))

    digits = body[1:]
    if digits[0] in "xX":
        code_point = int(digits[1:], 16)
    else:
        code_point = int(digits)

    # Out-of-range values and surrogates have no usable character.
    if code_point > 0x10FFFF or 0xD800 <= code_point <= 0xDFFF:
        return match.group(0)
    return chr(code_point)


def convert_sql_expression(sql_expr):
    """Strip a SQL snippet and decode the XML entities left in it.

    Decoding is done in a single pass, so text produced by one replacement
    is never decoded again: "&amp;lt;" becomes the literal text "&lt;", not
    "<".  Numeric character references (such as "&#10;", "&#13;", "&#9;" and
    "&#xA;") and the five predefined XML entities are decoded; anything else
    that starts with "&" is left unchanged.

    Args:
        sql_expr: The SQL text; may be None or blank.

    Returns:
        The stripped, decoded text, or "" for a missing or blank input.
    """
    if not sql_expr or not sql_expr.strip():
        return ""

    return _XML_ENTITY_RE.sub(_decode_xml_entity, sql_expr.strip())
113
+
114
+
115
# Marker patterns are anchored on word boundaries so that a keyword embedded
# in a longer identifier (STOP, LAPTOP, SYSDATETIME, ROWNUMBER, SNOW) does not
# count, and whitespace before "(" is tolerated.
_MSSQL_MARKERS_RE = re.compile(
    r"\bGETDATE\s*\(\s*\)|\bISNULL\s*\(|\bTOP(?=[\s(])", re.IGNORECASE
)
_ORACLE_MARKERS_RE = re.compile(
    r"\bNVL\s*\(|\bSYSDATE\b|\bROWNUM\b", re.IGNORECASE
)
_POSTGRESQL_MARKERS_RE = re.compile(
    r"\bNOW\s*\(\s*\)|\bCOALESCE\s*\(", re.IGNORECASE
)


def detect_sql_dialect(sql_text):
    """Guess the SQL dialect of a statement from a few telltale keywords.

    The checks run in a fixed order -- mssql, then oracle, then postgresql --
    and the first dialect with a matching marker wins.  This is a heuristic:
    markers inside string literals or comments are not distinguished from
    real SQL.

    Args:
        sql_text: The SQL text; may be None or empty.

    Returns:
        One of "mssql", "oracle", "postgresql" or "generic".
    """
    if not sql_text:
        return "generic"

    checks = (
        ("mssql", _MSSQL_MARKERS_RE),
        ("oracle", _ORACLE_MARKERS_RE),
        ("postgresql", _POSTGRESQL_MARKERS_RE),
    )
    for dialect, markers in checks:
        if markers.search(sql_text):
            return dialect

    return "generic"
@@ -0,0 +1,118 @@
1
+ Metadata-Version: 2.4
2
+ Name: informatica-python
3
+ Version: 1.0.0
4
+ Summary: Convert Informatica PowerCenter workflow XML to Python/PySpark code
5
+ License: MIT
6
+ Requires-Python: >=3.8
7
+ Description-Content-Type: text/markdown
8
+ Requires-Dist: lxml>=4.9.0
9
+ Requires-Dist: pyyaml>=6.0
10
+ Provides-Extra: dev
11
+ Requires-Dist: pytest>=7.0; extra == "dev"
12
+
13
+ # informatica-python
14
+
15
+ Convert Informatica PowerCenter workflow XML files to Python/PySpark code.
16
+
17
+ ## Installation
18
+
19
+ ```bash
20
+ pip install informatica-python
21
+ ```
22
+
23
+ ## Quick Start
24
+
25
+ ### Command Line
26
+
27
+ ```bash
28
+ # Convert XML to Python files in a directory
29
+ informatica-python workflow.xml -o output_dir
30
+
31
+ # Convert XML to a zip file
32
+ informatica-python workflow.xml -z output.zip
33
+
34
+ # Use a different data library (pandas, dask, polars, vaex, modin)
35
+ informatica-python workflow.xml -o output_dir --data-lib polars
36
+
37
+ # Parse XML to JSON (no code generation)
38
+ informatica-python workflow.xml --json
39
+
40
+ # Save parsed JSON to file
41
+ informatica-python workflow.xml --json-file parsed.json
42
+ ```
43
+
44
+ ### Python API
45
+
46
+ ```python
47
+ from informatica_python import InformaticaConverter
48
+
49
+ # Convert XML to Python files
50
+ converter = InformaticaConverter(data_lib="pandas")
51
+ converter.convert("workflow.xml", output_dir="output")
52
+
53
+ # Convert to zip
54
+ converter.convert("workflow.xml", output_zip="output.zip")
55
+
56
+ # Parse XML to JSON dict
57
+ result = converter.parse_file("workflow.xml")
58
+
59
+ # Parse XML string
60
+ result = converter.parse_string(xml_string)
61
+ ```
62
+
63
+ ## Generated Output Files
64
+
65
+ | File | Description |
66
+ |------|-------------|
67
+ | `helper_functions.py` | Database/file I/O functions plus Python equivalents for 50+ Informatica expression functions |
68
+ | `mapping_N.py` | One file per mapping with full transformation logic |
69
+ | `workflow.py` | Task orchestration with topological ordering |
70
+ | `config.yml` | Connection configs, source/target metadata, variables |
71
+ | `all_sql_queries.sql` | All extracted SQL queries (source qualifiers, lookups, pre/post SQL) |
72
+ | `error_log.txt` | Conversion summary, warnings, and coverage statistics |
73
+
74
+ ## Supported Transformation Types
75
+
76
+ - Source Qualifier / Application Source Qualifier
77
+ - Expression
78
+ - Filter
79
+ - Aggregator
80
+ - Sorter
81
+ - Joiner
82
+ - Lookup Procedure
83
+ - Router
84
+ - Union
85
+ - Update Strategy
86
+ - Sequence Generator
87
+ - Normalizer
88
+ - Rank
89
+ - Stored Procedure (placeholder)
90
+ - Custom Transformation (placeholder)
91
+ - Java Transformation (placeholder)
92
+ - SQL Transformation
93
+
94
+ ## Supported Data Libraries
95
+
96
+ Choose your preferred data manipulation library with `--data-lib`:
97
+
98
+ - **pandas** (default) — Standard Python data analysis
99
+ - **dask** — Parallel computing with pandas-like API
100
+ - **polars** — Fast DataFrame library written in Rust
101
+ - **vaex** — Out-of-core DataFrames for large datasets
102
+ - **modin** — Drop-in pandas replacement with parallel execution
103
+
104
+ ## Informatica Expression Functions
105
+
106
+ The generated `helper_functions.py` includes Python equivalents for:
107
+
108
+ `IIF`, `DECODE`, `NVL`, `NVL2`, `ISNULL`, `LTRIM`, `RTRIM`, `UPPER`, `LOWER`, `SUBSTR`, `LPAD`, `RPAD`, `TO_CHAR`, `TO_DATE`, `TO_INTEGER`, `TO_BIGINT`, `TO_FLOAT`, `TO_DECIMAL`, `REPLACECHR`, `REPLACESTR`, `INSTR`, `LENGTH`, `CONCAT`, `REG_EXTRACT`, `REG_MATCH`, `REG_REPLACE`, `GET_DATE_PART`, `ADD_TO_DATE`, `IS_DATE`, `IS_NUMBER`, `IS_SPACES`, `SYSDATE`, `ERROR`, `ABORT`, and more.
109
+
110
+ ## Requirements
111
+
112
+ - Python >= 3.8
113
+ - lxml >= 4.9.0
114
+ - PyYAML >= 6.0
115
+
116
+ ## License
117
+
118
+ MIT
@@ -0,0 +1,20 @@
1
+ informatica_python/__init__.py,sha256=r3DaPWzFarm6s91zz_VbU1rT20KK-XqXs1a1p6hpk-s,120
2
+ informatica_python/cli.py,sha256=79KpI8p29aATLzi5qH7H2YhMl9I8rzjwMEGzbV48UjM,2480
3
+ informatica_python/converter.py,sha256=6wFPmLc3JyZCgKRYFGYtdD-9SQ4_brkYdB9KIxS1GfY,10506
4
+ informatica_python/models.py,sha256=vZhWYVTKhYC9aqaO3YJFcczFS_mHbpncRs0ME2iLUoI,7190
5
+ informatica_python/parser.py,sha256=ZRcf9x0GV7WbDh5OK_CC2DpswBLIjUcvOMxlYEDcFU0,19756
6
+ informatica_python/generators/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
7
+ informatica_python/generators/config_gen.py,sha256=4tqcNKTB06kyGZIiM4yl0q97q_i3zeCHXTjuE1dNFKY,5726
8
+ informatica_python/generators/error_log_gen.py,sha256=98cj2T0Hi3tdH6wQfFI5956QXpqeK11_eND9hzAKm2U,5685
9
+ informatica_python/generators/helper_gen.py,sha256=0i1VwAMZJwzEQWu_AXpWFVbGiGks7jMqp5eTGDZiYv8,34447
10
+ informatica_python/generators/mapping_gen.py,sha256=AEvA_AvoD9iQl9XPE6FuqobwUI5XjEXAboVEnegLfcY,27054
11
+ informatica_python/generators/sql_gen.py,sha256=rwDy-sFpcPZoetUSppK7iF02aFxYIX8PLICnK021o6E,5711
12
+ informatica_python/generators/workflow_gen.py,sha256=ltpDgQELPsERfqSIz1LQUFw_gs-wKqDTOMwb0IDxJpI,9402
13
+ informatica_python/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
14
+ informatica_python/utils/datatype_map.py,sha256=iLOYg-iBKT4rMecGbrFkTpJj4yqs5S9HeBOTLUIWhX0,2809
15
+ informatica_python/utils/expression_converter.py,sha256=pmTaY1R5KDVWtJiVD-8PgYIwas_rjKZ_SFhGvn91q2I,3451
16
+ informatica_python-1.0.0.dist-info/METADATA,sha256=LzB1RkDDa1uHI-slVt9mhlxDE82lyWDl-sy93KPP1PE,3344
17
+ informatica_python-1.0.0.dist-info/WHEEL,sha256=PovZm1ExVWmrRefZoXCfejlbKLnQI5SVIf1SWRV4QQI,97
18
+ informatica_python-1.0.0.dist-info/entry_points.txt,sha256=030jjTrx-1oRRQ16HZz52rdcKS8R8_llnymsTUtn_Xc,67
19
+ informatica_python-1.0.0.dist-info/top_level.txt,sha256=Dngg-WNteYi22XAJU2XKAQS8aZ52yM2LYC0tzxrlbVQ,19
20
+ informatica_python-1.0.0.dist-info/RECORD,,
@@ -0,0 +1,5 @@
1
+ Wheel-Version: 1.0
2
+ Generator: setuptools (80.9.0.post0)
3
+ Root-Is-Purelib: true
4
+ Tag: py3-none-any
5
+
@@ -0,0 +1,2 @@
1
+ [console_scripts]
2
+ informatica-python = informatica_python.cli:main
@@ -0,0 +1 @@
1
+ informatica_python