informatica-python 1.5.1__tar.gz → 1.6.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {informatica_python-1.5.1 → informatica_python-1.6.0}/PKG-INFO +1 -1
- {informatica_python-1.5.1 → informatica_python-1.6.0}/informatica_python/__init__.py +1 -1
- {informatica_python-1.5.1 → informatica_python-1.6.0}/informatica_python/cli.py +6 -0
- {informatica_python-1.5.1 → informatica_python-1.6.0}/informatica_python/converter.py +11 -5
- {informatica_python-1.5.1 → informatica_python-1.6.0}/informatica_python/generators/error_log_gen.py +117 -0
- {informatica_python-1.5.1 → informatica_python-1.6.0}/informatica_python/generators/mapping_gen.py +81 -31
- {informatica_python-1.5.1 → informatica_python-1.6.0}/informatica_python/generators/sql_gen.py +43 -62
- {informatica_python-1.5.1 → informatica_python-1.6.0}/informatica_python/utils/expression_converter.py +41 -5
- informatica_python-1.6.0/informatica_python/utils/sql_dialect.py +174 -0
- {informatica_python-1.5.1 → informatica_python-1.6.0}/informatica_python.egg-info/PKG-INFO +1 -1
- {informatica_python-1.5.1 → informatica_python-1.6.0}/informatica_python.egg-info/SOURCES.txt +1 -0
- {informatica_python-1.5.1 → informatica_python-1.6.0}/pyproject.toml +1 -1
- {informatica_python-1.5.1 → informatica_python-1.6.0}/tests/test_integration.py +275 -0
- {informatica_python-1.5.1 → informatica_python-1.6.0}/LICENSE +0 -0
- {informatica_python-1.5.1 → informatica_python-1.6.0}/README.md +0 -0
- {informatica_python-1.5.1 → informatica_python-1.6.0}/informatica_python/generators/__init__.py +0 -0
- {informatica_python-1.5.1 → informatica_python-1.6.0}/informatica_python/generators/config_gen.py +0 -0
- {informatica_python-1.5.1 → informatica_python-1.6.0}/informatica_python/generators/helper_gen.py +0 -0
- {informatica_python-1.5.1 → informatica_python-1.6.0}/informatica_python/generators/workflow_gen.py +0 -0
- {informatica_python-1.5.1 → informatica_python-1.6.0}/informatica_python/models.py +0 -0
- {informatica_python-1.5.1 → informatica_python-1.6.0}/informatica_python/parser.py +0 -0
- {informatica_python-1.5.1 → informatica_python-1.6.0}/informatica_python/utils/__init__.py +0 -0
- {informatica_python-1.5.1 → informatica_python-1.6.0}/informatica_python/utils/datatype_map.py +0 -0
- {informatica_python-1.5.1 → informatica_python-1.6.0}/informatica_python/utils/lib_adapters.py +0 -0
- {informatica_python-1.5.1 → informatica_python-1.6.0}/informatica_python.egg-info/dependency_links.txt +0 -0
- {informatica_python-1.5.1 → informatica_python-1.6.0}/informatica_python.egg-info/entry_points.txt +0 -0
- {informatica_python-1.5.1 → informatica_python-1.6.0}/informatica_python.egg-info/requires.txt +0 -0
- {informatica_python-1.5.1 → informatica_python-1.6.0}/informatica_python.egg-info/top_level.txt +0 -0
- {informatica_python-1.5.1 → informatica_python-1.6.0}/setup.cfg +0 -0
- {informatica_python-1.5.1 → informatica_python-1.6.0}/tests/test_converter.py +0 -0
|
@@ -46,6 +46,11 @@ def main():
|
|
|
46
46
|
default=None,
|
|
47
47
|
help="Path to Informatica .param file for variable substitution",
|
|
48
48
|
)
|
|
49
|
+
parser.add_argument(
|
|
50
|
+
"--validate-casts",
|
|
51
|
+
action="store_true",
|
|
52
|
+
help="Generate data quality validation code that logs warnings on type coercion",
|
|
53
|
+
)
|
|
49
54
|
|
|
50
55
|
args = parser.parse_args()
|
|
51
56
|
|
|
@@ -67,6 +72,7 @@ def main():
|
|
|
67
72
|
output_dir=args.output,
|
|
68
73
|
output_zip=args.zip,
|
|
69
74
|
param_file=args.param_file,
|
|
75
|
+
validate_casts=args.validate_casts,
|
|
70
76
|
)
|
|
71
77
|
print(f"Conversion complete! Output: {output_path}")
|
|
72
78
|
print(f"Files generated:")
|
|
@@ -34,7 +34,8 @@ class InformaticaConverter:
|
|
|
34
34
|
|
|
35
35
|
def convert(self, file_path: str, output_dir: str = "output",
|
|
36
36
|
output_zip: Optional[str] = None,
|
|
37
|
-
param_file: Optional[str] = None
|
|
37
|
+
param_file: Optional[str] = None,
|
|
38
|
+
validate_casts: bool = False) -> str:
|
|
38
39
|
self.powermart = self.parser.parse_file(file_path)
|
|
39
40
|
|
|
40
41
|
if not self.powermart.repositories:
|
|
@@ -48,7 +49,7 @@ class InformaticaConverter:
|
|
|
48
49
|
raise ValueError("No folder found in XML file")
|
|
49
50
|
|
|
50
51
|
if len(all_folders) == 1:
|
|
51
|
-
return self._convert_folder(all_folders[0], output_dir, output_zip, param_file)
|
|
52
|
+
return self._convert_folder(all_folders[0], output_dir, output_zip, param_file, validate_casts)
|
|
52
53
|
|
|
53
54
|
result_path = output_dir if not output_zip else os.path.dirname(output_zip) or "."
|
|
54
55
|
for folder in all_folders:
|
|
@@ -57,7 +58,7 @@ class InformaticaConverter:
|
|
|
57
58
|
if output_zip:
|
|
58
59
|
base, ext = os.path.splitext(output_zip)
|
|
59
60
|
folder_zip = f"{base}_{folder.name}{ext}"
|
|
60
|
-
self._convert_folder(folder, folder_dir, folder_zip, param_file)
|
|
61
|
+
self._convert_folder(folder, folder_dir, folder_zip, param_file, validate_casts)
|
|
61
62
|
return result_path
|
|
62
63
|
|
|
63
64
|
def convert_string(self, xml_string: str, output_dir: str = "output",
|
|
@@ -89,13 +90,18 @@ class InformaticaConverter:
|
|
|
89
90
|
|
|
90
91
|
def _convert_folder(self, folder: FolderDef, output_dir: str,
|
|
91
92
|
output_zip: Optional[str] = None,
|
|
92
|
-
param_file: Optional[str] = None
|
|
93
|
+
param_file: Optional[str] = None,
|
|
94
|
+
validate_casts: bool = False) -> str:
|
|
95
|
+
if param_file:
|
|
96
|
+
from informatica_python.utils.expression_converter import parse_param_file
|
|
97
|
+
parse_param_file(param_file)
|
|
98
|
+
|
|
93
99
|
files = {}
|
|
94
100
|
|
|
95
101
|
files["helper_functions.py"] = generate_helper_functions(folder, self.data_lib)
|
|
96
102
|
|
|
97
103
|
for i, mapping in enumerate(folder.mappings, 1):
|
|
98
|
-
code = generate_mapping_code(mapping, folder, self.data_lib, i)
|
|
104
|
+
code = generate_mapping_code(mapping, folder, self.data_lib, i, validate_casts=validate_casts)
|
|
99
105
|
files[f"mapping_{i}.py"] = code
|
|
100
106
|
|
|
101
107
|
files["workflow.py"] = generate_workflow_code(folder)
|
{informatica_python-1.5.1 → informatica_python-1.6.0}/informatica_python/generators/error_log_gen.py
RENAMED
|
@@ -222,6 +222,123 @@ def generate_error_log(folder: FolderDef, parser_errors=None, parser_warnings=No
|
|
|
222
222
|
for part in sti.partitions:
|
|
223
223
|
lines.append(f"[INFO] Session '{session.name}': Partition '{part.name}' (type={part.partition_type}) on '{sti.instance_name}'")
|
|
224
224
|
|
|
225
|
+
lines.append("")
|
|
226
|
+
lines.append("-" * 70)
|
|
227
|
+
lines.append("UNSUPPORTED TRANSFORMS (Require Manual Review)")
|
|
228
|
+
lines.append("-" * 70)
|
|
229
|
+
lines.append("")
|
|
230
|
+
|
|
231
|
+
unsupported_types = {
|
|
232
|
+
"Custom Transformation", "Java", "Stored Procedure",
|
|
233
|
+
"External Procedure", "HTTP Transformation",
|
|
234
|
+
"Web Service Consumer", "SQL",
|
|
235
|
+
}
|
|
236
|
+
skipped_items = []
|
|
237
|
+
for mapping in folder.mappings:
|
|
238
|
+
for tx in mapping.transformations:
|
|
239
|
+
if tx.type in unsupported_types:
|
|
240
|
+
skipped_attrs = []
|
|
241
|
+
for attr in tx.attributes:
|
|
242
|
+
if attr.value and attr.value.strip():
|
|
243
|
+
skipped_attrs.append(attr.name)
|
|
244
|
+
skipped_items.append({
|
|
245
|
+
"mapping": mapping.name,
|
|
246
|
+
"transform": tx.name,
|
|
247
|
+
"type": tx.type,
|
|
248
|
+
"field_count": len(tx.fields),
|
|
249
|
+
"skipped_attrs": skipped_attrs,
|
|
250
|
+
})
|
|
251
|
+
|
|
252
|
+
if skipped_items:
|
|
253
|
+
for item in skipped_items:
|
|
254
|
+
lines.append(f" Mapping: {item['mapping']}")
|
|
255
|
+
lines.append(f" Transform: {item['transform']} (type={item['type']}, {item['field_count']} fields)")
|
|
256
|
+
if item['skipped_attrs']:
|
|
257
|
+
lines.append(f" Skipped attributes: {', '.join(item['skipped_attrs'])}")
|
|
258
|
+
lines.append("")
|
|
259
|
+
else:
|
|
260
|
+
lines.append(" None - all transformations are supported")
|
|
261
|
+
lines.append("")
|
|
262
|
+
|
|
263
|
+
lines.append("-" * 70)
|
|
264
|
+
lines.append("UNMAPPED PORTS (Fields with no connectors)")
|
|
265
|
+
lines.append("-" * 70)
|
|
266
|
+
lines.append("")
|
|
267
|
+
|
|
268
|
+
for mapping in folder.mappings:
|
|
269
|
+
connected_fields = set()
|
|
270
|
+
for conn in mapping.connectors:
|
|
271
|
+
connected_fields.add((conn.from_instance, conn.from_field))
|
|
272
|
+
connected_fields.add((conn.to_instance, conn.to_field))
|
|
273
|
+
|
|
274
|
+
unmapped = []
|
|
275
|
+
for tx in mapping.transformations:
|
|
276
|
+
for fld in tx.fields:
|
|
277
|
+
pt = (fld.porttype or "").upper()
|
|
278
|
+
if "OUTPUT" in pt or "INPUT/OUTPUT" in pt:
|
|
279
|
+
if (tx.name, fld.name) not in connected_fields:
|
|
280
|
+
unmapped.append((tx.name, tx.type, fld.name, fld.porttype or ""))
|
|
281
|
+
|
|
282
|
+
if unmapped:
|
|
283
|
+
lines.append(f" Mapping: {mapping.name}")
|
|
284
|
+
for tx_name, tx_type, fld_name, port_type in unmapped:
|
|
285
|
+
lines.append(f" {tx_name} ({tx_type}): {fld_name} [{port_type}]")
|
|
286
|
+
lines.append("")
|
|
287
|
+
|
|
288
|
+
lines.append("-" * 70)
|
|
289
|
+
lines.append("UNSUPPORTED EXPRESSION FUNCTIONS")
|
|
290
|
+
lines.append("-" * 70)
|
|
291
|
+
lines.append("")
|
|
292
|
+
|
|
293
|
+
import re
|
|
294
|
+
known_functions = {
|
|
295
|
+
"IIF", "DECODE", "CHOOSE", "IN", "LTRIM", "RTRIM", "TRIM",
|
|
296
|
+
"UPPER", "LOWER", "INITCAP", "SUBSTR", "LPAD", "RPAD",
|
|
297
|
+
"REVERSE", "CHR", "ASCII", "LEFT", "RIGHT", "INDEXOF",
|
|
298
|
+
"TO_CHAR", "TO_DATE", "TO_TIMESTAMP", "TO_INTEGER", "TO_BIGINT",
|
|
299
|
+
"TO_FLOAT", "TO_DECIMAL", "CAST", "SYSDATE", "SYSTIMESTAMP",
|
|
300
|
+
"GET_DATE_PART", "SET_DATE_PART", "ADD_TO_DATE", "DATE_DIFF",
|
|
301
|
+
"DATE_COMPARE", "LAST_DAY", "MAKE_DATE_TIME", "TRUNC", "ROUND",
|
|
302
|
+
"ABS", "CEIL", "CEILING", "FLOOR", "MOD", "POWER", "SQRT",
|
|
303
|
+
"LOG", "EXP", "SIGN", "LENGTH", "CONCAT", "INSTR", "REPLACE",
|
|
304
|
+
"REPLACESTR", "REPLACECHR", "REG_EXTRACT", "REG_REPLACE",
|
|
305
|
+
"REG_MATCH", "IS_SPACES", "IS_NUMBER", "IS_DATE", "NVL",
|
|
306
|
+
"NVL2", "ISNULL", "MAX", "MIN", "SUM", "AVG", "COUNT",
|
|
307
|
+
"FIRST", "LAST", "MEDIAN", "PERCENTILE", "VARIANCE", "STDDEV",
|
|
308
|
+
"LOOKUP", "ERROR", "ABORT", "SESSSTARTTIME",
|
|
309
|
+
"METAPHONE", "SOUNDEX", "COMPRESS", "DECOMPRESS",
|
|
310
|
+
"RANK", "MOVINGAVG", "MOVINGSUM", "CUME",
|
|
311
|
+
}
|
|
312
|
+
func_pattern = re.compile(r'\b([A-Z_][A-Z0-9_]*)\s*\(', re.IGNORECASE)
|
|
313
|
+
unsupported_funcs = {}
|
|
314
|
+
for mapping in folder.mappings:
|
|
315
|
+
for tx in mapping.transformations:
|
|
316
|
+
for fld in tx.fields:
|
|
317
|
+
if not fld.expression:
|
|
318
|
+
continue
|
|
319
|
+
for m in func_pattern.finditer(fld.expression):
|
|
320
|
+
func_name = m.group(1).upper()
|
|
321
|
+
if func_name not in known_functions:
|
|
322
|
+
key = func_name
|
|
323
|
+
if key not in unsupported_funcs:
|
|
324
|
+
unsupported_funcs[key] = []
|
|
325
|
+
unsupported_funcs[key].append(
|
|
326
|
+
f"{mapping.name} > {tx.name} > {fld.name}"
|
|
327
|
+
)
|
|
328
|
+
|
|
329
|
+
if unsupported_funcs:
|
|
330
|
+
for func_name in sorted(unsupported_funcs.keys()):
|
|
331
|
+
locations = unsupported_funcs[func_name]
|
|
332
|
+
lines.append(f" {func_name}() — found in {len(locations)} field(s):")
|
|
333
|
+
for loc in locations[:5]:
|
|
334
|
+
lines.append(f" - {loc}")
|
|
335
|
+
if len(locations) > 5:
|
|
336
|
+
lines.append(f" ... and {len(locations) - 5} more")
|
|
337
|
+
lines.append("")
|
|
338
|
+
else:
|
|
339
|
+
lines.append(" None - all expression functions are recognized")
|
|
340
|
+
lines.append("")
|
|
341
|
+
|
|
225
342
|
lines.append("")
|
|
226
343
|
lines.append("-" * 70)
|
|
227
344
|
lines.append("PARSED XML TAG COVERAGE")
|
{informatica_python-1.5.1 → informatica_python-1.6.0}/informatica_python/generators/mapping_gen.py
RENAMED
|
@@ -16,6 +16,62 @@ from informatica_python.utils.lib_adapters import (
|
|
|
16
16
|
)
|
|
17
17
|
|
|
18
18
|
|
|
19
|
+
def _expand_mapplet_recursive(mapplet, mapplet_map, prefix, depth=0, max_depth=10, visited=None):
|
|
20
|
+
if visited is None:
|
|
21
|
+
visited = set()
|
|
22
|
+
if depth > max_depth:
|
|
23
|
+
return [], []
|
|
24
|
+
if mapplet.name in visited:
|
|
25
|
+
return [], []
|
|
26
|
+
visited.add(mapplet.name)
|
|
27
|
+
|
|
28
|
+
transforms = []
|
|
29
|
+
connectors = []
|
|
30
|
+
tx_names = {t.name for t in mapplet.transformations}
|
|
31
|
+
|
|
32
|
+
for tx in mapplet.transformations:
|
|
33
|
+
inlined = TransformationDef(
|
|
34
|
+
name=f"{prefix}__{tx.name}",
|
|
35
|
+
type=tx.type,
|
|
36
|
+
description=tx.description,
|
|
37
|
+
reusable=tx.reusable,
|
|
38
|
+
fields=list(tx.fields),
|
|
39
|
+
attributes=list(tx.attributes),
|
|
40
|
+
groups=list(tx.groups),
|
|
41
|
+
metadata_extensions=list(tx.metadata_extensions),
|
|
42
|
+
)
|
|
43
|
+
transforms.append(inlined)
|
|
44
|
+
|
|
45
|
+
for conn in mapplet.connectors:
|
|
46
|
+
from informatica_python.models import ConnectorDef
|
|
47
|
+
new_from = f"{prefix}__{conn.from_instance}" if conn.from_instance in tx_names else conn.from_instance
|
|
48
|
+
new_to = f"{prefix}__{conn.to_instance}" if conn.to_instance in tx_names else conn.to_instance
|
|
49
|
+
connectors.append(ConnectorDef(
|
|
50
|
+
from_instance=new_from,
|
|
51
|
+
from_field=conn.from_field,
|
|
52
|
+
from_instance_type=conn.from_instance_type,
|
|
53
|
+
to_instance=new_to,
|
|
54
|
+
to_field=conn.to_field,
|
|
55
|
+
to_instance_type=conn.to_instance_type,
|
|
56
|
+
))
|
|
57
|
+
|
|
58
|
+
for inst in getattr(mapplet, 'instances', []):
|
|
59
|
+
if inst.type == "Mapplet" or (inst.transformation_type or "").lower() == "mapplet":
|
|
60
|
+
nested_name = inst.transformation_name or inst.name
|
|
61
|
+
nested_mapplet = mapplet_map.get(nested_name)
|
|
62
|
+
if not nested_mapplet:
|
|
63
|
+
continue
|
|
64
|
+
nested_prefix = f"{prefix}__{inst.name}"
|
|
65
|
+
nested_tx, nested_conn = _expand_mapplet_recursive(
|
|
66
|
+
nested_mapplet, mapplet_map, nested_prefix,
|
|
67
|
+
depth + 1, max_depth, visited.copy()
|
|
68
|
+
)
|
|
69
|
+
transforms.extend(nested_tx)
|
|
70
|
+
connectors.extend(nested_conn)
|
|
71
|
+
|
|
72
|
+
return transforms, connectors
|
|
73
|
+
|
|
74
|
+
|
|
19
75
|
def _inline_mapplets(mapping, folder):
|
|
20
76
|
mapplet_map = {m.name: m for m in folder.mapplets}
|
|
21
77
|
extra_transforms = []
|
|
@@ -31,32 +87,11 @@ def _inline_mapplets(mapping, folder):
|
|
|
31
87
|
mapplet_instances.add(inst.name)
|
|
32
88
|
prefix = inst.name
|
|
33
89
|
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
reusable=tx.reusable,
|
|
40
|
-
fields=list(tx.fields),
|
|
41
|
-
attributes=list(tx.attributes),
|
|
42
|
-
groups=list(tx.groups),
|
|
43
|
-
metadata_extensions=list(tx.metadata_extensions),
|
|
44
|
-
)
|
|
45
|
-
extra_transforms.append(inlined)
|
|
46
|
-
|
|
47
|
-
for conn in mapplet.connectors:
|
|
48
|
-
from informatica_python.models import ConnectorDef
|
|
49
|
-
new_from = f"{prefix}__{conn.from_instance}" if conn.from_instance in {t.name for t in mapplet.transformations} else conn.from_instance
|
|
50
|
-
new_to = f"{prefix}__{conn.to_instance}" if conn.to_instance in {t.name for t in mapplet.transformations} else conn.to_instance
|
|
51
|
-
inlined_conn = ConnectorDef(
|
|
52
|
-
from_instance=new_from,
|
|
53
|
-
from_field=conn.from_field,
|
|
54
|
-
from_instance_type=conn.from_instance_type,
|
|
55
|
-
to_instance=new_to,
|
|
56
|
-
to_field=conn.to_field,
|
|
57
|
-
to_instance_type=conn.to_instance_type,
|
|
58
|
-
)
|
|
59
|
-
extra_connectors.append(inlined_conn)
|
|
90
|
+
nested_tx, nested_conn = _expand_mapplet_recursive(
|
|
91
|
+
mapplet, mapplet_map, prefix
|
|
92
|
+
)
|
|
93
|
+
extra_transforms.extend(nested_tx)
|
|
94
|
+
extra_connectors.extend(nested_conn)
|
|
60
95
|
|
|
61
96
|
rewired_connectors = []
|
|
62
97
|
mapplet_internal_names = set()
|
|
@@ -181,7 +216,8 @@ def _build_session_conn_overrides(mapping, folder):
|
|
|
181
216
|
|
|
182
217
|
|
|
183
218
|
def generate_mapping_code(mapping: MappingDef, folder: FolderDef,
|
|
184
|
-
data_lib: str = "pandas", mapping_index: int = 1
|
|
219
|
+
data_lib: str = "pandas", mapping_index: int = 1,
|
|
220
|
+
validate_casts: bool = False) -> str:
|
|
185
221
|
lines = []
|
|
186
222
|
lines.append('"""')
|
|
187
223
|
lines.append(f"Mapping: {mapping.name}")
|
|
@@ -275,7 +311,7 @@ def generate_mapping_code(mapping: MappingDef, folder: FolderDef,
|
|
|
275
311
|
_generate_transformation(lines, tx, connector_graph, source_dfs, transform_map, instance_map, data_lib)
|
|
276
312
|
|
|
277
313
|
for tgt_name, tgt_def in target_map.items():
|
|
278
|
-
_generate_target_write(lines, tgt_name, tgt_def, connector_graph, source_dfs, transform_map, instance_map, session_overrides)
|
|
314
|
+
_generate_target_write(lines, tgt_name, tgt_def, connector_graph, source_dfs, transform_map, instance_map, session_overrides, validate_casts=validate_casts)
|
|
279
315
|
|
|
280
316
|
lines.append("")
|
|
281
317
|
lines.append(f" log_mapping_end('{mapping.name}', start_time)")
|
|
@@ -287,6 +323,7 @@ def generate_mapping_code(mapping: MappingDef, folder: FolderDef,
|
|
|
287
323
|
lines.append(" _parser = _ap.ArgumentParser()")
|
|
288
324
|
lines.append(" _parser.add_argument('--param-file', default=None)")
|
|
289
325
|
lines.append(" _parser.add_argument('--config', default='config.yml')")
|
|
326
|
+
lines.append(" _parser.add_argument('--validate-casts', action='store_true', help='Log data quality warnings on type coercion')")
|
|
290
327
|
lines.append(" _args = _parser.parse_args()")
|
|
291
328
|
lines.append(" config = load_config(_args.config, param_file=_args.param_file)")
|
|
292
329
|
lines.append(f" run_{_safe_name(mapping.name)}(config)")
|
|
@@ -1181,7 +1218,7 @@ def _gen_sql_transform(lines, tx, tx_safe, input_df, source_dfs):
|
|
|
1181
1218
|
source_dfs[tx.name] = f"df_{tx_safe}"
|
|
1182
1219
|
|
|
1183
1220
|
|
|
1184
|
-
def _generate_target_write(lines, tgt_name, tgt_def, connector_graph, source_dfs, transform_map, instance_map, session_overrides=None):
|
|
1221
|
+
def _generate_target_write(lines, tgt_name, tgt_def, connector_graph, source_dfs, transform_map, instance_map, session_overrides=None, validate_casts=False):
|
|
1185
1222
|
tgt_safe = _safe_name(tgt_name)
|
|
1186
1223
|
|
|
1187
1224
|
to_conns = connector_graph.get("to", {}).get(tgt_name, [])
|
|
@@ -1213,7 +1250,7 @@ def _generate_target_write(lines, tgt_name, tgt_def, connector_graph, source_dfs
|
|
|
1213
1250
|
else:
|
|
1214
1251
|
lines.append(f" df_target_{tgt_safe} = {input_df}")
|
|
1215
1252
|
|
|
1216
|
-
_emit_type_casting(lines, tgt_safe, tgt_def)
|
|
1253
|
+
_emit_type_casting(lines, tgt_safe, tgt_def, validate_casts=validate_casts)
|
|
1217
1254
|
|
|
1218
1255
|
tgt_override = (session_overrides or {}).get(tgt_name, {})
|
|
1219
1256
|
tgt_conn = tgt_override.get("connection_name")
|
|
@@ -1269,7 +1306,7 @@ CAST_MAP = {
|
|
|
1269
1306
|
}
|
|
1270
1307
|
|
|
1271
1308
|
|
|
1272
|
-
def _emit_type_casting(lines, tgt_safe, tgt_def):
|
|
1309
|
+
def _emit_type_casting(lines, tgt_safe, tgt_def, validate_casts=False):
|
|
1273
1310
|
cast_ops = []
|
|
1274
1311
|
for fld in tgt_def.fields:
|
|
1275
1312
|
dt_key = fld.datatype.lower().strip()
|
|
@@ -1289,8 +1326,12 @@ def _emit_type_casting(lines, tgt_safe, tgt_def):
|
|
|
1289
1326
|
return
|
|
1290
1327
|
|
|
1291
1328
|
lines.append(f" # Type casting for target fields")
|
|
1329
|
+
if validate_casts:
|
|
1330
|
+
lines.append(f" _cast_warnings = []")
|
|
1292
1331
|
for col_name, cast_type, pd_dtype, nullable in cast_ops:
|
|
1293
1332
|
lines.append(f" if '{col_name}' in df_target_{tgt_safe}.columns:")
|
|
1333
|
+
if validate_casts:
|
|
1334
|
+
lines.append(f" _pre_null_{_safe_name(col_name)} = df_target_{tgt_safe}['{col_name}'].isna().sum()")
|
|
1294
1335
|
if cast_type == "datetime":
|
|
1295
1336
|
lines.append(f" df_target_{tgt_safe}['{col_name}'] = pd.to_datetime(df_target_{tgt_safe}['{col_name}'], errors='coerce')")
|
|
1296
1337
|
elif cast_type == "int":
|
|
@@ -1302,3 +1343,12 @@ def _emit_type_casting(lines, tgt_safe, tgt_def):
|
|
|
1302
1343
|
lines.append(f" df_target_{tgt_safe}['{col_name}'] = pd.to_numeric(df_target_{tgt_safe}['{col_name}'], errors='coerce')")
|
|
1303
1344
|
elif cast_type == "bool":
|
|
1304
1345
|
lines.append(f" df_target_{tgt_safe}['{col_name}'] = df_target_{tgt_safe}['{col_name}'].astype('{pd_dtype}')")
|
|
1346
|
+
if validate_casts:
|
|
1347
|
+
lines.append(f" _post_null_{_safe_name(col_name)} = df_target_{tgt_safe}['{col_name}'].isna().sum()")
|
|
1348
|
+
lines.append(f" _coerced_{_safe_name(col_name)} = int(_post_null_{_safe_name(col_name)} - _pre_null_{_safe_name(col_name)})")
|
|
1349
|
+
lines.append(f" if _coerced_{_safe_name(col_name)} > 0:")
|
|
1350
|
+
lines.append(f" _cast_warnings.append('{col_name}: {{}} values coerced to null during {cast_type} cast'.format(_coerced_{_safe_name(col_name)}))")
|
|
1351
|
+
lines.append(f" logger.warning('Column {col_name}: %d values coerced to null during {cast_type} cast', _coerced_{_safe_name(col_name)})")
|
|
1352
|
+
if validate_casts:
|
|
1353
|
+
lines.append(f" if _cast_warnings:")
|
|
1354
|
+
lines.append(f" logger.warning('Data quality warnings for target {tgt_safe}: %s', '; '.join(_cast_warnings))")
|
{informatica_python-1.5.1 → informatica_python-1.6.0}/informatica_python/generators/sql_gen.py
RENAMED
|
@@ -1,5 +1,16 @@
|
|
|
1
1
|
from informatica_python.models import FolderDef
|
|
2
2
|
from informatica_python.utils.expression_converter import convert_sql_expression, detect_sql_dialect
|
|
3
|
+
from informatica_python.utils.sql_dialect import translate_sql
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
SQL_ATTR_NAMES = {
|
|
7
|
+
"Sql Query": "Sql Query",
|
|
8
|
+
"Lookup Sql Override": "Lookup SQL Override",
|
|
9
|
+
"Pre SQL": "Pre-SQL",
|
|
10
|
+
"Post SQL": "Post-SQL",
|
|
11
|
+
"User Defined Join": "User Defined Join",
|
|
12
|
+
"Source Filter": "Source Filter",
|
|
13
|
+
}
|
|
3
14
|
|
|
4
15
|
|
|
5
16
|
def generate_sql_file(folder: FolderDef) -> str:
|
|
@@ -7,82 +18,34 @@ def generate_sql_file(folder: FolderDef) -> str:
|
|
|
7
18
|
lines.append("-- ============================================================")
|
|
8
19
|
lines.append(f"-- All SQL Queries extracted from folder: {folder.name}")
|
|
9
20
|
lines.append("-- Auto-generated by informatica-python")
|
|
21
|
+
lines.append("-- Includes ANSI SQL translations where dialect was detected")
|
|
10
22
|
lines.append("-- ============================================================")
|
|
11
23
|
lines.append("")
|
|
12
24
|
|
|
13
25
|
sql_count = 0
|
|
26
|
+
translated_count = 0
|
|
14
27
|
|
|
15
28
|
for mapping in folder.mappings:
|
|
16
29
|
mapping_sqls = []
|
|
17
30
|
|
|
18
31
|
for tx in mapping.transformations:
|
|
19
32
|
for attr in tx.attributes:
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
dialect = detect_sql_dialect(sql)
|
|
23
|
-
mapping_sqls.append({
|
|
24
|
-
"transformation": tx.name,
|
|
25
|
-
"type": tx.type,
|
|
26
|
-
"attribute": "Sql Query",
|
|
27
|
-
"sql": sql,
|
|
28
|
-
"dialect": dialect,
|
|
29
|
-
})
|
|
30
|
-
sql_count += 1
|
|
31
|
-
|
|
32
|
-
elif attr.name == "Lookup Sql Override" and attr.value and attr.value.strip():
|
|
33
|
+
display_name = SQL_ATTR_NAMES.get(attr.name)
|
|
34
|
+
if display_name and attr.value and attr.value.strip():
|
|
33
35
|
sql = convert_sql_expression(attr.value)
|
|
34
36
|
dialect = detect_sql_dialect(sql)
|
|
35
|
-
|
|
37
|
+
ansi = translate_sql(sql, source_dialect=dialect.lower())
|
|
38
|
+
entry = {
|
|
36
39
|
"transformation": tx.name,
|
|
37
40
|
"type": tx.type,
|
|
38
|
-
"attribute":
|
|
41
|
+
"attribute": display_name,
|
|
39
42
|
"sql": sql,
|
|
40
43
|
"dialect": dialect,
|
|
41
|
-
}
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
mapping_sqls.append({
|
|
47
|
-
"transformation": tx.name,
|
|
48
|
-
"type": tx.type,
|
|
49
|
-
"attribute": "Pre-SQL",
|
|
50
|
-
"sql": sql,
|
|
51
|
-
"dialect": detect_sql_dialect(sql),
|
|
52
|
-
})
|
|
53
|
-
sql_count += 1
|
|
54
|
-
|
|
55
|
-
elif attr.name == "Post SQL" and attr.value and attr.value.strip():
|
|
56
|
-
sql = convert_sql_expression(attr.value)
|
|
57
|
-
mapping_sqls.append({
|
|
58
|
-
"transformation": tx.name,
|
|
59
|
-
"type": tx.type,
|
|
60
|
-
"attribute": "Post-SQL",
|
|
61
|
-
"sql": sql,
|
|
62
|
-
"dialect": detect_sql_dialect(sql),
|
|
63
|
-
})
|
|
64
|
-
sql_count += 1
|
|
65
|
-
|
|
66
|
-
elif attr.name == "User Defined Join" and attr.value and attr.value.strip():
|
|
67
|
-
sql = convert_sql_expression(attr.value)
|
|
68
|
-
mapping_sqls.append({
|
|
69
|
-
"transformation": tx.name,
|
|
70
|
-
"type": tx.type,
|
|
71
|
-
"attribute": "User Defined Join",
|
|
72
|
-
"sql": sql,
|
|
73
|
-
"dialect": detect_sql_dialect(sql),
|
|
74
|
-
})
|
|
75
|
-
sql_count += 1
|
|
76
|
-
|
|
77
|
-
elif attr.name == "Source Filter" and attr.value and attr.value.strip():
|
|
78
|
-
sql = convert_sql_expression(attr.value)
|
|
79
|
-
mapping_sqls.append({
|
|
80
|
-
"transformation": tx.name,
|
|
81
|
-
"type": tx.type,
|
|
82
|
-
"attribute": "Source Filter",
|
|
83
|
-
"sql": sql,
|
|
84
|
-
"dialect": detect_sql_dialect(sql),
|
|
85
|
-
})
|
|
44
|
+
}
|
|
45
|
+
if ansi.strip() != sql.strip():
|
|
46
|
+
entry["translated"] = ansi
|
|
47
|
+
translated_count += 1
|
|
48
|
+
mapping_sqls.append(entry)
|
|
86
49
|
sql_count += 1
|
|
87
50
|
|
|
88
51
|
if mapping_sqls:
|
|
@@ -98,6 +61,10 @@ def generate_sql_file(folder: FolderDef) -> str:
|
|
|
98
61
|
lines.append(f"-- ----")
|
|
99
62
|
lines.append(sq["sql"].rstrip())
|
|
100
63
|
lines.append("")
|
|
64
|
+
if "translated" in sq:
|
|
65
|
+
lines.append(f"-- >> ANSI SQL Translation:")
|
|
66
|
+
lines.append(sq["translated"].rstrip())
|
|
67
|
+
lines.append("")
|
|
101
68
|
lines.append("")
|
|
102
69
|
|
|
103
70
|
for session in folder.sessions:
|
|
@@ -106,11 +73,18 @@ def generate_sql_file(folder: FolderDef) -> str:
|
|
|
106
73
|
for attr in sti.attributes:
|
|
107
74
|
if "sql" in attr.name.lower() and attr.value and attr.value.strip():
|
|
108
75
|
sql = convert_sql_expression(attr.value)
|
|
109
|
-
|
|
76
|
+
dialect = detect_sql_dialect(sql)
|
|
77
|
+
ansi = translate_sql(sql, source_dialect=dialect.lower())
|
|
78
|
+
entry = {
|
|
110
79
|
"instance": sti.instance_name,
|
|
111
80
|
"attribute": attr.name,
|
|
112
81
|
"sql": sql,
|
|
113
|
-
|
|
82
|
+
"dialect": dialect,
|
|
83
|
+
}
|
|
84
|
+
if ansi.strip() != sql.strip():
|
|
85
|
+
entry["translated"] = ansi
|
|
86
|
+
translated_count += 1
|
|
87
|
+
session_sqls.append(entry)
|
|
114
88
|
sql_count += 1
|
|
115
89
|
|
|
116
90
|
if session_sqls:
|
|
@@ -121,12 +95,19 @@ def generate_sql_file(folder: FolderDef) -> str:
|
|
|
121
95
|
for sq in session_sqls:
|
|
122
96
|
lines.append(f"-- Instance: {sq['instance']}")
|
|
123
97
|
lines.append(f"-- Attribute: {sq['attribute']}")
|
|
98
|
+
lines.append(f"-- Detected dialect: {sq['dialect']}")
|
|
124
99
|
lines.append(f"-- ----")
|
|
125
100
|
lines.append(sq["sql"].rstrip())
|
|
126
101
|
lines.append("")
|
|
102
|
+
if "translated" in sq:
|
|
103
|
+
lines.append(f"-- >> ANSI SQL Translation:")
|
|
104
|
+
lines.append(sq["translated"].rstrip())
|
|
105
|
+
lines.append("")
|
|
127
106
|
lines.append("")
|
|
128
107
|
|
|
129
108
|
lines.append(f"-- Total SQL queries extracted: {sql_count}")
|
|
109
|
+
if translated_count:
|
|
110
|
+
lines.append(f"-- SQL queries with dialect translation: {translated_count}")
|
|
130
111
|
lines.append("")
|
|
131
112
|
|
|
132
113
|
return "\n".join(lines)
|
|
@@ -295,8 +295,8 @@ def _vectorize_value(val, df_var="df"):
|
|
|
295
295
|
return val
|
|
296
296
|
|
|
297
297
|
|
|
298
|
-
def
|
|
299
|
-
c =
|
|
298
|
+
def _vectorize_simple(part, df_var):
|
|
299
|
+
c = part.strip()
|
|
300
300
|
|
|
301
301
|
c = re.sub(r'\bISNULL\s*\(\s*([A-Za-z_]\w*)\s*\)',
|
|
302
302
|
lambda m: f'{df_var}["{m.group(1)}"].isna()', c, flags=re.IGNORECASE)
|
|
@@ -305,9 +305,6 @@ def _vectorize_condition(cond, df_var="df"):
|
|
|
305
305
|
c = re.sub(r'\b([A-Za-z_]\w*)\s*IS\s+NULL\b',
|
|
306
306
|
lambda m: f'{df_var}["{m.group(1)}"].isna()', c, flags=re.IGNORECASE)
|
|
307
307
|
|
|
308
|
-
c = re.sub(r'\bAND\b', ' & ', c, flags=re.IGNORECASE)
|
|
309
|
-
c = re.sub(r'\bOR\b', ' | ', c, flags=re.IGNORECASE)
|
|
310
|
-
c = re.sub(r'\bNOT\s+', ' ~', c, flags=re.IGNORECASE)
|
|
311
308
|
c = re.sub(r'<>', '!=', c)
|
|
312
309
|
c = re.sub(r'(?<![<>!=])=(?!=)', '==', c)
|
|
313
310
|
|
|
@@ -322,6 +319,45 @@ def _vectorize_condition(cond, df_var="df"):
|
|
|
322
319
|
return c
|
|
323
320
|
|
|
324
321
|
|
|
322
|
+
def _vectorize_condition(cond, df_var="df"):
|
|
323
|
+
c = cond.strip()
|
|
324
|
+
|
|
325
|
+
tokens = re.split(r'\b(AND|OR)\b', c, flags=re.IGNORECASE)
|
|
326
|
+
|
|
327
|
+
parts = []
|
|
328
|
+
ops = []
|
|
329
|
+
for tok in tokens:
|
|
330
|
+
stripped = tok.strip()
|
|
331
|
+
if stripped.upper() in ('AND', 'OR'):
|
|
332
|
+
ops.append('&' if stripped.upper() == 'AND' else '|')
|
|
333
|
+
elif stripped:
|
|
334
|
+
parts.append(stripped)
|
|
335
|
+
|
|
336
|
+
if not parts:
|
|
337
|
+
return "True"
|
|
338
|
+
|
|
339
|
+
vectorized = []
|
|
340
|
+
for part in parts:
|
|
341
|
+
negate = False
|
|
342
|
+
inner = part.strip()
|
|
343
|
+
if re.match(r'^NOT\s+', inner, flags=re.IGNORECASE):
|
|
344
|
+
negate = True
|
|
345
|
+
inner = re.sub(r'^NOT\s+', '', inner, flags=re.IGNORECASE).strip()
|
|
346
|
+
v = _vectorize_simple(inner, df_var)
|
|
347
|
+
if negate:
|
|
348
|
+
v = f"~({v})"
|
|
349
|
+
vectorized.append(v)
|
|
350
|
+
|
|
351
|
+
if len(vectorized) == 1:
|
|
352
|
+
return vectorized[0]
|
|
353
|
+
|
|
354
|
+
result_parts = [f"({vectorized[0]})"]
|
|
355
|
+
for i, op in enumerate(ops):
|
|
356
|
+
result_parts.append(f" {op} ")
|
|
357
|
+
result_parts.append(f"({vectorized[i + 1]})")
|
|
358
|
+
return "".join(result_parts)
|
|
359
|
+
|
|
360
|
+
|
|
325
361
|
def convert_filter_expression(expr):
|
|
326
362
|
if not expr or not expr.strip():
|
|
327
363
|
return "True"
|
|
@@ -0,0 +1,174 @@
|
|
|
1
|
+
import re
|
|
2
|
+
|
|
3
|
+
|
|
4
|
+
ORACLE_TO_ANSI = [
|
|
5
|
+
(re.compile(r'\bNVL2\s*\(\s*([^,]+?)\s*,\s*([^,]+?)\s*,\s*([^)]+?)\s*\)', re.IGNORECASE),
|
|
6
|
+
r'CASE WHEN \1 IS NOT NULL THEN \2 ELSE \3 END'),
|
|
7
|
+
(re.compile(r'\bNVL\s*\(\s*([^,]+?)\s*,\s*([^)]+?)\s*\)', re.IGNORECASE),
|
|
8
|
+
r'COALESCE(\1, \2)'),
|
|
9
|
+
(re.compile(r'\bSYSDATE\b', re.IGNORECASE), 'CURRENT_TIMESTAMP'),
|
|
10
|
+
(re.compile(r'\bSYSTIMESTAMP\b', re.IGNORECASE), 'CURRENT_TIMESTAMP'),
|
|
11
|
+
(re.compile(r'\|\|', re.IGNORECASE), ' || '),
|
|
12
|
+
]
|
|
13
|
+
|
|
14
|
+
MSSQL_TO_ANSI = [
|
|
15
|
+
(re.compile(r'\bISNULL\s*\(\s*([^,]+?)\s*,\s*([^)]+?)\s*\)', re.IGNORECASE),
|
|
16
|
+
r'COALESCE(\1, \2)'),
|
|
17
|
+
(re.compile(r'\bGETDATE\s*\(\s*\)', re.IGNORECASE), 'CURRENT_TIMESTAMP'),
|
|
18
|
+
(re.compile(r'\bCONVERT\s*\(\s*VARCHAR\s*,\s*([^,)]+?)\s*,\s*\d+\s*\)', re.IGNORECASE),
|
|
19
|
+
r'CAST(\1 AS VARCHAR)'),
|
|
20
|
+
(re.compile(r'\bLEN\s*\(', re.IGNORECASE), 'LENGTH('),
|
|
21
|
+
(re.compile(r'\bCHARINDEX\s*\(\s*([^,]+?)\s*,\s*([^)]+?)\s*\)', re.IGNORECASE),
|
|
22
|
+
r'POSITION(\1 IN \2)'),
|
|
23
|
+
]
|
|
24
|
+
|
|
25
|
+
_DECODE_RE = re.compile(
|
|
26
|
+
r'\bDECODE\s*\(', re.IGNORECASE
|
|
27
|
+
)
|
|
28
|
+
|
|
29
|
+
_ORACLE_JOIN_RE = re.compile(
|
|
30
|
+
r'(\w+\.\w+)\s*=\s*(\w+\.\w+)\s*\(\+\)'
|
|
31
|
+
)
|
|
32
|
+
|
|
33
|
+
_ORACLE_JOIN_RE2 = re.compile(
|
|
34
|
+
r'(\w+\.\w+)\s*\(\+\)\s*=\s*(\w+\.\w+)'
|
|
35
|
+
)
|
|
36
|
+
|
|
37
|
+
_ROWNUM_RE = re.compile(
|
|
38
|
+
r'\bAND\s+ROWNUM\s*<=?\s*(\d+)\b|\bWHERE\s+ROWNUM\s*<=?\s*(\d+)\b',
|
|
39
|
+
re.IGNORECASE
|
|
40
|
+
)
|
|
41
|
+
|
|
42
|
+
_TOP_RE = re.compile(
|
|
43
|
+
r'\bSELECT\s+TOP\s+(\d+)\b', re.IGNORECASE
|
|
44
|
+
)
|
|
45
|
+
|
|
46
|
+
|
|
47
|
+
def _convert_decode(sql):
    """Rewrite every Oracle DECODE(...) call in *sql* as an ANSI CASE expression.

    DECODE(expr, s1, r1, s2, r2, ..., default) becomes
    "CASE expr WHEN s1 THEN r1 WHEN s2 THEN r2 ... ELSE default END".
    Calls with fewer than three arguments, or with unbalanced parentheses,
    are left untouched.
    """
    out = sql
    search_from = 0
    while True:
        match = _DECODE_RE.search(out, search_from)
        if match is None:
            return out
        call_start = match.start()
        open_paren = match.end() - 1
        # Walk forward to the parenthesis matching the DECODE's "(".
        cursor = open_paren + 1
        level = 1
        total = len(out)
        while cursor < total and level > 0:
            ch = out[cursor]
            if ch == '(':
                level += 1
            elif ch == ')':
                level -= 1
            cursor += 1
        if level != 0:
            # Unbalanced call — skip past it and keep scanning.
            search_from = cursor
            continue
        pieces = _split_args(out[open_paren + 1:cursor - 1])
        if len(pieces) < 3:
            # Not enough arguments for a WHEN/THEN pair — leave as-is.
            search_from = cursor
            continue
        subject = pieces[0].strip()
        rest = pieces[1:]
        built = [f"CASE {subject}"]
        # Consume (search, result) pairs; a trailing odd element is the default.
        for j in range(0, len(rest) - 1, 2):
            built.append(f" WHEN {rest[j].strip()} THEN {rest[j + 1].strip()}")
        if len(rest) % 2 == 1:
            built.append(f" ELSE {rest[-1].strip()}")
        built.append(" END")
        case_sql = "".join(built)
        out = out[:call_start] + case_sql + out[cursor:]
        # Resume scanning after the inserted CASE to handle later DECODEs.
        search_from = call_start + len(case_sql)
|
|
86
|
+
|
|
87
|
+
|
|
88
|
+
def _split_args(s):
|
|
89
|
+
args = []
|
|
90
|
+
depth = 0
|
|
91
|
+
current = []
|
|
92
|
+
for ch in s:
|
|
93
|
+
if ch == '(':
|
|
94
|
+
depth += 1
|
|
95
|
+
current.append(ch)
|
|
96
|
+
elif ch == ')':
|
|
97
|
+
depth -= 1
|
|
98
|
+
current.append(ch)
|
|
99
|
+
elif ch == ',' and depth == 0:
|
|
100
|
+
args.append(''.join(current))
|
|
101
|
+
current = []
|
|
102
|
+
else:
|
|
103
|
+
current.append(ch)
|
|
104
|
+
if current:
|
|
105
|
+
args.append(''.join(current))
|
|
106
|
+
return args
|
|
107
|
+
|
|
108
|
+
|
|
109
|
+
def _convert_oracle_outer_join(sql):
    """Strip Oracle legacy ``(+)`` outer-join markers from join predicates.

    The ``(+)`` token is removed and a trailing SQL comment is appended to
    the predicate telling the reader which ANSI join (LEFT or RIGHT) to use
    instead; the join itself is not rewritten.
    """
    def _mark_left(m):
        return f'{m.group(1)} = {m.group(2)} -- (+) converted: use LEFT JOIN'

    def _mark_right(m):
        return f'{m.group(1)} = {m.group(2)} -- (+) converted: use RIGHT JOIN'

    # Apply the "(+) on the right" rule first, then the "(+) on the left"
    # rule, matching the original pass order.
    return _ORACLE_JOIN_RE2.sub(_mark_right, _ORACLE_JOIN_RE.sub(_mark_left, sql))
|
|
119
|
+
|
|
120
|
+
|
|
121
|
+
def _convert_rownum(sql):
    """Replace an Oracle ``ROWNUM <= n`` predicate with an ANSI LIMIT clause.

    Only the first ROWNUM predicate found is honoured; its limit value is
    appended as a trailing ``LIMIT n`` line after the predicate is removed.
    """
    match = _ROWNUM_RE.search(sql)
    if not match:
        return sql
    # Group 1 is the "AND ROWNUM" form, group 2 the "WHERE ROWNUM" form.
    limit = match.group(1) or match.group(2)
    remainder = _ROWNUM_RE.sub('', sql).strip()
    # Removing the predicate can leave a dangling keyword; trim it.
    # NOTE(review): these suffix checks are case-sensitive while the regex
    # is IGNORECASE — lowercase "and"/"where" would survive; confirm intent.
    if remainder.endswith('AND'):
        remainder = remainder[:-3].strip()
    if remainder.endswith('WHERE'):
        remainder = remainder[:-5].strip()
    remainder = remainder.rstrip(';')
    return f"{remainder}\nLIMIT {limit}"
|
|
133
|
+
|
|
134
|
+
|
|
135
|
+
def _convert_top(sql):
    """Rewrite MS SQL Server ``SELECT TOP n`` as an ANSI ``LIMIT n`` clause.

    The TOP prefix is dropped from the SELECT and the captured row count is
    appended as a trailing LIMIT line.  Statements without TOP pass through
    unchanged.
    """
    match = _TOP_RE.search(sql)
    if not match:
        return sql
    body = _TOP_RE.sub('SELECT', sql).rstrip(';')
    return f"{body}\nLIMIT {match.group(1)}"
|
|
143
|
+
|
|
144
|
+
|
|
145
|
+
def translate_sql(sql, source_dialect="auto", target_dialect="ansi"):
    """Translate vendor-specific SQL toward ANSI SQL.

    Args:
        sql: The SQL text to translate; empty/blank input is returned as-is.
        source_dialect: "oracle", "mssql"/"sql server", "generic",
            "postgresql", or "auto" to detect via
            expression_converter.detect_sql_dialect.
        target_dialect: Currently unused; only ANSI output is produced.

    Returns:
        The translated SQL string.
    """
    if not sql or not sql.strip():
        return sql

    # Imported lazily to avoid a circular import at module load time
    # (NOTE(review): presumed reason for the function-level import — confirm).
    from informatica_python.utils.expression_converter import detect_sql_dialect

    if source_dialect == "auto":
        source_dialect = detect_sql_dialect(sql).lower()

    result = sql
    rules = []

    if source_dialect == "oracle":
        # Structural rewrites first, then the token-level regex rules.
        result = _convert_oracle_outer_join(result)
        result = _convert_decode(result)
        result = _convert_rownum(result)
        rules = ORACLE_TO_ANSI
    elif source_dialect in ("mssql", "sql server"):
        result = _convert_top(result)
        rules = MSSQL_TO_ANSI
    elif source_dialect in ("generic", "postgresql"):
        # Unknown provenance: apply both rule sets, Oracle rules first.
        rules = ORACLE_TO_ANSI + MSSQL_TO_ANSI

    for pattern, replacement in rules:
        result = pattern.sub(replacement, result)

    return result
|
{informatica_python-1.5.1 → informatica_python-1.6.0}/informatica_python.egg-info/SOURCES.txt
RENAMED
|
@@ -23,5 +23,6 @@ informatica_python/utils/__init__.py
|
|
|
23
23
|
informatica_python/utils/datatype_map.py
|
|
24
24
|
informatica_python/utils/expression_converter.py
|
|
25
25
|
informatica_python/utils/lib_adapters.py
|
|
26
|
+
informatica_python/utils/sql_dialect.py
|
|
26
27
|
tests/test_converter.py
|
|
27
28
|
tests/test_integration.py
|
|
@@ -210,10 +210,18 @@ class TestFilterVectorized:
|
|
|
210
210
|
assert 'df["A"]' in result
|
|
211
211
|
assert 'df["B"]' in result
|
|
212
212
|
assert "AND" not in result
|
|
213
|
+
assert "(df[" in result
|
|
213
214
|
|
|
214
215
|
def test_or_condition(self):
|
|
215
216
|
result = convert_filter_vectorized("STATUS = 'A' OR STATUS = 'B'", "df")
|
|
216
217
|
assert "|" in result
|
|
218
|
+
assert "(df[" in result
|
|
219
|
+
|
|
220
|
+
def test_not_condition(self):
|
|
221
|
+
result = convert_filter_vectorized("NOT A = 1", "df")
|
|
222
|
+
assert "~(" in result
|
|
223
|
+
assert 'df["A"]' in result
|
|
224
|
+
assert "==" in result
|
|
217
225
|
|
|
218
226
|
def test_is_null_filter(self):
|
|
219
227
|
result = convert_filter_vectorized("NAME IS NULL", "df_src")
|
|
@@ -232,6 +240,7 @@ class TestFilterVectorized:
|
|
|
232
240
|
result = convert_expression_vectorized("IIF(A > 1 AND B < 2, 1, 0)", "df")
|
|
233
241
|
assert "np.where" in result
|
|
234
242
|
assert "&" in result
|
|
243
|
+
assert "(" in result
|
|
235
244
|
|
|
236
245
|
|
|
237
246
|
class TestLibAdapters:
|
|
@@ -538,3 +547,269 @@ class TestCLIParamFile:
|
|
|
538
547
|
help_text = f.getvalue()
|
|
539
548
|
from informatica_python.cli import main as cli_main
|
|
540
549
|
assert callable(cli_main)
|
|
550
|
+
|
|
551
|
+
|
|
552
|
+
class TestSQLDialectTranslation:
    """End-to-end checks for utils.sql_dialect.translate_sql and its use by sql_gen."""

    def test_nvl_to_coalesce(self):
        """Oracle NVL(a, b) is rewritten to COALESCE(a, b)."""
        from informatica_python.utils.sql_dialect import translate_sql
        result = translate_sql("SELECT NVL(COL1, 0) FROM T", source_dialect="oracle")
        assert "COALESCE" in result
        assert "NVL" not in result

    def test_sysdate_to_current_timestamp(self):
        """Oracle SYSDATE is rewritten to CURRENT_TIMESTAMP."""
        from informatica_python.utils.sql_dialect import translate_sql
        result = translate_sql("SELECT SYSDATE FROM DUAL", source_dialect="oracle")
        assert "CURRENT_TIMESTAMP" in result
        assert "SYSDATE" not in result

    def test_decode_to_case(self):
        """DECODE with pairs plus a default becomes CASE/WHEN/ELSE."""
        from informatica_python.utils.sql_dialect import translate_sql
        result = translate_sql("SELECT DECODE(STATUS, 'A', 'Active', 'I', 'Inactive', 'Unknown') FROM T", source_dialect="oracle")
        assert "CASE" in result
        assert "WHEN" in result
        assert "ELSE" in result

    def test_nvl2_to_case(self):
        """NVL2(x, a, b) becomes CASE WHEN x IS NOT NULL THEN a ELSE b END."""
        from informatica_python.utils.sql_dialect import translate_sql
        result = translate_sql("SELECT NVL2(COL1, 'has value', 'null') FROM T", source_dialect="oracle")
        assert "CASE WHEN" in result
        assert "IS NOT NULL" in result

    def test_getdate_to_current_timestamp(self):
        """MSSQL GETDATE() is rewritten to CURRENT_TIMESTAMP."""
        from informatica_python.utils.sql_dialect import translate_sql
        result = translate_sql("SELECT GETDATE() FROM T", source_dialect="mssql")
        assert "CURRENT_TIMESTAMP" in result

    def test_isnull_mssql_to_coalesce(self):
        """MSSQL ISNULL(a, b) is rewritten to COALESCE(a, b)."""
        from informatica_python.utils.sql_dialect import translate_sql
        result = translate_sql("SELECT ISNULL(COL1, 0) FROM T", source_dialect="mssql")
        assert "COALESCE" in result

    def test_top_to_limit(self):
        """MSSQL SELECT TOP n becomes a trailing LIMIT n clause."""
        from informatica_python.utils.sql_dialect import translate_sql
        result = translate_sql("SELECT TOP 10 * FROM T", source_dialect="mssql")
        assert "LIMIT 10" in result
        assert "TOP" not in result

    def test_rownum_to_limit(self):
        """Oracle WHERE ROWNUM <= n becomes a trailing LIMIT n clause."""
        from informatica_python.utils.sql_dialect import translate_sql
        result = translate_sql("SELECT * FROM T WHERE ROWNUM <= 5", source_dialect="oracle")
        assert "LIMIT 5" in result

    def test_auto_dialect_detection(self):
        """source_dialect defaults to 'auto'; NVL/SYSDATE imply Oracle detection."""
        from informatica_python.utils.sql_dialect import translate_sql
        result = translate_sql("SELECT NVL(A, 0), SYSDATE FROM T")
        assert "COALESCE" in result
        assert "CURRENT_TIMESTAMP" in result

    def test_no_change_for_clean_sql(self):
        """SQL without vendor constructs survives translation unchanged."""
        from informatica_python.utils.sql_dialect import translate_sql
        sql = "SELECT * FROM employees WHERE id = 1"
        result = translate_sql(sql, source_dialect="generic")
        assert result.strip() == sql.strip()

    def test_sql_gen_includes_translation(self):
        """generate_sql_file emits an ANSI-translated copy of SQ override queries."""
        from informatica_python.generators.sql_gen import generate_sql_file
        from informatica_python.models import (
            FolderDef, MappingDef, TransformationDef, TableAttribute, FieldDef
        )
        tx = TransformationDef(
            name="SQ_TEST", type="Source Qualifier",
            attributes=[TableAttribute(name="Sql Query", value="SELECT NVL(A, 0), SYSDATE FROM T")],
        )
        mapping = MappingDef(name="m_test", transformations=[tx])
        folder = FolderDef(name="F", mappings=[mapping])
        result = generate_sql_file(folder)
        assert "ANSI SQL Translation" in result
        assert "COALESCE" in result
|
|
626
|
+
|
|
627
|
+
|
|
628
|
+
class TestEnhancedErrorReporting:
    """Checks the sections emitted by generators.error_log_gen.generate_error_log."""

    def test_unsupported_transforms_section(self):
        """A Java transformation is reported under UNSUPPORTED TRANSFORMS with its attributes."""
        from informatica_python.generators.error_log_gen import generate_error_log
        from informatica_python.models import (
            FolderDef, MappingDef, TransformationDef, FieldDef, TableAttribute
        )
        tx = TransformationDef(
            name="JAVA_TX", type="Java",
            attributes=[TableAttribute(name="Class Name", value="com.example.Transform")],
            fields=[FieldDef(name="OUT1", datatype="string", porttype="OUTPUT")],
        )
        mapping = MappingDef(name="m_test", transformations=[tx])
        folder = FolderDef(name="F", mappings=[mapping])
        result = generate_error_log(folder)
        assert "UNSUPPORTED TRANSFORMS" in result
        assert "JAVA_TX" in result
        assert "Java" in result
        assert "Class Name" in result

    def test_unmapped_ports_section(self):
        """An OUTPUT port with no outgoing connector (OUT2) is flagged under UNMAPPED PORTS."""
        from informatica_python.generators.error_log_gen import generate_error_log
        from informatica_python.models import (
            FolderDef, MappingDef, TransformationDef, FieldDef, ConnectorDef
        )
        tx = TransformationDef(
            name="EXP1", type="Expression",
            fields=[
                FieldDef(name="IN1", datatype="string", porttype="INPUT"),
                FieldDef(name="OUT1", datatype="string", porttype="OUTPUT"),
                FieldDef(name="OUT2", datatype="string", porttype="OUTPUT"),
            ],
        )
        # Only OUT1 is wired to the target; OUT2 is deliberately left dangling.
        conn = ConnectorDef(
            from_instance="EXP1", from_field="OUT1",
            from_instance_type="Expression",
            to_instance="TGT", to_field="COL1",
            to_instance_type="Target Definition",
        )
        mapping = MappingDef(name="m_test", transformations=[tx], connectors=[conn])
        folder = FolderDef(name="F", mappings=[mapping])
        result = generate_error_log(folder)
        assert "UNMAPPED PORTS" in result
        assert "OUT2" in result

    def test_unsupported_functions_section(self):
        """An unknown function in a port expression is listed under UNSUPPORTED EXPRESSION FUNCTIONS."""
        from informatica_python.generators.error_log_gen import generate_error_log
        from informatica_python.models import (
            FolderDef, MappingDef, TransformationDef, FieldDef
        )
        tx = TransformationDef(
            name="EXP1", type="Expression",
            fields=[
                FieldDef(name="OUT1", datatype="string", porttype="OUTPUT",
                         expression="CUSTOM_FUNC(IN1, 'abc')"),
            ],
        )
        mapping = MappingDef(name="m_test", transformations=[tx])
        folder = FolderDef(name="F", mappings=[mapping])
        result = generate_error_log(folder)
        assert "UNSUPPORTED EXPRESSION FUNCTIONS" in result
        assert "CUSTOM_FUNC" in result
|
|
690
|
+
|
|
691
|
+
|
|
692
|
+
class TestNestedMapplets:
    """Checks recursive mapplet expansion in generators.mapping_gen."""

    def test_recursive_expansion(self):
        """A mapplet nested inside another expands with prefixed instance names."""
        from informatica_python.generators.mapping_gen import _expand_mapplet_recursive
        from informatica_python.models import (
            MappletDef, TransformationDef, FieldDef, ConnectorDef, InstanceDef
        )
        inner_mapplet = MappletDef(
            name="INNER_MPL",
            transformations=[
                TransformationDef(name="INNER_EXP", type="Expression",
                                  fields=[FieldDef(name="F1", datatype="string", porttype="INPUT/OUTPUT")]),
            ],
            connectors=[],
        )
        outer_mapplet = MappletDef(
            name="OUTER_MPL",
            transformations=[
                TransformationDef(name="OUTER_EXP", type="Expression",
                                  fields=[FieldDef(name="F1", datatype="string", porttype="INPUT/OUTPUT")]),
            ],
            connectors=[],
            instances=[
                InstanceDef(name="INNER_INST", type="Mapplet",
                            transformation_name="INNER_MPL", transformation_type="Mapplet"),
            ],
        )
        mapplet_map = {"INNER_MPL": inner_mapplet, "OUTER_MPL": outer_mapplet}
        transforms, connectors = _expand_mapplet_recursive(outer_mapplet, mapplet_map, "MPL1")
        names = [t.name for t in transforms]
        # Expanded names are prefixed: <root prefix>__<instance path>__<tx name>.
        assert "MPL1__OUTER_EXP" in names
        assert "MPL1__INNER_INST__INNER_EXP" in names

    def test_circular_reference_protection(self):
        """A mapplet referencing itself expands exactly once instead of recursing forever."""
        from informatica_python.generators.mapping_gen import _expand_mapplet_recursive
        from informatica_python.models import (
            MappletDef, TransformationDef, FieldDef, InstanceDef
        )
        circular = MappletDef(
            name="SELF_REF",
            transformations=[
                TransformationDef(name="EXP1", type="Expression",
                                  fields=[FieldDef(name="F1", datatype="string")]),
            ],
            connectors=[],
            instances=[
                InstanceDef(name="SELF", type="Mapplet",
                            transformation_name="SELF_REF", transformation_type="Mapplet"),
            ],
        )
        mapplet_map = {"SELF_REF": circular}
        transforms, _ = _expand_mapplet_recursive(circular, mapplet_map, "M")
        assert len(transforms) == 1

    def test_depth_limit(self):
        """A 15-deep mapplet chain stops expanding at the depth cap (<= 11 transforms)."""
        from informatica_python.generators.mapping_gen import _expand_mapplet_recursive
        from informatica_python.models import (
            MappletDef, TransformationDef, FieldDef, InstanceDef
        )
        # Build MPL_0 -> MPL_1 -> ... -> MPL_14, each holding one transform.
        mapplets = {}
        for i in range(15):
            name = f"MPL_{i}"
            instances = []
            if i < 14:
                instances = [InstanceDef(name=f"NEST_{i+1}", type="Mapplet",
                                         transformation_name=f"MPL_{i+1}",
                                         transformation_type="Mapplet")]
            mapplets[name] = MappletDef(
                name=name,
                transformations=[
                    TransformationDef(name=f"TX_{i}", type="Expression",
                                      fields=[FieldDef(name="F", datatype="string")]),
                ],
                connectors=[],
                instances=instances,
            )
        transforms, _ = _expand_mapplet_recursive(mapplets["MPL_0"], mapplets, "ROOT")
        # Presumably the generator caps recursion around depth 10 — the exact
        # cap is asserted only as an upper bound here.
        assert len(transforms) <= 11
|
|
770
|
+
|
|
771
|
+
|
|
772
|
+
class TestDataQualityValidation:
    """Checks the opt-in cast-validation code emitted by mapping_gen._emit_type_casting."""

    def test_validate_casts_generates_warnings(self):
        """With validate_casts=True the generated code tracks null counts and warns on coercion."""
        # NOTE(review): _safe_name is imported but not used in this test — confirm.
        from informatica_python.generators.mapping_gen import _emit_type_casting, _safe_name
        from informatica_python.models import FieldDef
        # Minimal stand-in for a target definition: only .fields is read.
        class FakeTgt:
            fields = [
                FieldDef(name="AGE", datatype="integer", nullable="NULL"),
                FieldDef(name="CREATED", datatype="date/time", nullable="NULL"),
            ]
        lines = []
        _emit_type_casting(lines, "TGT1", FakeTgt(), validate_casts=True)
        code = "\n".join(lines)
        assert "_cast_warnings" in code
        assert "_pre_null_" in code
        assert "_post_null_" in code
        assert "coerced to null" in code
        assert "logger.warning" in code

    def test_no_validation_by_default(self):
        """Without the flag, no null-tracking/warning scaffolding is emitted."""
        from informatica_python.generators.mapping_gen import _emit_type_casting
        from informatica_python.models import FieldDef
        class FakeTgt:
            fields = [
                FieldDef(name="AGE", datatype="integer", nullable="NULL"),
            ]
        lines = []
        _emit_type_casting(lines, "TGT1", FakeTgt())
        code = "\n".join(lines)
        assert "_cast_warnings" not in code
        assert "_pre_null_" not in code

    def test_validate_casts_cli_flag(self):
        """The CLI --help output advertises the validate-casts option."""
        import io, contextlib
        from informatica_python.cli import main
        f = io.StringIO()
        with contextlib.redirect_stdout(f):
            try:
                # NOTE(review): relies on `sys` being imported at module level
                # of the test file — not visible in this chunk; confirm.
                sys.argv = ["informatica-python", "--help"]
                main()
            except SystemExit:
                # argparse --help exits; swallow so we can inspect the output.
                pass
        help_text = f.getvalue()
        assert "--validate-casts" in help_text or "validate_casts" in help_text
|
|
File without changes
|
|
File without changes
|
{informatica_python-1.5.1 → informatica_python-1.6.0}/informatica_python/generators/__init__.py
RENAMED
|
File without changes
|
{informatica_python-1.5.1 → informatica_python-1.6.0}/informatica_python/generators/config_gen.py
RENAMED
|
File without changes
|
{informatica_python-1.5.1 → informatica_python-1.6.0}/informatica_python/generators/helper_gen.py
RENAMED
|
File without changes
|
{informatica_python-1.5.1 → informatica_python-1.6.0}/informatica_python/generators/workflow_gen.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{informatica_python-1.5.1 → informatica_python-1.6.0}/informatica_python/utils/datatype_map.py
RENAMED
|
File without changes
|
{informatica_python-1.5.1 → informatica_python-1.6.0}/informatica_python/utils/lib_adapters.py
RENAMED
|
File without changes
|
|
File without changes
|
{informatica_python-1.5.1 → informatica_python-1.6.0}/informatica_python.egg-info/entry_points.txt
RENAMED
|
File without changes
|
{informatica_python-1.5.1 → informatica_python-1.6.0}/informatica_python.egg-info/requires.txt
RENAMED
|
File without changes
|
{informatica_python-1.5.1 → informatica_python-1.6.0}/informatica_python.egg-info/top_level.txt
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|