informatica-python 1.2.0__tar.gz → 1.3.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {informatica_python-1.2.0 → informatica_python-1.3.0}/PKG-INFO +1 -1
- {informatica_python-1.2.0 → informatica_python-1.3.0}/informatica_python/generators/mapping_gen.py +386 -22
- {informatica_python-1.2.0 → informatica_python-1.3.0}/informatica_python/generators/workflow_gen.py +59 -3
- {informatica_python-1.2.0 → informatica_python-1.3.0}/informatica_python/utils/expression_converter.py +12 -7
- {informatica_python-1.2.0 → informatica_python-1.3.0}/informatica_python.egg-info/PKG-INFO +1 -1
- {informatica_python-1.2.0 → informatica_python-1.3.0}/pyproject.toml +1 -1
- {informatica_python-1.2.0 → informatica_python-1.3.0}/tests/test_converter.py +275 -1
- {informatica_python-1.2.0 → informatica_python-1.3.0}/README.md +0 -0
- {informatica_python-1.2.0 → informatica_python-1.3.0}/informatica_python/__init__.py +0 -0
- {informatica_python-1.2.0 → informatica_python-1.3.0}/informatica_python/cli.py +0 -0
- {informatica_python-1.2.0 → informatica_python-1.3.0}/informatica_python/converter.py +0 -0
- {informatica_python-1.2.0 → informatica_python-1.3.0}/informatica_python/generators/__init__.py +0 -0
- {informatica_python-1.2.0 → informatica_python-1.3.0}/informatica_python/generators/config_gen.py +0 -0
- {informatica_python-1.2.0 → informatica_python-1.3.0}/informatica_python/generators/error_log_gen.py +0 -0
- {informatica_python-1.2.0 → informatica_python-1.3.0}/informatica_python/generators/helper_gen.py +0 -0
- {informatica_python-1.2.0 → informatica_python-1.3.0}/informatica_python/generators/sql_gen.py +0 -0
- {informatica_python-1.2.0 → informatica_python-1.3.0}/informatica_python/models.py +0 -0
- {informatica_python-1.2.0 → informatica_python-1.3.0}/informatica_python/parser.py +0 -0
- {informatica_python-1.2.0 → informatica_python-1.3.0}/informatica_python/utils/__init__.py +0 -0
- {informatica_python-1.2.0 → informatica_python-1.3.0}/informatica_python/utils/datatype_map.py +0 -0
- {informatica_python-1.2.0 → informatica_python-1.3.0}/informatica_python.egg-info/SOURCES.txt +0 -0
- {informatica_python-1.2.0 → informatica_python-1.3.0}/informatica_python.egg-info/dependency_links.txt +0 -0
- {informatica_python-1.2.0 → informatica_python-1.3.0}/informatica_python.egg-info/entry_points.txt +0 -0
- {informatica_python-1.2.0 → informatica_python-1.3.0}/informatica_python.egg-info/requires.txt +0 -0
- {informatica_python-1.2.0 → informatica_python-1.3.0}/informatica_python.egg-info/top_level.txt +0 -0
- {informatica_python-1.2.0 → informatica_python-1.3.0}/setup.cfg +0 -0
{informatica_python-1.2.0 → informatica_python-1.3.0}/informatica_python/generators/mapping_gen.py
RENAMED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
from typing import List, Dict
|
|
2
2
|
from informatica_python.models import (
|
|
3
3
|
MappingDef, FolderDef, SourceDef, TargetDef,
|
|
4
|
-
TransformationDef, ConnectorDef, InstanceDef,
|
|
4
|
+
TransformationDef, ConnectorDef, InstanceDef, MappletDef,
|
|
5
5
|
)
|
|
6
6
|
from informatica_python.utils.expression_converter import (
|
|
7
7
|
convert_expression, convert_sql_expression,
|
|
@@ -11,6 +11,130 @@ from informatica_python.utils.expression_converter import (
|
|
|
11
11
|
from informatica_python.utils.datatype_map import get_python_type
|
|
12
12
|
|
|
13
13
|
|
|
14
|
+
def _inline_mapplets(mapping, folder):
|
|
15
|
+
mapplet_map = {m.name: m for m in folder.mapplets}
|
|
16
|
+
extra_transforms = []
|
|
17
|
+
extra_connectors = []
|
|
18
|
+
mapplet_instances = set()
|
|
19
|
+
|
|
20
|
+
for inst in mapping.instances:
|
|
21
|
+
if inst.type == "Mapplet" or (inst.transformation_type or "").lower() == "mapplet":
|
|
22
|
+
mapplet_name = inst.transformation_name or inst.name
|
|
23
|
+
mapplet = mapplet_map.get(mapplet_name)
|
|
24
|
+
if not mapplet:
|
|
25
|
+
continue
|
|
26
|
+
mapplet_instances.add(inst.name)
|
|
27
|
+
prefix = inst.name
|
|
28
|
+
|
|
29
|
+
for tx in mapplet.transformations:
|
|
30
|
+
inlined = TransformationDef(
|
|
31
|
+
name=f"{prefix}__{tx.name}",
|
|
32
|
+
type=tx.type,
|
|
33
|
+
description=tx.description,
|
|
34
|
+
reusable=tx.reusable,
|
|
35
|
+
fields=list(tx.fields),
|
|
36
|
+
attributes=list(tx.attributes),
|
|
37
|
+
groups=list(tx.groups),
|
|
38
|
+
metadata_extensions=list(tx.metadata_extensions),
|
|
39
|
+
)
|
|
40
|
+
extra_transforms.append(inlined)
|
|
41
|
+
|
|
42
|
+
for conn in mapplet.connectors:
|
|
43
|
+
from informatica_python.models import ConnectorDef
|
|
44
|
+
new_from = f"{prefix}__{conn.from_instance}" if conn.from_instance in {t.name for t in mapplet.transformations} else conn.from_instance
|
|
45
|
+
new_to = f"{prefix}__{conn.to_instance}" if conn.to_instance in {t.name for t in mapplet.transformations} else conn.to_instance
|
|
46
|
+
inlined_conn = ConnectorDef(
|
|
47
|
+
from_instance=new_from,
|
|
48
|
+
from_field=conn.from_field,
|
|
49
|
+
from_instance_type=conn.from_instance_type,
|
|
50
|
+
to_instance=new_to,
|
|
51
|
+
to_field=conn.to_field,
|
|
52
|
+
to_instance_type=conn.to_instance_type,
|
|
53
|
+
)
|
|
54
|
+
extra_connectors.append(inlined_conn)
|
|
55
|
+
|
|
56
|
+
rewired_connectors = []
|
|
57
|
+
mapplet_internal_names = set()
|
|
58
|
+
for inst_name in mapplet_instances:
|
|
59
|
+
mapplet_name = None
|
|
60
|
+
for inst in mapping.instances:
|
|
61
|
+
if inst.name == inst_name:
|
|
62
|
+
mapplet_name = inst.transformation_name or inst.name
|
|
63
|
+
break
|
|
64
|
+
mapplet = mapplet_map.get(mapplet_name) if mapplet_name else None
|
|
65
|
+
if mapplet:
|
|
66
|
+
for tx in mapplet.transformations:
|
|
67
|
+
mapplet_internal_names.add(f"{inst_name}__{tx.name}")
|
|
68
|
+
|
|
69
|
+
for conn in mapping.connectors:
|
|
70
|
+
if conn.to_instance in mapplet_instances:
|
|
71
|
+
first_tx = None
|
|
72
|
+
for ec in extra_connectors:
|
|
73
|
+
if ec.from_instance == conn.to_instance or ec.to_instance.startswith(f"{conn.to_instance}__"):
|
|
74
|
+
for et in extra_transforms:
|
|
75
|
+
if et.name.startswith(f"{conn.to_instance}__"):
|
|
76
|
+
has_input = any(
|
|
77
|
+
"INPUT" in (f.porttype or "").upper()
|
|
78
|
+
for f in et.fields
|
|
79
|
+
if f.name == conn.to_field
|
|
80
|
+
)
|
|
81
|
+
if has_input:
|
|
82
|
+
first_tx = et.name
|
|
83
|
+
break
|
|
84
|
+
if first_tx:
|
|
85
|
+
break
|
|
86
|
+
if not first_tx:
|
|
87
|
+
for et in extra_transforms:
|
|
88
|
+
if et.name.startswith(f"{conn.to_instance}__"):
|
|
89
|
+
first_tx = et.name
|
|
90
|
+
break
|
|
91
|
+
if first_tx:
|
|
92
|
+
from informatica_python.models import ConnectorDef
|
|
93
|
+
rewired_connectors.append(ConnectorDef(
|
|
94
|
+
from_instance=conn.from_instance,
|
|
95
|
+
from_field=conn.from_field,
|
|
96
|
+
from_instance_type=conn.from_instance_type,
|
|
97
|
+
to_instance=first_tx,
|
|
98
|
+
to_field=conn.to_field,
|
|
99
|
+
to_instance_type=conn.to_instance_type,
|
|
100
|
+
))
|
|
101
|
+
else:
|
|
102
|
+
rewired_connectors.append(conn)
|
|
103
|
+
elif conn.from_instance in mapplet_instances:
|
|
104
|
+
last_tx = None
|
|
105
|
+
for et in reversed(extra_transforms):
|
|
106
|
+
if et.name.startswith(f"{conn.from_instance}__"):
|
|
107
|
+
has_output = any(
|
|
108
|
+
"OUTPUT" in (f.porttype or "").upper()
|
|
109
|
+
for f in et.fields
|
|
110
|
+
if f.name == conn.from_field
|
|
111
|
+
)
|
|
112
|
+
if has_output:
|
|
113
|
+
last_tx = et.name
|
|
114
|
+
break
|
|
115
|
+
if not last_tx:
|
|
116
|
+
for et in reversed(extra_transforms):
|
|
117
|
+
if et.name.startswith(f"{conn.from_instance}__"):
|
|
118
|
+
last_tx = et.name
|
|
119
|
+
break
|
|
120
|
+
if last_tx:
|
|
121
|
+
from informatica_python.models import ConnectorDef
|
|
122
|
+
rewired_connectors.append(ConnectorDef(
|
|
123
|
+
from_instance=last_tx,
|
|
124
|
+
from_field=conn.from_field,
|
|
125
|
+
from_instance_type=conn.from_instance_type,
|
|
126
|
+
to_instance=conn.to_instance,
|
|
127
|
+
to_field=conn.to_field,
|
|
128
|
+
to_instance_type=conn.to_instance_type,
|
|
129
|
+
))
|
|
130
|
+
else:
|
|
131
|
+
rewired_connectors.append(conn)
|
|
132
|
+
else:
|
|
133
|
+
rewired_connectors.append(conn)
|
|
134
|
+
|
|
135
|
+
return extra_transforms, extra_connectors + rewired_connectors, mapplet_instances
|
|
136
|
+
|
|
137
|
+
|
|
14
138
|
def generate_mapping_code(mapping: MappingDef, folder: FolderDef,
|
|
15
139
|
data_lib: str = "pandas", mapping_index: int = 1) -> str:
|
|
16
140
|
lines = []
|
|
@@ -24,10 +148,17 @@ def generate_mapping_code(mapping: MappingDef, folder: FolderDef,
|
|
|
24
148
|
lines.append("")
|
|
25
149
|
lines.append("")
|
|
26
150
|
|
|
151
|
+
inlined_transforms, inlined_connectors, mapplet_instance_names = _inline_mapplets(mapping, folder)
|
|
152
|
+
|
|
153
|
+
all_transforms = list(mapping.transformations) + inlined_transforms
|
|
154
|
+
all_connectors = [c for c in mapping.connectors
|
|
155
|
+
if c.from_instance not in mapplet_instance_names
|
|
156
|
+
and c.to_instance not in mapplet_instance_names] + inlined_connectors
|
|
157
|
+
|
|
27
158
|
source_map = _build_source_map(mapping, folder)
|
|
28
159
|
target_map = _build_target_map(mapping, folder)
|
|
29
|
-
transform_map = {t.name: t for t in
|
|
30
|
-
connector_graph = _build_connector_graph(
|
|
160
|
+
transform_map = {t.name: t for t in all_transforms}
|
|
161
|
+
connector_graph = _build_connector_graph(all_connectors)
|
|
31
162
|
instance_map = {i.name: i for i in mapping.instances}
|
|
32
163
|
|
|
33
164
|
lines.append(f"def run_{_safe_name(mapping.name)}(config):")
|
|
@@ -50,7 +181,7 @@ def generate_mapping_code(mapping: MappingDef, folder: FolderDef,
|
|
|
50
181
|
safe = _safe_name(src_name)
|
|
51
182
|
source_dfs[src_name] = f"df_{safe}"
|
|
52
183
|
|
|
53
|
-
sq_transforms = [t for t in
|
|
184
|
+
sq_transforms = [t for t in all_transforms
|
|
54
185
|
if t.type in ("Source Qualifier", "Application Source Qualifier")]
|
|
55
186
|
if sq_transforms:
|
|
56
187
|
for sq in sq_transforms:
|
|
@@ -63,12 +194,14 @@ def generate_mapping_code(mapping: MappingDef, folder: FolderDef,
|
|
|
63
194
|
conn_name = _safe_name(src_def.db_name) if src_def.db_name else "default"
|
|
64
195
|
schema = src_def.owner_name or "dbo"
|
|
65
196
|
lines.append(f" df_{safe} = read_from_db(config, 'SELECT * FROM {schema}.{src_name}', '{conn_name}')")
|
|
197
|
+
elif src_def.flatfile:
|
|
198
|
+
_emit_flatfile_read(lines, safe, src_def)
|
|
66
199
|
else:
|
|
67
200
|
lines.append(f" df_{safe} = read_file(config.get('sources', {{}}).get('{src_name}', {{}}).get('file_path', '{src_name}'),")
|
|
68
201
|
lines.append(f" config.get('sources', {{}}).get('{src_name}', {{}}))")
|
|
69
202
|
lines.append("")
|
|
70
203
|
|
|
71
|
-
processing_order = _get_processing_order(
|
|
204
|
+
processing_order = _get_processing_order(all_transforms, connector_graph, sq_transforms)
|
|
72
205
|
|
|
73
206
|
for tx in processing_order:
|
|
74
207
|
if tx.type in ("Source Qualifier", "Application Source Qualifier"):
|
|
@@ -99,6 +232,107 @@ def _safe_name(name):
|
|
|
99
232
|
return safe.lower()
|
|
100
233
|
|
|
101
234
|
|
|
235
|
+
def _flatfile_config_dict(ff):
|
|
236
|
+
cfg = {}
|
|
237
|
+
if not ff:
|
|
238
|
+
return cfg
|
|
239
|
+
if ff.delimiter and ff.delimiter != ",":
|
|
240
|
+
d = ff.delimiter
|
|
241
|
+
DELIMITER_MAP = {
|
|
242
|
+
"COMMA": ",", "TAB": "\\t", "PIPE": "|", "SEMICOLON": ";",
|
|
243
|
+
"SPACE": " ", "TILDE": "~", "CARET": "^",
|
|
244
|
+
}
|
|
245
|
+
d = DELIMITER_MAP.get(d.upper(), d)
|
|
246
|
+
cfg["delimiter"] = d
|
|
247
|
+
if ff.is_fixed_width == "YES":
|
|
248
|
+
cfg["fixed_width"] = True
|
|
249
|
+
if ff.header_lines:
|
|
250
|
+
cfg["header_lines"] = ff.header_lines
|
|
251
|
+
if ff.skip_rows:
|
|
252
|
+
cfg["skip_rows"] = ff.skip_rows
|
|
253
|
+
if ff.text_qualifier:
|
|
254
|
+
cfg["quotechar"] = ff.text_qualifier
|
|
255
|
+
if ff.escape_character:
|
|
256
|
+
cfg["escapechar"] = ff.escape_character
|
|
257
|
+
if ff.strip_trailing_blanks == "YES":
|
|
258
|
+
cfg["strip_trailing_blanks"] = True
|
|
259
|
+
if ff.code_page:
|
|
260
|
+
cfg["encoding"] = ff.code_page
|
|
261
|
+
if ff.row_delimiter:
|
|
262
|
+
cfg["lineterminator"] = ff.row_delimiter
|
|
263
|
+
return cfg
|
|
264
|
+
|
|
265
|
+
|
|
266
|
+
def _emit_flatfile_read(lines, var_name, src_def, indent=" "):
|
|
267
|
+
ff = src_def.flatfile
|
|
268
|
+
fc = _flatfile_config_dict(ff)
|
|
269
|
+
if fc.get("fixed_width"):
|
|
270
|
+
widths = []
|
|
271
|
+
for fld in src_def.fields:
|
|
272
|
+
widths.append(fld.precision if fld.precision else 10)
|
|
273
|
+
lines.append(f"{indent}df_{var_name} = pd.read_fwf(")
|
|
274
|
+
lines.append(f"{indent} config.get('sources', {{}}).get('{src_def.name}', {{}}).get('file_path', '{src_def.name}'),")
|
|
275
|
+
lines.append(f"{indent} widths={widths},")
|
|
276
|
+
hdr = fc.get("header_lines", 0)
|
|
277
|
+
if hdr:
|
|
278
|
+
lines.append(f"{indent} header={hdr - 1},")
|
|
279
|
+
else:
|
|
280
|
+
lines.append(f"{indent} header=None,")
|
|
281
|
+
skip = fc.get("skip_rows", 0)
|
|
282
|
+
if skip:
|
|
283
|
+
lines.append(f"{indent} skiprows={skip},")
|
|
284
|
+
lines.append(f"{indent})")
|
|
285
|
+
return
|
|
286
|
+
|
|
287
|
+
file_cfg = {}
|
|
288
|
+
if "delimiter" in fc:
|
|
289
|
+
file_cfg["delimiter"] = fc["delimiter"]
|
|
290
|
+
if "quotechar" in fc:
|
|
291
|
+
file_cfg["quotechar"] = fc["quotechar"]
|
|
292
|
+
if "escapechar" in fc:
|
|
293
|
+
file_cfg["escapechar"] = fc["escapechar"]
|
|
294
|
+
if "encoding" in fc:
|
|
295
|
+
file_cfg["encoding"] = fc["encoding"]
|
|
296
|
+
if "lineterminator" in fc:
|
|
297
|
+
file_cfg["lineterminator"] = fc["lineterminator"]
|
|
298
|
+
hdr = fc.get("header_lines", 0)
|
|
299
|
+
if hdr:
|
|
300
|
+
file_cfg["header"] = True
|
|
301
|
+
file_cfg["header_row"] = hdr - 1
|
|
302
|
+
if fc.get("skip_rows"):
|
|
303
|
+
file_cfg["skip_rows"] = fc["skip_rows"]
|
|
304
|
+
if fc.get("strip_trailing_blanks"):
|
|
305
|
+
file_cfg["strip_trailing_blanks"] = True
|
|
306
|
+
|
|
307
|
+
if file_cfg:
|
|
308
|
+
lines.append(f"{indent}ff_cfg_{var_name} = {repr(file_cfg)}")
|
|
309
|
+
lines.append(f"{indent}ff_cfg_{var_name}.update(config.get('sources', {{}}).get('{src_def.name}', {{}}))")
|
|
310
|
+
lines.append(f"{indent}df_{var_name} = read_file(ff_cfg_{var_name}.get('file_path', '{src_def.name}'), ff_cfg_{var_name})")
|
|
311
|
+
else:
|
|
312
|
+
lines.append(f"{indent}df_{var_name} = read_file(config.get('sources', {{}}).get('{src_def.name}', {{}}).get('file_path', '{src_def.name}'),")
|
|
313
|
+
lines.append(f"{indent} config.get('sources', {{}}).get('{src_def.name}', {{}}))")
|
|
314
|
+
|
|
315
|
+
|
|
316
|
+
def _emit_flatfile_write(lines, var_name, tgt_def, indent=" "):
|
|
317
|
+
ff = tgt_def.flatfile
|
|
318
|
+
fc = _flatfile_config_dict(ff)
|
|
319
|
+
file_cfg = {}
|
|
320
|
+
if "delimiter" in fc:
|
|
321
|
+
file_cfg["delimiter"] = fc["delimiter"]
|
|
322
|
+
if "quotechar" in fc:
|
|
323
|
+
file_cfg["quotechar"] = fc["quotechar"]
|
|
324
|
+
if "encoding" in fc:
|
|
325
|
+
file_cfg["encoding"] = fc["encoding"]
|
|
326
|
+
|
|
327
|
+
if file_cfg:
|
|
328
|
+
lines.append(f"{indent}ff_cfg_{var_name} = {repr(file_cfg)}")
|
|
329
|
+
lines.append(f"{indent}ff_cfg_{var_name}.update(config.get('targets', {{}}).get('{tgt_def.name}', {{}}))")
|
|
330
|
+
lines.append(f"{indent}write_file(df_target_{var_name}, ff_cfg_{var_name}.get('file_path', '{tgt_def.name}'), ff_cfg_{var_name})")
|
|
331
|
+
else:
|
|
332
|
+
lines.append(f"{indent}write_file(df_target_{var_name}, config.get('targets', {{}}).get('{tgt_def.name}', {{}}).get('file_path', '{tgt_def.name}'),")
|
|
333
|
+
lines.append(f"{indent} config.get('targets', {{}}).get('{tgt_def.name}', {{}}))")
|
|
334
|
+
|
|
335
|
+
|
|
102
336
|
def _build_source_map(mapping, folder):
|
|
103
337
|
source_map = {}
|
|
104
338
|
for inst in mapping.instances:
|
|
@@ -221,6 +455,8 @@ def _generate_source_qualifier(lines, sq, source_map, source_dfs, connector_grap
|
|
|
221
455
|
schema = src_def.owner_name or "dbo"
|
|
222
456
|
cols = ", ".join(f.name for f in src_def.fields) if src_def.fields else "*"
|
|
223
457
|
lines.append(f" df_{sq_safe} = read_from_db(config, 'SELECT {cols} FROM {schema}.{src_def.name}', '{conn_name}')")
|
|
458
|
+
elif src_def.flatfile:
|
|
459
|
+
_emit_flatfile_read(lines, sq_safe, src_def)
|
|
224
460
|
else:
|
|
225
461
|
lines.append(f" df_{sq_safe} = read_file(config.get('sources', {{}}).get('{src_def.name}', {{}}).get('file_path', '{src_def.name}'),")
|
|
226
462
|
lines.append(f" config.get('sources', {{}}).get('{src_def.name}', {{}}))")
|
|
@@ -232,6 +468,8 @@ def _generate_source_qualifier(lines, sq, source_map, source_dfs, connector_grap
|
|
|
232
468
|
conn_name = _safe_name(src_def.db_name) if src_def.db_name else "default"
|
|
233
469
|
schema = src_def.owner_name or "dbo"
|
|
234
470
|
lines.append(f" df_{safe_src} = read_from_db(config, 'SELECT * FROM {schema}.{src_def.name}', '{conn_name}')")
|
|
471
|
+
elif src_def.flatfile:
|
|
472
|
+
_emit_flatfile_read(lines, safe_src, src_def)
|
|
235
473
|
else:
|
|
236
474
|
lines.append(f" df_{safe_src} = read_file(config.get('sources', {{}}).get('{src_def.name}', {{}}).get('file_path', '{src_def.name}'),")
|
|
237
475
|
lines.append(f" config.get('sources', {{}}).get('{src_def.name}', {{}}))")
|
|
@@ -278,7 +516,7 @@ def _generate_transformation(lines, tx, connector_graph, source_dfs, transform_m
|
|
|
278
516
|
elif tx_type == "sorter":
|
|
279
517
|
_gen_sorter_transform(lines, tx, tx_safe, input_df, source_dfs)
|
|
280
518
|
elif tx_type in ("joiner",):
|
|
281
|
-
_gen_joiner_transform(lines, tx, tx_safe, input_df, input_sources, source_dfs)
|
|
519
|
+
_gen_joiner_transform(lines, tx, tx_safe, input_df, input_sources, source_dfs, connector_graph)
|
|
282
520
|
elif tx_type in ("lookup procedure", "lookup"):
|
|
283
521
|
_gen_lookup_transform(lines, tx, tx_safe, input_df, source_dfs)
|
|
284
522
|
elif tx_type == "router":
|
|
@@ -410,7 +648,7 @@ def _gen_sorter_transform(lines, tx, tx_safe, input_df, source_dfs):
|
|
|
410
648
|
source_dfs[tx.name] = f"df_{tx_safe}"
|
|
411
649
|
|
|
412
650
|
|
|
413
|
-
def _gen_joiner_transform(lines, tx, tx_safe, input_df, input_sources, source_dfs):
|
|
651
|
+
def _gen_joiner_transform(lines, tx, tx_safe, input_df, input_sources, source_dfs, connector_graph=None):
|
|
414
652
|
join_type = "inner"
|
|
415
653
|
join_condition = ""
|
|
416
654
|
for attr in tx.attributes:
|
|
@@ -436,10 +674,31 @@ def _gen_joiner_transform(lines, tx, tx_safe, input_df, input_sources, source_df
|
|
|
436
674
|
|
|
437
675
|
left_keys, right_keys = parse_join_condition(join_condition)
|
|
438
676
|
|
|
677
|
+
master_src = None
|
|
678
|
+
detail_src = None
|
|
679
|
+
input_conns = connector_graph.get("to", {}).get(tx.name, []) if connector_graph else []
|
|
680
|
+
for conn in input_conns:
|
|
681
|
+
to_field = conn.to_field
|
|
682
|
+
if to_field in master_fields:
|
|
683
|
+
master_src = conn.from_instance
|
|
684
|
+
elif to_field in detail_fields:
|
|
685
|
+
detail_src = conn.from_instance
|
|
686
|
+
|
|
439
687
|
src_list = list(input_sources)
|
|
440
|
-
if len(src_list) >= 2:
|
|
441
|
-
|
|
442
|
-
|
|
688
|
+
if not master_src and not detail_src and len(src_list) >= 2:
|
|
689
|
+
master_src = src_list[0]
|
|
690
|
+
detail_src = src_list[1]
|
|
691
|
+
elif not master_src and len(src_list) >= 1:
|
|
692
|
+
master_src = src_list[0]
|
|
693
|
+
if not detail_src:
|
|
694
|
+
for s in src_list:
|
|
695
|
+
if s != master_src:
|
|
696
|
+
detail_src = s
|
|
697
|
+
break
|
|
698
|
+
|
|
699
|
+
if master_src and detail_src:
|
|
700
|
+
df_master = source_dfs.get(master_src, f"df_{_safe_name(master_src)}")
|
|
701
|
+
df_detail = source_dfs.get(detail_src, f"df_{_safe_name(detail_src)}")
|
|
443
702
|
|
|
444
703
|
lines.append(f" # Join ({join_type}): {join_condition or 'auto'}")
|
|
445
704
|
if left_keys and right_keys:
|
|
@@ -451,9 +710,7 @@ def _gen_joiner_transform(lines, tx, tx_safe, input_df, input_sources, source_df
|
|
|
451
710
|
lines.append(f" suffixes=('', '_master')")
|
|
452
711
|
lines.append(f" )")
|
|
453
712
|
else:
|
|
454
|
-
common_cols = []
|
|
455
|
-
if master_fields and detail_fields:
|
|
456
|
-
common_cols = [f for f in detail_fields if f in master_fields]
|
|
713
|
+
common_cols = [f for f in detail_fields if f in master_fields]
|
|
457
714
|
if common_cols:
|
|
458
715
|
lines.append(f" df_{tx_safe} = {df_detail}.merge(")
|
|
459
716
|
lines.append(f" {df_master},")
|
|
@@ -539,9 +796,13 @@ def _gen_lookup_transform(lines, tx, tx_safe, input_df, source_dfs):
|
|
|
539
796
|
|
|
540
797
|
drop_cols = [k for k in lookup_keys if k not in input_keys]
|
|
541
798
|
if drop_cols:
|
|
542
|
-
lines.append(f"
|
|
799
|
+
lines.append(f" _lkp_drop = [c for c in {drop_cols} if c in df_{tx_safe}.columns]")
|
|
800
|
+
lines.append(f" if _lkp_drop:")
|
|
801
|
+
lines.append(f" df_{tx_safe} = df_{tx_safe}.drop(columns=_lkp_drop)")
|
|
543
802
|
|
|
544
803
|
for rf in all_output_fields:
|
|
804
|
+
lines.append(f" if '{rf.name}' not in df_{tx_safe}.columns:")
|
|
805
|
+
lines.append(f" df_{tx_safe}['{rf.name}'] = None")
|
|
545
806
|
if rf.default_value:
|
|
546
807
|
lines.append(f" df_{tx_safe}['{rf.name}'] = df_{tx_safe}['{rf.name}'].fillna({repr(rf.default_value)})")
|
|
547
808
|
else:
|
|
@@ -629,26 +890,127 @@ def _gen_sequence_generator(lines, tx, tx_safe, input_df, source_dfs):
|
|
|
629
890
|
|
|
630
891
|
|
|
631
892
|
def _gen_normalizer_transform(lines, tx, tx_safe, input_df, source_dfs):
|
|
632
|
-
|
|
633
|
-
|
|
893
|
+
input_ports = []
|
|
894
|
+
output_ports = []
|
|
895
|
+
occurs_cols = []
|
|
896
|
+
id_cols = []
|
|
897
|
+
|
|
898
|
+
for fld in tx.fields:
|
|
899
|
+
pt = (fld.porttype or "").upper()
|
|
900
|
+
if "INPUT" in pt:
|
|
901
|
+
input_ports.append(fld)
|
|
902
|
+
if "OUTPUT" in pt:
|
|
903
|
+
output_ports.append(fld)
|
|
904
|
+
|
|
905
|
+
for fld in tx.fields:
|
|
906
|
+
if fld.field_number > 0:
|
|
907
|
+
occurs_cols.append(fld.name)
|
|
908
|
+
|
|
909
|
+
if not occurs_cols:
|
|
910
|
+
import re
|
|
911
|
+
base_groups = {}
|
|
912
|
+
for fld in input_ports:
|
|
913
|
+
m = re.match(r'^(.+?)(\d+)$', fld.name)
|
|
914
|
+
if m:
|
|
915
|
+
base = m.group(1)
|
|
916
|
+
idx = int(m.group(2))
|
|
917
|
+
if base not in base_groups:
|
|
918
|
+
base_groups[base] = []
|
|
919
|
+
base_groups[base].append(fld.name)
|
|
920
|
+
else:
|
|
921
|
+
id_cols.append(fld.name)
|
|
922
|
+
|
|
923
|
+
if base_groups:
|
|
924
|
+
longest_group = max(base_groups.values(), key=len)
|
|
925
|
+
occurs_cols = longest_group
|
|
926
|
+
id_cols = [f.name for f in input_ports if f.name not in occurs_cols]
|
|
927
|
+
else:
|
|
928
|
+
for fld in input_ports:
|
|
929
|
+
pt = (fld.porttype or "").upper()
|
|
930
|
+
if "INPUT" in pt and "OUTPUT" in pt:
|
|
931
|
+
id_cols.append(fld.name)
|
|
932
|
+
elif "INPUT" in pt and "OUTPUT" not in pt:
|
|
933
|
+
occurs_cols.append(fld.name)
|
|
934
|
+
|
|
935
|
+
if not id_cols:
|
|
936
|
+
id_cols = [f.name for f in input_ports if f.name not in occurs_cols]
|
|
937
|
+
|
|
938
|
+
gk_field = None
|
|
939
|
+
for fld in output_ports:
|
|
940
|
+
if "GK" in fld.name.upper() or "GENERATED" in fld.name.upper() or "KEY" in fld.name.upper():
|
|
941
|
+
gk_field = fld.name
|
|
942
|
+
break
|
|
943
|
+
|
|
944
|
+
lines.append(f" # Normalizer: unpivot repeated columns into rows")
|
|
945
|
+
if occurs_cols and id_cols:
|
|
946
|
+
lines.append(f" df_{tx_safe} = {input_df}.melt(")
|
|
947
|
+
lines.append(f" id_vars={id_cols},")
|
|
948
|
+
lines.append(f" value_vars={occurs_cols},")
|
|
949
|
+
lines.append(f" var_name='_norm_variable',")
|
|
950
|
+
lines.append(f" value_name='_norm_value'")
|
|
951
|
+
lines.append(f" )")
|
|
952
|
+
lines.append(f" df_{tx_safe} = df_{tx_safe}.dropna(subset=['_norm_value']).reset_index(drop=True)")
|
|
953
|
+
elif occurs_cols:
|
|
954
|
+
lines.append(f" df_{tx_safe} = {input_df}[{occurs_cols}].stack().reset_index(drop=True).to_frame('_norm_value')")
|
|
955
|
+
else:
|
|
956
|
+
lines.append(f" df_{tx_safe} = {input_df}.copy()")
|
|
957
|
+
|
|
958
|
+
if gk_field:
|
|
959
|
+
lines.append(f" df_{tx_safe}['{gk_field}'] = range(1, len(df_{tx_safe}) + 1)")
|
|
960
|
+
|
|
634
961
|
source_dfs[tx.name] = f"df_{tx_safe}"
|
|
635
962
|
|
|
636
963
|
|
|
637
964
|
def _gen_rank_transform(lines, tx, tx_safe, input_df, source_dfs):
|
|
638
965
|
rank_port = None
|
|
639
|
-
|
|
966
|
+
group_by_ports = []
|
|
967
|
+
top_bottom = "TOP"
|
|
968
|
+
top_n = 0
|
|
969
|
+
|
|
640
970
|
for fld in tx.fields:
|
|
641
|
-
|
|
971
|
+
pt = (fld.porttype or "").upper()
|
|
972
|
+
if "INPUT" in pt and "OUTPUT" in pt:
|
|
973
|
+
group_by_ports.append(fld.name)
|
|
974
|
+
|
|
975
|
+
for fld in tx.fields:
|
|
976
|
+
if fld.expression and fld.expression.strip() and fld.name.upper() not in ("RANKINDEX",):
|
|
642
977
|
rank_port = fld.name
|
|
643
|
-
|
|
978
|
+
break
|
|
979
|
+
if not rank_port:
|
|
980
|
+
for fld in tx.fields:
|
|
981
|
+
if fld.name.upper() == "RANKINDEX":
|
|
982
|
+
continue
|
|
983
|
+
pt = (fld.porttype or "").upper()
|
|
984
|
+
if "INPUT" in pt and "OUTPUT" not in pt:
|
|
985
|
+
rank_port = fld.name
|
|
986
|
+
break
|
|
987
|
+
|
|
644
988
|
for attr in tx.attributes:
|
|
645
989
|
if attr.name == "Top/Bottom":
|
|
646
990
|
top_bottom = attr.value
|
|
991
|
+
elif attr.name == "Number Of Ranks":
|
|
992
|
+
try:
|
|
993
|
+
top_n = int(attr.value)
|
|
994
|
+
except (ValueError, TypeError):
|
|
995
|
+
top_n = 0
|
|
996
|
+
|
|
997
|
+
ascending = top_bottom.upper() != "TOP"
|
|
647
998
|
|
|
648
999
|
lines.append(f" df_{tx_safe} = {input_df}.copy()")
|
|
649
|
-
if rank_port:
|
|
650
|
-
|
|
651
|
-
lines.append(f" df_{tx_safe}['
|
|
1000
|
+
if rank_port and group_by_ports:
|
|
1001
|
+
lines.append(f" # Rank by '{rank_port}' within groups {group_by_ports}")
|
|
1002
|
+
lines.append(f" df_{tx_safe}['RANKINDEX'] = df_{tx_safe}.groupby({group_by_ports})['{rank_port}'].rank(")
|
|
1003
|
+
lines.append(f" method='min', ascending={ascending}")
|
|
1004
|
+
lines.append(f" ).astype(int)")
|
|
1005
|
+
if top_n:
|
|
1006
|
+
lines.append(f" df_{tx_safe} = df_{tx_safe}[df_{tx_safe}['RANKINDEX'] <= {top_n}].reset_index(drop=True)")
|
|
1007
|
+
elif rank_port:
|
|
1008
|
+
lines.append(f" # Rank by '{rank_port}' (no group-by)")
|
|
1009
|
+
lines.append(f" df_{tx_safe}['RANKINDEX'] = df_{tx_safe}['{rank_port}'].rank(method='min', ascending={ascending}).astype(int)")
|
|
1010
|
+
if top_n:
|
|
1011
|
+
lines.append(f" df_{tx_safe} = df_{tx_safe}[df_{tx_safe}['RANKINDEX'] <= {top_n}].reset_index(drop=True)")
|
|
1012
|
+
else:
|
|
1013
|
+
lines.append(f" df_{tx_safe}['RANKINDEX'] = range(1, len(df_{tx_safe}) + 1)")
|
|
652
1014
|
source_dfs[tx.name] = f"df_{tx_safe}"
|
|
653
1015
|
|
|
654
1016
|
|
|
@@ -759,6 +1121,8 @@ def _generate_target_write(lines, tgt_name, tgt_def, connector_graph, source_dfs
|
|
|
759
1121
|
|
|
760
1122
|
if tgt_def.database_type and tgt_def.database_type != "Flat File":
|
|
761
1123
|
lines.append(f" write_to_db(config, df_target_{tgt_safe}, '{tgt_def.name}', 'target')")
|
|
1124
|
+
elif tgt_def.flatfile:
|
|
1125
|
+
_emit_flatfile_write(lines, tgt_safe, tgt_def)
|
|
762
1126
|
else:
|
|
763
1127
|
lines.append(f" write_file(df_target_{tgt_safe}, config.get('targets', {{}}).get('{tgt_def.name}', {{}}).get('file_path', '{tgt_def.name}'),")
|
|
764
1128
|
lines.append(f" config.get('targets', {{}}).get('{tgt_def.name}', {{}}))")
|
{informatica_python-1.2.0 → informatica_python-1.3.0}/informatica_python/generators/workflow_gen.py
RENAMED
|
@@ -1,4 +1,5 @@
|
|
|
1
1
|
from informatica_python.models import FolderDef, WorkflowDef, TaskInstanceDef
|
|
2
|
+
from informatica_python.utils.expression_converter import convert_expression
|
|
2
3
|
|
|
3
4
|
|
|
4
5
|
def generate_workflow_code(folder: FolderDef) -> str:
|
|
@@ -127,14 +128,51 @@ def _generate_workflow_function(lines, wf: WorkflowDef, folder: FolderDef):
|
|
|
127
128
|
lines.append("")
|
|
128
129
|
|
|
129
130
|
elif task.task_type == "Decision":
|
|
130
|
-
lines.append(f" # Decision Task: {task.name}")
|
|
131
131
|
decision_cond = ""
|
|
132
|
+
decision_name = ""
|
|
132
133
|
for attr in task.attributes:
|
|
133
134
|
if attr.name == "Decision Condition":
|
|
134
135
|
decision_cond = attr.value
|
|
136
|
+
elif attr.name == "Decision Name":
|
|
137
|
+
decision_name = attr.value
|
|
138
|
+
|
|
139
|
+
lines.append(f" # Decision Task: {task.name}")
|
|
135
140
|
if decision_cond:
|
|
136
|
-
|
|
137
|
-
|
|
141
|
+
py_cond = _convert_decision_condition(decision_cond)
|
|
142
|
+
lines.append(f" # Original condition: {decision_cond}")
|
|
143
|
+
lines.append(f" decision_{task_safe} = {py_cond}")
|
|
144
|
+
lines.append(f" logger.info(f'Decision {task.name}: {{decision_{task_safe}}}')")
|
|
145
|
+
|
|
146
|
+
succ_targets = []
|
|
147
|
+
fail_targets = []
|
|
148
|
+
for link in wf.links:
|
|
149
|
+
if link.from_instance == task.name:
|
|
150
|
+
cond_text = (link.condition or "").strip()
|
|
151
|
+
if cond_text and ("$" in cond_text or "SUCCEEDED" in cond_text.upper()
|
|
152
|
+
or "TRUE" in cond_text.upper()):
|
|
153
|
+
succ_targets.append(link.to_instance)
|
|
154
|
+
elif cond_text and ("FAILED" in cond_text.upper()
|
|
155
|
+
or "FALSE" in cond_text.upper()):
|
|
156
|
+
fail_targets.append(link.to_instance)
|
|
157
|
+
else:
|
|
158
|
+
succ_targets.append(link.to_instance)
|
|
159
|
+
|
|
160
|
+
if succ_targets or fail_targets:
|
|
161
|
+
lines.append(f" if decision_{task_safe}:")
|
|
162
|
+
if succ_targets:
|
|
163
|
+
for t in succ_targets:
|
|
164
|
+
lines.append(f" logger.info('Decision {task.name} => proceeding to {t}')")
|
|
165
|
+
else:
|
|
166
|
+
lines.append(f" pass")
|
|
167
|
+
if fail_targets:
|
|
168
|
+
lines.append(f" else:")
|
|
169
|
+
for t in fail_targets:
|
|
170
|
+
lines.append(f" logger.info('Decision {task.name} => proceeding to {t}')")
|
|
171
|
+
else:
|
|
172
|
+
lines.append(f" if not decision_{task_safe}:")
|
|
173
|
+
lines.append(f" logger.warning('Decision {task.name} evaluated to False')")
|
|
174
|
+
else:
|
|
175
|
+
lines.append(f" logger.info('Decision task: {task.name} (no condition specified)')")
|
|
138
176
|
lines.append("")
|
|
139
177
|
|
|
140
178
|
elif task.task_type == "Timer":
|
|
@@ -226,6 +264,24 @@ def _get_task_execution_order(wf: WorkflowDef):
|
|
|
226
264
|
return ordered
|
|
227
265
|
|
|
228
266
|
|
|
267
|
+
def _convert_decision_condition(condition):
|
|
268
|
+
import re
|
|
269
|
+
cond = condition.strip()
|
|
270
|
+
cond = re.sub(r'\$\$(\w+)', r'\1', cond)
|
|
271
|
+
cond = re.sub(r'\$(\w+)\.(\w+)\.(Status|PrevTaskStatus)', r"'\2_status'", cond)
|
|
272
|
+
cond = re.sub(r'\bSUCCEEDED\b', "'SUCCEEDED'", cond, flags=re.IGNORECASE)
|
|
273
|
+
cond = re.sub(r'\bFAILED\b', "'FAILED'", cond, flags=re.IGNORECASE)
|
|
274
|
+
cond = re.sub(r'\bABORTED\b', "'ABORTED'", cond, flags=re.IGNORECASE)
|
|
275
|
+
cond = re.sub(r'\bAND\b', 'and', cond, flags=re.IGNORECASE)
|
|
276
|
+
cond = re.sub(r'\bOR\b', 'or', cond, flags=re.IGNORECASE)
|
|
277
|
+
cond = re.sub(r'\bNOT\b', 'not', cond, flags=re.IGNORECASE)
|
|
278
|
+
cond = re.sub(r'\bTRUE\b', 'True', cond, flags=re.IGNORECASE)
|
|
279
|
+
cond = re.sub(r'\bFALSE\b', 'False', cond, flags=re.IGNORECASE)
|
|
280
|
+
cond = re.sub(r'(?<!=)=(?!=)', '==', cond)
|
|
281
|
+
cond = cond.replace('<>', '!=')
|
|
282
|
+
return cond
|
|
283
|
+
|
|
284
|
+
|
|
229
285
|
def _safe_name(name):
|
|
230
286
|
import re
|
|
231
287
|
safe = re.sub(r'[^a-zA-Z0-9_]', '_', name)
|
|
@@ -95,11 +95,11 @@ INFA_FUNC_MAP = {
|
|
|
95
95
|
}
|
|
96
96
|
|
|
97
97
|
|
|
98
|
-
AGG_FUNC_NAMES =
|
|
99
|
-
"
|
|
100
|
-
"STDDEV", "
|
|
101
|
-
"
|
|
102
|
-
|
|
98
|
+
# Informatica aggregate function names recognized by parse_aggregate_expression.
# Matching there is a full anchored match on "FUNC(column)" (case-insensitive),
# so list order is not significant despite overlapping names (e.g. MOVINGAVG/AVG).
AGG_FUNC_NAMES = [
    "MOVINGAVG", "MOVINGSUM", "PERCENTILE", "VARIANCE",
    "STDDEV", "MEDIAN", "COUNT", "FIRST", "LAST",
    "CUME", "SUM", "AVG", "MAX", "MIN",
]
|
|
103
103
|
|
|
104
104
|
|
|
105
105
|
def convert_expression(expr):
|
|
@@ -131,6 +131,8 @@ def convert_expression(expr):
|
|
|
131
131
|
|
|
132
132
|
converted = re.sub(r'<>', '!=', converted)
|
|
133
133
|
|
|
134
|
+
converted = re.sub(r'(?<![<>!])=(?!=)', '==', converted)
|
|
135
|
+
|
|
134
136
|
converted = re.sub(r':LKP\.(\w+)\(', r'lookup_func("\1", ', converted)
|
|
135
137
|
|
|
136
138
|
converted = re.sub(r'\$\$(\w+)', r'get_variable("\1")', converted)
|
|
@@ -202,8 +204,11 @@ def parse_aggregate_expression(expr):
|
|
|
202
204
|
cleaned = expr.strip()
|
|
203
205
|
|
|
204
206
|
for func_name in AGG_FUNC_NAMES:
|
|
205
|
-
pattern = re.compile(
|
|
206
|
-
|
|
207
|
+
pattern = re.compile(
|
|
208
|
+
r'^\s*' + func_name + r'\s*\(\s*([A-Za-z_][A-Za-z0-9_]*|\*)\s*\)\s*$',
|
|
209
|
+
re.IGNORECASE
|
|
210
|
+
)
|
|
211
|
+
match = pattern.match(cleaned)
|
|
207
212
|
if match:
|
|
208
213
|
col = match.group(1).strip()
|
|
209
214
|
return func_name.lower(), col
|
|
@@ -239,6 +239,8 @@ def test_expression_converter_expanded():
|
|
|
239
239
|
|
|
240
240
|
result = convert_expression("IIF(STATUS = 'A', 'Active', 'Inactive')")
|
|
241
241
|
assert "iif_expr" in result
|
|
242
|
+
assert "==" in result, f"Expected == in result, got: {result}"
|
|
243
|
+
assert "= =" not in result
|
|
242
244
|
|
|
243
245
|
result = convert_expression("DECODE(TYPE, 1, 'One', 2, 'Two', 'Other')")
|
|
244
246
|
assert "decode_expr" in result
|
|
@@ -301,6 +303,17 @@ def test_expression_converter_expanded():
|
|
|
301
303
|
result = convert_expression("STATUS <> 'X'")
|
|
302
304
|
assert "!=" in result
|
|
303
305
|
|
|
306
|
+
result = convert_expression("AMOUNT >= 100")
|
|
307
|
+
assert ">=" in result
|
|
308
|
+
assert ">==" not in result
|
|
309
|
+
|
|
310
|
+
result = convert_expression("AMOUNT <= 100")
|
|
311
|
+
assert "<=" in result
|
|
312
|
+
assert "<==" not in result
|
|
313
|
+
|
|
314
|
+
result = convert_expression("SUM(A)/COUNT(*)")
|
|
315
|
+
assert "sum_val" in result or "count_val" in result
|
|
316
|
+
|
|
304
317
|
result = convert_expression("$$MY_VARIABLE")
|
|
305
318
|
assert 'get_variable("MY_VARIABLE")' in result
|
|
306
319
|
|
|
@@ -405,6 +418,13 @@ def test_parse_aggregate_expression():
|
|
|
405
418
|
assert func is None
|
|
406
419
|
assert col is None
|
|
407
420
|
|
|
421
|
+
func, col = parse_aggregate_expression("SUM(A)/COUNT(*)")
|
|
422
|
+
assert func is None, f"Compound expression should not match, got func={func}"
|
|
423
|
+
assert col is None
|
|
424
|
+
|
|
425
|
+
func, col = parse_aggregate_expression("AVG(A+B)")
|
|
426
|
+
assert func is None, f"Expression with operators should not match, got func={func}"
|
|
427
|
+
|
|
408
428
|
print("PASS: test_parse_aggregate_expression")
|
|
409
429
|
|
|
410
430
|
|
|
@@ -459,7 +479,7 @@ def test_generated_joiner_code():
|
|
|
459
479
|
lines = []
|
|
460
480
|
source_dfs = {"SRC_CUST": "df_src_cust", "SRC_ORDER": "df_src_order"}
|
|
461
481
|
input_sources = {"SRC_CUST", "SRC_ORDER"}
|
|
462
|
-
_gen_joiner_transform(lines, tx, "jnr_cust_order", "df_src_cust", input_sources, source_dfs)
|
|
482
|
+
_gen_joiner_transform(lines, tx, "jnr_cust_order", "df_src_cust", input_sources, source_dfs, connector_graph=None)
|
|
463
483
|
code = "\n".join(lines)
|
|
464
484
|
|
|
465
485
|
assert "merge" in code
|
|
@@ -508,6 +528,254 @@ def test_generated_lookup_code():
|
|
|
508
528
|
print(f"PASS: test_generated_lookup_code")
|
|
509
529
|
|
|
510
530
|
|
|
531
|
+
def test_flatfile_metadata_read():
    """Flat-file delimiter / header / skip-row metadata should reach generated code."""
    from informatica_python.models import (
        MappingDef, FolderDef, SourceDef, TargetDef, FlatFileDef,
        FieldDef, TransformationDef, ConnectorDef, InstanceDef, TableAttribute,
    )
    from informatica_python.generators.mapping_gen import generate_mapping_code

    # Pipe-delimited source file: one header line, two skipped rows, quoted text.
    source_ff = FlatFileDef(
        name="test_file",
        delimiter="|",
        header_lines=1,
        text_qualifier='"',
        skip_rows=2,
        code_page="UTF-8",
    )
    source = SourceDef(
        name="PIPE_SOURCE",
        database_type="Flat File",
        flatfile=source_ff,
        fields=[
            FieldDef(name="COL_A", datatype="string"),
            FieldDef(name="COL_B", datatype="integer"),
        ],
    )

    # Tilde-delimited target file.
    target = TargetDef(
        name="TILDE_TARGET",
        database_type="Flat File",
        flatfile=FlatFileDef(name="tgt_file", delimiter="~"),
        fields=[FieldDef(name="COL_A", datatype="string")],
    )

    # Single source-to-target link carrying COL_A.
    link = ConnectorDef(
        from_instance="PIPE_SOURCE", from_field="COL_A",
        from_instance_type="Source Definition",
        to_instance="TILDE_TARGET", to_field="COL_A",
        to_instance_type="Target Definition",
    )
    mapping = MappingDef(
        name="m_flatfile_test",
        transformations=[],
        connectors=[link],
        instances=[
            InstanceDef(name="PIPE_SOURCE", type="Source Definition", transformation_name="PIPE_SOURCE"),
            InstanceDef(name="TILDE_TARGET", type="Target Definition", transformation_name="TILDE_TARGET"),
        ],
    )
    folder = FolderDef(name="test", sources=[source], targets=[target], mappings=[mapping])

    generated = generate_mapping_code(mapping, folder)

    assert "ff_cfg_" in generated, "Should emit flatfile config dict"
    assert "'delimiter': '|'" in generated, "Pipe delimiter should appear"
    assert "'skip_rows': 2" in generated, "Skip rows should appear"
    assert "'~'" in generated, "Tilde delimiter should appear for target"
    print("PASS: test_flatfile_metadata_read")
|
|
579
|
+
|
|
580
|
+
|
|
581
|
+
def test_flatfile_fixed_width():
    """Fixed-width flat-file sources should be read via pd.read_fwf with widths
    derived from field precision."""
    from informatica_python.models import (
        MappingDef, FolderDef, SourceDef, FlatFileDef,
        FieldDef, InstanceDef, TargetDef, ConnectorDef,
    )
    from informatica_python.generators.mapping_gen import generate_mapping_code

    # Two fixed-width columns of widths 10 and 20.
    columns = [
        FieldDef(name="F1", datatype="string", precision=10),
        FieldDef(name="F2", datatype="string", precision=20),
    ]
    fw_source = SourceDef(
        name="FW_SOURCE",
        database_type="Flat File",
        flatfile=FlatFileDef(name="fw_file", is_fixed_width="YES", header_lines=0),
        fields=columns,
    )
    mapping = MappingDef(
        name="m_fw_test",
        transformations=[],
        connectors=[],
        instances=[InstanceDef(name="FW_SOURCE", type="Source Definition", transformation_name="FW_SOURCE")],
    )
    folder = FolderDef(name="test", sources=[fw_source], targets=[], mappings=[mapping])

    generated = generate_mapping_code(mapping, folder)

    assert "read_fwf" in generated, "Fixed-width should use pd.read_fwf"
    assert "[10, 20]" in generated, "Widths should be derived from field precision"
    print("PASS: test_flatfile_fixed_width")
|
|
608
|
+
|
|
609
|
+
|
|
610
|
+
def test_normalizer_transform():
    """Normalizer codegen should pivot occurs columns via pd.melt and register
    its output DataFrame."""
    from informatica_python.models import (
        MappingDef, FolderDef, SourceDef, FieldDef,
        TransformationDef, ConnectorDef, InstanceDef, TableAttribute, TargetDef,
    )
    from informatica_python.generators.mapping_gen import _gen_normalizer_transform

    # Three PHONE occurs ports plus an ID pass-through and a generated key.
    phone_ports = [
        FieldDef(name=f"PHONE{i}", datatype="string", porttype="INPUT")
        for i in (1, 2, 3)
    ]
    normalizer = TransformationDef(
        name="NRM_PHONES",
        type="Normalizer",
        fields=(
            [FieldDef(name="CUST_ID", datatype="integer", porttype="INPUT/OUTPUT")]
            + phone_ports
            + [FieldDef(name="GK", datatype="integer", porttype="OUTPUT")]
        ),
    )

    emitted = []
    source_dfs = {}
    _gen_normalizer_transform(emitted, normalizer, "nrm_phones", "df_input", source_dfs)
    generated = "\n".join(emitted)

    assert "melt(" in generated, "Normalizer should use pd.melt()"
    assert "PHONE1" in generated, "Should reference PHONE columns"
    assert "CUST_ID" in generated, "Should reference ID column"
    assert "GK" in generated, "Should generate GK sequence"
    assert source_dfs["NRM_PHONES"] == "df_nrm_phones"
    print("PASS: test_normalizer_transform")
|
|
639
|
+
|
|
640
|
+
|
|
641
|
+
def test_rank_with_groupby():
    """Rank codegen should group by the group-by port, rank on the rank port,
    and keep the configured top N."""
    from informatica_python.models import (
        FieldDef, TransformationDef, TableAttribute,
    )
    from informatica_python.generators.mapping_gen import _gen_rank_transform

    # Top-5 per REGION, ranked by AMOUNT, rank emitted as RANKINDEX.
    rank_tx = TransformationDef(
        name="RNK_SALES",
        type="Rank",
        fields=[
            FieldDef(name="REGION", datatype="string", porttype="INPUT/OUTPUT"),
            FieldDef(name="AMOUNT", datatype="decimal", porttype="INPUT", expression="AMOUNT"),
            FieldDef(name="RANKINDEX", datatype="integer", porttype="OUTPUT"),
        ],
        attributes=[
            TableAttribute(name="Top/Bottom", value="TOP"),
            TableAttribute(name="Number Of Ranks", value="5"),
        ],
    )

    emitted = []
    source_dfs = {}
    _gen_rank_transform(emitted, rank_tx, "rnk_sales", "df_input", source_dfs)
    generated = "\n".join(emitted)

    assert "groupby" in generated, "Should use groupby for group-by rank"
    assert "REGION" in generated, "Should group by REGION"
    assert "AMOUNT" in generated, "Should rank by AMOUNT"
    assert "RANKINDEX" in generated, "Should produce RANKINDEX column"
    assert "<= 5" in generated, "Should filter top 5"
    assert source_dfs["RNK_SALES"] == "df_rnk_sales"
    print("PASS: test_rank_with_groupby")
|
|
672
|
+
|
|
673
|
+
|
|
674
|
+
def test_decision_task_if_else():
    """Decision tasks should emit a decision variable and an if/else branch,
    with the Informatica condition converted to Python."""
    from informatica_python.models import (
        FolderDef, WorkflowDef, TaskInstanceDef, WorkflowLink,
        TableAttribute, MappingDef,
    )
    from informatica_python.generators.workflow_gen import generate_workflow_code

    # Start -> decision -> one session per outcome.
    decision = TaskInstanceDef(
        name="dec_check_status",
        task_name="dec_check_status",
        task_type="Decision",
        attributes=[TableAttribute(name="Decision Condition", value="$$LOAD_FLAG = TRUE")],
    )
    tasks = [
        TaskInstanceDef(name="Start", task_name="Start", task_type="Start Task"),
        decision,
        TaskInstanceDef(name="s_load_data", task_name="s_load_data", task_type="Session"),
        TaskInstanceDef(name="s_skip_load", task_name="s_skip_load", task_type="Session"),
    ]
    links = [
        WorkflowLink(from_instance="Start", to_instance="dec_check_status"),
        WorkflowLink(from_instance="dec_check_status", to_instance="s_load_data", condition="$dec_check_status.SUCCEEDED"),
        WorkflowLink(from_instance="dec_check_status", to_instance="s_skip_load", condition="$dec_check_status.FAILED"),
    ]
    workflow = WorkflowDef(name="wf_test_decision", task_instances=tasks, links=links)
    folder = FolderDef(name="test", workflows=[workflow], mappings=[])

    generated = generate_workflow_code(folder)

    assert "decision_dec_check_status" in generated, "Should create decision variable"
    assert "if decision_dec_check_status" in generated, "Should generate if branch"
    assert "LOAD_FLAG" in generated, "Should convert $$LOAD_FLAG"
    assert "True" in generated, "Should convert TRUE to Python True"
    print("PASS: test_decision_task_if_else")
|
|
708
|
+
|
|
709
|
+
|
|
710
|
+
def test_inline_mapplet():
    """A mapplet instance inside a mapping should be inlined into the generated
    code with its transformations name-prefixed by the instance name."""
    from informatica_python.models import (
        MappingDef, FolderDef, SourceDef, TargetDef, MappletDef,
        TransformationDef, ConnectorDef, InstanceDef, FieldDef,
        TableAttribute,
    )
    from informatica_python.generators.mapping_gen import generate_mapping_code

    # Mapplet with a single Expression transform that upper-cases FULL_NAME.
    mplt = MappletDef(
        name="mplt_clean_name",
        transformations=[
            TransformationDef(
                name="EXP_UPPER",
                type="Expression",
                fields=[
                    FieldDef(name="FULL_NAME", datatype="string", porttype="INPUT/OUTPUT",
                             expression="UPPER(FULL_NAME)"),
                ],
            ),
        ],
        connectors=[],
    )

    # Mapping pipeline: SRC -> SQ_INPUT -> MPLT_INST (the mapplet) -> TGT,
    # passing FULL_NAME through each hop.
    mapping = MappingDef(
        name="m_with_mapplet",
        transformations=[
            TransformationDef(name="SQ_INPUT", type="Source Qualifier",
                              fields=[FieldDef(name="FULL_NAME", datatype="string", porttype="INPUT/OUTPUT")]),
        ],
        connectors=[
            ConnectorDef(from_instance="SRC", from_field="FULL_NAME",
                         from_instance_type="Source Definition",
                         to_instance="SQ_INPUT", to_field="FULL_NAME",
                         to_instance_type="Source Qualifier"),
            ConnectorDef(from_instance="SQ_INPUT", from_field="FULL_NAME",
                         from_instance_type="Source Qualifier",
                         to_instance="MPLT_INST", to_field="FULL_NAME",
                         to_instance_type="Mapplet"),
            ConnectorDef(from_instance="MPLT_INST", from_field="FULL_NAME",
                         from_instance_type="Mapplet",
                         to_instance="TGT", to_field="FULL_NAME",
                         to_instance_type="Target Definition"),
        ],
        instances=[
            InstanceDef(name="SRC", type="Source Definition", transformation_name="SRC"),
            InstanceDef(name="SQ_INPUT", type="Source Qualifier"),
            # The mapplet instance points at the mapplet definition by name.
            InstanceDef(name="MPLT_INST", type="Mapplet", transformation_name="mplt_clean_name",
                        transformation_type="Mapplet"),
            InstanceDef(name="TGT", type="Target Definition", transformation_name="TGT"),
        ],
    )

    src = SourceDef(name="SRC", fields=[FieldDef(name="FULL_NAME", datatype="string")])
    tgt = TargetDef(name="TGT", fields=[FieldDef(name="FULL_NAME", datatype="string")])
    folder = FolderDef(
        name="test",
        sources=[src],
        targets=[tgt],
        mappings=[mapping],
        mapplets=[mplt],
    )
    code = generate_mapping_code(mapping, folder)

    # The inlined transform may be emitted in either case; both accepted here.
    assert "MPLT_INST__EXP_UPPER" in code or "mplt_inst__exp_upper" in code, \
        "Inlined mapplet transform should appear with prefix"
    assert "UPPER" in code, "UPPER expression from mapplet should be present"
    print("PASS: test_inline_mapplet")
|
|
777
|
+
|
|
778
|
+
|
|
511
779
|
if __name__ == "__main__":
|
|
512
780
|
print("=" * 60)
|
|
513
781
|
print("Running informatica-python tests")
|
|
@@ -531,6 +799,12 @@ if __name__ == "__main__":
|
|
|
531
799
|
test_generated_aggregator_code,
|
|
532
800
|
test_generated_joiner_code,
|
|
533
801
|
test_generated_lookup_code,
|
|
802
|
+
test_flatfile_metadata_read,
|
|
803
|
+
test_flatfile_fixed_width,
|
|
804
|
+
test_normalizer_transform,
|
|
805
|
+
test_rank_with_groupby,
|
|
806
|
+
test_decision_task_if_else,
|
|
807
|
+
test_inline_mapplet,
|
|
534
808
|
]
|
|
535
809
|
|
|
536
810
|
passed = 0
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{informatica_python-1.2.0 → informatica_python-1.3.0}/informatica_python/generators/__init__.py
RENAMED
|
File without changes
|
{informatica_python-1.2.0 → informatica_python-1.3.0}/informatica_python/generators/config_gen.py
RENAMED
|
File without changes
|
{informatica_python-1.2.0 → informatica_python-1.3.0}/informatica_python/generators/error_log_gen.py
RENAMED
|
File without changes
|
{informatica_python-1.2.0 → informatica_python-1.3.0}/informatica_python/generators/helper_gen.py
RENAMED
|
File without changes
|
{informatica_python-1.2.0 → informatica_python-1.3.0}/informatica_python/generators/sql_gen.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{informatica_python-1.2.0 → informatica_python-1.3.0}/informatica_python/utils/datatype_map.py
RENAMED
|
File without changes
|
{informatica_python-1.2.0 → informatica_python-1.3.0}/informatica_python.egg-info/SOURCES.txt
RENAMED
|
File without changes
|
|
File without changes
|
{informatica_python-1.2.0 → informatica_python-1.3.0}/informatica_python.egg-info/entry_points.txt
RENAMED
|
File without changes
|
{informatica_python-1.2.0 → informatica_python-1.3.0}/informatica_python.egg-info/requires.txt
RENAMED
|
File without changes
|
{informatica_python-1.2.0 → informatica_python-1.3.0}/informatica_python.egg-info/top_level.txt
RENAMED
|
File without changes
|
|
File without changes
|