informatica-python 1.2.1__tar.gz → 1.3.1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {informatica_python-1.2.1 → informatica_python-1.3.1}/PKG-INFO +1 -1
- {informatica_python-1.2.1 → informatica_python-1.3.1}/informatica_python/generators/helper_gen.py +19 -4
- {informatica_python-1.2.1 → informatica_python-1.3.1}/informatica_python/generators/mapping_gen.py +368 -13
- {informatica_python-1.2.1 → informatica_python-1.3.1}/informatica_python/generators/workflow_gen.py +57 -3
- {informatica_python-1.2.1 → informatica_python-1.3.1}/informatica_python.egg-info/PKG-INFO +1 -1
- {informatica_python-1.2.1 → informatica_python-1.3.1}/pyproject.toml +1 -1
- {informatica_python-1.2.1 → informatica_python-1.3.1}/tests/test_converter.py +254 -0
- {informatica_python-1.2.1 → informatica_python-1.3.1}/README.md +0 -0
- {informatica_python-1.2.1 → informatica_python-1.3.1}/informatica_python/__init__.py +0 -0
- {informatica_python-1.2.1 → informatica_python-1.3.1}/informatica_python/cli.py +0 -0
- {informatica_python-1.2.1 → informatica_python-1.3.1}/informatica_python/converter.py +0 -0
- {informatica_python-1.2.1 → informatica_python-1.3.1}/informatica_python/generators/__init__.py +0 -0
- {informatica_python-1.2.1 → informatica_python-1.3.1}/informatica_python/generators/config_gen.py +0 -0
- {informatica_python-1.2.1 → informatica_python-1.3.1}/informatica_python/generators/error_log_gen.py +0 -0
- {informatica_python-1.2.1 → informatica_python-1.3.1}/informatica_python/generators/sql_gen.py +0 -0
- {informatica_python-1.2.1 → informatica_python-1.3.1}/informatica_python/models.py +0 -0
- {informatica_python-1.2.1 → informatica_python-1.3.1}/informatica_python/parser.py +0 -0
- {informatica_python-1.2.1 → informatica_python-1.3.1}/informatica_python/utils/__init__.py +0 -0
- {informatica_python-1.2.1 → informatica_python-1.3.1}/informatica_python/utils/datatype_map.py +0 -0
- {informatica_python-1.2.1 → informatica_python-1.3.1}/informatica_python/utils/expression_converter.py +0 -0
- {informatica_python-1.2.1 → informatica_python-1.3.1}/informatica_python.egg-info/SOURCES.txt +0 -0
- {informatica_python-1.2.1 → informatica_python-1.3.1}/informatica_python.egg-info/dependency_links.txt +0 -0
- {informatica_python-1.2.1 → informatica_python-1.3.1}/informatica_python.egg-info/entry_points.txt +0 -0
- {informatica_python-1.2.1 → informatica_python-1.3.1}/informatica_python.egg-info/requires.txt +0 -0
- {informatica_python-1.2.1 → informatica_python-1.3.1}/informatica_python.egg-info/top_level.txt +0 -0
- {informatica_python-1.2.1 → informatica_python-1.3.1}/setup.cfg +0 -0
{informatica_python-1.2.1 → informatica_python-1.3.1}/informatica_python/generators/helper_gen.py
RENAMED
|
@@ -208,7 +208,11 @@ def _add_file_functions(lines, data_lib):
|
|
|
208
208
|
lines.append(" delimiter = file_config.get('delimiter', ',')")
|
|
209
209
|
lines.append(" header = file_config.get('header', True)")
|
|
210
210
|
lines.append(" encoding = file_config.get('encoding', 'utf-8')")
|
|
211
|
-
lines.append(" header_row = 0 if header else None")
|
|
211
|
+
lines.append(" header_row = file_config.get('header_row', 0 if header else None)")
|
|
212
|
+
lines.append(" skip_rows = file_config.get('skip_rows', 0)")
|
|
213
|
+
lines.append(" quotechar = file_config.get('quotechar', '\"')")
|
|
214
|
+
lines.append(" escapechar = file_config.get('escapechar', None)")
|
|
215
|
+
lines.append(" lineterminator = file_config.get('lineterminator', None)")
|
|
212
216
|
lines.append("")
|
|
213
217
|
lines.append(" logger.info(f'Reading file: {file_path} (ext={ext})')")
|
|
214
218
|
lines.append("")
|
|
@@ -245,7 +249,17 @@ def _add_file_functions(lines, data_lib):
|
|
|
245
249
|
lines.append(" return dd.read_csv(file_path, sep=delimiter, header=header_row)")
|
|
246
250
|
else:
|
|
247
251
|
lines.append(" if ext in ('.csv', '.dat', '.txt', ''):")
|
|
248
|
-
lines.append("
|
|
252
|
+
lines.append(" csv_kwargs = dict(sep=delimiter, header=header_row, encoding=encoding,")
|
|
253
|
+
lines.append(" quotechar=quotechar, escapechar=escapechar)")
|
|
254
|
+
lines.append(" if skip_rows:")
|
|
255
|
+
lines.append(" csv_kwargs['skiprows'] = skip_rows")
|
|
256
|
+
lines.append(" if lineterminator:")
|
|
257
|
+
lines.append(" csv_kwargs['lineterminator'] = lineterminator")
|
|
258
|
+
lines.append(" df = pd.read_csv(file_path, **csv_kwargs)")
|
|
259
|
+
lines.append(" if file_config.get('strip_trailing_blanks'):")
|
|
260
|
+
lines.append(" str_cols = df.select_dtypes(include=['object']).columns")
|
|
261
|
+
lines.append(" df[str_cols] = df[str_cols].apply(lambda c: c.str.rstrip())")
|
|
262
|
+
lines.append(" return df")
|
|
249
263
|
lines.append(" elif ext in ('.xlsx', '.xls'):")
|
|
250
264
|
lines.append(" return pd.read_excel(file_path, header=header_row)")
|
|
251
265
|
lines.append(" elif ext == '.xml':")
|
|
@@ -271,6 +285,7 @@ def _add_file_functions(lines, data_lib):
|
|
|
271
285
|
lines.append(" delimiter = file_config.get('delimiter', ',')")
|
|
272
286
|
lines.append(" header = file_config.get('header', True)")
|
|
273
287
|
lines.append(" encoding = file_config.get('encoding', 'utf-8')")
|
|
288
|
+
lines.append(" quotechar = file_config.get('quotechar', '\"')")
|
|
274
289
|
lines.append("")
|
|
275
290
|
lines.append(" os.makedirs(os.path.dirname(file_path) or '.', exist_ok=True)")
|
|
276
291
|
lines.append(" logger.info(f'Writing file: {file_path}')")
|
|
@@ -290,7 +305,7 @@ def _add_file_functions(lines, data_lib):
|
|
|
290
305
|
lines.append(" df.write_csv(file_path, separator=delimiter, has_header=header)")
|
|
291
306
|
elif data_lib == "dask":
|
|
292
307
|
lines.append(" if ext in ('.csv', '.dat', '.txt', ''):")
|
|
293
|
-
lines.append(" df.compute().to_csv(file_path, sep=delimiter, header=header, index=False, encoding=encoding)")
|
|
308
|
+
lines.append(" df.compute().to_csv(file_path, sep=delimiter, header=header, index=False, encoding=encoding, quotechar=quotechar)")
|
|
294
309
|
lines.append(" elif ext in ('.xlsx', '.xls'):")
|
|
295
310
|
lines.append(" df.compute().to_excel(file_path, header=header, index=False)")
|
|
296
311
|
lines.append(" elif ext == '.json':")
|
|
@@ -301,7 +316,7 @@ def _add_file_functions(lines, data_lib):
|
|
|
301
316
|
lines.append(" df.compute().to_csv(file_path, sep=delimiter, header=header, index=False)")
|
|
302
317
|
else:
|
|
303
318
|
lines.append(" if ext in ('.csv', '.dat', '.txt', ''):")
|
|
304
|
-
lines.append(" df.to_csv(file_path, sep=delimiter, header=header, index=False, encoding=encoding)")
|
|
319
|
+
lines.append(" df.to_csv(file_path, sep=delimiter, header=header, index=False, encoding=encoding, quotechar=quotechar)")
|
|
305
320
|
lines.append(" elif ext in ('.xlsx', '.xls'):")
|
|
306
321
|
lines.append(" df.to_excel(file_path, header=header, index=False)")
|
|
307
322
|
lines.append(" elif ext == '.json':")
|
{informatica_python-1.2.1 → informatica_python-1.3.1}/informatica_python/generators/mapping_gen.py
RENAMED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
from typing import List, Dict
|
|
2
2
|
from informatica_python.models import (
|
|
3
3
|
MappingDef, FolderDef, SourceDef, TargetDef,
|
|
4
|
-
TransformationDef, ConnectorDef, InstanceDef,
|
|
4
|
+
TransformationDef, ConnectorDef, InstanceDef, MappletDef,
|
|
5
5
|
)
|
|
6
6
|
from informatica_python.utils.expression_converter import (
|
|
7
7
|
convert_expression, convert_sql_expression,
|
|
@@ -11,6 +11,130 @@ from informatica_python.utils.expression_converter import (
|
|
|
11
11
|
from informatica_python.utils.datatype_map import get_python_type
|
|
12
12
|
|
|
13
13
|
|
|
14
|
+
def _inline_mapplets(mapping, folder):
|
|
15
|
+
mapplet_map = {m.name: m for m in folder.mapplets}
|
|
16
|
+
extra_transforms = []
|
|
17
|
+
extra_connectors = []
|
|
18
|
+
mapplet_instances = set()
|
|
19
|
+
|
|
20
|
+
for inst in mapping.instances:
|
|
21
|
+
if inst.type == "Mapplet" or (inst.transformation_type or "").lower() == "mapplet":
|
|
22
|
+
mapplet_name = inst.transformation_name or inst.name
|
|
23
|
+
mapplet = mapplet_map.get(mapplet_name)
|
|
24
|
+
if not mapplet:
|
|
25
|
+
continue
|
|
26
|
+
mapplet_instances.add(inst.name)
|
|
27
|
+
prefix = inst.name
|
|
28
|
+
|
|
29
|
+
for tx in mapplet.transformations:
|
|
30
|
+
inlined = TransformationDef(
|
|
31
|
+
name=f"{prefix}__{tx.name}",
|
|
32
|
+
type=tx.type,
|
|
33
|
+
description=tx.description,
|
|
34
|
+
reusable=tx.reusable,
|
|
35
|
+
fields=list(tx.fields),
|
|
36
|
+
attributes=list(tx.attributes),
|
|
37
|
+
groups=list(tx.groups),
|
|
38
|
+
metadata_extensions=list(tx.metadata_extensions),
|
|
39
|
+
)
|
|
40
|
+
extra_transforms.append(inlined)
|
|
41
|
+
|
|
42
|
+
for conn in mapplet.connectors:
|
|
43
|
+
from informatica_python.models import ConnectorDef
|
|
44
|
+
new_from = f"{prefix}__{conn.from_instance}" if conn.from_instance in {t.name for t in mapplet.transformations} else conn.from_instance
|
|
45
|
+
new_to = f"{prefix}__{conn.to_instance}" if conn.to_instance in {t.name for t in mapplet.transformations} else conn.to_instance
|
|
46
|
+
inlined_conn = ConnectorDef(
|
|
47
|
+
from_instance=new_from,
|
|
48
|
+
from_field=conn.from_field,
|
|
49
|
+
from_instance_type=conn.from_instance_type,
|
|
50
|
+
to_instance=new_to,
|
|
51
|
+
to_field=conn.to_field,
|
|
52
|
+
to_instance_type=conn.to_instance_type,
|
|
53
|
+
)
|
|
54
|
+
extra_connectors.append(inlined_conn)
|
|
55
|
+
|
|
56
|
+
rewired_connectors = []
|
|
57
|
+
mapplet_internal_names = set()
|
|
58
|
+
for inst_name in mapplet_instances:
|
|
59
|
+
mapplet_name = None
|
|
60
|
+
for inst in mapping.instances:
|
|
61
|
+
if inst.name == inst_name:
|
|
62
|
+
mapplet_name = inst.transformation_name or inst.name
|
|
63
|
+
break
|
|
64
|
+
mapplet = mapplet_map.get(mapplet_name) if mapplet_name else None
|
|
65
|
+
if mapplet:
|
|
66
|
+
for tx in mapplet.transformations:
|
|
67
|
+
mapplet_internal_names.add(f"{inst_name}__{tx.name}")
|
|
68
|
+
|
|
69
|
+
for conn in mapping.connectors:
|
|
70
|
+
if conn.to_instance in mapplet_instances:
|
|
71
|
+
first_tx = None
|
|
72
|
+
for ec in extra_connectors:
|
|
73
|
+
if ec.from_instance == conn.to_instance or ec.to_instance.startswith(f"{conn.to_instance}__"):
|
|
74
|
+
for et in extra_transforms:
|
|
75
|
+
if et.name.startswith(f"{conn.to_instance}__"):
|
|
76
|
+
has_input = any(
|
|
77
|
+
"INPUT" in (f.porttype or "").upper()
|
|
78
|
+
for f in et.fields
|
|
79
|
+
if f.name == conn.to_field
|
|
80
|
+
)
|
|
81
|
+
if has_input:
|
|
82
|
+
first_tx = et.name
|
|
83
|
+
break
|
|
84
|
+
if first_tx:
|
|
85
|
+
break
|
|
86
|
+
if not first_tx:
|
|
87
|
+
for et in extra_transforms:
|
|
88
|
+
if et.name.startswith(f"{conn.to_instance}__"):
|
|
89
|
+
first_tx = et.name
|
|
90
|
+
break
|
|
91
|
+
if first_tx:
|
|
92
|
+
from informatica_python.models import ConnectorDef
|
|
93
|
+
rewired_connectors.append(ConnectorDef(
|
|
94
|
+
from_instance=conn.from_instance,
|
|
95
|
+
from_field=conn.from_field,
|
|
96
|
+
from_instance_type=conn.from_instance_type,
|
|
97
|
+
to_instance=first_tx,
|
|
98
|
+
to_field=conn.to_field,
|
|
99
|
+
to_instance_type=conn.to_instance_type,
|
|
100
|
+
))
|
|
101
|
+
else:
|
|
102
|
+
rewired_connectors.append(conn)
|
|
103
|
+
elif conn.from_instance in mapplet_instances:
|
|
104
|
+
last_tx = None
|
|
105
|
+
for et in reversed(extra_transforms):
|
|
106
|
+
if et.name.startswith(f"{conn.from_instance}__"):
|
|
107
|
+
has_output = any(
|
|
108
|
+
"OUTPUT" in (f.porttype or "").upper()
|
|
109
|
+
for f in et.fields
|
|
110
|
+
if f.name == conn.from_field
|
|
111
|
+
)
|
|
112
|
+
if has_output:
|
|
113
|
+
last_tx = et.name
|
|
114
|
+
break
|
|
115
|
+
if not last_tx:
|
|
116
|
+
for et in reversed(extra_transforms):
|
|
117
|
+
if et.name.startswith(f"{conn.from_instance}__"):
|
|
118
|
+
last_tx = et.name
|
|
119
|
+
break
|
|
120
|
+
if last_tx:
|
|
121
|
+
from informatica_python.models import ConnectorDef
|
|
122
|
+
rewired_connectors.append(ConnectorDef(
|
|
123
|
+
from_instance=last_tx,
|
|
124
|
+
from_field=conn.from_field,
|
|
125
|
+
from_instance_type=conn.from_instance_type,
|
|
126
|
+
to_instance=conn.to_instance,
|
|
127
|
+
to_field=conn.to_field,
|
|
128
|
+
to_instance_type=conn.to_instance_type,
|
|
129
|
+
))
|
|
130
|
+
else:
|
|
131
|
+
rewired_connectors.append(conn)
|
|
132
|
+
else:
|
|
133
|
+
rewired_connectors.append(conn)
|
|
134
|
+
|
|
135
|
+
return extra_transforms, extra_connectors + rewired_connectors, mapplet_instances
|
|
136
|
+
|
|
137
|
+
|
|
14
138
|
def generate_mapping_code(mapping: MappingDef, folder: FolderDef,
|
|
15
139
|
data_lib: str = "pandas", mapping_index: int = 1) -> str:
|
|
16
140
|
lines = []
|
|
@@ -24,10 +148,21 @@ def generate_mapping_code(mapping: MappingDef, folder: FolderDef,
|
|
|
24
148
|
lines.append("")
|
|
25
149
|
lines.append("")
|
|
26
150
|
|
|
151
|
+
inlined_transforms, inlined_connectors, mapplet_instance_names = _inline_mapplets(mapping, folder)
|
|
152
|
+
|
|
153
|
+
all_transforms = list(mapping.transformations) + inlined_transforms
|
|
154
|
+
if mapplet_instance_names:
|
|
155
|
+
kept_originals = [c for c in mapping.connectors
|
|
156
|
+
if c.from_instance not in mapplet_instance_names
|
|
157
|
+
and c.to_instance not in mapplet_instance_names]
|
|
158
|
+
all_connectors = kept_originals + inlined_connectors
|
|
159
|
+
else:
|
|
160
|
+
all_connectors = list(mapping.connectors)
|
|
161
|
+
|
|
27
162
|
source_map = _build_source_map(mapping, folder)
|
|
28
163
|
target_map = _build_target_map(mapping, folder)
|
|
29
|
-
transform_map = {t.name: t for t in
|
|
30
|
-
connector_graph = _build_connector_graph(
|
|
164
|
+
transform_map = {t.name: t for t in all_transforms}
|
|
165
|
+
connector_graph = _build_connector_graph(all_connectors)
|
|
31
166
|
instance_map = {i.name: i for i in mapping.instances}
|
|
32
167
|
|
|
33
168
|
lines.append(f"def run_{_safe_name(mapping.name)}(config):")
|
|
@@ -50,7 +185,7 @@ def generate_mapping_code(mapping: MappingDef, folder: FolderDef,
|
|
|
50
185
|
safe = _safe_name(src_name)
|
|
51
186
|
source_dfs[src_name] = f"df_{safe}"
|
|
52
187
|
|
|
53
|
-
sq_transforms = [t for t in
|
|
188
|
+
sq_transforms = [t for t in all_transforms
|
|
54
189
|
if t.type in ("Source Qualifier", "Application Source Qualifier")]
|
|
55
190
|
if sq_transforms:
|
|
56
191
|
for sq in sq_transforms:
|
|
@@ -63,12 +198,14 @@ def generate_mapping_code(mapping: MappingDef, folder: FolderDef,
|
|
|
63
198
|
conn_name = _safe_name(src_def.db_name) if src_def.db_name else "default"
|
|
64
199
|
schema = src_def.owner_name or "dbo"
|
|
65
200
|
lines.append(f" df_{safe} = read_from_db(config, 'SELECT * FROM {schema}.{src_name}', '{conn_name}')")
|
|
201
|
+
elif src_def.flatfile:
|
|
202
|
+
_emit_flatfile_read(lines, safe, src_def)
|
|
66
203
|
else:
|
|
67
204
|
lines.append(f" df_{safe} = read_file(config.get('sources', {{}}).get('{src_name}', {{}}).get('file_path', '{src_name}'),")
|
|
68
205
|
lines.append(f" config.get('sources', {{}}).get('{src_name}', {{}}))")
|
|
69
206
|
lines.append("")
|
|
70
207
|
|
|
71
|
-
processing_order = _get_processing_order(
|
|
208
|
+
processing_order = _get_processing_order(all_transforms, connector_graph, sq_transforms)
|
|
72
209
|
|
|
73
210
|
for tx in processing_order:
|
|
74
211
|
if tx.type in ("Source Qualifier", "Application Source Qualifier"):
|
|
@@ -99,6 +236,107 @@ def _safe_name(name):
|
|
|
99
236
|
return safe.lower()
|
|
100
237
|
|
|
101
238
|
|
|
239
|
+
def _flatfile_config_dict(ff):
|
|
240
|
+
cfg = {}
|
|
241
|
+
if not ff:
|
|
242
|
+
return cfg
|
|
243
|
+
if ff.delimiter and ff.delimiter != ",":
|
|
244
|
+
d = ff.delimiter
|
|
245
|
+
DELIMITER_MAP = {
|
|
246
|
+
"COMMA": ",", "TAB": "\\t", "PIPE": "|", "SEMICOLON": ";",
|
|
247
|
+
"SPACE": " ", "TILDE": "~", "CARET": "^",
|
|
248
|
+
}
|
|
249
|
+
d = DELIMITER_MAP.get(d.upper(), d)
|
|
250
|
+
cfg["delimiter"] = d
|
|
251
|
+
if ff.is_fixed_width == "YES":
|
|
252
|
+
cfg["fixed_width"] = True
|
|
253
|
+
if ff.header_lines:
|
|
254
|
+
cfg["header_lines"] = ff.header_lines
|
|
255
|
+
if ff.skip_rows:
|
|
256
|
+
cfg["skip_rows"] = ff.skip_rows
|
|
257
|
+
if ff.text_qualifier:
|
|
258
|
+
cfg["quotechar"] = ff.text_qualifier
|
|
259
|
+
if ff.escape_character:
|
|
260
|
+
cfg["escapechar"] = ff.escape_character
|
|
261
|
+
if ff.strip_trailing_blanks == "YES":
|
|
262
|
+
cfg["strip_trailing_blanks"] = True
|
|
263
|
+
if ff.code_page:
|
|
264
|
+
cfg["encoding"] = ff.code_page
|
|
265
|
+
if ff.row_delimiter:
|
|
266
|
+
cfg["lineterminator"] = ff.row_delimiter
|
|
267
|
+
return cfg
|
|
268
|
+
|
|
269
|
+
|
|
270
|
+
def _emit_flatfile_read(lines, var_name, src_def, indent=" "):
|
|
271
|
+
ff = src_def.flatfile
|
|
272
|
+
fc = _flatfile_config_dict(ff)
|
|
273
|
+
if fc.get("fixed_width"):
|
|
274
|
+
widths = []
|
|
275
|
+
for fld in src_def.fields:
|
|
276
|
+
widths.append(fld.precision if fld.precision else 10)
|
|
277
|
+
lines.append(f"{indent}df_{var_name} = pd.read_fwf(")
|
|
278
|
+
lines.append(f"{indent} config.get('sources', {{}}).get('{src_def.name}', {{}}).get('file_path', '{src_def.name}'),")
|
|
279
|
+
lines.append(f"{indent} widths={widths},")
|
|
280
|
+
hdr = fc.get("header_lines", 0)
|
|
281
|
+
if hdr:
|
|
282
|
+
lines.append(f"{indent} header={hdr - 1},")
|
|
283
|
+
else:
|
|
284
|
+
lines.append(f"{indent} header=None,")
|
|
285
|
+
skip = fc.get("skip_rows", 0)
|
|
286
|
+
if skip:
|
|
287
|
+
lines.append(f"{indent} skiprows={skip},")
|
|
288
|
+
lines.append(f"{indent})")
|
|
289
|
+
return
|
|
290
|
+
|
|
291
|
+
file_cfg = {}
|
|
292
|
+
if "delimiter" in fc:
|
|
293
|
+
file_cfg["delimiter"] = fc["delimiter"]
|
|
294
|
+
if "quotechar" in fc:
|
|
295
|
+
file_cfg["quotechar"] = fc["quotechar"]
|
|
296
|
+
if "escapechar" in fc:
|
|
297
|
+
file_cfg["escapechar"] = fc["escapechar"]
|
|
298
|
+
if "encoding" in fc:
|
|
299
|
+
file_cfg["encoding"] = fc["encoding"]
|
|
300
|
+
if "lineterminator" in fc:
|
|
301
|
+
file_cfg["lineterminator"] = fc["lineterminator"]
|
|
302
|
+
hdr = fc.get("header_lines", 0)
|
|
303
|
+
if hdr:
|
|
304
|
+
file_cfg["header"] = True
|
|
305
|
+
file_cfg["header_row"] = hdr - 1
|
|
306
|
+
if fc.get("skip_rows"):
|
|
307
|
+
file_cfg["skip_rows"] = fc["skip_rows"]
|
|
308
|
+
if fc.get("strip_trailing_blanks"):
|
|
309
|
+
file_cfg["strip_trailing_blanks"] = True
|
|
310
|
+
|
|
311
|
+
if file_cfg:
|
|
312
|
+
lines.append(f"{indent}ff_cfg_{var_name} = {repr(file_cfg)}")
|
|
313
|
+
lines.append(f"{indent}ff_cfg_{var_name}.update(config.get('sources', {{}}).get('{src_def.name}', {{}}))")
|
|
314
|
+
lines.append(f"{indent}df_{var_name} = read_file(ff_cfg_{var_name}.get('file_path', '{src_def.name}'), ff_cfg_{var_name})")
|
|
315
|
+
else:
|
|
316
|
+
lines.append(f"{indent}df_{var_name} = read_file(config.get('sources', {{}}).get('{src_def.name}', {{}}).get('file_path', '{src_def.name}'),")
|
|
317
|
+
lines.append(f"{indent} config.get('sources', {{}}).get('{src_def.name}', {{}}))")
|
|
318
|
+
|
|
319
|
+
|
|
320
|
+
def _emit_flatfile_write(lines, var_name, tgt_def, indent=" "):
|
|
321
|
+
ff = tgt_def.flatfile
|
|
322
|
+
fc = _flatfile_config_dict(ff)
|
|
323
|
+
file_cfg = {}
|
|
324
|
+
if "delimiter" in fc:
|
|
325
|
+
file_cfg["delimiter"] = fc["delimiter"]
|
|
326
|
+
if "quotechar" in fc:
|
|
327
|
+
file_cfg["quotechar"] = fc["quotechar"]
|
|
328
|
+
if "encoding" in fc:
|
|
329
|
+
file_cfg["encoding"] = fc["encoding"]
|
|
330
|
+
|
|
331
|
+
if file_cfg:
|
|
332
|
+
lines.append(f"{indent}ff_cfg_{var_name} = {repr(file_cfg)}")
|
|
333
|
+
lines.append(f"{indent}ff_cfg_{var_name}.update(config.get('targets', {{}}).get('{tgt_def.name}', {{}}))")
|
|
334
|
+
lines.append(f"{indent}write_file(df_target_{var_name}, ff_cfg_{var_name}.get('file_path', '{tgt_def.name}'), ff_cfg_{var_name})")
|
|
335
|
+
else:
|
|
336
|
+
lines.append(f"{indent}write_file(df_target_{var_name}, config.get('targets', {{}}).get('{tgt_def.name}', {{}}).get('file_path', '{tgt_def.name}'),")
|
|
337
|
+
lines.append(f"{indent} config.get('targets', {{}}).get('{tgt_def.name}', {{}}))")
|
|
338
|
+
|
|
339
|
+
|
|
102
340
|
def _build_source_map(mapping, folder):
|
|
103
341
|
source_map = {}
|
|
104
342
|
for inst in mapping.instances:
|
|
@@ -221,6 +459,8 @@ def _generate_source_qualifier(lines, sq, source_map, source_dfs, connector_grap
|
|
|
221
459
|
schema = src_def.owner_name or "dbo"
|
|
222
460
|
cols = ", ".join(f.name for f in src_def.fields) if src_def.fields else "*"
|
|
223
461
|
lines.append(f" df_{sq_safe} = read_from_db(config, 'SELECT {cols} FROM {schema}.{src_def.name}', '{conn_name}')")
|
|
462
|
+
elif src_def.flatfile:
|
|
463
|
+
_emit_flatfile_read(lines, sq_safe, src_def)
|
|
224
464
|
else:
|
|
225
465
|
lines.append(f" df_{sq_safe} = read_file(config.get('sources', {{}}).get('{src_def.name}', {{}}).get('file_path', '{src_def.name}'),")
|
|
226
466
|
lines.append(f" config.get('sources', {{}}).get('{src_def.name}', {{}}))")
|
|
@@ -232,6 +472,8 @@ def _generate_source_qualifier(lines, sq, source_map, source_dfs, connector_grap
|
|
|
232
472
|
conn_name = _safe_name(src_def.db_name) if src_def.db_name else "default"
|
|
233
473
|
schema = src_def.owner_name or "dbo"
|
|
234
474
|
lines.append(f" df_{safe_src} = read_from_db(config, 'SELECT * FROM {schema}.{src_def.name}', '{conn_name}')")
|
|
475
|
+
elif src_def.flatfile:
|
|
476
|
+
_emit_flatfile_read(lines, safe_src, src_def)
|
|
235
477
|
else:
|
|
236
478
|
lines.append(f" df_{safe_src} = read_file(config.get('sources', {{}}).get('{src_def.name}', {{}}).get('file_path', '{src_def.name}'),")
|
|
237
479
|
lines.append(f" config.get('sources', {{}}).get('{src_def.name}', {{}}))")
|
|
@@ -652,26 +894,137 @@ def _gen_sequence_generator(lines, tx, tx_safe, input_df, source_dfs):
|
|
|
652
894
|
|
|
653
895
|
|
|
654
896
|
def _gen_normalizer_transform(lines, tx, tx_safe, input_df, source_dfs):
|
|
655
|
-
|
|
656
|
-
|
|
897
|
+
input_ports = []
|
|
898
|
+
output_ports = []
|
|
899
|
+
occurs_cols = []
|
|
900
|
+
id_cols = []
|
|
901
|
+
|
|
902
|
+
for fld in tx.fields:
|
|
903
|
+
pt = (fld.porttype or "").upper()
|
|
904
|
+
if "INPUT" in pt:
|
|
905
|
+
input_ports.append(fld)
|
|
906
|
+
if "OUTPUT" in pt:
|
|
907
|
+
output_ports.append(fld)
|
|
908
|
+
|
|
909
|
+
for fld in tx.fields:
|
|
910
|
+
if fld.field_number > 0:
|
|
911
|
+
occurs_cols.append(fld.name)
|
|
912
|
+
|
|
913
|
+
if not occurs_cols:
|
|
914
|
+
import re
|
|
915
|
+
base_groups = {}
|
|
916
|
+
for fld in input_ports:
|
|
917
|
+
m = re.match(r'^(.+?)(\d+)$', fld.name)
|
|
918
|
+
if m:
|
|
919
|
+
base = m.group(1)
|
|
920
|
+
idx = int(m.group(2))
|
|
921
|
+
if base not in base_groups:
|
|
922
|
+
base_groups[base] = []
|
|
923
|
+
base_groups[base].append(fld.name)
|
|
924
|
+
else:
|
|
925
|
+
id_cols.append(fld.name)
|
|
926
|
+
|
|
927
|
+
if base_groups:
|
|
928
|
+
longest_group = max(base_groups.values(), key=len)
|
|
929
|
+
occurs_cols = longest_group
|
|
930
|
+
id_cols = [f.name for f in input_ports if f.name not in occurs_cols]
|
|
931
|
+
else:
|
|
932
|
+
for fld in input_ports:
|
|
933
|
+
pt = (fld.porttype or "").upper()
|
|
934
|
+
if "INPUT" in pt and "OUTPUT" in pt:
|
|
935
|
+
id_cols.append(fld.name)
|
|
936
|
+
elif "INPUT" in pt and "OUTPUT" not in pt:
|
|
937
|
+
occurs_cols.append(fld.name)
|
|
938
|
+
|
|
939
|
+
if not id_cols:
|
|
940
|
+
id_cols = [f.name for f in input_ports if f.name not in occurs_cols]
|
|
941
|
+
|
|
942
|
+
gk_field = None
|
|
943
|
+
for fld in output_ports:
|
|
944
|
+
if "GK" in fld.name.upper() or "GENERATED" in fld.name.upper() or "KEY" in fld.name.upper():
|
|
945
|
+
gk_field = fld.name
|
|
946
|
+
break
|
|
947
|
+
|
|
948
|
+
lines.append(f" # Normalizer: unpivot repeated columns into rows")
|
|
949
|
+
if occurs_cols and id_cols:
|
|
950
|
+
lines.append(f" df_{tx_safe} = {input_df}.melt(")
|
|
951
|
+
lines.append(f" id_vars={id_cols},")
|
|
952
|
+
lines.append(f" value_vars={occurs_cols},")
|
|
953
|
+
lines.append(f" var_name='_norm_variable',")
|
|
954
|
+
lines.append(f" value_name='_norm_value'")
|
|
955
|
+
lines.append(f" )")
|
|
956
|
+
lines.append(f" df_{tx_safe} = df_{tx_safe}.dropna(subset=['_norm_value']).reset_index(drop=True)")
|
|
957
|
+
elif occurs_cols:
|
|
958
|
+
lines.append(f" df_{tx_safe} = {input_df}[{occurs_cols}].stack().reset_index(drop=True).to_frame('_norm_value')")
|
|
959
|
+
else:
|
|
960
|
+
lines.append(f" df_{tx_safe} = {input_df}.copy()")
|
|
961
|
+
|
|
962
|
+
if gk_field:
|
|
963
|
+
lines.append(f" df_{tx_safe}['{gk_field}'] = range(1, len(df_{tx_safe}) + 1)")
|
|
964
|
+
|
|
657
965
|
source_dfs[tx.name] = f"df_{tx_safe}"
|
|
658
966
|
|
|
659
967
|
|
|
660
968
|
def _gen_rank_transform(lines, tx, tx_safe, input_df, source_dfs):
|
|
661
969
|
rank_port = None
|
|
662
|
-
|
|
970
|
+
group_by_ports = []
|
|
971
|
+
top_bottom = "TOP"
|
|
972
|
+
top_n = 0
|
|
973
|
+
|
|
663
974
|
for fld in tx.fields:
|
|
664
|
-
|
|
975
|
+
pt = (fld.porttype or "").upper()
|
|
976
|
+
if "INPUT" in pt and "OUTPUT" in pt:
|
|
977
|
+
group_by_ports.append(fld.name)
|
|
978
|
+
|
|
979
|
+
for fld in tx.fields:
|
|
980
|
+
if fld.expression and fld.expression.strip() and fld.name.upper() not in ("RANKINDEX",):
|
|
665
981
|
rank_port = fld.name
|
|
666
|
-
|
|
982
|
+
break
|
|
983
|
+
if not rank_port:
|
|
984
|
+
for fld in tx.fields:
|
|
985
|
+
if fld.name.upper() == "RANKINDEX":
|
|
986
|
+
continue
|
|
987
|
+
pt = (fld.porttype or "").upper()
|
|
988
|
+
if "INPUT" in pt and "OUTPUT" not in pt:
|
|
989
|
+
rank_port = fld.name
|
|
990
|
+
break
|
|
991
|
+
|
|
667
992
|
for attr in tx.attributes:
|
|
668
993
|
if attr.name == "Top/Bottom":
|
|
669
994
|
top_bottom = attr.value
|
|
995
|
+
elif attr.name == "Number Of Ranks":
|
|
996
|
+
try:
|
|
997
|
+
top_n = int(attr.value)
|
|
998
|
+
except (ValueError, TypeError):
|
|
999
|
+
top_n = 0
|
|
1000
|
+
|
|
1001
|
+
ascending = top_bottom.upper() != "TOP"
|
|
1002
|
+
|
|
1003
|
+
rank_out_field = "RANKINDEX"
|
|
1004
|
+
for fld in tx.fields:
|
|
1005
|
+
if fld.name.upper() == "RANKINDEX" or "RANK" in fld.name.upper():
|
|
1006
|
+
pt = (fld.porttype or "").upper()
|
|
1007
|
+
if "OUTPUT" in pt and "INPUT" not in pt:
|
|
1008
|
+
rank_out_field = fld.name
|
|
1009
|
+
break
|
|
670
1010
|
|
|
671
1011
|
lines.append(f" df_{tx_safe} = {input_df}.copy()")
|
|
672
|
-
if rank_port:
|
|
673
|
-
|
|
674
|
-
lines.append(f"
|
|
1012
|
+
if rank_port and group_by_ports:
|
|
1013
|
+
lines.append(f" # Rank by '{rank_port}' within groups {group_by_ports}")
|
|
1014
|
+
lines.append(f" _rank_vals = df_{tx_safe}.groupby({group_by_ports})['{rank_port}'].rank(")
|
|
1015
|
+
lines.append(f" method='min', ascending={ascending}")
|
|
1016
|
+
lines.append(f" )")
|
|
1017
|
+
lines.append(f" df_{tx_safe}['{rank_out_field}'] = _rank_vals.fillna(0).astype(int)")
|
|
1018
|
+
if top_n:
|
|
1019
|
+
lines.append(f" df_{tx_safe} = df_{tx_safe}[df_{tx_safe}['{rank_out_field}'] <= {top_n}].reset_index(drop=True)")
|
|
1020
|
+
elif rank_port:
|
|
1021
|
+
lines.append(f" # Rank by '{rank_port}' (no group-by)")
|
|
1022
|
+
lines.append(f" _rank_vals = df_{tx_safe}['{rank_port}'].rank(method='min', ascending={ascending})")
|
|
1023
|
+
lines.append(f" df_{tx_safe}['{rank_out_field}'] = _rank_vals.fillna(0).astype(int)")
|
|
1024
|
+
if top_n:
|
|
1025
|
+
lines.append(f" df_{tx_safe} = df_{tx_safe}[df_{tx_safe}['{rank_out_field}'] <= {top_n}].reset_index(drop=True)")
|
|
1026
|
+
else:
|
|
1027
|
+
lines.append(f" df_{tx_safe}['{rank_out_field}'] = range(1, len(df_{tx_safe}) + 1)")
|
|
675
1028
|
source_dfs[tx.name] = f"df_{tx_safe}"
|
|
676
1029
|
|
|
677
1030
|
|
|
@@ -782,6 +1135,8 @@ def _generate_target_write(lines, tgt_name, tgt_def, connector_graph, source_dfs
|
|
|
782
1135
|
|
|
783
1136
|
if tgt_def.database_type and tgt_def.database_type != "Flat File":
|
|
784
1137
|
lines.append(f" write_to_db(config, df_target_{tgt_safe}, '{tgt_def.name}', 'target')")
|
|
1138
|
+
elif tgt_def.flatfile:
|
|
1139
|
+
_emit_flatfile_write(lines, tgt_safe, tgt_def)
|
|
785
1140
|
else:
|
|
786
1141
|
lines.append(f" write_file(df_target_{tgt_safe}, config.get('targets', {{}}).get('{tgt_def.name}', {{}}).get('file_path', '{tgt_def.name}'),")
|
|
787
1142
|
lines.append(f" config.get('targets', {{}}).get('{tgt_def.name}', {{}}))")
|
{informatica_python-1.2.1 → informatica_python-1.3.1}/informatica_python/generators/workflow_gen.py
RENAMED
|
@@ -1,4 +1,5 @@
|
|
|
1
1
|
from informatica_python.models import FolderDef, WorkflowDef, TaskInstanceDef
|
|
2
|
+
from informatica_python.utils.expression_converter import convert_expression
|
|
2
3
|
|
|
3
4
|
|
|
4
5
|
def generate_workflow_code(folder: FolderDef) -> str:
|
|
@@ -127,14 +128,49 @@ def _generate_workflow_function(lines, wf: WorkflowDef, folder: FolderDef):
|
|
|
127
128
|
lines.append("")
|
|
128
129
|
|
|
129
130
|
elif task.task_type == "Decision":
|
|
130
|
-
lines.append(f" # Decision Task: {task.name}")
|
|
131
131
|
decision_cond = ""
|
|
132
|
+
decision_name = ""
|
|
132
133
|
for attr in task.attributes:
|
|
133
134
|
if attr.name == "Decision Condition":
|
|
134
135
|
decision_cond = attr.value
|
|
136
|
+
elif attr.name == "Decision Name":
|
|
137
|
+
decision_name = attr.value
|
|
138
|
+
|
|
139
|
+
lines.append(f" # Decision Task: {task.name}")
|
|
135
140
|
if decision_cond:
|
|
136
|
-
|
|
137
|
-
|
|
141
|
+
py_cond = _convert_decision_condition(decision_cond)
|
|
142
|
+
lines.append(f" # Original condition: {decision_cond}")
|
|
143
|
+
lines.append(f" decision_{task_safe} = {py_cond}")
|
|
144
|
+
lines.append(f" logger.info(f'Decision {task.name}: {{decision_{task_safe}}}')")
|
|
145
|
+
|
|
146
|
+
succ_targets = []
|
|
147
|
+
fail_targets = []
|
|
148
|
+
for link in wf.links:
|
|
149
|
+
if link.from_instance == task.name:
|
|
150
|
+
cond_text = (link.condition or "").strip().upper()
|
|
151
|
+
if "FAILED" in cond_text or "FALSE" in cond_text:
|
|
152
|
+
fail_targets.append(link.to_instance)
|
|
153
|
+
elif "SUCCEEDED" in cond_text or "TRUE" in cond_text or cond_text:
|
|
154
|
+
succ_targets.append(link.to_instance)
|
|
155
|
+
else:
|
|
156
|
+
succ_targets.append(link.to_instance)
|
|
157
|
+
|
|
158
|
+
if succ_targets or fail_targets:
|
|
159
|
+
lines.append(f" if decision_{task_safe}:")
|
|
160
|
+
if succ_targets:
|
|
161
|
+
for t in succ_targets:
|
|
162
|
+
lines.append(f" logger.info('Decision {task.name} => proceeding to {t}')")
|
|
163
|
+
else:
|
|
164
|
+
lines.append(f" pass")
|
|
165
|
+
if fail_targets:
|
|
166
|
+
lines.append(f" else:")
|
|
167
|
+
for t in fail_targets:
|
|
168
|
+
lines.append(f" logger.info('Decision {task.name} => proceeding to {t}')")
|
|
169
|
+
else:
|
|
170
|
+
lines.append(f" if not decision_{task_safe}:")
|
|
171
|
+
lines.append(f" logger.warning('Decision {task.name} evaluated to False')")
|
|
172
|
+
else:
|
|
173
|
+
lines.append(f" logger.info('Decision task: {task.name} (no condition specified)')")
|
|
138
174
|
lines.append("")
|
|
139
175
|
|
|
140
176
|
elif task.task_type == "Timer":
|
|
@@ -226,6 +262,24 @@ def _get_task_execution_order(wf: WorkflowDef):
|
|
|
226
262
|
return ordered
|
|
227
263
|
|
|
228
264
|
|
|
265
|
+
def _convert_decision_condition(condition):
|
|
266
|
+
import re
|
|
267
|
+
cond = condition.strip()
|
|
268
|
+
cond = re.sub(r'\$\$(\w+)', r'\1', cond)
|
|
269
|
+
cond = re.sub(r'\$(\w+)\.(\w+)\.(Status|PrevTaskStatus)', r"'\2_status'", cond)
|
|
270
|
+
cond = re.sub(r'\bSUCCEEDED\b', "'SUCCEEDED'", cond, flags=re.IGNORECASE)
|
|
271
|
+
cond = re.sub(r'\bFAILED\b', "'FAILED'", cond, flags=re.IGNORECASE)
|
|
272
|
+
cond = re.sub(r'\bABORTED\b', "'ABORTED'", cond, flags=re.IGNORECASE)
|
|
273
|
+
cond = re.sub(r'\bAND\b', 'and', cond, flags=re.IGNORECASE)
|
|
274
|
+
cond = re.sub(r'\bOR\b', 'or', cond, flags=re.IGNORECASE)
|
|
275
|
+
cond = re.sub(r'\bNOT\b', 'not', cond, flags=re.IGNORECASE)
|
|
276
|
+
cond = re.sub(r'\bTRUE\b', 'True', cond, flags=re.IGNORECASE)
|
|
277
|
+
cond = re.sub(r'\bFALSE\b', 'False', cond, flags=re.IGNORECASE)
|
|
278
|
+
cond = re.sub(r'(?<!=)=(?!=)', '==', cond)
|
|
279
|
+
cond = cond.replace('<>', '!=')
|
|
280
|
+
return cond
|
|
281
|
+
|
|
282
|
+
|
|
229
283
|
def _safe_name(name):
|
|
230
284
|
import re
|
|
231
285
|
safe = re.sub(r'[^a-zA-Z0-9_]', '_', name)
|
|
@@ -528,6 +528,254 @@ def test_generated_lookup_code():
|
|
|
528
528
|
print(f"PASS: test_generated_lookup_code")
|
|
529
529
|
|
|
530
530
|
|
|
531
|
+
def test_flatfile_metadata_read():
    """Flat-file delimiter/skip-row metadata must surface in generated mapping code."""
    from informatica_python.generators.mapping_gen import generate_mapping_code
    from informatica_python.models import (
        MappingDef, FolderDef, SourceDef, TargetDef, FlatFileDef,
        FieldDef, TransformationDef, ConnectorDef, InstanceDef, TableAttribute,
    )

    # Pipe-delimited source with header, qualifier and skip-rows metadata.
    source = SourceDef(
        name="PIPE_SOURCE",
        database_type="Flat File",
        flatfile=FlatFileDef(
            name="test_file",
            delimiter="|",
            header_lines=1,
            text_qualifier='"',
            skip_rows=2,
            code_page="UTF-8",
        ),
        fields=[
            FieldDef(name="COL_A", datatype="string"),
            FieldDef(name="COL_B", datatype="integer"),
        ],
    )
    # Tilde-delimited flat-file target.
    target = TargetDef(
        name="TILDE_TARGET",
        database_type="Flat File",
        flatfile=FlatFileDef(name="tgt_file", delimiter="~"),
        fields=[FieldDef(name="COL_A", datatype="string")],
    )
    mapping = MappingDef(
        name="m_flatfile_test",
        transformations=[],
        connectors=[
            ConnectorDef(
                from_instance="PIPE_SOURCE", from_field="COL_A",
                from_instance_type="Source Definition",
                to_instance="TILDE_TARGET", to_field="COL_A",
                to_instance_type="Target Definition",
            ),
        ],
        instances=[
            InstanceDef(name="PIPE_SOURCE", type="Source Definition", transformation_name="PIPE_SOURCE"),
            InstanceDef(name="TILDE_TARGET", type="Target Definition", transformation_name="TILDE_TARGET"),
        ],
    )
    folder = FolderDef(name="test", sources=[source], targets=[target], mappings=[mapping])
    code = generate_mapping_code(mapping, folder)

    assert "ff_cfg_" in code, "Should emit flatfile config dict"
    assert "'delimiter': '|'" in code, "Pipe delimiter should appear"
    assert "'skip_rows': 2" in code, "Skip rows should appear"
    assert "'~'" in code, "Tilde delimiter should appear for target"
    print("PASS: test_flatfile_metadata_read")
|
|
579
|
+
|
|
580
|
+
|
|
581
|
+
def test_flatfile_fixed_width():
    """Fixed-width flat-file sources should be read via pd.read_fwf with precision-derived widths."""
    from informatica_python.generators.mapping_gen import generate_mapping_code
    from informatica_python.models import (
        MappingDef, FolderDef, SourceDef, FlatFileDef,
        FieldDef, InstanceDef, TargetDef, ConnectorDef,
    )

    fixed_width_source = SourceDef(
        name="FW_SOURCE",
        database_type="Flat File",
        flatfile=FlatFileDef(name="fw_file", is_fixed_width="YES", header_lines=0),
        fields=[
            FieldDef(name="F1", datatype="string", precision=10),
            FieldDef(name="F2", datatype="string", precision=20),
        ],
    )
    mapping = MappingDef(
        name="m_fw_test",
        transformations=[],
        connectors=[],
        instances=[
            InstanceDef(name="FW_SOURCE", type="Source Definition", transformation_name="FW_SOURCE"),
        ],
    )
    folder = FolderDef(name="test", sources=[fixed_width_source], targets=[], mappings=[mapping])
    code = generate_mapping_code(mapping, folder)

    assert "read_fwf" in code, "Fixed-width should use pd.read_fwf"
    assert "[10, 20]" in code, "Widths should be derived from field precision"
    print("PASS: test_flatfile_fixed_width")
|
|
608
|
+
|
|
609
|
+
|
|
610
|
+
def test_normalizer_transform():
    """Normalizer generation should melt the repeated columns and emit a GK sequence."""
    from informatica_python.generators.mapping_gen import _gen_normalizer_transform
    from informatica_python.models import (
        MappingDef, FolderDef, SourceDef, FieldDef,
        TransformationDef, ConnectorDef, InstanceDef, TableAttribute, TargetDef,
    )

    normalizer = TransformationDef(
        name="NRM_PHONES",
        type="Normalizer",
        fields=[
            FieldDef(name="CUST_ID", datatype="integer", porttype="INPUT/OUTPUT"),
            FieldDef(name="PHONE1", datatype="string", porttype="INPUT"),
            FieldDef(name="PHONE2", datatype="string", porttype="INPUT"),
            FieldDef(name="PHONE3", datatype="string", porttype="INPUT"),
            FieldDef(name="GK", datatype="integer", porttype="OUTPUT"),
        ],
    )

    emitted = []
    df_registry = {}
    _gen_normalizer_transform(emitted, normalizer, "nrm_phones", "df_input", df_registry)
    code = "\n".join(emitted)

    assert "melt(" in code, "Normalizer should use pd.melt()"
    assert "PHONE1" in code, "Should reference PHONE columns"
    assert "CUST_ID" in code, "Should reference ID column"
    assert "GK" in code, "Should generate GK sequence"
    assert df_registry["NRM_PHONES"] == "df_nrm_phones"
    print("PASS: test_normalizer_transform")
|
|
639
|
+
|
|
640
|
+
|
|
641
|
+
def test_rank_with_groupby():
    """Rank generation should group by pass-through ports and keep the top N rows."""
    from informatica_python.generators.mapping_gen import _gen_rank_transform
    from informatica_python.models import FieldDef, TransformationDef, TableAttribute

    rank_tx = TransformationDef(
        name="RNK_SALES",
        type="Rank",
        fields=[
            FieldDef(name="REGION", datatype="string", porttype="INPUT/OUTPUT"),
            FieldDef(name="AMOUNT", datatype="decimal", porttype="INPUT", expression="AMOUNT"),
            FieldDef(name="RANKINDEX", datatype="integer", porttype="OUTPUT"),
        ],
        attributes=[
            TableAttribute(name="Top/Bottom", value="TOP"),
            TableAttribute(name="Number Of Ranks", value="5"),
        ],
    )

    emitted = []
    df_registry = {}
    _gen_rank_transform(emitted, rank_tx, "rnk_sales", "df_input", df_registry)
    code = "\n".join(emitted)

    assert "groupby" in code, "Should use groupby for group-by rank"
    assert "REGION" in code, "Should group by REGION"
    assert "AMOUNT" in code, "Should rank by AMOUNT"
    assert "RANKINDEX" in code, "Should produce RANKINDEX column"
    assert "<= 5" in code, "Should filter top 5"
    assert df_registry["RNK_SALES"] == "df_rnk_sales"
    print("PASS: test_rank_with_groupby")
|
|
672
|
+
|
|
673
|
+
|
|
674
|
+
def test_decision_task_if_else():
    """Decision tasks should produce an if/else branch in generated workflow code."""
    from informatica_python.generators.workflow_gen import generate_workflow_code
    from informatica_python.models import (
        FolderDef, WorkflowDef, TaskInstanceDef, WorkflowLink,
        TableAttribute, MappingDef,
    )

    decision_task = TaskInstanceDef(
        name="dec_check_status",
        task_name="dec_check_status",
        task_type="Decision",
        attributes=[TableAttribute(name="Decision Condition", value="$$LOAD_FLAG = TRUE")],
    )
    workflow = WorkflowDef(
        name="wf_test_decision",
        task_instances=[
            TaskInstanceDef(name="Start", task_name="Start", task_type="Start Task"),
            decision_task,
            TaskInstanceDef(name="s_load_data", task_name="s_load_data", task_type="Session"),
            TaskInstanceDef(name="s_skip_load", task_name="s_skip_load", task_type="Session"),
        ],
        links=[
            WorkflowLink(from_instance="Start", to_instance="dec_check_status"),
            WorkflowLink(from_instance="dec_check_status", to_instance="s_load_data", condition="$dec_check_status.SUCCEEDED"),
            WorkflowLink(from_instance="dec_check_status", to_instance="s_skip_load", condition="$dec_check_status.FAILED"),
        ],
    )
    folder = FolderDef(name="test", workflows=[workflow], mappings=[])
    code = generate_workflow_code(folder)

    assert "decision_dec_check_status" in code, "Should create decision variable"
    assert "if decision_dec_check_status" in code, "Should generate if branch"
    assert "LOAD_FLAG" in code, "Should convert $$LOAD_FLAG"
    assert "True" in code, "Should convert TRUE to Python True"
    print("PASS: test_decision_task_if_else")
|
|
708
|
+
|
|
709
|
+
|
|
710
|
+
def test_inline_mapplet():
    """Mapplet instances should be inlined into the mapping with a name prefix."""
    from informatica_python.generators.mapping_gen import generate_mapping_code
    from informatica_python.models import (
        MappingDef, FolderDef, SourceDef, TargetDef, MappletDef,
        TransformationDef, ConnectorDef, InstanceDef, FieldDef,
        TableAttribute,
    )

    # Reusable mapplet: single Expression transform that upper-cases a name.
    mapplet = MappletDef(
        name="mplt_clean_name",
        transformations=[
            TransformationDef(
                name="EXP_UPPER",
                type="Expression",
                fields=[
                    FieldDef(name="FULL_NAME", datatype="string", porttype="INPUT/OUTPUT",
                             expression="UPPER(FULL_NAME)"),
                ],
            ),
        ],
        connectors=[],
    )

    # Mapping wiring: SRC -> SQ_INPUT -> MPLT_INST -> TGT.
    mapping = MappingDef(
        name="m_with_mapplet",
        transformations=[
            TransformationDef(name="SQ_INPUT", type="Source Qualifier",
                              fields=[FieldDef(name="FULL_NAME", datatype="string", porttype="INPUT/OUTPUT")]),
        ],
        connectors=[
            ConnectorDef(from_instance="SRC", from_field="FULL_NAME",
                         from_instance_type="Source Definition",
                         to_instance="SQ_INPUT", to_field="FULL_NAME",
                         to_instance_type="Source Qualifier"),
            ConnectorDef(from_instance="SQ_INPUT", from_field="FULL_NAME",
                         from_instance_type="Source Qualifier",
                         to_instance="MPLT_INST", to_field="FULL_NAME",
                         to_instance_type="Mapplet"),
            ConnectorDef(from_instance="MPLT_INST", from_field="FULL_NAME",
                         from_instance_type="Mapplet",
                         to_instance="TGT", to_field="FULL_NAME",
                         to_instance_type="Target Definition"),
        ],
        instances=[
            InstanceDef(name="SRC", type="Source Definition", transformation_name="SRC"),
            InstanceDef(name="SQ_INPUT", type="Source Qualifier"),
            InstanceDef(name="MPLT_INST", type="Mapplet", transformation_name="mplt_clean_name",
                        transformation_type="Mapplet"),
            InstanceDef(name="TGT", type="Target Definition", transformation_name="TGT"),
        ],
    )

    folder = FolderDef(
        name="test",
        sources=[SourceDef(name="SRC", fields=[FieldDef(name="FULL_NAME", datatype="string")])],
        targets=[TargetDef(name="TGT", fields=[FieldDef(name="FULL_NAME", datatype="string")])],
        mappings=[mapping],
        mapplets=[mapplet],
    )
    code = generate_mapping_code(mapping, folder)

    assert "MPLT_INST__EXP_UPPER" in code or "mplt_inst__exp_upper" in code, \
        "Inlined mapplet transform should appear with prefix"
    assert "UPPER" in code, "UPPER expression from mapplet should be present"
    print("PASS: test_inline_mapplet")
|
|
777
|
+
|
|
778
|
+
|
|
531
779
|
if __name__ == "__main__":
|
|
532
780
|
print("=" * 60)
|
|
533
781
|
print("Running informatica-python tests")
|
|
@@ -551,6 +799,12 @@ if __name__ == "__main__":
|
|
|
551
799
|
test_generated_aggregator_code,
|
|
552
800
|
test_generated_joiner_code,
|
|
553
801
|
test_generated_lookup_code,
|
|
802
|
+
test_flatfile_metadata_read,
|
|
803
|
+
test_flatfile_fixed_width,
|
|
804
|
+
test_normalizer_transform,
|
|
805
|
+
test_rank_with_groupby,
|
|
806
|
+
test_decision_task_if_else,
|
|
807
|
+
test_inline_mapplet,
|
|
554
808
|
]
|
|
555
809
|
|
|
556
810
|
passed = 0
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{informatica_python-1.2.1 → informatica_python-1.3.1}/informatica_python/generators/__init__.py
RENAMED
|
File without changes
|
{informatica_python-1.2.1 → informatica_python-1.3.1}/informatica_python/generators/config_gen.py
RENAMED
|
File without changes
|
{informatica_python-1.2.1 → informatica_python-1.3.1}/informatica_python/generators/error_log_gen.py
RENAMED
|
File without changes
|
{informatica_python-1.2.1 → informatica_python-1.3.1}/informatica_python/generators/sql_gen.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{informatica_python-1.2.1 → informatica_python-1.3.1}/informatica_python/utils/datatype_map.py
RENAMED
|
File without changes
|
|
File without changes
|
{informatica_python-1.2.1 → informatica_python-1.3.1}/informatica_python.egg-info/SOURCES.txt
RENAMED
|
File without changes
|
|
File without changes
|
{informatica_python-1.2.1 → informatica_python-1.3.1}/informatica_python.egg-info/entry_points.txt
RENAMED
|
File without changes
|
{informatica_python-1.2.1 → informatica_python-1.3.1}/informatica_python.egg-info/requires.txt
RENAMED
|
File without changes
|
{informatica_python-1.2.1 → informatica_python-1.3.1}/informatica_python.egg-info/top_level.txt
RENAMED
|
File without changes
|
|
File without changes
|