informatica-python 1.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- informatica_python/__init__.py +4 -0
- informatica_python/cli.py +83 -0
- informatica_python/converter.py +285 -0
- informatica_python/generators/__init__.py +0 -0
- informatica_python/generators/config_gen.py +159 -0
- informatica_python/generators/error_log_gen.py +140 -0
- informatica_python/generators/helper_gen.py +693 -0
- informatica_python/generators/mapping_gen.py +649 -0
- informatica_python/generators/sql_gen.py +132 -0
- informatica_python/generators/workflow_gen.py +234 -0
- informatica_python/models.py +281 -0
- informatica_python/parser.py +468 -0
- informatica_python/utils/__init__.py +0 -0
- informatica_python/utils/datatype_map.py +105 -0
- informatica_python/utils/expression_converter.py +128 -0
- informatica_python-1.0.0.dist-info/METADATA +118 -0
- informatica_python-1.0.0.dist-info/RECORD +20 -0
- informatica_python-1.0.0.dist-info/WHEEL +5 -0
- informatica_python-1.0.0.dist-info/entry_points.txt +2 -0
- informatica_python-1.0.0.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,649 @@
|
|
|
1
|
+
from typing import List, Dict
|
|
2
|
+
from informatica_python.models import (
|
|
3
|
+
MappingDef, FolderDef, SourceDef, TargetDef,
|
|
4
|
+
TransformationDef, ConnectorDef, InstanceDef,
|
|
5
|
+
)
|
|
6
|
+
from informatica_python.utils.expression_converter import convert_expression, convert_sql_expression
|
|
7
|
+
from informatica_python.utils.datatype_map import get_python_type
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
def generate_mapping_code(mapping: MappingDef, folder: FolderDef,
                          data_lib: str = "pandas", mapping_index: int = 1) -> str:
    """Render a standalone Python script (as one string) for an Informatica mapping.

    The emitted script defines ``run_<mapping>(config)`` which reads every
    source, applies the mapping's transformations in dependency order, and
    writes each target.  The script imports ``helper_functions`` and relies on
    it for ``read_from_db``, ``read_file``, ``write_to_db``, ``write_file``,
    ``log_mapping_start``/``log_mapping_end``, ``load_config`` and ``logger``.

    Args:
        mapping: Parsed mapping definition to convert.
        folder: Folder holding the source/target definitions the mapping references.
        data_lib: Dataframe backend name (currently unused by this function).
        mapping_index: Position of the mapping in its workflow (currently unused).

    Returns:
        The generated script as one newline-joined string.
    """
    lines = []
    # Module docstring of the generated script.
    lines.append('"""')
    lines.append(f"Mapping: {mapping.name}")
    lines.append(f"Description: {mapping.description or 'N/A'}")
    lines.append(f"Auto-generated by informatica-python")
    lines.append('"""')
    lines.append("")
    lines.append("from helper_functions import *")
    lines.append("")
    lines.append("")

    # Index the mapping's pieces once for fast lookup during generation.
    source_map = _build_source_map(mapping, folder)
    target_map = _build_target_map(mapping, folder)
    transform_map = {t.name: t for t in mapping.transformations}
    connector_graph = _build_connector_graph(mapping.connectors)
    instance_map = {i.name: i for i in mapping.instances}

    lines.append(f"def run_{_safe_name(mapping.name)}(config):")
    lines.append(f'    """Execute mapping: {mapping.name}"""')
    lines.append(f"    start_time = log_mapping_start('{mapping.name}')")
    lines.append("")

    # Mapping variables become plain locals in the generated function;
    # integer-ish datatypes default to 0, everything else to ''.
    if mapping.variables:
        lines.append("    # Mapping Variables")
        for var in mapping.variables:
            safe_var = _safe_name(var.name.replace("$$", ""))
            default = var.default_value or "''"
            if var.datatype.lower() in ("integer", "bigint", "int"):
                default = var.default_value or "0"
            lines.append(f"    {safe_var} = {default}")
        lines.append("")

    # Pre-register a dataframe variable name for each source instance.
    source_dfs = {}
    for src_name, src_def in source_map.items():
        safe = _safe_name(src_name)
        source_dfs[src_name] = f"df_{safe}"

    # Source qualifiers (when present) own the reads; otherwise read every
    # source directly here.
    sq_transforms = [t for t in mapping.transformations
                     if t.type in ("Source Qualifier", "Application Source Qualifier")]
    if sq_transforms:
        for sq in sq_transforms:
            _generate_source_qualifier(lines, sq, source_map, source_dfs, connector_graph, instance_map)
    else:
        for src_name, src_def in source_map.items():
            safe = _safe_name(src_name)
            lines.append(f"    # Read source: {src_name}")
            if src_def.database_type and src_def.database_type != "Flat File":
                conn_name = _safe_name(src_def.db_name) if src_def.db_name else "default"
                schema = src_def.owner_name or "dbo"
                lines.append(f"    df_{safe} = read_from_db(config, 'SELECT * FROM {schema}.{src_name}', '{conn_name}')")
            else:
                lines.append(f"    df_{safe} = read_file(config.get('sources', {{}}).get('{src_name}', {{}}).get('file_path', '{src_name}'),")
                lines.append(f"                        config.get('sources', {{}}).get('{src_name}', {{}}))")
            lines.append("")

    # Emit transformations so that feeders precede consumers.
    processing_order = _get_processing_order(mapping.transformations, connector_graph, sq_transforms)

    for tx in processing_order:
        if tx.type in ("Source Qualifier", "Application Source Qualifier"):
            continue  # already handled above
        _generate_transformation(lines, tx, connector_graph, source_dfs, transform_map, instance_map)

    for tgt_name, tgt_def in target_map.items():
        _generate_target_write(lines, tgt_name, tgt_def, connector_graph, source_dfs, transform_map, instance_map)

    lines.append("")
    lines.append(f"    log_mapping_end('{mapping.name}', start_time)")
    lines.append(f"    logger.info('Mapping {mapping.name} completed successfully')")
    lines.append("")
    lines.append("")
    # Script entry point of the generated file.
    lines.append("if __name__ == '__main__':")
    lines.append("    config = load_config()")
    lines.append(f"    run_{_safe_name(mapping.name)}(config)")
    lines.append("")

    return "\n".join(lines)
|
|
88
|
+
|
|
89
|
+
|
|
90
|
+
def _safe_name(name):
|
|
91
|
+
import re
|
|
92
|
+
safe = re.sub(r'[^a-zA-Z0-9_]', '_', name)
|
|
93
|
+
if safe and safe[0].isdigit():
|
|
94
|
+
safe = '_' + safe
|
|
95
|
+
return safe.lower()
|
|
96
|
+
|
|
97
|
+
|
|
98
|
+
def _build_source_map(mapping, folder):
|
|
99
|
+
source_map = {}
|
|
100
|
+
for inst in mapping.instances:
|
|
101
|
+
if inst.type == "Source Definition":
|
|
102
|
+
tx_name = inst.transformation_name or inst.name
|
|
103
|
+
for src in folder.sources:
|
|
104
|
+
if src.name == tx_name:
|
|
105
|
+
source_map[inst.name] = src
|
|
106
|
+
break
|
|
107
|
+
else:
|
|
108
|
+
source_map[inst.name] = SourceDef(name=tx_name)
|
|
109
|
+
return source_map
|
|
110
|
+
|
|
111
|
+
|
|
112
|
+
def _build_target_map(mapping, folder):
|
|
113
|
+
target_map = {}
|
|
114
|
+
for inst in mapping.instances:
|
|
115
|
+
if inst.type == "Target Definition":
|
|
116
|
+
tx_name = inst.transformation_name or inst.name
|
|
117
|
+
for tgt in folder.targets:
|
|
118
|
+
if tgt.name == tx_name:
|
|
119
|
+
target_map[inst.name] = tgt
|
|
120
|
+
break
|
|
121
|
+
else:
|
|
122
|
+
target_map[inst.name] = TargetDef(name=tx_name)
|
|
123
|
+
return target_map
|
|
124
|
+
|
|
125
|
+
|
|
126
|
+
def _build_connector_graph(connectors: List[ConnectorDef]) -> Dict:
    """Index connectors by their originating ("from") and receiving ("to") instance."""
    graph = {"from": {}, "to": {}}
    for conn in connectors:
        # setdefault creates the bucket on first sight, then appends.
        graph["from"].setdefault(conn.from_instance, []).append(conn)
        graph["to"].setdefault(conn.to_instance, []).append(conn)
    return graph
|
|
141
|
+
|
|
142
|
+
|
|
143
|
+
def _get_processing_order(transformations, connector_graph, sq_transforms):
|
|
144
|
+
sq_names = {t.name for t in sq_transforms}
|
|
145
|
+
remaining = [t for t in transformations if t.name not in sq_names]
|
|
146
|
+
|
|
147
|
+
ordered = []
|
|
148
|
+
processed = set(sq_names)
|
|
149
|
+
max_iterations = len(remaining) * 2 + 1
|
|
150
|
+
|
|
151
|
+
for _ in range(max_iterations):
|
|
152
|
+
if not remaining:
|
|
153
|
+
break
|
|
154
|
+
for tx in list(remaining):
|
|
155
|
+
inputs = connector_graph.get("to", {}).get(tx.name, [])
|
|
156
|
+
input_instances = {c.from_instance for c in inputs}
|
|
157
|
+
if not input_instances or input_instances.issubset(processed):
|
|
158
|
+
ordered.append(tx)
|
|
159
|
+
processed.add(tx.name)
|
|
160
|
+
remaining.remove(tx)
|
|
161
|
+
|
|
162
|
+
ordered.extend(remaining)
|
|
163
|
+
return ordered
|
|
164
|
+
|
|
165
|
+
|
|
166
|
+
def _generate_source_qualifier(lines, sq, source_map, source_dfs, connector_graph, instance_map):
    """Append generated read code for a Source Qualifier transformation.

    Honors ``Sql Query`` (override), ``Pre SQL`` and ``Post SQL`` attributes;
    otherwise emits a plain read per connected source.  Registers the
    qualifier's dataframe variable name in ``source_dfs``.
    """
    sq_safe = _safe_name(sq.name)
    sql_override = ""
    pre_sql = ""
    post_sql = ""
    # Pull the SQL-related attributes off the qualifier.
    for attr in sq.attributes:
        if attr.name == "Sql Query" and attr.value:
            sql_override = convert_sql_expression(attr.value)
        elif attr.name == "Pre SQL" and attr.value:
            pre_sql = convert_sql_expression(attr.value)
        elif attr.name == "Post SQL" and attr.value:
            post_sql = convert_sql_expression(attr.value)

    # Resolve which sources feed this qualifier: connectors first, then the
    # SQ_<source> naming convention, finally any source as a last resort.
    connected_sources = set()
    to_conns = connector_graph.get("to", {}).get(sq.name, [])
    for c in to_conns:
        if c.from_instance in source_map:
            connected_sources.add(c.from_instance)
    if not connected_sources:
        for src_name in source_map:
            if sq.name.upper().startswith("SQ_") and src_name.upper() in sq.name.upper():
                connected_sources.add(src_name)
    if not connected_sources and source_map:
        connected_sources.add(next(iter(source_map)))

    lines.append(f"    # Source Qualifier: {sq.name}")

    if pre_sql:
        # Emit the Pre-SQL both as comments (for readability) and as a call.
        lines.append(f"    # Pre-SQL")
        for sql_line in pre_sql.strip().split("\n"):
            lines.append(f"    # {sql_line}")
        lines.append(f"    execute_sql(config, '''{pre_sql}''')")
        lines.append("")

    if sql_override:
        # A SQL override replaces per-source reads with one query against the
        # first connected source's connection.
        src_name = next(iter(connected_sources)) if connected_sources else "source"
        src_def = source_map.get(src_name, SourceDef(name=src_name))
        conn_name = _safe_name(src_def.db_name) if src_def.db_name else "default"

        lines.append(f"    sql_{sq_safe} = '''")
        for sql_line in sql_override.strip().split("\n"):
            lines.append(f"    {sql_line}")
        lines.append(f"    '''")
        lines.append(f"    df_{sq_safe} = read_from_db(config, sql_{sq_safe}, '{conn_name}')")
    else:
        if len(connected_sources) == 1:
            # Single source: read it straight into the qualifier's dataframe.
            src_name = next(iter(connected_sources))
            src_def = source_map.get(src_name, SourceDef(name=src_name))
            safe_src = _safe_name(src_name)
            if src_def.database_type and src_def.database_type != "Flat File":
                conn_name = _safe_name(src_def.db_name) if src_def.db_name else "default"
                schema = src_def.owner_name or "dbo"
                cols = ", ".join(f.name for f in src_def.fields) if src_def.fields else "*"
                lines.append(f"    df_{sq_safe} = read_from_db(config, 'SELECT {cols} FROM {schema}.{src_def.name}', '{conn_name}')")
            else:
                lines.append(f"    df_{sq_safe} = read_file(config.get('sources', {{}}).get('{src_def.name}', {{}}).get('file_path', '{src_def.name}'),")
                lines.append(f"                        config.get('sources', {{}}).get('{src_def.name}', {{}}))")
        else:
            # Multiple sources: read each one, then alias the qualifier's
            # dataframe to the first connected source (join logic is the
            # downstream transformations' job).
            for src_name in connected_sources:
                src_def = source_map.get(src_name, SourceDef(name=src_name))
                safe_src = _safe_name(src_name)
                if src_def.database_type and src_def.database_type != "Flat File":
                    conn_name = _safe_name(src_def.db_name) if src_def.db_name else "default"
                    schema = src_def.owner_name or "dbo"
                    lines.append(f"    df_{safe_src} = read_from_db(config, 'SELECT * FROM {schema}.{src_def.name}', '{conn_name}')")
                else:
                    lines.append(f"    df_{safe_src} = read_file(config.get('sources', {{}}).get('{src_def.name}', {{}}).get('file_path', '{src_def.name}'),")
                    lines.append(f"                        config.get('sources', {{}}).get('{src_def.name}', {{}}))")
            lines.append(f"    df_{sq_safe} = df_{_safe_name(next(iter(connected_sources)))}")

    source_dfs[sq.name] = f"df_{sq_safe}"

    if post_sql:
        lines.append(f"    # Post-SQL")
        lines.append(f"    execute_sql(config, '''{post_sql}''')")

    lines.append("")
|
|
243
|
+
|
|
244
|
+
|
|
245
|
+
def _generate_transformation(lines, tx, connector_graph, source_dfs, transform_map, instance_map):
    """Append generated code for one transformation, dispatching on its type.

    Resolves the transformation's input dataframe variable from the connector
    graph, then delegates to the type-specific ``_gen_*`` helper.  Unknown
    types are emitted as a pass-through with a TODO marker.
    """
    tx_safe = _safe_name(tx.name)
    tx_type = tx.type.lower().strip()

    # Collect every upstream instance feeding this transformation.
    input_conns = connector_graph.get("to", {}).get(tx.name, [])
    input_sources = set()
    for c in input_conns:
        input_sources.add(c.from_instance)

    # Choose the input dataframe variable: prefer one already registered in
    # source_dfs, then a name derived from any feeder, then a placeholder.
    input_df = None
    for src in input_sources:
        if src in source_dfs:
            input_df = source_dfs[src]
            break
    if not input_df:
        for src in input_sources:
            input_df = f"df_{_safe_name(src)}"
            break
    if not input_df:
        input_df = "df_input"

    lines.append(f"    # Transformation: {tx.name} (Type: {tx.type})")

    # Dispatch on the normalized transformation type.
    if tx_type == "expression":
        _gen_expression_transform(lines, tx, tx_safe, input_df, source_dfs)
    elif tx_type == "filter":
        _gen_filter_transform(lines, tx, tx_safe, input_df, source_dfs)
    elif tx_type in ("aggregator",):
        _gen_aggregator_transform(lines, tx, tx_safe, input_df, source_dfs)
    elif tx_type == "sorter":
        _gen_sorter_transform(lines, tx, tx_safe, input_df, source_dfs)
    elif tx_type in ("joiner",):
        _gen_joiner_transform(lines, tx, tx_safe, input_df, input_sources, source_dfs)
    elif tx_type in ("lookup procedure", "lookup"):
        _gen_lookup_transform(lines, tx, tx_safe, input_df, source_dfs)
    elif tx_type == "router":
        _gen_router_transform(lines, tx, tx_safe, input_df, source_dfs)
    elif tx_type in ("union",):
        _gen_union_transform(lines, tx, tx_safe, input_sources, source_dfs)
    elif tx_type in ("update strategy",):
        _gen_update_strategy(lines, tx, tx_safe, input_df, source_dfs)
    elif tx_type == "sequence generator":
        _gen_sequence_generator(lines, tx, tx_safe, input_df, source_dfs)
    elif tx_type in ("normalizer",):
        _gen_normalizer_transform(lines, tx, tx_safe, input_df, source_dfs)
    elif tx_type in ("rank",):
        _gen_rank_transform(lines, tx, tx_safe, input_df, source_dfs)
    elif tx_type in ("custom transformation",):
        _gen_custom_transform(lines, tx, tx_safe, input_df, input_sources, source_dfs)
    elif tx_type in ("stored procedure",):
        _gen_stored_proc(lines, tx, tx_safe, input_df, source_dfs)
    elif tx_type in ("java",):
        _gen_java_transform(lines, tx, tx_safe, input_df, source_dfs)
    elif tx_type in ("sql",):
        _gen_sql_transform(lines, tx, tx_safe, input_df, source_dfs)
    else:
        # Unsupported type: keep the pipeline flowing, flag for manual work.
        lines.append(f"    # TODO: Unsupported transformation type '{tx.type}' - passing through")
        lines.append(f"    df_{tx_safe} = {input_df}.copy() if hasattr({input_df}, 'copy') else {input_df}")
        source_dfs[tx.name] = f"df_{tx_safe}"

    lines.append("")
|
|
306
|
+
|
|
307
|
+
|
|
308
|
+
def _gen_expression_transform(lines, tx, tx_safe, input_df, source_dfs):
|
|
309
|
+
lines.append(f" df_{tx_safe} = {input_df}.copy()")
|
|
310
|
+
has_expressions = False
|
|
311
|
+
for fld in tx.fields:
|
|
312
|
+
if fld.expression and fld.expression.strip() and fld.expression.strip() != fld.name:
|
|
313
|
+
has_expressions = True
|
|
314
|
+
expr_py = convert_expression(fld.expression)
|
|
315
|
+
lines.append(f" # {fld.name} = {fld.expression}")
|
|
316
|
+
if fld.porttype and "OUTPUT" in fld.porttype.upper() and "INPUT" not in fld.porttype.upper():
|
|
317
|
+
lines.append(f" df_{tx_safe}['{fld.name}'] = {expr_py} # output-only port")
|
|
318
|
+
else:
|
|
319
|
+
lines.append(f" df_{tx_safe}['{fld.name}'] = {expr_py}")
|
|
320
|
+
if not has_expressions:
|
|
321
|
+
lines.append(f" # Pass-through expression (no transformations)")
|
|
322
|
+
source_dfs[tx.name] = f"df_{tx_safe}"
|
|
323
|
+
|
|
324
|
+
|
|
325
|
+
def _gen_filter_transform(lines, tx, tx_safe, input_df, source_dfs):
|
|
326
|
+
filter_condition = ""
|
|
327
|
+
for attr in tx.attributes:
|
|
328
|
+
if attr.name == "Filter Condition":
|
|
329
|
+
filter_condition = attr.value
|
|
330
|
+
if filter_condition:
|
|
331
|
+
expr_py = convert_expression(filter_condition)
|
|
332
|
+
lines.append(f" # Filter: {filter_condition}")
|
|
333
|
+
lines.append(f" df_{tx_safe} = {input_df}[{expr_py}].copy()")
|
|
334
|
+
else:
|
|
335
|
+
lines.append(f" df_{tx_safe} = {input_df}.copy()")
|
|
336
|
+
source_dfs[tx.name] = f"df_{tx_safe}"
|
|
337
|
+
|
|
338
|
+
|
|
339
|
+
def _gen_aggregator_transform(lines, tx, tx_safe, input_df, source_dfs):
|
|
340
|
+
group_by_ports = []
|
|
341
|
+
agg_ports = []
|
|
342
|
+
for fld in tx.fields:
|
|
343
|
+
if "INPUT" in (fld.porttype or "").upper() and "OUTPUT" in (fld.porttype or "").upper():
|
|
344
|
+
group_by_ports.append(fld.name)
|
|
345
|
+
elif "OUTPUT" in (fld.porttype or "").upper():
|
|
346
|
+
agg_ports.append(fld)
|
|
347
|
+
|
|
348
|
+
if group_by_ports:
|
|
349
|
+
lines.append(f" df_{tx_safe} = {input_df}.groupby({group_by_ports}).agg(")
|
|
350
|
+
for ap in agg_ports:
|
|
351
|
+
expr = ap.expression or ap.name
|
|
352
|
+
lines.append(f" # {ap.name}: {expr}")
|
|
353
|
+
lines.append(f" ).reset_index()")
|
|
354
|
+
else:
|
|
355
|
+
lines.append(f" df_{tx_safe} = {input_df}.copy()")
|
|
356
|
+
source_dfs[tx.name] = f"df_{tx_safe}"
|
|
357
|
+
|
|
358
|
+
|
|
359
|
+
def _gen_sorter_transform(lines, tx, tx_safe, input_df, source_dfs):
|
|
360
|
+
sort_keys = []
|
|
361
|
+
sort_dirs = []
|
|
362
|
+
for fld in tx.fields:
|
|
363
|
+
sort_keys.append(fld.name)
|
|
364
|
+
sort_dirs.append(True)
|
|
365
|
+
if sort_keys:
|
|
366
|
+
lines.append(f" df_{tx_safe} = {input_df}.sort_values(by={sort_keys}, ascending={sort_dirs}).reset_index(drop=True)")
|
|
367
|
+
else:
|
|
368
|
+
lines.append(f" df_{tx_safe} = {input_df}.copy()")
|
|
369
|
+
source_dfs[tx.name] = f"df_{tx_safe}"
|
|
370
|
+
|
|
371
|
+
|
|
372
|
+
def _gen_joiner_transform(lines, tx, tx_safe, input_df, input_sources, source_dfs):
|
|
373
|
+
join_type = "inner"
|
|
374
|
+
join_condition = ""
|
|
375
|
+
for attr in tx.attributes:
|
|
376
|
+
if attr.name == "Join Type":
|
|
377
|
+
jt = attr.value.upper()
|
|
378
|
+
if "FULL" in jt:
|
|
379
|
+
join_type = "outer"
|
|
380
|
+
elif "MASTER" in jt:
|
|
381
|
+
join_type = "left"
|
|
382
|
+
elif "DETAIL" in jt:
|
|
383
|
+
join_type = "right"
|
|
384
|
+
elif attr.name == "Join Condition":
|
|
385
|
+
join_condition = attr.value
|
|
386
|
+
|
|
387
|
+
src_list = list(input_sources)
|
|
388
|
+
if len(src_list) >= 2:
|
|
389
|
+
df1 = source_dfs.get(src_list[0], f"df_{_safe_name(src_list[0])}")
|
|
390
|
+
df2 = source_dfs.get(src_list[1], f"df_{_safe_name(src_list[1])}")
|
|
391
|
+
lines.append(f" # Join: {join_condition or 'auto'}")
|
|
392
|
+
lines.append(f" df_{tx_safe} = {df1}.merge({df2}, how='{join_type}')")
|
|
393
|
+
else:
|
|
394
|
+
lines.append(f" df_{tx_safe} = {input_df}.copy()")
|
|
395
|
+
source_dfs[tx.name] = f"df_{tx_safe}"
|
|
396
|
+
|
|
397
|
+
|
|
398
|
+
def _gen_lookup_transform(lines, tx, tx_safe, input_df, source_dfs):
|
|
399
|
+
lookup_table = ""
|
|
400
|
+
lookup_sql = ""
|
|
401
|
+
lookup_condition = ""
|
|
402
|
+
lookup_cache = "YES"
|
|
403
|
+
for attr in tx.attributes:
|
|
404
|
+
if attr.name == "Lookup table name":
|
|
405
|
+
lookup_table = attr.value
|
|
406
|
+
elif attr.name == "Lookup Sql Override" and attr.value:
|
|
407
|
+
lookup_sql = convert_sql_expression(attr.value)
|
|
408
|
+
elif attr.name == "Lookup condition":
|
|
409
|
+
lookup_condition = attr.value
|
|
410
|
+
elif attr.name == "Lookup caching enabled":
|
|
411
|
+
lookup_cache = attr.value
|
|
412
|
+
|
|
413
|
+
return_fields = [f for f in tx.fields if "RETURN" in (f.porttype or "").upper() or
|
|
414
|
+
("LOOKUP" in (f.porttype or "").upper() and "OUTPUT" in (f.porttype or "").upper()
|
|
415
|
+
and "INPUT" not in (f.porttype or "").upper())]
|
|
416
|
+
input_fields = [f for f in tx.fields if "INPUT" in (f.porttype or "").upper()]
|
|
417
|
+
|
|
418
|
+
lines.append(f" # Lookup: {lookup_table or tx.name}")
|
|
419
|
+
if lookup_sql:
|
|
420
|
+
lines.append(f" lkp_sql_{tx_safe} = '''")
|
|
421
|
+
for sql_line in lookup_sql.strip().split("\n"):
|
|
422
|
+
lines.append(f" {sql_line}")
|
|
423
|
+
lines.append(f" '''")
|
|
424
|
+
lines.append(f" df_lkp_{tx_safe} = read_from_db(config, lkp_sql_{tx_safe}, 'default')")
|
|
425
|
+
elif lookup_table:
|
|
426
|
+
lines.append(f" df_lkp_{tx_safe} = read_from_db(config, 'SELECT * FROM {lookup_table}', 'default')")
|
|
427
|
+
else:
|
|
428
|
+
lines.append(f" df_lkp_{tx_safe} = pd.DataFrame() # TODO: Configure lookup source")
|
|
429
|
+
|
|
430
|
+
if lookup_condition:
|
|
431
|
+
lines.append(f" # Condition: {lookup_condition}")
|
|
432
|
+
|
|
433
|
+
lines.append(f" df_{tx_safe} = {input_df}.copy()")
|
|
434
|
+
|
|
435
|
+
if return_fields:
|
|
436
|
+
ret_names = [f.name for f in return_fields]
|
|
437
|
+
lines.append(f" # Lookup returns: {ret_names}")
|
|
438
|
+
lines.append(f" # TODO: Implement lookup merge logic based on condition: {lookup_condition}")
|
|
439
|
+
for rf in return_fields:
|
|
440
|
+
lines.append(f" df_{tx_safe}['{rf.name}'] = None # From lookup")
|
|
441
|
+
|
|
442
|
+
source_dfs[tx.name] = f"df_{tx_safe}"
|
|
443
|
+
|
|
444
|
+
|
|
445
|
+
def _gen_router_transform(lines, tx, tx_safe, input_df, source_dfs):
|
|
446
|
+
lines.append(f" # Router groups:")
|
|
447
|
+
group_conditions = {}
|
|
448
|
+
for attr in tx.attributes:
|
|
449
|
+
if "Group Filter Condition" in attr.name:
|
|
450
|
+
group_conditions[attr.name] = attr.value
|
|
451
|
+
|
|
452
|
+
if group_conditions:
|
|
453
|
+
for i, (gname, cond) in enumerate(group_conditions.items()):
|
|
454
|
+
expr_py = convert_expression(cond) if cond else "True"
|
|
455
|
+
lines.append(f" df_{tx_safe}_group{i} = {input_df}[{expr_py}].copy() # {gname}")
|
|
456
|
+
source_dfs[f"{tx.name}_group{i}"] = f"df_{tx_safe}_group{i}"
|
|
457
|
+
lines.append(f" df_{tx_safe} = {input_df}.copy() # Default group")
|
|
458
|
+
source_dfs[tx.name] = f"df_{tx_safe}"
|
|
459
|
+
|
|
460
|
+
|
|
461
|
+
def _gen_union_transform(lines, tx, tx_safe, input_sources, source_dfs):
|
|
462
|
+
dfs_to_union = []
|
|
463
|
+
for src in input_sources:
|
|
464
|
+
df_name = source_dfs.get(src, f"df_{_safe_name(src)}")
|
|
465
|
+
dfs_to_union.append(df_name)
|
|
466
|
+
|
|
467
|
+
if len(dfs_to_union) > 1:
|
|
468
|
+
df_list = ", ".join(dfs_to_union)
|
|
469
|
+
lines.append(f" df_{tx_safe} = pd.concat([{df_list}], ignore_index=True)")
|
|
470
|
+
elif dfs_to_union:
|
|
471
|
+
lines.append(f" df_{tx_safe} = {dfs_to_union[0]}.copy()")
|
|
472
|
+
else:
|
|
473
|
+
lines.append(f" df_{tx_safe} = pd.DataFrame()")
|
|
474
|
+
source_dfs[tx.name] = f"df_{tx_safe}"
|
|
475
|
+
|
|
476
|
+
|
|
477
|
+
def _gen_update_strategy(lines, tx, tx_safe, input_df, source_dfs):
|
|
478
|
+
strategy_expr = "0"
|
|
479
|
+
for attr in tx.attributes:
|
|
480
|
+
if attr.name == "Update Strategy Expression":
|
|
481
|
+
strategy_expr = attr.value
|
|
482
|
+
strategy_map = {"0": "INSERT", "1": "UPDATE", "2": "DELETE", "3": "REJECT"}
|
|
483
|
+
strategy_name = strategy_map.get(strategy_expr, f"EXPR({strategy_expr})")
|
|
484
|
+
lines.append(f" # Update Strategy: {strategy_name}")
|
|
485
|
+
lines.append(f" df_{tx_safe} = {input_df}.copy()")
|
|
486
|
+
lines.append(f" df_{tx_safe}['_update_strategy'] = '{strategy_name}'")
|
|
487
|
+
source_dfs[tx.name] = f"df_{tx_safe}"
|
|
488
|
+
|
|
489
|
+
|
|
490
|
+
def _gen_sequence_generator(lines, tx, tx_safe, input_df, source_dfs):
|
|
491
|
+
start_val = "1"
|
|
492
|
+
increment = "1"
|
|
493
|
+
for attr in tx.attributes:
|
|
494
|
+
if attr.name == "Start Value":
|
|
495
|
+
start_val = attr.value or "1"
|
|
496
|
+
elif attr.name == "Increment By":
|
|
497
|
+
increment = attr.value or "1"
|
|
498
|
+
|
|
499
|
+
seq_field = None
|
|
500
|
+
for fld in tx.fields:
|
|
501
|
+
if fld.porttype and "OUTPUT" in fld.porttype.upper() and "NEXTVAL" in fld.name.upper():
|
|
502
|
+
seq_field = fld.name
|
|
503
|
+
break
|
|
504
|
+
if not seq_field:
|
|
505
|
+
for fld in tx.fields:
|
|
506
|
+
if fld.porttype and "OUTPUT" in fld.porttype.upper():
|
|
507
|
+
seq_field = fld.name
|
|
508
|
+
break
|
|
509
|
+
|
|
510
|
+
lines.append(f" df_{tx_safe} = {input_df}.copy()")
|
|
511
|
+
if seq_field:
|
|
512
|
+
lines.append(f" df_{tx_safe}['{seq_field}'] = range({start_val}, {start_val} + len(df_{tx_safe}) * {increment}, {increment})")
|
|
513
|
+
source_dfs[tx.name] = f"df_{tx_safe}"
|
|
514
|
+
|
|
515
|
+
|
|
516
|
+
def _gen_normalizer_transform(lines, tx, tx_safe, input_df, source_dfs):
|
|
517
|
+
lines.append(f" # TODO: Normalizer transformation - implement based on specific normalization logic")
|
|
518
|
+
lines.append(f" df_{tx_safe} = {input_df}.copy()")
|
|
519
|
+
source_dfs[tx.name] = f"df_{tx_safe}"
|
|
520
|
+
|
|
521
|
+
|
|
522
|
+
def _gen_rank_transform(lines, tx, tx_safe, input_df, source_dfs):
|
|
523
|
+
rank_port = None
|
|
524
|
+
group_by = []
|
|
525
|
+
for fld in tx.fields:
|
|
526
|
+
if "RANK" in fld.name.upper():
|
|
527
|
+
rank_port = fld.name
|
|
528
|
+
top_bottom = "TOP"
|
|
529
|
+
for attr in tx.attributes:
|
|
530
|
+
if attr.name == "Top/Bottom":
|
|
531
|
+
top_bottom = attr.value
|
|
532
|
+
|
|
533
|
+
lines.append(f" df_{tx_safe} = {input_df}.copy()")
|
|
534
|
+
if rank_port:
|
|
535
|
+
ascending = top_bottom.upper() != "TOP"
|
|
536
|
+
lines.append(f" df_{tx_safe}['RANK_INDEX'] = df_{tx_safe}['{rank_port}'].rank(ascending={ascending})")
|
|
537
|
+
source_dfs[tx.name] = f"df_{tx_safe}"
|
|
538
|
+
|
|
539
|
+
|
|
540
|
+
def _gen_custom_transform(lines, tx, tx_safe, input_df, input_sources, source_dfs):
|
|
541
|
+
is_union = False
|
|
542
|
+
output_fields = []
|
|
543
|
+
input_groups = {}
|
|
544
|
+
|
|
545
|
+
for fld in tx.fields:
|
|
546
|
+
if "OUTPUT" in (fld.porttype or "").upper():
|
|
547
|
+
output_fields.append(fld)
|
|
548
|
+
group_suffix_match = None
|
|
549
|
+
import re
|
|
550
|
+
m = re.match(r'^(.+?)(\d+)$', fld.name)
|
|
551
|
+
if m and "INPUT" in (fld.porttype or "").upper():
|
|
552
|
+
base_name = m.group(1)
|
|
553
|
+
group_idx = m.group(2)
|
|
554
|
+
if group_idx not in input_groups:
|
|
555
|
+
input_groups[group_idx] = []
|
|
556
|
+
input_groups[group_idx].append(fld)
|
|
557
|
+
|
|
558
|
+
if len(input_groups) > 1:
|
|
559
|
+
is_union = True
|
|
560
|
+
|
|
561
|
+
if is_union:
|
|
562
|
+
dfs_to_union = []
|
|
563
|
+
for src in input_sources:
|
|
564
|
+
df_name = source_dfs.get(src, f"df_{_safe_name(src)}")
|
|
565
|
+
dfs_to_union.append(df_name)
|
|
566
|
+
if len(dfs_to_union) > 1:
|
|
567
|
+
df_list = ", ".join(dfs_to_union)
|
|
568
|
+
lines.append(f" df_{tx_safe} = pd.concat([{df_list}], ignore_index=True)")
|
|
569
|
+
elif dfs_to_union:
|
|
570
|
+
lines.append(f" df_{tx_safe} = {dfs_to_union[0]}.copy()")
|
|
571
|
+
else:
|
|
572
|
+
lines.append(f" df_{tx_safe} = pd.DataFrame()")
|
|
573
|
+
else:
|
|
574
|
+
lines.append(f" # Custom transformation: {tx.name}")
|
|
575
|
+
lines.append(f" df_{tx_safe} = {input_df}.copy()")
|
|
576
|
+
|
|
577
|
+
source_dfs[tx.name] = f"df_{tx_safe}"
|
|
578
|
+
|
|
579
|
+
|
|
580
|
+
def _gen_stored_proc(lines, tx, tx_safe, input_df, source_dfs):
|
|
581
|
+
proc_name = ""
|
|
582
|
+
for attr in tx.attributes:
|
|
583
|
+
if attr.name in ("Stored Procedure Name", "sp name"):
|
|
584
|
+
proc_name = attr.value
|
|
585
|
+
|
|
586
|
+
lines.append(f" # Stored Procedure: {proc_name or tx.name}")
|
|
587
|
+
lines.append(f" # TODO: Execute stored procedure and capture results")
|
|
588
|
+
lines.append(f" df_{tx_safe} = {input_df}.copy()")
|
|
589
|
+
source_dfs[tx.name] = f"df_{tx_safe}"
|
|
590
|
+
|
|
591
|
+
|
|
592
|
+
def _gen_java_transform(lines, tx, tx_safe, input_df, source_dfs):
|
|
593
|
+
lines.append(f" # Java Transformation: {tx.name}")
|
|
594
|
+
lines.append(f" # TODO: Port Java logic to Python")
|
|
595
|
+
lines.append(f" df_{tx_safe} = {input_df}.copy()")
|
|
596
|
+
source_dfs[tx.name] = f"df_{tx_safe}"
|
|
597
|
+
|
|
598
|
+
|
|
599
|
+
def _gen_sql_transform(lines, tx, tx_safe, input_df, source_dfs):
|
|
600
|
+
sql_query = ""
|
|
601
|
+
for attr in tx.attributes:
|
|
602
|
+
if attr.name == "Sql Query" and attr.value:
|
|
603
|
+
sql_query = convert_sql_expression(attr.value)
|
|
604
|
+
lines.append(f" # SQL Transformation: {tx.name}")
|
|
605
|
+
if sql_query:
|
|
606
|
+
lines.append(f" sql_{tx_safe} = '''{sql_query}'''")
|
|
607
|
+
lines.append(f" df_{tx_safe} = read_from_db(config, sql_{tx_safe}, 'default')")
|
|
608
|
+
else:
|
|
609
|
+
lines.append(f" df_{tx_safe} = {input_df}.copy()")
|
|
610
|
+
source_dfs[tx.name] = f"df_{tx_safe}"
|
|
611
|
+
|
|
612
|
+
|
|
613
|
+
def _generate_target_write(lines, tgt_name, tgt_def, connector_graph, source_dfs, transform_map, instance_map):
    """Append generated write code for one target instance.

    Resolves the feeding dataframe from the connector graph, renames columns
    per the target's connectors, optionally subsets to the target's declared
    fields, and emits a database or file write depending on the target type.
    """
    tgt_safe = _safe_name(tgt_name)

    # Choose the input dataframe: a registered feeder first, then a name
    # derived from any connector, finally a placeholder.
    to_conns = connector_graph.get("to", {}).get(tgt_name, [])
    input_df = None
    for c in to_conns:
        if c.from_instance in source_dfs:
            input_df = source_dfs[c.from_instance]
            break
    if not input_df:
        for c in to_conns:
            input_df = f"df_{_safe_name(c.from_instance)}"
            break

    if not input_df:
        input_df = "df_output"

    # Target field -> source field, taken from the inbound connectors.
    col_mapping = {}
    for c in to_conns:
        col_mapping[c.to_field] = c.from_field

    lines.append(f"    # Write to target: {tgt_def.name}")
    if col_mapping:
        # Rename source columns to target names (mapping is inverted for
        # DataFrame.rename), then keep only declared target columns that
        # actually exist.
        lines.append(f"    target_columns_{tgt_safe} = {col_mapping}")
        lines.append(f"    df_target_{tgt_safe} = {input_df}.rename(columns={{v: k for k, v in target_columns_{tgt_safe}.items()}})")
        target_cols = [f.name for f in tgt_def.fields] if tgt_def.fields else None
        if target_cols:
            lines.append(f"    available_cols = [c for c in {target_cols} if c in df_target_{tgt_safe}.columns]")
            lines.append(f"    df_target_{tgt_safe} = df_target_{tgt_safe}[available_cols]")
    else:
        lines.append(f"    df_target_{tgt_safe} = {input_df}")

    # Relational targets write via the DB helper; everything else is a file.
    if tgt_def.database_type and tgt_def.database_type != "Flat File":
        lines.append(f"    write_to_db(config, df_target_{tgt_safe}, '{tgt_def.name}', 'target')")
    else:
        lines.append(f"    write_file(df_target_{tgt_safe}, config.get('targets', {{}}).get('{tgt_def.name}', {{}}).get('file_path', '{tgt_def.name}'),")
        lines.append(f"               config.get('targets', {{}}).get('{tgt_def.name}', {{}}))")