informatica-python 1.9.1__py3-none-any.whl → 1.9.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- informatica_python/__init__.py +1 -1
- informatica_python/generators/helper_gen.py +11 -0
- informatica_python/generators/mapping_gen.py +141 -57
- informatica_python/generators/workflow_gen.py +21 -4
- informatica_python/utils/expression_converter.py +865 -84
- {informatica_python-1.9.1.dist-info → informatica_python-1.9.3.dist-info}/METADATA +181 -47
- {informatica_python-1.9.1.dist-info → informatica_python-1.9.3.dist-info}/RECORD +11 -11
- {informatica_python-1.9.1.dist-info → informatica_python-1.9.3.dist-info}/WHEEL +0 -0
- {informatica_python-1.9.1.dist-info → informatica_python-1.9.3.dist-info}/entry_points.txt +0 -0
- {informatica_python-1.9.1.dist-info → informatica_python-1.9.3.dist-info}/licenses/LICENSE +0 -0
- {informatica_python-1.9.1.dist-info → informatica_python-1.9.3.dist-info}/top_level.txt +0 -0
informatica_python/__init__.py
CHANGED
|
@@ -151,6 +151,17 @@ def _add_db_functions(lines, data_lib):
|
|
|
151
151
|
lines.append(" return pyodbc.connect(conn_str)")
|
|
152
152
|
lines.append(" except ImportError:")
|
|
153
153
|
lines.append(" pass")
|
|
154
|
+
lines.append(" try:")
|
|
155
|
+
lines.append(" import pymssql")
|
|
156
|
+
lines.append(" return pymssql.connect(server=host, port=int(port), database=database, user=username, password=password)")
|
|
157
|
+
lines.append(" except ImportError:")
|
|
158
|
+
lines.append(" pass")
|
|
159
|
+
lines.append(" try:")
|
|
160
|
+
lines.append(" from sqlalchemy import create_engine")
|
|
161
|
+
lines.append(" engine = create_engine(f'mssql+pymssql://{username}:{password}@{host}:{port}/{database}')")
|
|
162
|
+
lines.append(" return engine.connect()")
|
|
163
|
+
lines.append(" except ImportError:")
|
|
164
|
+
lines.append(" pass")
|
|
154
165
|
lines.append("")
|
|
155
166
|
lines.append(" if db_type == 'postgresql':")
|
|
156
167
|
lines.append(" try:")
|
|
@@ -1,3 +1,4 @@
|
|
|
1
|
+
import os
|
|
1
2
|
from typing import List, Dict
|
|
2
3
|
from informatica_python.models import (
|
|
3
4
|
MappingDef, FolderDef, SourceDef, TargetDef,
|
|
@@ -227,6 +228,7 @@ def generate_mapping_code(mapping: MappingDef, folder: FolderDef,
|
|
|
227
228
|
lines.append("")
|
|
228
229
|
lines.append("import logging")
|
|
229
230
|
lines.append("import numpy as np")
|
|
231
|
+
lines.append("import pandas as pd")
|
|
230
232
|
lines.append("from helper_functions import *")
|
|
231
233
|
lines.append("")
|
|
232
234
|
lines.append("logger = logging.getLogger(__name__)")
|
|
@@ -373,7 +375,40 @@ def generate_mapping_code(mapping: MappingDef, folder: FolderDef,
|
|
|
373
375
|
lines.append(f" run_{_safe_name(mapping.name)}(config)")
|
|
374
376
|
lines.append("")
|
|
375
377
|
|
|
376
|
-
|
|
378
|
+
code = "\n".join(lines)
|
|
379
|
+
func_sig = f"def run_{_safe_name(mapping.name)}(config):"
|
|
380
|
+
sig_idx = code.index(func_sig) + len(func_sig)
|
|
381
|
+
docstring_end = code.index('"""', code.index('"""', sig_idx) + 3) + 3
|
|
382
|
+
before_body = code[:docstring_end]
|
|
383
|
+
after_docstring = code[docstring_end:]
|
|
384
|
+
main_sentinel = "\n\nif __name__"
|
|
385
|
+
body_end_idx = after_docstring.index(main_sentinel)
|
|
386
|
+
body = after_docstring[:body_end_idx]
|
|
387
|
+
rest = after_docstring[body_end_idx:]
|
|
388
|
+
body_lines = body.split("\n")
|
|
389
|
+
while body_lines and body_lines[0].strip() == "":
|
|
390
|
+
body_lines.pop(0)
|
|
391
|
+
while body_lines and body_lines[-1].strip() == "":
|
|
392
|
+
body_lines.pop()
|
|
393
|
+
wrapped = []
|
|
394
|
+
wrapped.append("")
|
|
395
|
+
wrapped.append(" try:")
|
|
396
|
+
prev_blank = False
|
|
397
|
+
for bl in body_lines:
|
|
398
|
+
if bl.strip() == "":
|
|
399
|
+
if not prev_blank:
|
|
400
|
+
wrapped.append("")
|
|
401
|
+
prev_blank = True
|
|
402
|
+
else:
|
|
403
|
+
wrapped.append(" " + bl)
|
|
404
|
+
prev_blank = False
|
|
405
|
+
wrapped.append("")
|
|
406
|
+
wrapped.append(" except Exception as _exc:")
|
|
407
|
+
wrapped.append(f" logger.error(f'Mapping {mapping.name} failed: {{_exc}}')")
|
|
408
|
+
wrapped.append(" raise")
|
|
409
|
+
wrapped.append("")
|
|
410
|
+
|
|
411
|
+
return before_body + "\n".join(wrapped) + rest
|
|
377
412
|
|
|
378
413
|
|
|
379
414
|
def _safe_name(name):
|
|
@@ -384,6 +419,22 @@ def _safe_name(name):
|
|
|
384
419
|
return safe.lower()
|
|
385
420
|
|
|
386
421
|
|
|
422
|
+
def _emit_sql_with_params(lines, sql_var_name, sql_text, indent=" "):
|
|
423
|
+
import re
|
|
424
|
+
params = re.findall(r'\$\$(\w+)', sql_text)
|
|
425
|
+
lines.append(f"{indent}{sql_var_name} = '''")
|
|
426
|
+
for sql_line in sql_text.strip().split("\n"):
|
|
427
|
+
lines.append(f"{indent}{sql_line}")
|
|
428
|
+
lines.append(f"{indent}'''")
|
|
429
|
+
if params:
|
|
430
|
+
seen = set()
|
|
431
|
+
for p in params:
|
|
432
|
+
if p in seen:
|
|
433
|
+
continue
|
|
434
|
+
seen.add(p)
|
|
435
|
+
lines.append(f"{indent}{sql_var_name} = {sql_var_name}.replace('$${p}', str(get_param(config, '{p}')))")
|
|
436
|
+
|
|
437
|
+
|
|
387
438
|
def _flatfile_config_dict(ff):
|
|
388
439
|
cfg = {}
|
|
389
440
|
if not ff:
|
|
@@ -502,7 +553,7 @@ def _emit_flatfile_write(lines, var_name, tgt_def, indent=" ", file_path_over
|
|
|
502
553
|
def _build_source_map(mapping, folder):
|
|
503
554
|
source_map = {}
|
|
504
555
|
for inst in mapping.instances:
|
|
505
|
-
if inst.type
|
|
556
|
+
if inst.type.upper() in ("SOURCE DEFINITION", "SOURCE"):
|
|
506
557
|
tx_name = inst.transformation_name or inst.name
|
|
507
558
|
for src in folder.sources:
|
|
508
559
|
if src.name == tx_name:
|
|
@@ -516,7 +567,7 @@ def _build_source_map(mapping, folder):
|
|
|
516
567
|
def _build_target_map(mapping, folder):
|
|
517
568
|
target_map = {}
|
|
518
569
|
for inst in mapping.instances:
|
|
519
|
-
if inst.type
|
|
570
|
+
if inst.type.upper() in ("TARGET DEFINITION", "TARGET"):
|
|
520
571
|
tx_name = inst.transformation_name or inst.name
|
|
521
572
|
for tgt in folder.targets:
|
|
522
573
|
if tgt.name == tx_name:
|
|
@@ -592,7 +643,9 @@ def _generate_source_qualifier(lines, sq, source_map, source_dfs, connector_grap
|
|
|
592
643
|
if not connected_sources and source_map:
|
|
593
644
|
connected_sources.add(next(iter(source_map)))
|
|
594
645
|
|
|
646
|
+
lines.append(f" # -------------------------------------------------------------------")
|
|
595
647
|
lines.append(f" # Source Qualifier: {sq.name}")
|
|
648
|
+
lines.append(f" # -------------------------------------------------------------------")
|
|
596
649
|
|
|
597
650
|
if pre_sql:
|
|
598
651
|
lines.append(f" # Pre-SQL")
|
|
@@ -604,10 +657,7 @@ def _generate_source_qualifier(lines, sq, source_map, source_dfs, connector_grap
|
|
|
604
657
|
if not connected_sources:
|
|
605
658
|
sq_src_name = sq.name[3:] if sq.name.upper().startswith("SQ_") else sq.name
|
|
606
659
|
if sql_override:
|
|
607
|
-
lines
|
|
608
|
-
for sql_line in sql_override.strip().split("\n"):
|
|
609
|
-
lines.append(f" {sql_line}")
|
|
610
|
-
lines.append(f" '''")
|
|
660
|
+
_emit_sql_with_params(lines, f"sql_{sq_safe}", sql_override)
|
|
611
661
|
lines.append(f" df_{sq_safe} = read_from_db(config, sql_{sq_safe}, 'default')")
|
|
612
662
|
else:
|
|
613
663
|
lines.append(f" df_{sq_safe} = read_file(config.get('sources', {{}}).get('{sq_src_name}', {{}}).get('file_path', '{sq_src_name}'),")
|
|
@@ -618,10 +668,7 @@ def _generate_source_qualifier(lines, sq, source_map, source_dfs, connector_grap
|
|
|
618
668
|
sq_override = (session_overrides or {}).get(sq.name, {}) or (session_overrides or {}).get(src_name, {})
|
|
619
669
|
conn_name = sq_override.get("connection_name") or (_safe_name(src_def.db_name) if src_def.db_name else "default")
|
|
620
670
|
|
|
621
|
-
lines
|
|
622
|
-
for sql_line in sql_override.strip().split("\n"):
|
|
623
|
-
lines.append(f" {sql_line}")
|
|
624
|
-
lines.append(f" '''")
|
|
671
|
+
_emit_sql_with_params(lines, f"sql_{sq_safe}", sql_override)
|
|
625
672
|
lines.append(f" df_{sq_safe} = read_from_db(config, sql_{sq_safe}, '{conn_name}')")
|
|
626
673
|
elif len(connected_sources) == 1:
|
|
627
674
|
src_name = next(iter(connected_sources))
|
|
@@ -654,10 +701,7 @@ def _generate_source_qualifier(lines, sq, source_map, source_dfs, connector_grap
|
|
|
654
701
|
lines.append(f" df_{sq_safe} = df_{_safe_name(next(iter(connected_sources)))}")
|
|
655
702
|
|
|
656
703
|
source_dfs[sq.name] = f"df_{sq_safe}"
|
|
657
|
-
lines.append(f"
|
|
658
|
-
lines.append(f" logger.info(f'Source {sq.name}: {{len(df_{sq_safe})}} rows read')")
|
|
659
|
-
lines.append(f" except Exception:")
|
|
660
|
-
lines.append(f" logger.info('Source {sq.name}: rows read (count unavailable)')")
|
|
704
|
+
lines.append(f" logger.info(f'Source {sq.name}: {{len(df_{sq_safe})}} rows read')")
|
|
661
705
|
|
|
662
706
|
if post_sql:
|
|
663
707
|
lines.append(f" # Post-SQL")
|
|
@@ -697,10 +741,10 @@ def _generate_transformation(lines, tx, connector_graph, source_dfs, transform_m
|
|
|
697
741
|
lines.append(f" # Input fields: {', '.join(in_fields[:10])}{' ...' if len(in_fields) > 10 else ''}")
|
|
698
742
|
lines.append(f" # Output fields: {', '.join(out_fields[:10])}{' ...' if len(out_fields) > 10 else ''}")
|
|
699
743
|
lines.append(f" # -------------------------------------------------------------------")
|
|
700
|
-
|
|
701
|
-
|
|
702
|
-
|
|
703
|
-
|
|
744
|
+
if input_df == "df_input":
|
|
745
|
+
lines.append(f" _input_rows_{tx_safe} = -1")
|
|
746
|
+
else:
|
|
747
|
+
lines.append(f" _input_rows_{tx_safe} = len({input_df})")
|
|
704
748
|
|
|
705
749
|
if tx_type == "expression":
|
|
706
750
|
_gen_expression_transform(lines, tx, tx_safe, input_df, source_dfs, data_lib)
|
|
@@ -740,28 +784,26 @@ def _generate_transformation(lines, tx, connector_graph, source_dfs, transform_m
|
|
|
740
784
|
lines.append(f" df_{tx_safe} = {copy_expr}")
|
|
741
785
|
source_dfs[tx.name] = f"df_{tx_safe}"
|
|
742
786
|
|
|
743
|
-
lines.append(f"
|
|
744
|
-
lines.append(f" _output_rows_{tx_safe} = len(df_{tx_safe})")
|
|
745
|
-
lines.append(f" except Exception:")
|
|
746
|
-
lines.append(f" _output_rows_{tx_safe} = -1")
|
|
787
|
+
lines.append(f" _output_rows_{tx_safe} = len(df_{tx_safe})")
|
|
747
788
|
lines.append(f" logger.info(f'{tx.name} ({tx.type}): {{_input_rows_{tx_safe}}} input rows -> {{_output_rows_{tx_safe}}} output rows')")
|
|
748
789
|
lines.append("")
|
|
749
790
|
|
|
750
791
|
|
|
751
792
|
def _gen_expression_transform(lines, tx, tx_safe, input_df, source_dfs, data_lib="pandas"):
|
|
752
|
-
|
|
753
|
-
lines.append(f" df_{tx_safe} = {copy_expr}")
|
|
754
|
-
has_expressions = False
|
|
793
|
+
active_fields = []
|
|
755
794
|
for fld in tx.fields:
|
|
756
|
-
if fld.expression and fld.expression.strip() and fld.expression.strip() != fld.name:
|
|
757
|
-
|
|
795
|
+
if fld.expression and fld.expression.strip() and fld.expression.strip().lower() != fld.name.lower():
|
|
796
|
+
active_fields.append(fld)
|
|
797
|
+
|
|
798
|
+
if active_fields:
|
|
799
|
+
copy_expr = lib_copy(data_lib, input_df)
|
|
800
|
+
lines.append(f" df_{tx_safe} = {copy_expr}")
|
|
801
|
+
for fld in active_fields:
|
|
758
802
|
expr_vec = convert_expression_vectorized(fld.expression, f"df_{tx_safe}")
|
|
759
803
|
lines.append(f" # {fld.name} = {fld.expression}")
|
|
760
|
-
|
|
761
|
-
|
|
762
|
-
|
|
763
|
-
lines.append(f" df_{tx_safe}['{fld.name}'] = {expr_vec}")
|
|
764
|
-
if not has_expressions:
|
|
804
|
+
lines.append(f" df_{tx_safe}['{fld.name}'] = {expr_vec}")
|
|
805
|
+
else:
|
|
806
|
+
lines.append(f" df_{tx_safe} = {input_df}")
|
|
765
807
|
lines.append(f" # Pass-through expression (no transformations)")
|
|
766
808
|
source_dfs[tx.name] = f"df_{tx_safe}"
|
|
767
809
|
|
|
@@ -840,7 +882,11 @@ def _gen_sorter_transform(lines, tx, tx_safe, input_df, source_dfs, data_lib="pa
|
|
|
840
882
|
sort_dirs = []
|
|
841
883
|
for fld in tx.fields:
|
|
842
884
|
sort_keys.append(fld.name)
|
|
843
|
-
|
|
885
|
+
direction = 'ASCENDING'
|
|
886
|
+
for fa in getattr(fld, 'field_attributes', []):
|
|
887
|
+
if isinstance(fa, dict) and fa.get('name', '').upper() == 'SORTDIRECTION':
|
|
888
|
+
direction = fa.get('value', 'ASCENDING') or 'ASCENDING'
|
|
889
|
+
sort_dirs.append(direction.upper() != 'DESCENDING')
|
|
844
890
|
if sort_keys:
|
|
845
891
|
sort_expr = lib_sort(data_lib, input_df, sort_keys, sort_dirs)
|
|
846
892
|
lines.append(f" df_{tx_safe} = {sort_expr}")
|
|
@@ -879,13 +925,23 @@ def _gen_joiner_transform(lines, tx, tx_safe, input_df, input_sources, source_df
|
|
|
879
925
|
master_src = None
|
|
880
926
|
detail_src = None
|
|
881
927
|
input_conns = connector_graph.get("to", {}).get(tx.name, []) if connector_graph else []
|
|
928
|
+
|
|
929
|
+
port_to_col = {}
|
|
930
|
+
master_fields_lower = {f.lower() for f in master_fields}
|
|
931
|
+
detail_fields_lower = {f.lower() for f in detail_fields}
|
|
882
932
|
for conn in input_conns:
|
|
883
933
|
to_field = conn.to_field
|
|
884
|
-
|
|
934
|
+
port_to_col[to_field] = conn.from_field
|
|
935
|
+
port_to_col[to_field.lower()] = conn.from_field
|
|
936
|
+
if to_field in master_fields or to_field.lower() in master_fields_lower:
|
|
885
937
|
master_src = conn.from_instance
|
|
886
|
-
elif to_field in detail_fields:
|
|
938
|
+
elif to_field in detail_fields or to_field.lower() in detail_fields_lower:
|
|
887
939
|
detail_src = conn.from_instance
|
|
888
940
|
|
|
941
|
+
if left_keys and right_keys and port_to_col:
|
|
942
|
+
left_keys = [port_to_col.get(k, port_to_col.get(k.lower(), k)) for k in left_keys]
|
|
943
|
+
right_keys = [port_to_col.get(k, port_to_col.get(k.lower(), k)) for k in right_keys]
|
|
944
|
+
|
|
889
945
|
src_list = list(input_sources)
|
|
890
946
|
if not master_src and not detail_src and len(src_list) >= 2:
|
|
891
947
|
master_src = src_list[0]
|
|
@@ -958,10 +1014,7 @@ def _gen_lookup_transform(lines, tx, tx_safe, input_df, source_dfs, data_lib="pa
|
|
|
958
1014
|
|
|
959
1015
|
lines.append(f" # Lookup: {lookup_table or tx.name}")
|
|
960
1016
|
if lookup_sql:
|
|
961
|
-
lines
|
|
962
|
-
for sql_line in lookup_sql.strip().split("\n"):
|
|
963
|
-
lines.append(f" {sql_line}")
|
|
964
|
-
lines.append(f" '''")
|
|
1017
|
+
_emit_sql_with_params(lines, f"lkp_sql_{tx_safe}", lookup_sql)
|
|
965
1018
|
lines.append(f" df_lkp_{tx_safe} = read_from_db(config, lkp_sql_{tx_safe}, 'default')")
|
|
966
1019
|
elif lookup_table:
|
|
967
1020
|
lines.append(f" df_lkp_{tx_safe} = read_from_db(config, 'SELECT * FROM {lookup_table}', 'default')")
|
|
@@ -996,7 +1049,11 @@ def _gen_lookup_transform(lines, tx, tx_safe, input_df, source_dfs, data_lib="pa
|
|
|
996
1049
|
lines.append(f" if _lkp_drop:")
|
|
997
1050
|
lines.append(f" df_{tx_safe} = df_{tx_safe}.drop(columns=_lkp_drop)")
|
|
998
1051
|
|
|
1052
|
+
seen_output_cols = set()
|
|
999
1053
|
for rf in all_output_fields:
|
|
1054
|
+
if rf.name in seen_output_cols:
|
|
1055
|
+
continue
|
|
1056
|
+
seen_output_cols.add(rf.name)
|
|
1000
1057
|
lines.append(f" if '{rf.name}' not in df_{tx_safe}.columns:")
|
|
1001
1058
|
lines.append(f" df_{tx_safe}['{rf.name}'] = None")
|
|
1002
1059
|
if rf.default_value:
|
|
@@ -1071,14 +1128,19 @@ def _gen_update_strategy(lines, tx, tx_safe, input_df, source_dfs):
|
|
|
1071
1128
|
for dd_const, label in dd_map.items():
|
|
1072
1129
|
expr = expr.replace(dd_const, f"'{label}'")
|
|
1073
1130
|
try:
|
|
1074
|
-
|
|
1131
|
+
expr_vec = convert_expression_vectorized(expr, f"df_{tx_safe}")
|
|
1075
1132
|
lines.append(f" # Original expression: {strategy_expr}")
|
|
1076
|
-
lines.append(f"
|
|
1077
|
-
lines.append(f" return {converted}")
|
|
1078
|
-
lines.append(f" df_{tx_safe}['_update_strategy'] = df_{tx_safe}.apply(_resolve_strategy, axis=1)")
|
|
1133
|
+
lines.append(f" df_{tx_safe}['_update_strategy'] = {expr_vec}")
|
|
1079
1134
|
except Exception:
|
|
1080
|
-
|
|
1081
|
-
|
|
1135
|
+
try:
|
|
1136
|
+
converted = convert_expression(expr)
|
|
1137
|
+
lines.append(f" # Original expression: {strategy_expr}")
|
|
1138
|
+
lines.append(f" def _resolve_strategy(row):")
|
|
1139
|
+
lines.append(f" return {converted}")
|
|
1140
|
+
lines.append(f" df_{tx_safe}['_update_strategy'] = df_{tx_safe}.apply(_resolve_strategy, axis=1)")
|
|
1141
|
+
except Exception:
|
|
1142
|
+
lines.append(f" # Could not parse strategy expression: {strategy_expr}")
|
|
1143
|
+
lines.append(f" df_{tx_safe}['_update_strategy'] = 'INSERT'")
|
|
1082
1144
|
source_dfs[tx.name] = f"df_{tx_safe}"
|
|
1083
1145
|
|
|
1084
1146
|
|
|
@@ -1341,7 +1403,7 @@ def _gen_sql_transform(lines, tx, tx_safe, input_df, source_dfs):
|
|
|
1341
1403
|
sql_query = convert_sql_expression(attr.value)
|
|
1342
1404
|
lines.append(f" # SQL Transformation: {tx.name}")
|
|
1343
1405
|
if sql_query:
|
|
1344
|
-
lines
|
|
1406
|
+
_emit_sql_with_params(lines, f"sql_{tx_safe}", sql_query)
|
|
1345
1407
|
lines.append(f" df_{tx_safe} = read_from_db(config, sql_{tx_safe}, 'default')")
|
|
1346
1408
|
else:
|
|
1347
1409
|
lines.append(f" df_{tx_safe} = {input_df}.copy()")
|
|
@@ -1369,12 +1431,21 @@ def _generate_target_write(lines, tgt_name, tgt_def, connector_graph, source_dfs
|
|
|
1369
1431
|
for c in to_conns:
|
|
1370
1432
|
col_mapping[c.to_field] = c.from_field
|
|
1371
1433
|
|
|
1434
|
+
lines.append(f" # -------------------------------------------------------------------")
|
|
1372
1435
|
lines.append(f" # Write to target: {tgt_def.name}")
|
|
1436
|
+
if tgt_def.database_type:
|
|
1437
|
+
lines.append(f" # Database type: {tgt_def.database_type}")
|
|
1438
|
+
target_field_names = [f.name for f in tgt_def.fields] if tgt_def.fields else []
|
|
1439
|
+
if target_field_names:
|
|
1440
|
+
lines.append(f" # Target fields: {', '.join(target_field_names[:10])}{' ...' if len(target_field_names) > 10 else ''}")
|
|
1441
|
+
lines.append(f" # -------------------------------------------------------------------")
|
|
1373
1442
|
if col_mapping:
|
|
1443
|
+
lines.append(f" # Column mapping: source -> target")
|
|
1374
1444
|
lines.append(f" target_columns_{tgt_safe} = {col_mapping}")
|
|
1375
1445
|
lines.append(f" df_target_{tgt_safe} = {input_df}.rename(columns={{v: k for k, v in target_columns_{tgt_safe}.items()}})")
|
|
1376
1446
|
target_cols = [f.name for f in tgt_def.fields] if tgt_def.fields else None
|
|
1377
1447
|
if target_cols:
|
|
1448
|
+
lines.append(f" # Select only target columns")
|
|
1378
1449
|
lines.append(f" available_cols = [c for c in {target_cols} if c in df_target_{tgt_safe}.columns]")
|
|
1379
1450
|
lines.append(f" if '_update_strategy' in df_target_{tgt_safe}.columns and '_update_strategy' not in available_cols:")
|
|
1380
1451
|
lines.append(f" available_cols.append('_update_strategy')")
|
|
@@ -1387,17 +1458,37 @@ def _generate_target_write(lines, tgt_name, tgt_def, connector_graph, source_dfs
|
|
|
1387
1458
|
tgt_override = (session_overrides or {}).get(tgt_name, {})
|
|
1388
1459
|
tgt_conn = tgt_override.get("connection_name")
|
|
1389
1460
|
|
|
1461
|
+
_FILE_EXTENSIONS = {".csv", ".dat", ".txt", ".xml", ".json", ".parquet", ".xlsx", ".xls", ".tsv", ".avro"}
|
|
1462
|
+
_is_file_target = bool(
|
|
1463
|
+
tgt_override.get("output_file_directory") or tgt_override.get("output_filename")
|
|
1464
|
+
or tgt_def.flatfile
|
|
1465
|
+
or (tgt_def.database_type and tgt_def.database_type == "Flat File")
|
|
1466
|
+
or os.path.splitext(tgt_def.name)[1].lower() in _FILE_EXTENSIONS
|
|
1467
|
+
)
|
|
1468
|
+
_is_db_target = bool(
|
|
1469
|
+
tgt_def.database_type and tgt_def.database_type != "Flat File"
|
|
1470
|
+
)
|
|
1471
|
+
|
|
1390
1472
|
if tgt_override.get("output_file_directory") or tgt_override.get("output_filename"):
|
|
1391
1473
|
out_dir = tgt_override.get("output_file_directory", ".")
|
|
1392
1474
|
out_file = tgt_override.get("output_filename", tgt_def.name)
|
|
1475
|
+
lines.append(f" # Write to file (session override path)")
|
|
1393
1476
|
lines.append(f" _tgt_path_{tgt_safe} = config.get('targets', {{}}).get('{tgt_def.name}', {{}}).get('file_path',")
|
|
1394
1477
|
lines.append(f" os.path.join('{out_dir}', '{out_file}'))")
|
|
1395
1478
|
if tgt_def.flatfile:
|
|
1396
1479
|
_emit_flatfile_write(lines, tgt_safe, tgt_def, file_path_override=True)
|
|
1397
1480
|
else:
|
|
1398
1481
|
lines.append(f" write_file(df_target_{tgt_safe}, _tgt_path_{tgt_safe}, config.get('targets', {{}}).get('{tgt_def.name}', {{}}))")
|
|
1399
|
-
elif tgt_def.
|
|
1482
|
+
elif tgt_def.flatfile:
|
|
1483
|
+
lines.append(f" # Write to flat file")
|
|
1484
|
+
_emit_flatfile_write(lines, tgt_safe, tgt_def)
|
|
1485
|
+
elif _is_file_target and not _is_db_target:
|
|
1486
|
+
lines.append(f" # Write to file")
|
|
1487
|
+
lines.append(f" write_file(df_target_{tgt_safe}, config.get('targets', {{}}).get('{tgt_def.name}', {{}}).get('file_path', '{tgt_def.name}'),")
|
|
1488
|
+
lines.append(f" config.get('targets', {{}}).get('{tgt_def.name}', {{}}))")
|
|
1489
|
+
else:
|
|
1400
1490
|
conn_label = tgt_conn or "target"
|
|
1491
|
+
lines.append(f" # Write to database table")
|
|
1401
1492
|
lines.append(f" if '_update_strategy' in df_target_{tgt_safe}.columns:")
|
|
1402
1493
|
key_cols = [f.name for f in tgt_def.fields if getattr(f, 'keytype', 'NOT A KEY') == 'PRIMARY KEY'] or None
|
|
1403
1494
|
if key_cols:
|
|
@@ -1406,15 +1497,8 @@ def _generate_target_write(lines, tgt_name, tgt_def, connector_graph, source_dfs
|
|
|
1406
1497
|
lines.append(f" write_with_update_strategy(config, df_target_{tgt_safe}, '{tgt_def.name}', '{conn_label}')")
|
|
1407
1498
|
lines.append(f" else:")
|
|
1408
1499
|
lines.append(f" write_to_db(config, df_target_{tgt_safe}, '{tgt_def.name}', '{conn_label}')")
|
|
1409
|
-
|
|
1410
|
-
|
|
1411
|
-
else:
|
|
1412
|
-
lines.append(f" write_file(df_target_{tgt_safe}, config.get('targets', {{}}).get('{tgt_def.name}', {{}}).get('file_path', '{tgt_def.name}'),")
|
|
1413
|
-
lines.append(f" config.get('targets', {{}}).get('{tgt_def.name}', {{}}))")
|
|
1414
|
-
lines.append(f" try:")
|
|
1415
|
-
lines.append(f" logger.info(f'Target {tgt_def.name}: {{len(df_target_{tgt_safe})}} rows written')")
|
|
1416
|
-
lines.append(f" except Exception:")
|
|
1417
|
-
lines.append(f" logger.info('Target {tgt_def.name}: rows written (count unavailable)')")
|
|
1500
|
+
lines.append(f" logger.info(f'Target {tgt_def.name}: {{len(df_target_{tgt_safe})}} rows written')")
|
|
1501
|
+
lines.append("")
|
|
1418
1502
|
|
|
1419
1503
|
|
|
1420
1504
|
CAST_MAP = {
|
|
@@ -179,24 +179,41 @@ def _generate_workflow_function(lines, wf: WorkflowDef, folder: FolderDef, workl
|
|
|
179
179
|
def _emit_task_code(lines, task, mapping_name_map, session_to_mapping, wf, worklets):
|
|
180
180
|
task_safe = _safe_name(task.name)
|
|
181
181
|
|
|
182
|
-
if task.task_type
|
|
182
|
+
if task.task_type in ("Start Task", "Start"):
|
|
183
183
|
lines.append(f" # Start Task: {task.name}")
|
|
184
184
|
lines.append(f" logger.info('Workflow started')")
|
|
185
185
|
lines.append("")
|
|
186
186
|
return
|
|
187
187
|
|
|
188
188
|
if task.task_type == "Session":
|
|
189
|
-
|
|
189
|
+
session_key = task.task_name or task.name
|
|
190
|
+
mapping_name = session_to_mapping.get(session_key, "")
|
|
190
191
|
run_func = mapping_name_map.get(mapping_name, None)
|
|
191
192
|
|
|
193
|
+
if not run_func:
|
|
194
|
+
best_match = None
|
|
195
|
+
best_len = 0
|
|
196
|
+
session_lower = session_key.lower()
|
|
197
|
+
for mname, rfunc in mapping_name_map.items():
|
|
198
|
+
safe_mname = _safe_name(mname)
|
|
199
|
+
if session_lower.endswith(safe_mname) and len(safe_mname) > best_len:
|
|
200
|
+
best_match = rfunc
|
|
201
|
+
best_len = len(safe_mname)
|
|
202
|
+
if not best_match:
|
|
203
|
+
for mname, rfunc in mapping_name_map.items():
|
|
204
|
+
safe_mname = _safe_name(mname)
|
|
205
|
+
if safe_mname in session_lower and len(safe_mname) > best_len:
|
|
206
|
+
best_match = rfunc
|
|
207
|
+
best_len = len(safe_mname)
|
|
208
|
+
run_func = best_match
|
|
209
|
+
|
|
192
210
|
lines.append(f" # Session: {task.name}")
|
|
193
211
|
lines.append(f" try:")
|
|
194
212
|
lines.append(f" logger.info('Executing session: {task.name}')")
|
|
195
213
|
if run_func:
|
|
196
214
|
lines.append(f" {run_func}(config)")
|
|
197
215
|
else:
|
|
198
|
-
lines.append(f"
|
|
199
|
-
lines.append(f" logger.warning('Session {task.name} has no mapped function')")
|
|
216
|
+
lines.append(f" logger.warning('Session {task.name}: no mapped function found — verify mapping linkage')")
|
|
200
217
|
lines.append(f" except Exception as e:")
|
|
201
218
|
lines.append(f" logger.error(f'Session {task.name} failed: {{e}}')")
|
|
202
219
|
|