informatica-python 1.9.2__py3-none-any.whl → 1.9.4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- informatica_python/__init__.py +1 -1
- informatica_python/generators/mapping_gen.py +165 -64
- informatica_python/generators/workflow_gen.py +21 -4
- informatica_python/utils/expression_converter.py +338 -8
- {informatica_python-1.9.2.dist-info → informatica_python-1.9.4.dist-info}/METADATA +175 -47
- {informatica_python-1.9.2.dist-info → informatica_python-1.9.4.dist-info}/RECORD +10 -10
- {informatica_python-1.9.2.dist-info → informatica_python-1.9.4.dist-info}/WHEEL +0 -0
- {informatica_python-1.9.2.dist-info → informatica_python-1.9.4.dist-info}/entry_points.txt +0 -0
- {informatica_python-1.9.2.dist-info → informatica_python-1.9.4.dist-info}/licenses/LICENSE +0 -0
- {informatica_python-1.9.2.dist-info → informatica_python-1.9.4.dist-info}/top_level.txt +0 -0
informatica_python/__init__.py
CHANGED
|
@@ -1,3 +1,4 @@
|
|
|
1
|
+
import os
|
|
1
2
|
from typing import List, Dict
|
|
2
3
|
from informatica_python.models import (
|
|
3
4
|
MappingDef, FolderDef, SourceDef, TargetDef,
|
|
@@ -228,7 +229,6 @@ def generate_mapping_code(mapping: MappingDef, folder: FolderDef,
|
|
|
228
229
|
lines.append("import logging")
|
|
229
230
|
lines.append("import numpy as np")
|
|
230
231
|
lines.append("import pandas as pd")
|
|
231
|
-
lines.append("from datetime import datetime")
|
|
232
232
|
lines.append("from helper_functions import *")
|
|
233
233
|
lines.append("")
|
|
234
234
|
lines.append("logger = logging.getLogger(__name__)")
|
|
@@ -375,7 +375,40 @@ def generate_mapping_code(mapping: MappingDef, folder: FolderDef,
|
|
|
375
375
|
lines.append(f" run_{_safe_name(mapping.name)}(config)")
|
|
376
376
|
lines.append("")
|
|
377
377
|
|
|
378
|
-
|
|
378
|
+
code = "\n".join(lines)
|
|
379
|
+
func_sig = f"def run_{_safe_name(mapping.name)}(config):"
|
|
380
|
+
sig_idx = code.index(func_sig) + len(func_sig)
|
|
381
|
+
docstring_end = code.index('"""', code.index('"""', sig_idx) + 3) + 3
|
|
382
|
+
before_body = code[:docstring_end]
|
|
383
|
+
after_docstring = code[docstring_end:]
|
|
384
|
+
main_sentinel = "\n\nif __name__"
|
|
385
|
+
body_end_idx = after_docstring.index(main_sentinel)
|
|
386
|
+
body = after_docstring[:body_end_idx]
|
|
387
|
+
rest = after_docstring[body_end_idx:]
|
|
388
|
+
body_lines = body.split("\n")
|
|
389
|
+
while body_lines and body_lines[0].strip() == "":
|
|
390
|
+
body_lines.pop(0)
|
|
391
|
+
while body_lines and body_lines[-1].strip() == "":
|
|
392
|
+
body_lines.pop()
|
|
393
|
+
wrapped = []
|
|
394
|
+
wrapped.append("")
|
|
395
|
+
wrapped.append(" try:")
|
|
396
|
+
prev_blank = False
|
|
397
|
+
for bl in body_lines:
|
|
398
|
+
if bl.strip() == "":
|
|
399
|
+
if not prev_blank:
|
|
400
|
+
wrapped.append("")
|
|
401
|
+
prev_blank = True
|
|
402
|
+
else:
|
|
403
|
+
wrapped.append(" " + bl)
|
|
404
|
+
prev_blank = False
|
|
405
|
+
wrapped.append("")
|
|
406
|
+
wrapped.append(" except Exception as _exc:")
|
|
407
|
+
wrapped.append(f" logger.error(f'Mapping {mapping.name} failed: {{_exc}}')")
|
|
408
|
+
wrapped.append(" raise")
|
|
409
|
+
wrapped.append("")
|
|
410
|
+
|
|
411
|
+
return before_body + "\n".join(wrapped) + rest
|
|
379
412
|
|
|
380
413
|
|
|
381
414
|
def _safe_name(name):
|
|
@@ -386,6 +419,22 @@ def _safe_name(name):
|
|
|
386
419
|
return safe.lower()
|
|
387
420
|
|
|
388
421
|
|
|
422
|
+
def _emit_sql_with_params(lines, sql_var_name, sql_text, indent=" "):
|
|
423
|
+
import re
|
|
424
|
+
params = re.findall(r'\$\$(\w+)', sql_text)
|
|
425
|
+
lines.append(f"{indent}{sql_var_name} = '''")
|
|
426
|
+
for sql_line in sql_text.strip().split("\n"):
|
|
427
|
+
lines.append(f"{indent}{sql_line}")
|
|
428
|
+
lines.append(f"{indent}'''")
|
|
429
|
+
if params:
|
|
430
|
+
seen = set()
|
|
431
|
+
for p in params:
|
|
432
|
+
if p in seen:
|
|
433
|
+
continue
|
|
434
|
+
seen.add(p)
|
|
435
|
+
lines.append(f"{indent}{sql_var_name} = {sql_var_name}.replace('$${p}', str(get_param(config, '{p}')))")
|
|
436
|
+
|
|
437
|
+
|
|
389
438
|
def _flatfile_config_dict(ff):
|
|
390
439
|
cfg = {}
|
|
391
440
|
if not ff:
|
|
@@ -504,7 +553,7 @@ def _emit_flatfile_write(lines, var_name, tgt_def, indent=" ", file_path_over
|
|
|
504
553
|
def _build_source_map(mapping, folder):
|
|
505
554
|
source_map = {}
|
|
506
555
|
for inst in mapping.instances:
|
|
507
|
-
if inst.type
|
|
556
|
+
if inst.type.upper() in ("SOURCE DEFINITION", "SOURCE"):
|
|
508
557
|
tx_name = inst.transformation_name or inst.name
|
|
509
558
|
for src in folder.sources:
|
|
510
559
|
if src.name == tx_name:
|
|
@@ -518,7 +567,7 @@ def _build_source_map(mapping, folder):
|
|
|
518
567
|
def _build_target_map(mapping, folder):
|
|
519
568
|
target_map = {}
|
|
520
569
|
for inst in mapping.instances:
|
|
521
|
-
if inst.type
|
|
570
|
+
if inst.type.upper() in ("TARGET DEFINITION", "TARGET"):
|
|
522
571
|
tx_name = inst.transformation_name or inst.name
|
|
523
572
|
for tgt in folder.targets:
|
|
524
573
|
if tgt.name == tx_name:
|
|
@@ -594,7 +643,9 @@ def _generate_source_qualifier(lines, sq, source_map, source_dfs, connector_grap
|
|
|
594
643
|
if not connected_sources and source_map:
|
|
595
644
|
connected_sources.add(next(iter(source_map)))
|
|
596
645
|
|
|
646
|
+
lines.append(f" # -------------------------------------------------------------------")
|
|
597
647
|
lines.append(f" # Source Qualifier: {sq.name}")
|
|
648
|
+
lines.append(f" # -------------------------------------------------------------------")
|
|
598
649
|
|
|
599
650
|
if pre_sql:
|
|
600
651
|
lines.append(f" # Pre-SQL")
|
|
@@ -606,10 +657,7 @@ def _generate_source_qualifier(lines, sq, source_map, source_dfs, connector_grap
|
|
|
606
657
|
if not connected_sources:
|
|
607
658
|
sq_src_name = sq.name[3:] if sq.name.upper().startswith("SQ_") else sq.name
|
|
608
659
|
if sql_override:
|
|
609
|
-
lines
|
|
610
|
-
for sql_line in sql_override.strip().split("\n"):
|
|
611
|
-
lines.append(f" {sql_line}")
|
|
612
|
-
lines.append(f" '''")
|
|
660
|
+
_emit_sql_with_params(lines, f"sql_{sq_safe}", sql_override)
|
|
613
661
|
lines.append(f" df_{sq_safe} = read_from_db(config, sql_{sq_safe}, 'default')")
|
|
614
662
|
else:
|
|
615
663
|
lines.append(f" df_{sq_safe} = read_file(config.get('sources', {{}}).get('{sq_src_name}', {{}}).get('file_path', '{sq_src_name}'),")
|
|
@@ -620,10 +668,7 @@ def _generate_source_qualifier(lines, sq, source_map, source_dfs, connector_grap
|
|
|
620
668
|
sq_override = (session_overrides or {}).get(sq.name, {}) or (session_overrides or {}).get(src_name, {})
|
|
621
669
|
conn_name = sq_override.get("connection_name") or (_safe_name(src_def.db_name) if src_def.db_name else "default")
|
|
622
670
|
|
|
623
|
-
lines
|
|
624
|
-
for sql_line in sql_override.strip().split("\n"):
|
|
625
|
-
lines.append(f" {sql_line}")
|
|
626
|
-
lines.append(f" '''")
|
|
671
|
+
_emit_sql_with_params(lines, f"sql_{sq_safe}", sql_override)
|
|
627
672
|
lines.append(f" df_{sq_safe} = read_from_db(config, sql_{sq_safe}, '{conn_name}')")
|
|
628
673
|
elif len(connected_sources) == 1:
|
|
629
674
|
src_name = next(iter(connected_sources))
|
|
@@ -656,10 +701,7 @@ def _generate_source_qualifier(lines, sq, source_map, source_dfs, connector_grap
|
|
|
656
701
|
lines.append(f" df_{sq_safe} = df_{_safe_name(next(iter(connected_sources)))}")
|
|
657
702
|
|
|
658
703
|
source_dfs[sq.name] = f"df_{sq_safe}"
|
|
659
|
-
lines.append(f"
|
|
660
|
-
lines.append(f" logger.info(f'Source {sq.name}: {{len(df_{sq_safe})}} rows read')")
|
|
661
|
-
lines.append(f" except Exception:")
|
|
662
|
-
lines.append(f" logger.info('Source {sq.name}: rows read (count unavailable)')")
|
|
704
|
+
lines.append(f" logger.info(f'Source {sq.name}: {{len(df_{sq_safe})}} rows read')")
|
|
663
705
|
|
|
664
706
|
if post_sql:
|
|
665
707
|
lines.append(f" # Post-SQL")
|
|
@@ -699,10 +741,10 @@ def _generate_transformation(lines, tx, connector_graph, source_dfs, transform_m
|
|
|
699
741
|
lines.append(f" # Input fields: {', '.join(in_fields[:10])}{' ...' if len(in_fields) > 10 else ''}")
|
|
700
742
|
lines.append(f" # Output fields: {', '.join(out_fields[:10])}{' ...' if len(out_fields) > 10 else ''}")
|
|
701
743
|
lines.append(f" # -------------------------------------------------------------------")
|
|
702
|
-
|
|
703
|
-
|
|
704
|
-
|
|
705
|
-
|
|
744
|
+
if input_df == "df_input":
|
|
745
|
+
lines.append(f" _input_rows_{tx_safe} = -1")
|
|
746
|
+
else:
|
|
747
|
+
lines.append(f" _input_rows_{tx_safe} = len({input_df})")
|
|
706
748
|
|
|
707
749
|
if tx_type == "expression":
|
|
708
750
|
_gen_expression_transform(lines, tx, tx_safe, input_df, source_dfs, data_lib)
|
|
@@ -715,7 +757,7 @@ def _generate_transformation(lines, tx, connector_graph, source_dfs, transform_m
|
|
|
715
757
|
elif tx_type in ("joiner",):
|
|
716
758
|
_gen_joiner_transform(lines, tx, tx_safe, input_df, input_sources, source_dfs, connector_graph, data_lib)
|
|
717
759
|
elif tx_type in ("lookup procedure", "lookup"):
|
|
718
|
-
_gen_lookup_transform(lines, tx, tx_safe, input_df, source_dfs, data_lib)
|
|
760
|
+
_gen_lookup_transform(lines, tx, tx_safe, input_df, source_dfs, connector_graph, data_lib)
|
|
719
761
|
elif tx_type == "router":
|
|
720
762
|
_gen_router_transform(lines, tx, tx_safe, input_df, source_dfs)
|
|
721
763
|
elif tx_type in ("union",):
|
|
@@ -742,28 +784,26 @@ def _generate_transformation(lines, tx, connector_graph, source_dfs, transform_m
|
|
|
742
784
|
lines.append(f" df_{tx_safe} = {copy_expr}")
|
|
743
785
|
source_dfs[tx.name] = f"df_{tx_safe}"
|
|
744
786
|
|
|
745
|
-
lines.append(f"
|
|
746
|
-
lines.append(f" _output_rows_{tx_safe} = len(df_{tx_safe})")
|
|
747
|
-
lines.append(f" except Exception:")
|
|
748
|
-
lines.append(f" _output_rows_{tx_safe} = -1")
|
|
787
|
+
lines.append(f" _output_rows_{tx_safe} = len(df_{tx_safe})")
|
|
749
788
|
lines.append(f" logger.info(f'{tx.name} ({tx.type}): {{_input_rows_{tx_safe}}} input rows -> {{_output_rows_{tx_safe}}} output rows')")
|
|
750
789
|
lines.append("")
|
|
751
790
|
|
|
752
791
|
|
|
753
792
|
def _gen_expression_transform(lines, tx, tx_safe, input_df, source_dfs, data_lib="pandas"):
|
|
754
|
-
|
|
755
|
-
lines.append(f" df_{tx_safe} = {copy_expr}")
|
|
756
|
-
has_expressions = False
|
|
793
|
+
active_fields = []
|
|
757
794
|
for fld in tx.fields:
|
|
758
|
-
if fld.expression and fld.expression.strip() and fld.expression.strip() != fld.name:
|
|
759
|
-
|
|
795
|
+
if fld.expression and fld.expression.strip() and fld.expression.strip().lower() != fld.name.lower():
|
|
796
|
+
active_fields.append(fld)
|
|
797
|
+
|
|
798
|
+
if active_fields:
|
|
799
|
+
copy_expr = lib_copy(data_lib, input_df)
|
|
800
|
+
lines.append(f" df_{tx_safe} = {copy_expr}")
|
|
801
|
+
for fld in active_fields:
|
|
760
802
|
expr_vec = convert_expression_vectorized(fld.expression, f"df_{tx_safe}")
|
|
761
803
|
lines.append(f" # {fld.name} = {fld.expression}")
|
|
762
|
-
|
|
763
|
-
|
|
764
|
-
|
|
765
|
-
lines.append(f" df_{tx_safe}['{fld.name}'] = {expr_vec}")
|
|
766
|
-
if not has_expressions:
|
|
804
|
+
lines.append(f" df_{tx_safe}['{fld.name}'] = {expr_vec}")
|
|
805
|
+
else:
|
|
806
|
+
lines.append(f" df_{tx_safe} = {input_df}")
|
|
767
807
|
lines.append(f" # Pass-through expression (no transformations)")
|
|
768
808
|
source_dfs[tx.name] = f"df_{tx_safe}"
|
|
769
809
|
|
|
@@ -842,7 +882,11 @@ def _gen_sorter_transform(lines, tx, tx_safe, input_df, source_dfs, data_lib="pa
|
|
|
842
882
|
sort_dirs = []
|
|
843
883
|
for fld in tx.fields:
|
|
844
884
|
sort_keys.append(fld.name)
|
|
845
|
-
|
|
885
|
+
direction = 'ASCENDING'
|
|
886
|
+
for fa in getattr(fld, 'field_attributes', []):
|
|
887
|
+
if isinstance(fa, dict) and fa.get('name', '').upper() == 'SORTDIRECTION':
|
|
888
|
+
direction = fa.get('value', 'ASCENDING') or 'ASCENDING'
|
|
889
|
+
sort_dirs.append(direction.upper() != 'DESCENDING')
|
|
846
890
|
if sort_keys:
|
|
847
891
|
sort_expr = lib_sort(data_lib, input_df, sort_keys, sort_dirs)
|
|
848
892
|
lines.append(f" df_{tx_safe} = {sort_expr}")
|
|
@@ -881,13 +925,23 @@ def _gen_joiner_transform(lines, tx, tx_safe, input_df, input_sources, source_df
|
|
|
881
925
|
master_src = None
|
|
882
926
|
detail_src = None
|
|
883
927
|
input_conns = connector_graph.get("to", {}).get(tx.name, []) if connector_graph else []
|
|
928
|
+
|
|
929
|
+
port_to_col = {}
|
|
930
|
+
master_fields_lower = {f.lower() for f in master_fields}
|
|
931
|
+
detail_fields_lower = {f.lower() for f in detail_fields}
|
|
884
932
|
for conn in input_conns:
|
|
885
933
|
to_field = conn.to_field
|
|
886
|
-
|
|
934
|
+
port_to_col[to_field] = conn.from_field
|
|
935
|
+
port_to_col[to_field.lower()] = conn.from_field
|
|
936
|
+
if to_field in master_fields or to_field.lower() in master_fields_lower:
|
|
887
937
|
master_src = conn.from_instance
|
|
888
|
-
elif to_field in detail_fields:
|
|
938
|
+
elif to_field in detail_fields or to_field.lower() in detail_fields_lower:
|
|
889
939
|
detail_src = conn.from_instance
|
|
890
940
|
|
|
941
|
+
if left_keys and right_keys and port_to_col:
|
|
942
|
+
left_keys = [port_to_col.get(k, port_to_col.get(k.lower(), k)) for k in left_keys]
|
|
943
|
+
right_keys = [port_to_col.get(k, port_to_col.get(k.lower(), k)) for k in right_keys]
|
|
944
|
+
|
|
891
945
|
src_list = list(input_sources)
|
|
892
946
|
if not master_src and not detail_src and len(src_list) >= 2:
|
|
893
947
|
master_src = src_list[0]
|
|
@@ -928,7 +982,7 @@ def _gen_joiner_transform(lines, tx, tx_safe, input_df, input_sources, source_df
|
|
|
928
982
|
source_dfs[tx.name] = f"df_{tx_safe}"
|
|
929
983
|
|
|
930
984
|
|
|
931
|
-
def _gen_lookup_transform(lines, tx, tx_safe, input_df, source_dfs, data_lib="pandas"):
|
|
985
|
+
def _gen_lookup_transform(lines, tx, tx_safe, input_df, source_dfs, connector_graph=None, data_lib="pandas"):
|
|
932
986
|
lookup_table = ""
|
|
933
987
|
lookup_sql = ""
|
|
934
988
|
lookup_condition = ""
|
|
@@ -958,21 +1012,26 @@ def _gen_lookup_transform(lines, tx, tx_safe, input_df, source_dfs, data_lib="pa
|
|
|
958
1012
|
|
|
959
1013
|
all_output_fields = return_fields + lookup_output_fields
|
|
960
1014
|
|
|
1015
|
+
port_to_col = {}
|
|
1016
|
+
if connector_graph and tx.name in connector_graph.get("to", {}):
|
|
1017
|
+
for conn in connector_graph["to"][tx.name]:
|
|
1018
|
+
port_to_col[conn.to_field.lower()] = conn.from_field
|
|
1019
|
+
|
|
961
1020
|
lines.append(f" # Lookup: {lookup_table or tx.name}")
|
|
962
1021
|
if lookup_sql:
|
|
963
|
-
lines
|
|
964
|
-
for sql_line in lookup_sql.strip().split("\n"):
|
|
965
|
-
lines.append(f" {sql_line}")
|
|
966
|
-
lines.append(f" '''")
|
|
1022
|
+
_emit_sql_with_params(lines, f"lkp_sql_{tx_safe}", lookup_sql)
|
|
967
1023
|
lines.append(f" df_lkp_{tx_safe} = read_from_db(config, lkp_sql_{tx_safe}, 'default')")
|
|
968
1024
|
elif lookup_table:
|
|
969
1025
|
lines.append(f" df_lkp_{tx_safe} = read_from_db(config, 'SELECT * FROM {lookup_table}', 'default')")
|
|
970
1026
|
else:
|
|
971
1027
|
empty_expr = lib_empty_df(data_lib)
|
|
972
|
-
lines.append(f" df_lkp_{tx_safe} = {empty_expr}")
|
|
1028
|
+
lines.append(f" df_lkp_{tx_safe} = {empty_expr} # WARNING: no lookup table/SQL override found")
|
|
973
1029
|
|
|
974
1030
|
input_keys, lookup_keys = parse_lookup_condition(lookup_condition)
|
|
975
1031
|
|
|
1032
|
+
if input_keys and port_to_col:
|
|
1033
|
+
input_keys = [port_to_col.get(k.lower(), k) for k in input_keys]
|
|
1034
|
+
|
|
976
1035
|
if input_keys and lookup_keys:
|
|
977
1036
|
lines.append(f" # Lookup condition: {lookup_condition}")
|
|
978
1037
|
|
|
@@ -998,7 +1057,11 @@ def _gen_lookup_transform(lines, tx, tx_safe, input_df, source_dfs, data_lib="pa
|
|
|
998
1057
|
lines.append(f" if _lkp_drop:")
|
|
999
1058
|
lines.append(f" df_{tx_safe} = df_{tx_safe}.drop(columns=_lkp_drop)")
|
|
1000
1059
|
|
|
1060
|
+
seen_output_cols = set()
|
|
1001
1061
|
for rf in all_output_fields:
|
|
1062
|
+
if rf.name in seen_output_cols:
|
|
1063
|
+
continue
|
|
1064
|
+
seen_output_cols.add(rf.name)
|
|
1002
1065
|
lines.append(f" if '{rf.name}' not in df_{tx_safe}.columns:")
|
|
1003
1066
|
lines.append(f" df_{tx_safe}['{rf.name}'] = None")
|
|
1004
1067
|
if rf.default_value:
|
|
@@ -1023,12 +1086,23 @@ def _gen_router_transform(lines, tx, tx_safe, input_df, source_dfs):
|
|
|
1023
1086
|
if "Group Filter Condition" in attr.name:
|
|
1024
1087
|
group_conditions[attr.name] = attr.value
|
|
1025
1088
|
|
|
1089
|
+
remaining_mask_parts = []
|
|
1026
1090
|
if group_conditions:
|
|
1027
1091
|
for i, (gname, cond) in enumerate(group_conditions.items()):
|
|
1028
|
-
|
|
1029
|
-
|
|
1092
|
+
if cond and cond.strip():
|
|
1093
|
+
expr_py = convert_filter_vectorized(cond, input_df)
|
|
1094
|
+
else:
|
|
1095
|
+
expr_py = f"pd.Series(True, index={input_df}.index)"
|
|
1096
|
+
mask_var = f"_router_mask_{tx_safe}_{i}"
|
|
1097
|
+
lines.append(f" {mask_var} = {expr_py} # {gname}")
|
|
1098
|
+
lines.append(f" df_{tx_safe}_group{i} = {input_df}[{mask_var}].copy()")
|
|
1030
1099
|
source_dfs[f"{tx.name}_group{i}"] = f"df_{tx_safe}_group{i}"
|
|
1031
|
-
|
|
1100
|
+
remaining_mask_parts.append(f"~{mask_var}")
|
|
1101
|
+
if remaining_mask_parts:
|
|
1102
|
+
lines.append(f" _router_default_mask = {' & '.join(remaining_mask_parts)}")
|
|
1103
|
+
lines.append(f" df_{tx_safe} = {input_df}[_router_default_mask].copy() # Default group")
|
|
1104
|
+
else:
|
|
1105
|
+
lines.append(f" df_{tx_safe} = {input_df}.copy() # Default group")
|
|
1032
1106
|
source_dfs[tx.name] = f"df_{tx_safe}"
|
|
1033
1107
|
|
|
1034
1108
|
|
|
@@ -1073,14 +1147,19 @@ def _gen_update_strategy(lines, tx, tx_safe, input_df, source_dfs):
|
|
|
1073
1147
|
for dd_const, label in dd_map.items():
|
|
1074
1148
|
expr = expr.replace(dd_const, f"'{label}'")
|
|
1075
1149
|
try:
|
|
1076
|
-
|
|
1150
|
+
expr_vec = convert_expression_vectorized(expr, f"df_{tx_safe}")
|
|
1077
1151
|
lines.append(f" # Original expression: {strategy_expr}")
|
|
1078
|
-
lines.append(f"
|
|
1079
|
-
lines.append(f" return {converted}")
|
|
1080
|
-
lines.append(f" df_{tx_safe}['_update_strategy'] = df_{tx_safe}.apply(_resolve_strategy, axis=1)")
|
|
1152
|
+
lines.append(f" df_{tx_safe}['_update_strategy'] = {expr_vec}")
|
|
1081
1153
|
except Exception:
|
|
1082
|
-
|
|
1083
|
-
|
|
1154
|
+
try:
|
|
1155
|
+
converted = convert_expression(expr)
|
|
1156
|
+
lines.append(f" # Original expression: {strategy_expr}")
|
|
1157
|
+
lines.append(f" def _resolve_strategy(row):")
|
|
1158
|
+
lines.append(f" return {converted}")
|
|
1159
|
+
lines.append(f" df_{tx_safe}['_update_strategy'] = df_{tx_safe}.apply(_resolve_strategy, axis=1)")
|
|
1160
|
+
except Exception:
|
|
1161
|
+
lines.append(f" # Could not parse strategy expression: {strategy_expr}")
|
|
1162
|
+
lines.append(f" df_{tx_safe}['_update_strategy'] = 'INSERT'")
|
|
1084
1163
|
source_dfs[tx.name] = f"df_{tx_safe}"
|
|
1085
1164
|
|
|
1086
1165
|
|
|
@@ -1343,7 +1422,7 @@ def _gen_sql_transform(lines, tx, tx_safe, input_df, source_dfs):
|
|
|
1343
1422
|
sql_query = convert_sql_expression(attr.value)
|
|
1344
1423
|
lines.append(f" # SQL Transformation: {tx.name}")
|
|
1345
1424
|
if sql_query:
|
|
1346
|
-
lines
|
|
1425
|
+
_emit_sql_with_params(lines, f"sql_{tx_safe}", sql_query)
|
|
1347
1426
|
lines.append(f" df_{tx_safe} = read_from_db(config, sql_{tx_safe}, 'default')")
|
|
1348
1427
|
else:
|
|
1349
1428
|
lines.append(f" df_{tx_safe} = {input_df}.copy()")
|
|
@@ -1371,12 +1450,21 @@ def _generate_target_write(lines, tgt_name, tgt_def, connector_graph, source_dfs
|
|
|
1371
1450
|
for c in to_conns:
|
|
1372
1451
|
col_mapping[c.to_field] = c.from_field
|
|
1373
1452
|
|
|
1453
|
+
lines.append(f" # -------------------------------------------------------------------")
|
|
1374
1454
|
lines.append(f" # Write to target: {tgt_def.name}")
|
|
1455
|
+
if tgt_def.database_type:
|
|
1456
|
+
lines.append(f" # Database type: {tgt_def.database_type}")
|
|
1457
|
+
target_field_names = [f.name for f in tgt_def.fields] if tgt_def.fields else []
|
|
1458
|
+
if target_field_names:
|
|
1459
|
+
lines.append(f" # Target fields: {', '.join(target_field_names[:10])}{' ...' if len(target_field_names) > 10 else ''}")
|
|
1460
|
+
lines.append(f" # -------------------------------------------------------------------")
|
|
1375
1461
|
if col_mapping:
|
|
1462
|
+
lines.append(f" # Column mapping: source -> target")
|
|
1376
1463
|
lines.append(f" target_columns_{tgt_safe} = {col_mapping}")
|
|
1377
1464
|
lines.append(f" df_target_{tgt_safe} = {input_df}.rename(columns={{v: k for k, v in target_columns_{tgt_safe}.items()}})")
|
|
1378
1465
|
target_cols = [f.name for f in tgt_def.fields] if tgt_def.fields else None
|
|
1379
1466
|
if target_cols:
|
|
1467
|
+
lines.append(f" # Select only target columns")
|
|
1380
1468
|
lines.append(f" available_cols = [c for c in {target_cols} if c in df_target_{tgt_safe}.columns]")
|
|
1381
1469
|
lines.append(f" if '_update_strategy' in df_target_{tgt_safe}.columns and '_update_strategy' not in available_cols:")
|
|
1382
1470
|
lines.append(f" available_cols.append('_update_strategy')")
|
|
@@ -1389,17 +1477,37 @@ def _generate_target_write(lines, tgt_name, tgt_def, connector_graph, source_dfs
|
|
|
1389
1477
|
tgt_override = (session_overrides or {}).get(tgt_name, {})
|
|
1390
1478
|
tgt_conn = tgt_override.get("connection_name")
|
|
1391
1479
|
|
|
1480
|
+
_FILE_EXTENSIONS = {".csv", ".dat", ".txt", ".xml", ".json", ".parquet", ".xlsx", ".xls", ".tsv", ".avro"}
|
|
1481
|
+
_is_file_target = bool(
|
|
1482
|
+
tgt_override.get("output_file_directory") or tgt_override.get("output_filename")
|
|
1483
|
+
or tgt_def.flatfile
|
|
1484
|
+
or (tgt_def.database_type and tgt_def.database_type == "Flat File")
|
|
1485
|
+
or os.path.splitext(tgt_def.name)[1].lower() in _FILE_EXTENSIONS
|
|
1486
|
+
)
|
|
1487
|
+
_is_db_target = bool(
|
|
1488
|
+
tgt_def.database_type and tgt_def.database_type != "Flat File"
|
|
1489
|
+
)
|
|
1490
|
+
|
|
1392
1491
|
if tgt_override.get("output_file_directory") or tgt_override.get("output_filename"):
|
|
1393
1492
|
out_dir = tgt_override.get("output_file_directory", ".")
|
|
1394
1493
|
out_file = tgt_override.get("output_filename", tgt_def.name)
|
|
1494
|
+
lines.append(f" # Write to file (session override path)")
|
|
1395
1495
|
lines.append(f" _tgt_path_{tgt_safe} = config.get('targets', {{}}).get('{tgt_def.name}', {{}}).get('file_path',")
|
|
1396
1496
|
lines.append(f" os.path.join('{out_dir}', '{out_file}'))")
|
|
1397
1497
|
if tgt_def.flatfile:
|
|
1398
1498
|
_emit_flatfile_write(lines, tgt_safe, tgt_def, file_path_override=True)
|
|
1399
1499
|
else:
|
|
1400
1500
|
lines.append(f" write_file(df_target_{tgt_safe}, _tgt_path_{tgt_safe}, config.get('targets', {{}}).get('{tgt_def.name}', {{}}))")
|
|
1401
|
-
elif tgt_def.
|
|
1501
|
+
elif tgt_def.flatfile:
|
|
1502
|
+
lines.append(f" # Write to flat file")
|
|
1503
|
+
_emit_flatfile_write(lines, tgt_safe, tgt_def)
|
|
1504
|
+
elif _is_file_target and not _is_db_target:
|
|
1505
|
+
lines.append(f" # Write to file")
|
|
1506
|
+
lines.append(f" write_file(df_target_{tgt_safe}, config.get('targets', {{}}).get('{tgt_def.name}', {{}}).get('file_path', '{tgt_def.name}'),")
|
|
1507
|
+
lines.append(f" config.get('targets', {{}}).get('{tgt_def.name}', {{}}))")
|
|
1508
|
+
else:
|
|
1402
1509
|
conn_label = tgt_conn or "target"
|
|
1510
|
+
lines.append(f" # Write to database table")
|
|
1403
1511
|
lines.append(f" if '_update_strategy' in df_target_{tgt_safe}.columns:")
|
|
1404
1512
|
key_cols = [f.name for f in tgt_def.fields if getattr(f, 'keytype', 'NOT A KEY') == 'PRIMARY KEY'] or None
|
|
1405
1513
|
if key_cols:
|
|
@@ -1408,15 +1516,8 @@ def _generate_target_write(lines, tgt_name, tgt_def, connector_graph, source_dfs
|
|
|
1408
1516
|
lines.append(f" write_with_update_strategy(config, df_target_{tgt_safe}, '{tgt_def.name}', '{conn_label}')")
|
|
1409
1517
|
lines.append(f" else:")
|
|
1410
1518
|
lines.append(f" write_to_db(config, df_target_{tgt_safe}, '{tgt_def.name}', '{conn_label}')")
|
|
1411
|
-
|
|
1412
|
-
|
|
1413
|
-
else:
|
|
1414
|
-
lines.append(f" write_file(df_target_{tgt_safe}, config.get('targets', {{}}).get('{tgt_def.name}', {{}}).get('file_path', '{tgt_def.name}'),")
|
|
1415
|
-
lines.append(f" config.get('targets', {{}}).get('{tgt_def.name}', {{}}))")
|
|
1416
|
-
lines.append(f" try:")
|
|
1417
|
-
lines.append(f" logger.info(f'Target {tgt_def.name}: {{len(df_target_{tgt_safe})}} rows written')")
|
|
1418
|
-
lines.append(f" except Exception:")
|
|
1419
|
-
lines.append(f" logger.info('Target {tgt_def.name}: rows written (count unavailable)')")
|
|
1519
|
+
lines.append(f" logger.info(f'Target {tgt_def.name}: {{len(df_target_{tgt_safe})}} rows written')")
|
|
1520
|
+
lines.append("")
|
|
1420
1521
|
|
|
1421
1522
|
|
|
1422
1523
|
CAST_MAP = {
|
|
@@ -179,24 +179,41 @@ def _generate_workflow_function(lines, wf: WorkflowDef, folder: FolderDef, workl
|
|
|
179
179
|
def _emit_task_code(lines, task, mapping_name_map, session_to_mapping, wf, worklets):
|
|
180
180
|
task_safe = _safe_name(task.name)
|
|
181
181
|
|
|
182
|
-
if task.task_type
|
|
182
|
+
if task.task_type in ("Start Task", "Start"):
|
|
183
183
|
lines.append(f" # Start Task: {task.name}")
|
|
184
184
|
lines.append(f" logger.info('Workflow started')")
|
|
185
185
|
lines.append("")
|
|
186
186
|
return
|
|
187
187
|
|
|
188
188
|
if task.task_type == "Session":
|
|
189
|
-
|
|
189
|
+
session_key = task.task_name or task.name
|
|
190
|
+
mapping_name = session_to_mapping.get(session_key, "")
|
|
190
191
|
run_func = mapping_name_map.get(mapping_name, None)
|
|
191
192
|
|
|
193
|
+
if not run_func:
|
|
194
|
+
best_match = None
|
|
195
|
+
best_len = 0
|
|
196
|
+
session_lower = session_key.lower()
|
|
197
|
+
for mname, rfunc in mapping_name_map.items():
|
|
198
|
+
safe_mname = _safe_name(mname)
|
|
199
|
+
if session_lower.endswith(safe_mname) and len(safe_mname) > best_len:
|
|
200
|
+
best_match = rfunc
|
|
201
|
+
best_len = len(safe_mname)
|
|
202
|
+
if not best_match:
|
|
203
|
+
for mname, rfunc in mapping_name_map.items():
|
|
204
|
+
safe_mname = _safe_name(mname)
|
|
205
|
+
if safe_mname in session_lower and len(safe_mname) > best_len:
|
|
206
|
+
best_match = rfunc
|
|
207
|
+
best_len = len(safe_mname)
|
|
208
|
+
run_func = best_match
|
|
209
|
+
|
|
192
210
|
lines.append(f" # Session: {task.name}")
|
|
193
211
|
lines.append(f" try:")
|
|
194
212
|
lines.append(f" logger.info('Executing session: {task.name}')")
|
|
195
213
|
if run_func:
|
|
196
214
|
lines.append(f" {run_func}(config)")
|
|
197
215
|
else:
|
|
198
|
-
lines.append(f"
|
|
199
|
-
lines.append(f" logger.warning('Session {task.name} has no mapped function')")
|
|
216
|
+
lines.append(f" logger.warning('Session {task.name}: no mapped function found — verify mapping linkage')")
|
|
200
217
|
lines.append(f" except Exception as e:")
|
|
201
218
|
lines.append(f" logger.error(f'Session {task.name} failed: {{e}}')")
|
|
202
219
|
|