informatica-python 1.1.1__tar.gz → 1.2.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (28)
  1. {informatica_python-1.1.1 → informatica_python-1.2.0}/PKG-INFO +1 -1
  2. {informatica_python-1.1.1 → informatica_python-1.2.0}/informatica_python/generators/helper_gen.py +396 -0
  3. {informatica_python-1.1.1 → informatica_python-1.2.0}/informatica_python/generators/mapping_gen.py +137 -22
  4. informatica_python-1.2.0/informatica_python/utils/expression_converter.py +259 -0
  5. {informatica_python-1.1.1 → informatica_python-1.2.0}/informatica_python.egg-info/PKG-INFO +1 -1
  6. {informatica_python-1.1.1 → informatica_python-1.2.0}/pyproject.toml +1 -1
  7. informatica_python-1.2.0/tests/test_converter.py +551 -0
  8. informatica_python-1.1.1/informatica_python/utils/expression_converter.py +0 -128
  9. informatica_python-1.1.1/tests/test_converter.py +0 -260
  10. {informatica_python-1.1.1 → informatica_python-1.2.0}/README.md +0 -0
  11. {informatica_python-1.1.1 → informatica_python-1.2.0}/informatica_python/__init__.py +0 -0
  12. {informatica_python-1.1.1 → informatica_python-1.2.0}/informatica_python/cli.py +0 -0
  13. {informatica_python-1.1.1 → informatica_python-1.2.0}/informatica_python/converter.py +0 -0
  14. {informatica_python-1.1.1 → informatica_python-1.2.0}/informatica_python/generators/__init__.py +0 -0
  15. {informatica_python-1.1.1 → informatica_python-1.2.0}/informatica_python/generators/config_gen.py +0 -0
  16. {informatica_python-1.1.1 → informatica_python-1.2.0}/informatica_python/generators/error_log_gen.py +0 -0
  17. {informatica_python-1.1.1 → informatica_python-1.2.0}/informatica_python/generators/sql_gen.py +0 -0
  18. {informatica_python-1.1.1 → informatica_python-1.2.0}/informatica_python/generators/workflow_gen.py +0 -0
  19. {informatica_python-1.1.1 → informatica_python-1.2.0}/informatica_python/models.py +0 -0
  20. {informatica_python-1.1.1 → informatica_python-1.2.0}/informatica_python/parser.py +0 -0
  21. {informatica_python-1.1.1 → informatica_python-1.2.0}/informatica_python/utils/__init__.py +0 -0
  22. {informatica_python-1.1.1 → informatica_python-1.2.0}/informatica_python/utils/datatype_map.py +0 -0
  23. {informatica_python-1.1.1 → informatica_python-1.2.0}/informatica_python.egg-info/SOURCES.txt +0 -0
  24. {informatica_python-1.1.1 → informatica_python-1.2.0}/informatica_python.egg-info/dependency_links.txt +0 -0
  25. {informatica_python-1.1.1 → informatica_python-1.2.0}/informatica_python.egg-info/entry_points.txt +0 -0
  26. {informatica_python-1.1.1 → informatica_python-1.2.0}/informatica_python.egg-info/requires.txt +0 -0
  27. {informatica_python-1.1.1 → informatica_python-1.2.0}/informatica_python.egg-info/top_level.txt +0 -0
  28. {informatica_python-1.1.1 → informatica_python-1.2.0}/setup.cfg +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: informatica-python
3
- Version: 1.1.1
3
+ Version: 1.2.0
4
4
  Summary: Convert Informatica PowerCenter workflow XML to Python/PySpark code
5
5
  License-Expression: MIT
6
6
  Requires-Python: >=3.8
@@ -631,6 +631,402 @@ def _add_expression_helpers(lines):
631
631
  lines.append(" return date_val")
632
632
  lines.append("")
633
633
  lines.append("")
634
+ lines.append("def initcap(value):")
635
+ lines.append(' """Informatica INITCAP equivalent."""')
636
+ lines.append(" return str(value).title() if value is not None else None")
637
+ lines.append("")
638
+ lines.append("")
639
+ lines.append("def reverse_str(value):")
640
+ lines.append(' """Informatica REVERSE equivalent."""')
641
+ lines.append(" return str(value)[::-1] if value is not None else None")
642
+ lines.append("")
643
+ lines.append("")
644
+ lines.append("def chr_func(code):")
645
+ lines.append(' """Informatica CHR equivalent."""')
646
+ lines.append(" return chr(int(code)) if code is not None else None")
647
+ lines.append("")
648
+ lines.append("")
649
+ lines.append("def ascii_func(value):")
650
+ lines.append(' """Informatica ASCII equivalent."""')
651
+ lines.append(" if value is None or str(value) == '':")
652
+ lines.append(" return None")
653
+ lines.append(" return ord(str(value)[0])")
654
+ lines.append("")
655
+ lines.append("")
656
+ lines.append("def left_str(value, n):")
657
+ lines.append(' """Return leftmost n characters."""')
658
+ lines.append(" return str(value)[:int(n)] if value is not None else None")
659
+ lines.append("")
660
+ lines.append("")
661
+ lines.append("def right_str(value, n):")
662
+ lines.append(' """Return rightmost n characters."""')
663
+ lines.append(" return str(value)[-int(n):] if value is not None else None")
664
+ lines.append("")
665
+ lines.append("")
666
+ lines.append("def trim_func(value):")
667
+ lines.append(' """Informatica TRIM equivalent."""')
668
+ lines.append(" return str(value).strip() if value is not None else None")
669
+ lines.append("")
670
+ lines.append("")
671
+ lines.append("def indexof(value, search, start=1):")
672
+ lines.append(' """Informatica INDEXOF equivalent (1-based)."""')
673
+ lines.append(" if value is None or search is None:")
674
+ lines.append(" return 0")
675
+ lines.append(" idx = str(value).find(str(search), max(start - 1, 0))")
676
+ lines.append(" return idx + 1 if idx >= 0 else 0")
677
+ lines.append("")
678
+ lines.append("")
679
+ lines.append("def metaphone_func(value):")
680
+ lines.append(' """Informatica METAPHONE equivalent (simplified)."""')
681
+ lines.append(" if value is None:")
682
+ lines.append(" return None")
683
+ lines.append(" try:")
684
+ lines.append(" import jellyfish")
685
+ lines.append(" return jellyfish.metaphone(str(value))")
686
+ lines.append(" except ImportError:")
687
+ lines.append(" return str(value).upper()[:4]")
688
+ lines.append("")
689
+ lines.append("")
690
+ lines.append("def soundex_func(value):")
691
+ lines.append(' """Informatica SOUNDEX equivalent (simplified)."""')
692
+ lines.append(" if value is None:")
693
+ lines.append(" return None")
694
+ lines.append(" try:")
695
+ lines.append(" import jellyfish")
696
+ lines.append(" return jellyfish.soundex(str(value))")
697
+ lines.append(" except ImportError:")
698
+ lines.append(" s = str(value).upper()")
699
+ lines.append(" if not s:")
700
+ lines.append(" return '0000'")
701
+ lines.append(" codes = {'B':'1','F':'1','P':'1','V':'1','C':'2','G':'2','J':'2','K':'2','Q':'2','S':'2','X':'2','Z':'2','D':'3','T':'3','L':'4','M':'5','N':'5','R':'6'}")
702
+ lines.append(" result = s[0]")
703
+ lines.append(" for ch in s[1:]:")
704
+ lines.append(" code = codes.get(ch, '0')")
705
+ lines.append(" if code != '0' and code != result[-1]:")
706
+ lines.append(" result += code")
707
+ lines.append(" return (result + '0000')[:4]")
708
+ lines.append("")
709
+ lines.append("")
710
+ lines.append("def compress_func(value):")
711
+ lines.append(' """Informatica COMPRESS equivalent - removes spaces."""')
712
+ lines.append(" return str(value).replace(' ', '') if value is not None else None")
713
+ lines.append("")
714
+ lines.append("")
715
+ lines.append("def decompress_func(value):")
716
+ lines.append(' """Informatica DECOMPRESS equivalent."""')
717
+ lines.append(" return value")
718
+ lines.append("")
719
+ lines.append("")
720
+ lines.append("def to_timestamp_func(value, fmt=None):")
721
+ lines.append(' """Informatica TO_TIMESTAMP equivalent."""')
722
+ lines.append(" return to_date(value, fmt)")
723
+ lines.append("")
724
+ lines.append("")
725
+ lines.append("def cast_func(value, datatype):")
726
+ lines.append(' """Informatica CAST equivalent."""')
727
+ lines.append(" if value is None:")
728
+ lines.append(" return None")
729
+ lines.append(" dt = str(datatype).upper()")
730
+ lines.append(" if 'INT' in dt:")
731
+ lines.append(" return int(float(str(value)))")
732
+ lines.append(" elif 'FLOAT' in dt or 'DOUBLE' in dt or 'DECIMAL' in dt or 'NUMBER' in dt:")
733
+ lines.append(" return float(str(value))")
734
+ lines.append(" elif 'CHAR' in dt or 'STRING' in dt or 'VARCHAR' in dt:")
735
+ lines.append(" return str(value)")
736
+ lines.append(" elif 'DATE' in dt or 'TIMESTAMP' in dt:")
737
+ lines.append(" return to_date(value)")
738
+ lines.append(" return value")
739
+ lines.append("")
740
+ lines.append("")
741
+ lines.append("def set_date_part(part, date_val, value):")
742
+ lines.append(' """Informatica SET_DATE_PART equivalent."""')
743
+ lines.append(" if date_val is None:")
744
+ lines.append(" return None")
745
+ lines.append(" if isinstance(date_val, str):")
746
+ lines.append(" date_val = datetime.fromisoformat(date_val)")
747
+ lines.append(" p = part.upper()")
748
+ lines.append(" if p in ('YYYY', 'YY', 'YEAR'):")
749
+ lines.append(" return date_val.replace(year=int(value))")
750
+ lines.append(" elif p in ('MM', 'MON', 'MONTH'):")
751
+ lines.append(" return date_val.replace(month=int(value))")
752
+ lines.append(" elif p in ('DD', 'DAY'):")
753
+ lines.append(" return date_val.replace(day=int(value))")
754
+ lines.append(" elif p in ('HH', 'HH24', 'HOUR'):")
755
+ lines.append(" return date_val.replace(hour=int(value))")
756
+ lines.append(" elif p in ('MI', 'MINUTE'):")
757
+ lines.append(" return date_val.replace(minute=int(value))")
758
+ lines.append(" elif p in ('SS', 'SECOND'):")
759
+ lines.append(" return date_val.replace(second=int(value))")
760
+ lines.append(" return date_val")
761
+ lines.append("")
762
+ lines.append("")
763
+ lines.append("def date_diff(date1, date2, part='DD'):")
764
+ lines.append(' """Informatica DATE_DIFF equivalent."""')
765
+ lines.append(" if date1 is None or date2 is None:")
766
+ lines.append(" return None")
767
+ lines.append(" if isinstance(date1, str):")
768
+ lines.append(" date1 = datetime.fromisoformat(date1)")
769
+ lines.append(" if isinstance(date2, str):")
770
+ lines.append(" date2 = datetime.fromisoformat(date2)")
771
+ lines.append(" delta = date1 - date2")
772
+ lines.append(" p = part.upper()")
773
+ lines.append(" if p in ('DD', 'DAY', 'D'):")
774
+ lines.append(" return delta.days")
775
+ lines.append(" elif p in ('HH', 'HOUR'):")
776
+ lines.append(" return int(delta.total_seconds() / 3600)")
777
+ lines.append(" elif p in ('MI', 'MINUTE'):")
778
+ lines.append(" return int(delta.total_seconds() / 60)")
779
+ lines.append(" elif p in ('SS', 'SECOND'):")
780
+ lines.append(" return int(delta.total_seconds())")
781
+ lines.append(" elif p in ('MM', 'MONTH'):")
782
+ lines.append(" return (date1.year - date2.year) * 12 + (date1.month - date2.month)")
783
+ lines.append(" elif p in ('YYYY', 'YEAR'):")
784
+ lines.append(" return date1.year - date2.year")
785
+ lines.append(" return delta.days")
786
+ lines.append("")
787
+ lines.append("")
788
+ lines.append("def date_compare(date1, date2):")
789
+ lines.append(' """Informatica DATE_COMPARE equivalent. Returns -1, 0, or 1."""')
790
+ lines.append(" if date1 is None and date2 is None:")
791
+ lines.append(" return 0")
792
+ lines.append(" if date1 is None:")
793
+ lines.append(" return -1")
794
+ lines.append(" if date2 is None:")
795
+ lines.append(" return 1")
796
+ lines.append(" if isinstance(date1, str):")
797
+ lines.append(" date1 = datetime.fromisoformat(date1)")
798
+ lines.append(" if isinstance(date2, str):")
799
+ lines.append(" date2 = datetime.fromisoformat(date2)")
800
+ lines.append(" if date1 < date2:")
801
+ lines.append(" return -1")
802
+ lines.append(" elif date1 > date2:")
803
+ lines.append(" return 1")
804
+ lines.append(" return 0")
805
+ lines.append("")
806
+ lines.append("")
807
+ lines.append("def last_day(date_val):")
808
+ lines.append(' """Informatica LAST_DAY equivalent."""')
809
+ lines.append(" if date_val is None:")
810
+ lines.append(" return None")
811
+ lines.append(" if isinstance(date_val, str):")
812
+ lines.append(" date_val = datetime.fromisoformat(date_val)")
813
+ lines.append(" import calendar")
814
+ lines.append(" last = calendar.monthrange(date_val.year, date_val.month)[1]")
815
+ lines.append(" return date_val.replace(day=last)")
816
+ lines.append("")
817
+ lines.append("")
818
+ lines.append("def make_date_time(year, month, day, hour=0, minute=0, second=0):")
819
+ lines.append(' """Informatica MAKE_DATE_TIME equivalent."""')
820
+ lines.append(" return datetime(int(year), int(month), int(day), int(hour), int(minute), int(second))")
821
+ lines.append("")
822
+ lines.append("")
823
+ lines.append("def trunc(value, precision=0):")
824
+ lines.append(' """Informatica TRUNC equivalent (numeric or date)."""')
825
+ lines.append(" if value is None:")
826
+ lines.append(" return None")
827
+ lines.append(" if hasattr(value, 'replace') and hasattr(value, 'year'):")
828
+ lines.append(" return value.replace(hour=0, minute=0, second=0, microsecond=0)")
829
+ lines.append(" import math")
830
+ lines.append(" factor = 10 ** int(precision)")
831
+ lines.append(" return math.trunc(float(value) * factor) / factor")
832
+ lines.append("")
833
+ lines.append("")
834
+ lines.append("def round_val(value, precision=0):")
835
+ lines.append(' """Informatica ROUND equivalent."""')
836
+ lines.append(" if value is None:")
837
+ lines.append(" return None")
838
+ lines.append(" return round(float(value), int(precision))")
839
+ lines.append("")
840
+ lines.append("")
841
+ lines.append("def abs_val(value):")
842
+ lines.append(' """Informatica ABS equivalent."""')
843
+ lines.append(" return abs(float(value)) if value is not None else None")
844
+ lines.append("")
845
+ lines.append("")
846
+ lines.append("def ceil_val(value):")
847
+ lines.append(' """Informatica CEIL equivalent."""')
848
+ lines.append(" import math")
849
+ lines.append(" return math.ceil(float(value)) if value is not None else None")
850
+ lines.append("")
851
+ lines.append("")
852
+ lines.append("def floor_val(value):")
853
+ lines.append(' """Informatica FLOOR equivalent."""')
854
+ lines.append(" import math")
855
+ lines.append(" return math.floor(float(value)) if value is not None else None")
856
+ lines.append("")
857
+ lines.append("")
858
+ lines.append("def mod_val(a, b):")
859
+ lines.append(' """Informatica MOD equivalent."""')
860
+ lines.append(" if a is None or b is None or float(b) == 0:")
861
+ lines.append(" return None")
862
+ lines.append(" return float(a) % float(b)")
863
+ lines.append("")
864
+ lines.append("")
865
+ lines.append("def power_val(base, exp):")
866
+ lines.append(' """Informatica POWER equivalent."""')
867
+ lines.append(" if base is None or exp is None:")
868
+ lines.append(" return None")
869
+ lines.append(" return float(base) ** float(exp)")
870
+ lines.append("")
871
+ lines.append("")
872
+ lines.append("def sqrt_val(value):")
873
+ lines.append(' """Informatica SQRT equivalent."""')
874
+ lines.append(" import math")
875
+ lines.append(" return math.sqrt(float(value)) if value is not None else None")
876
+ lines.append("")
877
+ lines.append("")
878
+ lines.append("def log_val(base, value):")
879
+ lines.append(' """Informatica LOG equivalent."""')
880
+ lines.append(" import math")
881
+ lines.append(" if value is None or base is None:")
882
+ lines.append(" return None")
883
+ lines.append(" return math.log(float(value), float(base))")
884
+ lines.append("")
885
+ lines.append("")
886
+ lines.append("def ln_val(value):")
887
+ lines.append(' """Informatica LN (natural log) equivalent."""')
888
+ lines.append(" import math")
889
+ lines.append(" return math.log(float(value)) if value is not None else None")
890
+ lines.append("")
891
+ lines.append("")
892
+ lines.append("def exp_val(value):")
893
+ lines.append(' """Informatica EXP equivalent."""')
894
+ lines.append(" import math")
895
+ lines.append(" return math.exp(float(value)) if value is not None else None")
896
+ lines.append("")
897
+ lines.append("")
898
+ lines.append("def sign_val(value):")
899
+ lines.append(' """Informatica SIGN equivalent."""')
900
+ lines.append(" if value is None:")
901
+ lines.append(" return None")
902
+ lines.append(" v = float(value)")
903
+ lines.append(" return 1 if v > 0 else (-1 if v < 0 else 0)")
904
+ lines.append("")
905
+ lines.append("")
906
+ lines.append("def rand_val(seed=None):")
907
+ lines.append(' """Informatica RAND equivalent."""')
908
+ lines.append(" import random")
909
+ lines.append(" if seed is not None:")
910
+ lines.append(" random.seed(seed)")
911
+ lines.append(" return random.random()")
912
+ lines.append("")
913
+ lines.append("")
914
+ lines.append("def greatest_val(*args):")
915
+ lines.append(' """Informatica GREATEST equivalent."""')
916
+ lines.append(" filtered = [a for a in args if a is not None]")
917
+ lines.append(" return max(filtered) if filtered else None")
918
+ lines.append("")
919
+ lines.append("")
920
+ lines.append("def least_val(*args):")
921
+ lines.append(' """Informatica LEAST equivalent."""')
922
+ lines.append(" filtered = [a for a in args if a is not None]")
923
+ lines.append(" return min(filtered) if filtered else None")
924
+ lines.append("")
925
+ lines.append("")
926
+ lines.append("def choose_expr(index, *values):")
927
+ lines.append(' """Informatica CHOOSE equivalent."""')
928
+ lines.append(" idx = int(index)")
929
+ lines.append(" if 1 <= idx <= len(values):")
930
+ lines.append(" return values[idx - 1]")
931
+ lines.append(" return None")
932
+ lines.append("")
933
+ lines.append("")
934
+ lines.append("def in_expr(value, *candidates):")
935
+ lines.append(' """Informatica IN equivalent."""')
936
+ lines.append(" return value in candidates")
937
+ lines.append("")
938
+ lines.append("")
939
+ lines.append("def max_val(*args):")
940
+ lines.append(' """Informatica MAX equivalent (row-level)."""')
941
+ lines.append(" filtered = [a for a in args if a is not None]")
942
+ lines.append(" return max(filtered) if filtered else None")
943
+ lines.append("")
944
+ lines.append("")
945
+ lines.append("def min_val(*args):")
946
+ lines.append(' """Informatica MIN equivalent (row-level)."""')
947
+ lines.append(" filtered = [a for a in args if a is not None]")
948
+ lines.append(" return min(filtered) if filtered else None")
949
+ lines.append("")
950
+ lines.append("")
951
+ lines.append("def sum_val(*args):")
952
+ lines.append(' """Informatica SUM equivalent (row-level)."""')
953
+ lines.append(" return sum(float(a) for a in args if a is not None)")
954
+ lines.append("")
955
+ lines.append("")
956
+ lines.append("def count_val(*args):")
957
+ lines.append(' """Informatica COUNT equivalent (row-level)."""')
958
+ lines.append(" return sum(1 for a in args if a is not None)")
959
+ lines.append("")
960
+ lines.append("")
961
+ lines.append("def avg_val(*args):")
962
+ lines.append(' """Informatica AVG equivalent (row-level)."""')
963
+ lines.append(" filtered = [float(a) for a in args if a is not None]")
964
+ lines.append(" return sum(filtered) / len(filtered) if filtered else None")
965
+ lines.append("")
966
+ lines.append("")
967
+ lines.append("def median_val(*args):")
968
+ lines.append(' """Informatica MEDIAN equivalent (row-level)."""')
969
+ lines.append(" import statistics")
970
+ lines.append(" filtered = [float(a) for a in args if a is not None]")
971
+ lines.append(" return statistics.median(filtered) if filtered else None")
972
+ lines.append("")
973
+ lines.append("")
974
+ lines.append("def stddev_val(*args):")
975
+ lines.append(' """Informatica STDDEV equivalent (row-level)."""')
976
+ lines.append(" import statistics")
977
+ lines.append(" filtered = [float(a) for a in args if a is not None]")
978
+ lines.append(" return statistics.stdev(filtered) if len(filtered) > 1 else 0")
979
+ lines.append("")
980
+ lines.append("")
981
+ lines.append("def variance_val(*args):")
982
+ lines.append(' """Informatica VARIANCE equivalent (row-level)."""')
983
+ lines.append(" import statistics")
984
+ lines.append(" filtered = [float(a) for a in args if a is not None]")
985
+ lines.append(" return statistics.variance(filtered) if len(filtered) > 1 else 0")
986
+ lines.append("")
987
+ lines.append("")
988
+ lines.append("def percentile_val(value, pct):")
989
+ lines.append(' """Informatica PERCENTILE equivalent."""')
990
+ lines.append(" return value")
991
+ lines.append("")
992
+ lines.append("")
993
+ lines.append("def first_val(*args):")
994
+ lines.append(' """Informatica FIRST equivalent."""')
995
+ lines.append(" for a in args:")
996
+ lines.append(" if a is not None:")
997
+ lines.append(" return a")
998
+ lines.append(" return None")
999
+ lines.append("")
1000
+ lines.append("")
1001
+ lines.append("def last_val(*args):")
1002
+ lines.append(' """Informatica LAST equivalent."""')
1003
+ lines.append(" result = None")
1004
+ lines.append(" for a in args:")
1005
+ lines.append(" if a is not None:")
1006
+ lines.append(" result = a")
1007
+ lines.append(" return result")
1008
+ lines.append("")
1009
+ lines.append("")
1010
+ lines.append("def moving_avg(value, window=3):")
1011
+ lines.append(' """Informatica MOVINGAVG equivalent."""')
1012
+ lines.append(" return value")
1013
+ lines.append("")
1014
+ lines.append("")
1015
+ lines.append("def moving_sum(value, window=3):")
1016
+ lines.append(' """Informatica MOVINGSUM equivalent."""')
1017
+ lines.append(" return value")
1018
+ lines.append("")
1019
+ lines.append("")
1020
+ lines.append("def cume(value):")
1021
+ lines.append(' """Informatica CUME equivalent."""')
1022
+ lines.append(" return value")
1023
+ lines.append("")
1024
+ lines.append("")
1025
+ lines.append("def set_count_variable(var_name, value=1):")
1026
+ lines.append(' """Informatica SETCOUNTVARIABLE equivalent."""')
1027
+ lines.append(" return set_variable(var_name, value)")
1028
+ lines.append("")
1029
+ lines.append("")
634
1030
  lines.append("def raise_error(message):")
635
1031
  lines.append(' """Informatica ERROR function equivalent."""')
636
1032
  lines.append(" logger.error(f'INFORMATICA ERROR: {message}')")
@@ -3,7 +3,11 @@ from informatica_python.models import (
3
3
  MappingDef, FolderDef, SourceDef, TargetDef,
4
4
  TransformationDef, ConnectorDef, InstanceDef,
5
5
  )
6
- from informatica_python.utils.expression_converter import convert_expression, convert_sql_expression
6
+ from informatica_python.utils.expression_converter import (
7
+ convert_expression, convert_sql_expression,
8
+ parse_join_condition, parse_lookup_condition,
9
+ parse_aggregate_expression, PANDAS_AGG_MAP,
10
+ )
7
11
  from informatica_python.utils.datatype_map import get_python_type
8
12
 
9
13
 
@@ -340,19 +344,56 @@ def _gen_aggregator_transform(lines, tx, tx_safe, input_df, source_dfs):
340
344
  group_by_ports = []
341
345
  agg_ports = []
342
346
  for fld in tx.fields:
343
- if "INPUT" in (fld.porttype or "").upper() and "OUTPUT" in (fld.porttype or "").upper():
347
+ pt = (fld.porttype or "").upper()
348
+ if "INPUT" in pt and "OUTPUT" in pt:
344
349
  group_by_ports.append(fld.name)
345
- elif "OUTPUT" in (fld.porttype or "").upper():
350
+ elif "OUTPUT" in pt and "INPUT" not in pt:
346
351
  agg_ports.append(fld)
347
352
 
348
- if group_by_ports:
349
- lines.append(f" df_{tx_safe} = {input_df}.groupby({group_by_ports}).agg(")
350
- for ap in agg_ports:
351
- expr = ap.expression or ap.name
352
- lines.append(f" # {ap.name}: {expr}")
353
- lines.append(f" ).reset_index()")
353
+ agg_dict = {}
354
+ rename_map = {}
355
+ computed_aggs = []
356
+ for ap in agg_ports:
357
+ expr_text = ap.expression or ap.name
358
+ agg_func, agg_col = parse_aggregate_expression(expr_text)
359
+ if agg_func and agg_col:
360
+ pandas_func = PANDAS_AGG_MAP.get(agg_func, agg_func)
361
+ if agg_col == "*":
362
+ agg_col = group_by_ports[0] if group_by_ports else ap.name
363
+ pandas_func = "count"
364
+ if agg_col in agg_dict:
365
+ temp_name = f"{agg_col}__{ap.name}"
366
+ agg_dict[temp_name] = (agg_col, pandas_func)
367
+ rename_map[temp_name] = ap.name
368
+ else:
369
+ agg_dict[ap.name] = (agg_col, pandas_func)
370
+ else:
371
+ computed_aggs.append((ap.name, expr_text))
372
+
373
+ if group_by_ports and agg_dict:
374
+ lines.append(f" # Aggregator: group by {group_by_ports}")
375
+ agg_spec = {}
376
+ for out_name, (col, func) in agg_dict.items():
377
+ agg_spec[out_name] = f"pd.NamedAgg(column='{col}', aggfunc='{func}')"
378
+
379
+ lines.append(f" df_{tx_safe} = {input_df}.groupby({group_by_ports}, as_index=False).agg(")
380
+ for out_name, spec in agg_spec.items():
381
+ lines.append(f" {out_name}={spec},")
382
+ lines.append(f" )")
383
+
384
+ if rename_map:
385
+ lines.append(f" df_{tx_safe} = df_{tx_safe}.rename(columns={rename_map})")
386
+ elif group_by_ports:
387
+ lines.append(f" # Aggregator: group by {group_by_ports}")
388
+ lines.append(f" df_{tx_safe} = {input_df}.groupby({group_by_ports}, as_index=False).agg('first')")
354
389
  else:
355
390
  lines.append(f" df_{tx_safe} = {input_df}.copy()")
391
+
392
+ for col_name, expr_text in computed_aggs:
393
+ expr_py = convert_expression(expr_text)
394
+ lines.append(f" # Computed aggregate: {col_name} = {expr_text}")
395
+ lines.append(f" df_{tx_safe}['{col_name}'] = {expr_py}")
396
+
356
397
  source_dfs[tx.name] = f"df_{tx_safe}"
357
398
 
358
399
 
@@ -384,12 +425,47 @@ def _gen_joiner_transform(lines, tx, tx_safe, input_df, input_sources, source_df
384
425
  elif attr.name == "Join Condition":
385
426
  join_condition = attr.value
386
427
 
428
+ master_fields = []
429
+ detail_fields = []
430
+ for fld in tx.fields:
431
+ pt = (fld.porttype or "").upper()
432
+ if "MASTER" in pt:
433
+ master_fields.append(fld.name)
434
+ elif "DETAIL" in pt:
435
+ detail_fields.append(fld.name)
436
+
437
+ left_keys, right_keys = parse_join_condition(join_condition)
438
+
387
439
  src_list = list(input_sources)
388
440
  if len(src_list) >= 2:
441
+ df_master = source_dfs.get(src_list[0], f"df_{_safe_name(src_list[0])}")
442
+ df_detail = source_dfs.get(src_list[1], f"df_{_safe_name(src_list[1])}")
443
+
444
+ lines.append(f" # Join ({join_type}): {join_condition or 'auto'}")
445
+ if left_keys and right_keys:
446
+ lines.append(f" df_{tx_safe} = {df_detail}.merge(")
447
+ lines.append(f" {df_master},")
448
+ lines.append(f" left_on={left_keys},")
449
+ lines.append(f" right_on={right_keys},")
450
+ lines.append(f" how='{join_type}',")
451
+ lines.append(f" suffixes=('', '_master')")
452
+ lines.append(f" )")
453
+ else:
454
+ common_cols = []
455
+ if master_fields and detail_fields:
456
+ common_cols = [f for f in detail_fields if f in master_fields]
457
+ if common_cols:
458
+ lines.append(f" df_{tx_safe} = {df_detail}.merge(")
459
+ lines.append(f" {df_master},")
460
+ lines.append(f" on={common_cols},")
461
+ lines.append(f" how='{join_type}',")
462
+ lines.append(f" suffixes=('', '_master')")
463
+ lines.append(f" )")
464
+ else:
465
+ lines.append(f" df_{tx_safe} = {df_detail}.merge({df_master}, how='{join_type}', suffixes=('', '_master'))")
466
+ elif len(src_list) == 1:
389
467
  df1 = source_dfs.get(src_list[0], f"df_{_safe_name(src_list[0])}")
390
- df2 = source_dfs.get(src_list[1], f"df_{_safe_name(src_list[1])}")
391
- lines.append(f" # Join: {join_condition or 'auto'}")
392
- lines.append(f" df_{tx_safe} = {df1}.merge({df2}, how='{join_type}')")
468
+ lines.append(f" df_{tx_safe} = {df1}.copy()")
393
469
  else:
394
470
  lines.append(f" df_{tx_safe} = {input_df}.copy()")
395
471
  source_dfs[tx.name] = f"df_{tx_safe}"
@@ -400,6 +476,8 @@ def _gen_lookup_transform(lines, tx, tx_safe, input_df, source_dfs):
400
476
  lookup_sql = ""
401
477
  lookup_condition = ""
402
478
  lookup_cache = "YES"
479
+ lookup_policy = "ERROR"
480
+ default_values = {}
403
481
  for attr in tx.attributes:
404
482
  if attr.name == "Lookup table name":
405
483
  lookup_table = attr.value
@@ -409,11 +487,19 @@ def _gen_lookup_transform(lines, tx, tx_safe, input_df, source_dfs):
409
487
  lookup_condition = attr.value
410
488
  elif attr.name == "Lookup caching enabled":
411
489
  lookup_cache = attr.value
490
+ elif attr.name == "Lookup policy on multiple match":
491
+ lookup_policy = attr.value
412
492
 
413
493
  return_fields = [f for f in tx.fields if "RETURN" in (f.porttype or "").upper() or
414
494
  ("LOOKUP" in (f.porttype or "").upper() and "OUTPUT" in (f.porttype or "").upper()
415
495
  and "INPUT" not in (f.porttype or "").upper())]
416
496
  input_fields = [f for f in tx.fields if "INPUT" in (f.porttype or "").upper()]
497
+ lookup_output_fields = [f for f in tx.fields if
498
+ "OUTPUT" in (f.porttype or "").upper()
499
+ and "INPUT" not in (f.porttype or "").upper()
500
+ and "RETURN" not in (f.porttype or "").upper()]
501
+
502
+ all_output_fields = return_fields + lookup_output_fields
417
503
 
418
504
  lines.append(f" # Lookup: {lookup_table or tx.name}")
419
505
  if lookup_sql:
@@ -425,19 +511,48 @@ def _gen_lookup_transform(lines, tx, tx_safe, input_df, source_dfs):
425
511
  elif lookup_table:
426
512
  lines.append(f" df_lkp_{tx_safe} = read_from_db(config, 'SELECT * FROM {lookup_table}', 'default')")
427
513
  else:
428
- lines.append(f" df_lkp_{tx_safe} = pd.DataFrame() # TODO: Configure lookup source")
514
+ lines.append(f" df_lkp_{tx_safe} = pd.DataFrame()")
429
515
 
430
- if lookup_condition:
431
- lines.append(f" # Condition: {lookup_condition}")
516
+ input_keys, lookup_keys = parse_lookup_condition(lookup_condition)
432
517
 
433
- lines.append(f" df_{tx_safe} = {input_df}.copy()")
518
+ if input_keys and lookup_keys:
519
+ lines.append(f" # Lookup condition: {lookup_condition}")
434
520
 
435
- if return_fields:
436
- ret_names = [f.name for f in return_fields]
437
- lines.append(f" # Lookup returns: {ret_names}")
438
- lines.append(f" # TODO: Implement lookup merge logic based on condition: {lookup_condition}")
439
- for rf in return_fields:
440
- lines.append(f" df_{tx_safe}['{rf.name}'] = None # From lookup")
521
+ lkp_cols = [f.name for f in all_output_fields]
522
+ select_cols = list(set(lookup_keys + lkp_cols))
523
+ lines.append(f" lkp_select_cols_{tx_safe} = [c for c in {select_cols} if c in df_lkp_{tx_safe}.columns]")
524
+
525
+ if lookup_policy and "FIRST" in lookup_policy.upper():
526
+ lines.append(f" df_lkp_{tx_safe} = df_lkp_{tx_safe}[lkp_select_cols_{tx_safe}].drop_duplicates(subset={lookup_keys}, keep='first')")
527
+ elif lookup_policy and "LAST" in lookup_policy.upper():
528
+ lines.append(f" df_lkp_{tx_safe} = df_lkp_{tx_safe}[lkp_select_cols_{tx_safe}].drop_duplicates(subset={lookup_keys}, keep='last')")
529
+ else:
530
+ lines.append(f" df_lkp_{tx_safe} = df_lkp_{tx_safe}[lkp_select_cols_{tx_safe}].drop_duplicates(subset={lookup_keys}, keep='first')")
531
+
532
+ lines.append(f" df_{tx_safe} = {input_df}.merge(")
533
+ lines.append(f" df_lkp_{tx_safe},")
534
+ lines.append(f" left_on={input_keys},")
535
+ lines.append(f" right_on={lookup_keys},")
536
+ lines.append(f" how='left',")
537
+ lines.append(f" suffixes=('', '_lkp')")
538
+ lines.append(f" )")
539
+
540
+ drop_cols = [k for k in lookup_keys if k not in input_keys]
541
+ if drop_cols:
542
+ lines.append(f" lkp_drop = [c for c in {drop_cols} if c in df_{tx_safe}.columns and c + '_lkp' not in df_{tx_safe}.columns]")
543
+
544
+ for rf in all_output_fields:
545
+ if rf.default_value:
546
+ lines.append(f" df_{tx_safe}['{rf.name}'] = df_{tx_safe}['{rf.name}'].fillna({repr(rf.default_value)})")
547
+ else:
548
+ lines.append(f" df_{tx_safe} = {input_df}.copy()")
549
+ if all_output_fields:
550
+ ret_names = [f.name for f in all_output_fields]
551
+ lines.append(f" # Lookup returns: {ret_names}")
552
+ lines.append(f" # Could not auto-parse lookup condition: {lookup_condition}")
553
+ for rf in all_output_fields:
554
+ default = repr(rf.default_value) if rf.default_value else "None"
555
+ lines.append(f" df_{tx_safe}['{rf.name}'] = {default}")
441
556
 
442
557
  source_dfs[tx.name] = f"df_{tx_safe}"
443
558