informatica-python 1.1.1__tar.gz → 1.2.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {informatica_python-1.1.1 → informatica_python-1.2.0}/PKG-INFO +1 -1
- {informatica_python-1.1.1 → informatica_python-1.2.0}/informatica_python/generators/helper_gen.py +396 -0
- {informatica_python-1.1.1 → informatica_python-1.2.0}/informatica_python/generators/mapping_gen.py +137 -22
- informatica_python-1.2.0/informatica_python/utils/expression_converter.py +259 -0
- {informatica_python-1.1.1 → informatica_python-1.2.0}/informatica_python.egg-info/PKG-INFO +1 -1
- {informatica_python-1.1.1 → informatica_python-1.2.0}/pyproject.toml +1 -1
- informatica_python-1.2.0/tests/test_converter.py +551 -0
- informatica_python-1.1.1/informatica_python/utils/expression_converter.py +0 -128
- informatica_python-1.1.1/tests/test_converter.py +0 -260
- {informatica_python-1.1.1 → informatica_python-1.2.0}/README.md +0 -0
- {informatica_python-1.1.1 → informatica_python-1.2.0}/informatica_python/__init__.py +0 -0
- {informatica_python-1.1.1 → informatica_python-1.2.0}/informatica_python/cli.py +0 -0
- {informatica_python-1.1.1 → informatica_python-1.2.0}/informatica_python/converter.py +0 -0
- {informatica_python-1.1.1 → informatica_python-1.2.0}/informatica_python/generators/__init__.py +0 -0
- {informatica_python-1.1.1 → informatica_python-1.2.0}/informatica_python/generators/config_gen.py +0 -0
- {informatica_python-1.1.1 → informatica_python-1.2.0}/informatica_python/generators/error_log_gen.py +0 -0
- {informatica_python-1.1.1 → informatica_python-1.2.0}/informatica_python/generators/sql_gen.py +0 -0
- {informatica_python-1.1.1 → informatica_python-1.2.0}/informatica_python/generators/workflow_gen.py +0 -0
- {informatica_python-1.1.1 → informatica_python-1.2.0}/informatica_python/models.py +0 -0
- {informatica_python-1.1.1 → informatica_python-1.2.0}/informatica_python/parser.py +0 -0
- {informatica_python-1.1.1 → informatica_python-1.2.0}/informatica_python/utils/__init__.py +0 -0
- {informatica_python-1.1.1 → informatica_python-1.2.0}/informatica_python/utils/datatype_map.py +0 -0
- {informatica_python-1.1.1 → informatica_python-1.2.0}/informatica_python.egg-info/SOURCES.txt +0 -0
- {informatica_python-1.1.1 → informatica_python-1.2.0}/informatica_python.egg-info/dependency_links.txt +0 -0
- {informatica_python-1.1.1 → informatica_python-1.2.0}/informatica_python.egg-info/entry_points.txt +0 -0
- {informatica_python-1.1.1 → informatica_python-1.2.0}/informatica_python.egg-info/requires.txt +0 -0
- {informatica_python-1.1.1 → informatica_python-1.2.0}/informatica_python.egg-info/top_level.txt +0 -0
- {informatica_python-1.1.1 → informatica_python-1.2.0}/setup.cfg +0 -0
{informatica_python-1.1.1 → informatica_python-1.2.0}/informatica_python/generators/helper_gen.py
RENAMED
|
@@ -631,6 +631,402 @@ def _add_expression_helpers(lines):
|
|
|
631
631
|
lines.append(" return date_val")
|
|
632
632
|
lines.append("")
|
|
633
633
|
lines.append("")
|
|
634
|
+
lines.append("def initcap(value):")
|
|
635
|
+
lines.append(' """Informatica INITCAP equivalent."""')
|
|
636
|
+
lines.append(" return str(value).title() if value is not None else None")
|
|
637
|
+
lines.append("")
|
|
638
|
+
lines.append("")
|
|
639
|
+
lines.append("def reverse_str(value):")
|
|
640
|
+
lines.append(' """Informatica REVERSE equivalent."""')
|
|
641
|
+
lines.append(" return str(value)[::-1] if value is not None else None")
|
|
642
|
+
lines.append("")
|
|
643
|
+
lines.append("")
|
|
644
|
+
lines.append("def chr_func(code):")
|
|
645
|
+
lines.append(' """Informatica CHR equivalent."""')
|
|
646
|
+
lines.append(" return chr(int(code)) if code is not None else None")
|
|
647
|
+
lines.append("")
|
|
648
|
+
lines.append("")
|
|
649
|
+
lines.append("def ascii_func(value):")
|
|
650
|
+
lines.append(' """Informatica ASCII equivalent."""')
|
|
651
|
+
lines.append(" if value is None or str(value) == '':")
|
|
652
|
+
lines.append(" return None")
|
|
653
|
+
lines.append(" return ord(str(value)[0])")
|
|
654
|
+
lines.append("")
|
|
655
|
+
lines.append("")
|
|
656
|
+
lines.append("def left_str(value, n):")
|
|
657
|
+
lines.append(' """Return leftmost n characters."""')
|
|
658
|
+
lines.append(" return str(value)[:int(n)] if value is not None else None")
|
|
659
|
+
lines.append("")
|
|
660
|
+
lines.append("")
|
|
661
|
+
lines.append("def right_str(value, n):")
|
|
662
|
+
lines.append(' """Return rightmost n characters."""')
|
|
663
|
+
lines.append(" return str(value)[-int(n):] if value is not None else None")
|
|
664
|
+
lines.append("")
|
|
665
|
+
lines.append("")
|
|
666
|
+
lines.append("def trim_func(value):")
|
|
667
|
+
lines.append(' """Informatica TRIM equivalent."""')
|
|
668
|
+
lines.append(" return str(value).strip() if value is not None else None")
|
|
669
|
+
lines.append("")
|
|
670
|
+
lines.append("")
|
|
671
|
+
lines.append("def indexof(value, search, start=1):")
|
|
672
|
+
lines.append(' """Informatica INDEXOF equivalent (1-based)."""')
|
|
673
|
+
lines.append(" if value is None or search is None:")
|
|
674
|
+
lines.append(" return 0")
|
|
675
|
+
lines.append(" idx = str(value).find(str(search), max(start - 1, 0))")
|
|
676
|
+
lines.append(" return idx + 1 if idx >= 0 else 0")
|
|
677
|
+
lines.append("")
|
|
678
|
+
lines.append("")
|
|
679
|
+
lines.append("def metaphone_func(value):")
|
|
680
|
+
lines.append(' """Informatica METAPHONE equivalent (simplified)."""')
|
|
681
|
+
lines.append(" if value is None:")
|
|
682
|
+
lines.append(" return None")
|
|
683
|
+
lines.append(" try:")
|
|
684
|
+
lines.append(" import jellyfish")
|
|
685
|
+
lines.append(" return jellyfish.metaphone(str(value))")
|
|
686
|
+
lines.append(" except ImportError:")
|
|
687
|
+
lines.append(" return str(value).upper()[:4]")
|
|
688
|
+
lines.append("")
|
|
689
|
+
lines.append("")
|
|
690
|
+
lines.append("def soundex_func(value):")
|
|
691
|
+
lines.append(' """Informatica SOUNDEX equivalent (simplified)."""')
|
|
692
|
+
lines.append(" if value is None:")
|
|
693
|
+
lines.append(" return None")
|
|
694
|
+
lines.append(" try:")
|
|
695
|
+
lines.append(" import jellyfish")
|
|
696
|
+
lines.append(" return jellyfish.soundex(str(value))")
|
|
697
|
+
lines.append(" except ImportError:")
|
|
698
|
+
lines.append(" s = str(value).upper()")
|
|
699
|
+
lines.append(" if not s:")
|
|
700
|
+
lines.append(" return '0000'")
|
|
701
|
+
lines.append(" codes = {'B':'1','F':'1','P':'1','V':'1','C':'2','G':'2','J':'2','K':'2','Q':'2','S':'2','X':'2','Z':'2','D':'3','T':'3','L':'4','M':'5','N':'5','R':'6'}")
|
|
702
|
+
lines.append(" result = s[0]")
|
|
703
|
+
lines.append(" for ch in s[1:]:")
|
|
704
|
+
lines.append(" code = codes.get(ch, '0')")
|
|
705
|
+
lines.append(" if code != '0' and code != result[-1]:")
|
|
706
|
+
lines.append(" result += code")
|
|
707
|
+
lines.append(" return (result + '0000')[:4]")
|
|
708
|
+
lines.append("")
|
|
709
|
+
lines.append("")
|
|
710
|
+
lines.append("def compress_func(value):")
|
|
711
|
+
lines.append(' """Informatica COMPRESS equivalent - removes spaces."""')
|
|
712
|
+
lines.append(" return str(value).replace(' ', '') if value is not None else None")
|
|
713
|
+
lines.append("")
|
|
714
|
+
lines.append("")
|
|
715
|
+
lines.append("def decompress_func(value):")
|
|
716
|
+
lines.append(' """Informatica DECOMPRESS equivalent."""')
|
|
717
|
+
lines.append(" return value")
|
|
718
|
+
lines.append("")
|
|
719
|
+
lines.append("")
|
|
720
|
+
lines.append("def to_timestamp_func(value, fmt=None):")
|
|
721
|
+
lines.append(' """Informatica TO_TIMESTAMP equivalent."""')
|
|
722
|
+
lines.append(" return to_date(value, fmt)")
|
|
723
|
+
lines.append("")
|
|
724
|
+
lines.append("")
|
|
725
|
+
lines.append("def cast_func(value, datatype):")
|
|
726
|
+
lines.append(' """Informatica CAST equivalent."""')
|
|
727
|
+
lines.append(" if value is None:")
|
|
728
|
+
lines.append(" return None")
|
|
729
|
+
lines.append(" dt = str(datatype).upper()")
|
|
730
|
+
lines.append(" if 'INT' in dt:")
|
|
731
|
+
lines.append(" return int(float(str(value)))")
|
|
732
|
+
lines.append(" elif 'FLOAT' in dt or 'DOUBLE' in dt or 'DECIMAL' in dt or 'NUMBER' in dt:")
|
|
733
|
+
lines.append(" return float(str(value))")
|
|
734
|
+
lines.append(" elif 'CHAR' in dt or 'STRING' in dt or 'VARCHAR' in dt:")
|
|
735
|
+
lines.append(" return str(value)")
|
|
736
|
+
lines.append(" elif 'DATE' in dt or 'TIMESTAMP' in dt:")
|
|
737
|
+
lines.append(" return to_date(value)")
|
|
738
|
+
lines.append(" return value")
|
|
739
|
+
lines.append("")
|
|
740
|
+
lines.append("")
|
|
741
|
+
lines.append("def set_date_part(part, date_val, value):")
|
|
742
|
+
lines.append(' """Informatica SET_DATE_PART equivalent."""')
|
|
743
|
+
lines.append(" if date_val is None:")
|
|
744
|
+
lines.append(" return None")
|
|
745
|
+
lines.append(" if isinstance(date_val, str):")
|
|
746
|
+
lines.append(" date_val = datetime.fromisoformat(date_val)")
|
|
747
|
+
lines.append(" p = part.upper()")
|
|
748
|
+
lines.append(" if p in ('YYYY', 'YY', 'YEAR'):")
|
|
749
|
+
lines.append(" return date_val.replace(year=int(value))")
|
|
750
|
+
lines.append(" elif p in ('MM', 'MON', 'MONTH'):")
|
|
751
|
+
lines.append(" return date_val.replace(month=int(value))")
|
|
752
|
+
lines.append(" elif p in ('DD', 'DAY'):")
|
|
753
|
+
lines.append(" return date_val.replace(day=int(value))")
|
|
754
|
+
lines.append(" elif p in ('HH', 'HH24', 'HOUR'):")
|
|
755
|
+
lines.append(" return date_val.replace(hour=int(value))")
|
|
756
|
+
lines.append(" elif p in ('MI', 'MINUTE'):")
|
|
757
|
+
lines.append(" return date_val.replace(minute=int(value))")
|
|
758
|
+
lines.append(" elif p in ('SS', 'SECOND'):")
|
|
759
|
+
lines.append(" return date_val.replace(second=int(value))")
|
|
760
|
+
lines.append(" return date_val")
|
|
761
|
+
lines.append("")
|
|
762
|
+
lines.append("")
|
|
763
|
+
lines.append("def date_diff(date1, date2, part='DD'):")
|
|
764
|
+
lines.append(' """Informatica DATE_DIFF equivalent."""')
|
|
765
|
+
lines.append(" if date1 is None or date2 is None:")
|
|
766
|
+
lines.append(" return None")
|
|
767
|
+
lines.append(" if isinstance(date1, str):")
|
|
768
|
+
lines.append(" date1 = datetime.fromisoformat(date1)")
|
|
769
|
+
lines.append(" if isinstance(date2, str):")
|
|
770
|
+
lines.append(" date2 = datetime.fromisoformat(date2)")
|
|
771
|
+
lines.append(" delta = date1 - date2")
|
|
772
|
+
lines.append(" p = part.upper()")
|
|
773
|
+
lines.append(" if p in ('DD', 'DAY', 'D'):")
|
|
774
|
+
lines.append(" return delta.days")
|
|
775
|
+
lines.append(" elif p in ('HH', 'HOUR'):")
|
|
776
|
+
lines.append(" return int(delta.total_seconds() / 3600)")
|
|
777
|
+
lines.append(" elif p in ('MI', 'MINUTE'):")
|
|
778
|
+
lines.append(" return int(delta.total_seconds() / 60)")
|
|
779
|
+
lines.append(" elif p in ('SS', 'SECOND'):")
|
|
780
|
+
lines.append(" return int(delta.total_seconds())")
|
|
781
|
+
lines.append(" elif p in ('MM', 'MONTH'):")
|
|
782
|
+
lines.append(" return (date1.year - date2.year) * 12 + (date1.month - date2.month)")
|
|
783
|
+
lines.append(" elif p in ('YYYY', 'YEAR'):")
|
|
784
|
+
lines.append(" return date1.year - date2.year")
|
|
785
|
+
lines.append(" return delta.days")
|
|
786
|
+
lines.append("")
|
|
787
|
+
lines.append("")
|
|
788
|
+
lines.append("def date_compare(date1, date2):")
|
|
789
|
+
lines.append(' """Informatica DATE_COMPARE equivalent. Returns -1, 0, or 1."""')
|
|
790
|
+
lines.append(" if date1 is None and date2 is None:")
|
|
791
|
+
lines.append(" return 0")
|
|
792
|
+
lines.append(" if date1 is None:")
|
|
793
|
+
lines.append(" return -1")
|
|
794
|
+
lines.append(" if date2 is None:")
|
|
795
|
+
lines.append(" return 1")
|
|
796
|
+
lines.append(" if isinstance(date1, str):")
|
|
797
|
+
lines.append(" date1 = datetime.fromisoformat(date1)")
|
|
798
|
+
lines.append(" if isinstance(date2, str):")
|
|
799
|
+
lines.append(" date2 = datetime.fromisoformat(date2)")
|
|
800
|
+
lines.append(" if date1 < date2:")
|
|
801
|
+
lines.append(" return -1")
|
|
802
|
+
lines.append(" elif date1 > date2:")
|
|
803
|
+
lines.append(" return 1")
|
|
804
|
+
lines.append(" return 0")
|
|
805
|
+
lines.append("")
|
|
806
|
+
lines.append("")
|
|
807
|
+
lines.append("def last_day(date_val):")
|
|
808
|
+
lines.append(' """Informatica LAST_DAY equivalent."""')
|
|
809
|
+
lines.append(" if date_val is None:")
|
|
810
|
+
lines.append(" return None")
|
|
811
|
+
lines.append(" if isinstance(date_val, str):")
|
|
812
|
+
lines.append(" date_val = datetime.fromisoformat(date_val)")
|
|
813
|
+
lines.append(" import calendar")
|
|
814
|
+
lines.append(" last = calendar.monthrange(date_val.year, date_val.month)[1]")
|
|
815
|
+
lines.append(" return date_val.replace(day=last)")
|
|
816
|
+
lines.append("")
|
|
817
|
+
lines.append("")
|
|
818
|
+
lines.append("def make_date_time(year, month, day, hour=0, minute=0, second=0):")
|
|
819
|
+
lines.append(' """Informatica MAKE_DATE_TIME equivalent."""')
|
|
820
|
+
lines.append(" return datetime(int(year), int(month), int(day), int(hour), int(minute), int(second))")
|
|
821
|
+
lines.append("")
|
|
822
|
+
lines.append("")
|
|
823
|
+
lines.append("def trunc(value, precision=0):")
|
|
824
|
+
lines.append(' """Informatica TRUNC equivalent (numeric or date)."""')
|
|
825
|
+
lines.append(" if value is None:")
|
|
826
|
+
lines.append(" return None")
|
|
827
|
+
lines.append(" if hasattr(value, 'replace') and hasattr(value, 'year'):")
|
|
828
|
+
lines.append(" return value.replace(hour=0, minute=0, second=0, microsecond=0)")
|
|
829
|
+
lines.append(" import math")
|
|
830
|
+
lines.append(" factor = 10 ** int(precision)")
|
|
831
|
+
lines.append(" return math.trunc(float(value) * factor) / factor")
|
|
832
|
+
lines.append("")
|
|
833
|
+
lines.append("")
|
|
834
|
+
lines.append("def round_val(value, precision=0):")
|
|
835
|
+
lines.append(' """Informatica ROUND equivalent."""')
|
|
836
|
+
lines.append(" if value is None:")
|
|
837
|
+
lines.append(" return None")
|
|
838
|
+
lines.append(" return round(float(value), int(precision))")
|
|
839
|
+
lines.append("")
|
|
840
|
+
lines.append("")
|
|
841
|
+
lines.append("def abs_val(value):")
|
|
842
|
+
lines.append(' """Informatica ABS equivalent."""')
|
|
843
|
+
lines.append(" return abs(float(value)) if value is not None else None")
|
|
844
|
+
lines.append("")
|
|
845
|
+
lines.append("")
|
|
846
|
+
lines.append("def ceil_val(value):")
|
|
847
|
+
lines.append(' """Informatica CEIL equivalent."""')
|
|
848
|
+
lines.append(" import math")
|
|
849
|
+
lines.append(" return math.ceil(float(value)) if value is not None else None")
|
|
850
|
+
lines.append("")
|
|
851
|
+
lines.append("")
|
|
852
|
+
lines.append("def floor_val(value):")
|
|
853
|
+
lines.append(' """Informatica FLOOR equivalent."""')
|
|
854
|
+
lines.append(" import math")
|
|
855
|
+
lines.append(" return math.floor(float(value)) if value is not None else None")
|
|
856
|
+
lines.append("")
|
|
857
|
+
lines.append("")
|
|
858
|
+
lines.append("def mod_val(a, b):")
|
|
859
|
+
lines.append(' """Informatica MOD equivalent."""')
|
|
860
|
+
lines.append(" if a is None or b is None or float(b) == 0:")
|
|
861
|
+
lines.append(" return None")
|
|
862
|
+
lines.append(" return float(a) % float(b)")
|
|
863
|
+
lines.append("")
|
|
864
|
+
lines.append("")
|
|
865
|
+
lines.append("def power_val(base, exp):")
|
|
866
|
+
lines.append(' """Informatica POWER equivalent."""')
|
|
867
|
+
lines.append(" if base is None or exp is None:")
|
|
868
|
+
lines.append(" return None")
|
|
869
|
+
lines.append(" return float(base) ** float(exp)")
|
|
870
|
+
lines.append("")
|
|
871
|
+
lines.append("")
|
|
872
|
+
lines.append("def sqrt_val(value):")
|
|
873
|
+
lines.append(' """Informatica SQRT equivalent."""')
|
|
874
|
+
lines.append(" import math")
|
|
875
|
+
lines.append(" return math.sqrt(float(value)) if value is not None else None")
|
|
876
|
+
lines.append("")
|
|
877
|
+
lines.append("")
|
|
878
|
+
lines.append("def log_val(base, value):")
|
|
879
|
+
lines.append(' """Informatica LOG equivalent."""')
|
|
880
|
+
lines.append(" import math")
|
|
881
|
+
lines.append(" if value is None or base is None:")
|
|
882
|
+
lines.append(" return None")
|
|
883
|
+
lines.append(" return math.log(float(value), float(base))")
|
|
884
|
+
lines.append("")
|
|
885
|
+
lines.append("")
|
|
886
|
+
lines.append("def ln_val(value):")
|
|
887
|
+
lines.append(' """Informatica LN (natural log) equivalent."""')
|
|
888
|
+
lines.append(" import math")
|
|
889
|
+
lines.append(" return math.log(float(value)) if value is not None else None")
|
|
890
|
+
lines.append("")
|
|
891
|
+
lines.append("")
|
|
892
|
+
lines.append("def exp_val(value):")
|
|
893
|
+
lines.append(' """Informatica EXP equivalent."""')
|
|
894
|
+
lines.append(" import math")
|
|
895
|
+
lines.append(" return math.exp(float(value)) if value is not None else None")
|
|
896
|
+
lines.append("")
|
|
897
|
+
lines.append("")
|
|
898
|
+
lines.append("def sign_val(value):")
|
|
899
|
+
lines.append(' """Informatica SIGN equivalent."""')
|
|
900
|
+
lines.append(" if value is None:")
|
|
901
|
+
lines.append(" return None")
|
|
902
|
+
lines.append(" v = float(value)")
|
|
903
|
+
lines.append(" return 1 if v > 0 else (-1 if v < 0 else 0)")
|
|
904
|
+
lines.append("")
|
|
905
|
+
lines.append("")
|
|
906
|
+
lines.append("def rand_val(seed=None):")
|
|
907
|
+
lines.append(' """Informatica RAND equivalent."""')
|
|
908
|
+
lines.append(" import random")
|
|
909
|
+
lines.append(" if seed is not None:")
|
|
910
|
+
lines.append(" random.seed(seed)")
|
|
911
|
+
lines.append(" return random.random()")
|
|
912
|
+
lines.append("")
|
|
913
|
+
lines.append("")
|
|
914
|
+
lines.append("def greatest_val(*args):")
|
|
915
|
+
lines.append(' """Informatica GREATEST equivalent."""')
|
|
916
|
+
lines.append(" filtered = [a for a in args if a is not None]")
|
|
917
|
+
lines.append(" return max(filtered) if filtered else None")
|
|
918
|
+
lines.append("")
|
|
919
|
+
lines.append("")
|
|
920
|
+
lines.append("def least_val(*args):")
|
|
921
|
+
lines.append(' """Informatica LEAST equivalent."""')
|
|
922
|
+
lines.append(" filtered = [a for a in args if a is not None]")
|
|
923
|
+
lines.append(" return min(filtered) if filtered else None")
|
|
924
|
+
lines.append("")
|
|
925
|
+
lines.append("")
|
|
926
|
+
lines.append("def choose_expr(index, *values):")
|
|
927
|
+
lines.append(' """Informatica CHOOSE equivalent."""')
|
|
928
|
+
lines.append(" idx = int(index)")
|
|
929
|
+
lines.append(" if 1 <= idx <= len(values):")
|
|
930
|
+
lines.append(" return values[idx - 1]")
|
|
931
|
+
lines.append(" return None")
|
|
932
|
+
lines.append("")
|
|
933
|
+
lines.append("")
|
|
934
|
+
lines.append("def in_expr(value, *candidates):")
|
|
935
|
+
lines.append(' """Informatica IN equivalent."""')
|
|
936
|
+
lines.append(" return value in candidates")
|
|
937
|
+
lines.append("")
|
|
938
|
+
lines.append("")
|
|
939
|
+
lines.append("def max_val(*args):")
|
|
940
|
+
lines.append(' """Informatica MAX equivalent (row-level)."""')
|
|
941
|
+
lines.append(" filtered = [a for a in args if a is not None]")
|
|
942
|
+
lines.append(" return max(filtered) if filtered else None")
|
|
943
|
+
lines.append("")
|
|
944
|
+
lines.append("")
|
|
945
|
+
lines.append("def min_val(*args):")
|
|
946
|
+
lines.append(' """Informatica MIN equivalent (row-level)."""')
|
|
947
|
+
lines.append(" filtered = [a for a in args if a is not None]")
|
|
948
|
+
lines.append(" return min(filtered) if filtered else None")
|
|
949
|
+
lines.append("")
|
|
950
|
+
lines.append("")
|
|
951
|
+
lines.append("def sum_val(*args):")
|
|
952
|
+
lines.append(' """Informatica SUM equivalent (row-level)."""')
|
|
953
|
+
lines.append(" return sum(float(a) for a in args if a is not None)")
|
|
954
|
+
lines.append("")
|
|
955
|
+
lines.append("")
|
|
956
|
+
lines.append("def count_val(*args):")
|
|
957
|
+
lines.append(' """Informatica COUNT equivalent (row-level)."""')
|
|
958
|
+
lines.append(" return sum(1 for a in args if a is not None)")
|
|
959
|
+
lines.append("")
|
|
960
|
+
lines.append("")
|
|
961
|
+
lines.append("def avg_val(*args):")
|
|
962
|
+
lines.append(' """Informatica AVG equivalent (row-level)."""')
|
|
963
|
+
lines.append(" filtered = [float(a) for a in args if a is not None]")
|
|
964
|
+
lines.append(" return sum(filtered) / len(filtered) if filtered else None")
|
|
965
|
+
lines.append("")
|
|
966
|
+
lines.append("")
|
|
967
|
+
lines.append("def median_val(*args):")
|
|
968
|
+
lines.append(' """Informatica MEDIAN equivalent (row-level)."""')
|
|
969
|
+
lines.append(" import statistics")
|
|
970
|
+
lines.append(" filtered = [float(a) for a in args if a is not None]")
|
|
971
|
+
lines.append(" return statistics.median(filtered) if filtered else None")
|
|
972
|
+
lines.append("")
|
|
973
|
+
lines.append("")
|
|
974
|
+
lines.append("def stddev_val(*args):")
|
|
975
|
+
lines.append(' """Informatica STDDEV equivalent (row-level)."""')
|
|
976
|
+
lines.append(" import statistics")
|
|
977
|
+
lines.append(" filtered = [float(a) for a in args if a is not None]")
|
|
978
|
+
lines.append(" return statistics.stdev(filtered) if len(filtered) > 1 else 0")
|
|
979
|
+
lines.append("")
|
|
980
|
+
lines.append("")
|
|
981
|
+
lines.append("def variance_val(*args):")
|
|
982
|
+
lines.append(' """Informatica VARIANCE equivalent (row-level)."""')
|
|
983
|
+
lines.append(" import statistics")
|
|
984
|
+
lines.append(" filtered = [float(a) for a in args if a is not None]")
|
|
985
|
+
lines.append(" return statistics.variance(filtered) if len(filtered) > 1 else 0")
|
|
986
|
+
lines.append("")
|
|
987
|
+
lines.append("")
|
|
988
|
+
lines.append("def percentile_val(value, pct):")
|
|
989
|
+
lines.append(' """Informatica PERCENTILE equivalent."""')
|
|
990
|
+
lines.append(" return value")
|
|
991
|
+
lines.append("")
|
|
992
|
+
lines.append("")
|
|
993
|
+
lines.append("def first_val(*args):")
|
|
994
|
+
lines.append(' """Informatica FIRST equivalent."""')
|
|
995
|
+
lines.append(" for a in args:")
|
|
996
|
+
lines.append(" if a is not None:")
|
|
997
|
+
lines.append(" return a")
|
|
998
|
+
lines.append(" return None")
|
|
999
|
+
lines.append("")
|
|
1000
|
+
lines.append("")
|
|
1001
|
+
lines.append("def last_val(*args):")
|
|
1002
|
+
lines.append(' """Informatica LAST equivalent."""')
|
|
1003
|
+
lines.append(" result = None")
|
|
1004
|
+
lines.append(" for a in args:")
|
|
1005
|
+
lines.append(" if a is not None:")
|
|
1006
|
+
lines.append(" result = a")
|
|
1007
|
+
lines.append(" return result")
|
|
1008
|
+
lines.append("")
|
|
1009
|
+
lines.append("")
|
|
1010
|
+
lines.append("def moving_avg(value, window=3):")
|
|
1011
|
+
lines.append(' """Informatica MOVINGAVG equivalent."""')
|
|
1012
|
+
lines.append(" return value")
|
|
1013
|
+
lines.append("")
|
|
1014
|
+
lines.append("")
|
|
1015
|
+
lines.append("def moving_sum(value, window=3):")
|
|
1016
|
+
lines.append(' """Informatica MOVINGSUM equivalent."""')
|
|
1017
|
+
lines.append(" return value")
|
|
1018
|
+
lines.append("")
|
|
1019
|
+
lines.append("")
|
|
1020
|
+
lines.append("def cume(value):")
|
|
1021
|
+
lines.append(' """Informatica CUME equivalent."""')
|
|
1022
|
+
lines.append(" return value")
|
|
1023
|
+
lines.append("")
|
|
1024
|
+
lines.append("")
|
|
1025
|
+
lines.append("def set_count_variable(var_name, value=1):")
|
|
1026
|
+
lines.append(' """Informatica SETCOUNTVARIABLE equivalent."""')
|
|
1027
|
+
lines.append(" return set_variable(var_name, value)")
|
|
1028
|
+
lines.append("")
|
|
1029
|
+
lines.append("")
|
|
634
1030
|
lines.append("def raise_error(message):")
|
|
635
1031
|
lines.append(' """Informatica ERROR function equivalent."""')
|
|
636
1032
|
lines.append(" logger.error(f'INFORMATICA ERROR: {message}')")
|
{informatica_python-1.1.1 → informatica_python-1.2.0}/informatica_python/generators/mapping_gen.py
RENAMED
|
@@ -3,7 +3,11 @@ from informatica_python.models import (
|
|
|
3
3
|
MappingDef, FolderDef, SourceDef, TargetDef,
|
|
4
4
|
TransformationDef, ConnectorDef, InstanceDef,
|
|
5
5
|
)
|
|
6
|
-
from informatica_python.utils.expression_converter import
|
|
6
|
+
from informatica_python.utils.expression_converter import (
|
|
7
|
+
convert_expression, convert_sql_expression,
|
|
8
|
+
parse_join_condition, parse_lookup_condition,
|
|
9
|
+
parse_aggregate_expression, PANDAS_AGG_MAP,
|
|
10
|
+
)
|
|
7
11
|
from informatica_python.utils.datatype_map import get_python_type
|
|
8
12
|
|
|
9
13
|
|
|
@@ -340,19 +344,56 @@ def _gen_aggregator_transform(lines, tx, tx_safe, input_df, source_dfs):
|
|
|
340
344
|
group_by_ports = []
|
|
341
345
|
agg_ports = []
|
|
342
346
|
for fld in tx.fields:
|
|
343
|
-
|
|
347
|
+
pt = (fld.porttype or "").upper()
|
|
348
|
+
if "INPUT" in pt and "OUTPUT" in pt:
|
|
344
349
|
group_by_ports.append(fld.name)
|
|
345
|
-
elif "OUTPUT" in
|
|
350
|
+
elif "OUTPUT" in pt and "INPUT" not in pt:
|
|
346
351
|
agg_ports.append(fld)
|
|
347
352
|
|
|
348
|
-
|
|
349
|
-
|
|
350
|
-
|
|
351
|
-
|
|
352
|
-
|
|
353
|
-
|
|
353
|
+
agg_dict = {}
|
|
354
|
+
rename_map = {}
|
|
355
|
+
computed_aggs = []
|
|
356
|
+
for ap in agg_ports:
|
|
357
|
+
expr_text = ap.expression or ap.name
|
|
358
|
+
agg_func, agg_col = parse_aggregate_expression(expr_text)
|
|
359
|
+
if agg_func and agg_col:
|
|
360
|
+
pandas_func = PANDAS_AGG_MAP.get(agg_func, agg_func)
|
|
361
|
+
if agg_col == "*":
|
|
362
|
+
agg_col = group_by_ports[0] if group_by_ports else ap.name
|
|
363
|
+
pandas_func = "count"
|
|
364
|
+
if agg_col in agg_dict:
|
|
365
|
+
temp_name = f"{agg_col}__{ap.name}"
|
|
366
|
+
agg_dict[temp_name] = (agg_col, pandas_func)
|
|
367
|
+
rename_map[temp_name] = ap.name
|
|
368
|
+
else:
|
|
369
|
+
agg_dict[ap.name] = (agg_col, pandas_func)
|
|
370
|
+
else:
|
|
371
|
+
computed_aggs.append((ap.name, expr_text))
|
|
372
|
+
|
|
373
|
+
if group_by_ports and agg_dict:
|
|
374
|
+
lines.append(f" # Aggregator: group by {group_by_ports}")
|
|
375
|
+
agg_spec = {}
|
|
376
|
+
for out_name, (col, func) in agg_dict.items():
|
|
377
|
+
agg_spec[out_name] = f"pd.NamedAgg(column='{col}', aggfunc='{func}')"
|
|
378
|
+
|
|
379
|
+
lines.append(f" df_{tx_safe} = {input_df}.groupby({group_by_ports}, as_index=False).agg(")
|
|
380
|
+
for out_name, spec in agg_spec.items():
|
|
381
|
+
lines.append(f" {out_name}={spec},")
|
|
382
|
+
lines.append(f" )")
|
|
383
|
+
|
|
384
|
+
if rename_map:
|
|
385
|
+
lines.append(f" df_{tx_safe} = df_{tx_safe}.rename(columns={rename_map})")
|
|
386
|
+
elif group_by_ports:
|
|
387
|
+
lines.append(f" # Aggregator: group by {group_by_ports}")
|
|
388
|
+
lines.append(f" df_{tx_safe} = {input_df}.groupby({group_by_ports}, as_index=False).agg('first')")
|
|
354
389
|
else:
|
|
355
390
|
lines.append(f" df_{tx_safe} = {input_df}.copy()")
|
|
391
|
+
|
|
392
|
+
for col_name, expr_text in computed_aggs:
|
|
393
|
+
expr_py = convert_expression(expr_text)
|
|
394
|
+
lines.append(f" # Computed aggregate: {col_name} = {expr_text}")
|
|
395
|
+
lines.append(f" df_{tx_safe}['{col_name}'] = {expr_py}")
|
|
396
|
+
|
|
356
397
|
source_dfs[tx.name] = f"df_{tx_safe}"
|
|
357
398
|
|
|
358
399
|
|
|
@@ -384,12 +425,47 @@ def _gen_joiner_transform(lines, tx, tx_safe, input_df, input_sources, source_df
|
|
|
384
425
|
elif attr.name == "Join Condition":
|
|
385
426
|
join_condition = attr.value
|
|
386
427
|
|
|
428
|
+
master_fields = []
|
|
429
|
+
detail_fields = []
|
|
430
|
+
for fld in tx.fields:
|
|
431
|
+
pt = (fld.porttype or "").upper()
|
|
432
|
+
if "MASTER" in pt:
|
|
433
|
+
master_fields.append(fld.name)
|
|
434
|
+
elif "DETAIL" in pt:
|
|
435
|
+
detail_fields.append(fld.name)
|
|
436
|
+
|
|
437
|
+
left_keys, right_keys = parse_join_condition(join_condition)
|
|
438
|
+
|
|
387
439
|
src_list = list(input_sources)
|
|
388
440
|
if len(src_list) >= 2:
|
|
441
|
+
df_master = source_dfs.get(src_list[0], f"df_{_safe_name(src_list[0])}")
|
|
442
|
+
df_detail = source_dfs.get(src_list[1], f"df_{_safe_name(src_list[1])}")
|
|
443
|
+
|
|
444
|
+
lines.append(f" # Join ({join_type}): {join_condition or 'auto'}")
|
|
445
|
+
if left_keys and right_keys:
|
|
446
|
+
lines.append(f" df_{tx_safe} = {df_detail}.merge(")
|
|
447
|
+
lines.append(f" {df_master},")
|
|
448
|
+
lines.append(f" left_on={left_keys},")
|
|
449
|
+
lines.append(f" right_on={right_keys},")
|
|
450
|
+
lines.append(f" how='{join_type}',")
|
|
451
|
+
lines.append(f" suffixes=('', '_master')")
|
|
452
|
+
lines.append(f" )")
|
|
453
|
+
else:
|
|
454
|
+
common_cols = []
|
|
455
|
+
if master_fields and detail_fields:
|
|
456
|
+
common_cols = [f for f in detail_fields if f in master_fields]
|
|
457
|
+
if common_cols:
|
|
458
|
+
lines.append(f" df_{tx_safe} = {df_detail}.merge(")
|
|
459
|
+
lines.append(f" {df_master},")
|
|
460
|
+
lines.append(f" on={common_cols},")
|
|
461
|
+
lines.append(f" how='{join_type}',")
|
|
462
|
+
lines.append(f" suffixes=('', '_master')")
|
|
463
|
+
lines.append(f" )")
|
|
464
|
+
else:
|
|
465
|
+
lines.append(f" df_{tx_safe} = {df_detail}.merge({df_master}, how='{join_type}', suffixes=('', '_master'))")
|
|
466
|
+
elif len(src_list) == 1:
|
|
389
467
|
df1 = source_dfs.get(src_list[0], f"df_{_safe_name(src_list[0])}")
|
|
390
|
-
|
|
391
|
-
lines.append(f" # Join: {join_condition or 'auto'}")
|
|
392
|
-
lines.append(f" df_{tx_safe} = {df1}.merge({df2}, how='{join_type}')")
|
|
468
|
+
lines.append(f" df_{tx_safe} = {df1}.copy()")
|
|
393
469
|
else:
|
|
394
470
|
lines.append(f" df_{tx_safe} = {input_df}.copy()")
|
|
395
471
|
source_dfs[tx.name] = f"df_{tx_safe}"
|
|
@@ -400,6 +476,8 @@ def _gen_lookup_transform(lines, tx, tx_safe, input_df, source_dfs):
|
|
|
400
476
|
lookup_sql = ""
|
|
401
477
|
lookup_condition = ""
|
|
402
478
|
lookup_cache = "YES"
|
|
479
|
+
lookup_policy = "ERROR"
|
|
480
|
+
default_values = {}
|
|
403
481
|
for attr in tx.attributes:
|
|
404
482
|
if attr.name == "Lookup table name":
|
|
405
483
|
lookup_table = attr.value
|
|
@@ -409,11 +487,19 @@ def _gen_lookup_transform(lines, tx, tx_safe, input_df, source_dfs):
|
|
|
409
487
|
lookup_condition = attr.value
|
|
410
488
|
elif attr.name == "Lookup caching enabled":
|
|
411
489
|
lookup_cache = attr.value
|
|
490
|
+
elif attr.name == "Lookup policy on multiple match":
|
|
491
|
+
lookup_policy = attr.value
|
|
412
492
|
|
|
413
493
|
return_fields = [f for f in tx.fields if "RETURN" in (f.porttype or "").upper() or
|
|
414
494
|
("LOOKUP" in (f.porttype or "").upper() and "OUTPUT" in (f.porttype or "").upper()
|
|
415
495
|
and "INPUT" not in (f.porttype or "").upper())]
|
|
416
496
|
input_fields = [f for f in tx.fields if "INPUT" in (f.porttype or "").upper()]
|
|
497
|
+
lookup_output_fields = [f for f in tx.fields if
|
|
498
|
+
"OUTPUT" in (f.porttype or "").upper()
|
|
499
|
+
and "INPUT" not in (f.porttype or "").upper()
|
|
500
|
+
and "RETURN" not in (f.porttype or "").upper()]
|
|
501
|
+
|
|
502
|
+
all_output_fields = return_fields + lookup_output_fields
|
|
417
503
|
|
|
418
504
|
lines.append(f" # Lookup: {lookup_table or tx.name}")
|
|
419
505
|
if lookup_sql:
|
|
@@ -425,19 +511,48 @@ def _gen_lookup_transform(lines, tx, tx_safe, input_df, source_dfs):
|
|
|
425
511
|
elif lookup_table:
|
|
426
512
|
lines.append(f" df_lkp_{tx_safe} = read_from_db(config, 'SELECT * FROM {lookup_table}', 'default')")
|
|
427
513
|
else:
|
|
428
|
-
lines.append(f" df_lkp_{tx_safe} = pd.DataFrame()
|
|
514
|
+
lines.append(f" df_lkp_{tx_safe} = pd.DataFrame()")
|
|
429
515
|
|
|
430
|
-
|
|
431
|
-
lines.append(f" # Condition: {lookup_condition}")
|
|
516
|
+
input_keys, lookup_keys = parse_lookup_condition(lookup_condition)
|
|
432
517
|
|
|
433
|
-
|
|
518
|
+
if input_keys and lookup_keys:
|
|
519
|
+
lines.append(f" # Lookup condition: {lookup_condition}")
|
|
434
520
|
|
|
435
|
-
|
|
436
|
-
|
|
437
|
-
lines.append(f"
|
|
438
|
-
|
|
439
|
-
|
|
440
|
-
lines.append(f"
|
|
521
|
+
lkp_cols = [f.name for f in all_output_fields]
|
|
522
|
+
select_cols = list(set(lookup_keys + lkp_cols))
|
|
523
|
+
lines.append(f" lkp_select_cols_{tx_safe} = [c for c in {select_cols} if c in df_lkp_{tx_safe}.columns]")
|
|
524
|
+
|
|
525
|
+
if lookup_policy and "FIRST" in lookup_policy.upper():
|
|
526
|
+
lines.append(f" df_lkp_{tx_safe} = df_lkp_{tx_safe}[lkp_select_cols_{tx_safe}].drop_duplicates(subset={lookup_keys}, keep='first')")
|
|
527
|
+
elif lookup_policy and "LAST" in lookup_policy.upper():
|
|
528
|
+
lines.append(f" df_lkp_{tx_safe} = df_lkp_{tx_safe}[lkp_select_cols_{tx_safe}].drop_duplicates(subset={lookup_keys}, keep='last')")
|
|
529
|
+
else:
|
|
530
|
+
lines.append(f" df_lkp_{tx_safe} = df_lkp_{tx_safe}[lkp_select_cols_{tx_safe}].drop_duplicates(subset={lookup_keys}, keep='first')")
|
|
531
|
+
|
|
532
|
+
lines.append(f" df_{tx_safe} = {input_df}.merge(")
|
|
533
|
+
lines.append(f" df_lkp_{tx_safe},")
|
|
534
|
+
lines.append(f" left_on={input_keys},")
|
|
535
|
+
lines.append(f" right_on={lookup_keys},")
|
|
536
|
+
lines.append(f" how='left',")
|
|
537
|
+
lines.append(f" suffixes=('', '_lkp')")
|
|
538
|
+
lines.append(f" )")
|
|
539
|
+
|
|
540
|
+
drop_cols = [k for k in lookup_keys if k not in input_keys]
|
|
541
|
+
if drop_cols:
|
|
542
|
+
lines.append(f" lkp_drop = [c for c in {drop_cols} if c in df_{tx_safe}.columns and c + '_lkp' not in df_{tx_safe}.columns]")
|
|
543
|
+
|
|
544
|
+
for rf in all_output_fields:
|
|
545
|
+
if rf.default_value:
|
|
546
|
+
lines.append(f" df_{tx_safe}['{rf.name}'] = df_{tx_safe}['{rf.name}'].fillna({repr(rf.default_value)})")
|
|
547
|
+
else:
|
|
548
|
+
lines.append(f" df_{tx_safe} = {input_df}.copy()")
|
|
549
|
+
if all_output_fields:
|
|
550
|
+
ret_names = [f.name for f in all_output_fields]
|
|
551
|
+
lines.append(f" # Lookup returns: {ret_names}")
|
|
552
|
+
lines.append(f" # Could not auto-parse lookup condition: {lookup_condition}")
|
|
553
|
+
for rf in all_output_fields:
|
|
554
|
+
default = repr(rf.default_value) if rf.default_value else "None"
|
|
555
|
+
lines.append(f" df_{tx_safe}['{rf.name}'] = {default}")
|
|
441
556
|
|
|
442
557
|
source_dfs[tx.name] = f"df_{tx_safe}"
|
|
443
558
|
|