py2ls 0.2.4.24__py3-none-any.whl → 0.2.4.26__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- py2ls/.DS_Store +0 -0
- py2ls/.git/index +0 -0
- py2ls/corr.py +475 -0
- py2ls/data/.DS_Store +0 -0
- py2ls/data/hyper_param_autogluon_zeroshot2024.json +2383 -0
- py2ls/data/styles/.DS_Store +0 -0
- py2ls/data/styles/example/.DS_Store +0 -0
- py2ls/data/usages_sns.json +6 -1
- py2ls/ec2ls.py +61 -0
- py2ls/ips.py +496 -138
- py2ls/ml2ls.py +994 -288
- py2ls/netfinder.py +16 -20
- py2ls/nl2ls.py +283 -0
- py2ls/plot.py +1244 -158
- {py2ls-0.2.4.24.dist-info → py2ls-0.2.4.26.dist-info}/METADATA +5 -1
- {py2ls-0.2.4.24.dist-info → py2ls-0.2.4.26.dist-info}/RECORD +17 -14
- py2ls/data/usages_pd copy.json +0 -1105
- py2ls/ml2ls copy.py +0 -2906
- {py2ls-0.2.4.24.dist-info → py2ls-0.2.4.26.dist-info}/WHEEL +0 -0
    
        py2ls/netfinder.py
    CHANGED
    
    | @@ -626,7 +626,7 @@ def filter_links(links, contains="html", driver="requ", booster=False): | |
| 626 626 | 
             
                    )
         | 
| 627 627 | 
             
                    if condition:
         | 
| 628 628 | 
             
                        filtered_links.append(link)
         | 
| 629 | 
            -
                return filtered_links
         | 
| 629 | 
            +
                return ips.unique(filtered_links)
         | 
| 630 630 |  | 
| 631 631 |  | 
| 632 632 | 
             
            def find_domain(links):
         | 
| @@ -717,7 +717,7 @@ def downloader( | |
| 717 717 | 
             
                kind=[".pdf"],
         | 
| 718 718 | 
             
                contains=None,
         | 
| 719 719 | 
             
                rm_folder=False,
         | 
| 720 | 
            -
                booster= | 
| 720 | 
            +
                booster=True,# use find_links
         | 
| 721 721 | 
             
                verbose=True,
         | 
| 722 722 | 
             
                timeout=30,
         | 
| 723 723 | 
             
                n_try=3,
         | 
| @@ -726,7 +726,7 @@ def downloader( | |
| 726 726 |  | 
| 727 727 | 
             
                from requests.exceptions import ChunkedEncodingError, ConnectionError
         | 
| 728 728 |  | 
| 729 | 
            -
                if verbose:
         | 
| 729 | 
            +
                if verbose and ips.run_once_within():
         | 
| 730 730 | 
             
                    print(
         | 
| 731 731 | 
             
                        "usage: downloader(url, dir_save=None, kind=['.pdf','xls'], contains=None, booster=False)"
         | 
| 732 732 | 
             
                    )
         | 
| @@ -734,8 +734,11 @@ def downloader( | |
| 734 734 | 
             
                def fname_corrector(fname, ext):
         | 
| 735 735 | 
             
                    if not ext.startswith("."):
         | 
| 736 736 | 
             
                        ext = "." + ext
         | 
| 737 | 
            -
                    if not fname.endswith( | 
| 737 | 
            +
                    if not fname.endswith(ext):  # if not ext in fname:
         | 
| 738 738 | 
             
                        fname = fname[: -len(ext)] + ext
         | 
| 739 | 
            +
                    if not any(fname[: -len(ext)]):
         | 
| 740 | 
            +
                        from datetime import datetime
         | 
| 741 | 
            +
                        fname = datetime.now().strftime("%H%M%S") + ext
         | 
| 739 742 | 
             
                    return fname
         | 
| 740 743 |  | 
| 741 744 | 
             
                def check_and_modify_filename(directory, filename):
         | 
| @@ -784,8 +787,8 @@ def downloader( | |
| 784 787 | 
             
                        kind[i] = "." + kind[i]
         | 
| 785 788 | 
             
                file_links_all = []
         | 
| 786 789 | 
             
                for kind_ in kind:
         | 
| 787 | 
            -
                    if isinstance(contains, str):
         | 
| 788 | 
            -
             | 
| 790 | 
            +
                    # if isinstance(contains, str):
         | 
| 791 | 
            +
                    #     contains = [contains]
         | 
| 789 792 | 
             
                    if isinstance(url, str):
         | 
| 790 793 | 
             
                        if any(ext in url for ext in kind):
         | 
| 791 794 | 
             
                            file_links = [url]
         | 
| @@ -799,7 +802,7 @@ def downloader( | |
| 799 802 | 
             
                            if contains is not None:
         | 
| 800 803 | 
             
                                file_links = filter_links(links_all, contains=contains + kind_)
         | 
| 801 804 | 
             
                            else:
         | 
| 802 | 
            -
                                file_links =  | 
| 805 | 
            +
                                file_links = filter_links(links_all, contains=kind_)#links_all  # 
         | 
| 803 806 | 
             
                    elif isinstance(url, list):
         | 
| 804 807 | 
             
                        links_all = url
         | 
| 805 808 | 
             
                        if contains is not None:
         | 
| @@ -812,6 +815,7 @@ def downloader( | |
| 812 815 | 
             
                            file_links = filter_links(links_all, contains=contains + kind_)
         | 
| 813 816 | 
             
                        else:
         | 
| 814 817 | 
             
                            file_links = filter_links(links_all, contains=kind_)
         | 
| 818 | 
            +
                    file_links=ips.unique(file_links)
         | 
| 815 819 | 
             
                    if verbose:
         | 
| 816 820 | 
             
                        if file_links:
         | 
| 817 821 | 
             
                            from pprint import pp
         | 
| @@ -825,6 +829,7 @@ def downloader( | |
| 825 829 | 
             
                        file_links_all = [file_links]
         | 
| 826 830 | 
             
                    elif isinstance(file_links, list):
         | 
| 827 831 | 
             
                        file_links_all.extend(file_links)
         | 
| 832 | 
            +
                file_links_all=ips.unique(file_links_all)
         | 
| 828 833 | 
             
                if dir_save:
         | 
| 829 834 | 
             
                    if rm_folder:
         | 
| 830 835 | 
             
                        ips.rm_folder(dir_save)
         | 
| @@ -847,7 +852,7 @@ def downloader( | |
| 847 852 | 
             
                                    )
         | 
| 848 853 | 
             
                                    if ext is None:
         | 
| 849 854 | 
             
                                        ext = kind_
         | 
| 850 | 
            -
             | 
| 855 | 
            +
             | 
| 851 856 | 
             
                                    if ext:
         | 
| 852 857 | 
             
                                        corrected_fname = fname_corrector(fnames[idx], ext)
         | 
| 853 858 | 
             
                                        corrected_fname = check_and_modify_filename(
         | 
| @@ -860,13 +865,13 @@ def downloader( | |
| 860 865 | 
             
                                                datetime.now().strftime("%y%m%d_%H%M%S_")
         | 
| 861 866 | 
             
                                                + corrected_fname
         | 
| 862 867 | 
             
                                            )
         | 
| 863 | 
            -
                                        fpath_tmp = os.path.join(dir_save, corrected_fname)
         | 
| 868 | 
            +
                                        fpath_tmp = os.path.join(dir_save, corrected_fname) 
         | 
| 864 869 | 
             
                                        with open(fpath_tmp, "wb") as file:
         | 
| 865 870 | 
             
                                            for chunk in response.iter_content(chunk_size=8192):
         | 
| 866 871 | 
             
                                                if chunk:  # Filter out keep-alive chunks
         | 
| 867 872 | 
             
                                                    file.write(chunk)
         | 
| 868 873 | 
             
                                        if verbose:
         | 
| 869 | 
            -
                                            print(f"Done | 
| 874 | 
            +
                                            print(f"Done⤵{fnames[idx]}")
         | 
| 870 875 | 
             
                                    else:
         | 
| 871 876 | 
             
                                        if verbose:
         | 
| 872 877 | 
             
                                            print(f"Unknown file type for {file_link}")
         | 
| @@ -886,16 +891,7 @@ def downloader( | |
| 886 891 |  | 
| 887 892 | 
             
                        if itry == n_try:
         | 
| 888 893 | 
             
                            print(f"Failed to download {file_link} after {n_try} attempts.")
         | 
| 889 | 
            -
             | 
| 890 | 
            -
                    # print(f"\n{len(fnames)} files were downloaded:")
         | 
| 891 | 
            -
                    if verbose:
         | 
| 892 | 
            -
                        from pprint import pp
         | 
| 893 | 
            -
             | 
| 894 | 
            -
                        if corrected_fname:
         | 
| 895 | 
            -
                            pp(corrected_fname)
         | 
| 896 | 
            -
                            print(f"\n\nsaved @:\n{dir_save}")
         | 
| 897 | 
            -
                        else:
         | 
| 898 | 
            -
                            pp(fnames)
         | 
| 894 | 
            +
             
         | 
| 899 895 |  | 
| 900 896 |  | 
| 901 897 | 
             
            def find_img(url, driver="request", dir_save="images", rm_folder=False, verbose=True):
         | 
    
        py2ls/nl2ls.py
    ADDED
    
    | @@ -0,0 +1,283 @@ | |
| 1 | 
            +
            from . import translator,ips,plot
         | 
| 2 | 
            +
            import numpy as np
         | 
| 3 | 
            +
            import pandas as pd
         | 
| 4 | 
            +
            import matplotlib.pyplot as plt 
         | 
| 5 | 
            +
             | 
| 6 | 
            +
             | 
| 7 | 
            +
            def detect(text, method: str = "vader", nb_model=None, device=-1, overall_method="major", overall_threhold=0.8, overall_weight=None, plot_=True, verbose=True, **kwargs) -> dict:
                """
                Analyze the sentiment of a single text or of a collection of texts.

                A single text is handed straight to ``detect_single_text``. For a list
                (or tuple) of texts every item is analysed on its own and the per-text
                results are then combined with ``get_overall_results``.

                Parameters:
                - text (str or list of str): The text(s) to analyze.
                - method (str): Analysis method, forwarded to ``detect_single_text``.
                - nb_model (Optional[MultinomialNB]): Pre-trained Naive Bayes model, forwarded
                  to ``detect_single_text``.
                - device (int): Device to run the model on (-1 for CPU, 0 for GPU), forwarded
                  to ``detect_single_text``.
                - overall_method (str): Aggregation method passed to ``get_overall_results``;
                  only used when ``text`` is a collection.
                - overall_threhold (float): Passed as ``threshold`` to ``get_overall_results``.
                - overall_weight (dict): Passed as ``weight`` to ``get_overall_results``.
                - plot_ (bool): When True and ``text`` is a collection, draw a pie chart of the
                  per-text label counts.
                - verbose (bool): Allows the hints about available methods to be printed.
                - **kwargs: Forwarded unchanged to ``detect_single_text`` (e.g. ``vectorizer``).

                Returns:
                - For a single text: the result of ``detect_single_text``.
                - For a collection: the result of ``get_overall_results`` for ``overall_method``.
                """
                methods = ['vader', 'textblob', 'naive_bayes', 'transformer(not ready)', 'senta(not ready)']
                if ips.run_once_within(10, reverse=True) and verbose:
                    print(f"methods: {methods}")

                overall_methods = ["majority", "average", "mean", "threshold", "weighted", "detailed"]
                if ips.run_once_within(10, reverse=True) and verbose:
                    print(f"overall_methods: {overall_methods}")

                # Anything that is not a collection is treated as one text.
                if not isinstance(text, (list, tuple)):
                    return detect_single_text(text=text, method=method, nb_model=nb_model, device=device, **kwargs)

                # Analyse each text separately, then aggregate.
                results = [
                    detect_single_text(text_, method=method, nb_model=nb_model, device=device, **kwargs)
                    for text_ in text
                ]
                res_overall = get_overall_results(
                    results, method=overall_method, threshold=overall_threhold, weight=overall_weight
                )
                if plot_:
                    # Request the per-text table by its exact name instead of relying on
                    # fuzzy matching of an abbreviation.
                    res_detail = get_overall_results(
                        results, method="detailed", threshold=overall_threhold, weight=overall_weight
                    )
                    plot.pie(res_detail["label"].value_counts(), explode=None, verbose=False)
                return res_overall
         | 
| 48 | 
            +
             | 
| 49 | 
            +
             | 
| 50 | 
            +
            def detect_single_text(text: str, method: str = "vader", nb_model=None, device=-1, **kwargs) -> dict:
                """
                Analyze the sentiment of a text using different methods.

                Parameters:
                - text (str): The text to analyze.
                - method (str): The method to use ('vader', 'textblob', 'naive_bayes', 'transformers').
                - nb_model (Optional[MultinomialNB]): Pre-trained Naive Bayes model (used if method='naive_bayes').
                - device (int): Device handed to the transformers pipeline.
                - **kwargs: ``vectorizer`` (Optional[TfidfVectorizer]) is read from here; it is the
                  vectorizer that was fitted together with ``nb_model``.

                Returns:
                - dict: Keys 'text', 'method', 'score', 'label' and 'language'. 'score' and 'label'
                  stay None when the selected analysis raised an error (the error is printed).

                Raises:
                - ValueError: If the resolved method matches none of the handled branches.
                """
                result = {
                    "text": text,
                    "method": method,
                    "score": None,
                    "label": None,
                    "language": None,
                }

                # The vectorizer arrives through **kwargs. It must be bound before the
                # Naive Bayes branch reads it: the name is assigned further down, which makes
                # it local to this function, so reading it unbound raised UnboundLocalError
                # and the branch always failed.
                vectorizer = kwargs.get("vectorizer")

                # Detect language for additional insights
                language = translator.detect_lang(text)
                result["language"] = language
                if language != "English" and method in ["vader", "textblob", "naive_bayes"]:
                    print("Detected non-English language, results may be inaccurate.")

                methods = ['vader', 'textblob', 'naive_bayes', 'transformer(not ready)', 'senta(not ready)']
                # NOTE(review): presumably ips.strcmp returns the closest entry of `methods`;
                # if so, the "(not ready)" entries never equal "transformer"/"senta" and those
                # two branches below stay unreachable on purpose -- confirm against ips.strcmp.
                method = ips.strcmp(method, methods)[0]

                if method == "vader":
                    import nltk, os
                    from nltk.sentiment import SentimentIntensityAnalyzer

                    # check if it is downloaded
                    is_local = os.path.isfile(
                        os.path.join(nltk.data.path[0], "sentiment", "vader_lexicon.zip")
                    )
                    if not is_local:
                        nltk.download("vader_lexicon")
                    try:
                        sia = SentimentIntensityAnalyzer()
                        compound = sia.polarity_scores(text)["compound"]
                        result["score"] = compound
                        if compound >= 0.05:
                            result["label"] = "Positive"
                        elif compound <= -0.05:
                            result["label"] = "Negative"
                        else:
                            result["label"] = "Neutral"
                    except Exception as e:
                        print(f"Error in VADER analysis: {e}")

                elif method == "textblob":
                    from textblob import TextBlob

                    try:
                        polarity = TextBlob(text).sentiment.polarity
                        result["score"] = polarity
                        if polarity > 0:
                            result["label"] = "Positive"
                        elif polarity < 0:
                            result["label"] = "Negative"
                        else:
                            result["label"] = "Neutral"
                    except Exception as e:
                        print(f"Error in TextBlob analysis: {e}")

                elif method == "naive_bayes":
                    from sklearn.naive_bayes import MultinomialNB
                    from sklearn.feature_extraction.text import TfidfVectorizer

                    try:
                        # Model and vectorizer only make sense as a pair; if either one is
                        # missing, fall back to a tiny built-in demo model.
                        if nb_model is None or vectorizer is None:
                            # Sample data for Naive Bayes training if model not provided
                            sample_texts = [
                                "I love this product",
                                "I hate this product",
                                "It's okay, not great",
                                "Absolutely fantastic!",
                                "Not satisfied",
                            ]
                            sample_labels = [1, 0, 0, 1, 0]  # 1 = Positive, 0 = Negative

                            # Train Naive Bayes model
                            vectorizer = TfidfVectorizer()
                            X_train_tfidf = vectorizer.fit_transform(sample_texts)
                            nb_model = MultinomialNB()
                            nb_model.fit(X_train_tfidf, sample_labels)

                        transformed_text = vectorizer.transform([text])
                        prediction = nb_model.predict(transformed_text)[0]
                        # The score is the probability of the most likely class.
                        result["score"] = max(nb_model.predict_proba(transformed_text)[0])
                        result["label"] = "Positive" if prediction == 1 else "Negative"

                    except Exception as e:
                        print(f"Error in Naive Bayes analysis: {e}")

                elif method == "transformer":
                    try:
                        from transformers import pipeline

                        # Load pre-trained sentiment analysis pipeline with a Chinese model
                        classifier = pipeline('sentiment-analysis', model='bert-base-chinese', device=device)
                        analysis_result = classifier(text)
                        result["score"] = analysis_result[0]['score']
                        result["label"] = analysis_result[0]['label']
                    except Exception as e:
                        print(f"Error in Transformer analysis: {e}")

                elif method == "senta":
                    from transformers import pipeline

                    try:
                        # Load the Senta model for sentiment analysis
                        classifier = pipeline('sentiment-analysis', model='junnyu/senta', device=device)
                        analysis_result = classifier(text)

                        # Single text input, so the first entry of the output is used.
                        result["score"] = analysis_result[0]["score"]
                        result["label"] = analysis_result[0]["label"]

                    except Exception as e:
                        print(f"Error in Senta analysis: {e}")

                else:
                    # The message is printed as well as raised, as before.
                    message = f"Unknown method '{method}'. Available methods: 'vader', 'textblob', 'naive_bayes', 'transformers'"
                    print(message)
                    raise ValueError(message)

                return result
         | 
| 181 | 
            +
             | 
| 182 | 
            +
            def get_overall_results(results, method="majority", threshold=0.8, weight=None, verbose=False):
                """
                Aggregates sentiment analysis results based on the selected method.

                Parameters:
                - results (list): A list of sentiment analysis results, each being a dictionary
                  with at least 'label' and 'score' (and optionally 'method').
                - method (str): The aggregation method to use ('majority', 'average'/'mean',
                  'threshold', 'weighted', 'detailed').
                - threshold (float): Minimum share of results a label needs for the 'threshold' method.
                - weight (dict): Optional weights for 'weighted', keyed by each result's 'method'
                  value; entries not listed get weight 1. Defaults to {"vader": 2}.
                - verbose (bool): With method='detailed', also print every label/score pair.

                Returns:
                - dict: Aggregated result holding 'label' (and 'score' for the score-based
                  methods); for 'detailed' a pandas DataFrame with one row per result.

                Raises:
                - ValueError: If the method is unknown, if `results` is empty for an aggregating
                  method, or if a score-based method finds no usable score.
                """
                from collections import Counter

                def label_from_score(score):
                    """Map a score to a label using the +/-0.05 band shared by all score-based methods."""
                    if score > 0.05:
                        return 'Positive'
                    if score < -0.05:
                        return 'Negative'
                    return 'Neutral'

                def with_score(results):
                    """Keep only results that carry a score; failed analyses leave it as None."""
                    usable = [result for result in results if result.get('score') is not None]
                    if not usable:
                        raise ValueError("No result carries a score; cannot aggregate by score.")
                    return usable

                def majority_voting(results):
                    """Aggregates sentiment using majority voting."""
                    label_counts = Counter(result['label'] for result in results)
                    final_label = label_counts.most_common(1)[0][0]  # Get the most common label
                    return {"label": final_label}

                def average_score(results):
                    """Aggregates sentiment by calculating the average score."""
                    scores = [result['score'] for result in with_score(results)]
                    avg_score = sum(scores) / len(scores)
                    return {"score": avg_score, "label": label_from_score(avg_score)}

                def confidence_threshold(results, threshold=0.8):
                    """Aggregates sentiment based on a confidence threshold."""
                    label_counts = Counter(result['label'] for result in results)
                    total_results = len(results)

                    for label, count in label_counts.items():
                        if count / total_results >= threshold:
                            return {"label": label}

                    return {"label": 'Neutral'}  # If no label reaches the threshold, return neutral

                def weighted_average(results, weight=None):
                    """Aggregates sentiment based on a weighted average."""
                    if weight is None:
                        weight = {"vader": 2}

                    weighted_scores = 0
                    total_weight = 0
                    for result in with_score(results):
                        model = result.get('method', 'default')
                        model_weight = weight.get(model, 1)  # Default weight is 1 if model not in weight dict
                        weighted_scores += result['score'] * model_weight
                        total_weight += model_weight

                    if total_weight == 0:
                        raise ValueError("The weights of all results sum to zero; cannot compute a weighted average.")
                    avg_weighted_score = weighted_scores / total_weight
                    return {"score": avg_weighted_score, "label": label_from_score(avg_weighted_score)}

                overall_methods = ["majority", "average", "mean", "threshold", "weighted", "detailed"]
                method = ips.strcmp(method, overall_methods)[0]

                if method == "detailed":
                    # No aggregation here: hand back the per-text table (may be empty).
                    if verbose:
                        for result in results:
                            print(f"Label: {result['label']} | Score: {result['score']}")
                    return pd.DataFrame(results)

                if method not in ("majority", "mean", "average", "threshold", "weighted"):
                    raise ValueError(f"Unknown method '{method}'. Available methods: 'majority', 'average', 'threshold', 'weighted', 'detailed'")

                # Every remaining method needs at least one result to vote on or average.
                if not results:
                    raise ValueError("results is empty; nothing to aggregate.")

                if method == "majority":
                    return majority_voting(results)
                if method in ["mean", "average"]:
                    return average_score(results)
                if method == "threshold":
                    return confidence_threshold(results, threshold)
                return weighted_average(results, weight)
         | 
| 282 | 
            +
             | 
| 283 | 
            +
             |