py2ls 0.2.4.23__py3-none-any.whl → 0.2.4.25__py3-none-any.whl

Sign up to get free protection for your applications and to get access to all the features.
py2ls/nl2ls.py ADDED
@@ -0,0 +1,283 @@
1
+ from . import translator,ips,plot
2
+ import numpy as np
3
+ import pandas as pd
4
+ import matplotlib.pyplot as plt
5
+
6
+
7
def detect(text, method: str = "vader", nb_model=None, device=-1,overall_method="major",overall_threhold=0.8,overall_weight=None,plot_=True,verbose=True, **kwargs) -> dict:
    """
    Analyze the sentiment of a text or a list of texts using different methods.

    A single text is delegated directly to ``detect_single_text``. A list of
    texts is analyzed item by item and the per-text results are then combined
    by ``get_overall_results``.

    Parameters:
    - text (str or list of str): The text(s) to analyze.
    - method (str): Per-text analysis method, forwarded to ``detect_single_text``.
    - nb_model: Pre-trained Naive Bayes model, forwarded to ``detect_single_text``.
    - device (int): Device to run the model on (-1 for CPU, 0 for GPU), forwarded as-is.
    - overall_method (str): Aggregation method name passed to ``get_overall_results``
      (list input only). Presumably resolved by fuzzy matching there, so the
      default "major" would select "majority" -- TODO confirm against ``ips.strcmp``.
    - overall_threhold (float): Threshold passed to ``get_overall_results``
      (parameter name kept as-is for backward compatibility).
    - overall_weight (dict or None): Weights passed to ``get_overall_results``.
    - plot_ (bool): For list input, draw a pie chart of the per-text label counts.
    - verbose (bool): Allow printing the lists of available methods.
    - **kwargs: Forwarded unchanged to ``detect_single_text``
      (e.g. ``vectorizer``, ``transformer_model_name``).

    Returns:
    - For a single text: the result dict produced by ``detect_single_text``.
    - For a list: whatever ``get_overall_results`` returns for ``overall_method``
      (a dict for most methods; a DataFrame for the "detailed" method).
    """
    methods = ['vader','textblob','naive_bayes','transformer(not ready)','senta(not ready)']
    # The rate-limit helper is called before checking `verbose`, exactly as before,
    # so its internal state is updated regardless of verbosity.
    if ips.run_once_within(10, reverse=True) and verbose:
        print(f"methods: {methods}")

    overall_methods = ["majority","average","mean","threshold","weighted","detailed"]
    if ips.run_once_within(10, reverse=True) and verbose:
        print(f"overall_methods: {overall_methods}")

    # Single text: no aggregation and no plotting.
    if not isinstance(text, list):
        return detect_single_text(text=text, method=method, nb_model=nb_model, device=device, **kwargs)

    # List of texts: analyze each one, then aggregate.
    results = [
        detect_single_text(text_, method=method, nb_model=nb_model, device=device, **kwargs)
        for text_ in text
    ]
    res_overall = get_overall_results(
        results, method=overall_method, threshold=overall_threhold, weight=overall_weight
    )
    if plot_:
        # The per-text table is requested separately so the plot does not depend
        # on which aggregation the caller selected.
        res_detail = get_overall_results(
            results, method='detail', threshold=overall_threhold, weight=overall_weight
        )
        plot.pie(res_detail["label"].value_counts(), explode=None, verbose=False)
    return res_overall
48
+
49
+
50
def detect_single_text(text: str, method: str = "vader", nb_model=None, device=-1, **kwargs) -> dict:
    """
    Analyze the sentiment of a single text using the selected method.

    Parameters:
    - text (str): The text to analyze.
    - method (str): Requested method name; it is resolved against the list of
      known method names with ``ips.strcmp`` before dispatching.
    - nb_model (Optional[MultinomialNB]): Pre-trained Naive Bayes model. Only used
      when a matching ``vectorizer`` is also supplied.
    - device (int): Device index handed to the transformers pipeline.
    - **kwargs:
        - vectorizer (Optional[TfidfVectorizer]): Vectorizer fitted together with
          ``nb_model``. If either one is missing, a tiny built-in sample model is
          trained instead.
        - transformer_model_name (str): Model name for the 'transformer' branch
          (defaults to 'bert-base-chinese').

    Returns:
    - dict: Keys ``text``, ``method`` (as passed in), ``score``, ``label`` and
      ``language``. ``score`` and ``label`` stay ``None`` when the chosen backend
      raises; the error is printed rather than propagated.

    Raises:
    - ValueError: If the resolved method matches none of the implemented branches.
    """
    result = {
        "text": text,
        "method": method,
        "score": None,
        "label": None,
        "language": None,
    }

    # Detect language for additional insights
    language = translator.detect_lang(text)
    result["language"] = language
    if language != "English" and method in ["vader", "textblob", "naive_bayes"]:
        print("Detected non-English language, results may be inaccurate.")

    methods = ['vader','textblob','naive_bayes','transformer(not ready)','senta(not ready)']
    # NOTE(review): if ips.strcmp returns the matched candidate verbatim, the
    # '(not ready)' entries can never equal "transformer"/"senta" below, which
    # would make those two branches unreachable -- confirm whether intended.
    method = ips.strcmp(method, methods)[0]

    if method == "vader":
        import os
        import nltk
        from nltk.sentiment import SentimentIntensityAnalyzer

        # Download the lexicon only if it is not found in the first NLTK data path.
        is_local = os.path.isfile(
            os.path.join(nltk.data.path[0], "sentiment", "vader_lexicon.zip")
        )
        if not is_local:
            nltk.download("vader_lexicon")
        try:
            sia = SentimentIntensityAnalyzer()
            compound = sia.polarity_scores(text)["compound"]
            result["score"] = compound
            result["label"] = (
                "Positive"
                if compound >= 0.05
                else "Negative" if compound <= -0.05 else "Neutral"
            )
        except Exception as e:
            print(f"Error in VADER analysis: {e}")

    elif method == "textblob":
        from textblob import TextBlob

        try:
            polarity = TextBlob(text).sentiment.polarity
            result["score"] = polarity
            result["label"] = (
                "Positive"
                if polarity > 0
                else "Negative" if polarity < 0 else "Neutral"
            )
        except Exception as e:
            print(f"Error in TextBlob analysis: {e}")

    elif method == "naive_bayes":
        from sklearn.naive_bayes import MultinomialNB
        from sklearn.feature_extraction.text import TfidfVectorizer

        try:
            # The vectorizer arrives through **kwargs. Previously the name was
            # read before ever being bound, so supplying nb_model always ended
            # in a swallowed UnboundLocalError and an empty result.
            vectorizer = kwargs.get("vectorizer")
            if nb_model is None or vectorizer is None:
                # Sample data for Naive Bayes training if model not provided
                sample_texts = [
                    "I love this product",
                    "I hate this product",
                    "It's okay, not great",
                    "Absolutely fantastic!",
                    "Not satisfied",
                ]
                sample_labels = [1, 0, 0, 1, 0]  # 1 = Positive, 0 = Negative

                # Train Naive Bayes model
                vectorizer = TfidfVectorizer()
                X_train_tfidf = vectorizer.fit_transform(sample_texts)
                nb_model = MultinomialNB()
                nb_model.fit(X_train_tfidf, sample_labels)

            transformed_text = vectorizer.transform([text])
            prediction = nb_model.predict(transformed_text)[0]
            # Score is the probability of the most likely class.
            result["score"] = max(nb_model.predict_proba(transformed_text)[0])
            result["label"] = "Positive" if prediction == 1 else "Negative"

        except Exception as e:
            print(f"Error in Naive Bayes analysis: {e}")

    elif method == "transformer":
        try:
            from transformers import pipeline

            # Model name is configurable through kwargs; the default is the
            # previously hard-coded Chinese BERT model.
            model_name = kwargs.get("transformer_model_name", "bert-base-chinese")
            classifier = pipeline('sentiment-analysis', model=model_name, device=device)
            analysis_result = classifier(text)
            result["score"] = analysis_result[0]['score']
            result["label"] = analysis_result[0]['label']
        except Exception as e:
            print(f"Error in Transformer analysis: {e}")

    elif method == "senta":
        from transformers import pipeline

        try:
            # Load the Senta model for sentiment analysis
            classifier = pipeline('sentiment-analysis', model='junnyu/senta', device=device)
            analysis_result = classifier(text)

            # The first element of the pipeline output is used for the single input text.
            result["score"] = analysis_result[0]["score"]
            result["label"] = analysis_result[0]["label"]

        except Exception as e:
            print(f"Error in Senta analysis: {e}")

    else:
        message = f"Unknown method '{method}'. Available methods: 'vader', 'textblob', 'naive_bayes', 'transformers'"
        # Printed as well as raised, matching the existing behavior.
        print(message)
        raise ValueError(message)

    return result
181
+
182
def get_overall_results(results, method="majority", threshold=0.8, weight=None,verbose=False):
    """
    Aggregates sentiment analysis results based on the selected method.

    Parameters:
    - results (list): A list of sentiment analysis results, each being a dictionary
      with at least 'label' and 'score' (and optionally 'method').
    - method (str): The aggregation method to use ('majority', 'average'/'mean',
      'threshold', 'weighted', 'detailed'). The name is resolved against that
      list with ``ips.strcmp``.
    - threshold (float): Minimum share of results a label needs in 'threshold' mode.
    - weight (dict): Optional mapping of analysis-method name to weight for
      'weighted' mode; methods not listed get weight 1. Defaults to {"vader": 2}.
    - verbose (bool): In 'detailed' mode, print one line per result.

    Returns:
    - dict: {"label": ...} for 'majority' and 'threshold';
      {"score": ..., "label": ...} for 'average'/'mean' and 'weighted'.
      Entries whose label (majority) or score (average, weighted) is None are
      ignored; if nothing usable remains, the corresponding values are None.
    - pandas.DataFrame: one row per result for 'detailed'.

    Raises:
    - ValueError: If the resolved method is not one of the supported names.
    """
    from collections import Counter

    def label_from_score(score):
        """Maps a numeric score to a label using the +/-0.05 cut-offs."""
        if score > 0.05:
            return 'Positive'
        if score < -0.05:
            return 'Negative'
        return 'Neutral'

    def majority_voting(results):
        """Aggregates sentiment using majority voting."""
        # Failed analyses carry label None; they must not win the vote.
        labels = [r['label'] for r in results if r.get('label') is not None]
        if not labels:
            return {"label": None}
        return {"label": Counter(labels).most_common(1)[0][0]}

    def average_score(results):
        """Aggregates sentiment by calculating the average score."""
        # Failed analyses carry score None and cannot be summed.
        scores = [r['score'] for r in results if r.get('score') is not None]
        if not scores:
            return {"score": None, "label": None}
        avg_score = sum(scores) / len(scores)
        return {"score": avg_score, "label": label_from_score(avg_score)}

    def confidence_threshold(results, threshold=0.8):
        """Aggregates sentiment based on a confidence threshold."""
        label_counts = Counter(r['label'] for r in results)
        total_results = len(results)

        for label, count in label_counts.items():
            if count / total_results >= threshold:
                return {"label": label}

        return {"label": 'Neutral'}  # If no label reaches the threshold, return neutral

    def weighted_average(results, weight=None):
        """Aggregates sentiment based on a weighted average."""
        if weight is None:
            weight = {"vader": 2}

        weighted_scores = 0
        total_weight = 0
        for r in results:
            if r.get('score') is None:
                continue  # skip failed analyses
            model_weight = weight.get(r.get('method', 'default'), 1)  # default weight is 1
            weighted_scores += r['score'] * model_weight
            total_weight += model_weight

        # No usable scores (or all weights zero): nothing to average.
        if not total_weight:
            return {"score": None, "label": None}

        avg_weighted_score = weighted_scores / total_weight
        return {"score": avg_weighted_score, "label": label_from_score(avg_weighted_score)}

    overall_methods = ["majority","average","mean","threshold","weighted","detailed"]
    method = ips.strcmp(method, overall_methods)[0]

    if method == "majority":
        return majority_voting(results)
    if method in ["mean", "average"]:
        return average_score(results)
    if method == "threshold":
        return confidence_threshold(results, threshold)
    if method == "weighted":
        return weighted_average(results, weight)
    if method == "detailed":
        if verbose:
            for r in results:
                print(f"Label: {r['label']} | Score: {r['score']}")
        return pd.DataFrame(results)
    raise ValueError(f"Unknown method '{method}'. Available methods: 'majority', 'average', 'threshold', 'weighted', 'detailed'")
282
+
283
+