py2ls 0.2.4.24__py3-none-any.whl → 0.2.4.25__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- py2ls/.git/index +0 -0
- py2ls/ec2ls.py +61 -0
- py2ls/ips.py +105 -55
- py2ls/ml2ls.py +244 -110
- py2ls/nl2ls.py +283 -0
- py2ls/plot.py +351 -40
- {py2ls-0.2.4.24.dist-info → py2ls-0.2.4.25.dist-info}/METADATA +1 -1
- {py2ls-0.2.4.24.dist-info → py2ls-0.2.4.25.dist-info}/RECORD +9 -8
- py2ls/ml2ls copy.py +0 -2906
- {py2ls-0.2.4.24.dist-info → py2ls-0.2.4.25.dist-info}/WHEEL +0 -0
py2ls/nl2ls.py
ADDED
@@ -0,0 +1,283 @@
|
|
1
|
+
from . import translator,ips,plot
|
2
|
+
import numpy as np
|
3
|
+
import pandas as pd
|
4
|
+
import matplotlib.pyplot as plt
|
5
|
+
|
6
|
+
|
7
|
+
def detect(text, method: str = "vader", nb_model=None, device=-1, overall_method="major", overall_threhold=0.8, overall_weight=None, plot_=True, verbose=True, **kwargs) -> dict:
    """
    Analyze the sentiment of a single text or of a collection of texts.

    A single text is forwarded to ``detect_single_text``. A list (or tuple)
    of texts is analyzed item by item and the per-text results are combined
    with ``get_overall_results``.

    Parameters:
    - text (str or list of str): The text(s) to analyze.
    - method (str): Per-text analysis method, forwarded to ``detect_single_text``
      ('vader', 'textblob', 'naive_bayes', ...).
    - nb_model: Pre-trained Naive Bayes model, forwarded to ``detect_single_text``.
    - device (int): Device index forwarded to ``detect_single_text``.
    - overall_method (str): Aggregation method forwarded to ``get_overall_results``
      (the name is resolved there through ``ips.strcmp``).
    - overall_threhold (float): Threshold forwarded to ``get_overall_results``.
    - overall_weight (dict): Weights forwarded to ``get_overall_results``.
    - plot_ (bool): For a collection input, pass the label counts of the
      per-text results to ``plot.pie``.
    - verbose (bool): Allow printing the lists of available methods.
    - **kwargs: Extra keyword arguments forwarded to ``detect_single_text``
      (e.g. ``vectorizer``, ``transformer_model_name``).

    Returns:
    - For a single text: the dictionary returned by ``detect_single_text``.
    - For a collection: whatever ``get_overall_results`` returns for
      ``overall_method``.
    """
    methods = ['vader', 'textblob', 'naive_bayes', 'transformer(not ready)', 'senta(not ready)']
    if ips.run_once_within(10, reverse=True) and verbose:
        print(f"methods: {methods}")

    overall_methods = ["majority", "average", "mean", "threshold", "weighted", "detailed"]
    if ips.run_once_within(10, reverse=True) and verbose:
        print(f"overall_methods: {overall_methods}")

    # Single text: nothing to aggregate.
    if not isinstance(text, (list, tuple)):
        return detect_single_text(text=text, method=method, nb_model=nb_model, device=device, **kwargs)

    # Collection of texts: analyze each one, then aggregate.
    results = [
        detect_single_text(text_, method=method, nb_model=nb_model, device=device, **kwargs)
        for text_ in text
    ]
    res_overall = get_overall_results(
        results, method=overall_method, threshold=overall_threhold, weight=overall_weight
    )
    # Skip plotting when there are no results: an empty DataFrame has no
    # "label" column to count.
    if plot_ and results:
        res_detail = get_overall_results(
            results, method="detailed", threshold=overall_threhold, weight=overall_weight
        )
        plot.pie(res_detail["label"].value_counts(), explode=None, verbose=False)
    return res_overall
|
48
|
+
|
49
|
+
|
50
|
+
def detect_single_text(text: str, method: str = "vader", nb_model=None, device=-1, **kwargs) -> dict:
    """
    Analyze the sentiment of one text with the selected method.

    Parameters:
    - text (str): The text to analyze.
    - method (str): The method to use; resolved against the known method
      names through ``ips.strcmp``.
    - nb_model: Pre-trained Naive Bayes model (used when method='naive_bayes').
    - device (int): Device index passed to the ``transformers`` pipeline.
    - **kwargs:
        - vectorizer: Vectorizer fitted together with ``nb_model``. When either
          ``nb_model`` or ``vectorizer`` is missing, a tiny built-in sample
          model is trained instead.
        - transformer_model_name (str): Model name for the 'transformer'
          branch (default 'bert-base-chinese').

    Returns:
    - dict: keys 'text', 'method' (as passed in), 'score', 'label', 'language'.
      'score' and 'label' stay None when the analysis itself fails; the error
      is printed rather than raised.

    Raises:
    - ValueError: if the resolved method has no matching branch.
    """
    result = {
        "text": text,
        "method": method,
        "score": None,
        "label": None,
        "language": None,
    }

    # Detect language for additional insights
    language = translator.detect_lang(text)
    result["language"] = language

    # Resolve the method name first so the language warning below is checked
    # against the canonical name instead of the raw user input.
    methods = ['vader', 'textblob', 'naive_bayes', 'transformer(not ready)', 'senta(not ready)']
    method = ips.strcmp(method, methods)[0]
    if language != "English" and method in ["vader", "textblob", "naive_bayes"]:
        print("Detected non-English language, results may be inaccurate.")

    if method == "vader":
        import nltk
        from nltk.sentiment import SentimentIntensityAnalyzer

        # Download the lexicon only when it is missing from every NLTK data path.
        try:
            nltk.data.find("sentiment/vader_lexicon.zip")
        except LookupError:
            nltk.download("vader_lexicon")
        try:
            sia = SentimentIntensityAnalyzer()
            scores = sia.polarity_scores(text)
            compound = scores["compound"]
            result["score"] = compound
            if compound >= 0.05:
                result["label"] = "Positive"
            elif compound <= -0.05:
                result["label"] = "Negative"
            else:
                result["label"] = "Neutral"
        except Exception as e:
            print(f"Error in VADER analysis: {e}")

    elif method == "textblob":
        from textblob import TextBlob

        try:
            polarity = TextBlob(text).sentiment.polarity
            result["score"] = polarity
            if polarity > 0:
                result["label"] = "Positive"
            elif polarity < 0:
                result["label"] = "Negative"
            else:
                result["label"] = "Neutral"
        except Exception as e:
            print(f"Error in TextBlob analysis: {e}")

    elif method == "naive_bayes":
        from sklearn.naive_bayes import MultinomialNB
        from sklearn.feature_extraction.text import TfidfVectorizer

        # The vectorizer arrives through **kwargs; reading it here keeps the
        # name bound even when a pre-trained nb_model is supplied.
        vectorizer = kwargs.get("vectorizer")
        try:
            if nb_model is None or vectorizer is None:
                # Sample data for Naive Bayes training if model not provided
                sample_texts = [
                    "I love this product",
                    "I hate this product",
                    "It's okay, not great",
                    "Absolutely fantastic!",
                    "Not satisfied",
                ]
                sample_labels = [1, 0, 0, 1, 0]  # 1 = Positive, 0 = Negative

                # Train Naive Bayes model
                vectorizer = TfidfVectorizer()
                X_train_tfidf = vectorizer.fit_transform(sample_texts)
                nb_model = MultinomialNB()
                nb_model.fit(X_train_tfidf, sample_labels)

            transformed_text = vectorizer.transform([text])
            prediction = nb_model.predict(transformed_text)[0]
            # Score is the probability of the most likely class.
            result["score"] = max(nb_model.predict_proba(transformed_text)[0])
            result["label"] = "Positive" if prediction == 1 else "Negative"

        except Exception as e:
            print(f"Error in Naive Bayes analysis: {e}")

    # NOTE(review): the candidate list above spells these two methods with a
    # "(not ready)" suffix; if ips.strcmp returns the matched candidate string,
    # the two branches below are never selected - confirm this is intended.
    elif method == "transformer":
        try:
            from transformers import pipeline

            # Load pre-trained sentiment analysis pipeline (Chinese model by default)
            model_name = kwargs.get("transformer_model_name", "bert-base-chinese")
            classifier = pipeline('sentiment-analysis', model=model_name, device=device)
            analysis_result = classifier(text)
            result["score"] = analysis_result[0]['score']
            result["label"] = analysis_result[0]['label']
        except Exception as e:
            print(f"Error in Transformer analysis: {e}")

    elif method == "senta":
        try:
            from transformers import pipeline

            # Load the Senta model for sentiment analysis
            classifier = pipeline('sentiment-analysis', model='junnyu/senta', device=device)
            analysis_result = classifier(text)

            # The first element of the pipeline output is read as the result
            # for this single text.
            result["score"] = analysis_result[0]["score"]
            result["label"] = analysis_result[0]["label"]

        except Exception as e:
            print(f"Error in Senta analysis: {e}")

    else:
        raise ValueError(
            f"Unknown method '{method}'. Available methods: 'vader', 'textblob', 'naive_bayes', 'transformers'"
        )

    return result
|
181
|
+
|
182
|
+
def get_overall_results(results, method="majority", threshold=0.8, weight=None, verbose=False):
    """
    Aggregate per-text sentiment results with the selected method.

    Parameters:
    - results (list): Sentiment results, each a dictionary with at least
      'label' and 'score' (and 'method' for weighted aggregation).
    - method (str): Aggregation method ('majority', 'average'/'mean',
      'threshold', 'weighted', 'detailed'); resolved through ``ips.strcmp``.
    - threshold (float): Minimum share of results the most frequent label must
      reach for the 'threshold' method.
    - weight (dict): Optional mapping from a result's 'method' value to its
      weight for the 'weighted' method; unlisted methods weigh 1.
      Defaults to {"vader": 2}.
    - verbose (bool): For 'detailed', print one line per result.

    Returns:
    - 'majority' / 'threshold': {"label": ...}
    - 'average' / 'weighted': {"score": ..., "label": ...}
    - 'detailed': pandas.DataFrame built from ``results``.
    Score and label are None when there is nothing to aggregate (no results,
    or no result with a score).

    Raises:
    - ValueError: if the resolved method has no matching branch.
    """
    from collections import Counter

    def label_from_score(score):
        """Map an aggregated score to a label using the +/-0.05 cut-offs."""
        if score > 0.05:
            return 'Positive'
        if score < -0.05:
            return 'Negative'
        return 'Neutral'

    def majority_voting(results):
        """Aggregates sentiment using majority voting."""
        label_counts = Counter(result['label'] for result in results)
        if not label_counts:
            return {"label": None}
        return {"label": label_counts.most_common(1)[0][0]}

    def average_score(results):
        """Aggregates sentiment by calculating the average score."""
        # Failed analyses leave their score as None; they cannot be averaged.
        scores = [result.get('score') for result in results]
        scores = [score for score in scores if score is not None]
        if not scores:
            return {"score": None, "label": None}
        avg_score = sum(scores) / len(scores)
        return {"score": avg_score, "label": label_from_score(avg_score)}

    def confidence_threshold(results, threshold=0.8):
        """Aggregates sentiment based on a confidence threshold."""
        label_counts = Counter(result['label'] for result in results)
        total_results = len(results)
        # Only the most frequent label needs checking: if it does not reach
        # the threshold, no other label can.
        for label, count in label_counts.most_common(1):
            if count / total_results >= threshold:
                return {"label": label}
        return {"label": 'Neutral'}  # If no label reaches the threshold, return neutral

    def weighted_average(results, weight=None):
        """Aggregates sentiment based on a weighted average."""
        if weight is None:
            weight = {"vader": 2}

        weighted_scores = 0
        total_weight = 0
        for result in results:
            score = result.get('score')
            if score is None:  # failed analysis, nothing to weigh
                continue
            model_weight = weight.get(result.get('method', 'default'), 1)  # Default weight is 1
            weighted_scores += score * model_weight
            total_weight += model_weight

        if total_weight == 0:
            return {"score": None, "label": None}

        avg_weighted_score = weighted_scores / total_weight
        return {"score": avg_weighted_score, "label": label_from_score(avg_weighted_score)}

    overall_methods = ["majority", "average", "mean", "threshold", "weighted", "detailed"]
    method = ips.strcmp(method, overall_methods)[0]

    if method == "majority":
        return majority_voting(results)
    if method in ["mean", "average"]:
        return average_score(results)
    if method == "threshold":
        return confidence_threshold(results, threshold)
    if method == "weighted":
        return weighted_average(results, weight)
    if method == "detailed":
        if verbose:
            for result in results:
                print(f"Label: {result['label']} | Score: {result['score']}")
        return pd.DataFrame(results)
    raise ValueError(f"Unknown method '{method}'. Available methods: 'majority', 'average', 'threshold', 'weighted', 'detailed'")
|
282
|
+
|
283
|
+
|