opsci-toolbox 0.0.10__py3-none-any.whl → 0.0.12__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- opsci_toolbox/apis/webscraping.py +75 -0
- opsci_toolbox/helpers/common.py +39 -20
- opsci_toolbox/helpers/dataviz.py +4262 -1975
- opsci_toolbox/helpers/nlp.py +121 -33
- opsci_toolbox-0.0.12.dist-info/METADATA +53 -0
- {opsci_toolbox-0.0.10.dist-info → opsci_toolbox-0.0.12.dist-info}/RECORD +8 -8
- {opsci_toolbox-0.0.10.dist-info → opsci_toolbox-0.0.12.dist-info}/WHEEL +1 -1
- opsci_toolbox-0.0.10.dist-info/METADATA +0 -53
- {opsci_toolbox-0.0.10.dist-info → opsci_toolbox-0.0.12.dist-info}/top_level.txt +0 -0
opsci_toolbox/helpers/nlp.py
CHANGED
@@ -48,6 +48,28 @@ def remove_html_tags(text: str) -> str:
     soup = BeautifulSoup(text, "html.parser")
     return soup.get_text()
 
+def remove_rt(text: str) -> str:
+    """
+    Remove the retweet tag from a given text.
+
+    Args:
+        - text (str): The input text possibly containing a retweet tag in the format "RT @username: ".
+
+    Returns:
+        - str: The cleaned text with the retweet tag removed.
+
+    Example:
+        >>> remove_rt("RT @user123: Check out this tweet!")
+        'Check out this tweet!'
+    """
+    # Regular expression pattern to match "RT @username: "
+    pattern = r'RT @\w+: '
+
+    # Substitute the pattern with an empty string
+    cleaned_text = re.sub(pattern, '', text)
+
+    return cleaned_text
+
 def filter_by_query(df: pd.DataFrame, col_text: str, query: str, ignore_case: bool = True, ignore_accent: bool = True, match_word: bool = False) -> pd.DataFrame:
     """
     Filter DataFrame rows by a query on a specific text column.
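For reference, here is a minimal standalone sketch of what the new remove_rt helper does, reimplemented so it runs without installing the package:

```python
import re

def remove_rt(text: str) -> str:
    # Substitute every "RT @username: " retweet marker with an empty string.
    return re.sub(r'RT @\w+: ', '', text)

print(remove_rt("RT @user123: Check out this tweet!"))  # -> Check out this tweet!
print(remove_rt("No retweet tag here"))                 # -> unchanged
```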
@@ -91,7 +113,8 @@ def TM_clean_text(df: pd.DataFrame, col: str, col_clean: str) -> pd.DataFrame:
     df : pandas DataFrame
         The DataFrame with cleaned text data.
     """
-    df[col_clean] = df[col].apply(
+    df[col_clean] = df[col].apply(remove_rt)
+    df[col_clean] = df[col_clean].apply(lambda x : urls(x, repl= ''))
     df[col_clean] = df.apply(lambda row: " ".join(filter(lambda x: x[0] != "@", row[col_clean].split())), 1)
     df[col_clean] = df[col_clean].apply(remove_extra_spaces)
     # df = df.loc[(df[col_clean] != ""), :]
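The updated TM_clean_text thus strips retweet tags first, then URLs, @-mentions, and redundant whitespace. A self-contained sketch of that order of operations on a toy DataFrame (plain regexes stand in for the package's urls and remove_extra_spaces helpers):

```python
import pandas as pd

df = pd.DataFrame({"text": ["RT @user123: Great read https://example.com @friend thanks"]})

clean = df["text"].str.replace(r'RT @\w+: ', '', regex=True)  # retweet tag
clean = clean.str.replace(r'https?://\S+', '', regex=True)    # URLs
clean = clean.apply(lambda t: " ".join(w for w in t.split() if not w.startswith("@")))  # mentions
df["clean_text"] = clean.str.replace(r'\s+', ' ', regex=True).str.strip()  # extra spaces

print(df["clean_text"].iloc[0])  # -> Great read thanks
```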
@@ -1019,6 +1042,73 @@ def sample_most_engaging_posts(df: pd.DataFrame, col_topic: str, col_engagement:
 def get_lang_detector(nlp, name):
     return LanguageDetector(seed=42)  # We use the seed 42
 
+def PRarmy_nlp_process(nlp, df: pd.DataFrame, col_text: str, col_lemma: str = "lemmatized_text", pos_to_keep: list = ["VERB","NOUN","ADJ", "ADV", "PROPN"], entities_to_keep: list = ['PERSON','ORG', 'LOC'], stopwords: list = [], batch_size: int = 100, n_process: int = 1) -> pd.DataFrame:
+    """
+    Perform natural language processing tasks using spaCy for PR Army project.
+    Its main tasks are lemmatization and named entity recognition (NER).
+
+    Args:
+        nlp : spacy.Language
+            The spaCy language model.
+        df : pandas.DataFrame
+            The DataFrame containing the text data.
+        col_text : str
+            The name of the column containing the text data.
+        col_lemma : str
+            The name of the column to store the lemmatized text data.
+        pos_to_keep : list
+            A list of part-of-speech tags to keep during lemmatization.
+        entities_to_keep : list
+            A list of NER tags to keep.
+        stopwords : list
+            A list of stopwords to remove during processing.
+        batch_size : int, optional
+            The batch size for spaCy processing. Default is 100.
+        n_process : int, optional
+            The number of processes for parallel processing. Default is 1.
+    Returns:
+        pandas.DataFrame
+            The DataFrame with processed text data.
+
+    """
+    all_records = []
+    text=list(df[col_text].astype('unicode').values)
+
+    for doc in tqdm(nlp.pipe(text, batch_size=batch_size, n_process=n_process), total= len(text), desc = "NLP Process"):
+        NER_type = []
+        NER_text = []
+
+        ### LEMMATIZATION
+
+        if len(pos_to_keep)>0 and len(stopwords)>0:
+            lemmas_list = [str(tok.lemma_).lower() for tok in doc if not (tok.is_punct or tok.is_space) and tok.text.lower() not in stopwords and tok.pos_ in pos_to_keep]
+        elif len(pos_to_keep)>0 and len(stopwords) < 1:
+            lemmas_list = [str(tok.lemma_).lower() for tok in doc if not (tok.is_punct or tok.is_space) and tok.pos_ in pos_to_keep]
+        elif len(pos_to_keep) < 1 and len(stopwords) > 0:
+            lemmas_list = [str(tok.lemma_).lower() for tok in doc if not (tok.is_punct or tok.is_space) and tok.text.lower() not in stopwords]
+        else :
+            lemmas_list = [str(tok.lemma_).lower() for tok in doc if not (tok.is_punct or tok.is_space)]
+
+        ### NER
+        if len(entities_to_keep)>0:
+            for ent in doc.ents:
+                if ent.label_ in entities_to_keep:
+                    NER_type.append(ent.label_)
+                    NER_text.append(ent.text)
+
+        else:
+            for ent in doc.ents:
+                NER_type.append(ent.label_)
+                NER_text.append(ent.text)
+
+        record = (NER_type, NER_text, ' '.join(map(str, lemmas_list)))
+        all_records.append(record)
+
+
+    df[['NER_type', 'NER_text', col_lemma]] = pd.DataFrame(all_records, index=df.index)
+
+    return df
+
 def TM_nlp_process(nlp, df: pd.DataFrame, col_text: str, col_lemma: str, pos_to_keep: list, stopwords: list, batch_size: int = 100, n_process: int = 1, stats: bool = True, join_list: bool = False) -> pd.DataFrame:
     """
     Perform natural language processing tasks using spaCy for topic modeling.
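A minimal usage sketch of the new PRarmy_nlp_process helper, assuming a spaCy pipeline with a NER component (en_core_web_sm here) is installed:

```python
import spacy
import pandas as pd
from opsci_toolbox.helpers.nlp import PRarmy_nlp_process

nlp = spacy.load("en_core_web_sm")
df = pd.DataFrame({"text": ["Emmanuel Macron met Olaf Scholz in Paris.",
                            "The report was published by Reuters."]})

# Adds NER_type and NER_text (lists of kept entities per document) plus a
# lemmatized_text column of lowercased lemmas filtered by POS and stopwords.
df = PRarmy_nlp_process(nlp, df, col_text="text")
print(df[["NER_type", "NER_text", "lemmatized_text"]])
```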
@@ -1358,14 +1448,14 @@ def split_n_sentences(nlp, df: pd.DataFrame, col_text: str, n_sentences: int = 1
     return df
 
 
-def spacy_NER(nlp, df: pd.DataFrame, col_text: str, entities_to_keep: list = ['PERSON','ORG'], explode: bool = True) -> pd.DataFrame:
+def spacy_NER(nlp, df: pd.DataFrame, col_text: str, entities_to_keep: list = ['PERSON','ORG'], explode: bool = True, batch_size : int = 100, n_process: int =1) -> pd.DataFrame:
     """
     Spacy implementation of NER.
     To define entities type to keep, call get_labels(nlp, pipe_step="ner", explanations=False)
     explode = False means it returns 1 list of entities per document
     explode = True means it returns 1 entity per row
 
-
+    Args:
     nlp : spacy.language.Language
         The spaCy language processing pipeline.
     df : pd.DataFrame
@@ -1376,6 +1466,10 @@ def spacy_NER(nlp, df: pd.DataFrame, col_text: str, entities_to_keep: list = ['P
         List of entity types to keep. Default is ['PERSON','ORG'].
     explode : bool, optional
         Flag indicating whether to explode the DataFrame to have one entity per row. Default is True.
+    batch_size : int, optional
+        Batch sizes
+    n_process : int, optional
+        Number of processes
 
     Returns:
     pd.DataFrame
@@ -1385,43 +1479,40 @@ def spacy_NER(nlp, df: pd.DataFrame, col_text: str, entities_to_keep: list = ['P
     This function performs Named Entity Recognition (NER) using spaCy on a DataFrame with text data. It extracts entities of the specified types
     and stores the NER information in separate columns. If 'explode' is set to True, it returns one entity per row in the DataFrame.
     """
-
-
-
-
-
-
-
-
-        doc = nlp(row[col_text])
-        entities_data = []
+    l_text = df[col_text].tolist()
+    all_records = []
+    for doc in tqdm(nlp.pipe(l_text, batch_size=batch_size, n_process=n_process), total= len(l_text), desc = "NLP Process"):
+        NER_type = []
+        NER_text = []
+        NER_start_char = []
+        NER_end_char=[]
+        # entities_data = []
 
         if len(entities_to_keep)>0:
             for ent in doc.ents:
                 if ent.label_ in entities_to_keep:
-
+                    NER_type.append(ent.label_)
+                    NER_text.append(ent.text)
+                    NER_start_char.append(ent.start_char)
+                    NER_end_char.append(ent.end_char)
+                    # entities_data.append([ent.label_, ent.text, ent.start_char, ent.end_char])
         else:
             for ent in doc.ents:
-
-
-
-
-
-
-
-                row['NER_end_char'] = end_char
+                NER_type.append(ent.label_)
+                NER_text.append(ent.text)
+                NER_start_char.append(ent.start_char)
+                NER_end_char.append(ent.end_char)
+                # entities_data.append([ent.label_, ent.text, ent.start_char, ent.end_char])
+        record = (NER_type, NER_text, NER_start_char, NER_end_char)
+        all_records.append(record)
 
-
-
-    # Apply the processing function to each row
-    df = df.apply(process_row, axis=1)
+    df[['NER_type', 'NER_text','NER_start_char','NER_end_char']] = pd.DataFrame(all_records, index=df.index)
 
     if explode:
         df= df.explode(['NER_type', 'NER_text','NER_start_char','NER_end_char'])
 
     return df
 
-
 def tokenize(nlp, df: pd.DataFrame, col_text: str, col_tokens: str, pos_to_keep: list, stopwords: list, batch_size: int = 100, n_process: int = 1, stats: bool = True) -> pd.DataFrame:
     """
     Spacy implementation to tokenize text
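The rewritten spacy_NER streams texts through nlp.pipe instead of calling nlp() row by row, so the new batch_size and n_process arguments now control throughput. A usage sketch under the same model assumption as above:

```python
import spacy
import pandas as pd
from opsci_toolbox.helpers.nlp import spacy_NER

nlp = spacy.load("en_core_web_sm")
df = pd.DataFrame({"text": ["Apple hired Tim Cook.", "Paris is in France."]})

df = spacy_NER(nlp, df, col_text="text", entities_to_keep=["PERSON", "ORG"],
               explode=True, batch_size=50, n_process=1)
# With explode=True, each output row holds one entity and its character offsets.
print(df[["NER_type", "NER_text", "NER_start_char", "NER_end_char"]])
```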
@@ -1901,15 +1992,13 @@ def agglomerative_clustering(embeddings, n_clusters=15, metric="euclidean", link
 
 
 
-def hdbscan_clustering(embeddings, algorithm='best', alpha=1.0, cluster_selection_epsilon=0.0, approx_min_span_tree=True,
-                       gen_min_span_tree=True, leaf_size=40, metric='euclidean', min_cluster_size=5, min_samples=None,
-                       p=None, cluster_selection_method='eom', prediction_data = True):
-
+def hdbscan_clustering(embeddings, algorithm='best', alpha=1.0, cluster_selection_epsilon=0.0, approx_min_span_tree=True, gen_min_span_tree=True, leaf_size=40, metric='euclidean', min_cluster_size=5, min_samples=None, p=None, cluster_selection_method='eom', prediction_data = True):
     """
     This function performs clustering using the HDBSCAN (Hierarchical Density-Based Spatial Clustering of Applications with Noise) algorithm. It clusters the input data based on the specified parameters and returns the clusterer object, cluster labels for each point, and the probability of each sample being an outlier.
+
     Args
         embeddings : array-like or sparse matrix, shape (n_samples, n_features). The input data to be clustered.
-        algorithm : {'best', 'generic', 'prims_kdtree', 'boruvka_kdtree', 'boruvka_balltree', 'prims_balltree'}, optional. The algorithm to use for computation. Default is
+        algorithm : {'best', 'generic', 'prims_kdtree', 'boruvka_kdtree', 'boruvka_balltree', 'prims_balltree'}, optional. The algorithm to use for computation. Default is best.
         alpha : float, optional. Scaling factor determining the individual weight of the (unnormalized) density estimate. Default is 1.0.
         cluster_selection_epsilon : float, optional. The epsilon value to specify a minimum cluster size. Default is 0.0.
         approx_min_span_tree : bool, optional. Whether to compute an approximation of the minimum spanning tree. Default is True.
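A sketch of calling hdbscan_clustering on toy embeddings; per the docstring above, the helper returns the fitted clusterer, one label per point (with -1 marking noise), and per-sample probabilities:

```python
import numpy as np
from opsci_toolbox.helpers.nlp import hdbscan_clustering

# Two well-separated Gaussian blobs plus a few uniform outliers.
rng = np.random.default_rng(42)
embeddings = np.vstack([rng.normal(0, 0.2, (50, 5)),
                        rng.normal(5, 0.2, (50, 5)),
                        rng.uniform(-3, 8, (5, 5))])

clusterer, labels, probabilities = hdbscan_clustering(
    embeddings, min_cluster_size=5, metric="euclidean")
print(np.unique(labels))  # expect two clusters, plus -1 if noise is found
```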
@@ -2054,7 +2143,6 @@ def HF_sentiment_classifier(tokenizer, model, text, col_text, filename, dir_json
     proba = torch.sigmoid(model(**inputs).logits).cpu().numpy()[0]
     label = model.config.id2label[proba.argmax()]
     results = {"label":label, "score" : float(proba.max()), col_text : text}
-    print(results)
     write_json(results, dir_json , str(filename))
 
     return results
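HF_sentiment_classifier now writes its result to JSON and returns it without also printing it. A hedged usage sketch; the checkpoint name is illustrative, and any sequence-classification model with an id2label mapping should fit the tokenizer/model arguments:

```python
from transformers import AutoTokenizer, AutoModelForSequenceClassification
from opsci_toolbox.helpers.nlp import HF_sentiment_classifier

name = "distilbert-base-uncased-finetuned-sst-2-english"  # illustrative checkpoint
tokenizer = AutoTokenizer.from_pretrained(name)
model = AutoModelForSequenceClassification.from_pretrained(name)

# Applies a sigmoid to the logits, keeps the top label and score, writes the
# record as JSON under dir_json, and returns it as a dict.
result = HF_sentiment_classifier(tokenizer, model,
                                 text="I love this toolbox!",
                                 col_text="text",
                                 filename="demo",
                                 dir_json="./json_outputs")
print(result)
```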
opsci_toolbox-0.0.12.dist-info/METADATA
ADDED
@@ -0,0 +1,53 @@
+Metadata-Version: 2.1
+Name: opsci-toolbox
+Version: 0.0.12
+Summary: a complete toolbox
+Home-page: UNKNOWN
+Author: Erwan Le Nagard
+Author-email: erwan@opsci.ai
+License: MIT
+Platform: UNKNOWN
+Requires-Dist: requests <3,>=2.31.0
+Requires-Dist: beautifulsoup4 ==4.9.3
+Requires-Dist: chardet >=4.0.0
+Requires-Dist: chart-studio ==1.1.0
+Requires-Dist: eldar ==0.0.8
+Requires-Dist: emoji ==2.10.1
+Requires-Dist: fa2-modified ==0.3.10
+Requires-Dist: google-api-python-client ==2.122.0
+Requires-Dist: gspread ==6.1.2
+Requires-Dist: hdbscan ==0.8.33
+Requires-Dist: jusText ==3.0.0
+Requires-Dist: langchain ==0.1.20
+Requires-Dist: matplotlib >=3.9.0
+Requires-Dist: mysql-connector-python >=9.0.0
+Requires-Dist: networkx ==3.2.1
+Requires-Dist: nltk ==3.8.1
+Requires-Dist: numpy <1.25.0,>=1.21.5
+Requires-Dist: opencv-python-headless ==4.9.0.80
+Requires-Dist: openpyxl ==3.1.3
+Requires-Dist: pandas >=1.5.3
+Requires-Dist: Pillow >=9.0.1
+Requires-Dist: plotly ==5.19.0
+Requires-Dist: protobuf ==4.23.4
+Requires-Dist: pyarrow >=14.0.2
+Requires-Dist: python-louvain ==0.16
+Requires-Dist: scikit-learn ==1.4.1.post1
+Requires-Dist: scipy <2.0.0,>=1.8.0
+Requires-Dist: sentence-transformers ==2.5.1
+Requires-Dist: setuptools ==59.6.0
+Requires-Dist: spacy ==3.7.4
+Requires-Dist: spacy-language-detection ==0.2.1
+Requires-Dist: spacymoji ==3.1.0
+Requires-Dist: supervision ==0.21.0
+Requires-Dist: textacy ==0.13.0
+Requires-Dist: torch ==2.0.1
+Requires-Dist: tqdm >=4.66.2
+Requires-Dist: trafilatura ==1.7.0
+Requires-Dist: transformers ==4.38.2
+Requires-Dist: umap-learn ==0.5.5
+Requires-Dist: urlextract ==1.9.0
+Requires-Dist: wordcloud ==1.9.3
+
+UNKNOWN
+
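Compared with the 0.0.10 metadata below, the notable pin changes are: requests is relaxed from ==2.32.3 to <3,>=2.31.0, beautifulsoup4 moves from 4.10.0 to 4.9.3, tqdm is loosened to >=4.66.2, and mysql-connector-repackaged is replaced by mysql-connector-python >=9.0.0. A quick sketch for inspecting the pins of whichever wheel is installed:

```python
from importlib.metadata import requires, version

# Prints the installed version and its Requires-Dist pins; with 0.0.12 the
# list matches the metadata shown above.
print(version("opsci-toolbox"))
for requirement in requires("opsci-toolbox") or []:
    print(requirement)
```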
{opsci_toolbox-0.0.10.dist-info → opsci_toolbox-0.0.12.dist-info}/RECORD
CHANGED
@@ -1,14 +1,14 @@
 opsci_toolbox/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 opsci_toolbox/apis/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 opsci_toolbox/apis/rapidapi_helpers.py,sha256=k_hYcRNww5noNkX7zyz5Htggxb15BPoKSlbY7NLuQXI,26696
-opsci_toolbox/apis/webscraping.py,sha256=
+opsci_toolbox/apis/webscraping.py,sha256=1DAIYbywZoPwTSyoqFGxyF0-q_nUsGg_VK51zLL_bB0,21465
 opsci_toolbox/apis/youtube_helpers.py,sha256=j4hwCS2BEWRJjd9Q5XBN9FeCrL3lqteyz5dqbtfypdo,17418
 opsci_toolbox/helpers/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-opsci_toolbox/helpers/common.py,sha256=
+opsci_toolbox/helpers/common.py,sha256=nqg9wzgU5DxVTCxEb5LSw2lUnp0f_hKF_Q-DhpRtu6g,45158
 opsci_toolbox/helpers/cv.py,sha256=N3hnLX223UQbdw_YEdUYj10xUXT_95O6BpQt6TbAE08,21092
-opsci_toolbox/helpers/dataviz.py,sha256=
+opsci_toolbox/helpers/dataviz.py,sha256=1cIGb-u81cD5iSIkkkrzyrBnfim7fbhm0x_CguHUbf0,202128
 opsci_toolbox/helpers/dates.py,sha256=Wf7HxaUY62IRrY3XPdRIuoaMbGi3QqWf-vStqbRRY_o,2633
-opsci_toolbox/helpers/nlp.py,sha256=
+opsci_toolbox/helpers/nlp.py,sha256=n7nNEU0cuu7bqXYRRBH4D-xIzpdNwKm0nj-eRYh3aPY,91956
 opsci_toolbox/helpers/nlp_cuml.py,sha256=XzBfoFMpVIehpRbp60E4wGokpoqJP0lJxs1plOxQqBY,28882
 opsci_toolbox/helpers/sna.py,sha256=XL1BZ-x83xWRNbGsvh7-m8Mdy6iOrWx8vjgaL2_TSmo,31905
 opsci_toolbox/helpers/sql.py,sha256=LMrDWcv1QpfE8HyyrqiKuhhkt930lvME3-AKU89LF38,1928
@@ -16,7 +16,7 @@ opsci_toolbox/helpers/surreaction.py,sha256=JjVvHs7Sf9IJxX0QdHpQ_3E8-c_OS6q_bfUK
 opsci_toolbox/lexicons/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 opsci_toolbox/lexicons/stop_words_en.csv,sha256=4lzjBZHCn_b3lg_CUNkmA_MDQ7DLEpS83k6-dWpkC2o,1957
 opsci_toolbox/lexicons/stop_words_fr.csv,sha256=sPdA8VmyNYbiHg-M8O3tg7ayHvCE3GDg6cF-oSZxICM,6776
-opsci_toolbox-0.0.
-opsci_toolbox-0.0.
-opsci_toolbox-0.0.
-opsci_toolbox-0.0.
+opsci_toolbox-0.0.12.dist-info/METADATA,sha256=LosT5jzu7Z0TXIslwVUSvPG6AKMrblGp8A6odUN_N9U,1633
+opsci_toolbox-0.0.12.dist-info/WHEEL,sha256=GJ7t_kWBFywbagK5eo9IoUwLW6oyOeTKmQ-9iHFVNxQ,92
+opsci_toolbox-0.0.12.dist-info/top_level.txt,sha256=fUiqxou4FPec_tOfauTLCKAuepeYLfRyhedycWxVnq4,14
+opsci_toolbox-0.0.12.dist-info/RECORD,,
opsci_toolbox-0.0.10.dist-info/METADATA
DELETED
@@ -1,53 +0,0 @@
-Metadata-Version: 2.1
-Name: opsci-toolbox
-Version: 0.0.10
-Summary: a complete toolbox
-Home-page: UNKNOWN
-Author: Erwan Le Nagard
-Author-email: erwan@opsci.ai
-License: MIT
-Platform: UNKNOWN
-Requires-Dist: Pillow (>=9.0.1)
-Requires-Dist: Requests (==2.32.3)
-Requires-Dist: beautifulsoup4 (==4.10.0)
-Requires-Dist: chardet (>=4.0.0)
-Requires-Dist: chart-studio (==1.1.0)
-Requires-Dist: eldar (==0.0.8)
-Requires-Dist: emoji (==2.10.1)
-Requires-Dist: fa2-modified (==0.3.10)
-Requires-Dist: google-api-python-client (==2.122.0)
-Requires-Dist: gspread (==6.1.2)
-Requires-Dist: hdbscan (==0.8.33)
-Requires-Dist: jusText (==3.0.0)
-Requires-Dist: langchain (==0.1.20)
-Requires-Dist: matplotlib (>=3.9.0)
-Requires-Dist: mysql-connector-repackaged (==0.3.1)
-Requires-Dist: networkx (==3.2.1)
-Requires-Dist: nltk (==3.8.1)
-Requires-Dist: numpy (<1.25.0,>=1.21.5)
-Requires-Dist: opencv-python-headless (==4.9.0.80)
-Requires-Dist: openpyxl (==3.1.3)
-Requires-Dist: pandas (>=1.5.3)
-Requires-Dist: plotly (==5.19.0)
-Requires-Dist: protobuf (==4.23.4)
-Requires-Dist: pyarrow (>=14.0.2)
-Requires-Dist: python-louvain (==0.16)
-Requires-Dist: scikit-learn (==1.4.1.post1)
-Requires-Dist: scipy (<2.0.0,>=1.8.0)
-Requires-Dist: sentence-transformers (==2.5.1)
-Requires-Dist: setuptools (==59.6.0)
-Requires-Dist: spacy (==3.7.4)
-Requires-Dist: spacy-language-detection (==0.2.1)
-Requires-Dist: spacymoji (==3.1.0)
-Requires-Dist: supervision (==0.21.0)
-Requires-Dist: textacy (==0.13.0)
-Requires-Dist: torch (==2.0.1)
-Requires-Dist: tqdm (==4.66.2)
-Requires-Dist: trafilatura (==1.7.0)
-Requires-Dist: transformers (==4.38.2)
-Requires-Dist: umap-learn (==0.5.5)
-Requires-Dist: urlextract (==1.9.0)
-Requires-Dist: wordcloud (==1.9.3)
-
-UNKNOWN
-
{opsci_toolbox-0.0.10.dist-info → opsci_toolbox-0.0.12.dist-info}/top_level.txt
RENAMED
File without changes