opsci-toolbox 0.0.10__py3-none-any.whl → 0.0.12__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -48,6 +48,28 @@ def remove_html_tags(text: str) -> str:
      soup = BeautifulSoup(text, "html.parser")
      return soup.get_text()

+ def remove_rt(text: str) -> str:
+     """
+     Remove the retweet tag from a given text.
+
+     Args:
+         - text (str): The input text possibly containing a retweet tag in the format "RT @username: ".
+
+     Returns:
+         - str: The cleaned text with the retweet tag removed.
+
+     Example:
+         >>> remove_rt("RT @user123: Check out this tweet!")
+         'Check out this tweet!'
+     """
+     # Regular expression pattern to match "RT @username: "
+     pattern = r'RT @\w+: '
+
+     # Substitute the pattern with an empty string
+     cleaned_text = re.sub(pattern, '', text)
+
+     return cleaned_text
+
  def filter_by_query(df: pd.DataFrame, col_text: str, query: str, ignore_case: bool = True, ignore_accent: bool = True, match_word: bool = False) -> pd.DataFrame:
      """
      Filter DataFrame rows by a query on a specific text column.
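The new `remove_rt` helper relies on `re` being imported at module level, and its pattern only matches the literal `RT @username: ` form (uppercase `RT`, a single mention, a colon and a trailing space). A quick illustration of the published regex using plain `re`, outside the package:

import re

pattern = r'RT @\w+: '
re.sub(pattern, '', "RT @user123: Check out this tweet!")   # -> 'Check out this tweet!'
re.sub(pattern, '', "rt @user123: lowercase tag")           # no match: the pattern is case-sensitive
re.sub(pattern, '', "RT @user123 missing colon")            # no match: the ': ' suffix is required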
@@ -91,7 +113,8 @@ def TM_clean_text(df: pd.DataFrame, col: str, col_clean: str) -> pd.DataFrame:
          df : pandas DataFrame
              The DataFrame with cleaned text data.
      """
-     df[col_clean] = df[col].apply(lambda x : urls(x, repl= ''))
+     df[col_clean] = df[col].apply(remove_rt)
+     df[col_clean] = df[col_clean].apply(lambda x : urls(x, repl= ''))
      df[col_clean] = df.apply(lambda row: " ".join(filter(lambda x: x[0] != "@", row[col_clean].split())), 1)
      df[col_clean] = df[col_clean].apply(remove_extra_spaces)
      # df = df.loc[(df[col_clean] != ""), :]
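In `TM_clean_text` the retweet tag is now stripped before URLs, mentions and extra whitespace are removed. A rough standalone equivalent of the updated chain, with hypothetical stand-ins for the package's `urls` and `remove_extra_spaces` helpers:

import re
import pandas as pd

def clean_tweet(text: str) -> str:
    text = re.sub(r'RT @\w+: ', '', text)                   # step added in 0.0.12 (remove_rt)
    text = re.sub(r'https?://\S+', '', text)                 # stand-in for urls(x, repl='')
    text = " ".join(tok for tok in text.split() if not tok.startswith("@"))  # drop @mentions
    return re.sub(r'\s+', ' ', text).strip()                 # stand-in for remove_extra_spaces

df = pd.DataFrame({"text": ["RT @user: great read https://t.co/xyz @friend"]})
df["clean"] = df["text"].apply(clean_tweet)                  # -> "great read"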
@@ -1019,6 +1042,73 @@ def sample_most_engaging_posts(df: pd.DataFrame, col_topic: str, col_engagement:
  def get_lang_detector(nlp, name):
      return LanguageDetector(seed=42) # We use the seed 42

+ def PRarmy_nlp_process(nlp, df: pd.DataFrame, col_text: str, col_lemma: str = "lemmatized_text", pos_to_keep: list = ["VERB","NOUN","ADJ", "ADV", "PROPN"], entities_to_keep: list = ['PERSON','ORG', 'LOC'], stopwords: list = [], batch_size: int = 100, n_process: int = 1) -> pd.DataFrame:
+     """
+     Perform natural language processing tasks using spaCy for PR Army project.
+     Its main tasks are lemmatization and named entity recognition (NER).
+
+     Args:
+         nlp : spacy.Language
+             The spaCy language model.
+         df : pandas.DataFrame
+             The DataFrame containing the text data.
+         col_text : str
+             The name of the column containing the text data.
+         col_lemma : str
+             The name of the column to store the lemmatized text data.
+         pos_to_keep : list
+             A list of part-of-speech tags to keep during lemmatization.
+         entities_to_keep : list
+             A list of NER tags to keep.
+         stopwords : list
+             A list of stopwords to remove during processing.
+         batch_size : int, optional
+             The batch size for spaCy processing. Default is 100.
+         n_process : int, optional
+             The number of processes for parallel processing. Default is 1.
+     Returns:
+         pandas.DataFrame
+             The DataFrame with processed text data.
+
+     """
+     all_records = []
+     text=list(df[col_text].astype('unicode').values)
+
+     for doc in tqdm(nlp.pipe(text, batch_size=batch_size, n_process=n_process), total= len(text), desc = "NLP Process"):
+         NER_type = []
+         NER_text = []
+
+         ### LEMMATIZATION
+
+         if len(pos_to_keep)>0 and len(stopwords)>0:
+             lemmas_list = [str(tok.lemma_).lower() for tok in doc if not (tok.is_punct or tok.is_space) and tok.text.lower() not in stopwords and tok.pos_ in pos_to_keep]
+         elif len(pos_to_keep)>0 and len(stopwords) < 1:
+             lemmas_list = [str(tok.lemma_).lower() for tok in doc if not (tok.is_punct or tok.is_space) and tok.pos_ in pos_to_keep]
+         elif len(pos_to_keep) < 1 and len(stopwords) > 0:
+             lemmas_list = [str(tok.lemma_).lower() for tok in doc if not (tok.is_punct or tok.is_space) and tok.text.lower() not in stopwords]
+         else :
+             lemmas_list = [str(tok.lemma_).lower() for tok in doc if not (tok.is_punct or tok.is_space)]
+
+         ### NER
+         if len(entities_to_keep)>0:
+             for ent in doc.ents:
+                 if ent.label_ in entities_to_keep:
+                     NER_type.append(ent.label_)
+                     NER_text.append(ent.text)
+
+         else:
+             for ent in doc.ents:
+                 NER_type.append(ent.label_)
+                 NER_text.append(ent.text)
+
+         record = (NER_type, NER_text, ' '.join(map(str, lemmas_list)))
+         all_records.append(record)
+
+
+     df[['NER_type', 'NER_text', col_lemma]] = pd.DataFrame(all_records, index=df.index)
+
+     return df
+
  def TM_nlp_process(nlp, df: pd.DataFrame, col_text: str, col_lemma: str, pos_to_keep: list, stopwords: list, batch_size: int = 100, n_process: int = 1, stats: bool = True, join_list: bool = False) -> pd.DataFrame:
      """
      Perform natural language processing tasks using spaCy for topic modeling.
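For reference, a minimal usage sketch of the new `PRarmy_nlp_process` (the import path and the spaCy model are assumptions, and the exact entities returned depend on the model):

import spacy
import pandas as pd
from opsci_toolbox.helpers.nlp import PRarmy_nlp_process    # assumed import path

nlp = spacy.load("en_core_web_sm")                           # any spaCy pipeline with a NER component
df = pd.DataFrame({"text": ["Tim Cook said Apple will expand its Paris office."]})

df = PRarmy_nlp_process(nlp, df, col_text="text")
# Adds NER_type / NER_text (entities filtered to PERSON, ORG, LOC by default)
# plus a 'lemmatized_text' column restricted to VERB/NOUN/ADJ/ADV/PROPN lemmas.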
@@ -1358,14 +1448,14 @@ def split_n_sentences(nlp, df: pd.DataFrame, col_text: str, n_sentences: int = 1
      return df


- def spacy_NER(nlp, df: pd.DataFrame, col_text: str, entities_to_keep: list = ['PERSON','ORG'], explode: bool = True) -> pd.DataFrame:
+ def spacy_NER(nlp, df: pd.DataFrame, col_text: str, entities_to_keep: list = ['PERSON','ORG'], explode: bool = True, batch_size : int = 100, n_process: int =1) -> pd.DataFrame:
      """
      Spacy implementation of NER.
      To define entities type to keep, call get_labels(nlp, pipe_step="ner", explanations=False)
      explode = False means it returns 1 list of entities per document
      explode = True means it returns 1 entity per row

-     Parameters:
+     Args:
          nlp : spacy.language.Language
              The spaCy language processing pipeline.
          df : pd.DataFrame
@@ -1376,6 +1466,10 @@ def spacy_NER(nlp, df: pd.DataFrame, col_text: str, entities_to_keep: list = ['P
              List of entity types to keep. Default is ['PERSON','ORG'].
          explode : bool, optional
              Flag indicating whether to explode the DataFrame to have one entity per row. Default is True.
+         batch_size : int, optional
+             Batch sizes
+         n_process : int, optional
+             Number of processes

      Returns:
          pd.DataFrame
@@ -1385,43 +1479,40 @@ def spacy_NER(nlp, df: pd.DataFrame, col_text: str, entities_to_keep: list = ['P
      This function performs Named Entity Recognition (NER) using spaCy on a DataFrame with text data. It extracts entities of the specified types
      and stores the NER information in separate columns. If 'explode' is set to True, it returns one entity per row in the DataFrame.
      """
-     # Create columns to store the NER information
-     df['NER_type'] = None
-     df['NER_text'] = None
-     df['NER_start_char'] = None
-     df['NER_end_char'] = None
-
-     # Function to process each row in the DataFrame
-     def process_row(row):
-         doc = nlp(row[col_text])
-         entities_data = []
+     l_text = df[col_text].tolist()
+     all_records = []
+     for doc in tqdm(nlp.pipe(l_text, batch_size=batch_size, n_process=n_process), total= len(l_text), desc = "NLP Process"):
+         NER_type = []
+         NER_text = []
+         NER_start_char = []
+         NER_end_char=[]
+         # entities_data = []

          if len(entities_to_keep)>0:
              for ent in doc.ents:
                  if ent.label_ in entities_to_keep:
-                     entities_data.append([ent.label_, ent.text, ent.start_char, ent.end_char])
+                     NER_type.append(ent.label_)
+                     NER_text.append(ent.text)
+                     NER_start_char.append(ent.start_char)
+                     NER_end_char.append(ent.end_char)
+                     # entities_data.append([ent.label_, ent.text, ent.start_char, ent.end_char])
          else:
              for ent in doc.ents:
-                 entities_data.append([ent.label_, ent.text, ent.start_char, ent.end_char])
-
-         if entities_data:
-             entity_label, entity_text, start_char, end_char = zip(*entities_data)
-             row['NER_type'] = entity_label
-             row['NER_text'] = entity_text
-             row['NER_start_char'] = start_char
-             row['NER_end_char'] = end_char
+                 NER_type.append(ent.label_)
+                 NER_text.append(ent.text)
+                 NER_start_char.append(ent.start_char)
+                 NER_end_char.append(ent.end_char)
+                 # entities_data.append([ent.label_, ent.text, ent.start_char, ent.end_char])
+         record = (NER_type, NER_text, NER_start_char, NER_end_char)
+         all_records.append(record)

-         return row
-
-     # Apply the processing function to each row
-     df = df.apply(process_row, axis=1)
+     df[['NER_type', 'NER_text','NER_start_char','NER_end_char']] = pd.DataFrame(all_records, index=df.index)

      if explode:
          df= df.explode(['NER_type', 'NER_text','NER_start_char','NER_end_char'])

      return df

-
  def tokenize(nlp, df: pd.DataFrame, col_text: str, col_tokens: str, pos_to_keep: list, stopwords: list, batch_size: int = 100, n_process: int = 1, stats: bool = True) -> pd.DataFrame:
      """
      Spacy implementation to tokenize text
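The rewritten `spacy_NER` streams documents through `nlp.pipe`, so the new `batch_size` and `n_process` arguments can be tuned for larger frames instead of calling `nlp` row by row. A hedged usage sketch (import path and model are assumptions):

import spacy
import pandas as pd
from opsci_toolbox.helpers.nlp import spacy_NER              # assumed import path

nlp = spacy.load("en_core_web_sm")
df = pd.DataFrame({"text": ["Tim Cook visited Berlin.", "Nothing to tag here."]})

df_ner = spacy_NER(nlp, df, col_text="text", entities_to_keep=["PERSON", "ORG"],
                   explode=True, batch_size=50, n_process=1)
# explode=True yields one row per entity with NER_type, NER_text, NER_start_char
# and NER_end_char; documents with no kept entities come back with empty/NaN values.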
@@ -1901,15 +1992,13 @@ def agglomerative_clustering(embeddings, n_clusters=15, metric="euclidean", link



- def hdbscan_clustering(embeddings, algorithm='best', alpha=1.0, cluster_selection_epsilon=0.0, approx_min_span_tree=True,
-                        gen_min_span_tree=True, leaf_size=40, metric='euclidean', min_cluster_size=5, min_samples=None,
-                        p=None, cluster_selection_method='eom', prediction_data = True):
-
+ def hdbscan_clustering(embeddings, algorithm='best', alpha=1.0, cluster_selection_epsilon=0.0, approx_min_span_tree=True, gen_min_span_tree=True, leaf_size=40, metric='euclidean', min_cluster_size=5, min_samples=None, p=None, cluster_selection_method='eom', prediction_data = True):
      """
      This function performs clustering using the HDBSCAN (Hierarchical Density-Based Spatial Clustering of Applications with Noise) algorithm. It clusters the input data based on the specified parameters and returns the clusterer object, cluster labels for each point, and the probability of each sample being an outlier.
+
      Args
          embeddings : array-like or sparse matrix, shape (n_samples, n_features). The input data to be clustered.
-         algorithm : {'best', 'generic', 'prims_kdtree', 'boruvka_kdtree', 'boruvka_balltree', 'prims_balltree'}, optional. The algorithm to use for computation. Default is 'best'.
+         algorithm : {'best', 'generic', 'prims_kdtree', 'boruvka_kdtree', 'boruvka_balltree', 'prims_balltree'}, optional. The algorithm to use for computation. Default is best.
          alpha : float, optional. Scaling factor determining the individual weight of the (unnormalized) density estimate. Default is 1.0.
          cluster_selection_epsilon : float, optional. The epsilon value to specify a minimum cluster size. Default is 0.0.
          approx_min_span_tree : bool, optional. Whether to compute an approximation of the minimum spanning tree. Default is True.
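Only the signature of `hdbscan_clustering` was reflowed onto a single line; the parameters are unchanged. Going by the docstring, the function returns the fitted clusterer, the cluster labels and the per-sample probabilities, so a call would look roughly like the sketch below (embedding matrix and import path are illustrative assumptions):

import numpy as np
from opsci_toolbox.helpers.nlp import hdbscan_clustering     # assumed import path

embeddings = np.random.rand(500, 32)                         # e.g. sentence embeddings
clusterer, labels, probabilities = hdbscan_clustering(embeddings, min_cluster_size=10)
# HDBSCAN labels noise points as -1; prediction_data=True keeps the data needed
# for approximate_predict on new samples.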
@@ -2054,7 +2143,6 @@ def HF_sentiment_classifier(tokenizer, model, text, col_text, filename, dir_json
      proba = torch.sigmoid(model(**inputs).logits).cpu().numpy()[0]
      label = model.config.id2label[proba.argmax()]
      results = {"label":label, "score" : float(proba.max()), col_text : text}
-     print(results)
      write_json(results, dir_json , str(filename))

      return results
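`HF_sentiment_classifier` now only returns and persists the result instead of also printing it, which keeps batch runs quiet. A hedged usage sketch (checkpoint name and import path are assumptions; the result is written out via the package's `write_json` helper):

from transformers import AutoTokenizer, AutoModelForSequenceClassification
from opsci_toolbox.helpers.nlp import HF_sentiment_classifier   # assumed import path

checkpoint = "cardiffnlp/twitter-roberta-base-sentiment-latest"  # illustrative model
tokenizer = AutoTokenizer.from_pretrained(checkpoint)
model = AutoModelForSequenceClassification.from_pretrained(checkpoint)

result = HF_sentiment_classifier(tokenizer, model, "Great release!", "text", "post_001", "output_json")
# -> {"label": ..., "score": ..., "text": "Great release!"}, also written as JSON under output_json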
@@ -0,0 +1,53 @@
+ Metadata-Version: 2.1
+ Name: opsci-toolbox
+ Version: 0.0.12
+ Summary: a complete toolbox
+ Home-page: UNKNOWN
+ Author: Erwan Le Nagard
+ Author-email: erwan@opsci.ai
+ License: MIT
+ Platform: UNKNOWN
+ Requires-Dist: requests <3,>=2.31.0
+ Requires-Dist: beautifulsoup4 ==4.9.3
+ Requires-Dist: chardet >=4.0.0
+ Requires-Dist: chart-studio ==1.1.0
+ Requires-Dist: eldar ==0.0.8
+ Requires-Dist: emoji ==2.10.1
+ Requires-Dist: fa2-modified ==0.3.10
+ Requires-Dist: google-api-python-client ==2.122.0
+ Requires-Dist: gspread ==6.1.2
+ Requires-Dist: hdbscan ==0.8.33
+ Requires-Dist: jusText ==3.0.0
+ Requires-Dist: langchain ==0.1.20
+ Requires-Dist: matplotlib >=3.9.0
+ Requires-Dist: mysql-connector-python >=9.0.0
+ Requires-Dist: networkx ==3.2.1
+ Requires-Dist: nltk ==3.8.1
+ Requires-Dist: numpy <1.25.0,>=1.21.5
+ Requires-Dist: opencv-python-headless ==4.9.0.80
+ Requires-Dist: openpyxl ==3.1.3
+ Requires-Dist: pandas >=1.5.3
+ Requires-Dist: Pillow >=9.0.1
+ Requires-Dist: plotly ==5.19.0
+ Requires-Dist: protobuf ==4.23.4
+ Requires-Dist: pyarrow >=14.0.2
+ Requires-Dist: python-louvain ==0.16
+ Requires-Dist: scikit-learn ==1.4.1.post1
+ Requires-Dist: scipy <2.0.0,>=1.8.0
+ Requires-Dist: sentence-transformers ==2.5.1
+ Requires-Dist: setuptools ==59.6.0
+ Requires-Dist: spacy ==3.7.4
+ Requires-Dist: spacy-language-detection ==0.2.1
+ Requires-Dist: spacymoji ==3.1.0
+ Requires-Dist: supervision ==0.21.0
+ Requires-Dist: textacy ==0.13.0
+ Requires-Dist: torch ==2.0.1
+ Requires-Dist: tqdm >=4.66.2
+ Requires-Dist: trafilatura ==1.7.0
+ Requires-Dist: transformers ==4.38.2
+ Requires-Dist: umap-learn ==0.5.5
+ Requires-Dist: urlextract ==1.9.0
+ Requires-Dist: wordcloud ==1.9.3
+
+ UNKNOWN
+
@@ -1,14 +1,14 @@
  opsci_toolbox/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
  opsci_toolbox/apis/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
  opsci_toolbox/apis/rapidapi_helpers.py,sha256=k_hYcRNww5noNkX7zyz5Htggxb15BPoKSlbY7NLuQXI,26696
- opsci_toolbox/apis/webscraping.py,sha256=Gz3hOfhOHUpwHU1Pzj3mB2WdBAcKa2WisYBHMi3lcVE,18343
+ opsci_toolbox/apis/webscraping.py,sha256=1DAIYbywZoPwTSyoqFGxyF0-q_nUsGg_VK51zLL_bB0,21465
  opsci_toolbox/apis/youtube_helpers.py,sha256=j4hwCS2BEWRJjd9Q5XBN9FeCrL3lqteyz5dqbtfypdo,17418
  opsci_toolbox/helpers/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
- opsci_toolbox/helpers/common.py,sha256=lemGhNwWIxaMwo-X7UsksUMGLV-IOuX_XwC82a50GD4,44672
+ opsci_toolbox/helpers/common.py,sha256=nqg9wzgU5DxVTCxEb5LSw2lUnp0f_hKF_Q-DhpRtu6g,45158
  opsci_toolbox/helpers/cv.py,sha256=N3hnLX223UQbdw_YEdUYj10xUXT_95O6BpQt6TbAE08,21092
- opsci_toolbox/helpers/dataviz.py,sha256=IfHByNWAU2rErZMfs3LuwZwJApLN5w320JEbBPuVp6U,115856
+ opsci_toolbox/helpers/dataviz.py,sha256=1cIGb-u81cD5iSIkkkrzyrBnfim7fbhm0x_CguHUbf0,202128
  opsci_toolbox/helpers/dates.py,sha256=Wf7HxaUY62IRrY3XPdRIuoaMbGi3QqWf-vStqbRRY_o,2633
- opsci_toolbox/helpers/nlp.py,sha256=r4o7V9tJrj3xt34O_4hN0szbSB4RmveP8qmwCqHOxEY,87988
+ opsci_toolbox/helpers/nlp.py,sha256=n7nNEU0cuu7bqXYRRBH4D-xIzpdNwKm0nj-eRYh3aPY,91956
  opsci_toolbox/helpers/nlp_cuml.py,sha256=XzBfoFMpVIehpRbp60E4wGokpoqJP0lJxs1plOxQqBY,28882
  opsci_toolbox/helpers/sna.py,sha256=XL1BZ-x83xWRNbGsvh7-m8Mdy6iOrWx8vjgaL2_TSmo,31905
  opsci_toolbox/helpers/sql.py,sha256=LMrDWcv1QpfE8HyyrqiKuhhkt930lvME3-AKU89LF38,1928
@@ -16,7 +16,7 @@ opsci_toolbox/helpers/surreaction.py,sha256=JjVvHs7Sf9IJxX0QdHpQ_3E8-c_OS6q_bfUK
  opsci_toolbox/lexicons/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
  opsci_toolbox/lexicons/stop_words_en.csv,sha256=4lzjBZHCn_b3lg_CUNkmA_MDQ7DLEpS83k6-dWpkC2o,1957
  opsci_toolbox/lexicons/stop_words_fr.csv,sha256=sPdA8VmyNYbiHg-M8O3tg7ayHvCE3GDg6cF-oSZxICM,6776
- opsci_toolbox-0.0.10.dist-info/METADATA,sha256=DAYpwkedg6Tf4p_JS0ntxq9qUBx9hxWagStKN972RoU,1717
- opsci_toolbox-0.0.10.dist-info/WHEEL,sha256=G16H4A3IeoQmnOrYV4ueZGKSjhipXx8zc8nu9FGlvMA,92
- opsci_toolbox-0.0.10.dist-info/top_level.txt,sha256=fUiqxou4FPec_tOfauTLCKAuepeYLfRyhedycWxVnq4,14
- opsci_toolbox-0.0.10.dist-info/RECORD,,
+ opsci_toolbox-0.0.12.dist-info/METADATA,sha256=LosT5jzu7Z0TXIslwVUSvPG6AKMrblGp8A6odUN_N9U,1633
+ opsci_toolbox-0.0.12.dist-info/WHEEL,sha256=GJ7t_kWBFywbagK5eo9IoUwLW6oyOeTKmQ-9iHFVNxQ,92
+ opsci_toolbox-0.0.12.dist-info/top_level.txt,sha256=fUiqxou4FPec_tOfauTLCKAuepeYLfRyhedycWxVnq4,14
+ opsci_toolbox-0.0.12.dist-info/RECORD,,
@@ -1,5 +1,5 @@
  Wheel-Version: 1.0
- Generator: bdist_wheel (0.37.1)
+ Generator: bdist_wheel (0.43.0)
  Root-Is-Purelib: true
  Tag: py3-none-any

@@ -1,53 +0,0 @@
- Metadata-Version: 2.1
- Name: opsci-toolbox
- Version: 0.0.10
- Summary: a complete toolbox
- Home-page: UNKNOWN
- Author: Erwan Le Nagard
- Author-email: erwan@opsci.ai
- License: MIT
- Platform: UNKNOWN
- Requires-Dist: Pillow (>=9.0.1)
- Requires-Dist: Requests (==2.32.3)
- Requires-Dist: beautifulsoup4 (==4.10.0)
- Requires-Dist: chardet (>=4.0.0)
- Requires-Dist: chart-studio (==1.1.0)
- Requires-Dist: eldar (==0.0.8)
- Requires-Dist: emoji (==2.10.1)
- Requires-Dist: fa2-modified (==0.3.10)
- Requires-Dist: google-api-python-client (==2.122.0)
- Requires-Dist: gspread (==6.1.2)
- Requires-Dist: hdbscan (==0.8.33)
- Requires-Dist: jusText (==3.0.0)
- Requires-Dist: langchain (==0.1.20)
- Requires-Dist: matplotlib (>=3.9.0)
- Requires-Dist: mysql-connector-repackaged (==0.3.1)
- Requires-Dist: networkx (==3.2.1)
- Requires-Dist: nltk (==3.8.1)
- Requires-Dist: numpy (<1.25.0,>=1.21.5)
- Requires-Dist: opencv-python-headless (==4.9.0.80)
- Requires-Dist: openpyxl (==3.1.3)
- Requires-Dist: pandas (>=1.5.3)
- Requires-Dist: plotly (==5.19.0)
- Requires-Dist: protobuf (==4.23.4)
- Requires-Dist: pyarrow (>=14.0.2)
- Requires-Dist: python-louvain (==0.16)
- Requires-Dist: scikit-learn (==1.4.1.post1)
- Requires-Dist: scipy (<2.0.0,>=1.8.0)
- Requires-Dist: sentence-transformers (==2.5.1)
- Requires-Dist: setuptools (==59.6.0)
- Requires-Dist: spacy (==3.7.4)
- Requires-Dist: spacy-language-detection (==0.2.1)
- Requires-Dist: spacymoji (==3.1.0)
- Requires-Dist: supervision (==0.21.0)
- Requires-Dist: textacy (==0.13.0)
- Requires-Dist: torch (==2.0.1)
- Requires-Dist: tqdm (==4.66.2)
- Requires-Dist: trafilatura (==1.7.0)
- Requires-Dist: transformers (==4.38.2)
- Requires-Dist: umap-learn (==0.5.5)
- Requires-Dist: urlextract (==1.9.0)
- Requires-Dist: wordcloud (==1.9.3)
-
- UNKNOWN
-