opsci-toolbox 0.0.15__py3-none-any.whl → 0.0.17__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- opsci_toolbox/apis/webscraping.py +8 -3
- opsci_toolbox/helpers/common.py +27 -4
- opsci_toolbox/helpers/dataviz.py +110 -3
- opsci_toolbox/helpers/nlp.py +85 -11
- opsci_toolbox/helpers/nlp_cuml.py +7 -4
- opsci_toolbox/helpers/sna.py +1 -1
- {opsci_toolbox-0.0.15.dist-info → opsci_toolbox-0.0.17.dist-info}/METADATA +1 -1
- {opsci_toolbox-0.0.15.dist-info → opsci_toolbox-0.0.17.dist-info}/RECORD +11 -11
- {opsci_toolbox-0.0.15.dist-info → opsci_toolbox-0.0.17.dist-info}/WHEEL +0 -0
- {opsci_toolbox-0.0.15.dist-info → opsci_toolbox-0.0.17.dist-info}/dependency_links.txt +0 -0
- {opsci_toolbox-0.0.15.dist-info → opsci_toolbox-0.0.17.dist-info}/top_level.txt +0 -0
opsci_toolbox/apis/webscraping.py
CHANGED
@@ -97,9 +97,14 @@ def url_get_domain(url: str) -> str:
     Returns:
         str: The domain name extracted from the URL.
     """
-
-
-
+    try:
+        parsed_url = urlparse(url)
+        domain = parsed_url.hostname if parsed_url.hostname else parsed_url.netloc
+        return domain
+    except Exception as e:
+        pass
+        print(url, e)
+        return url
 
 
 def url_get_extension(url: str) -> str:
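The rewritten url_get_domain now wraps URL parsing in a try/except and falls back to returning the input string if anything goes wrong. A minimal standalone sketch of that behaviour (adapted from the hunk above; the sample URLs are illustrative):

```python
from urllib.parse import urlparse

def url_get_domain(url: str) -> str:
    """Return the hostname of a URL, or the raw input if parsing fails."""
    try:
        parsed_url = urlparse(url)
        # hostname strips credentials/port and lowercases; netloc is the raw authority
        return parsed_url.hostname if parsed_url.hostname else parsed_url.netloc
    except Exception as e:
        print(url, e)
        return url

print(url_get_domain("https://User@WWW.Example.com:8080/path?q=1"))  # www.example.com
print(url_get_domain("not a url"))  # "" -- urlparse does not raise here, netloc is simply empty
```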
opsci_toolbox/helpers/common.py
CHANGED
@@ -1502,6 +1502,28 @@ def custom_ordering(df : pd.DataFrame, col_to_order : str, custom_order : list)
     df[col_to_order] = pd.Categorical(df[col_to_order], categories=custom_order, ordered=True).to_numpy()
     return df
 
+# def calcul_total_et_pourcentage(df : pd.DataFrame, col_gb : list, metrics : dict) -> pd.DataFrame:
+#     """
+#     Calculates the total and percentage values for the given metrics based on a grouping column.
+#     Args:
+#         df (DataFrame): The input DataFrame.
+#         col_gb (list): Names of the columns to group by.
+#         metrics (dict): A dictionary of metrics to calculate.
+#     Returns:
+#         DataFrame: The modified DataFrame with total and percentage values added.
+
+#     """
+#     percentage_agregations = {f'per_{key}': lambda x: x[key] / x[f"total_{key}"] for key in list(metrics.keys())}
+
+#     df = (df.join(df.groupby(col_gb)
+#                   .agg(metrics)
+#                   .add_prefix("total_"), on=col_gb
+#                   )
+#           .assign(**percentage_agregations).fillna(0)
+#           )
+
+#     return df
+
 def calcul_total_et_pourcentage(df : pd.DataFrame, col_gb : list, metrics : dict) -> pd.DataFrame:
     """
     Calculates the total and percentage values for the given metrics based on a grouping column.
@@ -1513,14 +1535,15 @@ def calcul_total_et_pourcentage(df : pd.DataFrame, col_gb : list, metrics : dict
         DataFrame: The modified DataFrame with total and percentage values added.
 
     """
-    percentage_agregations = {f'per_{key}': lambda x: x[key] / x[f"total_{key}"] for key in list(metrics.keys())}
+    # percentage_agregations = {f'per_{key}': lambda x: x[key] / x[f"total_{key}"] for key in list(metrics.keys())}
 
     df = (df.join(df.groupby(col_gb)
                   .agg(metrics)
                   .add_prefix("total_"), on=col_gb
                   )
-          .assign(**percentage_agregations).fillna(0)
           )
-
-
+    for key in list(metrics.keys()):
+        df['per_' + key] = df[key] / df['total_' + key]
+        df['per_' + key] = df['per_' + key].fillna(0)
 
+    return df
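The percentage columns are now computed with an explicit loop rather than the earlier dict-of-lambdas passed to `.assign` (where every lambda closes over the same `key` due to Python's late binding). A small usage sketch; the data and column names are invented for illustration:

```python
import pandas as pd
from opsci_toolbox.helpers.common import calcul_total_et_pourcentage

# Illustrative data: engagement per post, grouped by channel.
df = pd.DataFrame({
    "channel": ["a", "a", "b"],
    "views": [10, 30, 60],
    "likes": [1, 3, 6],
})

# Adds total_views / total_likes per channel and the per_views / per_likes shares,
# each share taken against its own total.
out = calcul_total_et_pourcentage(df, col_gb=["channel"], metrics={"views": "sum", "likes": "sum"})
print(out[["channel", "views", "total_views", "per_views"]])
```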
opsci_toolbox/helpers/dataviz.py
CHANGED
@@ -2007,6 +2007,115 @@ def horizontal_stacked_bars(df: pd.DataFrame,
 
     return fig
 
+def bar_stacked(df: pd.DataFrame,
+                col_x: str,
+                col_y: str,
+                col_cat: str,
+                col_color: str,
+                **kwargs) -> go.Figure:
+    """
+    Create horizontal stacked bar plots.
+
+    Args:
+        df (pd.DataFrame): DataFrame containing data for the bar plots.
+        col_x (str): Name of the column containing x-axis values.
+        col_y (str): Name of the column containing y-axis values.
+        col_percentage (str): Name of the column containing percentage values.
+        col_cat (str): Name of the column containing categories.
+        col_color (str): Name of the column containing colors.
+        **kwargs: Additional keyword arguments to update default plotting parameters.
+
+    Returns:
+        go.Figure: Plotly Figure object representing the horizontal stacked bar plots.
+    """
+    params = general_kwargs()
+    params.update(kwargs)
+
+    categories = df[col_cat].unique()
+
+    col_hover = params["col_hover"]
+
+    fig = go.Figure()
+
+    for cat in categories:
+        current_df = df[df[col_cat] == cat]
+        hovertemplate= "<b>Catégorie</b> : "+str(cat)+"<br><b>"+str(col_x)+"</b> : "+current_df[col_x].astype(str)+ str(col_y) + "</b> : "+current_df[col_y].astype(str)
+
+        for c in col_hover:
+            hovertemplate += (
+                "<br><b>"
+                + str(c)
+                + "</b>:"
+                + current_df[c].astype(str).apply(wrap_text)
+            )
+
+        fig.add_trace(
+            go.Bar(
+                x=current_df[col_x],
+                y=current_df[col_y],
+                orientation=params['orientation'],
+                text = current_df[col_x],
+                textposition=params["textposition"],
+                name=cat,
+                marker=dict(color=current_df[col_color]),
+                hovertemplate=hovertemplate+'<extra></extra>',
+                textangle=params["xaxis_tickangle"],
+            )
+        )
+
+    fig.update_layout(
+        barmode='stack',
+        title_text=params["title_text"],
+        showlegend=params['showlegend'],
+        width = params["width"],
+        height= params["height"],
+        font_family=params["font_family"],
+        font_size=params["font_size"],
+        template=params["template"],
+        plot_bgcolor=params["plot_bgcolor"], # background color (plot)
+        paper_bgcolor=params["paper_bgcolor"],
+        uniformtext_minsize=params["uniformtext_minsize"],
+        uniformtext_mode=params["uniformtext_mode"],
+
+    )
+
+    fig.update_yaxes(
+        # title=params["yaxis_title"],
+        title_font_size=params["yaxis_title_font_size"],
+        tickangle=params["yaxis_tickangle"],
+        tickfont_size=params["yaxis_tickfont_size"],
+        range=params["yaxis_range"],
+        showgrid=params["yaxis_showgrid"],
+        showline=params["yaxis_showline"],
+        zeroline=params["yaxis_zeroline"],
+        gridwidth=params["yaxis_gridwidth"],
+        gridcolor=params["yaxis_gridcolor"],
+        linewidth=params["yaxis_linewidth"],
+        linecolor=params["yaxis_linecolor"],
+        mirror=params["yaxis_mirror"],
+    )
+
+    fig.update_xaxes(
+        # title=params["xaxis_title"],
+        title_font_size=params["xaxis_title_font_size"],
+        tickangle=params["xaxis_tickangle"],
+        tickfont_size=params["xaxis_tickfont_size"],
+        # range=params["xaxis_range"],
+        showgrid=params["xaxis_showgrid"],
+        showline=params["xaxis_showline"],
+        zeroline=params["xaxis_zeroline"],
+        gridwidth=params["xaxis_gridwidth"],
+        gridcolor=params["xaxis_gridcolor"],
+        linewidth=params["xaxis_linewidth"],
+        linecolor=params["xaxis_linecolor"],
+        mirror=params["xaxis_mirror"]
+    )
+    fig.update_xaxes(title_text=params["xaxis_title"])
+    fig.update_yaxes(title_text=params["yaxis_title"])
+    fig.update_yaxes(showticklabels = False)
+
+    return fig
+
 def bar_trend_per_cat(df: pd.DataFrame,
                       col_x: str,
                       col_cat: str,
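A sketch of how the new bar_stacked helper might be called. The DataFrame, its columns, and the extra kwargs below are illustrative; the remaining styling options come from the toolbox's general_kwargs defaults and are overridable through **kwargs:

```python
import pandas as pd
from opsci_toolbox.helpers.dataviz import bar_stacked

# Hypothetical data: one bar per month, one stacked segment per sentiment.
df = pd.DataFrame({
    "month": ["Jan", "Jan", "Feb", "Feb"],
    "posts": [120, 80, 90, 150],
    "sentiment": ["positive", "negative", "positive", "negative"],
    "color": ["#2ca02c", "#d62728", "#2ca02c", "#d62728"],
})

fig = bar_stacked(
    df,
    col_x="month",        # x-axis values
    col_y="posts",        # bar heights
    col_cat="sentiment",  # one go.Bar trace per category, stacked via barmode="stack"
    col_color="color",    # per-row marker colors
    col_hover=[],         # assumed kwarg consumed via general_kwargs(); extra hover columns
    title_text="Posts per month by sentiment",
)
fig.show()
```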
@@ -3597,13 +3706,11 @@ def density_map(df_posts: pd.DataFrame,
                 show_topics: bool = True,
                 show_halo: bool = False,
                 show_histogram: bool = True,
-
                 colorscale: str = "Portland",
                 marker_color: str = "#ff7f0e",
                 arrow_color: str = "#ff7f0e",
                 width: int = 1000,
                 height: int = 1000,
-
                 label_size_ratio: int = 100,
                 n_words: int = 3,
                 title_text: str = "Clustering",
@@ -3625,7 +3732,7 @@ def density_map(df_posts: pd.DataFrame,
         col_engagement (str): Column name corresponding to a metric.
         col_text (str): Column name corresponding to a text separated by |.
         col_text_dots (str): Column name corresponding to the text for dots.
-        colorscale (str, optional): Possible values are
+        colorscale (str, optional): Possible values are ``https://plotly.com/python/builtin-colorscales/``. Defaults to "Portland".
         marker_color (str, optional): Dots color value. Defaults to "#ff7f0e".
         arrow_color (str, optional): Arrow pointing to topic centroid color value. Defaults to "#ff7f0e".
         width (int, optional): Width of the plot. Defaults to 1000.
opsci_toolbox/helpers/nlp.py
CHANGED
@@ -30,7 +30,7 @@ from eldar import Query
 import torch
 from transformers import TextClassificationPipeline, AutoModelForSequenceClassification, AutoTokenizer, AutoConfig
 from bs4 import BeautifulSoup
-
+from nltk.tokenize import PunktSentenceTokenizer
 
 ####################################################################
 # CLEANING
@@ -1660,6 +1660,84 @@ def split_n_sentences(nlp, df: pd.DataFrame, col_text: str, n_sentences: int = 1
     return df
 
 
+def split_n_sentences_nltk(df: pd.DataFrame, col_text: str, n_sentences: int = 1, threshold: int = None, stats: bool = False) -> pd.DataFrame:
+    """
+    Split a text into chunks of n sentences, returning their start and end indexes in separate columns using NLTK PunktSentenceTokenizer.
+
+    Parameters:
+        df : pd.DataFrame
+            DataFrame containing the text data to split.
+        col_text : str
+            The name of the column containing the text data.
+        n_sentences : int, optional
+            The number of sentences to group together. Default is 1.
+        threshold : int, optional
+            Maximum number of sentence batches to return per text. If None, all batches are returned. Default is None.
+        stats : bool, optional
+            Flag indicating whether to compute statistics about the splitting process. Default is False.
+
+    Returns:
+        pd.DataFrame
+            DataFrame containing the split sentences with their start and end indexes in separate columns.
+
+    """
+    tokenizer = PunktSentenceTokenizer()
+    text = list(df[col_text].astype('unicode').values)
+
+    count_sentences = []
+    count_batches = []
+    results = []
+    start_indexes = []
+    end_indexes = []
+
+    for doc in tqdm(text, total=len(text), desc="Sentence splitting"):
+        sentences = []
+        start_pos = 0
+
+        # Tokenize sentences and compute positions
+        for sent in tokenizer.tokenize(doc):
+            start_idx = doc.find(sent, start_pos)
+            end_idx = start_idx + len(sent)
+            sentences.append((sent, start_idx, end_idx))
+            start_pos = end_idx
+
+        if stats:
+            count_sentences.append(len(sentences))
+
+        if n_sentences > 1:
+            # Split sentences into batches of size n_sentences
+            batches = [sentences[i:i + n_sentences] for i in range(0, len(sentences), n_sentences)]
+
+            # Concatenate batches of sentences and adjust spans accordingly
+            concatenate_batches = [" ".join([sub[0] for sub in sublist]) for sublist in batches]
+            concatenate_spans = [(sublist[0][1], sublist[-1][2]) for sublist in batches]
+
+            if threshold is not None:
+                concatenate_batches = concatenate_batches[:threshold]
+                concatenate_spans = concatenate_spans[:threshold]
+
+            results.append(concatenate_batches)
+            start_indexes.append([span[0] for span in concatenate_spans])
+            end_indexes.append([span[1] for span in concatenate_spans])
+
+            if stats:
+                count_batches.append(len(concatenate_batches))
+        else:
+            sentences = sentences[:threshold] if threshold is not None else sentences
+
+            results.append([sub[0] for sub in sentences])
+            start_indexes.append([sub[1] for sub in sentences])
+            end_indexes.append([sub[2] for sub in sentences])
+
+    df['sentences'] = results
+    df['start_indexes'] = start_indexes
+    df['end_indexes'] = end_indexes
+
+    df = df.explode(['sentences', 'start_indexes', 'end_indexes']).reset_index(drop=True)
+
+    return df
+
+
 def spacy_NER(nlp, df: pd.DataFrame, col_text: str, entities_to_keep: list = ['PERSON','ORG'], explode: bool = True, batch_size : int = 100, n_process: int =1) -> pd.DataFrame:
     """
     Spacy implementation of NER.
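Unlike split_n_sentences, the new split_n_sentences_nltk variant does not need a spaCy pipeline: it relies on NLTK's PunktSentenceTokenizer and also records character offsets for each chunk. A usage sketch with invented data and column name:

```python
import pandas as pd
from opsci_toolbox.helpers.nlp import split_n_sentences_nltk

# Illustrative input; each row holds one document.
df = pd.DataFrame({"text": [
    "First sentence. Second one! Third, and last?",
    "A single sentence.",
]})

# Groups sentences two by two; the result is exploded to one row per chunk,
# with start_indexes / end_indexes giving character offsets in the original text.
out = split_n_sentences_nltk(df, col_text="text", n_sentences=2)
print(out[["sentences", "start_indexes", "end_indexes"]])
```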
@@ -1996,9 +2074,9 @@ def encode_chunked_files(chunk_files_paths: list,
     Encode text from files and save the results in another pickle file.
 
     Parameters:
-        chunk_files_paths (
+        chunk_files_paths (list): List of file paths containing documents.
         HF_encoder (Encoder): Encoder object for text vectorization.
-        cols (
+        cols (list): Columns to keep in the resulting DataFrame.
         col_text (str): Column containing text data in the DataFrame.
         path_embedded_chunks (str): Path to save the embedded chunks.
         reencode (bool, optional): Whether to re-encode files even if they already exist. Defaults to False.
@@ -2040,12 +2118,10 @@ def encode_labels(data_to_encode: np.ndarray) -> tuple:
     Encodes a list of labels using a LabelEncoder.
 
     Args:
-
-        but strings or integers are typical.
+        data_to_encode (List[Union[str, int]]): The list of labels to encode. Labels can be of any hashable type, but strings or integers are typical.
 
     Returns:
-
-        of encoded labels.
+        Tuple[LabelEncoder, np.ndarray]: A tuple containing the fitted LabelEncoder instance and a numpy array of encoded labels.
     """
     label_encoder = LabelEncoder()
     label_encoder.fit(data_to_encode)
@@ -2072,12 +2148,10 @@ def one_hot_encode(data_to_encode:np.ndarray) -> tuple:
     One-hot encodes a list of categorical values using OneHotEncoder.
 
     Args:
-        - data_to_encode (List[Union[str, int]]): The list of categorical values to encode. The values can be of
-          any hashable type, typically strings or integers.
+        - data_to_encode (List[Union[str, int]]): The list of categorical values to encode. The values can be of any hashable type, typically strings or integers.
 
     Returns:
-        - Tuple[OneHotEncoder, np.ndarray]: A tuple containing the fitted OneHotEncoder instance and a numpy array
-          of one-hot encoded values.
+        - Tuple[OneHotEncoder, np.ndarray]: A tuple containing the fitted OneHotEncoder instance and a numpy array of one-hot encoded values.
     """
     one_hot_encoder = OneHotEncoder(sparse=False)
     data_to_encode_reshaped = np.array(data_to_encode).reshape(-1, 1)  # Reshape for OneHotEncoder
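The docstring changes above only restate the return contract of encode_labels and one_hot_encode. A quick sketch of that contract using scikit-learn directly; the labels are made up:

```python
from sklearn.preprocessing import LabelEncoder

# encode_labels is documented to return (fitted LabelEncoder, encoded array).
labels = ["neutral", "positive", "negative", "positive"]
label_encoder = LabelEncoder()
encoded = label_encoder.fit_transform(labels)
print(label_encoder.classes_)  # ['negative' 'neutral' 'positive']
print(encoded)                 # [1 2 0 2]
```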
opsci_toolbox/helpers/nlp_cuml.py
CHANGED
@@ -258,7 +258,7 @@ def process_UMAP(embedded_chunks_paths: list, path_reduced_embeddings_id: str, r
     new_file_paths=[]
     for file_path in tqdm(embedded_chunks_paths, total=len(embedded_chunks_paths), desc="UMAP transform from files"):
 
-        filename = os.path.splitext(os.path.basename(file_path))[0]
+        filename = os.path.splitext(os.path.basename(file_path))[0]
         new_filename = filename+"_reduce_embeddings.parquet"
         new_file_path = os.path.join(path_reduced_embeddings_id, new_filename)
 
@@ -309,7 +309,7 @@ def process_HDBSCAN(clusterer,
     new_file_paths=[]
     for file_path in tqdm(reduced_embeddings_paths, total=len(reduced_embeddings_paths), desc="HDBSCAN transform from files"):
 
-        filename = os.path.splitext(os.path.basename(file_path))[0]
+        filename = os.path.splitext(os.path.basename(file_path))[0]
         new_filename = filename+ "_predictions.parquet"
         new_file_path = os.path.join(path_predictions_dataset_id, new_filename)
         if not os.path.exists(new_file_path) or reencode:
@@ -566,7 +566,7 @@ def cudf_write_parquet(df: cudf.DataFrame, path: str, filename: str) -> str:
     df.to_parquet(file_path)
     return file_path
 
-def cudf_read_parquet(path: str) -> cudf.DataFrame:
+def cudf_read_parquet(path: str, cols : list = None) -> cudf.DataFrame:
     """
     Read a Parquet file into a cuDF DataFrame.
 
@@ -576,7 +576,10 @@ def cudf_read_parquet(path: str) -> cudf.DataFrame:
     Returns:
         cudf.DataFrame: The read cuDF DataFrame.
     """
-
+    if cols :
+        df = cudf.read_parquet(path, columns=cols)
+    else :
+        df = cudf.read_parquet(path)
     return df
 
 def convert_df_to_cudf(df: pd.DataFrame) -> cudf.DataFrame:
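cudf_read_parquet now accepts an optional cols argument that is forwarded to cudf.read_parquet as a column subset. A sketch of the new call (requires a RAPIDS/CUDA environment; the path and column names are hypothetical):

```python
from opsci_toolbox.helpers.nlp_cuml import cudf_read_parquet

# Read everything, as before.
df_full = cudf_read_parquet("reduced_embeddings/chunk_0_reduce_embeddings.parquet")

# Read only a subset of columns, which avoids loading unused embedding columns.
df_xy = cudf_read_parquet("reduced_embeddings/chunk_0_reduce_embeddings.parquet",
                          cols=["doc_id", "x", "y"])
```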
opsci_toolbox/helpers/sna.py
CHANGED
@@ -44,7 +44,7 @@ def create_subgraph_min_metric(G: nx.Graph, metric: str = "degree", min_value: f
 
     subgraph = G.subgraph(nodes_with_min_metric).copy()
     return subgraph
-
+
 def group_nodes_by_values(dictionnary : dict) -> dict:
     """
     Group nodes by their values from a dictionary.
{opsci_toolbox-0.0.15.dist-info → opsci_toolbox-0.0.17.dist-info}/RECORD
CHANGED
@@ -3,24 +3,24 @@ opsci_toolbox/apis/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuF
 opsci_toolbox/apis/rapidapi_helpers.py,sha256=plX0uoGXWBEmeRqK7QfB_CVYJnW15kVUWtitESxPLNw,26669
 opsci_toolbox/apis/reddit.py,sha256=b_dJFZ_bOB9LLugGBBw5bCbUZdq8VnwtVCGaTYljIIg,21096
 opsci_toolbox/apis/telegram.py,sha256=JjmAk6tKvpnFIYpZDKthxS_mgqhWQpDPUOvyC7SiWPA,60920
-opsci_toolbox/apis/webscraping.py,sha256=
+opsci_toolbox/apis/webscraping.py,sha256=fo6H2OaH0m_LHJB9IyN-q0Vkk8L9OvHxNn4O_A6a6yc,21572
 opsci_toolbox/apis/youtube_helpers.py,sha256=j4hwCS2BEWRJjd9Q5XBN9FeCrL3lqteyz5dqbtfypdo,17418
 opsci_toolbox/helpers/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-opsci_toolbox/helpers/common.py,sha256=
+opsci_toolbox/helpers/common.py,sha256=gM0QzLsdjMQTTT522CqzpFO86YWaxPaK48EXemjw9nI,54298
 opsci_toolbox/helpers/cv.py,sha256=N3hnLX223UQbdw_YEdUYj10xUXT_95O6BpQt6TbAE08,21092
-opsci_toolbox/helpers/dataviz.py,sha256=
+opsci_toolbox/helpers/dataviz.py,sha256=viIrTrnxFzCRLY5sJDEz3jJtsB-gZTZb2uLoq0yvTlU,212762
 opsci_toolbox/helpers/dates.py,sha256=Pq-SKP2n1z0_jzU8NxGSv8CHLH_MOKjP_rNYeny0Tb8,4752
 opsci_toolbox/helpers/gliner.py,sha256=qLkpuoCDezQyYmg_TE3XYETSpobHods6WBjCLo0Gjqw,3579
-opsci_toolbox/helpers/nlp.py,sha256=
-opsci_toolbox/helpers/nlp_cuml.py,sha256=
-opsci_toolbox/helpers/sna.py,sha256=
+opsci_toolbox/helpers/nlp.py,sha256=MC2ibMi0j9BCysloEPXpvpvRlzlMvRn8krOAcFF-4VU,108286
+opsci_toolbox/helpers/nlp_cuml.py,sha256=sLvaDfVL0aoGi3mNXUkW47tWVrrYK5wxbf8QPgljQNA,30991
+opsci_toolbox/helpers/sna.py,sha256=yzBTQXYXow_lKGhlSMz8hYl2JcSlle95YEDht9v-_fY,33734
 opsci_toolbox/helpers/sql.py,sha256=LMrDWcv1QpfE8HyyrqiKuhhkt930lvME3-AKU89LF38,1928
 opsci_toolbox/helpers/surreaction.py,sha256=JjVvHs7Sf9IJxX0QdHpQ_3E8-c_OS6q_bfUKvurl1z4,7093
 opsci_toolbox/lexicons/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 opsci_toolbox/lexicons/stop_words_en.csv,sha256=4lzjBZHCn_b3lg_CUNkmA_MDQ7DLEpS83k6-dWpkC2o,1957
 opsci_toolbox/lexicons/stop_words_fr.csv,sha256=sPdA8VmyNYbiHg-M8O3tg7ayHvCE3GDg6cF-oSZxICM,6776
-opsci_toolbox-0.0.
-opsci_toolbox-0.0.
-opsci_toolbox-0.0.
-opsci_toolbox-0.0.
-opsci_toolbox-0.0.
+opsci_toolbox-0.0.17.dist-info/METADATA,sha256=RvPoecg-cflzmh0PcNj9dDZm_RLp5KsK2n-hRTXdEUs,1727
+opsci_toolbox-0.0.17.dist-info/WHEEL,sha256=GJ7t_kWBFywbagK5eo9IoUwLW6oyOeTKmQ-9iHFVNxQ,92
+opsci_toolbox-0.0.17.dist-info/dependency_links.txt,sha256=bEiJsgyh9M0F_pGpJBwUYDefiTNq9F6QEGfQS5RH1Os,39
+opsci_toolbox-0.0.17.dist-info/top_level.txt,sha256=fUiqxou4FPec_tOfauTLCKAuepeYLfRyhedycWxVnq4,14
+opsci_toolbox-0.0.17.dist-info/RECORD,,
{opsci_toolbox-0.0.15.dist-info → opsci_toolbox-0.0.17.dist-info}/WHEEL: file without changes
{opsci_toolbox-0.0.15.dist-info → opsci_toolbox-0.0.17.dist-info}/dependency_links.txt: file without changes
{opsci_toolbox-0.0.15.dist-info → opsci_toolbox-0.0.17.dist-info}/top_level.txt: file without changes