opsci-toolbox 0.0.13__py3-none-any.whl → 0.0.15__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- opsci_toolbox/apis/rapidapi_helpers.py +1 -2
- opsci_toolbox/apis/reddit.py +342 -334
- opsci_toolbox/apis/telegram.py +471 -41
- opsci_toolbox/helpers/common.py +3 -1
- opsci_toolbox/helpers/dates.py +1 -1
- opsci_toolbox/helpers/nlp.py +178 -33
- opsci_toolbox/helpers/nlp_cuml.py +47 -2
- opsci_toolbox/helpers/sna.py +34 -0
- {opsci_toolbox-0.0.13.dist-info → opsci_toolbox-0.0.15.dist-info}/METADATA +2 -2
- {opsci_toolbox-0.0.13.dist-info → opsci_toolbox-0.0.15.dist-info}/RECORD +13 -12
- opsci_toolbox-0.0.15.dist-info/dependency_links.txt +1 -0
- {opsci_toolbox-0.0.13.dist-info → opsci_toolbox-0.0.15.dist-info}/WHEEL +0 -0
- {opsci_toolbox-0.0.13.dist-info → opsci_toolbox-0.0.15.dist-info}/top_level.txt +0 -0
opsci_toolbox/helpers/common.py
CHANGED
@@ -383,6 +383,8 @@ def write_json(json_dict: dict, path: str, name: str) -> str:
 return file_path
 
 
+
+
 def write_dataframe_to_json(df: pd.DataFrame, path: str, name: str, orient: str = 'records') -> str:
 """
 Write a DataFrame to a JSON file.
@@ -603,7 +605,7 @@ def list_files_in_subdirectories(path: str, filetype: str = '*.json') -> list:
 
 return files
 
-def copy_file(source_path: str, destination_path: str, new_filename: str =
+def copy_file(source_path: str, destination_path: str, new_filename: str = None) -> str:
 """
 Copy a file from a source path to a destination path.
 
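A minimal usage sketch for the completed copy_file signature, assuming the helper copies the file and returns the destination path (the paths are illustrative, not taken from the package):

from opsci_toolbox.helpers.common import copy_file

# new_filename now defaults to None (presumably keeping the original filename)
copied_path = copy_file("data/report.csv", "backup/")

# pass new_filename to copy the file under a different name
renamed_path = copy_file("data/report.csv", "backup/", new_filename="report_v2.csv")
print(copied_path, renamed_path)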
opsci_toolbox/helpers/dates.py
CHANGED
@@ -58,7 +58,7 @@ def number_of_days(start_date: datetime, end_date: datetime) -> int:
 days_difference (int): The number of days between the start and end dates.
 """
 # Calculate the difference
-time_difference =
+time_difference = end_date - start_date
 # Extract the number of days from the timedelta object
 days_difference = time_difference.days
 return days_difference
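The fix restores a standard datetime subtraction; a small self-contained sketch of the same computation:

from datetime import datetime

start_date = datetime(2024, 1, 1)
end_date = datetime(2024, 1, 31)

# subtracting two datetimes yields a timedelta, whose .days attribute is the day count
time_difference = end_date - start_date
days_difference = time_difference.days
print(days_difference)  # 30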
opsci_toolbox/helpers/nlp.py
CHANGED
@@ -7,7 +7,7 @@ import os
 from sklearn.decomposition import TruncatedSVD
 from sklearn.feature_extraction.text import TfidfVectorizer, CountVectorizer
 from sklearn.manifold import TSNE
-from sklearn.preprocessing import StandardScaler, MinMaxScaler
+from sklearn.preprocessing import StandardScaler, MinMaxScaler, LabelEncoder, OneHotEncoder
 from sklearn.cluster import AgglomerativeClustering
 from sentence_transformers import SentenceTransformer
 from tqdm import tqdm
@@ -94,7 +94,7 @@ def filter_by_query(df: pd.DataFrame, col_text: str, query: str, ignore_case: bo
 The filtered DataFrame.
 """
 eldar_query=Query(query, ignore_case = ignore_case, ignore_accent=ignore_accent, match_word=match_word)
-df
+df = df[df[col_text].apply(eldar_query)]
 df=df.reset_index(drop=True)
 return df
 
@@ -126,7 +126,7 @@ def TM_clean_text(df: pd.DataFrame, col: str, col_clean: str) -> pd.DataFrame:
 df[col_clean] = df[col_clean].apply(lambda x : brackets(x))
 df[col_clean] = df[col_clean].apply(lambda x : urls(x, repl= ''))
 df[col_clean] = df.apply(lambda row: " ".join(filter(lambda x: x[0] != "@", row[col_clean].split())), 1)
-df[col_clean] = df[col_clean].apply(remove_multiple_hashtags)
+# df[col_clean] = df[col_clean].apply(remove_multiple_hashtags)
 df[col_clean] = df[col_clean].apply(remove_extra_spaces)
 # df = df.loc[(df[col_clean] != ""), :]
 return df
@@ -541,7 +541,7 @@ def substitute_punctuations_with_white_space(text : str) -> str:
 text = re.sub(r"[%s]" % re.escape('!"#$%&\()*+,-./:;<=>?@[\\]^_`{|}~“…”’'), " ", text)
 return text
 
-def translate_wt_libre(text: str, source: str, target: str, filename: str, dir_json: str, url: str = "http://127.0.0.1:5000/translate") -> dict:
+def translate_wt_libre(text: str, source: str, target: str, filename: str, dir_json: str, url: str = "http://127.0.0.1:5000/translate", format_payload="html") -> dict:
 """
 Translate text using LibreTranslate service.
 
@@ -558,6 +558,8 @@ def translate_wt_libre(text: str, source: str, target: str, filename: str, dir_j
 The directory to save the translation result JSON file.
 url : str, optional
 The URL of the WT Libre translation service. Default is "http://127.0.0.1:5000/translate".
+format_payload : str, optional
+Possible values are html or text.
 
 Returns:
 json_data : dict
@@ -568,7 +570,7 @@ def translate_wt_libre(text: str, source: str, target: str, filename: str, dir_j
 "q": text,
 "source": source,
 "target": target,
-"format":
+"format": format_payload,
 "api_key": ""
 }
 
@@ -580,7 +582,7 @@ def translate_wt_libre(text: str, source: str, target: str, filename: str, dir_j
 write_json(json_data, dir_json , str(filename))
 return json_data
 
-def translate_batch(batch_text: list, source: str, target: str, filename: str, dir_json: str, url: str = "http://127.0.0.1:5000/translate") -> list:
+def translate_batch(batch_text: list, source: str, target: str, filename: str, dir_json: str, url: str = "http://127.0.0.1:5000/translate", format_payload="html") -> list:
 """
 Translate a batch of texts using LibreTranslate service.
 
@@ -597,6 +599,8 @@ def translate_batch(batch_text: list, source: str, target: str, filename: str, d
 The directory to save the translation result JSONL file.
 url : str, optional
 The URL of the WT Libre translation service. Default is "http://127.0.0.1:5000/translate".
+format_payload : str, optional
+Possible values are html or text.
 
 Returns:
 json_results : list of dict
@@ -607,7 +611,7 @@ def translate_batch(batch_text: list, source: str, target: str, filename: str, d
 "q": batch_text,
 "source": source,
 "target": target,
-"format":
+"format": format_payload,
 "api_key": ""
 }
 
@@ -623,7 +627,7 @@ def translate_batch(batch_text: list, source: str, target: str, filename: str, d
 write_jsonl(json_results, dir_json , str(filename))
 return json_results
 
-def translate(text: str, source: str, target: str, url: str = "http://127.0.0.1:5000/translate") -> str:
+def translate(text: str, source: str, target: str, url: str = "http://127.0.0.1:5000/translate", format_payload="html") -> str:
 """
 Translate text using LibreTranslate service.
 
@@ -636,6 +640,8 @@ def translate(text: str, source: str, target: str, url: str = "http://127.0.0.1:
 The target language code.
 url : str, optional
 The URL of the translation service. Default is "http://127.0.0.1:5000/translate".
+format_payload : str, optional
+Possible values are html or text.
 
 Returns:
 translatedText : str
@@ -646,7 +652,7 @@ def translate(text: str, source: str, target: str, url: str = "http://127.0.0.1:
 "q": text,
 "source": source,
 "target": target,
-"format":
+"format": format_payload,
 "api_key": ""
 }
 
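A hedged sketch of how the new format_payload parameter is used, assuming a LibreTranslate instance is running at the default local URL (the sample text is illustrative):

from opsci_toolbox.helpers.nlp import translate

# format_payload is passed through as the LibreTranslate "format" field: "html" or "text"
translated = translate(
    "Bonjour <b>le monde</b>",
    source="fr",
    target="en",
    format_payload="html",
)
print(translated)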
@@ -905,6 +911,8 @@ def topic_aggregate_chunks(df: pd.DataFrame, col_id: str, col_topic : str, col_c
 """
 metrics_dict = dict()
 # metrics_dict[col_id]=(col_id,'first')
+# if col_id != col_chunk_id:
+# metrics_dict[col_chunk_id]=(col_chunk_id,"nunique")
 metrics_dict[col_chunk_id]=(col_chunk_id,"nunique")
 metrics_dict[col_engagement]=(col_engagement,'first')
 
@@ -1280,10 +1288,12 @@ def PRarmy_nlp_process(nlp, df: pd.DataFrame, col_text: str, col_lemma: str = "l
 NER_type.append(ent.label_)
 NER_text.append(ent.text)
 
+
 record = (NER_type, NER_text, ' '.join(map(str, lemmas_list)))
 all_records.append(record)
 
 
+
 df[['NER_type', 'NER_text', col_lemma]] = pd.DataFrame(all_records, index=df.index)
 
 return df
@@ -1570,10 +1580,10 @@ def extract_emojis(nlp, df: pd.DataFrame, col_text: str, batch_size: int = 100,
 
 return df
 
-def split_n_sentences(nlp, df: pd.DataFrame, col_text: str, n_sentences: int = 1, batch_size: int = 100, n_process: int = 1, stats: bool = False) -> pd.DataFrame:
+def split_n_sentences(nlp, df: pd.DataFrame, col_text: str, n_sentences: int = 1, batch_size: int = 100, n_process: int = 1, stats: bool = False, threshold: int = None) -> pd.DataFrame:
 """
-Split a text into chunks of n sentences
-
+Split a text into chunks of n sentences, returning their start and end indexes in separate columns.
+
 Parameters:
 nlp : spacy.language.Language
 The spaCy language processing pipeline.
@@ -1589,41 +1599,64 @@ def split_n_sentences(nlp, df: pd.DataFrame, col_text: str, n_sentences: int = 1
 The number of processes to use for text processing. Default is 1.
 stats : bool, optional
 Flag indicating whether to compute statistics about the splitting process. Default is False.
-
+threshold : int, optional
+Maximum number of sentence batches to return per text. If None, all batches are returned. Default is None.
+
 Returns:
 pd.DataFrame
-DataFrame containing the split sentences.
+DataFrame containing the split sentences with their start and end indexes in separate columns.
 
-Description:
-This function splits text in a DataFrame into chunks of n sentences. It returns a DataFrame containing the split sentences.
-Optionally, it can compute statistics such as the count of sentences and batches if the 'stats' parameter is set to True.
 """
+text = list(df[col_text].astype('unicode').values)
+
+count_sentences = []
+count_batches = []
+results = []
+start_indexes = []
+end_indexes = []
+
+for doc in tqdm(nlp.pipe(text, batch_size=batch_size, n_process=n_process), total=len(text), desc="Sentence splitting"):
+sentences = []
+
+
+# Extract sentences and their positions
+for sent in doc.sents:
+sentences.append((sent.text, sent.start_char, sent.end_char))
 
-text=list(df[col_text].astype('unicode').values)
-
-count_sentences=[]
-count_batches=[]
-results=[]
-for doc in tqdm(nlp.pipe(text, batch_size=batch_size, n_process=n_process), total= len(text), desc = "Sentence splitting"):
-# Split the text into sentences
-sentences = [sent.text for sent in doc.sents]
 if stats:
 count_sentences.append(len(sentences))
-
-
+
+if n_sentences > 1:
+# # Split sentences into batches of size n_sentences
 batches = [sentences[i:i + n_sentences] for i in range(0, len(sentences), n_sentences)]
-
+
+# Concatenate batches of sentences and adjust spans accordingly
+concatenate_batches = [" ".join([sub[0] for sub in sublist]) for sublist in batches]
+concatenate_spans = [(sublist[0][1], sublist[-1][2]) for sublist in batches]
+
+if threshold is not None:
+concatenate_batches = concatenate_batches[:threshold]
+concatenate_spans = concatenate_spans[:threshold]
+
 results.append(concatenate_batches)
+start_indexes.append([span[0] for span in concatenate_spans])
+end_indexes.append([span[1] for span in concatenate_spans])
+
 if stats:
 count_batches.append(len(concatenate_batches))
-
 else:
-
+sentences = sentences[:threshold] if threshold is not None else sentences
+
+results.append([sub[0] for sub in sentences])
+start_indexes.append([sub[1] for sub in sentences])
+end_indexes.append([sub[2] for sub in sentences])
 
 df['sentences'] = results
-
-
-
+df['start_indexes'] = start_indexes
+df['end_indexes'] = end_indexes
+
+df = df.explode(['sentences','start_indexes', 'end_indexes']).reset_index(drop=True)
+
 return df
 
 
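A minimal sketch of the reworked split_n_sentences, assuming a spaCy pipeline with sentence boundaries is available (the sample data is illustrative):

import pandas as pd
import spacy
from opsci_toolbox.helpers.nlp import split_n_sentences

nlp = spacy.blank("en")
nlp.add_pipe("sentencizer")  # doc.sents requires sentence boundaries

df = pd.DataFrame({"text": ["First sentence. Second sentence. Third sentence."]})

# chunks of 2 sentences, at most 1 chunk kept per text (threshold);
# the result is exploded so each row carries one chunk plus its start/end character indexes
df_split = split_n_sentences(nlp, df, col_text="text", n_sentences=2, threshold=1)
print(df_split[["sentences", "start_indexes", "end_indexes"]])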
@@ -1998,6 +2031,75 @@ def encode_chunked_files(chunk_files_paths: list,
 
 return new_file_paths
 
+####################################################################
+# ENCODING FEATURES
+####################################################################
+
+def encode_labels(data_to_encode: np.ndarray) -> tuple:
+"""
+Encodes a list of labels using a LabelEncoder.
+
+Args:
+- data_to_encode (List[Union[str, int]]): The list of labels to encode. Labels can be of any hashable type,
+but strings or integers are typical.
+
+Returns:
+- Tuple[LabelEncoder, np.ndarray]: A tuple containing the fitted LabelEncoder instance and a numpy array
+of encoded labels.
+"""
+label_encoder = LabelEncoder()
+label_encoder.fit(data_to_encode)
+encoded_labels = label_encoder.transform(data_to_encode)
+return label_encoder, encoded_labels
+
+
+def encode_new_labels(label_encoder : LabelEncoder, data_to_encode : np.ndarray) -> np.ndarray:
+"""
+Encodes a list of new labels using an already fitted LabelEncoder.
+
+Args:
+- label_encoder (LabelEncoder): A pre-fitted LabelEncoder instance.
+- data_to_encode (List[Union[str, int]]): The list of new labels to encode using the pre-fitted encoder.
+
+Returns:
+- np.ndarray: A numpy array of encoded labels.
+"""
+encoded_labels = label_encoder.transform(data_to_encode)
+return encoded_labels
+
+def one_hot_encode(data_to_encode:np.ndarray) -> tuple:
+"""
+One-hot encodes a list of categorical values using OneHotEncoder.
+
+Args:
+- data_to_encode (List[Union[str, int]]): The list of categorical values to encode. The values can be of
+any hashable type, typically strings or integers.
+
+Returns:
+- Tuple[OneHotEncoder, np.ndarray]: A tuple containing the fitted OneHotEncoder instance and a numpy array
+of one-hot encoded values.
+"""
+one_hot_encoder = OneHotEncoder(sparse=False)
+data_to_encode_reshaped = np.array(data_to_encode).reshape(-1, 1) # Reshape for OneHotEncoder
+one_hot_encoder.fit(data_to_encode_reshaped)
+encoded_array = one_hot_encoder.transform(data_to_encode_reshaped)
+return one_hot_encoder, encoded_array
+
+
+def one_hot_encode_new_data(one_hot_encoder: OneHotEncoder, data_to_encode: np.ndarray) -> np.ndarray:
+"""
+One-hot encodes a list of new categorical values using an already fitted OneHotEncoder.
+
+Args:
+- one_hot_encoder (OneHotEncoder): A pre-fitted OneHotEncoder instance.
+- data_to_encode (List[Union[str, int]]): The list of new categorical values to encode using the pre-fitted encoder.
+
+Returns:
+- np.ndarray: A numpy array of one-hot encoded values.
+"""
+data_to_encode_reshaped = np.array(data_to_encode).reshape(-1, 1) # Reshape for OneHotEncoder
+encoded_array = one_hot_encoder.transform(data_to_encode_reshaped)
+return encoded_array
 
 ####################################################################
 # SCALING FEATURES
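A short sketch of the new encoding helpers, which expose a fit-then-reuse split over scikit-learn's LabelEncoder and OneHotEncoder (toy data, not from the package; note that one_hot_encode constructs OneHotEncoder(sparse=False), which assumes a scikit-learn version that still accepts the sparse keyword):

import numpy as np
from opsci_toolbox.helpers.nlp import (
    encode_labels,
    encode_new_labels,
    one_hot_encode,
    one_hot_encode_new_data,
)

labels = np.array(["positive", "negative", "neutral", "positive"])

# fit once, then reuse the fitted encoders on new data
label_encoder, encoded = encode_labels(labels)
print(encoded)                                    # [2 0 1 2]
print(encode_new_labels(label_encoder, np.array(["neutral"])))

one_hot_encoder, one_hot = one_hot_encode(labels)
print(one_hot.shape)                              # (4, 3)
print(one_hot_encode_new_data(one_hot_encoder, np.array(["negative"])))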
@@ -2327,3 +2429,46 @@ def HF_sentiment_classifier(tokenizer, model, text, col_text, filename, dir_json
 write_json(results, dir_json , str(filename))
 
 return results
+
+
+def add_tag_libretranslate_not_translate(text):
+"""
+This function add fake html tag around words such as mentions, hashtags, urls and emojis to avoid translation of those tokens.
+
+Args:
+text (str): The text to process
+
+Returns:
+str: The text with the fake html tags
+"""
+# This regex finds words starting with # and followed by alphanumeric characters or underscores
+mention_pattern = r"(?:RT\s|QT\s)?(?<=^|(?<=[^a-zA-Z0-9-_\.]))(@[A-Za-z0-9_]{4,15})"
+hashtag_pattern = r"(\B#\w+)"
+url_pattern = r"(https?://[^ ]+)"
+emoji_pattern = r':[a-zA-Z_]+:'
+
+pattern = re.compile(emoji_pattern+ "|" + mention_pattern + "|" + hashtag_pattern + "|" + url_pattern)
+
+# This function replaces the hashtag with an HTML link tag
+def replace_with_link(match):
+matcher_group = match.group(0)
+return f'<a href="{matcher_group}"></a>'
+
+# Use re.sub to substitute the hashtags with the HTML link tags
+text_no_emojis = emoji.demojize(text)
+result = re.sub(pattern, replace_with_link, text_no_emojis)
+
+return result
+
+def clean_libre_translate_tags(text):
+"""
+This function remove fake tags added by add_tag_libretranslate_not_translate() function.
+
+Args:
+text (str): The text to process
+
+Returns:
+str: The text with the fake html tags
+"""
+cleaned_string = text.replace('<a href="', '').replace('"></a>', '')
+return cleaned_string
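A quick sketch of the two new tag helpers around a translation call (the tweet-like sample text is illustrative):

from opsci_toolbox.helpers.nlp import (
    add_tag_libretranslate_not_translate,
    clean_libre_translate_tags,
)

text = "RT @openai Nouveau modèle dispo 🚀 #IA https://example.com"

# mentions, hashtags, URLs and demojized emojis are wrapped in fake <a> tags
protected = add_tag_libretranslate_not_translate(text)
print(protected)

# ... send `protected` to LibreTranslate with format_payload="html" ...

# strip the fake tags from the translated output
print(clean_libre_translate_tags(protected))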
opsci_toolbox/helpers/nlp_cuml.py
CHANGED
@@ -18,7 +18,8 @@ def reduce_with_cuml_UMAP(embeddings: np.ndarray,
 metric: str = "cosine",
 spread: float = 1.0,
 learning_rate: float = 1.0,
-n_epochs:int = 300
+n_epochs:int = 300,
+random_state:int = None
 ) -> tuple:
 """
 Reduces the dimensionality of embeddings using UMAP with cuML library.
@@ -41,7 +42,48 @@ def reduce_with_cuml_UMAP(embeddings: np.ndarray,
 metric=metric,
 spread = spread,
 n_epochs=n_epochs,
-learning_rate=learning_rate
+learning_rate=learning_rate,
+random_state=random_state).fit(embeddings)
+
+reduced_embeddings = reducer.transform(embeddings)
+return reducer, reduced_embeddings
+
+
+def supervised_reduce_with_cuml_UMAP(embeddings: np.ndarray,
+n_neighbors: int = 5,
+n_components: int = 3,
+min_dist: float = 0.0,
+metric: str = "cosine",
+spread: float = 1.0,
+learning_rate: float = 1.0,
+n_epochs:int = 300,
+y: np.ndarray = None,
+convert_dtype: bool = False,
+random_state:int=None
+) -> tuple:
+"""
+Reduces the dimensionality of embeddings using UMAP with cuML library.
+
+Args:
+embeddings (np.ndarray): The input embeddings to be reduced.
+n_neighbors (int, optional): The number of nearest neighbors to consider. Defaults to 5.
+n_components (int, optional): The number of dimensions of the embedded space. Defaults to 3.
+min_dist (float, optional): The minimum distance between embedded points. Defaults to 0.0.
+metric (str, optional): The metric to use for distance computation. Defaults to "cosine".
+spread (float, optional): The effective scale of embedded points. Defaults to 1.0.
+
+Returns:
+reducer (UMAP): The UMAP reducer object.
+reduced_embeddings (np.ndarray): The reduced embeddings.
+"""
+reducer = UMAP(n_neighbors=n_neighbors,
+n_components=n_components,
+min_dist=min_dist,
+metric=metric,
+spread = spread,
+n_epochs=n_epochs,
+learning_rate=learning_rate,
+random_state=random_state).fit(X = embeddings, y = y, convert_dtype = convert_dtype)
 
 reduced_embeddings = reducer.transform(embeddings)
 return reducer, reduced_embeddings
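A hedged sketch of the new random_state and supervised options, assuming a RAPIDS cuML environment with a GPU (embeddings and labels are random toy data):

import numpy as np
from opsci_toolbox.helpers.nlp_cuml import (
    reduce_with_cuml_UMAP,
    supervised_reduce_with_cuml_UMAP,
)

embeddings = np.random.rand(1000, 384).astype(np.float32)
labels = np.random.randint(0, 5, size=1000)

# passing random_state makes the projection reproducible across runs
reducer, reduced = reduce_with_cuml_UMAP(embeddings, n_components=3, random_state=42)

# the supervised variant additionally takes target labels via y
sup_reducer, sup_reduced = supervised_reduce_with_cuml_UMAP(
    embeddings, n_components=3, y=labels, random_state=42
)
print(reduced.shape, sup_reduced.shape)  # (1000, 3) (1000, 3)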
@@ -409,6 +451,9 @@ def cuml_word_frequency_per_categorie(gdf: pd.DataFrame, col_text: str, col_cat:
 # Convert the result back to pandas DataFrame
 return df_count.to_pandas()
 
+
+
+
 # def cuml_chi2_per_category(lst_text: list, lst_categorie: list, col_cat: str, n_words: int = 10, p_value_limit: float = 0.95, min_freq: int = 3) -> pd.DataFrame:
 
 # # Convert input lists to cuDF Series
opsci_toolbox/helpers/sna.py
CHANGED
@@ -11,6 +11,40 @@ from collections import Counter
 from opsci_toolbox.helpers.dataviz import boxplot
 from fa2_modified import ForceAtlas2
 
+def create_subgraph_min_metric(G: nx.Graph, metric: str = "degree", min_value: float = 2) -> nx.Graph:
+"""
+Creates a subgraph containing only the nodes that have at least the specified minimum value for a given metric.
+
+Args:
+G (nx.Graph): The input graph.
+metric (str, optional): The node metric to filter nodes by (e.g., "degree", "in_degree", "out_degree", "degree_centrality"). Default is "degree".
+min_value (float, optional): The minimum value required for nodes to be included in the subgraph. Default is 2.
+
+Returns:
+subgraph (nx.Graph): A subgraph containing only the nodes with at least the specified minimum metric value.
+"""
+
+if metric == "degree":
+nodes_with_min_metric = [node for node, value in G.degree() if value >= min_value]
+elif metric == "in_degree" and G.is_directed():
+nodes_with_min_metric = [node for node, value in G.in_degree() if value >= min_value]
+elif metric == "out_degree" and G.is_directed():
+nodes_with_min_metric = [node for node, value in G.out_degree() if value >= min_value]
+elif metric == "degree_centrality":
+centrality = nx.degree_centrality(G)
+nodes_with_min_metric = [node for node, value in centrality.items() if value >= min_value]
+elif metric == "betweenness_centrality":
+centrality = nx.betweenness_centrality(G)
+nodes_with_min_metric = [node for node, value in centrality.items() if value >= min_value]
+elif metric == "closeness_centrality":
+centrality = nx.closeness_centrality(G)
+nodes_with_min_metric = [node for node, value in centrality.items() if value >= min_value]
+else:
+raise ValueError(f"Unsupported metric: {metric}")
+
+subgraph = G.subgraph(nodes_with_min_metric).copy()
+return subgraph
+
 def group_nodes_by_values(dictionnary : dict) -> dict:
 """
 Group nodes by their values from a dictionary.
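A minimal sketch of the new create_subgraph_min_metric helper on a toy graph; degree-based metrics take raw counts, while the centrality metrics are normalized, so min_value should be chosen accordingly:

import networkx as nx
from opsci_toolbox.helpers.sna import create_subgraph_min_metric

G = nx.karate_club_graph()

# keep only nodes with degree >= 5
core = create_subgraph_min_metric(G, metric="degree", min_value=5)
print(core.number_of_nodes(), core.number_of_edges())

# centrality values lie in [0, 1], so use a fractional threshold
central = create_subgraph_min_metric(G, metric="degree_centrality", min_value=0.2)
print(sorted(central.nodes()))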
{opsci_toolbox-0.0.13.dist-info → opsci_toolbox-0.0.15.dist-info}/METADATA
CHANGED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: opsci-toolbox
-Version: 0.0.
+Version: 0.0.15
 Summary: a complete toolbox
 Home-page: UNKNOWN
 Author: Erwan Le Nagard
@@ -41,7 +41,7 @@ Requires-Dist: spacy-language-detection ==0.2.1
 Requires-Dist: spacymoji ==3.1.0
 Requires-Dist: supervision ==0.21.0
 Requires-Dist: textacy ==0.13.0
-Requires-Dist: torch
+Requires-Dist: torch >=2.4.0
 Requires-Dist: tqdm >=4.66.2
 Requires-Dist: trafilatura ==1.7.0
 Requires-Dist: transformers ==4.38.2
{opsci_toolbox-0.0.13.dist-info → opsci_toolbox-0.0.15.dist-info}/RECORD
CHANGED
@@ -1,25 +1,26 @@
 opsci_toolbox/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 opsci_toolbox/apis/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-opsci_toolbox/apis/rapidapi_helpers.py,sha256=
-opsci_toolbox/apis/reddit.py,sha256=
-opsci_toolbox/apis/telegram.py,sha256=
+opsci_toolbox/apis/rapidapi_helpers.py,sha256=plX0uoGXWBEmeRqK7QfB_CVYJnW15kVUWtitESxPLNw,26669
+opsci_toolbox/apis/reddit.py,sha256=b_dJFZ_bOB9LLugGBBw5bCbUZdq8VnwtVCGaTYljIIg,21096
+opsci_toolbox/apis/telegram.py,sha256=JjmAk6tKvpnFIYpZDKthxS_mgqhWQpDPUOvyC7SiWPA,60920
 opsci_toolbox/apis/webscraping.py,sha256=1DAIYbywZoPwTSyoqFGxyF0-q_nUsGg_VK51zLL_bB0,21465
 opsci_toolbox/apis/youtube_helpers.py,sha256=j4hwCS2BEWRJjd9Q5XBN9FeCrL3lqteyz5dqbtfypdo,17418
 opsci_toolbox/helpers/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-opsci_toolbox/helpers/common.py,sha256=
+opsci_toolbox/helpers/common.py,sha256=zmi-FbN39Rci_hGEKj2bmkcucrVwnHhMgKU6AAIap3Q,53327
 opsci_toolbox/helpers/cv.py,sha256=N3hnLX223UQbdw_YEdUYj10xUXT_95O6BpQt6TbAE08,21092
 opsci_toolbox/helpers/dataviz.py,sha256=U2Kj-xoF1wHvYXUKxLsrSvKnhky9PrPUy61s1WEKp44,208743
-opsci_toolbox/helpers/dates.py,sha256=
+opsci_toolbox/helpers/dates.py,sha256=Pq-SKP2n1z0_jzU8NxGSv8CHLH_MOKjP_rNYeny0Tb8,4752
 opsci_toolbox/helpers/gliner.py,sha256=qLkpuoCDezQyYmg_TE3XYETSpobHods6WBjCLo0Gjqw,3579
-opsci_toolbox/helpers/nlp.py,sha256=
-opsci_toolbox/helpers/nlp_cuml.py,sha256=
-opsci_toolbox/helpers/sna.py,sha256=
+opsci_toolbox/helpers/nlp.py,sha256=TXf1_dvmfDY9tR0gjQ1C-KzPRib7t74_ZcvmcYZWcPs,105096
+opsci_toolbox/helpers/nlp_cuml.py,sha256=KfgC0hMqLCKoOME2DOu3Wje4ormV19fEB8Fyq8G7D-E,30901
+opsci_toolbox/helpers/sna.py,sha256=3qx1WBQwLKpZNGR0bLSMB2-LBRx-vtNHp8puzoj-84A,33730
 opsci_toolbox/helpers/sql.py,sha256=LMrDWcv1QpfE8HyyrqiKuhhkt930lvME3-AKU89LF38,1928
 opsci_toolbox/helpers/surreaction.py,sha256=JjVvHs7Sf9IJxX0QdHpQ_3E8-c_OS6q_bfUKvurl1z4,7093
 opsci_toolbox/lexicons/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 opsci_toolbox/lexicons/stop_words_en.csv,sha256=4lzjBZHCn_b3lg_CUNkmA_MDQ7DLEpS83k6-dWpkC2o,1957
 opsci_toolbox/lexicons/stop_words_fr.csv,sha256=sPdA8VmyNYbiHg-M8O3tg7ayHvCE3GDg6cF-oSZxICM,6776
-opsci_toolbox-0.0.
-opsci_toolbox-0.0.
-opsci_toolbox-0.0.
-opsci_toolbox-0.0.
+opsci_toolbox-0.0.15.dist-info/METADATA,sha256=ppE13xf4E90LfW9Eir5U30xOI91F96wQqAam7kZwV1o,1727
+opsci_toolbox-0.0.15.dist-info/WHEEL,sha256=GJ7t_kWBFywbagK5eo9IoUwLW6oyOeTKmQ-9iHFVNxQ,92
+opsci_toolbox-0.0.15.dist-info/dependency_links.txt,sha256=bEiJsgyh9M0F_pGpJBwUYDefiTNq9F6QEGfQS5RH1Os,39
+opsci_toolbox-0.0.15.dist-info/top_level.txt,sha256=fUiqxou4FPec_tOfauTLCKAuepeYLfRyhedycWxVnq4,14
+opsci_toolbox-0.0.15.dist-info/RECORD,,
opsci_toolbox-0.0.15.dist-info/dependency_links.txt
ADDED
@@ -0,0 +1 @@
+https://download.pytorch.org/whl/cu124
{opsci_toolbox-0.0.13.dist-info → opsci_toolbox-0.0.15.dist-info}/WHEEL
File without changes
{opsci_toolbox-0.0.13.dist-info → opsci_toolbox-0.0.15.dist-info}/top_level.txt
File without changes