opsci-toolbox 0.0.11__py3-none-any.whl → 0.0.13__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- opsci_toolbox/apis/reddit.py +399 -0
- opsci_toolbox/apis/telegram.py +1035 -0
- opsci_toolbox/apis/webscraping.py +75 -0
- opsci_toolbox/helpers/common.py +176 -4
- opsci_toolbox/helpers/dataviz.py +184 -26
- opsci_toolbox/helpers/dates.py +46 -0
- opsci_toolbox/helpers/gliner.py +88 -0
- opsci_toolbox/helpers/nlp.py +256 -8
- opsci_toolbox/helpers/nlp_cuml.py +3 -3
- opsci_toolbox/helpers/sna.py +1 -0
- {opsci_toolbox-0.0.11.dist-info → opsci_toolbox-0.0.13.dist-info}/METADATA +4 -1
- opsci_toolbox-0.0.13.dist-info/RECORD +25 -0
- opsci_toolbox-0.0.11.dist-info/RECORD +0 -22
- {opsci_toolbox-0.0.11.dist-info → opsci_toolbox-0.0.13.dist-info}/WHEEL +0 -0
- {opsci_toolbox-0.0.11.dist-info → opsci_toolbox-0.0.13.dist-info}/top_level.txt +0 -0
opsci_toolbox/helpers/gliner.py
ADDED

@@ -0,0 +1,88 @@
+from gliner import GLiNER
+
+
+def load_gliner_model(model_name : str, map_location="cpu") -> GLiNER:
+    """
+    Load a GLiNER named entity recognition (NER) model.
+
+    Args:
+        model_name: The model name to load.
+        map_location: The device to load the model on. Possible values are "cpu" or "cuda".
+
+    Returns:
+        The loaded GLiNER model.
+
+    """
+    model = GLiNER.from_pretrained(model_name, map_location=map_location)
+    return model
+
+def gliner_predict(model : GLiNER, text : str, labels : list, threshold : float = 0.5) -> list:
+    """
+    Predicts entities in a single text using the given model.
+
+    Args:
+        model: The model used for prediction.
+        text: The text to predict entities from.
+        labels: A list of entity labels to predict.
+        threshold: The threshold value for entity prediction (default: 0.5).
+
+    Returns:
+        A list of predicted entities.
+
+    """
+    entities = model.predict_entities(text, labels, threshold=threshold)
+    return entities
+
+def gliner_batch_predict(model : GLiNER, text : list, labels : list, threshold : float = 0.5) -> list:
+    """
+    Batch inference. Predicts entities using the given model.
+
+    Args:
+        model: The model used for prediction.
+        text: A list of texts to predict entities from.
+        labels: A list of entity labels to predict.
+        threshold: The threshold value for entity prediction (default: 0.5).
+
+    Returns:
+        A list of predicted entities per input text.
+
+    """
+    entities = model.batch_predict_entities(text, labels, threshold=threshold)
+    return entities
+
+
+def parse_predictions(predictions : list) -> tuple:
+    """
+    Parse the predictions generated by a GLiNER named entity recognition (NER) model for batch processing.
+
+    Args:
+        predictions (list): A list of prediction lists. Each prediction is a dictionary containing the following keys:
+            - "start" (int): The starting index of the predicted entity in the input text.
+            - "end" (int): The ending index of the predicted entity in the input text.
+            - "text" (str): The predicted entity text.
+            - "label" (str): The predicted entity label.
+            - "score" (float): The confidence score of the prediction.
+
+    Returns:
+        tuple: A tuple containing lists of the extracted information from the predictions. The tuple contains the following lists:
+            - starts (list): A list of lists, where each inner list contains the starting indices of the predicted entities.
+            - ends (list): A list of lists, where each inner list contains the ending indices of the predicted entities.
+            - texts (list): A list of lists, where each inner list contains the predicted entity texts.
+            - labels (list): A list of lists, where each inner list contains the predicted entity labels.
+            - scores (list): A list of lists, where each inner list contains the confidence scores of the predictions.
+    """
+    starts, ends, texts, labels, scores = [], [], [], [], []
+    for prediction in predictions:
+        start, end, text, label, score = [], [], [], [], []
+        for item in prediction:
+            start.append(item.get("start"))
+            end.append(item.get("end"))
+            text.append(item.get("text"))
+            label.append(item.get("label"))
+            score.append(item.get("score"))
+        starts.append(start)
+        ends.append(end)
+        texts.append(text)
+        labels.append(label)
+        scores.append(score)
+    return starts, ends, texts, labels, scores
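A minimal sketch of how these helpers chain together for batch NER over a DataFrame column; the checkpoint name and label set below are illustrative choices, not defaults shipped with the package:

```python
import pandas as pd
from opsci_toolbox.helpers.gliner import load_gliner_model, gliner_batch_predict, parse_predictions

texts = ["Emmanuel Macron met Olaf Scholz in Berlin.",
         "The toolbox was released on PyPI last week."]
labels = ["person", "organisation", "location"]          # illustrative label set

model = load_gliner_model("urchade/gliner_multi-v2.1")    # any GLiNER checkpoint
predictions = gliner_batch_predict(model, texts, labels, threshold=0.5)
starts, ends, ents, ent_labels, scores = parse_predictions(predictions)

# one row per input text, with list-valued entity columns
df = pd.DataFrame({"text": texts, "entities": ents, "labels": ent_labels, "scores": scores})
print(df)
```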
opsci_toolbox/helpers/nlp.py
CHANGED
@@ -25,9 +25,10 @@ import requests
 import json
 from opsci_toolbox.helpers.common import write_json, write_pickle, load_pickle, create_dir, copy_file, write_jsonl
 from textacy.preprocessing.replace import urls
+from textacy.preprocessing.remove import brackets
 from eldar import Query
 import torch
-from transformers import TextClassificationPipeline, AutoModelForSequenceClassification, AutoTokenizer
+from transformers import TextClassificationPipeline, AutoModelForSequenceClassification, AutoTokenizer, AutoConfig
 from bs4 import BeautifulSoup
 
 
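The new textacy import backs the bracket-stripping step added to TM_clean_text below, and AutoConfig supports the extended HF_load_model further down. For reference, textacy's brackets() drops a bracketed group together with its content (the sample string is illustrative):

```python
from textacy.preprocessing.remove import brackets

print(brackets("Breaking news [video] from the summit (live)"))
# -> "Breaking news  from the summit "
```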
@@ -97,6 +98,11 @@ def filter_by_query(df: pd.DataFrame, col_text: str, query: str, ignore_case: bool
     df=df.reset_index(drop=True)
     return df
 
+def remove_trailing_dots(text):
+    if text.endswith('…'):
+        return text[:-3].strip()
+    return text
+
 def TM_clean_text(df: pd.DataFrame, col: str, col_clean: str) -> pd.DataFrame:
     """
     Generic cleaning process for topic modeling.
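Note that the helper tests for the single-character ellipsis '…' but then slices off three characters. A more defensive variant, shown here only as a sketch and not part of the package, would handle both spellings explicitly:

```python
def strip_trailing_ellipsis(text: str) -> str:
    """Remove a trailing ellipsis, whether written as '…' or '...'."""
    if text.endswith("…"):
        return text[:-1].strip()
    if text.endswith("..."):
        return text[:-3].strip()
    return text

assert strip_trailing_ellipsis("truncated post…") == "truncated post"
assert strip_trailing_ellipsis("truncated post...") == "truncated post"
```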
@@ -114,12 +120,19 @@ def TM_clean_text(df: pd.DataFrame, col: str, col_clean: str) -> pd.DataFrame:
         The DataFrame with cleaned text data.
     """
     df[col_clean] = df[col].apply(remove_rt)
-    df[col_clean] = df[
+    df[col_clean] = df[col_clean].apply(remove_emoji)
+    df[col_clean] = df[col_clean].apply(remove_trailing_dots)
+    df[col_clean] = df[col_clean].apply(remove_html_tags)
+    df[col_clean] = df[col_clean].apply(lambda x : brackets(x))
+    df[col_clean] = df[col_clean].apply(lambda x : urls(x, repl= ''))
     df[col_clean] = df.apply(lambda row: " ".join(filter(lambda x: x[0] != "@", row[col_clean].split())), 1)
+    df[col_clean] = df[col_clean].apply(remove_multiple_hashtags)
     df[col_clean] = df[col_clean].apply(remove_extra_spaces)
     # df = df.loc[(df[col_clean] != ""), :]
     return df
 
+
+
 def extract_insta_shortcode(url: str) -> str:
     """
     Extracts the shortcode from an Instagram URL.
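The cleaning pipeline now chains RT removal, emoji, trailing-ellipsis, HTML, bracket and URL stripping, mention filtering and hashtag-run removal before whitespace normalisation. A hedged usage sketch (column names and sample posts are illustrative):

```python
import pandas as pd
from opsci_toolbox.helpers.nlp import TM_clean_text

df = pd.DataFrame({"message": [
    "RT @user: great thread 🔥 https://example.com #ai #nlp #data",
    "Check this out… <b>wow</b> [link in bio]",
]})
df = TM_clean_text(df, col="message", col_clean="message_clean")
print(df["message_clean"].tolist())
```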
@@ -151,6 +164,39 @@ def remove_parentheses_content(text: str) -> str:
     result = re.sub(r'\([^)]*\)', '', text)
     return result
 
+def remove_hashtags(text: str) -> str:
+    """
+    Removes any hashtag from text.
+
+    Args:
+        text : str
+            The input text string to clean.
+
+    Returns:
+        result : str
+            The input text string with hashtags removed.
+    """
+    pattern = r'\B#\w+'
+    result = re.sub(pattern, '', text).strip()
+    return result
+
+def remove_multiple_hashtags(text: str) -> str:
+    """
+    Removes series of hashtags separated by spaces.
+
+    Args:
+        text : str
+            The input text string to clean.
+
+    Returns:
+        result : str
+            The input text string with series of hashtags removed.
+    """
+    pattern = r'(?:\B#\w+\s*){2,}'
+    result = re.sub(pattern, '', text).strip()
+    return result
+
+
 def remove_emojis(text: str) -> str:
     """
     Removes emojis and their textual representations from a text string.
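The two regexes differ only in the `{2,}` quantifier: remove_hashtags strips every hashtag, while remove_multiple_hashtags (the one wired into TM_clean_text) only strips runs of two or more consecutive hashtags and keeps isolated ones. A small illustration with a made-up string:

```python
import re

text = "Love this #sunset photo #travel #photography #nature"

print(re.sub(r'\B#\w+', '', text).strip())
# -> 'Love this  photo'            (every hashtag removed)

print(re.sub(r'(?:\B#\w+\s*){2,}', '', text).strip())
# -> 'Love this #sunset photo'     (only the trailing run of hashtags removed)
```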
@@ -171,6 +217,31 @@ def remove_emojis(text: str) -> str:
 
     return text_no_emojis
 
+def remove_emoji(string):
+    emoji_pattern = re.compile(
+        "["
+        u"\U0001F600-\U0001F64F"  # emoticons
+        u"\U0001F300-\U0001F5FF"  # symbols & pictographs
+        u"\U0001F680-\U0001F6FF"  # transport & map symbols
+        u"\U0001F1E0-\U0001F1FF"  # flags (iOS)
+        u"\U00002500-\U00002BEF"  # chinese char
+        u"\U00002702-\U000027B0"
+        u"\U00002702-\U000027B0"
+        u"\U000024C2-\U0001F251"
+        u"\U0001f926-\U0001f937"
+        u"\U00010000-\U0010ffff"
+        u"\u2640-\u2642"
+        u"\u2600-\u2B55"
+        u"\u200d"
+        u"\u23cf"
+        u"\u23e9"
+        u"\u231a"
+        u"\ufe0f"  # dingbats
+        u"\u3030"
+        "]+", flags=re.UNICODE)
+    return emoji_pattern.sub(r'', string)
+
+
 def extract_numbers(text: str) -> list:
     """
     Extracts all numeric values from a given text string and returns them as a list of floats.
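Unlike remove_emojis above, which also handles textual emoji representations, this variant works purely on Unicode code-point ranges. A quick check with an illustrative string:

```python
from opsci_toolbox.helpers.nlp import remove_emoji

print(remove_emoji("Great launch 🚀🔥, see you tomorrow ☀️"))
# -> 'Great launch , see you tomorrow '
```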
@@ -421,6 +492,23 @@ def remove_stopwords(lang: str, stopwords: list) -> pd.DataFrame:
     df.to_csv(file_path, encoding="utf-8", index=False)
     print("File saved -", file_path)
     return df
+
+def keep_valid_filename_chars(text: str, replace: str = '') -> str:
+    """
+    Replace all characters not typically allowed in filenames with a specified replacement string.
+
+    Args:
+        text : str
+            The input text string.
+        replace : str, optional
+            The string to replace invalid filename characters with. Default is an empty string.
+
+    Returns:
+        cleaned_text : str
+            The input text string with invalid filename characters replaced.
+    """
+    return re.sub(r'[.<>:"/\\|?*\x00-\x1F]', replace, text)
+
 
 
 def keep_alphanum_char(text: str, replace: str = '') -> str:
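Note that the character class also includes the dot, so file extensions are rewritten as well. A quick illustration (the sample name is made up):

```python
from opsci_toolbox.helpers.nlp import keep_valid_filename_chars

print(keep_valid_filename_chars('report: "Q1/Q2" results?.parquet', replace="_"))
# -> 'report_ _Q1_Q2_ results__parquet'
```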
@@ -788,7 +876,95 @@ def top_items_per_category(df: pd.DataFrame, col_lst: str = "hashtags", col_cat:
     )
     return df_count
 
-def
+def topic_aggregate_chunks(df: pd.DataFrame, col_id: str, col_topic : str, col_chunk_id: str, col_engagement: str, col_user_id: str=None, metrics : dict =dict())-> pd.DataFrame:
+    """
+    Calculate the intermediate aggregation of chunks per post ID and topic.
+
+    Args:
+        df : pandas DataFrame
+            DataFrame containing processed data.
+        col_id : str
+            Name of the column containing unique post identifiers.
+        col_topic : str
+            Name of the column containing topic labels.
+        col_chunk_id : str
+            Name of the column containing unique sentence identifiers.
+        col_engagement : str
+            Name of the column containing engagement metrics.
+        col_user_id : str
+            Name of the column containing user identifiers.
+        metrics : dict
+            Dictionary containing additional metrics to aggregate.
+
+    Returns:
+        DataFrame
+            DataFrame containing the aggregated posts per topic.
+
+    Description:
+        This function aggregates various metrics for each post and topic, including verbatim counts, engagement sums, average word counts, occurrences of emojis, hashtags, and mentions, as well as unique counts for emojis, hashtags, and mentions. Additionally, it computes the average topic coordinates (x and y) if available. Finally, it calculates percentages for verbatims, engagements, users (if applicable), occurrences of emojis, hashtags, and mentions, and their respective combinations with verbatims.
+    """
+    metrics_dict = dict()
+    # metrics_dict[col_id]=(col_id,'first')
+    metrics_dict[col_chunk_id]=(col_chunk_id,"nunique")
+    metrics_dict[col_engagement]=(col_engagement,'first')
+
+    if col_user_id:
+        metrics_dict[col_user_id]=(col_user_id,"first")
+    if "sentiment" in df.columns:
+        metrics_dict["sentiment"] = ("sentiment", "mean")
+    if "sentiment_score" in df.columns:
+        metrics_dict["sentiment_score"] = ("sentiment_score", "mean")
+
+    metrics_dict["tokens_count"] = ("tokens_count", "sum")
+    metrics_dict["lemmas_count"] = ("lemmas_count", "sum")
+    metrics_dict["emojis_count"] = ("emojis_count", "sum")
+    metrics_dict["unique_emojis"] = ("unique_emojis", lambda x: set(emoji for sublist in x for emoji in sublist))
+    metrics_dict["unique_emojis_count"] = ("unique_emojis", len)
+    metrics_dict["hashtags"] = ("hashtags", lambda x: list(hashtag for sublist in x for hashtag in sublist))
+    metrics_dict["hashtags_count"] = ("hashtags_count", "sum")
+    metrics_dict["mentions"] = ("mentions", lambda x: list(mention for sublist in x for mention in sublist))
+    metrics_dict["mentions_count"] = ("mentions_count", "sum")
+    metrics_dict["extracted_urls_from_text"] = ("extracted_urls_from_text", lambda x: list(url for sublist in x for url in sublist))
+    metrics_dict["domain"] = ("domain", lambda x: list(domain for sublist in x for domain in sublist))
+    metrics_dict["len_numbers"] = ("len_numbers", "sum")
+    metrics_dict["interrogation"] = ("interrogation", "sum")
+    metrics_dict["exclamation"] = ("exclamation", "sum")
+    metrics_dict["x"] = ("x", "mean")
+    metrics_dict["y"] = ("y", "mean")
+
+    metrics_dict.update(metrics)
+
+    df_gb = df.groupby([col_id, col_topic]).agg(**metrics_dict).reset_index()
+    df_gb[col_topic]=df_gb[col_topic].astype(str)
+
+    return df_gb
+
+def sentiment_to_category(sentiment : float, boundaries : list = [-1.0, -0.5, 0.5, 1.0], labels :list = ['negative', 'neutral', 'positive']) -> str:
+    """
+    Assign a sentiment category to a sentiment score.
+
+    Args:
+        sentiment : float
+            Sentiment score.
+        boundaries : list
+            List of boundaries for each category.
+        labels : list
+            List of labels for each category.
+
+    Returns:
+        str
+            Category label.
+
+    Description:
+        This function assigns a sentiment category to a sentiment score based on a list of boundaries and labels. If the sentiment score is outside the boundaries, it is assigned to the last category.
+    """
+    for i in range(len(boundaries) - 1):
+        if boundaries[i] <= sentiment < boundaries[i + 1]:
+            return labels[i]
+    return labels[-1]
+
+
+def topic_representation(df: pd.DataFrame, col_topic: str, col_id: str, col_engagement: str, col_user_id: str, metrics: dict) -> pd.DataFrame:
     """
     Calculate the representation of topics in a processed DataFrame.
 
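topic_aggregate_chunks rolls sentence-level chunks back up to one row per post and topic before topic_representation aggregates per topic; sentiment_to_category then buckets a score against explicit boundaries, falling back to the last label outside the grid. A minimal check of the default boundaries:

```python
from opsci_toolbox.helpers.nlp import sentiment_to_category

print([sentiment_to_category(s) for s in (-0.8, 0.1, 0.7)])
# -> ['negative', 'neutral', 'positive']
```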
@@ -822,11 +998,15 @@ def topic_representation(df_processed_data: pd.DataFrame, col_topic: str, col_id
     metrics_dict['engagements']=(col_engagement,'sum')
     if col_user_id:
         metrics_dict["users"]=(col_user_id,"nunique")
+    panel_cols = [col for col in df.columns if col[:6] == 'panel_']
+    if len(panel_cols)>0:
+        for panel_col in panel_cols:
+            metrics_dict[panel_col+'_verbatims'] = (panel_col, "sum")
+            metrics_dict[panel_col+'_users'] = (col_user_id, lambda x : x[df[panel_col]].nunique())
+            metrics_dict[panel_col+'_engagements'] = (col_engagement, lambda x : x[df[panel_col]].sum())
 
     metrics_dict.update(metrics)
 
-    print(metrics_dict)
-
     metrics_dict['avg_word_count']=("tokens_count", lambda x: round(x.mean(),2))
     metrics_dict['verbatims_with_emoji']=("emojis_count", lambda x: (x > 0).sum() )
     metrics_dict['emojis_occurences']=("emojis_count", "sum")
@@ -843,9 +1023,8 @@ def topic_representation(df_processed_data: pd.DataFrame, col_topic: str, col_id
     metrics_dict['topic_x']=("x", "mean")
     metrics_dict['topic_y']=("y", "mean")
 
-
     # on produit la représentation des topics finale
-    df_distrib_all = (
+    df_distrib_all = (df.groupby(col_topic)
                       .agg(**metrics_dict)
                       .sort_values(by="verbatims", ascending=False)
                       .assign(engagement_per_verbatims = lambda x : x["engagements"] / x["verbatims"])
@@ -1042,6 +1221,73 @@ def sample_most_engaging_posts(df: pd.DataFrame, col_topic: str, col_engagement:
 def get_lang_detector(nlp, name):
     return LanguageDetector(seed=42)  # We use the seed 42
 
+def PRarmy_nlp_process(nlp, df: pd.DataFrame, col_text: str, col_lemma: str = "lemmatized_text", pos_to_keep: list = ["VERB","NOUN","ADJ", "ADV", "PROPN"], entities_to_keep: list = ['PERSON','ORG', 'LOC'], stopwords: list = [], batch_size: int = 100, n_process: int = 1) -> pd.DataFrame:
+    """
+    Perform natural language processing tasks using spaCy for the PR Army project.
+    Its main tasks are lemmatization and named entity recognition (NER).
+
+    Args:
+        nlp : spacy.Language
+            The spaCy language model.
+        df : pandas.DataFrame
+            The DataFrame containing the text data.
+        col_text : str
+            The name of the column containing the text data.
+        col_lemma : str
+            The name of the column to store the lemmatized text data.
+        pos_to_keep : list
+            A list of part-of-speech tags to keep during lemmatization.
+        entities_to_keep : list
+            A list of NER tags to keep.
+        stopwords : list
+            A list of stopwords to remove during processing.
+        batch_size : int, optional
+            The batch size for spaCy processing. Default is 100.
+        n_process : int, optional
+            The number of processes for parallel processing. Default is 1.
+
+    Returns:
+        pandas.DataFrame
+            The DataFrame with processed text data.
+
+    """
+    all_records = []
+    text=list(df[col_text].astype('unicode').values)
+
+    for doc in tqdm(nlp.pipe(text, batch_size=batch_size, n_process=n_process), total= len(text), desc = "NLP Process"):
+        NER_type = []
+        NER_text = []
+
+        ### LEMMATIZATION
+
+        if len(pos_to_keep)>0 and len(stopwords)>0:
+            lemmas_list = [str(tok.lemma_).lower() for tok in doc if not (tok.is_punct or tok.is_space) and tok.text.lower() not in stopwords and tok.pos_ in pos_to_keep]
+        elif len(pos_to_keep)>0 and len(stopwords) < 1:
+            lemmas_list = [str(tok.lemma_).lower() for tok in doc if not (tok.is_punct or tok.is_space) and tok.pos_ in pos_to_keep]
+        elif len(pos_to_keep) < 1 and len(stopwords) > 0:
+            lemmas_list = [str(tok.lemma_).lower() for tok in doc if not (tok.is_punct or tok.is_space) and tok.text.lower() not in stopwords]
+        else :
+            lemmas_list = [str(tok.lemma_).lower() for tok in doc if not (tok.is_punct or tok.is_space)]
+
+        ### NER
+        if len(entities_to_keep)>0:
+            for ent in doc.ents:
+                if ent.label_ in entities_to_keep:
+                    NER_type.append(ent.label_)
+                    NER_text.append(ent.text)
+
+        else:
+            for ent in doc.ents:
+                NER_type.append(ent.label_)
+                NER_text.append(ent.text)
+
+        record = (NER_type, NER_text, ' '.join(map(str, lemmas_list)))
+        all_records.append(record)
+
+
+    df[['NER_type', 'NER_text', col_lemma]] = pd.DataFrame(all_records, index=df.index)
+
+    return df
+
 def TM_nlp_process(nlp, df: pd.DataFrame, col_text: str, col_lemma: str, pos_to_keep: list, stopwords: list, batch_size: int = 100, n_process: int = 1, stats: bool = True, join_list: bool = False) -> pd.DataFrame:
     """
     Perform natural language processing tasks using spaCy for topic modeling.
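A hedged usage sketch, assuming a spaCy pipeline with a NER component is installed; the model name, column name and entity label set below are illustrative (label names depend on the pipeline, e.g. PER/ORG/LOC for the French models):

```python
import spacy
import pandas as pd
from opsci_toolbox.helpers.nlp import PRarmy_nlp_process

nlp = spacy.load("fr_core_news_lg")                     # any pipeline with NER
df = pd.DataFrame({"text": ["Emmanuel Macron a rencontré Olaf Scholz à Berlin."]})

df = PRarmy_nlp_process(
    nlp, df, col_text="text",
    pos_to_keep=["VERB", "NOUN", "ADJ", "PROPN"],
    entities_to_keep=["PER", "ORG", "LOC"],
)
print(df[["NER_type", "NER_text", "lemmatized_text"]])
```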
@@ -2063,13 +2309,15 @@ def check_gpu():
 def HF_load_model(model_checkpoint):
     tokenizer = AutoTokenizer.from_pretrained(model_checkpoint)
     model = AutoModelForSequenceClassification.from_pretrained(model_checkpoint)
+    config = AutoConfig.from_pretrained(model_checkpoint)
     if torch.cuda.is_available():
         model.cuda()
-    return model, tokenizer
+    return model, tokenizer, config
 
 def HF_sentiment_classifier(tokenizer, model, text, col_text, filename, dir_json):
     """ Calculate sentiment of a text. `return_type` can be 'label', 'score' or 'proba' """
     file_path= os.path.join(dir_json , str(filename)+'.json')
+    results = {}
     if not os.path.exists(file_path):
         with torch.no_grad():
             inputs = tokenizer(text, return_tensors='pt', truncation=True, padding=True).to(model.device)
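Because HF_load_model now also returns the model config, call sites that previously unpacked two values need a third target; the config gives direct access to the label mapping. A sketch (the checkpoint name is illustrative):

```python
from opsci_toolbox.helpers.nlp import HF_load_model

model, tokenizer, config = HF_load_model("cardiffnlp/twitter-xlm-roberta-base-sentiment")
print(config.id2label)   # label-id to name mapping, available without reloading the checkpoint
```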
opsci_toolbox/helpers/nlp_cuml.py
CHANGED

@@ -384,7 +384,6 @@ def cuml_word_frequency_per_categorie(gdf: pd.DataFrame, col_text: str, col_cat:
     # Initialize cuML's CountVectorizer
     count_vectorizer = CountVectorizer(analyzer='word', ngram_range=ngram_range, stop_words=stop_words)
 
-    print(type(gdf[col_text]))
     # Fit and transform the text data
     X_train_count = count_vectorizer.fit_transform(cudf.Series(gdf[col_text]))
     X_names_count = count_vectorizer.get_feature_names()

@@ -402,7 +401,8 @@ def cuml_word_frequency_per_categorie(gdf: pd.DataFrame, col_text: str, col_cat:
         df_count_tmp = df_count_tmp.head(n_words)
         if min_freq:
             df_count_tmp = df_count_tmp[df_count_tmp["freq"] > min_freq]
-
+
+        df_count_tmp['word'] = df_count_tmp['word'].astype(str)
         # Concatenate the result to the main DataFrame
         df_count = cudf.concat([df_count, df_count_tmp])
 

@@ -588,7 +588,7 @@ def cudf_encode_chunked_files(chunk_files_paths: list,
         current_df = cudf_read_parquet(file)
 
         text_list = current_df[col_text].to_arrow().to_pylist()
-
+
         # text vectorization
         embeddings = HF_encoder.embed_documents(text_list)
 
opsci_toolbox/helpers/sna.py
CHANGED
@@ -421,6 +421,7 @@ def select_top_nodes_by_degrees(G: nx.Graph, degree_type : str = "degree", N : int
     return subgraph
 
 
+
 def scale_size(G, size_attribute, min_node_size = 10, max_node_size = 100):
     """
     Scale the sizes of nodes in a graph based on a specified attribute.
{opsci_toolbox-0.0.11.dist-info → opsci_toolbox-0.0.13.dist-info}/METADATA
CHANGED

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: opsci-toolbox
-Version: 0.0.11
+Version: 0.0.13
 Summary: a complete toolbox
 Home-page: UNKNOWN
 Author: Erwan Le Nagard

@@ -48,6 +48,9 @@ Requires-Dist: transformers ==4.38.2
 Requires-Dist: umap-learn ==0.5.5
 Requires-Dist: urlextract ==1.9.0
 Requires-Dist: wordcloud ==1.9.3
+Requires-Dist: Unidecode ==1.3.8
+Requires-Dist: kaleido ==0.2.1
+Requires-Dist: gliner ==0.2.8
 
 UNKNOWN
 
opsci_toolbox-0.0.13.dist-info/RECORD
ADDED

@@ -0,0 +1,25 @@
+opsci_toolbox/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+opsci_toolbox/apis/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+opsci_toolbox/apis/rapidapi_helpers.py,sha256=k_hYcRNww5noNkX7zyz5Htggxb15BPoKSlbY7NLuQXI,26696
+opsci_toolbox/apis/reddit.py,sha256=zhK2CY9CkCezNcekQFdv1So3NmHHYxB7-tgMVErHOGI,15763
+opsci_toolbox/apis/telegram.py,sha256=GKDLpZg1fc9D_PGCgi9pfTaW7Jjm_2luQ-2trXTr38A,42208
+opsci_toolbox/apis/webscraping.py,sha256=1DAIYbywZoPwTSyoqFGxyF0-q_nUsGg_VK51zLL_bB0,21465
+opsci_toolbox/apis/youtube_helpers.py,sha256=j4hwCS2BEWRJjd9Q5XBN9FeCrL3lqteyz5dqbtfypdo,17418
+opsci_toolbox/helpers/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+opsci_toolbox/helpers/common.py,sha256=ZGjWIPEpqr-gIYjkfsS97PmCtQWHa_iF8tBbVxrQsOQ,53321
+opsci_toolbox/helpers/cv.py,sha256=N3hnLX223UQbdw_YEdUYj10xUXT_95O6BpQt6TbAE08,21092
+opsci_toolbox/helpers/dataviz.py,sha256=U2Kj-xoF1wHvYXUKxLsrSvKnhky9PrPUy61s1WEKp44,208743
+opsci_toolbox/helpers/dates.py,sha256=CxbXSo61GPZ2L37PV0ujvp78vwl0DoBq7t0nkk9qHp8,4751
+opsci_toolbox/helpers/gliner.py,sha256=qLkpuoCDezQyYmg_TE3XYETSpobHods6WBjCLo0Gjqw,3579
+opsci_toolbox/helpers/nlp.py,sha256=I72F32ieofZaCIkjZ9kqpiJLktfRoM7mMhzzxyXDQ3I,99316
+opsci_toolbox/helpers/nlp_cuml.py,sha256=CGyThKNgo6fdFPV-iooPG0oNrzA__Hvv08t_sdEp3BE,28919
+opsci_toolbox/helpers/sna.py,sha256=E5D_1aGDmq_YQYseHxZggEtWQOwbXJJ0GHu3YtZLGtg,31906
+opsci_toolbox/helpers/sql.py,sha256=LMrDWcv1QpfE8HyyrqiKuhhkt930lvME3-AKU89LF38,1928
+opsci_toolbox/helpers/surreaction.py,sha256=JjVvHs7Sf9IJxX0QdHpQ_3E8-c_OS6q_bfUKvurl1z4,7093
+opsci_toolbox/lexicons/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+opsci_toolbox/lexicons/stop_words_en.csv,sha256=4lzjBZHCn_b3lg_CUNkmA_MDQ7DLEpS83k6-dWpkC2o,1957
+opsci_toolbox/lexicons/stop_words_fr.csv,sha256=sPdA8VmyNYbiHg-M8O3tg7ayHvCE3GDg6cF-oSZxICM,6776
+opsci_toolbox-0.0.13.dist-info/METADATA,sha256=G_JhKg5tmYPkRUhAN2Uj9B6orX7x3TKWqIOKU_TjeIA,1727
+opsci_toolbox-0.0.13.dist-info/WHEEL,sha256=GJ7t_kWBFywbagK5eo9IoUwLW6oyOeTKmQ-9iHFVNxQ,92
+opsci_toolbox-0.0.13.dist-info/top_level.txt,sha256=fUiqxou4FPec_tOfauTLCKAuepeYLfRyhedycWxVnq4,14
+opsci_toolbox-0.0.13.dist-info/RECORD,,
opsci_toolbox-0.0.11.dist-info/RECORD
DELETED

@@ -1,22 +0,0 @@
-opsci_toolbox/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-opsci_toolbox/apis/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-opsci_toolbox/apis/rapidapi_helpers.py,sha256=k_hYcRNww5noNkX7zyz5Htggxb15BPoKSlbY7NLuQXI,26696
-opsci_toolbox/apis/webscraping.py,sha256=Gz3hOfhOHUpwHU1Pzj3mB2WdBAcKa2WisYBHMi3lcVE,18343
-opsci_toolbox/apis/youtube_helpers.py,sha256=j4hwCS2BEWRJjd9Q5XBN9FeCrL3lqteyz5dqbtfypdo,17418
-opsci_toolbox/helpers/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-opsci_toolbox/helpers/common.py,sha256=nqg9wzgU5DxVTCxEb5LSw2lUnp0f_hKF_Q-DhpRtu6g,45158
-opsci_toolbox/helpers/cv.py,sha256=N3hnLX223UQbdw_YEdUYj10xUXT_95O6BpQt6TbAE08,21092
-opsci_toolbox/helpers/dataviz.py,sha256=1cIGb-u81cD5iSIkkkrzyrBnfim7fbhm0x_CguHUbf0,202128
-opsci_toolbox/helpers/dates.py,sha256=Wf7HxaUY62IRrY3XPdRIuoaMbGi3QqWf-vStqbRRY_o,2633
-opsci_toolbox/helpers/nlp.py,sha256=baq4BsSgeLBgToPOU5RTmDA80dFJwH9xf0jppuAVseU,88947
-opsci_toolbox/helpers/nlp_cuml.py,sha256=XzBfoFMpVIehpRbp60E4wGokpoqJP0lJxs1plOxQqBY,28882
-opsci_toolbox/helpers/sna.py,sha256=XL1BZ-x83xWRNbGsvh7-m8Mdy6iOrWx8vjgaL2_TSmo,31905
-opsci_toolbox/helpers/sql.py,sha256=LMrDWcv1QpfE8HyyrqiKuhhkt930lvME3-AKU89LF38,1928
-opsci_toolbox/helpers/surreaction.py,sha256=JjVvHs7Sf9IJxX0QdHpQ_3E8-c_OS6q_bfUKvurl1z4,7093
-opsci_toolbox/lexicons/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-opsci_toolbox/lexicons/stop_words_en.csv,sha256=4lzjBZHCn_b3lg_CUNkmA_MDQ7DLEpS83k6-dWpkC2o,1957
-opsci_toolbox/lexicons/stop_words_fr.csv,sha256=sPdA8VmyNYbiHg-M8O3tg7ayHvCE3GDg6cF-oSZxICM,6776
-opsci_toolbox-0.0.11.dist-info/METADATA,sha256=5h-cfwhi31VKlzrOfdAeZuoKTLB1iyDIA4qqsz-bZGQ,1633
-opsci_toolbox-0.0.11.dist-info/WHEEL,sha256=GJ7t_kWBFywbagK5eo9IoUwLW6oyOeTKmQ-9iHFVNxQ,92
-opsci_toolbox-0.0.11.dist-info/top_level.txt,sha256=fUiqxou4FPec_tOfauTLCKAuepeYLfRyhedycWxVnq4,14
-opsci_toolbox-0.0.11.dist-info/RECORD,,
{opsci_toolbox-0.0.11.dist-info → opsci_toolbox-0.0.13.dist-info}/WHEEL
File without changes

{opsci_toolbox-0.0.11.dist-info → opsci_toolbox-0.0.13.dist-info}/top_level.txt
File without changes