PyPI - opsci-toolbox - Versions diffs - 0.0.16__py3-none-any.whl → 0.0.17__py3-none-any.whl - Mend

opsci-toolbox 0.0.16__py3-none-any.whl → 0.0.17__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (10) hide show

opsci_toolbox/helpers/common.py CHANGED Viewed

@@ -1502,6 +1502,28 @@ def custom_ordering(df : pd.DataFrame, col_to_order : str, custom_order : list)
     df[col_to_order] = pd.Categorical(df[col_to_order], categories=custom_order, ordered=True).to_numpy()
     return df
+# def calcul_total_et_pourcentage(df : pd.DataFrame, col_gb : list, metrics : dict) -> pd.DataFrame:
+#     """
+#     Calculates the total and percentage values for the given metrics based on a grouping column.
+#     Args:
+#         df (DataFrame): The input DataFrame.
+#         col_gb (list):  Names of the columns to group by.
+#         metrics (dict): A dictionary of metrics to calculate.
+#     Returns:
+#         DataFrame: The modified DataFrame with total and percentage values added.
+#     """
+#     percentage_agregations = {f'per_{key}': lambda x: x[key] / x[f"total_{key}"] for key in list(metrics.keys())}
+#     df = (df.join(df.groupby(col_gb)
+#                   .agg(metrics)
+#                   .add_prefix("total_"), on=col_gb
+#                   )
+#                 .assign(**percentage_agregations).fillna(0)
+#         )
+#     return df
 def calcul_total_et_pourcentage(df : pd.DataFrame, col_gb : list, metrics : dict) -> pd.DataFrame:
     """
     Calculates the total and percentage values for the given metrics based on a grouping column.
@@ -1513,14 +1535,15 @@ def calcul_total_et_pourcentage(df : pd.DataFrame, col_gb : list, metrics : dict
         DataFrame: The modified DataFrame with total and percentage values added.
     """
-    percentage_agregations = {f'per_{key}': lambda x: x[key] / x[f"total_{key}"] for key in list(metrics.keys())}
+    # percentage_agregations = {f'per_{key}': lambda x: x[key] / x[f"total_{key}"] for key in list(metrics.keys())}
     df = (df.join(df.groupby(col_gb)
                   .agg(metrics)
                   .add_prefix("total_"), on=col_gb
                   )
-                .assign(**percentage_agregations).fillna(0)
         )
-    return df
+    for key in list(metrics.keys()):
+        df['per_' + key] = df[key] / df['total_' + key]
+        df['per_' + key] = df['per_' + key].fillna(0)
+    return df

opsci_toolbox/helpers/dataviz.py CHANGED Viewed

@@ -2007,6 +2007,115 @@ def horizontal_stacked_bars(df: pd.DataFrame,
     return fig
+def bar_stacked(df: pd.DataFrame,
+                             col_x: str,
+                             col_y: str,
+                             col_cat: str,
+                             col_color: str,
+                             **kwargs) -> go.Figure:
+    """
+    Create horizontal stacked bar plots.
+    Args:
+        df (pd.DataFrame): DataFrame containing data for the bar plots.
+        col_x (str): Name of the column containing x-axis values.
+        col_y (str): Name of the column containing y-axis values.
+        col_percentage (str): Name of the column containing percentage values.
+        col_cat (str): Name of the column containing categories.
+        col_color (str): Name of the column containing colors.
+        **kwargs: Additional keyword arguments to update default plotting parameters.
+    Returns:
+        go.Figure: Plotly Figure object representing the horizontal stacked bar plots.
+    """
+    params = general_kwargs()
+    params.update(kwargs)
+    categories = df[col_cat].unique()
+    col_hover = params["col_hover"]
+    fig = go.Figure()
+    for cat in categories:
+        current_df = df[df[col_cat] == cat]
+        hovertemplate= "<b>Catégorie</b> : "+str(cat)+"<br><b>"+str(col_x)+"</b> : "+current_df[col_x].astype(str)+ str(col_y) + "</b> : "+current_df[col_y].astype(str)
+        for c in col_hover:
+            hovertemplate += (
+                "<br><b>"
+                + str(c)
+                + "</b>:"
+                + current_df[c].astype(str).apply(wrap_text)
+            )
+        fig.add_trace(
+            go.Bar(
+                x=current_df[col_x],
+                y=current_df[col_y],
+                orientation=params['orientation'],
+                text = current_df[col_x],
+                textposition=params["textposition"],
+                name=cat,
+                marker=dict(color=current_df[col_color]),
+                hovertemplate=hovertemplate+'<extra></extra>',
+                textangle=params["xaxis_tickangle"],
+                )
+        )
+    fig.update_layout(
+            barmode='stack',
+            title_text=params["title_text"],
+            showlegend=params['showlegend'],
+            width = params["width"],
+            height= params["height"],
+            font_family=params["font_family"],
+            font_size=params["font_size"],
+            template=params["template"],
+            plot_bgcolor=params["plot_bgcolor"],  # background color (plot)
+            paper_bgcolor=params["paper_bgcolor"],
+            uniformtext_minsize=params["uniformtext_minsize"],
+            uniformtext_mode=params["uniformtext_mode"],
+        )
+    fig.update_yaxes(
+        # title=params["yaxis_title"],
+        title_font_size=params["yaxis_title_font_size"],
+        tickangle=params["yaxis_tickangle"],
+        tickfont_size=params["yaxis_tickfont_size"],
+        range=params["yaxis_range"],
+        showgrid=params["yaxis_showgrid"],
+        showline=params["yaxis_showline"],
+        zeroline=params["yaxis_zeroline"],
+        gridwidth=params["yaxis_gridwidth"],
+        gridcolor=params["yaxis_gridcolor"],
+        linewidth=params["yaxis_linewidth"],
+        linecolor=params["yaxis_linecolor"],
+        mirror=params["yaxis_mirror"],
+    )
+    fig.update_xaxes(
+        # title=params["xaxis_title"],
+        title_font_size=params["xaxis_title_font_size"],
+        tickangle=params["xaxis_tickangle"],
+        tickfont_size=params["xaxis_tickfont_size"],
+        # range=params["xaxis_range"],
+        showgrid=params["xaxis_showgrid"],
+        showline=params["xaxis_showline"],
+        zeroline=params["xaxis_zeroline"],
+        gridwidth=params["xaxis_gridwidth"],
+        gridcolor=params["xaxis_gridcolor"],
+        linewidth=params["xaxis_linewidth"],
+        linecolor=params["xaxis_linecolor"],
+        mirror=params["xaxis_mirror"]
+    )
+    fig.update_xaxes(title_text=params["xaxis_title"])
+    fig.update_yaxes(title_text=params["yaxis_title"])
+    fig.update_yaxes(showticklabels = False)
+    return fig
 def bar_trend_per_cat(df: pd.DataFrame,
                               col_x: str,
                               col_cat: str,
@@ -3597,13 +3706,11 @@ def density_map(df_posts: pd.DataFrame,
                 show_topics: bool = True,
                 show_halo: bool = False,
                 show_histogram: bool = True,
                 colorscale: str = "Portland",
                 marker_color: str = "#ff7f0e",
                 arrow_color: str = "#ff7f0e",
                 width: int = 1000,
                 height: int = 1000,
                 label_size_ratio: int = 100,
                 n_words: int = 3,
                 title_text: str = "Clustering",
@@ -3625,7 +3732,7 @@ def density_map(df_posts: pd.DataFrame,
         col_engagement (str): Column name corresponding to a metric.
         col_text (str): Column name corresponding to a text separated by |.
         col_text_dots (str): Column name corresponding to the text for dots.
-        colorscale (str, optional): Possible values are 'https://plotly.com/python/builtin-colorscales/'. Defaults to "Portland".
+        colorscale (str, optional): Possible values are ``https://plotly.com/python/builtin-colorscales/``. Defaults to "Portland".
         marker_color (str, optional): Dots color value. Defaults to "#ff7f0e".
         arrow_color (str, optional): Arrow pointing to topic centroid color value. Defaults to "#ff7f0e".
         width (int, optional): Width of the plot. Defaults to 1000.

opsci_toolbox/helpers/nlp.py CHANGED Viewed

@@ -2074,9 +2074,9 @@ def encode_chunked_files(chunk_files_paths: list,
     Encode text from files and save the results in another pickle file.
     Parameters:
-        chunk_files_paths (List[str]): List of file paths containing documents.
+        chunk_files_paths (list): List of file paths containing documents.
         HF_encoder (Encoder): Encoder object for text vectorization.
-        cols (List[str]): Columns to keep in the resulting DataFrame.
+        cols (list): Columns to keep in the resulting DataFrame.
         col_text (str): Column containing text data in the DataFrame.
         path_embedded_chunks (str): Path to save the embedded chunks.
         reencode (bool, optional): Whether to re-encode files even if they already exist. Defaults to False.
@@ -2118,12 +2118,10 @@ def encode_labels(data_to_encode: np.ndarray) -> tuple:
     Encodes a list of labels using a LabelEncoder.
     Args:
-    - data_to_encode (List[Union[str, int]]): The list of labels to encode. Labels can be of any hashable type,
-      but strings or integers are typical.
+        data_to_encode (List[Union[str, int]]): The list of labels to encode. Labels can be of any hashable type, but strings or integers are typical.
     Returns:
-    - Tuple[LabelEncoder, np.ndarray]: A tuple containing the fitted LabelEncoder instance and a numpy array
-      of encoded labels.
+        Tuple[LabelEncoder, np.ndarray]: A tuple containing the fitted LabelEncoder instance and a numpy array of encoded labels.
     """
     label_encoder = LabelEncoder()
     label_encoder.fit(data_to_encode)
@@ -2150,12 +2148,10 @@ def one_hot_encode(data_to_encode:np.ndarray) -> tuple:
     One-hot encodes a list of categorical values using OneHotEncoder.
     Args:
-    - data_to_encode (List[Union[str, int]]): The list of categorical values to encode. The values can be of
-      any hashable type, typically strings or integers.
+    - data_to_encode (List[Union[str, int]]): The list of categorical values to encode. The values can be of any hashable type, typically strings or integers.
     Returns:
-    - Tuple[OneHotEncoder, np.ndarray]: A tuple containing the fitted OneHotEncoder instance and a numpy array
-      of one-hot encoded values.
+    - Tuple[OneHotEncoder, np.ndarray]: A tuple containing the fitted OneHotEncoder instance and a numpy array of one-hot encoded values.
     """
     one_hot_encoder = OneHotEncoder(sparse=False)
     data_to_encode_reshaped = np.array(data_to_encode).reshape(-1, 1)  # Reshape for OneHotEncoder

opsci_toolbox/helpers/nlp_cuml.py CHANGED Viewed

@@ -258,7 +258,7 @@ def process_UMAP(embedded_chunks_paths: list, path_reduced_embeddings_id: str, r
     new_file_paths=[]
     for file_path in tqdm(embedded_chunks_paths, total=len(embedded_chunks_paths), desc="UMAP transform from files"):
-        filename = os.path.splitext(os.path.basename(file_path))[0][:-9]
+        filename = os.path.splitext(os.path.basename(file_path))[0]
         new_filename = filename+"_reduce_embeddings.parquet"
         new_file_path = os.path.join(path_reduced_embeddings_id, new_filename)
@@ -309,7 +309,7 @@ def process_HDBSCAN(clusterer,
     new_file_paths=[]
     for file_path in tqdm(reduced_embeddings_paths, total=len(reduced_embeddings_paths), desc="HDBSCAN transform from files"):
-        filename = os.path.splitext(os.path.basename(file_path))[0][:-18]
+        filename = os.path.splitext(os.path.basename(file_path))[0]
         new_filename = filename+ "_predictions.parquet"
         new_file_path = os.path.join(path_predictions_dataset_id, new_filename)
         if not os.path.exists(new_file_path) or reencode:
@@ -566,7 +566,7 @@ def cudf_write_parquet(df: cudf.DataFrame, path: str, filename: str) -> str:
     df.to_parquet(file_path)
     return file_path
-def cudf_read_parquet(path: str) -> cudf.DataFrame:
+def cudf_read_parquet(path: str, cols : list = None) -> cudf.DataFrame:
     """
     Read a Parquet file into a cuDF DataFrame.
@@ -576,7 +576,10 @@ def cudf_read_parquet(path: str) -> cudf.DataFrame:
     Returns:
         cudf.DataFrame: The read cuDF DataFrame.
     """
-    df = cudf.read_parquet(path)
+    if cols :
+        df = cudf.read_parquet(path, columns=cols)
+    else :
+        df = cudf.read_parquet(path)
     return df
 def convert_df_to_cudf(df: pd.DataFrame) -> cudf.DataFrame:

opsci_toolbox/helpers/sna.py CHANGED Viewed

@@ -44,7 +44,7 @@ def create_subgraph_min_metric(G: nx.Graph, metric: str = "degree", min_value: f
     subgraph = G.subgraph(nodes_with_min_metric).copy()
     return subgraph
 def group_nodes_by_values(dictionnary : dict) -> dict:
     """
     Group nodes by their values from a dictionary.

{opsci_toolbox-0.0.16.dist-info → opsci_toolbox-0.0.17.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: opsci-toolbox
-Version: 0.0.16
+Version: 0.0.17
 Summary: a complete toolbox
 Home-page: UNKNOWN
 Author: Erwan Le Nagard

{opsci_toolbox-0.0.16.dist-info → opsci_toolbox-0.0.17.dist-info}/RECORD RENAMED Viewed

@@ -6,21 +6,21 @@ opsci_toolbox/apis/telegram.py,sha256=JjmAk6tKvpnFIYpZDKthxS_mgqhWQpDPUOvyC7SiWP
 opsci_toolbox/apis/webscraping.py,sha256=fo6H2OaH0m_LHJB9IyN-q0Vkk8L9OvHxNn4O_A6a6yc,21572
 opsci_toolbox/apis/youtube_helpers.py,sha256=j4hwCS2BEWRJjd9Q5XBN9FeCrL3lqteyz5dqbtfypdo,17418
 opsci_toolbox/helpers/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-opsci_toolbox/helpers/common.py,sha256=zmi-FbN39Rci_hGEKj2bmkcucrVwnHhMgKU6AAIap3Q,53327
+opsci_toolbox/helpers/common.py,sha256=gM0QzLsdjMQTTT522CqzpFO86YWaxPaK48EXemjw9nI,54298
 opsci_toolbox/helpers/cv.py,sha256=N3hnLX223UQbdw_YEdUYj10xUXT_95O6BpQt6TbAE08,21092
-opsci_toolbox/helpers/dataviz.py,sha256=U2Kj-xoF1wHvYXUKxLsrSvKnhky9PrPUy61s1WEKp44,208743
+opsci_toolbox/helpers/dataviz.py,sha256=viIrTrnxFzCRLY5sJDEz3jJtsB-gZTZb2uLoq0yvTlU,212762
 opsci_toolbox/helpers/dates.py,sha256=Pq-SKP2n1z0_jzU8NxGSv8CHLH_MOKjP_rNYeny0Tb8,4752
 opsci_toolbox/helpers/gliner.py,sha256=qLkpuoCDezQyYmg_TE3XYETSpobHods6WBjCLo0Gjqw,3579
-opsci_toolbox/helpers/nlp.py,sha256=4edA5JZ4vzpU4U9w-INNspW2oTQ-yYpm5rFXExKB4YI,108324
-opsci_toolbox/helpers/nlp_cuml.py,sha256=KfgC0hMqLCKoOME2DOu3Wje4ormV19fEB8Fyq8G7D-E,30901
-opsci_toolbox/helpers/sna.py,sha256=3qx1WBQwLKpZNGR0bLSMB2-LBRx-vtNHp8puzoj-84A,33730
+opsci_toolbox/helpers/nlp.py,sha256=MC2ibMi0j9BCysloEPXpvpvRlzlMvRn8krOAcFF-4VU,108286
+opsci_toolbox/helpers/nlp_cuml.py,sha256=sLvaDfVL0aoGi3mNXUkW47tWVrrYK5wxbf8QPgljQNA,30991
+opsci_toolbox/helpers/sna.py,sha256=yzBTQXYXow_lKGhlSMz8hYl2JcSlle95YEDht9v-_fY,33734
 opsci_toolbox/helpers/sql.py,sha256=LMrDWcv1QpfE8HyyrqiKuhhkt930lvME3-AKU89LF38,1928
 opsci_toolbox/helpers/surreaction.py,sha256=JjVvHs7Sf9IJxX0QdHpQ_3E8-c_OS6q_bfUKvurl1z4,7093
 opsci_toolbox/lexicons/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 opsci_toolbox/lexicons/stop_words_en.csv,sha256=4lzjBZHCn_b3lg_CUNkmA_MDQ7DLEpS83k6-dWpkC2o,1957
 opsci_toolbox/lexicons/stop_words_fr.csv,sha256=sPdA8VmyNYbiHg-M8O3tg7ayHvCE3GDg6cF-oSZxICM,6776
-opsci_toolbox-0.0.16.dist-info/METADATA,sha256=-SCFUBnwnWlUrOGgQwxib8ZfCjWxXm3iVVwnfErQ9Fk,1727
-opsci_toolbox-0.0.16.dist-info/WHEEL,sha256=GJ7t_kWBFywbagK5eo9IoUwLW6oyOeTKmQ-9iHFVNxQ,92
-opsci_toolbox-0.0.16.dist-info/dependency_links.txt,sha256=bEiJsgyh9M0F_pGpJBwUYDefiTNq9F6QEGfQS5RH1Os,39
-opsci_toolbox-0.0.16.dist-info/top_level.txt,sha256=fUiqxou4FPec_tOfauTLCKAuepeYLfRyhedycWxVnq4,14
-opsci_toolbox-0.0.16.dist-info/RECORD,,
+opsci_toolbox-0.0.17.dist-info/METADATA,sha256=RvPoecg-cflzmh0PcNj9dDZm_RLp5KsK2n-hRTXdEUs,1727
+opsci_toolbox-0.0.17.dist-info/WHEEL,sha256=GJ7t_kWBFywbagK5eo9IoUwLW6oyOeTKmQ-9iHFVNxQ,92
+opsci_toolbox-0.0.17.dist-info/dependency_links.txt,sha256=bEiJsgyh9M0F_pGpJBwUYDefiTNq9F6QEGfQS5RH1Os,39
+opsci_toolbox-0.0.17.dist-info/top_level.txt,sha256=fUiqxou4FPec_tOfauTLCKAuepeYLfRyhedycWxVnq4,14
+opsci_toolbox-0.0.17.dist-info/RECORD,,

{opsci_toolbox-0.0.16.dist-info → opsci_toolbox-0.0.17.dist-info}/WHEEL RENAMED Viewed

File without changes

{opsci_toolbox-0.0.16.dist-info → opsci_toolbox-0.0.17.dist-info}/dependency_links.txt RENAMED Viewed

File without changes

{opsci_toolbox-0.0.16.dist-info → opsci_toolbox-0.0.17.dist-info}/top_level.txt RENAMED Viewed

File without changes

opsci-toolbox 0.0.16__py3-none-any.whl → 0.0.17__py3-none-any.whl

opsci-toolbox 0.0.16__py3-none-any.whl → 0.0.17__py3-none-any.whl