PyPI - risk-network - Versions diffs - 0.0.14b2__py3-none-any.whl → 0.0.14b3__py3-none-any.whl - Mend

risk-network 0.0.14b2__py3-none-any.whl → 0.0.14b3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (9) hide show

risk/__init__.py CHANGED Viewed

@@ -8,4 +8,4 @@ RISK: Regional Inference of Significant Kinships
 from ._risk import RISK
 __all__ = ["RISK"]
-__version__ = "0.0.14-beta.2"
+__version__ = "0.0.14-beta.3"

risk/_neighborhoods/_domains.py CHANGED Viewed

@@ -54,37 +54,48 @@ def define_domains(
     Raises:
         ValueError: If the clustering criterion is set to "off" or if an error occurs during clustering.
     """
-    try:
-        if linkage_criterion == "off":
-            raise ValueError("Clustering is turned off.")
+    # Validate args first; let user mistakes raise immediately
+    clustering_off = _validate_clustering_args(
+        linkage_criterion, linkage_method, linkage_metric, linkage_threshold
+    )
+    # If clustering is turned off, assign unique domains and skip
+    if clustering_off:
+        n_rows = len(top_annotation)
+        logger.warning("Clustering is turned off. Skipping clustering.")
+        top_annotation["domain"] = range(1, n_rows + 1)
+    else:
         # Transpose the matrix to cluster annotations
         m = significant_neighborhoods_significance[:, top_annotation["significant_annotation"]].T
         # Safeguard the matrix by replacing NaN, Inf, and -Inf values
         m = _safeguard_matrix(m)
-        # Optimize silhouette score across different linkage methods and distance metrics
-        best_linkage, best_metric, best_threshold = _optimize_silhouette_across_linkage_and_metrics(
-            m, linkage_criterion, linkage_method, linkage_metric, linkage_threshold
-        )
-        # Perform hierarchical clustering
-        Z = linkage(m, method=best_linkage, metric=best_metric)
-        logger.warning(
-            f"Linkage criterion: '{linkage_criterion}'\nLinkage method: '{best_linkage}'\nLinkage metric: '{best_metric}'\nLinkage threshold: {round(best_threshold, 3)}"
-        )
-        # Calculate the optimal threshold for clustering
-        max_d_optimal = np.max(Z[:, 2]) * best_threshold
-        # Assign domains to the annotation matrix
-        domains = fcluster(Z, max_d_optimal, criterion=linkage_criterion)
-        top_annotation["domain"] = 0
-        top_annotation.loc[top_annotation["significant_annotation"], "domain"] = domains
-    except (ValueError, LinAlgError):
-        # If a ValueError is encountered, handle it by assigning unique domains
-        n_rows = len(top_annotation)
-        if linkage_criterion == "off":
-            logger.warning("Clustering is turned off. Skipping clustering.")
-        else:
-            logger.error("Error encountered. Skipping clustering.")
-        top_annotation["domain"] = range(1, n_rows + 1)  # Assign unique domains
+        try:
+            # Optimize silhouette score across different linkage methods and distance metrics
+            (
+                best_linkage,
+                best_metric,
+                best_threshold,
+            ) = _optimize_silhouette_across_linkage_and_metrics(
+                m, linkage_criterion, linkage_method, linkage_metric, linkage_threshold
+            )
+            # Perform hierarchical clustering
+            Z = linkage(m, method=best_linkage, metric=best_metric)
+            logger.warning(
+                f"Linkage criterion: '{linkage_criterion}'\nLinkage method: '{best_linkage}'\nLinkage metric: '{best_metric}'\nLinkage threshold: {round(best_threshold, 3)}"
+            )
+            # Calculate the optimal threshold for clustering
+            max_d_optimal = np.max(Z[:, 2]) * best_threshold
+            # Assign domains to the annotation matrix
+            domains = fcluster(Z, max_d_optimal, criterion=linkage_criterion)
+            top_annotation["domain"] = 0
+            top_annotation.loc[top_annotation["significant_annotation"], "domain"] = domains
+        except (LinAlgError, ValueError):
+            # Numerical errors or degenerate input are handled gracefully (not user error)
+            n_rows = len(top_annotation)
+            logger.error(
+                "Clustering failed due to numerical or data degeneracy. Assigning unique domains."
+            )
+            top_annotation["domain"] = range(1, n_rows + 1)
     # Create DataFrames to store domain information
     node_to_significance = pd.DataFrame(
@@ -184,6 +195,46 @@ def trim_domains(
     return valid_domains, valid_trimmed_domains_matrix
+def _validate_clustering_args(
+    linkage_criterion: str,
+    linkage_method: str,
+    linkage_metric: str,
+    linkage_threshold: Union[float, str],
+) -> bool:
+    """
+    Validate user-provided clustering arguments.
+    Returns:
+        bool: True if clustering is turned off (criterion == 'off'); False otherwise.
+    Raises:
+        ValueError: If any argument is invalid (user error).
+    """
+    # Allow opting out of clustering without raising
+    if linkage_criterion == "off":
+        return True
+    # Validate linkage method (allow "auto")
+    if linkage_method != "auto" and linkage_method not in LINKAGE_METHODS:
+        raise ValueError(
+            f"Invalid linkage_method '{linkage_method}'. Allowed values are 'auto' or one of: {sorted(LINKAGE_METHODS)}"
+        )
+    # Validate linkage metric (allow "auto")
+    if linkage_metric != "auto" and linkage_metric not in LINKAGE_METRICS:
+        raise ValueError(
+            f"Invalid linkage_metric '{linkage_metric}'. Allowed values are 'auto' or one of: {sorted(LINKAGE_METRICS)}"
+        )
+    # Validate linkage threshold (allow "auto"; otherwise must be float in (0, 1])
+    if linkage_threshold != "auto":
+        try:
+            lt = float(linkage_threshold)
+        except (TypeError, ValueError):
+            raise ValueError("linkage_threshold must be 'auto' or a float in the interval (0, 1].")
+        if not (0.0 < lt <= 1.0):
+            raise ValueError(f"linkage_threshold must be within (0, 1]. Received: {lt}")
+    return False
 def _safeguard_matrix(matrix: np.ndarray) -> np.ndarray:
     """
     Safeguard the matrix by replacing NaN, Inf, and -Inf values.

risk/_neighborhoods/_neighborhoods.py CHANGED Viewed

@@ -394,34 +394,33 @@ def _prune_neighbors(
     # Identify indices with non-zero rows in the binary significance matrix
     non_zero_indices = np.where(significant_binary_significance_matrix.sum(axis=1) != 0)[0]
     median_distances = []
+    distance_lookup = {}
     for node in non_zero_indices:
-        neighbors = [
-            n
-            for n in network.neighbors(node)
-            if significant_binary_significance_matrix[n].sum() != 0
-        ]
-        if neighbors:
-            median_distance = np.median(
-                [_get_euclidean_distance(node, n, network) for n in neighbors]
-            )
-            median_distances.append(median_distance)
+        dist = _median_distance_to_significant_neighbors(
+            node, network, significant_binary_significance_matrix
+        )
+        if dist is not None:
+            median_distances.append(dist)
+            distance_lookup[node] = dist
+    if not median_distances:
+        logger.warning("No significant neighbors found for pruning.")
+        significant_significance_matrix = np.where(
+            significant_binary_significance_matrix == 1, significance_matrix, 0
+        )
+        return (
+            significance_matrix,
+            significant_binary_significance_matrix,
+            significant_significance_matrix,
+        )
     # Calculate the distance threshold value based on rank
     distance_threshold_value = _calculate_threshold(median_distances, 1 - distance_threshold)
     # Prune nodes that are outliers based on the distance threshold
-    for row_index in non_zero_indices:
-        neighbors = [
-            n
-            for n in network.neighbors(row_index)
-            if significant_binary_significance_matrix[n].sum() != 0
-        ]
-        if neighbors:
-            median_distance = np.median(
-                [_get_euclidean_distance(row_index, n, network) for n in neighbors]
-            )
-            if median_distance >= distance_threshold_value:
-                significance_matrix[row_index] = 0
-                significant_binary_significance_matrix[row_index] = 0
+    for node, dist in distance_lookup.items():
+        if dist >= distance_threshold_value:
+            significance_matrix[node] = 0
+            significant_binary_significance_matrix[node] = 0
     # Create a matrix where non-significant entries are set to zero
     significant_significance_matrix = np.where(
@@ -435,6 +434,29 @@ def _prune_neighbors(
     )
+def _median_distance_to_significant_neighbors(
+    node, network, significance_mask
+) -> Union[float, None]:
+    """
+    Calculate the median distance from a node to its significant neighbors.
+    Args:
+        node (Any): The node for which the median distance is being calculated.
+        network (nx.Graph): The network graph containing the nodes.
+        significance_mask (np.ndarray): Binary matrix indicating significant nodes.
+    Returns:
+        Union[float, None]: The median distance to significant neighbors, or None if no significant neighbors exist.
+    """
+    neighbors = [n for n in network.neighbors(node) if significance_mask[n].sum() != 0]
+    if not neighbors:
+        return None
+    # Calculate distances to significant neighbors
+    distances = [_get_euclidean_distance(node, n, network) for n in neighbors]
+    return np.median(distances)
 def _get_euclidean_distance(node1: Any, node2: Any, network: nx.Graph) -> float:
     """
     Calculate the Euclidean distance between two nodes in the network.

risk/_network/_graph/_summary.py CHANGED Viewed

@@ -84,7 +84,7 @@ class Summary:
         Returns:
             pd.DataFrame: Processed DataFrame containing significance scores, p-values, q-values,
-                and annotation member information.
+                and matched annotation members information.
         """
         log_header("Loading analysis summary")
         # Calculate significance and depletion q-values from p-value matrices in annotation
@@ -109,9 +109,9 @@ class Summary:
         # Add minimum p-values and q-values to DataFrame
         results[
             [
-                "Enrichment P-Value",
+                "Enrichment P-value",
                 "Enrichment Q-value",
-                "Depletion P-Value",
+                "Depletion P-value",
                 "Depletion Q-value",
             ]
         ] = results.apply(
@@ -126,13 +126,13 @@ class Summary:
             axis=1,
             result_type="expand",
         )
-        # Add annotation members and their counts
-        results["Annotation Members in Network"] = results["Annotation"].apply(
+        # Add matched annotation members and their counts
+        results["Matched Members"] = results["Annotation"].apply(
             lambda desc: self._get_annotation_members(desc)
         )
-        results["Annotation Members in Network Count"] = results[
-            "Annotation Members in Network"
-        ].apply(lambda x: len(x.split(";")) if x else 0)
+        results["Matched Count"] = results["Matched Members"].apply(
+            lambda x: len(x.split(";")) if x else 0
+        )
         # Reorder columns and drop rows with NaN values
         results = (
@@ -140,12 +140,12 @@ class Summary:
                 [
                     "Domain ID",
                     "Annotation",
-                    "Annotation Members in Network",
-                    "Annotation Members in Network Count",
+                    "Matched Members",
+                    "Matched Count",
                     "Summed Significance Score",
-                    "Enrichment P-Value",
+                    "Enrichment P-value",
                     "Enrichment Q-value",
-                    "Depletion P-Value",
+                    "Depletion P-value",
                     "Depletion Q-value",
                 ]
             ]
@@ -159,20 +159,18 @@ class Summary:
         results = pd.merge(ordered_annotation, results, on="Annotation", how="left").fillna(
             {
                 "Domain ID": -1,
-                "Annotation Members in Network": "",
-                "Annotation Members in Network Count": 0,
+                "Matched Members": "",
+                "Matched Count": 0,
                 "Summed Significance Score": 0.0,
-                "Enrichment P-Value": 1.0,
+                "Enrichment P-value": 1.0,
                 "Enrichment Q-value": 1.0,
-                "Depletion P-Value": 1.0,
+                "Depletion P-value": 1.0,
                 "Depletion Q-value": 1.0,
             }
         )
-        # Convert "Domain ID" and "Annotation Members in Network Count" to integers
+        # Convert "Domain ID" and "Matched Count" to integers
         results["Domain ID"] = results["Domain ID"].astype(int)
-        results["Annotation Members in Network Count"] = results[
-            "Annotation Members in Network Count"
-        ].astype(int)
+        results["Matched Count"] = results["Matched Count"].astype(int)
         return results

{risk_network-0.0.14b2.dist-info → risk_network-0.0.14b3.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: risk-network
-Version: 0.0.14b2
+Version: 0.0.14b3
 Summary: A Python package for scalable network analysis and high-quality visualization.
 Author-email: Ira Horecka <ira89@icloud.com>
 License: GPL-3.0-or-later

{risk_network-0.0.14b2.dist-info → risk_network-0.0.14b3.dist-info}/RECORD RENAMED Viewed

@@ -1,4 +1,4 @@
-risk/__init__.py,sha256=Kit6ilMaj_3F16dnLJ_Dd7fE5jeZGmNqur97zzK7CRI,143
+risk/__init__.py,sha256=14fTdsWCVA1DS1M7axwUvQzyssu4dRwwhdLdnN-5h1M,143
 risk/_risk.py,sha256=VULCdM41BlWKM1ou4Qc579ffZ9dMZkfhAwKYgbaEeKM,1054
 risk/_annotation/__init__.py,sha256=zr7w1DHkmvrkKFGKdPhrcvZHV-xsfd5TZOaWtFiP4Dc,164
 risk/_annotation/_annotation.py,sha256=03vcnkdi4HGH5UUyokUyOdyyjXOLoKSmLFuK7VAl41c,15174
@@ -10,8 +10,8 @@ risk/_log/_parameters.py,sha256=8FkeeBtULDFVw3UijLArK-G3OIjy6YXyRXmPPckK7fU,5893
 risk/_neighborhoods/__init__.py,sha256=eKwjpEUKSUmAirRZ_qPTVF7MLkvhCn_fulPVq158wM8,185
 risk/_neighborhoods/_api.py,sha256=s1f4d_nEPWc66KDmOUUpRNXzp6dfoevw45ewOg9eMNo,23298
 risk/_neighborhoods/_community.py,sha256=Tr-EHO91EWbMmNr_z21UCngiqWOlWIqcjwBig_VXI8c,17850
-risk/_neighborhoods/_domains.py,sha256=He8G2-E9-yYQB8ChUtMFr51HVlfRj5EaxGu3sGVNUCo,14630
-risk/_neighborhoods/_neighborhoods.py,sha256=9H7BickJx9GdnOo5d5wpdtXkcWyvzq2w6FAy1rwLBtk,20614
+risk/_neighborhoods/_domains.py,sha256=Q3MUWW9KjuERpxs4H1dNFhalDjdatMkWSnB12BerUDU,16580
+risk/_neighborhoods/_neighborhoods.py,sha256=9hpQCYG0d9fZLYj-fVACgLJBtw3dW8C-0YbE2OWuX-M,21436
 risk/_neighborhoods/_stats/__init__.py,sha256=nL83A3unzpCTzRDPanCiqU1RsKPJJNDe46S9igoe3pg,264
 risk/_neighborhoods/_stats/_tests.py,sha256=-ioHdyrsgW63YnypKFpanatauuKrF3LT7aMZ3b6otrU,12091
 risk/_neighborhoods/_stats/_permutation/__init__.py,sha256=nfTaW29CK8OZCdFnpMVlHnFaqr1E4AZp6mvhlUazHXM,140
@@ -23,7 +23,7 @@ risk/_network/_graph/__init__.py,sha256=SFgxgxUiZK4vvw6bdQ04DSMXEr8xjMaQV-Wne6wA
 risk/_network/_graph/_api.py,sha256=sp3_mLJDP_xQexYBjyM17iyzLb2oGmiC050kcw-jVho,8474
 risk/_network/_graph/_graph.py,sha256=x2EWT_ZVwxh7m9a01yG4WMdmAxBxiaxX3CvkqP9QAXE,12486
 risk/_network/_graph/_stats.py,sha256=6mxZkuL6LJlwKDsBbP22DAVkNUEhq-JZwYMKhFKD08k,7359
-risk/_network/_graph/_summary.py,sha256=4eGhCArssePDg4LXr3sg5bUpNn7KFK9oPZcCz5lJKEQ,10334
+risk/_network/_graph/_summary.py,sha256=I8FhMdpawGbvCJHPpsyvbtM7Qa0xXzwKvjnX9N8HSm8,10141
 risk/_network/_plotter/__init__.py,sha256=qFRtQKSBGIqmUGwmA7VPL7hTHBb9yvRIt0nLISXnwkY,84
 risk/_network/_plotter/_api.py,sha256=OaV1CCRGsz98wEEzyEhaq2CqEuZh6t2qS7g_rY6HJJs,1727
 risk/_network/_plotter/_canvas.py,sha256=H7rPz4Gv7ED3bDHMif4cf2usdU4ifmxzXeug5A_no68,13599
@@ -34,8 +34,8 @@ risk/_network/_plotter/_plotter.py,sha256=F2hw-spUdsXjvuG36o0YFR3Pnd-CZOHYUq4vW0
 risk/_network/_plotter/_utils/__init__.py,sha256=JXgjKiBWvXx0X2IeFnrOh5YZQGQoELbhJZ0Zh2mFEOo,211
 risk/_network/_plotter/_utils/_colors.py,sha256=JCliSvz8_-TsjilaRHSEsqdXFBUYlzhXKOSRGdCm9Kw,19177
 risk/_network/_plotter/_utils/_layout.py,sha256=GyGLc2U1WWUVL1Te9uPi_CLqlW_E4TImXRAL5TeA5D8,3633
-risk_network-0.0.14b2.dist-info/licenses/LICENSE,sha256=jOtLnuWt7d5Hsx6XXB2QxzrSe2sWWh3NgMfFRetluQM,35147
-risk_network-0.0.14b2.dist-info/METADATA,sha256=8Ymwky3eLiYB9OMO0kVzfF40uvnD3uFCBmY7q6pfitI,6853
-risk_network-0.0.14b2.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
-risk_network-0.0.14b2.dist-info/top_level.txt,sha256=NX7C2PFKTvC1JhVKv14DFlFAIFnKc6Lpsu1ZfxvQwVw,5
-risk_network-0.0.14b2.dist-info/RECORD,,
+risk_network-0.0.14b3.dist-info/licenses/LICENSE,sha256=jOtLnuWt7d5Hsx6XXB2QxzrSe2sWWh3NgMfFRetluQM,35147
+risk_network-0.0.14b3.dist-info/METADATA,sha256=SG8HbB0TBqNd_zgtKV1Ri23RoBIRy_poTAfeN9ZaSBA,6853
+risk_network-0.0.14b3.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+risk_network-0.0.14b3.dist-info/top_level.txt,sha256=NX7C2PFKTvC1JhVKv14DFlFAIFnKc6Lpsu1ZfxvQwVw,5
+risk_network-0.0.14b3.dist-info/RECORD,,

{risk_network-0.0.14b2.dist-info → risk_network-0.0.14b3.dist-info}/WHEEL RENAMED Viewed

File without changes

{risk_network-0.0.14b2.dist-info → risk_network-0.0.14b3.dist-info}/licenses/LICENSE RENAMED Viewed

File without changes

{risk_network-0.0.14b2.dist-info → risk_network-0.0.14b3.dist-info}/top_level.txt RENAMED Viewed

File without changes

risk-network 0.0.14b2__py3-none-any.whl → 0.0.14b3__py3-none-any.whl

risk-network 0.0.14b2__py3-none-any.whl → 0.0.14b3__py3-none-any.whl