PyPI - risk-network - Versions diffs - 0.0.10__py3-none-any.whl → 0.0.11__py3-none-any.whl - Mend

risk-network 0.0.10__py3-none-any.whl → 0.0.11__py3-none-any.whl

This diff shows the changes between the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.

Files changed (10)

risk/__init__.py CHANGED Viewed

@@ -7,4 +7,4 @@ RISK: Regional Inference of Significant Kinships
 from risk.risk import RISK
-__version__ = "0.0.10"
+__version__ = "0.0.11"

risk/annotations/annotations.py CHANGED Viewed

@@ -3,76 +3,36 @@ risk/annotations/annotations
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 """
-import os
 import re
-import zipfile
 from collections import Counter
 from itertools import compress
 from typing import Any, Dict, List, Set
 import networkx as nx
-import nltk
 import numpy as np
 import pandas as pd
-from nltk.corpus import stopwords
-from nltk.stem import WordNetLemmatizer
 from nltk.tokenize import word_tokenize
+from scipy.sparse import coo_matrix
+from risk.annotations.nltk_setup import setup_nltk_resources
 from risk.log import logger
-from scipy.sparse import coo_matrix
-def ensure_nltk_resource(resource: str) -> None:
-    """Ensure the specified NLTK resource is available."""
-    # Define the path to the resource within the NLTK data directory
-    resource_path = f"corpora/{resource}"
-    # Check if the resource is already available.
-    try:
-        nltk.data.find(resource_path)
-        return
-    except LookupError:
-        print(f"Resource '{resource}' not found. Attempting to download...")
-    # Download the resource.
-    nltk.download(resource)
-    # Check again after downloading.
-    try:
-        nltk.data.find(resource_path)
-        return
-    except LookupError:
-        print(f"Resource '{resource}' still not found after download. Checking for a ZIP file...")
-    # Look for a ZIP file in all known NLTK data directories.
-    for data_path in nltk.data.path:
-        zip_path = os.path.join(data_path, "corpora", f"{resource}.zip")
-        if os.path.isfile(zip_path):
-            print(f"Found ZIP file for '{resource}' at: {zip_path}")
-            target_dir = os.path.join(data_path, "corpora")
-            with zipfile.ZipFile(zip_path, "r") as z:
-                z.extractall(path=target_dir)
-            print(f"Unzipped '{resource}' successfully.")
-            break  # Stop after unzipping the first found ZIP.
-    # Final check: Try to check resource one last time. If it fails, rai
-    try:
-        nltk.data.find(resource_path)
-        print(f"Resource '{resource}' is now available.")
-    except LookupError:
-        raise LookupError(f"Resource '{resource}' could not be found, downloaded, or unzipped.")
-# Ensure the NLTK stopwords and WordNet resources are available
-# punkt is known to have issues with the default download method, so we use a custom function if it fails
-try:
-    ensure_nltk_resource("punkt")
-except LookupError:
-    nltk.download("punkt")
-ensure_nltk_resource("stopwords")
-ensure_nltk_resource("wordnet")
-# Use NLTK's stopwords - load all languages
-STOP_WORDS = set(word for lang in stopwords.fileids() for word in stopwords.words(lang))
-# Initialize the WordNet lemmatizer, which is used for normalizing words
-LEMMATIZER = WordNetLemmatizer()
+def initialize_nltk():
+    """Initialize all required NLTK components."""
+    setup_nltk_resources()
+    # After resources are available, initialize the components
+    from nltk.corpus import stopwords
+    from nltk.stem import WordNetLemmatizer
+    global STOP_WORDS, LEMMATIZER
+    STOP_WORDS = set(stopwords.words("english"))
+    LEMMATIZER = WordNetLemmatizer()
+# Initialize NLTK components
+initialize_nltk()
 def load_annotations(

risk/annotations/nltk_setup.py ADDED Viewed

@@ -0,0 +1,85 @@
+"""
+risk/annotations/nltk_setup
+~~~~~~~~~~~~~~~~~~~~~~~~~~~
+"""
+import os
+import zipfile
+from typing import List, Tuple
+import nltk
+from nltk.data import find, path as nltk_data_path
+from risk.log import logger
+def setup_nltk_resources(required_resources: List[Tuple[str, str]] = None) -> None:
+    """Ensures all required NLTK resources are available and properly extracted.
+    Uses NLTK's default paths and mechanisms.
+    Args:
+        required_resources (List[Tuple[str, str]], optional): List of required resources
+            to download and extract. Each tuple should contain the resource path within
+            NLTK data and the package name. Defaults to None.
+    """
+    if required_resources is None:
+        required_resources = [
+            ("tokenizers/punkt", "punkt"),
+            ("tokenizers/punkt_tab", "punkt_tab"),
+            ("corpora/stopwords", "stopwords"),
+            ("corpora/wordnet", "wordnet"),
+        ]
+    # Process each resource
+    for resource_path, package_name in required_resources:
+        try:
+            # First try to find the resource - this is how NLTK checks if it's available
+            find(resource_path)
+        except LookupError:
+            # Resource not found, download it
+            logger.info(f"Downloading missing NLTK resource: {package_name}")
+            nltk.download(package_name, quiet=True)
+        # Even if find() succeeded, the resource might be a zip that failed to extract
+        # Check if we need to manually extract zips
+        verify_and_extract_if_needed(resource_path, package_name)
+def verify_and_extract_if_needed(resource_path: str, package_name: str) -> None:
+    """Verifies if the resource is properly extracted and extracts if needed. Respects
+    NLTK's directory structure where the extracted content should be in the same directory
+    as the zip file.
+    Args:
+        resource_path (str): Path to the resource within NLTK data.
+        package_name (str): Name of the NLTK package.
+    """
+    # Get the directory and base name from the resource path
+    path_parts = resource_path.split("/")
+    resource_type = path_parts[0]  # 'corpora', 'tokenizers', etc.
+    resource_name = path_parts[-1]  # 'wordnet', 'punkt', etc.
+    # Check all NLTK data directories
+    for base in nltk_data_path:
+        # For resource paths like 'corpora/wordnet', the zip file is at '~/nltk_data/corpora/wordnet.zip'
+        # and the extracted directory should be at '~/nltk_data/corpora/wordnet'
+        resource_dir = os.path.join(base, resource_type)
+        zip_path = os.path.join(resource_dir, f"{resource_name}.zip")
+        folder_path = os.path.join(resource_dir, resource_name)
+        # If zip exists but folder doesn't, extraction is needed
+        if os.path.exists(zip_path) and not os.path.exists(folder_path):
+            logger.info(f"Found unextracted zip for {package_name}, extracting...")
+            try:
+                with zipfile.ZipFile(zip_path, "r") as zf:
+                    # Extract files to the same directory where the zip file is located
+                    zf.extractall(path=resource_dir)
+                if os.path.exists(folder_path):
+                    logger.info(f"Successfully extracted {package_name}")
+                else:
+                    logger.warning(
+                        f"Extraction completed but resource directory not found for {package_name}"
+                    )
+            except Exception as e:
+                logger.error(f"Failed to extract {package_name}: {e}")

risk/risk.py CHANGED Viewed

@@ -3,14 +3,13 @@ risk/risk
 ~~~~~~~~~
 """
-from risk.network import NetworkIO
 from risk.annotations import AnnotationsIO
+from risk.log import params, set_global_verbosity
 from risk.neighborhoods import NeighborhoodsAPI
+from risk.network import NetworkIO
 from risk.network.graph import GraphAPI
 from risk.network.plotter import PlotterAPI
-from risk.log import params, set_global_verbosity
 class RISK(NetworkIO, AnnotationsIO, NeighborhoodsAPI, GraphAPI, PlotterAPI):
     """RISK: A class for network analysis and visualization.

risk/stats/permutation/test_functions.py CHANGED Viewed

@@ -8,6 +8,7 @@ from scipy.sparse import csr_matrix
 # NOTE: Cython optimizations provided minimal performance benefits.
 # The final version with Cython is archived in the `cython_permutation` branch.
 # DISPATCH_TEST_FUNCTIONS can be found at the end of the file.

{risk_network-0.0.10.dist-info → risk_network-0.0.11.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.2
 Name: risk-network
-Version: 0.0.10
+Version: 0.0.11
 Summary: A Python package for biological network analysis
 Author: Ira Horecka
 Author-email: Ira Horecka <ira89@icloud.com>
@@ -699,7 +699,7 @@ Requires-Dist: leidenalg
 Requires-Dist: markov_clustering
 Requires-Dist: matplotlib
 Requires-Dist: networkx
-Requires-Dist: nltk==3.8.1
+Requires-Dist: nltk
 Requires-Dist: numpy
 Requires-Dist: openpyxl
 Requires-Dist: pandas

{risk_network-0.0.10.dist-info → risk_network-0.0.11.dist-info}/RECORD RENAMED Viewed

@@ -1,8 +1,9 @@
-risk/__init__.py,sha256=256SpbomSJ88a12A8DmDUsPOxlkwJ7cqbu7E4FHrcw4,120
-risk/risk.py,sha256=s827_lRknFseOP9O4zW8sP-IcCd2EzrpV_tnVY_tz5s,1104
+risk/__init__.py,sha256=FJYPkeBx_fYMZxCzecYrubpT9mJP2L2GpAs-kg7rhQY,120
+risk/risk.py,sha256=7Yu_Q3bRS05tMQyAyt3WYqVKphUpBo3DqpyrfjF9yC4,1103
 risk/annotations/__init__.py,sha256=parsbcux1U4urpUqh9AdzbDWuLj9HlMidycMPkpSQFo,179
-risk/annotations/annotations.py,sha256=mDgW5X4wck7oQqcMOw1T0XNkFZnSm1vx3MKIUX-FABI,16505
+risk/annotations/annotations.py,sha256=KtFyCiCnoAkhin3HKDBtkNcz5imjpysrmEfQKUwyqh8,14737
 risk/annotations/io.py,sha256=z1AJySsU-KL_IYuHa7j3nvuczmOHgK3WfaQ4TRunvrA,10499
+risk/annotations/nltk_setup.py,sha256=IvuyO3WkrmIg4gz1vsfjxUWBt9Nk-XxkQknPiFRORHE,3533
 risk/log/__init__.py,sha256=7LxDysQu7doi0LAvlY2YbjN6iJH0fNknqy8lSLgeljo,217
 risk/log/console.py,sha256=PgjyEvyhYLUSHXPUKEqOmxsDsfrjPICIgqo_cAHq0N8,4575
 risk/log/parameters.py,sha256=VtwfMzLU1xI4yji3-Ch5vHjH-KdwTfwaEMmi7hFQTs0,5716
@@ -32,9 +33,9 @@ risk/stats/significance.py,sha256=6cKv2xBQXWTHZ6HpNWIqlNfKKS5pG_BcCUdMM3r_zw4,73
 risk/stats/stat_tests.py,sha256=tj0ri9w89_1fsjGLuafTWpfBEwZXpSLn7Ej2aAQ5lxk,11776
 risk/stats/permutation/__init__.py,sha256=OLmYLm2uj96hPsSaUs0vUqFYw6Thwch_aHtpL7L0ZFw,127
 risk/stats/permutation/permutation.py,sha256=BWjgdBpLVcHvmwHy0bmD4aJFccxifNBSrrCBPppyKf4,10569
-risk/stats/permutation/test_functions.py,sha256=KlECWTz1EZ6EPF_OAgHb0uznaIhopiVYb_AKUKuC4no,3120
-risk_network-0.0.10.dist-info/LICENSE,sha256=jOtLnuWt7d5Hsx6XXB2QxzrSe2sWWh3NgMfFRetluQM,35147
-risk_network-0.0.10.dist-info/METADATA,sha256=MbLHMXmOv8iTUZLm8S4EKj2uDB419weRpRvWIyBKSCM,46966
-risk_network-0.0.10.dist-info/WHEEL,sha256=52BFRY2Up02UkjOa29eZOS2VxUrpPORXg1pkohGGUS8,91
-risk_network-0.0.10.dist-info/top_level.txt,sha256=NX7C2PFKTvC1JhVKv14DFlFAIFnKc6Lpsu1ZfxvQwVw,5
-risk_network-0.0.10.dist-info/RECORD,,
+risk/stats/permutation/test_functions.py,sha256=0hcv18zqhhh2njWhUb1Yl-5PiFCYd4jX-HaY5hFMz4I,3121
+risk_network-0.0.11.dist-info/LICENSE,sha256=jOtLnuWt7d5Hsx6XXB2QxzrSe2sWWh3NgMfFRetluQM,35147
+risk_network-0.0.11.dist-info/METADATA,sha256=XmrzSj1VcALUEiN3g0JqxDm5EM8KB1jR_B8Y7oIXQ5Q,46959
+risk_network-0.0.11.dist-info/WHEEL,sha256=52BFRY2Up02UkjOa29eZOS2VxUrpPORXg1pkohGGUS8,91
+risk_network-0.0.11.dist-info/top_level.txt,sha256=NX7C2PFKTvC1JhVKv14DFlFAIFnKc6Lpsu1ZfxvQwVw,5
+risk_network-0.0.11.dist-info/RECORD,,

{risk_network-0.0.10.dist-info → risk_network-0.0.11.dist-info}/LICENSE RENAMED Viewed

File without changes

{risk_network-0.0.10.dist-info → risk_network-0.0.11.dist-info}/WHEEL RENAMED Viewed

File without changes

{risk_network-0.0.10.dist-info → risk_network-0.0.11.dist-info}/top_level.txt RENAMED Viewed

File without changes

risk-network 0.0.10__py3-none-any.whl → 0.0.11__py3-none-any.whl

risk-network 0.0.10__py3-none-any.whl → 0.0.11__py3-none-any.whl