PyPI - risk-network - Versions diffs - 0.0.9b42__py3-none-any.whl → 0.0.9b44__py3-none-any.whl - Mend

risk-network 0.0.9b42__py3-none-any.whl → 0.0.9b44__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (7) hide show

risk/__init__.py CHANGED Viewed

@@ -7,4 +7,4 @@ RISK: Regional Inference of Significant Kinships
 from risk.risk import RISK
-__version__ = "0.0.9-beta.42"
+__version__ = "0.0.9-beta.44"

risk/annotations/annotations.py CHANGED Viewed

@@ -3,7 +3,9 @@ risk/annotations/annotations
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 """
+import os
 import re
+import zipfile
 from collections import Counter
 from itertools import compress
 from typing import Any, Dict, List, Set
@@ -20,29 +22,51 @@ from risk.log import logger
 from scipy.sparse import coo_matrix
-def _setup_nltk():
-    """Ensure necessary NLTK data is downloaded."""
+def ensure_nltk_resource(resource: str) -> None:
+    """Ensure the specified NLTK resource is available."""
+    # Define the path to the resource within the NLTK data directory
+    resource_path = f"corpora/{resource}"
+    # Check if the resource is already available.
     try:
-        nltk.data.find("tokenizers/punkt")
+        nltk.data.find(resource_path)
+        return
     except LookupError:
-        # Force download if not found
-        nltk.download("punkt", force=True, quiet=True)
+        print(f"Resource '{resource}' not found. Attempting to download...")
+    # Download the resource.
+    nltk.download(resource)
+    # Check again after downloading.
     try:
-        nltk.data.find("corpora/stopwords")
+        nltk.data.find(resource_path)
+        return
     except LookupError:
-        nltk.download("stopwords", force=True, quiet=True)
+        print(f"Resource '{resource}' still not found after download. Checking for a ZIP file...")
+    # Look for a ZIP file in all known NLTK data directories.
+    for data_path in nltk.data.path:
+        zip_path = os.path.join(data_path, "corpora", f"{resource}.zip")
+        if os.path.isfile(zip_path):
+            print(f"Found ZIP file for '{resource}' at: {zip_path}")
+            target_dir = os.path.join(data_path, "corpora")
+            with zipfile.ZipFile(zip_path, "r") as z:
+                z.extractall(path=target_dir)
+            print(f"Unzipped '{resource}' successfully.")
+            break  # Stop after unzipping the first found ZIP.
+    # Final check: Try to load the resource one last time.
     try:
-        nltk.data.find("corpora/wordnet")
+        nltk.data.find(resource_path)
+        print(f"Resource '{resource}' is now available.")
     except LookupError:
-        nltk.download("wordnet", force=True, quiet=True)
+        raise LookupError(f"Resource '{resource}' could not be found, downloaded, or unzipped.")
-# Ensure you have the necessary NLTK data
-_setup_nltk()
-# Use NLTK's stopwords
-STOP_WORDS = set(stopwords.words("english"))
+# Ensure the NLTK stopwords and WordNet resources are available
+ensure_nltk_resource("stopwords")
+ensure_nltk_resource("wordnet")
+# Use NLTK's stopwords - load all languages
+STOP_WORDS = set(word for lang in stopwords.fileids() for word in stopwords.words(lang))
+# Initialize the WordNet lemmatizer, which is used for normalizing words
 LEMMATIZER = WordNetLemmatizer()
@@ -242,7 +266,7 @@ def get_weighted_description(words_column: pd.Series, scores_column: pd.Series)
         weight = max(1, int((0 if pd.isna(score) else score) * 10))
         for token in tokens:
             # Clean token: lowercase and remove extraneous punctuation (but preserve intra-word hyphens)
-            token_clean = re.sub(r"[^\w\-]", "", token.lower()).strip()
+            token_clean = re.sub(r"[^\w\-]", "", token).strip()
             if not token_clean:
                 continue
             # Skip tokens that are pure numbers

{risk_network-0.0.9b42.dist-info → risk_network-0.0.9b44.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.2
 Name: risk-network
-Version: 0.0.9b42
+Version: 0.0.9b44
 Summary: A Python package for biological network analysis
 Author: Ira Horecka
 Author-email: Ira Horecka <ira89@icloud.com>

{risk_network-0.0.9b42.dist-info → risk_network-0.0.9b44.dist-info}/RECORD RENAMED Viewed

@@ -1,7 +1,7 @@
-risk/__init__.py,sha256=0G_X2wjPwCz7UG5bgL3bYfsnVpLvoRhcMyS2bV45ZKI,127
+risk/__init__.py,sha256=RVOwiHzzwMXL1qujltMK4sdkHgP3Pv85KrFz7QfhPTk,127
 risk/risk.py,sha256=s827_lRknFseOP9O4zW8sP-IcCd2EzrpV_tnVY_tz5s,1104
 risk/annotations/__init__.py,sha256=parsbcux1U4urpUqh9AdzbDWuLj9HlMidycMPkpSQFo,179
-risk/annotations/annotations.py,sha256=_2crX1SKphCY1gJmDpRuvYouf8DowScSetbxCG3vLHk,15022
+risk/annotations/annotations.py,sha256=5X2R8RFxgK6kgSoj05UdCPcpkRRPOaHjGwnIrjeD5Ww,16299
 risk/annotations/io.py,sha256=z1AJySsU-KL_IYuHa7j3nvuczmOHgK3WfaQ4TRunvrA,10499
 risk/log/__init__.py,sha256=7LxDysQu7doi0LAvlY2YbjN6iJH0fNknqy8lSLgeljo,217
 risk/log/console.py,sha256=PgjyEvyhYLUSHXPUKEqOmxsDsfrjPICIgqo_cAHq0N8,4575
@@ -33,8 +33,8 @@ risk/stats/stat_tests.py,sha256=tj0ri9w89_1fsjGLuafTWpfBEwZXpSLn7Ej2aAQ5lxk,1177
 risk/stats/permutation/__init__.py,sha256=OLmYLm2uj96hPsSaUs0vUqFYw6Thwch_aHtpL7L0ZFw,127
 risk/stats/permutation/permutation.py,sha256=BWjgdBpLVcHvmwHy0bmD4aJFccxifNBSrrCBPppyKf4,10569
 risk/stats/permutation/test_functions.py,sha256=KlECWTz1EZ6EPF_OAgHb0uznaIhopiVYb_AKUKuC4no,3120
-risk_network-0.0.9b42.dist-info/LICENSE,sha256=jOtLnuWt7d5Hsx6XXB2QxzrSe2sWWh3NgMfFRetluQM,35147
-risk_network-0.0.9b42.dist-info/METADATA,sha256=vShfLdet9LjI_5lQOuQdcvaG1rIbwbBRO34gF3BFIcw,47627
-risk_network-0.0.9b42.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
-risk_network-0.0.9b42.dist-info/top_level.txt,sha256=NX7C2PFKTvC1JhVKv14DFlFAIFnKc6Lpsu1ZfxvQwVw,5
-risk_network-0.0.9b42.dist-info/RECORD,,
+risk_network-0.0.9b44.dist-info/LICENSE,sha256=jOtLnuWt7d5Hsx6XXB2QxzrSe2sWWh3NgMfFRetluQM,35147
+risk_network-0.0.9b44.dist-info/METADATA,sha256=mLBWb_wyKny6tgHt3xmdlaZgwSic3pVhSg47e-b6O5A,47627
+risk_network-0.0.9b44.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
+risk_network-0.0.9b44.dist-info/top_level.txt,sha256=NX7C2PFKTvC1JhVKv14DFlFAIFnKc6Lpsu1ZfxvQwVw,5
+risk_network-0.0.9b44.dist-info/RECORD,,

{risk_network-0.0.9b42.dist-info → risk_network-0.0.9b44.dist-info}/LICENSE RENAMED Viewed

File without changes

{risk_network-0.0.9b42.dist-info → risk_network-0.0.9b44.dist-info}/WHEEL RENAMED Viewed

File without changes

{risk_network-0.0.9b42.dist-info → risk_network-0.0.9b44.dist-info}/top_level.txt RENAMED Viewed

File without changes

risk-network 0.0.9b42__py3-none-any.whl → 0.0.9b44__py3-none-any.whl

risk-network 0.0.9b42__py3-none-any.whl → 0.0.9b44__py3-none-any.whl