risk-network 0.0.10__py3-none-any.whl → 0.0.11__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
risk/__init__.py CHANGED
@@ -7,4 +7,4 @@ RISK: Regional Inference of Significant Kinships
7
7
 
8
8
  from risk.risk import RISK
9
9
 
10
- __version__ = "0.0.10"
10
+ __version__ = "0.0.11"
@@ -3,76 +3,36 @@ risk/annotations/annotations
3
3
  ~~~~~~~~~~~~~~~~~~~~~~~~~~~~
4
4
  """
5
5
 
6
- import os
7
6
  import re
8
- import zipfile
9
7
  from collections import Counter
10
8
  from itertools import compress
11
9
  from typing import Any, Dict, List, Set
12
10
 
13
11
  import networkx as nx
14
- import nltk
15
12
  import numpy as np
16
13
  import pandas as pd
17
- from nltk.corpus import stopwords
18
- from nltk.stem import WordNetLemmatizer
19
14
  from nltk.tokenize import word_tokenize
15
+ from scipy.sparse import coo_matrix
20
16
 
17
+ from risk.annotations.nltk_setup import setup_nltk_resources
21
18
  from risk.log import logger
22
- from scipy.sparse import coo_matrix
23
19
 
24
20
 
25
- def ensure_nltk_resource(resource: str) -> None:
26
- """Ensure the specified NLTK resource is available."""
27
- # Define the path to the resource within the NLTK data directory
28
- resource_path = f"corpora/{resource}"
29
- # Check if the resource is already available.
30
- try:
31
- nltk.data.find(resource_path)
32
- return
33
- except LookupError:
34
- print(f"Resource '{resource}' not found. Attempting to download...")
35
-
36
- # Download the resource.
37
- nltk.download(resource)
38
- # Check again after downloading.
39
- try:
40
- nltk.data.find(resource_path)
41
- return
42
- except LookupError:
43
- print(f"Resource '{resource}' still not found after download. Checking for a ZIP file...")
44
-
45
- # Look for a ZIP file in all known NLTK data directories.
46
- for data_path in nltk.data.path:
47
- zip_path = os.path.join(data_path, "corpora", f"{resource}.zip")
48
- if os.path.isfile(zip_path):
49
- print(f"Found ZIP file for '{resource}' at: {zip_path}")
50
- target_dir = os.path.join(data_path, "corpora")
51
- with zipfile.ZipFile(zip_path, "r") as z:
52
- z.extractall(path=target_dir)
53
- print(f"Unzipped '{resource}' successfully.")
54
- break # Stop after unzipping the first found ZIP.
55
-
56
- # Final check: Try to check resource one last time. If it fails, raise a LookupError
57
- try:
58
- nltk.data.find(resource_path)
59
- print(f"Resource '{resource}' is now available.")
60
- except LookupError:
61
- raise LookupError(f"Resource '{resource}' could not be found, downloaded, or unzipped.")
62
-
63
-
64
- # Ensure the NLTK stopwords and WordNet resources are available
65
- # punkt is known to have issues with the default download method, so we use a custom function if it fails
66
- try:
67
- ensure_nltk_resource("punkt")
68
- except LookupError:
69
- nltk.download("punkt")
70
- ensure_nltk_resource("stopwords")
71
- ensure_nltk_resource("wordnet")
72
- # Use NLTK's stopwords - load all languages
73
- STOP_WORDS = set(word for lang in stopwords.fileids() for word in stopwords.words(lang))
74
- # Initialize the WordNet lemmatizer, which is used for normalizing words
75
- LEMMATIZER = WordNetLemmatizer()
21
+ def initialize_nltk():
22
+ """Initialize all required NLTK components."""
23
+ setup_nltk_resources()
24
+
25
+ # After resources are available, initialize the components
26
+ from nltk.corpus import stopwords
27
+ from nltk.stem import WordNetLemmatizer
28
+
29
+ global STOP_WORDS, LEMMATIZER
30
+ STOP_WORDS = set(stopwords.words("english"))
31
+ LEMMATIZER = WordNetLemmatizer()
32
+
33
+
34
+ # Initialize NLTK components
35
+ initialize_nltk()
76
36
 
77
37
 
78
38
  def load_annotations(
@@ -0,0 +1,85 @@
1
+ """
2
+ risk/annotations/nltk_setup
3
+ ~~~~~~~~~~~~~~~~~~~~~~~~~~~
4
+ """
5
+
6
+ import os
7
+ import zipfile
8
+ from typing import List, Tuple
9
+
10
+ import nltk
11
+ from nltk.data import find, path as nltk_data_path
12
+
13
+ from risk.log import logger
14
+
15
+
16
+ def setup_nltk_resources(required_resources: List[Tuple[str, str]] = None) -> None:
17
+ """Ensures all required NLTK resources are available and properly extracted.
18
+ Uses NLTK's default paths and mechanisms.
19
+
20
+ Args:
21
+ required_resources (List[Tuple[str, str]], optional): List of required resources
22
+ to download and extract. Each tuple should contain the resource path within
23
+ NLTK data and the package name. Defaults to None.
24
+ """
25
+ if required_resources is None:
26
+ required_resources = [
27
+ ("tokenizers/punkt", "punkt"),
28
+ ("tokenizers/punkt_tab", "punkt_tab"),
29
+ ("corpora/stopwords", "stopwords"),
30
+ ("corpora/wordnet", "wordnet"),
31
+ ]
32
+
33
+ # Process each resource
34
+ for resource_path, package_name in required_resources:
35
+ try:
36
+ # First try to find the resource - this is how NLTK checks if it's available
37
+ find(resource_path)
38
+ except LookupError:
39
+ # Resource not found, download it
40
+ logger.info(f"Downloading missing NLTK resource: {package_name}")
41
+ nltk.download(package_name, quiet=True)
42
+
43
+ # Even if find() succeeded, the resource might be a zip that failed to extract
44
+ # Check if we need to manually extract zips
45
+ verify_and_extract_if_needed(resource_path, package_name)
46
+
47
+
48
+ def verify_and_extract_if_needed(resource_path: str, package_name: str) -> None:
49
+ """Verifies if the resource is properly extracted and extracts if needed. Respects
50
+ NLTK's directory structure where the extracted content should be in the same directory
51
+ as the zip file.
52
+
53
+ Args:
54
+ resource_path (str): Path to the resource within NLTK data.
55
+ package_name (str): Name of the NLTK package.
56
+ """
57
+ # Get the directory and base name from the resource path
58
+ path_parts = resource_path.split("/")
59
+ resource_type = path_parts[0] # 'corpora', 'tokenizers', etc.
60
+ resource_name = path_parts[-1] # 'wordnet', 'punkt', etc.
61
+
62
+ # Check all NLTK data directories
63
+ for base in nltk_data_path:
64
+ # For resource paths like 'corpora/wordnet', the zip file is at '~/nltk_data/corpora/wordnet.zip'
65
+ # and the extracted directory should be at '~/nltk_data/corpora/wordnet'
66
+ resource_dir = os.path.join(base, resource_type)
67
+ zip_path = os.path.join(resource_dir, f"{resource_name}.zip")
68
+ folder_path = os.path.join(resource_dir, resource_name)
69
+
70
+ # If zip exists but folder doesn't, extraction is needed
71
+ if os.path.exists(zip_path) and not os.path.exists(folder_path):
72
+ logger.info(f"Found unextracted zip for {package_name}, extracting...")
73
+ try:
74
+ with zipfile.ZipFile(zip_path, "r") as zf:
75
+ # Extract files to the same directory where the zip file is located
76
+ zf.extractall(path=resource_dir)
77
+
78
+ if os.path.exists(folder_path):
79
+ logger.info(f"Successfully extracted {package_name}")
80
+ else:
81
+ logger.warning(
82
+ f"Extraction completed but resource directory not found for {package_name}"
83
+ )
84
+ except Exception as e:
85
+ logger.error(f"Failed to extract {package_name}: {e}")
risk/risk.py CHANGED
@@ -3,14 +3,13 @@ risk/risk
3
3
  ~~~~~~~~~
4
4
  """
5
5
 
6
- from risk.network import NetworkIO
7
6
  from risk.annotations import AnnotationsIO
7
+ from risk.log import params, set_global_verbosity
8
8
  from risk.neighborhoods import NeighborhoodsAPI
9
+ from risk.network import NetworkIO
9
10
  from risk.network.graph import GraphAPI
10
11
  from risk.network.plotter import PlotterAPI
11
12
 
12
- from risk.log import params, set_global_verbosity
13
-
14
13
 
15
14
  class RISK(NetworkIO, AnnotationsIO, NeighborhoodsAPI, GraphAPI, PlotterAPI):
16
15
  """RISK: A class for network analysis and visualization.
@@ -8,6 +8,7 @@ from scipy.sparse import csr_matrix
8
8
 
9
9
  # NOTE: Cython optimizations provided minimal performance benefits.
10
10
  # The final version with Cython is archived in the `cython_permutation` branch.
11
+
11
12
  # DISPATCH_TEST_FUNCTIONS can be found at the end of the file.
12
13
 
13
14
 
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.2
2
2
  Name: risk-network
3
- Version: 0.0.10
3
+ Version: 0.0.11
4
4
  Summary: A Python package for biological network analysis
5
5
  Author: Ira Horecka
6
6
  Author-email: Ira Horecka <ira89@icloud.com>
@@ -699,7 +699,7 @@ Requires-Dist: leidenalg
699
699
  Requires-Dist: markov_clustering
700
700
  Requires-Dist: matplotlib
701
701
  Requires-Dist: networkx
702
- Requires-Dist: nltk==3.8.1
702
+ Requires-Dist: nltk
703
703
  Requires-Dist: numpy
704
704
  Requires-Dist: openpyxl
705
705
  Requires-Dist: pandas
@@ -1,8 +1,9 @@
1
- risk/__init__.py,sha256=256SpbomSJ88a12A8DmDUsPOxlkwJ7cqbu7E4FHrcw4,120
2
- risk/risk.py,sha256=s827_lRknFseOP9O4zW8sP-IcCd2EzrpV_tnVY_tz5s,1104
1
+ risk/__init__.py,sha256=FJYPkeBx_fYMZxCzecYrubpT9mJP2L2GpAs-kg7rhQY,120
2
+ risk/risk.py,sha256=7Yu_Q3bRS05tMQyAyt3WYqVKphUpBo3DqpyrfjF9yC4,1103
3
3
  risk/annotations/__init__.py,sha256=parsbcux1U4urpUqh9AdzbDWuLj9HlMidycMPkpSQFo,179
4
- risk/annotations/annotations.py,sha256=mDgW5X4wck7oQqcMOw1T0XNkFZnSm1vx3MKIUX-FABI,16505
4
+ risk/annotations/annotations.py,sha256=KtFyCiCnoAkhin3HKDBtkNcz5imjpysrmEfQKUwyqh8,14737
5
5
  risk/annotations/io.py,sha256=z1AJySsU-KL_IYuHa7j3nvuczmOHgK3WfaQ4TRunvrA,10499
6
+ risk/annotations/nltk_setup.py,sha256=IvuyO3WkrmIg4gz1vsfjxUWBt9Nk-XxkQknPiFRORHE,3533
6
7
  risk/log/__init__.py,sha256=7LxDysQu7doi0LAvlY2YbjN6iJH0fNknqy8lSLgeljo,217
7
8
  risk/log/console.py,sha256=PgjyEvyhYLUSHXPUKEqOmxsDsfrjPICIgqo_cAHq0N8,4575
8
9
  risk/log/parameters.py,sha256=VtwfMzLU1xI4yji3-Ch5vHjH-KdwTfwaEMmi7hFQTs0,5716
@@ -32,9 +33,9 @@ risk/stats/significance.py,sha256=6cKv2xBQXWTHZ6HpNWIqlNfKKS5pG_BcCUdMM3r_zw4,73
32
33
  risk/stats/stat_tests.py,sha256=tj0ri9w89_1fsjGLuafTWpfBEwZXpSLn7Ej2aAQ5lxk,11776
33
34
  risk/stats/permutation/__init__.py,sha256=OLmYLm2uj96hPsSaUs0vUqFYw6Thwch_aHtpL7L0ZFw,127
34
35
  risk/stats/permutation/permutation.py,sha256=BWjgdBpLVcHvmwHy0bmD4aJFccxifNBSrrCBPppyKf4,10569
35
- risk/stats/permutation/test_functions.py,sha256=KlECWTz1EZ6EPF_OAgHb0uznaIhopiVYb_AKUKuC4no,3120
36
- risk_network-0.0.10.dist-info/LICENSE,sha256=jOtLnuWt7d5Hsx6XXB2QxzrSe2sWWh3NgMfFRetluQM,35147
37
- risk_network-0.0.10.dist-info/METADATA,sha256=MbLHMXmOv8iTUZLm8S4EKj2uDB419weRpRvWIyBKSCM,46966
38
- risk_network-0.0.10.dist-info/WHEEL,sha256=52BFRY2Up02UkjOa29eZOS2VxUrpPORXg1pkohGGUS8,91
39
- risk_network-0.0.10.dist-info/top_level.txt,sha256=NX7C2PFKTvC1JhVKv14DFlFAIFnKc6Lpsu1ZfxvQwVw,5
40
- risk_network-0.0.10.dist-info/RECORD,,
36
+ risk/stats/permutation/test_functions.py,sha256=0hcv18zqhhh2njWhUb1Yl-5PiFCYd4jX-HaY5hFMz4I,3121
37
+ risk_network-0.0.11.dist-info/LICENSE,sha256=jOtLnuWt7d5Hsx6XXB2QxzrSe2sWWh3NgMfFRetluQM,35147
38
+ risk_network-0.0.11.dist-info/METADATA,sha256=XmrzSj1VcALUEiN3g0JqxDm5EM8KB1jR_B8Y7oIXQ5Q,46959
39
+ risk_network-0.0.11.dist-info/WHEEL,sha256=52BFRY2Up02UkjOa29eZOS2VxUrpPORXg1pkohGGUS8,91
40
+ risk_network-0.0.11.dist-info/top_level.txt,sha256=NX7C2PFKTvC1JhVKv14DFlFAIFnKc6Lpsu1ZfxvQwVw,5
41
+ risk_network-0.0.11.dist-info/RECORD,,