risk-network 0.0.10__py3-none-any.whl → 0.0.11__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- risk/__init__.py +1 -1
- risk/annotations/annotations.py +17 -57
- risk/annotations/nltk_setup.py +85 -0
- risk/risk.py +2 -3
- risk/stats/permutation/test_functions.py +1 -0
- {risk_network-0.0.10.dist-info → risk_network-0.0.11.dist-info}/METADATA +2 -2
- {risk_network-0.0.10.dist-info → risk_network-0.0.11.dist-info}/RECORD +10 -9
- {risk_network-0.0.10.dist-info → risk_network-0.0.11.dist-info}/LICENSE +0 -0
- {risk_network-0.0.10.dist-info → risk_network-0.0.11.dist-info}/WHEEL +0 -0
- {risk_network-0.0.10.dist-info → risk_network-0.0.11.dist-info}/top_level.txt +0 -0
risk/__init__.py
CHANGED
risk/annotations/annotations.py
CHANGED
@@ -3,76 +3,36 @@ risk/annotations/annotations
|
|
3
3
|
~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
4
4
|
"""
|
5
5
|
|
6
|
-
import os
|
7
6
|
import re
|
8
|
-
import zipfile
|
9
7
|
from collections import Counter
|
10
8
|
from itertools import compress
|
11
9
|
from typing import Any, Dict, List, Set
|
12
10
|
|
13
11
|
import networkx as nx
|
14
|
-
import nltk
|
15
12
|
import numpy as np
|
16
13
|
import pandas as pd
|
17
|
-
from nltk.corpus import stopwords
|
18
|
-
from nltk.stem import WordNetLemmatizer
|
19
14
|
from nltk.tokenize import word_tokenize
|
15
|
+
from scipy.sparse import coo_matrix
|
20
16
|
|
17
|
+
from risk.annotations.nltk_setup import setup_nltk_resources
|
21
18
|
from risk.log import logger
|
22
|
-
from scipy.sparse import coo_matrix
|
23
19
|
|
24
20
|
|
25
|
-
def
|
26
|
-
"""
|
27
|
-
|
28
|
-
|
29
|
-
#
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
nltk.data.find(resource_path)
|
41
|
-
return
|
42
|
-
except LookupError:
|
43
|
-
print(f"Resource '{resource}' still not found after download. Checking for a ZIP file...")
|
44
|
-
|
45
|
-
# Look for a ZIP file in all known NLTK data directories.
|
46
|
-
for data_path in nltk.data.path:
|
47
|
-
zip_path = os.path.join(data_path, "corpora", f"{resource}.zip")
|
48
|
-
if os.path.isfile(zip_path):
|
49
|
-
print(f"Found ZIP file for '{resource}' at: {zip_path}")
|
50
|
-
target_dir = os.path.join(data_path, "corpora")
|
51
|
-
with zipfile.ZipFile(zip_path, "r") as z:
|
52
|
-
z.extractall(path=target_dir)
|
53
|
-
print(f"Unzipped '{resource}' successfully.")
|
54
|
-
break # Stop after unzipping the first found ZIP.
|
55
|
-
|
56
|
-
# Final check: Try to check resource one last time. If it fails, raise a LookupError.
|
57
|
-
try:
|
58
|
-
nltk.data.find(resource_path)
|
59
|
-
print(f"Resource '{resource}' is now available.")
|
60
|
-
except LookupError:
|
61
|
-
raise LookupError(f"Resource '{resource}' could not be found, downloaded, or unzipped.")
|
62
|
-
|
63
|
-
|
64
|
-
# Ensure the NLTK stopwords and WordNet resources are available
|
65
|
-
# punkt is known to have issues with the default download method, so we use a custom function if it fails
|
66
|
-
try:
|
67
|
-
ensure_nltk_resource("punkt")
|
68
|
-
except LookupError:
|
69
|
-
nltk.download("punkt")
|
70
|
-
ensure_nltk_resource("stopwords")
|
71
|
-
ensure_nltk_resource("wordnet")
|
72
|
-
# Use NLTK's stopwords - load all languages
|
73
|
-
STOP_WORDS = set(word for lang in stopwords.fileids() for word in stopwords.words(lang))
|
74
|
-
# Initialize the WordNet lemmatizer, which is used for normalizing words
|
75
|
-
LEMMATIZER = WordNetLemmatizer()
|
21
|
+
def initialize_nltk():
    """Prepare the NLTK resources and module-level helpers used by this module.

    Runs ``setup_nltk_resources`` first, then binds two module globals:
    ``STOP_WORDS`` (a set built from the English stopword list) and
    ``LEMMATIZER`` (a ``WordNetLemmatizer`` instance).
    """
    global STOP_WORDS, LEMMATIZER

    setup_nltk_resources()

    # Imported here, after setup has run, rather than at module import time
    from nltk.corpus import stopwords
    from nltk.stem import WordNetLemmatizer

    english_words = stopwords.words("english")
    STOP_WORDS = set(english_words)
    LEMMATIZER = WordNetLemmatizer()


# Initialize NLTK components
initialize_nltk()
|
76
36
|
|
77
37
|
|
78
38
|
def load_annotations(
|
@@ -0,0 +1,85 @@
|
|
1
|
+
"""
|
2
|
+
risk/annotations/nltk_setup
|
3
|
+
~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
4
|
+
"""
|
5
|
+
|
6
|
+
import os
|
7
|
+
import zipfile
|
8
|
+
from typing import List, Tuple
|
9
|
+
|
10
|
+
import nltk
|
11
|
+
from nltk.data import find, path as nltk_data_path
|
12
|
+
|
13
|
+
from risk.log import logger
|
14
|
+
|
15
|
+
|
16
|
+
def setup_nltk_resources(required_resources: List[Tuple[str, str]] = None) -> None:
    """Ensures all required NLTK resources are available and properly extracted.
    Uses NLTK's default paths and mechanisms.

    Each resource is first looked up with NLTK's ``find``. A resource that is not
    found is downloaded with ``nltk.download``; if that call reports failure, a
    warning is logged instead of the failure passing silently. Every resource is
    then handed to ``verify_and_extract_if_needed``.

    Args:
        required_resources (List[Tuple[str, str]], optional): List of required resources
            to download and extract. Each tuple should contain the resource path within
            NLTK data and the package name. Defaults to None, in which case punkt,
            punkt_tab, stopwords, and wordnet are used.
    """
    if required_resources is None:
        required_resources = [
            ("tokenizers/punkt", "punkt"),
            ("tokenizers/punkt_tab", "punkt_tab"),
            ("corpora/stopwords", "stopwords"),
            ("corpora/wordnet", "wordnet"),
        ]

    # Process each resource
    for resource_path, package_name in required_resources:
        try:
            # First try to find the resource - this is how NLTK checks if it's available
            find(resource_path)
        except LookupError:
            # Resource not found, download it
            logger.info(f"Downloading missing NLTK resource: {package_name}")
            # NOTE(review): nltk.download is expected to signal failure through a falsy
            # return value rather than an exception - surface that instead of ignoring it
            downloaded = nltk.download(package_name, quiet=True)
            if not downloaded:
                logger.warning(f"Download of NLTK resource {package_name} did not succeed")

        # Even if find() succeeded, the resource might be a zip that failed to extract
        # Check if we need to manually extract zips
        verify_and_extract_if_needed(resource_path, package_name)
|
46
|
+
|
47
|
+
|
48
|
+
def verify_and_extract_if_needed(resource_path: str, package_name: str) -> None:
    """Unpack a resource's zip archive wherever its extracted folder is missing.

    For each NLTK data directory, the archive is expected next to the folder it
    expands to (e.g. ``corpora/wordnet.zip`` beside ``corpora/wordnet``). When the
    archive exists and the folder does not, the archive is extracted into its own
    directory. Extraction errors are logged, not raised.

    Args:
        resource_path (str): Path to the resource within NLTK data.
        package_name (str): Name of the NLTK package.
    """
    # First segment is the category ('corpora', 'tokenizers', ...), last is the resource name
    segments = resource_path.split("/")
    category, leaf = segments[0], segments[-1]

    for data_root in nltk_data_path:
        parent_dir = os.path.join(data_root, category)
        archive = os.path.join(parent_dir, f"{leaf}.zip")
        extracted = os.path.join(parent_dir, leaf)

        # Nothing to do unless the zip is present and its folder is absent
        if not os.path.exists(archive) or os.path.exists(extracted):
            continue

        logger.info(f"Found unextracted zip for {package_name}, extracting...")
        try:
            with zipfile.ZipFile(archive, "r") as bundle:
                # Unpack alongside the archive itself
                bundle.extractall(path=parent_dir)

            if not os.path.exists(extracted):
                logger.warning(
                    f"Extraction completed but resource directory not found for {package_name}"
                )
            else:
                logger.info(f"Successfully extracted {package_name}")
        except Exception as err:
            logger.error(f"Failed to extract {package_name}: {err}")
|
risk/risk.py
CHANGED
@@ -3,14 +3,13 @@ risk/risk
|
|
3
3
|
~~~~~~~~~
|
4
4
|
"""
|
5
5
|
|
6
|
-
from risk.network import NetworkIO
|
7
6
|
from risk.annotations import AnnotationsIO
|
7
|
+
from risk.log import params, set_global_verbosity
|
8
8
|
from risk.neighborhoods import NeighborhoodsAPI
|
9
|
+
from risk.network import NetworkIO
|
9
10
|
from risk.network.graph import GraphAPI
|
10
11
|
from risk.network.plotter import PlotterAPI
|
11
12
|
|
12
|
-
from risk.log import params, set_global_verbosity
|
13
|
-
|
14
13
|
|
15
14
|
class RISK(NetworkIO, AnnotationsIO, NeighborhoodsAPI, GraphAPI, PlotterAPI):
|
16
15
|
"""RISK: A class for network analysis and visualization.
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.2
|
2
2
|
Name: risk-network
|
3
|
-
Version: 0.0.10
|
3
|
+
Version: 0.0.11
|
4
4
|
Summary: A Python package for biological network analysis
|
5
5
|
Author: Ira Horecka
|
6
6
|
Author-email: Ira Horecka <ira89@icloud.com>
|
@@ -699,7 +699,7 @@ Requires-Dist: leidenalg
|
|
699
699
|
Requires-Dist: markov_clustering
|
700
700
|
Requires-Dist: matplotlib
|
701
701
|
Requires-Dist: networkx
|
702
|
-
Requires-Dist: nltk
|
702
|
+
Requires-Dist: nltk
|
703
703
|
Requires-Dist: numpy
|
704
704
|
Requires-Dist: openpyxl
|
705
705
|
Requires-Dist: pandas
|
@@ -1,8 +1,9 @@
|
|
1
|
-
risk/__init__.py,sha256=
|
2
|
-
risk/risk.py,sha256=
|
1
|
+
risk/__init__.py,sha256=FJYPkeBx_fYMZxCzecYrubpT9mJP2L2GpAs-kg7rhQY,120
|
2
|
+
risk/risk.py,sha256=7Yu_Q3bRS05tMQyAyt3WYqVKphUpBo3DqpyrfjF9yC4,1103
|
3
3
|
risk/annotations/__init__.py,sha256=parsbcux1U4urpUqh9AdzbDWuLj9HlMidycMPkpSQFo,179
|
4
|
-
risk/annotations/annotations.py,sha256=
|
4
|
+
risk/annotations/annotations.py,sha256=KtFyCiCnoAkhin3HKDBtkNcz5imjpysrmEfQKUwyqh8,14737
|
5
5
|
risk/annotations/io.py,sha256=z1AJySsU-KL_IYuHa7j3nvuczmOHgK3WfaQ4TRunvrA,10499
|
6
|
+
risk/annotations/nltk_setup.py,sha256=IvuyO3WkrmIg4gz1vsfjxUWBt9Nk-XxkQknPiFRORHE,3533
|
6
7
|
risk/log/__init__.py,sha256=7LxDysQu7doi0LAvlY2YbjN6iJH0fNknqy8lSLgeljo,217
|
7
8
|
risk/log/console.py,sha256=PgjyEvyhYLUSHXPUKEqOmxsDsfrjPICIgqo_cAHq0N8,4575
|
8
9
|
risk/log/parameters.py,sha256=VtwfMzLU1xI4yji3-Ch5vHjH-KdwTfwaEMmi7hFQTs0,5716
|
@@ -32,9 +33,9 @@ risk/stats/significance.py,sha256=6cKv2xBQXWTHZ6HpNWIqlNfKKS5pG_BcCUdMM3r_zw4,73
|
|
32
33
|
risk/stats/stat_tests.py,sha256=tj0ri9w89_1fsjGLuafTWpfBEwZXpSLn7Ej2aAQ5lxk,11776
|
33
34
|
risk/stats/permutation/__init__.py,sha256=OLmYLm2uj96hPsSaUs0vUqFYw6Thwch_aHtpL7L0ZFw,127
|
34
35
|
risk/stats/permutation/permutation.py,sha256=BWjgdBpLVcHvmwHy0bmD4aJFccxifNBSrrCBPppyKf4,10569
|
35
|
-
risk/stats/permutation/test_functions.py,sha256=
|
36
|
-
risk_network-0.0.
|
37
|
-
risk_network-0.0.
|
38
|
-
risk_network-0.0.
|
39
|
-
risk_network-0.0.
|
40
|
-
risk_network-0.0.
|
36
|
+
risk/stats/permutation/test_functions.py,sha256=0hcv18zqhhh2njWhUb1Yl-5PiFCYd4jX-HaY5hFMz4I,3121
|
37
|
+
risk_network-0.0.11.dist-info/LICENSE,sha256=jOtLnuWt7d5Hsx6XXB2QxzrSe2sWWh3NgMfFRetluQM,35147
|
38
|
+
risk_network-0.0.11.dist-info/METADATA,sha256=XmrzSj1VcALUEiN3g0JqxDm5EM8KB1jR_B8Y7oIXQ5Q,46959
|
39
|
+
risk_network-0.0.11.dist-info/WHEEL,sha256=52BFRY2Up02UkjOa29eZOS2VxUrpPORXg1pkohGGUS8,91
|
40
|
+
risk_network-0.0.11.dist-info/top_level.txt,sha256=NX7C2PFKTvC1JhVKv14DFlFAIFnKc6Lpsu1ZfxvQwVw,5
|
41
|
+
risk_network-0.0.11.dist-info/RECORD,,
|
File without changes
|
File without changes
|
File without changes
|