pythonflex 0.2.3__tar.gz → 0.2.4__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {pythonflex-0.2.3 → pythonflex-0.2.4}/PKG-INFO +1 -1
- {pythonflex-0.2.3 → pythonflex-0.2.4}/pyproject.toml +1 -1
- pythonflex-0.2.4/src/pythonflex/utils.py +101 -0
- pythonflex-0.2.3/src/pythonflex/utils.py +0 -138
- {pythonflex-0.2.3 → pythonflex-0.2.4}/.gitignore +0 -0
- {pythonflex-0.2.3 → pythonflex-0.2.4}/.python-version +0 -0
- {pythonflex-0.2.3 → pythonflex-0.2.4}/README.md +0 -0
- {pythonflex-0.2.3 → pythonflex-0.2.4}/src/pythonflex/__init__.py +0 -0
- {pythonflex-0.2.3 → pythonflex-0.2.4}/src/pythonflex/analysis.py +0 -0
- {pythonflex-0.2.3 → pythonflex-0.2.4}/src/pythonflex/data/dataset/liver_cell_lines_500_genes.csv +0 -0
- {pythonflex-0.2.3 → pythonflex-0.2.4}/src/pythonflex/data/dataset/melanoma_cell_lines_500_genes.csv +0 -0
- {pythonflex-0.2.3 → pythonflex-0.2.4}/src/pythonflex/data/dataset/neuroblastoma_cell_lines_500_genes.csv +0 -0
- {pythonflex-0.2.3 → pythonflex-0.2.4}/src/pythonflex/data/gold_standard/CORUM.parquet +0 -0
- {pythonflex-0.2.3 → pythonflex-0.2.4}/src/pythonflex/data/gold_standard/GOBP.parquet +0 -0
- {pythonflex-0.2.3 → pythonflex-0.2.4}/src/pythonflex/data/gold_standard/PATHWAY.parquet +0 -0
- {pythonflex-0.2.3 → pythonflex-0.2.4}/src/pythonflex/data/gold_standard/corum.csv +0 -0
- {pythonflex-0.2.3 → pythonflex-0.2.4}/src/pythonflex/data/gold_standard/gobp.csv +0 -0
- {pythonflex-0.2.3 → pythonflex-0.2.4}/src/pythonflex/data/gold_standard/pathway.csv +0 -0
- {pythonflex-0.2.3 → pythonflex-0.2.4}/src/pythonflex/examples/basic_usage.py +0 -0
- {pythonflex-0.2.3 → pythonflex-0.2.4}/src/pythonflex/examples/dataset_filtering.py +0 -0
- {pythonflex-0.2.3 → pythonflex-0.2.4}/src/pythonflex/examples/test.py +0 -0
- {pythonflex-0.2.3 → pythonflex-0.2.4}/src/pythonflex/logging_config.py +0 -0
- {pythonflex-0.2.3 → pythonflex-0.2.4}/src/pythonflex/plotting.py +0 -0
- {pythonflex-0.2.3 → pythonflex-0.2.4}/src/pythonflex/preprocessing.py +0 -0
- {pythonflex-0.2.3 → pythonflex-0.2.4}/uv.lock +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: pythonflex
|
|
3
|
-
Version: 0.2.3
|
|
3
|
+
Version: 0.2.4
|
|
4
4
|
Summary: pythonFLEX is a benchmarking toolkit for evaluating CRISPR screen results against biological gold standards. The toolkit computes gene-level and complex-level performance metrics, helping researchers systematically assess the biological relevance and resolution of their CRISPR screening data.
|
|
5
5
|
Author-email: Yasir Demirtaş <tyasird@hotmail.com>
|
|
6
6
|
Requires-Python: >=3.9
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
[project]
|
|
2
2
|
name = "pythonflex"
|
|
3
|
-
version = "0.2.3"
|
|
3
|
+
version = "0.2.4"
|
|
4
4
|
description = "pythonFLEX is a benchmarking toolkit for evaluating CRISPR screen results against biological gold standards. The toolkit computes gene-level and complex-level performance metrics, helping researchers systematically assess the biological relevance and resolution of their CRISPR screening data."
|
|
5
5
|
readme = "README.md"
|
|
6
6
|
authors = [
|
|
@@ -0,0 +1,101 @@
|
|
|
1
|
+
import os
|
|
2
|
+
import re
|
|
3
|
+
import tempfile
|
|
4
|
+
import joblib
|
|
5
|
+
import numpy as np
|
|
6
|
+
import pandas as pd
|
|
7
|
+
|
|
8
|
+
# Constants
|
|
9
|
+
TMP_ROOT = ".tmp"
|
|
10
|
+
VALID_EXTS = {".parquet", ".npy", ".pkl"} # Removed .feather
|
|
11
|
+
|
|
12
|
+
# Helper to sanitize names (make filesystem-safe)
|
|
13
|
+
def _sanitize(name):
|
|
14
|
+
if not name:
|
|
15
|
+
return "data"
|
|
16
|
+
# Replace forbidden/problematic chars with '_', collapse multiples, strip edges
|
|
17
|
+
safe = re.sub(r'[<>:"/\\|?*$,\s]+', '_', str(name).strip())
|
|
18
|
+
safe = re.sub(r'_+', '_', safe).strip('_')
|
|
19
|
+
return safe if safe else "data"
|
|
20
|
+
|
|
21
|
+
# Helper to get safe path
|
|
22
|
+
def _safe_path(category, name=None, ext=".pkl"):
    """Build the on-disk path for an item and make sure its folder exists.

    Args:
        category: Folder name below ``TMP_ROOT``; passed through ``_sanitize``.
        name: Item name; passed through ``_sanitize``. A falsy value yields
            the stem ``"data"``.
        ext: File extension (including the dot) appended to the stem.

    Returns:
        ``TMP_ROOT/<category>/<name><ext>`` as a string. The category folder
        is created as a side effect if it is missing.
    """
    folder = os.path.join(TMP_ROOT, _sanitize(category))
    os.makedirs(folder, exist_ok=True)
    stem = "data" if not name else _sanitize(name)
    return os.path.join(folder, f"{stem}{ext}")
|
|
28
|
+
|
|
29
|
+
# Save function - Parquet for DataFrames
|
|
30
|
+
def dsave(data, category, name=None, path=None):
    """Persist ``data`` as ``TMP_ROOT/<category>/<name><ext>``.

    The on-disk format follows the type of ``data``:
    ``pandas.DataFrame`` -> Parquet, ``numpy.ndarray`` -> ``.npy`` (written
    with ``allow_pickle=False``), anything else -> joblib pickle.

    When ``data`` is a dict and ``name`` is None, each value is saved
    separately under its key instead of storing the dict as one object.

    Args:
        data: Object to store.
        category: Sub-folder name (sanitized by ``_safe_path``).
        name: Item name; ``_safe_path`` falls back to "data" when it is falsy.
        path: Accepted but not used by this function.

    Raises:
        Any exception raised by the underlying writer or by ``os.replace``.
        The temporary file is removed before the exception propagates.
    """
    # If data is dict and no name, recurse on each item
    if name is None and isinstance(data, dict):
        for key, value in data.items():
            dsave(value, category, key)
        return

    # Choose format based on type
    if isinstance(data, pd.DataFrame):
        ext = ".parquet"
        save_func = lambda p: data.to_parquet(p)
    elif isinstance(data, np.ndarray):
        ext = ".npy"
        save_func = lambda p: np.save(p, data, allow_pickle=False)
    else:
        ext = ".pkl"
        save_func = lambda p: joblib.dump(data, p, compress=0)

    target = _safe_path(category, name, ext)

    # Atomic save: write to a temp file in the same folder, then rename.
    # The suffix matters: np.save appends ".npy" to a path that lacks it,
    # which would put the data in a different file than the one renamed below.
    with tempfile.NamedTemporaryFile(
        dir=os.path.dirname(target), delete=False, suffix=ext
    ) as tf:
        tmp_path = tf.name
    # The handle is closed at this point, so the writer can reopen the path.
    try:
        save_func(tmp_path)
        os.replace(tmp_path, target)
    except BaseException:
        # A leftover temp file carries a valid extension, so a later
        # dload(category) would return it as an item. Remove it.
        try:
            os.unlink(tmp_path)
        except OSError:
            pass
        raise

    # The item now lives in `target`. A copy of the same name in another
    # format (saved earlier with a different data type) is superseded and
    # could otherwise be returned by dload instead of the fresh data.
    for other_ext in VALID_EXTS:
        if other_ext == ext:
            continue
        try:
            os.remove(_safe_path(category, name, other_ext))
        except OSError:
            pass  # normally: no such file
|
|
56
|
+
|
|
57
|
+
# Load function - Parquet for DataFrames
|
|
58
|
+
def dload(category, name=None, path=None):
    """Load one item, or every item, previously stored with ``dsave``.

    Args:
        category: Sub-folder name below ``TMP_ROOT`` (sanitized).
        name: Item to load. When None, all readable items of the category
            are returned as ``{file stem: object}``.
        path: Accepted but not used by this function.

    Returns:
        The loaded object, a dict of objects when ``name`` is None, or ``{}``
        when the category folder does not exist or no readable file is found.
        ``.npy`` and ``.pkl`` files are opened with ``mmap_mode="r"``.

    Files that fail to load with ``EOFError``, ``ValueError`` or ``OSError``
    are reported with a warning and deleted.

    NOTE: ``.pkl`` files are unpickled by ``joblib.load``; only load cache
    folders whose contents you trust.
    """
    # Fixed format preference. VALID_EXTS is a set, and iterating a set of
    # strings can yield a different order in different interpreter runs
    # (string hashing is randomized), so it only serves as a filter here.
    load_order = [e for e in (".parquet", ".npy", ".pkl") if e in VALID_EXTS]

    def read(file_path, ext):
        # Single place that maps an extension to its reader.
        if ext == ".parquet":
            return pd.read_parquet(file_path)
        if ext == ".npy":
            return np.load(file_path, mmap_mode="r")
        return joblib.load(file_path, mmap_mode="r")

    def discard(file_path):
        # Best effort: failing to delete a broken file must not abort the load.
        try:
            os.remove(file_path)
        except OSError:
            pass

    dir_path = os.path.join(TMP_ROOT, _sanitize(category))

    if not os.path.exists(dir_path):
        return {}

    if name is None:
        # Load all in category as dict
        candidates = []
        for filename in os.listdir(dir_path):
            for rank, ext in enumerate(load_order):
                if filename.endswith(ext):
                    stem = os.path.splitext(filename)[0]
                    candidates.append((stem, rank, ext, filename))
                    break

        out = {}
        # Sorted by (stem, rank): for a stem present in several formats the
        # preferred format is tried first, matching the single-item lookup.
        for stem, _rank, ext, filename in sorted(candidates):
            if stem in out:
                continue
            full_path = os.path.join(dir_path, filename)
            try:
                out[stem] = read(full_path, ext)
            except (EOFError, ValueError, OSError):
                print(f"Warning: '{full_path}' is corrupted. Skipping...")
                discard(full_path)
        return out

    # Load specific name - try extensions in order
    for ext in load_order:
        target = _safe_path(category, name, ext)
        if not os.path.exists(target):
            continue
        try:
            return read(target, ext)
        except (EOFError, ValueError, OSError) as e:
            print(f"Warning: '{target}' is corrupted ({e}). Trying next format...")
            discard(target)

    print(f"Warning: No valid file found for {category}/{name}")
    return {}
|
|
@@ -1,138 +0,0 @@
|
|
|
1
|
-
import os
|
|
2
|
-
import re # For sanitization (built-in, minimal regex)
|
|
3
|
-
import tempfile
|
|
4
|
-
import joblib
|
|
5
|
-
import numpy as np
|
|
6
|
-
import pandas as pd
|
|
7
|
-
|
|
8
|
-
# Constants - ADD .parquet to valid extensions
|
|
9
|
-
TMP_ROOT = ".tmp"
|
|
10
|
-
VALID_EXTS = {".feather", ".parquet", ".npy", ".pkl"}
|
|
11
|
-
|
|
12
|
-
# REMOVE THE MONKEY PATCH FROM HERE - it should be in your main script!
|
|
13
|
-
|
|
14
|
-
# Helper to sanitize names (make filesystem-safe)
|
|
15
|
-
def _sanitize(name):
|
|
16
|
-
if not name:
|
|
17
|
-
return "data"
|
|
18
|
-
# Replace forbidden/problematic chars with '_', collapse multiples, strip edges
|
|
19
|
-
safe = re.sub(r'[<>:"/\\|?*$,\s]+', '_', str(name).strip())
|
|
20
|
-
safe = re.sub(r'_+', '_', safe).strip('_')
|
|
21
|
-
return safe if safe else "data"
|
|
22
|
-
|
|
23
|
-
# Helper to get safe path
|
|
24
|
-
def _safe_path(category, name=None, ext=".pkl"):
    """Build the on-disk path for an item and make sure its folder exists.

    Args:
        category: Folder name below ``TMP_ROOT``; passed through ``_sanitize``.
        name: Item name; passed through ``_sanitize``. A falsy value yields
            the stem ``"data"``.
        ext: File extension (including the dot) appended to the stem.

    Returns:
        ``TMP_ROOT/<category>/<name><ext>`` as a string. The category folder
        is created as a side effect if it is missing.
    """
    folder = os.path.join(TMP_ROOT, _sanitize(category))
    os.makedirs(folder, exist_ok=True)
    stem = "data" if not name else _sanitize(name)
    return os.path.join(folder, f"{stem}{ext}")
|
|
30
|
-
|
|
31
|
-
# Save function - with built-in parquet fallback
|
|
32
|
-
def dsave(data, category, name=None, path=None):  # 'path' ignored for compatibility with old code
    """Persist ``data`` as ``TMP_ROOT/<category>/<name><ext>``.

    The on-disk format follows the type of ``data``:

    * ``pandas.DataFrame`` -> Feather; if ``to_feather`` raises a
      ``ValueError`` whose message contains "feather does not support
      serializing", the frame is written as Parquet instead.
    * ``numpy.ndarray`` -> ``.npy`` (written with ``allow_pickle=False``).
    * anything else -> joblib pickle.

    When ``data`` is a dict and ``name`` is None, each value is saved
    separately under its key instead of storing the dict as one object.

    Args:
        data: Object to store.
        category: Sub-folder name (sanitized by ``_safe_path``).
        name: Item name; ``_safe_path`` falls back to "data" when it is falsy.
        path: Accepted but not used by this function.

    Raises:
        Any exception raised by the underlying writer or by ``os.replace``
        (other than the Feather case described above). Temporary files are
        removed before the exception propagates.
    """

    def write_atomic(ext, write):
        # Write via a temp file in the target folder, then rename over the
        # target. The temp file carries the real extension because np.save
        # appends ".npy" to a path that lacks it; without the suffix the
        # array would land in "<tmp>.npy" and the empty temp file would be
        # the one renamed to the target.
        target = _safe_path(category, name, ext)
        with tempfile.NamedTemporaryFile(
            dir=os.path.dirname(target), delete=False, suffix=ext
        ) as tf:
            tmp_path = tf.name
        # The handle is closed at this point, so the writer can reopen the path.
        try:
            write(tmp_path)
            os.replace(tmp_path, target)  # Atomic move
        except BaseException:
            try:
                os.unlink(tmp_path)
            except OSError:
                pass
            raise
        # A copy of the same name in another format is superseded; dload
        # tries ".parquet" before ".feather", so a stale one would be
        # returned instead of the data just written.
        for other_ext in VALID_EXTS:
            if other_ext == ext:
                continue
            try:
                os.remove(_safe_path(category, name, other_ext))
            except OSError:
                pass  # normally: no such file

    # If data is dict and no name, recurse on each item
    if name is None and isinstance(data, dict):
        for key, value in data.items():
            dsave(value, category, key)
        return

    if isinstance(data, pd.DataFrame):
        # Try Feather first, fall back to Parquet if Feather cannot encode it
        try:
            write_atomic(".feather", data.to_feather)
        except ValueError as e:
            if "feather does not support serializing" not in str(e):
                raise
            print(f"Feather failed for {name}, using Parquet instead")
            write_atomic(".parquet", data.to_parquet)
        return

    if isinstance(data, np.ndarray):
        write_atomic(".npy", lambda p: np.save(p, data, allow_pickle=False))
    else:
        write_atomic(".pkl", lambda p: joblib.dump(data, p, compress=0))
|
|
85
|
-
|
|
86
|
-
# Load function - with parquet support and better error handling
|
|
87
|
-
def dload(category, name=None, path=None):  # 'path' ignored for compatibility
    """Load one item, or every item, previously stored with ``dsave``.

    Args:
        category: Sub-folder name below ``TMP_ROOT`` (sanitized).
        name: Item to load. When None, all readable items of the category
            are returned as ``{file stem: object}``.
        path: Accepted but not used by this function.

    Returns:
        The loaded object, a dict of objects when ``name`` is None, or ``{}``
        when the category folder does not exist or no readable file is found.
        ``.npy`` and ``.pkl`` files are opened with ``mmap_mode="r"``.

    Files that fail to load with ``EOFError``, ``ValueError`` or ``OSError``
    are reported with a warning and deleted.

    NOTE: ``.pkl`` files are unpickled by ``joblib.load``; only load cache
    folders whose contents you trust.
    """
    # Format preference (parquet first), restricted to the enabled formats.
    preferred_order = [
        e for e in (".parquet", ".feather", ".npy", ".pkl") if e in VALID_EXTS
    ]

    def read(file_path, ext):
        # Single place that maps an extension to its reader.
        if ext == ".feather":
            return pd.read_feather(file_path)
        if ext == ".parquet":
            return pd.read_parquet(file_path)
        if ext == ".npy":
            return np.load(file_path, mmap_mode="r")  # MMap for perf
        return joblib.load(file_path, mmap_mode="r")  # MMap for perf

    def discard(file_path):
        # Best effort: failing to delete a broken file must not abort the load.
        try:
            os.remove(file_path)
        except OSError:
            pass

    dir_path = os.path.join(TMP_ROOT, _sanitize(category))

    if not os.path.exists(dir_path):
        return {}

    if name is None:
        # Load all in category as dict
        candidates = []
        for filename in os.listdir(dir_path):
            for rank, ext in enumerate(preferred_order):
                if filename.endswith(ext):
                    stem = os.path.splitext(filename)[0]  # Key from filename (without ext)
                    candidates.append((stem, rank, ext, filename))
                    break

        out = {}
        # Sorted by (stem, rank): for a stem present in several formats the
        # preferred format is tried first, matching the single-item lookup.
        for stem, _rank, ext, filename in sorted(candidates):
            if stem in out:
                continue
            full_path = os.path.join(dir_path, filename)
            try:
                out[stem] = read(full_path, ext)
            except (EOFError, ValueError, OSError):
                print(f"Warning: '{full_path}' is corrupted. Skipping...")
                discard(full_path)  # Delete corrupted file
        return out

    # Load specific name (try extensions in order - prefer parquet for reliability)
    for ext in preferred_order:
        target = _safe_path(category, name, ext)
        if not os.path.exists(target):
            continue
        try:
            return read(target, ext)
        except (EOFError, ValueError, OSError) as e:
            print(f"Warning: '{target}' is corrupted ({e}). Trying next format...")
            discard(target)  # Delete corrupted file, then try the next format

    print(f"Warning: No valid file found for {category}/{name}")
    return {}
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{pythonflex-0.2.3 → pythonflex-0.2.4}/src/pythonflex/data/dataset/liver_cell_lines_500_genes.csv
RENAMED
|
File without changes
|
{pythonflex-0.2.3 → pythonflex-0.2.4}/src/pythonflex/data/dataset/melanoma_cell_lines_500_genes.csv
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|