pythonflex 0.2.3__tar.gz → 0.2.4__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (25) hide show
  1. {pythonflex-0.2.3 → pythonflex-0.2.4}/PKG-INFO +1 -1
  2. {pythonflex-0.2.3 → pythonflex-0.2.4}/pyproject.toml +1 -1
  3. pythonflex-0.2.4/src/pythonflex/utils.py +101 -0
  4. pythonflex-0.2.3/src/pythonflex/utils.py +0 -138
  5. {pythonflex-0.2.3 → pythonflex-0.2.4}/.gitignore +0 -0
  6. {pythonflex-0.2.3 → pythonflex-0.2.4}/.python-version +0 -0
  7. {pythonflex-0.2.3 → pythonflex-0.2.4}/README.md +0 -0
  8. {pythonflex-0.2.3 → pythonflex-0.2.4}/src/pythonflex/__init__.py +0 -0
  9. {pythonflex-0.2.3 → pythonflex-0.2.4}/src/pythonflex/analysis.py +0 -0
  10. {pythonflex-0.2.3 → pythonflex-0.2.4}/src/pythonflex/data/dataset/liver_cell_lines_500_genes.csv +0 -0
  11. {pythonflex-0.2.3 → pythonflex-0.2.4}/src/pythonflex/data/dataset/melanoma_cell_lines_500_genes.csv +0 -0
  12. {pythonflex-0.2.3 → pythonflex-0.2.4}/src/pythonflex/data/dataset/neuroblastoma_cell_lines_500_genes.csv +0 -0
  13. {pythonflex-0.2.3 → pythonflex-0.2.4}/src/pythonflex/data/gold_standard/CORUM.parquet +0 -0
  14. {pythonflex-0.2.3 → pythonflex-0.2.4}/src/pythonflex/data/gold_standard/GOBP.parquet +0 -0
  15. {pythonflex-0.2.3 → pythonflex-0.2.4}/src/pythonflex/data/gold_standard/PATHWAY.parquet +0 -0
  16. {pythonflex-0.2.3 → pythonflex-0.2.4}/src/pythonflex/data/gold_standard/corum.csv +0 -0
  17. {pythonflex-0.2.3 → pythonflex-0.2.4}/src/pythonflex/data/gold_standard/gobp.csv +0 -0
  18. {pythonflex-0.2.3 → pythonflex-0.2.4}/src/pythonflex/data/gold_standard/pathway.csv +0 -0
  19. {pythonflex-0.2.3 → pythonflex-0.2.4}/src/pythonflex/examples/basic_usage.py +0 -0
  20. {pythonflex-0.2.3 → pythonflex-0.2.4}/src/pythonflex/examples/dataset_filtering.py +0 -0
  21. {pythonflex-0.2.3 → pythonflex-0.2.4}/src/pythonflex/examples/test.py +0 -0
  22. {pythonflex-0.2.3 → pythonflex-0.2.4}/src/pythonflex/logging_config.py +0 -0
  23. {pythonflex-0.2.3 → pythonflex-0.2.4}/src/pythonflex/plotting.py +0 -0
  24. {pythonflex-0.2.3 → pythonflex-0.2.4}/src/pythonflex/preprocessing.py +0 -0
  25. {pythonflex-0.2.3 → pythonflex-0.2.4}/uv.lock +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: pythonflex
3
- Version: 0.2.3
3
+ Version: 0.2.4
4
4
  Summary: pythonFLEX is a benchmarking toolkit for evaluating CRISPR screen results against biological gold standards. The toolkit computes gene-level and complex-level performance metrics, helping researchers systematically assess the biological relevance and resolution of their CRISPR screening data.
5
5
  Author-email: Yasir Demirtaş <tyasird@hotmail.com>
6
6
  Requires-Python: >=3.9
@@ -1,6 +1,6 @@
1
1
  [project]
2
2
  name = "pythonflex"
3
- version = "0.2.3"
3
+ version = "0.2.4"
4
4
  description = "pythonFLEX is a benchmarking toolkit for evaluating CRISPR screen results against biological gold standards. The toolkit computes gene-level and complex-level performance metrics, helping researchers systematically assess the biological relevance and resolution of their CRISPR screening data."
5
5
  readme = "README.md"
6
6
  authors = [
@@ -0,0 +1,101 @@
1
+ import os
2
+ import re
3
+ import tempfile
4
+ import joblib
5
+ import numpy as np
6
+ import pandas as pd
7
+
8
+ # Constants
9
+ TMP_ROOT = ".tmp"
10
+ VALID_EXTS = {".parquet", ".npy", ".pkl"} # Removed .feather
11
+
12
+ # Helper to sanitize names (make filesystem-safe)
13
+ def _sanitize(name):
14
+ if not name:
15
+ return "data"
16
+ # Replace forbidden/problematic chars with '_', collapse multiples, strip edges
17
+ safe = re.sub(r'[<>:"/\\|?*$,\s]+', '_', str(name).strip())
18
+ safe = re.sub(r'_+', '_', safe).strip('_')
19
+ return safe if safe else "data"
20
+
21
+ # Helper to get safe path
22
def _safe_path(category, name=None, ext=".pkl"):
    """Return the cache file path for *category*/*name* with extension *ext*.

    The category directory under ``TMP_ROOT`` is created if it does not
    exist yet. Both path components are passed through ``_sanitize``; a
    missing or empty *name* falls back to the stem ``"data"``.
    """
    folder = os.path.join(TMP_ROOT, _sanitize(category))
    os.makedirs(folder, exist_ok=True)
    if name:
        stem = _sanitize(name)
    else:
        stem = "data"
    return os.path.join(folder, f"{stem}{ext}")
28
+
29
+ # Save function - Parquet for DataFrames
30
def dsave(data, category, name=None, path=None):
    """Save *data* into the cache directory for *category*.

    The on-disk format is chosen from the type of *data*:

    * ``pandas.DataFrame`` -> ``.parquet`` via ``DataFrame.to_parquet``
    * ``numpy.ndarray``    -> ``.npy`` via ``np.save`` (``allow_pickle=False``)
    * anything else        -> ``.pkl`` via ``joblib.dump`` (uncompressed)

    If *data* is a dict and *name* is ``None``, each item is saved
    separately under its key and nothing is written for the dict itself.

    Parameters:
        data: Object to persist.
        category: Cache sub-directory name (sanitized before use).
        name: File stem (sanitized before use); ``None`` or empty means
            ``"data"``.
        path: Accepted but not used by this function.

    The file is first written to a temporary file in the target directory
    and then moved into place with ``os.replace``, so an existing target is
    only replaced by a completely written file. If writing or the move
    fails, the temporary file is removed and the exception is re-raised.
    """
    # If data is dict and no name, recurse on each item
    if name is None and isinstance(data, dict):
        for key, value in data.items():
            dsave(value, category, key)
        return

    # Choose format based on type
    if isinstance(data, pd.DataFrame):
        ext = ".parquet"
        save_func = lambda p: data.to_parquet(p)
    elif isinstance(data, np.ndarray):
        ext = ".npy"
        save_func = lambda p: np.save(p, data, allow_pickle=False)
    else:
        ext = ".pkl"
        save_func = lambda p: joblib.dump(data, p, compress=0)

    target = _safe_path(category, name, ext)

    # Reserve a temp file next to the target. The suffix matters for
    # np.save, which appends ".npy" to names that do not already end in it.
    with tempfile.NamedTemporaryFile(
        dir=os.path.dirname(target), delete=False, suffix=ext
    ) as tf:
        tmp_path = tf.name

    # Leaving the with-block closed the handle, so save_func can reopen it.
    try:
        save_func(tmp_path)
        os.replace(tmp_path, target)
    except BaseException:
        # Do not leave a partial temp file behind: it carries a valid
        # extension and would be picked up by a later load of the category.
        try:
            os.remove(tmp_path)
        except OSError:
            pass
        raise
56
+
57
+ # Load function - Parquet for DataFrames
58
def dload(category, name=None, path=None):
    """Load cached data previously written for *category*.

    Parameters:
        category: Cache sub-directory name (sanitized before use).
        name: File stem to load. When ``None``, every file in the category
            directory with an extension in ``VALID_EXTS`` is loaded.
        path: Accepted but not used by this function.

    Returns:
        * ``{}`` if the category directory does not exist.
        * With ``name=None``: a dict mapping each file stem to its loaded
          object.
        * With a *name*: the loaded object from the first existing file,
          trying ``.parquet``, then ``.npy``, then ``.pkl``; ``{}`` (after a
          warning) if none can be loaded.

    ``.npy`` and ``.pkl`` files are opened with ``mmap_mode="r"``. A file
    whose load raises ``EOFError``, ``ValueError`` or ``OSError`` is
    reported as corrupted and deleted.
    """
    dir_path = os.path.join(TMP_ROOT, _sanitize(category))

    if not os.path.exists(dir_path):
        return {}

    if name is None:
        # Load all in category as dict
        out = {}
        valid_exts = tuple(VALID_EXTS)
        for filename in os.listdir(dir_path):
            if not filename.endswith(valid_exts):
                continue
            k = os.path.splitext(filename)[0]
            full_path = os.path.join(dir_path, filename)
            try:
                if filename.endswith(".parquet"):
                    out[k] = pd.read_parquet(full_path)
                elif filename.endswith(".npy"):
                    out[k] = np.load(full_path, mmap_mode="r")
                elif filename.endswith(".pkl"):
                    out[k] = joblib.load(full_path, mmap_mode="r")
            except (EOFError, ValueError, OSError):
                print(f"Warning: '{full_path}' is corrupted. Skipping...")
                os.remove(full_path)
        return out

    # Load specific name - try extensions in a fixed order. VALID_EXTS is a
    # set, so iterating it directly would give an order that can change
    # between interpreter runs when several formats exist for one name.
    preferred_order = (".parquet", ".npy", ".pkl")
    for ext in preferred_order:
        if ext not in VALID_EXTS:
            continue
        target = _safe_path(category, name, ext)
        if not os.path.exists(target):
            continue
        try:
            if ext == ".parquet":
                return pd.read_parquet(target)
            elif ext == ".npy":
                return np.load(target, mmap_mode="r")
            elif ext == ".pkl":
                return joblib.load(target, mmap_mode="r")
        except (EOFError, ValueError, OSError) as e:
            print(f"Warning: '{target}' is corrupted ({e}). Trying next format...")
            os.remove(target)
            continue

    print(f"Warning: No valid file found for {category}/{name}")
    return {}
@@ -1,138 +0,0 @@
1
- import os
2
- import re # For sanitization (built-in, minimal regex)
3
- import tempfile
4
- import joblib
5
- import numpy as np
6
- import pandas as pd
7
-
8
- # Constants - ADD .parquet to valid extensions
9
- TMP_ROOT = ".tmp"
10
- VALID_EXTS = {".feather", ".parquet", ".npy", ".pkl"}
11
-
12
- # REMOVE THE MONKEY PATCH FROM HERE - it should be in your main script!
13
-
14
- # Helper to sanitize names (make filesystem-safe)
15
- def _sanitize(name):
16
- if not name:
17
- return "data"
18
- # Replace forbidden/problematic chars with '_', collapse multiples, strip edges
19
- safe = re.sub(r'[<>:"/\\|?*$,\s]+', '_', str(name).strip())
20
- safe = re.sub(r'_+', '_', safe).strip('_')
21
- return safe if safe else "data"
22
-
23
- # Helper to get safe path
24
- def _safe_path(category, name=None, ext=".pkl"):
25
- safe_category = _sanitize(category)
26
- dir_path = os.path.join(TMP_ROOT, safe_category)
27
- os.makedirs(dir_path, exist_ok=True)
28
- safe_name = _sanitize(name) if name else "data"
29
- return os.path.join(dir_path, f"{safe_name}{ext}")
30
-
31
- # Save function - with built-in parquet fallback
32
- def dsave(data, category, name=None, path=None): # 'path' ignored for compatibility with old code
33
- # If data is dict and no name, recurse on each item
34
- if name is None and isinstance(data, dict):
35
- for k, v in data.items():
36
- dsave(v, category, k)
37
- return
38
-
39
- # Choose best extension based on type
40
- if isinstance(data, pd.DataFrame):
41
- # Try Feather first, fallback to Parquet if it fails
42
- target_feather = _safe_path(category, name, ".feather")
43
- target_parquet = _safe_path(category, name, ".parquet")
44
-
45
- # Try Feather first
46
- try:
47
- with tempfile.NamedTemporaryFile(dir=os.path.dirname(target_feather), delete=False, suffix='.feather') as tf:
48
- tmp_path = tf.name
49
- tf.close()
50
- data.to_feather(tmp_path)
51
- os.replace(tmp_path, target_feather)
52
- return
53
- except ValueError as e:
54
- if "feather does not support serializing" in str(e):
55
- print(f"Feather failed for {name}, using Parquet instead")
56
- # Clean up failed feather temp file
57
- if os.path.exists(tmp_path):
58
- os.unlink(tmp_path)
59
-
60
- # Save as Parquet
61
- with tempfile.NamedTemporaryFile(dir=os.path.dirname(target_parquet), delete=False, suffix='.parquet') as tf:
62
- tmp_path = tf.name
63
- tf.close()
64
- data.to_parquet(tmp_path)
65
- os.replace(tmp_path, target_parquet)
66
- return
67
- else:
68
- raise
69
-
70
- elif isinstance(data, np.ndarray):
71
- ext = ".npy"
72
- save_func = lambda p: np.save(p, data, allow_pickle=False)
73
- else:
74
- ext = ".pkl"
75
- save_func = lambda p: joblib.dump(data, p, compress=0)
76
-
77
- target = _safe_path(category, name, ext)
78
-
79
- # Atomic save: Write to temp file, then rename
80
- with tempfile.NamedTemporaryFile(dir=os.path.dirname(target), delete=False) as tf:
81
- tmp_path = tf.name
82
- tf.close() # Close so save_func can write
83
- save_func(tmp_path)
84
- os.replace(tmp_path, target) # Atomic move
85
-
86
- # Load function - with parquet support and better error handling
87
- def dload(category, name=None, path=None): # 'path' ignored for compatibility
88
- dir_path = os.path.join(TMP_ROOT, _sanitize(category))
89
-
90
- if not os.path.exists(dir_path):
91
- return {}
92
-
93
- if name is None:
94
- # Load all in category as dict
95
- out = {}
96
- for filename in os.listdir(dir_path):
97
- if not any(filename.endswith(ext) for ext in VALID_EXTS):
98
- continue
99
- k = os.path.splitext(filename)[0] # Key from filename (without ext)
100
- full_path = os.path.join(dir_path, filename)
101
- try:
102
- if filename.endswith(".feather"):
103
- out[k] = pd.read_feather(full_path)
104
- elif filename.endswith(".parquet"):
105
- out[k] = pd.read_parquet(full_path)
106
- elif filename.endswith(".npy"):
107
- out[k] = np.load(full_path, mmap_mode="r") # MMap for perf
108
- elif filename.endswith(".pkl"):
109
- out[k] = joblib.load(full_path, mmap_mode="r") # MMap for perf
110
- except (EOFError, ValueError, OSError):
111
- print(f"Warning: '{full_path}' is corrupted. Skipping...")
112
- os.remove(full_path) # Delete corrupted file
113
- return out
114
-
115
- # Load specific name (try extensions in order - prefer parquet for reliability)
116
- preferred_order = [".parquet", ".feather", ".npy", ".pkl"]
117
-
118
- for ext in preferred_order:
119
- if ext not in VALID_EXTS:
120
- continue
121
- target = _safe_path(category, name, ext)
122
- if os.path.exists(target):
123
- try:
124
- if ext == ".feather":
125
- return pd.read_feather(target)
126
- elif ext == ".parquet":
127
- return pd.read_parquet(target)
128
- elif ext == ".npy":
129
- return np.load(target, mmap_mode="r") # MMap for perf
130
- elif ext == ".pkl":
131
- return joblib.load(target, mmap_mode="r") # MMap for perf
132
- except (EOFError, ValueError, OSError) as e:
133
- print(f"Warning: '{target}' is corrupted ({e}). Trying next format...")
134
- os.remove(target) # Delete corrupted file
135
- continue # Try next format instead of returning {}
136
-
137
- print(f"Warning: No valid file found for {category}/{name}")
138
- return {}
File without changes
File without changes
File without changes
File without changes