pythonflex 0.1.6__tar.gz → 0.2.1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {pythonflex-0.1.6 → pythonflex-0.2.1}/.gitignore +1 -0
- {pythonflex-0.1.6 → pythonflex-0.2.1}/PKG-INFO +1 -1
- {pythonflex-0.1.6 → pythonflex-0.2.1}/pyproject.toml +1 -1
- {pythonflex-0.1.6 → pythonflex-0.2.1}/src/pythonflex/__init__.py +2 -2
- {pythonflex-0.1.6 → pythonflex-0.2.1}/src/pythonflex/analysis.py +101 -27
- {pythonflex-0.1.6 → pythonflex-0.2.1}/src/pythonflex/examples/basic_usage.py +2 -3
- pythonflex-0.2.1/src/pythonflex/examples/test.py +104 -0
- pythonflex-0.2.1/src/pythonflex/plotting.py +672 -0
- {pythonflex-0.1.6 → pythonflex-0.2.1}/src/pythonflex/utils.py +22 -5
- pythonflex-0.1.6/.vscode/settings.json +0 -5
- pythonflex-0.1.6/src/pythonflex/plotting.py +0 -512
- pythonflex-0.1.6/test/test_corrected_auc.py +0 -33
- pythonflex-0.1.6/test/test_inputs.py +0 -44
- {pythonflex-0.1.6 → pythonflex-0.2.1}/.python-version +0 -0
- {pythonflex-0.1.6 → pythonflex-0.2.1}/README.md +0 -0
- {pythonflex-0.1.6 → pythonflex-0.2.1}/src/pythonflex/data/dataset/liver_cell_lines_500_genes.csv +0 -0
- {pythonflex-0.1.6 → pythonflex-0.2.1}/src/pythonflex/data/dataset/melanoma_cell_lines_500_genes.csv +0 -0
- {pythonflex-0.1.6 → pythonflex-0.2.1}/src/pythonflex/data/dataset/neuroblastoma_cell_lines_500_genes.csv +0 -0
- {pythonflex-0.1.6 → pythonflex-0.2.1}/src/pythonflex/data/gold_standard/CORUM.parquet +0 -0
- {pythonflex-0.1.6 → pythonflex-0.2.1}/src/pythonflex/data/gold_standard/GOBP.parquet +0 -0
- {pythonflex-0.1.6 → pythonflex-0.2.1}/src/pythonflex/data/gold_standard/PATHWAY.parquet +0 -0
- {pythonflex-0.1.6 → pythonflex-0.2.1}/src/pythonflex/data/gold_standard/corum.csv +0 -0
- {pythonflex-0.1.6 → pythonflex-0.2.1}/src/pythonflex/data/gold_standard/gobp.csv +0 -0
- {pythonflex-0.1.6 → pythonflex-0.2.1}/src/pythonflex/data/gold_standard/pathway.csv +0 -0
- {pythonflex-0.1.6 → pythonflex-0.2.1}/src/pythonflex/examples/dataset_filtering.py +0 -0
- {pythonflex-0.1.6 → pythonflex-0.2.1}/src/pythonflex/logging_config.py +0 -0
- {pythonflex-0.1.6 → pythonflex-0.2.1}/src/pythonflex/preprocessing.py +0 -0
- {pythonflex-0.1.6 → pythonflex-0.2.1}/uv.lock +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: pythonflex
|
|
3
|
-
Version: 0.1.6
|
|
3
|
+
Version: 0.2.1
|
|
4
4
|
Summary: pythonFLEX is a benchmarking toolkit for evaluating CRISPR screen results against biological gold standards. The toolkit computes gene-level and complex-level performance metrics, helping researchers systematically assess the biological relevance and resolution of their CRISPR screening data.
|
|
5
5
|
Author-email: Yasir Demirtaş <tyasird@hotmail.com>
|
|
6
6
|
Requires-Python: >=3.9
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
[project]
|
|
2
2
|
name = "pythonflex"
|
|
3
|
-
version = "0.1.6"
|
|
3
|
+
version = "0.2.1"
|
|
4
4
|
description = "pythonFLEX is a benchmarking toolkit for evaluating CRISPR screen results against biological gold standards. The toolkit computes gene-level and complex-level performance metrics, helping researchers systematically assess the biological relevance and resolution of their CRISPR screening data."
|
|
5
5
|
readme = "README.md"
|
|
6
6
|
authors = [
|
|
@@ -2,7 +2,7 @@
|
|
|
2
2
|
from .logging_config import log
|
|
3
3
|
from .utils import dsave, dload
|
|
4
4
|
from .preprocessing import get_example_data_path, load_datasets, get_common_genes, filter_matrix_by_genes, load_gold_standard, filter_duplicate_terms
|
|
5
|
-
from .analysis import initialize, pra, pra_percomplex, fast_corr, perform_corr, is_symmetric, binary, has_mirror_of_first_pair, convert_full_to_half_matrix, drop_mirror_pairs, quick_sort, complex_contributions, save_results_to_csv
|
|
5
|
+
from .analysis import initialize, pra, pra_percomplex, fast_corr, perform_corr, is_symmetric, binary, has_mirror_of_first_pair, convert_full_to_half_matrix, drop_mirror_pairs, quick_sort, complex_contributions, save_results_to_csv, update_matploblib_config
|
|
6
6
|
from .plotting import (
|
|
7
7
|
adjust_text_positions, plot_precision_recall_curve, plot_percomplex_scatter,
|
|
8
8
|
plot_percomplex_scatter_bysize, plot_complex_contributions, plot_significant_complexes, plot_auc_scores
|
|
@@ -14,5 +14,5 @@ __all__ = [ "log", "get_example_data_path", "fast_corr",
|
|
|
14
14
|
"perform_corr", "is_symmetric", "binary", "has_mirror_of_first_pair", "convert_full_to_half_matrix",
|
|
15
15
|
"drop_mirror_pairs", "quick_sort", "complex_contributions", "adjust_text_positions", "plot_precision_recall_curve",
|
|
16
16
|
"plot_percomplex_scatter", "plot_percomplex_scatter_bysize", "plot_complex_contributions",
|
|
17
|
-
"plot_significant_complexes", "plot_auc_scores", "save_results_to_csv"
|
|
17
|
+
"plot_significant_complexes", "plot_auc_scores", "save_results_to_csv", "update_matploblib_config"
|
|
18
18
|
]
|
|
@@ -23,7 +23,7 @@ from .logging_config import log
|
|
|
23
23
|
from .preprocessing import filter_matrix_by_genes
|
|
24
24
|
from .utils import dsave, dload, _sanitize
|
|
25
25
|
|
|
26
|
-
|
|
26
|
+
import matplotlib as mpl
|
|
27
27
|
|
|
28
28
|
def deep_update(source, overrides):
|
|
29
29
|
"""Recursively update the source dict with the overrides."""
|
|
@@ -40,7 +40,7 @@ def initialize(config={}):
|
|
|
40
40
|
|
|
41
41
|
default_config = {
|
|
42
42
|
"min_genes_in_complex": 3,
|
|
43
|
-
"min_genes_per_complex_analysis":
|
|
43
|
+
"min_genes_per_complex_analysis": 2,
|
|
44
44
|
"output_folder": "output",
|
|
45
45
|
"gold_standard": "CORUM",
|
|
46
46
|
"color_map": "RdYlBu",
|
|
@@ -48,7 +48,7 @@ def initialize(config={}):
|
|
|
48
48
|
"plotting": {
|
|
49
49
|
"save_plot": True,
|
|
50
50
|
"show_plot": True,
|
|
51
|
-
"output_type": "
|
|
51
|
+
"output_type": "pdf",
|
|
52
52
|
},
|
|
53
53
|
"preprocessing": {
|
|
54
54
|
"normalize": False,
|
|
@@ -95,31 +95,105 @@ def initialize(config={}):
|
|
|
95
95
|
|
|
96
96
|
|
|
97
97
|
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
|
|
104
|
-
|
|
105
|
-
|
|
106
|
-
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
'
|
|
114
|
-
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
|
|
98
|
+
|
|
99
|
+
|
|
100
|
+
def update_matploblib_config(config=None, font_family="Arial", layout="single"):
|
|
101
|
+
"""
|
|
102
|
+
Configure matplotlib settings optimized for Nature journal figures:
|
|
103
|
+
- 7 pt fonts (labels, ticks, legend), 9 pt titles
|
|
104
|
+
- Thin spines (0.5 pt), ticks out (left/bottom only), no minor ticks
|
|
105
|
+
- No grid, clean minimalist look
|
|
106
|
+
- Colorblind-friendly Tableau 10 color cycle
|
|
107
|
+
- Illustrator-safe PDF export (Type 42)
|
|
108
|
+
- Figure sizes: "single" (~89 mm), "double" (~183 mm), or custom (width, height) in inches
|
|
109
|
+
|
|
110
|
+
Args:
|
|
111
|
+
config (dict, optional): Configuration dict (e.g., {'color_map': 'RdYlBu'}).
|
|
112
|
+
font_family (str): Preferred font (e.g., 'Arial', falls back to 'Helvetica').
|
|
113
|
+
layout (str or tuple): 'single' (~89 mm), 'double' (~183 mm), or (width, height) in inches.
|
|
114
|
+
"""
|
|
115
|
+
if config is None:
|
|
116
|
+
config = {}
|
|
117
|
+
# Fallback if chosen font missing
|
|
118
|
+
try:
|
|
119
|
+
from matplotlib.font_manager import findfont, FontProperties
|
|
120
|
+
findfont(FontProperties(family=font_family))
|
|
121
|
+
except Exception:
|
|
122
|
+
font_family = "Helvetica" # Nature prefers Helvetica if Arial unavailable
|
|
123
|
+
print(f"Warning: '{font_family}' not found, falling back to 'Helvetica'.")
|
|
124
|
+
|
|
125
|
+
# Figure size presets (Nature: single ≈ 89 mm, double ≈ 183 mm at 25.4 mm/inch)
|
|
126
|
+
if isinstance(layout, tuple):
|
|
127
|
+
fig_w, fig_h = layout
|
|
128
|
+
else:
|
|
129
|
+
if layout == "double":
|
|
130
|
+
fig_w = 7.2 # ~183 mm
|
|
131
|
+
fig_h = 5.4 # Adjusted aspect
|
|
132
|
+
else: # "single"
|
|
133
|
+
fig_w = 4.0 # Increased from 3.5" for more space (~102 mm)
|
|
134
|
+
fig_h = 3.0 # Increased from 2.6" for better aspect (~76 mm)
|
|
135
|
+
# Colorblind-friendly cycle (Tableau 10 adapted)
|
|
136
|
+
cb_cycle = [
|
|
137
|
+
"#4E79A7", "#F28E2B", "#E15759", "#76B7B2", "#59A14F",
|
|
138
|
+
"#EDC948", "#B07AA1", "#FF9DA7", "#9C755F", "#BAB0AC"
|
|
139
|
+
]
|
|
140
|
+
mpl.rcParams.update({
|
|
141
|
+
# --- Text & Fonts ---
|
|
142
|
+
"text.usetex": False, # Avoid LaTeX
|
|
143
|
+
"font.family": [font_family], # Explicit font
|
|
144
|
+
"mathtext.fontset": "dejavusans", # Disable mathtext
|
|
145
|
+
"mathtext.default": "regular", # Plain text
|
|
146
|
+
"axes.unicode_minus": True, # Proper minus signs
|
|
147
|
+
# --- Sizes (7 pt baseline, adjusted for space) ---
|
|
148
|
+
"font.size": 7, # Reduced from 8 pt
|
|
149
|
+
"axes.titlesize": 9, # Reduced from 10 pt
|
|
150
|
+
"axes.labelsize": 7,
|
|
151
|
+
"legend.fontsize": 7,
|
|
152
|
+
"xtick.labelsize": 7,
|
|
153
|
+
"ytick.labelsize": 7,
|
|
154
|
+
# --- Lines & Markers ---
|
|
155
|
+
"lines.linewidth": 1.5, # Kept for data visibility
|
|
156
|
+
"lines.markersize": 4.0,
|
|
157
|
+
"patch.linewidth": 0.5,
|
|
158
|
+
"errorbar.capsize": 2,
|
|
159
|
+
# --- Axes, Spines, Ticks ---
|
|
160
|
+
"axes.linewidth": 0.5,
|
|
161
|
+
"axes.edgecolor": "black",
|
|
162
|
+
"axes.facecolor": "none",
|
|
163
|
+
"axes.titlepad": 3.0,
|
|
164
|
+
"axes.labelpad": 2.0,
|
|
165
|
+
"axes.prop_cycle": mpl.cycler(color=cb_cycle),
|
|
166
|
+
"xtick.direction": "out",
|
|
167
|
+
"ytick.direction": "out",
|
|
168
|
+
"xtick.major.size": 2.5,
|
|
169
|
+
"ytick.major.size": 2.5,
|
|
170
|
+
"xtick.minor.visible": False,
|
|
171
|
+
"ytick.minor.visible": False,
|
|
172
|
+
"xtick.major.width": 0.5,
|
|
173
|
+
"ytick.major.width": 0.5,
|
|
174
|
+
"xtick.top": False,
|
|
175
|
+
"ytick.right": False,
|
|
176
|
+
# --- Grid ---
|
|
177
|
+
"axes.grid": False,
|
|
178
|
+
# --- Legend ---
|
|
179
|
+
"legend.frameon": False,
|
|
180
|
+
"legend.handlelength": 1.6, # Slightly adjusted
|
|
181
|
+
"legend.handletextpad": 0.4,
|
|
182
|
+
"legend.borderaxespad": 0.3,
|
|
183
|
+
"legend.loc": "best", # Dynamic placement to avoid overlap
|
|
184
|
+
# --- Figure & Save ---
|
|
185
|
+
"figure.dpi": 600,
|
|
186
|
+
"figure.figsize": (fig_w, fig_h),
|
|
187
|
+
"savefig.dpi": 600,
|
|
188
|
+
"savefig.bbox": "tight",
|
|
189
|
+
"savefig.pad_inches": 0.1, # Increased for spacing
|
|
190
|
+
"savefig.transparent": False, # White background
|
|
191
|
+
# --- PDF/SVG Export ---
|
|
192
|
+
"pdf.fonttype": 42,
|
|
193
|
+
"ps.fonttype": 42,
|
|
194
|
+
"pdf.use14corefonts": False,
|
|
195
|
+
"svg.fonttype": "none",
|
|
121
196
|
})
|
|
122
|
-
log.done("Matplotlib settings updated.")
|
|
123
197
|
|
|
124
198
|
|
|
125
199
|
|
|
@@ -22,7 +22,6 @@ inputs = {
|
|
|
22
22
|
|
|
23
23
|
|
|
24
24
|
#%%
|
|
25
|
-
|
|
26
25
|
default_config = {
|
|
27
26
|
"min_genes_in_complex": 0,
|
|
28
27
|
"min_genes_per_complex_analysis": 3,
|
|
@@ -32,7 +31,7 @@ default_config = {
|
|
|
32
31
|
"jaccard": True,
|
|
33
32
|
"plotting": {
|
|
34
33
|
"save_plot": True,
|
|
35
|
-
"output_type": "
|
|
34
|
+
"output_type": "pdf",
|
|
36
35
|
},
|
|
37
36
|
"preprocessing": {
|
|
38
37
|
"fill_na": True,
|
|
@@ -48,7 +47,6 @@ default_config = {
|
|
|
48
47
|
flex.initialize(default_config)
|
|
49
48
|
|
|
50
49
|
# Load datasets and gold standard terms
|
|
51
|
-
|
|
52
50
|
data, _ = flex.load_datasets(inputs)
|
|
53
51
|
terms, genes_in_terms = flex.load_gold_standard()
|
|
54
52
|
|
|
@@ -59,6 +57,7 @@ for name, dataset in data.items():
|
|
|
59
57
|
pra = flex.pra(name, dataset, is_corr=False)
|
|
60
58
|
fpc = flex.pra_percomplex(name, dataset, is_corr=False)
|
|
61
59
|
cc = flex.complex_contributions(name)
|
|
60
|
+
|
|
62
61
|
|
|
63
62
|
|
|
64
63
|
#%%
|
|
@@ -0,0 +1,104 @@
|
|
|
1
|
+
#%%
|
|
2
|
+
import pythonflex as flex
|
|
3
|
+
import os
|
|
4
|
+
|
|
5
|
+
# # Define specific cell line types you're interested in
|
|
6
|
+
DATA_DIR = "C:/Users/yd/Desktop/projects/_datasets/depmap/25Q2/subset/"
|
|
7
|
+
|
|
8
|
+
# Specific cell lines of interest with "_cell_lines" suffix removed
|
|
9
|
+
cell_line_files = [
|
|
10
|
+
"soft_tissue_cell_lines.csv",
|
|
11
|
+
"skin_cell_lines.csv",
|
|
12
|
+
# "lung_cell_lines.csv",
|
|
13
|
+
# "head_and_neck_cell_lines.csv",
|
|
14
|
+
# "esophagus_stomach_cell_lines.csv",
|
|
15
|
+
]
|
|
16
|
+
|
|
17
|
+
inputs = {}
|
|
18
|
+
|
|
19
|
+
# Create inputs dict with shortened names (removing "_cell_lines" suffix)
|
|
20
|
+
for filename in cell_line_files:
|
|
21
|
+
# Remove .csv extension and _cell_lines suffix
|
|
22
|
+
key = filename.replace("_cell_lines.csv", "")
|
|
23
|
+
full_path = os.path.join(DATA_DIR, filename)
|
|
24
|
+
|
|
25
|
+
inputs[key] = {
|
|
26
|
+
"path": full_path,
|
|
27
|
+
"sort": "high"
|
|
28
|
+
}
|
|
29
|
+
|
|
30
|
+
inputs['depmap'] = {
|
|
31
|
+
"path": "C:/Users/yd/Desktop/projects/_datasets/depmap/25Q2/gene_effect.csv",
|
|
32
|
+
"sort": "high"
|
|
33
|
+
}
|
|
34
|
+
|
|
35
|
+
# Print the resulting inputs dictionary
|
|
36
|
+
print("Configured inputs:")
|
|
37
|
+
for key, value in inputs.items():
|
|
38
|
+
print(f" {key}: {value['path']}")
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
default_config = {
|
|
43
|
+
"min_genes_in_complex": 2,
|
|
44
|
+
"min_genes_per_complex_analysis": 2,
|
|
45
|
+
"output_folder": "25q2_min_genes_2",
|
|
46
|
+
"gold_standard": "CORUM",
|
|
47
|
+
"color_map": "RdYlBu",
|
|
48
|
+
"jaccard": True,
|
|
49
|
+
"plotting": {
|
|
50
|
+
"save_plot": True,
|
|
51
|
+
"output_type": "pdf",
|
|
52
|
+
},
|
|
53
|
+
"preprocessing": {
|
|
54
|
+
"fill_na": True,
|
|
55
|
+
"normalize": False,
|
|
56
|
+
},
|
|
57
|
+
"corr_function": "numpy",
|
|
58
|
+
"logging": {
|
|
59
|
+
"visible_levels": ["DONE","STARTED"] # "PROGRESS", "STARTED", ,"INFO","WARNING"
|
|
60
|
+
}
|
|
61
|
+
}
|
|
62
|
+
|
|
63
|
+
# Initialize logger, config, and output folder
|
|
64
|
+
flex.initialize(default_config)
|
|
65
|
+
|
|
66
|
+
# Load datasets and gold standard terms
|
|
67
|
+
data, _ = flex.load_datasets(inputs)
|
|
68
|
+
terms, genes_in_terms = flex.load_gold_standard()
|
|
69
|
+
|
|
70
|
+
|
|
71
|
+
#%%
|
|
72
|
+
# Run analysis
|
|
73
|
+
for name, dataset in data.items():
|
|
74
|
+
pra = flex.pra(name, dataset, is_corr=False)
|
|
75
|
+
fpc = flex.pra_percomplex(name, dataset, is_corr=False)
|
|
76
|
+
cc = flex.complex_contributions(name)
|
|
77
|
+
|
|
78
|
+
|
|
79
|
+
|
|
80
|
+
#%%
|
|
81
|
+
# Generate plots
|
|
82
|
+
flex.plot_auc_scores()
|
|
83
|
+
flex.plot_precision_recall_curve()
|
|
84
|
+
flex.plot_percomplex_scatter()
|
|
85
|
+
flex.plot_percomplex_scatter_bysize()
|
|
86
|
+
flex.plot_significant_complexes()
|
|
87
|
+
flex.plot_complex_contributions()
|
|
88
|
+
|
|
89
|
+
|
|
90
|
+
#%%
|
|
91
|
+
# Save results to CSV
|
|
92
|
+
flex.save_results_to_csv()
|
|
93
|
+
|
|
94
|
+
|
|
95
|
+
|
|
96
|
+
|
|
97
|
+
|
|
98
|
+
|
|
99
|
+
|
|
100
|
+
|
|
101
|
+
|
|
102
|
+
#%%
|
|
103
|
+
|
|
104
|
+
|