pythonflex 0.1.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,17 @@
1
+ # Python-generated files
2
+ __pycache__/
3
+ *.py[oc]
4
+ build/
5
+ dist/
6
+ wheels/
7
+ *.egg-info
8
+
9
+
10
+ bfg-*.jar
11
+
12
+ # Virtual environments
13
+ .venv
14
+ **/result.pkl
15
+ examples/output/
16
+ src/benchmarkcr/examples/output/
17
+ .aider*
@@ -0,0 +1 @@
1
+ 3.12
@@ -0,0 +1,163 @@
1
+ Metadata-Version: 2.4
2
+ Name: pythonflex
3
+ Version: 0.1.1
4
+ Summary: pythonFLEX is a benchmarking toolkit for evaluating CRISPR screen results against biological gold standards. The toolkit computes gene-level and complex-level performance metrics, helping researchers systematically assess the biological relevance and resolution of their CRISPR screening data.
5
+ Author-email: Yasir Demirtaş <tyasird@hotmail.com>
6
+ Requires-Python: >=3.9
7
+ Requires-Dist: adjusttext
8
+ Requires-Dist: art
9
+ Requires-Dist: bitarray
10
+ Requires-Dist: emoji
11
+ Requires-Dist: importlib-resources
12
+ Requires-Dist: ipython
13
+ Requires-Dist: joblib
14
+ Requires-Dist: loguru
15
+ Requires-Dist: matplotlib
16
+ Requires-Dist: numba
17
+ Requires-Dist: numpy
18
+ Requires-Dist: pandas
19
+ Requires-Dist: pyarrow
20
+ Requires-Dist: python-slugify
21
+ Requires-Dist: scikit-learn
22
+ Requires-Dist: scipy
23
+ Requires-Dist: tqdm
24
+ Description-Content-Type: text/markdown
25
+
26
+ # pythonFLEX
27
+
28
+ 🧬 **pythonFLEX** is a benchmarking toolkit for evaluating CRISPR screen results against biological gold standards. It provides precision-recall analysis using reference gene sets from CORUM protein complexes, Gene Ontology Biological Processes (GO-BP), KEGG pathways, and other curated resources. The toolkit computes gene-level and complex-level performance metrics, helping researchers systematically assess the biological relevance and resolution of their CRISPR screening data.
29
+
30
+
31
+ ---
32
+
33
+ ## 🔧 Features
34
+
35
+ - Precision-recall curve generation for ranked gene lists
36
+
37
+ - Evaluation using CORUM complexes, GO terms, pathways
38
+
39
+ - Complex-level resolution analysis and visualization
40
+
41
+ - Easy integration into CRISPR screen workflows
42
+
43
+ ---
44
+
45
+ ## 📦 Installation
46
+
47
+ We recommend using Python `3.10` inside a virtual environment.
48
+
49
+ Create the environment (this example uses conda) and install `uv`:
50
+
51
+ ```bash
52
+ conda create -n p310 python=3.10
53
+ conda activate p310
54
+ pip install uv
55
+ ```
56
+
57
+ Install pythonFLEX via pip
58
+
59
+ ``` bash
60
+ uv pip install pythonflex
61
+ ```
62
+
63
+ or
64
+
65
+ ```bash
66
+ pip install pythonflex
67
+ ```
68
+
69
+ or install pythonFLEX from source with git (for local development):
70
+
71
+ ```bash
72
+ git clone https://github.com/tyasird/pythonFLEX.git
73
+ cd pythonFLEX
74
+ uv pip install -e .
75
+ ```
76
+
77
+
78
+
79
+ ---
80
+
81
+ ## 🚀 Quickstart
82
+
83
+ ```python
84
+
85
+ import pythonflex as flex
86
+
87
+ inputs = {
88
+ "Melanoma (63 Screens)": {
89
+ "path": flex.get_example_data_path("melanoma_cell_lines_500_genes.csv"),
90
+ "sort": "high"
91
+ },
92
+ "Liver (24 Screens)": {
93
+ "path": flex.get_example_data_path("liver_cell_lines_500_genes.csv"),
94
+ "sort": "high"
95
+ },
96
+ "Neuroblastoma (37 Screens)": {
97
+ "path": flex.get_example_data_path("neuroblastoma_cell_lines_500_genes.csv"),
98
+ "sort": "high"
99
+ },
100
+ }
101
+
102
+
103
+
104
+ default_config = {
105
+ "min_genes_in_complex": 2,
106
+ "min_genes_per_complex_analysis": 2,
107
+ "output_folder": "output",
108
+ "gold_standard": "GOBP",
109
+ "color_map": "RdYlBu",
110
+ "jaccard": True,
111
+ "plotting": {
112
+ "save_plot": True,
113
+ "output_type": "png",
114
+ },
115
+ "preprocessing": {
116
+ "fill_na": True,
117
+ "normalize": False,
118
+ },
119
+ "corr_function": "numpy",
120
+ "logging": {
121
+ "visible_levels": ["DONE","STARTED"] # other levels: "PROGRESS", "INFO", "WARNING"
122
+ }
123
+ }
124
+
125
+
126
+ # Initialize logger, config, and output folder
127
+ flex.initialize(default_config)
128
+
129
+ # Load datasets and gold standard terms
130
+ data, _ = flex.load_datasets(inputs)
131
+ terms, genes_in_terms = flex.load_gold_standard()
132
+
133
+ # Run analysis
134
+ for name, dataset in data.items():
135
+ df, pr_auc = flex.pra(name, dataset)
136
+ fpc = flex.pra_percomplex(name, dataset, is_corr=False)
137
+ cc = flex.complex_contributions(name)
138
+
139
+ # Generate plots
140
+ flex.plot_auc_scores()
141
+ flex.plot_precision_recall_curve()
142
+ flex.plot_percomplex_scatter()
143
+ flex.plot_percomplex_scatter_bysize()
144
+ flex.plot_significant_complexes()
145
+ flex.plot_complex_contributions()
146
+
147
+ # Save results to CSV
148
+ flex.save_results_to_csv()
149
+
150
+
151
+ ```
152
+
153
+ ---
154
+
155
+ ## 📂 Examples
156
+
157
+ - [src/pythonflex/examples/basic_usage.py](src/pythonflex/examples/basic_usage.py)
158
+
159
+ ---
160
+
161
+ ## 📃 License
162
+
163
+ MIT
@@ -0,0 +1,138 @@
1
+ # pythonFLEX
2
+
3
+ 🧬 **pythonFLEX** is a benchmarking toolkit for evaluating CRISPR screen results against biological gold standards. It provides precision-recall analysis using reference gene sets from CORUM protein complexes, Gene Ontology Biological Processes (GO-BP), KEGG pathways, and other curated resources. The toolkit computes gene-level and complex-level performance metrics, helping researchers systematically assess the biological relevance and resolution of their CRISPR screening data.
4
+
5
+
6
+ ---
7
+
8
+ ## 🔧 Features
9
+
10
+ - Precision-recall curve generation for ranked gene lists
11
+
12
+ - Evaluation using CORUM complexes, GO terms, pathways
13
+
14
+ - Complex-level resolution analysis and visualization
15
+
16
+ - Easy integration into CRISPR screen workflows
17
+
18
+ ---
19
+
20
+ ## 📦 Installation
21
+
22
+ We recommend using Python `3.10` inside a virtual environment.
23
+
24
+ Create the environment (this example uses conda) and install `uv`:
25
+
26
+ ```bash
27
+ conda create -n p310 python=3.10
28
+ conda activate p310
29
+ pip install uv
30
+ ```
31
+
32
+ Install pythonFLEX via pip
33
+
34
+ ``` bash
35
+ uv pip install pythonflex
36
+ ```
37
+
38
+ or
39
+
40
+ ```bash
41
+ pip install pythonflex
42
+ ```
43
+
44
+ or install pythonFLEX from source with git (for local development):
45
+
46
+ ```bash
47
+ git clone https://github.com/tyasird/pythonFLEX.git
48
+ cd pythonFLEX
49
+ uv pip install -e .
50
+ ```
51
+
52
+
53
+
54
+ ---
55
+
56
+ ## 🚀 Quickstart
57
+
58
+ ```python
59
+
60
+ import pythonflex as flex
61
+
62
+ inputs = {
63
+ "Melanoma (63 Screens)": {
64
+ "path": flex.get_example_data_path("melanoma_cell_lines_500_genes.csv"),
65
+ "sort": "high"
66
+ },
67
+ "Liver (24 Screens)": {
68
+ "path": flex.get_example_data_path("liver_cell_lines_500_genes.csv"),
69
+ "sort": "high"
70
+ },
71
+ "Neuroblastoma (37 Screens)": {
72
+ "path": flex.get_example_data_path("neuroblastoma_cell_lines_500_genes.csv"),
73
+ "sort": "high"
74
+ },
75
+ }
76
+
77
+
78
+
79
+ default_config = {
80
+ "min_genes_in_complex": 2,
81
+ "min_genes_per_complex_analysis": 2,
82
+ "output_folder": "output",
83
+ "gold_standard": "GOBP",
84
+ "color_map": "RdYlBu",
85
+ "jaccard": True,
86
+ "plotting": {
87
+ "save_plot": True,
88
+ "output_type": "png",
89
+ },
90
+ "preprocessing": {
91
+ "fill_na": True,
92
+ "normalize": False,
93
+ },
94
+ "corr_function": "numpy",
95
+ "logging": {
96
+ "visible_levels": ["DONE","STARTED"] # other levels: "PROGRESS", "INFO", "WARNING"
97
+ }
98
+ }
99
+
100
+
101
+ # Initialize logger, config, and output folder
102
+ flex.initialize(default_config)
103
+
104
+ # Load datasets and gold standard terms
105
+ data, _ = flex.load_datasets(inputs)
106
+ terms, genes_in_terms = flex.load_gold_standard()
107
+
108
+ # Run analysis
109
+ for name, dataset in data.items():
110
+ df, pr_auc = flex.pra(name, dataset)
111
+ fpc = flex.pra_percomplex(name, dataset, is_corr=False)
112
+ cc = flex.complex_contributions(name)
113
+
114
+ # Generate plots
115
+ flex.plot_auc_scores()
116
+ flex.plot_precision_recall_curve()
117
+ flex.plot_percomplex_scatter()
118
+ flex.plot_percomplex_scatter_bysize()
119
+ flex.plot_significant_complexes()
120
+ flex.plot_complex_contributions()
121
+
122
+ # Save results to CSV
123
+ flex.save_results_to_csv()
124
+
125
+
126
+ ```
127
+
128
+ ---
129
+
130
+ ## 📂 Examples
131
+
132
+ - [src/pythonflex/examples/basic_usage.py](src/pythonflex/examples/basic_usage.py)
133
+
134
+ ---
135
+
136
+ ## 📃 License
137
+
138
+ MIT
@@ -0,0 +1,63 @@
1
+ [project]
2
+ name = "pythonflex"
3
+ version = "0.1.1"
4
+ description = "pythonFLEX is a benchmarking toolkit for evaluating CRISPR screen results against biological gold standards. The toolkit computes gene-level and complex-level performance metrics, helping researchers systematically assess the biological relevance and resolution of their CRISPR screening data."
5
+ readme = "README.md"
6
+ authors = [
7
+ { name = "Yasir Demirtaş", email = "tyasird@hotmail.com" }
8
+ ]
9
+ requires-python = ">=3.9"
10
+
11
+
12
+ # Exclude the input, output, and temporary example folders
13
+ exclude = ["src/pythonflex/input/*", "src/pythonflex/output/*", "src/pythonflex/examples/output/*",
14
+ "src/pythonflex/examples/.tmp/*"]
15
+
16
+
17
+ dependencies = [
18
+ "adjustText",
19
+ "art",
20
+ "bitarray",
21
+ "emoji",
22
+ "ipython",
23
+ "joblib",
24
+ "loguru",
25
+ "matplotlib",
26
+ "numba",
27
+ "numpy",
28
+ "pandas",
29
+ "pyarrow",
30
+ "python-slugify",
31
+ "scikit-learn",
32
+ "scipy",
33
+ "tqdm",
34
+ "importlib-resources" # backport of importlib.resources; likely redundant given requires-python >=3.9 (TODO confirm)
35
+ ]
36
+
37
+ [project.scripts]
38
+ pythonflex = "pythonflex:main"
39
+
40
+ [build-system]
41
+ requires = ["hatchling"]
42
+ build-backend = "hatchling.build"
43
+
44
+
45
+ [tool.ruff]
46
+ ignore = ["F541"]
47
+
48
+ [tool.setuptools.package-data]
49
+ pythonflex = ["data/**/*.parquet", "data/**/*.csv", "data/**/*.tsv", "data/**/*.json", "data/**/*.txt", "data/**/*.xlsx", "data/**/*.xls", "data/**/*.h5", "data/**/*.hdf5"]
50
+
51
+ [tool.hatch.build]
52
+ exclude = ["**/result.pkl", "examples/output"]
53
+
54
+ [tool.hatch.build.targets.wheel]
55
+ packages = ["pythonFLEX"]
56
+
57
+ [tool.uv.sources]
58
+ pythonflex = { workspace = true }
59
+
60
+ [dependency-groups]
61
+ dev = [
62
+ "pythonflex",
63
+ ]
@@ -0,0 +1,18 @@
1
+
2
+ from .logging_config import log
3
+ from .utils import dsave, dload
4
+ from .preprocessing import get_example_data_path, load_datasets, get_common_genes, filter_matrix_by_genes, load_gold_standard, filter_duplicate_terms
5
+ from .analysis import initialize, pra, pra_percomplex, fast_corr, perform_corr, is_symmetric, binary, has_mirror_of_first_pair, convert_full_to_half_matrix, drop_mirror_pairs, quick_sort, complex_contributions, save_results_to_csv
6
+ from .plotting import (
7
+ adjust_text_positions, plot_precision_recall_curve, plot_percomplex_scatter,
8
+ plot_percomplex_scatter_bysize, plot_complex_contributions, plot_significant_complexes, plot_auc_scores
9
+ )
10
+
11
+ __all__ = [ "log", "get_example_data_path", "fast_corr",
12
+ "initialize", "dsave", "dload", "load_datasets", "get_common_genes",
13
+ "filter_matrix_by_genes", "load_gold_standard", "filter_duplicate_terms", "pra", "pra_percomplex",
14
+ "perform_corr", "is_symmetric", "binary", "has_mirror_of_first_pair", "convert_full_to_half_matrix",
15
+ "drop_mirror_pairs", "quick_sort", "complex_contributions", "adjust_text_positions", "plot_precision_recall_curve",
16
+ "plot_percomplex_scatter", "plot_percomplex_scatter_bysize", "plot_complex_contributions",
17
+ "plot_significant_complexes", "plot_auc_scores", "save_results_to_csv"
18
+ ]