cellitac 1.0.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
cellitac-1.0.0/LICENSE ADDED
@@ -0,0 +1,17 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2025 Rana Hamed, Syrus, Emmanuel
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
@@ -0,0 +1,218 @@
1
+ Metadata-Version: 2.4
2
+ Name: cellitac
3
+ Version: 1.0.0
4
+ Summary: Cell type identification using Transcription factor Analysis and Chromatin accessibility
5
+ Author-email: Rana H Abuzeid <ranahamed2111@gmail.com>, Olaitan Awe <laitanawe@gmail.com>
6
+ License: MIT
7
+ Project-URL: Homepage, https://github.com/omicscodeathon/cellitac/
8
+ Keywords: single-cell,scATAC-seq,scRNA-seq,multiome,cell-type-identification,transcription-factor,chromatin-accessibility,machine-learning
9
+ Classifier: Programming Language :: Python :: 3
10
+ Classifier: Programming Language :: Python :: 3.9
11
+ Classifier: Programming Language :: Python :: 3.10
12
+ Classifier: Programming Language :: Python :: 3.11
13
+ Classifier: License :: OSI Approved :: MIT License
14
+ Classifier: Operating System :: OS Independent
15
+ Classifier: Topic :: Scientific/Engineering :: Bio-Informatics
16
+ Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
17
+ Requires-Python: >=3.9
18
+ Description-Content-Type: text/markdown
19
+ License-File: LICENSE
20
+ Requires-Dist: numpy>=1.24
21
+ Requires-Dist: pandas>=2.0
22
+ Requires-Dist: openpyxl>=3.1
23
+ Requires-Dist: rpy2>=3.5
24
+ Requires-Dist: scikit-learn>=1.3
25
+ Requires-Dist: xgboost>=2.0
26
+ Requires-Dist: imbalanced-learn>=0.11
27
+ Requires-Dist: sklearn-compat>=0.1.5
28
+ Requires-Dist: matplotlib>=3.7
29
+ Requires-Dist: seaborn>=0.12
30
+ Requires-Dist: plotly>=5.18
31
+ Requires-Dist: networkx>=3.1
32
+ Provides-Extra: dev
33
+ Requires-Dist: pytest>=7.0; extra == "dev"
34
+ Requires-Dist: pytest-cov; extra == "dev"
35
+ Requires-Dist: black; extra == "dev"
36
+ Requires-Dist: ruff; extra == "dev"
37
+ Dynamic: license-file
38
+
39
+ # cellitac
40
+
41
+ Single-Cell ATAC + RNA Multiome Processing & ML Classification Pipeline
42
+
43
+ ---
44
+
45
+ ## What It Does
46
+
47
+ | Stage | Steps | Tools |
48
+ |-------|-------|-------|
49
+ | **Preprocessing** | RNA QC → normalization → cell-type annotation | Seurat + SingleR (R via rpy2) |
50
+ | **Preprocessing** | ATAC QC → TF-IDF → LSI | Signac (R via rpy2) |
51
+ | **Preprocessing** | RNA + ATAC integration → ML-ready CSVs | Pure Python |
52
+ | **ML** | Imbalance analysis → SMOTE → feature selection | scikit-learn, imbalanced-learn |
53
+ | **ML** | RF + XGBoost + SVM training & evaluation | scikit-learn, xgboost |
54
+ | **ML** | 19 plots + JSON report + XLSX | matplotlib, seaborn, networkx |
55
+
56
+ ---
57
+
58
+ ## Installation
59
+
60
+ ### Option A – Local / Team (pip install -e)
61
+
62
+ ```bash
63
+ git clone https://github.com/omicscodeathon/cellitac.git
64
+ cd cellitac
65
+
66
+ # Install R packages (run once inside R)
67
+ Rscript -e "
68
+ install.packages('BiocManager')
69
+ BiocManager::install(c(
70
+ 'Seurat', 'Signac', 'SingleR', 'celldex',
71
+ 'SingleCellExperiment', 'GenomicRanges',
72
+ 'EnsDb.Hsapiens.v75', 'biovizBase', 'hdf5r'
73
+ ))
74
+ "
75
+
76
+ # Install Python package
77
+ pip install -e ".[dev]"
78
+ ```
79
+
80
+ ### Option B – PyPI
81
+
82
+ ```bash
83
+ pip install cellitac
84
+ # R must be installed separately
85
+ ```
86
+
87
+ ### Option C – Docker (recommended for full reproducibility)
88
+
89
+ ```bash
90
+ docker build -t scatactf:1.0.0 -f docker/Dockerfile .
91
+
92
+ docker run --rm \
93
+ -v /your/data:/data \
94
+ -v $(pwd)/results:/results \
95
+ scatactf:1.0.0 \
96
+ --input /data --output /results
97
+ ```
98
+
99
+ ---
100
+
101
+ ## Data Download
102
+
103
+ https://www.10xgenomics.com/datasets/pbmc-from-a-healthy-donor-no-cell-sorting-10-k-1-standard-1-0-0
104
+
105
+ Required files (place in your `--input` directory):
106
+ ```
107
+ pbmc_unsorted_10k_filtered_feature_bc_matrix.h5
108
+ pbmc_unsorted_10k_per_barcode_metrics.csv
109
+ pbmc_unsorted_10k_atac_fragments.tsv.gz
110
+ pbmc_unsorted_10k_atac_fragments.tsv.gz.tbi
111
+ pbmc_unsorted_10k_atac_peaks.bed
112
+ ```
113
+
114
+ ---
115
+
116
+ ## Usage
117
+
118
+ ### Command Line
119
+
120
+ ```bash
121
+ # Full pipeline (preprocessing + ML)
122
+ cellitac --input ~/singlecell/ATAC --output my_results
123
+
124
+ # Preprocessing only (generates python_ready_data/)
125
+ cellitac-preprocess --input ~/singlecell/ATAC --output my_results
126
+
127
+ # ML only (if you already have python_ready_data/)
128
+ cellitac-model --data my_results/python_ready_data --output my_results/ml
129
+ ```
130
+
131
+ ### Python API
132
+
133
+ ```python
134
+ from cellitac import run_full_pipeline, run_preprocessing, run_model
135
+
136
+ # Full pipeline
137
+ run_full_pipeline(input_dir="~/singlecell/ATAC", output_dir="my_results")
138
+
139
+ # Preprocessing only
140
+ run_preprocessing(input_dir="~/singlecell/ATAC", output_dir_python="python_ready_data")
141
+
142
+ # ML only
143
+ run_model(data_dir="python_ready_data", output_dir="ml_results")
144
+
145
+ # Use the ML class directly for more control
146
+ from cellitac.mainModel import scATACMLPipeline
147
+ pipeline = scATACMLPipeline(data_dir="python_ready_data", output_dir="ml_results")
148
+ pipeline.run_complete_pipeline()
149
+ ```
150
+
151
+ ### Environment Variables
152
+
153
+ ```bash
154
+ export SCATAC_INPUT_DIR=~/singlecell/ATAC
155
+ export SCATAC_OUT_ML=ml_results
156
+ cellitac
157
+ ```
158
+
159
+ ---
160
+
161
+ ## Output Files
162
+
163
+ ### ml_results/
164
+ | File | Description |
165
+ |------|-------------|
166
+ | `ml_pipeline_report.json` | Full JSON report |
167
+ | `model_performance_summary.csv` | Accuracy/F1/AUC per model |
168
+ | `detailed_model_results.xlsx` | Per-class metrics, CV results |
169
+ | `model_performance_comparison.png` | Bar chart comparison |
170
+ | `confusion_matrices.png` | Confusion matrices |
171
+ | `class_distribution_analysis.png` | Cell type distribution |
172
+ | `class_balancing_comparison.png` | Before/after SMOTE |
173
+ | `feature_importance.png` | RF + XGBoost top 20 features |
174
+ | `simple_feature_heatmap.png` | Feature importance heatmap |
175
+ | `overfitting_analysis.png` | CV train vs validation |
176
+ | `learning_curves.png` | Learning curves per model |
177
+ | `performance_radar.png` | Radar chart |
178
+ | `feature_distributions.png` | Violin plots |
179
+ | `class_separation_pca.png` | PCA scatter |
180
+ | `basic_tf_network.png` | Feature–cell-type network |
181
+
182
+ ---
183
+
184
+ ## Package Structure
185
+
186
+ ```
187
+ cellitac/
188
+ ├── src/cellitac/
189
+ │ ├── __init__.py # Public API
190
+ │ ├── _version.py
191
+ │ ├── config.py # All parameters (paths, QC thresholds, ML hyperparams)
192
+ │ ├── pipeline.py # run_preprocessing, run_model, run_full_pipeline
193
+ │ ├── preprocessing.py # R preprocessing via rpy2
194
+ │ ├── mainModel.py # scATACMLPipeline class (19-step ML pipeline)
195
+ │ ├── cli.py # cellitac / cellitac-preprocess / cellitac-model
196
+ │ └── rscripts/
197
+ │ ├── team1_rna.R # Exact Seurat + SingleR code
198
+ │ └── team2_atac.R # Exact Signac code
199
+ ├── tests/
200
+ │ └── test_model.py
201
+ ├── pyproject.toml
202
+ └── README.md
203
+ ```
204
+
205
+ ---
206
+
207
+ ## Tests
208
+
209
+ ```bash
210
+ pip install -e ".[dev]"
211
+ pytest tests/ -v
212
+ ```
213
+
214
+ ---
215
+
216
+ ## License
217
+
218
+ MIT
@@ -0,0 +1,180 @@
1
+ # cellitac
2
+
3
+ Single-Cell ATAC + RNA Multiome Processing & ML Classification Pipeline
4
+
5
+ ---
6
+
7
+ ## What It Does
8
+
9
+ | Stage | Steps | Tools |
10
+ |-------|-------|-------|
11
+ | **Preprocessing** | RNA QC → normalization → cell-type annotation | Seurat + SingleR (R via rpy2) |
12
+ | **Preprocessing** | ATAC QC → TF-IDF → LSI | Signac (R via rpy2) |
13
+ | **Preprocessing** | RNA + ATAC integration → ML-ready CSVs | Pure Python |
14
+ | **ML** | Imbalance analysis → SMOTE → feature selection | scikit-learn, imbalanced-learn |
15
+ | **ML** | RF + XGBoost + SVM training & evaluation | scikit-learn, xgboost |
16
+ | **ML** | 19 plots + JSON report + XLSX | matplotlib, seaborn, networkx |
17
+
18
+ ---
19
+
20
+ ## Installation
21
+
22
+ ### Option A – Local / Team (pip install -e)
23
+
24
+ ```bash
25
+ git clone https://github.com/omicscodeathon/cellitac.git
26
+ cd cellitac
27
+
28
+ # Install R packages (run once inside R)
29
+ Rscript -e "
30
+ install.packages('BiocManager')
31
+ BiocManager::install(c(
32
+ 'Seurat', 'Signac', 'SingleR', 'celldex',
33
+ 'SingleCellExperiment', 'GenomicRanges',
34
+ 'EnsDb.Hsapiens.v75', 'biovizBase', 'hdf5r'
35
+ ))
36
+ "
37
+
38
+ # Install Python package
39
+ pip install -e ".[dev]"
40
+ ```
41
+
42
+ ### Option B – PyPI
43
+
44
+ ```bash
45
+ pip install cellitac
46
+ # R must be installed separately
47
+ ```
48
+
49
+ ### Option C – Docker (recommended for full reproducibility)
50
+
51
+ ```bash
52
+ docker build -t scatactf:1.0.0 -f docker/Dockerfile .
53
+
54
+ docker run --rm \
55
+ -v /your/data:/data \
56
+ -v $(pwd)/results:/results \
57
+ scatactf:1.0.0 \
58
+ --input /data --output /results
59
+ ```
60
+
61
+ ---
62
+
63
+ ## Data Download
64
+
65
+ https://www.10xgenomics.com/datasets/pbmc-from-a-healthy-donor-no-cell-sorting-10-k-1-standard-1-0-0
66
+
67
+ Required files (place in your `--input` directory):
68
+ ```
69
+ pbmc_unsorted_10k_filtered_feature_bc_matrix.h5
70
+ pbmc_unsorted_10k_per_barcode_metrics.csv
71
+ pbmc_unsorted_10k_atac_fragments.tsv.gz
72
+ pbmc_unsorted_10k_atac_fragments.tsv.gz.tbi
73
+ pbmc_unsorted_10k_atac_peaks.bed
74
+ ```
75
+
76
+ ---
77
+
78
+ ## Usage
79
+
80
+ ### Command Line
81
+
82
+ ```bash
83
+ # Full pipeline (preprocessing + ML)
84
+ cellitac --input ~/singlecell/ATAC --output my_results
85
+
86
+ # Preprocessing only (generates python_ready_data/)
87
+ cellitac-preprocess --input ~/singlecell/ATAC --output my_results
88
+
89
+ # ML only (if you already have python_ready_data/)
90
+ cellitac-model --data my_results/python_ready_data --output my_results/ml
91
+ ```
92
+
93
+ ### Python API
94
+
95
+ ```python
96
+ from cellitac import run_full_pipeline, run_preprocessing, run_model
97
+
98
+ # Full pipeline
99
+ run_full_pipeline(input_dir="~/singlecell/ATAC", output_dir="my_results")
100
+
101
+ # Preprocessing only
102
+ run_preprocessing(input_dir="~/singlecell/ATAC", output_dir_python="python_ready_data")
103
+
104
+ # ML only
105
+ run_model(data_dir="python_ready_data", output_dir="ml_results")
106
+
107
+ # Use the ML class directly for more control
108
+ from cellitac.mainModel import scATACMLPipeline
109
+ pipeline = scATACMLPipeline(data_dir="python_ready_data", output_dir="ml_results")
110
+ pipeline.run_complete_pipeline()
111
+ ```
112
+
113
+ ### Environment Variables
114
+
115
+ ```bash
116
+ export SCATAC_INPUT_DIR=~/singlecell/ATAC
117
+ export SCATAC_OUT_ML=ml_results
118
+ cellitac
119
+ ```
120
+
121
+ ---
122
+
123
+ ## Output Files
124
+
125
+ ### ml_results/
126
+ | File | Description |
127
+ |------|-------------|
128
+ | `ml_pipeline_report.json` | Full JSON report |
129
+ | `model_performance_summary.csv` | Accuracy/F1/AUC per model |
130
+ | `detailed_model_results.xlsx` | Per-class metrics, CV results |
131
+ | `model_performance_comparison.png` | Bar chart comparison |
132
+ | `confusion_matrices.png` | Confusion matrices |
133
+ | `class_distribution_analysis.png` | Cell type distribution |
134
+ | `class_balancing_comparison.png` | Before/after SMOTE |
135
+ | `feature_importance.png` | RF + XGBoost top 20 features |
136
+ | `simple_feature_heatmap.png` | Feature importance heatmap |
137
+ | `overfitting_analysis.png` | CV train vs validation |
138
+ | `learning_curves.png` | Learning curves per model |
139
+ | `performance_radar.png` | Radar chart |
140
+ | `feature_distributions.png` | Violin plots |
141
+ | `class_separation_pca.png` | PCA scatter |
142
+ | `basic_tf_network.png` | Feature–cell-type network |
143
+
144
+ ---
145
+
146
+ ## Package Structure
147
+
148
+ ```
149
+ cellitac/
150
+ ├── src/cellitac/
151
+ │ ├── __init__.py # Public API
152
+ │ ├── _version.py
153
+ │ ├── config.py # All parameters (paths, QC thresholds, ML hyperparams)
154
+ │ ├── pipeline.py # run_preprocessing, run_model, run_full_pipeline
155
+ │ ├── preprocessing.py # R preprocessing via rpy2
156
+ │ ├── mainModel.py # scATACMLPipeline class (19-step ML pipeline)
157
+ │ ├── cli.py # cellitac / cellitac-preprocess / cellitac-model
158
+ │ └── rscripts/
159
+ │ ├── team1_rna.R # Exact Seurat + SingleR code
160
+ │ └── team2_atac.R # Exact Signac code
161
+ ├── tests/
162
+ │ └── test_model.py
163
+ ├── pyproject.toml
164
+ └── README.md
165
+ ```
166
+
167
+ ---
168
+
169
+ ## Tests
170
+
171
+ ```bash
172
+ pip install -e ".[dev]"
173
+ pytest tests/ -v
174
+ ```
175
+
176
+ ---
177
+
178
+ ## License
179
+
180
+ MIT
@@ -0,0 +1,73 @@
1
[build-system]
requires = ["setuptools>=68", "wheel"]
build-backend = "setuptools.build_meta"

[project]
name = "cellitac"
# Keep in sync with __version__ in the cellitac._version module.
version = "1.0.0"
description = "Cell type identification using Transcription factor Analysis and Chromatin accessibility"
readme = "README.md"
license = {text = "MIT"}

authors = [
    {name = "Rana H Abuzeid", email = "ranahamed2111@gmail.com"},
    {name = "Olaitan Awe", email = "laitanawe@gmail.com"}
]
keywords = [
    "single-cell", "scATAC-seq", "scRNA-seq", "multiome",
    "cell-type-identification", "transcription-factor",
    "chromatin-accessibility", "machine-learning"
]

classifiers = [
    "Programming Language :: Python :: 3",
    "Programming Language :: Python :: 3.9",
    "Programming Language :: Python :: 3.10",
    "Programming Language :: Python :: 3.11",
    "License :: OSI Approved :: MIT License",
    "Operating System :: OS Independent",
    "Topic :: Scientific/Engineering :: Bio-Informatics",
    "Topic :: Scientific/Engineering :: Artificial Intelligence",
]
requires-python = ">=3.9"

dependencies = [
    # Data
    "numpy>=1.24",
    "pandas>=2.0",
    "openpyxl>=3.1",
    # rpy2 bridge
    "rpy2>=3.5",
    # ML
    "scikit-learn>=1.3",
    "xgboost>=2.0",
    "imbalanced-learn>=0.11",
    "sklearn-compat>=0.1.5",
    # Visualization
    "matplotlib>=3.7",
    "seaborn>=0.12",
    "plotly>=5.18",
    "networkx>=3.1",
]

[project.optional-dependencies]
dev = [
    "pytest>=7.0",
    "pytest-cov",
    "black",
    "ruff",
]

[project.scripts]
# Every console script must target an argparse wrapper defined in cellitac.cli.
cellitac = "cellitac.cli:main"
cellitac-preprocess = "cellitac.cli:run_preprocess"
# `cellitac.cli:run_model` is the pipeline function imported from
# cellitac.pipeline, not a CLI entry point; the argparse wrapper that handles
# --data / --output is `run_model_cli`.
cellitac-model = "cellitac.cli:run_model_cli"

[project.urls]
Homepage = "https://github.com/omicscodeathon/cellitac/"

[tool.setuptools.packages.find]
where = ["src"]

[tool.setuptools.package-data]
# Ship the R scripts inside the installed package.
cellitac = ["rscripts/*.R"]
@@ -0,0 +1,4 @@
1
+ [egg_info]
2
+ tag_build =
3
+ tag_date = 0
4
+
@@ -0,0 +1,41 @@
1
+ """
2
+ cellitac
3
+ ========
4
+ Single-Cell ATAC + RNA Multiome Processing & ML Classification Pipeline.
5
+
6
+ This package runs two stages end-to-end:
7
+
8
+ Stage 1 – Preprocessing (R via rpy2)
9
+ • Team 1: RNA processing, QC, normalization, cell-type annotation
10
+ (Seurat + SingleR + celldex)
11
+ • Team 2: ATAC processing, QC, TF-IDF, LSI
12
+ (Signac + EnsDb)
13
+ • Integration: combine RNA + ATAC → ML-ready CSVs
14
+
15
+ Stage 2 – ML Classification (pure Python)
16
+ • Class imbalance analysis & SMOTE balancing
17
+ • Feature engineering & selection
18
+ • Random Forest, XGBoost, SVM training
19
+ • Evaluation, visualizations, reports
20
+
21
+ Quick start
22
+ -----------
23
+ >>> from cellitac import run_preprocessing, run_model, run_full_pipeline
24
+ >>> run_full_pipeline(input_dir="~/singlecell/ATAC", output_dir="results")
25
+
26
+ Or from the command line::
27
+
28
+ cellitac --input ~/singlecell/ATAC --output results
29
+ cellitac-preprocess --input ~/singlecell/ATAC
30
+ cellitac-model --data python_ready_data --output ml_results
31
+ """
32
+
33
+ from cellitac._version import __version__
34
+ from cellitac.pipeline import run_preprocessing, run_model, run_full_pipeline
35
+
36
+ __all__ = [
37
+ "run_preprocessing",
38
+ "run_model",
39
+ "run_full_pipeline",
40
+ "__version__",
41
+ ]
@@ -0,0 +1 @@
1
+ __version__ = "1.0.0"
@@ -0,0 +1,147 @@
1
+ """
2
+ cellitac.cli
3
+ ============
4
+ Command-line interface for the cellitac package.
5
+
6
+ Installed entry points (pyproject.toml):
7
+ cellitac → run full pipeline (preprocessing + ML)
8
+ cellitac-preprocess → run preprocessing only (R steps)
9
+ cellitac-model → run ML model only (on existing python_ready_data)
10
+
11
+ Examples
12
+ --------
13
+ # Full pipeline
14
+ cellitac --input ~/singlecell/ATAC --output results/
15
+
16
+ # Preprocessing only
17
+ cellitac-preprocess --input ~/singlecell/ATAC --output results/
18
+
19
+ # ML only (after preprocessing)
20
+ cellitac-model --data results/python_ready_data --output results/ml_results
21
+ """
22
+
23
+ import argparse
24
+ import sys
25
+
26
+ from cellitac import config as cfg
27
+ from cellitac.pipeline import run_full_pipeline, run_preprocessing, run_model
28
+
29
+
30
+ # ============================================================================
31
+ # Full pipeline CLI
32
+ # ============================================================================
33
def main():
    """Entry point of the ``cellitac`` console script (full pipeline).

    Parses ``--input`` / ``--output`` from the command line, calls
    ``run_full_pipeline`` with them and always terminates the process through
    ``sys.exit``:

    * ``0`` when ``run_full_pipeline`` returns a truthy value;
    * ``1`` when it returns a falsy value, when the user interrupts the run
      (``KeyboardInterrupt``) or when any other exception is raised. In the
      last case the exception message is printed to stderr with an
      ``[ERROR]`` prefix.
    """
    # Take the version from the package instead of repeating the literal, so
    # the banner cannot drift from cellitac._version on the next release.
    from cellitac._version import __version__

    parser = argparse.ArgumentParser(
        prog="cellitac",
        description="scATAC + RNA Multiome Processing & ML Classification Pipeline",
        # Raw formatter keeps the line breaks of the examples in the epilog.
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
Examples:
  cellitac --input ~/singlecell/ATAC --output my_results
  cellitac --input /data/pbmc10k
        """,
    )
    parser.add_argument(
        "--input", "-i",
        default=cfg.INPUT_DIR,
        metavar="DIR",
        help=f"Raw 10x data directory (default: {cfg.INPUT_DIR})",
    )
    parser.add_argument(
        "--output", "-o",
        default="cellitac_results",
        metavar="DIR",
        help="Base output directory (default: cellitac_results)",
    )
    parser.add_argument(
        "--version", "-v",
        action="version",
        version=f"cellitac {__version__}",
    )

    args = parser.parse_args()
    try:
        success = run_full_pipeline(input_dir=args.input, output_dir=args.output)
        # SystemExit is not an Exception subclass, so it passes through the
        # handlers below untouched.
        sys.exit(0 if success else 1)
    except KeyboardInterrupt:
        print("\nInterrupted.", file=sys.stderr)
        sys.exit(1)
    except Exception as exc:
        # Top-level boundary: report the failure briefly and exit non-zero.
        print(f"[ERROR] {exc}", file=sys.stderr)
        sys.exit(1)
72
+
73
+
74
+ # ============================================================================
75
+ # Preprocessing-only CLI
76
+ # ============================================================================
77
def run_preprocess():
    """Entry point of the ``cellitac-preprocess`` console script.

    Parses ``--input`` / ``--output``, derives three sub-directories of the
    output directory (``team1_rna_output``, ``team2_atac_output`` and
    ``python_ready_data``) and passes them to ``run_preprocessing``. The
    process always terminates through ``sys.exit``:

    * ``0`` when ``run_preprocessing`` returns without raising;
    * ``1`` on ``KeyboardInterrupt`` or any other exception. In the latter
      case the message is printed to stderr with an ``[ERROR]`` prefix.
    """
    # Function-scope imports: `os` is not imported at module level, and the
    # version is read from the package rather than repeated as a literal.
    import os

    from cellitac._version import __version__

    parser = argparse.ArgumentParser(
        prog="cellitac-preprocess",
        description="Run preprocessing only: RNA (Seurat) + ATAC (Signac) + Integration",
        # Raw formatter keeps the line breaks of the examples in the epilog.
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
Examples:
  cellitac-preprocess --input ~/singlecell/ATAC
  cellitac-preprocess --input /data --output custom_results/
        """,
    )
    parser.add_argument("--input", "-i", default=cfg.INPUT_DIR,
                        metavar="DIR", help="Raw data directory")
    parser.add_argument("--output", "-o", default="cellitac_results",
                        metavar="DIR", help="Base output directory")
    # `-v` is accepted as well, matching the `cellitac` command.
    parser.add_argument("--version", "-v", action="version",
                        version=f"cellitac {__version__}")

    args = parser.parse_args()

    # All stage outputs live under the single base directory given by --output.
    team1_dir = os.path.join(args.output, "team1_rna_output")
    team2_dir = os.path.join(args.output, "team2_atac_output")
    python_dir = os.path.join(args.output, "python_ready_data")

    try:
        # NOTE(review): unlike the other two entry points, the return value
        # is not inspected here, so only an exception yields a non-zero exit
        # status. Confirm that run_preprocessing signals failure by raising.
        run_preprocessing(
            input_dir=args.input,
            output_dir_team1=team1_dir,
            output_dir_team2=team2_dir,
            output_dir_python=python_dir,
        )
        print(f"\nPreprocessing complete. ML-ready data in: {python_dir}")
        sys.exit(0)
    except KeyboardInterrupt:
        print("\nInterrupted.", file=sys.stderr)
        sys.exit(1)
    except Exception as exc:
        # Top-level boundary: report the failure briefly and exit non-zero.
        print(f"[ERROR] {exc}", file=sys.stderr)
        sys.exit(1)
116
+
117
+
118
+ # ============================================================================
119
+ # ML-only CLI
120
+ # ============================================================================
121
def run_model_cli():
    """Entry point of the ``cellitac-model`` console script (ML stage only).

    Parses ``--data`` (default ``cfg.OUTPUT_DIR_PYTHON``) and ``--output``
    (default ``cfg.OUTPUT_DIR_ML``), calls ``run_model`` with them and always
    terminates the process through ``sys.exit``:

    * ``0`` when ``run_model`` returns a truthy value;
    * ``1`` when it returns a falsy value, on ``KeyboardInterrupt`` or on any
      other exception. In the last case the message is printed to stderr
      with an ``[ERROR]`` prefix.
    """
    # Take the version from the package instead of repeating the literal, so
    # the banner cannot drift from cellitac._version on the next release.
    from cellitac._version import __version__

    parser = argparse.ArgumentParser(
        prog="cellitac-model",
        description="Run ML classification on existing python_ready_data",
        # Raw formatter keeps the line breaks of the examples in the epilog.
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
Examples:
  cellitac-model --data python_ready_data --output ml_results
  cellitac-model --data cellitac_results/python_ready_data
        """,
    )
    parser.add_argument("--data", "-d", default=cfg.OUTPUT_DIR_PYTHON,
                        metavar="DIR", help="python_ready_data directory")
    parser.add_argument("--output", "-o", default=cfg.OUTPUT_DIR_ML,
                        metavar="DIR", help="ML results output directory")
    # `-v` is accepted as well, matching the `cellitac` command.
    parser.add_argument("--version", "-v", action="version",
                        version=f"cellitac {__version__}")

    args = parser.parse_args()
    try:
        success = run_model(data_dir=args.data, output_dir=args.output)
        # SystemExit is not an Exception subclass, so it passes through the
        # handlers below untouched.
        sys.exit(0 if success else 1)
    except KeyboardInterrupt:
        print("\nInterrupted.", file=sys.stderr)
        sys.exit(1)
    except Exception as exc:
        # Top-level boundary: report the failure briefly and exit non-zero.
        print(f"[ERROR] {exc}", file=sys.stderr)
        sys.exit(1)