iobrpy 0.1.1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- iobrpy-0.1.1/LICENSE +21 -0
- iobrpy-0.1.1/MANIFEST.in +6 -0
- iobrpy-0.1.1/PKG-INFO +365 -0
- iobrpy-0.1.1/README.md +315 -0
- iobrpy-0.1.1/pyproject.toml +54 -0
- iobrpy-0.1.1/setup.cfg +4 -0
- iobrpy-0.1.1/src/iobrpy/__init__.py +3 -0
- iobrpy-0.1.1/src/iobrpy/main.py +482 -0
- iobrpy-0.1.1/src/iobrpy/resources/IPS_genes.txt +163 -0
- iobrpy-0.1.1/src/iobrpy/resources/__init__.py +0 -0
- iobrpy-0.1.1/src/iobrpy/resources/anno_eset.pkl +0 -0
- iobrpy-0.1.1/src/iobrpy/resources/c2.cp.kegg.v2023.1.Hs.symbols.gmt +186 -0
- iobrpy-0.1.1/src/iobrpy/resources/c2.cp.reactome.v2023.1.Hs.symbols.gmt +1654 -0
- iobrpy-0.1.1/src/iobrpy/resources/calculate_data.pkl +0 -0
- iobrpy-0.1.1/src/iobrpy/resources/common_genes.txt +10413 -0
- iobrpy-0.1.1/src/iobrpy/resources/count2tpm_data.pkl +0 -0
- iobrpy-0.1.1/src/iobrpy/resources/epic_TRef_BRef.pkl +0 -0
- iobrpy-0.1.1/src/iobrpy/resources/estimate_data.pkl +0 -0
- iobrpy-0.1.1/src/iobrpy/resources/lm22.txt +548 -0
- iobrpy-0.1.1/src/iobrpy/resources/lr_data.pkl +0 -0
- iobrpy-0.1.1/src/iobrpy/resources/mcp_data.pkl +0 -0
- iobrpy-0.1.1/src/iobrpy/resources/quantiseq_data.pkl +0 -0
- iobrpy-0.1.1/src/iobrpy/workflow/IPS.py +109 -0
- iobrpy-0.1.1/src/iobrpy/workflow/LR_cal.py +179 -0
- iobrpy-0.1.1/src/iobrpy/workflow/__init__.py +2 -0
- iobrpy-0.1.1/src/iobrpy/workflow/anno_eset.py +253 -0
- iobrpy-0.1.1/src/iobrpy/workflow/calculate_sig_score.py +332 -0
- iobrpy-0.1.1/src/iobrpy/workflow/cibersort.py +170 -0
- iobrpy-0.1.1/src/iobrpy/workflow/count2tpm.py +299 -0
- iobrpy-0.1.1/src/iobrpy/workflow/deside.py +152 -0
- iobrpy-0.1.1/src/iobrpy/workflow/deside_bootstrap.py +175 -0
- iobrpy-0.1.1/src/iobrpy/workflow/epic.py +349 -0
- iobrpy-0.1.1/src/iobrpy/workflow/estimate.py +117 -0
- iobrpy-0.1.1/src/iobrpy/workflow/mcpcounter.py +129 -0
- iobrpy-0.1.1/src/iobrpy/workflow/nmf.py +275 -0
- iobrpy-0.1.1/src/iobrpy/workflow/prepare_salmon.py +141 -0
- iobrpy-0.1.1/src/iobrpy/workflow/quantiseq.py +332 -0
- iobrpy-0.1.1/src/iobrpy/workflow/tme_cluster.py +171 -0
- iobrpy-0.1.1/src/iobrpy.egg-info/PKG-INFO +365 -0
- iobrpy-0.1.1/src/iobrpy.egg-info/SOURCES.txt +42 -0
- iobrpy-0.1.1/src/iobrpy.egg-info/dependency_links.txt +1 -0
- iobrpy-0.1.1/src/iobrpy.egg-info/entry_points.txt +2 -0
- iobrpy-0.1.1/src/iobrpy.egg-info/requires.txt +9 -0
- iobrpy-0.1.1/src/iobrpy.egg-info/top_level.txt +1 -0
iobrpy-0.1.1/LICENSE
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2024 Dongqiang Zeng
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
iobrpy-0.1.1/MANIFEST.in
ADDED
iobrpy-0.1.1/PKG-INFO
ADDED
|
@@ -0,0 +1,365 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: iobrpy
|
|
3
|
+
Version: 0.1.1
|
|
4
|
+
Summary: Immuno-Oncology Biological Research tools in Python
|
|
5
|
+
Author-email: Haonan Huang <2905611068@qq.com>
|
|
6
|
+
License: MIT License
|
|
7
|
+
|
|
8
|
+
Copyright (c) 2024 Dongqiang Zeng
|
|
9
|
+
|
|
10
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
11
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
12
|
+
in the Software without restriction, including without limitation the rights
|
|
13
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
14
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
15
|
+
furnished to do so, subject to the following conditions:
|
|
16
|
+
|
|
17
|
+
The above copyright notice and this permission notice shall be included in all
|
|
18
|
+
copies or substantial portions of the Software.
|
|
19
|
+
|
|
20
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
21
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
22
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
23
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
24
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
25
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
26
|
+
SOFTWARE.
|
|
27
|
+
|
|
28
|
+
Project-URL: Homepage, https://github.com/IOBR/IOBRpy
|
|
29
|
+
Project-URL: Issues, https://github.com/IOBR/IOBRpy/issues
|
|
30
|
+
Keywords: bioinformatics,immuno-oncology,RNA-seq,deconvolution
|
|
31
|
+
Classifier: Intended Audience :: Science/Research
|
|
32
|
+
Classifier: Topic :: Scientific/Engineering :: Bio-Informatics
|
|
33
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
34
|
+
Classifier: Programming Language :: Python :: 3
|
|
35
|
+
Classifier: Programming Language :: Python :: 3 :: Only
|
|
36
|
+
Classifier: Operating System :: OS Independent
|
|
37
|
+
Requires-Python: >=3.9
|
|
38
|
+
Description-Content-Type: text/markdown
|
|
39
|
+
License-File: LICENSE
|
|
40
|
+
Requires-Dist: numpy>=1.22
|
|
41
|
+
Requires-Dist: pandas>=1.5
|
|
42
|
+
Requires-Dist: scipy>=1.9
|
|
43
|
+
Requires-Dist: scikit-learn>=1.2
|
|
44
|
+
Requires-Dist: statsmodels>=0.13
|
|
45
|
+
Requires-Dist: matplotlib>=3.7
|
|
46
|
+
Requires-Dist: tqdm>=4.66
|
|
47
|
+
Requires-Dist: gseapy>=1.0.6
|
|
48
|
+
Requires-Dist: joblib>=1.3
|
|
49
|
+
Dynamic: license-file
|
|
50
|
+
|
|
51
|
+
# iobrpy
|
|
52
|
+
|
|
53
|
+
A Python **command‑line toolkit** for bulk RNA‑seq analysis of the tumor microenvironment (TME): data prep → signature scoring → immune deconvolution → clustering → ligand–receptor scoring.
|
|
54
|
+
|
|
55
|
+
---
|
|
56
|
+
|
|
57
|
+
## Features
|
|
58
|
+
|
|
59
|
+
**Data preparation**
|
|
60
|
+
- `prepare_salmon` — Clean up Salmon outputs into a TPM matrix; strip version suffixes; keep `symbol`/`ENSG`/`ENST` identifiers.
|
|
61
|
+
- `count2tpm` — Convert read counts to TPM (supports Ensembl/Entrez/Symbol/MGI; biomart/local annotation; effective length CSV).
|
|
62
|
+
- `anno_eset` — Harmonize/annotate an expression matrix (choose symbol/probe columns; deduplicate; aggregation method).
|
|
63
|
+
|
|
64
|
+
**Pathway / signature scoring**
|
|
65
|
+
- `calculate_sig_score` — Sample‑level signature scores via `pca`, `zscore`, `ssgsea`, or `integration`.
|
|
66
|
+
Supports the following signature **groups** (space‑ or comma‑separated), or `all` to merge them:
|
|
67
|
+
- `go_bp`, `go_cc`, `go_mf`
|
|
68
|
+
- `signature_collection`, `signature_tme`, `signature_sc`, `signature_tumor`, `signature_metabolism`
|
|
69
|
+
- `kegg`, `hallmark`, `reactome`
|
|
70
|
+
|
|
71
|
+
**Immune deconvolution and scoring**
|
|
72
|
+
- `cibersort` — CIBERSORT wrapper/implementation with permutations, quantile normalization, absolute mode.
|
|
73
|
+
- `quantiseq` — quanTIseq deconvolution with `lsei` or robust norms (`hampel`, `huber`, `bisquare`); tumor‑gene filtering; mRNA scaling.
|
|
74
|
+
- `epic` — EPIC cell fractions using `TRef`/`BRef` references.
|
|
75
|
+
- `estimate` — ESTIMATE immune/stromal/tumor purity scores.
|
|
76
|
+
- `mcpcounter` — MCPcounter infiltration scores.
|
|
77
|
+
- `IPS` — Immunophenoscore (AZ/SC/CP/EC + total).
|
|
78
|
+
- `deside` — Deep learning–based deconvolution (requires pre‑downloaded model; supports pathway‑masked mode via KEGG/Reactome GMTs).
|
|
79
|
+
|
|
80
|
+
**Clustering / decomposition**
|
|
81
|
+
- `tme_cluster` — k‑means with **automatic k** via KL index (Hartigan–Wong), feature selection and standardization.
|
|
82
|
+
- `nmf` — NMF‑based clustering (auto‑selects k; can skip k=2 via `--skip_k_2`) with PCA plot and top features.
|
|
83
|
+
|
|
84
|
+
**Ligand–receptor**
|
|
85
|
+
- `LR_cal` — Ligand–receptor interaction scoring using cancer‑type specific networks.
|
|
86
|
+
|
|
87
|
+
---
|
|
88
|
+
|
|
89
|
+
## Installation
|
|
90
|
+
|
|
91
|
+
```bash
|
|
92
|
+
# creating a virtual environment is recommended
|
|
93
|
+
conda create -n iobrpy python=3.9
|
|
94
|
+
conda activate iobrpy
|
|
95
|
+
# update pip
|
|
96
|
+
python3 -m pip install --upgrade pip
|
|
97
|
+
# install iobrpy
|
|
98
|
+
pip install iobrpy
|
|
99
|
+
```
|
|
100
|
+
|
|
101
|
+
---
|
|
102
|
+
|
|
103
|
+
## Command‑line usage
|
|
104
|
+
|
|
105
|
+
### Global
|
|
106
|
+
```bash
|
|
107
|
+
iobrpy -h
|
|
108
|
+
iobrpy <command> --help
|
|
109
|
+
# Example: show help for count2tpm
|
|
110
|
+
iobrpy count2tpm --help
|
|
111
|
+
```
|
|
112
|
+
|
|
113
|
+
### General I/O conventions
|
|
114
|
+
- **Input orientation**: genes × samples by default.
|
|
115
|
+
- **Separators**: auto‑detected from file extension (`.csv` vs `.tsv`/`.txt`); you can override via command options where available.
|
|
116
|
+
- **Outputs**: CSV/TSV/TXT
|
|
117
|
+
|
|
118
|
+
### Typical end‑to‑end workflow
|
|
119
|
+
|
|
120
|
+
1) **Prepare an expression matrix**
|
|
121
|
+
```bash
|
|
122
|
+
# a) From Salmon outputs → TPM
|
|
123
|
+
iobrpy prepare_salmon -i salmon_tpm.tsv.gz -o TPM_matrix.csv --return_feature symbol --remove_version
|
|
124
|
+
|
|
125
|
+
# b) From raw gene counts → TPM
|
|
126
|
+
iobrpy count2tpm -i counts.tsv.gz -o TPM_matrix.csv --idType Ensembl --org hsa --source local
|
|
127
|
+
# (Optionally provide transcript effective lengths)
|
|
128
|
+
# --effLength_csv efflen.csv --id id --length eff_length --gene_symbol symbol
|
|
129
|
+
```
|
|
130
|
+
|
|
131
|
+
2) **(Optional) Annotate / de‑duplicate**
|
|
132
|
+
```bash
|
|
133
|
+
iobrpy anno_eset -i TPM_matrix.csv -o TPM_anno.csv --annotation anno_hug133plus2 --symbol symbol --probe id --method mean
|
|
134
|
+
# You can also use: --annotation-file my_anno.csv --annotation-key gene_id
|
|
135
|
+
```
|
|
136
|
+
|
|
137
|
+
3) **Signature scoring**
|
|
138
|
+
```bash
|
|
139
|
+
iobrpy calculate_sig_score -i TPM_anno.csv -o sig_scores.csv --signature signature_collection --method pca --mini_gene_count 2 --parallel_size 1
|
|
140
|
+
# Accepts space‑separated or comma‑separated groups; use "all" for a full merge.
|
|
141
|
+
```
|
|
142
|
+
|
|
143
|
+
4) **Immune deconvolution (choose one or many)**
|
|
144
|
+
```bash
|
|
145
|
+
# CIBERSORT
|
|
146
|
+
iobrpy cibersort -i TPM_anno.csv -o cibersort.csv --perm 100 --QN True --absolute False --abs_method sig.score --threads 1
|
|
147
|
+
|
|
148
|
+
# quanTIseq (method: lsei / robust norms)
|
|
149
|
+
iobrpy quantiseq -i TPM_anno.csv -o quantiseq.csv --signame TIL10 --method lsei --tumor --arrays --scale_mrna
|
|
150
|
+
|
|
151
|
+
# EPIC
|
|
152
|
+
iobrpy epic -i TPM_anno.csv -o epic.csv --reference TRef
|
|
153
|
+
|
|
154
|
+
# ESTIMATE
|
|
155
|
+
iobrpy estimate -i TPM_anno.csv -o estimate.csv --platform affymetrix
|
|
156
|
+
|
|
157
|
+
# MCPcounter
|
|
158
|
+
iobrpy mcpcounter -i TPM_anno.csv -o mcpcounter.csv --features HUGO_symbols
|
|
159
|
+
|
|
160
|
+
# IPS
|
|
161
|
+
iobrpy IPS -i TPM_anno.csv -o IPS.csv
|
|
162
|
+
|
|
163
|
+
# DeSide
|
|
164
|
+
iobrpy deside --model_dir path/to/your/DeSide_model -i TPM_anno.csv -o deside.csv --result_dir path/to/your/plot/folder --exp_type TPM --method_adding_pathway add_to_end --scaling_by_constant --transpose --print_info
|
|
165
|
+
```
|
|
166
|
+
|
|
167
|
+
5) **TME clustering / NMF clustering**
|
|
168
|
+
```bash
|
|
169
|
+
# KL index auto‑select k (k‑means)
|
|
170
|
+
iobrpy tme_cluster -i cibersort.csv -o tme_cluster.csv --features 1:22 --id "ID" --min_nc 2 --max_nc 5 --print_result --scale
|
|
171
|
+
|
|
172
|
+
# NMF clustering (auto k, excludes k=2)
|
|
173
|
+
iobrpy nmf -i cibersort.csv -o path/to/your/result/folder --kmin 2 --kmax 10 --features 1:22 --max-iter 10000 --skip_k_2
|
|
174
|
+
```
|
|
175
|
+
|
|
176
|
+
6) **Ligand–receptor scoring (optional)**
|
|
177
|
+
```bash
|
|
178
|
+
iobrpy LR_cal -i TPM_anno.csv -o LR_score.csv --data_type tpm --id_type "symbol" --cancer_type pancan --verbose
|
|
179
|
+
```
|
|
180
|
+
|
|
181
|
+
---
|
|
182
|
+
|
|
183
|
+
## Commands & common options
|
|
184
|
+
|
|
185
|
+
### Data preparation
|
|
186
|
+
- **prepare_salmon**
|
|
187
|
+
- `-i/--input <TSV|TSV.GZ>` (required): Salmon-combined gene TPM table
|
|
188
|
+
- `-o/--output <CSV/TSV>` (required): cleaned TPM matrix (genes × samples)
|
|
189
|
+
- `-r/--return_feature {ENST|ENSG|symbol}` (default: `symbol`): which identifier to keep
|
|
190
|
+
- `--remove_version`: strip version suffix from gene IDs (e.g., `ENSG000001.12 → ENSG000001`)
|
|
191
|
+
|
|
192
|
+
- **count2tpm**
|
|
193
|
+
- `-i/--input <CSV/TSV[.gz]>` (required): raw count matrix (genes × samples)
|
|
194
|
+
- `-o/--output <CSV/TSV>` (required): output TPM matrix
|
|
195
|
+
- `--effLength_csv <CSV>`: optional effective-length file with columns `id`, `eff_length`, `symbol`
|
|
196
|
+
- `--idType {Ensembl|entrez|symbol|mgi}` (default: `Ensembl`)
|
|
197
|
+
- `--org {hsa|mmus}` (default: `hsa`)
|
|
198
|
+
- `--source {local|biomart}` (default: `local`)
|
|
199
|
+
- `--id <str>` (default: `id`): ID column name in `--effLength_csv`
|
|
200
|
+
- `--length <str>` (default: `eff_length`): length column
|
|
201
|
+
- `--gene_symbol <str>` (default: `symbol`): gene symbol column
|
|
202
|
+
- `--check_data`: check & drop missing/invalid entries before conversion
|
|
203
|
+
|
|
204
|
+
- **anno_eset**
|
|
205
|
+
- `-i/--input <CSV/TSV/TXT>` (required)
|
|
206
|
+
- `-o/--output <CSV/TSV/TXT>` (required)
|
|
207
|
+
- `--annotation {anno_hug133plus2|anno_rnaseq|anno_illumina|anno_grch38}` (required unless using external file)
|
|
208
|
+
- `--annotation-file <pkl/csv/tsv/xlsx>`: external annotation (overrides built-in)
|
|
209
|
+
- `--annotation-key <str>`: key to pick a table if external `.pkl` stores a dict of DataFrames
|
|
210
|
+
- `--symbol <str>` (default: `symbol`): column used as gene symbol
|
|
211
|
+
- `--probe <str>` (default: `id`): column used as probe/feature ID
|
|
212
|
+
- `--method {mean|sd|sum}` (default: `mean`): duplicate-ID aggregation
|
|
213
|
+
|
|
214
|
+
### Signature scoring
|
|
215
|
+
- **calculate_sig_score**
|
|
216
|
+
- `-i/--input <CSV/TSV/TXT>` (required), `-o/--output <CSV/TSV/TXT>` (required)
|
|
217
|
+
- `--signature <one or more groups>` (required; space- or comma-separated; `all` uses every group)
|
|
218
|
+
Groups: `go_bp`, `go_cc`, `go_mf`, `signature_collection`, `signature_tme`, `signature_sc`, `signature_tumor`, `signature_metabolism`, `kegg`, `hallmark`, `reactome`
|
|
219
|
+
- `--method {pca|zscore|ssgsea|integration}` (default: `pca`)
|
|
220
|
+
- `--mini_gene_count <int>` (default: `3`)
|
|
221
|
+
- `--adjust_eset`: apply extra filtering after log2 transform
|
|
222
|
+
- `--parallel_size <int>` (default: `1`; threads for `ssgsea`)
|
|
223
|
+
|
|
224
|
+
### Deconvolution / scoring
|
|
225
|
+
- **cibersort**
|
|
226
|
+
- `-i/--input <CSV/TSV>` (required), `-o/--output <CSV/TSV>` (required)
|
|
227
|
+
- `--perm <int>` (default: `100`)
|
|
228
|
+
- `--QN <True|False>` (default: `True`): quantile normalization
|
|
229
|
+
- `--absolute <True|False>` (default: `False`): absolute mode
|
|
230
|
+
- `--abs_method {sig.score|no.sumto1}` (default: `sig.score`)
|
|
231
|
+
- `--threads <int>` (default: `1`)
|
|
232
|
+
*Output: columns are suffixed with `_CIBERSORT`, index name is `ID`, separator inferred from output extension.*
|
|
233
|
+
|
|
234
|
+
- **quantiseq**
|
|
235
|
+
- `-i/--input <CSV/TSV>` (required; genes × samples), `-o/--output <TSV>` (required)
|
|
236
|
+
- `--arrays`: perform quantile normalization for arrays
|
|
237
|
+
- `--signame <str>` (default: `TIL10`)
|
|
238
|
+
- `--tumor`: remove genes highly expressed in tumors
|
|
239
|
+
- `--scale_mrna`: enable mRNA scaling (otherwise raw signature proportions)
|
|
240
|
+
- `--method {lsei|hampel|huber|bisquare}` (default: `lsei`)
|
|
241
|
+
- `--rmgenes <str>` (default: `unassigned`; allowed: `default`, `none`, or comma-separated list)
|
|
242
|
+
|
|
243
|
+
- **epic**
|
|
244
|
+
- `-i/--input <CSV/TSV>` (required; genes × samples)
|
|
245
|
+
- `-o/--output <CSV/TSV>` (required)
|
|
246
|
+
- `--reference {TRef|BRef|both}` (default: `TRef`)
|
|
247
|
+
|
|
248
|
+
- **estimate**
|
|
249
|
+
- `-i/--input <CSV/TSV/TXT>` (required; genes × samples)
|
|
250
|
+
- `-p/--platform {affymetrix|agilent|illumina}` (default: `affymetrix`)
|
|
251
|
+
- `-o/--output <CSV/TSV/TXT>` (required)
|
|
252
|
+
*Output is transposed; columns are suffixed with `_estimate`; index label is `ID`; separator inferred from extension.*
|
|
253
|
+
|
|
254
|
+
- **mcpcounter**
|
|
255
|
+
- `-i/--input <TSV>` (required; genes × samples)
|
|
256
|
+
- `-f/--features {affy133P2_probesets|HUGO_symbols|ENTREZ_ID|ENSEMBL_ID}` (required)
|
|
257
|
+
- `-o/--output <CSV/TSV>` (required)
|
|
258
|
+
*Output: columns normalized (spaces → `_`) and suffixed with `_MCPcounter`; index label `ID`; separator inferred from extension.*
|
|
259
|
+
|
|
260
|
+
- **IPS**
|
|
261
|
+
- `-i/--input <matrix>` (required), `-o/--output <file>` (required)
|
|
262
|
+
*No extra flags (expression matrix → IPS sub-scores + total).*
|
|
263
|
+
|
|
264
|
+
- **deside** (deep learning–based deconvolution)
|
|
265
|
+
- `-m/--model_dir <dir>` (required): path to the pre-downloaded DeSide model directory
|
|
266
|
+
- `-i/--input <CSV/TSV>` (required): rows = genes, columns = samples
|
|
267
|
+
- `-o/--output <CSV>` (required)
|
|
268
|
+
- `--exp_type {TPM|log_space|linear}` (default: `TPM`)
|
|
269
|
+
- `TPM`: already log2 processed
|
|
270
|
+
- `log_space`: `log2(TPM+1)`
|
|
271
|
+
- `linear`: linear space (TPM/counts)
|
|
272
|
+
- `--gmt <file1.gmt file2.gmt ...>`: optional one or more GMT files for pathway masking
|
|
273
|
+
- `--method_adding_pathway {add_to_end|convert}` (default: `add_to_end`)
|
|
274
|
+
- `--scaling_by_constant`, `--scaling_by_sample`, `--one_minus_alpha`: optional scaling/transforms
|
|
275
|
+
- `--print_info`: verbose logs
|
|
276
|
+
- `--add_cell_type`: append predicted cell-type labels
|
|
277
|
+
- `--transpose`: use if your file is *samples × genes*
|
|
278
|
+
- `-r/--result_dir <dir>`: optional directory to save result plots/logs
|
|
279
|
+
|
|
280
|
+
|
|
281
|
+
### Clustering / decomposition
|
|
282
|
+
- **tme_cluster**
|
|
283
|
+
- `-i/--input <CSV/TSV/TXT>` (required): input table for clustering.
|
|
284
|
+
- Expected shape: first column = sample ID (use `--id` if not first), remaining columns = features.
|
|
285
|
+
- `-o/--output <CSV/TSV/TXT>` (required): output file for clustering results.
|
|
286
|
+
- `--features <spec>`: select feature columns by 1-based inclusive range, e.g. `1:22` (intended for CIBERSORT outputs; **exclude** the sample ID column when counting).
|
|
287
|
+
- `--pattern <regex>`: alternatively select features by a regex on column names (e.g. `^CD8|^NK`).
|
|
288
|
+
*Tip: use one of `--features` or `--pattern`.*
|
|
289
|
+
- `--id <str>` (default: first column): column name containing sample IDs.
|
|
290
|
+
- `--scale` / `--no-scale`: toggle z-score scaling of features (help text: default = **True**).
|
|
291
|
+
- `--min_nc <int>` (default: `2`): minimum number of clusters to try.
|
|
292
|
+
- `--max_nc <int>` (default: `6`): maximum number of clusters to try.
|
|
293
|
+
- `--max_iter <int>` (default: `10`): maximum iterations for k-means.
|
|
294
|
+
- `--tol <float>` (default: `1e-4`): convergence tolerance for centroid updates.
|
|
295
|
+
- `--print_result`: print intermediate KL scores and cluster counts.
|
|
296
|
+
- `--input_sep <str>` (default: auto): input delimiter (e.g. `,` or `\t`); auto-detected if unset.
|
|
297
|
+
- `--output_sep <str>` (default: auto): output delimiter; inferred from filename if unset.
|
|
298
|
+
|
|
299
|
+
- **nmf**
|
|
300
|
+
- `-i/--input <CSV/TSV>` (required): matrix to factorize; first column should be sample names (index).
|
|
301
|
+
- `-o/--output <DIR>` (required): directory to save results.
|
|
302
|
+
- `--kmin <int>` (default: `2`): minimum `k` (inclusive).
|
|
303
|
+
- `--kmax <int>` (default: `8`): maximum `k` (inclusive).
|
|
304
|
+
- `--features <spec>`: 1-based inclusive selection of feature columns (e.g. `2-10` or `1:5`), typically cell-type columns.
|
|
305
|
+
- `--log1p`: apply `log1p` to the input (useful for counts).
|
|
306
|
+
- `--normalize`: L1 row normalization (each sample sums to 1).
|
|
307
|
+
- `--shift <float>` (default: `None`): if data contain negatives, add a constant to make all values non-negative.
|
|
308
|
+
- `--random-state <int>` (default: `42`): random seed for NMF.
|
|
309
|
+
- `--max-iter <int>` (default: `1000`): NMF max iterations.
|
|
310
|
+
- `--skip_k_2`: skip evaluating `k = 2` when searching for the best `k`.
|
|
311
|
+
|
|
312
|
+
### Ligand–receptor
|
|
313
|
+
- **LR_cal**
|
|
314
|
+
- `-i/--input <CSV/TSV>` (required): expression matrix (genes × samples).
|
|
315
|
+
- `-o/--output <CSV/TSV>` (required): file to save LR scores.
|
|
316
|
+
- `--data_type {count|tpm}` (default: `tpm`): type of the input matrix.
|
|
317
|
+
- `--id_type <str>` (default: `ensembl`): gene ID type expected by the LR backend.
|
|
318
|
+
- `--cancer_type <str>` (default: `pancan`): cancer-type network to use.
|
|
319
|
+
- `--verbose`: verbose logging.
|
|
320
|
+
|
|
321
|
+
---
|
|
322
|
+
|
|
323
|
+
## Troubleshooting
|
|
324
|
+
|
|
325
|
+
- **Wrong input orientation**
|
|
326
|
+
Deconvolution commands expect **genes × samples**. For `deside`, `--transpose` can be helpful depending on your file.
|
|
327
|
+
|
|
328
|
+
- **Mixed separators / encoding**
|
|
329
|
+
Prefer `.csv`, `.txt`, or `.tsv` consistently. Auto‑detection works in most subcommands but you can override with explicit flags where provided.
|
|
330
|
+
|
|
331
|
+
- **DeSide model missing**
|
|
332
|
+
The `deside` subcommand requires pretrained model files. If you get errors like `FileNotFoundError: DeSide_model not found`, download the official model archive from:
|
|
333
|
+
https://figshare.com/articles/dataset/DeSide_model/25117862/1?file=44330255
|
|
334
|
+
|
|
335
|
+
---
|
|
336
|
+
|
|
337
|
+
## Citation & acknowledgments
|
|
338
|
+
|
|
339
|
+
This toolkit implements or wraps well‑known methods (CIBERSORT, quanTIseq, EPIC, ESTIMATE, MCPcounter, DeSide, etc.). For academic use, please cite the corresponding original papers in addition to this package.
|
|
340
|
+
|
|
341
|
+
---
|
|
342
|
+
|
|
343
|
+
## License
|
|
344
|
+
|
|
345
|
+
MIT License
|
|
346
|
+
|
|
347
|
+
Copyright (c) 2024 Dongqiang Zeng
|
|
348
|
+
|
|
349
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
350
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
351
|
+
in the Software without restriction, including without limitation the rights
|
|
352
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
353
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
354
|
+
furnished to do so, subject to the following conditions:
|
|
355
|
+
|
|
356
|
+
The above copyright notice and this permission notice shall be included in all
|
|
357
|
+
copies or substantial portions of the Software.
|
|
358
|
+
|
|
359
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
360
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
361
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
362
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
363
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
364
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
365
|
+
SOFTWARE.
|