iobrpy 0.1.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (44) hide show
  1. iobrpy-0.1.1/LICENSE +21 -0
  2. iobrpy-0.1.1/MANIFEST.in +6 -0
  3. iobrpy-0.1.1/PKG-INFO +365 -0
  4. iobrpy-0.1.1/README.md +315 -0
  5. iobrpy-0.1.1/pyproject.toml +54 -0
  6. iobrpy-0.1.1/setup.cfg +4 -0
  7. iobrpy-0.1.1/src/iobrpy/__init__.py +3 -0
  8. iobrpy-0.1.1/src/iobrpy/main.py +482 -0
  9. iobrpy-0.1.1/src/iobrpy/resources/IPS_genes.txt +163 -0
  10. iobrpy-0.1.1/src/iobrpy/resources/__init__.py +0 -0
  11. iobrpy-0.1.1/src/iobrpy/resources/anno_eset.pkl +0 -0
  12. iobrpy-0.1.1/src/iobrpy/resources/c2.cp.kegg.v2023.1.Hs.symbols.gmt +186 -0
  13. iobrpy-0.1.1/src/iobrpy/resources/c2.cp.reactome.v2023.1.Hs.symbols.gmt +1654 -0
  14. iobrpy-0.1.1/src/iobrpy/resources/calculate_data.pkl +0 -0
  15. iobrpy-0.1.1/src/iobrpy/resources/common_genes.txt +10413 -0
  16. iobrpy-0.1.1/src/iobrpy/resources/count2tpm_data.pkl +0 -0
  17. iobrpy-0.1.1/src/iobrpy/resources/epic_TRef_BRef.pkl +0 -0
  18. iobrpy-0.1.1/src/iobrpy/resources/estimate_data.pkl +0 -0
  19. iobrpy-0.1.1/src/iobrpy/resources/lm22.txt +548 -0
  20. iobrpy-0.1.1/src/iobrpy/resources/lr_data.pkl +0 -0
  21. iobrpy-0.1.1/src/iobrpy/resources/mcp_data.pkl +0 -0
  22. iobrpy-0.1.1/src/iobrpy/resources/quantiseq_data.pkl +0 -0
  23. iobrpy-0.1.1/src/iobrpy/workflow/IPS.py +109 -0
  24. iobrpy-0.1.1/src/iobrpy/workflow/LR_cal.py +179 -0
  25. iobrpy-0.1.1/src/iobrpy/workflow/__init__.py +2 -0
  26. iobrpy-0.1.1/src/iobrpy/workflow/anno_eset.py +253 -0
  27. iobrpy-0.1.1/src/iobrpy/workflow/calculate_sig_score.py +332 -0
  28. iobrpy-0.1.1/src/iobrpy/workflow/cibersort.py +170 -0
  29. iobrpy-0.1.1/src/iobrpy/workflow/count2tpm.py +299 -0
  30. iobrpy-0.1.1/src/iobrpy/workflow/deside.py +152 -0
  31. iobrpy-0.1.1/src/iobrpy/workflow/deside_bootstrap.py +175 -0
  32. iobrpy-0.1.1/src/iobrpy/workflow/epic.py +349 -0
  33. iobrpy-0.1.1/src/iobrpy/workflow/estimate.py +117 -0
  34. iobrpy-0.1.1/src/iobrpy/workflow/mcpcounter.py +129 -0
  35. iobrpy-0.1.1/src/iobrpy/workflow/nmf.py +275 -0
  36. iobrpy-0.1.1/src/iobrpy/workflow/prepare_salmon.py +141 -0
  37. iobrpy-0.1.1/src/iobrpy/workflow/quantiseq.py +332 -0
  38. iobrpy-0.1.1/src/iobrpy/workflow/tme_cluster.py +171 -0
  39. iobrpy-0.1.1/src/iobrpy.egg-info/PKG-INFO +365 -0
  40. iobrpy-0.1.1/src/iobrpy.egg-info/SOURCES.txt +42 -0
  41. iobrpy-0.1.1/src/iobrpy.egg-info/dependency_links.txt +1 -0
  42. iobrpy-0.1.1/src/iobrpy.egg-info/entry_points.txt +2 -0
  43. iobrpy-0.1.1/src/iobrpy.egg-info/requires.txt +9 -0
  44. iobrpy-0.1.1/src/iobrpy.egg-info/top_level.txt +1 -0
iobrpy-0.1.1/LICENSE ADDED
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2024 Dongqiang Zeng
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
iobrpy-0.1.1/MANIFEST.in ADDED
@@ -0,0 +1,6 @@
1
+ include README.md
2
+ include LICENSE
3
+
4
+ recursive-include src/iobrpy/resources *.pkl *.gmt *.txt
5
+
6
+ global-exclude *.py[cod] __pycache__ .DS_Store
iobrpy-0.1.1/PKG-INFO ADDED
@@ -0,0 +1,365 @@
1
+ Metadata-Version: 2.4
2
+ Name: iobrpy
3
+ Version: 0.1.1
4
+ Summary: Immuno-Oncology Biological Research tools in Python
5
+ Author-email: Haonan Huang <2905611068@qq.com>
6
+ License: MIT License
7
+
8
+ Copyright (c) 2024 Dongqiang Zeng
9
+
10
+ Permission is hereby granted, free of charge, to any person obtaining a copy
11
+ of this software and associated documentation files (the "Software"), to deal
12
+ in the Software without restriction, including without limitation the rights
13
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
14
+ copies of the Software, and to permit persons to whom the Software is
15
+ furnished to do so, subject to the following conditions:
16
+
17
+ The above copyright notice and this permission notice shall be included in all
18
+ copies or substantial portions of the Software.
19
+
20
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
21
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
22
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
23
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
24
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
25
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
26
+ SOFTWARE.
27
+
28
+ Project-URL: Homepage, https://github.com/IOBR/IOBRpy
29
+ Project-URL: Issues, https://github.com/IOBR/IOBRpy/issues
30
+ Keywords: bioinformatics,immuno-oncology,RNA-seq,deconvolution
31
+ Classifier: Intended Audience :: Science/Research
32
+ Classifier: Topic :: Scientific/Engineering :: Bio-Informatics
33
+ Classifier: License :: OSI Approved :: MIT License
34
+ Classifier: Programming Language :: Python :: 3
35
+ Classifier: Programming Language :: Python :: 3 :: Only
36
+ Classifier: Operating System :: OS Independent
37
+ Requires-Python: >=3.9
38
+ Description-Content-Type: text/markdown
39
+ License-File: LICENSE
40
+ Requires-Dist: numpy>=1.22
41
+ Requires-Dist: pandas>=1.5
42
+ Requires-Dist: scipy>=1.9
43
+ Requires-Dist: scikit-learn>=1.2
44
+ Requires-Dist: statsmodels>=0.13
45
+ Requires-Dist: matplotlib>=3.7
46
+ Requires-Dist: tqdm>=4.66
47
+ Requires-Dist: gseapy>=1.0.6
48
+ Requires-Dist: joblib>=1.3
49
+ Dynamic: license-file
50
+
51
+ # iobrpy
52
+
53
+ A Python **command‑line toolkit** for bulk RNA‑seq analysis of the tumor microenvironment (TME): data prep → signature scoring → immune deconvolution → clustering → ligand–receptor scoring.
54
+
55
+ ---
56
+
57
+ ## Features
58
+
59
+ **Data preparation**
60
+ - `prepare_salmon` — Clean up Salmon outputs into a TPM matrix; strip version suffixes; keep `symbol`/`ENSG`/`ENST` identifiers.
61
+ - `count2tpm` — Convert read counts to TPM (supports Ensembl/Entrez/Symbol/MGI; biomart/local annotation; effective length CSV).
62
+ - `anno_eset` — Harmonize/annotate an expression matrix (choose symbol/probe columns; deduplicate; aggregation method).
63
+
64
+ **Pathway / signature scoring**
65
+ - `calculate_sig_score` — Sample‑level signature scores via `pca`, `zscore`, `ssgsea`, or `integration`.
66
+ Supports the following signature **groups** (space‑ or comma‑separated), or `all` to merge them:
67
+ - `go_bp`, `go_cc`, `go_mf`
68
+ - `signature_collection`, `signature_tme`, `signature_sc`, `signature_tumor`, `signature_metabolism`
69
+ - `kegg`, `hallmark`, `reactome`
70
+
71
+ **Immune deconvolution and scoring**
72
+ - `cibersort` — CIBERSORT wrapper/implementation with permutations, quantile normalization, absolute mode.
73
+ - `quantiseq` — quanTIseq deconvolution with `lsei` or robust norms (`hampel`, `huber`, `bisquare`); tumor‑gene filtering; mRNA scaling.
74
+ - `epic` — EPIC cell fractions using `TRef`/`BRef` references.
75
+ - `estimate` — ESTIMATE immune/stromal/tumor purity scores.
76
+ - `mcpcounter` — MCPcounter infiltration scores.
77
+ - `IPS` — Immunophenoscore (AZ/SC/CP/EC + total).
78
+ - `deside` — Deep learning–based deconvolution (requires pre‑downloaded model; supports pathway‑masked mode via KEGG/Reactome GMTs).
79
+
80
+ **Clustering / decomposition**
81
+ - `tme_cluster` — k‑means with **automatic k** via KL index (Hartigan–Wong), feature selection and standardization.
82
+ - `nmf` — NMF‑based clustering (auto‑selects k; excludes k=2) with PCA plot and top features.
83
+
84
+ **Ligand–receptor**
85
+ - `LR_cal` — Ligand–receptor interaction scoring using cancer‑type specific networks.
86
+
87
+ ---
88
+
89
+ ## Installation
90
+
91
+ ```bash
92
+ # creating a virtual environment is recommended
93
+ conda create -n iobrpy python=3.9
94
+ conda activate iobrpy
95
+ # update pip
96
+ python3 -m pip install --upgrade pip
97
+ # install iobrpy
98
+ pip install iobrpy
99
+ ```
100
+
101
+ ---
102
+
103
+ ## Command‑line usage
104
+
105
+ ### Global
106
+ ```bash
107
+ iobrpy -h
108
+ iobrpy <command> --help
109
+ # Example: show help for count2tpm
110
+ iobrpy count2tpm --help
111
+ ```
112
+
113
+ ### General I/O conventions
114
+ - **Input orientation**: genes × samples by default.
115
+ - **Separators**: auto‑detected from file extension (`.csv` vs `.tsv`/`.txt`); you can override via command options where available.
116
+ - **Outputs**: CSV/TSV/TXT
117
+
118
+ ### Typical end‑to‑end workflow
119
+
120
+ 1) **Prepare an expression matrix**
121
+ ```bash
122
+ # a) From Salmon outputs → TPM
123
+ iobrpy prepare_salmon -i salmon_tpm.tsv.gz -o TPM_matrix.csv --return_feature symbol --remove_version
124
+
125
+ # b) From raw gene counts → TPM
126
+ iobrpy count2tpm -i counts.tsv.gz -o TPM_matrix.csv --idType Ensembl --org hsa --source local
127
+ # (Optionally provide transcript effective lengths)
128
+ # --effLength_csv efflen.csv --id id --length eff_length --gene_symbol symbol
129
+ ```
130
+
131
+ 2) **(Optional) Annotate / de‑duplicate**
132
+ ```bash
133
+ iobrpy anno_eset -i TPM_matrix.csv -o TPM_anno.csv --annotation anno_hug133plus2 --symbol symbol --probe id --method mean
134
+ # You can also use: --annotation-file my_anno.csv --annotation-key gene_id
135
+ ```
136
+
137
+ 3) **Signature scoring**
138
+ ```bash
139
+ iobrpy calculate_sig_score -i TPM_anno.csv -o sig_scores.csv --signature signature_collection --method pca --mini_gene_count 2 --parallel_size 1
140
+ # Accepts space‑separated or comma‑separated groups; use "all" for a full merge.
141
+ ```
142
+
143
+ 4) **Immune deconvolution (choose one or many)**
144
+ ```bash
145
+ # CIBERSORT
146
+ iobrpy cibersort -i TPM_anno.csv -o cibersort.csv --perm 100 --QN True --absolute False --abs_method sig.score --threads 1
147
+
148
+ # quanTIseq (method: lsei / robust norms)
149
+ iobrpy quantiseq -i TPM_anno.csv -o quantiseq.csv --signame TIL10 --method lsei --tumor --arrays --scale_mrna
150
+
151
+ # EPIC
152
+ iobrpy epic -i TPM_anno.csv -o epic.csv --reference TRef
153
+
154
+ # ESTIMATE
155
+ iobrpy estimate -i TPM_anno.csv -o estimate.csv --platform affymetrix
156
+
157
+ # MCPcounter
158
+ iobrpy mcpcounter -i TPM_anno.csv -o mcpcounter.csv --features HUGO_symbols
159
+
160
+ # IPS
161
+ iobrpy IPS -i TPM_anno.csv -o IPS.csv
162
+
163
+ # DeSide
164
+ iobrpy deside --model_dir path/to/your/DeSide_model -i TPM_anno.csv -o deside.csv --result_dir path/to/your/plot/folder --exp_type TPM --method_adding_pathway add_to_end --scaling_by_constant --transpose --print_info
165
+ ```
166
+
167
+ 5) **TME clustering / NMF clustering**
168
+ ```bash
169
+ # KL index auto‑select k (k‑means)
170
+ iobrpy tme_cluster -i cibersort.csv -o tme_cluster.csv --features 1:22 --id "ID" --min_nc 2 --max_nc 5 --print_result --scale
171
+
172
+ # NMF clustering (auto k, excludes k=2)
173
+ iobrpy nmf -i cibersort.csv -o path/to/your/result/folder --kmin 2 --kmax 10 --features 1:22 --max-iter 10000 --skip_k_2
174
+ ```
175
+
176
+ 6) **Ligand–receptor scoring (optional)**
177
+ ```bash
178
+ iobrpy LR_cal -i TPM_anno.csv -o LR_score.csv --data_type tpm --id_type "symbol" --cancer_type pancan --verbose
179
+ ```
180
+
181
+ ---
182
+
183
+ ## Commands & common options
184
+
185
+ ### Data preparation
186
+ - **prepare_salmon**
187
+ - `-i/--input <TSV|TSV.GZ>` (required): Salmon-combined gene TPM table
188
+ - `-o/--output <CSV/TSV>` (required): cleaned TPM matrix (genes × samples)
189
+ - `-r/--return_feature {ENST|ENSG|symbol}` (default: `symbol`): which identifier to keep
190
+ - `--remove_version`: strip version suffix from gene IDs (e.g., `ENSG000001.12 → ENSG000001`)
191
+
192
+ - **count2tpm**
193
+ - `-i/--input <CSV/TSV[.gz]>` (required): raw count matrix (genes × samples)
194
+ - `-o/--output <CSV/TSV>` (required): output TPM matrix
195
+ - `--effLength_csv <CSV>`: optional effective-length file with columns `id`, `eff_length`, `symbol`
196
+ - `--idType {Ensembl|entrez|symbol|mgi}` (default: `Ensembl`)
197
+ - `--org {hsa|mmus}` (default: `hsa`)
198
+ - `--source {local|biomart}` (default: `local`)
199
+ - `--id <str>` (default: `id`): ID column name in `--effLength_csv`
200
+ - `--length <str>` (default: `eff_length`): length column
201
+ - `--gene_symbol <str>` (default: `symbol`): gene symbol column
202
+ - `--check_data`: check & drop missing/invalid entries before conversion
203
+
204
+ - **anno_eset**
205
+ - `-i/--input <CSV/TSV/TXT>` (required)
206
+ - `-o/--output <CSV/TSV/TXT>` (required)
207
+ - `--annotation {anno_hug133plus2|anno_rnaseq|anno_illumina|anno_grch38}` (required unless using external file)
208
+ - `--annotation-file <pkl/csv/tsv/xlsx>`: external annotation (overrides built-in)
209
+ - `--annotation-key <str>`: key to pick a table if external `.pkl` stores a dict of DataFrames
210
+ - `--symbol <str>` (default: `symbol`): column used as gene symbol
211
+ - `--probe <str>` (default: `id`): column used as probe/feature ID
212
+ - `--method {mean|sd|sum}` (default: `mean`): duplicate-ID aggregation
213
+
214
+ ### Signature scoring
215
+ - **calculate_sig_score**
216
+ - `-i/--input <CSV/TSV/TXT>` (required), `-o/--output <CSV/TSV/TXT>` (required)
217
+ - `--signature <one or more groups>` (required; space- or comma-separated; `all` uses every group)
218
+ Groups: `go_bp`, `go_cc`, `go_mf`, `signature_collection`, `signature_tme`, `signature_sc`, `signature_tumor`, `signature_metabolism`, `kegg`, `hallmark`, `reactome`
219
+ - `--method {pca|zscore|ssgsea|integration}` (default: `pca`)
220
+ - `--mini_gene_count <int>` (default: `3`)
221
+ - `--adjust_eset`: apply extra filtering after log2 transform
222
+ - `--parallel_size <int>` (default: `1`; threads for `ssgsea`)
223
+
224
+ ### Deconvolution / scoring
225
+ - **cibersort**
226
+ - `-i/--input <CSV/TSV>` (required), `-o/--output <CSV/TSV>` (required)
227
+ - `--perm <int>` (default: `100`)
228
+ - `--QN <True|False>` (default: `True`): quantile normalization
229
+ - `--absolute <True|False>` (default: `False`): absolute mode
230
+ - `--abs_method {sig.score|no.sumto1}` (default: `sig.score`)
231
+ - `--threads <int>` (default: `1`)
232
+ *Output: columns are suffixed with `_CIBERSORT`, index name is `ID`, separator inferred from output extension.*
233
+
234
+ - **quantiseq**
235
+ - `-i/--input <CSV/TSV>` (required; genes × samples), `-o/--output <TSV>` (required)
236
+ - `--arrays`: perform quantile normalization for arrays
237
+ - `--signame <str>` (default: `TIL10`)
238
+ - `--tumor`: remove genes highly expressed in tumors
239
+ - `--scale_mrna`: enable mRNA scaling (otherwise raw signature proportions)
240
+ - `--method {lsei|hampel|huber|bisquare}` (default: `lsei`)
241
+ - `--rmgenes <str>` (default: `unassigned`; allowed: `default`, `none`, or comma-separated list)
242
+
243
+ - **epic**
244
+ - `-i/--input <CSV/TSV>` (required; genes × samples)
245
+ - `-o/--output <CSV/TSV>` (required)
246
+ - `--reference {TRef|BRef|both}` (default: `TRef`)
247
+
248
+ - **estimate**
249
+ - `-i/--input <CSV/TSV/TXT>` (required; genes × samples)
250
+ - `-p/--platform {affymetrix|agilent|illumina}` (default: `affymetrix`)
251
+ - `-o/--output <CSV/TSV/TXT>` (required)
252
+ *Output is transposed; columns are suffixed with `_estimate`; index label is `ID`; separator inferred from extension.*
253
+
254
+ - **mcpcounter**
255
+ - `-i/--input <TSV>` (required; genes × samples)
256
+ - `-f/--features {affy133P2_probesets|HUGO_symbols|ENTREZ_ID|ENSEMBL_ID}` (required)
257
+ - `-o/--output <CSV/TSV>` (required)
258
+ *Output: columns normalized (spaces → `_`) and suffixed with `_MCPcounter`; index label `ID`; separator inferred from extension.*
259
+
260
+ - **IPS**
261
+ - `-i/--input <matrix>` (required), `-o/--output <file>` (required)
262
+ *No extra flags (expression matrix → IPS sub-scores + total).*
263
+
264
+ - **deside** (deep learning–based deconvolution)
265
+ - `-m/--model_dir <dir>` (required): path to the pre-downloaded DeSide model directory
266
+ - `-i/--input <CSV/TSV>` (required): rows = genes, columns = samples
267
+ - `-o/--output <CSV>` (required)
268
+ - `--exp_type {TPM|log_space|linear}` (default: `TPM`)
269
+ - `TPM`: already log2 processed
270
+ - `log_space`: `log2(TPM+1)`
271
+ - `linear`: linear space (TPM/counts)
272
+ - `--gmt <file1.gmt file2.gmt ...>`: optional one or more GMT files for pathway masking
273
+ - `--method_adding_pathway {add_to_end|convert}` (default: `add_to_end`)
274
+ - `--scaling_by_constant`, `--scaling_by_sample`, `--one_minus_alpha`: optional scaling/transforms
275
+ - `--print_info`: verbose logs
276
+ - `--add_cell_type`: append predicted cell-type labels
277
+ - `--transpose`: use if your file is *samples × genes*
278
+ - `-r/--result_dir <dir>`: optional directory to save result plots/logs
279
+
280
+
281
+ ### Clustering / decomposition
282
+ - **tme_cluster**
283
+ - `-i/--input <CSV/TSV/TXT>` (required): input table for clustering.
284
+ - Expected shape: first column = sample ID (use `--id` if not first), remaining columns = features.
285
+ - `-o/--output <CSV/TSV/TXT>` (required): output file for clustering results.
286
+ - `--features <spec>`: select feature columns by 1-based inclusive range, e.g. `1:22` (intended for CIBERSORT outputs; **exclude** the sample ID column when counting).
287
+ - `--pattern <regex>`: alternatively select features by a regex on column names (e.g. `^CD8|^NK`).
288
+ *Tip: use one of `--features` or `--pattern`.*
289
+ - `--id <str>` (default: first column): column name containing sample IDs.
290
+ - `--scale` / `--no-scale`: toggle z-score scaling of features (help text: default = **True**).
291
+ - `--min_nc <int>` (default: `2`): minimum number of clusters to try.
292
+ - `--max_nc <int>` (default: `6`): maximum number of clusters to try.
293
+ - `--max_iter <int>` (default: `10`): maximum iterations for k-means.
294
+ - `--tol <float>` (default: `1e-4`): convergence tolerance for centroid updates.
295
+ - `--print_result`: print intermediate KL scores and cluster counts.
296
+ - `--input_sep <str>` (default: auto): input delimiter (e.g. `,` or `\t`); auto-detected if unset.
297
+ - `--output_sep <str>` (default: auto): output delimiter; inferred from filename if unset.
298
+
299
+ - **nmf**
300
+ - `-i/--input <CSV/TSV>` (required): matrix to factorize; first column should be sample names (index).
301
+ - `-o/--output <DIR>` (required): directory to save results.
302
+ - `--kmin <int>` (default: `2`): minimum `k` (inclusive).
303
+ - `--kmax <int>` (default: `8`): maximum `k` (inclusive).
304
+ - `--features <spec>`: 1-based inclusive selection of feature columns (e.g. `2-10` or `1:5`), typically cell-type columns.
305
+ - `--log1p`: apply `log1p` to the input (useful for counts).
306
+ - `--normalize`: L1 row normalization (each sample sums to 1).
307
+ - `--shift <float>` (default: `None`): if data contain negatives, add a constant to make all values non-negative.
308
+ - `--random-state <int>` (default: `42`): random seed for NMF.
309
+ - `--max-iter <int>` (default: `1000`): NMF max iterations.
310
+ - `--skip_k_2`: skip evaluating `k = 2` when searching for the best `k`.
311
+
312
+ ### Ligand–receptor
313
+ - **LR_cal**
314
+ - `-i/--input <CSV/TSV>` (required): expression matrix (genes × samples).
315
+ - `-o/--output <CSV/TSV>` (required): file to save LR scores.
316
+ - `--data_type {count|tpm}` (default: `tpm`): type of the input matrix.
317
+ - `--id_type <str>` (default: `ensembl`): gene ID type expected by the LR backend.
318
+ - `--cancer_type <str>` (default: `pancan`): cancer-type network to use.
319
+ - `--verbose`: verbose logging.
320
+
321
+ ---
322
+
323
+ ## Troubleshooting
324
+
325
+ - **Wrong input orientation**
326
+ Deconvolution commands expect **genes × samples**. For `deside`, `--transpose` can be helpful depending on your file.
327
+
328
+ - **Mixed separators / encoding**
329
+ Prefer `.csv`, `.txt`, or `.tsv` consistently. Auto‑detection works in most subcommands, but you can override it with explicit flags where provided.
330
+
331
+ - **DeSide model missing**
332
+ The `deside` subcommand requires pretrained model files. If you get errors like `FileNotFoundError: DeSide_model not found`, download the official model archive from:
333
+ https://figshare.com/articles/dataset/DeSide_model/25117862/1?file=44330255
334
+
335
+ ---
336
+
337
+ ## Citation & acknowledgments
338
+
339
+ This toolkit implements or wraps well‑known methods (CIBERSORT, quanTIseq, EPIC, ESTIMATE, MCPcounter, DeSide, etc.). For academic use, please cite the corresponding original papers in addition to this package.
340
+
341
+ ---
342
+
343
+ ## License
344
+
345
+ MIT License
346
+
347
+ Copyright (c) 2024 Dongqiang Zeng
348
+
349
+ Permission is hereby granted, free of charge, to any person obtaining a copy
350
+ of this software and associated documentation files (the "Software"), to deal
351
+ in the Software without restriction, including without limitation the rights
352
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
353
+ copies of the Software, and to permit persons to whom the Software is
354
+ furnished to do so, subject to the following conditions:
355
+
356
+ The above copyright notice and this permission notice shall be included in all
357
+ copies or substantial portions of the Software.
358
+
359
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
360
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
361
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
362
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
363
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
364
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
365
+ SOFTWARE.