SigProfilerExtractor 1.1.24__tar.gz → 1.2.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {SigProfilerExtractor-1.1.24 → sigprofilerextractor-1.2.0}/MANIFEST.in +1 -0
- {SigProfilerExtractor-1.1.24/SigProfilerExtractor.egg-info → sigprofilerextractor-1.2.0}/PKG-INFO +65 -43
- {SigProfilerExtractor-1.1.24 → sigprofilerextractor-1.2.0}/README.md +42 -41
- sigprofilerextractor-1.2.0/SigProfilerExtractor/controllers/cli_controller.py +266 -0
- {SigProfilerExtractor-1.1.24 → sigprofilerextractor-1.2.0}/SigProfilerExtractor/nmf_cpu.py +4 -4
- {SigProfilerExtractor-1.1.24 → sigprofilerextractor-1.2.0}/SigProfilerExtractor/nmf_gpu.py +4 -4
- {SigProfilerExtractor-1.1.24 → sigprofilerextractor-1.2.0}/SigProfilerExtractor/sigpro.py +4 -0
- sigprofilerextractor-1.2.0/SigProfilerExtractor/sigprofilerextractor_cli.py +34 -0
- {SigProfilerExtractor-1.1.24 → sigprofilerextractor-1.2.0}/SigProfilerExtractor/subroutines.py +25 -15
- sigprofilerextractor-1.2.0/SigProfilerExtractor/version.py +7 -0
- {SigProfilerExtractor-1.1.24 → sigprofilerextractor-1.2.0/SigProfilerExtractor.egg-info}/PKG-INFO +65 -43
- {SigProfilerExtractor-1.1.24 → sigprofilerextractor-1.2.0}/SigProfilerExtractor.egg-info/SOURCES.txt +3 -0
- sigprofilerextractor-1.2.0/SigProfilerExtractor.egg-info/entry_points.txt +2 -0
- sigprofilerextractor-1.2.0/SigProfilerExtractor.egg-info/requires.txt +11 -0
- {SigProfilerExtractor-1.1.24 → sigprofilerextractor-1.2.0}/setup.py +13 -10
- SigProfilerExtractor-1.1.24/SigProfilerExtractor/version.py +0 -7
- SigProfilerExtractor-1.1.24/SigProfilerExtractor.egg-info/requires.txt +0 -14
- {SigProfilerExtractor-1.1.24 → sigprofilerextractor-1.2.0}/LICENSE.txt +0 -0
- {SigProfilerExtractor-1.1.24 → sigprofilerextractor-1.2.0}/SigProfilerExtractor/__init__.py +0 -0
- {SigProfilerExtractor-1.1.24 → sigprofilerextractor-1.2.0}/SigProfilerExtractor/data/CNVInput/Battenberg_test.tsv +0 -0
- {SigProfilerExtractor-1.1.24 → sigprofilerextractor-1.2.0}/SigProfilerExtractor/data/CSVInput/csv_example.csv +0 -0
- {SigProfilerExtractor-1.1.24 → sigprofilerextractor-1.2.0}/SigProfilerExtractor/data/MatObjInput/21_breast_WGS_substitutions.mat +0 -0
- {SigProfilerExtractor-1.1.24 → sigprofilerextractor-1.2.0}/SigProfilerExtractor/data/ReferenceFiles/CNV_features.tsv +0 -0
- {SigProfilerExtractor-1.1.24 → sigprofilerextractor-1.2.0}/SigProfilerExtractor/data/ReferenceFiles/CN_classes_dictionary.txt +0 -0
- {SigProfilerExtractor-1.1.24 → sigprofilerextractor-1.2.0}/SigProfilerExtractor/data/TextInput/Samples_CNV.txt +0 -0
- {SigProfilerExtractor-1.1.24 → sigprofilerextractor-1.2.0}/SigProfilerExtractor/data/TextInput/Samples_DBS.txt +0 -0
- {SigProfilerExtractor-1.1.24 → sigprofilerextractor-1.2.0}/SigProfilerExtractor/data/TextInput/Samples_ID.txt +0 -0
- {SigProfilerExtractor-1.1.24 → sigprofilerextractor-1.2.0}/SigProfilerExtractor/data/TextInput/Samples_SBS.txt +0 -0
- {SigProfilerExtractor-1.1.24 → sigprofilerextractor-1.2.0}/SigProfilerExtractor/data/TextInput/Samples_SV.txt +0 -0
- {SigProfilerExtractor-1.1.24 → sigprofilerextractor-1.2.0}/SigProfilerExtractor/data/VCFInput/PD3851a.vcf +0 -0
- {SigProfilerExtractor-1.1.24 → sigprofilerextractor-1.2.0}/SigProfilerExtractor/data/VCFInput/PD3890a.vcf +0 -0
- {SigProfilerExtractor-1.1.24 → sigprofilerextractor-1.2.0}/SigProfilerExtractor/data/VCFInput/PD3904a.vcf +0 -0
- {SigProfilerExtractor-1.1.24 → sigprofilerextractor-1.2.0}/SigProfilerExtractor/data/VCFInput/PD3905a.vcf +0 -0
- {SigProfilerExtractor-1.1.24 → sigprofilerextractor-1.2.0}/SigProfilerExtractor/data/VCFInput/PD3945a.vcf +0 -0
- {SigProfilerExtractor-1.1.24 → sigprofilerextractor-1.2.0}/SigProfilerExtractor/estimate_best_solution.py +0 -0
- {SigProfilerExtractor-1.1.24 → sigprofilerextractor-1.2.0}/SigProfilerExtractor.egg-info/dependency_links.txt +0 -0
- {SigProfilerExtractor-1.1.24 → sigprofilerextractor-1.2.0}/SigProfilerExtractor.egg-info/not-zip-safe +0 -0
- {SigProfilerExtractor-1.1.24 → sigprofilerextractor-1.2.0}/SigProfilerExtractor.egg-info/top_level.txt +0 -0
- {SigProfilerExtractor-1.1.24 → sigprofilerextractor-1.2.0}/setup.cfg +0 -0
{SigProfilerExtractor-1.1.24/SigProfilerExtractor.egg-info → sigprofilerextractor-1.2.0}/PKG-INFO
RENAMED
|
@@ -1,17 +1,38 @@
|
|
|
1
|
-
Metadata-Version: 2.
|
|
1
|
+
Metadata-Version: 2.2
|
|
2
2
|
Name: SigProfilerExtractor
|
|
3
|
-
Version: 1.
|
|
3
|
+
Version: 1.2.0
|
|
4
4
|
Summary: Extracts mutational signatures from mutational catalogues
|
|
5
5
|
Home-page: https://github.com/AlexandrovLab/SigProfilerExtractor.git
|
|
6
6
|
Author: S Mishu Ashiqul Islam
|
|
7
7
|
Author-email: m0islam@ucsd.edu
|
|
8
8
|
License: UCSD
|
|
9
|
+
Requires-Python: >=3.9
|
|
9
10
|
Description-Content-Type: text/markdown
|
|
10
11
|
License-File: LICENSE.txt
|
|
12
|
+
Requires-Dist: scipy>=1.6.3
|
|
13
|
+
Requires-Dist: torch>=1.8.1
|
|
14
|
+
Requires-Dist: numpy>=2.0.0
|
|
15
|
+
Requires-Dist: pandas>=2.0.0
|
|
16
|
+
Requires-Dist: nimfa>=1.1.0
|
|
17
|
+
Requires-Dist: sigProfilerPlotting>=1.4.0
|
|
18
|
+
Requires-Dist: SigProfilerMatrixGenerator>=1.3.0
|
|
19
|
+
Requires-Dist: SigProfilerAssignment>=0.2.0
|
|
20
|
+
Requires-Dist: statsmodels>=0.9.0
|
|
21
|
+
Requires-Dist: scikit-learn>=0.24.2
|
|
22
|
+
Requires-Dist: psutil>=5.6.1
|
|
23
|
+
Dynamic: author
|
|
24
|
+
Dynamic: author-email
|
|
25
|
+
Dynamic: description
|
|
26
|
+
Dynamic: description-content-type
|
|
27
|
+
Dynamic: home-page
|
|
28
|
+
Dynamic: license
|
|
29
|
+
Dynamic: requires-dist
|
|
30
|
+
Dynamic: requires-python
|
|
31
|
+
Dynamic: summary
|
|
11
32
|
|
|
12
33
|
[](https://osf.io/t6j7u/wiki/home/)
|
|
13
34
|
[](https://opensource.org/licenses/BSD-2-Clause)
|
|
14
|
-
[](https://app.travis-ci.com/AlexandrovLab/SigProfilerExtractor)
|
|
35
|
+
[](https://app.travis-ci.com/AlexandrovLab/SigProfilerExtractor)
|
|
15
36
|
|
|
16
37
|
# SigProfilerExtractor
|
|
17
38
|
SigProfilerExtractor allows de novo extraction of mutational signatures from data generated in a matrix format.
|
|
@@ -104,43 +125,44 @@ sigProfilerExtractor(input_type, out_put, input_data, reference_genome="GRCh37",
|
|
|
104
125
|
| Category | Parameter | Variable Type | Parameter Description |
|
|
105
126
|
| --------- | --------------------- | -------- |-------- |
|
|
106
127
|
| **Input Data** | | | |
|
|
107
|
-
| | **input_type** | String | The type of input:<br><ul><li
|
|
128
|
+
| | **input_type** | String | The type of input:<br><ul><li>`"vcf"`: used for vcf format inputs.</li><li>`"matrix"`: used for table format inputs using a tab separated file.</li><li>`"bedpe"`: used for bedpe files with each SV annotated with its type, size bin, and clustered/non-clustered status. Please check the required format at https://github.com/AlexandrovLab/SigProfilerMatrixGenerator#structural-variant-matrix-generation.</li><li>`"seg:TYPE"`: used for a multi-sample segmentation file for copy number analysis. Please check the required format at https://github.com/AlexandrovLab/SigProfilerMatrixGenerator#copy-number-matrix-generation. The accepted callers for TYPE are the following {"ASCAT", "ASCAT_NGS", "SEQUENZA", "ABSOLUTE", "BATTENBERG", "FACETS", "PURPLE", "TCGA"}. For example, when using segmentation file from BATTENBERG then set input_type to "seg:BATTENBERG".</li></ul> |
|
|
108
129
|
| | **output** | String | The name of the output folder. The output folder will be generated in the current working directory. |
|
|
109
|
-
| | **input_data** | String | <br>Path to input folder for input_type:<ul><li
|
|
110
|
-
| | **reference_genome** | String | The name of the reference genome
|
|
111
|
-
| | **opportunity_genome** | String | The build or version of the reference genome for the reference signatures
|
|
112
|
-
| | **context_type** | String |
|
|
113
|
-
| | **exome** | Boolean | Defines if the exomes will be extracted
|
|
130
|
+
| | **input_data** | String | <br>Path to input folder for input_type:<ul><li>`vcf`</li><li>`bedpe`</li></ul>Path to file for input_type:<ul><li>`matrix`</li><li>`seg:TYPE`</li></ul> |
|
|
131
|
+
| | **reference_genome** | String | The name of the reference genome (default: `"GRCh37"`). This parameter is applicable only if the `input_type` is `"vcf"`. |
|
|
132
|
+
| | **opportunity_genome** | String | The build or version of the reference genome for the reference signatures (default: `"GRCh37"`). When the input_type is `"vcf"`, the opportunity_genome automatically matches the input reference genome value. Only the genomes available in COSMIC are supported (`GRCh37`, `GRCh38`, `mm9`, `mm10`, and `rn6`). If a different opportunity genome is selected, the default genome `GRCh37` will be used. |
|
|
133
|
+
| | **context_type** | String | Mutation context name(s), separated by commas (`,`), that define the mutational contexts for signature extraction (default: `"96,DINUC,ID"`). In the default value, `96` represents the SBS96 context, `DINUC` represents the dinucleotide context, and `ID` represents the indel context. |
|
|
134
|
+
| | **exome** | Boolean | Defines if the exomes will be extracted (default: `False`). |
|
|
114
135
|
| **NMF Replicates** | | | |
|
|
115
|
-
| | **minimum_signatures** | Positive Integer | The minimum number of signatures to be extracted
|
|
116
|
-
| | **maximum_signatures** | Positive Integer | The maximum number of signatures to be extracted
|
|
117
|
-
| | **nmf_replicates** | Positive Integer | The number of iteration to be performed to extract each number signature
|
|
118
|
-
| | **resample** | Boolean |
|
|
119
|
-
| | **seeds** | String |
|
|
136
|
+
| | **minimum_signatures** | Positive Integer | The minimum number of signatures to be extracted (default: `1`). |
|
|
137
|
+
| | **maximum_signatures** | Positive Integer | The maximum number of signatures to be extracted (default: `25`). |
|
|
138
|
+
| | **nmf_replicates** | Positive Integer | The number of iterations to be performed to extract each number of signatures (default: `100`). |
|
|
139
|
+
| | **resample** | Boolean | If `True`, add poisson noise to samples by resampling (default: `True`). |
|
|
140
|
+
| | **seeds** | String | Ensures reproducible NMF replicate resamples. Provide the path to the `Seeds.txt` file (found in the results folder from a previous analysis) to reproduce results (default: `"random"`). |
|
|
120
141
|
| **NMF Engines** | | | |
|
|
121
|
-
| | **matrix_normalization** | String | Method of normalizing the genome matrix before it is analyzed by NMF
|
|
122
|
-
| | **nmf_init** | String | The initialization algorithm for W and H matrix of NMF. Options are
|
|
123
|
-
| | **precision** | String | Values should be single or double
|
|
124
|
-
| | **min_nmf_iterations** | Integer | Value defines the minimum number of iterations to be completed before NMF converges
|
|
125
|
-
| | **max_nmf_iterations** | Integer | Value defines the maximum number of iterations to be completed before NMF converges
|
|
126
|
-
| | **nmf_test_conv** | Integer | Value defines the number number of iterations to done between checking next convergence
|
|
127
|
-
| | **nmf_tolerance** | Float | Value defines the tolerance to achieve to converge
|
|
142
|
+
| | **matrix_normalization** | String | Method of normalizing the genome matrix before it is analyzed by NMF (default: `"gmm"`). Options are `"gmm"`, `"log2"`, `"custom"` or `"none"`. |
|
|
143
|
+
| | **nmf_init** | String | The initialization algorithm for W and H matrix of NMF (default: `"random"`). Options are `"random"`, `"nndsvd"`, `"nndsvda"`, `"nndsvdar"` and `"nndsvd_min"`. |
|
|
144
|
+
| | **precision** | String | Values should be single or double (default: `"single"`). |
|
|
145
|
+
| | **min_nmf_iterations** | Integer | Value defines the minimum number of iterations to be completed before NMF converges (default: `10000`). |
|
|
146
|
+
| | **max_nmf_iterations** | Integer | Value defines the maximum number of iterations to be completed before NMF converges (default: `1000000`). |
|
|
147
|
+
| | **nmf_test_conv** | Integer | Value defines the number of iterations to be done between convergence checks (default: `10000`). |
|
|
148
|
+
| | **nmf_tolerance** | Float | Value defines the tolerance to achieve to converge (default: `1e-15`).|
|
|
128
149
|
| **Execution** | | | |
|
|
129
|
-
| | **cpu** | Integer | The number of processors to be used to extract the signatures
|
|
130
|
-
| | **gpu** | Boolean | Defines if the GPU resource will used if available
|
|
131
|
-
| | **batch_size** | Integer | Will be effective only if the GPU is used. Defines the number of NMF replicates to be performed by each CPU during the parallel processing.
|
|
150
|
+
| | **cpu** | Integer | The number of processors to be used to extract the signatures (default: all processors). |
|
|
151
|
+
| | **gpu** | Boolean | Defines if the GPU resource will be used if available (default: `False`). If `True`, the GPU resources will be used in the computation. *Note: All available CPU processors are used by default, which may cause a memory error. This error can be resolved by reducing the number of CPU processes through the `cpu` parameter.*|
|
|
152
|
+
| | **batch_size** | Integer | Will be effective only if the GPU is used. Defines the number of NMF replicates to be performed by each CPU during the parallel processing (default: `1`). *Note: For `batch_size` values greater than 1, each NMF replicate will update until `max_nmf_iterations` is reached.*|
|
|
132
153
|
| **Solution Estimation Thresholds** | | | |
|
|
133
|
-
| | **stability** | Float |
|
|
134
|
-
| | **min_stability** | Float |
|
|
135
|
-
| | **combined_stability** | Float |
|
|
136
|
-
| | **allow_stability_drop** | Boolean |
|
|
154
|
+
| | **stability** | Float | The cutoff threshold of the average stability (default: `0.8`). Solutions with average stabilities below this threshold will not be considered. |
|
|
155
|
+
| | **min_stability** | Float | The cutoff threshold of the minimum stability (default: `0.2`). Solutions with minimum stabilities below this threshold will not be considered. |
|
|
156
|
+
| | **combined_stability** | Float | The cutoff threshold of the combined stability (sum of average and minimum stability) (default: `1.0`). Solutions with combined stabilities below this threshold will not be considered. |
|
|
157
|
+
| | **allow_stability_drop** | Boolean | Defines if solutions with a drop in stability with respect to the highest stable number of signatures will be considered (default: `False`). |
|
|
137
158
|
| **Decomposition** | | | |
|
|
138
|
-
| | **cosmic_version** | Float | Takes a positive float among 1
|
|
139
|
-
| | **make_decomposition_plots** | Boolean |
|
|
140
|
-
| | **collapse_to_SBS96** | Boolean |
|
|
159
|
+
| | **cosmic_version** | Float | Defines the version of the COSMIC reference signatures (default: `3.4`). Takes a positive float among `1`, `2`, `3`, `3.1`, `3.2`, `3.3`, and `3.4`.|
|
|
160
|
+
| | **make_decomposition_plots** | Boolean | Generate de novo to COSMIC signature decomposition plots as part of the results (default: `True`). Set to `False` to skip generating these plots. |
|
|
161
|
+
| | **collapse_to_SBS96** | Boolean | If `True`, SBS288 and SBS1536 de novo signatures will be mapped to SBS96 reference signatures (default: `True`). If `False`, those will be mapped to reference signatures of the same context. |
|
|
141
162
|
| **Others** | | | |
|
|
142
|
-
| | **get_all_signature_matrices** | Boolean |
|
|
143
|
-
| | **export_probabilities** | Boolean |
|
|
163
|
+
| | **get_all_signature_matrices** | Boolean | Write to output Ws and Hs from all the NMF iterations (default: `False`). |
|
|
164
|
+
| | **export_probabilities** | Boolean | Create the probability matrix (default: `True`). |
|
|
165
|
+
| | **volume** | String | Path to the volume for writing and loading reference genomes, plotting templates, and COSMIC signature plots (default: `None`). Environmental variables take precedence: `SIGPROFILERMATRIXGENERATOR_VOLUME`, `SIGPROFILERPLOTTING_VOLUME`, and `SIGPROFILERASSIGNMENT_VOLUME`. |
|
|
144
166
|
|
|
145
167
|
#### sigProfilerExtractor Example
|
|
146
168
|
VCF Files as Input
|
|
@@ -191,16 +213,16 @@ estimate_solution(base_csvfile="All_solutions_stat.csv",
|
|
|
191
213
|
|
|
192
214
|
| Parameter | Variable Type | Parameter Description |
|
|
193
215
|
| --------------------- | -------- |-------- |
|
|
194
|
-
| **base_csvfile** | String | Default is "All_solutions_stat.csv"
|
|
195
|
-
| **All_solution** | String | Default is "All_Solutions"
|
|
196
|
-
| **genomes** | String | Default is Samples.txt
|
|
197
|
-
| **output** | String | Default is "results"
|
|
198
|
-
| **title** | String | Default is "Selection_Plot"
|
|
216
|
+
| **base_csvfile** | String | Default is `"All_solutions_stat.csv"`. Path to a CSV file that contains the statistics of all solutions. |
|
|
217
|
+
| **All_solution** | String | Default is `"All_Solutions"`. Path to a folder that contains the results of all solutions. |
|
|
218
|
+
| **genomes** | String | Default is `"Samples.txt"`. Path to a tab delimited file that contains the mutation counts for all genomes given to different mutation types. |
|
|
219
|
+
| **output** | String | Default is `"results"`. Path to the output folder. |
|
|
220
|
+
| **title** | String | Default is `"Selection_Plot"`. This sets the title of the selection_plot.pdf |
|
|
199
221
|
| **stability** | Float | Default is 0.8. The cutoff threshold of the average stability. Solutions with average stabilities below this threshold will not be considered. |
|
|
200
|
-
| **min_stability** | Float | Default is 0.2
|
|
201
|
-
| **combined_stability** | Float | Default is 1.0
|
|
202
|
-
| **allow_stability_drop** | Boolean | Default is False
|
|
203
|
-
| **exome** | Boolean | Default is
|
|
222
|
+
| **min_stability** | Float | Default is `0.2`. The cutoff threshold of the minimum stability. Solutions with minimum stabilities below this threshold will not be considered. |
|
|
223
|
+
| **combined_stability** | Float | Default is `1.0`. The cutoff threshold of the combined stability (sum of average and minimum stability). Solutions with combined stabilities below this threshold will not be considered. |
|
|
224
|
+
| **allow_stability_drop** | Boolean | Default is `False`. Defines if solutions with a drop in stability with respect to the highest stable number of signatures will be considered. |
|
|
225
|
+
| **exome** | Boolean | Default is `False`. Defines if exome samples are used. |
|
|
204
226
|
|
|
205
227
|
|
|
206
228
|
#### Estimation of the Optimum Solution Example
|
|
@@ -227,7 +249,7 @@ The files below will be generated in the output folder:
|
|
|
227
249
|
|
|
228
250
|
### <a name="decompose"></a> Decompose
|
|
229
251
|
|
|
230
|
-
For decomposition of
|
|
252
|
+
For decomposition of de novo signatures please use [SigProfilerAssignment](https://github.com/AlexandrovLab/SigProfilerAssignment)
|
|
231
253
|
|
|
232
254
|
### <a name="plotActivity"></a> Activity Stacked Bar Plot
|
|
233
255
|
Generates a stacked bar plot showing activities in individuals
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
[](https://osf.io/t6j7u/wiki/home/)
|
|
2
2
|
[](https://opensource.org/licenses/BSD-2-Clause)
|
|
3
|
-
[](https://app.travis-ci.com/AlexandrovLab/SigProfilerExtractor)
|
|
3
|
+
[](https://app.travis-ci.com/AlexandrovLab/SigProfilerExtractor)
|
|
4
4
|
|
|
5
5
|
# SigProfilerExtractor
|
|
6
6
|
SigProfilerExtractor allows de novo extraction of mutational signatures from data generated in a matrix format.
|
|
@@ -93,43 +93,44 @@ sigProfilerExtractor(input_type, out_put, input_data, reference_genome="GRCh37",
|
|
|
93
93
|
| Category | Parameter | Variable Type | Parameter Description |
|
|
94
94
|
| --------- | --------------------- | -------- |-------- |
|
|
95
95
|
| **Input Data** | | | |
|
|
96
|
-
| | **input_type** | String | The type of input:<br><ul><li
|
|
96
|
+
| | **input_type** | String | The type of input:<br><ul><li>`"vcf"`: used for vcf format inputs.</li><li>`"matrix"`: used for table format inputs using a tab separated file.</li><li>`"bedpe"`: used for bedpe files with each SV annotated with its type, size bin, and clustered/non-clustered status. Please check the required format at https://github.com/AlexandrovLab/SigProfilerMatrixGenerator#structural-variant-matrix-generation.</li><li>`"seg:TYPE"`: used for a multi-sample segmentation file for copy number analysis. Please check the required format at https://github.com/AlexandrovLab/SigProfilerMatrixGenerator#copy-number-matrix-generation. The accepted callers for TYPE are the following {"ASCAT", "ASCAT_NGS", "SEQUENZA", "ABSOLUTE", "BATTENBERG", "FACETS", "PURPLE", "TCGA"}. For example, when using segmentation file from BATTENBERG then set input_type to "seg:BATTENBERG".</li></ul> |
|
|
97
97
|
| | **output** | String | The name of the output folder. The output folder will be generated in the current working directory. |
|
|
98
|
-
| | **input_data** | String | <br>Path to input folder for input_type:<ul><li
|
|
99
|
-
| | **reference_genome** | String | The name of the reference genome
|
|
100
|
-
| | **opportunity_genome** | String | The build or version of the reference genome for the reference signatures
|
|
101
|
-
| | **context_type** | String |
|
|
102
|
-
| | **exome** | Boolean | Defines if the exomes will be extracted
|
|
98
|
+
| | **input_data** | String | <br>Path to input folder for input_type:<ul><li>`vcf`</li><li>`bedpe`</li></ul>Path to file for input_type:<ul><li>`matrix`</li><li>`seg:TYPE`</li></ul> |
|
|
99
|
+
| | **reference_genome** | String | The name of the reference genome (default: `"GRCh37"`). This parameter is applicable only if the `input_type` is `"vcf"`. |
|
|
100
|
+
| | **opportunity_genome** | String | The build or version of the reference genome for the reference signatures (default: `"GRCh37"`). When the input_type is `"vcf"`, the opportunity_genome automatically matches the input reference genome value. Only the genomes available in COSMIC are supported (`GRCh37`, `GRCh38`, `mm9`, `mm10`, and `rn6`). If a different opportunity genome is selected, the default genome `GRCh37` will be used. |
|
|
101
|
+
| | **context_type** | String | Mutation context name(s), separated by commas (`,`), that define the mutational contexts for signature extraction (default: `"96,DINUC,ID"`). In the default value, `96` represents the SBS96 context, `DINUC` represents the dinucleotide context, and `ID` represents the indel context. |
|
|
102
|
+
| | **exome** | Boolean | Defines if the exomes will be extracted (default: `False`). |
|
|
103
103
|
| **NMF Replicates** | | | |
|
|
104
|
-
| | **minimum_signatures** | Positive Integer | The minimum number of signatures to be extracted
|
|
105
|
-
| | **maximum_signatures** | Positive Integer | The maximum number of signatures to be extracted
|
|
106
|
-
| | **nmf_replicates** | Positive Integer | The number of iteration to be performed to extract each number signature
|
|
107
|
-
| | **resample** | Boolean |
|
|
108
|
-
| | **seeds** | String |
|
|
104
|
+
| | **minimum_signatures** | Positive Integer | The minimum number of signatures to be extracted (default: `1`). |
|
|
105
|
+
| | **maximum_signatures** | Positive Integer | The maximum number of signatures to be extracted (default: `25`). |
|
|
106
|
+
| | **nmf_replicates** | Positive Integer | The number of iterations to be performed to extract each number of signatures (default: `100`). |
|
|
107
|
+
| | **resample** | Boolean | If `True`, add poisson noise to samples by resampling (default: `True`). |
|
|
108
|
+
| | **seeds** | String | Ensures reproducible NMF replicate resamples. Provide the path to the `Seeds.txt` file (found in the results folder from a previous analysis) to reproduce results (default: `"random"`). |
|
|
109
109
|
| **NMF Engines** | | | |
|
|
110
|
-
| | **matrix_normalization** | String | Method of normalizing the genome matrix before it is analyzed by NMF
|
|
111
|
-
| | **nmf_init** | String | The initialization algorithm for W and H matrix of NMF. Options are
|
|
112
|
-
| | **precision** | String | Values should be single or double
|
|
113
|
-
| | **min_nmf_iterations** | Integer | Value defines the minimum number of iterations to be completed before NMF converges
|
|
114
|
-
| | **max_nmf_iterations** | Integer | Value defines the maximum number of iterations to be completed before NMF converges
|
|
115
|
-
| | **nmf_test_conv** | Integer | Value defines the number number of iterations to done between checking next convergence
|
|
116
|
-
| | **nmf_tolerance** | Float | Value defines the tolerance to achieve to converge
|
|
110
|
+
| | **matrix_normalization** | String | Method of normalizing the genome matrix before it is analyzed by NMF (default: `"gmm"`). Options are `"gmm"`, `"log2"`, `"custom"` or `"none"`. |
|
|
111
|
+
| | **nmf_init** | String | The initialization algorithm for W and H matrix of NMF (default: `"random"`). Options are `"random"`, `"nndsvd"`, `"nndsvda"`, `"nndsvdar"` and `"nndsvd_min"`. |
|
|
112
|
+
| | **precision** | String | Values should be single or double (default: `"single"`). |
|
|
113
|
+
| | **min_nmf_iterations** | Integer | Value defines the minimum number of iterations to be completed before NMF converges (default: `10000`). |
|
|
114
|
+
| | **max_nmf_iterations** | Integer | Value defines the maximum number of iterations to be completed before NMF converges (default: `1000000`). |
|
|
115
|
+
| | **nmf_test_conv** | Integer | Value defines the number of iterations to be done between convergence checks (default: `10000`). |
|
|
116
|
+
| | **nmf_tolerance** | Float | Value defines the tolerance to achieve to converge (default: `1e-15`).|
|
|
117
117
|
| **Execution** | | | |
|
|
118
|
-
| | **cpu** | Integer | The number of processors to be used to extract the signatures
|
|
119
|
-
| | **gpu** | Boolean | Defines if the GPU resource will used if available
|
|
120
|
-
| | **batch_size** | Integer | Will be effective only if the GPU is used. Defines the number of NMF replicates to be performed by each CPU during the parallel processing.
|
|
118
|
+
| | **cpu** | Integer | The number of processors to be used to extract the signatures (default: all processors). |
|
|
119
|
+
| | **gpu** | Boolean | Defines if the GPU resource will be used if available (default: `False`). If `True`, the GPU resources will be used in the computation. *Note: All available CPU processors are used by default, which may cause a memory error. This error can be resolved by reducing the number of CPU processes through the `cpu` parameter.*|
|
|
120
|
+
| | **batch_size** | Integer | Will be effective only if the GPU is used. Defines the number of NMF replicates to be performed by each CPU during the parallel processing (default: `1`). *Note: For `batch_size` values greater than 1, each NMF replicate will update until `max_nmf_iterations` is reached.*|
|
|
121
121
|
| **Solution Estimation Thresholds** | | | |
|
|
122
|
-
| | **stability** | Float |
|
|
123
|
-
| | **min_stability** | Float |
|
|
124
|
-
| | **combined_stability** | Float |
|
|
125
|
-
| | **allow_stability_drop** | Boolean |
|
|
122
|
+
| | **stability** | Float | The cutoff threshold of the average stability (default: `0.8`). Solutions with average stabilities below this threshold will not be considered. |
|
|
123
|
+
| | **min_stability** | Float | The cutoff threshold of the minimum stability (default: `0.2`). Solutions with minimum stabilities below this threshold will not be considered. |
|
|
124
|
+
| | **combined_stability** | Float | The cutoff threshold of the combined stability (sum of average and minimum stability) (default: `1.0`). Solutions with combined stabilities below this threshold will not be considered. |
|
|
125
|
+
| | **allow_stability_drop** | Boolean | Defines if solutions with a drop in stability with respect to the highest stable number of signatures will be considered (default: `False`). |
|
|
126
126
|
| **Decomposition** | | | |
|
|
127
|
-
| | **cosmic_version** | Float | Takes a positive float among 1
|
|
128
|
-
| | **make_decomposition_plots** | Boolean |
|
|
129
|
-
| | **collapse_to_SBS96** | Boolean |
|
|
127
|
+
| | **cosmic_version** | Float | Defines the version of the COSMIC reference signatures (default: `3.4`). Takes a positive float among `1`, `2`, `3`, `3.1`, `3.2`, `3.3`, and `3.4`.|
|
|
128
|
+
| | **make_decomposition_plots** | Boolean | Generate de novo to COSMIC signature decomposition plots as part of the results (default: `True`). Set to `False` to skip generating these plots. |
|
|
129
|
+
| | **collapse_to_SBS96** | Boolean | If `True`, SBS288 and SBS1536 de novo signatures will be mapped to SBS96 reference signatures (default: `True`). If `False`, those will be mapped to reference signatures of the same context. |
|
|
130
130
|
| **Others** | | | |
|
|
131
|
-
| | **get_all_signature_matrices** | Boolean |
|
|
132
|
-
| | **export_probabilities** | Boolean |
|
|
131
|
+
| | **get_all_signature_matrices** | Boolean | Write to output Ws and Hs from all the NMF iterations (default: `False`). |
|
|
132
|
+
| | **export_probabilities** | Boolean | Create the probability matrix (default: `True`). |
|
|
133
|
+
| | **volume** | String | Path to the volume for writing and loading reference genomes, plotting templates, and COSMIC signature plots (default: `None`). Environmental variables take precedence: `SIGPROFILERMATRIXGENERATOR_VOLUME`, `SIGPROFILERPLOTTING_VOLUME`, and `SIGPROFILERASSIGNMENT_VOLUME`. |
|
|
133
134
|
|
|
134
135
|
#### sigProfilerExtractor Example
|
|
135
136
|
VCF Files as Input
|
|
@@ -180,16 +181,16 @@ estimate_solution(base_csvfile="All_solutions_stat.csv",
|
|
|
180
181
|
|
|
181
182
|
| Parameter | Variable Type | Parameter Description |
|
|
182
183
|
| --------------------- | -------- |-------- |
|
|
183
|
-
| **base_csvfile** | String | Default is "All_solutions_stat.csv"
|
|
184
|
-
| **All_solution** | String | Default is "All_Solutions"
|
|
185
|
-
| **genomes** | String | Default is Samples.txt
|
|
186
|
-
| **output** | String | Default is "results"
|
|
187
|
-
| **title** | String | Default is "Selection_Plot"
|
|
184
|
+
| **base_csvfile** | String | Default is `"All_solutions_stat.csv"`. Path to a CSV file that contains the statistics of all solutions. |
|
|
185
|
+
| **All_solution** | String | Default is `"All_Solutions"`. Path to a folder that contains the results of all solutions. |
|
|
186
|
+
| **genomes** | String | Default is `"Samples.txt"`. Path to a tab delimited file that contains the mutation counts for all genomes given to different mutation types. |
|
|
187
|
+
| **output** | String | Default is `"results"`. Path to the output folder. |
|
|
188
|
+
| **title** | String | Default is `"Selection_Plot"`. This sets the title of the selection_plot.pdf |
|
|
188
189
|
| **stability** | Float | Default is 0.8. The cutoff threshold of the average stability. Solutions with average stabilities below this threshold will not be considered. |
|
|
189
|
-
| **min_stability** | Float | Default is 0.2
|
|
190
|
-
| **combined_stability** | Float | Default is 1.0
|
|
191
|
-
| **allow_stability_drop** | Boolean | Default is False
|
|
192
|
-
| **exome** | Boolean | Default is
|
|
190
|
+
| **min_stability** | Float | Default is `0.2`. The cutoff threshold of the minimum stability. Solutions with minimum stabilities below this threshold will not be considered. |
|
|
191
|
+
| **combined_stability** | Float | Default is `1.0`. The cutoff threshold of the combined stability (sum of average and minimum stability). Solutions with combined stabilities below this threshold will not be considered. |
|
|
192
|
+
| **allow_stability_drop** | Boolean | Default is `False`. Defines if solutions with a drop in stability with respect to the highest stable number of signatures will be considered. |
|
|
193
|
+
| **exome** | Boolean | Default is `False`. Defines if exome samples are used. |
|
|
193
194
|
|
|
194
195
|
|
|
195
196
|
#### Estimation of the Optimum Solution Example
|
|
@@ -216,7 +217,7 @@ The files below will be generated in the output folder:
|
|
|
216
217
|
|
|
217
218
|
### <a name="decompose"></a> Decompose
|
|
218
219
|
|
|
219
|
-
For decomposition of
|
|
220
|
+
For decomposition of de novo signatures, please use [SigProfilerAssignment](https://github.com/AlexandrovLab/SigProfilerAssignment).
|
|
220
221
|
|
|
221
222
|
### <a name="plotActivity"></a> Activity Stacked Bar Plot
|
|
222
223
|
Generates a stacked bar plot showing activities in individuals
|
|
@@ -0,0 +1,266 @@
|
|
|
1
|
+
import argparse
|
|
2
|
+
from typing import List
|
|
3
|
+
from SigProfilerExtractor import sigpro
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
def str2bool(v):
    """Convert a command-line token into a boolean.

    Intended for use as an ``argparse`` ``type=`` callable.

    Args:
        v: A ``bool`` (returned unchanged) or a string. String matching is
            case-insensitive and ignores surrounding whitespace.

    Returns:
        ``True`` for "yes", "true", "t", "y" or "1"; ``False`` for "no",
        "false", "f", "n" or "0".

    Raises:
        argparse.ArgumentTypeError: If ``v`` is neither a bool nor one of
            the recognised strings.
    """
    if isinstance(v, bool):
        return v
    if not isinstance(v, str):
        # A non-string (e.g. None) has no .lower(); report it the same way
        # as any other unparseable value instead of an AttributeError.
        raise argparse.ArgumentTypeError("Boolean value expected.")

    token = v.strip().lower()
    if token in ("yes", "true", "t", "y", "1"):
        return True
    if token in ("no", "false", "f", "n", "0"):
        return False
    raise argparse.ArgumentTypeError("Boolean value expected.")
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
def parse_arguments_extractor(args: List[str], description: str) -> argparse.Namespace:
    """Parse the command-line arguments of the signature-extraction command.

    Three positional arguments are required, in this order: ``input_type``,
    ``output`` and ``input_data``. Every other option is optional and has a
    default. Boolean options are converted with ``str2bool`` and may be given
    bare (``--gpu``, which yields ``True``) or with an explicit value
    (``--gpu false``).

    Args:
        args: The argument strings to parse (without the program name).
        description: Text shown at the top of the ``--help`` output.

    Returns:
        The parsed namespace, with one attribute per argument defined below.
    """
    parser = argparse.ArgumentParser(description=description)

    def add_bool_flag(flag: str, default: bool, help_text: str) -> None:
        # nargs="?" with const=True lets the flag be passed without a value.
        parser.add_argument(
            flag,
            type=str2bool,
            nargs="?",
            const=True,
            default=default,
            help=help_text,
        )

    # Core required arguments
    input_type_help = (
        "The input file type: 'vcf', 'matrix', 'bedpe', or 'seg:TYPE'. "
        "Accepted callers for TYPE: {'ASCAT', 'ASCAT_NGS', 'SEQUENZA', "
        "'ABSOLUTE', 'BATTENBERG', 'FACETS', 'PURPLE', 'TCGA'}."
    )
    parser.add_argument(
        "input_type",
        help=input_type_help,
    )
    parser.add_argument(
        "output",
        help="Path to the output folder.",
    )
    input_data_help = (
        "Path to input data. For 'vcf' or 'bedpe', provide an input folder. "
        "For 'matrix' or 'seg:TYPE', provide an input file."
    )
    parser.add_argument(
        "input_data",
        help=input_data_help,
    )

    # Optional arguments with defaults
    parser.add_argument(
        "--reference_genome",
        default="GRCh37",
        help="Reference genome (default: 'GRCh37'). This parameter is applicable only if the input_type is 'vcf'.",
    )
    parser.add_argument(
        "--opportunity_genome",
        default="GRCh37",
        # The help previously named 'opportunity_genome' itself as the
        # substitute value; the option it defers to is 'reference_genome'.
        help="The build or version of the reference genome for the reference signatures (default: 'GRCh37'). When the input type is 'vcf' the value for 'reference_genome' will be used instead.",
    )
    parser.add_argument(
        "--context_type",
        default="default",
        help="Mutational context types (default: '96,DINUC,ID').",
    )
    add_bool_flag("--exome", False, "Extract exomes (default: False).")
    parser.add_argument(
        "--minimum_signatures",
        type=int,
        default=1,
        help="Minimum number of signatures to be extracted (default: 1).",
    )
    parser.add_argument(
        "--maximum_signatures",
        type=int,
        default=10,
        help="Maximum number of signatures to be extracted (default: 10).",
    )
    parser.add_argument(
        "--nmf_replicates",
        type=int,
        default=100,
        help="Number of NMF replicates to be performed at each rank using W and H (default: 100).",
    )
    add_bool_flag(
        "--resample",
        True,
        "Add poisson noise to samples by resampling (default: True).",
    )
    parser.add_argument(
        "--seeds",
        default="random",
        help="Seeds for reproducible resamples, file path or 'random' (default: 'random').",
    )
    parser.add_argument(
        "--batch_size",
        type=int,
        default=1,
        help="Batch size is for GPU only and defines the number of NMF replicates to be performed by each CPU during parallel processing (default: 1).",
    )
    parser.add_argument(
        "--cpu",
        type=int,
        default=-1,
        help="Number of processors to use (default: all available).",
    )
    add_bool_flag(
        "--gpu",
        False,
        "Use GPU if available (default: False). note: All available CPU processors are used by default, which may cause a memory error. This error can be resolved by reducing the number of CPU processes through the 'cpu' parameter.",
    )
    parser.add_argument(
        "--nmf_init",
        default="random",
        help="The initialization algorithm for W and H matrix of NMF (default: 'random'). Options are 'random', 'nndsvd', 'nndsvda', 'nndsvdar' and 'nndsvd_min'.",
    )
    parser.add_argument(
        "--precision",
        default="single",
        help="Precision for calculations (default: 'single'). Options are 'single' and 'double'.",
    )
    parser.add_argument(
        "--matrix_normalization",
        default="gmm",
        help="Method of normalizing the genome matrix before it is analyzed by NMF (default: 'gmm'). Options are 'custom', 'gmm', 'log2', or 'none'.",
    )
    parser.add_argument(
        "--min_nmf_iterations",
        type=int,
        default=10000,
        help="Minimum NMF iterations (default: 10000).",
    )
    parser.add_argument(
        "--max_nmf_iterations",
        type=int,
        default=1000000,
        help="Maximum NMF iterations (default: 1000000).",
    )
    parser.add_argument(
        "--nmf_test_conv",
        type=int,
        default=10000,
        help="Test convergence every X iterations (default: 10000).",
    )
    parser.add_argument(
        "--nmf_tolerance",
        type=float,
        default=1e-15,
        help="NMF tolerance for convergence (default: 1e-15).",
    )
    add_bool_flag(
        "--get_all_signature_matrices",
        False,
        "Get all NMF matrices (default: False).",
    )
    add_bool_flag(
        "--export_probabilities",
        True,
        "Export probability matrix (default: True).",
    )
    parser.add_argument(
        "--stability",
        type=float,
        default=0.8,
        help="Average stability cutoff (default: 0.8).",
    )
    parser.add_argument(
        "--min_stability",
        type=float,
        default=0.2,
        help="Minimum stability cutoff (default: 0.2).",
    )
    parser.add_argument(
        "--combined_stability",
        type=float,
        default=1.0,
        help="Combined stability cutoff (default: 1.0).",
    )
    add_bool_flag(
        "--allow_stability_drop",
        False,
        "Allow stability drop (default: False).",
    )
    parser.add_argument(
        "--cosmic_version",
        type=float,
        default=3.4,
        help="COSMIC version for reference signatures. Valid values are 1, 2, 3, 3.1, 3.2, 3.3, and 3.4 (default: 3.4).",
    )
    add_bool_flag(
        "--make_decomposition_plots",
        True,
        "Generate decomposition plots (default: True).",
    )
    add_bool_flag(
        "--collapse_to_SBS96",
        True,
        "Collapse to SBS288 and SBS1536 matrices to SBS96. If False, will map reference signatures to the same context as input (default: True).",
    )

    return parser.parse_args(args)
|
|
227
|
+
|
|
228
|
+
|
|
229
|
+
class CliController:
    """Command-line front end that forwards parsed options to ``sigpro``."""

    def dispatch_sigProfilerExtractor(self, user_args: List[str]) -> None:
        """Parse ``user_args`` and run ``sigpro.sigProfilerExtractor``.

        Args:
            user_args: Raw command-line argument strings, handed to
                ``parse_arguments_extractor``.
        """
        namespace = parse_arguments_extractor(
            user_args, "Extract mutational signatures from input samples."
        )

        # Each parsed attribute is passed on as the keyword argument of the
        # same name, in this order.
        forwarded = (
            "input_type",
            "output",
            "input_data",
            "reference_genome",
            "opportunity_genome",
            "context_type",
            "exome",
            "minimum_signatures",
            "maximum_signatures",
            "nmf_replicates",
            "resample",
            "seeds",
            "batch_size",
            "cpu",
            "gpu",
            "nmf_init",
            "precision",
            "matrix_normalization",
            "min_nmf_iterations",
            "max_nmf_iterations",
            "nmf_test_conv",
            "nmf_tolerance",
            "get_all_signature_matrices",
            "export_probabilities",
            "stability",
            "min_stability",
            "combined_stability",
            "allow_stability_drop",
            "cosmic_version",
            "make_decomposition_plots",
            "collapse_to_SBS96",
        )
        options = {key: getattr(namespace, key) for key in forwarded}
        sigpro.sigProfilerExtractor(**options)
|
|
@@ -113,7 +113,7 @@ class NMF:
|
|
|
113
113
|
H = np.zeros([self._V.shape[0], self._rank, self._V.shape[2]])
|
|
114
114
|
nv = nndsvd.Nndsvd()
|
|
115
115
|
for i in range(self._V.shape[0]):
|
|
116
|
-
vin = np.
|
|
116
|
+
vin = np.asmatrix(self._V.cpu().numpy()[i])
|
|
117
117
|
W[i, :, :], H[i, :, :] = nv.initialize(
|
|
118
118
|
vin, self._rank, options={"flag": 0}
|
|
119
119
|
)
|
|
@@ -123,7 +123,7 @@ class NMF:
|
|
|
123
123
|
H = np.zeros([self._V.shape[0], self._rank, self._V.shape[2]])
|
|
124
124
|
nv = nndsvd.Nndsvd()
|
|
125
125
|
for i in range(self._V.shape[0]):
|
|
126
|
-
vin = np.
|
|
126
|
+
vin = np.asmatrix(self._V.cpu().numpy()[i])
|
|
127
127
|
W[i, :, :], H[i, :, :] = nv.initialize(
|
|
128
128
|
vin, self._rank, options={"flag": 1}
|
|
129
129
|
)
|
|
@@ -133,7 +133,7 @@ class NMF:
|
|
|
133
133
|
H = np.zeros([self._V.shape[0], self._rank, self._V.shape[2]])
|
|
134
134
|
nv = nndsvd.Nndsvd()
|
|
135
135
|
for i in range(self._V.shape[0]):
|
|
136
|
-
vin = np.
|
|
136
|
+
vin = np.asmatrix(self._V.cpu().numpy()[i])
|
|
137
137
|
W[i, :, :], H[i, :, :] = nv.initialize(
|
|
138
138
|
vin, self._rank, options={"flag": 2}
|
|
139
139
|
)
|
|
@@ -142,7 +142,7 @@ class NMF:
|
|
|
142
142
|
H = np.zeros([self._V.shape[0], self._rank, self._V.shape[2]])
|
|
143
143
|
nv = nndsvd.Nndsvd()
|
|
144
144
|
for i in range(self._V.shape[0]):
|
|
145
|
-
vin = np.
|
|
145
|
+
vin = np.asmatrix(self._V.cpu().numpy()[i])
|
|
146
146
|
w, h = nv.initialize(vin, self._rank, options={"flag": 2})
|
|
147
147
|
min_X = np.min(vin[vin > 0])
|
|
148
148
|
h[h <= min_X] = min_X
|