SigProfilerExtractor 1.2.2__tar.gz → 1.2.4__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (39) hide show
  1. {sigprofilerextractor-1.2.2/SigProfilerExtractor.egg-info → sigprofilerextractor-1.2.4}/PKG-INFO +4 -3
  2. {sigprofilerextractor-1.2.2 → sigprofilerextractor-1.2.4}/README.md +2 -1
  3. {sigprofilerextractor-1.2.2 → sigprofilerextractor-1.2.4}/SigProfilerExtractor/controllers/cli_controller.py +7 -0
  4. {sigprofilerextractor-1.2.2 → sigprofilerextractor-1.2.4}/SigProfilerExtractor/sigpro.py +7 -9
  5. sigprofilerextractor-1.2.4/SigProfilerExtractor/version.py +7 -0
  6. {sigprofilerextractor-1.2.2 → sigprofilerextractor-1.2.4/SigProfilerExtractor.egg-info}/PKG-INFO +4 -3
  7. {sigprofilerextractor-1.2.2 → sigprofilerextractor-1.2.4}/SigProfilerExtractor.egg-info/requires.txt +1 -1
  8. {sigprofilerextractor-1.2.2 → sigprofilerextractor-1.2.4}/setup.py +3 -3
  9. sigprofilerextractor-1.2.2/SigProfilerExtractor/version.py +0 -7
  10. {sigprofilerextractor-1.2.2 → sigprofilerextractor-1.2.4}/LICENSE.txt +0 -0
  11. {sigprofilerextractor-1.2.2 → sigprofilerextractor-1.2.4}/MANIFEST.in +0 -0
  12. {sigprofilerextractor-1.2.2 → sigprofilerextractor-1.2.4}/SigProfilerExtractor/__init__.py +0 -0
  13. {sigprofilerextractor-1.2.2 → sigprofilerextractor-1.2.4}/SigProfilerExtractor/data/CNVInput/Battenberg_test.tsv +0 -0
  14. {sigprofilerextractor-1.2.2 → sigprofilerextractor-1.2.4}/SigProfilerExtractor/data/CSVInput/csv_example.csv +0 -0
  15. {sigprofilerextractor-1.2.2 → sigprofilerextractor-1.2.4}/SigProfilerExtractor/data/MatObjInput/21_breast_WGS_substitutions.mat +0 -0
  16. {sigprofilerextractor-1.2.2 → sigprofilerextractor-1.2.4}/SigProfilerExtractor/data/ReferenceFiles/CNV_features.tsv +0 -0
  17. {sigprofilerextractor-1.2.2 → sigprofilerextractor-1.2.4}/SigProfilerExtractor/data/ReferenceFiles/CN_classes_dictionary.txt +0 -0
  18. {sigprofilerextractor-1.2.2 → sigprofilerextractor-1.2.4}/SigProfilerExtractor/data/TextInput/Samples_CNV.txt +0 -0
  19. {sigprofilerextractor-1.2.2 → sigprofilerextractor-1.2.4}/SigProfilerExtractor/data/TextInput/Samples_DBS.txt +0 -0
  20. {sigprofilerextractor-1.2.2 → sigprofilerextractor-1.2.4}/SigProfilerExtractor/data/TextInput/Samples_ID.txt +0 -0
  21. {sigprofilerextractor-1.2.2 → sigprofilerextractor-1.2.4}/SigProfilerExtractor/data/TextInput/Samples_SBS.txt +0 -0
  22. {sigprofilerextractor-1.2.2 → sigprofilerextractor-1.2.4}/SigProfilerExtractor/data/TextInput/Samples_SV.txt +0 -0
  23. {sigprofilerextractor-1.2.2 → sigprofilerextractor-1.2.4}/SigProfilerExtractor/data/VCFInput/PD3851a.vcf +0 -0
  24. {sigprofilerextractor-1.2.2 → sigprofilerextractor-1.2.4}/SigProfilerExtractor/data/VCFInput/PD3890a.vcf +0 -0
  25. {sigprofilerextractor-1.2.2 → sigprofilerextractor-1.2.4}/SigProfilerExtractor/data/VCFInput/PD3904a.vcf +0 -0
  26. {sigprofilerextractor-1.2.2 → sigprofilerextractor-1.2.4}/SigProfilerExtractor/data/VCFInput/PD3905a.vcf +0 -0
  27. {sigprofilerextractor-1.2.2 → sigprofilerextractor-1.2.4}/SigProfilerExtractor/data/VCFInput/PD3945a.vcf +0 -0
  28. {sigprofilerextractor-1.2.2 → sigprofilerextractor-1.2.4}/SigProfilerExtractor/estimate_best_solution.py +0 -0
  29. {sigprofilerextractor-1.2.2 → sigprofilerextractor-1.2.4}/SigProfilerExtractor/nmf_cpu.py +0 -0
  30. {sigprofilerextractor-1.2.2 → sigprofilerextractor-1.2.4}/SigProfilerExtractor/nmf_gpu.py +0 -0
  31. {sigprofilerextractor-1.2.2 → sigprofilerextractor-1.2.4}/SigProfilerExtractor/sigprofilerextractor_cli.py +0 -0
  32. {sigprofilerextractor-1.2.2 → sigprofilerextractor-1.2.4}/SigProfilerExtractor/subroutines.py +0 -0
  33. {sigprofilerextractor-1.2.2 → sigprofilerextractor-1.2.4}/SigProfilerExtractor.egg-info/SOURCES.txt +0 -0
  34. {sigprofilerextractor-1.2.2 → sigprofilerextractor-1.2.4}/SigProfilerExtractor.egg-info/dependency_links.txt +0 -0
  35. {sigprofilerextractor-1.2.2 → sigprofilerextractor-1.2.4}/SigProfilerExtractor.egg-info/entry_points.txt +0 -0
  36. {sigprofilerextractor-1.2.2 → sigprofilerextractor-1.2.4}/SigProfilerExtractor.egg-info/not-zip-safe +0 -0
  37. {sigprofilerextractor-1.2.2 → sigprofilerextractor-1.2.4}/SigProfilerExtractor.egg-info/top_level.txt +0 -0
  38. {sigprofilerextractor-1.2.2 → sigprofilerextractor-1.2.4}/pyproject.toml +0 -0
  39. {sigprofilerextractor-1.2.2 → sigprofilerextractor-1.2.4}/setup.cfg +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: SigProfilerExtractor
3
- Version: 1.2.2
3
+ Version: 1.2.4
4
4
  Summary: Extracts mutational signatures from mutational catalogues
5
5
  Home-page: https://github.com/AlexandrovLab/SigProfilerExtractor.git
6
6
  Author: S Mishu Ashiqul Islam
@@ -16,7 +16,7 @@ Requires-Dist: pandas>=2.0.0
16
16
  Requires-Dist: nimfa>=1.1.0
17
17
  Requires-Dist: sigProfilerPlotting>=1.4.1
18
18
  Requires-Dist: SigProfilerMatrixGenerator>=1.3.5
19
- Requires-Dist: SigProfilerAssignment>=0.2.4
19
+ Requires-Dist: SigProfilerAssignment>=1.0.1
20
20
  Requires-Dist: statsmodels>=0.9.0
21
21
  Requires-Dist: scikit-learn>=0.24.2
22
22
  Requires-Dist: psutil>=5.6.1
@@ -130,7 +130,7 @@ sigProfilerExtractor(input_type, out_put, input_data, reference_genome="GRCh37",
130
130
  | | **output** | String | The name of the output folder. The output folder will be generated in the current working directory. |
131
131
  | | **input_data** | String | <br>Path to input folder for input_type:<ul><li>`vcf`</li><li>`bedpe`</li></ul>Path to file for input_type:<ul><li>`matrix`</li><li>`seg:TYPE`</li></ul> |
132
132
  | | **reference_genome** | String | The name of the reference genome (default: `"GRCh37"`). This parameter is applicable only if the `input_type` is `"vcf"`. |
133
- | | **opportunity_genome** | String | The build or version of the reference genome for the reference signatures (default: `"GRCh37"`). When the input_type is `"vcf"`, the opportunity_genome automatically matches the input reference genome value. Only the genomes available in COSMIC are supported (`GRCh37`, `GRCh38`, `mm9`, `mm10`, and `rn6`). If a different opportunity genome is selected, the default genome `GRCh37` will be used. |
133
+ | | **opportunity_genome** | String | The build or version of the reference genome for the reference signatures (default: `"GRCh37"`). When the input_type is `"vcf"`, the opportunity_genome automatically matches the input reference genome value. Only the genomes available in COSMIC are supported (`GRCh37`, `GRCh38`, `mm9`, `mm10`, `mm39`, `rn6`, and `rn7`). If a different opportunity genome is selected, the default genome `GRCh37` will be used. |
134
134
  | | **context_type** | String | Mutation context name(s), separated by commas (`,`), that define the mutational contexts for signature extraction (default: `"96,DINUC,ID"`). In the default value, `96` represents the SBS96 context, `DINUC` represents the dinucleotide context, and `ID` represents the indel context. |
135
135
  | | **exome** | Boolean | Defines if the exomes will be extracted (default: `False`). |
136
136
  | **NMF Replicates** | | | |
@@ -149,6 +149,7 @@ sigProfilerExtractor(input_type, out_put, input_data, reference_genome="GRCh37",
149
149
  | | **nmf_tolerance** | Float | Defines the tolerance that must be reached for convergence (default: `1e-15`).|
150
150
  | **Execution** | | | |
151
151
  | | **cpu** | Integer | The number of processors to be used to extract the signatures (default: all processors). |
152
+ | | **assignment_cpu** | Integer | The number of processors to be used for the final signature assignment step (default: all processors). This is independent of the `cpu` parameter. |
152
153
  | | **gpu** | Boolean | Defines if the GPU resource will be used if available (default: `False`). If `True`, the GPU resources will be used in the computation. *Note: All available CPU processors are used by default, which may cause a memory error. This error can be resolved by reducing the number of CPU processes through the `cpu` parameter.*|
153
154
  | | **batch_size** | Integer | Will be effective only if the GPU is used. Defines the number of NMF replicates to be performed by each CPU during the parallel processing (default: `1`). *Note: For `batch_size` values greater than 1, each NMF replicate will update until `max_nmf_iterations` is reached.*|
154
155
  | **Solution Estimation Thresholds** | | | |
@@ -97,7 +97,7 @@ sigProfilerExtractor(input_type, out_put, input_data, reference_genome="GRCh37",
97
97
  | | **output** | String | The name of the output folder. The output folder will be generated in the current working directory. |
98
98
  | | **input_data** | String | <br>Path to input folder for input_type:<ul><li>`vcf`</li><li>`bedpe`</li></ul>Path to file for input_type:<ul><li>`matrix`</li><li>`seg:TYPE`</li></ul> |
99
99
  | | **reference_genome** | String | The name of the reference genome (default: `"GRCh37"`). This parameter is applicable only if the `input_type` is `"vcf"`. |
100
- | | **opportunity_genome** | String | The build or version of the reference genome for the reference signatures (default: `"GRCh37"`). When the input_type is `"vcf"`, the opportunity_genome automatically matches the input reference genome value. Only the genomes available in COSMIC are supported (`GRCh37`, `GRCh38`, `mm9`, `mm10`, and `rn6`). If a different opportunity genome is selected, the default genome `GRCh37` will be used. |
100
+ | | **opportunity_genome** | String | The build or version of the reference genome for the reference signatures (default: `"GRCh37"`). When the input_type is `"vcf"`, the opportunity_genome automatically matches the input reference genome value. Only the genomes available in COSMIC are supported (`GRCh37`, `GRCh38`, `mm9`, `mm10`, `mm39`, `rn6`, and `rn7`). If a different opportunity genome is selected, the default genome `GRCh37` will be used. |
101
101
  | | **context_type** | String | Mutation context name(s), separated by commas (`,`), that define the mutational contexts for signature extraction (default: `"96,DINUC,ID"`). In the default value, `96` represents the SBS96 context, `DINUC` represents the dinucleotide context, and `ID` represents the indel context. |
102
102
  | | **exome** | Boolean | Defines if the exomes will be extracted (default: `False`). |
103
103
  | **NMF Replicates** | | | |
@@ -116,6 +116,7 @@ sigProfilerExtractor(input_type, out_put, input_data, reference_genome="GRCh37",
116
116
  | | **nmf_tolerance** | Float | Defines the tolerance that must be reached for convergence (default: `1e-15`).|
117
117
  | **Execution** | | | |
118
118
  | | **cpu** | Integer | The number of processors to be used to extract the signatures (default: all processors). |
119
+ | | **assignment_cpu** | Integer | The number of processors to be used for the final signature assignment step (default: all processors). This is independent of the `cpu` parameter. |
119
120
  | | **gpu** | Boolean | Defines if the GPU resource will be used if available (default: `False`). If `True`, the GPU resources will be used in the computation. *Note: All available CPU processors are used by default, which may cause a memory error. This error can be resolved by reducing the number of CPU processes through the `cpu` parameter.*|
120
121
  | | **batch_size** | Integer | Will be effective only if the GPU is used. Defines the number of NMF replicates to be performed by each CPU during the parallel processing (default: `1`). *Note: For `batch_size` values greater than 1, each NMF replicate will update until `max_nmf_iterations` is reached.*|
121
122
  | **Solution Estimation Thresholds** | | | |
@@ -111,6 +111,12 @@ def parse_arguments_extractor(args: List[str], description: str) -> argparse.Nam
111
111
  default=-1,
112
112
  help="Number of processors to use (default: all available).",
113
113
  )
114
+ parser.add_argument(
115
+ "--assignment_cpu",
116
+ type=int,
117
+ default=-1,
118
+ help="Number of processors to be used by SigProfilerAssignment for the final signature assignment step (default: all available). This is independent of the 'cpu' parameter.",
119
+ )
114
120
  parser.add_argument(
115
121
  "--gpu",
116
122
  type=str2bool,
@@ -261,6 +267,7 @@ class CliController:
261
267
  seeds=parsed_args.seeds,
262
268
  batch_size=parsed_args.batch_size,
263
269
  cpu=parsed_args.cpu,
270
+ assignment_cpu=parsed_args.assignment_cpu,
264
271
  gpu=parsed_args.gpu,
265
272
  nmf_init=parsed_args.nmf_init,
266
273
  precision=parsed_args.precision,
@@ -187,6 +187,7 @@ def record_parameters(sysdata, execution_parameters, start_time):
187
187
  )
188
188
  )
189
189
 
190
+ sysdata.write("\tassignment_cpu: {}\n".format(execution_parameters["assignment_cpu"]))
190
191
  sysdata.write("\tgpu: {}\n".format(execution_parameters["gpu"]))
191
192
  sysdata.write("Solution Estimation\n")
192
193
  sysdata.write("\tstability: {}\n".format(execution_parameters["stability"]))
@@ -248,6 +249,7 @@ def sigProfilerExtractor(
248
249
  resample=True,
249
250
  batch_size=1,
250
251
  cpu=-1,
252
+ assignment_cpu=-1,
251
253
  gpu=False,
252
254
  nmf_init="random",
253
255
  precision="single",
@@ -485,6 +487,7 @@ def sigProfilerExtractor(
485
487
  "maximum_signatures": maximum_signatures,
486
488
  "NMF_replicates": nmf_replicates,
487
489
  "cpu": cpu,
490
+ "assignment_cpu": assignment_cpu,
488
491
  "gpu": gpu,
489
492
  "batch_size": batch_size,
490
493
  "NMF_init": nmf_init,
@@ -1094,25 +1097,19 @@ def sigProfilerExtractor(
1094
1097
  devopts["make_decomposition_plots"] = make_decomposition_plots
1095
1098
 
1096
1099
  # Check if genome_build is available in COSMIC, if not reset to GRCh37
1097
- if (
1098
- genome_build == "GRCh37"
1099
- or genome_build == "GRCh38"
1100
- or genome_build == "mm9"
1101
- or genome_build == "mm10"
1102
- or genome_build == "rn6"
1103
- ):
1100
+ if genome_build in ["GRCh37", "GRCh38", "mm9", "mm10", "mm39", "rn6", "rn7"]:
1104
1101
  genome_build = genome_build
1105
1102
  else:
1106
1103
  sysdata = open(out_put + "/JOB_METADATA.txt", "a")
1107
1104
  sysdata.write(
1108
- "\n[{}] The selected opportunity genome is {}. COSMIC signatures are available only for GRCh37/38, mm9/10 and rn6 genomes. So, the opportunity genome is reset to GRCh37.\n".format(
1105
+ "\n[{}] The selected opportunity genome is {}. COSMIC signatures are available only for GRCh37/38, mm9/10/39 and rn6/7 genomes. So, the opportunity genome is reset to GRCh37.\n".format(
1109
1106
  str(datetime.datetime.now()).split(".")[0], str(genome_build)
1110
1107
  )
1111
1108
  )
1112
1109
  print(
1113
1110
  "The selected opportunity genome is "
1114
1111
  + str(genome_build)
1115
- + ". COSMIC signatures are available only for GRCh37/38, mm9/10 and rn6 genomes. So, the opportunity genome is reset to GRCh37."
1112
+ + ". COSMIC signatures are available only for GRCh37/38, mm9/10/39 and rn6/7 genomes. So, the opportunity genome is reset to GRCh37."
1116
1113
  )
1117
1114
  sysdata.close()
1118
1115
  genome_build = "GRCh37"
@@ -1132,6 +1129,7 @@ def sigProfilerExtractor(
1132
1129
  devopts=devopts,
1133
1130
  make_metadata=False,
1134
1131
  volume=volume,
1132
+ cpu=assignment_cpu,
1135
1133
  )
1136
1134
 
1137
1135
  sysdata = open(out_put + "/JOB_METADATA.txt", "a")
@@ -0,0 +1,7 @@
1
+
2
+ # THIS FILE IS GENERATED FROM SIGPROFILEREXTRACTOR SETUP.PY
3
+ short_version = '1.2.4'
4
+ version = '1.2.4'
5
+ Update = 'v1.2.4: Added assignment_cpu parameter for SigProfilerAssignment v1.0.0 support.'
6
+
7
+
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: SigProfilerExtractor
3
- Version: 1.2.2
3
+ Version: 1.2.4
4
4
  Summary: Extracts mutational signatures from mutational catalogues
5
5
  Home-page: https://github.com/AlexandrovLab/SigProfilerExtractor.git
6
6
  Author: S Mishu Ashiqul Islam
@@ -16,7 +16,7 @@ Requires-Dist: pandas>=2.0.0
16
16
  Requires-Dist: nimfa>=1.1.0
17
17
  Requires-Dist: sigProfilerPlotting>=1.4.1
18
18
  Requires-Dist: SigProfilerMatrixGenerator>=1.3.5
19
- Requires-Dist: SigProfilerAssignment>=0.2.4
19
+ Requires-Dist: SigProfilerAssignment>=1.0.1
20
20
  Requires-Dist: statsmodels>=0.9.0
21
21
  Requires-Dist: scikit-learn>=0.24.2
22
22
  Requires-Dist: psutil>=5.6.1
@@ -130,7 +130,7 @@ sigProfilerExtractor(input_type, out_put, input_data, reference_genome="GRCh37",
130
130
  | | **output** | String | The name of the output folder. The output folder will be generated in the current working directory. |
131
131
  | | **input_data** | String | <br>Path to input folder for input_type:<ul><li>`vcf`</li><li>`bedpe`</li></ul>Path to file for input_type:<ul><li>`matrix`</li><li>`seg:TYPE`</li></ul> |
132
132
  | | **reference_genome** | String | The name of the reference genome (default: `"GRCh37"`). This parameter is applicable only if the `input_type` is `"vcf"`. |
133
- | | **opportunity_genome** | String | The build or version of the reference genome for the reference signatures (default: `"GRCh37"`). When the input_type is `"vcf"`, the opportunity_genome automatically matches the input reference genome value. Only the genomes available in COSMIC are supported (`GRCh37`, `GRCh38`, `mm9`, `mm10`, and `rn6`). If a different opportunity genome is selected, the default genome `GRCh37` will be used. |
133
+ | | **opportunity_genome** | String | The build or version of the reference genome for the reference signatures (default: `"GRCh37"`). When the input_type is `"vcf"`, the opportunity_genome automatically matches the input reference genome value. Only the genomes available in COSMIC are supported (`GRCh37`, `GRCh38`, `mm9`, `mm10`, `mm39`, `rn6`, and `rn7`). If a different opportunity genome is selected, the default genome `GRCh37` will be used. |
134
134
  | | **context_type** | String | Mutation context name(s), separated by commas (`,`), that define the mutational contexts for signature extraction (default: `"96,DINUC,ID"`). In the default value, `96` represents the SBS96 context, `DINUC` represents the dinucleotide context, and `ID` represents the indel context. |
135
135
  | | **exome** | Boolean | Defines if the exomes will be extracted (default: `False`). |
136
136
  | **NMF Replicates** | | | |
@@ -149,6 +149,7 @@ sigProfilerExtractor(input_type, out_put, input_data, reference_genome="GRCh37",
149
149
  | | **nmf_tolerance** | Float | Defines the tolerance that must be reached for convergence (default: `1e-15`).|
150
150
  | **Execution** | | | |
151
151
  | | **cpu** | Integer | The number of processors to be used to extract the signatures (default: all processors). |
152
+ | | **assignment_cpu** | Integer | The number of processors to be used for the final signature assignment step (default: all processors). This is independent of the `cpu` parameter. |
152
153
  | | **gpu** | Boolean | Defines if the GPU resource will be used if available (default: `False`). If `True`, the GPU resources will be used in the computation. *Note: All available CPU processors are used by default, which may cause a memory error. This error can be resolved by reducing the number of CPU processes through the `cpu` parameter.*|
153
154
  | | **batch_size** | Integer | Will be effective only if the GPU is used. Defines the number of NMF replicates to be performed by each CPU during the parallel processing (default: `1`). *Note: For `batch_size` values greater than 1, each NMF replicate will update until `max_nmf_iterations` is reached.*|
154
155
  | **Solution Estimation Thresholds** | | | |
@@ -5,7 +5,7 @@ pandas>=2.0.0
5
5
  nimfa>=1.1.0
6
6
  sigProfilerPlotting>=1.4.1
7
7
  SigProfilerMatrixGenerator>=1.3.5
8
- SigProfilerAssignment>=0.2.4
8
+ SigProfilerAssignment>=1.0.1
9
9
  statsmodels>=0.9.0
10
10
  scikit-learn>=0.24.2
11
11
  psutil>=5.6.1
@@ -8,7 +8,7 @@ import subprocess
8
8
  if os.path.exists("dist"):
9
9
  shutil.rmtree("dist")
10
10
 
11
- VERSION = "1.2.2"
11
+ VERSION = "1.2.4"
12
12
 
13
13
 
14
14
  with open("README.md") as f:
@@ -21,7 +21,7 @@ def write_version_py(filename="SigProfilerExtractor/version.py"):
21
21
  # THIS FILE IS GENERATED FROM SIGPROFILEREXTRACTOR SETUP.PY
22
22
  short_version = '%(version)s'
23
23
  version = '%(version)s'
24
- Update = 'v1.2.2: Add mutation count and stability to 4608 plots in All_Solutions and a stop parameter'
24
+ Update = 'v1.2.4: Added assignment_cpu parameter for SigProfilerAssignment v1.0.0 support.'
25
25
 
26
26
  """
27
27
  fh = open(filename, "w")
@@ -42,7 +42,7 @@ requirements = [
42
42
  "nimfa>=1.1.0",
43
43
  "sigProfilerPlotting>=1.4.1",
44
44
  "SigProfilerMatrixGenerator>=1.3.5",
45
- "SigProfilerAssignment>=0.2.4",
45
+ "SigProfilerAssignment>=1.0.1",
46
46
  "statsmodels>=0.9.0",
47
47
  "scikit-learn>=0.24.2",
48
48
  "psutil>=5.6.1",
@@ -1,7 +0,0 @@
1
-
2
- # THIS FILE IS GENERATED FROM SIGPROFILEREXTRACTOR SETUP.PY
3
- short_version = '1.2.2'
4
- version = '1.2.2'
5
- Update = 'v1.2.2: Add mutation count and stability to 4608 plots in All_Solutions and a stop parameter'
6
-
7
-