nci-cidc-schemas 0.28.1__py2.py3-none-any.whl → 0.28.3__py2.py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (29) hide show
  1. cidc_schemas/__init__.py +1 -1
  2. cidc_schemas/ngs_pipeline_api/__init__.py +29 -0
  3. cidc_schemas/ngs_pipeline_api/atacseq/atacseq.md +55 -0
  4. cidc_schemas/ngs_pipeline_api/atacseq/atacseq_output_API.json +39 -0
  5. cidc_schemas/ngs_pipeline_api/atacseq/imgs/atacseq.png +0 -0
  6. cidc_schemas/ngs_pipeline_api/output_API.schema.json +45 -0
  7. cidc_schemas/ngs_pipeline_api/rna/imgs/RIMA.png +0 -0
  8. cidc_schemas/ngs_pipeline_api/rna/rna.md +54 -0
  9. cidc_schemas/ngs_pipeline_api/rna/rna_config.schema.json +39 -0
  10. cidc_schemas/ngs_pipeline_api/rna/rna_output_API.json +195 -0
  11. cidc_schemas/ngs_pipeline_api/tcr/imgs/TCRseq.png +0 -0
  12. cidc_schemas/ngs_pipeline_api/tcr/tcr.md +101 -0
  13. cidc_schemas/ngs_pipeline_api/wes/imgs/wes.png +0 -0
  14. cidc_schemas/ngs_pipeline_api/wes/wes.md +46 -0
  15. cidc_schemas/ngs_pipeline_api/wes/wes_config.schema.json +82 -0
  16. cidc_schemas/ngs_pipeline_api/wes/wes_output_API.json +503 -0
  17. cidc_schemas/ngs_pipeline_api/wes/wes_output_API.py +548 -0
  18. cidc_schemas/ngs_pipeline_api/wes/wes_tumor_only_output_API.json +213 -0
  19. cidc_schemas/prism/constants.py +2 -0
  20. cidc_schemas/schemas/assays/components/available_assays.json +11 -0
  21. cidc_schemas/schemas/assays/components/nulisa_input.json +46 -0
  22. cidc_schemas/schemas/assays/nulisa_assay.json +63 -0
  23. cidc_schemas/schemas/templates/assays/nulisa_template.json +88 -0
  24. {nci_cidc_schemas-0.28.1.dist-info → nci_cidc_schemas-0.28.3.dist-info}/METADATA +1 -1
  25. {nci_cidc_schemas-0.28.1.dist-info → nci_cidc_schemas-0.28.3.dist-info}/RECORD +29 -9
  26. {nci_cidc_schemas-0.28.1.dist-info → nci_cidc_schemas-0.28.3.dist-info}/WHEEL +0 -0
  27. {nci_cidc_schemas-0.28.1.dist-info → nci_cidc_schemas-0.28.3.dist-info}/entry_points.txt +0 -0
  28. {nci_cidc_schemas-0.28.1.dist-info → nci_cidc_schemas-0.28.3.dist-info}/licenses/LICENSE +0 -0
  29. {nci_cidc_schemas-0.28.1.dist-info → nci_cidc_schemas-0.28.3.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,82 @@
1
+ {
2
+ "$schema": "http://json-schema.org/draft-07/schema#",
3
+ "$id": "output_API.schema",
4
+ "title": "Pipeline output_API.JSON schema",
5
+ "type": "object",
6
+ "description": "Schema for pipeline's output_API.JSONs",
7
+ "properties": {
8
+ "samples": {
9
+ "type": "object",
10
+ "additionalProperties": {
11
+ "type": "array",
12
+ "items": {
13
+ "type": "string",
14
+ "pattern": "^gs://.*\\.(bam|fastq)"
15
+ }
16
+ }
17
+ },
18
+ "metasheet": {
19
+ "type": "object",
20
+ "additionalProperties": {
21
+ "type": "object",
22
+ "properties": {
23
+ "tumor": {"type": "string"},
24
+ "normal": {"type": "string"}
25
+ }
26
+ }
27
+ },
28
+ "rna": {
29
+ "description": "Optional section that should be included ONLY when RNAseq data is available. bam_file is the sorted.bam file; expression_file is the salmon expression (.sf) file",
30
+ "type": "object",
31
+ "additionalProperties": {
32
+ "type": "object",
33
+ "properties": {
34
+ "bam_file": {"type": "string",
35
+ "pattern": "^gs://.*\\.bam"},
36
+ "expression_file": {"type": "string",
37
+ "pattern": "^gs://.*\\.sf"}
38
+ }
39
+ }
40
+ },
41
+ "instance_name": {
42
+ "type": "string",
43
+ "pattern": "[a-z]+"
44
+ },
45
+ "cores": {
46
+ "type": "number",
47
+ "enum": [32, 64, 96]
48
+ },
49
+ "disk_size": {
50
+ "description": "Disk size to use, in GB, default 500",
51
+ "type": "number"
52
+ },
53
+ "somatic_caller": {
54
+ "description": "Somatic variant caller to use, choose from {tnsnv, tnhaplotyper2, tnscope (default)}",
55
+ "enum": ["tnsnv","tnhaplotyper2","tnscope"]
56
+ },
57
+ "cimac_center": {
58
+ "description": "CIMAC center the samples are associated with, i.e. broad, mocha, or mda",
59
+ "enum": ["broad","mocha","mda"]
60
+ },
61
+ "wes_commit": {
62
+ "description": "Specific wes commit string to use (filled by pipeline team)",
63
+ "type": "string"
64
+ },
65
+ "image": {
66
+ "description": "Specific wes google image to use (filled by pipeline team)",
67
+ "type": "string"
68
+ },
69
+ "wes_ref_snapshot": {
70
+ "description": "Specific wes reference snapshot to use (filled by pipeline team)",
71
+ "type": "string"
72
+ },
73
+ "trim_soft_clip": {
74
+ "description": "Specifies whether the variant caller should soft-clip the reads (default: False)",
75
+ "type": "boolean"
76
+ },
77
+ "tumor_only": {
78
+ "description": "Specifies whether the sample is tumor only, i.e. missing a matched normal (default: False)",
79
+ "type": "boolean"
80
+ }
81
+ }
82
+ }
@@ -0,0 +1,503 @@
1
+ {
2
+ "run id": [
3
+ {
4
+ "file_path_template": "analysis/{run id}_error.yaml",
5
+ "short_description": "yaml file that specifies error codes for files",
6
+ "long_description": "Explanation of all files which are expected to be empty due to a failed/missing module.",
7
+ "filter_group": "",
8
+ "file_purpose": "Analysis view",
9
+ "optional": true,
10
+ "tumor_only_assay": true
11
+ },
12
+ {
13
+ "file_path_template": "analysis/clonality/{run id}/{run id}_segments.txt",
14
+ "short_description": "copynumber: Sequenza CNV segments file",
15
+ "long_description": "Copy number variation segments file called by the Sequenza software package. The column descriptions for the segment file can be found here (https://cran.r-project.org/web/packages/sequenza/vignettes/sequenza.html#plots-and-results)",
16
+ "filter_group": "copynumber",
17
+ "file_purpose": "Analysis view",
18
+ "optional": false,
19
+ "tumor_only_assay": false
20
+ },
21
+ {
22
+ "file_path_template": "analysis/clonality/{run id}/{run id}_genome_view.pdf",
23
+ "short_description": "copynumber: Sequenza genome-wide plot of depth.ratio and B-allele frequency.",
24
+ "long_description": "Genome-wide plot (generated by Sequenza) showing depth.ratio and B-allele frequency.",
25
+ "filter_group": "copynumber",
26
+ "file_purpose": "Analysis view",
27
+ "optional": false,
28
+ "tumor_only_assay": false
29
+ },
30
+ {
31
+ "file_path_template": "analysis/clonality/{run id}/{run id}_chromosome_view.pdf",
32
+ "short_description": "copynumber: Sequenza plot of depth.ratio and B-allele frequency chromosome by chromosome.",
33
+ "long_description": "Chromosome by chromosome plot (generated by Sequenza) showing depth.ratio and B-allele frequency.",
34
+ "filter_group": "copynumber",
35
+ "file_purpose": "Analysis view",
36
+ "optional": false,
37
+ "tumor_only_assay": false
38
+ },
39
+ {
40
+ "file_path_template": "analysis/clonality/{run id}/{run id}_sequenza_gainLoss.bed",
41
+ "short_description": "copynumber: Sequenza CNV segments file filtered with hard cut-offs to call regions of GAIN/LOSS",
42
+ "long_description": "Filtered Sequenza segments file after applying a hard cut-off to call regions of GAIN (total copy number >= 3) and regions of LOSS (total copy number <= 1.5).",
43
+ "filter_group": "copynumber",
44
+ "file_purpose": "Analysis view",
45
+ "optional": false,
46
+ "tumor_only_assay": false
47
+ },
48
+ {
49
+ "file_path_template": "analysis/clonality/{run id}/{run id}.bin50.final.seqz.txt.gz",
50
+ "short_description": "copynumber: Sequenza post-processed seqz file used for input to Sequenza CNV caller",
51
+ "long_description": "Sequenza seqz file generated by the bam2seqz software using a GC wiggle track with a window size of 50 (-w 50).",
52
+ "filter_group": "copynumber",
53
+ "file_purpose": "Analysis view",
54
+ "optional": false,
55
+ "tumor_only_assay": false
56
+ },
57
+ {
58
+ "file_path_template": "analysis/clonality/{run id}/{run id}_alternative_solutions.txt",
59
+ "short_description": "purity: Sequenza Cellularity and Ploidy estimate file",
60
+ "long_description": "Cellularity and ploidy estimates of the tumor sample using the Sequenza software package. The columns of the file are as follows: Cellularity, Ploidy, and SLPP (Scaled Log Posterior Probability).",
61
+ "filter_group": "purity",
62
+ "file_purpose": "Analysis view",
63
+ "optional": false,
64
+ "tumor_only_assay": false
65
+ },
66
+ {
67
+ "file_path_template": "analysis/clonality/{run id}/{run id}_CP_contours.pdf",
68
+ "short_description": "purity: Sequenza plot of likelihood densities for all cellularity/ploidy solutions.",
69
+ "long_description": "Sequenza generated plot showing the likelihood densities for each cellularity/ploidy solution (https://cran.r-project.org/web/packages/sequenza/vignettes/sequenza.html#plots-and-results).",
70
+ "filter_group": "purity",
71
+ "file_purpose": "Analysis view",
72
+ "optional": false,
73
+ "tumor_only_assay": false
74
+ },
75
+ {
76
+ "file_path_template": "analysis/clonality/{run id}/{run id}_pyclone6.input.tsv",
77
+ "short_description": "tumor clonality: PyClone-VI input file generated by sequenza library (https://cran.r-project.org/web/packages/sequenza/index.html)",
78
+ "long_description": "Input file generated for PyClone-VI analysis. Sequenza was used to generate the expected file format (https://github.com/Roth-Lab/pyclone-vi#input-format).",
79
+ "filter_group": "clonality",
80
+ "file_purpose": "Analysis view",
81
+ "optional": false,
82
+ "tumor_only_assay": false
83
+ },
84
+ {
85
+ "file_path_template": "analysis/clonality/{run id}/{run id}_pyclone6.results.tsv",
86
+ "short_description": "tumor clonality: PyClone-VI tumor clonality results file",
87
+ "long_description": "Tumor clone/cluster prevalence estimations generated by the PyClone-VI software package. The format of the results file is described here (https://github.com/Roth-Lab/pyclone-vi#output-format).",
88
+ "filter_group": "clonality",
89
+ "file_purpose": "Analysis view",
90
+ "optional": false,
91
+ "tumor_only_assay": false
92
+ },
93
+ {
94
+ "file_path_template": "analysis/clonality/{run id}/{run id}_pyclone6.results.summary.tsv",
95
+ "short_description": "tumor clonality: PyClone-VI tumor clonality results summary file",
96
+ "long_description": "Summary of Pyclone-VI results file condensed to only show the cluster_id, cellular_prevalence, and cellular_prevalence_std columns.",
97
+ "filter_group": "clonality",
98
+ "file_purpose": "Analysis view",
99
+ "optional": false,
100
+ "tumor_only_assay": false
101
+ },
102
+ {
103
+ "file_path_template": "analysis/cnvkit/{run id}/{run id}.call.cns",
104
+ "short_description": "copynumber: CNVkit segments file",
105
+ "long_description": "CNVkit's Segmented log2 ratios file. The 'cn' column represents the total copy number of the segment. The other columns of the results file are described here (https://cnvkit.readthedocs.io/en/stable/fileformats.html#segmented-log2-ratios-cns)",
106
+ "filter_group": "copynumber",
107
+ "file_purpose": "Analysis view",
108
+ "optional": false,
109
+ "tumor_only_assay": false
110
+ },
111
+ {
112
+ "file_path_template": "analysis/cnvkit/{run id}/{run id}.call.enhanced.cns",
113
+ "short_description": "copynumber: Enhanced CNVkit segments file with BAF and Major/minor allele information",
114
+ "long_description": "The enhanced CNVkit segments file incorporates somatic SNP and tumor purity information (called by the pipeline) to add B-allele frequencies, major and minor allele (cn1 and cn2 respectively), and correct for tumor sample purity level.",
115
+ "filter_group": "copynumber",
116
+ "file_purpose": "Analysis view",
117
+ "optional": false,
118
+ "tumor_only_assay": false
119
+ },
120
+ {
121
+ "file_path_template": "analysis/cnvkit/{run id}/{run id}.scatter.png",
122
+ "short_description": "copynumber: scatter plot of log2 coverage and segmentation call information",
123
+ "long_description": "Genome-wide scatter plot of log2 coverage ratios and called CNV segments",
124
+ "filter_group": "copynumber",
125
+ "file_purpose": "Analysis view",
126
+ "optional": false,
127
+ "tumor_only_assay": false
128
+ },
129
+ {
130
+ "file_path_template": "analysis/cnvkit/{run id}/{run id}_cnvkit_gainLoss.bed",
131
+ "short_description": "copynumber: CNVkit segments file filtered with hard cut-offs to call regions of GAIN/LOSS",
132
+ "long_description": "Filtered CNVkit segments file after applying a hard cut-off to call regions of GAIN (total copy number >= 3) and regions of LOSS (total copy number <= 1.5).",
133
+ "filter_group": "copynumber",
134
+ "file_purpose": "Analysis view",
135
+ "optional": false,
136
+ "tumor_only_assay": false
137
+ },
138
+ {
139
+ "file_path_template": "analysis/copynumber/{run id}/{run id}_consensus.bed",
140
+ "short_description": "copynumber: Consensus CNV segments file",
141
+ "long_description": "Consensus CNV regions that are called by at least 2 of the 3 callers (CNVkit, Sequenza, or FACETS). CNV Callers must agree on both the region (intersection of overlapped regions) and the call (GAIN or LOSS).",
142
+ "filter_group": "copynumber",
143
+ "file_purpose": "Analysis view",
144
+ "optional": false,
145
+ "tumor_only_assay": false
146
+ },
147
+ {
148
+ "file_path_template": "analysis/copynumber/{run id}/{run id}_consensus_merged_GAIN.bed",
149
+ "short_description": "copynumber: Consensus CNV segments file of only GAIN regions",
150
+ "long_description": "GAIN only CNV regions derived from the consensus CNV file. Regions are also merged if they have an overlap of at least 1bp. ",
151
+ "filter_group": "copynumber",
152
+ "file_purpose": "Analysis view",
153
+ "optional": false,
154
+ "tumor_only_assay": false
155
+ },
156
+ {
157
+ "file_path_template": "analysis/copynumber/{run id}/{run id}_consensus_merged_LOSS.bed",
158
+ "short_description": "copynumber: Consensus CNV segments file of only LOSS regions",
159
+ "long_description": "LOSS only CNV regions derived from the consensus CNV file. Regions are also merged if they have an overlap of at least 1bp. ",
160
+ "filter_group": "copynumber",
161
+ "file_purpose": "Analysis view",
162
+ "optional": false,
163
+ "tumor_only_assay": false
164
+ },
165
+ {
166
+ "file_path_template": "analysis/msisensor2/{run id}/{run id}_msisensor2.txt",
167
+ "short_description": "msisensor2: microsatellite instability calculation",
168
+ "long_description": "Microsatellite instability calculation using msisensor2 (https://github.com/niu-lab/msisensor2)",
169
+ "filter_group": "msisensor2",
170
+ "file_purpose": "Analysis view",
171
+ "optional": false,
172
+ "tumor_only_assay": true
173
+ },
174
+ {
175
+ "file_path_template": "analysis/neoantigen/{run id}/combined/{run id}.filtered.tsv",
176
+ "short_description": "neoantigen: list of predicted neoantigens",
177
+ "long_description": "The combined MHC class I and II predicted neoantigens using the pVACseq software. The column definitions are given here (ref: https://pvactools.readthedocs.io/en/latest/pvacseq/output_files.html)",
178
+ "filter_group": "neoantigen",
179
+ "file_purpose": "Analysis view",
180
+ "optional": false,
181
+ "tumor_only_assay": true
182
+ },
183
+ {
184
+ "file_path_template": "analysis/purity/{run id}/{run id}.optimalpurityvalue.txt",
185
+ "short_description": "tumor purity: tumor purity estimates using the FACETS software package",
186
+ "long_description": "Tumor purity estimates using the FACETS software (https://github.com/mskcc/facets).",
187
+ "filter_group": "purity",
188
+ "file_purpose": "Analysis view",
189
+ "optional": false,
190
+ "tumor_only_assay": false
191
+ },
192
+ {
193
+ "file_path_template": "analysis/purity/{run id}/{run id}.cncf",
194
+ "short_description": "copynumber: FACETS CNV segments file",
195
+ "long_description": "Copy number variation segments file called by the FACETS software (https://github.com/mskcc/facets).",
196
+ "filter_group": "copynumber",
197
+ "file_purpose": "Analysis view",
198
+ "optional": false,
199
+ "tumor_only_assay": false
200
+ },
201
+ {
202
+ "file_path_template": "analysis/purity/{run id}/{run id}_facets_gainLoss.bed",
203
+ "short_description": "copynumber: FACETS CNV segments file filtered with hard-cutoff to call regions of GAIN/LOSS",
204
+ "long_description": "Filtered FACETS segments file after applying a hard cut-off to call regions of GAIN (total copy number >= 3) and regions of LOSS (total copy number <= 1.5).",
205
+ "filter_group": "copynumber",
206
+ "file_purpose": "Analysis view",
207
+ "optional": false,
208
+ "tumor_only_assay": false
209
+ },
210
+ {
211
+ "file_path_template": "analysis/report.tar.gz",
212
+ "short_description": "wes report: wes summary html report",
213
+ "long_description": "This is a gzipped file of the report directory, which contains the report.html file. After unzipping the file, the user can load report/report.html into any browser to view the WES Summary Report. The report contains run information (i.e. wes software version used to run the analysis as well as the software version of the major tools) as well as summarizations of sample quality, copy number variation, somatic variants, and HLA-type/neoantigen predictions.",
214
+ "filter_group": "report",
215
+ "file_purpose": "Analysis view",
216
+ "optional": false,
217
+ "tumor_only_assay": true
218
+ },
219
+ {
220
+ "file_path_template": "analysis/report/somatic_variants/05_tumor_germline_overlap.tsv",
221
+ "short_description": "somatic variants: report file of tumor vs germline variants overlap",
222
+ "long_description": "This file is derived from the somatic and germline variants comparison results generated by vcf-compare (http://vcftools.sourceforge.net/perl_module.html#vcf-compare) and is formatted to be human readable. The file reports the number of somatic/tumor only variants (unfiltered), germline/normal only variants (unfiltered), the number of shared variants, and the percent overlap (using the total number of somatic variants as the denominator).",
223
+ "filter_group": "somatic",
224
+ "file_purpose": "Analysis view",
225
+ "optional": false,
226
+ "tumor_only_assay": false
227
+ },
228
+ {
229
+ "file_path_template": "analysis/report/WES_Meta/02_WES_Run_Version.tsv",
230
+ "short_description": "wes pipeline version- INTERNAL ONLY- for reproducibility",
231
+ "long_description": "wes pipeline version- INTERNAL ONLY- for reproducibility",
232
+ "filter_group": "report",
233
+ "file_purpose": "Miscellaneous",
234
+ "optional": false,
235
+ "tumor_only_assay": true
236
+ },
237
+ {
238
+ "file_path_template": "analysis/report/config.yaml",
239
+ "short_description": "wes pipeline config file- INTERNAL ONLY- for reproducibility",
240
+ "long_description": "wes pipeline config file- INTERNAL ONLY- for reproducibility",
241
+ "filter_group": "report",
242
+ "file_purpose": "Miscellaneous",
243
+ "optional": false,
244
+ "tumor_only_assay": true
245
+ },
246
+ {
247
+ "file_path_template": "analysis/report/metasheet.csv",
248
+ "short_description": "wes pipeline metasheet file- INTERNAL ONLY- for reproducibility",
249
+ "long_description": "wes pipeline metasheet file- INTERNAL ONLY- for reproducibility",
250
+ "filter_group": "report",
251
+ "file_purpose": "Miscellaneous",
252
+ "optional": false,
253
+ "tumor_only_assay": true
254
+ },
255
+ {
256
+ "file_path_template": "analysis/report/json/{run id}.wes.json",
257
+ "short_description": "wes sample json for cohort report generation-INTERNAL ONLY",
258
+ "long_description": "wes sample json for cohort report generation-INTERNAL ONLY",
259
+ "filter_group": "report",
260
+ "file_purpose": "Miscellaneous",
261
+ "optional": false,
262
+ "tumor_only_assay": true
263
+ },
264
+ {
265
+ "file_path_template": "analysis/rna/{run id}/{run id}.haplotyper.rna.vcf.gz",
266
+ "short_description": "rna: Variants called from RNA-seq data",
267
+ "long_description": "RNA-seq variants called using the Sentieon RNA Variant Calling pipeline(https://support.sentieon.com/manual/RNA_call/rna/). Sentieon's Haplotyper algorithm was used for the variant calling.",
268
+ "filter_group": "rna",
269
+ "file_purpose": "Analysis view",
270
+ "optional": true,
271
+ "tumor_only_assay": true
272
+ },
273
+ {
274
+ "file_path_template": "analysis/rna/{run id}/{run id}_tnscope.output.twist.neoantigen.vep.rna.vcf",
275
+ "short_description": "rna: Shared RNA and WES variants that is used for neoantigen prediction when RNA-seq data is provided with the WES run",
276
+ "long_description": "Variants file representing the common variants between RNA (haplotyper.rna.vcf.gz) and WES data (output.twist.neoantigen.vep.vcf).",
277
+ "filter_group": "rna",
278
+ "file_purpose": "Analysis view",
279
+ "optional": true,
280
+ "tumor_only_assay": true
281
+ },
282
+ {
283
+ "file_path_template": "analysis/somatic/{run id}/{run id}_tnscope.output.vcf.gz",
284
+ "short_description": "somatic variants: vcf file of somatic variants",
285
+ "long_description": "VCF file of somatic variants using one of the following Sentieon somatic callers {tnscope (default), tnhaplotyper2, tnsnv}.\n\nTNscope algorithm- https://support.sentieon.com/manual/usages/general/#tnscope-algorithm\nTNhaplotyper2- https://support.sentieon.com/manual/usages/general/#tnhaplotyper2-algorithm\nTNsnv - https://support.sentieon.com/manual/usages/general/#tnsnv-algorithm",
286
+ "filter_group": "somatic",
287
+ "file_purpose": "Analysis view",
288
+ "optional": false,
289
+ "tumor_only_assay": true
290
+ },
291
+ {
292
+ "file_path_template": "analysis/somatic/{run id}/{run id}_tnscope.output.twist.vcf",
293
+ "short_description": "somatic variants: vcf file of somatic variants in TWIST targeted capture region",
294
+ "long_description": "VCF file of variants that fall within the TWIST exome capture regions. bcftools is used to filter reads in output.vcf.gz that intersect with the TWIST capture regions.",
295
+ "filter_group": "somatic",
296
+ "file_purpose": "Analysis view",
297
+ "optional": false,
298
+ "tumor_only_assay": true
299
+ },
300
+ {
301
+ "file_path_template": "analysis/somatic/{run id}/{run id}_tnscope.output.twist.maf",
302
+ "short_description": "somatic variants: maf file of somatic variants in TWIST targeted capture region",
303
+ "long_description": "MAF file of variants that fall within the TWIST exome capture regions generated using vcf2maf tool (https://github.com/mskcc/vcf2maf). VEP was used to annotate twist.vcf file, which was then used as input to vcf2maf. NOTE: Some columns in this maf file may be affected by the ExACdb assembly compatibility issue discussed in the WES pipeline overview page (https://cidc.nci.nih.gov/pipelines/wes).",
304
+ "filter_group": "somatic",
305
+ "file_purpose": "Analysis view",
306
+ "optional": false,
307
+ "tumor_only_assay": true
308
+ },
309
+ {
310
+ "file_path_template": "analysis/somatic/{run id}/{run id}_tnscope.output.twist.filtered.vcf",
311
+ "short_description": "somatic variants: vcf file of somatic variants in TWIST targeted capture region filtered by PASS column",
312
+ "long_description": "VCF file of variants that fall within the TWIST exome capture regions filtered to remove variants where the PASS column contained one of the following- germline-risk, low_t_alt_frac, t_lod_fstar, or triallelic_site",
313
+ "filter_group": "somatic",
314
+ "file_purpose": "Analysis view",
315
+ "optional": false,
316
+ "tumor_only_assay": true
317
+ },
318
+ {
319
+ "file_path_template": "analysis/somatic/{run id}/{run id}_tnscope.output.twist.filtered.maf",
320
+ "short_description": "somatic variants: maf file of somatic variants in TWIST targeted capture region filtered by PASS column",
321
+ "long_description": "MAF file generated by converting twist.filtered.vcf to maf using VEP to annotate variants and vcf2maf to do the conversion. NOTE: Some columns in this maf file may be affected by the ExACdb assembly compatibility issue discussed in the WES pipeline overview page (https://cidc.nci.nih.gov/pipelines/wes).",
322
+ "filter_group": "somatic",
323
+ "file_purpose": "Analysis view",
324
+ "optional": false,
325
+ "tumor_only_assay": true
326
+ },
327
+ {
328
+ "file_path_template": "analysis/tcellextrect/{run id}/{run id}_tcellextrect.txt",
329
+ "short_description": "tcell: TCell fraction estimates generated by TcellExTRECT",
330
+ "long_description": "TCell fraction estimates generated by the TcellExTRECT software (https://github.com/McGranahanLab/TcellExTRECT)",
331
+ "filter_group": "tcell",
332
+ "file_purpose": "Analysis view",
333
+ "optional": false,
334
+ "tumor_only_assay": true
335
+ }
336
+ ],
337
+ "normal cimac id": [
338
+ {
339
+ "file_path_template": "analysis/align/{normal cimac id}/{normal cimac id}.sorted.dedup.bam",
340
+ "short_description": "alignment: bam file with deduplicated reads",
341
+ "long_description": "Aligned reads were sorted and marked duplicates were removed using the Sentieon Dedup tool (https://support.sentieon.com/manual/usages/general/#dedup-algorithm)",
342
+ "filter_group": "alignment",
343
+ "file_purpose": "Source view",
344
+ "optional": false,
345
+ "tumor_only_assay": true
346
+ },
347
+ {
348
+ "file_path_template": "analysis/align/{normal cimac id}/{normal cimac id}.sorted.dedup.bam.bai",
349
+ "short_description": "alignment: index file for deduplicated bam",
350
+ "long_description": "Bam index file for deduplicated bam file generated by the Sentieon Dedup tool (https://support.sentieon.com/manual/usages/general/#dedup-algorithm)",
351
+ "filter_group": "alignment",
352
+ "file_purpose": "Source view",
353
+ "optional": false,
354
+ "tumor_only_assay": true
355
+ },
356
+ {
357
+ "file_path_template": "analysis/align/{normal cimac id}/{normal cimac id}_recalibrated.bam",
358
+ "short_description": "alignment: Base Quality Score Recalibration (BQSR) bam file",
359
+ "long_description": "The Sentieon QualCal (https://support.sentieon.com/manual/usages/general/#qualcal-algorithm) is used to perform BQSR and remove any technical artifacts in the base quality scores.",
360
+ "filter_group": "alignment",
361
+ "file_purpose": "Source view",
362
+ "optional": false,
363
+ "tumor_only_assay": true
364
+ },
365
+ {
366
+ "file_path_template": "analysis/align/{normal cimac id}/{normal cimac id}_recalibrated.bam.bai",
367
+ "short_description": "alignment: index file for Base Quality Score Recalibration (BQSR) bam file",
368
+ "long_description": "Index file for the BQSR bam file",
369
+ "filter_group": "alignment",
370
+ "file_purpose": "Source view",
371
+ "optional": false,
372
+ "tumor_only_assay": true
373
+ },
374
+ {
375
+ "file_path_template": "analysis/germline/{normal cimac id}/{normal cimac id}_haplotyper.output.vcf",
376
+ "short_description": "germline: germline variants",
377
+ "long_description": "Haplotype variants using Sentieon Haplotyper algorithm (https://support.sentieon.com/manual/usages/general/#haplotyper-algorithm)",
378
+ "filter_group": "germline",
379
+ "file_purpose": "Analysis view",
380
+ "optional": false,
381
+ "tumor_only_assay": false
382
+ },
383
+ {
384
+ "file_path_template": "analysis/germline/{normal cimac id}/{normal cimac id}_haplotyper.targets.vcf.gz",
385
+ "short_description": "germline: vcf of haplotype variants in targeted regions",
386
+ "long_description": "Haplotype variants within targeted capture regions using Sentieon Haplotyper algorithm (https://support.sentieon.com/manual/usages/general/#haplotyper-algorithm)",
387
+ "filter_group": "germline",
388
+ "file_purpose": "Analysis view",
389
+ "optional": false,
390
+ "tumor_only_assay": false
391
+ },
392
+ {
393
+ "file_path_template": "analysis/hlahd/{normal cimac id}/result/{normal cimac id}_final.result.txt",
394
+ "short_description": "hla: MHC Class I and II results (using HLA-HD)",
395
+ "long_description": "Predicted MHC Class I and II results using the HLA-HD software (https://www.genome.med.kyoto-u.ac.jp/HLA-HD/). Chromosome 6 reads from the deduplicated bam file were extracted and fed into the HLA-HD prediction algorithm.",
396
+ "filter_group": "HLA",
397
+ "file_purpose": "Analysis view",
398
+ "optional": false,
399
+ "tumor_only_assay": true
400
+ },
401
+ {
402
+ "file_path_template": "analysis/optitype/{normal cimac id}/{normal cimac id}_result.tsv",
403
+ "short_description": "hla: MHC Class I results (using OptiType)",
404
+ "long_description": "Predicted MHC Class I alleles using the Optitype software (https://github.com/FRED-2/OptiType). Chromosome 6 reads from the deduplicated bam file were extracted and fed into the Optitype prediction algorithm.",
405
+ "filter_group": "HLA",
406
+ "file_purpose": "Analysis view",
407
+ "optional": false,
408
+ "tumor_only_assay": true
409
+ },
410
+ {
411
+ "file_path_template": "analysis/xhla/{normal cimac id}/report-{normal cimac id}-hla.json",
412
+ "short_description": "hla: MHC Class I and II results (using xhla)",
413
+ "long_description": "Predicted MHC Class I and II results using the xHLA software(https://github.com/humanlongevity/HLA). Chromosome 6 reads from the deduplicated bam file were extracted and fed into the xHLA prediction algorithm.",
414
+ "filter_group": "HLA",
415
+ "file_purpose": "Analysis view",
416
+ "optional": false,
417
+ "tumor_only_assay": true
418
+ }
419
+ ],
420
+ "tumor cimac id": [
421
+ {
422
+ "file_path_template": "analysis/align/{tumor cimac id}/{tumor cimac id}.sorted.dedup.bam",
423
+ "short_description": "alignment: bam file with deduplicated reads",
424
+ "long_description": "Aligned reads were sorted and marked duplicates were removed using the Sentieon Dedup tool (https://support.sentieon.com/manual/usages/general/#dedup-algorithm)",
425
+ "filter_group": "alignment",
426
+ "file_purpose": "Source view",
427
+ "optional": false,
428
+ "tumor_only_assay": true
429
+ },
430
+ {
431
+ "file_path_template": "analysis/align/{tumor cimac id}/{tumor cimac id}.sorted.dedup.bam.bai",
432
+ "short_description": "alignment: index file for deduplicated bam",
433
+ "long_description": "Bam index file for deduplicated bam file generated by the Sentieon Dedup tool (https://support.sentieon.com/manual/usages/general/#dedup-algorithm)",
434
+ "filter_group": "alignment",
435
+ "file_purpose": "Source view",
436
+ "optional": false,
437
+ "tumor_only_assay": true
438
+ },
439
+ {
440
+ "file_path_template": "analysis/align/{tumor cimac id}/{tumor cimac id}_recalibrated.bam",
441
+ "short_description": "alignment: Base Quality Score Recalibration (BQSR) bam file",
442
+ "long_description": "The Sentieon QualCal (https://support.sentieon.com/manual/usages/general/#qualcal-algorithm) is used to perform BQSR and remove any technical artifacts in the base quality scores.",
443
+ "filter_group": "alignment",
444
+ "file_purpose": "Source view",
445
+ "optional": false,
446
+ "tumor_only_assay": true
447
+ },
448
+ {
449
+ "file_path_template": "analysis/align/{tumor cimac id}/{tumor cimac id}_recalibrated.bam.bai",
450
+ "short_description": "alignment: index file for Base Quality Score Recalibration (BQSR) bam file",
451
+ "long_description": "Index file for the BQSR bam file",
452
+ "filter_group": "alignment",
453
+ "file_purpose": "Source view",
454
+ "optional": false,
455
+ "tumor_only_assay": true
456
+ },
457
+ {
458
+ "file_path_template": "analysis/germline/{tumor cimac id}/{tumor cimac id}_haplotyper.output.vcf",
459
+ "short_description": "germline: germline variants",
460
+ "long_description": "Haplotype variants using Sentieon Haplotyper algorithm (https://support.sentieon.com/manual/usages/general/#haplotyper-algorithm)",
461
+ "filter_group": "germline",
462
+ "file_purpose": "Analysis view",
463
+ "optional": false,
464
+ "tumor_only_assay": false
465
+ },
466
+ {
467
+ "file_path_template": "analysis/germline/{tumor cimac id}/{tumor cimac id}_haplotyper.targets.vcf.gz",
468
+ "short_description": "germline: vcf of haplotype variants in targeted regions",
469
+ "long_description": "Haplotype variants within targeted capture regions using Sentieon Haplotyper algorithm (https://support.sentieon.com/manual/usages/general/#haplotyper-algorithm)",
470
+ "filter_group": "germline",
471
+ "file_purpose": "Analysis view",
472
+ "optional": false,
473
+ "tumor_only_assay": false
474
+ },
475
+ {
476
+ "file_path_template": "analysis/hlahd/{tumor cimac id}/result/{tumor cimac id}_final.result.txt",
477
+ "short_description": "hla: MHC Class I and II results (using HLA-HD)",
478
+ "long_description": "Predicted MHC Class I and II results using the HLA-HD software (https://www.genome.med.kyoto-u.ac.jp/HLA-HD/). Chromosome 6 reads from the deduplicated bam file were extracted and fed into the HLA-HD prediction algorithm.",
479
+ "filter_group": "HLA",
480
+ "file_purpose": "Analysis view",
481
+ "optional": false,
482
+ "tumor_only_assay": true
483
+ },
484
+ {
485
+ "file_path_template": "analysis/optitype/{tumor cimac id}/{tumor cimac id}_result.tsv",
486
+ "short_description": "hla: MHC Class I results (using OptiType)",
487
+ "long_description": "Predicted MHC Class I alleles using the Optitype software (https://github.com/FRED-2/OptiType). Chromosome 6 reads from the deduplicated bam file were extracted and fed into the Optitype prediction algorithm.",
488
+ "filter_group": "HLA",
489
+ "file_purpose": "Analysis view",
490
+ "optional": false,
491
+ "tumor_only_assay": true
492
+ },
493
+ {
494
+ "file_path_template": "analysis/xhla/{tumor cimac id}/report-{tumor cimac id}-hla.json",
495
+ "short_description": "hla: MHC Class I and II results (using xhla)",
496
+ "long_description": "Predicted MHC Class I and II results using the xHLA software(https://github.com/humanlongevity/HLA). Chromosome 6 reads from the deduplicated bam file were extracted and fed into the xHLA prediction algorithm.",
497
+ "filter_group": "HLA",
498
+ "file_purpose": "Analysis view",
499
+ "optional": false,
500
+ "tumor_only_assay": true
501
+ }
502
+ ]
503
+ }