viral_seq 1.0.14 → 1.2.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.gitignore +0 -1
- data/Gemfile.lock +16 -3
- data/README.md +135 -8
- data/bin/tcs +51 -10
- data/bin/tcs_log +102 -0
- data/bin/tcs_sdrm +409 -0
- data/docs/assets/img/cover.jpg +0 -0
- data/docs/dr.json +67 -0
- data/docs/sample_miseq_data/hivdr_control/r1.fastq.gz +0 -0
- data/docs/sample_miseq_data/hivdr_control/r2.fastq.gz +0 -0
- data/lib/viral_seq.rb +5 -1
- data/lib/viral_seq/constant.rb +41 -4
- data/lib/viral_seq/hivdr.rb +1 -1
- data/lib/viral_seq/muscle.rb +3 -2
- data/lib/viral_seq/recency.rb +52 -0
- data/lib/viral_seq/sdrm.rb +101 -35
- data/lib/viral_seq/seq_hash.rb +25 -5
- data/lib/viral_seq/seq_hash_pair.rb +6 -4
- data/lib/viral_seq/sequence.rb +1 -84
- data/lib/viral_seq/tcs_core.rb +3 -1
- data/lib/viral_seq/tcs_dr.rb +71 -0
- data/lib/viral_seq/tcs_json.rb +41 -10
- data/lib/viral_seq/version.rb +2 -2
- data/viral_seq.gemspec +11 -0
- metadata +74 -4
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: f3316ff7e72ca84c6eb2fa861a9fdad14fbcb3ab3c0053ade843ee13cc9ce82e
|
4
|
+
data.tar.gz: df1035ea5934b794ef8c64a04085f407bcd4dffc0888bf81a569a7ccfba3560a
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: a3ec35b3a40ee9cf66131416a1c20eda38bf8bde818aa41af285c099ddd2b49e4f31fe1d011c95def77fd5c6653d96f4295142fd543444f249242154bb2b671b
|
7
|
+
data.tar.gz: daa6e694a841cc615cfde850bf2d98ca7467cb1d27502daf398ab0204e55c4d477f34aafce0149c765a931755fc3a3f7dbdf425964904d0199efb0651b9a09a6
|
data/.gitignore
CHANGED
data/Gemfile.lock
CHANGED
@@ -1,16 +1,27 @@
|
|
1
1
|
PATH
|
2
2
|
remote: .
|
3
3
|
specs:
|
4
|
-
viral_seq (1.
|
5
|
-
colorize (
|
6
|
-
|
4
|
+
viral_seq (1.1.1)
|
5
|
+
colorize (>= 0.1)
|
6
|
+
combine_pdf (>= 1.0.0)
|
7
|
+
muscle_bio (>= 0.4)
|
8
|
+
prawn (>= 2.3.0)
|
9
|
+
prawn-table (>= 0.2.0)
|
7
10
|
|
8
11
|
GEM
|
9
12
|
remote: https://rubygems.org/
|
10
13
|
specs:
|
11
14
|
colorize (0.8.1)
|
15
|
+
combine_pdf (1.0.21)
|
16
|
+
ruby-rc4 (>= 0.1.5)
|
12
17
|
diff-lcs (1.3)
|
13
18
|
muscle_bio (0.4.0)
|
19
|
+
pdf-core (0.9.0)
|
20
|
+
prawn (2.4.0)
|
21
|
+
pdf-core (~> 0.9.0)
|
22
|
+
ttfunk (~> 1.7)
|
23
|
+
prawn-table (0.2.2)
|
24
|
+
prawn (>= 1.3.0, < 3.0.0)
|
14
25
|
rake (13.0.1)
|
15
26
|
rspec (3.8.0)
|
16
27
|
rspec-core (~> 3.8.0)
|
@@ -25,6 +36,8 @@ GEM
|
|
25
36
|
diff-lcs (>= 1.2.0, < 2.0)
|
26
37
|
rspec-support (~> 3.8.0)
|
27
38
|
rspec-support (3.8.0)
|
39
|
+
ruby-rc4 (0.1.5)
|
40
|
+
ttfunk (1.7.0)
|
28
41
|
|
29
42
|
PLATFORMS
|
30
43
|
ruby
|
data/README.md
CHANGED
@@ -1,8 +1,24 @@
|
|
1
1
|
# ViralSeq
|
2
2
|
|
3
|
+
[![Gem Version](https://img.shields.io/gem/v/viral_seq?color=%2300e673&style=flat-square)](https://rubygems.org/gems/viral_seq)
|
4
|
+
![GitHub](https://img.shields.io/github/license/viralseq/viral_seq)
|
5
|
+
![Gem](https://img.shields.io/gem/dt/viral_seq?color=%23E9967A)
|
6
|
+
![GitHub last commit](https://img.shields.io/github/last-commit/viralseq/viral_seq?color=%2300BFFF)
|
7
|
+
[![Join the chat at https://gitter.im/viral_seq/community](https://badges.gitter.im/viral_seq/community.svg)](https://gitter.im/viral_seq/community?utm_source=badge&utm_medium=badge&utm_campaign=pr-badge&utm_content=badge)
|
8
|
+
|
3
9
|
A Ruby Gem containing bioinformatics tools for processing viral NGS data.
|
4
10
|
|
5
|
-
Specifically for Primer
|
11
|
+
Specifically for Primer ID sequencing and HIV drug resistance analysis.
|
12
|
+
|
13
|
+
## Illustration for the Primer ID Sequencing
|
14
|
+
|
15
|
+
|
16
|
+
![Primer ID Sequencing](./docs/assets/img/cover.jpg)
|
17
|
+
|
18
|
+
### Reference readings on the Primer ID sequencing
|
19
|
+
[Explanation of Primer ID sequencing](https://doi.org/10.21769/BioProtoc.3938)
|
20
|
+
[Primer ID MiSeq protocol](https://doi.org/10.1128/JVI.00522-15)
|
21
|
+
[Application of Primer ID sequencing in COVID-19 research](https://doi.org/10.1126/scitranslmed.abb5883)
|
6
22
|
|
7
23
|
## Install
|
8
24
|
|
@@ -14,20 +30,93 @@ Specifically for Primer-ID sequencing and HIV drug resistance analysis.
|
|
14
30
|
|
15
31
|
### Executables
|
16
32
|
|
17
|
-
|
33
|
+
### `tcs`
|
34
|
+
Use executable `tcs` pipeline to process **Primer ID MiSeq sequencing** data.
|
18
35
|
|
36
|
+
Example commands:
|
19
37
|
```bash
|
20
|
-
$
|
38
|
+
$ tcs -p params.json # run TCS pipeline with params.json
|
39
|
+
$ tcs -p params.json -i DIRECTORY
|
40
|
+
# run TCS pipeline with params.json and DIRECTORY
|
41
|
+
# if DIRECTORY is not defined in params.json
|
42
|
+
$ tcs -dr -i DIRECTORY
|
43
|
+
# run tcs-dr (MPID HIV drug resistance sequencing) pipeline
|
44
|
+
# DIRECTORY needs to be given.
|
45
|
+
$ tcs -j # CLI to generate params.json
|
46
|
+
$ tcs -h # print out the help
|
21
47
|
```
|
22
48
|
|
23
|
-
|
49
|
+
[sample params.json for the tcs-dr pipeline](./docs/dr.json)
|
50
|
+
|
51
|
+
---
|
52
|
+
### `tcs_log`
|
53
|
+
|
54
|
+
Use `tcs_log` script to pool run logs and TCS fasta files after one batch of `tcs` jobs.
|
55
|
+
|
24
56
|
|
57
|
+
Example file structure:
|
58
|
+
```
|
59
|
+
batch_tcs_jobs/
|
60
|
+
├── lib1
|
61
|
+
├── lib2
|
62
|
+
├── lib3
|
63
|
+
├── lib4
|
64
|
+
├── ...
|
65
|
+
```
|
66
|
+
|
67
|
+
Example command:
|
25
68
|
```bash
|
26
|
-
$
|
27
|
-
|
28
|
-
|
69
|
+
$ tcs_log batch_tcs_jobs
|
70
|
+
```
|
71
|
+
|
72
|
+
---
|
73
|
+
### `tcs_sdrm`
|
74
|
+
|
75
|
+
Use `tcs_sdrm` pipeline for HIV-1 drug resistance mutation and recency.
|
76
|
+
|
77
|
+
Example command:
|
78
|
+
```bash
|
79
|
+
$ tcs_sdrm libs_dir
|
80
|
+
```
|
81
|
+
|
82
|
+
libs_dir file structure:
|
83
|
+
```
|
84
|
+
libs_dir/
|
85
|
+
├── lib1
|
86
|
+
├── lib1_RT
|
87
|
+
├── lib1_PR
|
88
|
+
├── lib1_IN
|
89
|
+
├── lib1_V1V3
|
90
|
+
├── lib2
|
91
|
+
├── lib2_RT
|
92
|
+
├── lib2_PR
|
93
|
+
├── lib2_IN
|
94
|
+
├── lib2_V1V3
|
95
|
+
├── ...
|
29
96
|
```
|
30
97
|
|
98
|
+
Output data in a new dir as 'libs_dir_SDRM'
|
99
|
+
|
100
|
+
|
101
|
+
**Note: [R](https://www.r-project.org/) and the following R libraries are required:**
|
102
|
+
- phangorn
|
103
|
+
- ape
|
104
|
+
- scales
|
105
|
+
- ggforce
|
106
|
+
- cowplot
|
107
|
+
- magrittr
|
108
|
+
- gridExtra
|
109
|
+
|
110
|
+
---
|
111
|
+
|
112
|
+
### `locator`
|
113
|
+
Use executable `locator` to get the coordinates of the sequences on HIV/SIV reference genome from a FASTA file through a terminal
|
114
|
+
|
115
|
+
```bash
|
116
|
+
$ locator -i sequence.fasta -o sequence.fasta.csv
|
117
|
+
```
|
118
|
+
---
|
119
|
+
|
31
120
|
## Some Examples
|
32
121
|
|
33
122
|
Load all ViralSeq classes by requiring 'viral_seq.rb' in your Ruby scripts.
|
@@ -58,7 +147,7 @@ qc_seqhash = aligned_seqhash.hiv_seq_qc(2253, 2549, false, :HXB2)
|
|
58
147
|
Further filter out sequences with Apobec3g/f hypermutations
|
59
148
|
|
60
149
|
```ruby
|
61
|
-
qc_seqhash = qc_seqhash.a3g
|
150
|
+
qc_seqhash = qc_seqhash.a3g[:filtered_seq]
|
62
151
|
```
|
63
152
|
|
64
153
|
Calculate nucleotide diversity π
|
@@ -86,6 +175,44 @@ qc_seqhash.sdrm_hiv_pr(cut_off)
|
|
86
175
|
|
87
176
|
## Updates
|
88
177
|
|
178
|
+
### Version 1.2.1-05172021
|
179
|
+
|
180
|
+
1. Added a function in R to check and install missing R packages for `tcs_sdrm` pipeline.
|
181
|
+
|
182
|
+
### Version 1.2.0-05102021
|
183
|
+
|
184
|
+
1. Added `tcs_sdrm` pipeline as an executable.
|
185
|
+
`tcs_sdrm` processes `tcs`-processed HIV MPID-NGS data for drug resistance mutations, recency and phylogenetic analysis.
|
186
|
+
|
187
|
+
2. Added function ViralSeq::SeqHash#sample.
|
188
|
+
|
189
|
+
3. Added recency determining function `ViralSeq::Recency::define`
|
190
|
+
|
191
|
+
4. Fixed a few bugs related to `tcs_sdrm`.
|
192
|
+
|
193
|
+
### Version 1.1.2-04262021
|
194
|
+
|
195
|
+
1. Added function `ViralSeq::DRMs.sdrm_json` to export SDRM as json object.
|
196
|
+
2. Added a random string to the temp file names for `muscle_bio` to avoid issues when running scripts in parallel.
|
197
|
+
3. Added `--keep-original` flag to the `tcs` pipeline.
|
198
|
+
|
199
|
+
### Version 1.1.1-04012021
|
200
|
+
|
201
|
+
1. Added warning when paired_raw_sequence is less than 0.1% of total_raw_sequence.
|
202
|
+
2. Added option `-i WORKING_DIRECTORY` to the `tcs` script.
|
203
|
+
If the `params.json` file does not contain the path to the working directory, it will append path to the run params.
|
204
|
+
3. Added option `-dr` to the `tcs` script.
|
205
|
+
|
206
|
+
### Version 1.1.0-03252021
|
207
|
+
|
208
|
+
1. Optimized the algorithm of end-join.
|
209
|
+
2. Fixed a bug in the `tcs` pipeline where combined tcs files were sometimes not saved.
|
210
|
+
3. Added `tcs_log` command to pool run logs and tcs files from one batch of tcs jobs.
|
211
|
+
4. Added the preset of MPID-HIVDR params file [***dr.json***](./docs/dr.json) in /docs.
|
212
|
+
5. Added `platform_format` option in the json generator of the `tcs` Pipeline.
|
213
|
+
Users can choose from 3 MiSeq platforms for processing their sequencing data.
|
214
|
+
MiSeq 300x7x300 is the default option.
|
215
|
+
|
89
216
|
### Version 1.0.14-03052021
|
90
217
|
|
91
218
|
1. Add a function `ViralSeq::TcsCore.validate_file_name` to check MiSeq paired-end file names.
|
data/bin/tcs
CHANGED
@@ -23,7 +23,7 @@
|
|
23
23
|
# THE SOFTWARE.
|
24
24
|
|
25
25
|
# Use JSON file as the run param
|
26
|
-
# run
|
26
|
+
# run `tcs -j` to generate param json file.
|
27
27
|
|
28
28
|
require 'viral_seq'
|
29
29
|
require 'json'
|
@@ -46,11 +46,23 @@ OptionParser.new do |opts|
|
|
46
46
|
options[:params_json] = p
|
47
47
|
end
|
48
48
|
|
49
|
+
opts.on("-i", "--input PATH_TO_WORKING_DIRECTORY", "Path to the working directory") do |p|
|
50
|
+
options[:input] = p
|
51
|
+
end
|
52
|
+
|
53
|
+
opts.on("-dr", "--dr_pipeline", "HIV drug resistance MPID pipeline") do |p|
|
54
|
+
options[:dr] = true
|
55
|
+
end
|
56
|
+
|
49
57
|
opts.on("-h", "--help", "Prints this help") do
|
50
58
|
puts opts
|
51
59
|
exit
|
52
60
|
end
|
53
61
|
|
62
|
+
opts.on("--keep-original", "keep raw sequence files") do
|
63
|
+
options[:keep] = true
|
64
|
+
end
|
65
|
+
|
54
66
|
opts.on("-v", "--version", "Version info") do
|
55
67
|
puts "tcs version: " + ViralSeq::TCS_VERSION.red.bold
|
56
68
|
puts "viral_seq version: " + ViralSeq::VERSION.red.bold
|
@@ -64,15 +76,21 @@ end.parse!
|
|
64
76
|
|
65
77
|
if options[:json_generator]
|
66
78
|
params = ViralSeq::TcsJson.generate
|
79
|
+
elsif options[:dr]
|
80
|
+
params = ViralSeq::TcsDr::PARAMS
|
67
81
|
elsif (options[:params_json] && File.exist?(options[:params_json]))
|
68
82
|
params = JSON.parse(File.read(options[:params_json]), symbolize_names: true)
|
69
83
|
else
|
70
84
|
abort "No params JSON file found. Script terminated.".red
|
71
85
|
end
|
72
86
|
|
73
|
-
|
87
|
+
if options[:input]
|
88
|
+
indir = options[:input]
|
89
|
+
else
|
90
|
+
indir = params[:raw_sequence_dir]
|
91
|
+
end
|
74
92
|
|
75
|
-
unless File.exist?(indir)
|
93
|
+
unless indir and File.exist?(indir)
|
76
94
|
abort "No input sequence directory found. Script terminated.".red.bold
|
77
95
|
end
|
78
96
|
|
@@ -115,6 +133,12 @@ else
|
|
115
133
|
error_rate = 0.02
|
116
134
|
end
|
117
135
|
|
136
|
+
if params[:platform_format]
|
137
|
+
$platform_sequencing_length = params[:platform_format]
|
138
|
+
else
|
139
|
+
$platform_sequencing_length = 300
|
140
|
+
end
|
141
|
+
|
118
142
|
primers = params[:primer_pairs]
|
119
143
|
if primers.empty?
|
120
144
|
ViralSeq::TcsCore.log_and_abort log, "No primer information. Script terminated."
|
@@ -123,6 +147,7 @@ end
|
|
123
147
|
|
124
148
|
primers.each do |primer|
|
125
149
|
summary_json = {}
|
150
|
+
summary_json[:warnings] = []
|
126
151
|
summary_json[:tcs_version] = ViralSeq::TCS_VERSION
|
127
152
|
summary_json[:viralseq_version] = ViralSeq::VERSION
|
128
153
|
summary_json[:runtime] = Time.now.to_s
|
@@ -134,6 +159,7 @@ primers.each do |primer|
|
|
134
159
|
forward_primer = primer[:forward]
|
135
160
|
|
136
161
|
export_raw = primer[:export_raw]
|
162
|
+
limit_raw = primer[:limit_raw]
|
137
163
|
|
138
164
|
unless cdna_primer
|
139
165
|
log.puts Time.now.to_s + "\t" + region + " does not have cDNA primer sequence. #{region} skipped."
|
@@ -175,6 +201,10 @@ primers.each do |primer|
|
|
175
201
|
paired_seq_number = common_keys.size
|
176
202
|
log.puts Time.now.to_s + "\t" + "Paired raw sequences are : #{paired_seq_number.to_s}"
|
177
203
|
summary_json[:paired_raw_sequence] = paired_seq_number
|
204
|
+
if paired_seq_number < raw_sequence_number * 0.001
|
205
|
+
summary_json[:warnings] <<
|
206
|
+
"WARNING: Filtered raw sequneces less than 0.1% of the total raw sequences. Possible contamination."
|
207
|
+
end
|
178
208
|
|
179
209
|
common_keys.each do |seqtag|
|
180
210
|
r1_seq = r1_passed_seq[seqtag]
|
@@ -236,7 +266,13 @@ primers.each do |primer|
|
|
236
266
|
raw_r1_f = File.open(outfile_raw_r1, 'w')
|
237
267
|
raw_r2_f = File.open(outfile_raw_r2, 'w')
|
238
268
|
|
239
|
-
|
269
|
+
if limit_raw
|
270
|
+
raw_keys = bio_r1.keys.sample(limit_raw.to_i)
|
271
|
+
else
|
272
|
+
raw_keys = bio_r1.keys
|
273
|
+
end
|
274
|
+
|
275
|
+
raw_keys.each do |k|
|
240
276
|
raw_r1_f.puts k + "_r1"
|
241
277
|
raw_r2_f.puts k + "_r2"
|
242
278
|
raw_r1_f.puts bio_r1[k]
|
@@ -273,7 +309,6 @@ primers.each do |primer|
|
|
273
309
|
r1_sub_seq << bio_r1[seq_name]
|
274
310
|
r2_sub_seq << bio_r2[seq_name]
|
275
311
|
end
|
276
|
-
|
277
312
|
#consensus name including the Primer ID and number of raw sequences of that Primer ID, library name and setname.
|
278
313
|
consensus_name = ">" + primer_id + "_" + seq_with_same_primer_id.size.to_s + "_" + libname + "_" + region
|
279
314
|
r1_consensus = ViralSeq::SeqHash.array(r1_sub_seq).consensus(majority_cut_off)
|
@@ -364,6 +399,7 @@ primers.each do |primer|
|
|
364
399
|
shp = ViralSeq::SeqHashPair.fa(out_dir_consensus)
|
365
400
|
joined_sh = end_join(out_dir_consensus, primer[:end_join_option], primer[:overlap])
|
366
401
|
log.puts Time.now.to_s + "\t" + "Paired TCS number: " + joined_sh.size.to_s
|
402
|
+
|
367
403
|
summary_json[:combined_tcs] = joined_sh.size
|
368
404
|
|
369
405
|
if export_raw
|
@@ -433,12 +469,15 @@ primers.each do |primer|
|
|
433
469
|
trim_end = primer[:trim_ref_end]
|
434
470
|
trim_ref = primer[:trim_ref].to_sym
|
435
471
|
joined_sh = joined_sh.trim(trim_start, trim_end, trim_ref)
|
436
|
-
joined_sh.write_nt_fa(File.join(out_dir_consensus, "combined.fasta"))
|
437
472
|
if export_raw
|
438
473
|
joined_sh_raw = joined_sh_raw.trim(trim_start, trim_end, trim_ref)
|
439
|
-
joined_sh_raw.write_nt_fa(File.join(out_dir_raw, "combined.raw.fasta"))
|
440
474
|
end
|
441
475
|
end
|
476
|
+
|
477
|
+
joined_sh.write_nt_fa(File.join(out_dir_consensus, "combined.fasta"))
|
478
|
+
if export_raw
|
479
|
+
joined_sh_raw.write_nt_fa(File.join(out_dir_raw, "combined.raw.fasta"))
|
480
|
+
end
|
442
481
|
end
|
443
482
|
|
444
483
|
File.open(outfile_log, "w") do |f|
|
@@ -446,9 +485,11 @@ primers.each do |primer|
|
|
446
485
|
end
|
447
486
|
end
|
448
487
|
|
449
|
-
|
450
|
-
|
451
|
-
File.unlink(
|
488
|
+
unless options[:keep]
|
489
|
+
log.puts Time.now.to_s + "\t" + "Removing raw sequence files..."
|
490
|
+
File.unlink(r1_f)
|
491
|
+
File.unlink(r2_f)
|
492
|
+
end
|
452
493
|
log.puts Time.now.to_s + "\t" + "TCS pipeline successfuly exercuted."
|
453
494
|
log.close
|
454
495
|
puts "DONE!"
|
data/bin/tcs_log
ADDED
@@ -0,0 +1,102 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
|
3
|
+
# pool run logs from one batch of tcs jobs
|
4
|
+
# file structure:
|
5
|
+
# batch_tcs_jobs/
|
6
|
+
# ├── lib1
|
7
|
+
# ├── lib2
|
8
|
+
# ├── lib3
|
9
|
+
# ├── lib4
|
10
|
+
# ├── ...
|
11
|
+
#
|
12
|
+
# command example:
|
13
|
+
# $ tcs_log batch_tcs_jobs
|
14
|
+
|
15
|
+
require 'viral_seq'
|
16
|
+
require 'pathname'
|
17
|
+
require 'json'
|
18
|
+
require 'fileutils'
|
19
|
+
|
20
|
+
indir = ARGV[0].chomp
|
21
|
+
indir_basename = File.basename(indir)
|
22
|
+
indir_dirname = File.dirname(indir)
|
23
|
+
|
24
|
+
tcs_dir = File.join(indir_dirname, (indir_basename + "_tcs"))
|
25
|
+
Dir.mkdir(tcs_dir) unless File.directory?(tcs_dir)
|
26
|
+
|
27
|
+
libs = []
|
28
|
+
Dir.chdir(indir) {libs = Dir.glob("*")}
|
29
|
+
|
30
|
+
outdir2 = File.join(tcs_dir, "combined_TCS_per_lib")
|
31
|
+
outdir3 = File.join(tcs_dir, "TCS_per_region")
|
32
|
+
outdir4 = File.join(tcs_dir, "combined_TCS_per_region")
|
33
|
+
|
34
|
+
Dir.mkdir(outdir2) unless File.directory?(outdir2)
|
35
|
+
Dir.mkdir(outdir3) unless File.directory?(outdir3)
|
36
|
+
Dir.mkdir(outdir4) unless File.directory?(outdir4)
|
37
|
+
|
38
|
+
log_file = File.join(tcs_dir,"log.csv")
|
39
|
+
log = File.open(log_file,'w')
|
40
|
+
|
41
|
+
header = %w{
|
42
|
+
lib_name
|
43
|
+
Region
|
44
|
+
Raw_Sequences_per_barcode
|
45
|
+
R1_Raw
|
46
|
+
R2_Raw
|
47
|
+
Paired_Raw
|
48
|
+
Cutoff
|
49
|
+
PID_Length
|
50
|
+
Consensus1
|
51
|
+
Consensus2
|
52
|
+
Distinct_to_Raw
|
53
|
+
Resampling_index
|
54
|
+
Combined_TCS
|
55
|
+
Combined_TCS_after_QC
|
56
|
+
WARNINGS
|
57
|
+
}
|
58
|
+
|
59
|
+
log.puts header.join(',')
|
60
|
+
libs.each do |lib|
|
61
|
+
Dir.mkdir(File.join(outdir2, lib)) unless File.directory?(File.join(outdir2, lib))
|
62
|
+
fasta_files = []
|
63
|
+
json_files = []
|
64
|
+
Dir.chdir(File.join(indir, lib)) do
|
65
|
+
fasta_files = Dir.glob("**/*.fasta")
|
66
|
+
json_files = Dir.glob("**/log.json")
|
67
|
+
end
|
68
|
+
fasta_files.each do |f|
|
69
|
+
path_array = Pathname(f).each_filename.to_a
|
70
|
+
region = path_array[0]
|
71
|
+
if path_array[-1] == "combined.fasta"
|
72
|
+
FileUtils.cp(File.join(indir, lib, f), File.join(outdir2, lib, (lib + "_" + region)))
|
73
|
+
Dir.mkdir(File.join(outdir4,region)) unless File.directory?(File.join(outdir4,region))
|
74
|
+
FileUtils.cp(File.join(indir, lib, f), File.join(outdir4, region, (lib + "_" + region)))
|
75
|
+
else
|
76
|
+
Dir.mkdir(File.join(outdir3,region)) unless File.directory?(File.join(outdir3,region))
|
77
|
+
Dir.mkdir(File.join(outdir3,region, lib)) unless File.directory?(File.join(outdir3,region, lib))
|
78
|
+
FileUtils.cp(File.join(indir, lib, f), File.join(outdir3, region, lib, (lib + "_" + region + "_" + path_array[-1])))
|
79
|
+
end
|
80
|
+
end
|
81
|
+
|
82
|
+
json_files.each do |f|
|
83
|
+
json_log = JSON.parse(File.read(File.join(indir, lib, f)), symbolize_names: true)
|
84
|
+
log.print [lib,
|
85
|
+
json_log[:primer_set_name],
|
86
|
+
json_log[:total_raw_sequence],
|
87
|
+
json_log[:r1_filtered_raw],
|
88
|
+
json_log[:r2_filtered_raw],
|
89
|
+
json_log[:paired_raw_sequence],
|
90
|
+
json_log[:consensus_cutoff],
|
91
|
+
json_log[:length_of_pid],
|
92
|
+
json_log[:total_tcs_with_ambiguities],
|
93
|
+
json_log[:total_tcs],
|
94
|
+
json_log[:distinct_to_raw],
|
95
|
+
json_log[:resampling_param],
|
96
|
+
json_log[:combined_tcs],
|
97
|
+
json_log[:combined_tcs_after_qc],
|
98
|
+
json_log[:warnings],
|
99
|
+
].join(',') + "\n"
|
100
|
+
end
|
101
|
+
end
|
102
|
+
log.close
|