viral_seq 1.0.12 → 1.1.2
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.gitignore +0 -1
- data/Gemfile.lock +16 -3
- data/README.md +95 -12
- data/bin/tcs +51 -10
- data/bin/tcs_log +102 -0
- data/docs/assets/img/cover.jpg +0 -0
- data/docs/dr.json +67 -0
- data/docs/sample_miseq_data/hivdr_control/r1.fastq.gz +0 -0
- data/docs/sample_miseq_data/hivdr_control/r2.fastq.gz +0 -0
- data/lib/viral_seq.rb +4 -1
- data/lib/viral_seq/constant.rb +35 -5
- data/lib/viral_seq/hivdr.rb +1 -1
- data/lib/viral_seq/math.rb +3 -3
- data/lib/viral_seq/muscle.rb +3 -2
- data/lib/viral_seq/sdrm.rb +101 -35
- data/lib/viral_seq/seq_hash.rb +1 -1
- data/lib/viral_seq/seq_hash_pair.rb +6 -4
- data/lib/viral_seq/sequence.rb +1 -84
- data/lib/viral_seq/tcs_core.rb +34 -5
- data/lib/viral_seq/tcs_dr.rb +71 -0
- data/lib/viral_seq/tcs_json.rb +41 -10
- data/lib/viral_seq/version.rb +2 -2
- data/viral_seq.gemspec +11 -0
- metadata +71 -4
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: '093a8d1d70e251b0748b7035c829eb512991437ffa78fd67387318412e54acf5'
|
4
|
+
data.tar.gz: 1b9d6f6b2cb2ffa8d9cc588b8df096e7ac3840c694bfb241fcf970b738899328
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 3853dbfa3f6604d907ec3d77b8c86ec8d885fedcc854c40ca6822ec72e8b2cfe9413bc188aa722a14e4e4f6c9503eca1b36d7f8e0963a5a997c9f0ca8b54fc86
|
7
|
+
data.tar.gz: e5b056cddcf7b87cc30e52c878879cea82d865ea7fc867535767918c30c699d58d6f426518aad02be49916c49f38d9603b0ab27ca6f3625f7a5102ae86863023
|
data/.gitignore
CHANGED
data/Gemfile.lock
CHANGED
@@ -1,16 +1,27 @@
|
|
1
1
|
PATH
|
2
2
|
remote: .
|
3
3
|
specs:
|
4
|
-
viral_seq (1.
|
5
|
-
colorize (
|
6
|
-
|
4
|
+
viral_seq (1.1.1)
|
5
|
+
colorize (>= 0.1)
|
6
|
+
combine_pdf (>= 1.0.0)
|
7
|
+
muscle_bio (>= 0.4)
|
8
|
+
prawn (>= 2.3.0)
|
9
|
+
prawn-table (>= 0.2.0)
|
7
10
|
|
8
11
|
GEM
|
9
12
|
remote: https://rubygems.org/
|
10
13
|
specs:
|
11
14
|
colorize (0.8.1)
|
15
|
+
combine_pdf (1.0.21)
|
16
|
+
ruby-rc4 (>= 0.1.5)
|
12
17
|
diff-lcs (1.3)
|
13
18
|
muscle_bio (0.4.0)
|
19
|
+
pdf-core (0.9.0)
|
20
|
+
prawn (2.4.0)
|
21
|
+
pdf-core (~> 0.9.0)
|
22
|
+
ttfunk (~> 1.7)
|
23
|
+
prawn-table (0.2.2)
|
24
|
+
prawn (>= 1.3.0, < 3.0.0)
|
14
25
|
rake (13.0.1)
|
15
26
|
rspec (3.8.0)
|
16
27
|
rspec-core (~> 3.8.0)
|
@@ -25,6 +36,8 @@ GEM
|
|
25
36
|
diff-lcs (>= 1.2.0, < 2.0)
|
26
37
|
rspec-support (~> 3.8.0)
|
27
38
|
rspec-support (3.8.0)
|
39
|
+
ruby-rc4 (0.1.5)
|
40
|
+
ttfunk (1.7.0)
|
28
41
|
|
29
42
|
PLATFORMS
|
30
43
|
ruby
|
data/README.md
CHANGED
@@ -1,8 +1,24 @@
|
|
1
1
|
# ViralSeq
|
2
2
|
|
3
|
+
[![Gem Version](https://badge.fury.io/rb/viral_seq.svg)](https://rubygems.org/gems/viral_seq)
|
4
|
+
![GitHub](https://img.shields.io/github/license/viralseq/viral_seq)
|
5
|
+
![Gem](https://img.shields.io/gem/dt/viral_seq?color=%23E9967A)
|
6
|
+
![GitHub last commit](https://img.shields.io/github/last-commit/viralseq/viral_seq?color=%2300BFFF)
|
7
|
+
[![Join the chat at https://gitter.im/viral_seq/community](https://badges.gitter.im/viral_seq/community.svg)](https://gitter.im/viral_seq/community?utm_source=badge&utm_medium=badge&utm_campaign=pr-badge&utm_content=badge)
|
8
|
+
|
3
9
|
A Ruby Gem containing bioinformatics tools for processing viral NGS data.
|
4
10
|
|
5
|
-
Specifically for Primer
|
11
|
+
Specifically for Primer ID sequencing and HIV drug resistance analysis.
|
12
|
+
|
13
|
+
## Illustration for the Primer ID Sequencing
|
14
|
+
|
15
|
+
|
16
|
+
![Primer ID Sequencing](./docs/assets/img/cover.jpg)
|
17
|
+
|
18
|
+
### Reference readings on the Primer ID sequencing
|
19
|
+
[Explanation of Primer ID sequencing](https://doi.org/10.21769/BioProtoc.3938)
|
20
|
+
[Primer ID MiSeq protocol](https://doi.org/10.1128/JVI.00522-15)
|
21
|
+
[Application of Primer ID sequencing in COVID-19 research](https://doi.org/10.1126/scitranslmed.abb5883)
|
6
22
|
|
7
23
|
## Install
|
8
24
|
|
@@ -14,20 +30,55 @@ Specifically for Primer-ID sequencing and HIV drug resistance analysis.
|
|
14
30
|
|
15
31
|
### Executables
|
16
32
|
|
17
|
-
|
33
|
+
### `tcs`
|
34
|
+
Use executable `tcs` pipeline to process **Primer ID MiSeq sequencing** data.
|
18
35
|
|
36
|
+
Example commands:
|
19
37
|
```bash
|
20
|
-
$
|
38
|
+
$ tcs -p params.json # run TCS pipeline with params.json
|
39
|
+
$ tcs -p params.json -i DIRECTORY
|
40
|
+
# run TCS pipeline with params.json and DIRECTORY
|
41
|
+
# if DIRECTORY is not defined in params.json
|
42
|
+
$ tcs -dr -i DIRECTORY
|
43
|
+
# run tcs-dr (MPID HIV drug resistance sequencing) pipeline
|
44
|
+
# DIRECTORY needs to be given.
|
45
|
+
$ tcs -j # CLI to generate params.json
|
46
|
+
$ tcs -h # print out the help
|
21
47
|
```
|
22
48
|
|
23
|
-
|
49
|
+
[sample params.json for the tcs-dr pipeline](./docs/dr.json)
|
50
|
+
|
51
|
+
---
|
52
|
+
### `tcs_log`
|
53
|
+
|
54
|
+
Use `tcs_log` script to pool run logs and TCS fasta files after one batch of `tcs` jobs.
|
24
55
|
|
56
|
+
|
57
|
+
Example file structure:
|
58
|
+
```
|
59
|
+
batch_tcs_jobs/
|
60
|
+
├── lib1
|
61
|
+
├── lib2
|
62
|
+
├── lib3
|
63
|
+
├── lib4
|
64
|
+
├── ...
|
65
|
+
```
|
66
|
+
|
67
|
+
Example command:
|
25
68
|
```bash
|
26
|
-
$
|
27
|
-
$ tcs -j # CLI to generate params.json
|
28
|
-
$ tcs -h # print out the help
|
69
|
+
$ tcs_log batch_tcs_jobs
|
29
70
|
```
|
30
71
|
|
72
|
+
---
|
73
|
+
|
74
|
+
### `locator`
|
75
|
+
Use the executable `locator` to get the coordinates of sequences on the HIV/SIV reference genome from a FASTA file through a terminal.
|
76
|
+
|
77
|
+
```bash
|
78
|
+
$ locator -i sequence.fasta -o sequence.fasta.csv
|
79
|
+
```
|
80
|
+
---
|
81
|
+
|
31
82
|
## Some Examples
|
32
83
|
|
33
84
|
Load all ViralSeq classes by requiring 'viral_seq.rb' in your Ruby scripts.
|
@@ -80,21 +131,53 @@ qc_seqhash.sdrm_hiv_pr(cut_off)
|
|
80
131
|
```
|
81
132
|
## Known issues
|
82
133
|
|
83
|
-
1. have a conflict with rails
|
84
|
-
2. Update on 03032021. Still have conflict. But in rails gem file, can just use `requires: false` globally and only require "viral_seq" when the module is needed in controller
|
134
|
+
1. ~~have a conflict with rails.~~
|
135
|
+
2. ~~Update on 03032021. Still have conflict. But in rails gem file, can just use `requires: false` globally and only require "viral_seq" when the module is needed in controller.~~
|
136
|
+
3. The conflict seems to be resolved. It was from a combination of using `!` as a function for factorial and the gem name `viral_seq`. @_@
|
85
137
|
|
86
138
|
## Updates
|
87
139
|
|
88
|
-
### Version 1.1.2-
|
140
|
+
### Version 1.1.2-04262021
|
141
|
+
|
142
|
+
1. Added function `ViralSeq::DRMs.sdrm_json` to export SDRM as json object.
|
143
|
+
2. Added a random string to the temp file names for `muscle_bio` to avoid issues when running scripts in parallel.
|
144
|
+
3. Added `--keep-original` flag to the `tcs` pipeline.
|
145
|
+
|
146
|
+
### Version 1.1.1-04012021
|
147
|
+
|
148
|
+
1. Added a warning when paired_raw_sequence is less than 0.1% of total_raw_sequence.
|
149
|
+
2. Added option `-i WORKING_DIRECTORY` to the `tcs` script.
|
150
|
+
If the `params.json` file does not contain the path to the working directory, it will append the path to the run params.
|
151
|
+
3. Added option `-dr` to the `tcs` script.
|
152
|
+
|
153
|
+
### Version 1.1.0-03252021
|
154
|
+
|
155
|
+
1. Optimized the algorithm of end-join.
|
156
|
+
2. Fixed a bug in the `tcs` pipeline where combined tcs files were sometimes not saved.
|
157
|
+
3. Added `tcs_log` command to pool run logs and tcs files from one batch of tcs jobs.
|
158
|
+
4. Added the preset of MPID-HIVDR params file [***dr.json***](./docs/dr.json) in /docs.
|
159
|
+
5. Added a `platform_format` option to the JSON generator of the `tcs` pipeline.
|
160
|
+
Users can choose from 3 MiSeq platforms for processing their sequencing data.
|
161
|
+
MiSeq 300x7x300 is the default option.
|
162
|
+
|
163
|
+
### Version 1.0.14-03052021
|
164
|
+
|
165
|
+
1. Add a function `ViralSeq::TcsCore.validate_file_name` to check MiSeq paired-end file names.
|
166
|
+
|
167
|
+
### Version 1.0.13-03032021
|
168
|
+
|
169
|
+
1. Fixed the conflict with rails.
|
170
|
+
|
171
|
+
### Version 1.0.12-03032021
|
89
172
|
|
90
173
|
1. Fixed an issue that may cause conflicts with ActiveRecord.
|
91
174
|
|
92
|
-
### Version 1.
|
175
|
+
### Version 1.0.11-03022021
|
93
176
|
|
94
177
|
1. Fixed an issue when calculating Poisson cutoff for minority mutations `ViralSeq::SeqHash.pm`.
|
95
178
|
2. Fixed an issue loading class 'OptionParser' in some Ruby environments.
|
96
179
|
|
97
|
-
### Version 1.
|
180
|
+
### Version 1.0.10-11112020:
|
98
181
|
|
99
182
|
1. Modularize TCS pipeline. Move key functions into /viral_seq/tcs_core.rb
|
100
183
|
2. `tcs_json_generator` is removed. This CLI is delivered within the `tcs` pipeline, by running `tcs -j`. The scripts are included in the /viral_seq/tcs_json.rb
|
data/bin/tcs
CHANGED
@@ -23,7 +23,7 @@
|
|
23
23
|
# THE SOFTWARE.
|
24
24
|
|
25
25
|
# Use JSON file as the run param
|
26
|
-
# run
|
26
|
+
# run `tcs -j` to generate param json file.
|
27
27
|
|
28
28
|
require 'viral_seq'
|
29
29
|
require 'json'
|
@@ -46,11 +46,23 @@ OptionParser.new do |opts|
|
|
46
46
|
options[:params_json] = p
|
47
47
|
end
|
48
48
|
|
49
|
+
opts.on("-i", "--input PATH_TO_WORKING_DIRECTORY", "Path to the working directory") do |p|
|
50
|
+
options[:input] = p
|
51
|
+
end
|
52
|
+
|
53
|
+
opts.on("-dr", "--dr_pipeline", "HIV drug resistance MPID pipeline") do |p|
|
54
|
+
options[:dr] = true
|
55
|
+
end
|
56
|
+
|
49
57
|
opts.on("-h", "--help", "Prints this help") do
|
50
58
|
puts opts
|
51
59
|
exit
|
52
60
|
end
|
53
61
|
|
62
|
+
opts.on("--keep-original", "keep raw sequence files") do
|
63
|
+
options[:keep] = true
|
64
|
+
end
|
65
|
+
|
54
66
|
opts.on("-v", "--version", "Version info") do
|
55
67
|
puts "tcs version: " + ViralSeq::TCS_VERSION.red.bold
|
56
68
|
puts "viral_seq version: " + ViralSeq::VERSION.red.bold
|
@@ -64,15 +76,21 @@ end.parse!
|
|
64
76
|
|
65
77
|
if options[:json_generator]
|
66
78
|
params = ViralSeq::TcsJson.generate
|
79
|
+
elsif options[:dr]
|
80
|
+
params = ViralSeq::TcsDr::PARAMS
|
67
81
|
elsif (options[:params_json] && File.exist?(options[:params_json]))
|
68
82
|
params = JSON.parse(File.read(options[:params_json]), symbolize_names: true)
|
69
83
|
else
|
70
84
|
abort "No params JSON file found. Script terminated.".red
|
71
85
|
end
|
72
86
|
|
73
|
-
|
87
|
+
if options[:input]
|
88
|
+
indir = options[:input]
|
89
|
+
else
|
90
|
+
indir = params[:raw_sequence_dir]
|
91
|
+
end
|
74
92
|
|
75
|
-
unless File.exist?(indir)
|
93
|
+
unless indir and File.exist?(indir)
|
76
94
|
abort "No input sequence directory found. Script terminated.".red.bold
|
77
95
|
end
|
78
96
|
|
@@ -115,6 +133,12 @@ else
|
|
115
133
|
error_rate = 0.02
|
116
134
|
end
|
117
135
|
|
136
|
+
if params[:platform_format]
|
137
|
+
$platform_sequencing_length = params[:platform_format]
|
138
|
+
else
|
139
|
+
$platform_sequencing_length = 300
|
140
|
+
end
|
141
|
+
|
118
142
|
primers = params[:primer_pairs]
|
119
143
|
if primers.empty?
|
120
144
|
ViralSeq::TcsCore.log_and_abort log, "No primer information. Script terminated."
|
@@ -123,6 +147,7 @@ end
|
|
123
147
|
|
124
148
|
primers.each do |primer|
|
125
149
|
summary_json = {}
|
150
|
+
summary_json[:warnings] = []
|
126
151
|
summary_json[:tcs_version] = ViralSeq::TCS_VERSION
|
127
152
|
summary_json[:viralseq_version] = ViralSeq::VERSION
|
128
153
|
summary_json[:runtime] = Time.now.to_s
|
@@ -134,6 +159,7 @@ primers.each do |primer|
|
|
134
159
|
forward_primer = primer[:forward]
|
135
160
|
|
136
161
|
export_raw = primer[:export_raw]
|
162
|
+
limit_raw = primer[:limit_raw]
|
137
163
|
|
138
164
|
unless cdna_primer
|
139
165
|
log.puts Time.now.to_s + "\t" + region + " does not have cDNA primer sequence. #{region} skipped."
|
@@ -175,6 +201,10 @@ primers.each do |primer|
|
|
175
201
|
paired_seq_number = common_keys.size
|
176
202
|
log.puts Time.now.to_s + "\t" + "Paired raw sequences are : #{paired_seq_number.to_s}"
|
177
203
|
summary_json[:paired_raw_sequence] = paired_seq_number
|
204
|
+
if paired_seq_number < raw_sequence_number * 0.001
|
205
|
+
summary_json[:warnings] <<
|
206
|
+
"WARNING: Filtered raw sequneces less than 0.1% of the total raw sequences. Possible contamination."
|
207
|
+
end
|
178
208
|
|
179
209
|
common_keys.each do |seqtag|
|
180
210
|
r1_seq = r1_passed_seq[seqtag]
|
@@ -236,7 +266,13 @@ primers.each do |primer|
|
|
236
266
|
raw_r1_f = File.open(outfile_raw_r1, 'w')
|
237
267
|
raw_r2_f = File.open(outfile_raw_r2, 'w')
|
238
268
|
|
239
|
-
|
269
|
+
if limit_raw
|
270
|
+
raw_keys = bio_r1.keys.sample(limit_raw.to_i)
|
271
|
+
else
|
272
|
+
raw_keys = bio_r1.keys
|
273
|
+
end
|
274
|
+
|
275
|
+
raw_keys.each do |k|
|
240
276
|
raw_r1_f.puts k + "_r1"
|
241
277
|
raw_r2_f.puts k + "_r2"
|
242
278
|
raw_r1_f.puts bio_r1[k]
|
@@ -273,7 +309,6 @@ primers.each do |primer|
|
|
273
309
|
r1_sub_seq << bio_r1[seq_name]
|
274
310
|
r2_sub_seq << bio_r2[seq_name]
|
275
311
|
end
|
276
|
-
|
277
312
|
#consensus name including the Primer ID and number of raw sequences of that Primer ID, library name and setname.
|
278
313
|
consensus_name = ">" + primer_id + "_" + seq_with_same_primer_id.size.to_s + "_" + libname + "_" + region
|
279
314
|
r1_consensus = ViralSeq::SeqHash.array(r1_sub_seq).consensus(majority_cut_off)
|
@@ -364,6 +399,7 @@ primers.each do |primer|
|
|
364
399
|
shp = ViralSeq::SeqHashPair.fa(out_dir_consensus)
|
365
400
|
joined_sh = end_join(out_dir_consensus, primer[:end_join_option], primer[:overlap])
|
366
401
|
log.puts Time.now.to_s + "\t" + "Paired TCS number: " + joined_sh.size.to_s
|
402
|
+
|
367
403
|
summary_json[:combined_tcs] = joined_sh.size
|
368
404
|
|
369
405
|
if export_raw
|
@@ -433,12 +469,15 @@ primers.each do |primer|
|
|
433
469
|
trim_end = primer[:trim_ref_end]
|
434
470
|
trim_ref = primer[:trim_ref].to_sym
|
435
471
|
joined_sh = joined_sh.trim(trim_start, trim_end, trim_ref)
|
436
|
-
joined_sh.write_nt_fa(File.join(out_dir_consensus, "combined.fasta"))
|
437
472
|
if export_raw
|
438
473
|
joined_sh_raw = joined_sh_raw.trim(trim_start, trim_end, trim_ref)
|
439
|
-
joined_sh_raw.write_nt_fa(File.join(out_dir_raw, "combined.raw.fasta"))
|
440
474
|
end
|
441
475
|
end
|
476
|
+
|
477
|
+
joined_sh.write_nt_fa(File.join(out_dir_consensus, "combined.fasta"))
|
478
|
+
if export_raw
|
479
|
+
joined_sh_raw.write_nt_fa(File.join(out_dir_raw, "combined.raw.fasta"))
|
480
|
+
end
|
442
481
|
end
|
443
482
|
|
444
483
|
File.open(outfile_log, "w") do |f|
|
@@ -446,9 +485,11 @@ primers.each do |primer|
|
|
446
485
|
end
|
447
486
|
end
|
448
487
|
|
449
|
-
|
450
|
-
|
451
|
-
File.unlink(
|
488
|
+
unless options[:keep]
|
489
|
+
log.puts Time.now.to_s + "\t" + "Removing raw sequence files..."
|
490
|
+
File.unlink(r1_f)
|
491
|
+
File.unlink(r2_f)
|
492
|
+
end
|
452
493
|
log.puts Time.now.to_s + "\t" + "TCS pipeline successfuly exercuted."
|
453
494
|
log.close
|
454
495
|
puts "DONE!"
|
data/bin/tcs_log
ADDED
@@ -0,0 +1,102 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
|
3
|
+
# pool run logs from one batch of tcs jobs
|
4
|
+
# file structure:
|
5
|
+
# batch_tcs_jobs/
|
6
|
+
# ├── lib1
|
7
|
+
# ├── lib2
|
8
|
+
# ├── lib3
|
9
|
+
# ├── lib4
|
10
|
+
# ├── ...
|
11
|
+
#
|
12
|
+
# command example:
|
13
|
+
# $ tcs_log batch_tcs_jobs
|
14
|
+
|
15
|
+
require 'viral_seq'
|
16
|
+
require 'pathname'
|
17
|
+
require 'json'
|
18
|
+
require 'fileutils'
|
19
|
+
|
20
|
+
indir = ARGV[0].chomp
|
21
|
+
indir_basename = File.basename(indir)
|
22
|
+
indir_dirname = File.dirname(indir)
|
23
|
+
|
24
|
+
tcs_dir = File.join(indir_dirname, (indir_basename + "_tcs"))
|
25
|
+
Dir.mkdir(tcs_dir) unless File.directory?(tcs_dir)
|
26
|
+
|
27
|
+
libs = []
|
28
|
+
Dir.chdir(indir) {libs = Dir.glob("*")}
|
29
|
+
|
30
|
+
outdir2 = File.join(tcs_dir, "combined_TCS_per_lib")
|
31
|
+
outdir3 = File.join(tcs_dir, "TCS_per_region")
|
32
|
+
outdir4 = File.join(tcs_dir, "combined_TCS_per_region")
|
33
|
+
|
34
|
+
Dir.mkdir(outdir2) unless File.directory?(outdir2)
|
35
|
+
Dir.mkdir(outdir3) unless File.directory?(outdir3)
|
36
|
+
Dir.mkdir(outdir4) unless File.directory?(outdir4)
|
37
|
+
|
38
|
+
log_file = File.join(tcs_dir,"log.csv")
|
39
|
+
log = File.open(log_file,'w')
|
40
|
+
|
41
|
+
header = %w{
|
42
|
+
lib_name
|
43
|
+
Region
|
44
|
+
Raw_Sequences_per_barcode
|
45
|
+
R1_Raw
|
46
|
+
R2_Raw
|
47
|
+
Paired_Raw
|
48
|
+
Cutoff
|
49
|
+
PID_Length
|
50
|
+
Consensus1
|
51
|
+
Consensus2
|
52
|
+
Distinct_to_Raw
|
53
|
+
Resampling_index
|
54
|
+
Combined_TCS
|
55
|
+
Combined_TCS_after_QC
|
56
|
+
WARNINGS
|
57
|
+
}
|
58
|
+
|
59
|
+
log.puts header.join(',')
|
60
|
+
libs.each do |lib|
|
61
|
+
Dir.mkdir(File.join(outdir2, lib)) unless File.directory?(File.join(outdir2, lib))
|
62
|
+
fasta_files = []
|
63
|
+
json_files = []
|
64
|
+
Dir.chdir(File.join(indir, lib)) do
|
65
|
+
fasta_files = Dir.glob("**/*.fasta")
|
66
|
+
json_files = Dir.glob("**/log.json")
|
67
|
+
end
|
68
|
+
fasta_files.each do |f|
|
69
|
+
path_array = Pathname(f).each_filename.to_a
|
70
|
+
region = path_array[0]
|
71
|
+
if path_array[-1] == "combined.fasta"
|
72
|
+
FileUtils.cp(File.join(indir, lib, f), File.join(outdir2, lib, (lib + "_" + region)))
|
73
|
+
Dir.mkdir(File.join(outdir4,region)) unless File.directory?(File.join(outdir4,region))
|
74
|
+
FileUtils.cp(File.join(indir, lib, f), File.join(outdir4, region, (lib + "_" + region)))
|
75
|
+
else
|
76
|
+
Dir.mkdir(File.join(outdir3,region)) unless File.directory?(File.join(outdir3,region))
|
77
|
+
Dir.mkdir(File.join(outdir3,region, lib)) unless File.directory?(File.join(outdir3,region, lib))
|
78
|
+
FileUtils.cp(File.join(indir, lib, f), File.join(outdir3, region, lib, (lib + "_" + region + "_" + path_array[-1])))
|
79
|
+
end
|
80
|
+
end
|
81
|
+
|
82
|
+
json_files.each do |f|
|
83
|
+
json_log = JSON.parse(File.read(File.join(indir, lib, f)), symbolize_names: true)
|
84
|
+
log.print [lib,
|
85
|
+
json_log[:primer_set_name],
|
86
|
+
json_log[:total_raw_sequence],
|
87
|
+
json_log[:r1_filtered_raw],
|
88
|
+
json_log[:r2_filtered_raw],
|
89
|
+
json_log[:paired_raw_sequence],
|
90
|
+
json_log[:consensus_cutoff],
|
91
|
+
json_log[:length_of_pid],
|
92
|
+
json_log[:total_tcs_with_ambiguities],
|
93
|
+
json_log[:total_tcs],
|
94
|
+
json_log[:distinct_to_raw],
|
95
|
+
json_log[:resampling_param],
|
96
|
+
json_log[:combined_tcs],
|
97
|
+
json_log[:combined_tcs_after_qc],
|
98
|
+
json_log[:warnings],
|
99
|
+
].join(',') + "\n"
|
100
|
+
end
|
101
|
+
end
|
102
|
+
log.close
|