viral_seq 1.0.13 → 1.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.gitignore +0 -1
- data/Gemfile.lock +16 -3
- data/README.md +102 -13
- data/bin/tcs +51 -10
- data/bin/tcs_log +102 -0
- data/bin/tcs_sdrm +402 -0
- data/docs/assets/img/cover.jpg +0 -0
- data/docs/dr.json +67 -0
- data/docs/sample_miseq_data/hivdr_control/r1.fastq.gz +0 -0
- data/docs/sample_miseq_data/hivdr_control/r2.fastq.gz +0 -0
- data/lib/viral_seq.rb +5 -1
- data/lib/viral_seq/constant.rb +35 -5
- data/lib/viral_seq/hivdr.rb +1 -1
- data/lib/viral_seq/muscle.rb +3 -2
- data/lib/viral_seq/recency.rb +52 -0
- data/lib/viral_seq/sdrm.rb +101 -35
- data/lib/viral_seq/seq_hash.rb +25 -5
- data/lib/viral_seq/seq_hash_pair.rb +6 -4
- data/lib/viral_seq/sequence.rb +1 -84
- data/lib/viral_seq/tcs_core.rb +34 -5
- data/lib/viral_seq/tcs_dr.rb +71 -0
- data/lib/viral_seq/tcs_json.rb +41 -10
- data/lib/viral_seq/version.rb +2 -2
- data/viral_seq.gemspec +11 -0
- metadata +74 -4
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: dbcddd0679b712b28592875aa18e38269ccbac5b85113f53873d4cedc5572b04
|
4
|
+
data.tar.gz: 7268e596a2c40f7cdd2c815ccf5cdb40663c096c709aba6ee2e0dc4bc9a07542
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: a689ed94201b19ee258fb07f73dd89ed2c8fd297b9580ba720d85ef2a16c5a38fdfed326dbdcc987f0913b4c9ab2aa060683a770df48baa4b1d657d63de35152
|
7
|
+
data.tar.gz: 0b8065ae813f66b88fda3d7788c20718aa0db1a4f723d6831e948157b682a81fd1ae44a1d9043ebfb046df91c072bbc16db41ddd42e272d3f6c74a13fa473836
|
data/.gitignore
CHANGED
data/Gemfile.lock
CHANGED
@@ -1,16 +1,27 @@
|
|
1
1
|
PATH
|
2
2
|
remote: .
|
3
3
|
specs:
|
4
|
-
viral_seq (1.
|
5
|
-
colorize (
|
6
|
-
|
4
|
+
viral_seq (1.1.1)
|
5
|
+
colorize (>= 0.1)
|
6
|
+
combine_pdf (>= 1.0.0)
|
7
|
+
muscle_bio (>= 0.4)
|
8
|
+
prawn (>= 2.3.0)
|
9
|
+
prawn-table (>= 0.2.0)
|
7
10
|
|
8
11
|
GEM
|
9
12
|
remote: https://rubygems.org/
|
10
13
|
specs:
|
11
14
|
colorize (0.8.1)
|
15
|
+
combine_pdf (1.0.21)
|
16
|
+
ruby-rc4 (>= 0.1.5)
|
12
17
|
diff-lcs (1.3)
|
13
18
|
muscle_bio (0.4.0)
|
19
|
+
pdf-core (0.9.0)
|
20
|
+
prawn (2.4.0)
|
21
|
+
pdf-core (~> 0.9.0)
|
22
|
+
ttfunk (~> 1.7)
|
23
|
+
prawn-table (0.2.2)
|
24
|
+
prawn (>= 1.3.0, < 3.0.0)
|
14
25
|
rake (13.0.1)
|
15
26
|
rspec (3.8.0)
|
16
27
|
rspec-core (~> 3.8.0)
|
@@ -25,6 +36,8 @@ GEM
|
|
25
36
|
diff-lcs (>= 1.2.0, < 2.0)
|
26
37
|
rspec-support (~> 3.8.0)
|
27
38
|
rspec-support (3.8.0)
|
39
|
+
ruby-rc4 (0.1.5)
|
40
|
+
ttfunk (1.7.0)
|
28
41
|
|
29
42
|
PLATFORMS
|
30
43
|
ruby
|
data/README.md
CHANGED
@@ -1,8 +1,24 @@
|
|
1
1
|
# ViralSeq
|
2
2
|
|
3
|
+
[](https://rubygems.org/gems/viral_seq)
|
4
|
+

|
5
|
+

|
6
|
+

|
7
|
+
[](https://gitter.im/viral_seq/community?utm_source=badge&utm_medium=badge&utm_campaign=pr-badge&utm_content=badge)
|
8
|
+
|
3
9
|
A Ruby Gem containing bioinformatics tools for processing viral NGS data.
|
4
10
|
|
5
|
-
Specifically for Primer-ID sequencing and HIV drug resistance analysis.
|
11
|
+
Specifically for Primer ID sequencing and HIV drug resistance analysis.
|
12
|
+
|
13
|
+
## Illustration for the Primer ID Sequencing
|
14
|
+
|
15
|
+
|
16
|
+

|
17
|
+
|
18
|
+
### Reference readings on the Primer ID sequencing
|
19
|
+
[Explanation of Primer ID sequencing](https://doi.org/10.21769/BioProtoc.3938)
|
20
|
+
[Primer ID MiSeq protocol](https://doi.org/10.1128/JVI.00522-15)
|
21
|
+
[Application of Primer ID sequencing in COVID-19 research](https://doi.org/10.1126/scitranslmed.abb5883)
|
6
22
|
|
7
23
|
## Install
|
8
24
|
|
@@ -14,20 +30,55 @@ Specifically for Primer-ID sequencing and HIV drug resistance analysis.
|
|
14
30
|
|
15
31
|
### Executables
|
16
32
|
|
17
|
-
|
33
|
+
### `tcs`
|
34
|
+
Use executable `tcs` pipeline to process **Primer ID MiSeq sequencing** data.
|
18
35
|
|
36
|
+
Example commands:
|
19
37
|
```bash
|
20
|
-
$
|
38
|
+
$ tcs -p params.json # run TCS pipeline with params.json
|
39
|
+
$ tcs -p params.json -i DIRECTORY
|
40
|
+
# run TCS pipeline with params.json and DIRECTORY
|
41
|
+
# if DIRECTORY is not defined in params.json
|
42
|
+
$ tcs -dr -i DIRECTORY
|
43
|
+
# run tcs-dr (MPID HIV drug resistance sequencing) pipeline
|
44
|
+
# DIRECTORY needs to be given.
|
45
|
+
$ tcs -j # CLI to generate params.json
|
46
|
+
$ tcs -h # print out the help
|
21
47
|
```
|
22
48
|
|
23
|
-
|
49
|
+
[sample params.json for the tcs-dr pipeline](./docs/dr.json)
|
24
50
|
|
51
|
+
---
|
52
|
+
### `tcs_log`
|
53
|
+
|
54
|
+
Use `tcs_log` script to pool run logs and TCS fasta files after one batch of `tcs` jobs.
|
55
|
+
|
56
|
+
|
57
|
+
Example file structure:
|
58
|
+
```
|
59
|
+
batch_tcs_jobs/
|
60
|
+
├── lib1
|
61
|
+
├── lib2
|
62
|
+
├── lib3
|
63
|
+
├── lib4
|
64
|
+
├── ...
|
65
|
+
```
|
66
|
+
|
67
|
+
Example command:
|
25
68
|
```bash
|
26
|
-
$
|
27
|
-
$ tcs -j # CLI to generate params.json
|
28
|
-
$ tcs -h # print out the help
|
69
|
+
$ tcs_log batch_tcs_jobs
|
29
70
|
```
|
30
71
|
|
72
|
+
---
|
73
|
+
|
74
|
+
### `locator`
|
75
|
+
Use the executable `locator` to get the coordinates of the sequences on the HIV/SIV reference genome from a FASTA file through a terminal.
|
76
|
+
|
77
|
+
```bash
|
78
|
+
$ locator -i sequence.fasta -o sequence.fasta.csv
|
79
|
+
```
|
80
|
+
---
|
81
|
+
|
31
82
|
## Some Examples
|
32
83
|
|
33
84
|
Load all ViralSeq classes by requiring 'viral_seq.rb' in your Ruby scripts.
|
@@ -58,7 +109,7 @@ qc_seqhash = aligned_seqhash.hiv_seq_qc(2253, 2549, false, :HXB2)
|
|
58
109
|
Further filter out sequences with Apobec3g/f hypermutations
|
59
110
|
|
60
111
|
```ruby
|
61
|
-
qc_seqhash = qc_seqhash.a3g
|
112
|
+
qc_seqhash = qc_seqhash.a3g[:filtered_seq]
|
62
113
|
```
|
63
114
|
|
64
115
|
Calculate nucleotide diversity π
|
@@ -86,20 +137,58 @@ qc_seqhash.sdrm_hiv_pr(cut_off)
|
|
86
137
|
|
87
138
|
## Updates
|
88
139
|
|
89
|
-
### Version 1.
|
140
|
+
### Version 1.2.0-05102021
|
141
|
+
|
142
|
+
1. Added `tcs_sdrm` pipeline as an executable.
|
143
|
+
`tcs_sdrm` processes `tcs`-processed HIV MPID-NGS data for drug resistance mutations, recency and phylogenetic analysis.
|
144
|
+
|
145
|
+
2. Added function ViralSeq::SeqHash#sample.
|
146
|
+
|
147
|
+
3. Added recency determining function `ViralSeq::Recency::define`
|
148
|
+
|
149
|
+
4. Fixed a few bugs related to `tcs_sdrm`.
|
150
|
+
|
151
|
+
### Version 1.1.2-04262021
|
152
|
+
|
153
|
+
1. Added function `ViralSeq::DRMs.sdrm_json` to export SDRM as json object.
|
154
|
+
2. Added a random string to the temp file names for `muscle_bio` to avoid issues when running scripts in parallel.
|
155
|
+
3. Added `--keep-original` flag to the `tcs` pipeline.
|
156
|
+
|
157
|
+
### Version 1.1.1-04012021
|
158
|
+
|
159
|
+
1. Added a warning when paired_raw_sequence is less than 0.1% of total_raw_sequence.
|
160
|
+
2. Added option `-i WORKING_DIRECTORY` to the `tcs` script.
|
161
|
+
If the `params.json` file does not contain the path to the working directory, it will append path to the run params.
|
162
|
+
3. Added option `-dr` to the `tcs` script.
|
163
|
+
|
164
|
+
### Version 1.1.0-03252021
|
165
|
+
|
166
|
+
1. Optimized the algorithm of end-join.
|
167
|
+
2. Fixed a bug in the `tcs` pipeline where combined tcs files were sometimes not saved.
|
168
|
+
3. Added `tcs_log` command to pool run logs and tcs files from one batch of tcs jobs.
|
169
|
+
4. Added the preset of MPID-HIVDR params file [***dr.json***](./docs/dr.json) in /docs.
|
170
|
+
5. Add `platform_format` option in the json generator of the `tcs` Pipeline.
|
171
|
+
Users can choose from 3 MiSeq platforms for processing their sequencing data.
|
172
|
+
MiSeq 300x7x300 is the default option.
|
173
|
+
|
174
|
+
### Version 1.0.14-03052021
|
175
|
+
|
176
|
+
1. Add a function `ViralSeq::TcsCore.validate_file_name` to check MiSeq paired-end file names.
|
177
|
+
|
178
|
+
### Version 1.0.13-03032021
|
90
179
|
|
91
|
-
1. Fixed the conflict with rails.
|
180
|
+
1. Fixed the conflict with rails.
|
92
181
|
|
93
|
-
### Version 1.
|
182
|
+
### Version 1.0.12-03032021
|
94
183
|
|
95
184
|
1. Fixed an issue that may cause conflicts with ActiveRecord.
|
96
185
|
|
97
|
-
### Version 1.
|
186
|
+
### Version 1.0.11-03022021
|
98
187
|
|
99
188
|
1. Fixed an issue when calculating Poisson cutoff for minority mutations `ViralSeq::SeqHash.pm`.
|
100
189
|
2. Fixed an issue loading class 'OptionParser' in some Ruby environments.
|
101
190
|
|
102
|
-
### Version 1.
|
191
|
+
### Version 1.0.10-11112020:
|
103
192
|
|
104
193
|
1. Modularize TCS pipeline. Move key functions into /viral_seq/tcs_core.rb
|
105
194
|
2. `tcs_json_generator` is removed. This CLI is delivered within the `tcs` pipeline, by running `tcs -j`. The scripts are included in the /viral_seq/tcs_json.rb
|
data/bin/tcs
CHANGED
@@ -23,7 +23,7 @@
|
|
23
23
|
# THE SOFTWARE.
|
24
24
|
|
25
25
|
# Use JSON file as the run param
|
26
|
-
# run
|
26
|
+
# run `tcs -j` to generate param json file.
|
27
27
|
|
28
28
|
require 'viral_seq'
|
29
29
|
require 'json'
|
@@ -46,11 +46,23 @@ OptionParser.new do |opts|
|
|
46
46
|
options[:params_json] = p
|
47
47
|
end
|
48
48
|
|
49
|
+
opts.on("-i", "--input PATH_TO_WORKING_DIRECTORY", "Path to the working directory") do |p|
|
50
|
+
options[:input] = p
|
51
|
+
end
|
52
|
+
|
53
|
+
opts.on("-dr", "--dr_pipeline", "HIV drug resistance MPID pipeline") do |p|
|
54
|
+
options[:dr] = true
|
55
|
+
end
|
56
|
+
|
49
57
|
opts.on("-h", "--help", "Prints this help") do
|
50
58
|
puts opts
|
51
59
|
exit
|
52
60
|
end
|
53
61
|
|
62
|
+
opts.on("--keep-original", "keep raw sequence files") do
|
63
|
+
options[:keep] = true
|
64
|
+
end
|
65
|
+
|
54
66
|
opts.on("-v", "--version", "Version info") do
|
55
67
|
puts "tcs version: " + ViralSeq::TCS_VERSION.red.bold
|
56
68
|
puts "viral_seq version: " + ViralSeq::VERSION.red.bold
|
@@ -64,15 +76,21 @@ end.parse!
|
|
64
76
|
|
65
77
|
if options[:json_generator]
|
66
78
|
params = ViralSeq::TcsJson.generate
|
79
|
+
elsif options[:dr]
|
80
|
+
params = ViralSeq::TcsDr::PARAMS
|
67
81
|
elsif (options[:params_json] && File.exist?(options[:params_json]))
|
68
82
|
params = JSON.parse(File.read(options[:params_json]), symbolize_names: true)
|
69
83
|
else
|
70
84
|
abort "No params JSON file found. Script terminated.".red
|
71
85
|
end
|
72
86
|
|
73
|
-
|
87
|
+
if options[:input]
|
88
|
+
indir = options[:input]
|
89
|
+
else
|
90
|
+
indir = params[:raw_sequence_dir]
|
91
|
+
end
|
74
92
|
|
75
|
-
unless File.exist?(indir)
|
93
|
+
unless indir and File.exist?(indir)
|
76
94
|
abort "No input sequence directory found. Script terminated.".red.bold
|
77
95
|
end
|
78
96
|
|
@@ -115,6 +133,12 @@ else
|
|
115
133
|
error_rate = 0.02
|
116
134
|
end
|
117
135
|
|
136
|
+
if params[:platform_format]
|
137
|
+
$platform_sequencing_length = params[:platform_format]
|
138
|
+
else
|
139
|
+
$platform_sequencing_length = 300
|
140
|
+
end
|
141
|
+
|
118
142
|
primers = params[:primer_pairs]
|
119
143
|
if primers.empty?
|
120
144
|
ViralSeq::TcsCore.log_and_abort log, "No primer information. Script terminated."
|
@@ -123,6 +147,7 @@ end
|
|
123
147
|
|
124
148
|
primers.each do |primer|
|
125
149
|
summary_json = {}
|
150
|
+
summary_json[:warnings] = []
|
126
151
|
summary_json[:tcs_version] = ViralSeq::TCS_VERSION
|
127
152
|
summary_json[:viralseq_version] = ViralSeq::VERSION
|
128
153
|
summary_json[:runtime] = Time.now.to_s
|
@@ -134,6 +159,7 @@ primers.each do |primer|
|
|
134
159
|
forward_primer = primer[:forward]
|
135
160
|
|
136
161
|
export_raw = primer[:export_raw]
|
162
|
+
limit_raw = primer[:limit_raw]
|
137
163
|
|
138
164
|
unless cdna_primer
|
139
165
|
log.puts Time.now.to_s + "\t" + region + " does not have cDNA primer sequence. #{region} skipped."
|
@@ -175,6 +201,10 @@ primers.each do |primer|
|
|
175
201
|
paired_seq_number = common_keys.size
|
176
202
|
log.puts Time.now.to_s + "\t" + "Paired raw sequences are : #{paired_seq_number.to_s}"
|
177
203
|
summary_json[:paired_raw_sequence] = paired_seq_number
|
204
|
+
if paired_seq_number < raw_sequence_number * 0.001
|
205
|
+
summary_json[:warnings] <<
|
206
|
+
"WARNING: Filtered raw sequneces less than 0.1% of the total raw sequences. Possible contamination."
|
207
|
+
end
|
178
208
|
|
179
209
|
common_keys.each do |seqtag|
|
180
210
|
r1_seq = r1_passed_seq[seqtag]
|
@@ -236,7 +266,13 @@ primers.each do |primer|
|
|
236
266
|
raw_r1_f = File.open(outfile_raw_r1, 'w')
|
237
267
|
raw_r2_f = File.open(outfile_raw_r2, 'w')
|
238
268
|
|
239
|
-
|
269
|
+
if limit_raw
|
270
|
+
raw_keys = bio_r1.keys.sample(limit_raw.to_i)
|
271
|
+
else
|
272
|
+
raw_keys = bio_r1.keys
|
273
|
+
end
|
274
|
+
|
275
|
+
raw_keys.each do |k|
|
240
276
|
raw_r1_f.puts k + "_r1"
|
241
277
|
raw_r2_f.puts k + "_r2"
|
242
278
|
raw_r1_f.puts bio_r1[k]
|
@@ -273,7 +309,6 @@ primers.each do |primer|
|
|
273
309
|
r1_sub_seq << bio_r1[seq_name]
|
274
310
|
r2_sub_seq << bio_r2[seq_name]
|
275
311
|
end
|
276
|
-
|
277
312
|
#consensus name including the Primer ID and number of raw sequences of that Primer ID, library name and setname.
|
278
313
|
consensus_name = ">" + primer_id + "_" + seq_with_same_primer_id.size.to_s + "_" + libname + "_" + region
|
279
314
|
r1_consensus = ViralSeq::SeqHash.array(r1_sub_seq).consensus(majority_cut_off)
|
@@ -364,6 +399,7 @@ primers.each do |primer|
|
|
364
399
|
shp = ViralSeq::SeqHashPair.fa(out_dir_consensus)
|
365
400
|
joined_sh = end_join(out_dir_consensus, primer[:end_join_option], primer[:overlap])
|
366
401
|
log.puts Time.now.to_s + "\t" + "Paired TCS number: " + joined_sh.size.to_s
|
402
|
+
|
367
403
|
summary_json[:combined_tcs] = joined_sh.size
|
368
404
|
|
369
405
|
if export_raw
|
@@ -433,12 +469,15 @@ primers.each do |primer|
|
|
433
469
|
trim_end = primer[:trim_ref_end]
|
434
470
|
trim_ref = primer[:trim_ref].to_sym
|
435
471
|
joined_sh = joined_sh.trim(trim_start, trim_end, trim_ref)
|
436
|
-
joined_sh.write_nt_fa(File.join(out_dir_consensus, "combined.fasta"))
|
437
472
|
if export_raw
|
438
473
|
joined_sh_raw = joined_sh_raw.trim(trim_start, trim_end, trim_ref)
|
439
|
-
joined_sh_raw.write_nt_fa(File.join(out_dir_raw, "combined.raw.fasta"))
|
440
474
|
end
|
441
475
|
end
|
476
|
+
|
477
|
+
joined_sh.write_nt_fa(File.join(out_dir_consensus, "combined.fasta"))
|
478
|
+
if export_raw
|
479
|
+
joined_sh_raw.write_nt_fa(File.join(out_dir_raw, "combined.raw.fasta"))
|
480
|
+
end
|
442
481
|
end
|
443
482
|
|
444
483
|
File.open(outfile_log, "w") do |f|
|
@@ -446,9 +485,11 @@ primers.each do |primer|
|
|
446
485
|
end
|
447
486
|
end
|
448
487
|
|
449
|
-
|
450
|
-
|
451
|
-
File.unlink(
|
488
|
+
unless options[:keep]
|
489
|
+
log.puts Time.now.to_s + "\t" + "Removing raw sequence files..."
|
490
|
+
File.unlink(r1_f)
|
491
|
+
File.unlink(r2_f)
|
492
|
+
end
|
452
493
|
log.puts Time.now.to_s + "\t" + "TCS pipeline successfuly exercuted."
|
453
494
|
log.close
|
454
495
|
puts "DONE!"
|
data/bin/tcs_log
ADDED
@@ -0,0 +1,102 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
|
3
|
+
# pool run logs from one batch of tcs jobs
|
4
|
+
# file structure:
|
5
|
+
# batch_tcs_jobs/
|
6
|
+
# ├── lib1
|
7
|
+
# ├── lib2
|
8
|
+
# ├── lib3
|
9
|
+
# ├── lib4
|
10
|
+
# ├── ...
|
11
|
+
#
|
12
|
+
# command example:
|
13
|
+
# $ tcs_log batch_tcs_jobs
|
14
|
+
|
15
|
+
require 'viral_seq'
|
16
|
+
require 'pathname'
|
17
|
+
require 'json'
|
18
|
+
require 'fileutils'
|
19
|
+
|
20
|
+
indir = ARGV[0].chomp
|
21
|
+
indir_basename = File.basename(indir)
|
22
|
+
indir_dirname = File.dirname(indir)
|
23
|
+
|
24
|
+
tcs_dir = File.join(indir_dirname, (indir_basename + "_tcs"))
|
25
|
+
Dir.mkdir(tcs_dir) unless File.directory?(tcs_dir)
|
26
|
+
|
27
|
+
libs = []
|
28
|
+
Dir.chdir(indir) {libs = Dir.glob("*")}
|
29
|
+
|
30
|
+
outdir2 = File.join(tcs_dir, "combined_TCS_per_lib")
|
31
|
+
outdir3 = File.join(tcs_dir, "TCS_per_region")
|
32
|
+
outdir4 = File.join(tcs_dir, "combined_TCS_per_region")
|
33
|
+
|
34
|
+
Dir.mkdir(outdir2) unless File.directory?(outdir2)
|
35
|
+
Dir.mkdir(outdir3) unless File.directory?(outdir3)
|
36
|
+
Dir.mkdir(outdir4) unless File.directory?(outdir4)
|
37
|
+
|
38
|
+
log_file = File.join(tcs_dir,"log.csv")
|
39
|
+
log = File.open(log_file,'w')
|
40
|
+
|
41
|
+
header = %w{
|
42
|
+
lib_name
|
43
|
+
Region
|
44
|
+
Raw_Sequences_per_barcode
|
45
|
+
R1_Raw
|
46
|
+
R2_Raw
|
47
|
+
Paired_Raw
|
48
|
+
Cutoff
|
49
|
+
PID_Length
|
50
|
+
Consensus1
|
51
|
+
Consensus2
|
52
|
+
Distinct_to_Raw
|
53
|
+
Resampling_index
|
54
|
+
Combined_TCS
|
55
|
+
Combined_TCS_after_QC
|
56
|
+
WARNINGS
|
57
|
+
}
|
58
|
+
|
59
|
+
log.puts header.join(',')
|
60
|
+
libs.each do |lib|
|
61
|
+
Dir.mkdir(File.join(outdir2, lib)) unless File.directory?(File.join(outdir2, lib))
|
62
|
+
fasta_files = []
|
63
|
+
json_files = []
|
64
|
+
Dir.chdir(File.join(indir, lib)) do
|
65
|
+
fasta_files = Dir.glob("**/*.fasta")
|
66
|
+
json_files = Dir.glob("**/log.json")
|
67
|
+
end
|
68
|
+
fasta_files.each do |f|
|
69
|
+
path_array = Pathname(f).each_filename.to_a
|
70
|
+
region = path_array[0]
|
71
|
+
if path_array[-1] == "combined.fasta"
|
72
|
+
FileUtils.cp(File.join(indir, lib, f), File.join(outdir2, lib, (lib + "_" + region)))
|
73
|
+
Dir.mkdir(File.join(outdir4,region)) unless File.directory?(File.join(outdir4,region))
|
74
|
+
FileUtils.cp(File.join(indir, lib, f), File.join(outdir4, region, (lib + "_" + region)))
|
75
|
+
else
|
76
|
+
Dir.mkdir(File.join(outdir3,region)) unless File.directory?(File.join(outdir3,region))
|
77
|
+
Dir.mkdir(File.join(outdir3,region, lib)) unless File.directory?(File.join(outdir3,region, lib))
|
78
|
+
FileUtils.cp(File.join(indir, lib, f), File.join(outdir3, region, lib, (lib + "_" + region + "_" + path_array[-1])))
|
79
|
+
end
|
80
|
+
end
|
81
|
+
|
82
|
+
json_files.each do |f|
|
83
|
+
json_log = JSON.parse(File.read(File.join(indir, lib, f)), symbolize_names: true)
|
84
|
+
log.print [lib,
|
85
|
+
json_log[:primer_set_name],
|
86
|
+
json_log[:total_raw_sequence],
|
87
|
+
json_log[:r1_filtered_raw],
|
88
|
+
json_log[:r2_filtered_raw],
|
89
|
+
json_log[:paired_raw_sequence],
|
90
|
+
json_log[:consensus_cutoff],
|
91
|
+
json_log[:length_of_pid],
|
92
|
+
json_log[:total_tcs_with_ambiguities],
|
93
|
+
json_log[:total_tcs],
|
94
|
+
json_log[:distinct_to_raw],
|
95
|
+
json_log[:resampling_param],
|
96
|
+
json_log[:combined_tcs],
|
97
|
+
json_log[:combined_tcs_after_qc],
|
98
|
+
json_log[:warnings],
|
99
|
+
].join(',') + "\n"
|
100
|
+
end
|
101
|
+
end
|
102
|
+
log.close
|