viral_seq 1.10.0 → 1.10.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.gitignore +2 -0
- data/Gemfile.lock +1 -1
- data/README.md +10 -1
- data/bin/locator +15 -1
- data/bin/tcs_sdrm +6 -6
- data/lib/viral_seq/hivdr.rb +1 -1
- data/lib/viral_seq/recency_report.rb +1 -1
- data/lib/viral_seq/seq_hash.rb +3 -1
- data/lib/viral_seq/seq_hash_pair.rb +1 -1
- data/lib/viral_seq/tcs_dr.rb +4 -4
- data/lib/viral_seq/tcs_json.rb +2 -2
- data/lib/viral_seq/util/drm_list.json +427 -682
- data/lib/viral_seq/version.rb +2 -2
- metadata +1 -1
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 727ed9feb929e077b25cfd2a83661dccb88f1fd8c503635f14c948d71f6f2519
|
|
4
|
+
data.tar.gz: 6f1a69d0abeeabcad8107c3bbe515e9f909cd46b6732cdff66827ff4993e7efd
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: a2a7dd1af8b9022f2b6cadec96be4f49985681d6d98e2b152fc2a12844573218eff17cf6db089ebacb84ea5284c47be254981ddf406e22989647eb4c07fb3285
|
|
7
|
+
data.tar.gz: 024f6066ecec3c5caed0277e10f0aaba596b3d95487e342885a325a1f9bfec7feb1c777eec2a4fbe26c2eb0ad55c8d437c6df6511a5c1c523d43a5d089eeef91
|
data/Gemfile.lock
CHANGED
data/README.md
CHANGED
|
@@ -191,7 +191,16 @@ qc_seqhash.sdrm_hiv_pr(cut_off)
|
|
|
191
191
|
|
|
192
192
|
## Updates
|
|
193
193
|
|
|
194
|
-
### Version-1.10.
|
|
194
|
+
### Version-1.10.3-12112025
|
|
195
|
+
|
|
196
|
+
1. Bug fix for SDRM pipeline.
|
|
197
|
+
2. Added a function to the `locator` tool to export sequences in the positive-sense direction.
|
|
198
|
+
|
|
199
|
+
### Version-1.10.2-07212025
|
|
200
|
+
|
|
201
|
+
1. Fixed a bug processing parameters for HIV sequence QC.
|
|
202
|
+
|
|
203
|
+
### Version-1.10.1-05012025
|
|
195
204
|
|
|
196
205
|
1. Added quality filter for Illumina 2-color sequencing platforms (filter poly-G and poly-C)
|
|
197
206
|
2. Replaced `MuscleBio` with [`VirustLocator`](https://github.com/ViralSeq/virust-locator-ruby) for faster and more accurate pairwise alignment.
|
data/bin/locator
CHANGED
|
@@ -76,6 +76,10 @@ begin
|
|
|
76
76
|
else
|
|
77
77
|
csv_file = seq_file + ".csv"
|
|
78
78
|
end
|
|
79
|
+
|
|
80
|
+
fasta_file = csv_file.sub(/\.csv$/i, '.direction.fasta')
|
|
81
|
+
|
|
82
|
+
fasta_handle = File.open(fasta_file, 'w')
|
|
79
83
|
|
|
80
84
|
unless File.exist?(seq_file)
|
|
81
85
|
raise StandardError.new("Input file sequence file not found".red.bold)
|
|
@@ -92,9 +96,19 @@ begin
|
|
|
92
96
|
locs = seqs.loc(opt)
|
|
93
97
|
head = ["title", "sequence", "ref", "direction", "start", "end", "similarity", "indel", "aligned_input", "aligned_ref"]
|
|
94
98
|
locs.unshift(head)
|
|
99
|
+
|
|
100
|
+
directional_fasta = []
|
|
95
101
|
data = CSV.generate do |csv|
|
|
96
|
-
locs.each
|
|
102
|
+
locs.each do |loc|
|
|
103
|
+
csv << loc
|
|
104
|
+
directional_fasta << loc[1]
|
|
105
|
+
directional_fasta << loc[8].tr("-", "") # remove gaps for aligned_input
|
|
106
|
+
end
|
|
107
|
+
end
|
|
108
|
+
directional_fasta[2..-1].each do |line|
|
|
109
|
+
fasta_handle.puts line
|
|
97
110
|
end
|
|
111
|
+
fasta_handle.close
|
|
98
112
|
|
|
99
113
|
File.write(csv_file, data)
|
|
100
114
|
puts "Output file found at #{csv_file.green.bold}"
|
data/bin/tcs_sdrm
CHANGED
|
@@ -12,10 +12,10 @@
|
|
|
12
12
|
# ├── lib1_IN
|
|
13
13
|
# ├── lib1_V1V3
|
|
14
14
|
# ├── lib2
|
|
15
|
-
# ├──
|
|
16
|
-
# ├──
|
|
17
|
-
# ├──
|
|
18
|
-
# ├──
|
|
15
|
+
# ├── lib2_RT
|
|
16
|
+
# ├── lib2_PR
|
|
17
|
+
# ├── lib2_IN
|
|
18
|
+
# ├── lib2_V1V3
|
|
19
19
|
# ├── ...
|
|
20
20
|
#
|
|
21
21
|
# output data in a new dir as 'libs_dir_SDRM'
|
|
@@ -37,7 +37,7 @@ r_version = ViralSeq::R.check_R
|
|
|
37
37
|
ViralSeq::R.check_R_packages
|
|
38
38
|
|
|
39
39
|
def abstract_line(data)
|
|
40
|
-
|
|
40
|
+
data[3] + data[2] + data[4] + ":" +
|
|
41
41
|
(data[6].to_f * 100).round(2).to_s + "(" +
|
|
42
42
|
(data[7].to_f * 100).round(2).to_s + "-" +
|
|
43
43
|
(data[8].to_f * 100).round(2).to_s + "); "
|
|
@@ -143,7 +143,7 @@ libs.each do |lib|
|
|
|
143
143
|
seq_basename.gsub!(/\_P17/i, "_CA")
|
|
144
144
|
region = version_config.query_region(region_name.to_s)
|
|
145
145
|
|
|
146
|
-
puts "
|
|
146
|
+
puts "processing region: " + region.region
|
|
147
147
|
|
|
148
148
|
sh = ViralSeq::SeqHash.fa(path_to_file)
|
|
149
149
|
|
data/lib/viral_seq/hivdr.rb
CHANGED
|
@@ -492,7 +492,7 @@ module ViralSeq
|
|
|
492
492
|
|
|
493
493
|
self.dna_hash.each do |k,v|
|
|
494
494
|
r1_seqs[k] = v[0,r1_length]
|
|
495
|
-
r2_seqs[k] = v[
|
|
495
|
+
r2_seqs[k] = v[-r2_length..-1] # to ensure the length from the end. Sometimes the platform will return sequence with one extra base.
|
|
496
496
|
end
|
|
497
497
|
|
|
498
498
|
r1_sh = ViralSeq::SeqHash.new(r1_seqs)
|
data/lib/viral_seq/seq_hash.rb
CHANGED
|
@@ -1440,8 +1440,10 @@ module ViralSeq
|
|
|
1440
1440
|
def position_helper(position)
|
|
1441
1441
|
if position.is_a?(Range)
|
|
1442
1442
|
return position
|
|
1443
|
-
elsif position.is_a?(Integer)
|
|
1443
|
+
elsif position.is_a?(Integer) && position > 0
|
|
1444
1444
|
return position..position
|
|
1445
|
+
elsif position.is_a?(Integer) && position == 0
|
|
1446
|
+
return 0..1000000
|
|
1445
1447
|
elsif position.is_a?(String)
|
|
1446
1448
|
return position.to_i..position.to_i
|
|
1447
1449
|
elsif position.is_a?(Array)
|
|
@@ -250,7 +250,7 @@ module ViralSeq
|
|
|
250
250
|
|
|
251
251
|
private
|
|
252
252
|
# determine overlap size from @dna_hash
|
|
253
|
-
def determine_overlap_pid_pair(seq_pair_hash, diff = 0.
|
|
253
|
+
def determine_overlap_pid_pair(seq_pair_hash, diff = 0.02)
|
|
254
254
|
overlaps = []
|
|
255
255
|
seq_pair_hash.each do |_seq_name, seq_pair|
|
|
256
256
|
overlap_list = []
|
data/lib/viral_seq/tcs_dr.rb
CHANGED
|
@@ -65,7 +65,7 @@ module ViralSeq
|
|
|
65
65
|
:TCS_QC=>true,
|
|
66
66
|
:ref_genome=>"HXB2",
|
|
67
67
|
:ref_start=>6585,
|
|
68
|
-
:ref_end=>
|
|
68
|
+
:ref_end=>7208,
|
|
69
69
|
:indel=>true,
|
|
70
70
|
:trim=>false},
|
|
71
71
|
{:region=>"CA",
|
|
@@ -146,7 +146,7 @@ module ViralSeq
|
|
|
146
146
|
:TCS_QC=>true,
|
|
147
147
|
:ref_genome=>"HXB2",
|
|
148
148
|
:ref_start=>6585,
|
|
149
|
-
:ref_end=>
|
|
149
|
+
:ref_end=>7208,
|
|
150
150
|
:indel=>true,
|
|
151
151
|
:trim=>false},
|
|
152
152
|
{:region=>"CA",
|
|
@@ -226,7 +226,7 @@ module ViralSeq
|
|
|
226
226
|
:TCS_QC=>true,
|
|
227
227
|
:ref_genome=>"HXB2",
|
|
228
228
|
:ref_start=>6585,
|
|
229
|
-
:ref_end=>
|
|
229
|
+
:ref_end=>7208,
|
|
230
230
|
:indel=>true,
|
|
231
231
|
:trim=>false},
|
|
232
232
|
{:region=>"CA",
|
|
@@ -307,7 +307,7 @@ module ViralSeq
|
|
|
307
307
|
:TCS_QC=>true,
|
|
308
308
|
:ref_genome=>"HXB2",
|
|
309
309
|
:ref_start=>6585,
|
|
310
|
-
:ref_end=>
|
|
310
|
+
:ref_end=>7208,
|
|
311
311
|
:indel=>true,
|
|
312
312
|
:trim=>false},
|
|
313
313
|
{:region=>"CA",
|
data/lib/viral_seq/tcs_json.rb
CHANGED
|
@@ -101,10 +101,10 @@ module ViralSeq
|
|
|
101
101
|
|
|
102
102
|
data[:ref_genome] = get_ref
|
|
103
103
|
|
|
104
|
-
print "reference 5'end ref position or
|
|
104
|
+
print "reference 5'end ref position or position range, 0 if no need to match this end \n> "
|
|
105
105
|
data[:ref_start] = gets.chomp.rstrip.to_i
|
|
106
106
|
|
|
107
|
-
print "reference 3'end ref position or
|
|
107
|
+
print "reference 3'end ref position or position range: 0 if no need to match this end \n> "
|
|
108
108
|
data[:ref_end] = gets.chomp.rstrip.to_i
|
|
109
109
|
|
|
110
110
|
print "allow indels? (default as yes) Y/N \n> "
|