viral_seq 1.10.0 → 1.10.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: d940e5f465cba40def34166fe50e0a21b1c62a1fff8e0be8abdabb7b4c4aab77
4
- data.tar.gz: 7e4be6ec82d9081a1ea3130eed49dcaac080608e481c7520b43c2e58a50e379d
3
+ metadata.gz: 727ed9feb929e077b25cfd2a83661dccb88f1fd8c503635f14c948d71f6f2519
4
+ data.tar.gz: 6f1a69d0abeeabcad8107c3bbe515e9f909cd46b6732cdff66827ff4993e7efd
5
5
  SHA512:
6
- metadata.gz: 15805b09c96b6d1bff023a82948f23ceb584c60ffb21b85e59d6f4ddc2e2394045a29788a7c5811c714afedfae6405c36b88e0bcadce0d1408068418c497e596
7
- data.tar.gz: '0871676e5ee49fa14f84ec3c109172d964efac18f3f104ec38ad52daa69b9ef85a935c35ca2377d261b38edc5d8d438469b2360ec791a902116f60c8daeef5c2'
6
+ metadata.gz: a2a7dd1af8b9022f2b6cadec96be4f49985681d6d98e2b152fc2a12844573218eff17cf6db089ebacb84ea5284c47be254981ddf406e22989647eb4c07fb3285
7
+ data.tar.gz: 024f6066ecec3c5caed0277e10f0aaba596b3d95487e342885a325a1f9bfec7feb1c777eec2a4fbe26c2eb0ad55c8d437c6df6511a5c1c523d43a5d089eeef91
data/.gitignore CHANGED
@@ -11,3 +11,5 @@
11
11
 
12
12
  # gem files
13
13
  *.gem
14
+
15
+ .DS_Store
data/Gemfile.lock CHANGED
@@ -1,7 +1,7 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- viral_seq (1.10.1)
4
+ viral_seq (1.10.0)
5
5
  colorize (~> 0.1)
6
6
  combine_pdf (~> 1.0, >= 1.0.0)
7
7
  muscle_bio (= 0.4)
data/README.md CHANGED
@@ -191,7 +191,16 @@ qc_seqhash.sdrm_hiv_pr(cut_off)
191
191
 
192
192
  ## Updates
193
193
 
194
- ### Version-1.10.1
194
+ ### Version-1.10.3-12112025
195
+
196
+ 1. Bug fix for SDRM pipeline.
197
+ 2. Add a function in `locator` tool to export sequences in positive sense direction.
198
+
199
+ ### Version-1.10.2-07212025
200
+
201
+ 1. Fixed a bug processing parameters for HIV sequence QC.
202
+
203
+ ### Version-1.10.1-05012025
195
204
 
196
205
  1. Added quality filter for Illumina 2-color sequencing platforms (filter poly-G and poly-C)
197
206
  2. Replaced `MuscleBio` with [`VirustLocator`](https://github.com/ViralSeq/virust-locator-ruby) for faster and more accurate pairwise alignment.
data/bin/locator CHANGED
@@ -76,6 +76,10 @@ begin
76
76
  else
77
77
  csv_file = seq_file + ".csv"
78
78
  end
79
+
80
+ fasta_file = csv_file.sub(/\.csv$/i, '.direction.fasta')
81
+
82
+ fasta_handle = File.open(fasta_file, 'w')
79
83
 
80
84
  unless File.exist?(seq_file)
81
85
  raise StandardError.new("Input file sequence file not found".red.bold)
@@ -92,9 +96,19 @@ begin
92
96
  locs = seqs.loc(opt)
93
97
  head = ["title", "sequence", "ref", "direction", "start", "end", "similarity", "indel", "aligned_input", "aligned_ref"]
94
98
  locs.unshift(head)
99
+
100
+ directional_fasta = []
95
101
  data = CSV.generate do |csv|
96
- locs.each {|loc| csv << loc}
102
+ locs.each do |loc|
103
+ csv << loc
104
+ directional_fasta << loc[1]
105
+ directional_fasta << loc[8].tr("-", "") # remove gaps for aligned_input
106
+ end
107
+ end
108
+ directional_fasta[2..-1].each do |line|
109
+ fasta_handle.puts line
97
110
  end
111
+ fasta_handle.close
98
112
 
99
113
  File.write(csv_file, data)
100
114
  puts "Output file found at #{csv_file.green.bold}"
data/bin/tcs_sdrm CHANGED
@@ -12,10 +12,10 @@
12
12
  # ├── lib1_IN
13
13
  # ├── lib1_V1V3
14
14
  # ├── lib2
15
- # ├── lib1_RT
16
- # ├── lib1_PR
17
- # ├── lib1_IN
18
- # ├── lib1_V1V3
15
+ # ├── lib2_RT
16
+ # ├── lib2_PR
17
+ # ├── lib2_IN
18
+ # ├── lib2_V1V3
19
19
  # ├── ...
20
20
  #
21
21
  # output data in a new dir as 'libs_dir_SDRM'
@@ -37,7 +37,7 @@ r_version = ViralSeq::R.check_R
37
37
  ViralSeq::R.check_R_packages
38
38
 
39
39
  def abstract_line(data)
40
- return_data = data[3] + data[2] + data[4] + ":" +
40
+ data[3] + data[2] + data[4] + ":" +
41
41
  (data[6].to_f * 100).round(2).to_s + "(" +
42
42
  (data[7].to_f * 100).round(2).to_s + "-" +
43
43
  (data[8].to_f * 100).round(2).to_s + "); "
@@ -143,7 +143,7 @@ libs.each do |lib|
143
143
  seq_basename.gsub!(/\_P17/i, "_CA")
144
144
  region = version_config.query_region(region_name.to_s)
145
145
 
146
- puts "prcessing region: " + region.region
146
+ puts "processing region: " + region.region
147
147
 
148
148
  sh = ViralSeq::SeqHash.fa(path_to_file)
149
149
 
@@ -492,7 +492,7 @@ module ViralSeq
492
492
 
493
493
  self.dna_hash.each do |k,v|
494
494
  r1_seqs[k] = v[0,r1_length]
495
- r2_seqs[k] = v[r1_length, r2_length]
495
+ r2_seqs[k] = v[-r2_length..-1] # to ensure the length from the end. Sometimes the platform will return sequence with one extra base.
496
496
  end
497
497
 
498
498
  r1_sh = ViralSeq::SeqHash.new(r1_seqs)
@@ -106,7 +106,7 @@ module ViralSeq
106
106
  )
107
107
 
108
108
  formatted_text(
109
- text_format2("P17", log[:pi_P17], log[:dist20_P17], log[:tcs_P17])
109
+ text_format2("CA", log[:pi_CA], log[:dist20_CA], log[:tcs_CA])
110
110
  )
111
111
 
112
112
  move_down 30
@@ -1440,8 +1440,10 @@ module ViralSeq
1440
1440
  def position_helper(position)
1441
1441
  if position.is_a?(Range)
1442
1442
  return position
1443
- elsif position.is_a?(Integer)
1443
+ elsif position.is_a?(Integer) && position > 0
1444
1444
  return position..position
1445
+ elsif position.is_a?(Integer) && position == 0
1446
+ return 0..1000000
1445
1447
  elsif position.is_a?(String)
1446
1448
  return position.to_i..position.to_i
1447
1449
  elsif position.is_a?(Array)
@@ -250,7 +250,7 @@ module ViralSeq
250
250
 
251
251
  private
252
252
  # determine overlap size from @dna_hash
253
- def determine_overlap_pid_pair(seq_pair_hash, diff = 0.0)
253
+ def determine_overlap_pid_pair(seq_pair_hash, diff = 0.02)
254
254
  overlaps = []
255
255
  seq_pair_hash.each do |_seq_name, seq_pair|
256
256
  overlap_list = []
@@ -65,7 +65,7 @@ module ViralSeq
65
65
  :TCS_QC=>true,
66
66
  :ref_genome=>"HXB2",
67
67
  :ref_start=>6585,
68
- :ref_end=>7205..7210,
68
+ :ref_end=>7208,
69
69
  :indel=>true,
70
70
  :trim=>false},
71
71
  {:region=>"CA",
@@ -146,7 +146,7 @@ module ViralSeq
146
146
  :TCS_QC=>true,
147
147
  :ref_genome=>"HXB2",
148
148
  :ref_start=>6585,
149
- :ref_end=>7205..7210,
149
+ :ref_end=>7208,
150
150
  :indel=>true,
151
151
  :trim=>false},
152
152
  {:region=>"CA",
@@ -226,7 +226,7 @@ module ViralSeq
226
226
  :TCS_QC=>true,
227
227
  :ref_genome=>"HXB2",
228
228
  :ref_start=>6585,
229
- :ref_end=>7205..7210,
229
+ :ref_end=>7208,
230
230
  :indel=>true,
231
231
  :trim=>false},
232
232
  {:region=>"CA",
@@ -307,7 +307,7 @@ module ViralSeq
307
307
  :TCS_QC=>true,
308
308
  :ref_genome=>"HXB2",
309
309
  :ref_start=>6585,
310
- :ref_end=>7205..7210,
310
+ :ref_end=>7208,
311
311
  :indel=>true,
312
312
  :trim=>false},
313
313
  {:region=>"CA",
@@ -101,10 +101,10 @@ module ViralSeq
101
101
 
102
102
  data[:ref_genome] = get_ref
103
103
 
104
- print "reference 5'end ref position or posiiton range, 0 if no need to match this end \n> "
104
+ print "reference 5'end ref position or position range, 0 if no need to match this end \n> "
105
105
  data[:ref_start] = gets.chomp.rstrip.to_i
106
106
 
107
- print "reference 3'end ref position or posiiton range: 0 if no need to match this end \n> "
107
+ print "reference 3'end ref position or position range: 0 if no need to match this end \n> "
108
108
  data[:ref_end] = gets.chomp.rstrip.to_i
109
109
 
110
110
  print "allow indels? (default as yes) Y/N \n> "