viral_seq 1.0.5 → 1.0.10
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/Gemfile.lock +6 -4
- data/README.md +110 -38
- data/bin/locator +31 -9
- data/bin/tcs +450 -0
- data/lib/viral_seq.rb +4 -1
- data/lib/viral_seq/hash.rb +1 -1
- data/lib/viral_seq/hivdr.rb +2 -0
- data/lib/viral_seq/muscle.rb +2 -2
- data/lib/viral_seq/seq_hash.rb +220 -41
- data/lib/viral_seq/seq_hash_pair.rb +16 -6
- data/lib/viral_seq/tcs_core.rb +303 -0
- data/lib/viral_seq/tcs_json.rb +178 -0
- data/lib/viral_seq/version.rb +2 -1
- data/viral_seq.gemspec +5 -1
- metadata +23 -5
| @@ -0,0 +1,303 @@ | |
| 1 | 
            +
            module ViralSeq
         | 
| 2 | 
            +
             | 
| 3 | 
            +
              # Core functions for `tcs` pipeline
         | 
| 4 | 
            +
             | 
| 5 | 
            +
              class TcsCore
         | 
| 6 | 
            +
                class << self
         | 
| 7 | 
            +
             | 
| 8 | 
            +
                  # methods to calculate TCS consensus cut-off based on the maximum numbers of PIDs and platform error rate.
         | 
| 9 | 
            +
             | 
| 10 | 
            +
                  # Compute the TCS consensus cut-off from the maximum number of PIDs.
                  #
                  # m          - number compared against 10 (and 8500) and fed to the fitted polynomials
                  # error_rate - platform error rate selecting the model (default 0.02):
                  #              [0.005, 0.015), [0, 0.005), or anything else
                  #
                  # Returns the rounded model value; any result below 3 is reported as 2.
                  def calculate_cut_off(m, error_rate = 0.02)
                    # every model uses a fixed value of 2 for ten or fewer PIDs
                    return 2 if m <= 10

                    fitted =
                      case error_rate
                      when 0.005...0.015
                        1.09*10**-26*m**6 + 7.82*10**-22*m**5 - 1.93*10**-16*m**4 + 1.01*10**-11*m**3 - 2.31*10**-7*m**2 + 0.00645*m + 2.872
                      when 0...0.005
                        -9.59*10**-27*m**6 + 3.27*10**-21*m**5 - 3.05*10**-16*m**4 + 1.2*10**-11*m**3 - 2.19*10**-7*m**2 + 0.004044*m + 2.273
                      else
                        if m <= 8500
                          -1.24*10**-21*m**6 + 3.53*10**-17*m**5 - 3.90*10**-13*m**4 + 2.12*10**-9*m**3 - 6.06*10**-6*m**2 + 1.80*10**-2*m + 3.15
                        else
                          # linear model above 8500 PIDs
                          0.0079 * m + 9.4869
                        end
                      end

                    cut_off = fitted.round
                    cut_off < 3 ? 2 : cut_off
                  end
         | 
| 41 | 
            +
             | 
| 42 | 
            +
                  # identify which file in the directory is R1 file, and which is R2 file based on file names
         | 
| 43 | 
            +
                  # input as directory (Dir object or a string of path)
         | 
| 44 | 
            +
                  # by default, .gz files will be unzipped.
         | 
| 45 | 
            +
                  # return as a hash of {r1_file: file1, r2_file: file2}
         | 
| 46 | 
            +
                  # Locate the R1 and R2 files among the entries of a directory.
                  #
                  # directory - directory to scan (entries are listed with Dir.glob "*")
                  # unzip     - when truthy, each hit is passed through unzip_r;
                  #             otherwise the path is simply directory joined with the entry name
                  #
                  # Returns { r1_file: path, r2_file: path }; a slot stays "" when no entry
                  # carries the corresponding tag. If several entries match, the last one wins.
                  def r1r2(directory, unzip = true)
                    located = { r1_file: "", r2_file: "" }
                    resolve = lambda do |entry|
                      unzip ? unzip_r(directory, entry) : File.join(directory, entry)
                    end

                    Dir.chdir(directory) { Dir.glob "*" }.each do |entry|
                      tags = parser_file_name(entry)[:tag]

                      # R1 takes precedence when a name carries both tags
                      if tags.include? "R1"
                        located[:r1_file] = resolve.call(entry)
                      elsif tags.include? "R2"
                        located[:r2_file] = resolve.call(entry)
                      end
                    end

                    located
                  end # end of ViralSeq:TcsCore.r1r2
         | 
| 62 | 
            +
             | 
| 63 | 
            +
                  # sort directories containing multiple r1 and r2 files.
         | 
| 64 | 
            +
                  # use the library name (first string before "_") to separate libraries
         | 
| 65 | 
            +
                  # out_dir is the Dir object or string of the output directory, by default named as directory + "_sorted"
         | 
| 66 | 
            +
                  # return a hash as { with_both_r1_r2: [lib1, lib2, ...], missing_r1: [lib1, lib2, ...], missing_r2: [lib1, lib2, ...], error: [lib1, lib2, ...]}
         | 
| 67 | 
            +
             | 
| 68 | 
            +
                  # Move every entry of `directory` into a per-library sub-directory of
                  # `out_dir`, then classify each library by which read files it holds.
                  #
                  # directory - source directory whose entries are moved (File.rename)
                  # out_dir   - destination root, created if missing (default: directory + "_sorted")
                  #
                  # Returns { with_both_r1_r2: [...], missing_r1: [...], missing_r2: [...], error: [...] }
                  # where each value lists library (sub-directory) names.
                  def sort_by_lib(directory, out_dir = directory + "_sorted")
                    Dir.mkdir(out_dir) unless File.directory?(out_dir)

                    # the library name is whatever precedes the first "_" in the file name
                    Dir.chdir(directory) { Dir.glob("*") }.each do |file_name|
                      source_path = File.join(directory, file_name)
                      lib_dir = File.join(out_dir, file_name.split("_")[0])
                      Dir.mkdir(lib_dir) unless File.directory?(lib_dir)
                      File.rename(source_path, File.join(lib_dir, file_name))
                    end

                    report = { with_both_r1_r2: [], missing_r1: [], missing_r2: [], error: [] }

                    Dir.chdir(out_dir) { Dir.glob('*') }.each do |lib|
                      found = ViralSeq::TcsCore.r1r2(File.join(out_dir, lib))
                      has_r1 = !found[:r1_file].empty?
                      has_r2 = !found[:r2_file].empty?

                      bucket =
                        if has_r1 && has_r2
                          :with_both_r1_r2
                        elsif has_r2
                          :missing_r1
                        elsif has_r1
                          :missing_r2
                        else
                          :error
                        end
                      report[bucket] << lib
                    end

                    report
                  end
         | 
| 103 | 
            +
             | 
| 104 | 
            +
                  # sort array of file names to determine if there is potential errors
         | 
| 105 | 
            +
                  # input name_array array of file names
         | 
| 106 | 
            +
                  # output hash { }
         | 
| 107 | 
            +
             | 
| 108 | 
            +
                  # Check a list of raw sequence file names for naming problems.
                  #
                  # name_array - Array of file name Strings
                  #
                  # Returns a Hash with
                  #   :errors       - error category => Array of offending file names
                  #   :all_pass     - true only when every input name is in :passed_names
                  #   :passed_names - flat Array of the names of all passing libraries
                  #   :passed_libs  - library name => Array of its file names
                  def validate_file_name(name_array)
                    errors = {
                      file_type_error: [],
                      missing_r1_file: [],
                      missing_r2_file: [],
                      extra_r1_r2_file: [],
                      no_region_tag: [],
                      multiple_region_tag: []
                    }

                    # per-file checks: extension first, then exactly one R1/R2 tag
                    well_formed = name_array.select do |name|
                      tag = parser_file_name(name)[:tag]
                      region_tags = tag.count("R1") + tag.count("R2")
                      category =
                        if name !~ /\.fastq\Z|\.fastq\.gz\Z/
                          :file_type_error
                        elsif region_tags.zero?
                          :no_region_tag
                        elsif region_tags > 1
                          :multiple_region_tag
                        end
                      errors[category] << name if category
                      category.nil?
                    end

                    # per-library checks: each library needs exactly one R1 and one R2 file
                    passed_libs = {}
                    well_formed.group_by { |name| parser_file_name(name)[:libname] }.each do |libname, files|
                      # every well-formed name carries exactly one region tag, so "not R1" means R2
                      r1_files, r2_files = files.partition { |name| parser_file_name(name)[:tag].include?("R1") }

                      if r1_files.size > 1 || r2_files.size > 1
                        errors[:extra_r1_r2_file].concat(files)
                      elsif r1_files.empty?
                        errors[:missing_r1_file].concat(files)
                      elsif r2_files.empty?
                        errors[:missing_r2_file].concat(files)
                      else
                        passed_libs[libname] = files
                      end
                    end

                    passed_names = passed_libs.values.flatten(1)

                    {
                      errors: errors,
                      all_pass: passed_names.size >= name_array.size,
                      passed_names: passed_names,
                      passed_libs: passed_libs
                    }
                  end
         | 
| 178 | 
            +
             | 
| 179 | 
            +
                  # filter r1 raw sequences for non-specific primers.
         | 
| 180 | 
            +
                  # input r1_sh, SeqHash obj.
         | 
| 181 | 
            +
                  # return a Hash { r1_passed_seq: filtered Hash of sequence name => sequence, forward_starting_number: length of the N run plus the primer }
         | 
| 182 | 
            +
             | 
| 183 | 
            +
                  # Keep only the R1 reads that pass general_filter and whose primer
                  # region matches the forward primer.
                  #
                  # r1_sh          - object responding to #dna_hash (name => sequence)
                  # forward_primer - primer String; a run of N followed by word characters at
                  #                  the end is split into an N-run length and the primer proper,
                  #                  otherwise the whole string is the primer with no N run
                  #
                  # Returns { r1_passed_seq: Hash of trimmed name => sequence,
                  #           forward_starting_number: N-run length + primer length }
                  def filter_r1(r1_sh, forward_primer)
                    primer_parts = forward_primer.match(/(N+)(\w+)$/)
                    forward_n, forward_bio_primer =
                      if primer_parts
                        [primer_parts[1].size, primer_parts[2]]
                      else
                        [0, forward_primer]
                      end
                    bio_primer_size = forward_bio_primer.size
                    primer_pattern = forward_bio_primer.nt_parser

                    r1_passed_seq = {}
                    r1_sh.dna_hash.each do |name, seq|
                      next unless general_filter seq
                      # the primer is expected right after the leading N run
                      next unless seq[forward_n, bio_primer_size] =~ primer_pattern

                      r1_passed_seq[remove_tag(name)] = seq
                    end

                    { r1_passed_seq: r1_passed_seq, forward_starting_number: forward_n + bio_primer_size }
                  end # end of filter_r1
         | 
| 214 | 
            +
             | 
| 215 | 
            +
                  # filter r2 raw sequences for non-specific primers.
         | 
| 216 | 
            +
                  # input r2_sh, SeqHash obj.
         | 
| 217 | 
            +
                  # return filtered Hash of sequence name and seq pair, as well as the length of PID.
         | 
| 218 | 
            +
                  # Keep only the R2 reads that pass general_filter and whose primer
                  # region (right after the PID) matches the cDNA primer.
                  #
                  # r2_sh       - object responding to #dna_hash (name => sequence)
                  # cdna_primer - primer String that must end in a run of N (the PID
                  #               placeholder) followed by the primer proper
                  #
                  # Returns { r2_passed_seq: Hash of trimmed name => sequence,
                  #           pid_length: length of the N run,
                  #           reverse_starting_number: pid_length + primer length }
                  #
                  # Raises ArgumentError when cdna_primer contains no N run followed by
                  # primer bases (previously this surfaced as a NoMethodError on nil).
                  def filter_r2(r2_sh, cdna_primer)
                    primer_parts = cdna_primer.match(/(N+)(\w+)$/)
                    unless primer_parts
                      raise ArgumentError,
                            "cDNA primer #{cdna_primer.inspect} has no run of N (PID) followed by primer bases"
                    end

                    pid_length = primer_parts[1].size
                    cdna_bio_primer = primer_parts[2]
                    cdna_bio_primer_size = cdna_bio_primer.size
                    cdna_primer_ref = cdna_bio_primer.nt_parser

                    r2_passed_seq = {}
                    r2_sh.dna_hash.each do |name, seq|
                      next unless general_filter seq
                      # the primer is expected immediately after the PID
                      next unless seq[pid_length, cdna_bio_primer_size] =~ cdna_primer_ref

                      r2_passed_seq[remove_tag(name)] = seq
                    end

                    {
                      r2_passed_seq: r2_passed_seq,
                      pid_length: pid_length,
                      reverse_starting_number: pid_length + cdna_bio_primer_size
                    }
                  end # end of filter_r2
         | 
| 243 | 
            +
             | 
| 244 | 
            +
             | 
| 245 | 
            +
             | 
| 246 | 
            +
                  # write the error message to the log file handle, then abort with the same message
         | 
| 247 | 
            +
             | 
| 248 | 
            +
                  # Record a fatal message and terminate the program.
                  #
                  # log   - open IO-like log handle; it receives one time-stamped line and is closed
                  # infor - message String, written to the log and used as the abort message
                  def log_and_abort(log, infor)
                    stamped_line = Time.now.to_s + "\t" + infor
                    log.puts(stamped_line)
                    log.close
                    abort(infor.red.bold)
                  end
         | 
| 253 | 
            +
             | 
| 254 | 
            +
                  private
         | 
| 255 | 
            +
             | 
| 256 | 
            +
                  # Decompress a gzipped read file in place and report the resulting path.
                  #
                  # indir - directory containing the file
                  # f     - file name inside indir
                  #
                  # Returns the path of f inside indir, with a trailing ".gz" removed when
                  # the file was handed to `gzip -d`. Names that merely contain "gz" or
                  # ".gz" somewhere other than the end are left untouched.
                  #
                  # Raises Errno::ENOENT when the gzip executable cannot be launched. A
                  # non-zero gzip exit status is not treated as an error here.
                  def unzip_r(indir, f)
                    r_file = File.join(indir, f)
                    return r_file unless f.end_with?(".gz")

                    # argument-vector form bypasses the shell, so spaces or shell
                    # metacharacters in the path cannot break or alter the command
                    launched = system("gzip", "-d", r_file)
                    raise Errno::ENOENT, "gzip" if launched.nil?

                    # strip only the suffix, never an earlier ".gz" inside the name
                    File.join(indir, f.sub(/\.gz\z/, ""))
                  end
         | 
| 265 | 
            +
             | 
| 266 | 
            +
                  # Split a file name into a library name and upper-cased tags.
                  #
                  # Only the part before the first "." is considered; it is split on "_".
                  # With two or more tokens the first is the library name and the rest are
                  # the tags. With a single token the library name defaults to "lib" and
                  # that token is the only tag. When there is no token at all (e.g. "",
                  # ".DS_Store", "___.fastq") the result is libname "lib" with no tags
                  # instead of a NoMethodError, so callers can classify such names.
                  #
                  # file_name - file name String
                  #
                  # Returns { libname: String, tag: Array of upper-cased Strings }
                  def parser_file_name(file_name)
                    # to_s covers the nil that "".split(".")[0] produces
                    tokens = file_name.split(".")[0].to_s.split("_")

                    if tokens.size <= 1
                      { libname: "lib", tag: tokens.map(&:upcase) }
                    else
                      { libname: tokens[0], tag: tokens[1..-1].map(&:upcase) }
                    end
                  end
         | 
| 277 | 
            +
             | 
| 278 | 
            +
                  # Decide whether a raw read is usable.
                  #
                  # seq - sequence String
                  #
                  # Returns false when the read has an N anywhere except its first or last
                  # position, or contains a run of 11 A or 11 T (adaptor signature);
                  # true otherwise.
                  def general_filter(seq)
                    # ambiguities are tolerated only at the two terminal positions
                    return false if seq[1..-2] =~ /N/
                    # a poly-A stretch indicates adaptor sequence
                    return false if seq =~ /A{11}/
                    # a poly-T stretch indicates adaptor sequence
                    return false if seq =~ /T{11}/

                    true
                  end
         | 
| 289 | 
            +
             | 
| 290 | 
            +
                  # remove region info tags from the raw MiSeq sequences.
         | 
| 291 | 
            +
                  # Strip the trailing read tag from a raw sequence name.
                  #
                  # seq_name - sequence name String
                  #
                  # Returns everything before the first whitespace character when there is
                  # one; otherwise the name without its last two characters.
                  def remove_tag(seq_name)
                    first_blank = seq_name.index(/\s/)
                    first_blank ? seq_name[0...first_blank] : seq_name[0..-3]
                  end
         | 
| 298 | 
            +
             | 
| 299 | 
            +
                end # end of class << self
         | 
| 300 | 
            +
             | 
| 301 | 
            +
              end # end of TcsCore module
         | 
| 302 | 
            +
             | 
| 303 | 
            +
            end # end of main module
         | 
| @@ -0,0 +1,178 @@ | |
| 1 | 
            +
            module ViralSeq
         | 
| 2 | 
            +
              class TcsJson
         | 
| 3 | 
            +
                class << self
         | 
| 4 | 
            +
             | 
| 5 | 
            +
                  def generate
         | 
| 6 | 
            +
                    puts '-'*58
         | 
| 7 | 
            +
                    puts '| JSON Parameter Generator for ' + "TCS #{ViralSeq::TCS_VERSION}".red.bold + " by " + "Shuntai Zhou".blue.bold + ' |'
         | 
| 8 | 
            +
                    puts '-'*58 + "\n"
         | 
| 9 | 
            +
             | 
| 10 | 
            +
                    param = {}
         | 
| 11 | 
            +
             | 
| 12 | 
            +
                    puts 'Enter the path to the directory that contains the MiSeq pair-end R1 and R2 .fastq or .fastq.gz file'
         | 
| 13 | 
            +
                    print '> '
         | 
| 14 | 
            +
                    param[:raw_sequence_dir] = gets.chomp.rstrip
         | 
| 15 | 
            +
             | 
| 16 | 
            +
                    puts 'Enter the estimated platform error rate (for TCS cut-off calculation), default as ' + '0.02'.red.bold
         | 
| 17 | 
            +
                    print '> '
         | 
| 18 | 
            +
                    input_error = gets.chomp.rstrip.to_f
         | 
| 19 | 
            +
                    if input_error == 0.0
         | 
| 20 | 
            +
                      param[:platform_error_rate] = 0.02
         | 
| 21 | 
            +
                    else
         | 
| 22 | 
            +
                      param[:platform_error_rate] = input_error
         | 
| 23 | 
            +
                    end
         | 
| 24 | 
            +
             | 
| 25 | 
            +
                    param[:primer_pairs] = []
         | 
| 26 | 
            +
             | 
| 27 | 
            +
                    loop do
         | 
| 28 | 
            +
                      data = {}
         | 
| 29 | 
            +
                      puts "Enter the name for the sequenced region: "
         | 
| 30 | 
            +
                      print '> '
         | 
| 31 | 
            +
                      data[:region] = gets.chomp.rstrip
         | 
| 32 | 
            +
             | 
| 33 | 
            +
                      puts "Enter the #{"cDNA".red.bold} primer sequence: "
         | 
| 34 | 
            +
                      print '> '
         | 
| 35 | 
            +
                      data[:cdna] = gets.chomp.rstrip
         | 
| 36 | 
            +
             | 
| 37 | 
            +
                      puts "Enter the #{"forward".blue.bold} primer sequence: "
         | 
| 38 | 
            +
                      print '> '
         | 
| 39 | 
            +
                      data[:forward] = gets.chomp.rstrip
         | 
| 40 | 
            +
             | 
| 41 | 
            +
                      puts "Enter supermajority cut-off (0.5 - 1.0). Default Simple Majority"
         | 
| 42 | 
            +
                      print '> '
         | 
| 43 | 
            +
                      mj = gets.chomp.rstrip.to_f
         | 
| 44 | 
            +
                      if (0.5..1.0).include?(mj)
         | 
| 45 | 
            +
                        data[:majority] = mj
         | 
| 46 | 
            +
                      else
         | 
| 47 | 
            +
                        data[:majority] = 0
         | 
| 48 | 
            +
                      end
         | 
| 49 | 
            +
             | 
| 50 | 
            +
                      print "Need end-join? Y/N \n> "
         | 
| 51 | 
            +
                      ej = gets.chomp.rstrip
         | 
| 52 | 
            +
                      if ej =~ /y|yes/i
         | 
| 53 | 
            +
                        data[:end_join] = true
         | 
| 54 | 
            +
             | 
| 55 | 
            +
                        print "End-join option? Choose from (1-4):\n
         | 
| 56 | 
            +
                        1: simple join, no overlap
         | 
| 57 | 
            +
                        2: known overlap \n
         | 
| 58 | 
            +
                        3: unknow overlap, use sample consensus to determine overlap, all sequence pairs have same overlap\n
         | 
| 59 | 
            +
                        4: unknow overlap, determine overlap by individual sequence pairs, sequence pairs can have different overlap\n
         | 
| 60 | 
            +
                        > "
         | 
| 61 | 
            +
                        ej_option = gets.chomp.rstrip
         | 
| 62 | 
            +
                        while ![1,2,3,4].include?(ej_option.to_i)
         | 
| 63 | 
            +
                          puts "Entered end-join option #{ej_option.red.bold} not valid (choose 1-4), try again"
         | 
| 64 | 
            +
                          ej_option = gets.chomp.rstrip.to_i
         | 
| 65 | 
            +
                        end
         | 
| 66 | 
            +
                        case ej_option.to_i
         | 
| 67 | 
            +
                        when 1
         | 
| 68 | 
            +
                          data[:end_join_option] = 1
         | 
| 69 | 
            +
                          data[:overlap] = 0
         | 
| 70 | 
            +
                        when 2
         | 
| 71 | 
            +
                          data[:end_join_option] = 1
         | 
| 72 | 
            +
                          print "overlap bases: \n> "
         | 
| 73 | 
            +
                          ol = gets.chomp.rstrip.to_i
         | 
| 74 | 
            +
                          data[:overlap] = ol
         | 
| 75 | 
            +
                        when 3
         | 
| 76 | 
            +
                          data[:end_join_option] = 3
         | 
| 77 | 
            +
                        when 4
         | 
| 78 | 
            +
                          data[:end_join_option] = 4
         | 
| 79 | 
            +
                        end
         | 
| 80 | 
            +
             | 
| 81 | 
            +
                        print "Need QC for TCS? (support for HIV-1 and SIV)? Y/N \n> "
         | 
| 82 | 
            +
                        qc = gets.chomp.rstrip
         | 
| 83 | 
            +
                        if qc =~ /y|yes/i
         | 
| 84 | 
            +
                          data[:TCS_QC] = true
         | 
| 85 | 
            +
             | 
| 86 | 
            +
                          data[:ref_genome] = get_ref
         | 
| 87 | 
            +
             | 
| 88 | 
            +
                          print "reference 5'end ref position or posiiton range, 0 if no need to match this end \n> "
         | 
| 89 | 
            +
                          data[:ref_start] = gets.chomp.rstrip.to_i
         | 
| 90 | 
            +
             | 
| 91 | 
            +
                          print "reference 3'end ref position or posiiton range: 0 if no need to match this end \n> "
         | 
| 92 | 
            +
                          data[:ref_end] = gets.chomp.rstrip.to_i
         | 
| 93 | 
            +
             | 
| 94 | 
            +
                          print "allow indels? (default as yes) Y/N \n> "
         | 
| 95 | 
            +
                          indel = gets.chomp.rstrip
         | 
| 96 | 
            +
                          if indel =~ /n|no/i
         | 
| 97 | 
            +
                            data[:indel] = false
         | 
| 98 | 
            +
                          else
         | 
| 99 | 
            +
                            data[:indel] = true
         | 
| 100 | 
            +
                          end
         | 
| 101 | 
            +
                        else
         | 
| 102 | 
            +
                          data[:TCS_QC] = false
         | 
| 103 | 
            +
                        end
         | 
| 104 | 
            +
             | 
| 105 | 
            +
                        print "Need trimming to a reference genome? Y/N \n> "
         | 
| 106 | 
            +
                        trim_option = gets.chomp.rstrip
         | 
| 107 | 
            +
                        if trim_option =~ /y|yes/i
         | 
| 108 | 
            +
                          data[:trim] = true
         | 
| 109 | 
            +
                          data[:trim_ref] = get_ref
         | 
| 110 | 
            +
             | 
| 111 | 
            +
                          print "reference 5'end ref position \n> "
         | 
| 112 | 
            +
                          data[:trim_ref_start] = gets.chomp.rstrip.to_i
         | 
| 113 | 
            +
             | 
| 114 | 
            +
                          print "reference 3'end ref position \n> "
         | 
| 115 | 
            +
                          data[:trim_ref_end] = gets.chomp.rstrip.to_i
         | 
| 116 | 
            +
             | 
| 117 | 
            +
                        else
         | 
| 118 | 
            +
                          data[:trim] = false
         | 
| 119 | 
            +
                        end
         | 
| 120 | 
            +
             | 
| 121 | 
            +
                      else
         | 
| 122 | 
            +
                        data[:end_join] = false
         | 
| 123 | 
            +
                      end
         | 
| 124 | 
            +
             | 
| 125 | 
            +
                      param[:primer_pairs] << data
         | 
| 126 | 
            +
                      print "Do you wish to conintue? Y/N \n> "
         | 
| 127 | 
            +
                      continue_sig = gets.chomp.rstrip
         | 
| 128 | 
            +
                      break unless continue_sig =~ /y|yes/i
         | 
| 129 | 
            +
             | 
| 130 | 
            +
                    end
         | 
| 131 | 
            +
             | 
| 132 | 
            +
                    puts "\nYour JSON string is:"
         | 
| 133 | 
            +
                    puts JSON.pretty_generate(param)
         | 
| 134 | 
            +
             | 
| 135 | 
            +
                    print "\nDo you wish to save it as a file? Y/N \n> "
         | 
| 136 | 
            +
                    save_option = gets.chomp.rstrip
         | 
| 137 | 
            +
             | 
| 138 | 
            +
                    if save_option =~ /y|yes/i
         | 
| 139 | 
            +
                      print "Path to save JSON file:\n> "
         | 
| 140 | 
            +
                      path = gets.chomp.rstrip
         | 
| 141 | 
            +
                      File.open(path, 'w') {|f| f.puts JSON.pretty_generate(param)}
         | 
| 142 | 
            +
                    end
         | 
| 143 | 
            +
             | 
| 144 | 
            +
                    print "\nDo you wish to execute tcs pipeline with the input params now? Y/N \n> "
         | 
| 145 | 
            +
             | 
| 146 | 
            +
                    rsp = gets.chomp.rstrip
         | 
| 147 | 
            +
                    if rsp =~ /y/i
         | 
| 148 | 
            +
                      return param
         | 
| 149 | 
            +
                    else
         | 
| 150 | 
            +
                      abort "Params json file generated. You can execute tcs pipeline using `tcs -p [params.json]`"
         | 
| 151 | 
            +
                    end
         | 
| 152 | 
            +
             | 
| 153 | 
            +
                  end
         | 
| 154 | 
            +
             | 
| 155 | 
            +
                  private
         | 
| 156 | 
            +
                  # Interactively ask the user which reference genome to use.
                  #
                  # Prints a three-item menu to standard output, then reads lines with
                  # Kernel#gets until one of them parses (via String#to_i) to 1, 2 or 3.
                  # Every rejected entry is echoed back exactly as typed, followed by a
                  # fresh "> " prompt.
                  #
                  # The menu and the echoed entry are decorated with String#red / #blue /
                  # #magenta / #bold. Those helpers are not defined in this method and
                  # must already be available on String when it runs.
                  #
                  # Returns one of the symbols :HXB2, :NL43 or :MAC239.
                  # Raises EOFError when input ends before a valid choice is made
                  # (Kernel#gets returns nil at end-of-input).
                  def get_ref
                    puts "Choose reference genome (1-3):"
                    puts "1. HIV-1 HXB2".red.bold
                    puts "2. HIV-1 NL4-3".blue.bold
                    puts "3. SIV MAC239".magenta.bold
                    print "> "

                    # Single source of truth for the valid menu numbers and their results.
                    references = { 1 => :HXB2, 2 => :NL43, 3 => :MAC239 }

                    loop do
                      line = gets
                      # Without this guard an exhausted input would surface as a
                      # NoMethodError on nil instead of a meaningful error.
                      raise EOFError, "input ended before a reference genome was chosen" if line.nil?

                      entry = line.strip
                      choice = entry.to_i
                      return references[choice] if references.key?(choice)

                      # Keep the raw text for the message so the user sees what they typed,
                      # not its integer conversion.
                      print "Entered reference genome option #{entry.red.bold} not valid (choose 1-3), try again\n> "
                    end
                  end
         | 
| 176 | 
            +
                end
         | 
| 177 | 
            +
              end # end TcsJson
         | 
| 178 | 
            +
            end # end main module
         |