bacterial-annotator 0.6.1 → 0.6.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/bin/ba_raxml +3 -2
- data/bin/bacterial-annotator +35 -9
- data/lib/bacterial-annotator.rb +4 -0
- data/lib/bacterial-annotator/sequence-synteny.rb +2 -53
- data/lib/bacterial-comparator.rb +1 -1
- metadata +2 -2
    
        checksums.yaml
    CHANGED
    
    | @@ -1,7 +1,7 @@ | |
| 1 1 | 
             
            ---
         | 
| 2 2 | 
             
            SHA1:
         | 
| 3 | 
            -
              metadata.gz:  | 
| 4 | 
            -
              data.tar.gz:  | 
| 3 | 
            +
              metadata.gz: 713193368d710e1a3f78b0279657fab96ded5586
         | 
| 4 | 
            +
              data.tar.gz: 82a924362e32d116b029429a00e55a45aa3f8dc7
         | 
| 5 5 | 
             
            SHA512:
         | 
| 6 | 
            -
              metadata.gz:  | 
| 7 | 
            -
              data.tar.gz:  | 
| 6 | 
            +
              metadata.gz: 9d853a09c8cfbb5e9bfc4525acf4d222ed8c15d601c7b40c0f68d21c724419f773dbe354e161940a04138a59c75c49743165f290f0f4acc1e4a7b15a7d5b9680
         | 
| 7 | 
            +
              data.tar.gz: 3772047f7326e06f3cafd73f8776318001b3e53b446a92e144ec30725dd22fc41cb13c08a5f4cb4cc638bbc6b728f4c7af8b8375755f58b44ec42940f589cd6c
         | 
    
        data/bin/ba_raxml
    CHANGED
    
    | @@ -10,17 +10,18 @@ | |
| 10 10 | 
             
            require 'open-uri'
         | 
| 11 11 |  | 
| 12 12 | 
             
            ROOT_path = File.dirname(__FILE__)
         | 
| 13 | 
            -
            # raxml_url = https://github.com/stamatak/standard-RAxML/archive/master.zip
         | 
| 13 | 
            +
            # raxml_url = https://github.com/stamatak/standard-RAxML/archive/v8.2.11.zip
         | 
| 14 14 | 
             
            # Install RAXML on the user system
         | 
| 15 15 | 
             
            def installRaxml
         | 
| 16 16 |  | 
| 17 17 | 
             
              begin
         | 
| 18 | 
            -
                resp = open("https://github.com/stamatak/standard-RAxML/archive/master.zip")
         | 
| 18 | 
            +
                resp = open("https://github.com/stamatak/standard-RAxML/archive/v8.2.11.zip")
         | 
| 19 19 | 
             
                open("#{ROOT_path}/master.zip", "wb") do |file|
         | 
| 20 20 | 
             
                  file.write(resp.read)
         | 
| 21 21 | 
             
                end
         | 
| 22 22 | 
             
                Dir.chdir("#{ROOT_path}/")
         | 
| 23 23 | 
             
                `unzip master.zip && rm master.zip`
         | 
| 24 | 
            +
                `mv standard-RAxML-8.2.11 standard-RAxML-master`
         | 
| 24 25 | 
             
                Dir.chdir("#{ROOT_path}/standard-RAxML-master")
         | 
| 25 26 | 
             
                `make -f Makefile.PTHREADS.gcc`
         | 
| 26 27 | 
             
                `rm *.o`
         | 
    
        data/bin/bacterial-annotator
    CHANGED
    
    | @@ -11,6 +11,21 @@ | |
| 11 11 | 
             
            require 'bacterial-annotator'
         | 
| 12 12 | 
             
            require 'bacterial-comparator'
         | 
| 13 13 |  | 
| 14 | 
            +
            VERSION = "0.6.1"
         | 
| 15 | 
            +
             | 
| 16 | 
            +
            def print_version
         | 
| 17 | 
            +
              version = "Bacterial Annotator - Version #{VERSION}\n\n"
         | 
| 18 | 
            +
              version += "Packaged with : \n"
         | 
| 19 | 
            +
              version += "  -- Prodigal v2.6.2\n"
         | 
| 20 | 
            +
              version += "  -- BLAT standalone version\n"
         | 
| 21 | 
            +
              version += "  -- MAFFT v7.222\n"
         | 
| 22 | 
            +
              version += "  -- fasta36  v36.3.8d\n"
         | 
| 23 | 
            +
              version += "  -- RAxML v8.2.11\n"
         | 
| 24 | 
            +
              version += "  -- FastTree v2.1.10\n"
         | 
| 25 | 
            +
              puts version
         | 
| 26 | 
            +
            end
         | 
| 27 | 
            +
             | 
| 28 | 
            +
             | 
| 14 29 |  | 
| 15 30 | 
             
            # Usage message to print to CLI
         | 
| 16 31 | 
             
            def usage
         | 
| @@ -29,6 +44,8 @@ bacterial-annotator [annotate | compare] [OPTIONS] | |
| 29 44 |  | 
| 30 45 | 
             
              --help/-h		Print this !
         | 
| 31 46 |  | 
| 47 | 
            +
              --version/-v  Version of the Bacterial-Annotator
         | 
| 48 | 
            +
             | 
| 32 49 | 
             
            OEM
         | 
| 33 50 |  | 
| 34 51 | 
             
            end
         | 
| @@ -47,7 +64,7 @@ annotate [OPTIONS] | |
| 47 64 | 
             
                --name/-n     <name> Sample name
         | 
| 48 65 |  | 
| 49 66 | 
             
              // MERGEM-based Annotation (Recommended)
         | 
| 50 | 
            -
                --db          MERGEM database directory
         | 
| 67 | 
            +
                --db/-d          <directory> MERGEM database directory
         | 
| 51 68 |  | 
| 52 69 | 
             
              // Reference-Based Annotation
         | 
| 53 70 | 
             
                --refgenome/-g	<GBK_ID> 	Provide a Genbank file or a Gbk Accession ID.
         | 
| @@ -56,8 +73,8 @@ annotate [OPTIONS] | |
| 56 73 | 
             
            			  Fasta headers need to look similar to NCBI or EBI fasta headers, ex.:
         | 
| 57 74 | 
             
            			  >gi|385721352|gb|AFI72857.1| NDM-1 [Escherichia coli]
         | 
| 58 75 | 
             
            			  >sp|C7C422|BLAN1_KLEPN Beta-lactamase NDM-1 OS=Klebsiella pneumoniae..
         | 
| 59 | 
            -
                --pidentity		<% identity> Minimum percentage identity to incorporate a CDS annotation [default=0. | 
| 60 | 
            -
                --pcoverage		<% identity> Minimum percentage of coverage over protein alignment to incorporate a CDS annotation [default=0. | 
| 76 | 
            +
                --pidentity		<% identity> Minimum percentage identity to incorporate a CDS annotation [default=0.8]
         | 
| 77 | 
            +
                --pcoverage		<% identity> Minimum percentage of coverage over protein alignment to incorporate a CDS annotation [default=0.8]
         | 
| 61 78 | 
             
            			             .. otherwise hint for a non-functional protein
         | 
| 62 79 | 
             
                --minlength		<length> Minimum contig length for annotation [default=500]
         | 
| 63 80 |  | 
| @@ -74,8 +91,8 @@ def parseOptions_annotate | |
| 74 91 |  | 
| 75 92 | 
             
              # default options
         | 
| 76 93 | 
             
              options[:outdir] = "BAnnotation"
         | 
| 77 | 
            -
              options[:pidentity] =  | 
| 78 | 
            -
              options[:pcoverage] =  | 
| 94 | 
            +
              options[:pidentity] = 80
         | 
| 95 | 
            +
              options[:pcoverage] = 80
         | 
| 79 96 | 
             
              options[:minlength] = 500
         | 
| 80 97 | 
             
              options[:meta] = 0
         | 
| 81 98 |  | 
| @@ -84,6 +101,10 @@ def parseOptions_annotate | |
| 84 101 | 
             
                case x.downcase
         | 
| 85 102 | 
             
                when "--input", "-i"
         | 
| 86 103 | 
             
                  options[:input] = ARGV.shift
         | 
| 104 | 
            +
                when "--name", "-n"
         | 
| 105 | 
            +
                  options[:name] = ARGV.shift
         | 
| 106 | 
            +
                when "--db", "-d"
         | 
| 107 | 
            +
                  options[:mergem] = ARGV.shift
         | 
| 87 108 | 
             
                when "--refgenome", "-g"
         | 
| 88 109 | 
             
                  options[:refgenome] = ARGV.shift
         | 
| 89 110 | 
             
                when "--outdir", "-o"
         | 
| @@ -101,8 +122,12 @@ def parseOptions_annotate | |
| 101 122 | 
             
                when "--externaldb"
         | 
| 102 123 | 
             
                  options[:external_db] = ARGV.shift
         | 
| 103 124 | 
             
                when "--help", "-h"
         | 
| 125 | 
            +
             | 
| 104 126 | 
             
                  usage_annotate
         | 
| 105 127 | 
             
                  abort
         | 
| 128 | 
            +
                when "--version", "-v"
         | 
| 129 | 
            +
                  print_version
         | 
| 130 | 
            +
                  abort
         | 
| 106 131 | 
             
                end
         | 
| 107 132 |  | 
| 108 133 | 
             
              end
         | 
| @@ -188,12 +213,11 @@ def parseOptions_compare | |
| 188 213 |  | 
| 189 214 | 
             
            end
         | 
| 190 215 |  | 
| 191 | 
            -
             | 
| 192 216 | 
             
            ########
         | 
| 193 217 | 
             
            # MAIN #
         | 
| 194 218 | 
             
            ########
         | 
| 195 219 |  | 
| 196 | 
            -
            if ARGV.size > 1
         | 
| 220 | 
            +
            if ARGV.size >= 1
         | 
| 197 221 |  | 
| 198 222 | 
             
              ROOT = File.dirname(__FILE__)
         | 
| 199 223 |  | 
| @@ -242,11 +266,13 @@ if ARGV.size > 1 | |
| 242 266 | 
             
                bcomp.mafft_aln aln_opt
         | 
| 243 267 | 
             
                bcomp.raxml_tree aln_opt, options[:bootstrap] if options[:phylogeny] == 1
         | 
| 244 268 |  | 
| 245 | 
            -
               | 
| 269 | 
            +
              elsif ARGV[0] == "--version" or ARGV[0] == "-v"
         | 
| 246 270 |  | 
| 247 | 
            -
                 | 
| 271 | 
            +
                print_version
         | 
| 248 272 | 
             
                abort
         | 
| 249 273 |  | 
| 274 | 
            +
              else
         | 
| 275 | 
            +
                usage
         | 
| 250 276 | 
             
              end
         | 
| 251 277 |  | 
| 252 278 |  | 
    
        data/lib/bacterial-annotator.rb
    CHANGED
    
    | @@ -30,6 +30,10 @@ class BacterialAnnotator | |
| 30 30 | 
             
                @options[:pcoverage] = @options[:pcoverage].to_f
         | 
| 31 31 | 
             
                @options[:pcoverage] = @options[:pcoverage] / 100 if @options[:pcoverage] > 1.00
         | 
| 32 32 |  | 
| 33 | 
            +
                if ! @options.has_key? :name
         | 
| 34 | 
            +
                  @options[:name] = @options[:input].gsub(/.fasta|.fa|.fna/,"")
         | 
| 35 | 
            +
                end
         | 
| 36 | 
            +
             | 
| 33 37 | 
             
                if File.exists? (@options[:outdir])
         | 
| 34 38 | 
             
                  if ! options.has_key? :force
         | 
| 35 39 | 
             
                    abort "Output directory already exist ! Choose another one or use -f to overwrite"
         | 
| @@ -39,7 +39,6 @@ class SequenceSynteny | |
| 39 39 | 
             
                  partial = false
         | 
| 40 40 | 
             
                  if properties.length >= 2 and properties[1].include? "partial"
         | 
| 41 41 | 
             
                    partial = (properties[1].gsub("partial=","")=='01')
         | 
| 42 | 
            -
                    puts "partial:" + partial.to_s
         | 
| 43 42 | 
             
                  end
         | 
| 44 43 | 
             
                  sequences[s_name][:partial] = partial
         | 
| 45 44 | 
             
                  sequences[s_name][:length] = s.seq.length
         | 
| @@ -54,9 +53,10 @@ class SequenceSynteny | |
| 54 53 | 
             
              # run blat on proteins
         | 
| 55 54 | 
             
              def run_blat root, outdir
         | 
| 56 55 | 
             
                base_cmd = "#{root}/blat.linux -out=blast8 -minIdentity=#{@pidentity}"
         | 
| 57 | 
            -
                system("#{base_cmd} #{@subject_file} #{@query_file} #{outdir}/#{@name}.blat8.tsv")
         | 
| 58 56 | 
             
                if @type == "prot"
         | 
| 59 57 | 
             
                  system("#{base_cmd} -prot #{@subject_file} #{@query_file} #{outdir}/#{@name}.blat8.tsv")
         | 
| 58 | 
            +
                else
         | 
| 59 | 
            +
                  system("#{base_cmd} #{@subject_file} #{@query_file} #{outdir}/#{@name}.blat8.tsv")
         | 
| 60 60 | 
             
                end
         | 
| 61 61 | 
             
                @aln_file = "#{outdir}/#{@name}.blat8.tsv"
         | 
| 62 62 | 
             
                # extract_hits
         | 
| @@ -153,57 +153,6 @@ class SequenceSynteny | |
| 153 153 | 
             
              end                           # end of method
         | 
| 154 154 |  | 
| 155 155 |  | 
| 156 | 
            -
              # Extract Hit from blast8 file and save it in hash
         | 
| 157 | 
            -
              # contig-0_1      ABJ71957.1      96.92   65      2       0       1       65      1       65      9.2e-31 131.0
         | 
| 158 | 
            -
              def extract_hits_prodigal mode
         | 
| 159 | 
            -
             | 
| 160 | 
            -
                @aln_hits = {}
         | 
| 161 | 
            -
                feature = ""
         | 
| 162 | 
            -
                File.open(@aln_file,"r") do |fread|
         | 
| 163 | 
            -
                  while l = fread.gets
         | 
| 164 | 
            -
                    lA = l.chomp!.split("\t")
         | 
| 165 | 
            -
                    key = lA[0]
         | 
| 166 | 
            -
                    if mode == :refgenome
         | 
| 167 | 
            -
                      hit = lA[1]
         | 
| 168 | 
            -
                      feature = "cds"
         | 
| 169 | 
            -
                    elsif mode == :externaldb
         | 
| 170 | 
            -
                      hit = lA[1].chomp.split("|")[3]
         | 
| 171 | 
            -
                      feature = "cds"
         | 
| 172 | 
            -
                    end
         | 
| 173 | 
            -
                    next if lA[2].to_f < @pidentity
         | 
| 174 | 
            -
                    if ! @aln_hits.has_key? key
         | 
| 175 | 
            -
                      @aln_hits[key] = {
         | 
| 176 | 
            -
                        pId: lA[2].to_f.round(2),
         | 
| 177 | 
            -
                        evalue: lA[10],
         | 
| 178 | 
            -
                        score: lA[11].to_f,
         | 
| 179 | 
            -
                        hits: [hit],
         | 
| 180 | 
            -
                        length: [lA[3].to_i],
         | 
| 181 | 
            -
                        query_location: [[lA[6].to_i,lA[7].to_i]],
         | 
| 182 | 
            -
                        subject_location: [[lA[8].to_i,lA[9].to_i]],
         | 
| 183 | 
            -
                        feature: feature
         | 
| 184 | 
            -
                      }
         | 
| 185 | 
            -
                    elsif lA[11].to_f > @aln_hits[key][:score]
         | 
| 186 | 
            -
                      @aln_hits[key] = {
         | 
| 187 | 
            -
                        pId: lA[2].to_f.round(2),
         | 
| 188 | 
            -
                        evalue: lA[10],
         | 
| 189 | 
            -
                        score: lA[11].to_f,
         | 
| 190 | 
            -
                        hits: [hit],
         | 
| 191 | 
            -
                        length: [lA[3].to_i],
         | 
| 192 | 
            -
                        query_location: [[lA[6].to_i,lA[7].to_i]],
         | 
| 193 | 
            -
                        subject_location: [[lA[8].to_i,lA[9].to_i]],
         | 
| 194 | 
            -
                        feature: feature
         | 
| 195 | 
            -
                      }
         | 
| 196 | 
            -
                    elsif lA[11].to_f == @aln_hits[key][:score]
         | 
| 197 | 
            -
                      @aln_hits[key][:hits] << hit
         | 
| 198 | 
            -
                      @aln_hits[key][:length] << lA[3].to_i
         | 
| 199 | 
            -
                      @aln_hits[key][:query_location] << [lA[6].to_i,lA[7].to_i]
         | 
| 200 | 
            -
                      @aln_hits[key][:subject_location] << [lA[8].to_i,lA[9].to_i]
         | 
| 201 | 
            -
                    end
         | 
| 202 | 
            -
                  end
         | 
| 203 | 
            -
                end
         | 
| 204 | 
            -
             | 
| 205 | 
            -
              end                           # end of method
         | 
| 206 | 
            -
             | 
| 207 156 | 
             
              # Extract Hit from blast8 file and save it in hash
         | 
| 208 157 | 
             
              # prpa    PA0668.4|rRNA|23S       99.97   2891    1       0       705042  707932  1       2891    0.0e+00 5671.0
         | 
| 209 158 | 
             
              def extract_hits_dna mode
         | 
    
        data/lib/bacterial-comparator.rb
    CHANGED
    
    | @@ -247,7 +247,7 @@ class BacterialComparator | |
| 247 247 | 
             
                  puts "..Prot alignment files already exists, skipping."
         | 
| 248 248 | 
             
                end
         | 
| 249 249 |  | 
| 250 | 
            -
                # ugly hack to find out the reference genome
         | 
| 250 | 
            +
                # FIXME ugly hack to find out the reference genome
         | 
| 251 251 | 
             
                ref_id = Dir["#{ori_dir}/#{@genomes_list[0]}/*.pep"][0].split('/')[-1].gsub(".pep","")
         | 
| 252 252 |  | 
| 253 253 | 
             
                concat_alignments "align-genes-pep.all.fasta", ref_id
         | 
    
        metadata
    CHANGED
    
    | @@ -1,14 +1,14 @@ | |
| 1 1 | 
             
            --- !ruby/object:Gem::Specification
         | 
| 2 2 | 
             
            name: bacterial-annotator
         | 
| 3 3 | 
             
            version: !ruby/object:Gem::Version
         | 
| 4 | 
            -
              version: 0.6.1
         | 
| 4 | 
            +
              version: 0.6.2
         | 
| 5 5 | 
             
            platform: ruby
         | 
| 6 6 | 
             
            authors:
         | 
| 7 7 | 
             
            - Maxime Deraspe
         | 
| 8 8 | 
             
            autorequire: 
         | 
| 9 9 | 
             
            bindir: bin
         | 
| 10 10 | 
             
            cert_chain: []
         | 
| 11 | 
            -
            date: 2017- | 
| 11 | 
            +
            date: 2017-10-03 00:00:00.000000000 Z
         | 
| 12 12 | 
             
            dependencies:
         | 
| 13 13 | 
             
            - !ruby/object:Gem::Dependency
         | 
| 14 14 | 
             
              name: bio
         |