mgnu 2.1.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.yardopts +0 -0
- data/README.md +31 -0
- data/Rakefile +33 -0
- data/lib/mgnu.rb +9 -0
- data/lib/mgnu/alignment.rb +143 -0
- data/lib/mgnu/common.rb +68 -0
- data/lib/mgnu/genbank.rb +117 -0
- data/lib/mgnu/genbank/feature.rb +84 -0
- data/lib/mgnu/genbank/location.rb +150 -0
- data/lib/mgnu/genbank/qualifier.rb +45 -0
- data/lib/mgnu/genbank/reference.rb +114 -0
- data/lib/mgnu/genbank/source.rb +39 -0
- data/lib/mgnu/loggable.rb +61 -0
- data/lib/mgnu/parser.rb +50 -0
- data/lib/mgnu/parser/blast.rb +87 -0
- data/lib/mgnu/parser/blast/format0.rb +290 -0
- data/lib/mgnu/parser/blast/format7.rb +121 -0
- data/lib/mgnu/parser/blast/format8.rb +120 -0
- data/lib/mgnu/parser/blast/hsp.rb +75 -0
- data/lib/mgnu/parser/blast/query.rb +45 -0
- data/lib/mgnu/parser/blast/sbjct.rb +62 -0
- data/lib/mgnu/parser/clustalw.rb +72 -0
- data/lib/mgnu/parser/fasta.rb +61 -0
- data/lib/mgnu/parser/fasta_header_index.rb +39 -0
- data/lib/mgnu/parser/fasta_index.rb +57 -0
- data/lib/mgnu/parser/fastq.rb +61 -0
- data/lib/mgnu/parser/genbank.rb +187 -0
- data/lib/mgnu/parser/gff.rb +56 -0
- data/lib/mgnu/parser/iprscan/hit.rb +76 -0
- data/lib/mgnu/parser/iprscan_file.rb +39 -0
- data/lib/mgnu/parser/kegg_ontology_index.rb +163 -0
- data/lib/mgnu/parser/pilercr.rb +102 -0
- data/lib/mgnu/parser/prodigal.rb +170 -0
- data/lib/mgnu/parser/sam.rb +115 -0
- data/lib/mgnu/parser/sam/alignment.rb +22 -0
- data/lib/mgnu/parser/sam/header.rb +23 -0
- data/lib/mgnu/parser/sam/pair.rb +18 -0
- data/lib/mgnu/sequence.rb +207 -0
- data/lib/mgnu/sequence/fasta.rb +79 -0
- data/lib/mgnu/sequence/fastq.rb +43 -0
- data/lib/mgnu/version.rb +16 -0
- data/mgnu.gemspec +39 -0
- data/spec/mgnu/parser/blast_format0_spec.rb +114 -0
- data/spec/mgnu/parser/blast_format7_spec.rb +24 -0
- data/spec/mgnu/parser/blast_format8_spec.rb +26 -0
- data/spec/mgnu/parser/blast_multihsp_spec.rb +100 -0
- data/spec/mgnu/parser/blast_oof_spec.rb +53 -0
- data/spec/mgnu/parser/clustalw_spec.rb +90 -0
- data/spec/mgnu/parser/fasta_header_index_tc_parser_spec.rb +25 -0
- data/spec/mgnu/parser/fasta_index_tc_parser_spec.rb +25 -0
- data/spec/mgnu/parser/fasta_parser_spec.rb +53 -0
- data/spec/mgnu/parser_spec.rb +22 -0
- data/spec/mgnu/sequence/fasta_spec.rb +60 -0
- data/spec/mgnu/sequence/fastq_spec.rb +31 -0
- data/spec/mgnu/sequence_spec.rb +81 -0
- data/spec/mgnu_spec.rb +7 -0
- data/spec/spec_helper.rb +53 -0
- metadata +376 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA256:
|
3
|
+
metadata.gz: f41f975c84c1e898266e72770b0ba76c8ab4d42d3c50d53944bc4ddd74e1e0aa
|
4
|
+
data.tar.gz: 646a09a7ff525576d25ceb3e3b4250259263b418be11a2b24a5429cebec752c4
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: b1945190d6d72e495e0eabe909122c8db9afe6fde8aff015af4d8b4d53298741cdeda8ba5351c2010524c623ce5ee869c3f7b2a2417dabbb8ebf6e0226893e58
|
7
|
+
data.tar.gz: 5600bc2b2cf0f3bec4f619261a9c230265d29c0e809a044391bcaf098dcd132c2bb735599e6a224c039efd4201fa2e6ab444f5f4f4fa93fa6c395a261ebbb95d
|
data/.yardopts
ADDED
File without changes
|
data/README.md
ADDED
@@ -0,0 +1,31 @@
|
|
1
|
+
<<<<<<< HEAD
|
2
|
+
# mgnu
|
3
|
+
Metagenomi Nu, a fast and small bioinformatics support library
|
4
|
+
|
5
|
+
## Installation
|
6
|
+
|
7
|
+
Add this line to your application's Gemfile:
|
8
|
+
|
9
|
+
```ruby
|
10
|
+
gem 'mgnu'
|
11
|
+
```
|
12
|
+
|
13
|
+
And then execute:
|
14
|
+
|
15
|
+
$ bundle
|
16
|
+
|
17
|
+
Or install it yourself as:
|
18
|
+
|
19
|
+
$ gem install mgnu
|
20
|
+
|
21
|
+
## Usage
|
22
|
+
|
23
|
+
TODO: Write usage instructions here
|
24
|
+
|
25
|
+
## Contributing
|
26
|
+
|
27
|
+
1. Fork it ( https://github.com/[my-github-username]/mgnu/fork )
|
28
|
+
2. Create your feature branch (`git checkout -b my-new-feature`)
|
29
|
+
3. Commit your changes (`git commit -am 'Add some feature'`)
|
30
|
+
4. Push to the branch (`git push origin my-new-feature`)
|
31
|
+
5. Create a new Pull Request
|
data/Rakefile
ADDED
@@ -0,0 +1,33 @@
|
|
1
|
+
# Rake entry point for the gem: packaging tasks, specs, optional linting and
# documentation tasks.
require 'bundler'
require 'rspec/core/rake_task'

Bundler::GemHelper.install_tasks

# Threshold handed to the Yardstick verify task below.
THRESHOLD_MIN = 70.0

RSpec::Core::RakeTask.new(:spec)

# `rake test` is an alias for `rake spec`.
task test: :spec

# RuboCop is optional: when it cannot be loaded, register a stand-in task that
# only reports that linting is unavailable.
begin
  require 'rubocop/rake_task'
  RuboCop::RakeTask.new
rescue LoadError
  task(:rubocop) { $stderr.puts 'Rubocop is disabled' }
end

require 'yard'
YARD::Rake::YardocTask.new

require 'yardstick/rake/measurement'
Yardstick::Rake::Measurement.new do |measurement|
  measurement.output = 'measurement/report.txt'
end

require 'yardstick/rake/verify'
Yardstick::Rake::Verify.new do |verify|
  verify.threshold = THRESHOLD_MIN
end

# Only the specs run by default; :rubocop and :verify_measurements are
# deliberately left out of the default chain.
task default: [:spec] #, :rubocop, :verify_measurements]
|
data/lib/mgnu/alignment.rb
ADDED
@@ -0,0 +1,143 @@
|
|
1
|
+
module MgNu
  # A multiple sequence alignment: a Hash of name => aligned sequence String,
  # plus an optional Array of names fixing the row order.
  #
  # Columns are addressed 1-based everywhere (aln[1] is the first column),
  # whether or not an explicit order was supplied.
  class Alignment
    include Enumerable

    # Residue groups used by #match: a gap-free column whose residues all fall
    # inside one strong group is marked ':', inside one weak group '.'.
    StrongConservationGroups = %w(STA NEQK NHQK NDEQ QHRK MILV MILF HY FYW).map { |group| group.split('').sort }
    WeakConservationGroups = %w(CSA ATV SAG STNK STPA SGND SNDEQK NDEQHK NEQHRK FVLIM HFY).map { |group| group.split('').sort }

    attr_reader :length
    attr_accessor :sequences, :order

    # Create a new Alignment object.
    #
    # @param sequences [Hash{Object => String}] aligned sequences keyed by name
    # @param order [Array, nil] names in display order; nil uses the Hash order
    def initialize(sequences, order = nil)
      @sequences = sequences
      @order = order
      # The alignment length is taken from the first sequence; an empty
      # alignment has length 0 instead of raising on nil.
      first = sequences[sequences.keys.first]
      @length = first.nil? ? 0 : first.length
    end

    # Yield every sequence, following @order when it is set.
    #
    # @return [Enumerator] when no block is given
    # @return [self] when a block is given
    def each
      return enum_for(:each) unless block_given?

      ordered_sequences.each { |seq| yield seq }
      self
    end

    # Collect alignment columns as arrays of single-character strings, one
    # entry per sequence.
    #
    # @param range [Range, Integer, nil] 1-based column range, a single
    #   1-based column, or nil for every column
    # @return [Array<Array<String, nil>>] the columns when no block is given
    #   (positions outside a sequence yield nil entries)
    # @return [nil] when a block is given; each column is yielded instead
    def each_position(range = nil)
      rows = ordered_sequences
      wanted =
        case range
        when Range then range.to_a
        when Integer then [range]
        else (1..@length).to_a
        end
      columns = wanted.map { |pos| rows.map { |seq| residue_at(seq, pos) } }
      return columns unless block_given?

      columns.each { |column| yield column }
      nil
    end

    # Shorthand for #each_position without a block.
    def [](range = nil)
      each_position(range)
    end

    # Build the conservation line for the whole alignment or for the given
    # column(s): '*' identical, ':' strong group, '.' weak group, ' ' otherwise
    # (including any column containing a gap).
    #
    # @param range [Range, Integer, nil] see #each_position
    # @return [String] one symbol per column
    def match(range = nil)
      each_position(range).map { |column| conservation_symbol(column) }.join
    end

    # Render every "name: sequence" row followed by the conservation line.
    # Falls back to the Hash order when no explicit order was given.
    def to_s
      names = order || sequences.keys
      rows = names.map { |name| "#{name}: #{sequences[name]}\n" }
      rows.join + match + "\n"
    end

    private

    # Sequences in display order.
    def ordered_sequences
      return @sequences.values if @order.nil?

      @order.map { |name| @sequences[name] }
    end

    # Character at the 1-based position, or nil when it is out of range.
    # Positions below 1 never wrap around to the end of the sequence.
    def residue_at(seq, pos)
      pos < 1 ? nil : seq[pos - 1]
    end

    # Conservation symbol for a single column.
    def conservation_symbol(column)
      return ' ' if column.include?('-')

      residues = column.map { |c| c.upcase }.uniq
      return '*' if residues.length == 1
      return ':' if StrongConservationGroups.any? { |group| (residues - group).empty? }
      return '.' if WeakConservationGroups.any? { |group| (residues - group).empty? }

      ' '
    end
  end # end MgNu::Alignment class
end # end MgNu module
|
data/lib/mgnu/common.rb
ADDED
@@ -0,0 +1,68 @@
|
|
1
|
+
module MgNu
  # codon table 11 from http://www.ncbi.nlm.nih.gov/Taxonomy/Utils/wprintgc.cgi
  # standard bacteria/archaea/plastid codes
  #
  # Maps each lowercase DNA codon to its one-letter amino acid; '*' marks a
  # stop codon. Frozen so the shared lookup table cannot be altered by accident.
  BACTERIA_CODONS = {
    'ttt' => 'F', 'tct' => 'S', 'tat' => 'Y', 'tgt' => 'C',
    'ttc' => 'F', 'tcc' => 'S', 'tac' => 'Y', 'tgc' => 'C',
    'tta' => 'L', 'tca' => 'S', 'taa' => '*', 'tga' => '*',
    'ttg' => 'L', 'tcg' => 'S', 'tag' => '*', 'tgg' => 'W',

    'ctt' => 'L', 'cct' => 'P', 'cat' => 'H', 'cgt' => 'R',
    'ctc' => 'L', 'ccc' => 'P', 'cac' => 'H', 'cgc' => 'R',
    'cta' => 'L', 'cca' => 'P', 'caa' => 'Q', 'cga' => 'R',
    'ctg' => 'L', 'ccg' => 'P', 'cag' => 'Q', 'cgg' => 'R',

    'att' => 'I', 'act' => 'T', 'aat' => 'N', 'agt' => 'S',
    'atc' => 'I', 'acc' => 'T', 'aac' => 'N', 'agc' => 'S',
    'ata' => 'I', 'aca' => 'T', 'aaa' => 'K', 'aga' => 'R',
    'atg' => 'M', 'acg' => 'T', 'aag' => 'K', 'agg' => 'R',

    'gtt' => 'V', 'gct' => 'A', 'gat' => 'D', 'ggt' => 'G',
    'gtc' => 'V', 'gcc' => 'A', 'gac' => 'D', 'ggc' => 'G',
    'gta' => 'V', 'gca' => 'A', 'gaa' => 'E', 'gga' => 'G',
    'gtg' => 'V', 'gcg' => 'A', 'gag' => 'E', 'ggg' => 'G'
  }.freeze
end
|
24
|
+
|
25
|
+
# example usage of Regexp#global_match
#   re = /(\w+)/
#   words = []
#   re.global_match("cat dog house") do |m|
#     words.push(m[0])
#   end
#   p words # ["cat", "dog", "house"]
class Regexp
  # Repeatedly match this pattern against +str+, handing the MatchData of each
  # match to the block. After every match the matched text is removed from a
  # working copy and the pattern is applied again to what remains, so offsets
  # in later MatchData objects refer to the shortened string.
  #
  # Returns true if at least one substitution changed the string, nil otherwise.
  def global_match(str, &proc)
    matched = nil
    remaining = str
    loop do
      stripped = remaining.sub(self) do
        proc.call(Regexp.last_match) # pass MatchData obj
        ''
      end
      # nothing was removed: either no match or only an empty match
      return matched if stripped == remaining

      remaining = stripped
      matched = true
    end
  end # end of global_match
end # end of Regexp class
|
46
|
+
|
47
|
+
# add print_multiline method to String class
class String
  # Wrap the string into lines and return the result (nothing is printed).
  # The first line carries no indent; every following line is prefixed with
  # the indent, and the usable text width per line is width minus the indent.
  #
  # @param width [Integer] total line width, indent included
  # @param options [Hash] :indent => number of spaces for continuation lines
  #   (defaults to 12)
  # @return [String, nil] the wrapped text, or nil when there is nothing to
  #   wrap (empty string, or no wrappable text such as a lone newline)
  def print_multiline(width = 80, options = {})
    return unless length > 0

    indent = ' ' * (options[:indent] || 12)
    limit = width - indent.length
    # First alternative: up to +limit+ characters that end at spaces or at the
    # end of a line. Second alternative: a hard cut of exactly +limit+
    # characters for text without a usable break.
    chunks = scan(/(.{1,#{limit}})(?: +|$)\n?|(.{#{limit}})/).map { |wrapped, solid| wrapped || solid }
    # e.g. a string made only of newlines produces no chunks at all
    return if chunks.empty?

    first, *rest = chunks
    ([first] + rest.map { |chunk| "#{indent}#{chunk}" }).join("\n")
  end # end of print_multiline
end
|
data/lib/mgnu/genbank.rb
ADDED
@@ -0,0 +1,117 @@
|
|
1
|
+
require 'forwardable'
|
2
|
+
require 'mgnu/genbank/feature'
|
3
|
+
require 'mgnu/genbank/location'
|
4
|
+
require 'mgnu/genbank/qualifier'
|
5
|
+
require 'mgnu/genbank/reference'
|
6
|
+
require 'mgnu/genbank/source'
|
7
|
+
|
8
|
+
module MgNu
  # In-memory representation of one GenBank record. The parts listed in
  # STRUCTURE are rendered in that order by #to_s.
  class Genbank
    attr_accessor :locus, :definition, :accession, :secondary_accession, :version, :dblink
    attr_accessor :geninfo_identifier, :keywords, :segment, :source, :references, :comment
    attr_accessor :features, :sequence
    include MgNu::Loggable
    extend Forwardable

    # Rendering order of the record parts.
    STRUCTURE = [:locus, :definition, :accession, :version, :dblink,
                 :keywords, :segment, :source, :references, :comment,
                 :features, :sequence].freeze

    # The fields of the LOCUS line.
    Locus = Struct.new :name, :length, :no_of_strands, :molecule_type, :molecule_structure, :genbank_division, :modification_date do
      # Render the LOCUS line. Every field is coerced with to_s so a numeric
      # length or a missing field does not raise.
      def to_s
        [
          'LOCUS'.ljust(12),                   # 1-12
          name.to_s.ljust(17),                 # 13-29
          length.to_s.rjust(11),               # 30-41
          ' bp ',                              # 41-44
          no_of_strands.to_s.ljust(3),         # ss- ds- ms-, 45-47
          molecule_type.to_s.ljust(8),         # 48-55
          molecule_structure.to_s.ljust(8),    # linear or circular, 56-63
          " #{genbank_division} ",             # 65-68
          modification_date.to_s               # 69
        ].join
      end
    end

    # create a new Genbank object
    def initialize
      @locus = nil
      @definition = ''
      @accession = ''
      @secondary_accession = []
      @dblink = ''
      @version = ''
      @geninfo_identifier = ''
      @keywords = nil
      @segment = ''
      @source = nil
      @references = []
      @comment = ''
      @features = []
      @sequence = ''
    end

    # All LOCUS fields are readable directly on the record. Every delegator
    # targets @locus (the first argument of def_delegators is the receiver).
    def_delegators :@locus, :name, :length, :no_of_strands, :molecule_type
    def_delegators :@locus, :molecule_structure, :genbank_division, :modification_date

    # string representation
    #
    # Renders each present part of STRUCTURE, separated by newlines, and
    # terminates the record with '//'.
    def to_s
      str = ''
      last_index = STRUCTURE.length - 1
      STRUCTURE.each_with_index do |part, index|
        rendered = render_part(part)
        next if rendered.nil?

        str << rendered
        # print newline character if there are more parts
        str << "\n" if index < last_index
      end
      str << '//'
    end

    private

    # Text for one part of the record, or nil when the part is absent.
    def render_part(part)
      value = send(part)
      case part
      when :locus, :source
        value ? value.to_s : nil
      when :definition, :dblink, :segment, :comment
        return nil unless value && !value.empty?

        text = part.to_s.upcase.ljust(12) + value.print_multiline
        text += '.' if part == :definition
        text
      when :accession
        return nil unless value && !value.empty?

        text = 'ACCESSION'.ljust(12) + accession
        text += " #{secondary_accession.join(' ')}" if secondary_accession.any?
        text
      when :version
        return nil unless value && !value.empty?

        text = 'VERSION'.ljust(12) + version
        # the identifier defaults to '', which must not print a bare "GI:"
        gi = geninfo_identifier
        text += " GI:#{gi}" unless gi.nil? || gi.to_s.empty?
        text
      when :features, :references
        return nil if value.empty?

        # NOTE(review): real GenBank files pad this header with several
        # spaces; confirm the literal spacing against the released gem.
        header = part == :features ? "FEATURES Location/Qualifiers\n" : ''
        header + value.map { |entry| entry.to_s }.join("\n")
      when :sequence
        # the default sequence is a plain '' String, which has no #value
        return nil unless value.respond_to?(:value) && !value.value.empty?

        "#{'ORIGIN'.ljust(12)}\n" + value.to_genbank
      when :keywords
        # the KEYWORDS line is always printed, ending in '.' even when empty
        joined = value ? value.join('; ') : ''
        text = 'KEYWORDS'.ljust(12)
        text += joined.print_multiline unless joined.empty?
        text + '.'
      end
    end
  end # end of MgNu::Genbank class
end # end of MgNu module
|
117
|
+
__END__
|
@@ -0,0 +1,84 @@
|
|
1
|
+
module MgNu
  class Genbank
    # One entry of a GenBank feature table: a feature type, its location and
    # its qualifiers.
    class Feature
      attr_accessor :feature_type, :qualifiers, :location, :sequence
      attr_accessor :start_continues, :stop_continues, :raw_qualifiers

      # create a new Feature object
      def initialize
        @qualifiers = []
        @raw_qualifiers = []
      end

      # for handling tags in gb format
      #
      # Any unknown method name is looked up among the qualifier names.
      #
      # @return [Array] the values when several qualifiers share the name
      # @return [Object] the single value when exactly one matches
      # @return [nil] when no qualifier has that name
      def method_missing(method_name, *args)
        values = @qualifiers.select { |q| q.name == method_name.to_s }.map { |q| q.value }
        values.length > 1 ? values : values.first
      end

      # Report qualifier names as supported methods so respond_to? agrees with
      # what method_missing can actually answer.
      def respond_to_missing?(method_name, include_private = false)
        @qualifiers.any? { |q| q.name == method_name.to_s } || super
      end

      # class method for parsing a gb entry in a buffer
      #
      # @param buffer [String] the lines of a single feature, newline separated
      # @return [Feature]
      # @raise [RuntimeError] when a line is neither a feature/location line
      #   nor a qualifier line
      def self.parse(buffer)
        lines = buffer.split("\n")
        feature = Feature.new # create a new feature
        index = 0 # counts logical lines; folded continuation lines are not counted

        until lines.empty?
          line = lines.shift
          if (m = /^\s{5}([\w\-\*']+)\s+(.+)$/.match(line)) # feature type and (beginning of) location line
            feature.feature_type = m[1]
            loc = m[2]
            # Fold wrapped location lines. Stop at the first qualifier line,
            # with or without a value, so a flag such as /pseudo is not glued
            # onto the location.
            while (upcoming = lines.first)
              break if upcoming =~ /\/.+=.+/ || upcoming =~ /\A\s*\//

              loc += lines.shift.lstrip
            end
            feature.location = Location.new(loc)
          elsif (m = /^\s{21}\/([^=]+)=(.+)$/.match(line))
            # the name ends at the FIRST '=', so values may contain '='
            key, value = m[1], m[2]

            # to handle multi-line qualifier values
            while (upcoming = lines.first)
              break if upcoming =~ /^\s{21}\/(?:.+?)=/ # next qualifier

              # a valueless qualifier also ends the value, unless a quoted
              # string is still open and the line may be part of its text
              open_quote = value.start_with?('"') && value.count('"').odd?
              break if upcoming =~ /^\s{21}\// && !open_quote

              value += " #{lines.shift.lstrip}"
            end

            # parse out quotes
            quoted = false
            if (q = /^"(.+)"$/.match(value))
              value = q[1]
              quoted = true # some qualifier values are part of a controlled vocabulary and, as such, unquoted
            end

            # make sure sequence contains no spaces
            value = value.gsub(/\s/, '') if key == 'translation'

            # add new qualifier to feature
            feature.qualifiers << Qualifier.new(:name => key, :value => value.squeeze(' '), :quoted => quoted)
          elsif (m = /^\s{21}\/(.+)$/.match(line)) # qualifier name w/out value
            feature.qualifiers << Qualifier.new(:name => m[1])
          else
            raise "UNKNOWN FEATURE LINE TYPE: #{line} -- #{index}"
          end
          index += 1
        end # end loop through buffer

        feature
      end

      # string representation of Feature
      #
      # Five spaces, the feature type padded to 16 columns, the location and
      # then each qualifier's own string form.
      def to_s
        head = "#{' ' * 5}#{feature_type.ljust(16)}#{location}"
        head + qualifiers.map { |q| q.to_s }.join
      end
    end # end MgNu::Genbank::Feature class
  end # end MgNu::Genbank class
end # end MgNu module
|