mgnu 2.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.yardopts +0 -0
- data/README.md +31 -0
- data/Rakefile +33 -0
- data/lib/mgnu.rb +9 -0
- data/lib/mgnu/alignment.rb +143 -0
- data/lib/mgnu/common.rb +68 -0
- data/lib/mgnu/genbank.rb +117 -0
- data/lib/mgnu/genbank/feature.rb +84 -0
- data/lib/mgnu/genbank/location.rb +150 -0
- data/lib/mgnu/genbank/qualifier.rb +45 -0
- data/lib/mgnu/genbank/reference.rb +114 -0
- data/lib/mgnu/genbank/source.rb +39 -0
- data/lib/mgnu/loggable.rb +61 -0
- data/lib/mgnu/parser.rb +50 -0
- data/lib/mgnu/parser/blast.rb +87 -0
- data/lib/mgnu/parser/blast/format0.rb +290 -0
- data/lib/mgnu/parser/blast/format7.rb +121 -0
- data/lib/mgnu/parser/blast/format8.rb +120 -0
- data/lib/mgnu/parser/blast/hsp.rb +75 -0
- data/lib/mgnu/parser/blast/query.rb +45 -0
- data/lib/mgnu/parser/blast/sbjct.rb +62 -0
- data/lib/mgnu/parser/clustalw.rb +72 -0
- data/lib/mgnu/parser/fasta.rb +61 -0
- data/lib/mgnu/parser/fasta_header_index.rb +39 -0
- data/lib/mgnu/parser/fasta_index.rb +57 -0
- data/lib/mgnu/parser/fastq.rb +61 -0
- data/lib/mgnu/parser/genbank.rb +187 -0
- data/lib/mgnu/parser/gff.rb +56 -0
- data/lib/mgnu/parser/iprscan/hit.rb +76 -0
- data/lib/mgnu/parser/iprscan_file.rb +39 -0
- data/lib/mgnu/parser/kegg_ontology_index.rb +163 -0
- data/lib/mgnu/parser/pilercr.rb +102 -0
- data/lib/mgnu/parser/prodigal.rb +170 -0
- data/lib/mgnu/parser/sam.rb +115 -0
- data/lib/mgnu/parser/sam/alignment.rb +22 -0
- data/lib/mgnu/parser/sam/header.rb +23 -0
- data/lib/mgnu/parser/sam/pair.rb +18 -0
- data/lib/mgnu/sequence.rb +207 -0
- data/lib/mgnu/sequence/fasta.rb +79 -0
- data/lib/mgnu/sequence/fastq.rb +43 -0
- data/lib/mgnu/version.rb +16 -0
- data/mgnu.gemspec +39 -0
- data/spec/mgnu/parser/blast_format0_spec.rb +114 -0
- data/spec/mgnu/parser/blast_format7_spec.rb +24 -0
- data/spec/mgnu/parser/blast_format8_spec.rb +26 -0
- data/spec/mgnu/parser/blast_multihsp_spec.rb +100 -0
- data/spec/mgnu/parser/blast_oof_spec.rb +53 -0
- data/spec/mgnu/parser/clustalw_spec.rb +90 -0
- data/spec/mgnu/parser/fasta_header_index_tc_parser_spec.rb +25 -0
- data/spec/mgnu/parser/fasta_index_tc_parser_spec.rb +25 -0
- data/spec/mgnu/parser/fasta_parser_spec.rb +53 -0
- data/spec/mgnu/parser_spec.rb +22 -0
- data/spec/mgnu/sequence/fasta_spec.rb +60 -0
- data/spec/mgnu/sequence/fastq_spec.rb +31 -0
- data/spec/mgnu/sequence_spec.rb +81 -0
- data/spec/mgnu_spec.rb +7 -0
- data/spec/spec_helper.rb +53 -0
- metadata +376 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA256:
|
3
|
+
metadata.gz: f41f975c84c1e898266e72770b0ba76c8ab4d42d3c50d53944bc4ddd74e1e0aa
|
4
|
+
data.tar.gz: 646a09a7ff525576d25ceb3e3b4250259263b418be11a2b24a5429cebec752c4
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: b1945190d6d72e495e0eabe909122c8db9afe6fde8aff015af4d8b4d53298741cdeda8ba5351c2010524c623ce5ee869c3f7b2a2417dabbb8ebf6e0226893e58
|
7
|
+
data.tar.gz: 5600bc2b2cf0f3bec4f619261a9c230265d29c0e809a044391bcaf098dcd132c2bb735599e6a224c039efd4201fa2e6ab444f5f4f4fa93fa6c395a261ebbb95d
|
data/.yardopts
ADDED
File without changes
|
data/README.md
ADDED
@@ -0,0 +1,31 @@
|
|
1
|
+
# mgnu
|
3
|
+
Metagenomi Nu, a fast and small bioinformatics support library
|
4
|
+
|
5
|
+
## Installation
|
6
|
+
|
7
|
+
Add this line to your application's Gemfile:
|
8
|
+
|
9
|
+
```ruby
|
10
|
+
gem 'mgnu'
|
11
|
+
```
|
12
|
+
|
13
|
+
And then execute:
|
14
|
+
|
15
|
+
$ bundle
|
16
|
+
|
17
|
+
Or install it yourself as:
|
18
|
+
|
19
|
+
$ gem install mgnu
|
20
|
+
|
21
|
+
## Usage
|
22
|
+
|
23
|
+
TODO: Write usage instructions here
|
24
|
+
|
25
|
+
## Contributing
|
26
|
+
|
27
|
+
1. Fork it ( https://github.com/[my-github-username]/mgnu/fork )
|
28
|
+
2. Create your feature branch (`git checkout -b my-new-feature`)
|
29
|
+
3. Commit your changes (`git commit -am 'Add some feature'`)
|
30
|
+
4. Push to the branch (`git push origin my-new-feature`)
|
31
|
+
5. Create a new Pull Request
|
data/Rakefile
ADDED
@@ -0,0 +1,33 @@
|
|
1
|
+
# Rake tasks for the mgnu gem: gem packaging (Bundler), specs (RSpec) and the
# optional quality/documentation tooling (RuboCop, YARD, Yardstick).
require 'bundler'
require 'rspec/core/rake_task'
Bundler::GemHelper.install_tasks

# Threshold handed to Yardstick's verify task below.
THRESHOLD_MIN = 70.0

RSpec::Core::RakeTask.new(:spec)

task :test => :spec

# Registers placeholder tasks that only report that an optional tool is not
# installed, so a missing development dependency never prevents the Rakefile
# from loading (and therefore never blocks `rake spec`).
disable_tasks = lambda do |message, *names|
  names.each do |name|
    task name do
      $stderr.puts message
    end
  end
end

begin
  require 'rubocop/rake_task'
  RuboCop::RakeTask.new
rescue LoadError
  disable_tasks.call('Rubocop is disabled', :rubocop)
end

# YARD and Yardstick are guarded the same way as RuboCop above: they are
# documentation tooling and must not be required just to run the specs.
begin
  require 'yard'
  YARD::Rake::YardocTask.new
rescue LoadError
  disable_tasks.call('YARD is disabled', :yard)
end

begin
  require 'yardstick/rake/measurement'
  Yardstick::Rake::Measurement.new do |measurement|
    measurement.output = 'measurement/report.txt'
  end

  require 'yardstick/rake/verify'
  Yardstick::Rake::Verify.new do |verify|
    verify.threshold = THRESHOLD_MIN
  end
rescue LoadError
  # NOTE(review): task names below are Yardstick's documented defaults --
  # confirm against the installed Yardstick version.
  disable_tasks.call('Yardstick is disabled', :yardstick_measure, :verify_measurements)
end

task :default => [:spec] #, :rubocop, :verify_measurements]
|
data/lib/mgnu/alignment.rb
ADDED
@@ -0,0 +1,143 @@
|
|
1
|
+
module MgNu
  # A multiple sequence alignment: a set of named, equal-length (gapped)
  # sequence strings plus an optional display order for the names.
  #
  # Includes Enumerable; iteration yields the sequence strings.
  class Alignment
    include Enumerable

    # Residue groups used by #match to grade a column; each group is stored as
    # a sorted array of single-letter codes.
    # NOTE(review): these look like the Clustal strong/weak conservation
    # groups -- confirm against the Clustal documentation.
    StrongConservationGroups = %w(STA NEQK NHQK NDEQ QHRK MILV MILF HY FYW).collect { |x| x.split('').sort }.freeze
    WeakConservationGroups = %w(CSA ATV SAG STNK STPA SGND SNDEQK NDEQHK NEQHRK FVLIM HFY).collect { |x| x.split('').sort }.freeze

    attr_reader :length
    attr_accessor :sequences, :order

    # Create a new Alignment object.
    #
    # @param sequences [Hash{Object => String}] sequence strings keyed by name
    # @param order [Array, nil] names in display order; when nil the
    #   iteration order of +sequences+ is used
    def initialize(sequences, order = nil)
      @sequences = sequences
      @order = order
      first_name = sequences.keys[0]
      # The alignment length is taken from the first sequence; an empty set
      # of sequences has length 0 instead of raising on nil.
      @length = first_name.nil? ? 0 : sequences[first_name].length
    end

    # Yield every sequence string, following +order+ when one was given.
    #
    # @return [Enumerator] when called without a block
    def each
      return enum_for(:each) unless block_given?

      if @order.nil?
        @sequences.each { |_name, seq| yield seq }
      else
        @order.each { |name| yield @sequences[name] }
      end
    end

    # Collect alignment columns. Each column is an array holding the
    # character of every sequence (in iteration order) at that position.
    #
    # Positions are 1-based, regardless of whether an explicit order was
    # supplied (previously the unordered case silently used 0-based
    # positions, so the same call returned different columns).
    #
    # @param range [Range, Integer, nil] 1-based positions to extract; nil
    #   selects the whole alignment
    # @yield [Array<String>] each column, when a block is given
    # @return [Array<Array<String>>, nil] the columns, or nil when a block
    #   was given
    def each_position(range = nil)
      rows = to_a
      indices =
        case range
        when Range   then range.map { |pos| pos - 1 } # correct for 0 indexed strings
        when Integer then [range - 1]
        else (0...@length).to_a
        end

      positions = indices.map { |idx| rows.map { |seq| seq[idx] } }
      return positions unless block_given?

      positions.each { |column| yield column }
      nil
    end

    # Shorthand for #each_position without a block.
    def [](range = nil)
      each_position(range)
    end

    # Build the conservation line for the whole alignment or for +range+:
    # one symbol per column ('*' identical, ':' strong group, '.' weak group,
    # ' ' gap or no conservation).
    #
    # @param range [Range, Integer, nil] see #each_position
    # @return [String]
    def match(range = nil)
      each_position(range).map { |column| conservation_symbol(column) }.join
    end

    # Render every sequence as "name: sequence" followed by the match line.
    # Falls back to the keys of +sequences+ when no order was given, instead
    # of raising on nil.
    #
    # @return [String]
    def to_s
      names = order || sequences.keys
      rows = names.map { |name| "#{name}: #{sequences[name]}\n" }
      rows.join + match + "\n"
    end

    private

    # Grade a single column of residues with its match symbol.
    def conservation_symbol(column)
      # a gap anywhere in the column means no conservation symbol
      return ' ' if column.include?('-')

      residues = column.map { |c| c.upcase }.sort.uniq
      if residues.length == 1
        '*'
      elsif StrongConservationGroups.any? { |group| (residues - group).empty? }
        ':'
      elsif WeakConservationGroups.any? { |group| (residues - group).empty? }
        '.'
      else
        ' '
      end
    end
  end # end MgNu::Alignment class
end # end MgNu module
|
data/lib/mgnu/common.rb
ADDED
@@ -0,0 +1,68 @@
|
|
1
|
+
module MgNu
  # codon table 11 from http://www.ncbi.nlm.nih.gov/Taxonomy/Utils/wprintgc.cgi
  # standard bacteria/archaea/plastid codes
  #
  # Maps each lowercase DNA codon to its one-letter amino acid code; '*' marks
  # a stop codon. The table is frozen so it cannot be altered by accident.
  BACTERIA_CODONS = {
    'ttt' => 'F', 'tct' => 'S', 'tat' => 'Y', 'tgt' => 'C',
    'ttc' => 'F', 'tcc' => 'S', 'tac' => 'Y', 'tgc' => 'C',
    'tta' => 'L', 'tca' => 'S', 'taa' => '*', 'tga' => '*',
    'ttg' => 'L', 'tcg' => 'S', 'tag' => '*', 'tgg' => 'W',

    'ctt' => 'L', 'cct' => 'P', 'cat' => 'H', 'cgt' => 'R',
    'ctc' => 'L', 'ccc' => 'P', 'cac' => 'H', 'cgc' => 'R',
    'cta' => 'L', 'cca' => 'P', 'caa' => 'Q', 'cga' => 'R',
    'ctg' => 'L', 'ccg' => 'P', 'cag' => 'Q', 'cgg' => 'R',

    'att' => 'I', 'act' => 'T', 'aat' => 'N', 'agt' => 'S',
    'atc' => 'I', 'acc' => 'T', 'aac' => 'N', 'agc' => 'S',
    'ata' => 'I', 'aca' => 'T', 'aaa' => 'K', 'aga' => 'R',
    'atg' => 'M', 'acg' => 'T', 'aag' => 'K', 'agg' => 'R',

    'gtt' => 'V', 'gct' => 'A', 'gat' => 'D', 'ggt' => 'G',
    'gtc' => 'V', 'gcc' => 'A', 'gac' => 'D', 'ggc' => 'G',
    'gta' => 'V', 'gca' => 'A', 'gaa' => 'E', 'gga' => 'G',
    'gtg' => 'V', 'gcg' => 'A', 'gag' => 'E', 'ggg' => 'G'
  }.freeze
end
|
24
|
+
|
25
|
+
# example usage of Regexp#global_match
|
26
|
+
# re = /(\w+)/
|
27
|
+
# words = []
|
28
|
+
# re.global_match("cat dog house") do |m|
|
29
|
+
# words.push(m[0])
|
30
|
+
# end
|
31
|
+
# p words # ["cat", "dog", "house"]
|
32
|
+
class Regexp
  # Repeatedly match this pattern against +str+, passing the MatchData of
  # every match to the block.
  #
  # Each match is removed from the working copy of the string before the
  # next attempt, so text on either side of a removed match can join up and
  # match again. The caller's string is not modified.
  #
  # The block is optional: without one the method only reports whether the
  # pattern matched (it used to raise NoMethodError on nil in that case).
  #
  # @param str [String] the text to search
  # @yield [MatchData] every successive match
  # @return [true, nil] true when at least one match changed the string,
  #   nil otherwise
  def global_match(str, &block)
    matched = nil
    loop do
      remainder = str.sub(self) do
        block.call(Regexp.last_match) if block # pass MatchData obj
        ''
      end
      # nothing was removed: no (further) match, so we are done
      break matched if remainder == str
      str = remainder
      matched ||= true
    end
  end # end of global_match
end # end of Regexp class
|
46
|
+
|
47
|
+
# add print_multiline method to String class
|
48
|
+
class String
  # Wrap the string to +width+ columns. The first line is returned without
  # indentation; every following line is prefixed with the indent.
  #
  # Text is broken at runs of spaces where possible; a run of characters
  # without spaces that is too long is cut at the available width.
  #
  # @param width [Integer] total line width, including the indent
  # @param options [Hash] :indent => number of spaces used to indent
  #   continuation lines (default 12)
  # @return [String, nil] the wrapped text, or nil when there is nothing to
  #   print (empty string, or a string made up of line breaks only, which
  #   previously raised NoMethodError on nil)
  def print_multiline(width=80, options={})
    return unless length > 0
    indent = ' ' * (options[:indent] || 12)
    usable = width - indent.length
    # string broken up with spaces or solid string
    chunks = scan(/(.{1,#{usable}})(?: +|$)\n?|(.{#{usable}})/).map do |spaced, solid|
      spaced || solid
    end
    return if chunks.empty?

    first, *rest = chunks
    # first line without indent
    out = first.dup
    out << "\n" unless rest.empty?
    # all other lines with indent
    out << rest.map { |chunk| "#{indent}#{chunk}" }.join("\n")
    out
  end # end of print_multiline
end
|
data/lib/mgnu/genbank.rb
ADDED
@@ -0,0 +1,117 @@
|
|
1
|
+
require 'forwardable'
|
2
|
+
require 'mgnu/genbank/feature'
|
3
|
+
require 'mgnu/genbank/location'
|
4
|
+
require 'mgnu/genbank/qualifier'
|
5
|
+
require 'mgnu/genbank/reference'
|
6
|
+
require 'mgnu/genbank/source'
|
7
|
+
|
8
|
+
module MgNu
  # In-memory representation of one GenBank flat-file record. The accessors
  # hold the individual sections; #to_s writes them back out in the order
  # given by STRUCTURE, terminated by '//'.
  class Genbank
    attr_accessor :locus, :definition, :accession, :secondary_accession, :version, :dblink
    attr_accessor :geninfo_identifier, :keywords, :segment, :source, :references, :comment
    attr_accessor :features, :sequence
    include MgNu::Loggable
    extend Forwardable

    # Sections of a record, in the order #to_s emits them.
    STRUCTURE = [:locus, :definition, :accession, :version, :dblink,
                 :keywords, :segment, :source, :references, :comment,
                 :features, :sequence]

    # The fields of the LOCUS line. All members are expected to be strings
    # (they are padded with ljust/rjust in #to_s).
    Locus = Struct.new :name, :length, :no_of_strands, :molecule_type, :molecule_structure, :genbank_division, :modification_date do
      # Format the LOCUS line; the trailing comments give the target columns.
      def to_s
        str = ''
        str << 'LOCUS'.ljust(12) # 1-12
        str << name.ljust(17) # 13-29
        str << length.rjust(11) # 30-40
        str << ' bp ' # 41-44
        str << "#{no_of_strands}".ljust(3) # ss- ds- ms-, 45-47
        str << "#{molecule_type}".ljust(8) # 48-55
        str << "#{molecule_structure}".ljust(8) # linear or circular, 56-63
        str << " #{genbank_division} " # 64-68
        str << modification_date # 69
      end
    end

    # create a new Genbank object
    def initialize
      @locus = nil
      @definition = ''
      @accession = ''
      @secondary_accession = []
      @dblink = ''
      @version = ''
      @geninfo_identifier = ''
      @keywords = nil
      @segment = ''
      @source = nil
      @references = []
      @comment = ''
      @features = []
      @sequence = ''
    end

    # Both delegator lists forward to the Locus struct. The second list used
    # to lack the :@locus target, which made :molecule_structure the receiver
    # and left genbank_division / modification_date unusable.
    def_delegators :@locus, :name, :length, :no_of_strands, :molecule_type
    def_delegators :@locus, :molecule_structure, :genbank_division, :modification_date

    # string representation
    #
    # Sections that are absent or empty are skipped. KEYWORDS is always
    # written (as "KEYWORDS    ." when there are none).
    #
    # @return [String] the record in GenBank flat-file layout, ending in '//'
    def to_s
      str = ''
      last_index = STRUCTURE.length - 1
      STRUCTURE.each_with_index do |part, index|
        section = render_section(part, send(part))
        next if section.nil?
        str << section
        # print newline character if there are more parts
        str << "\n" if index < last_index
      end
      str << '//'
    end

    private

    # Render one section of the record.
    #
    # @param part [Symbol] a member of STRUCTURE
    # @param value [Object] the current value of that section
    # @return [String, nil] the text, or nil when the section is not printed
    def render_section(part, value)
      case part
      when :locus, :source
        value ? value.to_s : nil
      when :definition, :dblink, :segment, :comment
        return nil if value.nil? || value.empty?
        # print_multiline (String extension) may return nil; to_s guards that
        text = part.to_s.upcase.ljust(12) + value.print_multiline.to_s
        text << '.' if part == :definition
        text
      when :accession
        return nil if value.nil? || value.empty?
        text = 'ACCESSION'.ljust(12) + accession
        secondary = Array(secondary_accession)
        text << " #{secondary.join(' ')}" if secondary.any?
        text
      when :version
        return nil if value.nil? || value.empty?
        text = 'VERSION'.ljust(12) + version
        # the GI is optional; '' (the default) is truthy, so test for content
        gi = geninfo_identifier.to_s
        text << " GI:#{gi}" unless gi.empty?
        text
      when :features, :references
        return nil if value.nil? || value.empty?
        # the header is padded so that 'Location/Qualifiers' starts at the
        # qualifier column of the feature table (column 22)
        header = part == :features ? 'FEATURES'.ljust(21) + "Location/Qualifiers\n" : ''
        header + value.collect { |x| x.to_s }.join("\n")
      when :sequence
        # @sequence defaults to a plain String, which has no #value
        residues = value.respond_to?(:value) ? value.value : value
        return nil if residues.nil? || residues.empty?
        "#{'ORIGIN'.ljust(12)}\n" + @sequence.to_genbank
      when :keywords
        text = 'KEYWORDS'.ljust(12)
        # an empty keyword list makes print_multiline return nil
        text << Array(value).join('; ').print_multiline.to_s
        text << '.'
        text
      end
    end
  end # end of MgNu::Genbank class
end # end of MgNu module
|
117
|
+
__END__
|
data/lib/mgnu/genbank/feature.rb
ADDED
@@ -0,0 +1,84 @@
|
|
1
|
+
module MgNu
  class Genbank
    # One entry of a GenBank feature table: a feature type, its location and
    # a list of qualifiers. Qualifier values can be read through dynamic
    # readers named after the qualifier (e.g. +feature.gene+).
    class Feature
      attr_accessor :feature_type, :qualifiers, :location, :sequence
      attr_accessor :start_continues, :stop_continues, :raw_qualifiers

      # A line that starts a qualifier: 21 columns of whitespace, then '/'.
      QUALIFIER_START = /^\s{21}\//

      # create a new Feature object
      def initialize
        @qualifiers = []
        @raw_qualifiers = []
      end

      # for handling tags in gb format
      #
      # Any unknown method name is looked up among the qualifier names.
      #
      # @return [Object, Array, nil] the value of the single matching
      #   qualifier, an array of values when several match, or nil when
      #   none does
      def method_missing(method_name, *args)
        values = @qualifiers.select { |q| q.name == method_name.to_s }.map { |q| q.value }
        case values.length
        when 0 then nil
        when 1 then values.first
        else values
        end
      end

      # Keeps respond_to? in line with method_missing for qualifiers that
      # are actually present on this feature.
      def respond_to_missing?(method_name, include_private = false)
        @qualifiers.any? { |q| q.name == method_name.to_s } || super
      end

      # class method for parsing a gb entry in a buffer
      #
      # @param buffer [String] the lines of a single feature, separated by "\n"
      # @return [Feature]
      # @raise [RuntimeError] for a line that is neither a feature/location
      #   line nor a qualifier line
      def self.parse(buffer)
        lines = buffer.split("\n")
        feature = Feature.new # create a new feature
        lines.each_with_index do |line, i|
          if line =~ /^\s{5}([\w\-\*']+)\s+(.+)$/ # feature type and (beginning of) location line
            feature.feature_type = Regexp.last_match[1]
            loc = Regexp.last_match[2]

            # Fold continuation lines of the location into loc. Stop at the
            # first qualifier, including one without a value (e.g. /pseudo).
            while (following = lines[i + 1])
              break if following =~ /\/.+=.+/ || following =~ QUALIFIER_START
              # lstrip, not lstrip!: the bang form returns nil when there is
              # nothing to strip, which made the concatenation raise
              loc += following.lstrip
              lines.delete_at(i + 1)
            end
            feature.location = Location.new(loc)
          elsif line =~ /^\s{21}\/(.+?)=(.+)$/
            # non-greedy key: split at the first '=', so values containing
            # '=' stay intact
            key, value = Regexp.last_match[1], Regexp.last_match[2]

            # to handle multi-line qualifier values
            while (following = lines[i + 1])
              break if following =~ /^\s{21}\/(?:.+?)=/ # next qualifier
              # a value-less qualifier also ends the value, but only once
              # every opening quote has been closed
              break if following =~ QUALIFIER_START && value.count('"').even?
              value += " #{following.lstrip}"
              lines.delete_at(i + 1)
            end
            # parse out quotes
            quoted = false
            if value =~ /^"(.+)"$/
              value = Regexp.last_match[1]
              quoted = true # some qualifier values are part of a controlled vocabulary and, as such, unquoted
            end
            # make sure sequence contains no spaces
            value = value.gsub(/\s/, '') if key == 'translation'
            # add new qualifier to feature
            feature.qualifiers << Qualifier.new(:name => key, :value => value.squeeze(' '), :quoted => quoted)
          elsif line =~ /^\s{21}\/(.+)$/ # qualifier name w/out value
            key = Regexp.last_match[1]
            feature.qualifiers << Qualifier.new(:name => key)
          else
            raise "UNKNOWN FEATURE LINE TYPE: #{line} -- #{i}"
          end
        end # end loop through buffer
        feature
      end

      # string representation of Feature
      #
      # @return [String] five spaces, the feature type padded to 16 columns,
      #   the location, then every qualifier's own string form
      def to_s
        out = ' ' * 5
        out << feature_type.ljust(16)
        out << location.to_s
        qualifiers.each { |q| out << q.to_s }
        out
      end
    end # end MgNu::Genbank::Feature class
  end # end MgNu::Genbank class
end # end MgNu module
|