mgnu 2.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.yardopts +0 -0
- data/README.md +31 -0
- data/Rakefile +33 -0
- data/lib/mgnu.rb +9 -0
- data/lib/mgnu/alignment.rb +143 -0
- data/lib/mgnu/common.rb +68 -0
- data/lib/mgnu/genbank.rb +117 -0
- data/lib/mgnu/genbank/feature.rb +84 -0
- data/lib/mgnu/genbank/location.rb +150 -0
- data/lib/mgnu/genbank/qualifier.rb +45 -0
- data/lib/mgnu/genbank/reference.rb +114 -0
- data/lib/mgnu/genbank/source.rb +39 -0
- data/lib/mgnu/loggable.rb +61 -0
- data/lib/mgnu/parser.rb +50 -0
- data/lib/mgnu/parser/blast.rb +87 -0
- data/lib/mgnu/parser/blast/format0.rb +290 -0
- data/lib/mgnu/parser/blast/format7.rb +121 -0
- data/lib/mgnu/parser/blast/format8.rb +120 -0
- data/lib/mgnu/parser/blast/hsp.rb +75 -0
- data/lib/mgnu/parser/blast/query.rb +45 -0
- data/lib/mgnu/parser/blast/sbjct.rb +62 -0
- data/lib/mgnu/parser/clustalw.rb +72 -0
- data/lib/mgnu/parser/fasta.rb +61 -0
- data/lib/mgnu/parser/fasta_header_index.rb +39 -0
- data/lib/mgnu/parser/fasta_index.rb +57 -0
- data/lib/mgnu/parser/fastq.rb +61 -0
- data/lib/mgnu/parser/genbank.rb +187 -0
- data/lib/mgnu/parser/gff.rb +56 -0
- data/lib/mgnu/parser/iprscan/hit.rb +76 -0
- data/lib/mgnu/parser/iprscan_file.rb +39 -0
- data/lib/mgnu/parser/kegg_ontology_index.rb +163 -0
- data/lib/mgnu/parser/pilercr.rb +102 -0
- data/lib/mgnu/parser/prodigal.rb +170 -0
- data/lib/mgnu/parser/sam.rb +115 -0
- data/lib/mgnu/parser/sam/alignment.rb +22 -0
- data/lib/mgnu/parser/sam/header.rb +23 -0
- data/lib/mgnu/parser/sam/pair.rb +18 -0
- data/lib/mgnu/sequence.rb +207 -0
- data/lib/mgnu/sequence/fasta.rb +79 -0
- data/lib/mgnu/sequence/fastq.rb +43 -0
- data/lib/mgnu/version.rb +16 -0
- data/mgnu.gemspec +39 -0
- data/spec/mgnu/parser/blast_format0_spec.rb +114 -0
- data/spec/mgnu/parser/blast_format7_spec.rb +24 -0
- data/spec/mgnu/parser/blast_format8_spec.rb +26 -0
- data/spec/mgnu/parser/blast_multihsp_spec.rb +100 -0
- data/spec/mgnu/parser/blast_oof_spec.rb +53 -0
- data/spec/mgnu/parser/clustalw_spec.rb +90 -0
- data/spec/mgnu/parser/fasta_header_index_tc_parser_spec.rb +25 -0
- data/spec/mgnu/parser/fasta_index_tc_parser_spec.rb +25 -0
- data/spec/mgnu/parser/fasta_parser_spec.rb +53 -0
- data/spec/mgnu/parser_spec.rb +22 -0
- data/spec/mgnu/sequence/fasta_spec.rb +60 -0
- data/spec/mgnu/sequence/fastq_spec.rb +31 -0
- data/spec/mgnu/sequence_spec.rb +81 -0
- data/spec/mgnu_spec.rb +7 -0
- data/spec/spec_helper.rb +53 -0
- metadata +376 -0
@@ -0,0 +1,57 @@
|
|
1
|
+
require 'moneta'
|
2
|
+
require 'json'
|
3
|
+
|
4
|
+
module MgNu
|
5
|
+
module Parser
|
6
|
+
# Key/value index over a FASTA file.
#
# Every record yielded by MgNu::Parser::Fasta is stored in a Moneta store
# under its header name; the value is a JSON document holding the header
# description and the sequence.
class FastaIndex
  attr_reader :filename, :db_name, :db, :db_type

  # create a new FastaIndex parser
  #
  # filename - path handed to MgNu::Parser::Fasta; also used to derive the
  #            database file name (".tch" is appended unless already present).
  # options  - Hash; :db_type selects the backend (default :TokyoCabinet,
  #            which is the only backend wired up here).
  #
  # Raises ArgumentError for any other :db_type. Previously such a value left
  # @db nil and the first write/lookup failed with a NoMethodError on nil.
  def initialize(filename, options = {})
    options = { :db_type => :TokyoCabinet }.merge(options)

    @db_type = options[:db_type]
    @filename = filename

    unless @db_type == :TokyoCabinet
      raise ArgumentError, "unsupported db_type #{@db_type.inspect} (only :TokyoCabinet is implemented)"
    end

    # NOTE(review): when filename itself ends in ".tch" the same path is used
    # both as the database file and as the FASTA input of #parse -- confirm
    # that this is intended.
    @db_name = @filename =~ /^.+\.tch$/ ? @filename : "#{@filename}.tch"
    @db = Moneta.new(:TokyoCabinet, file: @db_name, type: :hdb)
    parse
  end

  # setup parse method for creating tokyo cabinet
  #
  # Stores one JSON entry per FASTA record, keyed by header name.
  def parse
    MgNu::Parser::Fasta.new(@filename).each do |f|
      entry = { 'description' => f.header_description, 'sequence' => f.sequence }
      @db[f.header_name] = entry.to_json
    end
  end # end of #parse

  # Look up a record by header name.
  #
  # Returns a MgNu::Sequence::Fasta rebuilt from the stored JSON, or nil when
  # the name is not in the index.
  def [](name)
    return nil unless @db.key?(name)

    d = JSON.parse(@db[name])
    MgNu::Sequence::Fasta.new(:header => "#{name} #{d['description']}",
                              :sequence => d['sequence'])
  end

  # Close the underlying store (no-op when no store was opened).
  def close
    @db.close unless @db.nil?
  end

end # end of MgNu::Parser::FastaIndex class
|
56
|
+
end # end of MgNu::Parser module
|
57
|
+
end # end of MgNu module
|
@@ -0,0 +1,61 @@
|
|
1
|
+
module MgNu
|
2
|
+
module Parser
|
3
|
+
# Streaming reader for four-line FASTQ records
# (header, sequence, quality header, quality).
class Fastq
  include Enumerable
  attr_reader :file, :filename

  # create a new Fastq parser
  #
  # filename - path to an existing, readable FASTQ file.
  #
  # Raises RuntimeError when the file does not exist or is not readable.
  # When no filename is given a message is printed to stderr and the process
  # exits with status 1 (behaviour kept from the original implementation).
  def initialize(filename = nil)
    @filename = filename

    unless @filename
      $stderr.puts("MgNu::Parser::Fastq.new(): need an existing fastq file name")
      exit(1)
    end

    # File.exist? instead of File.exists?, which was removed in Ruby 3.2.
    unless File.exist?(@filename) && File.readable?(@filename)
      raise "\n\n -- No file by that name (#{@filename}). Exiting\n\n"
    end

    @file = File.open(@filename)
  end

  # override enumerables
  #
  # Yields one MgNu::Sequence::Fastq per record. Without a block an
  # Enumerator is returned.
  #
  # A header line not starting with "@", or a quality header line not
  # starting with "+", is reported on stderr and terminates the process with
  # status 1. A truncated final record raises EOFError from IO#readline.
  def each
    return enum_for(:each) unless block_given?

    until @file.eof? # keep reading until EOF
      header   = @file.readline.chomp
      sequence = @file.readline.chomp
      qualhdr  = @file.readline.chomp
      quality  = @file.readline.chomp

      unless header =~ /^@(.*)/
        $stderr.puts "Malformed header!"
        $stderr.puts "\n#{header}"
        $stderr.puts "\nExiting at line #{@file.lineno}"
        exit(1)
      end
      header = $1

      unless qualhdr =~ /^\+(.*)/
        # Reported through $stderr like the malformed-header case above; the
        # previous code called an `error` helper this class never defines.
        $stderr.puts "Malformed quality header!"
        $stderr.puts "\n#{qualhdr}"
        $stderr.puts "\nExiting at line #{@file.lineno}"
        exit(1)
      end
      qualhdr = $1

      if header != qualhdr
        # A blank quality header ("+" alone) is shorthand for "same as the
        # sequence header". The regexp is anchored to the whole string: the
        # former /\s*/ matched everything, so the warning never fired.
        if qualhdr =~ /\A\s*\z/
          qualhdr = header
        else
          warn("Sequence header and quality header don't match!")
          warn("sequence: #{header}")
          warn(" quality: #{qualhdr}")
        end
      end

      yield MgNu::Sequence::Fastq.new(:header => header, :sequence => sequence, :qualhdr => qualhdr, :quality => quality)
    end # end of until @file.eof?
  end # end of #each

end # end of MgNu::Parser::Fastq class
|
60
|
+
end # end of MgNu::File module
|
61
|
+
end # end of MgNu module
|
@@ -0,0 +1,187 @@
|
|
1
|
+
module MgNu
|
2
|
+
module Parser
|
3
|
+
class Genbank
|
4
|
+
attr_reader :file
|
5
|
+
attr_accessor :genbank_instances
|
6
|
+
|
7
|
+
include MgNu::Loggable
|
8
|
+
include MgNu::Parser
|
9
|
+
|
10
|
+
InvalidGenbankFile = Class.new(StandardError)
|
11
|
+
|
12
|
+
LOCUS_REGEX = /^LOCUS\s+(\S+)\s+(\d+)\s+bp\s+(?:(ss-|ds-|ms-))?(\S+)\s+(?:(\S+)\s+)?(\S+)\s+(\S+)$/
|
13
|
+
|
14
|
+
# create a new Genbank parser
|
15
|
+
# Opens +filename+ for reading and starts with an empty list of parsed
# records.
#
# filename - path to an existing, readable GenBank flat file.
#
# Raises RuntimeError when no filename is given, or when the file does not
# exist or is not readable. The problem is also reported through +error+
# before raising.
def initialize(filename)
  @genbank_instances = []

  unless filename
    error("MgNu::Parser::Genbank#parse: need a filename")
    raise "no filename given!"
  end

  # File.exist? instead of File.exists?, which was removed in Ruby 3.2.
  unless File.exist?(filename) && File.readable?(filename)
    error("MgNu::Parser::Genbank#parse: problems with filename")
    raise "File doesn't exist or is not readable!"
  end

  @file = File.open(filename)
end
|
30
|
+
|
31
|
+
# Reads the whole file, one record per #parse_section call.
#
# debug - when true, the section parsers emit progress messages.
#
# Returns genbank_instances, the list of records collected so far.
def parse(debug=false)
  @debug = debug
  parse_section until file.eof?
  genbank_instances
end
|
39
|
+
|
40
|
+
# Parses one record, starting at its LOCUS line, and appends the resulting
# MgNu::Genbank object to genbank_instances.
#
# The record is consumed block by block with parse_until (not defined in
# this class; presumably supplied by the included MgNu::Parser module), in
# this order: DEFINITION, ACCESSION, VERSION/DBLINK, KEYWORDS/SEGMENT,
# SOURCE/REFERENCE/COMMENT, FEATURES, ORIGIN.
#
# A line consisting only of whitespace where a LOCUS line is expected is
# skipped. Any other non-matching line raises InvalidGenbankFile.
def parse_section
  locus_line = file.readline
  if md = locus_line.match(LOCUS_REGEX)
    genbank = MgNu::Genbank.new
    info("found a LOCUS line") if @debug
    genbank.locus = MgNu::Genbank::Locus.new(*md.captures)
    info("LOCUS name #{genbank.locus.name}") if @debug

    buffer = parse_until(file, /^ACCESSION/)
    if buffer.join =~ /^DEFINITION\s+(.+)$/m
      # collapse the (possibly multi-line) definition and drop its last character
      genbank.definition = $1.gsub(/\n/, ' ').gsub(/\s{2,}/, ' ').strip.chop
      info genbank.definition if @debug
    end

    buffer = parse_until(file, /^VERSION/)
    # parsing ACCESSION number line
    if buffer.join =~ /^ACCESSION\s+(.+)$/
      temp = $1.strip.squeeze(' ').split("\s")
      # multiple secondary accession numbers possible
      genbank.accession, genbank.secondary_accession = temp.shift, temp
    end
    info "ACCESSION: #{genbank.accession}" if @debug

    buffer = parse_until(file, /^KEYWORDS/)
    # parsing VERSION line
    buffer.each do |line|
      if line =~ /^VERSION\s+(.+)$/
        temp = $1.strip.squeeze(' ').split
        temp.each do |version|
          if version =~ /GI:(\d+)/
            genbank.geninfo_identifier = $1.to_i
          else
            genbank.version = version
          end
        end
      elsif line =~ /^DBLINK\s+(.+)$/
        genbank.dblink = $1.strip.squeeze(' ')
      end
    end

    buffer = parse_until(file, /^SOURCE/)

    # parse keywords and optional segment
    keyword_lines = []
    buffer.each do |line|
      if line =~ /^KEYWORDS\s+(.+)$/
        keyword_lines << $1.strip.squeeze(' ')
      elsif line =~ /^SEGMENT\s+(.+)$/
        genbank.segment = $1.strip.squeeze(' ')
      else
        keyword_lines << line
      end
    end
    k = keyword_lines.join
    unless k == "."
      k_array = k.split(/;\s*/) # keywords are separated by semicolons
      # gets rid of the period after the last keyword; guarded because the
      # split yields an empty array when no keyword text was collected
      k_array[-1].chop! unless k_array.empty?
      genbank.keywords = k_array
    end

    buffer = parse_until(file,/^FEATURES/)

    ri = buffer.index {|l| l =~ /^REFERENCE/ }
    ci = buffer.index {|l| l =~ /^COMMENT/ }

    if ri && ci
      genbank.source = MgNu::Genbank::Source.parse(buffer[0..ri-1])
      parse_references(buffer[ri..ci-1], genbank)
      genbank.comment = format_genbank_comment(buffer[ci..-1])
    elsif ri
      genbank.source = MgNu::Genbank::Source.parse(buffer[0..ri-1])
      parse_references(buffer[ri..-1], genbank)
    elsif ci
      genbank.source = MgNu::Genbank::Source.parse(buffer[0..ci-1])
      genbank.comment = format_genbank_comment(buffer[ci..-1])
    else
      # neither references nor comment line
      genbank.source = MgNu::Genbank::Source.parse(buffer)
    end

    info genbank.source.common_name if @debug
    info genbank.source.organism if @debug
    info genbank.source.lineage if @debug

    parse_features(parse_until(file, /^ORIGIN/), genbank)
    info "features count: #{genbank.features.length}" if @debug

    parse_sequence(parse_until(file, /\/\//), genbank)
    # NOTE(review): `try` is not a core Ruby method; this debug line relies on
    # it being defined elsewhere (e.g. by the project or ActiveSupport).
    info "sequence length: #{genbank.sequence.try(:length) || 0}" if @debug
    file.readline # consumes end of section line //
    genbank_instances << genbank
  else
    unless locus_line =~ /^\s*$/
      raise InvalidGenbankFile, "Missing or malformed LOCUS line."
    end
  end
end

# Joins the lines of a COMMENT block into one newline-separated string,
# removing the "COMMENT" tag, leading whitespace and repeated spaces.
#
# Uses the non-destructive String#lstrip: lstrip! returns nil when there is
# nothing to strip, which made the chained squeeze raise NoMethodError on a
# bare "COMMENT" line or a blank line inside the block.
def format_genbank_comment(lines)
  lines.map { |line| line.gsub(/^COMMENT/, '').lstrip.squeeze(' ') }.join("\n")
end
private :format_genbank_comment
|
137
|
+
|
138
|
+
# Turns the lines of a FEATURES block into feature objects appended to
# genbank.features.
#
# buffer  - Array of lines; a leading "FEATURES" header line is removed
#           from it in place.
# genbank - record object receiving the parsed features.
def parse_features(buffer, genbank)
  buffer.shift if buffer.first =~ /^FEATURES/

  split_at_features(buffer.join("\n")).each do |chunk|
    genbank.features << MgNu::Genbank::Feature.parse(chunk)
  end
end # end parse_features
|
146
|
+
|
147
|
+
# Splits the REFERENCE lines into one chunk per header tag and appends the
# parsed reference objects to genbank.references.
#
# buffer  - Array of lines belonging to the REFERENCE block(s).
# genbank - record object receiving the parsed references.
def parse_references(buffer, genbank)
  split_at_header_tag(buffer.join("\n")).each do |chunk|
    genbank.references << MgNu::Genbank::Reference.parse(chunk)
  end
end
|
153
|
+
|
154
|
+
# Builds the record's sequence from the lines of an ORIGIN block and hands
# each feature its own stretch of that sequence.
#
# buffer  - Array of lines; the first one (the ORIGIN line) is discarded.
# genbank - record object; its sequence is set to a MgNu::Sequence, or to
#           nil when the block holds no sequence lines.
def parse_sequence(buffer, genbank)
  buffer.shift # drop ORIGIN line
  info("inside parse_sequence") if @debug
  info("buffer is #{buffer.length}") if @debug

  if buffer.empty?
    genbank.sequence = nil
  else
    # position counters and all whitespace are removed, leaving the residues
    residues = buffer.join.gsub(/[\d\s]+/, "")
    genbank.sequence = MgNu::Sequence.new(:value => residues)
    genbank.features.each do |feature|
      feature.sequence = feature.location.get_sequence(genbank.sequence.value)
    end
  end
end
|
171
|
+
|
172
|
+
# splits at lines beginning with capital letter and no preceding space chars
|
173
|
+
# Cuts +str+ in front of every line that starts with a capital letter.
# A marker character is inserted after the preceding newline and the string
# is then split on that marker, so each piece keeps its trailing newline.
def split_at_header_tag(str)
  sep = "\001"
  marked = str.gsub(/\n(?=[A-Z])/) { "\n" + sep }
  marked.split(sep)
end
|
177
|
+
|
178
|
+
# Cuts +str+ in front of every line that begins with exactly five
# whitespace characters followed by a non-whitespace character (a new
# feature key). More deeply indented lines stay attached to the piece
# before them.
def split_at_features(str)
  sep = "\001"
  marked = str.gsub(/\n(?=\s{5}\S)/) { "\n" + sep }
  marked.split(sep)
end
|
182
|
+
|
183
|
+
end # end of MgNu::Parser::Genbank class
|
184
|
+
end # end of MgNu::Parser module
|
185
|
+
end # end of MgNu module
|
186
|
+
|
187
|
+
__END__
|
@@ -0,0 +1,56 @@
|
|
1
|
+
module MgNu
|
2
|
+
module Parser
|
3
|
+
# Line-oriented reader for tab-separated GFF files. Lines starting with
# "#" are skipped; every other line is wrapped in a GFF::Record.
class GFF
  include Enumerable

  attr_reader :file

  # create a new GFF parser
  #
  # filename - path of the GFF file. An existing, readable file is opened
  #            for reading; otherwise the path is opened with mode "w"
  #            (behaviour kept from the original -- note that mode "w"
  #            creates or truncates the file).
  #
  # Raises ArgumentError when no filename is given. The previous code called
  # an `error` helper this class does not define and left @file nil.
  def initialize(filename = nil)
    unless filename
      raise ArgumentError, "MgNu::Parser::GFF.new(): need a filename for an existing file"
    end

    # File.exist? instead of File.exists?, which was removed in Ruby 3.2.
    @file = if File.exist?(filename) && File.readable?(filename)
              File.open(filename)
            else
              File.new(filename, "w")
            end
  end

  # override enumerables
  #
  # Yields a Record for every non-comment line. Without a block an
  # Enumerator is returned.
  def each
    return enum_for(:each) unless block_given?

    @file.each_line do |line|
      line.chomp!
      next if line.start_with?('#')
      yield Record.new(line)
    end
  end # end of #each

  # class to deal with each line (record) of data
  class Record
    attr_accessor :name, :source, :feature, :start, :end
    attr_accessor :score, :strand, :frame, :attributes

    # line - one tab-separated line. The first nine columns are stored as
    #        strings; the ninth is additionally parsed into a Hash.
    def initialize(line)
      @name, @source, @feature, @start, @end,
        @score, @strand, @frame, @attributes = line.split("\t")
      @attributes = parse_attributes(attributes) if attributes
    end

    alias :seqname :name

    private

    # Parses 'key value; key value' into a Hash of strings.
    #
    # Splits on semicolons that are not escaped with a backslash. The
    # lookbehind keeps the character in front of the semicolon; the former
    # pattern /[^\\];/ consumed it, cutting the last character off every
    # value but the final one.
    def parse_attributes(attributes)
      attributes.split(/(?<!\\);/).each_with_object({}) do |atr, hash|
        key, value = atr.strip.split(' ', 2)
        next if key.nil? # nothing but whitespace between two semicolons
        hash[key] = value
      end
    end
  end # end of MgNu::Parser::GFF::Record class
end # end of MgNu::Parser::GFF class
|
55
|
+
end # end of MgNu::Parser module
|
56
|
+
end # end of MgNu module
|
@@ -0,0 +1,76 @@
|
|
1
|
+
module MgNu
|
2
|
+
module Parser
|
3
|
+
# Namespace for InterProScan result objects.
class Iprscan
  # One tab-separated result line: query, crc, length, db, db_id,
  # db_description, from, to, evalue, status, date and, optionally,
  # ipr_id, ipr_description and go.
  class Hit
    attr_accessor :query, :crc, :length, :db, :db_id, :db_description
    attr_accessor :from, :to, :evalue, :status, :date
    attr_accessor :ipr_id, :ipr_description, :go

    include MgNu::Loggable

    # Databases for which the e-value column is replaced by "NA".
    NO_EVALUE_DBS = %w[Seg TMHMM Coil].freeze

    # create a new Hit object
    #
    # line - one tab-separated result line. nil is treated as an empty
    #        line, and the caller's string is left unmodified (the previous
    #        code used chomp!, which raised on the nil default and mutated
    #        the argument).
    def initialize(line = nil)
      @ipr_id = nil
      @ipr_description = nil
      @go = nil

      fields = line.to_s.chomp.split(/\t/)
      @query = fields.shift
      @crc = fields.shift
      @length = fields.shift.to_i
      @db = fields.shift
      @db_id = fields.shift
      @db_description = fields.shift
      @from = fields.shift.to_i
      @to = fields.shift.to_i
      @evalue = fields.shift.to_f
      @evalue = "NA" if NO_EVALUE_DBS.include?(@db)
      @status = fields.shift
      @date = fields.shift

      # the InterPro columns are optional; each is set only when present
      @ipr_id = fields.shift unless fields.empty?
      @ipr_description = fields.shift unless fields.empty?
      @go = fields.shift unless fields.empty?
    end

    # Returns the hit as a tab-separated line. The InterPro columns are
    # appended only when an ipr_id is present, and GO only when it is set.
    def to_s
      str = "#{@query}\t#{@crc}\t#{@length}\t#{@db}\t#{@db_id}\t#{@db_description}\t"
      str += "#{@from}\t#{@to}\t#{@evalue}\t#{@status}\t#{@date}"
      unless @ipr_id.nil?
        str += "\t#{@ipr_id}\t#{@ipr_description}"
        str += "\t#{@go}" unless @go.nil?
      end
      str
    end

    # Absolute distance between the from and to coordinates.
    def match_length
      (@to - @from).abs
    end

    # Returns a one-line, human-readable description of the hit.
    #
    # The e-value is left out for every database whose e-value is "NA"
    # (Coil was previously printed as "evalue=NA"). The InterPro fields are
    # left out when ipr_id is missing as well as when it is the literal
    # "NULL" (a missing id was previously printed as empty fields).
    def summary
      string = "#{@db_description} (db=#{@db} db_id=#{@db_id}"
      string += " from=#{@from} to=#{@to}"
      string += " evalue=#{@evalue}" unless NO_EVALUE_DBS.include?(@db)
      unless @ipr_id.nil? || @ipr_id == "NULL"
        string += " interpro_id=#{@ipr_id} interpro_description=#{@ipr_description}"
      end
      string += " GO=#{@go}" unless @go.nil?
      string += ")"
      string
    end

  end # end of MgNu::Parser::Iprscan::Hit class
end # end of MgNu::Parser::Iprscan class
|
73
|
+
end # end of MgNu::Parser module
|
74
|
+
end # end of MgNu module
|
75
|
+
|
76
|
+
__END__
|
@@ -0,0 +1,39 @@
|
|
1
|
+
require 'mgnu/parser/iprscan/hit'
|
2
|
+
|
3
|
+
module MgNu
|
4
|
+
module Parser
|
5
|
+
# Reads a tab-separated iprscan result file and groups its hits by query.
class IprscanFile
  attr_reader :file, :queries

  include MgNu::Loggable

  # filename - path to an existing, readable result file. The file is
  #            parsed immediately; results are available through #queries.
  #
  # Raises RuntimeError when no filename is given, or when the file does not
  # exist or is not readable. The problem is also reported through +error+
  # before raising.
  def initialize(filename = nil)
    unless filename
      error("MgNu::Parser::IprscanFile.new(): need a filename")
      raise "no filename given!"
    end

    # File.exist? instead of File.exists?, which was removed in Ruby 3.2.
    unless File.exist?(filename) && File.readable?(filename)
      error("MgNu::Parser::IprscanFile.new(): problems with filename")
      raise "File doesn't exist or is not readable!"
    end

    @file = File.open(filename)
    @queries = Hash.new

    parse
  end

  # Builds one MgNu::Parser::Iprscan::Hit per line and collects the hits in
  # @queries, a Hash mapping each query name to an Array of its hits.
  # Blank lines are skipped; they used to produce a hit stored under a nil
  # query.
  def parse
    @file.each do |line|
      line.chomp!
      next if line.strip.empty?

      hit = MgNu::Parser::Iprscan::Hit.new(line)
      (@queries[hit.query] ||= []) << hit
    end
  end
end # end of MgNu::Parser::IprscanFile class
|
36
|
+
end # end of MgNu::Parser module
|
37
|
+
end # end of MgNu module
|
38
|
+
|
39
|
+
__END__
|