unipept 0.4.2 → 0.5.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/VERSION +1 -1
- data/bin/peptfilter +5 -0
- data/bin/prot2pept +23 -2
- data/bin/unipept +43 -190
- data/lib/unipept/api_runner.rb +217 -0
- data/lib/unipept/formatters.rb +23 -6
- data/lib/unipept.rb +1 -0
- metadata +3 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 6f20b5baa54b98b05f96b2ed54cdcc4bc6772021
|
4
|
+
data.tar.gz: 0fdb2fc4944c5bfbe8ad76395531b5414d054f22
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 182a1babb95902e0cb74d08494748940d02fb77f0c502086eec19809ed2c4b2b8af5a9f190041efb8ac7e30614230971bd5e2e53aec6770587888543b128eafc
|
7
|
+
data.tar.gz: fde9797bcdf7be09258b8010a39dd91b686732c2bb647b1488f51bac96a9c66efddf1061ab1919b35c90f3eb76334f67b84e8a707c26a7dcc630c3e1020053a0
|
data/VERSION
CHANGED
@@ -1 +1 @@
|
|
1
|
-
0.4.2
|
1
|
+
0.5.0
|
data/bin/peptfilter
CHANGED
@@ -16,6 +16,11 @@ root_cmd = Cri::Command.new_basic_root.modify do
|
|
16
16
|
lacks = opts.fetch(:lacks, "").chars.to_a
|
17
17
|
contains = opts.fetch(:contains, "").chars.to_a
|
18
18
|
$stdin.each_line do |pept|
|
19
|
+
# FASTA headers
|
20
|
+
if pept.start_with? '>'
|
21
|
+
puts pept
|
22
|
+
next
|
23
|
+
end
|
19
24
|
pept = pept.chomp
|
20
25
|
length_ok = pept.length >= minlen && pept.length <= maxlen
|
21
26
|
lacks_ok = (pept.chars.to_a & lacks).size == 0
|
data/bin/prot2pept
CHANGED
@@ -8,8 +8,29 @@ root_cmd = Cri::Command.new_basic_root.modify do
|
|
8
8
|
required :p, :pattern, "cleavage pattern to split input protein (default: ([KR])([^P]))"
|
9
9
|
run do |opts, args, cmd|
|
10
10
|
pattern = opts.fetch(:pattern, "([KR])([^P])")
|
11
|
-
|
12
|
-
|
11
|
+
# decide if we have FASTA input
|
12
|
+
fasta_header = gets
|
13
|
+
if fasta_header.start_with? '>'
|
14
|
+
# fasta input, need to join lines
|
15
|
+
while !$stdin.eof?
|
16
|
+
prot = ""
|
17
|
+
# Sometimes you just got to accept this weird and ugly code
|
18
|
+
until $stdin.eof? || (line = gets).start_with?('>')
|
19
|
+
prot += line.chomp
|
20
|
+
end
|
21
|
+
puts fasta_header
|
22
|
+
puts prot.gsub(/#{pattern}/,"\\1\n\\2").gsub(/#{pattern}/, "\\1\n\\2").split("\n").reject(&:empty?)
|
23
|
+
|
24
|
+
fasta_header = line
|
25
|
+
end
|
26
|
+
else
|
27
|
+
# handle our already read line
|
28
|
+
puts fasta_header.gsub(/#{pattern}/,"\\1\n\\2").gsub(/#{pattern}/, "\\1\n\\2").split("\n").reject(&:empty?)
|
29
|
+
|
30
|
+
# we no longer have to join lines as input is now more sane
|
31
|
+
$stdin.each_line do |prot|
|
32
|
+
puts prot.gsub(/#{pattern}/,"\\1\n\\2").gsub(/#{pattern}/, "\\1\n\\2").split("\n").reject(&:empty?)
|
33
|
+
end
|
13
34
|
end
|
14
35
|
end
|
15
36
|
end
|
data/bin/unipept
CHANGED
@@ -10,213 +10,51 @@ require_relative '../lib/unipept'
|
|
10
10
|
|
11
11
|
Signal.trap("PIPE", "EXIT")
|
12
12
|
Signal.trap("INT", "EXIT")
|
13
|
-
class ApiRunner < Cri::CommandRunner
|
14
|
-
|
15
|
-
def initialize(args, opts, cmd)
|
16
|
-
super
|
17
|
-
@configuration = Unipept::Configuration.new
|
18
|
-
host = @configuration['host']
|
19
|
-
if host.nil? || host.empty?
|
20
|
-
puts "WARNING: no host has been set, you can set the host with `unipept config host http://localhost:3000/`"
|
21
|
-
exit 1
|
22
|
-
end
|
23
|
-
if !host.start_with? "http://"
|
24
|
-
host = "http://#{host}"
|
25
|
-
end
|
26
13
|
|
27
|
-
|
28
|
-
|
29
|
-
end
|
14
|
+
module Unipept
|
15
|
+
class Taxa2lca < ApiRunner
|
30
16
|
|
31
|
-
|
32
|
-
|
33
|
-
end
|
34
|
-
|
35
|
-
def input_iterator
|
36
|
-
if options[:input]
|
37
|
-
File.readlines(options[:input]).each
|
38
|
-
else
|
39
|
-
STDIN.each_line
|
17
|
+
def mapping
|
18
|
+
{"taxa2lca" => "taxa2lca"}
|
40
19
|
end
|
41
|
-
end
|
42
20
|
|
43
|
-
|
44
|
-
|
45
|
-
|
21
|
+
def peptide_iterator(peptides, &block)
|
22
|
+
block.call(peptides.to_a, 0)
|
23
|
+
end
|
46
24
|
|
47
|
-
|
48
|
-
|
49
|
-
if filter.empty?
|
50
|
-
names = true
|
51
|
-
else
|
52
|
-
names = filter.any? {|f| /.*name.*/.match f}
|
25
|
+
def batch_size
|
26
|
+
raise "NOT NEEDED FOR TAXA2LCA"
|
53
27
|
end
|
54
|
-
|
55
|
-
:equate_il => options[:equate],
|
56
|
-
:extra => options[:extra],
|
57
|
-
:names => names,
|
58
|
-
}
|
28
|
+
|
59
29
|
end
|
60
30
|
|
61
|
-
|
62
|
-
return if options[:quiet]
|
63
|
-
return unless STDOUT.tty?
|
64
|
-
last_fetched = @configuration['last_fetch_date']
|
65
|
-
if last_fetched.nil? || (last_fetched + 60 * 60 * 24) < Time.now
|
66
|
-
version = File.read(File.join(File.dirname(__FILE__), "..", "VERSION"))
|
67
|
-
puts Typhoeus.get(@message_url, params: {version: version}).body
|
31
|
+
class Pept2prot < ApiRunner
|
68
32
|
|
69
|
-
|
70
|
-
|
33
|
+
def mapping
|
34
|
+
{"pept2prot" => "pept2prot"}
|
71
35
|
end
|
72
|
-
end
|
73
36
|
|
74
|
-
|
75
|
-
|
76
|
-
|
77
|
-
|
78
|
-
|
79
|
-
|
80
|
-
filter_list = options[:select] ? options[:select] : []
|
81
|
-
filter_list = filter_list.map {|f| glob_to_regex(f) }
|
82
|
-
output = STDOUT.tty? ? STDOUT : STDERR
|
83
|
-
|
84
|
-
batch_order = Unipept::BatchOrder.new
|
85
|
-
|
86
|
-
printed_header = false
|
87
|
-
result = []
|
88
|
-
|
89
|
-
hydra = Typhoeus::Hydra.new(max_concurrency: 20)
|
90
|
-
num_req = 0
|
91
|
-
|
92
|
-
peptide_iterator(peptides) do |sub_division, i|
|
93
|
-
request = Typhoeus::Request.new(
|
94
|
-
@url,
|
95
|
-
method: :post,
|
96
|
-
body: url_options(sub_division),
|
97
|
-
accept_encoding: "gzip"
|
98
|
-
)
|
99
|
-
request.on_complete do |resp|
|
100
|
-
if resp.timed_out?
|
101
|
-
$stderr.puts "request timed out, continuing anyway, but results might be incomplete"
|
102
|
-
else
|
103
|
-
if resp.success?
|
104
|
-
# if JSON parsing goes wrong
|
105
|
-
sub_result = JSON[resp.response_body] rescue []
|
106
|
-
sub_result = [sub_result] if not sub_result.kind_of? Array
|
107
|
-
|
108
|
-
sub_result.map! {|r| r.select! {|k,v| filter_list.any? {|f| f.match k } } } if ! filter_list.empty?
|
109
|
-
|
110
|
-
if options[:xml]
|
111
|
-
result << sub_result
|
112
|
-
end
|
113
|
-
|
114
|
-
# wait till it's our turn to write
|
115
|
-
batch_order.wait(i) do
|
116
|
-
if ! sub_result.empty?
|
117
|
-
if ! printed_header
|
118
|
-
write_to_output formatter.header(sub_result)
|
119
|
-
printed_header = true
|
120
|
-
end
|
121
|
-
write_to_output formatter.format(sub_result)
|
122
|
-
end
|
123
|
-
end
|
124
|
-
else
|
125
|
-
path = File.expand_path(File.join(Dir.home, "unipept.log"))
|
126
|
-
File.open(path, "w") do |f|
|
127
|
-
f.write resp.response_body
|
128
|
-
end
|
129
|
-
$stderr.puts "API request failed! log can be found in #{path}"
|
37
|
+
def download_xml(result)
|
38
|
+
if options[:xml]
|
39
|
+
FileUtils.mkdir_p(options[:xml])
|
40
|
+
result.first.each do |prot|
|
41
|
+
File.open(options[:xml] + "/#{prot['uniprot_id']}.xml", "wb") do |f|
|
42
|
+
f.write Typhoeus.get("http://www.uniprot.org/uniprot/#{prot['uniprot_id']}.xml").response_body
|
130
43
|
end
|
131
44
|
end
|
132
45
|
end
|
133
|
-
hydra.queue request
|
134
|
-
|
135
|
-
num_req += 1
|
136
|
-
if num_req % 200 == 0
|
137
|
-
hydra.run
|
138
|
-
end
|
139
|
-
|
140
46
|
end
|
141
47
|
|
142
|
-
|
143
|
-
|
144
|
-
begin
|
145
|
-
download_xml(result)
|
146
|
-
rescue
|
147
|
-
STDERR.puts "Something went wrong while downloading xml information! please check the output"
|
48
|
+
def batch_size
|
49
|
+
10
|
148
50
|
end
|
149
|
-
|
150
|
-
end
|
151
|
-
|
152
|
-
def write_to_output(string)
|
153
|
-
if options[:output]
|
154
|
-
File.open(options[:output], 'a') do |f|
|
155
|
-
f.write string
|
156
|
-
end
|
157
|
-
else
|
158
|
-
puts string
|
159
|
-
end
|
160
|
-
end
|
161
|
-
|
162
|
-
|
163
|
-
def download_xml(result)
|
164
|
-
if options[:xml]
|
165
|
-
File.open(options[:xml] + ".xml", "wb") do |f|
|
166
|
-
f.write Typhoeus.get("http://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?db=taxonomy&id=#{result.first.map{|h| h['taxon_id'] }.join(",")}&retmode=xml").response_body
|
167
|
-
end
|
168
|
-
end
|
169
|
-
end
|
170
|
-
|
171
|
-
def peptide_iterator(peptides, &block)
|
172
|
-
peptides.each_slice(batch_size).with_index(&block)
|
173
|
-
end
|
174
|
-
|
175
|
-
private
|
176
|
-
|
177
|
-
def glob_to_regex(glob_string)
|
178
|
-
# only implement * -> . for now
|
179
|
-
Regexp.new glob_string.gsub("*", ".*")
|
180
|
-
end
|
181
|
-
end
|
182
|
-
|
183
|
-
class Taxa2lca < ApiRunner
|
184
|
-
|
185
|
-
def mapping
|
186
|
-
{"taxa2lca" => "taxa2lca"}
|
187
|
-
end
|
188
|
-
|
189
|
-
def peptide_iterator(peptides, &block)
|
190
|
-
block.call(peptides.to_a, 0)
|
191
|
-
end
|
192
|
-
|
193
|
-
def batch_size
|
194
|
-
raise "NOT NEEDED FOR TAXA2LCA"
|
195
51
|
end
|
196
52
|
|
197
|
-
|
198
|
-
|
199
|
-
|
200
|
-
|
201
|
-
def mapping
|
202
|
-
{"pept2prot" => "pept2prot"}
|
203
|
-
end
|
204
|
-
|
205
|
-
def download_xml(result)
|
206
|
-
if options[:xml]
|
207
|
-
FileUtils.mkdir_p(options[:xml])
|
208
|
-
result.first.each do |prot|
|
209
|
-
File.open(options[:xml] + "/#{prot['uniprot_id']}.xml", "wb") do |f|
|
210
|
-
f.write Typhoeus.get("http://www.uniprot.org/uniprot/#{prot['uniprot_id']}.xml").response_body
|
211
|
-
end
|
212
|
-
end
|
53
|
+
class Taxonomy < ApiRunner
|
54
|
+
def mapping
|
55
|
+
{"taxonomy" => "taxonomy"}
|
213
56
|
end
|
214
57
|
end
|
215
|
-
|
216
|
-
def batch_size
|
217
|
-
10
|
218
|
-
end
|
219
|
-
|
220
58
|
end
|
221
59
|
|
222
60
|
root_cmd = Cri::Command.new_basic_root.modify do
|
@@ -227,6 +65,9 @@ root_cmd = Cri::Command.new_basic_root.modify do
|
|
227
65
|
option :o, :output, "output file", :argument => :required
|
228
66
|
option :f, :format, "output format (available: #{Unipept::Formatter.available.join "," }) (default: #{Unipept::Formatter.default})", :argument => :required
|
229
67
|
|
68
|
+
# Configuration options
|
69
|
+
option nil, "config-host", "Override host setting", argument: :required
|
70
|
+
|
230
71
|
run do |opts, args, cmd|
|
231
72
|
if opts[:version]
|
232
73
|
puts File.read(File.join(File.dirname(__FILE__), "..", "VERSION"))
|
@@ -260,7 +101,7 @@ root_cmd.define_command('pept2taxa') do
|
|
260
101
|
option :a, :extra, "Show full lineage"
|
261
102
|
option :x, :xml, "Download taxonomy from NCBI as xml (specify output filename)", :argument => :required
|
262
103
|
|
263
|
-
runner ApiRunner
|
104
|
+
runner Unipept::ApiRunner
|
264
105
|
end
|
265
106
|
|
266
107
|
root_cmd.define_command('pept2lca') do
|
@@ -273,7 +114,7 @@ root_cmd.define_command('pept2lca') do
|
|
273
114
|
option :s, :select, "select the attributes", :argument => :required, :multiple => true
|
274
115
|
option :a, :extra, "Show full lineage"
|
275
116
|
|
276
|
-
runner ApiRunner
|
117
|
+
runner Unipept::ApiRunner
|
277
118
|
end
|
278
119
|
|
279
120
|
root_cmd.define_command('taxa2lca') do
|
@@ -285,7 +126,7 @@ root_cmd.define_command('taxa2lca') do
|
|
285
126
|
option :s, :select, "select the attributes", :argument => :required, :multiple => true
|
286
127
|
option :a, :extra, "Show full lineage"
|
287
128
|
|
288
|
-
runner Taxa2lca
|
129
|
+
runner Unipept::Taxa2lca
|
289
130
|
end
|
290
131
|
|
291
132
|
root_cmd.define_command('pept2prot') do
|
@@ -299,7 +140,19 @@ root_cmd.define_command('pept2prot') do
|
|
299
140
|
option :x, :xml, "download uniprot record in specified directory", :argument => :required
|
300
141
|
flag :a, :extra, "include all information. WARNING: will take much longer!"
|
301
142
|
|
302
|
-
runner Pept2prot
|
143
|
+
runner Unipept::Pept2prot
|
144
|
+
end
|
145
|
+
|
146
|
+
root_cmd.define_command('taxonomy') do
|
147
|
+
usage 'taxonomy [options]'
|
148
|
+
aliases :tax
|
149
|
+
summary 'Give NCBI taxonomy information on given input taxon ids'
|
150
|
+
description 'Returns information for each input taxon id'
|
151
|
+
|
152
|
+
option :s, :select, "select the attributes", :argument => :required, :multiple => true
|
153
|
+
flag :a, :extra, "include all information. WARNING: will take much longer!"
|
154
|
+
|
155
|
+
runner Unipept::Taxonomy
|
303
156
|
end
|
304
157
|
|
305
158
|
root_cmd.run(ARGV)
|
@@ -0,0 +1,217 @@
|
|
1
|
+
module Unipept
|
2
|
+
class ApiRunner < Cri::CommandRunner
|
3
|
+
|
4
|
+
def initialize(args, opts, cmd)
|
5
|
+
super
|
6
|
+
@configuration = Unipept::Configuration.new
|
7
|
+
|
8
|
+
set_configuration
|
9
|
+
|
10
|
+
@url = "#{@host}/api/v1/#{mapping[cmd.name]}.json"
|
11
|
+
@message_url = "#{@host}/api/v1/messages.json"
|
12
|
+
end
|
13
|
+
|
14
|
+
def set_configuration
|
15
|
+
# find host in opts first
|
16
|
+
if options[:'config-host']
|
17
|
+
host = options[:'config-host']
|
18
|
+
else
|
19
|
+
host = @configuration['host']
|
20
|
+
end
|
21
|
+
|
22
|
+
# No host has been set?
|
23
|
+
if host.nil? || host.empty?
|
24
|
+
puts "WARNING: no host has been set, you can set the host with `unipept config host http://localhost:3000/`"
|
25
|
+
exit 1
|
26
|
+
end
|
27
|
+
if !host.start_with? "http://"
|
28
|
+
host = "http://#{host}"
|
29
|
+
end
|
30
|
+
|
31
|
+
@host = host
|
32
|
+
end
|
33
|
+
|
34
|
+
def mapping
|
35
|
+
{'pept2taxa' => 'pept2taxa', 'pept2lca' => 'pept2lca'}
|
36
|
+
end
|
37
|
+
|
38
|
+
def input_iterator
|
39
|
+
# Argument over file input over stdin
|
40
|
+
if !arguments.empty?
|
41
|
+
arguments.each
|
42
|
+
else
|
43
|
+
if options[:input]
|
44
|
+
IO.foreach(options[:input])
|
45
|
+
else
|
46
|
+
STDIN.each_line
|
47
|
+
end
|
48
|
+
end
|
49
|
+
end
|
50
|
+
|
51
|
+
def batch_size
|
52
|
+
100
|
53
|
+
end
|
54
|
+
|
55
|
+
def url_options(sub_part)
|
56
|
+
filter = options[:select] ? options[:select] : []
|
57
|
+
if filter.empty?
|
58
|
+
names = true
|
59
|
+
else
|
60
|
+
names = filter.any? {|f| /.*name.*/.match f}
|
61
|
+
end
|
62
|
+
{:input => sub_part,
|
63
|
+
:equate_il => options[:equate],
|
64
|
+
:extra => options[:extra],
|
65
|
+
:names => names,
|
66
|
+
}
|
67
|
+
end
|
68
|
+
|
69
|
+
def get_server_message
|
70
|
+
return if options[:quiet]
|
71
|
+
return unless STDOUT.tty?
|
72
|
+
last_fetched = @configuration['last_fetch_date']
|
73
|
+
if last_fetched.nil? || (last_fetched + 60 * 60 * 24) < Time.now
|
74
|
+
version = File.read(File.join(File.dirname(__FILE__), "..", "VERSION"))
|
75
|
+
puts Typhoeus.get(@message_url, params: {version: version}).body
|
76
|
+
|
77
|
+
@configuration['last_fetch_date'] = Time.now
|
78
|
+
@configuration.save
|
79
|
+
end
|
80
|
+
end
|
81
|
+
|
82
|
+
def run
|
83
|
+
get_server_message
|
84
|
+
|
85
|
+
formatter = Unipept::Formatter.new_for_format(options[:format])
|
86
|
+
peptides = input_iterator
|
87
|
+
|
88
|
+
filter_list = options[:select] ? options[:select] : []
|
89
|
+
filter_list = filter_list.map {|f| glob_to_regex(f) }
|
90
|
+
output = STDOUT.tty? ? STDOUT : STDERR
|
91
|
+
|
92
|
+
batch_order = Unipept::BatchOrder.new
|
93
|
+
|
94
|
+
printed_header = false
|
95
|
+
result = []
|
96
|
+
|
97
|
+
hydra = Typhoeus::Hydra.new(max_concurrency: 10)
|
98
|
+
num_req = 0
|
99
|
+
|
100
|
+
peptide_iterator(peptides) do |sub_division, i, fasta_mapper|
|
101
|
+
request = Typhoeus::Request.new(
|
102
|
+
@url,
|
103
|
+
method: :post,
|
104
|
+
body: url_options(sub_division),
|
105
|
+
accept_encoding: "gzip"
|
106
|
+
)
|
107
|
+
request.on_complete do |resp|
|
108
|
+
if resp.timed_out?
|
109
|
+
$stderr.puts "request timed out, continuing anyway, but results might be incomplete"
|
110
|
+
else
|
111
|
+
if resp.success?
|
112
|
+
# if JSON parsing goes wrong
|
113
|
+
sub_result = JSON[resp.response_body] rescue []
|
114
|
+
sub_result = [sub_result] if not sub_result.kind_of? Array
|
115
|
+
|
116
|
+
sub_result.map! {|r| r.select! {|k,v| filter_list.any? {|f| f.match k } } } if ! filter_list.empty?
|
117
|
+
|
118
|
+
if options[:xml]
|
119
|
+
result << sub_result
|
120
|
+
end
|
121
|
+
|
122
|
+
# wait till it's our turn to write
|
123
|
+
batch_order.wait(i) do
|
124
|
+
if ! sub_result.empty?
|
125
|
+
if ! printed_header
|
126
|
+
write_to_output formatter.header(sub_result, fasta_mapper)
|
127
|
+
printed_header = true
|
128
|
+
end
|
129
|
+
write_to_output formatter.format(sub_result, fasta_mapper)
|
130
|
+
end
|
131
|
+
end
|
132
|
+
else
|
133
|
+
save_error(resp.response_body)
|
134
|
+
end
|
135
|
+
end
|
136
|
+
end
|
137
|
+
hydra.queue request
|
138
|
+
|
139
|
+
num_req += 1
|
140
|
+
if num_req % 200 == 0
|
141
|
+
hydra.run
|
142
|
+
end
|
143
|
+
|
144
|
+
end
|
145
|
+
|
146
|
+
hydra.run
|
147
|
+
|
148
|
+
begin
|
149
|
+
download_xml(result)
|
150
|
+
rescue
|
151
|
+
STDERR.puts "Something went wrong while downloading xml information! please check the output"
|
152
|
+
end
|
153
|
+
|
154
|
+
end
|
155
|
+
|
156
|
+
def save_error(message)
|
157
|
+
path = File.expand_path(File.join(Dir.home, ".unipept", "unipept-#{Time.now.strftime("%F-%T")}.log"))
|
158
|
+
FileUtils.mkdir_p File.dirname(path)
|
159
|
+
File.open(path, "w") do |f|
|
160
|
+
f.write message
|
161
|
+
end
|
162
|
+
$stderr.puts "API request failed! log can be found in #{path}"
|
163
|
+
end
|
164
|
+
|
165
|
+
def write_to_output(string)
|
166
|
+
if options[:output]
|
167
|
+
File.open(options[:output], 'a') do |f|
|
168
|
+
f.write string
|
169
|
+
end
|
170
|
+
else
|
171
|
+
puts string
|
172
|
+
end
|
173
|
+
end
|
174
|
+
|
175
|
+
|
176
|
+
def download_xml(result)
|
177
|
+
if options[:xml]
|
178
|
+
File.open(options[:xml] + ".xml", "wb") do |f|
|
179
|
+
f.write Typhoeus.get("http://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?db=taxonomy&id=#{result.first.map{|h| h['taxon_id'] }.join(",")}&retmode=xml").response_body
|
180
|
+
end
|
181
|
+
end
|
182
|
+
end
|
183
|
+
|
184
|
+
def peptide_iterator(peptides, &block)
|
185
|
+
first = peptides.first
|
186
|
+
if first.start_with? '>'
|
187
|
+
# FASTA MODE ENGAGED
|
188
|
+
fasta_header = first
|
189
|
+
peptides.each_slice(batch_size).with_index do |sub,i|
|
190
|
+
fasta_mapper = {}
|
191
|
+
sub.map! {|s| s.chomp}
|
192
|
+
j = 0
|
193
|
+
while j < sub.size
|
194
|
+
if sub[j].start_with? '>'
|
195
|
+
fasta_header = sub[j]
|
196
|
+
else
|
197
|
+
fasta_mapper[sub[j]] = fasta_header
|
198
|
+
end
|
199
|
+
j += 1
|
200
|
+
end
|
201
|
+
sub -= fasta_mapper.values.uniq
|
202
|
+
block.call(sub, i, fasta_mapper)
|
203
|
+
end
|
204
|
+
|
205
|
+
else
|
206
|
+
peptides.each_slice(batch_size).with_index(&block)
|
207
|
+
end
|
208
|
+
end
|
209
|
+
|
210
|
+
private
|
211
|
+
|
212
|
+
def glob_to_regex(glob_string)
|
213
|
+
# only implement * -> . for now
|
214
|
+
Regexp.new glob_string.gsub("*", ".*")
|
215
|
+
end
|
216
|
+
end
|
217
|
+
end
|
data/lib/unipept/formatters.rb
CHANGED
@@ -30,7 +30,7 @@ module Unipept
|
|
30
30
|
end
|
31
31
|
|
32
32
|
# JSON formatted data goes in, something other comes out
|
33
|
-
def format(data)
|
33
|
+
def format(data, fasta_mapper = nil)
|
34
34
|
data
|
35
35
|
end
|
36
36
|
end
|
@@ -49,23 +49,40 @@ module Unipept
|
|
49
49
|
|
50
50
|
register :csv
|
51
51
|
|
52
|
-
def header(data)
|
52
|
+
def header(data, fasta_mapper = nil)
|
53
53
|
CSV.generate do |csv|
|
54
54
|
first = data.first
|
55
55
|
if first.kind_of? Array
|
56
56
|
first = first.first
|
57
57
|
end
|
58
|
-
|
58
|
+
if fasta_mapper
|
59
|
+
csv << (['fasta_header'] + first.keys).map(&:to_s) if first
|
60
|
+
else
|
61
|
+
csv << first.keys.map(&:to_s) if first
|
62
|
+
end
|
63
|
+
|
59
64
|
end
|
60
65
|
end
|
61
66
|
|
62
|
-
def format(data)
|
67
|
+
def format(data, fasta_mapper = nil)
|
63
68
|
CSV.generate do |csv|
|
64
69
|
data.each do |o|
|
65
70
|
if o.kind_of? Array
|
66
|
-
o.each
|
71
|
+
o.each do |h|
|
72
|
+
if fasta_mapper
|
73
|
+
extra_key = [fasta_mapper[h.values.first]]
|
74
|
+
csv << (extra_key + h.values).map { |v| v == "" ? nil : v }
|
75
|
+
else
|
76
|
+
csv << h.values.map { |v| v == "" ? nil : v }
|
77
|
+
end
|
78
|
+
end
|
67
79
|
else
|
68
|
-
|
80
|
+
if fasta_mapper
|
81
|
+
extra_key = [fasta_mapper[o.values.first]]
|
82
|
+
csv << (extra_key + o.values).map { |v| v == "" ? nil : v }
|
83
|
+
else
|
84
|
+
csv << o.values.map { |v| v == "" ? nil : v }
|
85
|
+
end
|
69
86
|
end
|
70
87
|
end
|
71
88
|
end
|
data/lib/unipept.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: unipept
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.4.2
|
4
|
+
version: 0.5.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Toon Willems
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2014-05-
|
11
|
+
date: 2014-05-19 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: shoulda
|
@@ -131,6 +131,7 @@ files:
|
|
131
131
|
- bin/unipept
|
132
132
|
- bin/uniprot
|
133
133
|
- lib/unipept.rb
|
134
|
+
- lib/unipept/api_runner.rb
|
134
135
|
- lib/unipept/batch_order.rb
|
135
136
|
- lib/unipept/configuration.rb
|
136
137
|
- lib/unipept/formatters.rb
|