unipept 0.4.2 → 0.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/VERSION +1 -1
- data/bin/peptfilter +5 -0
- data/bin/prot2pept +23 -2
- data/bin/unipept +43 -190
- data/lib/unipept/api_runner.rb +217 -0
- data/lib/unipept/formatters.rb +23 -6
- data/lib/unipept.rb +1 -0
- metadata +3 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 6f20b5baa54b98b05f96b2ed54cdcc4bc6772021
|
4
|
+
data.tar.gz: 0fdb2fc4944c5bfbe8ad76395531b5414d054f22
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 182a1babb95902e0cb74d08494748940d02fb77f0c502086eec19809ed2c4b2b8af5a9f190041efb8ac7e30614230971bd5e2e53aec6770587888543b128eafc
|
7
|
+
data.tar.gz: fde9797bcdf7be09258b8010a39dd91b686732c2bb647b1488f51bac96a9c66efddf1061ab1919b35c90f3eb76334f67b84e8a707c26a7dcc630c3e1020053a0
|
data/VERSION
CHANGED
@@ -1 +1 @@
|
|
1
|
-
0.4.2
|
1
|
+
0.5.0
|
data/bin/peptfilter
CHANGED
@@ -16,6 +16,11 @@ root_cmd = Cri::Command.new_basic_root.modify do
|
|
16
16
|
lacks = opts.fetch(:lacks, "").chars.to_a
|
17
17
|
contains = opts.fetch(:contains, "").chars.to_a
|
18
18
|
$stdin.each_line do |pept|
|
19
|
+
# FASTA headers
|
20
|
+
if pept.start_with? '>'
|
21
|
+
puts pept
|
22
|
+
next
|
23
|
+
end
|
19
24
|
pept = pept.chomp
|
20
25
|
length_ok = pept.length >= minlen && pept.length <= maxlen
|
21
26
|
lacks_ok = (pept.chars.to_a & lacks).size == 0
|
data/bin/prot2pept
CHANGED
@@ -8,8 +8,29 @@ root_cmd = Cri::Command.new_basic_root.modify do
|
|
8
8
|
required :p, :pattern, "cleavage pattern to split input protein (default: ([KR])([^P]))"
|
9
9
|
run do |opts, args, cmd|
|
10
10
|
pattern = opts.fetch(:pattern, "([KR])([^P])")
|
11
|
-
|
12
|
-
|
11
|
+
# decide if we have FASTA input
|
12
|
+
fasta_header = gets
|
13
|
+
if fasta_header.start_with? '>'
|
14
|
+
# fasta input, need to join lines
|
15
|
+
while !$stdin.eof?
|
16
|
+
prot = ""
|
17
|
+
# Sometimes you just got to accept this weird and ugly code
|
18
|
+
until $stdin.eof? || (line = gets).start_with?('>')
|
19
|
+
prot += line.chomp
|
20
|
+
end
|
21
|
+
puts fasta_header
|
22
|
+
puts prot.gsub(/#{pattern}/,"\\1\n\\2").gsub(/#{pattern}/, "\\1\n\\2").split("\n").reject(&:empty?)
|
23
|
+
|
24
|
+
fasta_header = line
|
25
|
+
end
|
26
|
+
else
|
27
|
+
# handle our already read line
|
28
|
+
puts fasta_header.gsub(/#{pattern}/,"\\1\n\\2").gsub(/#{pattern}/, "\\1\n\\2").split("\n").reject(&:empty?)
|
29
|
+
|
30
|
+
# we no longer have to join lines as input is now more sane
|
31
|
+
$stdin.each_line do |prot|
|
32
|
+
puts prot.gsub(/#{pattern}/,"\\1\n\\2").gsub(/#{pattern}/, "\\1\n\\2").split("\n").reject(&:empty?)
|
33
|
+
end
|
13
34
|
end
|
14
35
|
end
|
15
36
|
end
|
data/bin/unipept
CHANGED
@@ -10,213 +10,51 @@ require_relative '../lib/unipept'
|
|
10
10
|
|
11
11
|
Signal.trap("PIPE", "EXIT")
|
12
12
|
Signal.trap("INT", "EXIT")
|
13
|
-
class ApiRunner < Cri::CommandRunner
|
14
|
-
|
15
|
-
def initialize(args, opts, cmd)
|
16
|
-
super
|
17
|
-
@configuration = Unipept::Configuration.new
|
18
|
-
host = @configuration['host']
|
19
|
-
if host.nil? || host.empty?
|
20
|
-
puts "WARNING: no host has been set, you can set the host with `unipept config host http://localhost:3000/`"
|
21
|
-
exit 1
|
22
|
-
end
|
23
|
-
if !host.start_with? "http://"
|
24
|
-
host = "http://#{host}"
|
25
|
-
end
|
26
13
|
|
27
|
-
|
28
|
-
|
29
|
-
end
|
14
|
+
module Unipept
|
15
|
+
class Taxa2lca < ApiRunner
|
30
16
|
|
31
|
-
|
32
|
-
|
33
|
-
end
|
34
|
-
|
35
|
-
def input_iterator
|
36
|
-
if options[:input]
|
37
|
-
File.readlines(options[:input]).each
|
38
|
-
else
|
39
|
-
STDIN.each_line
|
17
|
+
def mapping
|
18
|
+
{"taxa2lca" => "taxa2lca"}
|
40
19
|
end
|
41
|
-
end
|
42
20
|
|
43
|
-
|
44
|
-
|
45
|
-
|
21
|
+
def peptide_iterator(peptides, &block)
|
22
|
+
block.call(peptides.to_a, 0)
|
23
|
+
end
|
46
24
|
|
47
|
-
|
48
|
-
|
49
|
-
if filter.empty?
|
50
|
-
names = true
|
51
|
-
else
|
52
|
-
names = filter.any? {|f| /.*name.*/.match f}
|
25
|
+
def batch_size
|
26
|
+
raise "NOT NEEDED FOR TAXA2LCA"
|
53
27
|
end
|
54
|
-
|
55
|
-
:equate_il => options[:equate],
|
56
|
-
:extra => options[:extra],
|
57
|
-
:names => names,
|
58
|
-
}
|
28
|
+
|
59
29
|
end
|
60
30
|
|
61
|
-
|
62
|
-
return if options[:quiet]
|
63
|
-
return unless STDOUT.tty?
|
64
|
-
last_fetched = @configuration['last_fetch_date']
|
65
|
-
if last_fetched.nil? || (last_fetched + 60 * 60 * 24) < Time.now
|
66
|
-
version = File.read(File.join(File.dirname(__FILE__), "..", "VERSION"))
|
67
|
-
puts Typhoeus.get(@message_url, params: {version: version}).body
|
31
|
+
class Pept2prot < ApiRunner
|
68
32
|
|
69
|
-
|
70
|
-
|
33
|
+
def mapping
|
34
|
+
{"pept2prot" => "pept2prot"}
|
71
35
|
end
|
72
|
-
end
|
73
36
|
|
74
|
-
|
75
|
-
|
76
|
-
|
77
|
-
|
78
|
-
|
79
|
-
|
80
|
-
filter_list = options[:select] ? options[:select] : []
|
81
|
-
filter_list = filter_list.map {|f| glob_to_regex(f) }
|
82
|
-
output = STDOUT.tty? ? STDOUT : STDERR
|
83
|
-
|
84
|
-
batch_order = Unipept::BatchOrder.new
|
85
|
-
|
86
|
-
printed_header = false
|
87
|
-
result = []
|
88
|
-
|
89
|
-
hydra = Typhoeus::Hydra.new(max_concurrency: 20)
|
90
|
-
num_req = 0
|
91
|
-
|
92
|
-
peptide_iterator(peptides) do |sub_division, i|
|
93
|
-
request = Typhoeus::Request.new(
|
94
|
-
@url,
|
95
|
-
method: :post,
|
96
|
-
body: url_options(sub_division),
|
97
|
-
accept_encoding: "gzip"
|
98
|
-
)
|
99
|
-
request.on_complete do |resp|
|
100
|
-
if resp.timed_out?
|
101
|
-
$stderr.puts "request timed out, continuing anyway, but results might be incomplete"
|
102
|
-
else
|
103
|
-
if resp.success?
|
104
|
-
# if JSON parsing goes wrong
|
105
|
-
sub_result = JSON[resp.response_body] rescue []
|
106
|
-
sub_result = [sub_result] if not sub_result.kind_of? Array
|
107
|
-
|
108
|
-
sub_result.map! {|r| r.select! {|k,v| filter_list.any? {|f| f.match k } } } if ! filter_list.empty?
|
109
|
-
|
110
|
-
if options[:xml]
|
111
|
-
result << sub_result
|
112
|
-
end
|
113
|
-
|
114
|
-
# wait till it's our turn to write
|
115
|
-
batch_order.wait(i) do
|
116
|
-
if ! sub_result.empty?
|
117
|
-
if ! printed_header
|
118
|
-
write_to_output formatter.header(sub_result)
|
119
|
-
printed_header = true
|
120
|
-
end
|
121
|
-
write_to_output formatter.format(sub_result)
|
122
|
-
end
|
123
|
-
end
|
124
|
-
else
|
125
|
-
path = File.expand_path(File.join(Dir.home, "unipept.log"))
|
126
|
-
File.open(path, "w") do |f|
|
127
|
-
f.write resp.response_body
|
128
|
-
end
|
129
|
-
$stderr.puts "API request failed! log can be found in #{path}"
|
37
|
+
def download_xml(result)
|
38
|
+
if options[:xml]
|
39
|
+
FileUtils.mkdir_p(options[:xml])
|
40
|
+
result.first.each do |prot|
|
41
|
+
File.open(options[:xml] + "/#{prot['uniprot_id']}.xml", "wb") do |f|
|
42
|
+
f.write Typhoeus.get("http://www.uniprot.org/uniprot/#{prot['uniprot_id']}.xml").response_body
|
130
43
|
end
|
131
44
|
end
|
132
45
|
end
|
133
|
-
hydra.queue request
|
134
|
-
|
135
|
-
num_req += 1
|
136
|
-
if num_req % 200 == 0
|
137
|
-
hydra.run
|
138
|
-
end
|
139
|
-
|
140
46
|
end
|
141
47
|
|
142
|
-
|
143
|
-
|
144
|
-
begin
|
145
|
-
download_xml(result)
|
146
|
-
rescue
|
147
|
-
STDERR.puts "Something went wrong while downloading xml information! please check the output"
|
48
|
+
def batch_size
|
49
|
+
10
|
148
50
|
end
|
149
|
-
|
150
|
-
end
|
151
|
-
|
152
|
-
def write_to_output(string)
|
153
|
-
if options[:output]
|
154
|
-
File.open(options[:output], 'a') do |f|
|
155
|
-
f.write string
|
156
|
-
end
|
157
|
-
else
|
158
|
-
puts string
|
159
|
-
end
|
160
|
-
end
|
161
|
-
|
162
|
-
|
163
|
-
def download_xml(result)
|
164
|
-
if options[:xml]
|
165
|
-
File.open(options[:xml] + ".xml", "wb") do |f|
|
166
|
-
f.write Typhoeus.get("http://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?db=taxonomy&id=#{result.first.map{|h| h['taxon_id'] }.join(",")}&retmode=xml").response_body
|
167
|
-
end
|
168
|
-
end
|
169
|
-
end
|
170
|
-
|
171
|
-
def peptide_iterator(peptides, &block)
|
172
|
-
peptides.each_slice(batch_size).with_index(&block)
|
173
|
-
end
|
174
|
-
|
175
|
-
private
|
176
|
-
|
177
|
-
def glob_to_regex(glob_string)
|
178
|
-
# only implement * -> . for now
|
179
|
-
Regexp.new glob_string.gsub("*", ".*")
|
180
|
-
end
|
181
|
-
end
|
182
|
-
|
183
|
-
class Taxa2lca < ApiRunner
|
184
|
-
|
185
|
-
def mapping
|
186
|
-
{"taxa2lca" => "taxa2lca"}
|
187
|
-
end
|
188
|
-
|
189
|
-
def peptide_iterator(peptides, &block)
|
190
|
-
block.call(peptides.to_a, 0)
|
191
|
-
end
|
192
|
-
|
193
|
-
def batch_size
|
194
|
-
raise "NOT NEEDED FOR TAXA2LCA"
|
195
51
|
end
|
196
52
|
|
197
|
-
|
198
|
-
|
199
|
-
|
200
|
-
|
201
|
-
def mapping
|
202
|
-
{"pept2prot" => "pept2prot"}
|
203
|
-
end
|
204
|
-
|
205
|
-
def download_xml(result)
|
206
|
-
if options[:xml]
|
207
|
-
FileUtils.mkdir_p(options[:xml])
|
208
|
-
result.first.each do |prot|
|
209
|
-
File.open(options[:xml] + "/#{prot['uniprot_id']}.xml", "wb") do |f|
|
210
|
-
f.write Typhoeus.get("http://www.uniprot.org/uniprot/#{prot['uniprot_id']}.xml").response_body
|
211
|
-
end
|
212
|
-
end
|
53
|
+
class Taxonomy < ApiRunner
|
54
|
+
def mapping
|
55
|
+
{"taxonomy" => "taxonomy"}
|
213
56
|
end
|
214
57
|
end
|
215
|
-
|
216
|
-
def batch_size
|
217
|
-
10
|
218
|
-
end
|
219
|
-
|
220
58
|
end
|
221
59
|
|
222
60
|
root_cmd = Cri::Command.new_basic_root.modify do
|
@@ -227,6 +65,9 @@ root_cmd = Cri::Command.new_basic_root.modify do
|
|
227
65
|
option :o, :output, "output file", :argument => :required
|
228
66
|
option :f, :format, "output format (available: #{Unipept::Formatter.available.join "," }) (default: #{Unipept::Formatter.default})", :argument => :required
|
229
67
|
|
68
|
+
# Configuration options
|
69
|
+
option nil, "config-host", "Override host setting", argument: :required
|
70
|
+
|
230
71
|
run do |opts, args, cmd|
|
231
72
|
if opts[:version]
|
232
73
|
puts File.read(File.join(File.dirname(__FILE__), "..", "VERSION"))
|
@@ -260,7 +101,7 @@ root_cmd.define_command('pept2taxa') do
|
|
260
101
|
option :a, :extra, "Show full lineage"
|
261
102
|
option :x, :xml, "Download taxonomy from NCBI as xml (specify output filename)", :argument => :required
|
262
103
|
|
263
|
-
runner ApiRunner
|
104
|
+
runner Unipept::ApiRunner
|
264
105
|
end
|
265
106
|
|
266
107
|
root_cmd.define_command('pept2lca') do
|
@@ -273,7 +114,7 @@ root_cmd.define_command('pept2lca') do
|
|
273
114
|
option :s, :select, "select the attributes", :argument => :required, :multiple => true
|
274
115
|
option :a, :extra, "Show full lineage"
|
275
116
|
|
276
|
-
runner ApiRunner
|
117
|
+
runner Unipept::ApiRunner
|
277
118
|
end
|
278
119
|
|
279
120
|
root_cmd.define_command('taxa2lca') do
|
@@ -285,7 +126,7 @@ root_cmd.define_command('taxa2lca') do
|
|
285
126
|
option :s, :select, "select the attributes", :argument => :required, :multiple => true
|
286
127
|
option :a, :extra, "Show full lineage"
|
287
128
|
|
288
|
-
runner Taxa2lca
|
129
|
+
runner Unipept::Taxa2lca
|
289
130
|
end
|
290
131
|
|
291
132
|
root_cmd.define_command('pept2prot') do
|
@@ -299,7 +140,19 @@ root_cmd.define_command('pept2prot') do
|
|
299
140
|
option :x, :xml, "download uniprot record in specified directory", :argument => :required
|
300
141
|
flag :a, :extra, "include all information. WARNING: will take much longer!"
|
301
142
|
|
302
|
-
runner Pept2prot
|
143
|
+
runner Unipept::Pept2prot
|
144
|
+
end
|
145
|
+
|
146
|
+
root_cmd.define_command('taxonomy') do
|
147
|
+
usage 'taxonomy [options]'
|
148
|
+
aliases :tax
|
149
|
+
summary 'Give NCBI taxonomy information on given input taxon ids'
|
150
|
+
description 'Returns information for each input taxon id'
|
151
|
+
|
152
|
+
option :s, :select, "select the attributes", :argument => :required, :multiple => true
|
153
|
+
flag :a, :extra, "include all information. WARNING: will take much longer!"
|
154
|
+
|
155
|
+
runner Unipept::Taxonomy
|
303
156
|
end
|
304
157
|
|
305
158
|
root_cmd.run(ARGV)
|
@@ -0,0 +1,217 @@
|
|
1
|
+
module Unipept
|
2
|
+
class ApiRunner < Cri::CommandRunner
|
3
|
+
|
4
|
+
def initialize(args, opts, cmd)
|
5
|
+
super
|
6
|
+
@configuration = Unipept::Configuration.new
|
7
|
+
|
8
|
+
set_configuration
|
9
|
+
|
10
|
+
@url = "#{@host}/api/v1/#{mapping[cmd.name]}.json"
|
11
|
+
@message_url = "#{@host}/api/v1/messages.json"
|
12
|
+
end
|
13
|
+
|
14
|
+
def set_configuration
|
15
|
+
# find host in opts first
|
16
|
+
if options[:'config-host']
|
17
|
+
host = options[:'config-host']
|
18
|
+
else
|
19
|
+
host = @configuration['host']
|
20
|
+
end
|
21
|
+
|
22
|
+
# No host has been set?
|
23
|
+
if host.nil? || host.empty?
|
24
|
+
puts "WARNING: no host has been set, you can set the host with `unipept config host http://localhost:3000/`"
|
25
|
+
exit 1
|
26
|
+
end
|
27
|
+
if !host.start_with? "http://"
|
28
|
+
host = "http://#{host}"
|
29
|
+
end
|
30
|
+
|
31
|
+
@host = host
|
32
|
+
end
|
33
|
+
|
34
|
+
def mapping
|
35
|
+
{'pept2taxa' => 'pept2taxa', 'pept2lca' => 'pept2lca'}
|
36
|
+
end
|
37
|
+
|
38
|
+
def input_iterator
|
39
|
+
# Argument over file input over stdin
|
40
|
+
if !arguments.empty?
|
41
|
+
arguments.each
|
42
|
+
else
|
43
|
+
if options[:input]
|
44
|
+
IO.foreach(options[:input])
|
45
|
+
else
|
46
|
+
STDIN.each_line
|
47
|
+
end
|
48
|
+
end
|
49
|
+
end
|
50
|
+
|
51
|
+
def batch_size
|
52
|
+
100
|
53
|
+
end
|
54
|
+
|
55
|
+
def url_options(sub_part)
|
56
|
+
filter = options[:select] ? options[:select] : []
|
57
|
+
if filter.empty?
|
58
|
+
names = true
|
59
|
+
else
|
60
|
+
names = filter.any? {|f| /.*name.*/.match f}
|
61
|
+
end
|
62
|
+
{:input => sub_part,
|
63
|
+
:equate_il => options[:equate],
|
64
|
+
:extra => options[:extra],
|
65
|
+
:names => names,
|
66
|
+
}
|
67
|
+
end
|
68
|
+
|
69
|
+
def get_server_message
|
70
|
+
return if options[:quiet]
|
71
|
+
return unless STDOUT.tty?
|
72
|
+
last_fetched = @configuration['last_fetch_date']
|
73
|
+
if last_fetched.nil? || (last_fetched + 60 * 60 * 24) < Time.now
|
74
|
+
version = File.read(File.join(File.dirname(__FILE__), "..", "VERSION"))
|
75
|
+
puts Typhoeus.get(@message_url, params: {version: version}).body
|
76
|
+
|
77
|
+
@configuration['last_fetch_date'] = Time.now
|
78
|
+
@configuration.save
|
79
|
+
end
|
80
|
+
end
|
81
|
+
|
82
|
+
def run
|
83
|
+
get_server_message
|
84
|
+
|
85
|
+
formatter = Unipept::Formatter.new_for_format(options[:format])
|
86
|
+
peptides = input_iterator
|
87
|
+
|
88
|
+
filter_list = options[:select] ? options[:select] : []
|
89
|
+
filter_list = filter_list.map {|f| glob_to_regex(f) }
|
90
|
+
output = STDOUT.tty? ? STDOUT : STDERR
|
91
|
+
|
92
|
+
batch_order = Unipept::BatchOrder.new
|
93
|
+
|
94
|
+
printed_header = false
|
95
|
+
result = []
|
96
|
+
|
97
|
+
hydra = Typhoeus::Hydra.new(max_concurrency: 10)
|
98
|
+
num_req = 0
|
99
|
+
|
100
|
+
peptide_iterator(peptides) do |sub_division, i, fasta_mapper|
|
101
|
+
request = Typhoeus::Request.new(
|
102
|
+
@url,
|
103
|
+
method: :post,
|
104
|
+
body: url_options(sub_division),
|
105
|
+
accept_encoding: "gzip"
|
106
|
+
)
|
107
|
+
request.on_complete do |resp|
|
108
|
+
if resp.timed_out?
|
109
|
+
$stderr.puts "request timed out, continuing anyway, but results might be incomplete"
|
110
|
+
else
|
111
|
+
if resp.success?
|
112
|
+
# if JSON parsing goes wrong
|
113
|
+
sub_result = JSON[resp.response_body] rescue []
|
114
|
+
sub_result = [sub_result] if not sub_result.kind_of? Array
|
115
|
+
|
116
|
+
sub_result.map! {|r| r.select! {|k,v| filter_list.any? {|f| f.match k } } } if ! filter_list.empty?
|
117
|
+
|
118
|
+
if options[:xml]
|
119
|
+
result << sub_result
|
120
|
+
end
|
121
|
+
|
122
|
+
# wait till it's our turn to write
|
123
|
+
batch_order.wait(i) do
|
124
|
+
if ! sub_result.empty?
|
125
|
+
if ! printed_header
|
126
|
+
write_to_output formatter.header(sub_result, fasta_mapper)
|
127
|
+
printed_header = true
|
128
|
+
end
|
129
|
+
write_to_output formatter.format(sub_result, fasta_mapper)
|
130
|
+
end
|
131
|
+
end
|
132
|
+
else
|
133
|
+
save_error(resp.response_body)
|
134
|
+
end
|
135
|
+
end
|
136
|
+
end
|
137
|
+
hydra.queue request
|
138
|
+
|
139
|
+
num_req += 1
|
140
|
+
if num_req % 200 == 0
|
141
|
+
hydra.run
|
142
|
+
end
|
143
|
+
|
144
|
+
end
|
145
|
+
|
146
|
+
hydra.run
|
147
|
+
|
148
|
+
begin
|
149
|
+
download_xml(result)
|
150
|
+
rescue
|
151
|
+
STDERR.puts "Something went wrong while downloading xml information! please check the output"
|
152
|
+
end
|
153
|
+
|
154
|
+
end
|
155
|
+
|
156
|
+
def save_error(message)
|
157
|
+
path = File.expand_path(File.join(Dir.home, ".unipept", "unipept-#{Time.now.strftime("%F-%T")}.log"))
|
158
|
+
FileUtils.mkdir_p File.dirname(path)
|
159
|
+
File.open(path, "w") do |f|
|
160
|
+
f.write message
|
161
|
+
end
|
162
|
+
$stderr.puts "API request failed! log can be found in #{path}"
|
163
|
+
end
|
164
|
+
|
165
|
+
def write_to_output(string)
|
166
|
+
if options[:output]
|
167
|
+
File.open(options[:output], 'a') do |f|
|
168
|
+
f.write string
|
169
|
+
end
|
170
|
+
else
|
171
|
+
puts string
|
172
|
+
end
|
173
|
+
end
|
174
|
+
|
175
|
+
|
176
|
+
def download_xml(result)
|
177
|
+
if options[:xml]
|
178
|
+
File.open(options[:xml] + ".xml", "wb") do |f|
|
179
|
+
f.write Typhoeus.get("http://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?db=taxonomy&id=#{result.first.map{|h| h['taxon_id'] }.join(",")}&retmode=xml").response_body
|
180
|
+
end
|
181
|
+
end
|
182
|
+
end
|
183
|
+
|
184
|
+
def peptide_iterator(peptides, &block)
|
185
|
+
first = peptides.first
|
186
|
+
if first.start_with? '>'
|
187
|
+
# FASTA MODE ENGAGED
|
188
|
+
fasta_header = first
|
189
|
+
peptides.each_slice(batch_size).with_index do |sub,i|
|
190
|
+
fasta_mapper = {}
|
191
|
+
sub.map! {|s| s.chomp}
|
192
|
+
j = 0
|
193
|
+
while j < sub.size
|
194
|
+
if sub[j].start_with? '>'
|
195
|
+
fasta_header = sub[j]
|
196
|
+
else
|
197
|
+
fasta_mapper[sub[j]] = fasta_header
|
198
|
+
end
|
199
|
+
j += 1
|
200
|
+
end
|
201
|
+
sub -= fasta_mapper.values.uniq
|
202
|
+
block.call(sub, i, fasta_mapper)
|
203
|
+
end
|
204
|
+
|
205
|
+
else
|
206
|
+
peptides.each_slice(batch_size).with_index(&block)
|
207
|
+
end
|
208
|
+
end
|
209
|
+
|
210
|
+
private
|
211
|
+
|
212
|
+
def glob_to_regex(glob_string)
|
213
|
+
# only implement * -> . for now
|
214
|
+
Regexp.new glob_string.gsub("*", ".*")
|
215
|
+
end
|
216
|
+
end
|
217
|
+
end
|
data/lib/unipept/formatters.rb
CHANGED
@@ -30,7 +30,7 @@ module Unipept
|
|
30
30
|
end
|
31
31
|
|
32
32
|
# JSON formatted data goes in, something other comes out
|
33
|
-
def format(data)
|
33
|
+
def format(data, fasta_mapper = nil)
|
34
34
|
data
|
35
35
|
end
|
36
36
|
end
|
@@ -49,23 +49,40 @@ module Unipept
|
|
49
49
|
|
50
50
|
register :csv
|
51
51
|
|
52
|
-
def header(data)
|
52
|
+
def header(data, fasta_mapper = nil)
|
53
53
|
CSV.generate do |csv|
|
54
54
|
first = data.first
|
55
55
|
if first.kind_of? Array
|
56
56
|
first = first.first
|
57
57
|
end
|
58
|
-
|
58
|
+
if fasta_mapper
|
59
|
+
csv << (['fasta_header'] + first.keys).map(&:to_s) if first
|
60
|
+
else
|
61
|
+
csv << first.keys.map(&:to_s) if first
|
62
|
+
end
|
63
|
+
|
59
64
|
end
|
60
65
|
end
|
61
66
|
|
62
|
-
def format(data)
|
67
|
+
def format(data, fasta_mapper = nil)
|
63
68
|
CSV.generate do |csv|
|
64
69
|
data.each do |o|
|
65
70
|
if o.kind_of? Array
|
66
|
-
o.each
|
71
|
+
o.each do |h|
|
72
|
+
if fasta_mapper
|
73
|
+
extra_key = [fasta_mapper[h.values.first]]
|
74
|
+
csv << (extra_key + h.values).map { |v| v == "" ? nil : v }
|
75
|
+
else
|
76
|
+
csv << h.values.map { |v| v == "" ? nil : v }
|
77
|
+
end
|
78
|
+
end
|
67
79
|
else
|
68
|
-
|
80
|
+
if fasta_mapper
|
81
|
+
extra_key = [fasta_mapper[o.values.first]]
|
82
|
+
csv << (extra_key + o.values).map { |v| v == "" ? nil : v }
|
83
|
+
else
|
84
|
+
csv << o.values.map { |v| v == "" ? nil : v }
|
85
|
+
end
|
69
86
|
end
|
70
87
|
end
|
71
88
|
end
|
data/lib/unipept.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: unipept
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.4.2
|
4
|
+
version: 0.5.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Toon Willems
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2014-05-
|
11
|
+
date: 2014-05-19 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: shoulda
|
@@ -131,6 +131,7 @@ files:
|
|
131
131
|
- bin/unipept
|
132
132
|
- bin/uniprot
|
133
133
|
- lib/unipept.rb
|
134
|
+
- lib/unipept/api_runner.rb
|
134
135
|
- lib/unipept/batch_order.rb
|
135
136
|
- lib/unipept/configuration.rb
|
136
137
|
- lib/unipept/formatters.rb
|