unipept 2.1.1 → 2.2.2
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.github/dependabot.yml +9 -0
- data/.github/workflows/ci.yml +6 -27
- data/.rakeTasks +7 -0
- data/.rubocop.yml +2 -0
- data/.ruby-version +1 -1
- data/CITATION.cff +30 -0
- data/Gemfile +2 -2
- data/Gemfile.lock +60 -41
- data/README.md +2 -2
- data/Rakefile +4 -4
- data/VERSION +1 -1
- data/lib/batch_iterator.rb +16 -0
- data/lib/commands/prot2pept.rb +1 -2
- data/lib/commands/unipept/api_runner.rb +10 -12
- data/lib/commands/unipept/config.rb +1 -1
- data/lib/commands/unipept/pept2taxa.rb +1 -5
- data/lib/commands/unipept/taxa2tree.rb +74 -0
- data/lib/commands/unipept.rb +25 -1
- data/lib/commands/uniprot.rb +4 -5
- data/lib/configuration.rb +6 -7
- data/lib/formatters.rb +108 -36
- data/lib/server_message.rb +2 -4
- data/test/commands/unipept/test_api_runner.rb +8 -7
- data/test/commands/unipept/test_config.rb +1 -1
- data/test/commands/unipept/test_pept2ec.rb +11 -11
- data/test/commands/unipept/test_pept2funct.rb +15 -15
- data/test/commands/unipept/test_pept2go.rb +10 -10
- data/test/commands/unipept/test_pept2interpro.rb +26 -26
- data/test/commands/unipept/test_pept2lca.rb +2 -2
- data/test/commands/unipept/test_pept2prot.rb +2 -2
- data/test/commands/unipept/test_pept2taxa.rb +2 -4
- data/test/commands/unipept/test_peptinfo.rb +16 -16
- data/test/commands/unipept/test_taxa2lca.rb +2 -2
- data/test/commands/unipept/test_taxa2tree.rb +68 -0
- data/test/commands/unipept/test_taxonomy.rb +2 -2
- data/test/helper.rb +10 -0
- data/test/support/api_stub.rb +60 -0
- data/test/support/resources/pept2ec.json +55 -0
- data/test/support/resources/pept2funct.json +73 -0
- data/test/support/resources/pept2go.json +43 -0
- data/test/support/resources/pept2interpro.json +43 -0
- data/test/support/resources/pept2lca.json +14 -0
- data/test/support/resources/pept2prot.json +422 -0
- data/test/support/resources/pept2taxa.json +194 -0
- data/test/support/resources/peptinfo.json +70 -0
- data/test/support/resources/taxa2tree.json +194 -0
- data/test/support/resources/taxonomy.json +22 -0
- data/test/test_configuration.rb +1 -1
- data/test/test_formatters.rb +5 -5
- data/test/test_output_writer.rb +1 -1
- data/test/test_server_message.rb +2 -2
- data/test.taxa +4 -0
- data/unipept.gemspec +32 -21
- metadata +26 -9
data/lib/configuration.rb
CHANGED
@@ -2,8 +2,7 @@ require 'yaml'
|
|
2
2
|
|
3
3
|
module Unipept
|
4
4
|
class Configuration
|
5
|
-
attr_reader :config
|
6
|
-
attr_reader :file_name
|
5
|
+
attr_reader :config, :file_name
|
7
6
|
|
8
7
|
# Creates a new config object, based on a given YAML file. If no filename
|
9
8
|
# is given, '.unipeptrc' in the home dir of the user will be used.
|
@@ -14,17 +13,17 @@ module Unipept
|
|
14
13
|
# config from
|
15
14
|
def initialize(file = nil)
|
16
15
|
@file_name = file || File.join(Dir.home, '.unipeptrc')
|
17
|
-
@config = if
|
18
|
-
|
16
|
+
@config = if File.exist? file_name
|
17
|
+
YAML.load_file file_name, permitted_classes: [Time]
|
19
18
|
else
|
20
|
-
|
19
|
+
{}
|
21
20
|
end
|
22
21
|
end
|
23
22
|
|
24
23
|
# Saves the config to disk. If the file doesn't exist yet, a new one will be
|
25
24
|
# created
|
26
25
|
def save
|
27
|
-
File.
|
26
|
+
File.write(file_name, config.to_yaml)
|
28
27
|
end
|
29
28
|
|
30
29
|
# Deletes a key
|
@@ -39,7 +38,7 @@ module Unipept
|
|
39
38
|
|
40
39
|
# forwards []= to the internal config hash
|
41
40
|
def []=(*args)
|
42
|
-
config.[]=(*args)
|
41
|
+
config.[]=(*args) # rubocop:disable Layout/SpaceBeforeBrackets
|
43
42
|
end
|
44
43
|
end
|
45
44
|
end
|
data/lib/formatters.rb
CHANGED
@@ -156,7 +156,7 @@ module Unipept
|
|
156
156
|
# @return [String] The converted input data in the JSON format
|
157
157
|
def convert(data, first)
|
158
158
|
output = data.map(&:to_json).join(',')
|
159
|
-
first ? output :
|
159
|
+
first ? output : ",#{output}"
|
160
160
|
end
|
161
161
|
end
|
162
162
|
|
@@ -169,20 +169,7 @@ module Unipept
|
|
169
169
|
'csv'
|
170
170
|
end
|
171
171
|
|
172
|
-
|
173
|
-
# contains all the keys of the first element of the data, preceded by
|
174
|
-
# 'fasta_header' if a fasta_mapper is given.
|
175
|
-
#
|
176
|
-
# @param [Array] data The data that we will use to extract the keys from.
|
177
|
-
#
|
178
|
-
# @param [Array<Array<String>>] fasta_mapper Optional mapping between input
|
179
|
-
# data and corresponding fasta header. The data is represented as a list
|
180
|
-
# containing tuples where the first element is the fasta header and second
|
181
|
-
# element is the input data If a fasta_mapper is given, the output will be
|
182
|
-
# preceded with 'fasta_header'.
|
183
|
-
#
|
184
|
-
# @return [String] The header row
|
185
|
-
def header(data, fasta_mapper = nil)
|
172
|
+
def get_keys(data, fasta_mapper = nil)
|
186
173
|
# This global variable is necessary because we need to know how many items should be
|
187
174
|
# nil in the convert function.
|
188
175
|
$keys_length = 0 # rubocop:disable Style/GlobalVars
|
@@ -191,28 +178,47 @@ module Unipept
|
|
191
178
|
|
192
179
|
# First we look for items for both ec numbers, go terms and ipr codes that are fully filled in.
|
193
180
|
data.each do |row|
|
194
|
-
non_empty_items.
|
181
|
+
non_empty_items.each_key do |annotation_type|
|
195
182
|
non_empty_items[annotation_type] = row if row[annotation_type] && !row[annotation_type].empty?
|
196
183
|
end
|
197
184
|
end
|
198
185
|
|
199
|
-
|
200
|
-
|
201
|
-
|
202
|
-
processed_keys = keys
|
186
|
+
keys = fasta_mapper ? ['fasta_header'] : []
|
187
|
+
keys += (data.first.keys - %w[ec go ipr])
|
188
|
+
processed_keys = keys
|
203
189
|
|
204
|
-
|
205
|
-
|
190
|
+
non_empty_items.each do |annotation_type, non_empty_item|
|
191
|
+
next unless non_empty_item
|
206
192
|
|
207
|
-
|
208
|
-
|
193
|
+
keys += (non_empty_item.keys - processed_keys)
|
194
|
+
processed_keys += non_empty_item.keys
|
209
195
|
|
210
|
-
|
211
|
-
|
212
|
-
|
213
|
-
|
214
|
-
|
196
|
+
idx = keys.index(annotation_type)
|
197
|
+
keys.delete_at(idx)
|
198
|
+
keys.insert(idx, *non_empty_item[annotation_type].first.keys.map { |el| %w[ec_number go_term ipr_code].include?(el) ? el : "#{annotation_type}_#{el}" })
|
199
|
+
$keys_length = *non_empty_item[annotation_type].first.keys.length # rubocop:disable Style/GlobalVars
|
200
|
+
end
|
215
201
|
|
202
|
+
keys
|
203
|
+
end
|
204
|
+
|
205
|
+
# Returns the header row for the given data and fasta_mapper. This row
|
206
|
+
# contains all the keys of the first element of the data, preceded by
|
207
|
+
# 'fasta_header' if a fasta_mapper is given.
|
208
|
+
#
|
209
|
+
# @param [Array] data The data that we will use to extract the keys from.
|
210
|
+
#
|
211
|
+
# @param [Array<Array<String>>] fasta_mapper Optional mapping between input
|
212
|
+
# data and corresponding fasta header. The data is represented as a list
|
213
|
+
# containing tuples where the first element is the fasta header and second
|
214
|
+
# element is the input data. If a fasta_mapper is given, the output will be
|
215
|
+
# preceded with 'fasta_header'.
|
216
|
+
#
|
217
|
+
# @return [String] The header row
|
218
|
+
def header(data, fasta_mapper = nil)
|
219
|
+
keys = get_keys(data, fasta_mapper)
|
220
|
+
|
221
|
+
CSV.generate do |csv|
|
216
222
|
csv << keys.map(&:to_s) if keys.length.positive?
|
217
223
|
end
|
218
224
|
end
|
@@ -229,23 +235,25 @@ module Unipept
|
|
229
235
|
#
|
230
236
|
# @return [String] The converted input data in the CSV format
|
231
237
|
def convert(data, _first)
|
238
|
+
keys = get_keys(data)
|
239
|
+
|
232
240
|
CSV.generate do |csv|
|
233
241
|
data.each do |o|
|
234
|
-
row =
|
242
|
+
row = {}
|
235
243
|
o.each do |k, v|
|
236
244
|
if %w[ec go ipr].include? k
|
237
245
|
if v && !v.empty?
|
238
|
-
v.first.
|
239
|
-
row
|
246
|
+
v.first.each_key do |key|
|
247
|
+
row[key == 'protein_count' ? "#{k}_protein_count" : key] = (v.map { |el| el[key] }).join(' ').strip
|
240
248
|
end
|
241
249
|
else
|
242
|
-
row = row.concat(Array.new($keys_length[0], nil)) # rubocop:disable Style/GlobalVars
|
250
|
+
row[k] = row.concat(Array.new($keys_length[0], nil)) # rubocop:disable Style/GlobalVars
|
243
251
|
end
|
244
252
|
else
|
245
|
-
row
|
253
|
+
row[k] = (v == '' ? nil : v)
|
246
254
|
end
|
247
255
|
end
|
248
|
-
csv << row
|
256
|
+
csv << keys.map { |k| row[k] }
|
249
257
|
end
|
250
258
|
end
|
251
259
|
end
|
@@ -296,7 +304,7 @@ module Unipept
|
|
296
304
|
#
|
297
305
|
# @return [String] The converted input data in the XML format
|
298
306
|
def convert(data, _first)
|
299
|
-
data.map { |row|
|
307
|
+
data.map { |row| "<result>#{row.to_xml}</result>" }.join
|
300
308
|
end
|
301
309
|
end
|
302
310
|
|
@@ -336,4 +344,68 @@ module Unipept
|
|
336
344
|
.join
|
337
345
|
end
|
338
346
|
end
|
347
|
+
|
348
|
+
class HtmlFormatter < Formatter
|
349
|
+
register :html
|
350
|
+
|
351
|
+
# @return [String] The type of the current formatter: html
|
352
|
+
def type
|
353
|
+
'html'
|
354
|
+
end
|
355
|
+
|
356
|
+
def self.hidden?
|
357
|
+
false
|
358
|
+
end
|
359
|
+
|
360
|
+
def header(_data, _fasta_mapper = nil)
|
361
|
+
''
|
362
|
+
end
|
363
|
+
|
364
|
+
def footer
|
365
|
+
''
|
366
|
+
end
|
367
|
+
|
368
|
+
# Converts the given input data to an HTML page that contains the Unipept visualizations
|
369
|
+
#
|
370
|
+
# @param [Array] data The data we wish to convert
|
371
|
+
#
|
372
|
+
# @param [Boolean] _first Is this the first output batch?
|
373
|
+
#
|
374
|
+
# @return [String] The input data, returned unchanged (presumably already an HTML page)
|
375
|
+
def convert(data, _first)
|
376
|
+
data
|
377
|
+
end
|
378
|
+
end
|
379
|
+
|
380
|
+
class UrlFormatter < Formatter
|
381
|
+
register :url
|
382
|
+
|
383
|
+
# @return [String] The type of the current formatter: url
|
384
|
+
def type
|
385
|
+
'url'
|
386
|
+
end
|
387
|
+
|
388
|
+
def self.hidden?
|
389
|
+
false
|
390
|
+
end
|
391
|
+
|
392
|
+
def header(_data, _fasta_mapper = nil)
|
393
|
+
''
|
394
|
+
end
|
395
|
+
|
396
|
+
def footer
|
397
|
+
''
|
398
|
+
end
|
399
|
+
|
400
|
+
# Converts the given input data to a URL: the gist URL of the first element, rewritten from gist.github.com to bl.ocks.org
|
401
|
+
#
|
402
|
+
# @param [Array] data The data we wish to convert
|
403
|
+
#
|
404
|
+
# @param [Boolean] _first Is this the first output batch?
|
405
|
+
#
|
406
|
+
# @return [String] The bl.ocks.org URL followed by a newline
|
407
|
+
def convert(data, _first)
|
408
|
+
"#{data[0]['gist'].sub!('https://gist.github.com/', 'https://bl.ocks.org/')}\n"
|
409
|
+
end
|
410
|
+
end
|
339
411
|
end
|
data/lib/server_message.rb
CHANGED
@@ -4,9 +4,7 @@ require_relative 'configuration'
|
|
4
4
|
|
5
5
|
module Unipept
|
6
6
|
class ServerMessage
|
7
|
-
attr_reader :message_url
|
8
|
-
|
9
|
-
attr_reader :configuration
|
7
|
+
attr_reader :message_url, :configuration
|
10
8
|
|
11
9
|
def initialize(host)
|
12
10
|
@message_url = "#{host}/api/v1/messages.json"
|
@@ -34,7 +32,7 @@ module Unipept
|
|
34
32
|
# ago.
|
35
33
|
def recently_fetched?
|
36
34
|
last_fetched = @configuration['last_fetch_date']
|
37
|
-
!last_fetched.nil? && (last_fetched + 60 * 60 * 24) > Time.now
|
35
|
+
!last_fetched.nil? && (last_fetched + (60 * 60 * 24)) > Time.now
|
38
36
|
end
|
39
37
|
|
40
38
|
# Updates the last checked timestamp
|
@@ -67,8 +67,8 @@ module Unipept
|
|
67
67
|
end
|
68
68
|
|
69
69
|
def test_input_iterator_file
|
70
|
-
File.
|
71
|
-
runner = new_runner('test',
|
70
|
+
File.write('input_file', %w[a b c].join("\n"))
|
71
|
+
runner = new_runner('test', host: 'https://param_host', input: 'input_file')
|
72
72
|
output = []
|
73
73
|
runner.input_iterator.each { |el| output << el.chomp }
|
74
74
|
assert_equal(%w[a b c], output)
|
@@ -84,7 +84,7 @@ module Unipept
|
|
84
84
|
end
|
85
85
|
|
86
86
|
def test_input_iterator_arguments_priority
|
87
|
-
File.
|
87
|
+
File.write('input_file', %w[1 2 3].join("\n"))
|
88
88
|
runner = new_runner('test', { host: 'https://param_host', input: 'input_file' }, %w[a b c])
|
89
89
|
output = []
|
90
90
|
_out, _err = capture_io_with_input(%w[1 2 3]) do
|
@@ -94,8 +94,8 @@ module Unipept
|
|
94
94
|
end
|
95
95
|
|
96
96
|
def test_input_iterator_file_priority
|
97
|
-
File.
|
98
|
-
runner = new_runner('test',
|
97
|
+
File.write('input_file', %w[a b c].join("\n"))
|
98
|
+
runner = new_runner('test', host: 'https://param_host', input: 'input_file')
|
99
99
|
output = []
|
100
100
|
_out, _err = capture_io_with_input(%w[1 2 3]) do
|
101
101
|
runner.input_iterator.each { |el| output << el.chomp }
|
@@ -280,7 +280,7 @@ module Unipept
|
|
280
280
|
runner.save_error('error message')
|
281
281
|
end
|
282
282
|
assert(err.start_with?('API request failed! log can be found in'))
|
283
|
-
assert_equal('error message',
|
283
|
+
assert_equal('error message', File.foreach('errordir/error.log').next.chomp)
|
284
284
|
end
|
285
285
|
end
|
286
286
|
|
@@ -319,7 +319,7 @@ module Unipept
|
|
319
319
|
end
|
320
320
|
|
321
321
|
def test_success_no_header_option_handle_response
|
322
|
-
runner = new_runner('test',
|
322
|
+
runner = new_runner('test', { host: 'test', 'no-header': true })
|
323
323
|
response = new_response(success: true, response_body: '[{"key1":"value1","key2":"value1"},{"key1":"value2","key2":"value2"}]')
|
324
324
|
lambda = runner.handle_response(response, 0, nil)
|
325
325
|
assert(lambda.lambda?)
|
@@ -409,6 +409,7 @@ module Unipept
|
|
409
409
|
def new_response(values)
|
410
410
|
response = Class.new do
|
411
411
|
def initialize(values)
|
412
|
+
super()
|
412
413
|
@values = values
|
413
414
|
end
|
414
415
|
|
@@ -44,7 +44,7 @@ module Unipept
|
|
44
44
|
out, _err = capture_io_while do
|
45
45
|
Commands::Unipept.run(['config', 'test', value])
|
46
46
|
end
|
47
|
-
assert_equal(
|
47
|
+
assert_equal("test was set to #{value}", out.chomp)
|
48
48
|
assert_equal(value, Unipept::Configuration.new['test'])
|
49
49
|
end
|
50
50
|
|
@@ -51,7 +51,7 @@ module Unipept
|
|
51
51
|
lines = out.each_line
|
52
52
|
assert_equal('', err)
|
53
53
|
assert(lines.next.start_with?('peptide,total_protein_count,ec_number,ec_protein_count'))
|
54
|
-
assert(lines.next.start_with?('AALTER,
|
54
|
+
assert(lines.next.start_with?('AALTER,7,3.1.3.3 6.3.2.13,2 2'))
|
55
55
|
assert_raises(StopIteration) { lines.next }
|
56
56
|
end
|
57
57
|
|
@@ -62,9 +62,9 @@ module Unipept
|
|
62
62
|
lines = out.each_line
|
63
63
|
assert_equal('', err)
|
64
64
|
assert(lines.next.start_with?('fasta_header,peptide,total_protein_count,ec_number,ec_protein_count'))
|
65
|
-
assert(lines.next.start_with?('>test,AALTER,
|
66
|
-
assert(lines.next.start_with?('>test,AALER,
|
67
|
-
assert(lines.next.start_with?('>tost,AALTER,
|
65
|
+
assert(lines.next.start_with?('>test,AALTER,7,3.1.3.3 6.3.2.13,2 2'))
|
66
|
+
assert(lines.next.start_with?('>test,AALER,208,6.1.1.16 2.7.7.38,44 13'))
|
67
|
+
assert(lines.next.start_with?('>tost,AALTER,7,3.1.3.3 6.3.2.13,2 2'))
|
68
68
|
assert_raises(StopIteration) { lines.next }
|
69
69
|
end
|
70
70
|
|
@@ -75,9 +75,9 @@ module Unipept
|
|
75
75
|
lines = out.each_line
|
76
76
|
assert_equal('', err)
|
77
77
|
assert(lines.next.start_with?('fasta_header,peptide,ec_number'))
|
78
|
-
assert(lines.next.start_with?('>test,AALTER,
|
79
|
-
assert(lines.next.start_with?('>test,AALER,
|
80
|
-
assert(lines.next.start_with?('>tost,AALTER,
|
78
|
+
assert(lines.next.start_with?('>test,AALTER,3.1.3.3 6.3.2.13'))
|
79
|
+
assert(lines.next.start_with?('>test,AALER,6.1.1.16 2.7.7.38'))
|
80
|
+
assert(lines.next.start_with?('>tost,AALTER,3.1.3.3 6.3.2.13'))
|
81
81
|
assert_raises(StopIteration) { lines.next }
|
82
82
|
end
|
83
83
|
|
@@ -87,7 +87,7 @@ module Unipept
|
|
87
87
|
end
|
88
88
|
lines = out.each_line
|
89
89
|
assert_equal('', err)
|
90
|
-
output = lines.to_a.join
|
90
|
+
output = lines.to_a.join.chomp
|
91
91
|
assert(output.start_with?('['))
|
92
92
|
assert(output.end_with?(']'))
|
93
93
|
assert(!output.include?('}{'))
|
@@ -100,7 +100,7 @@ module Unipept
|
|
100
100
|
end
|
101
101
|
lines = out.each_line
|
102
102
|
assert_equal('', err)
|
103
|
-
output = lines.to_a.join
|
103
|
+
output = lines.to_a.join.chomp
|
104
104
|
assert(output.start_with?('<results>'))
|
105
105
|
assert(output.end_with?('</results>'))
|
106
106
|
assert(output.include?('<fasta_header>'))
|
@@ -122,7 +122,7 @@ module Unipept
|
|
122
122
|
lines = out.each_line
|
123
123
|
assert_equal('', err)
|
124
124
|
assert(lines.next.start_with?('peptide,total_protein_count,ec_number,ec_protein_count'))
|
125
|
-
assert(lines.next.start_with?('AALTER,
|
125
|
+
assert(lines.next.start_with?('AALTER,7,3.1.3.3 6.3.2.13,2 2'))
|
126
126
|
assert_raises(StopIteration) { lines.next }
|
127
127
|
end
|
128
128
|
|
@@ -133,7 +133,7 @@ module Unipept
|
|
133
133
|
lines = out.each_line
|
134
134
|
assert_equal('', err)
|
135
135
|
assert(lines.next.start_with?('peptide,total_protein_count'))
|
136
|
-
assert(lines.next.start_with?('MDGTEYIIVK,
|
136
|
+
assert(lines.next.start_with?('MDGTEYIIVK,35'))
|
137
137
|
assert_raises(StopIteration) { lines.next }
|
138
138
|
end
|
139
139
|
end
|