unipept 2.1.1 → 2.2.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.github/dependabot.yml +9 -0
- data/.github/workflows/ci.yml +6 -27
- data/.rakeTasks +7 -0
- data/.rubocop.yml +2 -0
- data/.ruby-version +1 -1
- data/CITATION.cff +30 -0
- data/Gemfile +2 -2
- data/Gemfile.lock +60 -41
- data/README.md +2 -2
- data/Rakefile +4 -4
- data/VERSION +1 -1
- data/lib/batch_iterator.rb +16 -0
- data/lib/commands/prot2pept.rb +1 -2
- data/lib/commands/unipept/api_runner.rb +10 -12
- data/lib/commands/unipept/config.rb +1 -1
- data/lib/commands/unipept/pept2taxa.rb +1 -5
- data/lib/commands/unipept/taxa2tree.rb +74 -0
- data/lib/commands/unipept.rb +25 -1
- data/lib/commands/uniprot.rb +4 -5
- data/lib/configuration.rb +6 -7
- data/lib/formatters.rb +108 -36
- data/lib/server_message.rb +2 -4
- data/test/commands/unipept/test_api_runner.rb +8 -7
- data/test/commands/unipept/test_config.rb +1 -1
- data/test/commands/unipept/test_pept2ec.rb +11 -11
- data/test/commands/unipept/test_pept2funct.rb +15 -15
- data/test/commands/unipept/test_pept2go.rb +10 -10
- data/test/commands/unipept/test_pept2interpro.rb +26 -26
- data/test/commands/unipept/test_pept2lca.rb +2 -2
- data/test/commands/unipept/test_pept2prot.rb +2 -2
- data/test/commands/unipept/test_pept2taxa.rb +2 -4
- data/test/commands/unipept/test_peptinfo.rb +16 -16
- data/test/commands/unipept/test_taxa2lca.rb +2 -2
- data/test/commands/unipept/test_taxa2tree.rb +68 -0
- data/test/commands/unipept/test_taxonomy.rb +2 -2
- data/test/helper.rb +10 -0
- data/test/support/api_stub.rb +60 -0
- data/test/support/resources/pept2ec.json +55 -0
- data/test/support/resources/pept2funct.json +73 -0
- data/test/support/resources/pept2go.json +43 -0
- data/test/support/resources/pept2interpro.json +43 -0
- data/test/support/resources/pept2lca.json +14 -0
- data/test/support/resources/pept2prot.json +422 -0
- data/test/support/resources/pept2taxa.json +194 -0
- data/test/support/resources/peptinfo.json +70 -0
- data/test/support/resources/taxa2tree.json +194 -0
- data/test/support/resources/taxonomy.json +22 -0
- data/test/test_configuration.rb +1 -1
- data/test/test_formatters.rb +5 -5
- data/test/test_output_writer.rb +1 -1
- data/test/test_server_message.rb +2 -2
- data/test.taxa +4 -0
- data/unipept.gemspec +32 -21
- metadata +26 -9
data/lib/configuration.rb
CHANGED
@@ -2,8 +2,7 @@ require 'yaml'
|
|
2
2
|
|
3
3
|
module Unipept
|
4
4
|
class Configuration
|
5
|
-
attr_reader :config
|
6
|
-
attr_reader :file_name
|
5
|
+
attr_reader :config, :file_name
|
7
6
|
|
8
7
|
# Creates a new config object, based on a given YAML file. If no filename
|
9
8
|
# given, '.unipeptrc' in the home dir of the user will be used.
|
@@ -14,17 +13,17 @@ module Unipept
|
|
14
13
|
# config from
|
15
14
|
def initialize(file = nil)
|
16
15
|
@file_name = file || File.join(Dir.home, '.unipeptrc')
|
17
|
-
@config = if
|
18
|
-
|
16
|
+
@config = if File.exist? file_name
|
17
|
+
YAML.load_file file_name, permitted_classes: [Time]
|
19
18
|
else
|
20
|
-
|
19
|
+
{}
|
21
20
|
end
|
22
21
|
end
|
23
22
|
|
24
23
|
# Saves the config to disk. If the file doesn't exist yet, a new one will be
|
25
24
|
# created
|
26
25
|
def save
|
27
|
-
File.
|
26
|
+
File.write(file_name, config.to_yaml)
|
28
27
|
end
|
29
28
|
|
30
29
|
# Deletes a key
|
@@ -39,7 +38,7 @@ module Unipept
|
|
39
38
|
|
40
39
|
# forwards =[] to the internal config hash
|
41
40
|
def []=(*args)
|
42
|
-
config.[]=(*args)
|
41
|
+
config.[]=(*args) # rubocop:disable Layout/SpaceBeforeBrackets
|
43
42
|
end
|
44
43
|
end
|
45
44
|
end
|
data/lib/formatters.rb
CHANGED
@@ -156,7 +156,7 @@ module Unipept
|
|
156
156
|
# @return [String] The converted input data in the JSON format
|
157
157
|
def convert(data, first)
|
158
158
|
output = data.map(&:to_json).join(',')
|
159
|
-
first ? output :
|
159
|
+
first ? output : ",#{output}"
|
160
160
|
end
|
161
161
|
end
|
162
162
|
|
@@ -169,20 +169,7 @@ module Unipept
|
|
169
169
|
'csv'
|
170
170
|
end
|
171
171
|
|
172
|
-
|
173
|
-
# contains all the keys of the first element of the data, preceded by
|
174
|
-
# 'fasta_header' if a fasta_mapper is given.
|
175
|
-
#
|
176
|
-
# @param [Array] data The data that we will use to extract the keys from.
|
177
|
-
#
|
178
|
-
# @param [Array<Array<String>>] fasta_mapper Optional mapping between input
|
179
|
-
# data and corresponding fasta header. The data is represented as a list
|
180
|
-
# containing tuples where the first element is the fasta header and second
|
181
|
-
# element is the input data If a fasta_mapper is given, the output will be
|
182
|
-
# preceded with 'fasta_header'.
|
183
|
-
#
|
184
|
-
# @return [String] The header row
|
185
|
-
def header(data, fasta_mapper = nil)
|
172
|
+
def get_keys(data, fasta_mapper = nil)
|
186
173
|
# This global variable is necessary because we need to know how many items should be
|
187
174
|
# nil in the convert function.
|
188
175
|
$keys_length = 0 # rubocop:disable Style/GlobalVars
|
@@ -191,28 +178,47 @@ module Unipept
|
|
191
178
|
|
192
179
|
# First we look for items for both ec numbers, go terms and ipr codes that are fully filled in.
|
193
180
|
data.each do |row|
|
194
|
-
non_empty_items.
|
181
|
+
non_empty_items.each_key do |annotation_type|
|
195
182
|
non_empty_items[annotation_type] = row if row[annotation_type] && !row[annotation_type].empty?
|
196
183
|
end
|
197
184
|
end
|
198
185
|
|
199
|
-
|
200
|
-
|
201
|
-
|
202
|
-
processed_keys = keys
|
186
|
+
keys = fasta_mapper ? ['fasta_header'] : []
|
187
|
+
keys += (data.first.keys - %w[ec go ipr])
|
188
|
+
processed_keys = keys
|
203
189
|
|
204
|
-
|
205
|
-
|
190
|
+
non_empty_items.each do |annotation_type, non_empty_item|
|
191
|
+
next unless non_empty_item
|
206
192
|
|
207
|
-
|
208
|
-
|
193
|
+
keys += (non_empty_item.keys - processed_keys)
|
194
|
+
processed_keys += non_empty_item.keys
|
209
195
|
|
210
|
-
|
211
|
-
|
212
|
-
|
213
|
-
|
214
|
-
|
196
|
+
idx = keys.index(annotation_type)
|
197
|
+
keys.delete_at(idx)
|
198
|
+
keys.insert(idx, *non_empty_item[annotation_type].first.keys.map { |el| %w[ec_number go_term ipr_code].include?(el) ? el : "#{annotation_type}_#{el}" })
|
199
|
+
$keys_length = *non_empty_item[annotation_type].first.keys.length # rubocop:disable Style/GlobalVars
|
200
|
+
end
|
215
201
|
|
202
|
+
keys
|
203
|
+
end
|
204
|
+
|
205
|
+
# Returns the header row for the given data and fasta_mapper. This row
|
206
|
+
# contains all the keys of the first element of the data, preceded by
|
207
|
+
# 'fasta_header' if a fasta_mapper is given.
|
208
|
+
#
|
209
|
+
# @param [Array] data The data that we will use to extract the keys from.
|
210
|
+
#
|
211
|
+
# @param [Array<Array<String>>] fasta_mapper Optional mapping between input
|
212
|
+
# data and corresponding fasta header. The data is represented as a list
|
213
|
+
# containing tuples where the first element is the fasta header and second
|
214
|
+
# element is the input data. If a fasta_mapper is given, the output will be
|
215
|
+
# preceded with 'fasta_header'.
|
216
|
+
#
|
217
|
+
# @return [String] The header row
|
218
|
+
def header(data, fasta_mapper = nil)
|
219
|
+
keys = get_keys(data, fasta_mapper)
|
220
|
+
|
221
|
+
CSV.generate do |csv|
|
216
222
|
csv << keys.map(&:to_s) if keys.length.positive?
|
217
223
|
end
|
218
224
|
end
|
@@ -229,23 +235,25 @@ module Unipept
|
|
229
235
|
#
|
230
236
|
# @return [String] The converted input data in the CSV format
|
231
237
|
def convert(data, _first)
|
238
|
+
keys = get_keys(data)
|
239
|
+
|
232
240
|
CSV.generate do |csv|
|
233
241
|
data.each do |o|
|
234
|
-
row =
|
242
|
+
row = {}
|
235
243
|
o.each do |k, v|
|
236
244
|
if %w[ec go ipr].include? k
|
237
245
|
if v && !v.empty?
|
238
|
-
v.first.
|
239
|
-
row
|
246
|
+
v.first.each_key do |key|
|
247
|
+
row[key == 'protein_count' ? "#{k}_protein_count" : key] = (v.map { |el| el[key] }).join(' ').strip
|
240
248
|
end
|
241
249
|
else
|
242
|
-
row = row.concat(Array.new($keys_length[0], nil)) # rubocop:disable Style/GlobalVars
|
250
|
+
row[k] = row.concat(Array.new($keys_length[0], nil)) # rubocop:disable Style/GlobalVars
|
243
251
|
end
|
244
252
|
else
|
245
|
-
row
|
253
|
+
row[k] = (v == '' ? nil : v)
|
246
254
|
end
|
247
255
|
end
|
248
|
-
csv << row
|
256
|
+
csv << keys.map { |k| row[k] }
|
249
257
|
end
|
250
258
|
end
|
251
259
|
end
|
@@ -296,7 +304,7 @@ module Unipept
|
|
296
304
|
#
|
297
305
|
# @return [String] The converted input data in the XML format
|
298
306
|
def convert(data, _first)
|
299
|
-
data.map { |row|
|
307
|
+
data.map { |row| "<result>#{row.to_xml}</result>" }.join
|
300
308
|
end
|
301
309
|
end
|
302
310
|
|
@@ -336,4 +344,68 @@ module Unipept
|
|
336
344
|
.join
|
337
345
|
end
|
338
346
|
end
|
347
|
+
|
348
|
+
class HtmlFormatter < Formatter
|
349
|
+
register :html
|
350
|
+
|
351
|
+
# @return [String] The type of the current formatter: html
|
352
|
+
def type
|
353
|
+
'html'
|
354
|
+
end
|
355
|
+
|
356
|
+
def self.hidden?
|
357
|
+
false
|
358
|
+
end
|
359
|
+
|
360
|
+
def header(_data, _fasta_mapper = nil)
|
361
|
+
''
|
362
|
+
end
|
363
|
+
|
364
|
+
def footer
|
365
|
+
''
|
366
|
+
end
|
367
|
+
|
368
|
+
# Converts the given input data to an HTML page that contains the Unipept visualizations
|
369
|
+
#
|
370
|
+
# @param [Array] data The data we wish to convert
|
371
|
+
#
|
372
|
+
# @param [Boolean] Is this the first output batch?
|
373
|
+
#
|
374
|
+
# @return [String] The converted input data in the HTML format
|
375
|
+
def convert(data, _first)
|
376
|
+
data
|
377
|
+
end
|
378
|
+
end
|
379
|
+
|
380
|
+
class UrlFormatter < Formatter
|
381
|
+
register :url
|
382
|
+
|
383
|
+
# @return [String] The type of the current formatter: url
|
384
|
+
def type
|
385
|
+
'url'
|
386
|
+
end
|
387
|
+
|
388
|
+
def self.hidden?
|
389
|
+
false
|
390
|
+
end
|
391
|
+
|
392
|
+
def header(_data, _fasta_mapper = nil)
|
393
|
+
''
|
394
|
+
end
|
395
|
+
|
396
|
+
def footer
|
397
|
+
''
|
398
|
+
end
|
399
|
+
|
400
|
+
# Converts the given input data to a URL: the 'gist' link of the first result, with the gist.github.com prefix rewritten to bl.ocks.org
|
401
|
+
#
|
402
|
+
# @param [Array] data The data we wish to convert
|
403
|
+
#
|
404
|
+
# @param [Boolean] Is this the first output batch?
|
405
|
+
#
|
406
|
+
# @return [String] The URL derived from the gist link, followed by a newline
|
407
|
+
def convert(data, _first)
|
408
|
+
"#{data[0]['gist'].sub!('https://gist.github.com/', 'https://bl.ocks.org/')}\n"
|
409
|
+
end
|
410
|
+
end
|
339
411
|
end
|
data/lib/server_message.rb
CHANGED
@@ -4,9 +4,7 @@ require_relative 'configuration'
|
|
4
4
|
|
5
5
|
module Unipept
|
6
6
|
class ServerMessage
|
7
|
-
attr_reader :message_url
|
8
|
-
|
9
|
-
attr_reader :configuration
|
7
|
+
attr_reader :message_url, :configuration
|
10
8
|
|
11
9
|
def initialize(host)
|
12
10
|
@message_url = "#{host}/api/v1/messages.json"
|
@@ -34,7 +32,7 @@ module Unipept
|
|
34
32
|
# ago.
|
35
33
|
def recently_fetched?
|
36
34
|
last_fetched = @configuration['last_fetch_date']
|
37
|
-
!last_fetched.nil? && (last_fetched + 60 * 60 * 24) > Time.now
|
35
|
+
!last_fetched.nil? && (last_fetched + (60 * 60 * 24)) > Time.now
|
38
36
|
end
|
39
37
|
|
40
38
|
# Updates the last checked timestamp
|
@@ -67,8 +67,8 @@ module Unipept
|
|
67
67
|
end
|
68
68
|
|
69
69
|
def test_input_iterator_file
|
70
|
-
File.
|
71
|
-
runner = new_runner('test',
|
70
|
+
File.write('input_file', %w[a b c].join("\n"))
|
71
|
+
runner = new_runner('test', host: 'https://param_host', input: 'input_file')
|
72
72
|
output = []
|
73
73
|
runner.input_iterator.each { |el| output << el.chomp }
|
74
74
|
assert_equal(%w[a b c], output)
|
@@ -84,7 +84,7 @@ module Unipept
|
|
84
84
|
end
|
85
85
|
|
86
86
|
def test_input_iterator_arguments_priority
|
87
|
-
File.
|
87
|
+
File.write('input_file', %w[1 2 3].join("\n"))
|
88
88
|
runner = new_runner('test', { host: 'https://param_host', input: 'input_file' }, %w[a b c])
|
89
89
|
output = []
|
90
90
|
_out, _err = capture_io_with_input(%w[1 2 3]) do
|
@@ -94,8 +94,8 @@ module Unipept
|
|
94
94
|
end
|
95
95
|
|
96
96
|
def test_input_iterator_file_priority
|
97
|
-
File.
|
98
|
-
runner = new_runner('test',
|
97
|
+
File.write('input_file', %w[a b c].join("\n"))
|
98
|
+
runner = new_runner('test', host: 'https://param_host', input: 'input_file')
|
99
99
|
output = []
|
100
100
|
_out, _err = capture_io_with_input(%w[1 2 3]) do
|
101
101
|
runner.input_iterator.each { |el| output << el.chomp }
|
@@ -280,7 +280,7 @@ module Unipept
|
|
280
280
|
runner.save_error('error message')
|
281
281
|
end
|
282
282
|
assert(err.start_with?('API request failed! log can be found in'))
|
283
|
-
assert_equal('error message',
|
283
|
+
assert_equal('error message', File.foreach('errordir/error.log').next.chomp)
|
284
284
|
end
|
285
285
|
end
|
286
286
|
|
@@ -319,7 +319,7 @@ module Unipept
|
|
319
319
|
end
|
320
320
|
|
321
321
|
def test_success_no_header_option_handle_response
|
322
|
-
runner = new_runner('test',
|
322
|
+
runner = new_runner('test', { host: 'test', 'no-header': true })
|
323
323
|
response = new_response(success: true, response_body: '[{"key1":"value1","key2":"value1"},{"key1":"value2","key2":"value2"}]')
|
324
324
|
lambda = runner.handle_response(response, 0, nil)
|
325
325
|
assert(lambda.lambda?)
|
@@ -409,6 +409,7 @@ module Unipept
|
|
409
409
|
def new_response(values)
|
410
410
|
response = Class.new do
|
411
411
|
def initialize(values)
|
412
|
+
super()
|
412
413
|
@values = values
|
413
414
|
end
|
414
415
|
|
@@ -44,7 +44,7 @@ module Unipept
|
|
44
44
|
out, _err = capture_io_while do
|
45
45
|
Commands::Unipept.run(['config', 'test', value])
|
46
46
|
end
|
47
|
-
assert_equal(
|
47
|
+
assert_equal("test was set to #{value}", out.chomp)
|
48
48
|
assert_equal(value, Unipept::Configuration.new['test'])
|
49
49
|
end
|
50
50
|
|
@@ -51,7 +51,7 @@ module Unipept
|
|
51
51
|
lines = out.each_line
|
52
52
|
assert_equal('', err)
|
53
53
|
assert(lines.next.start_with?('peptide,total_protein_count,ec_number,ec_protein_count'))
|
54
|
-
assert(lines.next.start_with?('AALTER,
|
54
|
+
assert(lines.next.start_with?('AALTER,7,3.1.3.3 6.3.2.13,2 2'))
|
55
55
|
assert_raises(StopIteration) { lines.next }
|
56
56
|
end
|
57
57
|
|
@@ -62,9 +62,9 @@ module Unipept
|
|
62
62
|
lines = out.each_line
|
63
63
|
assert_equal('', err)
|
64
64
|
assert(lines.next.start_with?('fasta_header,peptide,total_protein_count,ec_number,ec_protein_count'))
|
65
|
-
assert(lines.next.start_with?('>test,AALTER,
|
66
|
-
assert(lines.next.start_with?('>test,AALER,
|
67
|
-
assert(lines.next.start_with?('>tost,AALTER,
|
65
|
+
assert(lines.next.start_with?('>test,AALTER,7,3.1.3.3 6.3.2.13,2 2'))
|
66
|
+
assert(lines.next.start_with?('>test,AALER,208,6.1.1.16 2.7.7.38,44 13'))
|
67
|
+
assert(lines.next.start_with?('>tost,AALTER,7,3.1.3.3 6.3.2.13,2 2'))
|
68
68
|
assert_raises(StopIteration) { lines.next }
|
69
69
|
end
|
70
70
|
|
@@ -75,9 +75,9 @@ module Unipept
|
|
75
75
|
lines = out.each_line
|
76
76
|
assert_equal('', err)
|
77
77
|
assert(lines.next.start_with?('fasta_header,peptide,ec_number'))
|
78
|
-
assert(lines.next.start_with?('>test,AALTER,
|
79
|
-
assert(lines.next.start_with?('>test,AALER,
|
80
|
-
assert(lines.next.start_with?('>tost,AALTER,
|
78
|
+
assert(lines.next.start_with?('>test,AALTER,3.1.3.3 6.3.2.13'))
|
79
|
+
assert(lines.next.start_with?('>test,AALER,6.1.1.16 2.7.7.38'))
|
80
|
+
assert(lines.next.start_with?('>tost,AALTER,3.1.3.3 6.3.2.13'))
|
81
81
|
assert_raises(StopIteration) { lines.next }
|
82
82
|
end
|
83
83
|
|
@@ -87,7 +87,7 @@ module Unipept
|
|
87
87
|
end
|
88
88
|
lines = out.each_line
|
89
89
|
assert_equal('', err)
|
90
|
-
output = lines.to_a.join
|
90
|
+
output = lines.to_a.join.chomp
|
91
91
|
assert(output.start_with?('['))
|
92
92
|
assert(output.end_with?(']'))
|
93
93
|
assert(!output.include?('}{'))
|
@@ -100,7 +100,7 @@ module Unipept
|
|
100
100
|
end
|
101
101
|
lines = out.each_line
|
102
102
|
assert_equal('', err)
|
103
|
-
output = lines.to_a.join
|
103
|
+
output = lines.to_a.join.chomp
|
104
104
|
assert(output.start_with?('<results>'))
|
105
105
|
assert(output.end_with?('</results>'))
|
106
106
|
assert(output.include?('<fasta_header>'))
|
@@ -122,7 +122,7 @@ module Unipept
|
|
122
122
|
lines = out.each_line
|
123
123
|
assert_equal('', err)
|
124
124
|
assert(lines.next.start_with?('peptide,total_protein_count,ec_number,ec_protein_count'))
|
125
|
-
assert(lines.next.start_with?('AALTER,
|
125
|
+
assert(lines.next.start_with?('AALTER,7,3.1.3.3 6.3.2.13,2 2'))
|
126
126
|
assert_raises(StopIteration) { lines.next }
|
127
127
|
end
|
128
128
|
|
@@ -133,7 +133,7 @@ module Unipept
|
|
133
133
|
lines = out.each_line
|
134
134
|
assert_equal('', err)
|
135
135
|
assert(lines.next.start_with?('peptide,total_protein_count'))
|
136
|
-
assert(lines.next.start_with?('MDGTEYIIVK,
|
136
|
+
assert(lines.next.start_with?('MDGTEYIIVK,35'))
|
137
137
|
assert_raises(StopIteration) { lines.next }
|
138
138
|
end
|
139
139
|
end
|