unipept 2.0.0 → 2.1.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.github/workflows/ci.yml +55 -0
- data/.rubocop.yml +6 -2
- data/.ruby-version +1 -1
- data/Gemfile +5 -8
- data/Gemfile.lock +28 -50
- data/README.md +1 -4
- data/Rakefile +2 -2
- data/VERSION +1 -1
- data/lib/batch_iterator.rb +1 -1
- data/lib/commands/unipept.rb +29 -3
- data/lib/commands/unipept/api_runner.rb +64 -0
- data/lib/commands/unipept/pept2interpro.rb +16 -0
- data/lib/formatters.rb +30 -10
- data/test/commands/unipept/test_pept2ec.rb +140 -0
- data/test/commands/unipept/test_pept2funct.rb +140 -0
- data/test/commands/unipept/test_pept2go.rb +140 -0
- data/test/commands/unipept/test_pept2interpro.rb +140 -0
- data/test/commands/unipept/test_peptinfo.rb +140 -0
- data/test/helper.rb +0 -2
- data/unipept.gemspec +27 -24
- metadata +23 -29
- data/.travis.yml +0 -13
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: f4e8010e58ccf55d028165182cb5ec241ce7501940d6cf3e80405660f9249076
|
4
|
+
data.tar.gz: 3d21f11d6b8b06ad60f781cb83d8e38a33c1d431854064bc1a69d072c7107559
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: af8d1bb017251e5bec0fb57217f26ae5c26e291c1e6e7cdddf62a248489539c023ecf3dc234d5126e992097bcf82b52e563bc908b0b8285a4d88326f142a3dcd
|
7
|
+
data.tar.gz: 25a8ef260a1de7a26d994912a90f73f807de1b4b7f53ca7963fe3ee3270324139de8353b61ba794b6d2bcc4ac3353d5f9f31827975b07edcb93d08527c771706
|
@@ -0,0 +1,55 @@
|
|
1
|
+
# Continuous integration: runs the unit tests on every Ruby version of the
# matrix and lints the code base with RuboCop, on every push.
name: CI

on: [push]

jobs:
  # Unit tests, executed once per Ruby version listed in the matrix.
  test:
    runs-on: ubuntu-latest
    strategy:
      matrix:
        ruby: ["2.4", "2.5", "2.6"]
      # Keep testing the other Ruby versions when one of them fails.
      fail-fast: false
    name: Test Ruby ${{ matrix.ruby }}
    steps:
      - uses: actions/checkout@v1
      - name: Setup Ruby
        uses: actions/setup-ruby@v1
        with:
          ruby-version: ${{ matrix.ruby }}
      - name: Cache Ruby dependencies
        uses: actions/cache@v1
        with:
          path: vendor/bundle
          key: ${{ runner.os }}-${{ matrix.ruby }}-gem-${{ hashFiles('**/Gemfile.lock') }}
          restore-keys: |
            ${{ runner.os }}-${{ matrix.ruby }}-gem-
      - name: Install dependencies
        run: |
          gem install bundler:1.17.2
          bundle config path vendor/bundle
          bundle install --jobs 4 --retry 3
      - name: Test
        run: |
          bundle exec rake test
  # Style check with RuboCop (rake test_style).
  lint:
    name: Lint
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v1
      - name: Setup Ruby
        uses: actions/setup-ruby@v1
      - name: Cache Ruby dependencies
        uses: actions/cache@v1
        with:
          path: vendor/bundle
          # This job defines no matrix, so the key uses a fixed "lint" segment
          # instead of matrix.ruby.
          key: ${{ runner.os }}-lint-gem-${{ hashFiles('**/Gemfile.lock') }}
          restore-keys: |
            ${{ runner.os }}-lint-gem-
      - name: Install dependencies
        run: |
          gem install bundler:1.17.2
          bundle config path vendor/bundle
          bundle install --jobs 4 --retry 3
      - name: Lint
        run: |
          bundle exec rake test_style
|
data/.rubocop.yml
CHANGED
@@ -23,13 +23,17 @@ Metrics/AbcSize:
|
|
23
23
|
Enabled: false
|
24
24
|
Metrics/ClassLength:
|
25
25
|
Enabled: false
|
26
|
-
|
26
|
+
Layout/LineLength:
|
27
27
|
Enabled: false
|
28
28
|
Metrics/MethodLength:
|
29
29
|
Enabled: false
|
30
30
|
Metrics/BlockLength:
|
31
31
|
Enabled: false
|
32
|
+
Metrics/CyclomaticComplexity:
|
33
|
+
Enabled: false
|
34
|
+
Metrics/PerceivedComplexity:
|
35
|
+
Enabled: false
|
32
36
|
Naming/HeredocDelimiterNaming:
|
33
37
|
Enabled: false
|
34
|
-
Naming/
|
38
|
+
Naming/MethodParameterName:
|
35
39
|
Enabled: false
|
data/.ruby-version
CHANGED
@@ -1 +1 @@
|
|
1
|
-
2.6
|
1
|
+
2.4.6
|
data/Gemfile
CHANGED
@@ -1,16 +1,13 @@
|
|
1
1
|
source 'http://rubygems.org'
|
2
2
|
# Add dependencies required to use your gem here.
|
3
|
-
# Example:
|
4
|
-
# gem "activesupport", ">= 2.3.5"
|
5
3
|
|
6
|
-
gem 'cri', '~>2.15.
|
7
|
-
gem 'typhoeus', '~> 1.1'
|
4
|
+
gem 'cri', '~>2.15.10'
|
5
|
+
gem 'typhoeus', '~> 1.3.1'
|
8
6
|
|
9
7
|
group :development do
|
10
|
-
gem '
|
11
|
-
gem '
|
12
|
-
gem '
|
13
|
-
gem 'rubocop', '~>0.32'
|
8
|
+
gem 'minitest', '~> 5.14'
|
9
|
+
gem 'rake', '~> 13.0.1'
|
10
|
+
gem 'rubocop', '~>0.79.0'
|
14
11
|
end
|
15
12
|
|
16
13
|
group :release do
|
data/Gemfile.lock
CHANGED
@@ -3,24 +3,15 @@ GEM
|
|
3
3
|
specs:
|
4
4
|
addressable (2.4.0)
|
5
5
|
ast (2.4.0)
|
6
|
-
builder (3.2.
|
7
|
-
|
8
|
-
coveralls (0.8.22)
|
9
|
-
json (>= 1.8, < 3)
|
10
|
-
simplecov (~> 0.16.1)
|
11
|
-
term-ansicolor (~> 1.3)
|
12
|
-
thor (~> 0.19.4)
|
13
|
-
tins (~> 1.6)
|
14
|
-
cri (2.15.3)
|
15
|
-
colored (~> 1.2)
|
6
|
+
builder (3.2.4)
|
7
|
+
cri (2.15.10)
|
16
8
|
descendants_tracker (0.0.4)
|
17
9
|
thread_safe (~> 0.3, >= 0.3.1)
|
18
|
-
docile (1.3.1)
|
19
10
|
ethon (0.12.0)
|
20
11
|
ffi (>= 1.3.0)
|
21
12
|
faraday (0.9.2)
|
22
13
|
multipart-post (>= 1.2, < 3)
|
23
|
-
ffi (1.
|
14
|
+
ffi (1.12.1)
|
24
15
|
git (1.5.0)
|
25
16
|
github_api (0.16.0)
|
26
17
|
addressable (~> 2.4.0)
|
@@ -29,9 +20,9 @@ GEM
|
|
29
20
|
hashie (>= 3.4)
|
30
21
|
mime-types (>= 1.16, < 3.0)
|
31
22
|
oauth2 (~> 1.0)
|
32
|
-
hashie (
|
33
|
-
highline (2.0.
|
34
|
-
jaro_winkler (1.5.
|
23
|
+
hashie (4.0.0)
|
24
|
+
highline (2.0.3)
|
25
|
+
jaro_winkler (1.5.4)
|
35
26
|
jeweler (2.3.9)
|
36
27
|
builder
|
37
28
|
bundler
|
@@ -43,66 +34,53 @@ GEM
|
|
43
34
|
rake
|
44
35
|
rdoc
|
45
36
|
semver2
|
46
|
-
|
47
|
-
jwt (2.1.0)
|
37
|
+
jwt (2.2.1)
|
48
38
|
mime-types (2.99.3)
|
49
39
|
mini_portile2 (2.4.0)
|
50
|
-
minitest (5.
|
51
|
-
multi_json (1.
|
40
|
+
minitest (5.14.0)
|
41
|
+
multi_json (1.14.1)
|
52
42
|
multi_xml (0.6.0)
|
53
|
-
multipart-post (2.
|
54
|
-
nokogiri (1.10.
|
43
|
+
multipart-post (2.1.1)
|
44
|
+
nokogiri (1.10.7)
|
55
45
|
mini_portile2 (~> 2.4.0)
|
56
|
-
oauth2 (1.4.
|
57
|
-
faraday (>= 0.8, <
|
46
|
+
oauth2 (1.4.2)
|
47
|
+
faraday (>= 0.8, < 2.0)
|
58
48
|
jwt (>= 1.0, < 3.0)
|
59
49
|
multi_json (~> 1.3)
|
60
50
|
multi_xml (~> 0.5)
|
61
51
|
rack (>= 1.2, < 3)
|
62
|
-
parallel (1.
|
63
|
-
parser (2.
|
52
|
+
parallel (1.19.1)
|
53
|
+
parser (2.7.0.2)
|
64
54
|
ast (~> 2.4.0)
|
65
|
-
powerpack (0.1.2)
|
66
55
|
psych (3.1.0)
|
67
|
-
rack (2.
|
56
|
+
rack (2.1.1)
|
68
57
|
rainbow (3.0.0)
|
69
|
-
rake (
|
70
|
-
rdoc (6.
|
71
|
-
rubocop (0.
|
58
|
+
rake (13.0.1)
|
59
|
+
rdoc (6.2.1)
|
60
|
+
rubocop (0.79.0)
|
72
61
|
jaro_winkler (~> 1.5.1)
|
73
62
|
parallel (~> 1.10)
|
74
|
-
parser (>= 2.
|
75
|
-
powerpack (~> 0.1)
|
63
|
+
parser (>= 2.7.0.1)
|
76
64
|
rainbow (>= 2.2.2, < 4.0)
|
77
65
|
ruby-progressbar (~> 1.7)
|
78
|
-
unicode-display_width (
|
79
|
-
ruby-progressbar (1.10.
|
66
|
+
unicode-display_width (>= 1.4.0, < 1.7)
|
67
|
+
ruby-progressbar (1.10.1)
|
80
68
|
semver2 (3.4.2)
|
81
|
-
simplecov (0.16.1)
|
82
|
-
docile (~> 1.1)
|
83
|
-
json (>= 1.8, < 3)
|
84
|
-
simplecov-html (~> 0.10.0)
|
85
|
-
simplecov-html (0.10.2)
|
86
|
-
term-ansicolor (1.7.1)
|
87
|
-
tins (~> 1.0)
|
88
|
-
thor (0.19.4)
|
89
69
|
thread_safe (0.3.6)
|
90
|
-
tins (1.20.2)
|
91
70
|
typhoeus (1.3.1)
|
92
71
|
ethon (>= 0.9.0)
|
93
|
-
unicode-display_width (1.
|
72
|
+
unicode-display_width (1.6.1)
|
94
73
|
|
95
74
|
PLATFORMS
|
96
75
|
ruby
|
97
76
|
|
98
77
|
DEPENDENCIES
|
99
|
-
|
100
|
-
cri (~> 2.15.3)
|
78
|
+
cri (~> 2.15.10)
|
101
79
|
jeweler
|
102
|
-
minitest (~> 5.
|
103
|
-
rake (~>
|
104
|
-
rubocop (~> 0.
|
105
|
-
typhoeus (~> 1.1)
|
80
|
+
minitest (~> 5.14)
|
81
|
+
rake (~> 13.0.1)
|
82
|
+
rubocop (~> 0.79.0)
|
83
|
+
typhoeus (~> 1.3.1)
|
106
84
|
|
107
85
|
BUNDLED WITH
|
108
86
|
1.17.2
|
data/README.md
CHANGED
@@ -1,9 +1,6 @@
|
|
1
1
|
# unipept-cli
|
2
2
|
|
3
3
|
[![Gem Version](https://badge.fury.io/rb/unipept.svg)](http://badge.fury.io/rb/unipept)
|
4
|
-
[![Build Status](https://api.travis-ci.org/unipept/unipept-cli.svg)](https://travis-ci.org/unipept/unipept-cli)
|
5
|
-
[![Coverage Status](https://coveralls.io/repos/unipept/unipept-cli/badge.svg)](https://coveralls.io/r/unipept/unipept-cli)
|
6
|
-
[![Code Climate](https://codeclimate.com/github/unipept/unipept-cli/badges/gpa.svg)](https://codeclimate.com/github/unipept/unipept-cli)
|
7
4
|
|
8
5
|
Unipept-cli offers a command line interface to the [Unipept](http://unipept.ugent.be) web service.
|
9
6
|
Documentation about the web service can be found at [http://unipept.ugent.be/apidocs](http://unipept.ugent.be/apidocs), documentation about the command line tools at [http://unipept.ugent.be/clidocs](http://unipept.ugent.be/clidocs).
|
@@ -19,7 +16,7 @@ ruby 2.2.2p95 (2015-04-13 revision 50295) [x86_64-darwin14]
|
|
19
16
|
|
20
17
|
More information on installing Ruby can be found at https://www.ruby-lang.org/en/installation/
|
21
18
|
|
22
|
-
The Unipept CLI is available as a
|
19
|
+
The Unipept CLI is available as a _gem_. This means it can easily be installed with the following command:
|
23
20
|
|
24
21
|
```bash
|
25
22
|
$ gem install unipept
|
data/Rakefile
CHANGED
@@ -25,7 +25,7 @@ begin
|
|
25
25
|
commands for handling proteins using the command line.
|
26
26
|
EOS
|
27
27
|
gem.email = 'unipept@ugent.be'
|
28
|
-
gem.authors = ['
|
28
|
+
gem.authors = ['Bart Mesuere', 'Pieter Verschaffelt', 'Toon Willems', 'Tom Naessens']
|
29
29
|
gem.required_ruby_version = '>= 2.0.0'
|
30
30
|
end
|
31
31
|
Jeweler::RubygemsDotOrgTasks.new
|
@@ -43,7 +43,7 @@ end
|
|
43
43
|
|
44
44
|
RuboCop::RakeTask.new(:test_style)
|
45
45
|
|
46
|
-
task test: %i[test_unit
|
46
|
+
task test: %i[test_unit]
|
47
47
|
|
48
48
|
task default: :test
|
49
49
|
|
data/VERSION
CHANGED
@@ -1 +1 @@
|
|
1
|
-
2.
|
1
|
+
2.1.0
|
data/lib/batch_iterator.rb
CHANGED
data/lib/commands/unipept.rb
CHANGED
@@ -12,6 +12,7 @@ require_relative 'unipept/config'
|
|
12
12
|
require_relative 'unipept/pept2ec'
|
13
13
|
require_relative 'unipept/pept2funct'
|
14
14
|
require_relative 'unipept/pept2go'
|
15
|
+
require_relative 'unipept/pept2interpro'
|
15
16
|
require_relative 'unipept/pept2lca'
|
16
17
|
require_relative 'unipept/pept2prot'
|
17
18
|
require_relative 'unipept/pept2taxa'
|
@@ -28,6 +29,7 @@ module Unipept
|
|
28
29
|
add_pept2ec_command
|
29
30
|
add_pept2funct_command
|
30
31
|
add_pept2go_command
|
32
|
+
add_pept2interpro_command
|
31
33
|
add_pept2lca_command
|
32
34
|
add_peptinfo_command
|
33
35
|
add_taxa2lca_command
|
@@ -149,8 +151,8 @@ module Unipept
|
|
149
151
|
|
150
152
|
def add_pept2funct_command
|
151
153
|
@root_command.define_command('pept2funct') do
|
152
|
-
usage '
|
153
|
-
summary 'Fetch EC numbers
|
154
|
+
usage 'pept2funct[options]'
|
155
|
+
summary 'Fetch EC numbers, GO terms and InterPro codes of UniProt entries that match tryptic peptides.'
|
154
156
|
description <<-EOS
|
155
157
|
For each tryptic peptide the unipept pept2funct command retrieves from Unipept the set of EC numbers and GO terms from all UniProt entries whose protein sequence contains an exact matches to the tryptic peptide. The command expects a list of tryptic peptides that are passed
|
156
158
|
|
@@ -164,7 +166,7 @@ module Unipept
|
|
164
166
|
EOS
|
165
167
|
|
166
168
|
flag :e, :equate, 'equate isoleucine (I) and leucine (L) when matching peptides'
|
167
|
-
flag :a, :all, 'Also return the names of the EC numbers and
|
169
|
+
flag :a, :all, 'Also return the names of the EC numbers, GO terms and InterPro codes. Note that this may have a performance penalty.'
|
168
170
|
option :s, :select, 'select the information fields to return. Selected fields are passed as a comma separated list of field names. Multiple -s (or --select) options may be used.', argument: :required, multiple: true
|
169
171
|
|
170
172
|
runner Commands::Pept2funct
|
@@ -195,6 +197,30 @@ module Unipept
|
|
195
197
|
end
|
196
198
|
end
|
197
199
|
|
200
|
+
# Registers the `pept2interpro` subcommand on @root_command.
# The definition block declares the usage line, the summary and the long
# description of the command, the -e/--equate and -a/--all flags and the
# repeatable -s/--select option (which requires an argument), and sets
# Commands::Pept2interpro as the runner of the command.
def add_pept2interpro_command
|
201
|
+
@root_command.define_command('pept2interpro') do
|
202
|
+
usage 'pept2interpro [options]'
|
203
|
+
summary 'Fetch InterPro entries of UniProt entries that match tryptic peptides.'
|
204
|
+
description <<-EOS
|
205
|
+
For each tryptic peptide the unipept pept2interpro command retrieves from Unipept the set of InterPro entries from all UniProt entries whose protein sequence contains an exact matches to the tryptic peptide. The command expects a list of tryptic peptides that are passed
|
206
|
+
|
207
|
+
- as separate command line arguments
|
208
|
+
|
209
|
+
- in a text file that is passed as an argument to the -i option
|
210
|
+
|
211
|
+
- to standard input
|
212
|
+
|
213
|
+
The command will give priority to the first way tryptic peptides are passed, in the order as listed above. Text files and standard input should have one tryptic peptide per line.
|
214
|
+
EOS
|
215
|
+
|
216
|
+
flag :e, :equate, 'equate isoleucine (I) and leucine (L) when matching peptides'
|
217
|
+
flag :a, :all, 'Also return the names and types of the InterPro entries. Note that this may have a performance penalty.'
|
218
|
+
option :s, :select, 'select the information fields to return. Selected fields are passed as a comma separated list of field names. Multiple -s (or --select) options may be used.', argument: :required, multiple: true
|
219
|
+
|
220
|
+
runner Commands::Pept2interpro
|
221
|
+
end
|
222
|
+
end
|
223
|
+
|
198
224
|
def add_pept2lca_command
|
199
225
|
@root_command.define_command('pept2lca') do
|
200
226
|
usage 'pept2lca [options]'
|
data/lib/commands/unipept/api_runner.rb
CHANGED
@@ -190,10 +190,74 @@ module Unipept
|
|
190
190
|
# Parses a JSON response body and restricts every row to the selected fields.
#
# The body is parsed with JSON[]; a body that cannot be parsed is treated as an
# empty result, and a single (non-array) result is wrapped in an array. When
# `selected_fields` is not empty, only the keys matched by at least one of its
# patterns are kept in each row. When the formatter is a Unipept::CSVFormatter,
# the rows are flattened with flatten_functional_fields before the selection and
# re-nested with inflate_functional_fields afterwards.
#
# @param json_response [String] raw JSON body of a response
# @return [Array] the parsed rows, restricted to the selected fields
def filter_result(json_response)
  result = begin
    JSON[json_response]
  rescue StandardError
    # Best effort: an unparsable body yields no rows instead of aborting.
    []
  end
  result = [result] unless result.is_a? Array

  # The key order of the first row is needed to rebuild the nested rows later on.
  key_order = result.first.keys if result.first
  csv_output = formatter.instance_of?(Unipept::CSVFormatter)
  result = flatten_functional_fields(result) if csv_output

  unless selected_fields.empty?
    # Hash#select (not select!) on purpose: select! returns nil when no key was
    # removed, which would replace a fully selected row by nil.
    result.map! do |row|
      row.select { |key, _value| selected_fields.any? { |field| field.match key } }
    end
  end

  result = inflate_functional_fields(result, key_order) if csv_output && result.first
  result
end
|
196
199
|
|
200
|
+
# Transforms hierarchical rows into rows without hierarchy. Each functional
# annotation field ("ec", "go" and "ipr") holds a list of objects; every attribute
# of those objects becomes a top-level column that collects, in order, the values
# of all objects in the list. The attributes "ec_number", "go_term" and "ipr_code"
# keep their name, every other attribute is prefixed with the annotation field
# name and an underscore.
# Example: {"go" => [{"go_term" => xxx, "protein_count" => yyy}]} --> {"go_term" => [xxx], "go_protein_count" => [yyy]}
#
# An annotation field whose value is nil or an empty list contributes no columns.
# All other fields are copied unchanged.
#
# @param data [Array<Hash>] rows with nested annotation lists
# @return [Array<Hash>] new rows in which the annotation lists are replaced by flat columns
def flatten_functional_fields(data)
  data.map do |row|
    row.each_with_object({}) do |(key, value), flat_row|
      unless %w[ec go ipr].include? key
        flat_row[key] = value
        next
      end

      # `value || []` tolerates a missing (nil) annotation list.
      (value || []).each do |annotation|
        annotation.each do |field_name, field_value|
          column = %w[ec_number go_term ipr_code].include?(field_name) ? field_name : "#{key}_#{field_name}"
          flat_row[column] = [] unless flat_row.key? column
          flat_row[column] << field_value
        end
      end
    end
  end
end
|
224
|
+
|
225
|
+
# Transforms rows flattened by flatten_functional_fields back to the original
# hierarchy.
#
# The keys of every output row follow `original_key_order`. For the annotation
# fields "ec", "go" and "ipr", all columns of the flat row whose name starts with
# the field name are zipped back into a list of objects; the field is left out
# when the row has no such column. Any other key is copied when the row has it.
#
# @param data [Array<Hash>] flat rows
# @param original_key_order [Array<String>] keys of the hierarchical rows, in output order
# @return [Array<Hash>] rows with the annotation lists restored
def inflate_functional_fields(data, original_key_order)
  data.map do |row|
    original_key_order.each_with_object({}) do |original_key, nested_row|
      unless %w[ec go ipr].include? original_key
        nested_row[original_key] = row[original_key] if row.key? original_key
        next
      end

      # All columns that start with "ec", "go" or "ipr" belong to this annotation field.
      annotation_keys = row.keys.select { |key| key.start_with? original_key }
      next if annotation_keys.empty?

      # Each annotation column is an array; the i-th values of all columns form
      # one object. (E.g. {a => [1, 2], b => [x, y]} --> [{a: 1, b: x}, {a: 2, b: y}])
      # Only valid indices are visited, so no trailing all-nil object is produced.
      nested_row[original_key] = row[annotation_keys.first].each_index.map do |i|
        annotation_keys.each_with_object({}) do |annotation_key, annotation|
          field_name = if %w[ec_number go_term ipr_code].include?(annotation_key)
                         annotation_key
                       else
                         # Drop the "<field>_" prefix that was added while flattening.
                         annotation_key[(annotation_key.index('_') + 1)..-1]
                       end
          annotation[field_name] = row[annotation_key][i]
        end
      end
    end
  end
end
|
260
|
+
|
197
261
|
# Builds a regular expression from a glob-like string: every "*" in the string
# is replaced by ".*" and the result is placed between the ^ and $ anchors.
# NOTE(review): other regex metacharacters in the string are interpolated
# unescaped — confirm that this is intended.
def glob_to_regex(string)
|
198
262
|
/^#{string.gsub('*', '.*')}$/
|
199
263
|
end
|