unipept 2.1.1 → 2.2.2

Sign up to get free protection for your applications and to get access to all the features.
Files changed (55) hide show
  1. checksums.yaml +4 -4
  2. data/.github/dependabot.yml +9 -0
  3. data/.github/workflows/ci.yml +6 -27
  4. data/.rakeTasks +7 -0
  5. data/.rubocop.yml +2 -0
  6. data/.ruby-version +1 -1
  7. data/CITATION.cff +30 -0
  8. data/Gemfile +2 -2
  9. data/Gemfile.lock +60 -41
  10. data/README.md +2 -2
  11. data/Rakefile +4 -4
  12. data/VERSION +1 -1
  13. data/lib/batch_iterator.rb +16 -0
  14. data/lib/commands/prot2pept.rb +1 -2
  15. data/lib/commands/unipept/api_runner.rb +10 -12
  16. data/lib/commands/unipept/config.rb +1 -1
  17. data/lib/commands/unipept/pept2taxa.rb +1 -5
  18. data/lib/commands/unipept/taxa2tree.rb +74 -0
  19. data/lib/commands/unipept.rb +25 -1
  20. data/lib/commands/uniprot.rb +4 -5
  21. data/lib/configuration.rb +6 -7
  22. data/lib/formatters.rb +108 -36
  23. data/lib/server_message.rb +2 -4
  24. data/test/commands/unipept/test_api_runner.rb +8 -7
  25. data/test/commands/unipept/test_config.rb +1 -1
  26. data/test/commands/unipept/test_pept2ec.rb +11 -11
  27. data/test/commands/unipept/test_pept2funct.rb +15 -15
  28. data/test/commands/unipept/test_pept2go.rb +10 -10
  29. data/test/commands/unipept/test_pept2interpro.rb +26 -26
  30. data/test/commands/unipept/test_pept2lca.rb +2 -2
  31. data/test/commands/unipept/test_pept2prot.rb +2 -2
  32. data/test/commands/unipept/test_pept2taxa.rb +2 -4
  33. data/test/commands/unipept/test_peptinfo.rb +16 -16
  34. data/test/commands/unipept/test_taxa2lca.rb +2 -2
  35. data/test/commands/unipept/test_taxa2tree.rb +68 -0
  36. data/test/commands/unipept/test_taxonomy.rb +2 -2
  37. data/test/helper.rb +10 -0
  38. data/test/support/api_stub.rb +60 -0
  39. data/test/support/resources/pept2ec.json +55 -0
  40. data/test/support/resources/pept2funct.json +73 -0
  41. data/test/support/resources/pept2go.json +43 -0
  42. data/test/support/resources/pept2interpro.json +43 -0
  43. data/test/support/resources/pept2lca.json +14 -0
  44. data/test/support/resources/pept2prot.json +422 -0
  45. data/test/support/resources/pept2taxa.json +194 -0
  46. data/test/support/resources/peptinfo.json +70 -0
  47. data/test/support/resources/taxa2tree.json +194 -0
  48. data/test/support/resources/taxonomy.json +22 -0
  49. data/test/test_configuration.rb +1 -1
  50. data/test/test_formatters.rb +5 -5
  51. data/test/test_output_writer.rb +1 -1
  52. data/test/test_server_message.rb +2 -2
  53. data/test.taxa +4 -0
  54. data/unipept.gemspec +32 -21
  55. metadata +26 -9
@@ -0,0 +1,194 @@
1
+ [
2
+ {
3
+ "peptide": "ENFVYIAK",
4
+ "taxon_id": 2711,
5
+ "taxon_name": "Citrus sinensis",
6
+ "taxon_rank": "species"
7
+ },
8
+ {
9
+ "peptide": "ENFVYIAK",
10
+ "taxon_id": 3818,
11
+ "taxon_name": "Arachis hypogaea",
12
+ "taxon_rank": "species"
13
+ },
14
+ {
15
+ "peptide": "ENFVYIAK",
16
+ "taxon_id": 3821,
17
+ "taxon_name": "Cajanus cajan",
18
+ "taxon_rank": "species"
19
+ },
20
+ {
21
+ "peptide": "ENFVYIAK",
22
+ "taxon_id": 3827,
23
+ "taxon_name": "Cicer arietinum",
24
+ "taxon_rank": "species"
25
+ },
26
+ {
27
+ "peptide": "ENFVYIAK",
28
+ "taxon_id": 3847,
29
+ "taxon_name": "Glycine max",
30
+ "taxon_rank": "species"
31
+ },
32
+ {
33
+ "peptide": "ENFVYIAK",
34
+ "taxon_id": 3848,
35
+ "taxon_name": "Glycine soja",
36
+ "taxon_rank": "species"
37
+ },
38
+ {
39
+ "peptide": "ENFVYIAK",
40
+ "taxon_id": 3880,
41
+ "taxon_name": "Medicago truncatula",
42
+ "taxon_rank": "species"
43
+ },
44
+ {
45
+ "peptide": "ENFVYIAK",
46
+ "taxon_id": 3885,
47
+ "taxon_name": "Phaseolus vulgaris",
48
+ "taxon_rank": "species"
49
+ },
50
+ {
51
+ "peptide": "ENFVYIAK",
52
+ "taxon_id": 3900,
53
+ "taxon_name": "Trifolium subterraneum",
54
+ "taxon_rank": "species"
55
+ },
56
+ {
57
+ "peptide": "ENFVYIAK",
58
+ "taxon_id": 3906,
59
+ "taxon_name": "Vicia faba",
60
+ "taxon_rank": "species"
61
+ },
62
+ {
63
+ "peptide": "ENFVYIAK",
64
+ "taxon_id": 4096,
65
+ "taxon_name": "Nicotiana sylvestris",
66
+ "taxon_rank": "species"
67
+ },
68
+ {
69
+ "peptide": "ENFVYIAK",
70
+ "taxon_id": 4097,
71
+ "taxon_name": "Nicotiana tabacum",
72
+ "taxon_rank": "species"
73
+ },
74
+ {
75
+ "peptide": "ENFVYIAK",
76
+ "taxon_id": 16924,
77
+ "taxon_name": "Davidia involucrata",
78
+ "taxon_rank": "species"
79
+ },
80
+ {
81
+ "peptide": "ENFVYIAK",
82
+ "taxon_id": 28930,
83
+ "taxon_name": "Fagus sylvatica",
84
+ "taxon_rank": "species"
85
+ },
86
+ {
87
+ "peptide": "ENFVYIAK",
88
+ "taxon_id": 29760,
89
+ "taxon_name": "Vitis vinifera",
90
+ "taxon_rank": "species"
91
+ },
92
+ {
93
+ "peptide": "ENFVYIAK",
94
+ "taxon_id": 42345,
95
+ "taxon_name": "Phoenix dactylifera",
96
+ "taxon_rank": "species"
97
+ },
98
+ {
99
+ "peptide": "ENFVYIAK",
100
+ "taxon_id": 49451,
101
+ "taxon_name": "Nicotiana attenuata",
102
+ "taxon_rank": "species"
103
+ },
104
+ {
105
+ "peptide": "ENFVYIAK",
106
+ "taxon_id": 55188,
107
+ "taxon_name": "Citrus unshiu",
108
+ "taxon_rank": "species"
109
+ },
110
+ {
111
+ "peptide": "ENFVYIAK",
112
+ "taxon_id": 56857,
113
+ "taxon_name": "Macleaya cordata",
114
+ "taxon_rank": "species"
115
+ },
116
+ {
117
+ "peptide": "ENFVYIAK",
118
+ "taxon_id": 57577,
119
+ "taxon_name": "Trifolium pratense",
120
+ "taxon_rank": "species"
121
+ },
122
+ {
123
+ "peptide": "ENFVYIAK",
124
+ "taxon_id": 85681,
125
+ "taxon_name": "Citrus clementina",
126
+ "taxon_rank": "species"
127
+ },
128
+ {
129
+ "peptide": "ENFVYIAK",
130
+ "taxon_id": 214687,
131
+ "taxon_name": "Musa acuminata subsp. malaccensis",
132
+ "taxon_rank": "subspecies"
133
+ },
134
+ {
135
+ "peptide": "ENFVYIAK",
136
+ "taxon_id": 337451,
137
+ "taxon_name": "Cinnamomum micranthum f. kanehirae",
138
+ "taxon_rank": "forma"
139
+ },
140
+ {
141
+ "peptide": "ENFVYIAK",
142
+ "taxon_id": 561372,
143
+ "taxon_name": "Nyssa sinensis",
144
+ "taxon_rank": "species"
145
+ },
146
+ {
147
+ "peptide": "EGGAGSSTGQR",
148
+ "taxon_id": 5061,
149
+ "taxon_name": "Aspergillus niger",
150
+ "taxon_rank": "species"
151
+ },
152
+ {
153
+ "peptide": "EGGAGSSTGQR",
154
+ "taxon_id": 105351,
155
+ "taxon_name": "Aspergillus awamori",
156
+ "taxon_rank": "species"
157
+ },
158
+ {
159
+ "peptide": "EGGAGSSTGQR",
160
+ "taxon_id": 380704,
161
+ "taxon_name": "Aspergillus niger ATCC 1015",
162
+ "taxon_rank": "no rank"
163
+ },
164
+ {
165
+ "peptide": "EGGAGSSTGQR",
166
+ "taxon_id": 425011,
167
+ "taxon_name": "Aspergillus niger CBS 513.88",
168
+ "taxon_rank": "no rank"
169
+ },
170
+ {
171
+ "peptide": "EGGAGSSTGQR",
172
+ "taxon_id": 1341132,
173
+ "taxon_name": "Aspergillus welwitschiae",
174
+ "taxon_rank": "species"
175
+ },
176
+ {
177
+ "peptide": "EGGAGSSTGQR",
178
+ "taxon_id": 1353007,
179
+ "taxon_name": "Aspergillus phoenicis ATCC 13157",
180
+ "taxon_rank": "no rank"
181
+ },
182
+ {
183
+ "peptide": "EGGAGSSTGQR",
184
+ "taxon_id": 1353008,
185
+ "taxon_name": "Aspergillus niger ATCC 13496",
186
+ "taxon_rank": "no rank"
187
+ },
188
+ {
189
+ "peptide": "EGGAGSSTGQR",
190
+ "taxon_id": 1450533,
191
+ "taxon_name": "Aspergillus niger CBS 101883",
192
+ "taxon_rank": "no rank"
193
+ }
194
+ ]
@@ -0,0 +1,70 @@
1
+ [
2
+ {
3
+ "peptide": "AALTER",
4
+ "total_protein_count": 7,
5
+ "ec": [
6
+ {
7
+ "ec_number": "3.1.3.3",
8
+ "protein_count": 2
9
+ }
10
+ ],
11
+ "go": [
12
+ {
13
+ "go_term": "GO:0000287",
14
+ "protein_count": 5
15
+ }
16
+ ],
17
+ "ipr": [
18
+ {
19
+ "code": "IPR013221",
20
+ "protein_count": 2
21
+ }
22
+ ],
23
+ "taxon_id": 1,
24
+ "taxon_name": "root",
25
+ "taxon_rank": "no rank"
26
+ },
27
+ {
28
+ "peptide": "AALER",
29
+ "total_protein_count": 208,
30
+ "ec": [
31
+ {
32
+ "ec_number": "6.1.1.16",
33
+ "protein_count": 44
34
+ }
35
+ ],
36
+ "go": [
37
+ {
38
+ "go_term": "GO:0005737",
39
+ "protein_count": 106
40
+ }
41
+ ],
42
+ "ipr": [
43
+ {
44
+ "code": "IPR014729",
45
+ "protein_count": 48
46
+ },
47
+ {
48
+ "code": "IPR009080",
49
+ "protein_count": 45
50
+ },
51
+ {
52
+ "code": "IPR015803",
53
+ "protein_count": 44
54
+ }
55
+ ],
56
+ "taxon_id": 1,
57
+ "taxon_name": "root",
58
+ "taxon_rank": "no rank"
59
+ },
60
+ {
61
+ "peptide": "AAEVALVGTEK",
62
+ "total_protein_count": 0,
63
+ "ec": [],
64
+ "go": [],
65
+ "ipr": [],
66
+ "taxon_id": 1,
67
+ "taxon_name": "root",
68
+ "taxon_rank": "no rank"
69
+ }
70
+ ]
@@ -0,0 +1,194 @@
1
+ {
2
+ "id": 1,
3
+ "name": "Organism",
4
+ "rank": "root",
5
+ "data": {
6
+ "count": 4,
7
+ "self_count": 0
8
+ },
9
+ "children": [
10
+ {
11
+ "id": 2,
12
+ "name": "Bacteria",
13
+ "rank": "superkingdom",
14
+ "data": {
15
+ "count": 4,
16
+ "self_count": 0
17
+ },
18
+ "children": [
19
+ {
20
+ "id": 28,
21
+ "name": "halophilic eubacterium",
22
+ "rank": "species",
23
+ "data": {
24
+ "count": 1,
25
+ "self_count": 1
26
+ },
27
+ "children": []
28
+ },
29
+ {
30
+ "id": 1224,
31
+ "name": "Proteobacteria",
32
+ "rank": "phylum",
33
+ "data": {
34
+ "count": 3,
35
+ "self_count": 0
36
+ },
37
+ "children": [
38
+ {
39
+ "id": 68525,
40
+ "name": "delta/epsilon subdivisions",
41
+ "rank": "subphylum",
42
+ "data": {
43
+ "count": 1,
44
+ "self_count": 0
45
+ },
46
+ "children": [
47
+ {
48
+ "id": 28221,
49
+ "name": "Deltaproteobacteria",
50
+ "rank": "class_",
51
+ "data": {
52
+ "count": 1,
53
+ "self_count": 0
54
+ },
55
+ "children": [
56
+ {
57
+ "id": 29,
58
+ "name": "Myxococcales",
59
+ "rank": "order",
60
+ "data": {
61
+ "count": 1,
62
+ "self_count": 0
63
+ },
64
+ "children": [
65
+ {
66
+ "id": 80812,
67
+ "name": "Sorangiineae",
68
+ "rank": "suborder",
69
+ "data": {
70
+ "count": 1,
71
+ "self_count": 0
72
+ },
73
+ "children": [
74
+ {
75
+ "id": 49,
76
+ "name": "Polyangiaceae",
77
+ "rank": "family",
78
+ "data": {
79
+ "count": 1,
80
+ "self_count": 0
81
+ },
82
+ "children": [
83
+ {
84
+ "id": 55,
85
+ "name": "Polyangium",
86
+ "rank": "genus",
87
+ "data": {
88
+ "count": 1,
89
+ "self_count": 1
90
+ },
91
+ "children": []
92
+ }
93
+ ]
94
+ }
95
+ ]
96
+ }
97
+ ]
98
+ }
99
+ ]
100
+ }
101
+ ]
102
+ },
103
+ {
104
+ "id": 28211,
105
+ "name": "Alphaproteobacteria",
106
+ "rank": "class_",
107
+ "data": {
108
+ "count": 1,
109
+ "self_count": 0
110
+ },
111
+ "children": [
112
+ {
113
+ "id": 204458,
114
+ "name": "Caulobacterales",
115
+ "rank": "order",
116
+ "data": {
117
+ "count": 1,
118
+ "self_count": 0
119
+ },
120
+ "children": [
121
+ {
122
+ "id": 76892,
123
+ "name": "Caulobacteraceae",
124
+ "rank": "family",
125
+ "data": {
126
+ "count": 1,
127
+ "self_count": 0
128
+ },
129
+ "children": [
130
+ {
131
+ "id": 75,
132
+ "name": "Caulobacter",
133
+ "rank": "genus",
134
+ "data": {
135
+ "count": 1,
136
+ "self_count": 1
137
+ },
138
+ "children": []
139
+ }
140
+ ]
141
+ }
142
+ ]
143
+ }
144
+ ]
145
+ },
146
+ {
147
+ "id": 28216,
148
+ "name": "Betaproteobacteria",
149
+ "rank": "class_",
150
+ "data": {
151
+ "count": 1,
152
+ "self_count": 0
153
+ },
154
+ "children": [
155
+ {
156
+ "id": 80840,
157
+ "name": "Burkholderiales",
158
+ "rank": "order",
159
+ "data": {
160
+ "count": 1,
161
+ "self_count": 0
162
+ },
163
+ "children": [
164
+ {
165
+ "id": 88,
166
+ "name": "Leptothrix",
167
+ "rank": "genus",
168
+ "data": {
169
+ "count": 1,
170
+ "self_count": 0
171
+ },
172
+ "children": [
173
+ {
174
+ "id": 89,
175
+ "name": "Leptothrix discophora",
176
+ "rank": "species",
177
+ "data": {
178
+ "count": 1,
179
+ "self_count": 1
180
+ },
181
+ "children": []
182
+ }
183
+ ]
184
+ }
185
+ ]
186
+ }
187
+ ]
188
+ }
189
+ ]
190
+ }
191
+ ]
192
+ }
193
+ ]
194
+ }
@@ -0,0 +1,22 @@
1
+ [
2
+ {
3
+ "taxon_id": 216816,
4
+ "taxon_name": "Bifidobacterium longum",
5
+ "taxon_rank": "species"
6
+ },
7
+ {
8
+ "taxon_id": 1680,
9
+ "taxon_name": "Bifidobacterium adolescentis",
10
+ "taxon_rank": "species"
11
+ },
12
+ {
13
+ "taxon_id": 1,
14
+ "taxon_name": "root",
15
+ "taxon_rank": "no rank"
16
+ },
17
+ {
18
+ "taxon_id": 2,
19
+ "taxon_name": "Bacteria",
20
+ "taxon_rank": "superkingdom"
21
+ }
22
+ ]
@@ -9,7 +9,7 @@ module Unipept
9
9
 
10
10
  def test_load_with_file
11
11
  hash = { 'key' => 'value' }
12
- File.open('new_file', 'w') { |f| f.write hash.to_yaml }
12
+ File.write('new_file', hash.to_yaml)
13
13
  config = Configuration.new('new_file')
14
14
  assert_equal(hash, config.config)
15
15
  end
@@ -114,7 +114,7 @@ module Unipept
114
114
 
115
115
  def test_convert
116
116
  assert_equal(TestObject.as_json, formatter.convert([TestObject.test_object], true))
117
- assert_equal(',' + TestObject.as_json, formatter.convert([TestObject.test_object], false))
117
+ assert_equal(",#{TestObject.as_json}", formatter.convert([TestObject.test_object], false))
118
118
  end
119
119
 
120
120
  def test_format_with_fasta
@@ -159,7 +159,7 @@ module Unipept
159
159
  fasta = [['peptide', '>test']]
160
160
  object = [TestObject.test_object, TestObject.test_object]
161
161
  assert_equal(TestObject.as_csv_header, formatter.header(object))
162
- assert_equal('fasta_header,' + TestObject.as_csv_header, formatter.header(object, fasta))
162
+ assert_equal("fasta_header,#{TestObject.as_csv_header}", formatter.header(object, fasta))
163
163
  end
164
164
 
165
165
  def test_footer
@@ -180,7 +180,7 @@ module Unipept
180
180
  def test_format_with_fasta
181
181
  fasta = [['>test', '5']]
182
182
  object = [TestObject.test_object, TestObject.test_object]
183
- csv = ['>test,' + TestObject.as_csv, '>test,' + TestObject.as_csv, ''].join("\n")
183
+ csv = [">test,#{TestObject.as_csv}", ">test,#{TestObject.as_csv}", ''].join("\n")
184
184
  assert_equal(csv, formatter.format(object, fasta, false))
185
185
  end
186
186
  end
@@ -203,7 +203,7 @@ module Unipept
203
203
  end
204
204
 
205
205
  def test_convert
206
- xml = '<result>' + TestObject.as_xml + '</result>'
206
+ xml = "<result>#{TestObject.as_xml}</result>"
207
207
  assert_equal(xml, formatter.convert([TestObject.test_object], true))
208
208
  assert_equal(xml, formatter.convert([TestObject.test_object], false))
209
209
  end
@@ -211,7 +211,7 @@ module Unipept
211
211
  def test_format_with_fasta
212
212
  fasta = [['>test', '5']]
213
213
  output = formatter.format([TestObject.test_object], fasta, false)
214
- xml = '<result><fasta_header>>test</fasta_header>' + TestObject.as_xml + '</result>'
214
+ xml = "<result><fasta_header>>test</fasta_header>#{TestObject.as_xml}</result>"
215
215
  assert_equal(xml, output)
216
216
  end
217
217
  end
@@ -23,7 +23,7 @@ module Unipept
23
23
  writer.output.flush
24
24
  end
25
25
  assert_equal('', out)
26
- assert_equal('hello world', IO.foreach('output_file').next.chomp)
26
+ assert_equal('hello world', File.foreach('output_file').next.chomp)
27
27
  end
28
28
  end
29
29
  end
@@ -79,13 +79,13 @@ module Unipept
79
79
 
80
80
  def test_old_recently_fetched
81
81
  server = ServerMessage.new('http://api.unipept.ugent.be')
82
- server.configuration['last_fetch_date'] = Time.now - 60 * 60 * 25
82
+ server.configuration['last_fetch_date'] = Time.now - (60 * 60 * 25)
83
83
  assert(!server.recently_fetched?)
84
84
  end
85
85
 
86
86
  def test_recently_recently_fetched
87
87
  server = ServerMessage.new('http://api.unipept.ugent.be')
88
- server.configuration['last_fetch_date'] = Time.now - 60 * 60 * 1
88
+ server.configuration['last_fetch_date'] = Time.now - (60 * 60 * 1)
89
89
  assert(server.recently_fetched?)
90
90
  end
91
91
  end
data/test.taxa ADDED
@@ -0,0 +1,4 @@
1
+ 14,7
2
+ 45,9
3
+ 12,6
4
+ 78,4