unipept 2.1.1 → 2.2.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (55) hide show
  1. checksums.yaml +4 -4
  2. data/.github/dependabot.yml +9 -0
  3. data/.github/workflows/ci.yml +6 -27
  4. data/.rakeTasks +7 -0
  5. data/.rubocop.yml +2 -0
  6. data/.ruby-version +1 -1
  7. data/CITATION.cff +30 -0
  8. data/Gemfile +2 -2
  9. data/Gemfile.lock +60 -41
  10. data/README.md +2 -2
  11. data/Rakefile +4 -4
  12. data/VERSION +1 -1
  13. data/lib/batch_iterator.rb +16 -0
  14. data/lib/commands/prot2pept.rb +1 -2
  15. data/lib/commands/unipept/api_runner.rb +10 -12
  16. data/lib/commands/unipept/config.rb +1 -1
  17. data/lib/commands/unipept/pept2taxa.rb +1 -5
  18. data/lib/commands/unipept/taxa2tree.rb +74 -0
  19. data/lib/commands/unipept.rb +25 -1
  20. data/lib/commands/uniprot.rb +4 -5
  21. data/lib/configuration.rb +6 -7
  22. data/lib/formatters.rb +108 -36
  23. data/lib/server_message.rb +2 -4
  24. data/test/commands/unipept/test_api_runner.rb +8 -7
  25. data/test/commands/unipept/test_config.rb +1 -1
  26. data/test/commands/unipept/test_pept2ec.rb +11 -11
  27. data/test/commands/unipept/test_pept2funct.rb +15 -15
  28. data/test/commands/unipept/test_pept2go.rb +10 -10
  29. data/test/commands/unipept/test_pept2interpro.rb +26 -26
  30. data/test/commands/unipept/test_pept2lca.rb +2 -2
  31. data/test/commands/unipept/test_pept2prot.rb +2 -2
  32. data/test/commands/unipept/test_pept2taxa.rb +2 -4
  33. data/test/commands/unipept/test_peptinfo.rb +16 -16
  34. data/test/commands/unipept/test_taxa2lca.rb +2 -2
  35. data/test/commands/unipept/test_taxa2tree.rb +68 -0
  36. data/test/commands/unipept/test_taxonomy.rb +2 -2
  37. data/test/helper.rb +10 -0
  38. data/test/support/api_stub.rb +60 -0
  39. data/test/support/resources/pept2ec.json +55 -0
  40. data/test/support/resources/pept2funct.json +73 -0
  41. data/test/support/resources/pept2go.json +43 -0
  42. data/test/support/resources/pept2interpro.json +43 -0
  43. data/test/support/resources/pept2lca.json +14 -0
  44. data/test/support/resources/pept2prot.json +422 -0
  45. data/test/support/resources/pept2taxa.json +194 -0
  46. data/test/support/resources/peptinfo.json +70 -0
  47. data/test/support/resources/taxa2tree.json +194 -0
  48. data/test/support/resources/taxonomy.json +22 -0
  49. data/test/test_configuration.rb +1 -1
  50. data/test/test_formatters.rb +5 -5
  51. data/test/test_output_writer.rb +1 -1
  52. data/test/test_server_message.rb +2 -2
  53. data/test.taxa +4 -0
  54. data/unipept.gemspec +32 -21
  55. metadata +26 -9
@@ -0,0 +1,194 @@
1
+ [
2
+ {
3
+ "peptide": "ENFVYIAK",
4
+ "taxon_id": 2711,
5
+ "taxon_name": "Citrus sinensis",
6
+ "taxon_rank": "species"
7
+ },
8
+ {
9
+ "peptide": "ENFVYIAK",
10
+ "taxon_id": 3818,
11
+ "taxon_name": "Arachis hypogaea",
12
+ "taxon_rank": "species"
13
+ },
14
+ {
15
+ "peptide": "ENFVYIAK",
16
+ "taxon_id": 3821,
17
+ "taxon_name": "Cajanus cajan",
18
+ "taxon_rank": "species"
19
+ },
20
+ {
21
+ "peptide": "ENFVYIAK",
22
+ "taxon_id": 3827,
23
+ "taxon_name": "Cicer arietinum",
24
+ "taxon_rank": "species"
25
+ },
26
+ {
27
+ "peptide": "ENFVYIAK",
28
+ "taxon_id": 3847,
29
+ "taxon_name": "Glycine max",
30
+ "taxon_rank": "species"
31
+ },
32
+ {
33
+ "peptide": "ENFVYIAK",
34
+ "taxon_id": 3848,
35
+ "taxon_name": "Glycine soja",
36
+ "taxon_rank": "species"
37
+ },
38
+ {
39
+ "peptide": "ENFVYIAK",
40
+ "taxon_id": 3880,
41
+ "taxon_name": "Medicago truncatula",
42
+ "taxon_rank": "species"
43
+ },
44
+ {
45
+ "peptide": "ENFVYIAK",
46
+ "taxon_id": 3885,
47
+ "taxon_name": "Phaseolus vulgaris",
48
+ "taxon_rank": "species"
49
+ },
50
+ {
51
+ "peptide": "ENFVYIAK",
52
+ "taxon_id": 3900,
53
+ "taxon_name": "Trifolium subterraneum",
54
+ "taxon_rank": "species"
55
+ },
56
+ {
57
+ "peptide": "ENFVYIAK",
58
+ "taxon_id": 3906,
59
+ "taxon_name": "Vicia faba",
60
+ "taxon_rank": "species"
61
+ },
62
+ {
63
+ "peptide": "ENFVYIAK",
64
+ "taxon_id": 4096,
65
+ "taxon_name": "Nicotiana sylvestris",
66
+ "taxon_rank": "species"
67
+ },
68
+ {
69
+ "peptide": "ENFVYIAK",
70
+ "taxon_id": 4097,
71
+ "taxon_name": "Nicotiana tabacum",
72
+ "taxon_rank": "species"
73
+ },
74
+ {
75
+ "peptide": "ENFVYIAK",
76
+ "taxon_id": 16924,
77
+ "taxon_name": "Davidia involucrata",
78
+ "taxon_rank": "species"
79
+ },
80
+ {
81
+ "peptide": "ENFVYIAK",
82
+ "taxon_id": 28930,
83
+ "taxon_name": "Fagus sylvatica",
84
+ "taxon_rank": "species"
85
+ },
86
+ {
87
+ "peptide": "ENFVYIAK",
88
+ "taxon_id": 29760,
89
+ "taxon_name": "Vitis vinifera",
90
+ "taxon_rank": "species"
91
+ },
92
+ {
93
+ "peptide": "ENFVYIAK",
94
+ "taxon_id": 42345,
95
+ "taxon_name": "Phoenix dactylifera",
96
+ "taxon_rank": "species"
97
+ },
98
+ {
99
+ "peptide": "ENFVYIAK",
100
+ "taxon_id": 49451,
101
+ "taxon_name": "Nicotiana attenuata",
102
+ "taxon_rank": "species"
103
+ },
104
+ {
105
+ "peptide": "ENFVYIAK",
106
+ "taxon_id": 55188,
107
+ "taxon_name": "Citrus unshiu",
108
+ "taxon_rank": "species"
109
+ },
110
+ {
111
+ "peptide": "ENFVYIAK",
112
+ "taxon_id": 56857,
113
+ "taxon_name": "Macleaya cordata",
114
+ "taxon_rank": "species"
115
+ },
116
+ {
117
+ "peptide": "ENFVYIAK",
118
+ "taxon_id": 57577,
119
+ "taxon_name": "Trifolium pratense",
120
+ "taxon_rank": "species"
121
+ },
122
+ {
123
+ "peptide": "ENFVYIAK",
124
+ "taxon_id": 85681,
125
+ "taxon_name": "Citrus clementina",
126
+ "taxon_rank": "species"
127
+ },
128
+ {
129
+ "peptide": "ENFVYIAK",
130
+ "taxon_id": 214687,
131
+ "taxon_name": "Musa acuminata subsp. malaccensis",
132
+ "taxon_rank": "subspecies"
133
+ },
134
+ {
135
+ "peptide": "ENFVYIAK",
136
+ "taxon_id": 337451,
137
+ "taxon_name": "Cinnamomum micranthum f. kanehirae",
138
+ "taxon_rank": "forma"
139
+ },
140
+ {
141
+ "peptide": "ENFVYIAK",
142
+ "taxon_id": 561372,
143
+ "taxon_name": "Nyssa sinensis",
144
+ "taxon_rank": "species"
145
+ },
146
+ {
147
+ "peptide": "EGGAGSSTGQR",
148
+ "taxon_id": 5061,
149
+ "taxon_name": "Aspergillus niger",
150
+ "taxon_rank": "species"
151
+ },
152
+ {
153
+ "peptide": "EGGAGSSTGQR",
154
+ "taxon_id": 105351,
155
+ "taxon_name": "Aspergillus awamori",
156
+ "taxon_rank": "species"
157
+ },
158
+ {
159
+ "peptide": "EGGAGSSTGQR",
160
+ "taxon_id": 380704,
161
+ "taxon_name": "Aspergillus niger ATCC 1015",
162
+ "taxon_rank": "no rank"
163
+ },
164
+ {
165
+ "peptide": "EGGAGSSTGQR",
166
+ "taxon_id": 425011,
167
+ "taxon_name": "Aspergillus niger CBS 513.88",
168
+ "taxon_rank": "no rank"
169
+ },
170
+ {
171
+ "peptide": "EGGAGSSTGQR",
172
+ "taxon_id": 1341132,
173
+ "taxon_name": "Aspergillus welwitschiae",
174
+ "taxon_rank": "species"
175
+ },
176
+ {
177
+ "peptide": "EGGAGSSTGQR",
178
+ "taxon_id": 1353007,
179
+ "taxon_name": "Aspergillus phoenicis ATCC 13157",
180
+ "taxon_rank": "no rank"
181
+ },
182
+ {
183
+ "peptide": "EGGAGSSTGQR",
184
+ "taxon_id": 1353008,
185
+ "taxon_name": "Aspergillus niger ATCC 13496",
186
+ "taxon_rank": "no rank"
187
+ },
188
+ {
189
+ "peptide": "EGGAGSSTGQR",
190
+ "taxon_id": 1450533,
191
+ "taxon_name": "Aspergillus niger CBS 101883",
192
+ "taxon_rank": "no rank"
193
+ }
194
+ ]
@@ -0,0 +1,70 @@
1
+ [
2
+ {
3
+ "peptide": "AALTER",
4
+ "total_protein_count": 7,
5
+ "ec": [
6
+ {
7
+ "ec_number": "3.1.3.3",
8
+ "protein_count": 2
9
+ }
10
+ ],
11
+ "go": [
12
+ {
13
+ "go_term": "GO:0000287",
14
+ "protein_count": 5
15
+ }
16
+ ],
17
+ "ipr": [
18
+ {
19
+ "code": "IPR013221",
20
+ "protein_count": 2
21
+ }
22
+ ],
23
+ "taxon_id": 1,
24
+ "taxon_name": "root",
25
+ "taxon_rank": "no rank"
26
+ },
27
+ {
28
+ "peptide": "AALER",
29
+ "total_protein_count": 208,
30
+ "ec": [
31
+ {
32
+ "ec_number": "6.1.1.16",
33
+ "protein_count": 44
34
+ }
35
+ ],
36
+ "go": [
37
+ {
38
+ "go_term": "GO:0005737",
39
+ "protein_count": 106
40
+ }
41
+ ],
42
+ "ipr": [
43
+ {
44
+ "code": "IPR014729",
45
+ "protein_count": 48
46
+ },
47
+ {
48
+ "code": "IPR009080",
49
+ "protein_count": 45
50
+ },
51
+ {
52
+ "code": "IPR015803",
53
+ "protein_count": 44
54
+ }
55
+ ],
56
+ "taxon_id": 1,
57
+ "taxon_name": "root",
58
+ "taxon_rank": "no rank"
59
+ },
60
+ {
61
+ "peptide": "AAEVALVGTEK",
62
+ "total_protein_count": 0,
63
+ "ec": [],
64
+ "go": [],
65
+ "ipr": [],
66
+ "taxon_id": 1,
67
+ "taxon_name": "root",
68
+ "taxon_rank": "no rank"
69
+ }
70
+ ]
@@ -0,0 +1,194 @@
1
+ {
2
+ "id": 1,
3
+ "name": "Organism",
4
+ "rank": "root",
5
+ "data": {
6
+ "count": 4,
7
+ "self_count": 0
8
+ },
9
+ "children": [
10
+ {
11
+ "id": 2,
12
+ "name": "Bacteria",
13
+ "rank": "superkingdom",
14
+ "data": {
15
+ "count": 4,
16
+ "self_count": 0
17
+ },
18
+ "children": [
19
+ {
20
+ "id": 28,
21
+ "name": "halophilic eubacterium",
22
+ "rank": "species",
23
+ "data": {
24
+ "count": 1,
25
+ "self_count": 1
26
+ },
27
+ "children": []
28
+ },
29
+ {
30
+ "id": 1224,
31
+ "name": "Proteobacteria",
32
+ "rank": "phylum",
33
+ "data": {
34
+ "count": 3,
35
+ "self_count": 0
36
+ },
37
+ "children": [
38
+ {
39
+ "id": 68525,
40
+ "name": "delta/epsilon subdivisions",
41
+ "rank": "subphylum",
42
+ "data": {
43
+ "count": 1,
44
+ "self_count": 0
45
+ },
46
+ "children": [
47
+ {
48
+ "id": 28221,
49
+ "name": "Deltaproteobacteria",
50
+ "rank": "class_",
51
+ "data": {
52
+ "count": 1,
53
+ "self_count": 0
54
+ },
55
+ "children": [
56
+ {
57
+ "id": 29,
58
+ "name": "Myxococcales",
59
+ "rank": "order",
60
+ "data": {
61
+ "count": 1,
62
+ "self_count": 0
63
+ },
64
+ "children": [
65
+ {
66
+ "id": 80812,
67
+ "name": "Sorangiineae",
68
+ "rank": "suborder",
69
+ "data": {
70
+ "count": 1,
71
+ "self_count": 0
72
+ },
73
+ "children": [
74
+ {
75
+ "id": 49,
76
+ "name": "Polyangiaceae",
77
+ "rank": "family",
78
+ "data": {
79
+ "count": 1,
80
+ "self_count": 0
81
+ },
82
+ "children": [
83
+ {
84
+ "id": 55,
85
+ "name": "Polyangium",
86
+ "rank": "genus",
87
+ "data": {
88
+ "count": 1,
89
+ "self_count": 1
90
+ },
91
+ "children": []
92
+ }
93
+ ]
94
+ }
95
+ ]
96
+ }
97
+ ]
98
+ }
99
+ ]
100
+ }
101
+ ]
102
+ },
103
+ {
104
+ "id": 28211,
105
+ "name": "Alphaproteobacteria",
106
+ "rank": "class_",
107
+ "data": {
108
+ "count": 1,
109
+ "self_count": 0
110
+ },
111
+ "children": [
112
+ {
113
+ "id": 204458,
114
+ "name": "Caulobacterales",
115
+ "rank": "order",
116
+ "data": {
117
+ "count": 1,
118
+ "self_count": 0
119
+ },
120
+ "children": [
121
+ {
122
+ "id": 76892,
123
+ "name": "Caulobacteraceae",
124
+ "rank": "family",
125
+ "data": {
126
+ "count": 1,
127
+ "self_count": 0
128
+ },
129
+ "children": [
130
+ {
131
+ "id": 75,
132
+ "name": "Caulobacter",
133
+ "rank": "genus",
134
+ "data": {
135
+ "count": 1,
136
+ "self_count": 1
137
+ },
138
+ "children": []
139
+ }
140
+ ]
141
+ }
142
+ ]
143
+ }
144
+ ]
145
+ },
146
+ {
147
+ "id": 28216,
148
+ "name": "Betaproteobacteria",
149
+ "rank": "class_",
150
+ "data": {
151
+ "count": 1,
152
+ "self_count": 0
153
+ },
154
+ "children": [
155
+ {
156
+ "id": 80840,
157
+ "name": "Burkholderiales",
158
+ "rank": "order",
159
+ "data": {
160
+ "count": 1,
161
+ "self_count": 0
162
+ },
163
+ "children": [
164
+ {
165
+ "id": 88,
166
+ "name": "Leptothrix",
167
+ "rank": "genus",
168
+ "data": {
169
+ "count": 1,
170
+ "self_count": 0
171
+ },
172
+ "children": [
173
+ {
174
+ "id": 89,
175
+ "name": "Leptothrix discophora",
176
+ "rank": "species",
177
+ "data": {
178
+ "count": 1,
179
+ "self_count": 1
180
+ },
181
+ "children": []
182
+ }
183
+ ]
184
+ }
185
+ ]
186
+ }
187
+ ]
188
+ }
189
+ ]
190
+ }
191
+ ]
192
+ }
193
+ ]
194
+ }
@@ -0,0 +1,22 @@
1
+ [
2
+ {
3
+ "taxon_id": 216816,
4
+ "taxon_name": "Bifidobacterium longum",
5
+ "taxon_rank": "species"
6
+ },
7
+ {
8
+ "taxon_id": 1680,
9
+ "taxon_name": "Bifidobacterium adolescentis",
10
+ "taxon_rank": "species"
11
+ },
12
+ {
13
+ "taxon_id": 1,
14
+ "taxon_name": "root",
15
+ "taxon_rank": "no rank"
16
+ },
17
+ {
18
+ "taxon_id": 2,
19
+ "taxon_name": "Bacteria",
20
+ "taxon_rank": "superkingdom"
21
+ }
22
+ ]
@@ -9,7 +9,7 @@ module Unipept
9
9
 
10
10
  def test_load_with_file
11
11
  hash = { 'key' => 'value' }
12
- File.open('new_file', 'w') { |f| f.write hash.to_yaml }
12
+ File.write('new_file', hash.to_yaml)
13
13
  config = Configuration.new('new_file')
14
14
  assert_equal(hash, config.config)
15
15
  end
@@ -114,7 +114,7 @@ module Unipept
114
114
 
115
115
  def test_convert
116
116
  assert_equal(TestObject.as_json, formatter.convert([TestObject.test_object], true))
117
- assert_equal(',' + TestObject.as_json, formatter.convert([TestObject.test_object], false))
117
+ assert_equal(",#{TestObject.as_json}", formatter.convert([TestObject.test_object], false))
118
118
  end
119
119
 
120
120
  def test_format_with_fasta
@@ -159,7 +159,7 @@ module Unipept
159
159
  fasta = [['peptide', '>test']]
160
160
  object = [TestObject.test_object, TestObject.test_object]
161
161
  assert_equal(TestObject.as_csv_header, formatter.header(object))
162
- assert_equal('fasta_header,' + TestObject.as_csv_header, formatter.header(object, fasta))
162
+ assert_equal("fasta_header,#{TestObject.as_csv_header}", formatter.header(object, fasta))
163
163
  end
164
164
 
165
165
  def test_footer
@@ -180,7 +180,7 @@ module Unipept
180
180
  def test_format_with_fasta
181
181
  fasta = [['>test', '5']]
182
182
  object = [TestObject.test_object, TestObject.test_object]
183
- csv = ['>test,' + TestObject.as_csv, '>test,' + TestObject.as_csv, ''].join("\n")
183
+ csv = [">test,#{TestObject.as_csv}", ">test,#{TestObject.as_csv}", ''].join("\n")
184
184
  assert_equal(csv, formatter.format(object, fasta, false))
185
185
  end
186
186
  end
@@ -203,7 +203,7 @@ module Unipept
203
203
  end
204
204
 
205
205
  def test_convert
206
- xml = '<result>' + TestObject.as_xml + '</result>'
206
+ xml = "<result>#{TestObject.as_xml}</result>"
207
207
  assert_equal(xml, formatter.convert([TestObject.test_object], true))
208
208
  assert_equal(xml, formatter.convert([TestObject.test_object], false))
209
209
  end
@@ -211,7 +211,7 @@ module Unipept
211
211
  def test_format_with_fasta
212
212
  fasta = [['>test', '5']]
213
213
  output = formatter.format([TestObject.test_object], fasta, false)
214
- xml = '<result><fasta_header>>test</fasta_header>' + TestObject.as_xml + '</result>'
214
+ xml = "<result><fasta_header>>test</fasta_header>#{TestObject.as_xml}</result>"
215
215
  assert_equal(xml, output)
216
216
  end
217
217
  end
@@ -23,7 +23,7 @@ module Unipept
23
23
  writer.output.flush
24
24
  end
25
25
  assert_equal('', out)
26
- assert_equal('hello world', IO.foreach('output_file').next.chomp)
26
+ assert_equal('hello world', File.foreach('output_file').next.chomp)
27
27
  end
28
28
  end
29
29
  end
@@ -79,13 +79,13 @@ module Unipept
79
79
 
80
80
  def test_old_recently_fetched
81
81
  server = ServerMessage.new('http://api.unipept.ugent.be')
82
- server.configuration['last_fetch_date'] = Time.now - 60 * 60 * 25
82
+ server.configuration['last_fetch_date'] = Time.now - (60 * 60 * 25)
83
83
  assert(!server.recently_fetched?)
84
84
  end
85
85
 
86
86
  def test_recently_recently_fetched
87
87
  server = ServerMessage.new('http://api.unipept.ugent.be')
88
- server.configuration['last_fetch_date'] = Time.now - 60 * 60 * 1
88
+ server.configuration['last_fetch_date'] = Time.now - (60 * 60 * 1)
89
89
  assert(server.recently_fetched?)
90
90
  end
91
91
  end
data/test.taxa ADDED
@@ -0,0 +1,4 @@
1
+ 14,7
2
+ 45,9
3
+ 12,6
4
+ 78,4