unipept 2.1.1 → 2.2.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (55) hide show
  1. checksums.yaml +4 -4
  2. data/.github/dependabot.yml +9 -0
  3. data/.github/workflows/ci.yml +6 -27
  4. data/.rakeTasks +7 -0
  5. data/.rubocop.yml +2 -0
  6. data/.ruby-version +1 -1
  7. data/CITATION.cff +30 -0
  8. data/Gemfile +2 -2
  9. data/Gemfile.lock +60 -41
  10. data/README.md +2 -2
  11. data/Rakefile +4 -4
  12. data/VERSION +1 -1
  13. data/lib/batch_iterator.rb +16 -0
  14. data/lib/commands/prot2pept.rb +1 -2
  15. data/lib/commands/unipept/api_runner.rb +10 -12
  16. data/lib/commands/unipept/config.rb +1 -1
  17. data/lib/commands/unipept/pept2taxa.rb +1 -5
  18. data/lib/commands/unipept/taxa2tree.rb +74 -0
  19. data/lib/commands/unipept.rb +25 -1
  20. data/lib/commands/uniprot.rb +4 -5
  21. data/lib/configuration.rb +6 -7
  22. data/lib/formatters.rb +108 -36
  23. data/lib/server_message.rb +2 -4
  24. data/test/commands/unipept/test_api_runner.rb +8 -7
  25. data/test/commands/unipept/test_config.rb +1 -1
  26. data/test/commands/unipept/test_pept2ec.rb +11 -11
  27. data/test/commands/unipept/test_pept2funct.rb +15 -15
  28. data/test/commands/unipept/test_pept2go.rb +10 -10
  29. data/test/commands/unipept/test_pept2interpro.rb +26 -26
  30. data/test/commands/unipept/test_pept2lca.rb +2 -2
  31. data/test/commands/unipept/test_pept2prot.rb +2 -2
  32. data/test/commands/unipept/test_pept2taxa.rb +2 -4
  33. data/test/commands/unipept/test_peptinfo.rb +16 -16
  34. data/test/commands/unipept/test_taxa2lca.rb +2 -2
  35. data/test/commands/unipept/test_taxa2tree.rb +68 -0
  36. data/test/commands/unipept/test_taxonomy.rb +2 -2
  37. data/test/helper.rb +10 -0
  38. data/test/support/api_stub.rb +60 -0
  39. data/test/support/resources/pept2ec.json +55 -0
  40. data/test/support/resources/pept2funct.json +73 -0
  41. data/test/support/resources/pept2go.json +43 -0
  42. data/test/support/resources/pept2interpro.json +43 -0
  43. data/test/support/resources/pept2lca.json +14 -0
  44. data/test/support/resources/pept2prot.json +422 -0
  45. data/test/support/resources/pept2taxa.json +194 -0
  46. data/test/support/resources/peptinfo.json +70 -0
  47. data/test/support/resources/taxa2tree.json +194 -0
  48. data/test/support/resources/taxonomy.json +22 -0
  49. data/test/test_configuration.rb +1 -1
  50. data/test/test_formatters.rb +5 -5
  51. data/test/test_output_writer.rb +1 -1
  52. data/test/test_server_message.rb +2 -2
  53. data/test.taxa +4 -0
  54. data/unipept.gemspec +32 -21
  55. metadata +26 -9
@@ -0,0 +1,73 @@
1
+ [
2
+ {
3
+ "peptide": "AALTER",
4
+ "total_protein_count": 7,
5
+ "ec": [
6
+ {
7
+ "ec_number": "3.1.3.3",
8
+ "protein_count": 2
9
+ },
10
+ {
11
+ "ec_number": "6.3.2.13",
12
+ "protein_count": 2
13
+ }
14
+ ],
15
+ "go": [
16
+ {
17
+ "go_term": "GO:0000287",
18
+ "protein_count": 5
19
+ },
20
+ {
21
+ "go_term": "GO:0005737",
22
+ "protein_count": 5
23
+ }
24
+ ],
25
+ "ipr": [
26
+ {
27
+ "code": "IPR013221",
28
+ "protein_count": 2
29
+ }
30
+ ]
31
+ },
32
+ {
33
+ "peptide": "AALER",
34
+ "total_protein_count": 208,
35
+ "ec": [
36
+ {
37
+ "ec_number": "6.1.1.16",
38
+ "protein_count": 44
39
+ },
40
+ {
41
+ "ec_number": "2.7.7.38",
42
+ "protein_count": 13
43
+ }
44
+ ],
45
+ "go": [
46
+ {
47
+ "go_term": "GO:0005737",
48
+ "protein_count": 106
49
+ },
50
+ {
51
+ "go_term": "GO:0005524",
52
+ "protein_count": 75
53
+ }
54
+ ],
55
+ "ipr": [
56
+ {
57
+ "code": "IPR014729",
58
+ "protein_count": 48
59
+ },
60
+ {
61
+ "code": "IPR009080",
62
+ "protein_count": 45
63
+ }
64
+ ]
65
+ },
66
+ {
67
+ "peptide": "AAEVALVGTEK",
68
+ "total_protein_count": 0,
69
+ "ec": [],
70
+ "go": [],
71
+ "ipr": []
72
+ }
73
+ ]
@@ -0,0 +1,43 @@
1
+ [
2
+ {
3
+ "peptide": "AALTER",
4
+ "total_protein_count": 7,
5
+ "go": [
6
+ {
7
+ "go_term": "GO:0000287",
8
+ "protein_count": 5
9
+ },
10
+ {
11
+ "go_term": "GO:0005737",
12
+ "protein_count": 5
13
+ },
14
+ {
15
+ "go_term": "GO:0042803",
16
+ "protein_count": 1
17
+ }
18
+ ]
19
+ },
20
+ {
21
+ "peptide": "AALER",
22
+ "total_protein_count": 208,
23
+ "go": [
24
+ {
25
+ "go_term": "GO:0005737",
26
+ "protein_count": 106
27
+ },
28
+ {
29
+ "go_term": "GO:0005524",
30
+ "protein_count": 75
31
+ },
32
+ {
33
+ "go_term": "GO:0008270",
34
+ "protein_count": 48
35
+ }
36
+ ]
37
+ },
38
+ {
39
+ "peptide": "AAEVALVGTEK",
40
+ "total_protein_count": 0,
41
+ "go": []
42
+ }
43
+ ]
@@ -0,0 +1,43 @@
1
+ [
2
+ {
3
+ "peptide": "AALTER",
4
+ "total_protein_count": 7,
5
+ "ipr": [
6
+ {
7
+ "code": "IPR013221",
8
+ "protein_count": 2
9
+ },
10
+ {
11
+ "code": "IPR036565",
12
+ "protein_count": 2
13
+ },
14
+ {
15
+ "code": "IPR023214",
16
+ "protein_count": 2
17
+ }
18
+ ]
19
+ },
20
+ {
21
+ "peptide": "AALER",
22
+ "total_protein_count": 208,
23
+ "ipr": [
24
+ {
25
+ "code": "IPR014729",
26
+ "protein_count": 48
27
+ },
28
+ {
29
+ "code": "IPR009080",
30
+ "protein_count": 45
31
+ },
32
+ {
33
+ "code": "IPR015803",
34
+ "protein_count": 44
35
+ }
36
+ ]
37
+ },
38
+ {
39
+ "peptide": "VAQFLL",
40
+ "total_protein_count": 0,
41
+ "ipr": []
42
+ }
43
+ ]
@@ -0,0 +1,14 @@
1
+ [
2
+ {
3
+ "peptide": "AALTER",
4
+ "taxon_id": 1,
5
+ "taxon_name": "root",
6
+ "taxon_rank": "no rank"
7
+ },
8
+ {
9
+ "peptide": "AALER",
10
+ "taxon_id": 1,
11
+ "taxon_name": "root",
12
+ "taxon_rank": "no rank"
13
+ }
14
+ ]
@@ -0,0 +1,422 @@
1
+ [
2
+ {
3
+ "peptide": "ENFVYIAK",
4
+ "uniprot_id": "P42654",
5
+ "protein_name": "14-3-3-like protein B",
6
+ "taxon_id": 3906
7
+ },
8
+ {
9
+ "peptide": "ENFVYIAK",
10
+ "uniprot_id": "Q96453",
11
+ "protein_name": "14-3-3-like protein D",
12
+ "taxon_id": 3847
13
+ },
14
+ {
15
+ "peptide": "ENFVYIAK",
16
+ "uniprot_id": "C6TH93",
17
+ "protein_name": "Casparian strip membrane protein 4",
18
+ "taxon_id": 3847
19
+ },
20
+ {
21
+ "peptide": "ENFVYIAK",
22
+ "uniprot_id": "A0A2Z6PAC3",
23
+ "protein_name": "14_3_3 domain-containing protein",
24
+ "taxon_id": 3900
25
+ },
26
+ {
27
+ "peptide": "ENFVYIAK",
28
+ "uniprot_id": "A0A5B7A3K5",
29
+ "protein_name": "Putative 14-3-3-like protein D isoform X1",
30
+ "taxon_id": 16924
31
+ },
32
+ {
33
+ "peptide": "ENFVYIAK",
34
+ "uniprot_id": "A0A396GNG6",
35
+ "protein_name": "Putative 14-3-3 protein",
36
+ "taxon_id": 3880
37
+ },
38
+ {
39
+ "peptide": "ENFVYIAK",
40
+ "uniprot_id": "A0A072VBW0",
41
+ "protein_name": "Putative 14-3-3 protein",
42
+ "taxon_id": 3880
43
+ },
44
+ {
45
+ "peptide": "ENFVYIAK",
46
+ "uniprot_id": "I1LUM3",
47
+ "protein_name": "14_3_3 domain-containing protein",
48
+ "taxon_id": 3847
49
+ },
50
+ {
51
+ "peptide": "ENFVYIAK",
52
+ "uniprot_id": "A0A1S3WXW9",
53
+ "protein_name": "14-3-3 protein 9-like isoform X1",
54
+ "taxon_id": 4097
55
+ },
56
+ {
57
+ "peptide": "ENFVYIAK",
58
+ "uniprot_id": "A0A2H5N535",
59
+ "protein_name": "14_3_3 domain-containing protein",
60
+ "taxon_id": 55188
61
+ },
62
+ {
63
+ "peptide": "ENFVYIAK",
64
+ "uniprot_id": "A0A438E5J2",
65
+ "protein_name": "14-3-3-like protein D",
66
+ "taxon_id": 29760
67
+ },
68
+ {
69
+ "peptide": "ENFVYIAK",
70
+ "uniprot_id": "A0A445HT97",
71
+ "protein_name": "14-3-3-like protein D isoform A",
72
+ "taxon_id": 3848
73
+ },
74
+ {
75
+ "peptide": "ENFVYIAK",
76
+ "uniprot_id": "A0A5J5A607",
77
+ "protein_name": "14_3_3 domain-containing protein",
78
+ "taxon_id": 561372
79
+ },
80
+ {
81
+ "peptide": "ENFVYIAK",
82
+ "uniprot_id": "A0A1S4A416",
83
+ "protein_name": "14-3-3 protein 9-like",
84
+ "taxon_id": 4097
85
+ },
86
+ {
87
+ "peptide": "ENFVYIAK",
88
+ "uniprot_id": "A0A1U7VM87",
89
+ "protein_name": "14-3-3 protein 9-like isoform X2",
90
+ "taxon_id": 4096
91
+ },
92
+ {
93
+ "peptide": "ENFVYIAK",
94
+ "uniprot_id": "A0A2N9J4Q3",
95
+ "protein_name": "14_3_3 domain-containing protein",
96
+ "taxon_id": 28930
97
+ },
98
+ {
99
+ "peptide": "ENFVYIAK",
100
+ "uniprot_id": "A0A445IBP6",
101
+ "protein_name": "14-3-3-like protein D isoform A",
102
+ "taxon_id": 3848
103
+ },
104
+ {
105
+ "peptide": "ENFVYIAK",
106
+ "uniprot_id": "A0A445HTA3",
107
+ "protein_name": "14-3-3-like protein D isoform C",
108
+ "taxon_id": 3848
109
+ },
110
+ {
111
+ "peptide": "ENFVYIAK",
112
+ "uniprot_id": "T2DN83",
113
+ "protein_name": "14-3-3-like protein D",
114
+ "taxon_id": 3885
115
+ },
116
+ {
117
+ "peptide": "ENFVYIAK",
118
+ "uniprot_id": "A0A445CI86",
119
+ "protein_name": "14_3_3 domain-containing protein",
120
+ "taxon_id": 3818
121
+ },
122
+ {
123
+ "peptide": "ENFVYIAK",
124
+ "uniprot_id": "A0A444Y3Q8",
125
+ "protein_name": "14_3_3 domain-containing protein",
126
+ "taxon_id": 3818
127
+ },
128
+ {
129
+ "peptide": "ENFVYIAK",
130
+ "uniprot_id": "A0A2K3PKW1",
131
+ "protein_name": "14-3-3-like protein d-like",
132
+ "taxon_id": 57577
133
+ },
134
+ {
135
+ "peptide": "ENFVYIAK",
136
+ "uniprot_id": "A0A200QJZ5",
137
+ "protein_name": "14-3-3 protein",
138
+ "taxon_id": 56857
139
+ },
140
+ {
141
+ "peptide": "ENFVYIAK",
142
+ "uniprot_id": "A0A5B6YWQ8",
143
+ "protein_name": "Putative 14-3-3-like protein D",
144
+ "taxon_id": 16924
145
+ },
146
+ {
147
+ "peptide": "ENFVYIAK",
148
+ "uniprot_id": "A0A5J5A796",
149
+ "protein_name": "14_3_3 domain-containing protein",
150
+ "taxon_id": 561372
151
+ },
152
+ {
153
+ "peptide": "ENFVYIAK",
154
+ "uniprot_id": "C6TM63",
155
+ "protein_name": "14_3_3 domain-containing protein",
156
+ "taxon_id": 3847
157
+ },
158
+ {
159
+ "peptide": "ENFVYIAK",
160
+ "uniprot_id": "A0A1S3WXS1",
161
+ "protein_name": "14-3-3 protein 9-like isoform X2",
162
+ "taxon_id": 4097
163
+ },
164
+ {
165
+ "peptide": "ENFVYIAK",
166
+ "uniprot_id": "A0A1U7VVV5",
167
+ "protein_name": "14-3-3 protein 9-like isoform X1",
168
+ "taxon_id": 4096
169
+ },
170
+ {
171
+ "peptide": "ENFVYIAK",
172
+ "uniprot_id": "A0A2H3Y4D2",
173
+ "protein_name": "14-3-3-like protein D isoform X2",
174
+ "taxon_id": 42345
175
+ },
176
+ {
177
+ "peptide": "ENFVYIAK",
178
+ "uniprot_id": "A0A5B6YWR0",
179
+ "protein_name": "14_3_3 domain-containing protein",
180
+ "taxon_id": 16924
181
+ },
182
+ {
183
+ "peptide": "ENFVYIAK",
184
+ "uniprot_id": "A0A067GE20",
185
+ "protein_name": "14_3_3 domain-containing protein",
186
+ "taxon_id": 2711
187
+ },
188
+ {
189
+ "peptide": "ENFVYIAK",
190
+ "uniprot_id": "A0A5B6Z3E6",
191
+ "protein_name": "Putative 14-3-3-like protein D isoform X2",
192
+ "taxon_id": 16924
193
+ },
194
+ {
195
+ "peptide": "ENFVYIAK",
196
+ "uniprot_id": "A0A5J5A2G8",
197
+ "protein_name": "14_3_3 domain-containing protein",
198
+ "taxon_id": 561372
199
+ },
200
+ {
201
+ "peptide": "ENFVYIAK",
202
+ "uniprot_id": "A0A445IC54",
203
+ "protein_name": "14-3-3-like protein D isoform C",
204
+ "taxon_id": 3848
205
+ },
206
+ {
207
+ "peptide": "ENFVYIAK",
208
+ "uniprot_id": "E1U3Z1",
209
+ "protein_name": "14-3-3-like protein B",
210
+ "taxon_id": 3827
211
+ },
212
+ {
213
+ "peptide": "ENFVYIAK",
214
+ "uniprot_id": "I1M3M0",
215
+ "protein_name": "14_3_3 domain-containing protein",
216
+ "taxon_id": 3847
217
+ },
218
+ {
219
+ "peptide": "ENFVYIAK",
220
+ "uniprot_id": "A0A0R0HF31",
221
+ "protein_name": "14_3_3 domain-containing protein",
222
+ "taxon_id": 3847
223
+ },
224
+ {
225
+ "peptide": "ENFVYIAK",
226
+ "uniprot_id": "A0A2H3ZH40",
227
+ "protein_name": "14-3-3-like protein D isoform X2",
228
+ "taxon_id": 42345
229
+ },
230
+ {
231
+ "peptide": "ENFVYIAK",
232
+ "uniprot_id": "A0A5B7A4U9",
233
+ "protein_name": "Putative 14-3-3 domain-containing protein",
234
+ "taxon_id": 16924
235
+ },
236
+ {
237
+ "peptide": "ENFVYIAK",
238
+ "uniprot_id": "M0TAI1",
239
+ "protein_name": "14_3_3 domain-containing protein",
240
+ "taxon_id": 214687
241
+ },
242
+ {
243
+ "peptide": "ENFVYIAK",
244
+ "uniprot_id": "A0A1S4CN65",
245
+ "protein_name": "14-3-3 protein 9-like isoform X2",
246
+ "taxon_id": 4097
247
+ },
248
+ {
249
+ "peptide": "ENFVYIAK",
250
+ "uniprot_id": "A0A2H5N3C1",
251
+ "protein_name": "14_3_3 domain-containing protein",
252
+ "taxon_id": 55188
253
+ },
254
+ {
255
+ "peptide": "ENFVYIAK",
256
+ "uniprot_id": "A0A2H3ZHX0",
257
+ "protein_name": "14-3-3-like protein D isoform X1",
258
+ "taxon_id": 42345
259
+ },
260
+ {
261
+ "peptide": "ENFVYIAK",
262
+ "uniprot_id": "M0TY03",
263
+ "protein_name": "14_3_3 domain-containing protein",
264
+ "taxon_id": 214687
265
+ },
266
+ {
267
+ "peptide": "ENFVYIAK",
268
+ "uniprot_id": "A0A445CI73",
269
+ "protein_name": "14_3_3 domain-containing protein",
270
+ "taxon_id": 3818
271
+ },
272
+ {
273
+ "peptide": "ENFVYIAK",
274
+ "uniprot_id": "A0A3S3NBY5",
275
+ "protein_name": "14-3-3-like protein D isoform X1",
276
+ "taxon_id": 337451
277
+ },
278
+ {
279
+ "peptide": "ENFVYIAK",
280
+ "uniprot_id": "V4U9U4",
281
+ "protein_name": "14_3_3 domain-containing protein",
282
+ "taxon_id": 85681
283
+ },
284
+ {
285
+ "peptide": "ENFVYIAK",
286
+ "uniprot_id": "A0A151SEM7",
287
+ "protein_name": "14-3-3-like protein D",
288
+ "taxon_id": 3821
289
+ },
290
+ {
291
+ "peptide": "ENFVYIAK",
292
+ "uniprot_id": "A0A445IBK9",
293
+ "protein_name": "14-3-3-like protein D isoform D",
294
+ "taxon_id": 3848
295
+ },
296
+ {
297
+ "peptide": "ENFVYIAK",
298
+ "uniprot_id": "A0A1S4CMF0",
299
+ "protein_name": "14-3-3 protein 9-like isoform X1",
300
+ "taxon_id": 4097
301
+ },
302
+ {
303
+ "peptide": "ENFVYIAK",
304
+ "uniprot_id": "A0A1J6KH36",
305
+ "protein_name": "14-3-3 protein 9",
306
+ "taxon_id": 49451
307
+ },
308
+ {
309
+ "peptide": "ENFVYIAK",
310
+ "uniprot_id": "A0A5B7A5K1",
311
+ "protein_name": "Putative 14-3-3-like protein D isoform X1",
312
+ "taxon_id": 16924
313
+ },
314
+ {
315
+ "peptide": "ENFVYIAK",
316
+ "uniprot_id": "F6H2P0",
317
+ "protein_name": "14_3_3 domain-containing protein",
318
+ "taxon_id": 29760
319
+ },
320
+ {
321
+ "peptide": "ENFVYIAK",
322
+ "uniprot_id": "A0A1J6IP66",
323
+ "protein_name": "14-3-3 protein 9",
324
+ "taxon_id": 49451
325
+ },
326
+ {
327
+ "peptide": "ENFVYIAK",
328
+ "uniprot_id": "A0A5B7A8T8",
329
+ "protein_name": "Putative 14-3-3-like protein D isoform X1",
330
+ "taxon_id": 16924
331
+ },
332
+ {
333
+ "peptide": "ENFVYIAK",
334
+ "uniprot_id": "A0A0R0H7R6",
335
+ "protein_name": "14_3_3 domain-containing protein",
336
+ "taxon_id": 3847
337
+ },
338
+ {
339
+ "peptide": "ENFVYIAK",
340
+ "uniprot_id": "A0A067GDS1",
341
+ "protein_name": "14_3_3 domain-containing protein",
342
+ "taxon_id": 2711
343
+ },
344
+ {
345
+ "peptide": "ENFVYIAK",
346
+ "uniprot_id": "A0A445BBW8",
347
+ "protein_name": "14_3_3 domain-containing protein",
348
+ "taxon_id": 3818
349
+ },
350
+ {
351
+ "peptide": "ENFVYIAK",
352
+ "uniprot_id": "V4W919",
353
+ "protein_name": "14_3_3 domain-containing protein",
354
+ "taxon_id": 85681
355
+ },
356
+ {
357
+ "peptide": "ENFVYIAK",
358
+ "uniprot_id": "G7LIR4",
359
+ "protein_name": "General regulatory factor 2",
360
+ "taxon_id": 3880
361
+ },
362
+ {
363
+ "peptide": "ENFVYIAK",
364
+ "uniprot_id": "A0A2H3Y3V0",
365
+ "protein_name": "14-3-3-like protein D isoform X1",
366
+ "taxon_id": 42345
367
+ },
368
+ {
369
+ "peptide": "ENFVYIAK",
370
+ "uniprot_id": "A0A5J4ZBP6",
371
+ "protein_name": "14_3_3 domain-containing protein",
372
+ "taxon_id": 561372
373
+ },
374
+ {
375
+ "peptide": "EGGAGSSTGQR",
376
+ "uniprot_id": "A0A370PI74",
377
+ "protein_name": "Uncharacterized protein",
378
+ "taxon_id": 1353007
379
+ },
380
+ {
381
+ "peptide": "EGGAGSSTGQR",
382
+ "uniprot_id": "G3Y2A8",
383
+ "protein_name": "Uncharacterized protein",
384
+ "taxon_id": 380704
385
+ },
386
+ {
387
+ "peptide": "EGGAGSSTGQR",
388
+ "uniprot_id": "A0A401KHS6",
389
+ "protein_name": "Uncharacterized protein",
390
+ "taxon_id": 105351
391
+ },
392
+ {
393
+ "peptide": "EGGAGSSTGQR",
394
+ "uniprot_id": "A0A3F3RJX9",
395
+ "protein_name": "Uncharacterized protein",
396
+ "taxon_id": 5061
397
+ },
398
+ {
399
+ "peptide": "EGGAGSSTGQR",
400
+ "uniprot_id": "A0A370CDF7",
401
+ "protein_name": "Uncharacterized protein",
402
+ "taxon_id": 1353008
403
+ },
404
+ {
405
+ "peptide": "EGGAGSSTGQR",
406
+ "uniprot_id": "A0A319AT72",
407
+ "protein_name": "Uncharacterized protein",
408
+ "taxon_id": 1450533
409
+ },
410
+ {
411
+ "peptide": "EGGAGSSTGQR",
412
+ "uniprot_id": "A2QCE4",
413
+ "protein_name": "Aspergillus niger contig An02c0090, genomic contig",
414
+ "taxon_id": 425011
415
+ },
416
+ {
417
+ "peptide": "EGGAGSSTGQR",
418
+ "uniprot_id": "A0A3F3PXY4",
419
+ "protein_name": "Uncharacterized protein",
420
+ "taxon_id": 1341132
421
+ }
422
+ ]