unipept 2.1.1 → 2.2.2

Sign up to get free protection for your applications and to get access to all the features.
Files changed (55) hide show
  1. checksums.yaml +4 -4
  2. data/.github/dependabot.yml +9 -0
  3. data/.github/workflows/ci.yml +6 -27
  4. data/.rakeTasks +7 -0
  5. data/.rubocop.yml +2 -0
  6. data/.ruby-version +1 -1
  7. data/CITATION.cff +30 -0
  8. data/Gemfile +2 -2
  9. data/Gemfile.lock +60 -41
  10. data/README.md +2 -2
  11. data/Rakefile +4 -4
  12. data/VERSION +1 -1
  13. data/lib/batch_iterator.rb +16 -0
  14. data/lib/commands/prot2pept.rb +1 -2
  15. data/lib/commands/unipept/api_runner.rb +10 -12
  16. data/lib/commands/unipept/config.rb +1 -1
  17. data/lib/commands/unipept/pept2taxa.rb +1 -5
  18. data/lib/commands/unipept/taxa2tree.rb +74 -0
  19. data/lib/commands/unipept.rb +25 -1
  20. data/lib/commands/uniprot.rb +4 -5
  21. data/lib/configuration.rb +6 -7
  22. data/lib/formatters.rb +108 -36
  23. data/lib/server_message.rb +2 -4
  24. data/test/commands/unipept/test_api_runner.rb +8 -7
  25. data/test/commands/unipept/test_config.rb +1 -1
  26. data/test/commands/unipept/test_pept2ec.rb +11 -11
  27. data/test/commands/unipept/test_pept2funct.rb +15 -15
  28. data/test/commands/unipept/test_pept2go.rb +10 -10
  29. data/test/commands/unipept/test_pept2interpro.rb +26 -26
  30. data/test/commands/unipept/test_pept2lca.rb +2 -2
  31. data/test/commands/unipept/test_pept2prot.rb +2 -2
  32. data/test/commands/unipept/test_pept2taxa.rb +2 -4
  33. data/test/commands/unipept/test_peptinfo.rb +16 -16
  34. data/test/commands/unipept/test_taxa2lca.rb +2 -2
  35. data/test/commands/unipept/test_taxa2tree.rb +68 -0
  36. data/test/commands/unipept/test_taxonomy.rb +2 -2
  37. data/test/helper.rb +10 -0
  38. data/test/support/api_stub.rb +60 -0
  39. data/test/support/resources/pept2ec.json +55 -0
  40. data/test/support/resources/pept2funct.json +73 -0
  41. data/test/support/resources/pept2go.json +43 -0
  42. data/test/support/resources/pept2interpro.json +43 -0
  43. data/test/support/resources/pept2lca.json +14 -0
  44. data/test/support/resources/pept2prot.json +422 -0
  45. data/test/support/resources/pept2taxa.json +194 -0
  46. data/test/support/resources/peptinfo.json +70 -0
  47. data/test/support/resources/taxa2tree.json +194 -0
  48. data/test/support/resources/taxonomy.json +22 -0
  49. data/test/test_configuration.rb +1 -1
  50. data/test/test_formatters.rb +5 -5
  51. data/test/test_output_writer.rb +1 -1
  52. data/test/test_server_message.rb +2 -2
  53. data/test.taxa +4 -0
  54. data/unipept.gemspec +32 -21
  55. metadata +26 -9
@@ -0,0 +1,73 @@
1
+ [
2
+ {
3
+ "peptide": "AALTER",
4
+ "total_protein_count": 7,
5
+ "ec": [
6
+ {
7
+ "ec_number": "3.1.3.3",
8
+ "protein_count": 2
9
+ },
10
+ {
11
+ "ec_number": "6.3.2.13",
12
+ "protein_count": 2
13
+ }
14
+ ],
15
+ "go": [
16
+ {
17
+ "go_term": "GO:0000287",
18
+ "protein_count": 5
19
+ },
20
+ {
21
+ "go_term": "GO:0005737",
22
+ "protein_count": 5
23
+ }
24
+ ],
25
+ "ipr": [
26
+ {
27
+ "code": "IPR013221",
28
+ "protein_count": 2
29
+ }
30
+ ]
31
+ },
32
+ {
33
+ "peptide": "AALER",
34
+ "total_protein_count": 208,
35
+ "ec": [
36
+ {
37
+ "ec_number": "6.1.1.16",
38
+ "protein_count": 44
39
+ },
40
+ {
41
+ "ec_number": "2.7.7.38",
42
+ "protein_count": 13
43
+ }
44
+ ],
45
+ "go": [
46
+ {
47
+ "go_term": "GO:0005737",
48
+ "protein_count": 106
49
+ },
50
+ {
51
+ "go_term": "GO:0005524",
52
+ "protein_count": 75
53
+ }
54
+ ],
55
+ "ipr": [
56
+ {
57
+ "code": "IPR014729",
58
+ "protein_count": 48
59
+ },
60
+ {
61
+ "code": "IPR009080",
62
+ "protein_count": 45
63
+ }
64
+ ]
65
+ },
66
+ {
67
+ "peptide": "AAEVALVGTEK",
68
+ "total_protein_count": 0,
69
+ "ec": [],
70
+ "go": [],
71
+ "ipr": []
72
+ }
73
+ ]
@@ -0,0 +1,43 @@
1
+ [
2
+ {
3
+ "peptide": "AALTER",
4
+ "total_protein_count": 7,
5
+ "go": [
6
+ {
7
+ "go_term": "GO:0000287",
8
+ "protein_count": 5
9
+ },
10
+ {
11
+ "go_term": "GO:0005737",
12
+ "protein_count": 5
13
+ },
14
+ {
15
+ "go_term": "GO:0042803",
16
+ "protein_count": 1
17
+ }
18
+ ]
19
+ },
20
+ {
21
+ "peptide": "AALER",
22
+ "total_protein_count": 208,
23
+ "go": [
24
+ {
25
+ "go_term": "GO:0005737",
26
+ "protein_count": 106
27
+ },
28
+ {
29
+ "go_term": "GO:0005524",
30
+ "protein_count": 75
31
+ },
32
+ {
33
+ "go_term": "GO:0008270",
34
+ "protein_count": 48
35
+ }
36
+ ]
37
+ },
38
+ {
39
+ "peptide": "AAEVALVGTEK",
40
+ "total_protein_count": 0,
41
+ "go": []
42
+ }
43
+ ]
@@ -0,0 +1,43 @@
1
+ [
2
+ {
3
+ "peptide": "AALTER",
4
+ "total_protein_count": 7,
5
+ "ipr": [
6
+ {
7
+ "code": "IPR013221",
8
+ "protein_count": 2
9
+ },
10
+ {
11
+ "code": "IPR036565",
12
+ "protein_count": 2
13
+ },
14
+ {
15
+ "code": "IPR023214",
16
+ "protein_count": 2
17
+ }
18
+ ]
19
+ },
20
+ {
21
+ "peptide": "AALER",
22
+ "total_protein_count": 208,
23
+ "ipr": [
24
+ {
25
+ "code": "IPR014729",
26
+ "protein_count": 48
27
+ },
28
+ {
29
+ "code": "IPR009080",
30
+ "protein_count": 45
31
+ },
32
+ {
33
+ "code": "IPR015803",
34
+ "protein_count": 44
35
+ }
36
+ ]
37
+ },
38
+ {
39
+ "peptide": "VAQFLL",
40
+ "total_protein_count": 0,
41
+ "ipr": []
42
+ }
43
+ ]
@@ -0,0 +1,14 @@
1
+ [
2
+ {
3
+ "peptide": "AALTER",
4
+ "taxon_id": 1,
5
+ "taxon_name": "root",
6
+ "taxon_rank": "no rank"
7
+ },
8
+ {
9
+ "peptide": "AALER",
10
+ "taxon_id": 1,
11
+ "taxon_name": "root",
12
+ "taxon_rank": "no rank"
13
+ }
14
+ ]
@@ -0,0 +1,422 @@
1
+ [
2
+ {
3
+ "peptide": "ENFVYIAK",
4
+ "uniprot_id": "P42654",
5
+ "protein_name": "14-3-3-like protein B",
6
+ "taxon_id": 3906
7
+ },
8
+ {
9
+ "peptide": "ENFVYIAK",
10
+ "uniprot_id": "Q96453",
11
+ "protein_name": "14-3-3-like protein D",
12
+ "taxon_id": 3847
13
+ },
14
+ {
15
+ "peptide": "ENFVYIAK",
16
+ "uniprot_id": "C6TH93",
17
+ "protein_name": "Casparian strip membrane protein 4",
18
+ "taxon_id": 3847
19
+ },
20
+ {
21
+ "peptide": "ENFVYIAK",
22
+ "uniprot_id": "A0A2Z6PAC3",
23
+ "protein_name": "14_3_3 domain-containing protein",
24
+ "taxon_id": 3900
25
+ },
26
+ {
27
+ "peptide": "ENFVYIAK",
28
+ "uniprot_id": "A0A5B7A3K5",
29
+ "protein_name": "Putative 14-3-3-like protein D isoform X1",
30
+ "taxon_id": 16924
31
+ },
32
+ {
33
+ "peptide": "ENFVYIAK",
34
+ "uniprot_id": "A0A396GNG6",
35
+ "protein_name": "Putative 14-3-3 protein",
36
+ "taxon_id": 3880
37
+ },
38
+ {
39
+ "peptide": "ENFVYIAK",
40
+ "uniprot_id": "A0A072VBW0",
41
+ "protein_name": "Putative 14-3-3 protein",
42
+ "taxon_id": 3880
43
+ },
44
+ {
45
+ "peptide": "ENFVYIAK",
46
+ "uniprot_id": "I1LUM3",
47
+ "protein_name": "14_3_3 domain-containing protein",
48
+ "taxon_id": 3847
49
+ },
50
+ {
51
+ "peptide": "ENFVYIAK",
52
+ "uniprot_id": "A0A1S3WXW9",
53
+ "protein_name": "14-3-3 protein 9-like isoform X1",
54
+ "taxon_id": 4097
55
+ },
56
+ {
57
+ "peptide": "ENFVYIAK",
58
+ "uniprot_id": "A0A2H5N535",
59
+ "protein_name": "14_3_3 domain-containing protein",
60
+ "taxon_id": 55188
61
+ },
62
+ {
63
+ "peptide": "ENFVYIAK",
64
+ "uniprot_id": "A0A438E5J2",
65
+ "protein_name": "14-3-3-like protein D",
66
+ "taxon_id": 29760
67
+ },
68
+ {
69
+ "peptide": "ENFVYIAK",
70
+ "uniprot_id": "A0A445HT97",
71
+ "protein_name": "14-3-3-like protein D isoform A",
72
+ "taxon_id": 3848
73
+ },
74
+ {
75
+ "peptide": "ENFVYIAK",
76
+ "uniprot_id": "A0A5J5A607",
77
+ "protein_name": "14_3_3 domain-containing protein",
78
+ "taxon_id": 561372
79
+ },
80
+ {
81
+ "peptide": "ENFVYIAK",
82
+ "uniprot_id": "A0A1S4A416",
83
+ "protein_name": "14-3-3 protein 9-like",
84
+ "taxon_id": 4097
85
+ },
86
+ {
87
+ "peptide": "ENFVYIAK",
88
+ "uniprot_id": "A0A1U7VM87",
89
+ "protein_name": "14-3-3 protein 9-like isoform X2",
90
+ "taxon_id": 4096
91
+ },
92
+ {
93
+ "peptide": "ENFVYIAK",
94
+ "uniprot_id": "A0A2N9J4Q3",
95
+ "protein_name": "14_3_3 domain-containing protein",
96
+ "taxon_id": 28930
97
+ },
98
+ {
99
+ "peptide": "ENFVYIAK",
100
+ "uniprot_id": "A0A445IBP6",
101
+ "protein_name": "14-3-3-like protein D isoform A",
102
+ "taxon_id": 3848
103
+ },
104
+ {
105
+ "peptide": "ENFVYIAK",
106
+ "uniprot_id": "A0A445HTA3",
107
+ "protein_name": "14-3-3-like protein D isoform C",
108
+ "taxon_id": 3848
109
+ },
110
+ {
111
+ "peptide": "ENFVYIAK",
112
+ "uniprot_id": "T2DN83",
113
+ "protein_name": "14-3-3-like protein D",
114
+ "taxon_id": 3885
115
+ },
116
+ {
117
+ "peptide": "ENFVYIAK",
118
+ "uniprot_id": "A0A445CI86",
119
+ "protein_name": "14_3_3 domain-containing protein",
120
+ "taxon_id": 3818
121
+ },
122
+ {
123
+ "peptide": "ENFVYIAK",
124
+ "uniprot_id": "A0A444Y3Q8",
125
+ "protein_name": "14_3_3 domain-containing protein",
126
+ "taxon_id": 3818
127
+ },
128
+ {
129
+ "peptide": "ENFVYIAK",
130
+ "uniprot_id": "A0A2K3PKW1",
131
+ "protein_name": "14-3-3-like protein d-like",
132
+ "taxon_id": 57577
133
+ },
134
+ {
135
+ "peptide": "ENFVYIAK",
136
+ "uniprot_id": "A0A200QJZ5",
137
+ "protein_name": "14-3-3 protein",
138
+ "taxon_id": 56857
139
+ },
140
+ {
141
+ "peptide": "ENFVYIAK",
142
+ "uniprot_id": "A0A5B6YWQ8",
143
+ "protein_name": "Putative 14-3-3-like protein D",
144
+ "taxon_id": 16924
145
+ },
146
+ {
147
+ "peptide": "ENFVYIAK",
148
+ "uniprot_id": "A0A5J5A796",
149
+ "protein_name": "14_3_3 domain-containing protein",
150
+ "taxon_id": 561372
151
+ },
152
+ {
153
+ "peptide": "ENFVYIAK",
154
+ "uniprot_id": "C6TM63",
155
+ "protein_name": "14_3_3 domain-containing protein",
156
+ "taxon_id": 3847
157
+ },
158
+ {
159
+ "peptide": "ENFVYIAK",
160
+ "uniprot_id": "A0A1S3WXS1",
161
+ "protein_name": "14-3-3 protein 9-like isoform X2",
162
+ "taxon_id": 4097
163
+ },
164
+ {
165
+ "peptide": "ENFVYIAK",
166
+ "uniprot_id": "A0A1U7VVV5",
167
+ "protein_name": "14-3-3 protein 9-like isoform X1",
168
+ "taxon_id": 4096
169
+ },
170
+ {
171
+ "peptide": "ENFVYIAK",
172
+ "uniprot_id": "A0A2H3Y4D2",
173
+ "protein_name": "14-3-3-like protein D isoform X2",
174
+ "taxon_id": 42345
175
+ },
176
+ {
177
+ "peptide": "ENFVYIAK",
178
+ "uniprot_id": "A0A5B6YWR0",
179
+ "protein_name": "14_3_3 domain-containing protein",
180
+ "taxon_id": 16924
181
+ },
182
+ {
183
+ "peptide": "ENFVYIAK",
184
+ "uniprot_id": "A0A067GE20",
185
+ "protein_name": "14_3_3 domain-containing protein",
186
+ "taxon_id": 2711
187
+ },
188
+ {
189
+ "peptide": "ENFVYIAK",
190
+ "uniprot_id": "A0A5B6Z3E6",
191
+ "protein_name": "Putative 14-3-3-like protein D isoform X2",
192
+ "taxon_id": 16924
193
+ },
194
+ {
195
+ "peptide": "ENFVYIAK",
196
+ "uniprot_id": "A0A5J5A2G8",
197
+ "protein_name": "14_3_3 domain-containing protein",
198
+ "taxon_id": 561372
199
+ },
200
+ {
201
+ "peptide": "ENFVYIAK",
202
+ "uniprot_id": "A0A445IC54",
203
+ "protein_name": "14-3-3-like protein D isoform C",
204
+ "taxon_id": 3848
205
+ },
206
+ {
207
+ "peptide": "ENFVYIAK",
208
+ "uniprot_id": "E1U3Z1",
209
+ "protein_name": "14-3-3-like protein B",
210
+ "taxon_id": 3827
211
+ },
212
+ {
213
+ "peptide": "ENFVYIAK",
214
+ "uniprot_id": "I1M3M0",
215
+ "protein_name": "14_3_3 domain-containing protein",
216
+ "taxon_id": 3847
217
+ },
218
+ {
219
+ "peptide": "ENFVYIAK",
220
+ "uniprot_id": "A0A0R0HF31",
221
+ "protein_name": "14_3_3 domain-containing protein",
222
+ "taxon_id": 3847
223
+ },
224
+ {
225
+ "peptide": "ENFVYIAK",
226
+ "uniprot_id": "A0A2H3ZH40",
227
+ "protein_name": "14-3-3-like protein D isoform X2",
228
+ "taxon_id": 42345
229
+ },
230
+ {
231
+ "peptide": "ENFVYIAK",
232
+ "uniprot_id": "A0A5B7A4U9",
233
+ "protein_name": "Putative 14-3-3 domain-containing protein",
234
+ "taxon_id": 16924
235
+ },
236
+ {
237
+ "peptide": "ENFVYIAK",
238
+ "uniprot_id": "M0TAI1",
239
+ "protein_name": "14_3_3 domain-containing protein",
240
+ "taxon_id": 214687
241
+ },
242
+ {
243
+ "peptide": "ENFVYIAK",
244
+ "uniprot_id": "A0A1S4CN65",
245
+ "protein_name": "14-3-3 protein 9-like isoform X2",
246
+ "taxon_id": 4097
247
+ },
248
+ {
249
+ "peptide": "ENFVYIAK",
250
+ "uniprot_id": "A0A2H5N3C1",
251
+ "protein_name": "14_3_3 domain-containing protein",
252
+ "taxon_id": 55188
253
+ },
254
+ {
255
+ "peptide": "ENFVYIAK",
256
+ "uniprot_id": "A0A2H3ZHX0",
257
+ "protein_name": "14-3-3-like protein D isoform X1",
258
+ "taxon_id": 42345
259
+ },
260
+ {
261
+ "peptide": "ENFVYIAK",
262
+ "uniprot_id": "M0TY03",
263
+ "protein_name": "14_3_3 domain-containing protein",
264
+ "taxon_id": 214687
265
+ },
266
+ {
267
+ "peptide": "ENFVYIAK",
268
+ "uniprot_id": "A0A445CI73",
269
+ "protein_name": "14_3_3 domain-containing protein",
270
+ "taxon_id": 3818
271
+ },
272
+ {
273
+ "peptide": "ENFVYIAK",
274
+ "uniprot_id": "A0A3S3NBY5",
275
+ "protein_name": "14-3-3-like protein D isoform X1",
276
+ "taxon_id": 337451
277
+ },
278
+ {
279
+ "peptide": "ENFVYIAK",
280
+ "uniprot_id": "V4U9U4",
281
+ "protein_name": "14_3_3 domain-containing protein",
282
+ "taxon_id": 85681
283
+ },
284
+ {
285
+ "peptide": "ENFVYIAK",
286
+ "uniprot_id": "A0A151SEM7",
287
+ "protein_name": "14-3-3-like protein D",
288
+ "taxon_id": 3821
289
+ },
290
+ {
291
+ "peptide": "ENFVYIAK",
292
+ "uniprot_id": "A0A445IBK9",
293
+ "protein_name": "14-3-3-like protein D isoform D",
294
+ "taxon_id": 3848
295
+ },
296
+ {
297
+ "peptide": "ENFVYIAK",
298
+ "uniprot_id": "A0A1S4CMF0",
299
+ "protein_name": "14-3-3 protein 9-like isoform X1",
300
+ "taxon_id": 4097
301
+ },
302
+ {
303
+ "peptide": "ENFVYIAK",
304
+ "uniprot_id": "A0A1J6KH36",
305
+ "protein_name": "14-3-3 protein 9",
306
+ "taxon_id": 49451
307
+ },
308
+ {
309
+ "peptide": "ENFVYIAK",
310
+ "uniprot_id": "A0A5B7A5K1",
311
+ "protein_name": "Putative 14-3-3-like protein D isoform X1",
312
+ "taxon_id": 16924
313
+ },
314
+ {
315
+ "peptide": "ENFVYIAK",
316
+ "uniprot_id": "F6H2P0",
317
+ "protein_name": "14_3_3 domain-containing protein",
318
+ "taxon_id": 29760
319
+ },
320
+ {
321
+ "peptide": "ENFVYIAK",
322
+ "uniprot_id": "A0A1J6IP66",
323
+ "protein_name": "14-3-3 protein 9",
324
+ "taxon_id": 49451
325
+ },
326
+ {
327
+ "peptide": "ENFVYIAK",
328
+ "uniprot_id": "A0A5B7A8T8",
329
+ "protein_name": "Putative 14-3-3-like protein D isoform X1",
330
+ "taxon_id": 16924
331
+ },
332
+ {
333
+ "peptide": "ENFVYIAK",
334
+ "uniprot_id": "A0A0R0H7R6",
335
+ "protein_name": "14_3_3 domain-containing protein",
336
+ "taxon_id": 3847
337
+ },
338
+ {
339
+ "peptide": "ENFVYIAK",
340
+ "uniprot_id": "A0A067GDS1",
341
+ "protein_name": "14_3_3 domain-containing protein",
342
+ "taxon_id": 2711
343
+ },
344
+ {
345
+ "peptide": "ENFVYIAK",
346
+ "uniprot_id": "A0A445BBW8",
347
+ "protein_name": "14_3_3 domain-containing protein",
348
+ "taxon_id": 3818
349
+ },
350
+ {
351
+ "peptide": "ENFVYIAK",
352
+ "uniprot_id": "V4W919",
353
+ "protein_name": "14_3_3 domain-containing protein",
354
+ "taxon_id": 85681
355
+ },
356
+ {
357
+ "peptide": "ENFVYIAK",
358
+ "uniprot_id": "G7LIR4",
359
+ "protein_name": "General regulatory factor 2",
360
+ "taxon_id": 3880
361
+ },
362
+ {
363
+ "peptide": "ENFVYIAK",
364
+ "uniprot_id": "A0A2H3Y3V0",
365
+ "protein_name": "14-3-3-like protein D isoform X1",
366
+ "taxon_id": 42345
367
+ },
368
+ {
369
+ "peptide": "ENFVYIAK",
370
+ "uniprot_id": "A0A5J4ZBP6",
371
+ "protein_name": "14_3_3 domain-containing protein",
372
+ "taxon_id": 561372
373
+ },
374
+ {
375
+ "peptide": "EGGAGSSTGQR",
376
+ "uniprot_id": "A0A370PI74",
377
+ "protein_name": "Uncharacterized protein",
378
+ "taxon_id": 1353007
379
+ },
380
+ {
381
+ "peptide": "EGGAGSSTGQR",
382
+ "uniprot_id": "G3Y2A8",
383
+ "protein_name": "Uncharacterized protein",
384
+ "taxon_id": 380704
385
+ },
386
+ {
387
+ "peptide": "EGGAGSSTGQR",
388
+ "uniprot_id": "A0A401KHS6",
389
+ "protein_name": "Uncharacterized protein",
390
+ "taxon_id": 105351
391
+ },
392
+ {
393
+ "peptide": "EGGAGSSTGQR",
394
+ "uniprot_id": "A0A3F3RJX9",
395
+ "protein_name": "Uncharacterized protein",
396
+ "taxon_id": 5061
397
+ },
398
+ {
399
+ "peptide": "EGGAGSSTGQR",
400
+ "uniprot_id": "A0A370CDF7",
401
+ "protein_name": "Uncharacterized protein",
402
+ "taxon_id": 1353008
403
+ },
404
+ {
405
+ "peptide": "EGGAGSSTGQR",
406
+ "uniprot_id": "A0A319AT72",
407
+ "protein_name": "Uncharacterized protein",
408
+ "taxon_id": 1450533
409
+ },
410
+ {
411
+ "peptide": "EGGAGSSTGQR",
412
+ "uniprot_id": "A2QCE4",
413
+ "protein_name": "Aspergillus niger contig An02c0090, genomic contig",
414
+ "taxon_id": 425011
415
+ },
416
+ {
417
+ "peptide": "EGGAGSSTGQR",
418
+ "uniprot_id": "A0A3F3PXY4",
419
+ "protein_name": "Uncharacterized protein",
420
+ "taxon_id": 1341132
421
+ }
422
+ ]