miga-base 0.3.1.6 → 0.3.1.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/actions/ncbi_get.rb +57 -42
- data/lib/miga/result/base.rb +7 -0
- data/lib/miga/result/dates.rb +42 -0
- data/lib/miga/result.rb +4 -0
- data/lib/miga/version.rb +1 -1
- data/scripts/essential_genes.bash +5 -4
- data/utils/enveomics/Makefile +1 -1
- data/utils/enveomics/Manifest/Tasks/aasubs.json +75 -75
- data/utils/enveomics/Manifest/Tasks/blasttab.json +194 -185
- data/utils/enveomics/Manifest/Tasks/distances.json +130 -130
- data/utils/enveomics/Manifest/Tasks/fasta.json +51 -3
- data/utils/enveomics/Manifest/Tasks/fastq.json +161 -126
- data/utils/enveomics/Manifest/Tasks/graphics.json +111 -111
- data/utils/enveomics/Manifest/Tasks/mapping.json +30 -0
- data/utils/enveomics/Manifest/Tasks/ogs.json +308 -265
- data/utils/enveomics/Manifest/Tasks/other.json +451 -449
- data/utils/enveomics/Manifest/Tasks/remote.json +1 -1
- data/utils/enveomics/Manifest/Tasks/sequence-identity.json +18 -10
- data/utils/enveomics/Manifest/Tasks/tables.json +250 -250
- data/utils/enveomics/Manifest/Tasks/trees.json +52 -52
- data/utils/enveomics/Manifest/Tasks/variants.json +4 -4
- data/utils/enveomics/Manifest/categories.json +12 -4
- data/utils/enveomics/Manifest/examples.json +1 -1
- data/utils/enveomics/Scripts/BedGraph.tad.rb +71 -0
- data/utils/enveomics/Scripts/BlastTab.recplot2.R +23 -22
- data/utils/enveomics/Scripts/FastA.split.rb +79 -0
- data/utils/enveomics/Scripts/FastQ.test-error.rb +81 -0
- data/utils/enveomics/Scripts/JPlace.to_iToL.rb +272 -258
- data/utils/enveomics/Scripts/aai.rb +13 -6
- data/utils/enveomics/Scripts/ani.rb +2 -2
- data/utils/enveomics/Scripts/clust.rand.rb +102 -0
- data/utils/enveomics/Scripts/lib/enveomics_rb/enveomics.rb +12 -14
- data/utils/enveomics/Scripts/lib/enveomics_rb/jplace.rb +2 -2
- data/utils/enveomics/Scripts/rbm.rb +23 -14
- data/utils/enveomics/enveomics.R/DESCRIPTION +1 -1
- data/utils/enveomics/enveomics.R/R/barplot.R +2 -2
- metadata +9 -2
@@ -8,41 +8,41 @@
|
|
8
8
|
"help_arg": "--help",
|
9
9
|
"options": [
|
10
10
|
{
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
11
|
+
"name": "Input file",
|
12
|
+
"opt": "--in",
|
13
|
+
"arg": "in_file",
|
14
|
+
"mandatory": true,
|
15
|
+
"description": ["Input file containing the OGs (as generated by",
|
16
|
+
"ogs.mcl.rb)."]
|
17
|
+
},
|
18
|
+
{
|
19
|
+
"name": "Output file",
|
20
|
+
"opt": "--out",
|
21
|
+
"arg": "out_file",
|
22
|
+
"mandatory": true,
|
23
|
+
"description": "Output file containing the annotated OGs."
|
24
|
+
},
|
25
|
+
{
|
26
|
+
"name": "Annotations",
|
27
|
+
"opt": "-a",
|
28
|
+
"arg": "in_file",
|
29
|
+
"mandatory": true,
|
30
|
+
"multiple_sep": ",",
|
31
|
+
"description": ["Input file(s) containing the annotations. One or",
|
32
|
+
"more tab-delimited files with the gene names in the first column",
|
33
|
+
"and the annotation in the second."]
|
34
|
+
},
|
35
|
+
{
|
36
|
+
"opt": "--format",
|
37
|
+
"arg": "string",
|
38
|
+
"default": "(\\S+)\\.txt",
|
39
|
+
"description": ["Format of the filenames for the annotation files,",
|
40
|
+
"using regex syntax."]
|
41
|
+
},
|
42
|
+
{
|
43
|
+
"opt": "--quiet",
|
44
|
+
"description": "Run quietly (no STDERR output)."
|
45
|
+
}
|
46
46
|
]
|
47
47
|
},
|
48
48
|
{
|
@@ -52,48 +52,48 @@
|
|
52
52
|
"help_arg": "--help",
|
53
53
|
"requires": [
|
54
54
|
{
|
55
|
-
|
56
|
-
|
55
|
+
"ruby_gem": "json"
|
56
|
+
}
|
57
57
|
],
|
58
58
|
"see_also": ["ogs.mcl.rb"],
|
59
59
|
"options": [
|
60
60
|
{
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
|
69
|
-
|
70
|
-
|
71
|
-
|
72
|
-
|
73
|
-
|
74
|
-
|
75
|
-
|
76
|
-
|
77
|
-
|
78
|
-
|
79
|
-
|
80
|
-
|
81
|
-
|
82
|
-
|
83
|
-
|
84
|
-
|
85
|
-
|
86
|
-
|
87
|
-
|
88
|
-
|
89
|
-
|
90
|
-
|
91
|
-
|
92
|
-
|
93
|
-
|
94
|
-
|
95
|
-
|
96
|
-
|
61
|
+
"opt": "--ogs",
|
62
|
+
"arg": "in_file",
|
63
|
+
"mandatory": true,
|
64
|
+
"description": "Input file containing the precomputed OGs."
|
65
|
+
},
|
66
|
+
{
|
67
|
+
"opt": "--summary",
|
68
|
+
"arg": "out_file",
|
69
|
+
"description": ["Output file in tabular format with summary",
|
70
|
+
"statistics."]
|
71
|
+
},
|
72
|
+
{
|
73
|
+
"opt": "--tab",
|
74
|
+
"arg": "out_file",
|
75
|
+
"description": "Output file in tabular format."
|
76
|
+
},
|
77
|
+
{
|
78
|
+
"opt": "--json",
|
79
|
+
"arg": "out_file",
|
80
|
+
"description": "Output file in JSON format."
|
81
|
+
},
|
82
|
+
{
|
83
|
+
"opt": "--replicates",
|
84
|
+
"arg": "integer",
|
85
|
+
"description": "Number of replicates to estimate.",
|
86
|
+
"default": 100
|
87
|
+
},
|
88
|
+
{
|
89
|
+
"opt": "--threads",
|
90
|
+
"arg": "integer",
|
91
|
+
"description": "Children threads to spawn."
|
92
|
+
},
|
93
|
+
{
|
94
|
+
"opt": "--quiet",
|
95
|
+
"description": "Run quietly (no STDERR output)."
|
96
|
+
}
|
97
97
|
]
|
98
98
|
},
|
99
99
|
{
|
@@ -104,77 +104,77 @@
|
|
104
104
|
"see_also": ["ogs.mcl.rb"],
|
105
105
|
"options": [
|
106
106
|
{
|
107
|
-
|
108
|
-
|
109
|
-
|
110
|
-
|
111
|
-
|
112
|
-
|
113
|
-
|
114
|
-
|
115
|
-
|
116
|
-
|
117
|
-
|
118
|
-
|
119
|
-
|
120
|
-
|
121
|
-
|
122
|
-
|
123
|
-
|
124
|
-
|
125
|
-
|
126
|
-
|
127
|
-
|
128
|
-
|
129
|
-
|
130
|
-
|
131
|
-
|
132
|
-
|
133
|
-
|
134
|
-
|
135
|
-
|
136
|
-
|
137
|
-
|
138
|
-
|
139
|
-
|
140
|
-
|
141
|
-
|
142
|
-
|
143
|
-
|
144
|
-
|
145
|
-
|
146
|
-
|
147
|
-
|
148
|
-
|
149
|
-
|
150
|
-
|
151
|
-
|
152
|
-
|
153
|
-
|
154
|
-
|
155
|
-
|
156
|
-
|
157
|
-
|
158
|
-
|
159
|
-
|
160
|
-
|
161
|
-
|
162
|
-
|
163
|
-
|
164
|
-
|
107
|
+
"name": "Input file",
|
108
|
+
"opt": "--in",
|
109
|
+
"arg": "in_file",
|
110
|
+
"mandatory": true,
|
111
|
+
"description": ["Input file containing the OGs (as generated by",
|
112
|
+
"ogs.mcl.rb)."]
|
113
|
+
},
|
114
|
+
{
|
115
|
+
"name": "Output file",
|
116
|
+
"opt": "--out",
|
117
|
+
"arg": "out_file",
|
118
|
+
"mandatory": true,
|
119
|
+
"description": "Output directory where to place extracted sequences."
|
120
|
+
},
|
121
|
+
{
|
122
|
+
"name": "Sequences",
|
123
|
+
"opt": "--seqs",
|
124
|
+
"arg": "in_file",
|
125
|
+
"mandatory": true,
|
126
|
+
"description": ["Path to the proteomes in FastA format, using '%s'",
|
127
|
+
"to denote the genome. For example: /path/to/seqs/%s.faa."]
|
128
|
+
},
|
129
|
+
{
|
130
|
+
"opt": "--core",
|
131
|
+
"arg": "float",
|
132
|
+
"description": ["Use only OGs present in at least this fraction of",
|
133
|
+
"the genomes. To use only the strict core genome*, use --core 1."],
|
134
|
+
"note": ["* To use only the unus genome (OGs with exactly one gene",
|
135
|
+
"per genome), use: --core 1 --duplicates 1."]
|
136
|
+
},
|
137
|
+
{
|
138
|
+
"opt": "--duplicates",
|
139
|
+
"arg": "integer",
|
140
|
+
"description": ["Use only OGs with less than this number of",
|
141
|
+
"in-paralogs in a genome. To use only genes without in-paralogs*,",
|
142
|
+
"use --duplicates 1."],
|
143
|
+
"note": ["* To use only the unus genome (OGs with exactly one gene",
|
144
|
+
"per genome), use: --core 1 --duplicates 1."]
|
145
|
+
},
|
146
|
+
{
|
147
|
+
"opt": "--per-genome",
|
148
|
+
"description": ["If set, the output is generated per genome. By",
|
149
|
+
"default, the output is per OG."]
|
150
|
+
},
|
151
|
+
{
|
152
|
+
"opt": "--prefix",
|
153
|
+
"description": ["If set, each sequence is prefixed with the genome",
|
154
|
+
"name (or OG number, if --per-genome) and a dash."]
|
155
|
+
},
|
156
|
+
{
|
157
|
+
"opt": "--rand",
|
158
|
+
"description": ["Get only one gene per genome per OG (random)",
|
159
|
+
"regardless of in-paralogs. By default all genes are extracted."]
|
160
|
+
},
|
161
|
+
{
|
162
|
+
"opt": "--first",
|
163
|
+
"description": ["Get only one gene per genome per OG (first)",
|
164
|
+
"regardless of in-paralogs. By default all genes are extracted.",
|
165
165
|
"Takes precedence over --rand."]
|
166
|
-
|
167
|
-
|
168
|
-
|
169
|
-
|
170
|
-
|
166
|
+
},
|
167
|
+
{
|
168
|
+
"opt": "--quiet",
|
169
|
+
"description": "Run quietly (no STDERR output)."
|
170
|
+
}
|
171
171
|
]
|
172
172
|
},
|
173
173
|
{
|
174
174
|
"task": "ogs.mcl.rb",
|
175
175
|
"description": ["Identifies Orthology Groups (OGs) in Reciprocal Best",
|
176
176
|
"Matches (RBM) between all pairs in a collection of genomes, using the",
|
177
|
-
|
177
|
+
"Markov Cluster Algorithm."],
|
178
178
|
"see_also": ["ogs.annotate.rb", "ogs.core-pan.rb", "ogs.extract.rb",
|
179
179
|
"ogs.stats.rb"],
|
180
180
|
"cite": [["Enright et al, 2002, NAR",
|
@@ -182,72 +182,72 @@
|
|
182
182
|
"help_arg": "--help",
|
183
183
|
"options": [
|
184
184
|
{
|
185
|
-
|
186
|
-
|
187
|
-
|
188
|
-
|
189
|
-
|
190
|
-
|
191
|
-
|
192
|
-
|
193
|
-
|
194
|
-
|
195
|
-
|
196
|
-
|
197
|
-
|
198
|
-
|
199
|
-
|
200
|
-
|
201
|
-
|
202
|
-
|
203
|
-
|
204
|
-
|
205
|
-
|
206
|
-
|
207
|
-
|
208
|
-
|
209
|
-
|
210
|
-
|
211
|
-
|
212
|
-
|
213
|
-
|
214
|
-
|
215
|
-
|
216
|
-
|
217
|
-
|
218
|
-
|
219
|
-
|
220
|
-
|
221
|
-
|
222
|
-
|
223
|
-
|
224
|
-
|
225
|
-
|
226
|
-
|
227
|
-
|
228
|
-
|
229
|
-
|
230
|
-
|
231
|
-
|
232
|
-
|
233
|
-
|
234
|
-
|
235
|
-
|
236
|
-
|
237
|
-
|
238
|
-
|
239
|
-
|
240
|
-
|
241
|
-
|
242
|
-
|
243
|
-
|
244
|
-
|
245
|
-
|
246
|
-
|
247
|
-
|
248
|
-
|
249
|
-
|
250
|
-
|
185
|
+
"opt": "--out",
|
186
|
+
"arg": "out_file",
|
187
|
+
"mandatory": true,
|
188
|
+
"description": "Output file containing the detected OGs."
|
189
|
+
},
|
190
|
+
{
|
191
|
+
"opt": "--dir",
|
192
|
+
"arg": "in_dir",
|
193
|
+
"description": "Directory containing the RBM files.",
|
194
|
+
"note": "Mandatory, unless --abc is set to a non-empty file."
|
195
|
+
},
|
196
|
+
{
|
197
|
+
"opt": "--format",
|
198
|
+
"arg": "string",
|
199
|
+
"description": ["Format of the filenames for the RBM files (within",
|
200
|
+
"--dir), using regex syntax."],
|
201
|
+
"default": "(\\S+)-(\\S+)\\.rbm"
|
202
|
+
},
|
203
|
+
{
|
204
|
+
"opt": "--inflation",
|
205
|
+
"arg": "float",
|
206
|
+
"description": "Inflation parameter for MCL clustering.",
|
207
|
+
"default": 1.5
|
208
|
+
},
|
209
|
+
{
|
210
|
+
"opt": "--blind",
|
211
|
+
"description": ["If set, computes clusters without taking bitscore",
|
212
|
+
"into account."]
|
213
|
+
},
|
214
|
+
{
|
215
|
+
"opt": "--evalue",
|
216
|
+
"description": ["If set, uses the e-value to weight edges, instead",
|
217
|
+
"of the default Bit-Score."]
|
218
|
+
},
|
219
|
+
{
|
220
|
+
"opt": "--identity",
|
221
|
+
"description": ["If set, uses the identity to weight edges, instead",
|
222
|
+
"of the default Bit-Score."]
|
223
|
+
},
|
224
|
+
{
|
225
|
+
"opt": "--best-match",
|
226
|
+
"description": ["If set, it assumes best-matches instead of reciprocal",
|
227
|
+
"best matches."]
|
228
|
+
},
|
229
|
+
{
|
230
|
+
"opt": "--mcl-bin",
|
231
|
+
"arg": "in_dir",
|
232
|
+
"description": ["Path to the directory containing the mcl binaries.",
|
233
|
+
"By default, assumed to be in the PATH."]
|
234
|
+
},
|
235
|
+
{
|
236
|
+
"name": "abc",
|
237
|
+
"arg": "out_file",
|
238
|
+
"opt": "--abc",
|
239
|
+
"description": "Use this abc file instead of a temporary file."
|
240
|
+
},
|
241
|
+
{
|
242
|
+
"opt": "--threads",
|
243
|
+
"arg": "integer",
|
244
|
+
"default": 2,
|
245
|
+
"description": "Number of threads to use."
|
246
|
+
},
|
247
|
+
{
|
248
|
+
"opt": "--quiet",
|
249
|
+
"description": "Run quietly (no STDERR output)."
|
250
|
+
}
|
251
251
|
]
|
252
252
|
},
|
253
253
|
{
|
@@ -256,44 +256,44 @@
|
|
256
256
|
"Matches (RBM) between all pairs in a collection of genomes."],
|
257
257
|
"warn": ["This script suffers from chaining effect and is very",
|
258
258
|
"sensitive to spurious connections, because it applies a greedy",
|
259
|
-
|
260
|
-
|
259
|
+
"clustering algorithm. For most practical purposes, the use of this",
|
260
|
+
"script is discouraged and `ogs.mcl.rb` should be preferred."],
|
261
261
|
"help_arg": "--help",
|
262
262
|
"see_also": ["ogs.mcl.rb"],
|
263
263
|
"options": [
|
264
264
|
{
|
265
|
-
|
266
|
-
|
267
|
-
|
268
|
-
|
269
|
-
|
270
|
-
|
271
|
-
|
272
|
-
|
273
|
-
|
274
|
-
|
275
|
-
|
276
|
-
|
277
|
-
|
278
|
-
|
279
|
-
|
280
|
-
|
281
|
-
|
282
|
-
|
283
|
-
|
284
|
-
|
285
|
-
|
286
|
-
|
287
|
-
|
288
|
-
|
289
|
-
|
290
|
-
|
291
|
-
|
292
|
-
|
293
|
-
|
294
|
-
|
295
|
-
|
296
|
-
|
265
|
+
"opt": "--out",
|
266
|
+
"mandatory": true,
|
267
|
+
"arg": "out_file",
|
268
|
+
"description": "Output file containing the detected OGs."
|
269
|
+
},
|
270
|
+
{
|
271
|
+
"opt": "--dir",
|
272
|
+
"arg": "in_dir",
|
273
|
+
"description": "Directory containing the RBM files.",
|
274
|
+
"note": "Required unless --pre-ogs is passed."
|
275
|
+
},
|
276
|
+
{
|
277
|
+
"opt": "--pre-ogs",
|
278
|
+
"arg": "in_file",
|
279
|
+
"multiple_sep": ",",
|
280
|
+
"description": "Pre-computed OGs file(s), separated by commas."
|
281
|
+
},
|
282
|
+
{
|
283
|
+
"opt": "--unchecked",
|
284
|
+
"description": "Do not check internal redundancy in OGs."
|
285
|
+
},
|
286
|
+
{
|
287
|
+
"opt": "--format",
|
288
|
+
"arg": "string",
|
289
|
+
"default": "(\\S+)-(\\S+)\\.rbm",
|
290
|
+
"description": ["Format of the filenames for the RBM files (within",
|
291
|
+
"--dir), using regex syntax."]
|
292
|
+
},
|
293
|
+
{
|
294
|
+
"opt": "--quiet",
|
295
|
+
"description": "Run quietly (no STDERR output)."
|
296
|
+
}
|
297
297
|
]
|
298
298
|
},
|
299
299
|
{
|
@@ -305,34 +305,77 @@
|
|
305
305
|
"requires": [ { "ruby_gem": "json" } ],
|
306
306
|
"options": [
|
307
307
|
{
|
308
|
-
|
309
|
-
|
310
|
-
|
311
|
-
|
312
|
-
|
313
|
-
|
314
|
-
|
315
|
-
|
316
|
-
|
317
|
-
|
318
|
-
|
319
|
-
|
320
|
-
|
321
|
-
|
322
|
-
|
323
|
-
|
324
|
-
|
325
|
-
|
326
|
-
|
327
|
-
|
328
|
-
|
329
|
-
|
330
|
-
|
331
|
-
|
332
|
-
|
333
|
-
|
334
|
-
|
335
|
-
|
308
|
+
"opt": "--ogs",
|
309
|
+
"arg": "in_file",
|
310
|
+
"mandatory": true,
|
311
|
+
"description": "Input file containing the precomputed OGs."
|
312
|
+
},
|
313
|
+
{
|
314
|
+
"opt": "--json",
|
315
|
+
"arg": "out_file",
|
316
|
+
"description": "Output file in JSON format."
|
317
|
+
},
|
318
|
+
{
|
319
|
+
"opt": "--tab",
|
320
|
+
"arg": "out_file",
|
321
|
+
"description": "Output file in tabular format."
|
322
|
+
},
|
323
|
+
{
|
324
|
+
"opt": "--transposed-tab",
|
325
|
+
"arg": "out_file",
|
326
|
+
"description": "Output file in transposed tabular format."
|
327
|
+
},
|
328
|
+
{
|
329
|
+
"opt": "--auto",
|
330
|
+
"description": "Run completely quietly (no STDERR or STDOUT)."
|
331
|
+
},
|
332
|
+
{
|
333
|
+
"opt": "--quiet",
|
334
|
+
"description": "Run quietly (no STDERR output)."
|
335
|
+
}
|
336
|
+
]
|
337
|
+
},
|
338
|
+
{
|
339
|
+
"task": "clust.rand.rb",
|
340
|
+
"description": ["Calculates the Rand Index and the Adjusted Rand Index",
|
341
|
+
"between two clusterings. The clustering format is a raw text file",
|
342
|
+
"with one cluster per line, each defined as comma-delimited members,",
|
343
|
+
"and a header line (ignored). Note that this is equivalent to the OGs",
|
344
|
+
"format for 1 genome."],
|
345
|
+
"see_also": ["ogs.mcl.rb"],
|
346
|
+
"help_arg": "--help",
|
347
|
+
"cite": [
|
348
|
+
["Rand, 1971, J Am Stat Assoc",
|
349
|
+
"https://doi.org/10.2307%2F2284239"],
|
350
|
+
["Hubert & Arabie, 1985, J Classif",
|
351
|
+
"https://doi.org/10.1007%2FBF01908075"]
|
352
|
+
],
|
353
|
+
"options": [
|
354
|
+
{
|
355
|
+
"name": "Input file 1",
|
356
|
+
"opt": "--clust1",
|
357
|
+
"arg": "in_file",
|
358
|
+
"mandatory": true,
|
359
|
+
"description": "First input file."
|
360
|
+
},
|
361
|
+
{
|
362
|
+
"name": "Input file 2",
|
363
|
+
"opt": "--clust2",
|
364
|
+
"arg": "in_file",
|
365
|
+
"mandatory": true,
|
366
|
+
"description": "Second input file."
|
367
|
+
},
|
368
|
+
{
|
369
|
+
"name": "Precision",
|
370
|
+
"opt": "--prec",
|
371
|
+
"arg": "integer",
|
372
|
+
"description": "Precision to report.",
|
373
|
+
"default": 6
|
374
|
+
},
|
375
|
+
{
|
376
|
+
"opt": "--quiet",
|
377
|
+
"description": "Run quietly (no STDERR output)."
|
378
|
+
}
|
336
379
|
]
|
337
380
|
}
|
338
381
|
]
|