miga-base 0.7.26.3 → 1.0.0.sr1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/miga/_data/aai-intax.blast.tsv.gz +0 -0
- data/lib/miga/_data/aai-intax.diamond.tsv.gz +0 -0
- data/lib/miga/_data/aai-novel.blast.tsv.gz +0 -0
- data/lib/miga/_data/aai-novel.diamond.tsv.gz +0 -0
- data/lib/miga/cli/action/doctor.rb +50 -19
- data/lib/miga/cli/action/doctor/base.rb +20 -18
- data/lib/miga/cli/action/init.rb +11 -7
- data/lib/miga/cli/action/init/files_helper.rb +1 -0
- data/lib/miga/cli/action/ncbi_get.rb +3 -3
- data/lib/miga/cli/action/tax_dist.rb +2 -2
- data/lib/miga/cli/action/wf.rb +5 -4
- data/lib/miga/daemon.rb +11 -4
- data/lib/miga/dataset/result.rb +10 -6
- data/lib/miga/json.rb +1 -2
- data/lib/miga/metadata.rb +5 -1
- data/lib/miga/parallel.rb +11 -6
- data/lib/miga/project.rb +8 -8
- data/lib/miga/project/base.rb +4 -4
- data/lib/miga/project/result.rb +2 -2
- data/lib/miga/sqlite.rb +7 -0
- data/lib/miga/version.rb +23 -9
- data/scripts/aai_distances.bash +16 -18
- data/scripts/ani_distances.bash +16 -17
- data/scripts/assembly.bash +31 -16
- data/scripts/haai_distances.bash +3 -27
- data/scripts/miga.bash +6 -4
- data/scripts/p.bash +1 -1
- data/scripts/read_quality.bash +9 -18
- data/scripts/trimmed_fasta.bash +14 -30
- data/scripts/trimmed_reads.bash +36 -36
- data/test/parallel_test.rb +31 -0
- data/test/project_test.rb +2 -1
- data/utils/distance/commands.rb +1 -0
- data/utils/distance/runner.rb +2 -4
- data/utils/enveomics/Manifest/Tasks/fasta.json +39 -3
- data/utils/enveomics/Manifest/Tasks/fastq.json +50 -2
- data/utils/enveomics/Manifest/Tasks/mapping.json +70 -0
- data/utils/enveomics/Manifest/Tasks/other.json +77 -0
- data/utils/enveomics/Manifest/Tasks/sequence-identity.json +138 -1
- data/utils/enveomics/Manifest/categories.json +13 -4
- data/utils/enveomics/Scripts/Aln.cat.rb +206 -148
- data/utils/enveomics/Scripts/FastA.N50.pl +33 -29
- data/utils/enveomics/Scripts/FastA.fragment.rb +69 -61
- data/utils/enveomics/Scripts/FastA.sample.rb +61 -46
- data/utils/enveomics/Scripts/FastA.toFastQ.rb +69 -0
- data/utils/enveomics/Scripts/FastQ.maskQual.rb +89 -0
- data/utils/enveomics/Scripts/FastQ.tag.rb +59 -52
- data/utils/enveomics/Scripts/SRA.download.bash +6 -8
- data/utils/enveomics/Scripts/Table.prefScore.R +60 -0
- data/utils/enveomics/Scripts/aai.rb +3 -2
- data/utils/enveomics/Scripts/anir.rb +137 -0
- data/utils/enveomics/Scripts/lib/enveomics_rb/anir.rb +293 -0
- data/utils/enveomics/Scripts/lib/enveomics_rb/bm_set.rb +175 -0
- data/utils/enveomics/Scripts/lib/enveomics_rb/enveomics.rb +17 -17
- data/utils/enveomics/Scripts/lib/enveomics_rb/errors.rb +17 -0
- data/utils/enveomics/Scripts/lib/enveomics_rb/gmm_em.rb +30 -0
- data/utils/enveomics/Scripts/lib/enveomics_rb/match.rb +63 -0
- data/utils/enveomics/Scripts/lib/enveomics_rb/rbm.rb +49 -0
- data/utils/enveomics/Scripts/lib/enveomics_rb/stats.rb +3 -0
- data/utils/enveomics/Scripts/lib/enveomics_rb/stats/rand.rb +31 -0
- data/utils/enveomics/Scripts/lib/enveomics_rb/stats/sample.rb +152 -0
- data/utils/enveomics/Scripts/lib/enveomics_rb/utils.rb +73 -0
- data/utils/enveomics/Scripts/rbm-legacy.rb +172 -0
- data/utils/enveomics/Scripts/rbm.rb +87 -133
- data/utils/enveomics/Scripts/sam.filter.rb +148 -0
- data/utils/enveomics/enveomics.R/DESCRIPTION +2 -2
- data/utils/enveomics/enveomics.R/NAMESPACE +1 -1
- data/utils/enveomics/enveomics.R/R/prefscore.R +79 -0
- data/utils/enveomics/enveomics.R/R/utils.R +30 -0
- data/utils/enveomics/enveomics.R/README.md +1 -0
- data/utils/enveomics/enveomics.R/man/cash-enve.GrowthCurve-method.Rd +0 -1
- data/utils/enveomics/enveomics.R/man/cash-enve.RecPlot2-method.Rd +0 -1
- data/utils/enveomics/enveomics.R/man/cash-enve.RecPlot2.Peak-method.Rd +0 -1
- data/utils/enveomics/enveomics.R/man/enve.__tribs.Rd +10 -2
- data/utils/enveomics/enveomics.R/man/enve.barplot.Rd +16 -4
- data/utils/enveomics/enveomics.R/man/enve.cliopts.Rd +13 -3
- data/utils/enveomics/enveomics.R/man/enve.df2dist.Rd +8 -2
- data/utils/enveomics/enveomics.R/man/enve.df2dist.group.Rd +8 -2
- data/utils/enveomics/enveomics.R/man/enve.df2dist.list.Rd +9 -2
- data/utils/enveomics/enveomics.R/man/enve.growthcurve.Rd +13 -5
- data/utils/enveomics/enveomics.R/man/enve.prefscore.Rd +50 -0
- data/utils/enveomics/enveomics.R/man/enve.prune.dist.Rd +9 -2
- data/utils/enveomics/enveomics.R/man/enve.recplot.Rd +23 -6
- data/utils/enveomics/enveomics.R/man/enve.recplot2.Rd +13 -4
- data/utils/enveomics/enveomics.R/man/enve.recplot2.compareIdentities.Rd +8 -2
- data/utils/enveomics/enveomics.R/man/enve.recplot2.extractWindows.Rd +7 -2
- data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.__mow_one.Rd +14 -3
- data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.em.Rd +10 -2
- data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.emauto.Rd +8 -2
- data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.mower.Rd +17 -9
- data/utils/enveomics/enveomics.R/man/enve.recplot2.windowDepthThreshold.Rd +6 -2
- data/utils/enveomics/enveomics.R/man/enve.selvector.Rd +23 -0
- data/utils/enveomics/enveomics.R/man/enve.tribs.Rd +14 -5
- data/utils/enveomics/enveomics.R/man/plot.enve.GrowthCurve.Rd +19 -4
- data/utils/enveomics/enveomics.R/man/plot.enve.TRIBS.Rd +11 -3
- data/utils/enveomics/enveomics.R/man/plot.enve.TRIBStest.Rd +11 -4
- data/utils/enveomics/enveomics.R/man/plot.enve.recplot2.Rd +26 -12
- data/utils/multitrim/Multitrim How-To.pdf +0 -0
- data/utils/multitrim/README.md +67 -0
- data/utils/multitrim/multitrim.py +1555 -0
- data/utils/multitrim/multitrim.yml +13 -0
- data/utils/requirements.txt +4 -3
- metadata +33 -6
- data/utils/enveomics/Scripts/lib/enveomics_rb/stat.rb +0 -30
|
@@ -0,0 +1,31 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require 'test_helper'
|
|
4
|
+
|
|
5
|
+
class ParallelTest < Test::Unit::TestCase
|
|
6
|
+
include TestHelper
|
|
7
|
+
|
|
8
|
+
def test_distribute
|
|
9
|
+
declare_forks
|
|
10
|
+
|
|
11
|
+
base = tmpfile('base')
|
|
12
|
+
assert(!File.exist?("#{base}-3"))
|
|
13
|
+
MiGA::Parallel.distribute((0..3), 2) do |o, _k, t|
|
|
14
|
+
File.open("#{base}-#{o}", 'w') { |fh| fh.puts t }
|
|
15
|
+
end
|
|
16
|
+
assert(File.exist?("#{base}-3"))
|
|
17
|
+
assert(!File.exist?("#{base}-4"))
|
|
18
|
+
t = (0..3).map { |i| File.read("#{base}-#{i}").chomp.to_i }
|
|
19
|
+
assert_equal([0, 0, 1, 1], t.sort)
|
|
20
|
+
end
|
|
21
|
+
|
|
22
|
+
def test_thread_enum
|
|
23
|
+
MiGA::Parallel.thread_enum(%w[a b c d], 3, 1) do |o, _k, _t|
|
|
24
|
+
assert_equal('b', o)
|
|
25
|
+
end
|
|
26
|
+
|
|
27
|
+
n = 0
|
|
28
|
+
MiGA::Parallel.thread_enum(0..19, 4, 0) { n += 1 }
|
|
29
|
+
assert_equal(5, n)
|
|
30
|
+
end
|
|
31
|
+
end
|
data/test/project_test.rb
CHANGED
|
@@ -27,10 +27,11 @@ class ProjectTest < Test::Unit::TestCase
|
|
|
27
27
|
def test_create
|
|
28
28
|
assert_equal(tmpfile('create'), project('create').path)
|
|
29
29
|
assert_path_exist(tmpfile('create'))
|
|
30
|
-
|
|
30
|
+
err = capture_stderr do
|
|
31
31
|
ENV['MIGA_HOME'] = tmpfile('chez-moi')
|
|
32
32
|
project('cuckoo')
|
|
33
33
|
end
|
|
34
|
+
assert_match(/Projects cannot be processed yet/, err.string)
|
|
34
35
|
end
|
|
35
36
|
|
|
36
37
|
def test_load
|
data/utils/distance/commands.rb
CHANGED
|
@@ -169,6 +169,7 @@ module MiGA::DistanceRunner::Commands
|
|
|
169
169
|
aai_data[out[1]] = [out[6].to_f, 0, 0, 0] if out[6] !~ /^>/
|
|
170
170
|
end
|
|
171
171
|
end
|
|
172
|
+
puts "Results: #{haai_data.size} | Inferences: #{aai_data.size}"
|
|
172
173
|
batch_data_to_db(:haai, haai_data)
|
|
173
174
|
batch_data_to_db(:aai, aai_data)
|
|
174
175
|
|
data/utils/distance/runner.rb
CHANGED
|
@@ -18,10 +18,8 @@ class MiGA::DistanceRunner
|
|
|
18
18
|
@ref_project = MiGA::Project.load(ref_path)
|
|
19
19
|
raise "Cannot load reference project: #{ref_path}" if @ref_project.nil?
|
|
20
20
|
elsif !opts[:run_taxonomy] && dataset.option(:db_project)
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
ref_path = File.expand_path(ref_path, project.option(:db_proj_dir))
|
|
24
|
-
end
|
|
21
|
+
ref_location = project.option(:db_proj_dir) || File.dirname(project.path)
|
|
22
|
+
ref_path = File.expand_path(dataset.option(:db_project), ref_location)
|
|
25
23
|
@ref_project = MiGA::Project.load(ref_path)
|
|
26
24
|
raise "Cannot load reference project: #{ref_path}" if @ref_project.nil?
|
|
27
25
|
else
|
|
@@ -4,7 +4,8 @@
|
|
|
4
4
|
"task": "FastA.N50.pl",
|
|
5
5
|
"description": ["Calculates the N50 value of a set of sequences.",
|
|
6
6
|
"Alternatively, it can calculate other N** values. It also calculates",
|
|
7
|
-
"the total number of sequences
|
|
7
|
+
"the total number of sequences, the total added length, and the",
|
|
8
|
+
"longest sequence length."],
|
|
8
9
|
"help_arg": "",
|
|
9
10
|
"see_also": ["FastA.length.pl"],
|
|
10
11
|
"options": [
|
|
@@ -354,14 +355,14 @@
|
|
|
354
355
|
"opt": "--in",
|
|
355
356
|
"arg": "in_file",
|
|
356
357
|
"mandatory": true,
|
|
357
|
-
"description": "Input FastA file."
|
|
358
|
+
"description": "Input FastA file (supports .gz compression)."
|
|
358
359
|
},
|
|
359
360
|
{
|
|
360
361
|
"name": "Output file",
|
|
361
362
|
"opt": "--out",
|
|
362
363
|
"arg": "out_file",
|
|
363
364
|
"mandatory": true,
|
|
364
|
-
"description": "Output FastA file."
|
|
365
|
+
"description": "Output FastA file (supports .gz compression)."
|
|
365
366
|
},
|
|
366
367
|
{
|
|
367
368
|
"opt": "--fraction",
|
|
@@ -733,6 +734,41 @@
|
|
|
733
734
|
}
|
|
734
735
|
]
|
|
735
736
|
},
|
|
737
|
+
{
|
|
738
|
+
"task": "FastA.toFastQ.rb",
|
|
739
|
+
"description": "Creates a FastQ-compliant file from a FastA file.",
|
|
740
|
+
"see_also": "FastQ.toFastA.awk",
|
|
741
|
+
"help_arg": "--help",
|
|
742
|
+
"options": [
|
|
743
|
+
{
|
|
744
|
+
"name": "Input FastA",
|
|
745
|
+
"opt": "--in",
|
|
746
|
+
"arg": "in_file",
|
|
747
|
+
"mandatory": true,
|
|
748
|
+
"description": "Input FastA file (supports .gz compression)."
|
|
749
|
+
},
|
|
750
|
+
{
|
|
751
|
+
"name": "Output FastQ",
|
|
752
|
+
"opt": "--out",
|
|
753
|
+
"arg": "out_file",
|
|
754
|
+
"mandatory": true,
|
|
755
|
+
"description": "Output FastQ file (supports .gz compression)."
|
|
756
|
+
},
|
|
757
|
+
{
|
|
758
|
+
"opt": "--quality",
|
|
759
|
+
"arg": "integer",
|
|
760
|
+
"default": 31,
|
|
761
|
+
"description": ["PHRED quality score to use (fixed), in the range",
|
|
762
|
+
"[-5, 41]."]
|
|
763
|
+
},
|
|
764
|
+
{
|
|
765
|
+
"opt": "--encoding",
|
|
766
|
+
"arg": "integer",
|
|
767
|
+
"default": 33,
|
|
768
|
+
"description": "Base encoding (33 or 64)."
|
|
769
|
+
}
|
|
770
|
+
]
|
|
771
|
+
},
|
|
736
772
|
{
|
|
737
773
|
"task": "FastA.wrap.rb",
|
|
738
774
|
"description": "Wraps sequences in a FastA to a given line length.",
|
|
@@ -81,6 +81,47 @@
|
|
|
81
81
|
}
|
|
82
82
|
]
|
|
83
83
|
},
|
|
84
|
+
{
|
|
85
|
+
"task": "FastQ.maskQual.rb",
|
|
86
|
+
"description": "Masks low-quality bases in a FastQ file.",
|
|
87
|
+
"help_arg": "--help",
|
|
88
|
+
"options": [
|
|
89
|
+
{
|
|
90
|
+
"opt": "--input",
|
|
91
|
+
"arg": "in_file",
|
|
92
|
+
"mandatory": true,
|
|
93
|
+
"description": ["Path to the FastQ file containing the sequences.",
|
|
94
|
+
"Supports compression with .gz extension."]
|
|
95
|
+
},
|
|
96
|
+
{
|
|
97
|
+
"opt": "--output",
|
|
98
|
+
"arg": "out_file",
|
|
99
|
+
"mandatory": true,
|
|
100
|
+
"description": ["Path to the output FastQ file.",
|
|
101
|
+
"Supports compression with .gz extension."]
|
|
102
|
+
},
|
|
103
|
+
{
|
|
104
|
+
"opt": "--qual",
|
|
105
|
+
"arg": "integer",
|
|
106
|
+
"default": 15,
|
|
107
|
+
"description": "Minimum quality score to allow a base."
|
|
108
|
+
},
|
|
109
|
+
{
|
|
110
|
+
"opt": "--offset",
|
|
111
|
+
"arg": "integer",
|
|
112
|
+
"default": 33,
|
|
113
|
+
"description": "Q-score offset."
|
|
114
|
+
},
|
|
115
|
+
{
|
|
116
|
+
"opt": "--fasta",
|
|
117
|
+
"description": "Output sequences in FastA format."
|
|
118
|
+
},
|
|
119
|
+
{
|
|
120
|
+
"opt": "--quiet",
|
|
121
|
+
"description": "Run quietly."
|
|
122
|
+
}
|
|
123
|
+
]
|
|
124
|
+
},
|
|
84
125
|
{
|
|
85
126
|
"task": "FastQ.offset.pl",
|
|
86
127
|
"description": ["There are several FastQ formats. This script takes a",
|
|
@@ -160,14 +201,20 @@
|
|
|
160
201
|
"opt": "--in",
|
|
161
202
|
"arg": "in_file",
|
|
162
203
|
"mandatory": true,
|
|
163
|
-
"description":
|
|
204
|
+
"description": [
|
|
205
|
+
"FastQ file containing the sequences.",
|
|
206
|
+
"Supports compression with .gz extension."
|
|
207
|
+
]
|
|
164
208
|
},
|
|
165
209
|
{
|
|
166
210
|
"name": "Output file",
|
|
167
211
|
"opt": "--out",
|
|
168
212
|
"arg": "out_file",
|
|
169
213
|
"mandatory": true,
|
|
170
|
-
"description":
|
|
214
|
+
"description": [
|
|
215
|
+
"FastQ to create.",
|
|
216
|
+
"Supports compression with .gz extension."
|
|
217
|
+
]
|
|
171
218
|
},
|
|
172
219
|
{
|
|
173
220
|
"opt": "--prefix",
|
|
@@ -188,6 +235,7 @@
|
|
|
188
235
|
{
|
|
189
236
|
"task": "FastQ.toFastA.awk",
|
|
190
237
|
"description": "Translates FastQ files into FastA.",
|
|
238
|
+
"see_also": "FastA.toFastQ.rb",
|
|
191
239
|
"help_arg": "'' --help",
|
|
192
240
|
"options": [
|
|
193
241
|
"<",
|
|
@@ -62,6 +62,76 @@
|
|
|
62
62
|
"description": "Window size, in base pairs."
|
|
63
63
|
}
|
|
64
64
|
]
|
|
65
|
+
},
|
|
66
|
+
{
|
|
67
|
+
"task": "sam.filter.rb",
|
|
68
|
+
"description": ["Filters a SAM or BAM file by target sequences and/or",
|
|
69
|
+
"identity."],
|
|
70
|
+
"see_also": ["anir.rb"],
|
|
71
|
+
"help_arg": "--help",
|
|
72
|
+
"options": [
|
|
73
|
+
{
|
|
74
|
+
"opt": "--genome",
|
|
75
|
+
"arg": "in_file",
|
|
76
|
+
"mandatory": true,
|
|
77
|
+
"description": ["Genome assembly.",
|
|
78
|
+
"Supports compression with .gz extension."]
|
|
79
|
+
},
|
|
80
|
+
{
|
|
81
|
+
"opt": "--mapping",
|
|
82
|
+
"arg": "in_file",
|
|
83
|
+
"mandatory": true,
|
|
84
|
+
"description": ["Mapping file.",
|
|
85
|
+
"Supports compression with .gz extension."]
|
|
86
|
+
},
|
|
87
|
+
{
|
|
88
|
+
"opt": "--out-sam",
|
|
89
|
+
"arg": "out_file",
|
|
90
|
+
"mandatory": true,
|
|
91
|
+
"description": ["Output filtered file in SAM format.",
|
|
92
|
+
"Supports compression with .gz extension."]
|
|
93
|
+
},
|
|
94
|
+
{
|
|
95
|
+
"opt": "--g-format",
|
|
96
|
+
"arg": "select",
|
|
97
|
+
"values": ["fasta", "list"],
|
|
98
|
+
"default": "fasta",
|
|
99
|
+
"description": ["Genome assembly format."]
|
|
100
|
+
},
|
|
101
|
+
{
|
|
102
|
+
"opt": "--m-format",
|
|
103
|
+
"arg": "select",
|
|
104
|
+
"values": ["sam", "bam"],
|
|
105
|
+
"default": "sam",
|
|
106
|
+
"description": ["Mapping file format. SAM supports compression with",
|
|
107
|
+
".gz file extension."]
|
|
108
|
+
},
|
|
109
|
+
{
|
|
110
|
+
"opt": "--identity",
|
|
111
|
+
"arg": "float",
|
|
112
|
+
"description": "Set a fixed threshold of percent identity.",
|
|
113
|
+
"default": 95.0
|
|
114
|
+
},
|
|
115
|
+
{
|
|
116
|
+
"opt": "--no-header",
|
|
117
|
+
"description": "Do not include the headers."
|
|
118
|
+
},
|
|
119
|
+
{
|
|
120
|
+
"opt": "--threads",
|
|
121
|
+
"arg": "integer",
|
|
122
|
+
"description": "Threads to use.",
|
|
123
|
+
"default": 2
|
|
124
|
+
},
|
|
125
|
+
{
|
|
126
|
+
"opt": "--log",
|
|
127
|
+
"arg": "out_file",
|
|
128
|
+
"description": "Log file to save output."
|
|
129
|
+
},
|
|
130
|
+
{
|
|
131
|
+
"opt": "--quiet",
|
|
132
|
+
"description": "Run quietly."
|
|
133
|
+
}
|
|
134
|
+
]
|
|
65
135
|
}
|
|
66
136
|
]
|
|
67
137
|
}
|
|
@@ -824,6 +824,83 @@
|
|
|
824
824
|
"description": "Features to map in GFF."
|
|
825
825
|
}
|
|
826
826
|
]
|
|
827
|
+
},
|
|
828
|
+
{
|
|
829
|
+
"task": "Table.prefScore.R",
|
|
830
|
+
"description": ["Estimate preference score of species based on occupancy",
|
|
831
|
+
"in biased sample sets."],
|
|
832
|
+
"help_arg": "--help",
|
|
833
|
+
"requires": [ { "r_package": "optparse" } ],
|
|
834
|
+
"options": [
|
|
835
|
+
{
|
|
836
|
+
"name": "Occupancy matrix",
|
|
837
|
+
"opt": "--x",
|
|
838
|
+
"arg": "in_file",
|
|
839
|
+
"description": ["A tab-delimited table of presence/absence (1/0)",
|
|
840
|
+
"with species as rows and samples as columns."],
|
|
841
|
+
"mandatory": true
|
|
842
|
+
},
|
|
843
|
+
{
|
|
844
|
+
"name": "Sample set",
|
|
845
|
+
"opt": "--set",
|
|
846
|
+
"arg": "in_file",
|
|
847
|
+
"description": ["A list of sample names that constitute the test",
|
|
848
|
+
"set, one per line."],
|
|
849
|
+
"mandatory": true
|
|
850
|
+
},
|
|
851
|
+
{
|
|
852
|
+
"opt": "--ignore",
|
|
853
|
+
"arg": "in_file",
|
|
854
|
+
"description": ["A list of species to exclude from the analysis,",
|
|
855
|
+
"one per line."]
|
|
856
|
+
},
|
|
857
|
+
{
|
|
858
|
+
"name": "Significance threshold",
|
|
859
|
+
"opt": "--signif-thr",
|
|
860
|
+
"arg": "float",
|
|
861
|
+
"description": "Absolute value of the significance threshold."
|
|
862
|
+
},
|
|
863
|
+
{
|
|
864
|
+
"opt": "--col-above",
|
|
865
|
+
"arg": "string",
|
|
866
|
+
"description": "Color for points significantly above zero.",
|
|
867
|
+
"default": "#941100"
|
|
868
|
+
},
|
|
869
|
+
{
|
|
870
|
+
"opt": "--col-equal",
|
|
871
|
+
"arg": "string",
|
|
872
|
+
"description": ["Color for points not significantly different from",
|
|
873
|
+
"zero."],
|
|
874
|
+
"default": "#BDBDBD"
|
|
875
|
+
},
|
|
876
|
+
{
|
|
877
|
+
"opt": "--col-below",
|
|
878
|
+
"arg": "string",
|
|
879
|
+
"description": "Color for points significantly below zero.",
|
|
880
|
+
"default": "#2F5496"
|
|
881
|
+
},
|
|
882
|
+
{
|
|
883
|
+
"name": "Output preference scores",
|
|
884
|
+
"arg": "out_file",
|
|
885
|
+
"description": "Output raw-text file with preference scores.",
|
|
886
|
+
"mandatory": true
|
|
887
|
+
},
|
|
888
|
+
{
|
|
889
|
+
"name": "Graphical output",
|
|
890
|
+
"arg": "out_file",
|
|
891
|
+
"description": "Output PDF file with preference scores plot."
|
|
892
|
+
},
|
|
893
|
+
{
|
|
894
|
+
"name": "Width",
|
|
895
|
+
"arg": "float",
|
|
896
|
+
"description": "Width of the plot in inches (7 by default)."
|
|
897
|
+
},
|
|
898
|
+
{
|
|
899
|
+
"name": "Height",
|
|
900
|
+
"arg": "float",
|
|
901
|
+
"description": "Height of the plot in inches (7 by default)."
|
|
902
|
+
}
|
|
903
|
+
]
|
|
827
904
|
}
|
|
828
905
|
]
|
|
829
906
|
}
|
|
@@ -362,6 +362,139 @@
|
|
|
362
362
|
}
|
|
363
363
|
]
|
|
364
364
|
},
|
|
365
|
+
{
|
|
366
|
+
"task": "anir.rb",
|
|
367
|
+
"description": ["Estimates ANIr: the Average Nucleotide Identity of",
|
|
368
|
+
"reads against a genome."],
|
|
369
|
+
"help_arg": "--help",
|
|
370
|
+
"see_also": ["ani.rb", "sam.filter.rb"],
|
|
371
|
+
"options": [
|
|
372
|
+
{
|
|
373
|
+
"opt": "--reads",
|
|
374
|
+
"arg": "in_file",
|
|
375
|
+
"description": "Metagenomic reads."
|
|
376
|
+
},
|
|
377
|
+
{
|
|
378
|
+
"opt": "--genome",
|
|
379
|
+
"arg": "in_file",
|
|
380
|
+
"description": "Genome assembly."
|
|
381
|
+
},
|
|
382
|
+
{
|
|
383
|
+
"opt": "--mapping",
|
|
384
|
+
"arg": "in_file",
|
|
385
|
+
"description": "Mapping file."
|
|
386
|
+
},
|
|
387
|
+
{
|
|
388
|
+
"opt": "--list",
|
|
389
|
+
"arg": "in_file",
|
|
390
|
+
"description": "Output file with identities."
|
|
391
|
+
},
|
|
392
|
+
{
|
|
393
|
+
"opt": "--hist",
|
|
394
|
+
"arg": "in_file",
|
|
395
|
+
"description": "Output file with histogram."
|
|
396
|
+
},
|
|
397
|
+
{
|
|
398
|
+
"opt": "--tab",
|
|
399
|
+
"arg": "out_file",
|
|
400
|
+
"description": "Output file with results in tabular format."
|
|
401
|
+
},
|
|
402
|
+
{
|
|
403
|
+
"name": "Reads format",
|
|
404
|
+
"opt": "--r-format",
|
|
405
|
+
"arg": "select",
|
|
406
|
+
"description": ["Metagenomic reads format: fastq or fasta.",
|
|
407
|
+
"Both options support compression with .gz file extension."],
|
|
408
|
+
"values": ["fastq", "fasta"],
|
|
409
|
+
"default": "fastq"
|
|
410
|
+
},
|
|
411
|
+
{
|
|
412
|
+
"name": "Reads type",
|
|
413
|
+
"opt": "--r-type",
|
|
414
|
+
"arg": "select",
|
|
415
|
+
"description": ["Type of metagenomic reads: Single reads (single),",
|
|
416
|
+
"coupled reads in separate files (-m must be comma-delimited;",
|
|
417
|
+
"coupled), or coupled reads in a single interposed file",
|
|
418
|
+
"(interleaved)."],
|
|
419
|
+
"values": ["single", "coupled", "interleaved"],
|
|
420
|
+
"default": "single"
|
|
421
|
+
},
|
|
422
|
+
{
|
|
423
|
+
"name": "Genome format",
|
|
424
|
+
"opt": "--g-format",
|
|
425
|
+
"arg": "select",
|
|
426
|
+
"description": ["Genome assembly format: fasta or list.",
|
|
427
|
+
"Both options support compression with .gz file extension.",
|
|
428
|
+
"If passed in mapping-read mode, filters only matches to these",
|
|
429
|
+
"contigs."],
|
|
430
|
+
"values": ["fasta", "list"],
|
|
431
|
+
"default": "fasta"
|
|
432
|
+
},
|
|
433
|
+
{
|
|
434
|
+
"name": "Mapping format",
|
|
435
|
+
"opt": "--m-format",
|
|
436
|
+
"arg": "select",
|
|
437
|
+
"description": ["Mapping file format: sam, bam, tab, or list.",
|
|
438
|
+
"All except bam support compression with .gz file extension."],
|
|
439
|
+
"values": ["sam", "bam", "tab", "list"],
|
|
440
|
+
"default": "sam"
|
|
441
|
+
},
|
|
442
|
+
{
|
|
443
|
+
"opt": "--identity",
|
|
444
|
+
"arg": "float",
|
|
445
|
+
"description": "Set a fixed threshold of percent identity.",
|
|
446
|
+
"default": 95.0
|
|
447
|
+
},
|
|
448
|
+
{
|
|
449
|
+
"opt": "--algorithm",
|
|
450
|
+
"arg": "select",
|
|
451
|
+
"description": ["Set an algorithm to automatically detect identity",
|
|
452
|
+
"threshold: Valley detection by E-M of Gaussian Mixture Model",
|
|
453
|
+
"(gmm), fixed threshold (see Identity; fix),",
|
|
454
|
+
"Pick gmm or fix depending on bimodality (see Bimodality; auto)."],
|
|
455
|
+
"values": ["gmm", "fix", "auto"],
|
|
456
|
+
"default": "auto"
|
|
457
|
+
},
|
|
458
|
+
{
|
|
459
|
+
"opt": "--bimodality",
|
|
460
|
+
"arg": "float",
|
|
461
|
+
"description": ["Threshold of bimodality below which the algorithm",
|
|
462
|
+
"is set to fix. The coefficient used is the de Michele & Accatino",
|
|
463
|
+
"(2014) B index."],
|
|
464
|
+
"default": 0.5
|
|
465
|
+
},
|
|
466
|
+
{
|
|
467
|
+
"opt": "--coefficient",
|
|
468
|
+
"arg": "select",
|
|
469
|
+
"description": ["Coefficient of bimodality for Algorithm auto: ",
|
|
470
|
+
"Sarle's bimodality coefficient b (sarle), or",
|
|
471
|
+
"de Michele and Accatino (2014 PLoS ONE) B index",
|
|
472
|
+
"(use with Bimodality 0.1, dma)."],
|
|
473
|
+
"values": ["sarle", "dma"],
|
|
474
|
+
"default": "sarle"
|
|
475
|
+
},
|
|
476
|
+
{
|
|
477
|
+
"opt": "--bin-size",
|
|
478
|
+
"arg": "float",
|
|
479
|
+
"description": "Width of histogram bins (in percent identity).",
|
|
480
|
+
"default": 1.0
|
|
481
|
+
},
|
|
482
|
+
{
|
|
483
|
+
"opt": "--threads",
|
|
484
|
+
"arg": "integer",
|
|
485
|
+
"description": "Threads to use."
|
|
486
|
+
},
|
|
487
|
+
{
|
|
488
|
+
"opt": "--log",
|
|
489
|
+
"arg": "out_file",
|
|
490
|
+
"description": "Log file to save output."
|
|
491
|
+
},
|
|
492
|
+
{
|
|
493
|
+
"opt": "--quiet",
|
|
494
|
+
"description": "Run quietly."
|
|
495
|
+
}
|
|
496
|
+
]
|
|
497
|
+
},
|
|
365
498
|
{
|
|
366
499
|
"task": "HMM.haai.rb",
|
|
367
500
|
"description": ["Estimates Average Amino Acid Identity (AAI) from the",
|
|
@@ -407,10 +540,14 @@
|
|
|
407
540
|
"sequences."],
|
|
408
541
|
"help_arg": "--help",
|
|
409
542
|
"cite":[
|
|
543
|
+
["Camacho et al, 2009, BMC Bioinf (BLAST+)",
|
|
544
|
+
"https://doi.org/10.1186/1471-2105-10-421"],
|
|
410
545
|
["Altschul et al, 2000, JMB (BLAST)",
|
|
411
546
|
"http://dx.doi.org/10.1016/S0022-2836(05)80360-2"],
|
|
412
547
|
["Buchfink B, Xie C, Huson D, 2015, Nat Meth (Diamond)",
|
|
413
|
-
"https://dx.doi.org/10.1038/nmeth.3176"]
|
|
548
|
+
"https://dx.doi.org/10.1038/nmeth.3176"],
|
|
549
|
+
["Kent, 2002, Genome Res (BLAT)",
|
|
550
|
+
"https://doi.org/10.1101/gr.229202"]
|
|
414
551
|
],
|
|
415
552
|
"options": [
|
|
416
553
|
{
|