miga-base 0.7.26.3 → 1.0.0.sr1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/lib/miga/_data/aai-intax.blast.tsv.gz +0 -0
- data/lib/miga/_data/aai-intax.diamond.tsv.gz +0 -0
- data/lib/miga/_data/aai-novel.blast.tsv.gz +0 -0
- data/lib/miga/_data/aai-novel.diamond.tsv.gz +0 -0
- data/lib/miga/cli/action/doctor.rb +50 -19
- data/lib/miga/cli/action/doctor/base.rb +20 -18
- data/lib/miga/cli/action/init.rb +11 -7
- data/lib/miga/cli/action/init/files_helper.rb +1 -0
- data/lib/miga/cli/action/ncbi_get.rb +3 -3
- data/lib/miga/cli/action/tax_dist.rb +2 -2
- data/lib/miga/cli/action/wf.rb +5 -4
- data/lib/miga/daemon.rb +11 -4
- data/lib/miga/dataset/result.rb +10 -6
- data/lib/miga/json.rb +1 -2
- data/lib/miga/metadata.rb +5 -1
- data/lib/miga/parallel.rb +11 -6
- data/lib/miga/project.rb +8 -8
- data/lib/miga/project/base.rb +4 -4
- data/lib/miga/project/result.rb +2 -2
- data/lib/miga/sqlite.rb +7 -0
- data/lib/miga/version.rb +23 -9
- data/scripts/aai_distances.bash +16 -18
- data/scripts/ani_distances.bash +16 -17
- data/scripts/assembly.bash +31 -16
- data/scripts/haai_distances.bash +3 -27
- data/scripts/miga.bash +6 -4
- data/scripts/p.bash +1 -1
- data/scripts/read_quality.bash +9 -18
- data/scripts/trimmed_fasta.bash +14 -30
- data/scripts/trimmed_reads.bash +36 -36
- data/test/parallel_test.rb +31 -0
- data/test/project_test.rb +2 -1
- data/utils/distance/commands.rb +1 -0
- data/utils/distance/runner.rb +2 -4
- data/utils/enveomics/Manifest/Tasks/fasta.json +39 -3
- data/utils/enveomics/Manifest/Tasks/fastq.json +50 -2
- data/utils/enveomics/Manifest/Tasks/mapping.json +70 -0
- data/utils/enveomics/Manifest/Tasks/other.json +77 -0
- data/utils/enveomics/Manifest/Tasks/sequence-identity.json +138 -1
- data/utils/enveomics/Manifest/categories.json +13 -4
- data/utils/enveomics/Scripts/Aln.cat.rb +206 -148
- data/utils/enveomics/Scripts/FastA.N50.pl +33 -29
- data/utils/enveomics/Scripts/FastA.fragment.rb +69 -61
- data/utils/enveomics/Scripts/FastA.sample.rb +61 -46
- data/utils/enveomics/Scripts/FastA.toFastQ.rb +69 -0
- data/utils/enveomics/Scripts/FastQ.maskQual.rb +89 -0
- data/utils/enveomics/Scripts/FastQ.tag.rb +59 -52
- data/utils/enveomics/Scripts/SRA.download.bash +6 -8
- data/utils/enveomics/Scripts/Table.prefScore.R +60 -0
- data/utils/enveomics/Scripts/aai.rb +3 -2
- data/utils/enveomics/Scripts/anir.rb +137 -0
- data/utils/enveomics/Scripts/lib/enveomics_rb/anir.rb +293 -0
- data/utils/enveomics/Scripts/lib/enveomics_rb/bm_set.rb +175 -0
- data/utils/enveomics/Scripts/lib/enveomics_rb/enveomics.rb +17 -17
- data/utils/enveomics/Scripts/lib/enveomics_rb/errors.rb +17 -0
- data/utils/enveomics/Scripts/lib/enveomics_rb/gmm_em.rb +30 -0
- data/utils/enveomics/Scripts/lib/enveomics_rb/match.rb +63 -0
- data/utils/enveomics/Scripts/lib/enveomics_rb/rbm.rb +49 -0
- data/utils/enveomics/Scripts/lib/enveomics_rb/stats.rb +3 -0
- data/utils/enveomics/Scripts/lib/enveomics_rb/stats/rand.rb +31 -0
- data/utils/enveomics/Scripts/lib/enveomics_rb/stats/sample.rb +152 -0
- data/utils/enveomics/Scripts/lib/enveomics_rb/utils.rb +73 -0
- data/utils/enveomics/Scripts/rbm-legacy.rb +172 -0
- data/utils/enveomics/Scripts/rbm.rb +87 -133
- data/utils/enveomics/Scripts/sam.filter.rb +148 -0
- data/utils/enveomics/enveomics.R/DESCRIPTION +2 -2
- data/utils/enveomics/enveomics.R/NAMESPACE +1 -1
- data/utils/enveomics/enveomics.R/R/prefscore.R +79 -0
- data/utils/enveomics/enveomics.R/R/utils.R +30 -0
- data/utils/enveomics/enveomics.R/README.md +1 -0
- data/utils/enveomics/enveomics.R/man/cash-enve.GrowthCurve-method.Rd +0 -1
- data/utils/enveomics/enveomics.R/man/cash-enve.RecPlot2-method.Rd +0 -1
- data/utils/enveomics/enveomics.R/man/cash-enve.RecPlot2.Peak-method.Rd +0 -1
- data/utils/enveomics/enveomics.R/man/enve.__tribs.Rd +10 -2
- data/utils/enveomics/enveomics.R/man/enve.barplot.Rd +16 -4
- data/utils/enveomics/enveomics.R/man/enve.cliopts.Rd +13 -3
- data/utils/enveomics/enveomics.R/man/enve.df2dist.Rd +8 -2
- data/utils/enveomics/enveomics.R/man/enve.df2dist.group.Rd +8 -2
- data/utils/enveomics/enveomics.R/man/enve.df2dist.list.Rd +9 -2
- data/utils/enveomics/enveomics.R/man/enve.growthcurve.Rd +13 -5
- data/utils/enveomics/enveomics.R/man/enve.prefscore.Rd +50 -0
- data/utils/enveomics/enveomics.R/man/enve.prune.dist.Rd +9 -2
- data/utils/enveomics/enveomics.R/man/enve.recplot.Rd +23 -6
- data/utils/enveomics/enveomics.R/man/enve.recplot2.Rd +13 -4
- data/utils/enveomics/enveomics.R/man/enve.recplot2.compareIdentities.Rd +8 -2
- data/utils/enveomics/enveomics.R/man/enve.recplot2.extractWindows.Rd +7 -2
- data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.__mow_one.Rd +14 -3
- data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.em.Rd +10 -2
- data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.emauto.Rd +8 -2
- data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.mower.Rd +17 -9
- data/utils/enveomics/enveomics.R/man/enve.recplot2.windowDepthThreshold.Rd +6 -2
- data/utils/enveomics/enveomics.R/man/enve.selvector.Rd +23 -0
- data/utils/enveomics/enveomics.R/man/enve.tribs.Rd +14 -5
- data/utils/enveomics/enveomics.R/man/plot.enve.GrowthCurve.Rd +19 -4
- data/utils/enveomics/enveomics.R/man/plot.enve.TRIBS.Rd +11 -3
- data/utils/enveomics/enveomics.R/man/plot.enve.TRIBStest.Rd +11 -4
- data/utils/enveomics/enveomics.R/man/plot.enve.recplot2.Rd +26 -12
- data/utils/multitrim/Multitrim How-To.pdf +0 -0
- data/utils/multitrim/README.md +67 -0
- data/utils/multitrim/multitrim.py +1555 -0
- data/utils/multitrim/multitrim.yml +13 -0
- data/utils/requirements.txt +4 -3
- metadata +33 -6
- data/utils/enveomics/Scripts/lib/enveomics_rb/stat.rb +0 -30
@@ -0,0 +1,31 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'test_helper'
|
4
|
+
|
5
|
+
class ParallelTest < Test::Unit::TestCase
|
6
|
+
include TestHelper
|
7
|
+
|
8
|
+
def test_distribute
|
9
|
+
declare_forks
|
10
|
+
|
11
|
+
base = tmpfile('base')
|
12
|
+
assert(!File.exist?("#{base}-3"))
|
13
|
+
MiGA::Parallel.distribute((0..3), 2) do |o, _k, t|
|
14
|
+
File.open("#{base}-#{o}", 'w') { |fh| fh.puts t }
|
15
|
+
end
|
16
|
+
assert(File.exist?("#{base}-3"))
|
17
|
+
assert(!File.exist?("#{base}-4"))
|
18
|
+
t = (0..3).map { |i| File.read("#{base}-#{i}").chomp.to_i }
|
19
|
+
assert_equal([0, 0, 1, 1], t.sort)
|
20
|
+
end
|
21
|
+
|
22
|
+
def test_thread_enum
|
23
|
+
MiGA::Parallel.thread_enum(%w[a b c d], 3, 1) do |o, _k, _t|
|
24
|
+
assert_equal('b', o)
|
25
|
+
end
|
26
|
+
|
27
|
+
n = 0
|
28
|
+
MiGA::Parallel.thread_enum(0..19, 4, 0) { n += 1 }
|
29
|
+
assert_equal(5, n)
|
30
|
+
end
|
31
|
+
end
|
data/test/project_test.rb
CHANGED
@@ -27,10 +27,11 @@ class ProjectTest < Test::Unit::TestCase
|
|
27
27
|
def test_create
|
28
28
|
assert_equal(tmpfile('create'), project('create').path)
|
29
29
|
assert_path_exist(tmpfile('create'))
|
30
|
-
|
30
|
+
err = capture_stderr do
|
31
31
|
ENV['MIGA_HOME'] = tmpfile('chez-moi')
|
32
32
|
project('cuckoo')
|
33
33
|
end
|
34
|
+
assert_match(/Projects cannot be processed yet/, err.string)
|
34
35
|
end
|
35
36
|
|
36
37
|
def test_load
|
data/utils/distance/commands.rb
CHANGED
@@ -169,6 +169,7 @@ module MiGA::DistanceRunner::Commands
|
|
169
169
|
aai_data[out[1]] = [out[6].to_f, 0, 0, 0] if out[6] !~ /^>/
|
170
170
|
end
|
171
171
|
end
|
172
|
+
puts "Results: #{haai_data.size} | Inferences: #{aai_data.size}"
|
172
173
|
batch_data_to_db(:haai, haai_data)
|
173
174
|
batch_data_to_db(:aai, aai_data)
|
174
175
|
|
data/utils/distance/runner.rb
CHANGED
@@ -18,10 +18,8 @@ class MiGA::DistanceRunner
|
|
18
18
|
@ref_project = MiGA::Project.load(ref_path)
|
19
19
|
raise "Cannot load reference project: #{ref_path}" if @ref_project.nil?
|
20
20
|
elsif !opts[:run_taxonomy] && dataset.option(:db_project)
|
21
|
-
|
22
|
-
|
23
|
-
ref_path = File.expand_path(ref_path, project.option(:db_proj_dir))
|
24
|
-
end
|
21
|
+
ref_location = project.option(:db_proj_dir) || File.dirname(project.path)
|
22
|
+
ref_path = File.expand_path(dataset.option(:db_project), ref_location)
|
25
23
|
@ref_project = MiGA::Project.load(ref_path)
|
26
24
|
raise "Cannot load reference project: #{ref_path}" if @ref_project.nil?
|
27
25
|
else
|
@@ -4,7 +4,8 @@
|
|
4
4
|
"task": "FastA.N50.pl",
|
5
5
|
"description": ["Calculates the N50 value of a set of sequences.",
|
6
6
|
"Alternatively, it can calculate other N** values. It also calculates",
|
7
|
-
"the total number of sequences
|
7
|
+
"the total number of sequences, the total added length, and the",
|
8
|
+
"longest sequence length."],
|
8
9
|
"help_arg": "",
|
9
10
|
"see_also": ["FastA.length.pl"],
|
10
11
|
"options": [
|
@@ -354,14 +355,14 @@
|
|
354
355
|
"opt": "--in",
|
355
356
|
"arg": "in_file",
|
356
357
|
"mandatory": true,
|
357
|
-
"description": "Input FastA file."
|
358
|
+
"description": "Input FastA file (supports .gz compression)."
|
358
359
|
},
|
359
360
|
{
|
360
361
|
"name": "Output file",
|
361
362
|
"opt": "--out",
|
362
363
|
"arg": "out_file",
|
363
364
|
"mandatory": true,
|
364
|
-
"description": "Output FastA file."
|
365
|
+
"description": "Output FastA file (supports .gz compression)."
|
365
366
|
},
|
366
367
|
{
|
367
368
|
"opt": "--fraction",
|
@@ -733,6 +734,41 @@
|
|
733
734
|
}
|
734
735
|
]
|
735
736
|
},
|
737
|
+
{
|
738
|
+
"task": "FastA.toFastQ.rb",
|
739
|
+
"description": "Creates a FastQ-compliant file from a FastA file.",
|
740
|
+
"see_also": "FastQ.toFastA.awk",
|
741
|
+
"help_arg": "--help",
|
742
|
+
"options": [
|
743
|
+
{
|
744
|
+
"name": "Input FastA",
|
745
|
+
"opt": "--in",
|
746
|
+
"arg": "in_file",
|
747
|
+
"mandatory": true,
|
748
|
+
"description": "Input FastA file (supports .gz compression)."
|
749
|
+
},
|
750
|
+
{
|
751
|
+
"name": "Output FastQ",
|
752
|
+
"opt": "--out",
|
753
|
+
"arg": "out_file",
|
754
|
+
"mandatory": true,
|
755
|
+
"description": "Output FastQ file (supports .gz compression)."
|
756
|
+
},
|
757
|
+
{
|
758
|
+
"opt": "--quality",
|
759
|
+
"arg": "integer",
|
760
|
+
"default": 31,
|
761
|
+
"description": ["PHRED quality score to use (fixed), in the range",
|
762
|
+
"[-5, 41]."]
|
763
|
+
},
|
764
|
+
{
|
765
|
+
"opt": "--encoding",
|
766
|
+
"arg": "integer",
|
767
|
+
"default": 33,
|
768
|
+
"description": "Base encoding (33 or 64)."
|
769
|
+
}
|
770
|
+
]
|
771
|
+
},
|
736
772
|
{
|
737
773
|
"task": "FastA.wrap.rb",
|
738
774
|
"description": "Wraps sequences in a FastA to a given line length.",
|
@@ -81,6 +81,47 @@
|
|
81
81
|
}
|
82
82
|
]
|
83
83
|
},
|
84
|
+
{
|
85
|
+
"task": "FastQ.maskQual.rb",
|
86
|
+
"description": "Masks low-quality bases in a FastQ file.",
|
87
|
+
"help_arg": "--help",
|
88
|
+
"options": [
|
89
|
+
{
|
90
|
+
"opt": "--input",
|
91
|
+
"arg": "in_file",
|
92
|
+
"mandatory": true,
|
93
|
+
"description": ["Path to the FastQ file containing the sequences.",
|
94
|
+
"Supports compression with .gz extension."]
|
95
|
+
},
|
96
|
+
{
|
97
|
+
"opt": "--output",
|
98
|
+
"arg": "out_file",
|
99
|
+
"mandatory": true,
|
100
|
+
"description": ["Path to the output FastQ file.",
|
101
|
+
"Supports compression with .gz extension."]
|
102
|
+
},
|
103
|
+
{
|
104
|
+
"opt": "--qual",
|
105
|
+
"arg": "integer",
|
106
|
+
"default": 15,
|
107
|
+
"description": "Minimum quality score to allow a base."
|
108
|
+
},
|
109
|
+
{
|
110
|
+
"opt": "--offset",
|
111
|
+
"arg": "integer",
|
112
|
+
"default": 33,
|
113
|
+
"description": "Q-score offset."
|
114
|
+
},
|
115
|
+
{
|
116
|
+
"opt": "--fasta",
|
117
|
+
"description": "Output sequences in FastA format."
|
118
|
+
},
|
119
|
+
{
|
120
|
+
"opt": "--quiet",
|
121
|
+
"description": "Run quietly."
|
122
|
+
}
|
123
|
+
]
|
124
|
+
},
|
84
125
|
{
|
85
126
|
"task": "FastQ.offset.pl",
|
86
127
|
"description": ["There are several FastQ formats. This script takes a",
|
@@ -160,14 +201,20 @@
|
|
160
201
|
"opt": "--in",
|
161
202
|
"arg": "in_file",
|
162
203
|
"mandatory": true,
|
163
|
-
"description":
|
204
|
+
"description": [
|
205
|
+
"FastQ file containing the sequences.",
|
206
|
+
"Supports compression with .gz extension."
|
207
|
+
]
|
164
208
|
},
|
165
209
|
{
|
166
210
|
"name": "Output file",
|
167
211
|
"opt": "--out",
|
168
212
|
"arg": "out_file",
|
169
213
|
"mandatory": true,
|
170
|
-
"description":
|
214
|
+
"description": [
|
215
|
+
"FastQ to create.",
|
216
|
+
"Supports compression with .gz extension."
|
217
|
+
]
|
171
218
|
},
|
172
219
|
{
|
173
220
|
"opt": "--prefix",
|
@@ -188,6 +235,7 @@
|
|
188
235
|
{
|
189
236
|
"task": "FastQ.toFastA.awk",
|
190
237
|
"description": "Translates FastQ files into FastA.",
|
238
|
+
"see_also": "FastA.toFastQ.rb",
|
191
239
|
"help_arg": "'' --help",
|
192
240
|
"options": [
|
193
241
|
"<",
|
@@ -62,6 +62,76 @@
|
|
62
62
|
"description": "Window size, in base pairs."
|
63
63
|
}
|
64
64
|
]
|
65
|
+
},
|
66
|
+
{
|
67
|
+
"task": "sam.filter.rb",
|
68
|
+
"description": ["Filters a SAM or BAM file by target sequences and/or",
|
69
|
+
"identity."],
|
70
|
+
"see_also": ["anir.rb"],
|
71
|
+
"help_arg": "--help",
|
72
|
+
"options": [
|
73
|
+
{
|
74
|
+
"opt": "--genome",
|
75
|
+
"arg": "in_file",
|
76
|
+
"mandatory": true,
|
77
|
+
"description": ["Genome assembly.",
|
78
|
+
"Supports compression with .gz extension."]
|
79
|
+
},
|
80
|
+
{
|
81
|
+
"opt": "--mapping",
|
82
|
+
"arg": "in_file",
|
83
|
+
"mandatory": true,
|
84
|
+
"description": ["Mapping file.",
|
85
|
+
"Supports compression with .gz extension."]
|
86
|
+
},
|
87
|
+
{
|
88
|
+
"opt": "--out-sam",
|
89
|
+
"arg": "out_file",
|
90
|
+
"mandatory": true,
|
91
|
+
"description": ["Output filtered file in SAM format.",
|
92
|
+
"Supports compression with .gz extension."]
|
93
|
+
},
|
94
|
+
{
|
95
|
+
"opt": "--g-format",
|
96
|
+
"arg": "select",
|
97
|
+
"values": ["fasta", "list"],
|
98
|
+
"default": "fasta",
|
99
|
+
"description": ["Genome assembly format."]
|
100
|
+
},
|
101
|
+
{
|
102
|
+
"opt": "--m-format",
|
103
|
+
"arg": "select",
|
104
|
+
"values": ["sam", "bam"],
|
105
|
+
"default": "sam",
|
106
|
+
"description": ["Mapping file format. SAM supports compression with",
|
107
|
+
".gz file extension."]
|
108
|
+
},
|
109
|
+
{
|
110
|
+
"opt": "--identity",
|
111
|
+
"arg": "float",
|
112
|
+
"description": "Set a fixed threshold of percent identity.",
|
113
|
+
"default": 95.0
|
114
|
+
},
|
115
|
+
{
|
116
|
+
"opt": "--no-header",
|
117
|
+
"description": "Do not include the headers."
|
118
|
+
},
|
119
|
+
{
|
120
|
+
"opt": "--threads",
|
121
|
+
"arg": "integer",
|
122
|
+
"description": "Threads to use.",
|
123
|
+
"default": 2
|
124
|
+
},
|
125
|
+
{
|
126
|
+
"opt": "--log",
|
127
|
+
"arg": "out_file",
|
128
|
+
"description": "Log file to save output."
|
129
|
+
},
|
130
|
+
{
|
131
|
+
"opt": "--quiet",
|
132
|
+
"description": "Run quietly."
|
133
|
+
}
|
134
|
+
]
|
65
135
|
}
|
66
136
|
]
|
67
137
|
}
|
@@ -824,6 +824,83 @@
|
|
824
824
|
"description": "Features to map in GFF."
|
825
825
|
}
|
826
826
|
]
|
827
|
+
},
|
828
|
+
{
|
829
|
+
"task": "Table.prefScore.R",
|
830
|
+
"description": ["Estimate preference score of species based on occupancy",
|
831
|
+
"in biased sample sets."],
|
832
|
+
"help_arg": "--help",
|
833
|
+
"requires": [ { "r_package": "optparse" } ],
|
834
|
+
"options": [
|
835
|
+
{
|
836
|
+
"name": "Occupancy matrix",
|
837
|
+
"opt": "--x",
|
838
|
+
"arg": "in_file",
|
839
|
+
"description": ["A tab-delimited table of presence/absence (1/0)",
|
840
|
+
"with species as rows and samples as columns."],
|
841
|
+
"mandatory": true
|
842
|
+
},
|
843
|
+
{
|
844
|
+
"name": "Sample set",
|
845
|
+
"opt": "--set",
|
846
|
+
"arg": "in_file",
|
847
|
+
"description": ["A list of sample names that constitute the test",
|
848
|
+
"set, one per line."],
|
849
|
+
"mandatory": true
|
850
|
+
},
|
851
|
+
{
|
852
|
+
"opt": "--ignore",
|
853
|
+
"arg": "in_file",
|
854
|
+
"description": ["A list of species to exclude from the analysis,",
|
855
|
+
"one per line."]
|
856
|
+
},
|
857
|
+
{
|
858
|
+
"name": "Significance threshold",
|
859
|
+
"opt": "--signif-thr",
|
860
|
+
"arg": "float",
|
861
|
+
"description": "Absolute value of the significance threshold."
|
862
|
+
},
|
863
|
+
{
|
864
|
+
"opt": "--col-above",
|
865
|
+
"arg": "string",
|
866
|
+
"description": "Color for points significantly above zero.",
|
867
|
+
"default": "#941100"
|
868
|
+
},
|
869
|
+
{
|
870
|
+
"opt": "--col-equal",
|
871
|
+
"arg": "string",
|
872
|
+
"description": ["Color for points not significantly different from",
|
873
|
+
"zero."],
|
874
|
+
"default": "#BDBDBD"
|
875
|
+
},
|
876
|
+
{
|
877
|
+
"opt": "--col-below",
|
878
|
+
"arg": "string",
|
879
|
+
"description": "Color for points significantly below zero.",
|
880
|
+
"default": "#2F5496"
|
881
|
+
},
|
882
|
+
{
|
883
|
+
"name": "Output preference scores",
|
884
|
+
"arg": "out_file",
|
885
|
+
"description": "Output raw-text file with preference scores.",
|
886
|
+
"mandatory": true
|
887
|
+
},
|
888
|
+
{
|
889
|
+
"name": "Graphical output",
|
890
|
+
"arg": "out_file",
|
891
|
+
"description": "Output PDF file with preference scores plot."
|
892
|
+
},
|
893
|
+
{
|
894
|
+
"name": "Width",
|
895
|
+
"arg": "float",
|
896
|
+
"description": "Width of the plot in inches (7 by default)."
|
897
|
+
},
|
898
|
+
{
|
899
|
+
"name": "Height",
|
900
|
+
"arg": "float",
|
901
|
+
"description": "Height of the plot in inches (7 by default)."
|
902
|
+
}
|
903
|
+
]
|
827
904
|
}
|
828
905
|
]
|
829
906
|
}
|
@@ -362,6 +362,139 @@
|
|
362
362
|
}
|
363
363
|
]
|
364
364
|
},
|
365
|
+
{
|
366
|
+
"task": "anir.rb",
|
367
|
+
"description": ["Estimates ANIr: the Average Nucleotide Identity of",
|
368
|
+
"reads against a genome."],
|
369
|
+
"help_arg": "--help",
|
370
|
+
"see_also": ["ani.rb", "sam.filter.rb"],
|
371
|
+
"options": [
|
372
|
+
{
|
373
|
+
"opt": "--reads",
|
374
|
+
"arg": "in_file",
|
375
|
+
"description": "Metagenomic reads."
|
376
|
+
},
|
377
|
+
{
|
378
|
+
"opt": "--genome",
|
379
|
+
"arg": "in_file",
|
380
|
+
"description": "Genome assembly."
|
381
|
+
},
|
382
|
+
{
|
383
|
+
"opt": "--mapping",
|
384
|
+
"arg": "in_file",
|
385
|
+
"description": "Mapping file."
|
386
|
+
},
|
387
|
+
{
|
388
|
+
"opt": "--list",
|
389
|
+
"arg": "in_file",
|
390
|
+
"description": "Output file with identities."
|
391
|
+
},
|
392
|
+
{
|
393
|
+
"opt": "--hist",
|
394
|
+
"arg": "in_file",
|
395
|
+
"description": "Output file with histogram."
|
396
|
+
},
|
397
|
+
{
|
398
|
+
"opt": "--tab",
|
399
|
+
"arg": "out_file",
|
400
|
+
"description": "Output file with results in tabular format."
|
401
|
+
},
|
402
|
+
{
|
403
|
+
"name": "Reads format",
|
404
|
+
"opt": "--r-format",
|
405
|
+
"arg": "select",
|
406
|
+
"description": ["Metagenomic reads format: fastq or fasta.",
|
407
|
+
"Both options support compression with .gz file extension."],
|
408
|
+
"values": ["fastq", "fasta"],
|
409
|
+
"default": "fastq"
|
410
|
+
},
|
411
|
+
{
|
412
|
+
"name": "Reads type",
|
413
|
+
"opt": "--r-type",
|
414
|
+
"arg": "select",
|
415
|
+
"description": ["Type of metagenomic reads: Single reads (single),",
|
416
|
+
"coupled reads in separate files (-m must be comma-delimited;",
|
417
|
+
"coupled), or coupled reads in a single interposed file",
|
418
|
+
"(interleaved)."],
|
419
|
+
"values": ["single", "coupled", "interleaved"],
|
420
|
+
"default": "single"
|
421
|
+
},
|
422
|
+
{
|
423
|
+
"name": "Genome format",
|
424
|
+
"opt": "--g-format",
|
425
|
+
"arg": "select",
|
426
|
+
"description": ["Genome assembly format: fasta or list.",
|
427
|
+
"Both options support compression with .gz file extension.",
|
428
|
+
"If passed in mapping-read mode, filters only matches to these",
|
429
|
+
"contigs."],
|
430
|
+
"values": ["fasta", "list"],
|
431
|
+
"default": "fasta"
|
432
|
+
},
|
433
|
+
{
|
434
|
+
"name": "Mapping format",
|
435
|
+
"opt": "--m-format",
|
436
|
+
"arg": "select",
|
437
|
+
"description": ["Mapping file format: sam, bam, tab, or list.",
|
438
|
+
"All except bam support compression with .gz file extension."],
|
439
|
+
"values": ["sam", "bam", "tab", "list"],
|
440
|
+
"default": "sam"
|
441
|
+
},
|
442
|
+
{
|
443
|
+
"opt": "--identity",
|
444
|
+
"arg": "float",
|
445
|
+
"description": "Set a fixed threshold of percent identity.",
|
446
|
+
"default": 95.0
|
447
|
+
},
|
448
|
+
{
|
449
|
+
"opt": "--algorithm",
|
450
|
+
"arg": "select",
|
451
|
+
"description": ["Set an algorithm to automatically detect identity",
|
452
|
+
"threshold: Valley detection by E-M of Gaussian Mixture Model",
|
453
|
+
"(gmm), fixed threshold (see Identity; fix),",
|
454
|
+
"Pick gmm or fix depending on bimodality (see Bimodality; auto)."],
|
455
|
+
"values": ["gmm", "fix", "auto"],
|
456
|
+
"default": "auto"
|
457
|
+
},
|
458
|
+
{
|
459
|
+
"opt": "--bimodality",
|
460
|
+
"arg": "float",
|
461
|
+
"description": ["Threshold of bimodality below which the algorithm",
|
462
|
+
"is set to fix. The coefficient used is the de Michele & Accatino",
|
463
|
+
"(2014) B index."],
|
464
|
+
"default": 0.5
|
465
|
+
},
|
466
|
+
{
|
467
|
+
"opt": "--coefficient",
|
468
|
+
"arg": "select",
|
469
|
+
"description": ["Coefficient of bimodality for Algorithm auto: ",
|
470
|
+
"Sarle's bimodality coefficient b (sarle), or",
|
471
|
+
"de Michele and Accatino (2014 PLoS ONE) B index",
|
472
|
+
"(use with Bimodality 0.1, dma)."],
|
473
|
+
"values": ["sarle", "dma"],
|
474
|
+
"default": "sarle"
|
475
|
+
},
|
476
|
+
{
|
477
|
+
"opt": "--bin-size",
|
478
|
+
"arg": "float",
|
479
|
+
"description": "Width of histogram bins (in percent identity).",
|
480
|
+
"default": 1.0
|
481
|
+
},
|
482
|
+
{
|
483
|
+
"opt": "--threads",
|
484
|
+
"arg": "integer",
|
485
|
+
"description": "Threads to use."
|
486
|
+
},
|
487
|
+
{
|
488
|
+
"opt": "--log",
|
489
|
+
"arg": "out_file",
|
490
|
+
"description": "Log file to save output."
|
491
|
+
},
|
492
|
+
{
|
493
|
+
"opt": "--quiet",
|
494
|
+
"description": "Run quietly."
|
495
|
+
}
|
496
|
+
]
|
497
|
+
},
|
365
498
|
{
|
366
499
|
"task": "HMM.haai.rb",
|
367
500
|
"description": ["Estimates Average Amino Acid Identity (AAI) from the",
|
@@ -407,10 +540,14 @@
|
|
407
540
|
"sequences."],
|
408
541
|
"help_arg": "--help",
|
409
542
|
"cite":[
|
543
|
+
["Camacho et al, 2009, BMC Bioinf (BLAST+)",
|
544
|
+
"https://doi.org/10.1186/1471-2105-10-421"],
|
410
545
|
["Altschul et al, 2000, JMB (BLAST)",
|
411
546
|
"http://dx.doi.org/10.1016/S0022-2836(05)80360-2"],
|
412
547
|
["Buchfink B, Xie C, Huson D, 2015, Nat Meth (Diamond)",
|
413
|
-
"https://dx.doi.org/10.1038/nmeth.3176"]
|
548
|
+
"https://dx.doi.org/10.1038/nmeth.3176"],
|
549
|
+
["Kent, 2002, Genome Res (BLAT)",
|
550
|
+
"https://doi.org/10.1101/gr.229202"]
|
414
551
|
],
|
415
552
|
"options": [
|
416
553
|
{
|