miga-base 0.7.26.1 → 1.0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/miga/_data/aai-intax.blast.tsv.gz +0 -0
- data/lib/miga/_data/aai-intax.diamond.tsv.gz +0 -0
- data/lib/miga/_data/aai-novel.blast.tsv.gz +0 -0
- data/lib/miga/_data/aai-novel.diamond.tsv.gz +0 -0
- data/lib/miga/cli/action/classify_wf.rb +2 -2
- data/lib/miga/cli/action/derep_wf.rb +1 -1
- data/lib/miga/cli/action/doctor.rb +57 -14
- data/lib/miga/cli/action/doctor/base.rb +47 -23
- data/lib/miga/cli/action/init.rb +11 -7
- data/lib/miga/cli/action/init/files_helper.rb +1 -0
- data/lib/miga/cli/action/ncbi_get.rb +3 -3
- data/lib/miga/cli/action/tax_dist.rb +2 -2
- data/lib/miga/cli/action/wf.rb +5 -4
- data/lib/miga/common.rb +1 -0
- data/lib/miga/daemon.rb +11 -4
- data/lib/miga/dataset/result.rb +10 -6
- data/lib/miga/json.rb +5 -4
- data/lib/miga/metadata.rb +5 -1
- data/lib/miga/parallel.rb +36 -0
- data/lib/miga/project.rb +8 -8
- data/lib/miga/project/base.rb +4 -4
- data/lib/miga/project/result.rb +2 -2
- data/lib/miga/sqlite.rb +10 -2
- data/lib/miga/version.rb +23 -9
- data/scripts/aai_distances.bash +16 -18
- data/scripts/ani_distances.bash +16 -17
- data/scripts/assembly.bash +31 -16
- data/scripts/haai_distances.bash +3 -27
- data/scripts/miga.bash +6 -4
- data/scripts/p.bash +1 -1
- data/scripts/read_quality.bash +9 -18
- data/scripts/trimmed_fasta.bash +14 -30
- data/scripts/trimmed_reads.bash +36 -36
- data/test/parallel_test.rb +31 -0
- data/test/project_test.rb +2 -1
- data/test/remote_dataset_test.rb +1 -1
- data/utils/distance/commands.rb +1 -0
- data/utils/distance/database.rb +0 -1
- data/utils/distance/runner.rb +2 -4
- data/utils/enveomics/Manifest/Tasks/fasta.json +39 -3
- data/utils/enveomics/Manifest/Tasks/fastq.json +50 -2
- data/utils/enveomics/Manifest/Tasks/mapping.json +70 -0
- data/utils/enveomics/Manifest/Tasks/other.json +77 -0
- data/utils/enveomics/Manifest/Tasks/sequence-identity.json +138 -1
- data/utils/enveomics/Manifest/categories.json +13 -4
- data/utils/enveomics/Scripts/Aln.cat.rb +206 -148
- data/utils/enveomics/Scripts/FastA.N50.pl +33 -29
- data/utils/enveomics/Scripts/FastA.fragment.rb +69 -61
- data/utils/enveomics/Scripts/FastA.sample.rb +61 -46
- data/utils/enveomics/Scripts/FastA.toFastQ.rb +69 -0
- data/utils/enveomics/Scripts/FastQ.maskQual.rb +89 -0
- data/utils/enveomics/Scripts/FastQ.tag.rb +59 -52
- data/utils/enveomics/Scripts/SRA.download.bash +6 -8
- data/utils/enveomics/Scripts/Table.prefScore.R +60 -0
- data/utils/enveomics/Scripts/aai.rb +3 -2
- data/utils/enveomics/Scripts/anir.rb +137 -0
- data/utils/enveomics/Scripts/lib/enveomics_rb/anir.rb +293 -0
- data/utils/enveomics/Scripts/lib/enveomics_rb/bm_set.rb +175 -0
- data/utils/enveomics/Scripts/lib/enveomics_rb/enveomics.rb +17 -17
- data/utils/enveomics/Scripts/lib/enveomics_rb/errors.rb +17 -0
- data/utils/enveomics/Scripts/lib/enveomics_rb/gmm_em.rb +30 -0
- data/utils/enveomics/Scripts/lib/enveomics_rb/match.rb +63 -0
- data/utils/enveomics/Scripts/lib/enveomics_rb/rbm.rb +49 -0
- data/utils/enveomics/Scripts/lib/enveomics_rb/stats.rb +3 -0
- data/utils/enveomics/Scripts/lib/enveomics_rb/stats/rand.rb +31 -0
- data/utils/enveomics/Scripts/lib/enveomics_rb/stats/sample.rb +152 -0
- data/utils/enveomics/Scripts/lib/enveomics_rb/utils.rb +73 -0
- data/utils/enveomics/Scripts/rbm-legacy.rb +172 -0
- data/utils/enveomics/Scripts/rbm.rb +87 -133
- data/utils/enveomics/Scripts/sam.filter.rb +148 -0
- data/utils/enveomics/enveomics.R/DESCRIPTION +2 -2
- data/utils/enveomics/enveomics.R/NAMESPACE +1 -1
- data/utils/enveomics/enveomics.R/R/prefscore.R +79 -0
- data/utils/enveomics/enveomics.R/R/utils.R +30 -0
- data/utils/enveomics/enveomics.R/README.md +1 -0
- data/utils/enveomics/enveomics.R/man/cash-enve.GrowthCurve-method.Rd +0 -1
- data/utils/enveomics/enveomics.R/man/cash-enve.RecPlot2-method.Rd +0 -1
- data/utils/enveomics/enveomics.R/man/cash-enve.RecPlot2.Peak-method.Rd +0 -1
- data/utils/enveomics/enveomics.R/man/enve.__tribs.Rd +10 -2
- data/utils/enveomics/enveomics.R/man/enve.barplot.Rd +16 -4
- data/utils/enveomics/enveomics.R/man/enve.cliopts.Rd +13 -3
- data/utils/enveomics/enveomics.R/man/enve.df2dist.Rd +8 -2
- data/utils/enveomics/enveomics.R/man/enve.df2dist.group.Rd +8 -2
- data/utils/enveomics/enveomics.R/man/enve.df2dist.list.Rd +9 -2
- data/utils/enveomics/enveomics.R/man/enve.growthcurve.Rd +13 -5
- data/utils/enveomics/enveomics.R/man/enve.prefscore.Rd +50 -0
- data/utils/enveomics/enveomics.R/man/enve.prune.dist.Rd +9 -2
- data/utils/enveomics/enveomics.R/man/enve.recplot.Rd +23 -6
- data/utils/enveomics/enveomics.R/man/enve.recplot2.Rd +13 -4
- data/utils/enveomics/enveomics.R/man/enve.recplot2.compareIdentities.Rd +8 -2
- data/utils/enveomics/enveomics.R/man/enve.recplot2.extractWindows.Rd +7 -2
- data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.__mow_one.Rd +14 -3
- data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.em.Rd +10 -2
- data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.emauto.Rd +8 -2
- data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.mower.Rd +17 -9
- data/utils/enveomics/enveomics.R/man/enve.recplot2.windowDepthThreshold.Rd +6 -2
- data/utils/enveomics/enveomics.R/man/enve.selvector.Rd +23 -0
- data/utils/enveomics/enveomics.R/man/enve.tribs.Rd +14 -5
- data/utils/enveomics/enveomics.R/man/plot.enve.GrowthCurve.Rd +19 -4
- data/utils/enveomics/enveomics.R/man/plot.enve.TRIBS.Rd +11 -3
- data/utils/enveomics/enveomics.R/man/plot.enve.TRIBStest.Rd +11 -4
- data/utils/enveomics/enveomics.R/man/plot.enve.recplot2.Rd +26 -12
- data/utils/multitrim/Multitrim How-To.pdf +0 -0
- data/utils/multitrim/README.md +67 -0
- data/utils/multitrim/multitrim.py +1555 -0
- data/utils/multitrim/multitrim.yml +13 -0
- data/utils/requirements.txt +4 -3
- data/utils/subclade/pipeline.rb +2 -2
- metadata +35 -7
- data/utils/enveomics/Scripts/lib/enveomics_rb/stat.rb +0 -30
data/scripts/trimmed_fasta.bash
CHANGED
@@ -11,43 +11,27 @@ b=$DATASET
|
|
11
11
|
# Initialize
|
12
12
|
miga date > "$DATASET.start"
|
13
13
|
|
14
|
-
#
|
15
|
-
for
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
&& gzip -d "../02.trimmed_reads/$b.$sis.${ext}.fastq.gz"
|
20
|
-
done
|
14
|
+
# FastQ -> FastA
|
15
|
+
for s in 1 2 ; do
|
16
|
+
in="../02.trimmed_reads/${b}.${s}.clipped.fastq.gz"
|
17
|
+
[[ -s "$in" ]] \
|
18
|
+
&& FastQ.maskQual.rb -i "$in" -o "${b}.1.fasta" --fasta --qual 18
|
21
19
|
done
|
22
|
-
miga add_result -P "$PROJECT" -D "$DATASET" -r trimmed_reads -f
|
23
20
|
|
24
|
-
#
|
25
|
-
|
26
|
-
|
27
|
-
if [[ -e "../02.trimmed_reads/$b.2.clipped.fastq" ]] ; then
|
28
|
-
awk -f "$FQ2A" < "../02.trimmed_reads/$b.2.clipped.fastq" > "$b.2.fasta"
|
29
|
-
FastA.interpose.pl "$b.CoupledReads.fa" "$b".[12].fasta
|
30
|
-
gzip -9 -f "$b.2.fasta"
|
31
|
-
gzip -9 -f "$b.1.fasta"
|
32
|
-
awk -f "$FQ2A" < "../02.trimmed_reads/$b".[12].clipped.single.fastq \
|
33
|
-
> "$b.SingleReads.fa"
|
34
|
-
gzip -9 -f "$b.SingleReads.fa"
|
21
|
+
# Interpose
|
22
|
+
if [[ -e "${b}.2.fasta" ]] ; then
|
23
|
+
FastA.interpose.pl "${b}.CoupledReads.fa" "$b".[12].fasta
|
35
24
|
else
|
36
|
-
mv "$b.1.fasta" "$b.SingleReads.fa"
|
25
|
+
mv "${b}.1.fasta" "${b}.SingleReads.fa"
|
37
26
|
fi
|
38
27
|
|
39
|
-
#
|
40
|
-
for
|
41
|
-
|
42
|
-
|
43
|
-
[[ -e "../02.trimmed_reads/$b.$sis.clipped.fastq" ]] \
|
44
|
-
&& gzip -9 -f "../02.trimmed_reads/$b.$sis.clipped.fastq"
|
45
|
-
[[ -e "../02.trimmed_reads/$b.$sis.clipped.single.fastq" ]] \
|
46
|
-
&& gzip -9 -f "../02.trimmed_reads/$b.$sis.clipped.single.fastq"
|
28
|
+
# Gzip
|
29
|
+
for x in 1.fasta 2.fasta SingleReads.fa CoupledReads.fa ; do
|
30
|
+
in="${b}.${x}"
|
31
|
+
[[ -e "$in" ]] && gzip -9f "$in"
|
47
32
|
done
|
48
|
-
miga add_result -P "$PROJECT" -D "$DATASET" -r raw_reads -f
|
49
|
-
miga add_result -P "$PROJECT" -D "$DATASET" -r trimmed_reads -f
|
50
33
|
|
51
34
|
# Finalize
|
52
35
|
miga date > "$DATASET.done"
|
53
36
|
miga add_result -P "$PROJECT" -D "$DATASET" -r "$SCRIPT" -f
|
37
|
+
|
data/scripts/trimmed_reads.bash
CHANGED
@@ -11,49 +11,49 @@ b=$DATASET
|
|
11
11
|
# Initialize
|
12
12
|
miga date > "$DATASET.start"
|
13
13
|
|
14
|
-
# Unzip (if necessary)
|
15
|
-
[[ -e "../01.raw_reads/$b.1.fastq.gz" && ! -e "../01.raw_reads/$b.1.fastq" ]] \
|
16
|
-
&& gunzip "../01.raw_reads/$b.1.fastq.gz"
|
17
|
-
[[ -e "../01.raw_reads/$b.2.fastq.gz" && ! -e "../01.raw_reads/$b.2.fastq" ]] \
|
18
|
-
&& gunzip "../01.raw_reads/$b.2.fastq.gz"
|
19
|
-
miga add_result -P "$PROJECT" -D "$DATASET" -r raw_reads -f
|
20
|
-
|
21
14
|
# Clean existing files
|
22
15
|
exists "$b".[12].* && rm "$b".[12].*
|
23
16
|
|
17
|
+
# Gzip (if necessary)
|
18
|
+
for s in 1 2 ; do
|
19
|
+
in="../01.raw_reads/${b}.${s}.fastq"
|
20
|
+
if [[ -s "$in" ]] ; then
|
21
|
+
gzip -9f "$in"
|
22
|
+
miga add_result -P "$PROJECT" -D "$DATASET" -r raw_reads -f
|
23
|
+
fi
|
24
|
+
done
|
25
|
+
|
24
26
|
# Tag
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
#
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
SolexaQA++ lengthsort "$b".[12].clipped.all.fastq -l 50 -d .
|
41
|
-
rm "$b".[12].clipped.all.fastq
|
42
|
-
[[ -e "$b".1.clipped.all.fastq.single ]] \
|
43
|
-
&& mv "$b.1.clipped.all.fastq.single" "$b.1.clipped.single.fastq"
|
44
|
-
[[ -e "$b".2.clipped.all.fastq.single ]] \
|
45
|
-
&& mv "$b.2.clipped.all.fastq.single" "$b.2.clipped.single.fastq"
|
46
|
-
mv "$b.1.clipped.all.fastq.paired" "$b.1.clipped.fastq"
|
47
|
-
mv "$b.2.clipped.all.fastq.paired" "$b.2.clipped.fastq"
|
48
|
-
rm -f "$b.1.clipped.all.fastq.summary.txt"
|
27
|
+
in1="../01.raw_reads/$b.1.fastq.gz"
|
28
|
+
in2="../01.raw_reads/$b.2.fastq.gz"
|
29
|
+
FastQ.tag.rb -i "$in1" -p "$b-" -s "/1" -o "$b.1.fastq.gz"
|
30
|
+
[[ -e "$in2" ]] && FastQ.tag.rb -i "$in2" -p "$b-" -s "/2" -o "$b.2.fastq.gz"
|
31
|
+
|
32
|
+
# Multitrim
|
33
|
+
CMD="multitrim.py --zip gzip --level 9 --threads $CORES -o $b"
|
34
|
+
if [[ -s "$b.2.fastq.gz" ]] ; then
|
35
|
+
# Paired
|
36
|
+
$CMD -1 "$b.1.fastq.gz" -2 "$b.2.fastq.gz"
|
37
|
+
for s in 1 2 ; do
|
38
|
+
mv "$b/${s}.post_trim_${b}.${s}.fq.gz" "${b}.${s}.clipped.fastq.gz"
|
39
|
+
mv "$b/${s}.pre_trim_QC_${b}.${s}.html" "../03.read_quality/${b}.pre.${s}.html"
|
40
|
+
mv "$b/${s}.post_trim_QC_${b}.${s}.html" "../03.read_quality/${b}.post.${s}.html"
|
41
|
+
done
|
49
42
|
else
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
mv "$b.1.
|
43
|
+
# Unpaired
|
44
|
+
$CMD -u "$b.1.fastq.gz"
|
45
|
+
mv "$b/unpaired.post_trim_${b}.1.fq.gz" "${b}.1.clipped.fastq.gz"
|
46
|
+
mv "$b/unpaired.pre_trim_QC_${b}.1.html" "../03.read_quality/${b}.pre.1.html"
|
47
|
+
mv "$b/unpaired.post_trim_QC_${b}.1.html" "../03.read_quality/${b}.post.1.html"
|
54
48
|
fi
|
55
|
-
|
49
|
+
mv "$b/Subsample_Adapter_Detection.stats.txt" \
|
50
|
+
"../03.read_quality/$b.adapters.txt"
|
51
|
+
|
52
|
+
# Cleanup
|
53
|
+
rm -r "$b"
|
54
|
+
rm -f "$b".[12].fastq.gz
|
56
55
|
|
57
56
|
# Finalize
|
58
57
|
miga date > "$DATASET.done"
|
59
58
|
miga add_result -P "$PROJECT" -D "$DATASET" -r "$SCRIPT" -f
|
59
|
+
|
@@ -0,0 +1,31 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'test_helper'
|
4
|
+
|
5
|
+
class ParallelTest < Test::Unit::TestCase
|
6
|
+
include TestHelper
|
7
|
+
|
8
|
+
def test_distribute
|
9
|
+
declare_forks
|
10
|
+
|
11
|
+
base = tmpfile('base')
|
12
|
+
assert(!File.exist?("#{base}-3"))
|
13
|
+
MiGA::Parallel.distribute((0..3), 2) do |o, _k, t|
|
14
|
+
File.open("#{base}-#{o}", 'w') { |fh| fh.puts t }
|
15
|
+
end
|
16
|
+
assert(File.exist?("#{base}-3"))
|
17
|
+
assert(!File.exist?("#{base}-4"))
|
18
|
+
t = (0..3).map { |i| File.read("#{base}-#{i}").chomp.to_i }
|
19
|
+
assert_equal([0, 0, 1, 1], t.sort)
|
20
|
+
end
|
21
|
+
|
22
|
+
def test_thread_enum
|
23
|
+
MiGA::Parallel.thread_enum(%w[a b c d], 3, 1) do |o, _k, _t|
|
24
|
+
assert_equal('b', o)
|
25
|
+
end
|
26
|
+
|
27
|
+
n = 0
|
28
|
+
MiGA::Parallel.thread_enum(0..19, 4, 0) { n += 1 }
|
29
|
+
assert_equal(5, n)
|
30
|
+
end
|
31
|
+
end
|
data/test/project_test.rb
CHANGED
@@ -27,10 +27,11 @@ class ProjectTest < Test::Unit::TestCase
|
|
27
27
|
def test_create
|
28
28
|
assert_equal(tmpfile('create'), project('create').path)
|
29
29
|
assert_path_exist(tmpfile('create'))
|
30
|
-
|
30
|
+
err = capture_stderr do
|
31
31
|
ENV['MIGA_HOME'] = tmpfile('chez-moi')
|
32
32
|
project('cuckoo')
|
33
33
|
end
|
34
|
+
assert_match(/Projects cannot be processed yet/, err.string)
|
34
35
|
end
|
35
36
|
|
36
37
|
def test_load
|
data/test/remote_dataset_test.rb
CHANGED
@@ -47,7 +47,7 @@ class RemoteDatasetTest < Test::Unit::TestCase
|
|
47
47
|
end
|
48
48
|
|
49
49
|
def test_net_ftp
|
50
|
-
cjac = 'ftp://ftp.ebi.ac.uk/pub/databases/ena/tsa/public/
|
50
|
+
cjac = 'ftp://ftp.ebi.ac.uk/pub/databases/ena/tsa/public/gap/GAPJ01.fasta.gz'
|
51
51
|
n = 'Cjac_L14'
|
52
52
|
rd = MiGA::RemoteDataset.new(cjac, :assembly_gz, :web)
|
53
53
|
assert_equal([cjac], rd.ids)
|
data/utils/distance/commands.rb
CHANGED
@@ -169,6 +169,7 @@ module MiGA::DistanceRunner::Commands
|
|
169
169
|
aai_data[out[1]] = [out[6].to_f, 0, 0, 0] if out[6] !~ /^>/
|
170
170
|
end
|
171
171
|
end
|
172
|
+
puts "Results: #{haai_data.size} | Inferences: #{aai_data.size}"
|
172
173
|
batch_data_to_db(:haai, haai_data)
|
173
174
|
batch_data_to_db(:aai, aai_data)
|
174
175
|
|
data/utils/distance/database.rb
CHANGED
data/utils/distance/runner.rb
CHANGED
@@ -18,10 +18,8 @@ class MiGA::DistanceRunner
|
|
18
18
|
@ref_project = MiGA::Project.load(ref_path)
|
19
19
|
raise "Cannot load reference project: #{ref_path}" if @ref_project.nil?
|
20
20
|
elsif !opts[:run_taxonomy] && dataset.option(:db_project)
|
21
|
-
|
22
|
-
|
23
|
-
ref_path = File.expand_path(ref_path, project.option(:db_proj_dir))
|
24
|
-
end
|
21
|
+
ref_location = project.option(:db_proj_dir) || File.dirname(project.path)
|
22
|
+
ref_path = File.expand_path(dataset.option(:db_project), ref_location)
|
25
23
|
@ref_project = MiGA::Project.load(ref_path)
|
26
24
|
raise "Cannot load reference project: #{ref_path}" if @ref_project.nil?
|
27
25
|
else
|
@@ -4,7 +4,8 @@
|
|
4
4
|
"task": "FastA.N50.pl",
|
5
5
|
"description": ["Calculates the N50 value of a set of sequences.",
|
6
6
|
"Alternatively, it can calculate other N** values. It also calculates",
|
7
|
-
"the total number of sequences
|
7
|
+
"the total number of sequences, the total added length, and the",
|
8
|
+
"longest sequence length."],
|
8
9
|
"help_arg": "",
|
9
10
|
"see_also": ["FastA.length.pl"],
|
10
11
|
"options": [
|
@@ -354,14 +355,14 @@
|
|
354
355
|
"opt": "--in",
|
355
356
|
"arg": "in_file",
|
356
357
|
"mandatory": true,
|
357
|
-
"description": "Input FastA file."
|
358
|
+
"description": "Input FastA file (supports .gz compression)."
|
358
359
|
},
|
359
360
|
{
|
360
361
|
"name": "Output file",
|
361
362
|
"opt": "--out",
|
362
363
|
"arg": "out_file",
|
363
364
|
"mandatory": true,
|
364
|
-
"description": "Output FastA file."
|
365
|
+
"description": "Output FastA file (supports .gz compression)."
|
365
366
|
},
|
366
367
|
{
|
367
368
|
"opt": "--fraction",
|
@@ -733,6 +734,41 @@
|
|
733
734
|
}
|
734
735
|
]
|
735
736
|
},
|
737
|
+
{
|
738
|
+
"task": "FastA.toFastQ.rb",
|
739
|
+
"description": "Creates a FastQ-compliant file from a FastA file.",
|
740
|
+
"see_also": "FastQ.toFastA.awk",
|
741
|
+
"help_arg": "--help",
|
742
|
+
"options": [
|
743
|
+
{
|
744
|
+
"name": "Input FastA",
|
745
|
+
"opt": "--in",
|
746
|
+
"arg": "in_file",
|
747
|
+
"mandatory": true,
|
748
|
+
"description": "Input FastA file (supports .gz compression)."
|
749
|
+
},
|
750
|
+
{
|
751
|
+
"name": "Output FastQ",
|
752
|
+
"opt": "--out",
|
753
|
+
"arg": "out_file",
|
754
|
+
"mandatory": true,
|
755
|
+
"description": "Output FastQ file (supports .gz compression)."
|
756
|
+
},
|
757
|
+
{
|
758
|
+
"opt": "--quality",
|
759
|
+
"arg": "integer",
|
760
|
+
"default": 31,
|
761
|
+
"description": ["PHRED quality score to use (fixed), in the range",
|
762
|
+
"[-5, 41]."]
|
763
|
+
},
|
764
|
+
{
|
765
|
+
"opt": "--encoding",
|
766
|
+
"arg": "integer",
|
767
|
+
"default": 33,
|
768
|
+
"description": "Base encoding (33 or 64)."
|
769
|
+
}
|
770
|
+
]
|
771
|
+
},
|
736
772
|
{
|
737
773
|
"task": "FastA.wrap.rb",
|
738
774
|
"description": "Wraps sequences in a FastA to a given line length.",
|
@@ -81,6 +81,47 @@
|
|
81
81
|
}
|
82
82
|
]
|
83
83
|
},
|
84
|
+
{
|
85
|
+
"task": "FastQ.maskQual.rb",
|
86
|
+
"description": "Masks low-quality bases in a FastQ file.",
|
87
|
+
"help_arg": "--help",
|
88
|
+
"options": [
|
89
|
+
{
|
90
|
+
"opt": "--input",
|
91
|
+
"arg": "in_file",
|
92
|
+
"mandatory": true,
|
93
|
+
"description": ["Path to the FastQ file containing the sequences.",
|
94
|
+
"Supports compression with .gz extension."]
|
95
|
+
},
|
96
|
+
{
|
97
|
+
"opt": "--output",
|
98
|
+
"arg": "out_file",
|
99
|
+
"mandatory": true,
|
100
|
+
"description": ["Path to the output FastQ file.",
|
101
|
+
"Supports compression with .gz extension."]
|
102
|
+
},
|
103
|
+
{
|
104
|
+
"opt": "--qual",
|
105
|
+
"arg": "integer",
|
106
|
+
"default": 15,
|
107
|
+
"description": "Minimum quality score to allow a base."
|
108
|
+
},
|
109
|
+
{
|
110
|
+
"opt": "--offset",
|
111
|
+
"arg": "integer",
|
112
|
+
"default": 33,
|
113
|
+
"description": "Q-score offset."
|
114
|
+
},
|
115
|
+
{
|
116
|
+
"opt": "--fasta",
|
117
|
+
"description": "Output sequences in FastA format."
|
118
|
+
},
|
119
|
+
{
|
120
|
+
"opt": "--quiet",
|
121
|
+
"description": "Run quietly."
|
122
|
+
}
|
123
|
+
]
|
124
|
+
},
|
84
125
|
{
|
85
126
|
"task": "FastQ.offset.pl",
|
86
127
|
"description": ["There are several FastQ formats. This script takes a",
|
@@ -160,14 +201,20 @@
|
|
160
201
|
"opt": "--in",
|
161
202
|
"arg": "in_file",
|
162
203
|
"mandatory": true,
|
163
|
-
"description":
|
204
|
+
"description": [
|
205
|
+
"FastQ file containing the sequences.",
|
206
|
+
"Supports compression with .gz extension."
|
207
|
+
]
|
164
208
|
},
|
165
209
|
{
|
166
210
|
"name": "Output file",
|
167
211
|
"opt": "--out",
|
168
212
|
"arg": "out_file",
|
169
213
|
"mandatory": true,
|
170
|
-
"description":
|
214
|
+
"description": [
|
215
|
+
"FastQ to create.",
|
216
|
+
"Supports compression with .gz extension."
|
217
|
+
]
|
171
218
|
},
|
172
219
|
{
|
173
220
|
"opt": "--prefix",
|
@@ -188,6 +235,7 @@
|
|
188
235
|
{
|
189
236
|
"task": "FastQ.toFastA.awk",
|
190
237
|
"description": "Translates FastQ files into FastA.",
|
238
|
+
"see_also": "FastA.toFastQ.rb",
|
191
239
|
"help_arg": "'' --help",
|
192
240
|
"options": [
|
193
241
|
"<",
|
@@ -62,6 +62,76 @@
|
|
62
62
|
"description": "Window size, in base pairs."
|
63
63
|
}
|
64
64
|
]
|
65
|
+
},
|
66
|
+
{
|
67
|
+
"task": "sam.filter.rb",
|
68
|
+
"description": ["Filters a SAM or BAM file by target sequences and/or",
|
69
|
+
"identity."],
|
70
|
+
"see_also": ["anir.rb"],
|
71
|
+
"help_arg": "--help",
|
72
|
+
"options": [
|
73
|
+
{
|
74
|
+
"opt": "--genome",
|
75
|
+
"arg": "in_file",
|
76
|
+
"mandatory": true,
|
77
|
+
"description": ["Genome assembly.",
|
78
|
+
"Supports compression with .gz extension."]
|
79
|
+
},
|
80
|
+
{
|
81
|
+
"opt": "--mapping",
|
82
|
+
"arg": "in_file",
|
83
|
+
"mandatory": true,
|
84
|
+
"description": ["Mapping file.",
|
85
|
+
"Supports compression with .gz extension."]
|
86
|
+
},
|
87
|
+
{
|
88
|
+
"opt": "--out-sam",
|
89
|
+
"arg": "out_file",
|
90
|
+
"mandatory": true,
|
91
|
+
"description": ["Output filtered file in SAM format.",
|
92
|
+
"Supports compression with .gz extension."]
|
93
|
+
},
|
94
|
+
{
|
95
|
+
"opt": "--g-format",
|
96
|
+
"arg": "select",
|
97
|
+
"values": ["fasta", "list"],
|
98
|
+
"default": "fasta",
|
99
|
+
"description": ["Genome assembly format."]
|
100
|
+
},
|
101
|
+
{
|
102
|
+
"opt": "--m-format",
|
103
|
+
"arg": "select",
|
104
|
+
"values": ["sam", "bam"],
|
105
|
+
"default": "sam",
|
106
|
+
"description": ["Mapping file format. SAM supports compression with",
|
107
|
+
".gz file extension."]
|
108
|
+
},
|
109
|
+
{
|
110
|
+
"opt": "--identity",
|
111
|
+
"arg": "float",
|
112
|
+
"description": "Set a fixed threshold of percent identity.",
|
113
|
+
"default": 95.0
|
114
|
+
},
|
115
|
+
{
|
116
|
+
"opt": "--no-header",
|
117
|
+
"description": "Do not include the headers."
|
118
|
+
},
|
119
|
+
{
|
120
|
+
"opt": "--threads",
|
121
|
+
"arg": "integer",
|
122
|
+
"description": "Threads to use.",
|
123
|
+
"default": 2
|
124
|
+
},
|
125
|
+
{
|
126
|
+
"opt": "--log",
|
127
|
+
"arg": "out_file",
|
128
|
+
"description": "Log file to save output."
|
129
|
+
},
|
130
|
+
{
|
131
|
+
"opt": "--quiet",
|
132
|
+
"description": "Run quietly."
|
133
|
+
}
|
134
|
+
]
|
65
135
|
}
|
66
136
|
]
|
67
137
|
}
|