miga-base 0.7.26.1 → 1.0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (111)
  1. checksums.yaml +4 -4
  2. data/lib/miga/_data/aai-intax.blast.tsv.gz +0 -0
  3. data/lib/miga/_data/aai-intax.diamond.tsv.gz +0 -0
  4. data/lib/miga/_data/aai-novel.blast.tsv.gz +0 -0
  5. data/lib/miga/_data/aai-novel.diamond.tsv.gz +0 -0
  6. data/lib/miga/cli/action/classify_wf.rb +2 -2
  7. data/lib/miga/cli/action/derep_wf.rb +1 -1
  8. data/lib/miga/cli/action/doctor.rb +57 -14
  9. data/lib/miga/cli/action/doctor/base.rb +47 -23
  10. data/lib/miga/cli/action/init.rb +11 -7
  11. data/lib/miga/cli/action/init/files_helper.rb +1 -0
  12. data/lib/miga/cli/action/ncbi_get.rb +3 -3
  13. data/lib/miga/cli/action/tax_dist.rb +2 -2
  14. data/lib/miga/cli/action/wf.rb +5 -4
  15. data/lib/miga/common.rb +1 -0
  16. data/lib/miga/daemon.rb +11 -4
  17. data/lib/miga/dataset/result.rb +10 -6
  18. data/lib/miga/json.rb +5 -4
  19. data/lib/miga/metadata.rb +5 -1
  20. data/lib/miga/parallel.rb +36 -0
  21. data/lib/miga/project.rb +8 -8
  22. data/lib/miga/project/base.rb +4 -4
  23. data/lib/miga/project/result.rb +2 -2
  24. data/lib/miga/sqlite.rb +10 -2
  25. data/lib/miga/version.rb +23 -9
  26. data/scripts/aai_distances.bash +16 -18
  27. data/scripts/ani_distances.bash +16 -17
  28. data/scripts/assembly.bash +31 -16
  29. data/scripts/haai_distances.bash +3 -27
  30. data/scripts/miga.bash +6 -4
  31. data/scripts/p.bash +1 -1
  32. data/scripts/read_quality.bash +9 -18
  33. data/scripts/trimmed_fasta.bash +14 -30
  34. data/scripts/trimmed_reads.bash +36 -36
  35. data/test/parallel_test.rb +31 -0
  36. data/test/project_test.rb +2 -1
  37. data/test/remote_dataset_test.rb +1 -1
  38. data/utils/distance/commands.rb +1 -0
  39. data/utils/distance/database.rb +0 -1
  40. data/utils/distance/runner.rb +2 -4
  41. data/utils/enveomics/Manifest/Tasks/fasta.json +39 -3
  42. data/utils/enveomics/Manifest/Tasks/fastq.json +50 -2
  43. data/utils/enveomics/Manifest/Tasks/mapping.json +70 -0
  44. data/utils/enveomics/Manifest/Tasks/other.json +77 -0
  45. data/utils/enveomics/Manifest/Tasks/sequence-identity.json +138 -1
  46. data/utils/enveomics/Manifest/categories.json +13 -4
  47. data/utils/enveomics/Scripts/Aln.cat.rb +206 -148
  48. data/utils/enveomics/Scripts/FastA.N50.pl +33 -29
  49. data/utils/enveomics/Scripts/FastA.fragment.rb +69 -61
  50. data/utils/enveomics/Scripts/FastA.sample.rb +61 -46
  51. data/utils/enveomics/Scripts/FastA.toFastQ.rb +69 -0
  52. data/utils/enveomics/Scripts/FastQ.maskQual.rb +89 -0
  53. data/utils/enveomics/Scripts/FastQ.tag.rb +59 -52
  54. data/utils/enveomics/Scripts/SRA.download.bash +6 -8
  55. data/utils/enveomics/Scripts/Table.prefScore.R +60 -0
  56. data/utils/enveomics/Scripts/aai.rb +3 -2
  57. data/utils/enveomics/Scripts/anir.rb +137 -0
  58. data/utils/enveomics/Scripts/lib/enveomics_rb/anir.rb +293 -0
  59. data/utils/enveomics/Scripts/lib/enveomics_rb/bm_set.rb +175 -0
  60. data/utils/enveomics/Scripts/lib/enveomics_rb/enveomics.rb +17 -17
  61. data/utils/enveomics/Scripts/lib/enveomics_rb/errors.rb +17 -0
  62. data/utils/enveomics/Scripts/lib/enveomics_rb/gmm_em.rb +30 -0
  63. data/utils/enveomics/Scripts/lib/enveomics_rb/match.rb +63 -0
  64. data/utils/enveomics/Scripts/lib/enveomics_rb/rbm.rb +49 -0
  65. data/utils/enveomics/Scripts/lib/enveomics_rb/stats.rb +3 -0
  66. data/utils/enveomics/Scripts/lib/enveomics_rb/stats/rand.rb +31 -0
  67. data/utils/enveomics/Scripts/lib/enveomics_rb/stats/sample.rb +152 -0
  68. data/utils/enveomics/Scripts/lib/enveomics_rb/utils.rb +73 -0
  69. data/utils/enveomics/Scripts/rbm-legacy.rb +172 -0
  70. data/utils/enveomics/Scripts/rbm.rb +87 -133
  71. data/utils/enveomics/Scripts/sam.filter.rb +148 -0
  72. data/utils/enveomics/enveomics.R/DESCRIPTION +2 -2
  73. data/utils/enveomics/enveomics.R/NAMESPACE +1 -1
  74. data/utils/enveomics/enveomics.R/R/prefscore.R +79 -0
  75. data/utils/enveomics/enveomics.R/R/utils.R +30 -0
  76. data/utils/enveomics/enveomics.R/README.md +1 -0
  77. data/utils/enveomics/enveomics.R/man/cash-enve.GrowthCurve-method.Rd +0 -1
  78. data/utils/enveomics/enveomics.R/man/cash-enve.RecPlot2-method.Rd +0 -1
  79. data/utils/enveomics/enveomics.R/man/cash-enve.RecPlot2.Peak-method.Rd +0 -1
  80. data/utils/enveomics/enveomics.R/man/enve.__tribs.Rd +10 -2
  81. data/utils/enveomics/enveomics.R/man/enve.barplot.Rd +16 -4
  82. data/utils/enveomics/enveomics.R/man/enve.cliopts.Rd +13 -3
  83. data/utils/enveomics/enveomics.R/man/enve.df2dist.Rd +8 -2
  84. data/utils/enveomics/enveomics.R/man/enve.df2dist.group.Rd +8 -2
  85. data/utils/enveomics/enveomics.R/man/enve.df2dist.list.Rd +9 -2
  86. data/utils/enveomics/enveomics.R/man/enve.growthcurve.Rd +13 -5
  87. data/utils/enveomics/enveomics.R/man/enve.prefscore.Rd +50 -0
  88. data/utils/enveomics/enveomics.R/man/enve.prune.dist.Rd +9 -2
  89. data/utils/enveomics/enveomics.R/man/enve.recplot.Rd +23 -6
  90. data/utils/enveomics/enveomics.R/man/enve.recplot2.Rd +13 -4
  91. data/utils/enveomics/enveomics.R/man/enve.recplot2.compareIdentities.Rd +8 -2
  92. data/utils/enveomics/enveomics.R/man/enve.recplot2.extractWindows.Rd +7 -2
  93. data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.__mow_one.Rd +14 -3
  94. data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.em.Rd +10 -2
  95. data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.emauto.Rd +8 -2
  96. data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.mower.Rd +17 -9
  97. data/utils/enveomics/enveomics.R/man/enve.recplot2.windowDepthThreshold.Rd +6 -2
  98. data/utils/enveomics/enveomics.R/man/enve.selvector.Rd +23 -0
  99. data/utils/enveomics/enveomics.R/man/enve.tribs.Rd +14 -5
  100. data/utils/enveomics/enveomics.R/man/plot.enve.GrowthCurve.Rd +19 -4
  101. data/utils/enveomics/enveomics.R/man/plot.enve.TRIBS.Rd +11 -3
  102. data/utils/enveomics/enveomics.R/man/plot.enve.TRIBStest.Rd +11 -4
  103. data/utils/enveomics/enveomics.R/man/plot.enve.recplot2.Rd +26 -12
  104. data/utils/multitrim/Multitrim How-To.pdf +0 -0
  105. data/utils/multitrim/README.md +67 -0
  106. data/utils/multitrim/multitrim.py +1555 -0
  107. data/utils/multitrim/multitrim.yml +13 -0
  108. data/utils/requirements.txt +4 -3
  109. data/utils/subclade/pipeline.rb +2 -2
  110. metadata +35 -7
  111. data/utils/enveomics/Scripts/lib/enveomics_rb/stat.rb +0 -30
data/scripts/trimmed_fasta.bash CHANGED
@@ -11,43 +11,27 @@ b=$DATASET
11
11
  # Initialize
12
12
  miga date > "$DATASET.start"
13
13
 
14
- # Gunzip (if necessary)
15
- for sis in 1 2 ; do
16
- for ext in clipped clipped.single ; do
17
- [[ -e "../02.trimmed_reads/$b.$sis.${ext}.fastq.gz" \
18
- && ! -e "../02.trimmed_reads/$b.$sis.${ext}.fastq" ]] \
19
- && gzip -d "../02.trimmed_reads/$b.$sis.${ext}.fastq.gz"
20
- done
14
+ # FastQ -> FastA
15
+ for s in 1 2 ; do
16
+ in="../02.trimmed_reads/${b}.${s}.clipped.fastq.gz"
17
+ [[ -s "$in" ]] \
18
+ && FastQ.maskQual.rb -i "$in" -o "${b}.1.fasta" --fasta --qual 18
21
19
  done
22
- miga add_result -P "$PROJECT" -D "$DATASET" -r trimmed_reads -f
23
20
 
24
- # FastQ -> FastA
25
- FQ2A="$MIGA/utils/enveomics/Scripts/FastQ.toFastA.awk"
26
- awk -f "$FQ2A" < "../02.trimmed_reads/$b.1.clipped.fastq" > "$b.1.fasta"
27
- if [[ -e "../02.trimmed_reads/$b.2.clipped.fastq" ]] ; then
28
- awk -f "$FQ2A" < "../02.trimmed_reads/$b.2.clipped.fastq" > "$b.2.fasta"
29
- FastA.interpose.pl "$b.CoupledReads.fa" "$b".[12].fasta
30
- gzip -9 -f "$b.2.fasta"
31
- gzip -9 -f "$b.1.fasta"
32
- awk -f "$FQ2A" < "../02.trimmed_reads/$b".[12].clipped.single.fastq \
33
- > "$b.SingleReads.fa"
34
- gzip -9 -f "$b.SingleReads.fa"
21
+ # Interpose
22
+ if [[ -e "${b}.2.fasta" ]] ; then
23
+ FastA.interpose.pl "${b}.CoupledReads.fa" "$b".[12].fasta
35
24
  else
36
- mv "$b.1.fasta" "$b.SingleReads.fa"
25
+ mv "${b}.1.fasta" "${b}.SingleReads.fa"
37
26
  fi
38
27
 
39
- # Compress input at 01.raw_reads and 02.trimmed_reads
40
- for sis in 1 2 ; do
41
- [[ -e "../01.raw_reads/$b.$sis.fastq" ]] \
42
- && gzip -9 -f "../01.raw_reads/$b.$sis.fastq"
43
- [[ -e "../02.trimmed_reads/$b.$sis.clipped.fastq" ]] \
44
- && gzip -9 -f "../02.trimmed_reads/$b.$sis.clipped.fastq"
45
- [[ -e "../02.trimmed_reads/$b.$sis.clipped.single.fastq" ]] \
46
- && gzip -9 -f "../02.trimmed_reads/$b.$sis.clipped.single.fastq"
28
+ # Gzip
29
+ for x in 1.fasta 2.fasta SingleReads.fa CoupledReads.fa ; do
30
+ in="${b}.${x}"
31
+ [[ -e "$in" ]] && gzip -9f "$in"
47
32
  done
48
- miga add_result -P "$PROJECT" -D "$DATASET" -r raw_reads -f
49
- miga add_result -P "$PROJECT" -D "$DATASET" -r trimmed_reads -f
50
33
 
51
34
  # Finalize
52
35
  miga date > "$DATASET.done"
53
36
  miga add_result -P "$PROJECT" -D "$DATASET" -r "$SCRIPT" -f
37
+
data/scripts/trimmed_reads.bash CHANGED
@@ -11,49 +11,49 @@ b=$DATASET
11
11
  # Initialize
12
12
  miga date > "$DATASET.start"
13
13
 
14
- # Unzip (if necessary)
15
- [[ -e "../01.raw_reads/$b.1.fastq.gz" && ! -e "../01.raw_reads/$b.1.fastq" ]] \
16
- && gunzip "../01.raw_reads/$b.1.fastq.gz"
17
- [[ -e "../01.raw_reads/$b.2.fastq.gz" && ! -e "../01.raw_reads/$b.2.fastq" ]] \
18
- && gunzip "../01.raw_reads/$b.2.fastq.gz"
19
- miga add_result -P "$PROJECT" -D "$DATASET" -r raw_reads -f
20
-
21
14
  # Clean existing files
22
15
  exists "$b".[12].* && rm "$b".[12].*
23
16
 
17
+ # Gzip (if necessary)
18
+ for s in 1 2 ; do
19
+ in="../01.raw_reads/${b}.${s}.fastq"
20
+ if [[ -s "$in" ]] ; then
21
+ gzip -9f "$in"
22
+ miga add_result -P "$PROJECT" -D "$DATASET" -r raw_reads -f
23
+ fi
24
+ done
25
+
24
26
  # Tag
25
- FastQ.tag.rb -i "../01.raw_reads/$b.1.fastq" -p "$b-" -s "/1" -o "$b.1.fastq"
26
- [[ -e "../01.raw_reads/$b.2.fastq" ]] \
27
- && FastQ.tag.rb -i "../01.raw_reads/$b.2.fastq" -p "$b-" -s "/2" \
28
- -o "$b.2.fastq"
29
-
30
- # Trim
31
- SolexaQA++ dynamictrim "$b".[12].fastq -h 20 -d .
32
- SolexaQA++ lengthsort "$b".[12].fastq.trimmed -l 50 -d .
33
-
34
- # Clean adapters
35
- if [[ -e "$b.2.fastq.trimmed.paired" ]] ; then
36
- scythe -a "$MIGA/utils/adapters.fa" "$b.1.fastq.trimmed.paired" \
37
- > "$b.1.clipped.all.fastq"
38
- scythe -a "$MIGA/utils/adapters.fa" "$b.2.fastq.trimmed.paired" \
39
- > "$b.2.clipped.all.fastq"
40
- SolexaQA++ lengthsort "$b".[12].clipped.all.fastq -l 50 -d .
41
- rm "$b".[12].clipped.all.fastq
42
- [[ -e "$b".1.clipped.all.fastq.single ]] \
43
- && mv "$b.1.clipped.all.fastq.single" "$b.1.clipped.single.fastq"
44
- [[ -e "$b".2.clipped.all.fastq.single ]] \
45
- && mv "$b.2.clipped.all.fastq.single" "$b.2.clipped.single.fastq"
46
- mv "$b.1.clipped.all.fastq.paired" "$b.1.clipped.fastq"
47
- mv "$b.2.clipped.all.fastq.paired" "$b.2.clipped.fastq"
48
- rm -f "$b.1.clipped.all.fastq.summary.txt"
27
+ in1="../01.raw_reads/$b.1.fastq.gz"
28
+ in2="../01.raw_reads/$b.2.fastq.gz"
29
+ FastQ.tag.rb -i "$in1" -p "$b-" -s "/1" -o "$b.1.fastq.gz"
30
+ [[ -e "$in2" ]] && FastQ.tag.rb -i "$in2" -p "$b-" -s "/2" -o "$b.2.fastq.gz"
31
+
32
+ # Multitrim
33
+ CMD="multitrim.py --zip gzip --level 9 --threads $CORES -o $b"
34
+ if [[ -s "$b.2.fastq.gz" ]] ; then
35
+ # Paired
36
+ $CMD -1 "$b.1.fastq.gz" -2 "$b.2.fastq.gz"
37
+ for s in 1 2 ; do
38
+ mv "$b/${s}.post_trim_${b}.${s}.fq.gz" "${b}.${s}.clipped.fastq.gz"
39
+ mv "$b/${s}.pre_trim_QC_${b}.${s}.html" "../03.read_quality/${b}.pre.${s}.html"
40
+ mv "$b/${s}.post_trim_QC_${b}.${s}.html" "../03.read_quality/${b}.post.${s}.html"
41
+ done
49
42
  else
50
- scythe -a "$MIGA/utils/adapters.fa" "$b.1.fastq.trimmed.single" \
51
- > "$b.1.clipped.all.fastq"
52
- SolexaQA++ lengthsort "$b.1.clipped.all.fastq" -l 50 -d .
53
- mv "$b.1.clipped.all.fastq.single" "$b.1.clipped.fastq"
43
+ # Unpaired
44
+ $CMD -u "$b.1.fastq.gz"
45
+ mv "$b/unpaired.post_trim_${b}.1.fq.gz" "${b}.1.clipped.fastq.gz"
46
+ mv "$b/unpaired.pre_trim_QC_${b}.1.html" "../03.read_quality/${b}.pre.1.html"
47
+ mv "$b/unpaired.post_trim_QC_${b}.1.html" "../03.read_quality/${b}.post.1.html"
54
48
  fi
55
- rm -f "$b".[12].*.discard
49
+ mv "$b/Subsample_Adapter_Detection.stats.txt" \
50
+ "../03.read_quality/$b.adapters.txt"
51
+
52
+ # Cleanup
53
+ rm -r "$b"
54
+ rm -f "$b".[12].fastq.gz
56
55
 
57
56
  # Finalize
58
57
  miga date > "$DATASET.done"
59
58
  miga add_result -P "$PROJECT" -D "$DATASET" -r "$SCRIPT" -f
59
+
data/test/parallel_test.rb ADDED
@@ -0,0 +1,31 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'test_helper'
4
+
5
+ class ParallelTest < Test::Unit::TestCase
6
+ include TestHelper
7
+
8
+ def test_distribute
9
+ declare_forks
10
+
11
+ base = tmpfile('base')
12
+ assert(!File.exist?("#{base}-3"))
13
+ MiGA::Parallel.distribute((0..3), 2) do |o, _k, t|
14
+ File.open("#{base}-#{o}", 'w') { |fh| fh.puts t }
15
+ end
16
+ assert(File.exist?("#{base}-3"))
17
+ assert(!File.exist?("#{base}-4"))
18
+ t = (0..3).map { |i| File.read("#{base}-#{i}").chomp.to_i }
19
+ assert_equal([0, 0, 1, 1], t.sort)
20
+ end
21
+
22
+ def test_thread_enum
23
+ MiGA::Parallel.thread_enum(%w[a b c d], 3, 1) do |o, _k, _t|
24
+ assert_equal('b', o)
25
+ end
26
+
27
+ n = 0
28
+ MiGA::Parallel.thread_enum(0..19, 4, 0) { n += 1 }
29
+ assert_equal(5, n)
30
+ end
31
+ end
data/test/project_test.rb CHANGED
@@ -27,10 +27,11 @@ class ProjectTest < Test::Unit::TestCase
27
27
  def test_create
28
28
  assert_equal(tmpfile('create'), project('create').path)
29
29
  assert_path_exist(tmpfile('create'))
30
- assert_raise do
30
+ err = capture_stderr do
31
31
  ENV['MIGA_HOME'] = tmpfile('chez-moi')
32
32
  project('cuckoo')
33
33
  end
34
+ assert_match(/Projects cannot be processed yet/, err.string)
34
35
  end
35
36
 
36
37
  def test_load
data/test/remote_dataset_test.rb CHANGED
@@ -47,7 +47,7 @@ class RemoteDatasetTest < Test::Unit::TestCase
47
47
  end
48
48
 
49
49
  def test_net_ftp
50
- cjac = 'ftp://ftp.ebi.ac.uk/pub/databases/ena/tsa/public/ga/GAPJ01.fasta.gz'
50
+ cjac = 'ftp://ftp.ebi.ac.uk/pub/databases/ena/tsa/public/gap/GAPJ01.fasta.gz'
51
51
  n = 'Cjac_L14'
52
52
  rd = MiGA::RemoteDataset.new(cjac, :assembly_gz, :web)
53
53
  assert_equal([cjac], rd.ids)
data/utils/distance/commands.rb CHANGED
@@ -169,6 +169,7 @@ module MiGA::DistanceRunner::Commands
169
169
  aai_data[out[1]] = [out[6].to_f, 0, 0, 0] if out[6] !~ /^>/
170
170
  end
171
171
  end
172
+ puts "Results: #{haai_data.size} | Inferences: #{aai_data.size}"
172
173
  batch_data_to_db(:haai, haai_data)
173
174
  batch_data_to_db(:aai, aai_data)
174
175
 
data/utils/distance/database.rb CHANGED
@@ -126,7 +126,6 @@ module MiGA::DistanceRunner::Database
126
126
  def batch_data_to_db(metric, data)
127
127
  db = tmp_dbs[metric]
128
128
  table = metric == :haai ? :aai : metric
129
- `cp #{db} ~/here.db`
130
129
  SQLite3::Database.new(db) do |conn|
131
130
  data.each do |k, v|
132
131
  sql = <<~SQL
data/utils/distance/runner.rb CHANGED
@@ -18,10 +18,8 @@ class MiGA::DistanceRunner
18
18
  @ref_project = MiGA::Project.load(ref_path)
19
19
  raise "Cannot load reference project: #{ref_path}" if @ref_project.nil?
20
20
  elsif !opts[:run_taxonomy] && dataset.option(:db_project)
21
- ref_path = dataset.option(:db_project)
22
- if project.option(:db_proj_dir)
23
- ref_path = File.expand_path(ref_path, project.option(:db_proj_dir))
24
- end
21
+ ref_location = project.option(:db_proj_dir) || File.dirname(project.path)
22
+ ref_path = File.expand_path(dataset.option(:db_project), ref_location)
25
23
  @ref_project = MiGA::Project.load(ref_path)
26
24
  raise "Cannot load reference project: #{ref_path}" if @ref_project.nil?
27
25
  else
data/utils/enveomics/Manifest/Tasks/fasta.json CHANGED
@@ -4,7 +4,8 @@
4
4
  "task": "FastA.N50.pl",
5
5
  "description": ["Calculates the N50 value of a set of sequences.",
6
6
  "Alternatively, it can calculate other N** values. It also calculates",
7
- "the total number of sequences and the total added length."],
7
+ "the total number of sequences, the total added length, and the",
8
+ "longest sequence length."],
8
9
  "help_arg": "",
9
10
  "see_also": ["FastA.length.pl"],
10
11
  "options": [
@@ -354,14 +355,14 @@
354
355
  "opt": "--in",
355
356
  "arg": "in_file",
356
357
  "mandatory": true,
357
- "description": "Input FastA file."
358
+ "description": "Input FastA file (supports .gz compression)."
358
359
  },
359
360
  {
360
361
  "name": "Output file",
361
362
  "opt": "--out",
362
363
  "arg": "out_file",
363
364
  "mandatory": true,
364
- "description": "Output FastA file."
365
+ "description": "Output FastA file (supports .gz compression)."
365
366
  },
366
367
  {
367
368
  "opt": "--fraction",
@@ -733,6 +734,41 @@
733
734
  }
734
735
  ]
735
736
  },
737
+ {
738
+ "task": "FastA.toFastQ.rb",
739
+ "description": "Creates a FastQ-compliant file from a FastA file.",
740
+ "see_also": "FastQ.toFastA.awk",
741
+ "help_arg": "--help",
742
+ "options": [
743
+ {
744
+ "name": "Input FastA",
745
+ "opt": "--in",
746
+ "arg": "in_file",
747
+ "mandatory": true,
748
+ "description": "Input FastA file (supports .gz compression)."
749
+ },
750
+ {
751
+ "name": "Output FastQ",
752
+ "opt": "--out",
753
+ "arg": "out_file",
754
+ "mandatory": true,
755
+ "description": "Output FastQ file (supports .gz compression)."
756
+ },
757
+ {
758
+ "opt": "--quality",
759
+ "arg": "integer",
760
+ "default": 31,
761
+ "description": ["PHRED quality score to use (fixed), in the range",
762
+ "[-5, 41]."]
763
+ },
764
+ {
765
+ "opt": "--encoding",
766
+ "arg": "integer",
767
+ "default": 33,
768
+ "description": "Base encoding (33 or 64)."
769
+ }
770
+ ]
771
+ },
736
772
  {
737
773
  "task": "FastA.wrap.rb",
738
774
  "description": "Wraps sequences in a FastA to a given line length.",
data/utils/enveomics/Manifest/Tasks/fastq.json CHANGED
@@ -81,6 +81,47 @@
81
81
  }
82
82
  ]
83
83
  },
84
+ {
85
+ "task": "FastQ.maskQual.rb",
86
+ "description": "Masks low-quality bases in a FastQ file.",
87
+ "help_arg": "--help",
88
+ "options": [
89
+ {
90
+ "opt": "--input",
91
+ "arg": "in_file",
92
+ "mandatory": true,
93
+ "description": ["Path to the FastQ file containing the sequences.",
94
+ "Supports compression with .gz extension."]
95
+ },
96
+ {
97
+ "opt": "--output",
98
+ "arg": "out_file",
99
+ "mandatory": true,
100
+ "description": ["Path to the output FastQ file.",
101
+ "Supports compression with .gz extension."]
102
+ },
103
+ {
104
+ "opt": "--qual",
105
+ "arg": "integer",
106
+ "default": 15,
107
+ "description": "Minimum quality score to allow a base."
108
+ },
109
+ {
110
+ "opt": "--offset",
111
+ "arg": "integer",
112
+ "default": 33,
113
+ "description": "Q-score offset."
114
+ },
115
+ {
116
+ "opt": "--fasta",
117
+ "description": "Output sequences in FastA format."
118
+ },
119
+ {
120
+ "opt": "--quiet",
121
+ "description": "Run quietly."
122
+ }
123
+ ]
124
+ },
84
125
  {
85
126
  "task": "FastQ.offset.pl",
86
127
  "description": ["There are several FastQ formats. This script takes a",
@@ -160,14 +201,20 @@
160
201
  "opt": "--in",
161
202
  "arg": "in_file",
162
203
  "mandatory": true,
163
- "description": "FastQ file containing the sequences."
204
+ "description": [
205
+ "FastQ file containing the sequences.",
206
+ "Supports compression with .gz extension."
207
+ ]
164
208
  },
165
209
  {
166
210
  "name": "Output file",
167
211
  "opt": "--out",
168
212
  "arg": "out_file",
169
213
  "mandatory": true,
170
- "description": "FastQ to create."
214
+ "description": [
215
+ "FastQ to create.",
216
+ "Supports compression with .gz extension."
217
+ ]
171
218
  },
172
219
  {
173
220
  "opt": "--prefix",
@@ -188,6 +235,7 @@
188
235
  {
189
236
  "task": "FastQ.toFastA.awk",
190
237
  "description": "Translates FastQ files into FastA.",
238
+ "see_also": "FastA.toFastQ.rb",
191
239
  "help_arg": "'' --help",
192
240
  "options": [
193
241
  "<",
data/utils/enveomics/Manifest/Tasks/mapping.json CHANGED
@@ -62,6 +62,76 @@
62
62
  "description": "Window size, in base pairs."
63
63
  }
64
64
  ]
65
+ },
66
+ {
67
+ "task": "sam.filter.rb",
68
+ "description": ["Filters a SAM or BAM file by target sequences and/or",
69
+ "identity."],
70
+ "see_also": ["anir.rb"],
71
+ "help_arg": "--help",
72
+ "options": [
73
+ {
74
+ "opt": "--genome",
75
+ "arg": "in_file",
76
+ "mandatory": true,
77
+ "description": ["Genome assembly.",
78
+ "Supports compression with .gz extension."]
79
+ },
80
+ {
81
+ "opt": "--mapping",
82
+ "arg": "in_file",
83
+ "mandatory": true,
84
+ "description": ["Mapping file.",
85
+ "Supports compression with .gz extension."]
86
+ },
87
+ {
88
+ "opt": "--out-sam",
89
+ "arg": "out_file",
90
+ "mandatory": true,
91
+ "description": ["Output filtered file in SAM format.",
92
+ "Supports compression with .gz extension."]
93
+ },
94
+ {
95
+ "opt": "--g-format",
96
+ "arg": "select",
97
+ "values": ["fasta", "list"],
98
+ "default": "fasta",
99
+ "description": ["Genome assembly format."]
100
+ },
101
+ {
102
+ "opt": "--m-format",
103
+ "arg": "select",
104
+ "values": ["sam", "bam"],
105
+ "default": "sam",
106
+ "description": ["Mapping file format. SAM supports compression with",
107
+ ".gz file extension."]
108
+ },
109
+ {
110
+ "opt": "--identity",
111
+ "arg": "float",
112
+ "description": "Set a fixed threshold of percent identity.",
113
+ "default": 95.0
114
+ },
115
+ {
116
+ "opt": "--no-header",
117
+ "description": "Do not include the headers."
118
+ },
119
+ {
120
+ "opt": "--threads",
121
+ "arg": "integer",
122
+ "description": "Threads to use.",
123
+ "default": 2
124
+ },
125
+ {
126
+ "opt": "--log",
127
+ "arg": "out_file",
128
+ "description": "Log file to save output."
129
+ },
130
+ {
131
+ "opt": "--quiet",
132
+ "description": "Run quietly."
133
+ }
134
+ ]
65
135
  }
66
136
  ]
67
137
  }