miga-base 0.7.26.3 → 1.0.0.sr1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (105) hide show
  1. checksums.yaml +4 -4
  2. data/lib/miga/_data/aai-intax.blast.tsv.gz +0 -0
  3. data/lib/miga/_data/aai-intax.diamond.tsv.gz +0 -0
  4. data/lib/miga/_data/aai-novel.blast.tsv.gz +0 -0
  5. data/lib/miga/_data/aai-novel.diamond.tsv.gz +0 -0
  6. data/lib/miga/cli/action/doctor.rb +50 -19
  7. data/lib/miga/cli/action/doctor/base.rb +20 -18
  8. data/lib/miga/cli/action/init.rb +11 -7
  9. data/lib/miga/cli/action/init/files_helper.rb +1 -0
  10. data/lib/miga/cli/action/ncbi_get.rb +3 -3
  11. data/lib/miga/cli/action/tax_dist.rb +2 -2
  12. data/lib/miga/cli/action/wf.rb +5 -4
  13. data/lib/miga/daemon.rb +11 -4
  14. data/lib/miga/dataset/result.rb +10 -6
  15. data/lib/miga/json.rb +1 -2
  16. data/lib/miga/metadata.rb +5 -1
  17. data/lib/miga/parallel.rb +11 -6
  18. data/lib/miga/project.rb +8 -8
  19. data/lib/miga/project/base.rb +4 -4
  20. data/lib/miga/project/result.rb +2 -2
  21. data/lib/miga/sqlite.rb +7 -0
  22. data/lib/miga/version.rb +23 -9
  23. data/scripts/aai_distances.bash +16 -18
  24. data/scripts/ani_distances.bash +16 -17
  25. data/scripts/assembly.bash +31 -16
  26. data/scripts/haai_distances.bash +3 -27
  27. data/scripts/miga.bash +6 -4
  28. data/scripts/p.bash +1 -1
  29. data/scripts/read_quality.bash +9 -18
  30. data/scripts/trimmed_fasta.bash +14 -30
  31. data/scripts/trimmed_reads.bash +36 -36
  32. data/test/parallel_test.rb +31 -0
  33. data/test/project_test.rb +2 -1
  34. data/utils/distance/commands.rb +1 -0
  35. data/utils/distance/runner.rb +2 -4
  36. data/utils/enveomics/Manifest/Tasks/fasta.json +39 -3
  37. data/utils/enveomics/Manifest/Tasks/fastq.json +50 -2
  38. data/utils/enveomics/Manifest/Tasks/mapping.json +70 -0
  39. data/utils/enveomics/Manifest/Tasks/other.json +77 -0
  40. data/utils/enveomics/Manifest/Tasks/sequence-identity.json +138 -1
  41. data/utils/enveomics/Manifest/categories.json +13 -4
  42. data/utils/enveomics/Scripts/Aln.cat.rb +206 -148
  43. data/utils/enveomics/Scripts/FastA.N50.pl +33 -29
  44. data/utils/enveomics/Scripts/FastA.fragment.rb +69 -61
  45. data/utils/enveomics/Scripts/FastA.sample.rb +61 -46
  46. data/utils/enveomics/Scripts/FastA.toFastQ.rb +69 -0
  47. data/utils/enveomics/Scripts/FastQ.maskQual.rb +89 -0
  48. data/utils/enveomics/Scripts/FastQ.tag.rb +59 -52
  49. data/utils/enveomics/Scripts/SRA.download.bash +6 -8
  50. data/utils/enveomics/Scripts/Table.prefScore.R +60 -0
  51. data/utils/enveomics/Scripts/aai.rb +3 -2
  52. data/utils/enveomics/Scripts/anir.rb +137 -0
  53. data/utils/enveomics/Scripts/lib/enveomics_rb/anir.rb +293 -0
  54. data/utils/enveomics/Scripts/lib/enveomics_rb/bm_set.rb +175 -0
  55. data/utils/enveomics/Scripts/lib/enveomics_rb/enveomics.rb +17 -17
  56. data/utils/enveomics/Scripts/lib/enveomics_rb/errors.rb +17 -0
  57. data/utils/enveomics/Scripts/lib/enveomics_rb/gmm_em.rb +30 -0
  58. data/utils/enveomics/Scripts/lib/enveomics_rb/match.rb +63 -0
  59. data/utils/enveomics/Scripts/lib/enveomics_rb/rbm.rb +49 -0
  60. data/utils/enveomics/Scripts/lib/enveomics_rb/stats.rb +3 -0
  61. data/utils/enveomics/Scripts/lib/enveomics_rb/stats/rand.rb +31 -0
  62. data/utils/enveomics/Scripts/lib/enveomics_rb/stats/sample.rb +152 -0
  63. data/utils/enveomics/Scripts/lib/enveomics_rb/utils.rb +73 -0
  64. data/utils/enveomics/Scripts/rbm-legacy.rb +172 -0
  65. data/utils/enveomics/Scripts/rbm.rb +87 -133
  66. data/utils/enveomics/Scripts/sam.filter.rb +148 -0
  67. data/utils/enveomics/enveomics.R/DESCRIPTION +2 -2
  68. data/utils/enveomics/enveomics.R/NAMESPACE +1 -1
  69. data/utils/enveomics/enveomics.R/R/prefscore.R +79 -0
  70. data/utils/enveomics/enveomics.R/R/utils.R +30 -0
  71. data/utils/enveomics/enveomics.R/README.md +1 -0
  72. data/utils/enveomics/enveomics.R/man/cash-enve.GrowthCurve-method.Rd +0 -1
  73. data/utils/enveomics/enveomics.R/man/cash-enve.RecPlot2-method.Rd +0 -1
  74. data/utils/enveomics/enveomics.R/man/cash-enve.RecPlot2.Peak-method.Rd +0 -1
  75. data/utils/enveomics/enveomics.R/man/enve.__tribs.Rd +10 -2
  76. data/utils/enveomics/enveomics.R/man/enve.barplot.Rd +16 -4
  77. data/utils/enveomics/enveomics.R/man/enve.cliopts.Rd +13 -3
  78. data/utils/enveomics/enveomics.R/man/enve.df2dist.Rd +8 -2
  79. data/utils/enveomics/enveomics.R/man/enve.df2dist.group.Rd +8 -2
  80. data/utils/enveomics/enveomics.R/man/enve.df2dist.list.Rd +9 -2
  81. data/utils/enveomics/enveomics.R/man/enve.growthcurve.Rd +13 -5
  82. data/utils/enveomics/enveomics.R/man/enve.prefscore.Rd +50 -0
  83. data/utils/enveomics/enveomics.R/man/enve.prune.dist.Rd +9 -2
  84. data/utils/enveomics/enveomics.R/man/enve.recplot.Rd +23 -6
  85. data/utils/enveomics/enveomics.R/man/enve.recplot2.Rd +13 -4
  86. data/utils/enveomics/enveomics.R/man/enve.recplot2.compareIdentities.Rd +8 -2
  87. data/utils/enveomics/enveomics.R/man/enve.recplot2.extractWindows.Rd +7 -2
  88. data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.__mow_one.Rd +14 -3
  89. data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.em.Rd +10 -2
  90. data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.emauto.Rd +8 -2
  91. data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.mower.Rd +17 -9
  92. data/utils/enveomics/enveomics.R/man/enve.recplot2.windowDepthThreshold.Rd +6 -2
  93. data/utils/enveomics/enveomics.R/man/enve.selvector.Rd +23 -0
  94. data/utils/enveomics/enveomics.R/man/enve.tribs.Rd +14 -5
  95. data/utils/enveomics/enveomics.R/man/plot.enve.GrowthCurve.Rd +19 -4
  96. data/utils/enveomics/enveomics.R/man/plot.enve.TRIBS.Rd +11 -3
  97. data/utils/enveomics/enveomics.R/man/plot.enve.TRIBStest.Rd +11 -4
  98. data/utils/enveomics/enveomics.R/man/plot.enve.recplot2.Rd +26 -12
  99. data/utils/multitrim/Multitrim How-To.pdf +0 -0
  100. data/utils/multitrim/README.md +67 -0
  101. data/utils/multitrim/multitrim.py +1555 -0
  102. data/utils/multitrim/multitrim.yml +13 -0
  103. data/utils/requirements.txt +4 -3
  104. metadata +33 -6
  105. data/utils/enveomics/Scripts/lib/enveomics_rb/stat.rb +0 -30
@@ -0,0 +1,31 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'test_helper'
4
+
5
+ class ParallelTest < Test::Unit::TestCase
6
+ include TestHelper
7
+
8
+ def test_distribute
9
+ declare_forks
10
+
11
+ base = tmpfile('base')
12
+ assert(!File.exist?("#{base}-3"))
13
+ MiGA::Parallel.distribute((0..3), 2) do |o, _k, t|
14
+ File.open("#{base}-#{o}", 'w') { |fh| fh.puts t }
15
+ end
16
+ assert(File.exist?("#{base}-3"))
17
+ assert(!File.exist?("#{base}-4"))
18
+ t = (0..3).map { |i| File.read("#{base}-#{i}").chomp.to_i }
19
+ assert_equal([0, 0, 1, 1], t.sort)
20
+ end
21
+
22
+ def test_thread_enum
23
+ MiGA::Parallel.thread_enum(%w[a b c d], 3, 1) do |o, _k, _t|
24
+ assert_equal('b', o)
25
+ end
26
+
27
+ n = 0
28
+ MiGA::Parallel.thread_enum(0..19, 4, 0) { n += 1 }
29
+ assert_equal(5, n)
30
+ end
31
+ end
data/test/project_test.rb CHANGED
@@ -27,10 +27,11 @@ class ProjectTest < Test::Unit::TestCase
27
27
  def test_create
28
28
  assert_equal(tmpfile('create'), project('create').path)
29
29
  assert_path_exist(tmpfile('create'))
30
- assert_raise do
30
+ err = capture_stderr do
31
31
  ENV['MIGA_HOME'] = tmpfile('chez-moi')
32
32
  project('cuckoo')
33
33
  end
34
+ assert_match(/Projects cannot be processed yet/, err.string)
34
35
  end
35
36
 
36
37
  def test_load
@@ -169,6 +169,7 @@ module MiGA::DistanceRunner::Commands
169
169
  aai_data[out[1]] = [out[6].to_f, 0, 0, 0] if out[6] !~ /^>/
170
170
  end
171
171
  end
172
+ puts "Results: #{haai_data.size} | Inferences: #{aai_data.size}"
172
173
  batch_data_to_db(:haai, haai_data)
173
174
  batch_data_to_db(:aai, aai_data)
174
175
 
@@ -18,10 +18,8 @@ class MiGA::DistanceRunner
18
18
  @ref_project = MiGA::Project.load(ref_path)
19
19
  raise "Cannot load reference project: #{ref_path}" if @ref_project.nil?
20
20
  elsif !opts[:run_taxonomy] && dataset.option(:db_project)
21
- ref_path = dataset.option(:db_project)
22
- if project.option(:db_proj_dir)
23
- ref_path = File.expand_path(ref_path, project.option(:db_proj_dir))
24
- end
21
+ ref_location = project.option(:db_proj_dir) || File.dirname(project.path)
22
+ ref_path = File.expand_path(dataset.option(:db_project), ref_location)
25
23
  @ref_project = MiGA::Project.load(ref_path)
26
24
  raise "Cannot load reference project: #{ref_path}" if @ref_project.nil?
27
25
  else
@@ -4,7 +4,8 @@
4
4
  "task": "FastA.N50.pl",
5
5
  "description": ["Calculates the N50 value of a set of sequences.",
6
6
  "Alternatively, it can calculate other N** values. It also calculates",
7
- "the total number of sequences and the total added length."],
7
+ "the total number of sequences, the total added length, and the",
8
+ "longest sequence length."],
8
9
  "help_arg": "",
9
10
  "see_also": ["FastA.length.pl"],
10
11
  "options": [
@@ -354,14 +355,14 @@
354
355
  "opt": "--in",
355
356
  "arg": "in_file",
356
357
  "mandatory": true,
357
- "description": "Input FastA file."
358
+ "description": "Input FastA file (supports .gz compression)."
358
359
  },
359
360
  {
360
361
  "name": "Output file",
361
362
  "opt": "--out",
362
363
  "arg": "out_file",
363
364
  "mandatory": true,
364
- "description": "Output FastA file."
365
+ "description": "Output FastA file (supports .gz compression)."
365
366
  },
366
367
  {
367
368
  "opt": "--fraction",
@@ -733,6 +734,41 @@
733
734
  }
734
735
  ]
735
736
  },
737
+ {
738
+ "task": "FastA.toFastQ.rb",
739
+ "description": "Creates a FastQ-compliant file from a FastA file.",
740
+ "see_also": "FastQ.toFastA.awk",
741
+ "help_arg": "--help",
742
+ "options": [
743
+ {
744
+ "name": "Input FastA",
745
+ "opt": "--in",
746
+ "arg": "in_file",
747
+ "mandatory": true,
748
+ "description": "Input FastA file (supports .gz compression)."
749
+ },
750
+ {
751
+ "name": "Output FastQ",
752
+ "opt": "--out",
753
+ "arg": "out_file",
754
+ "mandatory": true,
755
+ "description": "Output FastQ file (supports .gz compression)."
756
+ },
757
+ {
758
+ "opt": "--quality",
759
+ "arg": "integer",
760
+ "default": 31,
761
+ "description": ["PHRED quality score to use (fixed), in the range",
762
+ "[-5, 41]."]
763
+ },
764
+ {
765
+ "opt": "--encoding",
766
+ "arg": "integer",
767
+ "default": 33,
768
+ "description": "Base encoding (33 or 64)."
769
+ }
770
+ ]
771
+ },
736
772
  {
737
773
  "task": "FastA.wrap.rb",
738
774
  "description": "Wraps sequences in a FastA to a given line length.",
@@ -81,6 +81,47 @@
81
81
  }
82
82
  ]
83
83
  },
84
+ {
85
+ "task": "FastQ.maskQual.rb",
86
+ "description": "Masks low-quality bases in a FastQ file.",
87
+ "help_arg": "--help",
88
+ "options": [
89
+ {
90
+ "opt": "--input",
91
+ "arg": "in_file",
92
+ "mandatory": true,
93
+ "description": ["Path to the FastQ file containing the sequences.",
94
+ "Supports compression with .gz extension."]
95
+ },
96
+ {
97
+ "opt": "--output",
98
+ "arg": "out_file",
99
+ "mandatory": true,
100
+ "description": ["Path to the output FastQ file.",
101
+ "Supports compression with .gz extension."]
102
+ },
103
+ {
104
+ "opt": "--qual",
105
+ "arg": "integer",
106
+ "default": 15,
107
+ "description": "Minimum quality score to allow a base."
108
+ },
109
+ {
110
+ "opt": "--offset",
111
+ "arg": "integer",
112
+ "default": 33,
113
+ "description": "Q-score offset."
114
+ },
115
+ {
116
+ "opt": "--fasta",
117
+ "description": "Output sequences in FastA format."
118
+ },
119
+ {
120
+ "opt": "--quiet",
121
+ "description": "Run quietly."
122
+ }
123
+ ]
124
+ },
84
125
  {
85
126
  "task": "FastQ.offset.pl",
86
127
  "description": ["There are several FastQ formats. This script takes a",
@@ -160,14 +201,20 @@
160
201
  "opt": "--in",
161
202
  "arg": "in_file",
162
203
  "mandatory": true,
163
- "description": "FastQ file containing the sequences."
204
+ "description": [
205
+ "FastQ file containing the sequences.",
206
+ "Supports compression with .gz extension."
207
+ ]
164
208
  },
165
209
  {
166
210
  "name": "Output file",
167
211
  "opt": "--out",
168
212
  "arg": "out_file",
169
213
  "mandatory": true,
170
- "description": "FastQ to create."
214
+ "description": [
215
+ "FastQ to create.",
216
+ "Supports compression with .gz extension."
217
+ ]
171
218
  },
172
219
  {
173
220
  "opt": "--prefix",
@@ -188,6 +235,7 @@
188
235
  {
189
236
  "task": "FastQ.toFastA.awk",
190
237
  "description": "Translates FastQ files into FastA.",
238
+ "see_also": "FastA.toFastQ.rb",
191
239
  "help_arg": "'' --help",
192
240
  "options": [
193
241
  "<",
@@ -62,6 +62,76 @@
62
62
  "description": "Window size, in base pairs."
63
63
  }
64
64
  ]
65
+ },
66
+ {
67
+ "task": "sam.filter.rb",
68
+ "description": ["Filters a SAM or BAM file by target sequences and/or",
69
+ "identity."],
70
+ "see_also": ["anir.rb"],
71
+ "help_arg": "--help",
72
+ "options": [
73
+ {
74
+ "opt": "--genome",
75
+ "arg": "in_file",
76
+ "mandatory": true,
77
+ "description": ["Genome assembly.",
78
+ "Supports compression with .gz extension."]
79
+ },
80
+ {
81
+ "opt": "--mapping",
82
+ "arg": "in_file",
83
+ "mandatory": true,
84
+ "description": ["Mapping file.",
85
+ "Supports compression with .gz extension."]
86
+ },
87
+ {
88
+ "opt": "--out-sam",
89
+ "arg": "out_file",
90
+ "mandatory": true,
91
+ "description": ["Output filtered file in SAM format.",
92
+ "Supports compression with .gz extension."]
93
+ },
94
+ {
95
+ "opt": "--g-format",
96
+ "arg": "select",
97
+ "values": ["fasta", "list"],
98
+ "default": "fasta",
99
+ "description": ["Genome assembly format."]
100
+ },
101
+ {
102
+ "opt": "--m-format",
103
+ "arg": "select",
104
+ "values": ["sam", "bam"],
105
+ "default": "sam",
106
+ "description": ["Mapping file format. SAM supports compression with",
107
+ ".gz file extension."]
108
+ },
109
+ {
110
+ "opt": "--identity",
111
+ "arg": "float",
112
+ "description": "Set a fixed threshold of percent identity.",
113
+ "default": 95.0
114
+ },
115
+ {
116
+ "opt": "--no-header",
117
+ "description": "Do not include the headers."
118
+ },
119
+ {
120
+ "opt": "--threads",
121
+ "arg": "integer",
122
+ "description": "Threads to use.",
123
+ "default": 2
124
+ },
125
+ {
126
+ "opt": "--log",
127
+ "arg": "out_file",
128
+ "description": "Log file to save output."
129
+ },
130
+ {
131
+ "opt": "--quiet",
132
+ "description": "Run quietly."
133
+ }
134
+ ]
65
135
  }
66
136
  ]
67
137
  }
@@ -824,6 +824,83 @@
824
824
  "description": "Features to map in GFF."
825
825
  }
826
826
  ]
827
+ },
828
+ {
829
+ "task": "Table.prefScore.R",
830
+ "description": ["Estimate preference score of species based on occupancy",
831
+ "in biased sample sets."],
832
+ "help_arg": "--help",
833
+ "requires": [ { "r_package": "optparse" } ],
834
+ "options": [
835
+ {
836
+ "name": "Occupancy matrix",
837
+ "opt": "--x",
838
+ "arg": "in_file",
839
+ "description": ["A tab-delimited table of presence/absence (1/0)",
840
+ "with species as rows and samples as columns."],
841
+ "mandatory": true
842
+ },
843
+ {
844
+ "name": "Sample set",
845
+ "opt": "--set",
846
+ "arg": "in_file",
847
+ "description": ["A list of sample names that constitute the test",
848
+ "set, one per line."],
849
+ "mandatory": true
850
+ },
851
+ {
852
+ "opt": "--ignore",
853
+ "arg": "in_file",
854
+ "description": ["A list of species to exclude from the analysis,",
855
+ "one per line."]
856
+ },
857
+ {
858
+ "name": "Significance threshold",
859
+ "opt": "--signif-thr",
860
+ "arg": "float",
861
+ "description": "Absolute value of the significance threshold."
862
+ },
863
+ {
864
+ "opt": "--col-above",
865
+ "arg": "string",
866
+ "description": "Color for points significantly above zero.",
867
+ "default": "#941100"
868
+ },
869
+ {
870
+ "opt": "--col-equal",
871
+ "arg": "string",
872
+ "description": ["Color for points not significantly different from",
873
+ "zero."],
874
+ "default": "#BDBDBD"
875
+ },
876
+ {
877
+ "opt": "--col-below",
878
+ "arg": "string",
879
+ "description": "Color for points significantly below zero.",
880
+ "default": "#2F5496"
881
+ },
882
+ {
883
+ "name": "Output preference scores",
884
+ "arg": "out_file",
885
+ "description": "Output raw-text file with preference scores.",
886
+ "mandatory": true
887
+ },
888
+ {
889
+ "name": "Graphical output",
890
+ "arg": "out_file",
891
+ "description": "Output PDF file with preference scores plot."
892
+ },
893
+ {
894
+ "name": "Width",
895
+ "arg": "float",
896
+ "description": "Width of the plot in inches (7 by default)."
897
+ },
898
+ {
899
+ "name": "Height",
900
+ "arg": "float",
901
+ "description": "Height of the plot in inches (7 by default)."
902
+ }
903
+ ]
827
904
  }
828
905
  ]
829
906
  }
@@ -362,6 +362,139 @@
362
362
  }
363
363
  ]
364
364
  },
365
+ {
366
+ "task": "anir.rb",
367
+ "description": ["Estimates ANIr: the Average Nucleotide Identity of",
368
+ "reads against a genome."],
369
+ "help_arg": "--help",
370
+ "see_also": ["ani.rb", "sam.filter.rb"],
371
+ "options": [
372
+ {
373
+ "opt": "--reads",
374
+ "arg": "in_file",
375
+ "description": "Metagenomic reads."
376
+ },
377
+ {
378
+ "opt": "--genome",
379
+ "arg": "in_file",
380
+ "description": "Genome assembly."
381
+ },
382
+ {
383
+ "opt": "--mapping",
384
+ "arg": "in_file",
385
+ "description": "Mapping file."
386
+ },
387
+ {
388
+ "opt": "--list",
389
+ "arg": "in_file",
390
+ "description": "Output file with identities."
391
+ },
392
+ {
393
+ "opt": "--hist",
394
+ "arg": "in_file",
395
+ "description": "Output file with histogram."
396
+ },
397
+ {
398
+ "opt": "--tab",
399
+ "arg": "out_file",
400
+ "description": "Output file with results in tabular format."
401
+ },
402
+ {
403
+ "name": "Reads format",
404
+ "opt": "--r-format",
405
+ "arg": "select",
406
+ "description": ["Metagenomic reads format: fastq or fasta.",
407
+ "Both options support compression with .gz file extension."],
408
+ "values": ["fastq", "fasta"],
409
+ "default": "fastq"
410
+ },
411
+ {
412
+ "name": "Reads type",
413
+ "opt": "--r-type",
414
+ "arg": "select",
415
+ "description": ["Type of metagenomic reads: Single reads (single),",
416
+ "coupled reads in separate files (-m must be comma-delimited;",
417
+ "coupled), or coupled reads in a single interposed file",
418
+ "(interleaved)."],
419
+ "values": ["single", "coupled", "interleaved"],
420
+ "default": "single"
421
+ },
422
+ {
423
+ "name": "Genome format",
424
+ "opt": "--g-format",
425
+ "arg": "select",
426
+ "description": ["Genome assembly format: fasta or list.",
427
+ "Both options support compression with .gz file extension.",
428
+ "If passed in mapping-read mode, filters only matches to these",
429
+ "contigs."],
430
+ "values": ["fasta", "list"],
431
+ "default": "fasta"
432
+ },
433
+ {
434
+ "name": "Mapping format",
435
+ "opt": "--m-format",
436
+ "arg": "select",
437
+ "description": ["Mapping file format: sam, bam, tab, or list.",
438
+ "All except bam support compression with .gz file extension."],
439
+ "values": ["sam", "bam", "tab", "list"],
440
+ "default": "sam"
441
+ },
442
+ {
443
+ "opt": "--identity",
444
+ "arg": "float",
445
+ "description": "Set a fixed threshold of percent identity.",
446
+ "default": 95.0
447
+ },
448
+ {
449
+ "opt": "--algorithm",
450
+ "arg": "select",
451
+ "description": ["Set an algorithm to automatically detect identity",
452
+ "threshold: Valley detection by E-M of Gaussian Mixture Model",
453
+ "(gmm), fixed threshold (see Identity; fix),",
454
+ "or pick gmm or fix depending on bimodality (see Bimodality; auto)."],
455
+ "values": ["gmm", "fix", "auto"],
456
+ "default": "auto"
457
+ },
458
+ {
459
+ "opt": "--bimodality",
460
+ "arg": "float",
461
+ "description": ["Threshold of bimodality below which the algorithm",
462
+ "is set to fix. The coefficient used is the de Michele & Accantino",
463
+ "(2014) B index."],
464
+ "default": 0.5
465
+ },
466
+ {
467
+ "opt": "--coefficient",
468
+ "arg": "select",
469
+ "description": ["Coefficient of bimodality for Algorithm auto: ",
470
+ "Sarle's bimodality coefficient b (sarle), or",
471
+ "de Michele and Accatino (2014 PLoS ONE) B index",
472
+ "(use with Bimodality 0.1, dma)."],
473
+ "values": ["sarle", "dma"],
474
+ "default": "sarle"
475
+ },
476
+ {
477
+ "opt": "--bin-size",
478
+ "arg": "float",
479
+ "description": "Width of histogram bins (in percent identity).",
480
+ "default": 1.0
481
+ },
482
+ {
483
+ "opt": "--threads",
484
+ "arg": "integer",
485
+ "description": "Threads to use."
486
+ },
487
+ {
488
+ "opt": "--log",
489
+ "arg": "out_file",
490
+ "description": "Log file to save output."
491
+ },
492
+ {
493
+ "opt": "--quiet",
494
+ "description": "Run quietly."
495
+ }
496
+ ]
497
+ },
365
498
  {
366
499
  "task": "HMM.haai.rb",
367
500
  "description": ["Estimates Average Amino Acid Identity (AAI) from the",
@@ -407,10 +540,14 @@
407
540
  "sequences."],
408
541
  "help_arg": "--help",
409
542
  "cite":[
543
+ ["Camacho et al, 2009, BMC Bioinf (BLAST+)",
544
+ "https://doi.org/10.1186/1471-2105-10-421"],
410
545
  ["Altschul et al, 2000, JMB (BLAST)",
411
546
  "http://dx.doi.org/10.1016/S0022-2836(05)80360-2"],
412
547
  ["Buchfink B, Xie C, Huson D, 2015, Nat Meth (Diamond)",
413
- "https://dx.doi.org/10.1038/nmeth.3176"]
548
+ "https://dx.doi.org/10.1038/nmeth.3176"],
549
+ ["Kent, 2002, Genome Res (BLAT)",
550
+ "https://doi.org/10.1101/gr.229202"]
414
551
  ],
415
552
  "options": [
416
553
  {