miga-base 0.7.26.3 → 1.0.0.sr1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (105) hide show
  1. checksums.yaml +4 -4
  2. data/lib/miga/_data/aai-intax.blast.tsv.gz +0 -0
  3. data/lib/miga/_data/aai-intax.diamond.tsv.gz +0 -0
  4. data/lib/miga/_data/aai-novel.blast.tsv.gz +0 -0
  5. data/lib/miga/_data/aai-novel.diamond.tsv.gz +0 -0
  6. data/lib/miga/cli/action/doctor.rb +50 -19
  7. data/lib/miga/cli/action/doctor/base.rb +20 -18
  8. data/lib/miga/cli/action/init.rb +11 -7
  9. data/lib/miga/cli/action/init/files_helper.rb +1 -0
  10. data/lib/miga/cli/action/ncbi_get.rb +3 -3
  11. data/lib/miga/cli/action/tax_dist.rb +2 -2
  12. data/lib/miga/cli/action/wf.rb +5 -4
  13. data/lib/miga/daemon.rb +11 -4
  14. data/lib/miga/dataset/result.rb +10 -6
  15. data/lib/miga/json.rb +1 -2
  16. data/lib/miga/metadata.rb +5 -1
  17. data/lib/miga/parallel.rb +11 -6
  18. data/lib/miga/project.rb +8 -8
  19. data/lib/miga/project/base.rb +4 -4
  20. data/lib/miga/project/result.rb +2 -2
  21. data/lib/miga/sqlite.rb +7 -0
  22. data/lib/miga/version.rb +23 -9
  23. data/scripts/aai_distances.bash +16 -18
  24. data/scripts/ani_distances.bash +16 -17
  25. data/scripts/assembly.bash +31 -16
  26. data/scripts/haai_distances.bash +3 -27
  27. data/scripts/miga.bash +6 -4
  28. data/scripts/p.bash +1 -1
  29. data/scripts/read_quality.bash +9 -18
  30. data/scripts/trimmed_fasta.bash +14 -30
  31. data/scripts/trimmed_reads.bash +36 -36
  32. data/test/parallel_test.rb +31 -0
  33. data/test/project_test.rb +2 -1
  34. data/utils/distance/commands.rb +1 -0
  35. data/utils/distance/runner.rb +2 -4
  36. data/utils/enveomics/Manifest/Tasks/fasta.json +39 -3
  37. data/utils/enveomics/Manifest/Tasks/fastq.json +50 -2
  38. data/utils/enveomics/Manifest/Tasks/mapping.json +70 -0
  39. data/utils/enveomics/Manifest/Tasks/other.json +77 -0
  40. data/utils/enveomics/Manifest/Tasks/sequence-identity.json +138 -1
  41. data/utils/enveomics/Manifest/categories.json +13 -4
  42. data/utils/enveomics/Scripts/Aln.cat.rb +206 -148
  43. data/utils/enveomics/Scripts/FastA.N50.pl +33 -29
  44. data/utils/enveomics/Scripts/FastA.fragment.rb +69 -61
  45. data/utils/enveomics/Scripts/FastA.sample.rb +61 -46
  46. data/utils/enveomics/Scripts/FastA.toFastQ.rb +69 -0
  47. data/utils/enveomics/Scripts/FastQ.maskQual.rb +89 -0
  48. data/utils/enveomics/Scripts/FastQ.tag.rb +59 -52
  49. data/utils/enveomics/Scripts/SRA.download.bash +6 -8
  50. data/utils/enveomics/Scripts/Table.prefScore.R +60 -0
  51. data/utils/enveomics/Scripts/aai.rb +3 -2
  52. data/utils/enveomics/Scripts/anir.rb +137 -0
  53. data/utils/enveomics/Scripts/lib/enveomics_rb/anir.rb +293 -0
  54. data/utils/enveomics/Scripts/lib/enveomics_rb/bm_set.rb +175 -0
  55. data/utils/enveomics/Scripts/lib/enveomics_rb/enveomics.rb +17 -17
  56. data/utils/enveomics/Scripts/lib/enveomics_rb/errors.rb +17 -0
  57. data/utils/enveomics/Scripts/lib/enveomics_rb/gmm_em.rb +30 -0
  58. data/utils/enveomics/Scripts/lib/enveomics_rb/match.rb +63 -0
  59. data/utils/enveomics/Scripts/lib/enveomics_rb/rbm.rb +49 -0
  60. data/utils/enveomics/Scripts/lib/enveomics_rb/stats.rb +3 -0
  61. data/utils/enveomics/Scripts/lib/enveomics_rb/stats/rand.rb +31 -0
  62. data/utils/enveomics/Scripts/lib/enveomics_rb/stats/sample.rb +152 -0
  63. data/utils/enveomics/Scripts/lib/enveomics_rb/utils.rb +73 -0
  64. data/utils/enveomics/Scripts/rbm-legacy.rb +172 -0
  65. data/utils/enveomics/Scripts/rbm.rb +87 -133
  66. data/utils/enveomics/Scripts/sam.filter.rb +148 -0
  67. data/utils/enveomics/enveomics.R/DESCRIPTION +2 -2
  68. data/utils/enveomics/enveomics.R/NAMESPACE +1 -1
  69. data/utils/enveomics/enveomics.R/R/prefscore.R +79 -0
  70. data/utils/enveomics/enveomics.R/R/utils.R +30 -0
  71. data/utils/enveomics/enveomics.R/README.md +1 -0
  72. data/utils/enveomics/enveomics.R/man/cash-enve.GrowthCurve-method.Rd +0 -1
  73. data/utils/enveomics/enveomics.R/man/cash-enve.RecPlot2-method.Rd +0 -1
  74. data/utils/enveomics/enveomics.R/man/cash-enve.RecPlot2.Peak-method.Rd +0 -1
  75. data/utils/enveomics/enveomics.R/man/enve.__tribs.Rd +10 -2
  76. data/utils/enveomics/enveomics.R/man/enve.barplot.Rd +16 -4
  77. data/utils/enveomics/enveomics.R/man/enve.cliopts.Rd +13 -3
  78. data/utils/enveomics/enveomics.R/man/enve.df2dist.Rd +8 -2
  79. data/utils/enveomics/enveomics.R/man/enve.df2dist.group.Rd +8 -2
  80. data/utils/enveomics/enveomics.R/man/enve.df2dist.list.Rd +9 -2
  81. data/utils/enveomics/enveomics.R/man/enve.growthcurve.Rd +13 -5
  82. data/utils/enveomics/enveomics.R/man/enve.prefscore.Rd +50 -0
  83. data/utils/enveomics/enveomics.R/man/enve.prune.dist.Rd +9 -2
  84. data/utils/enveomics/enveomics.R/man/enve.recplot.Rd +23 -6
  85. data/utils/enveomics/enveomics.R/man/enve.recplot2.Rd +13 -4
  86. data/utils/enveomics/enveomics.R/man/enve.recplot2.compareIdentities.Rd +8 -2
  87. data/utils/enveomics/enveomics.R/man/enve.recplot2.extractWindows.Rd +7 -2
  88. data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.__mow_one.Rd +14 -3
  89. data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.em.Rd +10 -2
  90. data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.emauto.Rd +8 -2
  91. data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.mower.Rd +17 -9
  92. data/utils/enveomics/enveomics.R/man/enve.recplot2.windowDepthThreshold.Rd +6 -2
  93. data/utils/enveomics/enveomics.R/man/enve.selvector.Rd +23 -0
  94. data/utils/enveomics/enveomics.R/man/enve.tribs.Rd +14 -5
  95. data/utils/enveomics/enveomics.R/man/plot.enve.GrowthCurve.Rd +19 -4
  96. data/utils/enveomics/enveomics.R/man/plot.enve.TRIBS.Rd +11 -3
  97. data/utils/enveomics/enveomics.R/man/plot.enve.TRIBStest.Rd +11 -4
  98. data/utils/enveomics/enveomics.R/man/plot.enve.recplot2.Rd +26 -12
  99. data/utils/multitrim/Multitrim How-To.pdf +0 -0
  100. data/utils/multitrim/README.md +67 -0
  101. data/utils/multitrim/multitrim.py +1555 -0
  102. data/utils/multitrim/multitrim.yml +13 -0
  103. data/utils/requirements.txt +4 -3
  104. metadata +33 -6
  105. data/utils/enveomics/Scripts/lib/enveomics_rb/stat.rb +0 -30
@@ -0,0 +1,31 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'test_helper'
4
+
5
+ class ParallelTest < Test::Unit::TestCase
6
+ include TestHelper
7
+
8
+ def test_distribute
9
+ declare_forks
10
+
11
+ base = tmpfile('base')
12
+ assert(!File.exist?("#{base}-3"))
13
+ MiGA::Parallel.distribute((0..3), 2) do |o, _k, t|
14
+ File.open("#{base}-#{o}", 'w') { |fh| fh.puts t }
15
+ end
16
+ assert(File.exist?("#{base}-3"))
17
+ assert(!File.exist?("#{base}-4"))
18
+ t = (0..3).map { |i| File.read("#{base}-#{i}").chomp.to_i }
19
+ assert_equal([0, 0, 1, 1], t.sort)
20
+ end
21
+
22
+ def test_thread_enum
23
+ MiGA::Parallel.thread_enum(%w[a b c d], 3, 1) do |o, _k, _t|
24
+ assert_equal('b', o)
25
+ end
26
+
27
+ n = 0
28
+ MiGA::Parallel.thread_enum(0..19, 4, 0) { n += 1 }
29
+ assert_equal(5, n)
30
+ end
31
+ end
data/test/project_test.rb CHANGED
@@ -27,10 +27,11 @@ class ProjectTest < Test::Unit::TestCase
27
27
  def test_create
28
28
  assert_equal(tmpfile('create'), project('create').path)
29
29
  assert_path_exist(tmpfile('create'))
30
- assert_raise do
30
+ err = capture_stderr do
31
31
  ENV['MIGA_HOME'] = tmpfile('chez-moi')
32
32
  project('cuckoo')
33
33
  end
34
+ assert_match(/Projects cannot be processed yet/, err.string)
34
35
  end
35
36
 
36
37
  def test_load
@@ -169,6 +169,7 @@ module MiGA::DistanceRunner::Commands
169
169
  aai_data[out[1]] = [out[6].to_f, 0, 0, 0] if out[6] !~ /^>/
170
170
  end
171
171
  end
172
+ puts "Results: #{haai_data.size} | Inferences: #{aai_data.size}"
172
173
  batch_data_to_db(:haai, haai_data)
173
174
  batch_data_to_db(:aai, aai_data)
174
175
 
@@ -18,10 +18,8 @@ class MiGA::DistanceRunner
18
18
  @ref_project = MiGA::Project.load(ref_path)
19
19
  raise "Cannot load reference project: #{ref_path}" if @ref_project.nil?
20
20
  elsif !opts[:run_taxonomy] && dataset.option(:db_project)
21
- ref_path = dataset.option(:db_project)
22
- if project.option(:db_proj_dir)
23
- ref_path = File.expand_path(ref_path, project.option(:db_proj_dir))
24
- end
21
+ ref_location = project.option(:db_proj_dir) || File.dirname(project.path)
22
+ ref_path = File.expand_path(dataset.option(:db_project), ref_location)
25
23
  @ref_project = MiGA::Project.load(ref_path)
26
24
  raise "Cannot load reference project: #{ref_path}" if @ref_project.nil?
27
25
  else
@@ -4,7 +4,8 @@
4
4
  "task": "FastA.N50.pl",
5
5
  "description": ["Calculates the N50 value of a set of sequences.",
6
6
  "Alternatively, it can calculate other N** values. It also calculates",
7
- "the total number of sequences and the total added length."],
7
+ "the total number of sequences, the total added length, and the",
8
+ "longest sequence length."],
8
9
  "help_arg": "",
9
10
  "see_also": ["FastA.length.pl"],
10
11
  "options": [
@@ -354,14 +355,14 @@
354
355
  "opt": "--in",
355
356
  "arg": "in_file",
356
357
  "mandatory": true,
357
- "description": "Input FastA file."
358
+ "description": "Input FastA file (supports .gz compression)."
358
359
  },
359
360
  {
360
361
  "name": "Output file",
361
362
  "opt": "--out",
362
363
  "arg": "out_file",
363
364
  "mandatory": true,
364
- "description": "Output FastA file."
365
+ "description": "Output FastA file (supports .gz compression)."
365
366
  },
366
367
  {
367
368
  "opt": "--fraction",
@@ -733,6 +734,41 @@
733
734
  }
734
735
  ]
735
736
  },
737
+ {
738
+ "task": "FastA.toFastQ.rb",
739
+ "description": "Creates a FastQ-compliant file from a FastA file.",
740
+ "see_also": "FastQ.toFastA.awk",
741
+ "help_arg": "--help",
742
+ "options": [
743
+ {
744
+ "name": "Input FastA",
745
+ "opt": "--in",
746
+ "arg": "in_file",
747
+ "mandatory": true,
748
+ "description": "Input FastA file (supports .gz compression)."
749
+ },
750
+ {
751
+ "name": "Output FastQ",
752
+ "opt": "--out",
753
+ "arg": "out_file",
754
+ "mandatory": true,
755
+ "description": "Output FastQ file (supports .gz compression)."
756
+ },
757
+ {
758
+ "opt": "--quality",
759
+ "arg": "integer",
760
+ "default": 31,
761
+ "description": ["PHRED quality score to use (fixed), in the range",
762
+ "[-5, 41]."]
763
+ },
764
+ {
765
+ "opt": "--encoding",
766
+ "arg": "integer",
767
+ "default": 33,
768
+ "description": "Base encoding (33 or 64)."
769
+ }
770
+ ]
771
+ },
736
772
  {
737
773
  "task": "FastA.wrap.rb",
738
774
  "description": "Wraps sequences in a FastA to a given line length.",
@@ -81,6 +81,47 @@
81
81
  }
82
82
  ]
83
83
  },
84
+ {
85
+ "task": "FastQ.maskQual.rb",
86
+ "description": "Masks low-quality bases in a FastQ file.",
87
+ "help_arg": "--help",
88
+ "options": [
89
+ {
90
+ "opt": "--input",
91
+ "arg": "in_file",
92
+ "mandatory": true,
93
+ "description": ["Path to the FastQ file containing the sequences.",
94
+ "Supports compression with .gz extension."]
95
+ },
96
+ {
97
+ "opt": "--output",
98
+ "arg": "out_file",
99
+ "mandatory": true,
100
+ "description": ["Path to the output FastQ file.",
101
+ "Supports compression with .gz extension."]
102
+ },
103
+ {
104
+ "opt": "--qual",
105
+ "arg": "integer",
106
+ "default": 15,
107
+ "description": "Minimum quality score to allow a base."
108
+ },
109
+ {
110
+ "opt": "--offset",
111
+ "arg": "integer",
112
+ "default": 33,
113
+ "description": "Q-score offset."
114
+ },
115
+ {
116
+ "opt": "--fasta",
117
+ "description": "Output sequences in FastA format."
118
+ },
119
+ {
120
+ "opt": "--quiet",
121
+ "description": "Run quietly."
122
+ }
123
+ ]
124
+ },
84
125
  {
85
126
  "task": "FastQ.offset.pl",
86
127
  "description": ["There are several FastQ formats. This script takes a",
@@ -160,14 +201,20 @@
160
201
  "opt": "--in",
161
202
  "arg": "in_file",
162
203
  "mandatory": true,
163
- "description": "FastQ file containing the sequences."
204
+ "description": [
205
+ "FastQ file containing the sequences.",
206
+ "Supports compression with .gz extension."
207
+ ]
164
208
  },
165
209
  {
166
210
  "name": "Output file",
167
211
  "opt": "--out",
168
212
  "arg": "out_file",
169
213
  "mandatory": true,
170
- "description": "FastQ to create."
214
+ "description": [
215
+ "FastQ to create.",
216
+ "Supports compression with .gz extension."
217
+ ]
171
218
  },
172
219
  {
173
220
  "opt": "--prefix",
@@ -188,6 +235,7 @@
188
235
  {
189
236
  "task": "FastQ.toFastA.awk",
190
237
  "description": "Translates FastQ files into FastA.",
238
+ "see_also": "FastA.toFastQ.rb",
191
239
  "help_arg": "'' --help",
192
240
  "options": [
193
241
  "<",
@@ -62,6 +62,76 @@
62
62
  "description": "Window size, in base pairs."
63
63
  }
64
64
  ]
65
+ },
66
+ {
67
+ "task": "sam.filter.rb",
68
+ "description": ["Filters a SAM or BAM file by target sequences and/or",
69
+ "identity."],
70
+ "see_also": ["anir.rb"],
71
+ "help_arg": "--help",
72
+ "options": [
73
+ {
74
+ "opt": "--genome",
75
+ "arg": "in_file",
76
+ "mandatory": true,
77
+ "description": ["Genome assembly.",
78
+ "Supports compression with .gz extension."]
79
+ },
80
+ {
81
+ "opt": "--mapping",
82
+ "arg": "in_file",
83
+ "mandatory": true,
84
+ "description": ["Mapping file.",
85
+ "Supports compression with .gz extension."]
86
+ },
87
+ {
88
+ "opt": "--out-sam",
89
+ "arg": "out_file",
90
+ "mandatory": true,
91
+ "description": ["Output filtered file in SAM format.",
92
+ "Supports compression with .gz extension."]
93
+ },
94
+ {
95
+ "opt": "--g-format",
96
+ "arg": "select",
97
+ "values": ["fasta", "list"],
98
+ "default": "fasta",
99
+ "description": ["Genome assembly format."]
100
+ },
101
+ {
102
+ "opt": "--m-format",
103
+ "arg": "select",
104
+ "values": ["sam", "bam"],
105
+ "default": "sam",
106
+ "description": ["Mapping file format. SAM supports compression with",
107
+ ".gz file extension."]
108
+ },
109
+ {
110
+ "opt": "--identity",
111
+ "arg": "float",
112
+ "description": "Set a fixed threshold of percent identity.",
113
+ "default": 95.0
114
+ },
115
+ {
116
+ "opt": "--no-header",
117
+ "description": "Do not include the headers."
118
+ },
119
+ {
120
+ "opt": "--threads",
121
+ "arg": "integer",
122
+ "description": "Threads to use.",
123
+ "default": 2
124
+ },
125
+ {
126
+ "opt": "--log",
127
+ "arg": "out_file",
128
+ "description": "Log file to save output."
129
+ },
130
+ {
131
+ "opt": "--quiet",
132
+ "description": "Run quietly."
133
+ }
134
+ ]
65
135
  }
66
136
  ]
67
137
  }
@@ -824,6 +824,83 @@
824
824
  "description": "Features to map in GFF."
825
825
  }
826
826
  ]
827
+ },
828
+ {
829
+ "task": "Table.prefScore.R",
830
+ "description": ["Estimate preference score of species based on occupancy",
831
+ "in biased sample sets."],
832
+ "help_arg": "--help",
833
+ "requires": [ { "r_package": "optparse" } ],
834
+ "options": [
835
+ {
836
+ "name": "Occupancy matrix",
837
+ "opt": "--x",
838
+ "arg": "in_file",
839
+ "description": ["A tab-delimited table of presence/absence (1/0)",
840
+ "with species as rows and samples as columns."],
841
+ "mandatory": true
842
+ },
843
+ {
844
+ "name": "Sample set",
845
+ "opt": "--set",
846
+ "arg": "in_file",
847
+ "description": ["A list of sample names that constitute the test",
848
+ "set, one per line."],
849
+ "mandatory": true
850
+ },
851
+ {
852
+ "opt": "--ignore",
853
+ "arg": "in_file",
854
+ "description": ["A list of species to exclude from the analysis,",
855
+ "one per line."]
856
+ },
857
+ {
858
+ "name": "Significance threshold",
859
+ "opt": "--signif-thr",
860
+ "arg": "float",
861
+ "description": "Absolute value of the significance threshold."
862
+ },
863
+ {
864
+ "opt": "--col-above",
865
+ "arg": "string",
866
+ "description": "Color for points significantly above zero.",
867
+ "default": "#941100"
868
+ },
869
+ {
870
+ "opt": "--col-equal",
871
+ "arg": "string",
872
+ "description": ["Color for points not significantly different from",
873
+ "zero."],
874
+ "default": "#BDBDBD"
875
+ },
876
+ {
877
+ "opt": "--col-below",
878
+ "arg": "string",
879
+ "description": "Color for points significantly below zero.",
880
+ "default": "#2F5496"
881
+ },
882
+ {
883
+ "name": "Output preference scores",
884
+ "arg": "out_file",
885
+ "description": "Output raw-text file with preference scores.",
886
+ "mandatory": true
887
+ },
888
+ {
889
+ "name": "Graphical output",
890
+ "arg": "out_file",
891
+ "description": "Output PDF file with preference scores plot."
892
+ },
893
+ {
894
+ "name": "Width",
895
+ "arg": "float",
896
+ "description": "Width of the plot in inches (7 by default)."
897
+ },
898
+ {
899
+ "name": "Height",
900
+ "arg": "float",
901
+ "description": "Height of the plot in inches (7 by default)."
902
+ }
903
+ ]
827
904
  }
828
905
  ]
829
906
  }
@@ -362,6 +362,139 @@
362
362
  }
363
363
  ]
364
364
  },
365
+ {
366
+ "task": "anir.rb",
367
+ "description": ["Estimates ANIr: the Average Nucleotide Identity of",
368
+ "reads against a genome."],
369
+ "help_arg": "--help",
370
+ "see_also": ["ani.rb", "sam.filter.rb"],
371
+ "options": [
372
+ {
373
+ "opt": "--reads",
374
+ "arg": "in_file",
375
+ "description": "Metagenomic reads."
376
+ },
377
+ {
378
+ "opt": "--genome",
379
+ "arg": "in_file",
380
+ "description": "Genome assembly."
381
+ },
382
+ {
383
+ "opt": "--mapping",
384
+ "arg": "in_file",
385
+ "description": "Mapping file."
386
+ },
387
+ {
388
+ "opt": "--list",
389
+ "arg": "in_file",
390
+ "description": "Output file with identities."
391
+ },
392
+ {
393
+ "opt": "--hist",
394
+ "arg": "in_file",
395
+ "description": "Output file with histogram."
396
+ },
397
+ {
398
+ "opt": "--tab",
399
+ "arg": "out_file",
400
+ "description": "Output file with results in tabular format."
401
+ },
402
+ {
403
+ "name": "Reads format",
404
+ "opt": "--r-format",
405
+ "arg": "select",
406
+ "description": ["Metagenomic reads format: fastq or fasta.",
407
+ "Both options support compression with .gz file extension."],
408
+ "values": ["fastq", "fasta"],
409
+ "default": "fastq"
410
+ },
411
+ {
412
+ "name": "Reads type",
413
+ "opt": "--r-type",
414
+ "arg": "select",
415
+ "description": ["Type of metagenomic reads: Single reads (single),",
416
+ "coupled reads in separate files (-m must be comma-delimited;",
417
+ "coupled), or coupled reads in a single interposed file",
418
+ "(interleaved)."],
419
+ "values": ["single", "coupled", "interleaved"],
420
+ "default": "single"
421
+ },
422
+ {
423
+ "name": "Genome format",
424
+ "opt": "--g-format",
425
+ "arg": "select",
426
+ "description": ["Genome assembly format: fasta or list.",
427
+ "Both options support compression with .gz file extension.",
428
+ "If passed in mapping-read mode, filters only matches to these",
429
+ "contigs."],
430
+ "values": ["fasta", "list"],
431
+ "default": "fasta"
432
+ },
433
+ {
434
+ "name": "Mapping format",
435
+ "opt": "--m-format",
436
+ "arg": "select",
437
+ "description": ["Mapping file format: sam, bam, tab, or list.",
438
+ "All except bam support compression with .gz file extension."],
439
+ "values": ["sam", "bam", "tab", "list"],
440
+ "default": "sam"
441
+ },
442
+ {
443
+ "opt": "--identity",
444
+ "arg": "float",
445
+ "description": "Set a fixed threshold of percent identity.",
446
+ "default": 95.0
447
+ },
448
+ {
449
+ "opt": "--algorithm",
450
+ "arg": "select",
451
+ "description": ["Set an algorithm to automatically detect identity",
452
+ "threshold: Valley detection by E-M of Gaussian Mixture Model",
453
+ "(gmm), fixed threshold (see Identity; fix),",
454
+ "Pick gmm or fix depending on bimodality (see Bimodality; auto)."],
455
+ "values": ["gmm", "fix", "auto"],
456
+ "default": "auto"
457
+ },
458
+ {
459
+ "opt": "--bimodality",
460
+ "arg": "float",
461
+ "description": ["Threshold of bimodality below which the algorithm",
462
+ "is set to fix. The coefficient used is the de Michele & Accantino",
463
+ "(2014) B index."],
464
+ "default": 0.5
465
+ },
466
+ {
467
+ "opt": "--coefficient",
468
+ "arg": "select",
469
+ "description": ["Coefficient of bimodality for Algorithm auto: ",
470
+ "Sarle's bimodality coefficient b (sarle), or",
471
+ "de Michele and Accatino (2014 PLoS ONE) B index",
472
+ "(use with Bimodality 0.1, dma)."],
473
+ "values": ["sarle", "dma"],
474
+ "default": "sarle"
475
+ },
476
+ {
477
+ "opt": "--bin-size",
478
+ "arg": "float",
479
+ "description": "Width of histogram bins (in percent identity).",
480
+ "default": 1.0
481
+ },
482
+ {
483
+ "opt": "--threads",
484
+ "arg": "integer",
485
+ "description": "Threads to use."
486
+ },
487
+ {
488
+ "opt": "--log",
489
+ "arg": "out_file",
490
+ "description": "Log file to save output."
491
+ },
492
+ {
493
+ "opt": "--quiet",
494
+ "description": "Run quietly."
495
+ }
496
+ ]
497
+ },
365
498
  {
366
499
  "task": "HMM.haai.rb",
367
500
  "description": ["Estimates Average Amino Acid Identity (AAI) from the",
@@ -407,10 +540,14 @@
407
540
  "sequences."],
408
541
  "help_arg": "--help",
409
542
  "cite":[
543
+ ["Camacho et al, 2009, BMC Bioinf (BLAST+)",
544
+ "https://doi.org/10.1186/1471-2105-10-421"],
410
545
  ["Altschul et al, 2000, JMB (BLAST)",
411
546
  "http://dx.doi.org/10.1016/S0022-2836(05)80360-2"],
412
547
  ["Buchfink B, Xie C, Huson D, 2015, Nat Meth (Diamond)",
413
- "https://dx.doi.org/10.1038/nmeth.3176"]
548
+ "https://dx.doi.org/10.1038/nmeth.3176"],
549
+ ["Kent, 2002, Genome Res (BLAT)",
550
+ "https://doi.org/10.1101/gr.229202"]
414
551
  ],
415
552
  "options": [
416
553
  {