miga-base 0.7.26.3 → 1.0.0.sr1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (105) hide show
  1. checksums.yaml +4 -4
  2. data/lib/miga/_data/aai-intax.blast.tsv.gz +0 -0
  3. data/lib/miga/_data/aai-intax.diamond.tsv.gz +0 -0
  4. data/lib/miga/_data/aai-novel.blast.tsv.gz +0 -0
  5. data/lib/miga/_data/aai-novel.diamond.tsv.gz +0 -0
  6. data/lib/miga/cli/action/doctor.rb +50 -19
  7. data/lib/miga/cli/action/doctor/base.rb +20 -18
  8. data/lib/miga/cli/action/init.rb +11 -7
  9. data/lib/miga/cli/action/init/files_helper.rb +1 -0
  10. data/lib/miga/cli/action/ncbi_get.rb +3 -3
  11. data/lib/miga/cli/action/tax_dist.rb +2 -2
  12. data/lib/miga/cli/action/wf.rb +5 -4
  13. data/lib/miga/daemon.rb +11 -4
  14. data/lib/miga/dataset/result.rb +10 -6
  15. data/lib/miga/json.rb +1 -2
  16. data/lib/miga/metadata.rb +5 -1
  17. data/lib/miga/parallel.rb +11 -6
  18. data/lib/miga/project.rb +8 -8
  19. data/lib/miga/project/base.rb +4 -4
  20. data/lib/miga/project/result.rb +2 -2
  21. data/lib/miga/sqlite.rb +7 -0
  22. data/lib/miga/version.rb +23 -9
  23. data/scripts/aai_distances.bash +16 -18
  24. data/scripts/ani_distances.bash +16 -17
  25. data/scripts/assembly.bash +31 -16
  26. data/scripts/haai_distances.bash +3 -27
  27. data/scripts/miga.bash +6 -4
  28. data/scripts/p.bash +1 -1
  29. data/scripts/read_quality.bash +9 -18
  30. data/scripts/trimmed_fasta.bash +14 -30
  31. data/scripts/trimmed_reads.bash +36 -36
  32. data/test/parallel_test.rb +31 -0
  33. data/test/project_test.rb +2 -1
  34. data/utils/distance/commands.rb +1 -0
  35. data/utils/distance/runner.rb +2 -4
  36. data/utils/enveomics/Manifest/Tasks/fasta.json +39 -3
  37. data/utils/enveomics/Manifest/Tasks/fastq.json +50 -2
  38. data/utils/enveomics/Manifest/Tasks/mapping.json +70 -0
  39. data/utils/enveomics/Manifest/Tasks/other.json +77 -0
  40. data/utils/enveomics/Manifest/Tasks/sequence-identity.json +138 -1
  41. data/utils/enveomics/Manifest/categories.json +13 -4
  42. data/utils/enveomics/Scripts/Aln.cat.rb +206 -148
  43. data/utils/enveomics/Scripts/FastA.N50.pl +33 -29
  44. data/utils/enveomics/Scripts/FastA.fragment.rb +69 -61
  45. data/utils/enveomics/Scripts/FastA.sample.rb +61 -46
  46. data/utils/enveomics/Scripts/FastA.toFastQ.rb +69 -0
  47. data/utils/enveomics/Scripts/FastQ.maskQual.rb +89 -0
  48. data/utils/enveomics/Scripts/FastQ.tag.rb +59 -52
  49. data/utils/enveomics/Scripts/SRA.download.bash +6 -8
  50. data/utils/enveomics/Scripts/Table.prefScore.R +60 -0
  51. data/utils/enveomics/Scripts/aai.rb +3 -2
  52. data/utils/enveomics/Scripts/anir.rb +137 -0
  53. data/utils/enveomics/Scripts/lib/enveomics_rb/anir.rb +293 -0
  54. data/utils/enveomics/Scripts/lib/enveomics_rb/bm_set.rb +175 -0
  55. data/utils/enveomics/Scripts/lib/enveomics_rb/enveomics.rb +17 -17
  56. data/utils/enveomics/Scripts/lib/enveomics_rb/errors.rb +17 -0
  57. data/utils/enveomics/Scripts/lib/enveomics_rb/gmm_em.rb +30 -0
  58. data/utils/enveomics/Scripts/lib/enveomics_rb/match.rb +63 -0
  59. data/utils/enveomics/Scripts/lib/enveomics_rb/rbm.rb +49 -0
  60. data/utils/enveomics/Scripts/lib/enveomics_rb/stats.rb +3 -0
  61. data/utils/enveomics/Scripts/lib/enveomics_rb/stats/rand.rb +31 -0
  62. data/utils/enveomics/Scripts/lib/enveomics_rb/stats/sample.rb +152 -0
  63. data/utils/enveomics/Scripts/lib/enveomics_rb/utils.rb +73 -0
  64. data/utils/enveomics/Scripts/rbm-legacy.rb +172 -0
  65. data/utils/enveomics/Scripts/rbm.rb +87 -133
  66. data/utils/enveomics/Scripts/sam.filter.rb +148 -0
  67. data/utils/enveomics/enveomics.R/DESCRIPTION +2 -2
  68. data/utils/enveomics/enveomics.R/NAMESPACE +1 -1
  69. data/utils/enveomics/enveomics.R/R/prefscore.R +79 -0
  70. data/utils/enveomics/enveomics.R/R/utils.R +30 -0
  71. data/utils/enveomics/enveomics.R/README.md +1 -0
  72. data/utils/enveomics/enveomics.R/man/cash-enve.GrowthCurve-method.Rd +0 -1
  73. data/utils/enveomics/enveomics.R/man/cash-enve.RecPlot2-method.Rd +0 -1
  74. data/utils/enveomics/enveomics.R/man/cash-enve.RecPlot2.Peak-method.Rd +0 -1
  75. data/utils/enveomics/enveomics.R/man/enve.__tribs.Rd +10 -2
  76. data/utils/enveomics/enveomics.R/man/enve.barplot.Rd +16 -4
  77. data/utils/enveomics/enveomics.R/man/enve.cliopts.Rd +13 -3
  78. data/utils/enveomics/enveomics.R/man/enve.df2dist.Rd +8 -2
  79. data/utils/enveomics/enveomics.R/man/enve.df2dist.group.Rd +8 -2
  80. data/utils/enveomics/enveomics.R/man/enve.df2dist.list.Rd +9 -2
  81. data/utils/enveomics/enveomics.R/man/enve.growthcurve.Rd +13 -5
  82. data/utils/enveomics/enveomics.R/man/enve.prefscore.Rd +50 -0
  83. data/utils/enveomics/enveomics.R/man/enve.prune.dist.Rd +9 -2
  84. data/utils/enveomics/enveomics.R/man/enve.recplot.Rd +23 -6
  85. data/utils/enveomics/enveomics.R/man/enve.recplot2.Rd +13 -4
  86. data/utils/enveomics/enveomics.R/man/enve.recplot2.compareIdentities.Rd +8 -2
  87. data/utils/enveomics/enveomics.R/man/enve.recplot2.extractWindows.Rd +7 -2
  88. data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.__mow_one.Rd +14 -3
  89. data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.em.Rd +10 -2
  90. data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.emauto.Rd +8 -2
  91. data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.mower.Rd +17 -9
  92. data/utils/enveomics/enveomics.R/man/enve.recplot2.windowDepthThreshold.Rd +6 -2
  93. data/utils/enveomics/enveomics.R/man/enve.selvector.Rd +23 -0
  94. data/utils/enveomics/enveomics.R/man/enve.tribs.Rd +14 -5
  95. data/utils/enveomics/enveomics.R/man/plot.enve.GrowthCurve.Rd +19 -4
  96. data/utils/enveomics/enveomics.R/man/plot.enve.TRIBS.Rd +11 -3
  97. data/utils/enveomics/enveomics.R/man/plot.enve.TRIBStest.Rd +11 -4
  98. data/utils/enveomics/enveomics.R/man/plot.enve.recplot2.Rd +26 -12
  99. data/utils/multitrim/Multitrim How-To.pdf +0 -0
  100. data/utils/multitrim/README.md +67 -0
  101. data/utils/multitrim/multitrim.py +1555 -0
  102. data/utils/multitrim/multitrim.yml +13 -0
  103. data/utils/requirements.txt +4 -3
  104. metadata +33 -6
  105. data/utils/enveomics/Scripts/lib/enveomics_rb/stat.rb +0 -30
@@ -0,0 +1,175 @@
1
+
2
+ require 'enveomics_rb/enveomics'
3
+ require 'enveomics_rb/match'
4
+ use 'tmpdir'
5
+ use 'shellwords'
6
+
7
module Enveomics
  ##
  # Set of best matches between the sequences in a query file (+qry+) and a
  # subject file (+sbj+). The search is delegated to an external engine and
  # executed lazily the first time the set is accessed; for each query only
  # the highest-scoring match passing all thresholds is kept
  class BMset
    ##
    # Tabular fields requested from the engines that support custom output
    # (blast+ and diamond), in the column order read by Enveomics::Match
    TABULAR_FIELDS = %w[
      qseqid sseqid pident length mismatch gapopen qstart qend
      sstart send evalue bitscore qlen slen
    ].freeze

    ##
    # Default value of every supported option, see #opt
    DEFAULTS = {
      len: 0, id: 0.0, fract: 0.0, score: 0.0,
      nucl: false, thr: 1, bin: '', program: :'blast+'
    }.freeze

    # Paths to the query and subject sequences. Readers for the set and the
    # options are the explicit methods #set and #opt below
    attr_reader :qry, :sbj

    ##
    # Initialize Enveomics::BMset object with sequence paths +qry+ and +sbj+,
    # and options Hash +opts+ (see #opt for supported options) with Symbol keys
    #
    # No search is executed at this point. Note that +opts+ is stored as
    # passed (not copied), and #opt fills missing keys into it
    def initialize(qry, sbj, opts = {})
      @qry = qry
      @sbj = sbj
      @set = nil
      @opt = opts
    end

    ##
    # Returns option with key +k+ (String or Symbol) as defined by #initialize
    # or by default. Supported options include [defaults in brackets]:
    # - len [0]: Minimum alignment length in residues
    # - id [0.0]: Minimum alignment identity in percent
    # - fract [0.0]: Minimum alignment length as fraction of the query
    # - score [0.0]: Minimum alignment score in bits
    # - nucl [false]: The sequences are in nucleotides
    # - thr [1]: Number of threads to use
    # - bin ['']: Path to the directory containing binaries
    # - program [:blast+]: Search engine to use
    def opt(k)
      k = k.to_sym
      # Only nil triggers the default, so an explicit +false+ is respected
      @opt[k] = DEFAULTS[k] if @opt[k].nil?
      @opt[k]
    end

    ##
    # Hash of Enveomics::Match objects (the best qualifying match of each
    # query) keyed by query sequence ID. Runs the search on first access
    def set
      match_and_filter! if @set.nil?
      @set
    end

    ##
    # Returns the best match of query +qry+ as Enveomics::Match or nil if
    # no qualifying match was found
    def [](qry)
      set[qry]
    end

    ##
    # Number of queries with a qualifying best match
    def count
      set.count
    end

    ##
    # Execute search and filter matches, (re)building the set from scratch
    def match_and_filter!
      @set = {}
      match!.each do |match|
        # Already a better match?
        best = @set[match.qry]
        next if best && best.score >= match.score

        # Is this a good enough match?
        next unless %i[len id score fract].all? do |metric|
          match.send(metric) >= opt(metric)
        end

        # Save match
        @set[match.qry] = match
      end
    end

    ##
    # Find all matches and return as an array of Enveomics::Match objects
    #
    # All intermediate files live in a temporary directory that is removed
    # when the method returns. Raises Enveomics::OptionError if the search
    # engine is unsupported. If a command fails with
    # Enveomics::CommandError, the error and the captured error log are
    # printed to STDERR and the process exits
    def match!
      y = []
      Dir.mktmpdir do |dir|
        # Determine commands
        say('Temporal directory: ', dir)
        out_path = File.join(dir, 'out.tsv')
        cmds = search_commands(dir, out_path)

        # Run commands
        say('Running comparison')
        say('Query: ', qry)
        say('Subject: ', sbj)
        run_commands(cmds, File.join(dir, 'err'))

        # Parse output
        File.open(out_path, 'r') do |fh|
          fh.each { |ln| y << Enveomics::Match.new(ln) }
        end
      end
      y
    end

    ##
    # Enumerate best matches (Enveomics::Match objects) and yield +blk+ with
    # each one. Returns an Enumerator if no block is given
    def each(&blk)
      if block_given?
        set.each { |_, bm| blk.call(bm) }
      else
        to_enum(:each)
      end
    end

    private

    ##
    # Commands (each one an Array of String arguments) required to search
    # #qry against #sbj with the engine selected by the +program+ option,
    # using +dir+ for intermediate files and saving the tabular result in
    # +out_path+
    def search_commands(dir, out_path)
      db_path = File.join(dir, 'sbj.db')
      case opt(:program)
      when :blast
        [
          [
            'formatdb', '-i', sbj, '-n', db_path, '-l', File.join(dir, 'log'),
            '-p', opt(:nucl) ? 'F' : 'T'
          ],
          [
            'blastall', '-p', opt(:nucl) ? 'blastn' : 'blastp', '-d', db_path,
            '-i', qry, '-v', '1', '-b', '1', '-a', opt(:thr).to_s, '-m', '8',
            '-o', out_path
          ]
        ]
      when :'blast+'
        [
          [
            'makeblastdb', '-in', sbj, '-out', db_path,
            '-dbtype', opt(:nucl) ? 'nucl' : 'prot'
          ],
          [
            opt(:nucl) ? 'blastn' : 'blastp', '-db', db_path, '-query', qry,
            '-num_threads', opt(:thr).to_s, '-out', out_path, '-outfmt',
            "6 #{TABULAR_FIELDS.join(' ')}"
          ]
        ]
      when :diamond
        if opt(:nucl)
          raise Enveomics::OptionError,
                'Unsupported search engine diamond for nucleotides'
        end

        [
          [
            'diamond', 'makedb', '--in', sbj, '--db', db_path,
            '--threads', opt(:thr).to_s
          ],
          [
            'diamond', 'blastp', '--threads', opt(:thr).to_s,
            '--db', db_path, '--query', qry, '--daa', "#{out_path}.daa",
            '--quiet', '--sensitive'
          ],
          [
            'diamond', 'view', '--daa', "#{out_path}.daa", '--out', out_path,
            '--quiet', '--outfmt', '6', *TABULAR_FIELDS
          ]
        ]
      when :blat
        blat = ['blat', sbj, qry, '-out=blast8', out_path]
        blat << '-prot' unless opt(:nucl)
        [blat]
      else
        raise Enveomics::OptionError,
              "Unsupported search engine: #{opt(:program)}"
      end
    end

    ##
    # Run each command in +cmds+ in order, prefixing the executable with the
    # +bin+ option (if any) and redirecting STDERR to the file +err_path+
    def run_commands(cmds, err_path)
      cmds.each do |cmd|
        cmd[0] = File.join(opt(:bin), cmd[0]) unless opt(:bin) == ''
        run_cmd(cmd, stderr: err_path)
      end
    rescue Enveomics::CommandError => e
      $stderr.puts e
      $stderr.puts ''
      $stderr.puts '[ Error log ]'
      $stderr.puts File.read(err_path)
      # NOTE(review): a bare +exit+ reports success (status 0) even though
      # the search failed; confirm whether a non-zero status is expected
      exit
    end
  end
end
@@ -1,24 +1,24 @@
1
1
 
2
- #
3
- # @author: Luis M. Rodriguez-R
4
- # @license: artistic license 2.0
5
- #
2
+ require 'enveomics_rb/utils'
3
+ use 'optparse'
4
+ ARGV << '-h' if ARGV.empty?
6
5
 
7
- require "optparse"
8
- ARGV << "-h" if ARGV.size==0
6
+ module Enveomics
7
+ class << self
8
+ def opt_banner(opt, banner, usage = nil)
9
+ opt.version ||= $VERSION
10
+ usage ||= "#{opt.program_name}.rb [options]"
11
+ opt.banner = <<~BANNER
9
12
 
10
- def use(gems, mandatory=true)
11
- gems = [gems] unless gems.is_a? Array
12
- begin
13
- require "rubygems"
14
- while ! gems.empty?
15
- require gems.shift
13
+ [Enveomics Collection: #{opt.program_name} #{opt.version}]
14
+
15
+ #{banner}
16
+
17
+ Usage
18
+ #{usage}
19
+
20
+ BANNER
16
21
  end
17
- return true
18
- rescue LoadError
19
- abort "\nUnmet requirements, please install required gems:" +
20
- gems.map{ |gem| "\n gem install #{gem}" }.join + "\n\n" if mandatory
21
- return false
22
22
  end
23
23
  end
24
24
 
@@ -0,0 +1,17 @@
1
+
2
module Enveomics
  # Common ancestor of all the Enveomics-specific errors, so that callers can
  # rescue any of them at once
  class Error < RuntimeError; end

  # Error related to the execution of a command
  # (presumably an external one; confirm against the code raising it)
  class CommandError < Error; end

  # Error related to an unsupported or invalid option
  class OptionError < Error; end

  # Error signaling a feature that is not implemented
  # (presumably; no use is visible in this file)
  class UnimplementedError < Error; end

  # Error related to parsing
  # (presumably of input data; no use is visible in this file)
  class ParseError < Error; end
end
@@ -0,0 +1,30 @@
1
+
2
+ require 'enveomics_rb/stats/sample'
3
+
4
module Enveomics
  # Calculate Gaussian Mixture Models by Expectation Maximization
  #
  # Only the constructor is defined here: it stores the sample, the number
  # of components, and the options with their defaults
  class GmmEm
    # Enveomics::Stats::Sample wrapping the input values
    attr_reader :sample
    # Number of gaussian components requested
    attr_reader :components
    # Options Hash, see #initialize
    attr_reader :opts

    # Initialize Enveomics::GmmEm object from numeric array +x+, +components+
    # gaussian components (an Integer), and options hash +opts+ with supported
    # Symbol keys:
    # - ll_delta_convergence: Maximum change in LL to consider convergence
    #   (by default: 1e-15)
    # - max_iter: Maximum number of EM iterations (by default: 1_000)
    # - init_mu: Initial components means as numeric array
    # - init_sigma: Initial components standard deviation as numeric array
    # - init_alpha: Initial components fractions as numeric array adding up to 1
    #
    # The defaults are written into +opts+ itself (the Hash is not copied).
    # The init_* keys are stored with the rest of the options but are not
    # read by the code in this class as it stands
    def initialize(x, components = 2, opts = {})
      @sample = Enveomics::Stats::Sample.new(x)
      @components = components
      @opts = opts
      @opts[:ll_delta_convergence] ||= 1e-15
      @opts[:max_iter] ||= 1_000
    end
  end
end
30
+
@@ -0,0 +1,63 @@
1
+
2
module Enveomics
  ##
  # A simple object representing a sequence match from a search engine
  # supporting tabular BLAST output
  #
  # Columns are read by position (0-based) in the order: qseqid, sseqid,
  # pident, length, mismatch, gapopen, qstart, qend, sstart, send, evalue,
  # bitscore, and optionally qlen and slen. Missing or non-numeric cells are
  # read as zero by the numeric accessors
  class Match
    # Array of String cells of the tabular line
    attr_reader :row

    ##
    # Initialize Enveomics::Match object from a tabular blast line String +ln+
    def initialize(ln)
      @row = ln.chomp.split("\t")
    end

    ##
    # ID of the query sequence (column 0)
    def qry
      row[0]
    end

    ##
    # ID of the subject sequence (column 1)
    def sbj
      row[1]
    end

    ##
    # Alignment identity in percent as Float (column 2)
    def id
      @id ||= row[2].to_f
    end

    ##
    # Alignment length as Integer (column 3)
    def len
      @len ||= row[3].to_i
    end

    ##
    # Expect value of the match as Float (column 10)
    def evalue
      @evalue ||= row[10].to_f
    end

    ##
    # Bit score of the match as Float (column 11)
    def score
      @score ||= row[11].to_f
    end

    ##
    # Length of the query sequence as Integer (column 12), or zero if the
    # column is absent
    def qry_len
      @qry_len ||= row[12].to_i
    end

    ##
    # Length of the subject sequence as Integer (column 13), or zero if the
    # column is absent
    def sbj_len
      @sbj_len ||= row[13].to_i
    end

    ##
    # Alignment length as a fraction of the query length, or 0.0 if the
    # query length is unknown (zero)
    def qry_fract
      return 0.0 if qry_len.zero?

      @qry_fract ||= len.to_f / qry_len
    end

    alias fract qry_fract

    ##
    # Alignment length as a fraction of the subject length, or 0.0 if the
    # subject length is unknown (zero)
    def sbj_fract
      return 0.0 if sbj_len.zero?

      # Cached separately from #qry_fract, the two values differ
      @sbj_fract ||= len.to_f / sbj_len
    end

    ##
    # The match as a tabular line (without trailing newline)
    def to_s
      row.join("\t")
    end
  end
end
@@ -0,0 +1,49 @@
1
+ require 'enveomics_rb/bm_set'
2
+
3
module Enveomics
  ##
  # Reciprocal best matches between two sets of sequences, built from the
  # best matches in each direction (two Enveomics::BMset objects)
  class RBM
    # Sequence paths and the best-match sets in each direction:
    # +bms1+ searches +seq1+ against +seq2+, +bms2+ the opposite
    attr_reader :seq1, :seq2, :bms1, :bms2

    ##
    # Initialize RBM object with sequence paths +seq1+ and +seq2+, and
    # Enveomics::BMset options Hash +bm_opts+ (the same Hash is handed to
    # both best-match sets)
    def initialize(seq1, seq2, bm_opts = {})
      @seq1 = seq1
      @seq2 = seq2
      @bms1 = Enveomics::BMset.new(seq1, seq2, bm_opts)
      @bms2 = Enveomics::BMset.new(seq2, seq1, bm_opts)
      @set = nil
    end

    ##
    # Array of Reciprocal Best Enveomics::Match objects, calculated on
    # first access and cached afterwards
    def set
      @set = reciprocate! unless @set
      @set
    end

    ##
    # Number of reciprocal best matches found
    def count
      set.size
    end

    ##
    # Find reciprocal best matches and return the subset of +bms1+ that
    # is reciprocal with +bms2+: matches whose subject has, in turn, the
    # original query as its own best match
    def reciprocate!
      bms1.each.select do |forward|
        backward = bms2[forward.sbj]
        backward && forward.qry == backward.sbj
      end
    end

    ##
    # Enumerate RBMs and yield +blk+ with each one, or return an Enumerator
    # if no block is given
    def each(&blk)
      return to_enum(:each) unless block_given?

      set.each(&blk)
    end
  end
end
@@ -0,0 +1,3 @@
1
+ require 'enveomics_rb/stats/rand'
2
+ require 'enveomics_rb/stats/sample'
3
+
@@ -0,0 +1,31 @@
1
+
2
module Enveomics
  module Stats
    class << self
      # Generates a random number from the +dist+ distribution with +params+
      # parameters. This is simply a wrapper to the r_* functions below:
      # +dist+ is the suffix of the function to call (+:unif+, +:geom+, or
      # +:sgeom+) and +params+ are passed to it untouched.
      def rand(dist = :unif, *params)
        send("r_#{dist}", *params)
      end

      # Generates a random number from the uniform distribution between +min+
      # and +max+. By default generates random numbers between 0.0 and 1.0.
      def r_unif(min = 0.0, max = 1.0)
        min + (max - min) * Random.rand
      end

      # Generates a random number from the geometric distribution with support
      # {0, 1, 2, ...} and probability of success +p+, expected in (0, 1].
      #
      # Uses inverse transform sampling. The ratio of logarithms is rounded
      # down (instead of rounded up minus one) so that the boundary cases, a
      # uniform draw of exactly 0.0 or +p+ = 1.0, stay within the support.
      def r_geom(p)
        (Math.log(1.0 - rand) / Math.log(1.0 - p)).floor
      end

      # Generates a random number from the shifted geometric distribution with
      # support {1, 2, 3, ...} and probability of success +p+, expected in
      # (0, 1].
      def r_sgeom(p)
        r_geom(p) + 1
      end
    end
  end
end
31
+
@@ -0,0 +1,152 @@
1
+
2
module Enveomics
  module Stats
    # Descriptive statistics for a given sample
    class Sample
      # Sample values as an Array of Float
      attr_reader :x
      # Options Hash, see #initialize
      attr_reader :opts

      # Initialize Enveomics::Stats::Sample with numeric vector +x+ and options
      # Hash +opts+ supporting the keys:
      # - +effective_range+: Range where values fall (by default: range of +x+)
      # - +histo_bin_size+: Width of histogram bins
      #   (by default: 1/50th of +effective_range+)
      #
      # Raises RuntimeError if +x+ is empty. The values are converted to
      # Float; +opts+ is stored as passed and defaults are filled into it
      # as they are requested
      def initialize(x, opts = {})
        raise 'Cannot initialize an empty sample' if x.empty?

        @x = x.map(&:to_f)
        @opts = opts
      end

      # Size of the sample
      def n
        x.size
      end

      # Estimates the sample mean
      def mean
        @mean ||= x.inject(:+) / n
      end

      # Estimates the mean of the square of the sample
      def square_mean
        @square_mean ||= x.map { |i| i**2 }.inject(:+) / n
      end

      # Estimates the unbiased sample variance (n - 1 in the denominator)
      def var
        @var ||= (square_mean - mean**2) * n / (n - 1)
      end

      # Estimates the sample standard deviation as the square root of #var
      def sd
        @sd ||= var**0.5
      end

      # --- Higher moments ---

      # Estimate sample skewness, or 0.0 for samples of size one
      def skewness
        return 0.0 if n == 1

        cubed_dev = x.inject(0.0) { |sum, i| sum + (i - mean)**3 }
        cubed_dev / ((n - 1) * (sd**3))
      end

      # Estimate sample kurtosis, or 0.0 for samples of size one
      #
      # NOTE(review): this was originally described as the excess kurtosis,
      # but no 3 is subtracted from the standardized fourth moment; confirm
      # which of the two is intended
      def kurtosis
        return 0.0 if n == 1

        quart_dev = x.inject(0.0) { |sum, i| sum + (i - mean)**4 }
        quart_dev / ((n - 1) * (sd**4))
      end

      # --- Ranges ---

      # Range effectively considered as a two-entry Array (minimum and
      # maximum), with missing entries taken from the sample
      def effective_range
        @opts[:effective_range] ||= [nil, nil]
        @opts[:effective_range][0] ||= x.min
        @opts[:effective_range][1] ||= x.max
        @opts[:effective_range]
      end

      # Size of the effective range
      def effective_range_size
        effective_range[1] - effective_range[0]
      end

      # --- Histograms ---

      # Size of each histogram bin
      def histo_bin_size
        @opts[:histo_bin_size] ||= effective_range_size / 50.0
      end

      # Calculate histogram ranges without checking for cached value
      #
      # Use #histo_ranges instead
      def calculate_histo_ranges
        # Bins are laid out from the maximum downwards until the minimum
        # of the effective range is reached
        rng = [[effective_range[1], effective_range[1] - histo_bin_size]]
        while rng.last[1] > effective_range[0]
          rng << [rng.last[1], rng.last[1] - histo_bin_size]
        end
        rng
      end

      # Histogram ranges as an array of two-entry arrays where the first entry
      # is the closed-ended maximum value (inclusive) of the range and the
      # second entry is the open-ended minimum value (non-inclusive) of the
      # range. The array is sorted from maximum to minimum
      #
      # Something like: +[[100.0, 99.0], [99.0, 98.0], ...]+, representing the
      # ranges: {[100, 99), [99, 98), ...}
      #
      # The bin width is determined by #histo_bin_size
      def histo_ranges
        @histo_ranges ||= calculate_histo_ranges
      end

      # Mid-points of the histogram ranges from #histo_ranges, returns
      # an array of Float
      def histo_mids
        @histo_mids ||= histo_ranges.map { |r| (r[0] + r[1]) / 2 }
      end

      # Calculate the histogram counts without checking cached value
      #
      # Use #histo_counts instead
      def calculate_histo_counts
        counts = []
        remaining = x.dup
        histo_ranges.each do |range|
          # Values above the lower bound of this bin are counted and
          # removed, so that lower bins never see them again
          before = remaining.size
          remaining.delete_if { |j| j > range[1] }
          counts << before - remaining.size
        end
        counts
      end

      # Histogram counts in the ranges determined by #histo_ranges
      def histo_counts
        @histo_counts ||= calculate_histo_counts
      end

      # --- Bimodality coefficients ---

      # Sarle's sample bimodality coefficient b
      #
      # The small-sample correction is calculated in floating point (Integer
      # division would truncate it). Its denominator is zero for samples of
      # size 2 or 3, for which the coefficient is not meaningful
      def sarle_bimodality
        correction = 3.0 * (n - 1)**2 / ((n - 2) * (n - 3))
        (skewness**2 + 1) / (kurtosis + correction)
      end

      # de Michele & Accantino (2014) B index
      # DOI: 10.1371%2Fjournal.pone.0091195
      def dma_bimodality
        (mean - dma_mu_M).abs
      end

      # µ_M index proposed by Michele & Accantino (2014)
      # DOI: 10.1371%2Fjournal.pone.0091195
      def dma_mu_M
        weighted = histo_counts.each_with_index.map { |m, k| m * histo_mids[k] }
        weighted.inject(:+) / n
      end
    end
  end
end
152
+