loganb-kder 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: cde53f25f8badc44aef6aaf29bcb0d255940fd73
4
+ data.tar.gz: 3568e1d009ba8bc7fac4a610a3d0882349818bbd
5
+ SHA512:
6
+ metadata.gz: b44e25490f96342d66f9c70ba8e3386372380dc086e5beda9ad0f0e6784b095b9f50042ef28442a2e14f7b5427737527aa7336269c3945df5079e41f594b8137
7
+ data.tar.gz: 112de7099a77c778a60d25468abf1890c7990db6f086771aa80b7cd4206b3313c397f948d8f1f92526460e9695d998e07f304173df509a359da32352e3a272ad
@@ -0,0 +1,13 @@
1
+ require_relative '../util/enum_extensions'
2
# Bandwidth estimators for kernel density estimation.
module Bandwidth
  class << self
    # Silverman's rule-of-thumb bandwidth: (4 * sigma^5 / (3 * n)) ^ (1/5).
    #
    # input_arr     - collection of samples; only its +size+ is read here.
    # std_deviation - spread of the samples. Defaults to
    #                 input_arr.standard_deviation, which the receiver must
    #                 provide (the default is only evaluated when the
    #                 argument is omitted).
    #
    # Returns the bandwidth as a Float.
    def silverman(input_arr, std_deviation = input_arr.standard_deviation)
      # 4.0 forces floating-point division; with an Integer std_deviation the
      # quotient would otherwise be truncated before the fifth root is taken.
      ((4.0 * std_deviation**5) / (3 * input_arr.size))**(1.0 / 5)
    end
    alias_method :estimate, :silverman
    alias_method :nrd0, :silverman

    # TODO: mean_integrated_squared_error(a, b, c, d) is not implemented yet.
  end
end
data/lib/kder/kde.rb ADDED
@@ -0,0 +1,86 @@
1
+ require 'set'
2
+
3
+ class Kder
4
+ require_relative 'bandwidth'
5
+ require_relative '../util/statistics'
6
+
7
+ Sigmas = 2.5
8
+ MeshCount = 2e3
9
+ MinimumThresholdValue = 1e-2
10
+ MinimumStepSize = 1e-3
11
+ DifferenceThreshold = 1e-3
12
+ class << self
13
##
# :singleton-method: kde
# Computes a Gaussian kernel density estimate for a list of numeric samples.
#
# arr  - non-empty list of numeric samples; it is left unmodified.
# bw   - kernel bandwidth. When nil it is estimated with
#        Bandwidth.silverman(arr). A zero or NaN bandwidth is replaced by 0.1.
# opts - optional overrides, merged over the defaults:
#        :sigmas           - half-width of the kernel window, in bandwidths
#        :sampling_density - number of grid steps between the curve end points
#        :threshold        - intensities below this value are not emitted
#        :minimum_delta    - a point is not emitted when its intensity differs
#                            from the last emitted one by less than this
#
# Returns a two member array, [x_vals, y_vals], representing the kde. The
# curve always starts at [min, 0] and ends at [max, 0].
# Raises ArgumentError when arr is empty.
def kde(arr, bw = nil, opts = {sigmas: Sigmas, sampling_density: MeshCount,
                               threshold: MinimumThresholdValue, minimum_delta: DifferenceThreshold})
  raise ArgumentError, 'arr must contain at least one sample' if arr.empty?

  # Merge so that a partial opts hash keeps the remaining defaults.
  defaults = {sigmas: Sigmas, sampling_density: MeshCount,
              threshold: MinimumThresholdValue, minimum_delta: DifferenceThreshold}
  opts = defaults.merge(opts)

  bw ||= Bandwidth.silverman(arr)
  # A zero bandwidth (identical samples) or NaN bandwidth (Silverman's rule
  # on a single sample) cannot be used as a kernel width.
  bw = 0.1 if bw == 0 || (bw.respond_to?(:nan?) && bw.nan?)

  # Work on a sorted copy; the caller's array is never reordered.
  samples = arr.sort

  # Initialization steps
  range = bw * opts[:sigmas]
  min = samples.first - range
  max = samples.last + range
  step_size = (max - min) / opts[:sampling_density].to_f
  step_size = MinimumStepSize if step_size < MinimumStepSize

  grid = (min..max).step(step_size).to_a
  output = [[min, 0]]
  old_intensity = 0

  # Step through the grid; the first grid point is the [min, 0] anchor above.
  grid.drop(1).each do |mid|
    window = (mid - range)..(mid + range)
    included = samples.select { |a| window.cover?(a) }
    intensity = included.map { |a| Kder::Statistics.custom_pdf(a - mid, bw) }.inject(:+) || 0
    next if intensity < opts[:threshold]
    next if (intensity - old_intensity).abs < opts[:minimum_delta]

    output << [mid, intensity]
    old_intensity = intensity
  end

  output << [max, 0]
  output.transpose
end
48
+
49
#
# :singleton-method: kdevec
# Works just like kde, but arr is a list of 2-D points of sample value and
# magnitude: each point [value, magnitude] contributes its kernel scaled by
# magnitude. The author's original note questions whether this weighting is
# valid math; it has not been verified here.
#
# arr  - non-empty list of [value, magnitude] pairs; it is left unmodified.
# bw   - kernel bandwidth (required). A zero bandwidth is replaced by 0.1,
#        the same fallback kde applies.
# opts - optional overrides merged over the defaults:
#        :sigmas, :sampling_density, :threshold, :minimum_delta
#
# Returns a two member array, [x_vals, y_vals].
# Raises RuntimeError when bw is missing and ArgumentError when arr is empty.
#
def kdevec(arr, bw = nil, opts = {}.freeze)
  opts = {sigmas: Sigmas, sampling_density: MeshCount,
          threshold: MinimumThresholdValue, minimum_delta: DifferenceThreshold}.merge(opts)
  raise "bandwidth must be specified for now" unless bw
  raise ArgumentError, 'arr must contain at least one point' if arr.empty?

  # A zero bandwidth would divide by zero inside the kernel.
  bw = 0.1 if bw == 0

  values = arr.sort_by { |v| v[0] }

  # Initialization steps
  range = bw * opts[:sigmas]
  min = values.first[0] - range
  max = values.last[0] + range
  step_size = (max - min) / opts[:sampling_density].to_f
  step_size = MinimumStepSize if step_size < MinimumStepSize

  grid = (min..max).step(step_size).to_a
  output = [[min, 0]]
  old_intensity = 0

  # Step through the grid; the first grid point is the [min, 0] anchor above.
  grid.drop(1).each do |mid|
    window = (mid - range)..(mid + range)
    included = values.select { |v| window.cover?(v[0]) }
    intensity = included.map { |v| v[1] * Kder::Statistics.custom_pdf(v[0] - mid, bw) }.inject(:+) || 0
    next if intensity < opts[:threshold]
    next if (intensity - old_intensity).abs < opts[:minimum_delta]

    output << [mid, intensity]
    old_intensity = intensity
  end

  output << [max, 0]
  output.transpose
end
85
+ end
86
+ end
@@ -0,0 +1,3 @@
1
class Kder
  # Release version of the gem. Frozen so the shared constant cannot be
  # modified in place.
  VERSION = "0.1.0".freeze
end
data/lib/kder.rb ADDED
@@ -0,0 +1 @@
1
+ require_relative 'kder/kde'
@@ -0,0 +1,19 @@
1
# Descriptive-statistics helpers added to every Enumerable.
# Thanks StackOverflow users!
# http://stackoverflow.com/questions/7749568/how-can-i-do-standard-deviation-in-ruby
module Enumerable
  # Ruby 2.4+ already provides Enumerable#sum, including its initial-value
  # and block forms. Only define this fallback on interpreters that lack it,
  # so the built-in is never overridden.
  unless method_defined?(:sum)
    # Sum of all elements, folded with +.
    def sum
      inject(:+)
    end
  end

  # Arithmetic mean of the elements, computed with Float division.
  # Uses #count rather than #length because #length is not part of
  # Enumerable (a Range, for example, does not respond to it).
  def mean
    sum / count.to_f
  end
  alias average mean

  # Unbiased sample variance (divides by n - 1). With a single element the
  # result is 0 / 0.0, i.e. NaN.
  def sample_variance
    m = mean
    squared_error = inject(0) { |accum, i| accum + (i - m)**2 }
    squared_error / (count - 1).to_f
  end

  # Sample standard deviation: square root of #sample_variance.
  def standard_deviation
    Math.sqrt(sample_variance)
  end
end
@@ -0,0 +1,10 @@
1
class Kder
  # Probability helpers used by the density estimators.
  module Statistics
    # sqrt(2 * pi), the normalisation factor of the Gaussian density.
    SQ2PI = Math.sqrt(2 * Math::PI)

    # Normal pdf with a custom sigma value, still centered around 0.
    #
    # x     - point at which the density is evaluated.
    # sigma - standard deviation of the distribution (default 1.0).
    #
    # Returns the density at x as a Float.
    def self.custom_pdf(x, sigma = 1.0)
      # to_f keeps x / sigma from being truncated by Integer division when
      # both arguments are Integers.
      z = x / sigma.to_f
      (1.0 / (sigma * SQ2PI)) * Math.exp(-0.5 * z**2)
    end
  end
end
metadata ADDED
@@ -0,0 +1,53 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: loganb-kder
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.1.0
5
+ platform: ruby
6
+ authors:
7
+ - Ryan Taylor
8
+ - Logan Bowers
9
+ autorequire:
10
+ bindir: bin
11
+ cert_chain: []
12
+ date: 2017-11-15 00:00:00.000000000 Z
13
+ dependencies: []
14
+ description: A KDE implementation in Ruby.
15
+ email:
16
+ - ryanmt@byu.net
17
+ - logan@datacurrent.com
18
+ executables: []
19
+ extensions: []
20
+ extra_rdoc_files: []
21
+ files:
22
+ - lib/kder.rb
23
+ - lib/kder/bandwidth.rb
24
+ - lib/kder/kde.rb
25
+ - lib/kder/version.rb
26
+ - lib/util/enum_extensions.rb
27
+ - lib/util/statistics.rb
28
+ homepage: https://github.com/loganb/KDER
29
+ licenses: []
30
+ metadata: {}
31
+ post_install_message:
32
+ rdoc_options: []
33
+ require_paths:
34
+ - lib
35
+ required_ruby_version: !ruby/object:Gem::Requirement
36
+ requirements:
37
+ - - ">="
38
+ - !ruby/object:Gem::Version
39
+ version: '0'
40
+ required_rubygems_version: !ruby/object:Gem::Requirement
41
+ requirements:
42
+ - - ">="
43
+ - !ruby/object:Gem::Version
44
+ version: '0'
45
+ requirements: []
46
+ rubyforge_project:
47
+ rubygems_version: 2.5.2
48
+ signing_key:
49
+ specification_version: 4
50
+ summary: Kernel Density Estimation, and an associated bandwidth estimator, rudimentary
51
+ at best, but a decent first approximation for backend preparing of KDE plots for
52
+ plotting in JS libraries, or for whatever you would like to use it.
53
+ test_files: []