normalizer 0.0.0 → 0.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/README.rdoc +16 -1
- data/VERSION +1 -1
- data/lib/normalizer.rb +62 -1
- data/normalizer.gemspec +5 -4
- data/spec/normalizer_spec.rb +39 -2
- metadata +2 -2
data/README.rdoc
CHANGED
@@ -24,7 +24,22 @@ To find the min/max of your current data:
|
|
24
24
|
data = [[0, 1, 2, 3, 4], [10, 11, 12, 13, 14]]
|
25
25
|
Normalizer.find_min_and_max(data)
|
26
26
|
#=> [[0, 1, 2, 3, 4], [10, 11, 12, 13, 14]]
|
27
|
+
|
28
|
+
You can also pad the min/max with a buffer, given as a number of standard deviations:
|
29
|
+
data = [[0, 0, 0, 0, 0], [10, 10, 10, 10, 10]]
|
30
|
+
Normalizer.find_min_and_max(data, :std => 3)
|
31
|
+
#=> [[-21.2132034355964, -21.2132034355964, -21.2132034355964, -21.2132034355964, -21.2132034355964], [31.2132034355964, 31.2132034355964, 31.2132034355964, 31.2132034355964, 31.2132034355964]]
|
32
|
+
|
33
|
+
On a project I'm currently working on I need to know whether data has gone past the max/min amount:
|
34
|
+
a = Normalizer.new(:min => [0], :max => [10])
|
35
|
+
a.normalize([50])
|
36
|
+
a.breaks_boundary?
|
37
|
+
#=> true
|
38
|
+
|
39
|
+
= Thanks
|
40
|
+
|
41
|
+
David Richards (http://blog.tegugears.com/)
|
27
42
|
|
28
43
|
== Copyright
|
29
44
|
|
30
|
-
Copyright (c) 2009
|
45
|
+
Copyright (c) 2009 Red Davis. See LICENSE for details.
|
data/VERSION
CHANGED
@@ -1 +1 @@
|
|
1
|
-
0.0.0
|
1
|
+
0.0.1
|
data/lib/normalizer.rb
CHANGED
@@ -4,14 +4,55 @@ class Normalizer
|
|
4
4
|
def find_min_and_max(data, options={})
|
5
5
|
@data = data
|
6
6
|
@std = options[:std] || 0
|
7
|
+
@cat_data = Array.new(@data[0].size) { Array.new }
|
8
|
+
|
9
|
+
# Along with finding max and min this also fills @cat_data like so:
|
10
|
+
# [[1,2,3], [1,2,3]] turns into [[1,1], [2,2], [3,3]] so we can calculate the mean and standard deviation of each column
|
7
11
|
@max, @min = find_max, find_min
|
8
12
|
|
9
13
|
unless @std > 0
|
14
|
+
[@min, @max]
|
15
|
+
else
|
16
|
+
mean = find_mean
|
17
|
+
std = calculate_std(mean)
|
18
|
+
|
19
|
+
@max.each_with_index do |n, i|
|
20
|
+
@max[i] = n + (std[i] * @std)
|
21
|
+
end
|
22
|
+
|
23
|
+
@min.each_with_index do |n, i|
|
24
|
+
@min[i] = n - (std[i] * @std)
|
25
|
+
end
|
26
|
+
|
10
27
|
[@min, @max]
|
11
28
|
end
|
12
29
|
end
|
13
30
|
|
14
31
|
private
|
32
|
+
|
33
|
+
def calculate_std(mean)
|
34
|
+
std = []
|
35
|
+
|
36
|
+
@cat_data.each do |set|
|
37
|
+
var = 0.0
|
38
|
+
set.each_with_index do |n, index|
|
39
|
+
var += ((n - mean[index]) ** 2)
|
40
|
+
end
|
41
|
+
std << Math.sqrt(var)
|
42
|
+
end
|
43
|
+
|
44
|
+
std
|
45
|
+
end
|
46
|
+
|
47
|
+
def find_mean
|
48
|
+
mean = []
|
49
|
+
|
50
|
+
@cat_data.each do |i|
|
51
|
+
sum = i.inject(0.0) {|sum, n| sum += n}
|
52
|
+
mean << sum / i.size
|
53
|
+
end
|
54
|
+
mean
|
55
|
+
end
|
15
56
|
|
16
57
|
def find_max
|
17
58
|
@max = []
|
@@ -36,6 +77,7 @@ class Normalizer
|
|
36
77
|
def find_max_in_set(index)
|
37
78
|
max = 0.0
|
38
79
|
@data.each do |set|
|
80
|
+
@cat_data[index] << set[index]
|
39
81
|
max = set[index] > max ? set[index] : max
|
40
82
|
end
|
41
83
|
max
|
@@ -66,10 +108,29 @@ class Normalizer
|
|
66
108
|
normalized[index] = (n.to_f - @min[index]) / @ranges[index]
|
67
109
|
end
|
68
110
|
|
69
|
-
normalized
|
111
|
+
@breaks_boundary = normalized.any? {|x| x > 1 || x < 0}
|
112
|
+
|
113
|
+
round_to_boundaries(normalized)
|
114
|
+
end
|
115
|
+
|
116
|
+
def breaks_boundary?
|
117
|
+
@breaks_boundary
|
70
118
|
end
|
71
119
|
|
72
120
|
private
|
121
|
+
|
122
|
+
# Data shouldn't go over/under the boundary
|
123
|
+
def round_to_boundaries(normalized_data)
|
124
|
+
normalized_data.map do |x|
|
125
|
+
if x > 1
|
126
|
+
1
|
127
|
+
elsif x < 0
|
128
|
+
0
|
129
|
+
else
|
130
|
+
x
|
131
|
+
end
|
132
|
+
end
|
133
|
+
end
|
73
134
|
|
74
135
|
def calculate_ranges
|
75
136
|
range = []
|
data/normalizer.gemspec
CHANGED
@@ -1,15 +1,15 @@
|
|
1
1
|
# Generated by jeweler
|
2
|
-
# DO NOT EDIT THIS FILE
|
3
|
-
# Instead, edit Jeweler::Tasks in Rakefile, and run
|
2
|
+
# DO NOT EDIT THIS FILE DIRECTLY
|
3
|
+
# Instead, edit Jeweler::Tasks in Rakefile, and run the gemspec command
|
4
4
|
# -*- encoding: utf-8 -*-
|
5
5
|
|
6
6
|
Gem::Specification.new do |s|
|
7
7
|
s.name = %q{normalizer}
|
8
|
-
s.version = "0.0.0"
|
8
|
+
s.version = "0.0.1"
|
9
9
|
|
10
10
|
s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
|
11
11
|
s.authors = ["reddavis"]
|
12
|
-
s.date = %q{2009-10
|
12
|
+
s.date = %q{2009-11-10}
|
13
13
|
s.description = %q{Tool for normalizing data}
|
14
14
|
s.email = %q{reddavis@gmail.com}
|
15
15
|
s.extra_rdoc_files = [
|
@@ -51,3 +51,4 @@ Gem::Specification.new do |s|
|
|
51
51
|
s.add_dependency(%q<rspec>, [">= 0"])
|
52
52
|
end
|
53
53
|
end
|
54
|
+
|
data/spec/normalizer_spec.rb
CHANGED
@@ -1,15 +1,36 @@
|
|
1
1
|
require File.expand_path(File.dirname(__FILE__) + '/spec_helper')
|
2
2
|
|
3
3
|
describe "Normalizer" do
|
4
|
-
|
4
|
+
|
5
5
|
describe "Normalization" do
|
6
6
|
it "should be 0.5" do
|
7
7
|
a = Normalizer.new(:min => [0], :max => [10])
|
8
8
|
results = a.normalize([5])
|
9
9
|
results[0].should == 0.5
|
10
10
|
end
|
11
|
+
|
12
|
+
describe "Going over/under max/min" do
|
13
|
+
before(:all) do
|
14
|
+
@a = Normalizer.new(:min => [0], :max => [10])
|
15
|
+
end
|
16
|
+
|
17
|
+
it "should return true" do
|
18
|
+
results = @a.normalize([20])
|
19
|
+
@a.breaks_boundary?.should be_true
|
20
|
+
end
|
21
|
+
|
22
|
+
it "should return false" do
|
23
|
+
results = @a.normalize([2])
|
24
|
+
@a.breaks_boundary?.should be_false
|
25
|
+
end
|
26
|
+
|
27
|
+
it "should still only return data between 0 and 1" do
|
28
|
+
results = @a.normalize([20])
|
29
|
+
results[0].should == 1
|
30
|
+
end
|
31
|
+
end
|
11
32
|
end
|
12
|
-
|
33
|
+
|
13
34
|
describe "Finding Max and Min" do
|
14
35
|
describe "With 0 Standard deviation" do
|
15
36
|
before(:all) do
|
@@ -24,7 +45,23 @@ describe "Normalizer" do
|
|
24
45
|
it "should return 0,1,2,3,4 for min" do
|
25
46
|
@a[0].should == [0, 1, 2, 3, 4]
|
26
47
|
end
|
48
|
+
end
|
49
|
+
|
50
|
+
describe "With 3 Standard deviation" do
|
51
|
+
before(:all) do
|
52
|
+
data = [[0, 0, 0, 0, 0], [10, 10, 10, 10, 10]]
|
53
|
+
@a = Normalizer.find_min_and_max(data, :std => 3)
|
54
|
+
end
|
55
|
+
|
56
|
+
it "should return std for max" do
|
57
|
+
std = (Math.sqrt(50) * 3) + 10.0
|
58
|
+
@a[1][0].should == std
|
59
|
+
end
|
27
60
|
|
61
|
+
it "should return std for min" do
|
62
|
+
std = 0.0 - (Math.sqrt(50) * 3)
|
63
|
+
@a[0][0].should == std
|
64
|
+
end
|
28
65
|
end
|
29
66
|
end
|
30
67
|
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: normalizer
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.0
|
4
|
+
version: 0.0.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- reddavis
|
@@ -9,7 +9,7 @@ autorequire:
|
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
11
|
|
12
|
-
date: 2009-10
|
12
|
+
date: 2009-11-10 00:00:00 +00:00
|
13
13
|
default_executable:
|
14
14
|
dependencies:
|
15
15
|
- !ruby/object:Gem::Dependency
|