data_prep 0.0.0

Sign up to get free protection for your applications and to get access to all the features.
data/.document ADDED
@@ -0,0 +1,5 @@
1
+ README.rdoc
2
+ lib/**/*.rb
3
+ bin/*
4
+ features/**/*.feature
5
+ LICENSE
data/.gitignore ADDED
@@ -0,0 +1,5 @@
1
+ *.sw?
2
+ .DS_Store
3
+ coverage
4
+ rdoc
5
+ pkg
data/LICENSE ADDED
@@ -0,0 +1,20 @@
1
+ Copyright (c) 2009 reddavis
2
+
3
+ Permission is hereby granted, free of charge, to any person obtaining
4
+ a copy of this software and associated documentation files (the
5
+ "Software"), to deal in the Software without restriction, including
6
+ without limitation the rights to use, copy, modify, merge, publish,
7
+ distribute, sublicense, and/or sell copies of the Software, and to
8
+ permit persons to whom the Software is furnished to do so, subject to
9
+ the following conditions:
10
+
11
+ The above copyright notice and this permission notice shall be
12
+ included in all copies or substantial portions of the Software.
13
+
14
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
15
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
16
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
17
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
18
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
19
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
20
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/README.rdoc ADDED
@@ -0,0 +1,28 @@
1
+ = Data Prep
2
+
3
+ Data Prep is a gem that makes extracting training, testing and validation data nice and simple.
4
+
5
+ All you have to do is give it your data and Data Prep will try to spread your classes as evenly as possible in a 50:25:25 (training, testing, validation) split.
6
+
7
+ == Example
8
+
9
+ ham = [[1, 0.5]] * 20
10
+ spam = [[0, 0.2]] * 20
11
+ data_prep = DataPrep.new(1 => ham, 0 => spam)
12
+
13
+ data_prep.testing.size
14
+ #=> 10
15
+ data_prep.validation.size
16
+ #=> 10
17
+ data_prep.training.size
18
+ #=> 20
19
+
20
+ # To make training easy, Data Prep places the data's class
21
+ # alongside the piece of data
22
+
23
+ data_prep.training[0].inspect
24
+ #=> [[1, 0.5], [1]] == [[data], [output]]
25
+
26
+ == Copyright
27
+
28
+ Copyright (c) 2009 Red Davis. See LICENSE for details.
data/Rakefile ADDED
@@ -0,0 +1,49 @@
1
require 'rubygems'
require 'rake'

# Gem packaging tasks. Jeweler is optional: when it (or one of its
# dependencies) cannot be loaded we only print a hint and carry on, so the
# spec and rdoc tasks below remain usable.
begin
  require 'jeweler'
  Jeweler::Tasks.new do |gem|
    gem.name = "data_prep"
    gem.summary = %Q{A tool to help prepare your data.}
    gem.description = %Q{A tool to help prepare your data.}
    gem.email = "reddavis@gmail.com"
    gem.homepage = "http://github.com/reddavis/Data-Prep"
    gem.authors = ["reddavis"]
    gem.add_development_dependency "rspec"
    # gem is a Gem::Specification... see http://www.rubygems.org/read/chapter/20 for additional settings
  end
  Jeweler::GemcutterTasks.new
rescue LoadError
  puts "Jeweler (or a dependency) not available. Install it with: sudo gem install jeweler"
end

# Runs every spec under spec/.
require 'spec/rake/spectask'
Spec::Rake::SpecTask.new(:spec) do |spec|
  spec.libs << 'lib' << 'spec'
  spec.spec_files = FileList['spec/**/*_spec.rb']
end

# Runs the same specs with rcov coverage enabled.
Spec::Rake::SpecTask.new(:rcov) do |spec|
  spec.libs << 'lib' << 'spec'
  spec.pattern = 'spec/**/*_spec.rb'
  spec.rcov = true
end

# :check_dependencies is not defined anywhere in this Rakefile (it is expected
# to come from the Jeweler tasks above). Only make :spec depend on it when it
# actually exists; otherwise `rake spec` would abort on an unknown
# prerequisite whenever the LoadError branch above was taken.
task :spec => :check_dependencies if Rake::Task.task_defined?(:check_dependencies)

task :default => :spec

# Builds the API documentation into rdoc/.
require 'rake/rdoctask'
Rake::RDocTask.new do |rdoc|
  # strip: keep a trailing newline in VERSION out of the page title.
  version = File.exist?('VERSION') ? File.read('VERSION').strip : ""

  rdoc.rdoc_dir = 'rdoc'
  rdoc.title = "data_prep #{version}"
  rdoc.rdoc_files.include('README*')
  rdoc.rdoc_files.include('lib/**/*.rb')
end
data/VERSION ADDED
@@ -0,0 +1 @@
1
+ 0.0.0
data/data_prep.gemspec ADDED
@@ -0,0 +1,56 @@
1
+ # Generated by jeweler
2
+ # DO NOT EDIT THIS FILE
3
+ # Instead, edit Jeweler::Tasks in Rakefile, and run `rake gemspec`
4
+ # -*- encoding: utf-8 -*-
5
+
6
+ Gem::Specification.new do |s|
7
+ s.name = %q{data_prep}
8
+ s.version = "0.0.0"
9
+
10
+ s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
11
+ s.authors = ["reddavis"]
12
+ s.date = %q{2009-10-13}
13
+ s.description = %q{A tool to help prepare your data.}
14
+ s.email = %q{reddavis@gmail.com}
15
+ s.extra_rdoc_files = [
16
+ "LICENSE",
17
+ "README.rdoc"
18
+ ]
19
+ s.files = [
20
+ ".document",
21
+ ".gitignore",
22
+ "LICENSE",
23
+ "README.rdoc",
24
+ "Rakefile",
25
+ "VERSION",
26
+ "data_prep.gemspec",
27
+ "lib/data_prep.rb",
28
+ "lib/ext/array.rb",
29
+ "spec/array_spec.rb",
30
+ "spec/data_prep_spec.rb",
31
+ "spec/spec_helper.rb"
32
+ ]
33
+ s.homepage = %q{http://github.com/reddavis/Data-Prep}
34
+ s.rdoc_options = ["--charset=UTF-8"]
35
+ s.require_paths = ["lib"]
36
+ s.rubygems_version = %q{1.3.5}
37
+ s.summary = %q{A tool to help prepare your data.}
38
+ s.test_files = [
39
+ "spec/array_spec.rb",
40
+ "spec/data_prep_spec.rb",
41
+ "spec/spec_helper.rb"
42
+ ]
43
+
44
+ if s.respond_to? :specification_version then
45
+ current_version = Gem::Specification::CURRENT_SPECIFICATION_VERSION
46
+ s.specification_version = 3
47
+
48
+ if Gem::Version.new(Gem::RubyGemsVersion) >= Gem::Version.new('1.2.0') then
49
+ s.add_development_dependency(%q<rspec>, [">= 0"])
50
+ else
51
+ s.add_dependency(%q<rspec>, [">= 0"])
52
+ end
53
+ else
54
+ s.add_dependency(%q<rspec>, [">= 0"])
55
+ end
56
+ end
data/lib/data_prep.rb ADDED
@@ -0,0 +1,41 @@
1
+ require File.dirname(__FILE__) + '/ext/array'
2
+
3
# Splits labelled samples into training, testing and validation sets.
#
# Samples are dealt out class by class in rounds of four (two to training,
# one to testing, one to validation), which gives a 50:25:25 split with every
# class spread as evenly as possible across the three sets.
#
# Each stored entry has the form [sample, [output]], where +output+ is the
# hash key the sample was supplied under.
class DataPrep

  attr_reader :training, :testing, :validation

  # data - Hash mapping an output (class label) to an Array of samples.
  #        The hash and its arrays are only read, never modified.
  def initialize(data={})
    inject_outputs(data)
    position_data
  end

  private

  # Builds @data: one array per class, each sample paired with its output.
  # Uses non-destructive +map+ so the caller's arrays are left untouched
  # (and frozen input is accepted).
  def inject_outputs(data)
    @data = data.map do |output, samples|
      samples.map { |sample| [sample, [output]] }
    end
  end

  # Shuffles every class and deals its samples into the three sets.
  # @data is drained in the process (each per-class array ends up empty).
  def position_data
    @training, @testing, @validation = [], [], []
    @data.map! { |samples| samples.shuffle }

    # One round hands each class two training slots, then one testing slot,
    # then one validation slot.
    round = [@training, @training, @testing, @validation]

    until @data.all? { |samples| samples.empty? }
      round.each do |target|
        @data.each do |samples|
          # Classes that ran out early simply sit out the rest of the round.
          target << samples.pop unless samples.empty?
        end
      end
    end
  end

end
data/lib/ext/array.rb ADDED
@@ -0,0 +1,25 @@
1
# Core extensions to Array.
class Array
  # Returns a new array with the elements in random order.
  #
  # Only installed as a fallback: interpreters that already provide
  # Array#shuffle keep their native version (and whatever arguments it
  # accepts) instead of having it replaced by this one.
  unless method_defined?(:shuffle)
    def shuffle
      sort_by { rand }
    end
  end

  # Returns true when the array contains nothing but empty arrays, or
  # contains nothing at all. Any element that is not an Array, and any nested
  # array that is not empty, makes the result false.
  def completely_empty?
    all? { |element| element.is_a?(Array) && element.empty? }
  end

  # Returns the nesting depth of the array, following only the first element
  # at each level (so [1, [1, 2]] has 1 dimension). An empty array has 1.
  #
  # n - depth accumulated so far; callers normally leave it at the default.
  #
  # Taken from David Richards' Data Frame.
  # TODO: add the source URL.
  def dimensions(n=0)
    depth = n + 1
    head = first
    head.is_a?(Array) ? head.dimensions(depth) : depth
  end
end
@@ -0,0 +1,41 @@
1
+ require File.expand_path(File.dirname(__FILE__) + '/spec_helper')
2
+
3
+ describe Array do
4
+
5
+ describe "Completly Empty" do
6
+
7
+ describe "Two Dimensions" do
8
+ it "should return true" do
9
+ [[], [], []].completely_empty?.should be_true
10
+ end
11
+
12
+ it "should return false" do
13
+ [[], [3], []].completely_empty?.should be_false
14
+ end
15
+
16
+ it "should return false with weird array" do
17
+ [[], 3].completely_empty?.should be_false
18
+ end
19
+ end
20
+
21
+ describe "One Dimensions" do
22
+ it "should return true" do
23
+ [].completely_empty?.should be_true
24
+ end
25
+ end
26
+
27
+ end
28
+
29
+ describe "Dimensions" do
30
+ it "should be able to determine its dimensions" do
31
+ [1,2,3].dimensions.should eql(1)
32
+ [[1,2,3], [1,2,3]].dimensions.should eql(2)
33
+ [[[1,2,3], [1,2,3]], [[1,2,3], [1,2,3], [[1,2,3], [1,2,3]]]].dimensions.should eql(3)
34
+ end
35
+
36
+ it "should depend on the first element to determine dimensions" do
37
+ [1, [1,2]].dimensions.should eql(1)
38
+ end
39
+ end
40
+
41
+ end
@@ -0,0 +1,29 @@
1
+ require File.expand_path(File.dirname(__FILE__) + '/spec_helper')
2
+
3
+ describe "DataPrep" do
4
+
5
+ describe "Creating the data sets" do
6
+ before do
7
+ ham = [[1, 0.5]] * 20
8
+ spam = [[0, 0.2]] * 20
9
+ @data_prep = DataPrep.new(0 => ham, 1 => spam)
10
+ end
11
+
12
+ it "should spread the data evenly 50:25:25" do
13
+ @data_prep.testing.size.should == 10
14
+ @data_prep.validation.size.should == 10
15
+ @data_prep.training.size.should == 20
16
+ end
17
+
18
+ it "should inject the outputs" do
19
+ @data_prep.training[0].size.should == 2
20
+ end
21
+
22
+ it "should hold 2 arrays" do
23
+ @data_prep.training[0][0].class.should == Array
24
+ @data_prep.training[0][1].class.should == Array
25
+ end
26
+
27
+ end
28
+
29
+ end
@@ -0,0 +1,9 @@
1
+ $LOAD_PATH.unshift(File.dirname(__FILE__))
2
+ $LOAD_PATH.unshift(File.join(File.dirname(__FILE__), '..', 'lib'))
3
+ require 'data_prep'
4
+ require 'spec'
5
+ require 'spec/autorun'
6
+
7
+ Spec::Runner.configure do |config|
8
+
9
+ end
metadata ADDED
@@ -0,0 +1,78 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: data_prep
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.0
5
+ platform: ruby
6
+ authors:
7
+ - reddavis
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+
12
+ date: 2009-10-13 00:00:00 +01:00
13
+ default_executable:
14
+ dependencies:
15
+ - !ruby/object:Gem::Dependency
16
+ name: rspec
17
+ type: :development
18
+ version_requirement:
19
+ version_requirements: !ruby/object:Gem::Requirement
20
+ requirements:
21
+ - - ">="
22
+ - !ruby/object:Gem::Version
23
+ version: "0"
24
+ version:
25
+ description: A tool to help prepare your data.
26
+ email: reddavis@gmail.com
27
+ executables: []
28
+
29
+ extensions: []
30
+
31
+ extra_rdoc_files:
32
+ - LICENSE
33
+ - README.rdoc
34
+ files:
35
+ - .document
36
+ - .gitignore
37
+ - LICENSE
38
+ - README.rdoc
39
+ - Rakefile
40
+ - VERSION
41
+ - data_prep.gemspec
42
+ - lib/data_prep.rb
43
+ - lib/ext/array.rb
44
+ - spec/array_spec.rb
45
+ - spec/data_prep_spec.rb
46
+ - spec/spec_helper.rb
47
+ has_rdoc: true
48
+ homepage: http://github.com/reddavis/Data-Prep
49
+ licenses: []
50
+
51
+ post_install_message:
52
+ rdoc_options:
53
+ - --charset=UTF-8
54
+ require_paths:
55
+ - lib
56
+ required_ruby_version: !ruby/object:Gem::Requirement
57
+ requirements:
58
+ - - ">="
59
+ - !ruby/object:Gem::Version
60
+ version: "0"
61
+ version:
62
+ required_rubygems_version: !ruby/object:Gem::Requirement
63
+ requirements:
64
+ - - ">="
65
+ - !ruby/object:Gem::Version
66
+ version: "0"
67
+ version:
68
+ requirements: []
69
+
70
+ rubyforge_project:
71
+ rubygems_version: 1.3.5
72
+ signing_key:
73
+ specification_version: 3
74
+ summary: A tool to help prepare your data.
75
+ test_files:
76
+ - spec/array_spec.rb
77
+ - spec/data_prep_spec.rb
78
+ - spec/spec_helper.rb