davidrichards-data_frame 0.0.3
Sign up to get free protection for your applications and to get access to all the features.
- data/README.rdoc +64 -0
- data/VERSION.yml +4 -0
- data/lib/data_frame.rb +114 -0
- data/lib/data_frame/callback_array.rb +152 -0
- data/lib/data_frame/transposable_array.rb +22 -0
- data/lib/ext/string.rb +5 -0
- data/lib/ext/symbol.rb +5 -0
- data/spec/data_frame/callback_array_spec.rb +148 -0
- data/spec/data_frame/transposable_array_spec.rb +138 -0
- data/spec/data_frame_spec.rb +98 -0
- data/spec/spec_helper.rb +8 -0
- metadata +96 -0
data/README.rdoc
ADDED
@@ -0,0 +1,64 @@
|
|
1
|
+
== Data Frame
|
2
|
+
|
3
|
+
This is a general data frame. Load arrays and labels into it, and you will have a very powerful set of tools on your data set.
|
4
|
+
|
5
|
+
==Usage
|
6
|
+
|
7
|
+
df = DataFrame.from_csv('http://archive.ics.uci.edu/ml/machine-learning-databases/forest-fires/forestfires.csv')
|
8
|
+
df.labels
|
9
|
+
# => [:x, :y, :month, :day, :ffmc, :dmc, :dc, :isi, :temp, :rh, :wind, :rain, :area]
|
10
|
+
df.dmc
|
11
|
+
# => [26.2, 35.4, 43.7, 33.3, 51.3, 85.3,...]
|
12
|
+
df.dmc.max
|
13
|
+
# => 291.3
|
14
|
+
df.dmc.min
|
15
|
+
# => 1.1
|
16
|
+
df.dmc.mean
|
17
|
+
# => 110.872340425532
|
18
|
+
df.dmc.std
|
19
|
+
# => 64.0464822492543
|
20
|
+
df = DataFrame.new(:list, :of, :things)
|
21
|
+
# => #<DataFrame:0x24ec6e8 @items=[], @labels=[:list, :of, :things]>
|
22
|
+
df.labels
|
23
|
+
# => [:list, :of, :things]
|
24
|
+
df << [1,2,3]
|
25
|
+
# => [[1, 2, 3]]
|
26
|
+
df.import([[2,3,4],[5,6,7]])
|
27
|
+
# => [[2, 3, 4], [5, 6, 7]]
|
28
|
+
df.items
|
29
|
+
# => [[1, 2, 3], [2, 3, 4], [5, 6, 7]]
|
30
|
+
df.list
|
31
|
+
# => [1, 2, 5]
|
32
|
+
df.list.correlation(df.things)
|
33
|
+
# => 1.0
|
34
|
+
df.list
|
35
|
+
# => [1, 2, 5]
|
36
|
+
df.things
|
37
|
+
# => [3, 4, 7]
|
38
|
+
|
39
|
+
There are a few important features to know:
|
40
|
+
|
41
|
+
* DataFrame.from_csv works for a string, a filename, or a URL.
|
42
|
+
* FasterCSV parsing parameters can be passed to DataFrame.from_csv
|
43
|
+
* DataFrame looks for operations first on the column labels, then on the row labels, then on the items table. So don't name things :mean, :standard_deviation, :min, and that sort of thing.
|
44
|
+
* CallbackArray allows you to set a callback anytime an array is tainted or untainted (taint, shift, pop, clear, map!, that sort of thing). This is generally useful and will probably be copied into the Repositories gem.
|
45
|
+
* TransposableArray is a subclass of CallbackArray, demonstrating how to use it. It creates a very simple approach to memoization. It caches the transpose of the table and resets it whenever it is tainted.
|
46
|
+
|
47
|
+
To get your feet wet, you may want to play with data sets found here:
|
48
|
+
|
49
|
+
http://www.liaad.up.pt/~ltorgo/Regression/DataSets.html
|
50
|
+
|
51
|
+
|
52
|
+
==Installation
|
53
|
+
|
54
|
+
sudo gem install davidrichards-data_frame
|
55
|
+
|
56
|
+
=== Dependencies
|
57
|
+
|
58
|
+
* ActiveSupport: sudo gem install activesupport
|
59
|
+
* JustEnumerableStats: sudo gem install davidrichards-just_enumerable_stats
|
60
|
+
* FasterCSV: sudo gem install fastercsv
|
61
|
+
|
62
|
+
==COPYRIGHT
|
63
|
+
|
64
|
+
Copyright (c) 2009 David Richards. See LICENSE for details.
|
data/VERSION.yml
ADDED
data/lib/data_frame.rb
ADDED
@@ -0,0 +1,114 @@
|
|
1
|
+
require 'rubygems'
|
2
|
+
require 'activesupport'
|
3
|
+
require 'just_enumerable_stats'
|
4
|
+
require 'open-uri'
|
5
|
+
require 'fastercsv'
|
6
|
+
|
7
|
+
Dir.glob("#{File.dirname(__FILE__)}/ext/*.rb").each { |file| require file }
|
8
|
+
|
9
|
+
$:.unshift(File.dirname(__FILE__))
|
10
|
+
|
11
|
+
require 'data_frame/callback_array'
|
12
|
+
require 'data_frame/transposable_array'
|
13
|
+
|
14
|
+
# A DataFrame is a table of items (rows) with named columns and,
# optionally, named rows. Columns and rows can be read through methods
# named after their labels, which makes it easy to work calculations
# (usually on the columns). The items live in a TransposableArray, so the
# transposed table is stored until the object is tainted.
class DataFrame

  class << self

    # This is the neatest part of this neat gem.
    # DataFrame.from_csv can be called in a lot of ways:
    #   DataFrame.from_csv(csv_contents)
    #   DataFrame.from_csv(filename)
    #   DataFrame.from_csv(url)
    # The first CSV row supplies the column labels; the remaining rows are
    # imported as items. +opts+ are merged over default_csv_opts and handed
    # to FasterCSV.
    # If you need to define converters for FasterCSV, do it before calling
    # this method:
    #   FasterCSV::Converters[:special] = lambda{|f| f == 'foo' ? 'bar' : 'foo'}
    #   DataFrame.from_csv('http://example.com/my_special_url.csv', :converters => :special)
    # This returns bar where 'foo' was found and 'foo' everywhere else.
    #
    # Returns the new DataFrame, or nil when nothing could be read or the
    # contents had no header row.
    def from_csv(obj, opts={})
      # opts must be forwarded, otherwise the caller's parsing options are
      # silently ignored.
      labels, table = infer_csv_contents(obj, opts)
      return nil unless labels && table
      df = new(*labels)
      df.import(table)
      df
    end

    protected

      # Works out what +obj+ refers to and parses it as CSV. The lookup
      # order is: an existing file, then a remote URL, then (for strings)
      # the raw CSV text itself.
      #
      # Returns [labels, rows], where labels is the first parsed row, or
      # nil when no contents could be found.
      def infer_csv_contents(obj, opts={})
        contents = File.read(obj) if File.exist?(obj)
        contents ||= read_remote_csv(obj)
        contents ||= obj if obj.is_a?(String)
        return nil unless contents
        table = FCSV.parse(contents, default_csv_opts.merge(opts))
        labels = table.shift
        [labels, table]
      end

      # Fetches +obj+ through open-uri when it looks like an http, https or
      # ftp URL. Anything else is left alone: handing arbitrary text to
      # Kernel#open would run it as a shell command when it starts with "|".
      #
      # Returns the body as a String, or nil when +obj+ is not a URL or the
      # fetch fails (best effort: the caller then falls back to treating
      # the string as CSV text).
      def read_remote_csv(obj)
        return nil unless obj.is_a?(String) && obj =~ %r{\A(?:https?|ftp)://\S+\z}i
        URI.parse(obj).open { |f| f.read }
      rescue StandardError
        nil
      end

      # Parsing options used unless the caller overrides them.
      def default_csv_opts; {:converters => :all}; end
  end

  # The labels of the data items
  attr_reader :labels

  # The items stored in the frame
  attr_reader :items

  # Creates an empty frame whose column labels are the given +labels+,
  # each converted with to_underscore_sym.
  def initialize(*labels)
    @labels = labels.map {|e| e.to_underscore_sym }
    @items = TransposableArray.new
  end

  # Loads a batch of rows. Expects an array of arrays, else you don't
  # know what you have. Returns +rows+.
  def import(rows)
    rows.each do |row|
      self.add_item(row)
    end
  end

  # Appends a single row to the items table.
  def add_item(item)
    self.items << item
  end

  # The optional row labels; an empty array until they are assigned.
  def row_labels
    @row_labels ||= []
  end

  # Assigns the row labels. Raises ArgumentError unless +ary+ is an Array.
  def row_labels=(ary)
    raise ArgumentError, "Row labels must be an array" unless ary.is_a?(Array)
    @row_labels = ary
  end

  # Returns the column labelled +sym+ (read from the transposed items
  # table), or nil when there is no such column label.
  def render_column(sym)
    i = @labels.index(sym)
    return nil unless i
    @items.transpose[i]
  end

  # Returns the row labelled +sym+, or nil when there is no such row label.
  def render_row(sym)
    i = self.row_labels.index(sym)
    return nil unless i
    @items[i]
  end

  # Resolves unknown methods first against the column labels, then the row
  # labels, then the items table itself; anything else is a NoMethodError.
  def method_missing(sym, *args, &block)
    if self.labels.include?(sym)
      render_column(sym)
    elsif self.row_labels.include?(sym)
      render_row(sym)
    elsif @items.respond_to?(sym)
      @items.send(sym, *args, &block)
    else
      super
    end
  end

  # Keeps respond_to? in step with method_missing: true for column labels,
  # row labels and anything the items table responds to.
  def respond_to_missing?(sym, include_private = false)
    self.labels.include?(sym) ||
      self.row_labels.include?(sym) ||
      @items.respond_to?(sym) ||
      super
  end

end
|
@@ -0,0 +1,152 @@
|
|
1
|
+
# This overloads the tainting methods in array with callbacks. So, I
# can block all changes to an array, or broadcast to observers after a
# change, or limit the size of an array. It really just opens up the
# array to one more dimension: change. Before and after change, stack up
# any activity to block or enhance the experience. There are also
# callbacks on untaint. The tainting methods actually taint the array
# once the change has been made.
#
# Each wrapped method keeps the untouched Array implementation available
# under a nontainting_* alias.
class CallbackArray < Array

  include ActiveSupport::Callbacks
  define_callbacks :before_taint, :after_taint, :before_untaint, :after_untaint

  # Runs +safe_method+ (one of the nontainting_* aliases) between the
  # before_taint and after_taint callbacks, tainting the array in between.
  # Arguments and any block are passed straight through. When the
  # before_taint callbacks return a falsy value the change is skipped and
  # nil is returned; otherwise the wrapped method's result is returned.
  def wrap_call(safe_method, *args, &block)
    result = nil
    if run_callbacks(:before_taint)
      result = self.send(safe_method, *args, &block)
      self.orig_taint
      run_callbacks(:after_taint)
    end
    result
  end
  protected :wrap_call

  # Need the original taint for all tainting methods
  alias :orig_taint :taint

  # Taints the array between the before_taint and after_taint callbacks.
  def taint
    result = nil
    if run_callbacks(:before_taint)
      result = self.orig_taint
      run_callbacks(:after_taint)
    end
    result
  end

  # No other method needs orig_untaint, so building this in the cleanest
  # way possible.
  orig_untaint = instance_method(:untaint)
  define_method(:untaint) {
    val = nil
    if run_callbacks(:before_untaint)
      val = orig_untaint.bind(self).call
      run_callbacks(:after_untaint)
    end
    val
  }

  # Every method below accepts the same arguments (and block) as the Array
  # method it wraps.

  alias :nontainting_assign :[]=
  def []=(*args)
    wrap_call(:nontainting_assign, *args)
  end

  alias :nontainting_append :<<
  def <<(value)
    wrap_call(:nontainting_append, value)
  end

  alias :nontainting_delete :delete
  def delete(value, &block)
    wrap_call(:nontainting_delete, value, &block)
  end

  alias :nontainting_push :push
  def push(*values)
    wrap_call(:nontainting_push, *values)
  end

  alias :nontainting_pop :pop
  def pop(*args)
    wrap_call(:nontainting_pop, *args)
  end

  alias :nontainting_shift :shift
  def shift(*args)
    wrap_call(:nontainting_shift, *args)
  end

  alias :nontainting_unshift :unshift
  def unshift(*values)
    wrap_call(:nontainting_unshift, *values)
  end

  alias :nontainting_map! :map!
  def map!(&block)
    wrap_call(:nontainting_map!, &block)
  end

  alias :nontainting_sort! :sort!
  def sort!(&block)
    wrap_call(:nontainting_sort!, &block)
  end

  alias :nontainting_reverse! :reverse!
  def reverse!
    wrap_call(:nontainting_reverse!)
  end

  alias :nontainting_collect! :collect!
  def collect!(&block)
    wrap_call(:nontainting_collect!, &block)
  end

  alias :nontainting_compact! :compact!
  def compact!
    wrap_call(:nontainting_compact!)
  end

  alias :nontainting_reject! :reject!
  def reject!(&block)
    wrap_call(:nontainting_reject!, &block)
  end

  alias :nontainting_slice! :slice!
  def slice!(*args)
    wrap_call(:nontainting_slice!, *args)
  end

  alias :nontainting_flatten! :flatten!
  def flatten!(*args)
    wrap_call(:nontainting_flatten!, *args)
  end

  alias :nontainting_uniq! :uniq!
  def uniq!(&block)
    wrap_call(:nontainting_uniq!, &block)
  end

  alias :nontainting_clear :clear
  def clear
    wrap_call(:nontainting_clear)
  end

  # The remaining in-place mutators are wrapped as well, so that no change
  # to the array can slip past the callbacks (a subclass that caches
  # derived data in after_taint would otherwise be left with a stale
  # cache).

  alias :nontainting_concat :concat
  def concat(*arrays)
    wrap_call(:nontainting_concat, *arrays)
  end

  alias :nontainting_insert :insert
  def insert(*args)
    wrap_call(:nontainting_insert, *args)
  end

  alias :nontainting_delete_at :delete_at
  def delete_at(index)
    wrap_call(:nontainting_delete_at, index)
  end

  alias :nontainting_delete_if :delete_if
  def delete_if(&block)
    wrap_call(:nontainting_delete_if, &block)
  end

  alias :nontainting_fill :fill
  def fill(*args, &block)
    wrap_call(:nontainting_fill, *args, &block)
  end

  alias :nontainting_replace :replace
  def replace(other)
    wrap_call(:nontainting_replace, other)
  end

end
|
@@ -0,0 +1,22 @@
|
|
1
|
+
# The only trick in this array is that its transpose is memoized until
# it is tainted. This will reduce computations elegantly.
class TransposableArray < CallbackArray

  # Any tainting change throws the memoized transpose away.
  after_taint :clear_cache

  # Returns the transposed table, computing it with the inherited
  # transpose only when nothing is cached.
  def transpose
    @transpose ||= super
  end

  # For debugging and testing purposes, it just feels dirty to always ask
  # for @ta.send(:instance_variable_get, :@transpose)
  def cache
    @transpose
  end

  protected

  # Forgets the memoized transpose.
  def clear_cache
    @transpose = nil
  end
end
|
data/lib/ext/string.rb
ADDED
data/lib/ext/symbol.rb
ADDED
@@ -0,0 +1,148 @@
|
|
1
|
+
require File.join(File.dirname(__FILE__), "/../spec_helper")
|
2
|
+
|
3
|
+
# TransposableArray is a thorough test on the after_taint method. Here
|
4
|
+
# I only test the other callbacks.
|
5
|
+
# Counts how often each named callback has fired, so the specs can assert
# on callback activity. The counts are kept in a class-level instance
# variable rather than a class variable, so they are not shared with any
# subclass.
class Register
  class << self
    # Increments the counter for +meth+ and returns the new count.
    def next(meth)
      counts[meth] = counts.fetch(meth, 0) + 1
    end

    # Returns the current count for +meth+, or nil if it never fired.
    def for(meth)
      counts[meth]
    end

    private

    # Lazily created hash of callback name => count.
    def counts
      @counts ||= {}
    end
  end
end
|
16
|
+
|
17
|
+
class A < CallbackArray
|
18
|
+
before_taint :register_before_taint
|
19
|
+
def register_before_taint
|
20
|
+
Register.next(:before_taint)
|
21
|
+
end
|
22
|
+
|
23
|
+
before_untaint :register_before_untaint
|
24
|
+
def register_before_untaint
|
25
|
+
Register.next(:before_untaint)
|
26
|
+
end
|
27
|
+
|
28
|
+
after_untaint :register_after_untaint
|
29
|
+
def register_after_untaint
|
30
|
+
Register.next(:after_untaint)
|
31
|
+
end
|
32
|
+
end
|
33
|
+
|
34
|
+
describe CallbackArray do
|
35
|
+
before do
|
36
|
+
@a = A.new [1,2,3]
|
37
|
+
end
|
38
|
+
|
39
|
+
context "before_taint" do
|
40
|
+
before do
|
41
|
+
@c = Register.for(:before_taint) || 0
|
42
|
+
end
|
43
|
+
|
44
|
+
after do
|
45
|
+
Register.for(:before_taint).should eql(@c + 1)
|
46
|
+
@a.should be_tainted
|
47
|
+
end
|
48
|
+
|
49
|
+
it "should callback before taint" do
|
50
|
+
@a.taint
|
51
|
+
end
|
52
|
+
|
53
|
+
it "should callback before :[]=" do
|
54
|
+
@a[0] = 2
|
55
|
+
end
|
56
|
+
|
57
|
+
it "should callback before :<<" do
|
58
|
+
@a << 3
|
59
|
+
end
|
60
|
+
|
61
|
+
it "should callback before :delete" do
|
62
|
+
@a.delete(2)
|
63
|
+
end
|
64
|
+
|
65
|
+
it "should callback before :push" do
|
66
|
+
@a.push(5)
|
67
|
+
end
|
68
|
+
|
69
|
+
it "should callback before :pop" do
|
70
|
+
@a.pop
|
71
|
+
end
|
72
|
+
|
73
|
+
it "should callback before :shift" do
|
74
|
+
@a.shift
|
75
|
+
end
|
76
|
+
|
77
|
+
it "should callback before :unshift" do
|
78
|
+
@a.unshift(6)
|
79
|
+
end
|
80
|
+
|
81
|
+
it "should callback before :map!" do
|
82
|
+
@a.map! {|e| e}
|
83
|
+
end
|
84
|
+
|
85
|
+
it "should callback before :sort!" do
|
86
|
+
@a.sort!
|
87
|
+
end
|
88
|
+
|
89
|
+
it "should callback before :reverse!" do
|
90
|
+
@a.reverse!
|
91
|
+
end
|
92
|
+
|
93
|
+
it "should callback before :collect!" do
|
94
|
+
@a.collect! {|e| e}
|
95
|
+
end
|
96
|
+
|
97
|
+
it "should callback before :compact!" do
|
98
|
+
@a.compact!
|
99
|
+
end
|
100
|
+
|
101
|
+
it "should callback before :reject!" do
|
102
|
+
@a.reject! {|e| not e}
|
103
|
+
end
|
104
|
+
|
105
|
+
it "should callback before :slice!" do
|
106
|
+
@a.slice!(1,2)
|
107
|
+
end
|
108
|
+
|
109
|
+
it "should callback before :flatten!" do
|
110
|
+
@a.flatten!
|
111
|
+
end
|
112
|
+
|
113
|
+
it "should callback before :uniq!" do
|
114
|
+
@a.uniq!
|
115
|
+
end
|
116
|
+
|
117
|
+
it "should callback before :clear" do
|
118
|
+
@a.clear
|
119
|
+
end
|
120
|
+
|
121
|
+
|
122
|
+
end
|
123
|
+
|
124
|
+
it "should not adjust the array in other methods" do
|
125
|
+
@a.at(0)
|
126
|
+
@a.sort
|
127
|
+
@a.uniq
|
128
|
+
@a.find{|e| e}
|
129
|
+
Register.for(:before_taint).should be_nil
|
130
|
+
@a.should_not be_tainted
|
131
|
+
end
|
132
|
+
|
133
|
+
it "should callback before untaint" do
|
134
|
+
c = Register.for(:before_untaint) || 0
|
135
|
+
@a.taint
|
136
|
+
@a.untaint
|
137
|
+
Register.for(:before_untaint).should eql(c + 1)
|
138
|
+
end
|
139
|
+
|
140
|
+
it "should callback after untaint" do
|
141
|
+
c = Register.for(:after_untaint) || 0
|
142
|
+
@a.taint
|
143
|
+
@a.untaint
|
144
|
+
Register.for(:after_untaint).should eql(c + 1)
|
145
|
+
end
|
146
|
+
|
147
|
+
end
|
148
|
+
|
@@ -0,0 +1,138 @@
|
|
1
|
+
require File.join(File.dirname(__FILE__), "/../spec_helper")
|
2
|
+
|
3
|
+
describe TransposableArray do
|
4
|
+
before do
|
5
|
+
@ta = TransposableArray.new [[1,2,3],[4,5,6],[7,8,9]]
|
6
|
+
@t = [[1,4,7],[2,5,8],[3,6,9]]
|
7
|
+
end
|
8
|
+
|
9
|
+
it "should be able to transpose itself" do
|
10
|
+
@ta.transpose.should eql(@t)
|
11
|
+
end
|
12
|
+
|
13
|
+
it "should cache the transpose" do
|
14
|
+
@ta.cache.should be_nil
|
15
|
+
@ta.transpose
|
16
|
+
@ta.cache.should eql(@t)
|
17
|
+
end
|
18
|
+
|
19
|
+
it "should clear the cache on taint" do
|
20
|
+
@count = nil
|
21
|
+
@ta.transpose
|
22
|
+
@ta.taint
|
23
|
+
@ta.cache.should be_nil
|
24
|
+
end
|
25
|
+
|
26
|
+
it "should clear the cache on []=" do
|
27
|
+
@ta.transpose
|
28
|
+
@ta[0] = 1
|
29
|
+
@ta.cache.should be_nil
|
30
|
+
end
|
31
|
+
|
32
|
+
it "should clear the cache on <<" do
|
33
|
+
@ta.transpose
|
34
|
+
@ta << 1
|
35
|
+
@ta.cache.should be_nil
|
36
|
+
end
|
37
|
+
|
38
|
+
it "should clear the cache on delete" do
|
39
|
+
@ta.transpose
|
40
|
+
@ta.delete(0)
|
41
|
+
@ta.cache.should be_nil
|
42
|
+
end
|
43
|
+
|
44
|
+
it "should clear the cache on push" do
|
45
|
+
@ta.transpose
|
46
|
+
@ta.push(1)
|
47
|
+
@ta.cache.should be_nil
|
48
|
+
end
|
49
|
+
|
50
|
+
it "should clear the cache on pop" do
|
51
|
+
@ta.transpose
|
52
|
+
@ta.pop
|
53
|
+
@ta.cache.should be_nil
|
54
|
+
end
|
55
|
+
|
56
|
+
it "should clear the cache on shift" do
|
57
|
+
@ta.transpose
|
58
|
+
@ta.shift
|
59
|
+
@ta.cache.should be_nil
|
60
|
+
end
|
61
|
+
|
62
|
+
it "should clear the cache on unshift" do
|
63
|
+
@ta.transpose
|
64
|
+
@ta.unshift(1)
|
65
|
+
@ta.cache.should be_nil
|
66
|
+
end
|
67
|
+
|
68
|
+
it "should clear the cache on map!" do
|
69
|
+
@ta.transpose
|
70
|
+
@ta.map!{ |e| e }
|
71
|
+
@ta.cache.should be_nil
|
72
|
+
end
|
73
|
+
|
74
|
+
it "should clear the cache on sort!" do
|
75
|
+
@ta.transpose
|
76
|
+
@ta.sort!
|
77
|
+
@ta.cache.should be_nil
|
78
|
+
end
|
79
|
+
|
80
|
+
it "should clear the cache on reverse!" do
|
81
|
+
@ta.transpose
|
82
|
+
@ta.reverse!
|
83
|
+
@ta.cache.should be_nil
|
84
|
+
end
|
85
|
+
|
86
|
+
it "should clear the cache on collect!" do
|
87
|
+
@ta.transpose
|
88
|
+
@ta.collect! {|e| e}
|
89
|
+
@ta.cache.should be_nil
|
90
|
+
end
|
91
|
+
|
92
|
+
it "should clear the cache on compact!" do
|
93
|
+
@ta.transpose
|
94
|
+
@ta.compact!
|
95
|
+
@ta.cache.should be_nil
|
96
|
+
end
|
97
|
+
|
98
|
+
it "should clear the cache on reject!" do
|
99
|
+
@ta.transpose
|
100
|
+
@ta.reject! {|e| e}
|
101
|
+
@ta.cache.should be_nil
|
102
|
+
end
|
103
|
+
|
104
|
+
it "should clear the cache on slice!" do
|
105
|
+
@ta.transpose
|
106
|
+
@ta.slice!(1,2)
|
107
|
+
@ta.cache.should be_nil
|
108
|
+
end
|
109
|
+
|
110
|
+
it "should clear the cache on flatten!" do
|
111
|
+
@ta.transpose
|
112
|
+
@ta.flatten!
|
113
|
+
@ta.cache.should be_nil
|
114
|
+
end
|
115
|
+
|
116
|
+
it "should clear the cache on uniq!" do
|
117
|
+
@ta.transpose
|
118
|
+
@ta.uniq!
|
119
|
+
@ta.cache.should be_nil
|
120
|
+
end
|
121
|
+
|
122
|
+
it "should clear the cache on clear" do
|
123
|
+
@ta.transpose
|
124
|
+
@ta.clear
|
125
|
+
@ta.cache.should be_nil
|
126
|
+
end
|
127
|
+
|
128
|
+
it "should not adjust the array in other methods" do
|
129
|
+
@ta.transpose
|
130
|
+
@ta.at(0)
|
131
|
+
@ta.sort
|
132
|
+
@ta.uniq
|
133
|
+
@ta.find{|e| e}
|
134
|
+
@ta.cache.should eql(@t)
|
135
|
+
end
|
136
|
+
end
|
137
|
+
|
138
|
+
|
@@ -0,0 +1,98 @@
|
|
1
|
+
require File.join(File.dirname(__FILE__), "/spec_helper")
|
2
|
+
|
3
|
+
describe DataFrame do
|
4
|
+
|
5
|
+
before do
|
6
|
+
@labels = [:these, :are, :the, :labels]
|
7
|
+
@df = DataFrame.new(*@labels)
|
8
|
+
end
|
9
|
+
|
10
|
+
it "should initialize with labels" do
|
11
|
+
df = DataFrame.new(*@labels)
|
12
|
+
df.labels.should eql(@labels)
|
13
|
+
end
|
14
|
+
|
15
|
+
it "should initialize with an empty items list" do
|
16
|
+
@df.items.should be_is_a(TransposableArray)
|
17
|
+
@df.items.should be_empty
|
18
|
+
end
|
19
|
+
|
20
|
+
it "should be able to add an item" do
|
21
|
+
item = [1,2,3,4]
|
22
|
+
@df.add_item(item)
|
23
|
+
@df.items.should eql([item])
|
24
|
+
end
|
25
|
+
|
26
|
+
it "should use just_enumerable_stats" do
|
27
|
+
[1,2,3].std.should eql(1.0)
|
28
|
+
lambda{[1,2,3].cor([2,3,5])}.should_not raise_error
|
29
|
+
end
|
30
|
+
|
31
|
+
context "column and row operations" do
|
32
|
+
before do
|
33
|
+
@df.add_item([1,2,3,4])
|
34
|
+
@df.add_item([5,6,7,8])
|
35
|
+
@df.add_item([9,10,11,12])
|
36
|
+
end
|
37
|
+
|
38
|
+
it "should have a method for every label, the column in the data frame" do
|
39
|
+
@df.these.should eql([1,5,9])
|
40
|
+
end
|
41
|
+
|
42
|
+
it "should make columns easily computable" do
|
43
|
+
@df.these.std.should eql([1,5,9].std)
|
44
|
+
end
|
45
|
+
|
46
|
+
it "should defer unknown methods to the items in the data frame" do
|
47
|
+
@df[0].should eql([1,2,3,4])
|
48
|
+
@df << [13,14,15,16]
|
49
|
+
@df.last.should eql([13,14,15,16])
|
50
|
+
@df.map { |e| e.sum }.should eql([10,26,42,58])
|
51
|
+
end
|
52
|
+
|
53
|
+
it "should allow optional row labels" do
|
54
|
+
@df.row_labels.should eql([])
|
55
|
+
end
|
56
|
+
|
57
|
+
it "should have a setter for row labels" do
|
58
|
+
@df.row_labels = [:other, :things, :here]
|
59
|
+
@df.row_labels.should eql([:other, :things, :here])
|
60
|
+
end
|
61
|
+
|
62
|
+
it "should be able to access rows by their labels" do
|
63
|
+
@df.row_labels = [:other, :things, :here]
|
64
|
+
@df.here.should eql([9,10,11,12])
|
65
|
+
end
|
66
|
+
|
67
|
+
it "should make rows easily computable" do
|
68
|
+
@df.row_labels = [:other, :things, :here]
|
69
|
+
@df.here.std.should be_close(1.414, 0.001)
|
70
|
+
end
|
71
|
+
end
|
72
|
+
|
73
|
+
it "should be able to import more than one row at a time" do
|
74
|
+
@df.import([[2,2,2,2],[3,3,3,3],[4,4,4,4]])
|
75
|
+
@df.row_labels = [:twos, :threes, :fours]
|
76
|
+
@df.twos.should eql([2,2,2,2])
|
77
|
+
@df.threes.should eql([3,3,3,3])
|
78
|
+
@df.fours.should eql([4,4,4,4])
|
79
|
+
end
|
80
|
+
|
81
|
+
context "csv" do
|
82
|
+
it "should compute easily from csv" do
|
83
|
+
contents = %{X,Y,month,day,FFMC,DMC,DC,ISI,temp,RH,wind,rain,area
|
84
|
+
7,5,mar,fri,86.2,26.2,94.3,5.1,8.2,51,6.7,0,0
|
85
|
+
7,4,oct,tue,90.6,35.4,669.1,6.7,18,33,0.9,0,0
|
86
|
+
}
|
87
|
+
labels = [:x, :y, :month, :day, :ffmc, :dmc, :dc, :isi, :temp, :rh, :wind, :rain, :area]
|
88
|
+
|
89
|
+
@df = DataFrame.from_csv(contents)
|
90
|
+
@df.labels.should eql(labels)
|
91
|
+
@df.x.should eql([7,7])
|
92
|
+
@df.area.should eql([0,0])
|
93
|
+
end
|
94
|
+
end
|
95
|
+
|
96
|
+
|
97
|
+
|
98
|
+
end
|
data/spec/spec_helper.rb
ADDED
metadata
ADDED
@@ -0,0 +1,96 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: davidrichards-data_frame
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.0.3
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- David Richards
|
8
|
+
autorequire:
|
9
|
+
bindir: bin
|
10
|
+
cert_chain: []
|
11
|
+
|
12
|
+
date: 2009-07-23 00:00:00 -07:00
|
13
|
+
default_executable:
|
14
|
+
dependencies:
|
15
|
+
- !ruby/object:Gem::Dependency
|
16
|
+
name: active_support
|
17
|
+
type: :runtime
|
18
|
+
version_requirement:
|
19
|
+
version_requirements: !ruby/object:Gem::Requirement
|
20
|
+
requirements:
|
21
|
+
- - ">="
|
22
|
+
- !ruby/object:Gem::Version
|
23
|
+
version: "0"
|
24
|
+
version:
|
25
|
+
- !ruby/object:Gem::Dependency
|
26
|
+
name: davidrichards-just_enumerable_stats
|
27
|
+
type: :runtime
|
28
|
+
version_requirement:
|
29
|
+
version_requirements: !ruby/object:Gem::Requirement
|
30
|
+
requirements:
|
31
|
+
- - ">="
|
32
|
+
- !ruby/object:Gem::Version
|
33
|
+
version: "0"
|
34
|
+
version:
|
35
|
+
- !ruby/object:Gem::Dependency
|
36
|
+
name: faster_csv
|
37
|
+
type: :runtime
|
38
|
+
version_requirement:
|
39
|
+
version_requirements: !ruby/object:Gem::Requirement
|
40
|
+
requirements:
|
41
|
+
- - ">="
|
42
|
+
- !ruby/object:Gem::Version
|
43
|
+
version: "0"
|
44
|
+
version:
|
45
|
+
description: Data Frames with memoized transpose
|
46
|
+
email: davidlamontrichards@gmail.com
|
47
|
+
executables: []
|
48
|
+
|
49
|
+
extensions: []
|
50
|
+
|
51
|
+
extra_rdoc_files: []
|
52
|
+
|
53
|
+
files:
|
54
|
+
- README.rdoc
|
55
|
+
- VERSION.yml
|
56
|
+
- lib/data_frame
|
57
|
+
- lib/data_frame/callback_array.rb
|
58
|
+
- lib/data_frame/transposable_array.rb
|
59
|
+
- lib/data_frame.rb
|
60
|
+
- lib/ext
|
61
|
+
- lib/ext/string.rb
|
62
|
+
- lib/ext/symbol.rb
|
63
|
+
- spec/data_frame
|
64
|
+
- spec/data_frame/callback_array_spec.rb
|
65
|
+
- spec/data_frame/transposable_array_spec.rb
|
66
|
+
- spec/data_frame_spec.rb
|
67
|
+
- spec/spec_helper.rb
|
68
|
+
has_rdoc: true
|
69
|
+
homepage: http://github.com/davidrichards/data_frame
|
70
|
+
post_install_message:
|
71
|
+
rdoc_options:
|
72
|
+
- --inline-source
|
73
|
+
- --charset=UTF-8
|
74
|
+
require_paths:
|
75
|
+
- lib
|
76
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
77
|
+
requirements:
|
78
|
+
- - ">="
|
79
|
+
- !ruby/object:Gem::Version
|
80
|
+
version: "0"
|
81
|
+
version:
|
82
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
83
|
+
requirements:
|
84
|
+
- - ">="
|
85
|
+
- !ruby/object:Gem::Version
|
86
|
+
version: "0"
|
87
|
+
version:
|
88
|
+
requirements: []
|
89
|
+
|
90
|
+
rubyforge_project:
|
91
|
+
rubygems_version: 1.2.0
|
92
|
+
signing_key:
|
93
|
+
specification_version: 2
|
94
|
+
summary: Data Frames with memoized transpose
|
95
|
+
test_files: []
|
96
|
+
|