daru 0.0.2.3 → 0.0.3
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/History.txt +10 -0
- data/README.md +17 -10
- data/Rakefile +5 -0
- data/daru.gemspec +2 -0
- data/lib/daru.rb +1 -1
- data/lib/daru/dataframe.rb +426 -146
- data/lib/daru/dataframe_by_row.rb +15 -0
- data/lib/daru/dataframe_by_vector.rb +15 -0
- data/lib/daru/index.rb +83 -0
- data/lib/daru/io.rb +30 -0
- data/lib/daru/monkeys.rb +18 -10
- data/lib/daru/vector.rb +178 -47
- data/lib/version.rb +1 -1
- data/spec/dataframe_spec.rb +550 -0
- data/spec/fixtures/countries.json +7794 -0
- data/spec/index_spec.rb +54 -0
- data/spec/io_spec.rb +49 -0
- data/spec/monkeys_spec.rb +6 -0
- data/spec/spec_helper.rb +10 -1
- data/spec/vector_spec.rb +155 -0
- metadata +47 -10
- data/spec/jruby/dataframe_spec.rb +0 -1
- data/spec/jruby/vector_spec.rb +0 -20
- data/spec/mri/dataframe_spec.rb +0 -139
- data/spec/mri/vector_spec.rb +0 -104
data/lib/daru/index.rb
ADDED
@@ -0,0 +1,83 @@
|
|
1
|
+
module Daru
  # Ordered, frozen mapping from index labels to their positions.
  #
  # Labels are Integers, or anything else converted with #to_sym. The
  # position of a label is the order in which it was supplied. Because the
  # labels are stored as Hash keys, a repeated label keeps a single entry
  # holding the position of its last occurrence.
  class Index
    include Enumerable

    # The frozen Hash of label => position.
    attr_reader :relation_hash

    # Number of distinct labels.
    attr_reader :size

    # Integer when the first supplied label is an Integer, Symbol otherwise
    # (including for an empty index).
    attr_reader :index_class

    # Yields each label in positional order. This relies on Ruby Hashes
    # preserving insertion order.
    def each(&block)
      @relation_hash.each_key(&block)
    end

    # index - nil (empty index), an Integer n (labels 0...n), or any object
    #         responding to #to_a (Array, Range, another Index, ...).
    def initialize index
      index = 0 if index.nil?
      index = Array.new(index) { |i| i } if index.is_a? Integer

      # Normalise to an Array so Ranges and other enumerables work too.
      labels = index.to_a

      @relation_hash = {}
      labels.each_with_index do |label, position|
        label = label.to_sym unless label.is_a?(Integer)

        @relation_hash[label] = position
      end
      @relation_hash.freeze

      @size = @relation_hash.size
      @index_class = labels.first.is_a?(Integer) ? Integer : Symbol
    end

    # True when +other+ (an Index or an Array of labels) holds the same
    # labels in the same order. Objects that cannot be compared are unequal
    # rather than an error.
    def ==(other)
      return false unless other.respond_to?(:size) && other.respond_to?(:to_a)
      return false if other.size != @size

      @relation_hash.keys == other.to_a
    end

    # Position of the label +key+, or nil when the label is absent.
    def [](key)
      @relation_hash[key]
    end

    # Returns a new Index holding this index's labels followed by those of
    # +other+ (another Index, a single Symbol/Integer, or an Array of labels),
    # without duplicates. Any other argument raises TypeError from Array#+.
    def +(other)
      merged =
        if other.respond_to? :relation_hash # another index object
          @relation_hash.keys + other.relation_hash.keys
        elsif other.is_a?(Symbol) || other.is_a?(Integer)
          @relation_hash.keys << other
        else
          @relation_hash.keys + other
        end

      # Built directly so the class does not depend on Array#to_index.
      Daru::Index.new merged.uniq
    end

    # Labels in positional order.
    def to_a
      @relation_hash.keys
    end

    # Label stored at position +value+, or nil when there is none.
    def key(value)
      @relation_hash.key value
    end

    # Converts +new_index+ to an Index. Objects that know how to convert
    # themselves (#to_index) are asked to; anything else goes to Index.new.
    def re_index new_index
      return new_index.to_index if new_index.respond_to?(:to_index)

      Daru::Index.new new_index
    end

    # True when +index+ is one of the labels.
    def include? index
      @relation_hash.has_key? index
    end

    # A new Index with the same labels.
    def dup
      Daru::Index.new @relation_hash.keys
    end

    # An Index is already an index; returns self.
    def to_index
      self
    end
  end
end
|
data/lib/daru/io.rb
ADDED
@@ -0,0 +1,30 @@
|
|
1
|
+
module Daru
  module IO
    class << self
      # Reads the CSV file at +path+ into a Daru::DataFrame, one data frame
      # row per CSV row.
      #
      # path - location of the CSV file.
      # opts - options forwarded to CSV.open. Missing (or falsy) entries
      #        default to col_sep: ',', headers: true, converters: :numeric
      #        and header_converters: :symbol. The caller's Hash is left
      #        untouched.
      #
      # If a block is given it receives the opened CSV object before any row
      # is read.
      #
      # Returns the DataFrame, or nil when the file contains no data rows.
      def from_csv path, opts={}
        # Work on a copy so the defaults do not leak into the caller's Hash.
        csv_opts = opts.dup
        csv_opts[:col_sep]           ||= ','
        csv_opts[:headers]           ||= true
        csv_opts[:converters]        ||= :numeric
        csv_opts[:header_converters] ||= :symbol

        df = nil

        # Block form closes the file even if reading raises; options are
        # passed as keywords, which CSV.open requires on Ruby 3.
        CSV.open(path, 'r', **csv_opts) do |csv|
          yield csv if block_given?

          csv.each_with_index do |row, index|
            # Headers are only known once the first row has been read.
            df ||= Daru::DataFrame.new({}, csv.headers)

            df.row[index] = row.fields
          end
        end

        df
      end
    end
  end
end
|
data/lib/daru/monkeys.rb
CHANGED
@@ -1,38 +1,46 @@
|
|
1
1
|
class Array
  # Wraps this array in a Daru::Vector.
  #
  # name  - optional vector name.
  # index - optional index for the vector.
  def daru_vector(name = nil, index = nil)
    Daru::Vector.new(name, self, index)
  end

  alias dv daru_vector

  # Builds a Daru::Index whose labels are this array's elements.
  def to_index
    Daru::Index.new(self)
  end
end
|
8
12
|
|
9
13
|
class Range
  # Wraps this range in a Daru::Vector.
  #
  # name  - optional vector name.
  # index - optional index for the vector.
  def daru_vector(name = nil, index = nil)
    Daru::Vector.new(name, self, index)
  end

  alias dv daru_vector

  # Builds a Daru::Index whose labels are the elements of this range.
  def to_index
    Daru::Index.new(to_a)
  end
end
|
16
24
|
|
17
25
|
class Hash
  # Builds a Daru::Vector from the first key/value pair of this hash: the
  # key becomes the vector name and the value its source data.
  #
  # index - optional index for the vector.
  def daru_vector index=nil
    # Daru::Vector.new takes (name, source, index), so the key goes first.
    Daru::Vector.new self.keys[0], self.values[0], index
  end

  alias_method :dv, :daru_vector
end
|
24
32
|
|
25
33
|
class NMatrix
  # Wraps this NMatrix in a Daru::Vector.
  #
  # name  - optional vector name.
  # index - optional index for the vector.
  def daru_vector(name = nil, index = nil)
    Daru::Vector.new(name, self, index)
  end

  alias dv daru_vector
end
|
32
40
|
|
33
41
|
class MDArray
|
34
|
-
def daru_vector name=nil
|
35
|
-
Daru::Vector.new self,
|
42
|
+
# Wraps the receiver in a Daru::Vector.
#
# name  - optional vector name.
# index - optional index for the vector.
def daru_vector(name = nil, index = nil)
  Daru::Vector.new(name, self, index)
end
|
37
45
|
|
38
46
|
alias_method :dv, :daru_vector
|
data/lib/daru/vector.rb
CHANGED
@@ -6,96 +6,227 @@ module Daru
|
|
6
6
|
@vector.each(&block)
|
7
7
|
end
|
8
8
|
|
9
|
-
|
10
|
-
|
11
|
-
attr_reader
|
9
|
+
attr_reader :name
|
10
|
+
attr_reader :index
|
11
|
+
attr_reader :size
|
12
|
+
|
13
|
+
# Takes up to three positional arguments: name, source and index.
#
# name   - handed to set_name.
# source - the data; defaults to an empty Array. Arrays are copied, Ranges
#          and Matrix objects are converted with to_a, anything else
#          (NMatrix or MDArray) is duplicated as is.
# index  - converted with to_index; when omitted a Daru::Index of the
#          source's size is created.
#
# A vector shorter than its index is padded with nil.
# Raises IndexError when the index is smaller than the vector.
def initialize *args
  name, source, index = args
  source ||= []

  set_name name

  @vector =
    if source.is_a?(Array)
      source.dup
    elsif source.is_a?(Range) || source.is_a?(Matrix)
      source.to_a.dup
    else # NMatrix or MDArray
      source.dup
    end

  @index = index.nil? ? Daru::Index.new(@vector.size) : index.to_index

  # TODO: Will need work for NMatrix/MDArray
  unless @index.size >= @vector.size
    raise IndexError, "Expected index size >= vector size"
  end
  (@index.size - @vector.size).times { @vector << nil }

  set_size
end
|
20
45
|
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
46
|
+
# Element access.
#
# With one argument: returns the element stored under that index label;
# if the label is unknown but the argument is Numeric it is used as a
# position into the underlying data instead.
#
# With several arguments: every argument must be an existing label; returns
# a new Daru::Vector with the same name, holding the selected elements and
# indexed by the given labels.
#
# Raises IndexError when a label does not exist.
def [](index, *indexes)
  if indexes.empty?
    if @index.include? index
      @vector[@index[index]]
    elsif index.is_a?(Numeric)
      @vector[index]
    else
      raise IndexError, "Specified index #{index} does not exist."
    end
  else
    labels = [index, *indexes]

    values = labels.map do |label|
      # An unknown label would otherwise reach @vector[nil] and surface as
      # an unrelated TypeError.
      unless @index.include? label
        raise IndexError, "Specified index #{label} does not exist."
      end

      @vector[@index[label]]
    end

    Daru::Vector.new @name, values, labels
  end
end
|
32
61
|
|
33
|
-
def [](index)
|
34
|
-
@
|
62
|
+
# Stores +value+ under +index+. A known label is translated to its
# position; anything else is handed to the underlying data as is. The
# cached size is refreshed afterwards.
def []=(index, value)
  position = @index.include?(index) ? @index[index] : index
  @vector[position] = value

  set_size
end
|
36
71
|
|
37
|
-
|
38
|
-
|
72
|
+
# Two vectors are equal if they have the exact same index values
# corresponding with the exact same elements. Name is ignored.
def == other
  return false unless @index == other.index
  return false unless @size == other.size

  @index.all? { |label| self[label] == other[label] }
end
|
40
80
|
|
41
|
-
def
|
42
|
-
|
81
|
+
# Appends +element+ by delegating to concat without an explicit index;
# returns whatever concat returns.
def <<(element)
  concat(element)
end
|
44
84
|
|
45
|
-
def
|
46
|
-
@
|
85
|
+
# Appends +element+ to the vector under the label +index+.
#
# element - value to append.
# index   - label for the new element. May be omitted only when the vector
#           is indexed by Integers, in which case the index is rebuilt as
#           0..size and the element takes the last position.
#
# Returns the appended element.
# Raises IndexError when +index+ is already present, and re-raises any
# error from extending the index with the message "Expected valid index.".
def concat element, index=nil
  raise IndexError, "Expected new unique index" if @index.include? index

  if index.nil? && @index.index_class == Integer
    @index = Daru::Index.new @size+1
    index = @size
  else
    begin
      @index = @index.re_index(@index + index)
    rescue StandardError => e
      # Only ordinary errors are re-labelled; Interrupt, SystemExit and the
      # like propagate untouched.
      raise e, "Expected valid index."
    end
  end

  @size += 1

  @vector[@index[index]] = element
end
|
54
103
|
|
55
|
-
def
|
56
|
-
|
104
|
+
# Removes +element+: looks up its index label with index_of and passes
# that label to delete_at.
def delete(element)
  delete_at(index_of(element))
end
|
58
107
|
|
59
|
-
def
|
60
|
-
|
108
|
+
# Removes the element identified by +index+ (resolved to a label through
# named_index_for) and shrinks the index to match: an Integer index is
# rebuilt one shorter, any other index simply loses the label. The cached
# size is refreshed afterwards.
def delete_at index
  label = named_index_for index

  @vector.delete_at @index[label]

  @index =
    if @index.index_class == Integer
      Daru::Index.new(@size - 1)
    else
      (@index.to_a - [label]).to_index
    end

  set_size
end
|
62
121
|
|
63
|
-
|
122
|
+
# Returns the index label under which +element+ is stored: the element's
# position in the underlying data is found first, then mapped back to its
# label.
def index_of(element)
  position = @vector.index(element) # calling Array#index
  @index.key(position)
end
|
64
125
|
|
65
|
-
def
|
66
|
-
|
126
|
+
# Returns a Hash mapping every index label to the element stored under it.
def to_hash
  @index.each_with_object({}) do |label, pairs|
    pairs[label] = self[label]
  end
end
|
68
132
|
|
69
|
-
def
|
70
|
-
|
71
|
-
@vector.compact!
|
72
|
-
@size -= 1
|
133
|
+
# Serialises the vector as the JSON form of its to_hash representation.
#
# args - generator arguments (e.g. the JSON state object), forwarded to
#        Hash#to_json so formatting options are honoured.
def to_json *args
  to_hash.to_json(*args)
end
|
74
136
|
|
75
137
|
# Renders the vector as an HTML table with one row per index label and the
# vector name (or 'nil') as column heading.
#
# threshold - once the zero-based row number exceeds this value an
#             ellipsis row is appended and rendering stops.
def to_html threshold=15
  heading = @name || 'nil'

  markup = '<table>' + '<tr><th> </th><th>' + heading.to_s + '</th></tr>'

  @index.each_with_index do |label, position|
    markup += '<tr><td>' + label.to_s + '</td>' + '<td>' + self[label].to_s + '</td></tr>'

    next unless position > threshold

    markup += '<tr><td>...</td><td>...</td></tr>'
    break
  end

  markup += '</table>'

  markup
end
|
155
|
+
|
156
|
+
# The string form of a vector is its HTML table (see to_html).
def to_s
  to_html()
end
|
159
|
+
|
160
|
+
# Returns a plain-text, two-column rendering of the vector: a header line
# with class, object id, name and size, then the name, then one line per
# index label and element.
#
# spacing   - maximum column width; longer labels/values are truncated.
# threshold - once the zero-based row number exceeds this value an
#             ellipsis row is appended and rendering stops.
def inspect spacing=10, threshold=15
  name = @name || 'nil'

  widths = @index.to_a.map { |label| label.to_s.size } +
           @vector.map { |value| value.to_s.size }

  # An empty vector has no widths to measure; size the columns to the name
  # instead of comparing nil with an Integer.
  longest = widths.max || name.to_s.size
  longest = spacing if longest > spacing

  formatter = "\n%#{longest}.#{longest}s %#{longest}.#{longest}s"

  content = ""
  content += "\n#<" + self.class.to_s + ":" + self.object_id.to_s + " @name = " + name.to_s + " @size = " + size.to_s + " >"

  content += sprintf formatter, "", name
  @index.each_with_index do |index, num|
    content += sprintf formatter, index.to_s, self[index]

    if num > threshold
      content += sprintf formatter, '...', '...'
      break
    end
  end

  content += "\n"

  content
end
|
185
|
+
|
186
|
+
# Placeholder: currently does nothing and returns nil.
#
# TODO: Compact and also take care of indexes
def compact!
  # @vector.compact!
  # set_size
end
|
191
|
+
|
192
|
+
# Gives the vector a new name.
#
# new_name - applied through set_name, so the same rules as at
#            construction time hold: Numeric names are kept as they are,
#            nil clears the name, anything else is converted with to_sym.
#
# Returns the stored name.
def rename new_name
  set_name new_name
end
|
86
195
|
|
87
|
-
def dup
|
88
|
-
Daru::Vector.new @vector.dup, @
|
196
|
+
# Returns a new Daru::Vector with the same name, built from copies of the
# underlying data and of the index.
def dup
  Daru::Vector.new(@name, @vector.dup, @index.dup)
end
|
90
199
|
|
91
|
-
def daru_vector
|
200
|
+
# A vector is already a Daru::Vector: any arguments are ignored and the
# receiver itself is returned.
def daru_vector(*name)
  self
end
|
94
203
|
|
95
204
|
alias_method :dv, :daru_vector
|
96
205
|
|
97
|
-
|
98
|
-
|
206
|
+
private
|
207
|
+
|
208
|
+
# Resolves +index+ to an index label: an existing label is returned as is,
# otherwise +index+ is treated as a position and the label stored there is
# returned.
#
# Raises IndexError when neither lookup succeeds.
def named_index_for index
  return index if @index.include? index

  label = @index.key index
  return label if label

  raise IndexError, "Specified index #{index} does not exist."
end
|
217
|
+
|
218
|
+
# Refreshes the cached @size from the underlying data.
def set_size()
  @size = @vector.size()
end
|
221
|
+
|
222
|
+
# Stores the vector name: Numeric names are kept unchanged, nil (or false)
# clears the name, and anything else is converted with to_sym.
def set_name name
  @name =
    case name
    when Numeric    then name
    when nil, false then nil
    else name.to_sym # anything but Numeric or nil
    end
end
|
100
231
|
end
|
101
232
|
end
|