daru 0.0.2.3 → 0.0.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/History.txt +10 -0
- data/README.md +17 -10
- data/Rakefile +5 -0
- data/daru.gemspec +2 -0
- data/lib/daru.rb +1 -1
- data/lib/daru/dataframe.rb +426 -146
- data/lib/daru/dataframe_by_row.rb +15 -0
- data/lib/daru/dataframe_by_vector.rb +15 -0
- data/lib/daru/index.rb +83 -0
- data/lib/daru/io.rb +30 -0
- data/lib/daru/monkeys.rb +18 -10
- data/lib/daru/vector.rb +178 -47
- data/lib/version.rb +1 -1
- data/spec/dataframe_spec.rb +550 -0
- data/spec/fixtures/countries.json +7794 -0
- data/spec/index_spec.rb +54 -0
- data/spec/io_spec.rb +49 -0
- data/spec/monkeys_spec.rb +6 -0
- data/spec/spec_helper.rb +10 -1
- data/spec/vector_spec.rb +155 -0
- metadata +47 -10
- data/spec/jruby/dataframe_spec.rb +0 -1
- data/spec/jruby/vector_spec.rb +0 -20
- data/spec/mri/dataframe_spec.rb +0 -139
- data/spec/mri/vector_spec.rb +0 -104
data/lib/daru/index.rb
ADDED
@@ -0,0 +1,83 @@
|
|
1
|
+
module Daru
  # Frozen mapping from index labels to integer positions.
  #
  # Labels are Integers or Symbols; any non-Integer label handed to the
  # constructor is converted with #to_sym. Iteration order is the insertion
  # order of the underlying Hash.
  class Index
    include Enumerable

    # Yields each label in position order (relies on Ruby's ordered Hashes).
    def each(&block)
      @relation_hash.each_key(&block)
    end

    attr_reader :relation_hash

    attr_reader :size

    attr_reader :index_class

    # index - nil (empty index), an Integer n (labels 0...n), or an ordered
    #         collection of labels.
    def initialize index
      @relation_hash = {}

      index = 0 if index.nil?
      index = Array.new(index) { |i| i } if index.is_a? Integer

      index.each_with_index do |n, idx|
        n = n.to_sym unless n.is_a?(Integer)

        @relation_hash[n] = idx
      end
      @relation_hash.freeze

      @size = @relation_hash.size

      # The first label decides the class; an empty index counts as Symbol.
      # `first` (rather than `[0]`) also works for non-Array enumerables.
      @index_class = index.first.is_a?(Integer) ? Integer : Symbol
    end

    # True when +other+ has the same labels in the same order. Objects that
    # cannot be compared (no #size or #to_a, e.g. nil) are simply unequal
    # instead of raising NoMethodError.
    def ==(other)
      return false unless other.respond_to?(:size) && other.respond_to?(:to_a)
      return false if other.size != @size

      @relation_hash.keys == other.to_a
    end

    # Position stored for label +key+, or nil when the label is absent.
    def [](key)
      @relation_hash[key]
    end

    # Returns a new Index holding this index's labels followed by the labels
    # of +other+ (another Index, a single Symbol/Integer, or an Array of
    # labels), with duplicates removed. Any other argument fails inside
    # Array#+ exactly as before.
    def +(other)
      extra =
        if other.respond_to? :relation_hash # another index object
          other.relation_hash.keys
        elsif other.is_a?(Symbol) || other.is_a?(Integer)
          [other]
        else
          other
        end

      # Built directly so this class does not depend on Array#to_index.
      Daru::Index.new((@relation_hash.keys + extra).uniq)
    end

    # Labels in position order.
    def to_a
      @relation_hash.keys
    end

    # Label stored at position +value+, or nil.
    def key(value)
      @relation_hash.key value
    end

    # Converts +new_index+ to an Index via its #to_index.
    def re_index new_index
      new_index.to_index
    end

    # True when +index+ is one of the labels.
    def include? index
      @relation_hash.has_key? index
    end

    # A new Index built from the same labels.
    def dup
      Daru::Index.new @relation_hash.keys
    end

    def to_index
      self
    end
  end
end
|
data/lib/daru/io.rb
ADDED
@@ -0,0 +1,30 @@
|
|
1
|
+
module Daru
  module IO
    class << self
      # Reads the CSV file at +path+ into a Daru::DataFrame, one row at a time.
      #
      # opts - options handed to CSV.open. Missing (or nil/false) entries for
      #        :col_sep, :headers, :converters and :header_converters fall
      #        back to ',', true, :numeric and :symbol. The caller's hash is
      #        not modified.
      #
      # If a block is given, the CSV object is yielded once before any row is
      # read. Returns the DataFrame, or nil when the file has no data rows.
      def from_csv path, opts={}
        defaults = {
          col_sep: ',',
          headers: true,
          converters: :numeric,
          header_converters: :symbol
        }
        # Same fallback rule as `opts[key] ||= default`, on a copy.
        options = defaults.merge(opts) { |_key, default, given| given || default }

        # Block form closes the file even if a row fails to load. Options are
        # passed as keywords; newer Rubies reject a positional Hash here.
        CSV.open(path, 'r', **options) do |csv|
          yield csv if block_given?

          df = nil

          csv.each_with_index do |row, index|
            # Headers are only known once the first row has been read.
            df ||= Daru::DataFrame.new({}, csv.headers)

            df.row[index] = row.fields
          end

          df
        end
      end
    end
  end
end
|
data/lib/daru/monkeys.rb
CHANGED
@@ -1,38 +1,46 @@
|
|
1
1
|
class Array
|
2
|
-
def daru_vector name=nil
|
3
|
-
Daru::Vector.new self,
|
2
|
+
# Wraps this array in a Daru::Vector with the given name and index.
def daru_vector(name = nil, index = nil)
  Daru::Vector.new(name, self, index)
end
|
5
5
|
|
6
6
|
alias_method :dv, :daru_vector
|
7
|
+
|
8
|
+
# Builds a Daru::Index whose labels are this array's elements.
def to_index
  Daru::Index.new(self)
end
|
7
11
|
end
|
8
12
|
|
9
13
|
class Range
|
10
|
-
def daru_vector name=nil
|
11
|
-
Daru::Vector.new self,
|
14
|
+
# Wraps this range in a Daru::Vector with the given name and index.
def daru_vector(name = nil, index = nil)
  Daru::Vector.new(name, self, index)
end
|
13
17
|
|
14
18
|
alias_method :dv, :daru_vector
|
19
|
+
|
20
|
+
# Builds a Daru::Index from the elements of this range.
def to_index
  Daru::Index.new(to_a)
end
|
15
23
|
end
|
16
24
|
|
17
25
|
class Hash
|
18
|
-
def daru_vector
|
19
|
-
Daru::Vector.new self.values[0], self.keys[0]
|
26
|
+
# Builds a Daru::Vector from the first key/value pair of this hash: the key
# becomes the vector's name and the value its data.
#
# Daru::Vector.new takes (name, source, index); the name therefore has to be
# passed first, matching the Array/Range/NMatrix/MDArray helpers.
def daru_vector index=nil
  Daru::Vector.new keys[0], values[0], index
end
|
21
29
|
|
22
30
|
alias_method :dv, :daru_vector
|
23
31
|
end
|
24
32
|
|
25
33
|
class NMatrix
|
26
|
-
def daru_vector name=nil
|
27
|
-
Daru::Vector.new self
|
34
|
+
# Wraps this NMatrix in a Daru::Vector with the given name and index.
def daru_vector(name = nil, index = nil)
  Daru::Vector.new(name, self, index)
end
|
29
37
|
|
30
38
|
alias_method :dv, :daru_vector
|
31
39
|
end
|
32
40
|
|
33
41
|
class MDArray
|
34
|
-
def daru_vector name=nil
|
35
|
-
Daru::Vector.new self,
|
42
|
+
# Wraps this MDArray in a Daru::Vector with the given name and index.
def daru_vector(name = nil, index = nil)
  Daru::Vector.new(name, self, index)
end
|
37
45
|
|
38
46
|
alias_method :dv, :daru_vector
|
data/lib/daru/vector.rb
CHANGED
@@ -6,96 +6,227 @@ module Daru
|
|
6
6
|
@vector.each(&block)
|
7
7
|
end
|
8
8
|
|
9
|
-
|
10
|
-
|
11
|
-
attr_reader
|
9
|
+
attr_reader :name
|
10
|
+
attr_reader :index
|
11
|
+
attr_reader :size
|
12
|
+
|
13
|
+
# Pass it name, source and index
|
14
|
+
# Positional arguments: name, source, index (all optional).
#
# name   - stored through set_name.
# source - the data; copied. Defaults to an empty Array.
# index  - anything responding to #to_index; when nil a 0..size-1 index is
#          generated.
#
# Raises IndexError when the index has fewer entries than the data.
def initialize *args
  name, source, index = args
  source ||= []

  set_name name

  @vector =
    if source.is_a?(Array)
      source.dup
    elsif source.is_a?(Range) || source.is_a?(Matrix)
      source.to_a.dup
    else # NMatrix or MDArray
      source.dup
    end

  @index = index.nil? ? Daru::Index.new(@vector.size) : index.to_index

  # TODO: Will need work for NMatrix/MDArray
  missing = @index.size - @vector.size
  raise IndexError, "Expected index size >= vector size" if missing < 0

  # Pad the data with nil so every index label has a slot.
  missing.times { @vector << nil }

  set_size
end
|
20
45
|
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
46
|
+
# Element access.
#
# With one argument: returns the element stored under that index label; a
# Numeric that is not a label is used as a raw position into the data.
# With several arguments: returns a new Daru::Vector (same name) holding the
# elements for those labels, indexed by the labels themselves.
#
# Raises IndexError when a requested label does not exist. The multi-label
# form previously failed with a TypeError from indexing the data with nil.
def [](index, *indexes)
  if indexes.empty?
    return @vector[@index[index]] if @index.include?(index)
    return @vector[index] if index.is_a?(Numeric)

    raise IndexError, "Specified index #{index} does not exist."
  end

  labels = [index, *indexes]
  values = labels.map do |label|
    raise IndexError, "Specified index #{label} does not exist." unless @index.include?(label)

    @vector[@index[label]]
  end

  Daru::Vector.new @name, values, labels
end
|
32
61
|
|
33
|
-
def [](index)
|
34
|
-
@
|
62
|
+
# Stores +value+ under +index+. A known index label is translated to its
# position; anything else is used directly as a position into the data.
# The cached size is refreshed afterwards.
def []=(index, value)
  position = @index.include?(index) ? @index[index] : index
  @vector[position] = value

  set_size
end
|
36
71
|
|
37
|
-
|
38
|
-
|
72
|
+
# Two vectors are equal if they have the exact same index values corresponding
|
73
|
+
# with the exact same elements. Name is ignored.
|
74
|
+
# Compares index, size and every element (looked up by index label).
# Objects that expose no #index or #size (nil, numbers, ...) are unequal
# rather than raising NoMethodError.
def == other
  return false unless other.respond_to?(:index) && other.respond_to?(:size)

  @index == other.index && @size == other.size &&
    @index.all? { |label| self[label] == other[label] }
end
|
40
80
|
|
41
|
-
def
|
42
|
-
|
81
|
+
# Appends +element+ by delegating to #concat without an explicit index.
def <<(element)
  concat(element)
end
|
44
84
|
|
45
|
-
def
|
46
|
-
@
|
85
|
+
# Appends +element+ under the new label +index+.
#
# When +index+ is nil and the current index is Integer-based, the index is
# regenerated with one extra position and the element takes the last one.
# Otherwise +index+ is added to the existing index.
#
# Raises IndexError when +index+ is already present. Errors raised while
# extending the index are re-raised with the message "Expected valid index."
# Only StandardError is intercepted, so Interrupt, SystemExit and similar
# pass through untouched.
def concat element, index=nil
  raise IndexError, "Expected new unique index" if @index.include? index

  if index.nil? && @index.index_class == Integer
    @index = Daru::Index.new @size + 1
    index  = @size
  else
    begin
      @index = @index.re_index(@index + index)
    rescue StandardError => e
      raise e, "Expected valid index."
    end
  end

  @size += 1

  @vector[@index[index]] = element
end
|
54
103
|
|
55
|
-
def
|
56
|
-
|
104
|
+
# Removes +element+: looks up its index label with #index_of and hands that
# to #delete_at.
def delete(element)
  delete_at(index_of(element))
end
|
58
107
|
|
59
|
-
def
|
60
|
-
|
108
|
+
# Removes the element addressed by +index+ (resolved via named_index_for).
#
# An Integer-based index is regenerated one entry shorter; otherwise the
# resolved label is dropped from the index. The cached size is refreshed
# last.
def delete_at index
  label = named_index_for index

  @vector.delete_at @index[label]

  @index =
    if @index.index_class == Integer
      Daru::Index.new(@size - 1)
    else
      (@index.to_a - [label]).to_index
    end

  set_size
end
|
62
121
|
|
63
|
-
|
122
|
+
# Returns the index label of the first occurrence of +element+.
def index_of(element)
  position = @vector.index(element) # calling Array#index
  @index.key(position)
end
|
64
125
|
|
65
|
-
def
|
66
|
-
|
126
|
+
# Returns a Hash mapping every index label to its element.
def to_hash
  @index.each_with_object({}) do |label, result|
    result[label] = self[label]
  end
end
|
68
132
|
|
69
|
-
def
|
70
|
-
|
71
|
-
@vector.compact!
|
72
|
-
@size -= 1
|
133
|
+
# Serializes the vector as the JSON form of #to_hash.
#
# Any arguments (generator state or options supplied by the JSON library or
# the caller) are forwarded to Hash#to_json instead of being discarded.
def to_json *args
  to_hash.to_json(*args)
end
|
74
136
|
|
75
137
|
# Renders the vector as an HTML table: a header row carrying the name
# ('nil' when unnamed) followed by one row per index label. Once the row
# counter exceeds +threshold+ an ellipsis row is appended and output stops.
def to_html threshold=15
  heading = (@name || 'nil').to_s
  html    = "<table><tr><th> </th><th>#{heading}</th></tr>"

  @index.each_with_index do |label, position|
    html += "<tr><td>#{label}</td><td>#{self[label]}</td></tr>"
    next unless position > threshold

    html += '<tr><td>...</td><td>...</td></tr>'
    break
  end

  html + '</table>'
end
|
155
|
+
|
156
|
+
# String form of the vector: the HTML table produced by #to_html with its
# default threshold.
def to_s
  to_html
end
|
159
|
+
|
160
|
+
# Plain-text rendering: a "#<Class:object_id @name = ... @size = ... >"
# header, the name, then one "label value" line per index entry.
#
# spacing   - maximum column width; longer text is truncated to it.
# threshold - once the row counter exceeds this, an ellipsis line is
#             appended and output stops.
#
# The column width is the longest label/element text, capped at +spacing+.
# An empty vector has no such text; it falls back to +spacing+ instead of
# failing on a nil comparison.
def inspect spacing=10, threshold=15
  longest = [@index.to_a.map(&:to_s).map(&:size).max,
             @vector.map(&:to_s).map(&:size).max].compact.max || spacing

  content   = ""
  longest   = spacing if longest > spacing
  name      = @name || 'nil'
  formatter = "\n%#{longest}.#{longest}s %#{longest}.#{longest}s"

  content += "\n#<" + self.class.to_s + ":" + self.object_id.to_s + " @name = " + name.to_s + " @size = " + size.to_s + " >"

  content += sprintf formatter, "", name
  @index.each_with_index do |index, num|
    content += sprintf formatter, index.to_s, self[index]

    if num > threshold
      content += sprintf formatter, '...', '...'
      break
    end
  end

  content += "\n"

  content
end
|
185
|
+
|
186
|
+
# Placeholder: currently does nothing and returns nil.
def compact!
  # TODO: remove nil elements and drop their index entries as well, e.g.
  #   @vector.compact!
  #   set_size
end
|
191
|
+
|
192
|
+
# Gives the vector a new name and returns it.
#
# Delegates to set_name so renaming follows the same rules as the
# constructor: Numeric names are kept as-is, nil clears the name, anything
# else is converted with #to_sym. Previously a Numeric name raised
# NoMethodError here although the constructor accepts it.
def rename new_name
  set_name new_name
end
|
86
195
|
|
87
|
-
def dup
|
88
|
-
Daru::Vector.new @vector.dup, @
|
196
|
+
# Returns a new Daru::Vector with the same name and copies of the data and
# the index.
def dup
  Daru::Vector.new(@name, @vector.dup, @index.dup)
end
|
90
199
|
|
91
|
-
def daru_vector
|
200
|
+
# Already a vector: returns self. Arguments are accepted and ignored so the
# call has the same shape as the Array/Range/Hash helpers.
def daru_vector(*name)
  self
end
|
94
203
|
|
95
204
|
alias_method :dv, :daru_vector
|
96
205
|
|
97
|
-
|
98
|
-
|
206
|
+
private
|
207
|
+
|
208
|
+
# Resolves +index+ to an index label: returned unchanged when it already is
# a label, otherwise treated as a position and mapped back to its label.
#
# Raises IndexError when neither lookup succeeds.
def named_index_for(index)
  return index if @index.include?(index)

  label = @index.key(index)
  return label if label

  raise IndexError, "Specified index #{index} does not exist."
end
|
217
|
+
|
218
|
+
# Refreshes the cached @size from the underlying data and returns it.
def set_size
  @size = @vector.size
end
|
221
|
+
|
222
|
+
# Stores the vector's name: Numeric values are kept as-is, nil/false clear
# the name, anything else is converted with #to_sym.
def set_name(name)
  @name =
    if name.is_a?(Numeric)
      name
    elsif name # anything but Numeric or nil
      name.to_sym
    end
end
|
100
231
|
end
|
101
232
|
end
|