daru 0.1.0 → 0.1.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.build.sh +6 -6
- data/.gitignore +2 -0
- data/CONTRIBUTING.md +7 -3
- data/History.md +36 -0
- data/README.md +21 -13
- data/Rakefile +16 -1
- data/benchmarks/TradeoffData.csv +65 -0
- data/benchmarks/dataframe_creation.rb +39 -0
- data/benchmarks/group_by.rb +32 -0
- data/benchmarks/row_access.rb +41 -0
- data/benchmarks/row_assign.rb +36 -0
- data/benchmarks/sorting.rb +44 -0
- data/benchmarks/vector_access.rb +31 -0
- data/benchmarks/vector_assign.rb +42 -0
- data/benchmarks/where_clause.rb +48 -0
- data/benchmarks/where_vs_filter.rb +28 -0
- data/daru.gemspec +29 -5
- data/lib/daru.rb +30 -1
- data/lib/daru/accessors/array_wrapper.rb +2 -2
- data/lib/daru/accessors/nmatrix_wrapper.rb +6 -6
- data/lib/daru/core/group_by.rb +112 -31
- data/lib/daru/core/merge.rb +170 -0
- data/lib/daru/core/query.rb +95 -0
- data/lib/daru/dataframe.rb +335 -223
- data/lib/daru/date_time/index.rb +550 -0
- data/lib/daru/date_time/offsets.rb +397 -0
- data/lib/daru/index.rb +266 -54
- data/lib/daru/io/io.rb +1 -2
- data/lib/daru/maths/arithmetic/dataframe.rb +2 -2
- data/lib/daru/maths/arithmetic/vector.rb +2 -2
- data/lib/daru/maths/statistics/dataframe.rb +58 -8
- data/lib/daru/maths/statistics/vector.rb +229 -0
- data/lib/daru/vector.rb +230 -80
- data/lib/daru/version.rb +1 -1
- data/spec/core/group_by_spec.rb +16 -16
- data/spec/core/merge_spec.rb +52 -0
- data/spec/core/query_spec.rb +171 -0
- data/spec/dataframe_spec.rb +278 -280
- data/spec/date_time/data_spec.rb +199 -0
- data/spec/date_time/index_spec.rb +433 -0
- data/spec/date_time/offsets_spec.rb +371 -0
- data/spec/fixtures/stock_data.csv +500 -0
- data/spec/index_spec.rb +317 -11
- data/spec/io/io_spec.rb +18 -17
- data/spec/math/arithmetic/dataframe_spec.rb +3 -3
- data/spec/math/statistics/dataframe_spec.rb +39 -1
- data/spec/math/statistics/vector_spec.rb +163 -1
- data/spec/monkeys_spec.rb +4 -0
- data/spec/spec_helper.rb +3 -0
- data/spec/vector_spec.rb +125 -60
- metadata +71 -14
- data/lib/daru/accessors/dataframe_by_vector.rb +0 -17
- data/lib/daru/multi_index.rb +0 -216
- data/spec/multi_index_spec.rb +0 -216
@@ -0,0 +1,170 @@
|
|
1
|
+
module Daru
|
2
|
+
module Core
|
3
|
+
# Internal helpers used by Merge.join. Every join here works on the "hashified"
# form of a DataFrame: a Hash mapping vector name => Array of values, where
# position +i+ of every Array belongs to row +i+.
module MergeHelper
  class << self
    # Renames, in place, the key of +hash+ whose disambiguated form is
    # +matcher+ (i.e. +matcher+ is "<key>_<digits>").
    #
    # The key is matched exactly (escaped and anchored). The earlier substring
    # match let key :a capture :ab_1, broke on regexp metacharacters and could
    # delete a key that was already equal to +matcher+.
    #
    # @param hash [Hash] hashified DataFrame, mutated in place.
    # @param matcher [Symbol, String, nil] disambiguated name; nil is a no-op.
    # @return [Object, nil] the moved column, or nil when nothing was renamed.
    def replace_keys_if_duplicates hash, matcher
      return if matcher.nil?

      target  = matcher.to_s
      matched = hash.keys.find do |key|
        target =~ /\A#{Regexp.escape(key.to_s)}_\d+\z/
      end
      return unless matched

      # Delete then re-insert so the renamed column ends up last, as before.
      hash[matcher] = hash.delete(matched)
    end

    # Renames non-join columns that appear in both hashes so they no longer
    # collide in the joined result (e.g. :id becomes :id_1 / :id_2).
    #
    # NOTE(review): Array#recode_repeated is not defined here - presumably a
    # daru core extension that appends _1, _2, ... to repeated entries; confirm.
    #
    # @param df_hash1 [Hash] hashified left DataFrame, mutated in place.
    # @param df_hash2 [Hash] hashified right DataFrame, mutated in place.
    # @param on [Array] names of the join columns, which are never renamed.
    def resolve_duplicates df_hash1, df_hash2, on
      hk      = df_hash1.keys + df_hash2.keys - on
      recoded = hk.recode_repeated.map(&:to_sym)
      diff    = (recoded - hk).sort

      # After sorting, the two recoded names of one column sit side by side:
      # the first goes to the left hash, the second to the right hash.
      diff.each_slice(2) do |left_name, right_name|
        replace_keys_if_duplicates df_hash1, left_name
        replace_keys_if_duplicates df_hash2, right_name
      end
    end

    # Converts +df+ into a Hash of plain Arrays.
    #
    # @param df [#to_hash] object whose #to_hash maps names to #to_a-able values.
    # @return [Hash] name => Array, in the order returned by df.to_hash.
    def hashify df
      df.to_hash.each_with_object({}) do |(name, values), table|
        table[name] = values.to_a
      end
    end

    # Inner join: one output row per (row of df1, row of df2) pair whose +on+
    # columns are all equal.
    #
    # @param df1 [#size] left frame; only its row count is read.
    # @param df2 [#size] right frame; only its row count is read.
    # @param df_hash1 [Hash] hashified left frame.
    # @param df_hash2 [Hash] hashified right frame.
    # @param on [Array] names of the join columns.
    # @return [Daru::DataFrame]
    def inner_join df1, df2, df_hash1, df_hash2, on
      joined_hash = empty_joined_hash(df_hash1, df_hash2, on)

      df1.size.times do |id1|
        df2.size.times do |id2|
          next unless on.all? { |n| df_hash1[n][id1] == df_hash2[n][id2] }

          joined_hash.each do |k, column|
            column << (df_hash1.key?(k) ? df_hash1[k][id1] : df_hash2[k][id2])
          end
        end
      end

      Daru::DataFrame.new(joined_hash, order: joined_hash.keys)
    end

    # Full outer join: every row of the left outer join, followed by the rows
    # of the right outer join that are not already present.
    #
    # Rows are combined with + and - rather than Array#|, because | also
    # removes duplicates inside each side and so dropped repeated data rows.
    #
    # @return [Daru::DataFrame] built through Daru::DataFrame.rows.
    def full_outer_join df1, df2, df_hash1, df_hash2, on
      left  = left_outer_join df1, df2, df_hash1, df_hash2, on, true
      right = right_outer_join df1, df2, df_hash1, df_hash2, on, true

      left_rows  = left.values.transpose
      right_rows = right.values.transpose

      Daru::DataFrame.rows(left_rows + (right_rows - left_rows), order: left.keys)
    end

    # Left outer join: every row of df1 is kept; columns that only exist in
    # df2 are nil for rows without a match.
    #
    # @param as_hash [Boolean] when true, return the raw Hash of columns
    #   instead of wrapping it in a Daru::DataFrame.
    # @return [Daru::DataFrame, Hash]
    def left_outer_join df1, df2, df_hash1, df_hash2, on, as_hash=false
      joined_hash = empty_joined_hash(df_hash1, df_hash2, on)
      one_sided_outer_join joined_hash, df1.size, df2.size, df_hash1, df_hash2, on

      return joined_hash if as_hash
      Daru::DataFrame.new(joined_hash, order: joined_hash.keys)
    end

    # Right outer join: every row of df2 is kept; columns that only exist in
    # df1 are nil for rows without a match. Column order is the same as for
    # the other joins (left-only columns, join columns, right-only columns).
    #
    # @param as_hash [Boolean] when true, return the raw Hash of columns
    #   instead of wrapping it in a Daru::DataFrame.
    # @return [Daru::DataFrame, Hash]
    def right_outer_join df1, df2, df_hash1, df_hash2, on, as_hash=false
      joined_hash = empty_joined_hash(df_hash1, df_hash2, on)
      one_sided_outer_join joined_hash, df2.size, df1.size, df_hash2, df_hash1, on

      return joined_hash if as_hash
      Daru::DataFrame.new(joined_hash, order: joined_hash.keys)
    end

    # Ensures every join column has a truthy entry in both hashes.
    #
    # @raise [ArgumentError] when a name in +on+ is missing from either hash.
    def verify_dataframes df_hash1, df_hash2, on
      unless on.all? { |e| df_hash1[e] }
        raise ArgumentError, "All fields in :on must be present in self"
      end

      unless on.all? { |e| df_hash2[e] }
        raise ArgumentError, "All fields in :on must be present in other DF"
      end
    end

    private

    # Builds the empty result table shared by all joins: left-only columns,
    # then the join columns, then right-only columns, each an empty Array.
    def empty_joined_hash df_hash1, df_hash2, on
      columns = (df_hash1.keys - on) | on | (df_hash2.keys - on)
      columns.each_with_object({}) { |name, table| table[name] = [] }
    end

    # Fills +joined_hash+ with an outer join that keeps every row of the
    # "kept" side. Values come from the kept side whenever it owns the column;
    # otherwise from the matching row of the other side, or nil if none matched.
    def one_sided_outer_join joined_hash, kept_size, other_size, kept_hash, other_hash, on
      kept_size.times do |kept_row|
        matched = false

        other_size.times do |other_row|
          next unless on.all? { |n| kept_hash[n][kept_row] == other_hash[n][other_row] }

          matched = true
          joined_hash.each do |name, column|
            column << (kept_hash.key?(name) ? kept_hash[name][kept_row] : other_hash[name][other_row])
          end
        end

        next if matched

        joined_hash.each do |name, column|
          column << (kept_hash.key?(name) ? kept_hash[name][kept_row] : nil)
        end
      end

      joined_hash
    end
  end
end
|
139
|
+
# Private entry point for SQL-style joins between two DataFrames. The work
# itself is done by MergeHelper; this module only prepares the inputs and
# selects the join routine.
# @private
module Merge
  class << self
    # Joins +df1+ with +df2+.
    #
    # @param df1 [Daru::DataFrame] left frame.
    # @param df2 [Daru::DataFrame] right frame.
    # @param opts [Hash] :on is the list of join columns; :how is one of
    #   :inner, :outer, :left or :right.
    # @return whatever the selected MergeHelper join returns.
    # @raise [ArgumentError] when :how is not one of the four known values
    #   (raised only after the inputs have been hashified and verified).
    def join df1, df2, opts={}
      helper = MergeHelper
      on     = opts[:on]

      df_hash1 = helper.hashify df1
      df_hash2 = helper.hashify df2

      helper.verify_dataframes df_hash1, df_hash2, on
      helper.resolve_duplicates df_hash1, df_hash2, on

      routine = {
        inner: :inner_join,
        outer: :full_outer_join,
        left:  :left_outer_join,
        right: :right_outer_join
      }[opts[:how]]

      unless routine
        raise ArgumentError, "Unrecognized option in :how => #{opts[:how]}"
      end

      helper.public_send(routine, df1, df2, df_hash1, df_hash2, on)
    end
  end
end
|
169
|
+
end
|
170
|
+
end
|
@@ -0,0 +1,95 @@
|
|
1
|
+
module Daru
|
2
|
+
module Core
|
3
|
+
module Query
|
4
|
+
# Thin wrapper around an Array of booleans that supports element-wise
# logical combination (&, |, !) with other BoolArray objects.
class BoolArray
  # @return [Array] the wrapped array of boolean values.
  attr_reader :barry

  # @param barry [Array] values to wrap; stored as given, not copied.
  def initialize barry
    @barry = barry
  end

  # Element-wise AND. The result has one entry per element of the receiver;
  # positions missing from +other+ read as nil.
  #
  # @param other [#barry]
  # @return [BoolArray]
  def & other
    other_barry = other.barry
    BoolArray.new(@barry.each_with_index.map { |b, i| b && other_barry[i] })
  end

  alias :and :&

  # Element-wise OR. The result has one entry per element of the receiver.
  #
  # @param other [#barry]
  # @return [BoolArray]
  def | other
    other_barry = other.barry
    BoolArray.new(@barry.each_with_index.map { |b, i| b || other_barry[i] })
  end

  alias :or :|

  # Element-wise negation.
  #
  # @return [BoolArray]
  def !
    BoolArray.new(@barry.map { |b| !b })
  end

  # Two BoolArrays are equal when their wrapped arrays are equal. Objects
  # that do not expose #barry (nil, plain Arrays, ...) compare as not equal
  # instead of raising NoMethodError.
  #
  # @return [Boolean]
  def == other
    other.respond_to?(:barry) && @barry == other.barry
  end

  # @return [Array] the wrapped array itself (not a copy).
  def to_a
    @barry
  end

  def inspect
    "(#{self.class}:#{self.object_id} bool_arry=#{@barry})"
  end
end
|
51
|
+
|
52
|
+
class << self
  # Compares every element of +data+ against the single value +other+.
  #
  # @param operator [Symbol] method name sent to each element.
  # @param data [Enumerable] elements to test.
  # @param other [Object] right-hand operand for every comparison.
  # @return [BoolArray] strict true/false for each element.
  def apply_scalar_operator operator, data, other
    flags = data.each_with_object([]) do |d, collected|
      collected << (d.send(operator, other) ? true : false)
    end

    BoolArray.new(flags)
  end

  # Compares +vector+ with +other+ position by position.
  #
  # @param operator [Symbol] method name sent to each element of +vector+.
  # @param vector [#each_with_index] left-hand operands.
  # @param other [#[]] right-hand operands, read with the second value
  #   yielded by vector.each_with_index.
  # @return [BoolArray] strict true/false for each element.
  def apply_vector_operator operator, vector, other
    flags = []
    vector.each_with_index do |d, i|
      outcome = d.send(operator, other[i])
      flags.push(outcome ? true : false)
    end

    BoolArray.new(flags)
  end

  # Applies +bool_array+ to every vector of +data_frame+ through
  # vector.where and assembles the results into a new DataFrame that reuses
  # the index of the first filtered vector.
  #
  # @return [Daru::DataFrame]
  def df_where data_frame, bool_array
    filtered = data_frame.map { |vector| vector.where(bool_array) }

    Daru::DataFrame.new(
      filtered, order: data_frame.vectors, index: filtered[0].index, clone: false)
  end

  # Keeps the entries of +data+ and +index+ at positions where +bool_array+
  # is truthy.
  #
  # @param data [#[]] values, read by integer position.
  # @param index [#[]] labels, read by integer position.
  # @param bool_array [#to_a] selection mask.
  # @param dtype passed through to Daru::Vector.new.
  # @return [Daru::Vector]
  def vector_where data, index, bool_array, dtype
    kept_data  = []
    kept_index = []

    bool_array.to_a.each_with_index do |b, i|
      next unless b

      kept_data.push(data[i])
      kept_index.push(index[i])
    end

    Daru::Vector.new(kept_data, index: kept_index, dtype: dtype)
  end
end
|
93
|
+
end
|
94
|
+
end
|
95
|
+
end
|
data/lib/daru/dataframe.rb
CHANGED
@@ -1,7 +1,6 @@
|
|
1
1
|
$:.unshift File.dirname(__FILE__)
|
2
2
|
|
3
3
|
require 'accessors/dataframe_by_row.rb'
|
4
|
-
require 'accessors/dataframe_by_vector.rb'
|
5
4
|
require 'maths/arithmetic/dataframe.rb'
|
6
5
|
require 'maths/statistics/dataframe.rb'
|
7
6
|
require 'plotting/dataframe.rb'
|
@@ -226,11 +225,11 @@ module Daru
|
|
226
225
|
@data = []
|
227
226
|
|
228
227
|
temp_name = opts[:name]
|
229
|
-
@name = temp_name
|
228
|
+
@name = temp_name || SecureRandom.uuid
|
230
229
|
|
231
230
|
if source.empty?
|
232
|
-
@vectors =
|
233
|
-
@index =
|
231
|
+
@vectors = try_create_index vectors
|
232
|
+
@index = try_create_index index
|
234
233
|
create_empty_vectors
|
235
234
|
else
|
236
235
|
case source
|
@@ -239,8 +238,8 @@ module Daru
|
|
239
238
|
raise ArgumentError, "Number of vectors (#{vectors.size}) should \
|
240
239
|
equal order size (#{source.size})" if source.size != vectors.size
|
241
240
|
|
242
|
-
@index =
|
243
|
-
@vectors =
|
241
|
+
@index = try_create_index(index || source[0].size)
|
242
|
+
@vectors = try_create_index(vectors)
|
244
243
|
|
245
244
|
@vectors.each_with_index do |vec,idx|
|
246
245
|
@data << Daru::Vector.new(source[idx], index: @index)
|
@@ -253,9 +252,10 @@ module Daru
|
|
253
252
|
initialize(hsh, index: index, order: vectors, name: @name, clone: clone)
|
254
253
|
else # array of hashes
|
255
254
|
if vectors.nil?
|
256
|
-
@vectors = Daru::Index.new source[0].keys
|
255
|
+
@vectors = Daru::Index.new source[0].keys
|
257
256
|
else
|
258
|
-
@vectors = Daru::Index.new
|
257
|
+
@vectors = Daru::Index.new(
|
258
|
+
(vectors + (source[0].keys - vectors)).uniq)
|
259
259
|
end
|
260
260
|
@index = Daru::Index.new(index || source.size)
|
261
261
|
|
@@ -272,8 +272,9 @@ module Daru
|
|
272
272
|
create_vectors_index_with vectors, source
|
273
273
|
if all_daru_vectors_in_source? source
|
274
274
|
if !index.nil?
|
275
|
-
@index =
|
275
|
+
@index = try_create_index index
|
276
276
|
elsif all_vectors_have_equal_indexes?(source)
|
277
|
+
vectors_have_same_index = true
|
277
278
|
@index = source.values[0].index.dup
|
278
279
|
else
|
279
280
|
all_indexes = []
|
@@ -289,17 +290,28 @@ module Daru
|
|
289
290
|
|
290
291
|
if clone
|
291
292
|
@vectors.each do |vector|
|
292
|
-
|
293
|
-
|
294
|
-
|
295
|
-
|
293
|
+
# avoids matching indexes of vectors if all the supplied vectors
|
294
|
+
# have the same index.
|
295
|
+
if vectors_have_same_index
|
296
|
+
v = source[vector].dup
|
297
|
+
else
|
298
|
+
v = Daru::Vector.new([], name: vector, index: @index)
|
299
|
+
|
300
|
+
@index.each do |idx|
|
301
|
+
if source[vector].index.include? idx
|
302
|
+
v[idx] = source[vector][idx]
|
303
|
+
else
|
304
|
+
v[idx] = nil
|
305
|
+
end
|
306
|
+
end
|
296
307
|
end
|
308
|
+
@data << v
|
297
309
|
end
|
298
310
|
else
|
299
311
|
@data.concat source.values
|
300
312
|
end
|
301
313
|
else
|
302
|
-
@index =
|
314
|
+
@index = try_create_index(index || source.values[0].size)
|
303
315
|
|
304
316
|
@vectors.each do |name|
|
305
317
|
@data << Daru::Vector.new(source[name].dup, name: set_name(name), index: @index)
|
@@ -313,6 +325,11 @@ module Daru
|
|
313
325
|
update
|
314
326
|
end
|
315
327
|
|
328
|
+
# Deprecated alias for #[]. Prints a deprecation notice on $stderr and then
# forwards every received argument to #[].
#
# @param args arguments passed on unchanged to #[].
# @return whatever #[] returns for +args+.
def vector *args
  $stderr.puts "#vector has been deprecated in favour of #[]. Please use that."
  # Forward the arguments actually received; the body previously referenced
  # an undefined local (`names`), so every call raised NameError.
  self[*args]
end
|
332
|
+
|
316
333
|
# Access row or vector. Specify name of row/vector followed by axis(:row, :vector).
|
317
334
|
# Defaults to *:vector*. Use of this method is not recommended for accessing
|
318
335
|
# rows or vectors. Use df.row[:a] for accessing row with index ':a' or
|
@@ -324,7 +341,6 @@ module Daru
|
|
324
341
|
else
|
325
342
|
axis = :vector
|
326
343
|
end
|
327
|
-
names.map! { |e| e.respond_to?(:to_sym) ? e.to_sym : e }
|
328
344
|
|
329
345
|
if axis == :vector
|
330
346
|
access_vector *names
|
@@ -349,7 +365,6 @@ module Daru
|
|
349
365
|
|
350
366
|
name = args[0..-2]
|
351
367
|
vector = args[-1]
|
352
|
-
name.map! { |e| e.respond_to?(:to_sym) ? e.to_sym : e }
|
353
368
|
|
354
369
|
if axis == :vector
|
355
370
|
insert_or_modify_vector name, vector
|
@@ -360,15 +375,6 @@ module Daru
|
|
360
375
|
end
|
361
376
|
end
|
362
377
|
|
363
|
-
# Access a vector or set/create a vector. Refer #[] and #[]= docs for details.
|
364
|
-
#
|
365
|
-
# == Usage
|
366
|
-
# df.vector[:a] # access vector named ':a'
|
367
|
-
# df.vector[:b] = [1,2,3] # set vector ':b' to [1,2,3]
|
368
|
-
def vector
|
369
|
-
Daru::Accessors::DataFrameByVector.new(self)
|
370
|
-
end
|
371
|
-
|
372
378
|
# Access a vector by name.
|
373
379
|
def column name
|
374
380
|
vector[name]
|
@@ -398,24 +404,13 @@ module Daru
|
|
398
404
|
# * +vectors_to_dup+ - An Array specifying the names of Vectors to
|
399
405
|
# be duplicated. Will duplicate the entire DataFrame if not specified.
|
400
406
|
def dup vectors_to_dup=nil
  # Default to every vector of this DataFrame.
  vectors_to_dup ||= @vectors.to_a

  # Copy each requested vector out as a plain Array, in the requested order.
  src = vectors_to_dup.map { |vec| @data[@vectors[vec]].to_a }
  new_order = Daru::Index.new(vectors_to_dup)

  Daru::DataFrame.new(
    src, order: new_order, index: @index.dup, name: @name, clone: true)
end
|
@@ -465,6 +460,14 @@ module Daru
|
|
465
460
|
(vecs.nil? ? self : dup(vecs)).row[*(row_indexes - rows_with_nil)]
|
466
461
|
end
|
467
462
|
|
463
|
+
# Yields every entry of the DataFrame's index to the block and returns the
# DataFrame itself. Without a block, an Enumerator is returned instead.
def each_index &block
  if block_given?
    @index.each(&block)
    self
  else
    to_enum(:each_index)
  end
end
|
470
|
+
|
468
471
|
# Iterate over each vector
|
469
472
|
def each_vector(&block)
|
470
473
|
return to_enum(:each_vector) unless block_given?
|
@@ -608,7 +611,7 @@ module Daru
|
|
608
611
|
#
|
609
612
|
# Recode works similarly to #map, but an important difference between
|
610
613
|
# the two is that recode returns a modified Daru::DataFrame instead
|
611
|
-
# of an Array. For this reason, #
|
614
|
+
# of an Array. For this reason, #recode expects that every run of the
|
612
615
|
# block to return a Daru::Vector.
|
613
616
|
#
|
614
617
|
# Just like map and each, recode also accepts an optional _axis_ argument.
|
@@ -667,7 +670,8 @@ module Daru
|
|
667
670
|
df = self.dup
|
668
671
|
df.each_vector_with_index do |v, i|
|
669
672
|
ret = yield v
|
670
|
-
ret.is_a?(Daru::Vector) or
|
673
|
+
ret.is_a?(Daru::Vector) or
|
674
|
+
raise TypeError, "Every iteration must return Daru::Vector not #{ret.class}"
|
671
675
|
df[*i] = ret
|
672
676
|
end
|
673
677
|
|
@@ -841,7 +845,7 @@ module Daru
|
|
841
845
|
idx = named_index_for index
|
842
846
|
|
843
847
|
if @index.include? idx
|
844
|
-
@index =
|
848
|
+
@index = Daru::Index.new(@index.to_a - [idx])
|
845
849
|
self.each_vector do |vector|
|
846
850
|
vector.delete_at idx
|
847
851
|
end
|
@@ -1015,7 +1019,7 @@ module Daru
|
|
1015
1019
|
number_of_missing << row.missing_positions.size
|
1016
1020
|
end
|
1017
1021
|
|
1018
|
-
Daru::Vector.new number_of_missing, index: @index, name: "#{@name}_missing_rows"
|
1022
|
+
Daru::Vector.new number_of_missing, index: @index, name: "#{@name}_missing_rows"
|
1019
1023
|
end
|
1020
1024
|
|
1021
1025
|
# TODO: remove next version
|
@@ -1087,9 +1091,18 @@ module Daru
|
|
1087
1091
|
|
1088
1092
|
# Tells whether +vector+ is one of the vector names of this DataFrame.
#
# @param vector the name to look up in the vectors index.
# @return the result of @vectors.include?
def has_vector? vector
  @vectors.include?(vector)
end
|
1092
1096
|
|
1097
|
+
# Works like Array#any?.
|
1098
|
+
#
|
1099
|
+
# @param [Symbol] axis (:vector) The axis to iterate over. Can be :vector or
|
1100
|
+
# :row. A Daru::Vector object is yielded in the block.
|
1101
|
+
# @example Using any?
|
1102
|
+
# df = Daru::DataFrame.new({a: [1,2,3,4,5], b: ['a', 'b', 'c', 'd', 'e']})
|
1103
|
+
# df.any?(:row) do |row|
|
1104
|
+
# row[:a] < 3 and row[:b] == 'b'
|
1105
|
+
# end #=> true
|
1093
1106
|
def any? axis=:vector, &block
|
1094
1107
|
if axis == :vector or axis == :column
|
1095
1108
|
@data.any?(&block)
|
@@ -1103,6 +1116,15 @@ module Daru
|
|
1103
1116
|
end
|
1104
1117
|
end
|
1105
1118
|
|
1119
|
+
# Works like Array#all?
|
1120
|
+
#
|
1121
|
+
# @param [Symbol] axis (:vector) The axis to iterate over. Can be :vector or
|
1122
|
+
# :row. A Daru::Vector object is yielded in the block.
|
1123
|
+
# @example Using all?
|
1124
|
+
# df = Daru::DataFrame.new({a: [1,2,3,4,5], b: ['a', 'b', 'c', 'd', 'e']})
|
1125
|
+
# df.all?(:row) do |row|
|
1126
|
+
# row[:a] < 10
|
1127
|
+
# end #=> true
|
1106
1128
|
def all? axis=:vector, &block
|
1107
1129
|
if axis == :vector or axis == :column
|
1108
1130
|
@data.all?(&block)
|
@@ -1183,38 +1205,126 @@ module Daru
|
|
1183
1205
|
# # ["foo", "one", 3]=>[6],
|
1184
1206
|
# # ["foo", "three", 8]=>[7],
|
1185
1207
|
# # ["foo", "two", 3]=>[2, 4]}
|
1186
|
-
def group_by vectors
|
1187
|
-
vectors
|
1208
|
+
def group_by *vectors
  # Accept both group_by(:a, :b) and group_by([:a, :b]).
  vectors.flatten!

  vectors.each do |v|
    next if has_vector?(v)

    raise(ArgumentError, "Vector #{v} does not exist")
  end

  Daru::Core::GroupBy.new(self, vectors)
end
|
1193
1215
|
|
1194
|
-
def reindex_vectors
|
1195
|
-
raise ArgumentError, "
|
1196
|
-
|
1216
|
+
# Builds a new DataFrame whose vectors are exactly +new_vectors+, in that
# order. Vectors that exist in this DataFrame are copied over with #[];
# the others are filled with nil for every row.
#
# @param new_vectors [Daru::Index] the vector names of the result.
# @return [Daru::DataFrame] a new DataFrame; the receiver is not modified here.
# @raise [ArgumentError] when +new_vectors+ is not a Daru::Index.
def reindex_vectors new_vectors
  unless new_vectors.kind_of?(Daru::Index)
    # The message used to interpolate an undefined local (`new_index`), which
    # turned this guard into a NameError.
    raise ArgumentError, "Must pass the new index of type Index or its "\
      "subclasses, not #{new_vectors.class}"
  end

  cl = Daru::DataFrame.new({}, order: new_vectors, index: @index, name: @name)
  new_vectors.each do |vec|
    cl[vec] = @vectors.include?(vec) ? self[vec] : [nil] * nrows
  end

  cl
end
|
1231
|
+
|
1232
|
+
# Appends the values of +other_df+ below the values of this DataFrame,
# vector by vector, and returns the result as a new DataFrame ordered by
# this DataFrame's vectors. No index is passed to the new DataFrame.
# Very premature implementation. Use with caution.
def concat other_df
  combined = []

  @vectors.each do |name|
    own_values   = self[name].to_a
    other_values = other_df[name].to_a
    combined.push(own_values.concat(other_values))
  end

  Daru::DataFrame.new(combined, order: @vectors)
end
|
1242
|
+
|
1243
|
+
# Use the values of one column as the new index of this DataFrame.
#
# @param new_index name of the vector whose values become the index.
# @param opts [Hash] pass a truthy :keep to leave that vector in the
#   DataFrame; otherwise it is removed with #delete_vector.
# @return [Daru::DataFrame] self
# @raise [ArgumentError] when the column does not hold @size unique values.
def set_index new_index, opts={}
  if @size != self[new_index].uniq.size
    raise ArgumentError, "All elements in new index must be unique."
  end

  labels = self[new_index].to_a
  self.index = Daru::Index.new(labels)
  self.delete_vector(new_index) unless opts[:keep]

  self
end
|
1200
1253
|
|
1201
|
-
# Change the index of the DataFrame and
|
1254
|
+
# Change the index of the DataFrame and preserve the labels of the previous
|
1255
|
+
# indexing. New index can be Daru::Index or any of its subclasses.
|
1202
1256
|
#
|
1203
|
-
# @param [
|
1204
|
-
|
1205
|
-
|
1257
|
+
# @param [Daru::Index] new_index The new Index for reindexing the DataFrame.
|
1258
|
+
# @example Reindexing DataFrame
|
1259
|
+
# df = Daru::DataFrame.new({a: [1,2,3,4], b: [11,22,33,44]},
|
1260
|
+
# index: ['a','b','c','d'])
|
1261
|
+
# #=>
|
1262
|
+
# ##<Daru::DataFrame:83278130 @name = b19277b8-c548-41da-ad9a-2ad8c060e273 @size = 4>
|
1263
|
+
# # a b
|
1264
|
+
# # a 1 11
|
1265
|
+
# # b 2 22
|
1266
|
+
# # c 3 33
|
1267
|
+
# # d 4 44
|
1268
|
+
# df.reindex Daru::Index.new(['b', 0, 'a', 'g'])
|
1269
|
+
# #=>
|
1270
|
+
# ##<Daru::DataFrame:83177070 @name = b19277b8-c548-41da-ad9a-2ad8c060e273 @size = 4>
|
1271
|
+
# # a b
|
1272
|
+
# # b 2 22
|
1273
|
+
# # 0 nil nil
|
1274
|
+
# # a 1 11
|
1275
|
+
# # g nil nil
|
1276
|
+
def reindex new_index
  unless new_index.kind_of?(Daru::Index)
    raise ArgumentError, "Must pass the new index of type Index or its "\
      "subclasses, not #{new_index.class}"
  end

  cl = Daru::DataFrame.new({}, order: @vectors, index: new_index, name: @name)

  # Rows known to the current index are copied; unknown labels get a row of
  # nils, one per vector.
  new_index.each do |idx|
    cl.row[idx] = @index.include?(idx) ? self.row[idx] : [nil] * ncols
  end

  cl
end
|
1291
|
+
|
1292
|
+
# Reassign index with a new index of type Daru::Index or any of its subclasses.
|
1293
|
+
#
|
1294
|
+
# @param [Daru::Index] idx New index object on which the rows of the dataframe
|
1295
|
+
# are to be indexed.
|
1296
|
+
# @example Reassigning index of a DataFrame
|
1297
|
+
# df = Daru::DataFrame.new({a: [1,2,3,4], b: [11,22,33,44]})
|
1298
|
+
# df.index.to_a #=> [0,1,2,3]
|
1299
|
+
#
|
1300
|
+
# df.index = Daru::Index.new(['a','b','c','d'])
|
1301
|
+
# df.index.to_a #=> ['a','b','c','d']
|
1302
|
+
# df.row['a'].to_a #=> [1,11]
|
1303
|
+
def index= idx
  # Point every stored vector at the new index before recording it on the
  # DataFrame itself.
  @data.each do |vec|
    vec.index = idx
  end
  @index = idx

  self
end
|
1214
1309
|
|
1215
|
-
#
|
1216
|
-
|
1217
|
-
|
1310
|
+
# Reassign vectors with a new index of type Daru::Index or any of its subclasses.
|
1311
|
+
#
|
1312
|
+
# @param [Daru::Index] idx The new index object on which the vectors are to
|
1313
|
+
# be indexed. Must be of the same size as ncols.
|
1314
|
+
# @example Reassigning vectors of a DataFrame
|
1315
|
+
# df = Daru::DataFrame.new({a: [1,2,3,4], b: [:a,:b,:c,:d], c: [11,22,33,44]})
|
1316
|
+
# df.vectors.to_a #=> [:a, :b, :c]
|
1317
|
+
#
|
1318
|
+
# df.vectors = Daru::Index.new([:foo, :bar, :baz])
|
1319
|
+
# df.vectors.to_a #=> [:foo, :bar, :baz]
|
1320
|
+
def vectors= idx
  # Validate the argument itself. The guard used to inspect `index` (this
  # DataFrame's own row index), so it could never reject a bad +idx+.
  raise ArgumentError, "Can only reindex with Index and its subclasses" unless
    idx.kind_of?(Daru::Index)
  # One label is required per vector. (The message previously lacked the
  # space between "to" and "dataframe".)
  raise ArgumentError, "Specified index length #{idx.size} not equal to "\
    "dataframe size #{ncols}" if idx.size != ncols

  @vectors = idx
  self
end
|
1219
1329
|
|
1220
1330
|
# Return the indexes of all the numeric vectors. Will include vectors with nils
|
@@ -1231,8 +1341,8 @@ module Daru
|
|
1231
1341
|
def numeric_vector_names
  numerics = []

  # Keep the names of the vectors whose #type is :numeric, in vector order.
  @vectors.each do |v|
    next unless self[v].type == :numeric

    numerics.push(v)
  end

  numerics
end
|
@@ -1248,7 +1358,7 @@ module Daru
|
|
1248
1358
|
arr
|
1249
1359
|
end
|
1250
1360
|
|
1251
|
-
order =
|
1361
|
+
order = Index.new(nv)
|
1252
1362
|
Daru::DataFrame.new(arry, clone: cln, order: order, index: @index)
|
1253
1363
|
end
|
1254
1364
|
|
@@ -1302,9 +1412,11 @@ module Daru
|
|
1302
1412
|
|
1303
1413
|
opts[:by] = create_logic_blocks vector_order, opts[:by]
|
1304
1414
|
opts[:ascending] = sort_order_array vector_order, opts[:ascending]
|
1305
|
-
|
1306
|
-
send(opts[:type], vector_order,
|
1307
|
-
|
1415
|
+
idx = @index.to_a
|
1416
|
+
send(opts[:type], vector_order, idx, opts[:by], opts[:ascending])
|
1417
|
+
self.index = Daru::Index.new(idx)
|
1418
|
+
|
1419
|
+
self
|
1308
1420
|
end
|
1309
1421
|
|
1310
1422
|
# Non-destructive version of #sort!
|
@@ -1347,7 +1459,8 @@ module Daru
|
|
1347
1459
|
# # [:bar] 18 26
|
1348
1460
|
# # [:foo] 10 12
|
1349
1461
|
def pivot_table opts={}
|
1350
|
-
raise ArgumentError,
|
1462
|
+
raise ArgumentError,
|
1463
|
+
"Specify grouping index" if !opts[:index] or opts[:index].empty?
|
1351
1464
|
|
1352
1465
|
index = opts[:index]
|
1353
1466
|
vectors = opts[:vectors] || []
|
@@ -1389,18 +1502,20 @@ module Daru
|
|
1389
1502
|
end
|
1390
1503
|
end
|
1391
1504
|
|
1392
|
-
df_index = Daru::MultiIndex.
|
1505
|
+
df_index = Daru::MultiIndex.from_tuples super_hash.keys
|
1393
1506
|
|
1394
1507
|
vector_indexes = []
|
1395
1508
|
super_hash.each_value do |sub_hash|
|
1396
1509
|
vector_indexes.concat sub_hash.keys
|
1397
1510
|
end
|
1398
|
-
|
1511
|
+
|
1512
|
+
df_vectors = Daru::MultiIndex.from_tuples vector_indexes.uniq
|
1399
1513
|
pivoted_dataframe = Daru::DataFrame.new({}, index: df_index, order: df_vectors)
|
1400
1514
|
|
1401
1515
|
super_hash.each do |row_index, sub_h|
|
1402
1516
|
sub_h.each do |vector_index, val|
|
1403
|
-
pivoted_dataframe[symbolize(vector_index)][symbolize(row_index)] = val
|
1517
|
+
# pivoted_dataframe[symbolize(vector_index)][symbolize(row_index)] = val
|
1518
|
+
pivoted_dataframe[vector_index][row_index] = val
|
1404
1519
|
end
|
1405
1520
|
end
|
1406
1521
|
return pivoted_dataframe
|
@@ -1430,47 +1545,33 @@ module Daru
|
|
1430
1545
|
df_new
|
1431
1546
|
end
|
1432
1547
|
|
1433
|
-
# Join 2 DataFrames
|
1434
|
-
#
|
1435
|
-
#
|
1436
|
-
#
|
1437
|
-
#
|
1438
|
-
# @
|
1439
|
-
|
1440
|
-
|
1441
|
-
|
1442
|
-
|
1443
|
-
|
1444
|
-
|
1445
|
-
|
1446
|
-
|
1447
|
-
|
1448
|
-
|
1449
|
-
|
1450
|
-
|
1451
|
-
|
1452
|
-
|
1453
|
-
|
1454
|
-
|
1455
|
-
|
1456
|
-
|
1457
|
-
|
1458
|
-
|
1459
|
-
|
1460
|
-
|
1461
|
-
if other_ds_hash[key].nil?
|
1462
|
-
if type == :left
|
1463
|
-
fields_new.each{|field| new_case[field] = nil}
|
1464
|
-
new_ds.add_row(Daru::Vector.new(new_case))
|
1465
|
-
end
|
1466
|
-
else
|
1467
|
-
other_ds_hash[key].each do |new_values|
|
1468
|
-
new_ds.add_row(Daru::Vector.new(new_case.merge(new_values)))
|
1469
|
-
end
|
1470
|
-
end
|
1471
|
-
end
|
1472
|
-
|
1473
|
-
new_ds
|
1548
|
+
# Join 2 DataFrames with SQL style joins. Currently supports inner, left
|
1549
|
+
# outer, right outer and full outer joins.
|
1550
|
+
#
|
1551
|
+
# @param [Daru::DataFrame] other_df Another DataFrame on which the join is
|
1552
|
+
# to be performed.
|
1553
|
+
# @param [Hash] opts Options Hash
|
1554
|
+
# @option opts [Symbol] :how Can be one of :inner, :left, :right or :outer.
|
1555
|
+
# @option opts [Array] :on The columns on which the join is to be performed.
|
1556
|
+
# Column names specified here must be common to both DataFrames.
|
1557
|
+
# @return [Daru::DataFrame]
|
1558
|
+
# @example Inner Join
|
1559
|
+
# left = Daru::DataFrame.new({
|
1560
|
+
# :id => [1,2,3,4],
|
1561
|
+
# :name => ['Pirate', 'Monkey', 'Ninja', 'Spaghetti']
|
1562
|
+
# })
|
1563
|
+
# right = Daru::DataFrame.new({
|
1564
|
+
# :id => [1,2,3,4],
|
1565
|
+
# :name => ['Rutabaga', 'Pirate', 'Darth Vader', 'Ninja']
|
1566
|
+
# })
|
1567
|
+
# left.join(right, how: :inner, on: [:name])
|
1568
|
+
# #=>
|
1569
|
+
# ##<Daru::DataFrame:82416700 @name = 74c0811b-76c6-4c42-ac93-e6458e82afb0 @size = 2>
|
1570
|
+
# # id_1 name id_2
|
1571
|
+
# # 0 1 Pirate 2
|
1572
|
+
# # 1 3 Ninja 4
|
1573
|
+
def join(other_df, opts={})
  # The join itself lives in Daru::Core::Merge; this DataFrame is the left side.
  merger = Daru::Core::Merge
  merger.join(self, other_df, opts)
end
|
1475
1576
|
|
1476
1577
|
|
@@ -1486,7 +1587,7 @@ module Daru
|
|
1486
1587
|
# to new dataset, and fields which responds to second
|
1487
1588
|
# pattern will be added one case for each different %n.
|
1488
1589
|
#
|
1489
|
-
#
|
1590
|
+
# @example
|
1490
1591
|
# cases=[
|
1491
1592
|
# ['1','george','red',10,'blue',20,nil,nil],
|
1492
1593
|
# ['2','fred','green',15,'orange',30,'white',20],
|
@@ -1494,17 +1595,16 @@ module Daru
|
|
1494
1595
|
# ]
|
1495
1596
|
# ds=Daru::DataFrame.rows(cases, order: [:id, :name, :car_color1, :car_value1, :car_color2, :car_value2, :car_color3, :car_value3])
|
1496
1597
|
# ds.one_to_many([:id],'car_%v%n').to_matrix
|
1497
|
-
#
|
1498
|
-
#
|
1499
|
-
#
|
1500
|
-
#
|
1501
|
-
#
|
1502
|
-
#
|
1503
|
-
#
|
1504
|
-
#
|
1598
|
+
# #=> Matrix[
|
1599
|
+
# # ["red", "1", 10],
|
1600
|
+
# # ["blue", "1", 20],
|
1601
|
+
# # ["green", "2", 15],
|
1602
|
+
# # ["orange", "2", 30],
|
1603
|
+
# # ["white", "2", 20]
|
1604
|
+
# # ]
|
1505
1605
|
def one_to_many(parent_fields, pattern)
|
1506
1606
|
re = Regexp.new pattern.gsub("%v","(.+?)").gsub("%n","(\\d+?)")
|
1507
|
-
ds_vars = parent_fields
|
1607
|
+
ds_vars = parent_fields.dup
|
1508
1608
|
vars = []
|
1509
1609
|
max_n = 0
|
1510
1610
|
h = parent_fields.inject({}) { |a,v|
|
@@ -1512,8 +1612,8 @@ module Daru
|
|
1512
1612
|
a
|
1513
1613
|
}
|
1514
1614
|
# Adding _col_id
|
1515
|
-
h[
|
1516
|
-
ds_vars.push(
|
1615
|
+
h['_col_id'] = Daru::Vector.new([])
|
1616
|
+
ds_vars.push('_col_id')
|
1517
1617
|
|
1518
1618
|
@vectors.each do |f|
|
1519
1619
|
if f =~ re
|
@@ -1529,20 +1629,20 @@ module Daru
|
|
1529
1629
|
each_row do |row|
|
1530
1630
|
row_out = {}
|
1531
1631
|
parent_fields.each do |f|
|
1532
|
-
row_out[f]=row[f]
|
1632
|
+
row_out[f] = row[f]
|
1533
1633
|
end
|
1534
1634
|
|
1535
1635
|
max_n.times do |n1|
|
1536
1636
|
n = n1+1
|
1537
1637
|
any_data = false
|
1538
1638
|
vars.each do |v|
|
1539
|
-
data = row[pattern.gsub("%v",v.to_s).gsub("%n",n.to_s)
|
1639
|
+
data = row[pattern.gsub("%v",v.to_s).gsub("%n",n.to_s)]
|
1540
1640
|
row_out[v] = data
|
1541
1641
|
any_data = true if !data.nil?
|
1542
1642
|
end
|
1543
1643
|
|
1544
1644
|
if any_data
|
1545
|
-
row_out[
|
1645
|
+
row_out['_col_id'] = n
|
1546
1646
|
ds.add_row(row_out)
|
1547
1647
|
end
|
1548
1648
|
end
|
@@ -1569,14 +1669,14 @@ module Daru
|
|
1569
1669
|
# * table - String specifying name of the table that will created in SQL.
|
1570
1670
|
# * charset - Character set. Default is "UTF8".
|
1571
1671
|
#
|
1572
|
-
#
|
1672
|
+
# @example
|
1573
1673
|
#
|
1574
1674
|
# ds = Daru::DataFrame.new({
|
1575
1675
|
# :id => Daru::Vector.new([1,2,3,4,5]),
|
1576
1676
|
# :name => Daru::Vector.new(%w{Alex Peter Susan Mary John})
|
1577
1677
|
# })
|
1578
1678
|
# ds.create_sql('names')
|
1579
|
-
#
|
1679
|
+
# #=>"CREATE TABLE names (id INTEGER,\n name VARCHAR (255)) CHARACTER SET=UTF8;"
|
1580
1680
|
#
|
1581
1681
|
def create_sql(table,charset="UTF8")
|
1582
1682
|
sql = "CREATE TABLE #{table} ("
|
@@ -1639,6 +1739,8 @@ module Daru
|
|
1639
1739
|
arry
|
1640
1740
|
end
|
1641
1741
|
|
1742
|
+
# Convert to json. If no_index is false then the index will NOT be included
|
1743
|
+
# in the JSON thus created.
|
1642
1744
|
def to_json no_index=true
|
1643
1745
|
if no_index
|
1644
1746
|
self.to_a[0].to_json
|
@@ -1681,7 +1783,7 @@ module Daru
|
|
1681
1783
|
html += '</tr>'
|
1682
1784
|
if num > threshold
|
1683
1785
|
html += '<tr>'
|
1684
|
-
(@vectors + 1).
|
1786
|
+
(@vectors.size + 1).times { html += '<td>...</td>' }
|
1685
1787
|
html += '</tr>'
|
1686
1788
|
|
1687
1789
|
last_index = @index.to_a.last
|
@@ -1713,12 +1815,9 @@ module Daru
|
|
1713
1815
|
@data.each { |v| v.update } if Daru.lazy_update
|
1714
1816
|
end
|
1715
1817
|
|
1818
|
+
# Rename the DataFrame.
|
1716
1819
|
def rename new_name
|
1717
|
-
|
1718
|
-
@name = new_name
|
1719
|
-
return
|
1720
|
-
end
|
1721
|
-
@name = new_name.to_sym
|
1820
|
+
@name = new_name
|
1722
1821
|
end
|
1723
1822
|
|
1724
1823
|
# Write this DataFrame to a CSV file.
|
@@ -1792,7 +1891,7 @@ module Daru
|
|
1792
1891
|
# df.recast a: :nmatrix, c: :nmatrix
|
1793
1892
|
def recast opts={}
|
1794
1893
|
opts.each do |vector_name, dtype|
|
1795
|
-
|
1894
|
+
self[vector_name].cast(dtype: dtype)
|
1796
1895
|
end
|
1797
1896
|
end
|
1798
1897
|
|
@@ -1840,16 +1939,24 @@ module Daru
|
|
1840
1939
|
content
|
1841
1940
|
end
|
1842
1941
|
|
1942
|
+
# Query a DataFrame by passing a Daru::Core::Query::BoolArray object.
|
1943
|
+
def where bool_array
|
1944
|
+
Daru::Core::Query.df_where self, bool_array
|
1945
|
+
end
|
1946
|
+
|
1843
1947
|
def == other
|
1844
|
-
|
1845
|
-
@
|
1948
|
+
self.class == other.class and
|
1949
|
+
@size == other.size and
|
1950
|
+
@index == other.index and
|
1951
|
+
@vectors == other.vectors and
|
1952
|
+
@vectors.to_a.all? { |v| self[v] == other[v] }
|
1846
1953
|
end
|
1847
1954
|
|
1848
1955
|
def method_missing(name, *args, &block)
|
1849
1956
|
if md = name.match(/(.+)\=/)
|
1850
1957
|
insert_or_modify_vector name[/(.+)\=/].delete("=").to_sym, args[0]
|
1851
1958
|
elsif self.has_vector? name
|
1852
|
-
self[name
|
1959
|
+
self[name]
|
1853
1960
|
else
|
1854
1961
|
super(name, *args, &block)
|
1855
1962
|
end
|
@@ -1859,7 +1966,7 @@ module Daru
|
|
1859
1966
|
|
1860
1967
|
def possibly_multi_index? index
|
1861
1968
|
if @index.is_a?(MultiIndex)
|
1862
|
-
Daru::MultiIndex.
|
1969
|
+
Daru::MultiIndex.from_tuples(index)
|
1863
1970
|
else
|
1864
1971
|
Daru::Index.new(index)
|
1865
1972
|
end
|
@@ -1892,7 +1999,7 @@ module Daru
|
|
1892
1999
|
|
1893
2000
|
def partition vector_order, index, by, ascending, left_lower, right_upper
|
1894
2001
|
mindex = (left_lower + right_upper) / 2
|
1895
|
-
mvalues = vector_order.inject([]) { |a, vector_name| a <<
|
2002
|
+
mvalues = vector_order.inject([]) { |a, vector_name| a << self[vector_name][mindex]; a }
|
1896
2003
|
i = left_lower
|
1897
2004
|
j = right_upper
|
1898
2005
|
descending = ascending.map { |a| !a }
|
@@ -1929,7 +2036,7 @@ module Daru
|
|
1929
2036
|
def keep? current_index, mvalues, vector_order, sort_order, by, vector_order_index
|
1930
2037
|
vector_name = vector_order[vector_order_index]
|
1931
2038
|
if vector_name
|
1932
|
-
vec =
|
2039
|
+
vec = self[vector_name]
|
1933
2040
|
eval = by[vector_name].call(vec[current_index], mvalues[vector_order_index])
|
1934
2041
|
|
1935
2042
|
if sort_order[vector_order_index] # sort in ascending order
|
@@ -1980,28 +2087,41 @@ module Daru
|
|
1980
2087
|
|
1981
2088
|
return dup(@vectors[location]) if location.is_a?(Range)
|
1982
2089
|
if @vectors.is_a?(MultiIndex)
|
1983
|
-
pos =
|
2090
|
+
pos = @vectors[names]
|
1984
2091
|
|
1985
2092
|
if pos.is_a?(Integer)
|
1986
2093
|
return @data[pos]
|
1987
2094
|
else # MultiIndex
|
1988
2095
|
new_vectors = pos.map do |tuple|
|
1989
|
-
@data[
|
2096
|
+
@data[@vectors[tuple]]
|
1990
2097
|
end
|
1991
|
-
|
2098
|
+
|
2099
|
+
if !location.is_a?(Range) and names.size < @vectors.width
|
2100
|
+
pos = pos.drop_left_level names.size
|
2101
|
+
end
|
2102
|
+
|
2103
|
+
Daru::DataFrame.new(
|
2104
|
+
new_vectors, index: @index, order: pos)
|
1992
2105
|
end
|
1993
2106
|
else
|
1994
2107
|
unless names[1]
|
1995
|
-
pos =
|
1996
|
-
|
2108
|
+
pos = @vectors[location]
|
2109
|
+
|
2110
|
+
if pos.is_a?(Numeric)
|
2111
|
+
return @data[pos]
|
2112
|
+
else
|
2113
|
+
names = pos
|
2114
|
+
end
|
1997
2115
|
end
|
1998
2116
|
|
1999
|
-
new_vcs =
|
2117
|
+
new_vcs = []
|
2000
2118
|
names.each do |name|
|
2001
|
-
|
2002
|
-
new_vcs[name] = @data[@vectors[name]]
|
2119
|
+
new_vcs << @data[@vectors[name]].to_a
|
2003
2120
|
end
|
2004
|
-
|
2121
|
+
|
2122
|
+
order = names.is_a?(Array) ? Daru::Index.new(names) : names
|
2123
|
+
Daru::DataFrame.new(new_vcs, order: order,
|
2124
|
+
index: @index, name: @name)
|
2005
2125
|
end
|
2006
2126
|
end
|
2007
2127
|
|
@@ -2009,82 +2129,55 @@ module Daru
|
|
2009
2129
|
location = names[0]
|
2010
2130
|
|
2011
2131
|
if @index.is_a?(MultiIndex)
|
2012
|
-
pos =
|
2132
|
+
pos = @index[names]
|
2013
2133
|
if pos.is_a?(Integer)
|
2014
2134
|
return Daru::Vector.new(populate_row_for(pos), index: @vectors, name: pos)
|
2015
2135
|
else
|
2016
|
-
new_rows =
|
2017
|
-
if location.is_a?(Range)
|
2018
|
-
pos.map { |tuple| populate_row_for(tuple) }
|
2019
|
-
else
|
2020
|
-
pos.map { |tuple| populate_row_for(names + tuple) }
|
2021
|
-
end
|
2136
|
+
new_rows = pos.map { |tuple| populate_row_for(tuple) }
|
2022
2137
|
|
2023
|
-
|
2024
|
-
|
2138
|
+
if !location.is_a?(Range) and names.size < @index.width
|
2139
|
+
pos = pos.drop_left_level names.size
|
2140
|
+
end
|
2141
|
+
|
2142
|
+
Daru::DataFrame.rows(
|
2143
|
+
new_rows, order: @vectors, name: @name, index: pos)
|
2025
2144
|
end
|
2026
2145
|
else
|
2027
2146
|
if names[1].nil?
|
2028
|
-
|
2029
|
-
|
2030
|
-
|
2031
|
-
|
2032
|
-
|
2033
|
-
location
|
2034
|
-
else
|
2035
|
-
first_index = index_arry.index location.first
|
2036
|
-
last_index = index_arry.index location.last
|
2037
|
-
|
2038
|
-
first_index..last_index
|
2147
|
+
names = @index[location]
|
2148
|
+
if names.is_a?(Numeric)
|
2149
|
+
row = []
|
2150
|
+
@data.each do |vector|
|
2151
|
+
row << vector[location]
|
2039
2152
|
end
|
2040
2153
|
|
2041
|
-
|
2042
|
-
else
|
2043
|
-
row = []
|
2044
|
-
name = named_index_for names[0]
|
2045
|
-
@vectors.each do |vector|
|
2046
|
-
row << @data[@vectors[vector]][name]
|
2047
|
-
end
|
2048
|
-
|
2049
|
-
return Daru::Vector.new(row, index: @vectors, name: set_name(name))
|
2154
|
+
return Daru::Vector.new(row, index: @vectors, name: set_name(location))
|
2050
2155
|
end
|
2051
2156
|
end
|
2052
2157
|
# Access multiple rows
|
2053
2158
|
rows = []
|
2054
2159
|
names.each do |name|
|
2055
|
-
rows << self.row[name]
|
2160
|
+
rows << self.row[name].to_a
|
2056
2161
|
end
|
2057
2162
|
|
2058
|
-
Daru::DataFrame.rows rows, name: @name
|
2059
|
-
end
|
2060
|
-
end
|
2061
|
-
|
2062
|
-
def row_index_for location
|
2063
|
-
if @index.include?(location) or location[0].is_a?(Range)
|
2064
|
-
@index[location]
|
2065
|
-
elsif location[0].is_a?(Integer)
|
2066
|
-
location[0]
|
2163
|
+
Daru::DataFrame.rows rows, index: names ,name: @name, order: @vectors
|
2067
2164
|
end
|
2068
2165
|
end
|
2069
2166
|
|
2070
2167
|
def populate_row_for pos
|
2071
|
-
@
|
2072
|
-
|
2168
|
+
@data.map do |vector|
|
2169
|
+
vector[pos]
|
2073
2170
|
end
|
2074
2171
|
end
|
2075
2172
|
|
2076
2173
|
def insert_or_modify_vector name, vector
|
2077
|
-
|
2078
|
-
|
2079
|
-
end
|
2174
|
+
name = name[0] unless @vectors.is_a?(MultiIndex)
|
2175
|
+
v = nil
|
2080
2176
|
|
2081
|
-
@vectors = @vectors + name if !@vectors.include?(name)
|
2082
|
-
v = nil
|
2083
|
-
|
2084
2177
|
if @index.empty?
|
2085
2178
|
v = vector.is_a?(Daru::Vector) ? vector : Daru::Vector.new(vector.to_a)
|
2086
2179
|
@index = v.index
|
2087
|
-
|
2180
|
+
assign_or_add_vector name, v
|
2088
2181
|
set_size
|
2089
2182
|
|
2090
2183
|
@data.map! do |v|
|
@@ -2096,21 +2189,47 @@ module Daru
|
|
2096
2189
|
end
|
2097
2190
|
else
|
2098
2191
|
if vector.is_a?(Daru::Vector)
|
2099
|
-
|
2100
|
-
|
2101
|
-
|
2192
|
+
if vector.index == @index # so that index-by-index assignment is avoided when possible.
|
2193
|
+
v = vector.dup
|
2194
|
+
else
|
2195
|
+
v = Daru::Vector.new [], name: set_name(name), index: @index
|
2196
|
+
@index.each do |idx|
|
2197
|
+
if vector.index.include? idx
|
2198
|
+
v[idx] = vector[idx]
|
2199
|
+
else
|
2200
|
+
v[idx] = nil
|
2201
|
+
end
|
2202
|
+
end
|
2102
2203
|
end
|
2103
2204
|
else
|
2104
|
-
raise
|
2205
|
+
raise SizeError,
|
2206
|
+
"Specified vector of length #{vector.size} cannot be inserted in DataFrame of size #{@size}" if
|
2105
2207
|
@size != vector.size
|
2106
2208
|
|
2107
2209
|
v = Daru::Vector.new(vector, name: set_name(name), index: @index)
|
2108
2210
|
end
|
2109
2211
|
|
2110
|
-
|
2212
|
+
assign_or_add_vector name, v
|
2111
2213
|
end
|
2112
2214
|
end
|
2113
2215
|
|
2216
|
+
def assign_or_add_vector name, v
|
2217
|
+
#FIXME: fix this jugaad. need to make changes in Indexing itself.
|
2218
|
+
pos = @vectors[name]
|
2219
|
+
|
2220
|
+
if !pos.kind_of?(Daru::Index) and pos == name and
|
2221
|
+
(@vectors.include?(name) or (pos.is_a?(Integer) and pos < @data.size))
|
2222
|
+
@data[pos] = v
|
2223
|
+
elsif pos.kind_of?(Daru::Index)
|
2224
|
+
pos.each do |p|
|
2225
|
+
@data[@vectors[p]] = v
|
2226
|
+
end
|
2227
|
+
else
|
2228
|
+
@vectors = @vectors | [name] if !@vectors.include?(name)
|
2229
|
+
@data[@vectors[name]] = v
|
2230
|
+
end
|
2231
|
+
end
|
2232
|
+
|
2114
2233
|
def insert_or_modify_row name, vector
|
2115
2234
|
if index.is_a?(MultiIndex)
|
2116
2235
|
# TODO
|
@@ -2124,13 +2243,13 @@ module Daru
|
|
2124
2243
|
end
|
2125
2244
|
|
2126
2245
|
if @index.include? name
|
2127
|
-
|
2128
|
-
|
2246
|
+
self.each_vector_with_index do |vector,i|
|
2247
|
+
vector[name] = v.index.include?(i) ? v[i] : nil
|
2129
2248
|
end
|
2130
2249
|
else
|
2131
|
-
@index =
|
2132
|
-
|
2133
|
-
|
2250
|
+
@index = @index | [name]
|
2251
|
+
self.each_vector_with_index do |vector,i|
|
2252
|
+
vector.concat((v.index.include?(i) ? v[i] : nil), name)
|
2134
2253
|
end
|
2135
2254
|
end
|
2136
2255
|
|
@@ -2184,14 +2303,11 @@ module Daru
|
|
2184
2303
|
end
|
2185
2304
|
|
2186
2305
|
def create_vectors_index_with vectors, source
|
2187
|
-
vectors = source.keys.
|
2306
|
+
vectors = source.keys.sort_by { |a| a.to_s } if vectors.nil?
|
2188
2307
|
|
2189
2308
|
@vectors =
|
2190
2309
|
unless vectors.is_a?(Index) or vectors.is_a?(MultiIndex)
|
2191
|
-
Daru::Index.new((vectors + (source.keys - vectors))
|
2192
|
-
.uniq
|
2193
|
-
.map { |e| e.respond_to?(:to_sym) ? e.to_sym : e }
|
2194
|
-
)
|
2310
|
+
Daru::Index.new((vectors + (source.keys - vectors)).uniq)
|
2195
2311
|
else
|
2196
2312
|
vectors
|
2197
2313
|
end
|
@@ -2200,21 +2316,17 @@ module Daru
|
|
2200
2316
|
def all_vectors_have_equal_indexes? source
|
2201
2317
|
idx = source.values[0].index
|
2202
2318
|
|
2203
|
-
source.all? do |
|
2319
|
+
source.values.all? do |vector|
|
2204
2320
|
idx == vector.index
|
2205
2321
|
end
|
2206
2322
|
end
|
2207
2323
|
|
2208
|
-
def
|
2209
|
-
Daru::Index.new
|
2210
|
-
end
|
2211
|
-
|
2212
|
-
def create_index index
|
2213
|
-
index.is_a?(MultiIndex) ? index : Daru::Index.new(index)
|
2324
|
+
def try_create_index index
|
2325
|
+
index.kind_of?(Index) ? index : Daru::Index.new(index)
|
2214
2326
|
end
|
2215
2327
|
|
2216
2328
|
def set_name potential_name
|
2217
|
-
potential_name.is_a?(Array) ? potential_name.join
|
2329
|
+
potential_name.is_a?(Array) ? potential_name.join : potential_name
|
2218
2330
|
end
|
2219
2331
|
|
2220
2332
|
def symbolize arry
|