daru 0.1.0 → 0.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.build.sh +6 -6
- data/.gitignore +2 -0
- data/CONTRIBUTING.md +7 -3
- data/History.md +36 -0
- data/README.md +21 -13
- data/Rakefile +16 -1
- data/benchmarks/TradeoffData.csv +65 -0
- data/benchmarks/dataframe_creation.rb +39 -0
- data/benchmarks/group_by.rb +32 -0
- data/benchmarks/row_access.rb +41 -0
- data/benchmarks/row_assign.rb +36 -0
- data/benchmarks/sorting.rb +44 -0
- data/benchmarks/vector_access.rb +31 -0
- data/benchmarks/vector_assign.rb +42 -0
- data/benchmarks/where_clause.rb +48 -0
- data/benchmarks/where_vs_filter.rb +28 -0
- data/daru.gemspec +29 -5
- data/lib/daru.rb +30 -1
- data/lib/daru/accessors/array_wrapper.rb +2 -2
- data/lib/daru/accessors/nmatrix_wrapper.rb +6 -6
- data/lib/daru/core/group_by.rb +112 -31
- data/lib/daru/core/merge.rb +170 -0
- data/lib/daru/core/query.rb +95 -0
- data/lib/daru/dataframe.rb +335 -223
- data/lib/daru/date_time/index.rb +550 -0
- data/lib/daru/date_time/offsets.rb +397 -0
- data/lib/daru/index.rb +266 -54
- data/lib/daru/io/io.rb +1 -2
- data/lib/daru/maths/arithmetic/dataframe.rb +2 -2
- data/lib/daru/maths/arithmetic/vector.rb +2 -2
- data/lib/daru/maths/statistics/dataframe.rb +58 -8
- data/lib/daru/maths/statistics/vector.rb +229 -0
- data/lib/daru/vector.rb +230 -80
- data/lib/daru/version.rb +1 -1
- data/spec/core/group_by_spec.rb +16 -16
- data/spec/core/merge_spec.rb +52 -0
- data/spec/core/query_spec.rb +171 -0
- data/spec/dataframe_spec.rb +278 -280
- data/spec/date_time/data_spec.rb +199 -0
- data/spec/date_time/index_spec.rb +433 -0
- data/spec/date_time/offsets_spec.rb +371 -0
- data/spec/fixtures/stock_data.csv +500 -0
- data/spec/index_spec.rb +317 -11
- data/spec/io/io_spec.rb +18 -17
- data/spec/math/arithmetic/dataframe_spec.rb +3 -3
- data/spec/math/statistics/dataframe_spec.rb +39 -1
- data/spec/math/statistics/vector_spec.rb +163 -1
- data/spec/monkeys_spec.rb +4 -0
- data/spec/spec_helper.rb +3 -0
- data/spec/vector_spec.rb +125 -60
- metadata +71 -14
- data/lib/daru/accessors/dataframe_by_vector.rb +0 -17
- data/lib/daru/multi_index.rb +0 -216
- data/spec/multi_index_spec.rb +0 -216
@@ -0,0 +1,170 @@
|
|
1
|
+
module Daru
  module Core
    # Low-level helpers backing Daru::Core::Merge. Every method works on plain
    # Hashes of the form { vector_name => Array of values } as produced by
    # #hashify, so the join logic itself never touches DataFrame internals.
    module MergeHelper
      class << self
        # Rename the key of +hash+ that was recoded into +matcher+.
        #
        # +matcher+ is a recoded name such as :id_1; the key it replaces is the
        # one whose name, followed by "_<digits>", equals +matcher+. The match
        # is anchored and the key is regexp-escaped, so an unrelated key that
        # merely occurs inside +matcher+ (e.g. :d inside :id_1) is never
        # renamed by mistake.
        #
        # @param [Hash] hash Column hash, modified in place.
        # @param [Symbol, nil] matcher Recoded column name; nil is a no-op.
        # @return [Array, nil] the moved column, or nil when nothing matched.
        def replace_keys_if_duplicates hash, matcher
          return if matcher.nil?

          target  = matcher.to_s
          matched = hash.keys.find do |key|
            target =~ /\A#{Regexp.escape(key.to_s)}_\d+\z/
          end
          return if matched.nil?

          # Delete-then-insert keeps the renamed column at the end of the hash,
          # exactly where the previous implementation left it.
          hash[matcher] = hash.delete(matched)
        end

        # Disambiguate non-join columns that exist in both hashes by renaming
        # them to <name>_1 (first hash) and <name>_2 (second hash).
        #
        # @param [Hash] df_hash1 Columns of the first DataFrame (mutated).
        # @param [Hash] df_hash2 Columns of the second DataFrame (mutated).
        # @param [Array] on Names of the join columns; these are left alone.
        def resolve_duplicates df_hash1, df_hash2, on
          hk      = df_hash1.keys + df_hash2.keys - on
          recoded = hk.recode_repeated.map(&:to_sym)
          diff    = (recoded - hk).sort

          # After sorting, the recoded names arrive in (<name>_1, <name>_2)
          # pairs: the first belongs to df_hash1, the second to df_hash2.
          diff.each_slice(2) do |first, second|
            replace_keys_if_duplicates df_hash1, first
            replace_keys_if_duplicates df_hash2, second
          end
        end

        # Convert a DataFrame into a Hash of plain Arrays keyed by vector name.
        #
        # @param [Daru::DataFrame] df
        # @return [Hash]
        def hashify df
          hsh = df.to_hash
          hsh.each { |k,v| hsh[k] = v.to_a }
          hsh
        end

        # Rows are emitted only where all +on+ columns agree in both frames.
        #
        # @return [Daru::DataFrame]
        def inner_join df1, df2, df_hash1, df_hash2, on
          joined_hash = blank_joined_hash df_hash1, df_hash2, on

          (0...df1.size).each do |id1|
            (0...df2.size).each do |id2|
              next unless rows_match?(df_hash1, id1, df_hash2, id2, on)

              append_joined_row joined_hash, df_hash1, id1, df_hash2, id2
            end
          end

          Daru::DataFrame.new(joined_hash, order: joined_hash.keys)
        end

        # Union of the left and right outer joins.
        #
        # NOTE(review): rows are combined with Array#|, which also collapses
        # rows that are exact duplicates of each other - confirm this is the
        # intended behaviour for frames containing repeated rows.
        #
        # @return [Daru::DataFrame]
        def full_outer_join df1, df2, df_hash1, df_hash2, on
          left  = left_outer_join df1, df2, df_hash1, df_hash2, on, true
          right = right_outer_join df1, df2, df_hash1, df_hash2, on, true

          Daru::DataFrame.rows(
            (left.values.transpose | right.values.transpose), order: left.keys)
        end

        # Every row of df1 is kept; columns coming only from df2 are nil where
        # no row of df2 matches.
        #
        # @param [Boolean] as_hash Return the raw column Hash instead of a
        #   DataFrame (used by #full_outer_join).
        # @return [Daru::DataFrame, Hash]
        def left_outer_join df1, df2, df_hash1, df_hash2, on, as_hash=false
          joined_hash = blank_joined_hash df_hash1, df_hash2, on
          fill_outer_join joined_hash, df_hash1, df1.size, df_hash2, df2.size, on

          return joined_hash if as_hash
          Daru::DataFrame.new(joined_hash, order: joined_hash.keys)
        end

        # Mirror image of #left_outer_join: every row of df2 is kept. The
        # column order of the result is the same as for the other joins.
        #
        # @param [Boolean] as_hash Return the raw column Hash instead of a
        #   DataFrame (used by #full_outer_join).
        # @return [Daru::DataFrame, Hash]
        def right_outer_join df1, df2, df_hash1, df_hash2, on, as_hash=false
          joined_hash = blank_joined_hash df_hash1, df_hash2, on
          fill_outer_join joined_hash, df_hash2, df2.size, df_hash1, df1.size, on

          return joined_hash if as_hash
          Daru::DataFrame.new(joined_hash, order: joined_hash.keys)
        end

        # Ensure every join column is present (and non-nil) in both hashes.
        #
        # @raise [ArgumentError] if a name in +on+ is missing from either side.
        def verify_dataframes df_hash1, df_hash2, on
          raise ArgumentError,
            "All fields in :on must be present in self" unless on.all? { |e| df_hash1[e] }
          raise ArgumentError,
            "All fields in :on must be present in other DF" unless on.all? { |e| df_hash2[e] }
        end

        private

        # Empty result skeleton: df1-only columns, then the join columns, then
        # df2-only columns, each mapped to a fresh Array.
        def blank_joined_hash df_hash1, df_hash2, on
          keys = (df_hash1.keys - on) | on | (df_hash2.keys - on)
          keys.each_with_object({}) { |k, joined| joined[k] = [] }
        end

        # True when the two rows agree on every join column.
        def rows_match? primary, p_row, secondary, s_row, on
          on.all? { |n| primary[n][p_row] == secondary[n][s_row] }
        end

        # Append one merged row, preferring the primary side for shared columns.
        def append_joined_row joined_hash, primary, p_row, secondary, s_row
          joined_hash.each do |k, column|
            column << (primary.has_key?(k) ? primary[k][p_row] : secondary[k][s_row])
          end
        end

        # Shared body of the one-sided outer joins: every primary row appears
        # at least once, padded with nil for secondary-only columns when no
        # secondary row matches.
        def fill_outer_join joined_hash, primary, primary_size, secondary, secondary_size, on
          (0...primary_size).each do |p_row|
            matched = false

            (0...secondary_size).each do |s_row|
              next unless rows_match?(primary, p_row, secondary, s_row, on)

              matched = true
              append_joined_row joined_hash, primary, p_row, secondary, s_row
            end
            next if matched

            joined_hash.each do |k, column|
              column << (primary.has_key?(k) ? primary[k][p_row] : nil)
            end
          end
        end
      end
    end

    # Private module containing methods for join, merge, concat operations on
    # dataframes and vectors.
    # @private
    module Merge
      class << self
        # Join two DataFrames SQL-style.
        #
        # @param [Daru::DataFrame] df1 Left DataFrame.
        # @param [Daru::DataFrame] df2 Right DataFrame.
        # @param [Hash] opts
        # @option opts [Symbol] :how One of :inner, :outer, :left or :right.
        # @option opts [Array] :on Column names common to both DataFrames.
        # @return [Daru::DataFrame]
        # @raise [ArgumentError] if :on is missing, names a column absent from
        #   either frame, or :how is not recognised.
        def join df1, df2, opts={}
          helper = MergeHelper

          df_hash1 = helper.hashify df1
          df_hash2 = helper.hashify df2
          on       = opts[:on]
          # Without this guard a missing :on surfaced as NoMethodError on nil.
          raise ArgumentError, "Must specify the columns to join on in :on" if on.nil?

          helper.verify_dataframes df_hash1, df_hash2, on
          helper.resolve_duplicates df_hash1, df_hash2, on

          case opts[:how]
          when :inner
            helper.inner_join df1, df2, df_hash1, df_hash2, on
          when :outer
            helper.full_outer_join df1, df2, df_hash1, df_hash2, on
          when :left
            helper.left_outer_join df1, df2, df_hash1, df_hash2, on
          when :right
            helper.right_outer_join df1, df2, df_hash1, df_hash2, on
          else
            raise ArgumentError, "Unrecognized option in :how => #{opts[:how]}"
          end
        end
      end
    end
  end
end
|
@@ -0,0 +1,95 @@
|
|
1
|
+
module Daru
  module Core
    # Support code for where-clause style querying of vectors and dataframes.
    module Query
      # Thin wrapper around an Array of booleans that can be combined with
      # other BoolArray objects through &, | and ! to build compound
      # conditions.
      class BoolArray
        attr_reader :barry

        # @param [Array] barry The underlying array of truthy/falsy values.
        def initialize barry
          @barry = barry
        end

        # Element-wise logical AND.
        #
        # @param [BoolArray] other
        # @return [BoolArray]
        def & other
          other_barry = other.barry
          BoolArray.new(@barry.each_with_index.map { |b, i| b && other_barry[i] })
        end

        alias :and :&

        # Element-wise logical OR.
        #
        # @param [BoolArray] other
        # @return [BoolArray]
        def | other
          other_barry = other.barry
          BoolArray.new(@barry.each_with_index.map { |b, i| b || other_barry[i] })
        end

        alias :or :|

        # Element-wise negation.
        #
        # @return [BoolArray]
        def !
          BoolArray.new(@barry.map { |b| !b })
        end

        # Two BoolArrays are equal when their underlying arrays are equal.
        # Anything that does not expose #barry (nil, a plain Array, ...) is
        # simply unequal instead of raising NoMethodError.
        #
        # @return [Boolean]
        def == other
          other.respond_to?(:barry) && @barry == other.barry
        end

        # @return [Array] the underlying array (not a copy).
        def to_a
          @barry
        end

        def inspect
          "(#{self.class}:#{self.object_id} bool_arry=#{@barry})"
        end
      end

      class << self
        # Compare every element of +data+ with the single value +other+.
        #
        # @param [Symbol] operator Comparison method sent to each element.
        # @param [#each] data Elements to test.
        # @param [Object] other Right-hand operand shared by all comparisons.
        # @return [BoolArray]
        def apply_scalar_operator operator, data, other
          arry = data.each_with_object([]) do |d, memo|
            memo << (d.send(operator, other) ? true : false)
          end

          BoolArray.new(arry)
        end

        # Compare +vector+ with +other+ position by position.
        #
        # @param [Symbol] operator Comparison method sent to each element.
        # @param [#each_with_index] vector Left-hand operands.
        # @param [#[]] other Right-hand operands, looked up by position.
        # @return [BoolArray]
        def apply_vector_operator operator, vector, other
          bool_arry = []
          vector.each_with_index do |d, i|
            bool_arry << (d.send(operator, other[i]) ? true : false)
          end

          BoolArray.new(bool_arry)
        end

        # Filter every vector of +data_frame+ with +bool_array+ and assemble
        # the survivors into a new DataFrame indexed like the first filtered
        # vector.
        #
        # @return [Daru::DataFrame]
        def df_where data_frame, bool_array
          vecs = data_frame.map do |vector|
            vector.where(bool_array)
          end

          Daru::DataFrame.new(
            vecs, order: data_frame.vectors, index: vecs[0].index, clone: false)
        end

        # Keep the entries of +data+ (and their labels from +index+) at the
        # positions where +bool_array+ is truthy.
        #
        # @return [Daru::Vector]
        def vector_where data, index, bool_array, dtype
          new_data  = []
          new_index = []
          bool_array.to_a.each_with_index do |b, i|
            next unless b

            new_data  << data[i]
            new_index << index[i]
          end

          Daru::Vector.new(new_data, index: new_index, dtype: dtype)
        end
      end
    end
  end
end
|
data/lib/daru/dataframe.rb
CHANGED
@@ -1,7 +1,6 @@
|
|
1
1
|
$:.unshift File.dirname(__FILE__)
|
2
2
|
|
3
3
|
require 'accessors/dataframe_by_row.rb'
|
4
|
-
require 'accessors/dataframe_by_vector.rb'
|
5
4
|
require 'maths/arithmetic/dataframe.rb'
|
6
5
|
require 'maths/statistics/dataframe.rb'
|
7
6
|
require 'plotting/dataframe.rb'
|
@@ -226,11 +225,11 @@ module Daru
|
|
226
225
|
@data = []
|
227
226
|
|
228
227
|
temp_name = opts[:name]
|
229
|
-
@name = temp_name
|
228
|
+
@name = temp_name || SecureRandom.uuid
|
230
229
|
|
231
230
|
if source.empty?
|
232
|
-
@vectors =
|
233
|
-
@index =
|
231
|
+
@vectors = try_create_index vectors
|
232
|
+
@index = try_create_index index
|
234
233
|
create_empty_vectors
|
235
234
|
else
|
236
235
|
case source
|
@@ -239,8 +238,8 @@ module Daru
|
|
239
238
|
raise ArgumentError, "Number of vectors (#{vectors.size}) should \
|
240
239
|
equal order size (#{source.size})" if source.size != vectors.size
|
241
240
|
|
242
|
-
@index =
|
243
|
-
@vectors =
|
241
|
+
@index = try_create_index(index || source[0].size)
|
242
|
+
@vectors = try_create_index(vectors)
|
244
243
|
|
245
244
|
@vectors.each_with_index do |vec,idx|
|
246
245
|
@data << Daru::Vector.new(source[idx], index: @index)
|
@@ -253,9 +252,10 @@ module Daru
|
|
253
252
|
initialize(hsh, index: index, order: vectors, name: @name, clone: clone)
|
254
253
|
else # array of hashes
|
255
254
|
if vectors.nil?
|
256
|
-
@vectors = Daru::Index.new source[0].keys
|
255
|
+
@vectors = Daru::Index.new source[0].keys
|
257
256
|
else
|
258
|
-
@vectors = Daru::Index.new
|
257
|
+
@vectors = Daru::Index.new(
|
258
|
+
(vectors + (source[0].keys - vectors)).uniq)
|
259
259
|
end
|
260
260
|
@index = Daru::Index.new(index || source.size)
|
261
261
|
|
@@ -272,8 +272,9 @@ module Daru
|
|
272
272
|
create_vectors_index_with vectors, source
|
273
273
|
if all_daru_vectors_in_source? source
|
274
274
|
if !index.nil?
|
275
|
-
@index =
|
275
|
+
@index = try_create_index index
|
276
276
|
elsif all_vectors_have_equal_indexes?(source)
|
277
|
+
vectors_have_same_index = true
|
277
278
|
@index = source.values[0].index.dup
|
278
279
|
else
|
279
280
|
all_indexes = []
|
@@ -289,17 +290,28 @@ module Daru
|
|
289
290
|
|
290
291
|
if clone
|
291
292
|
@vectors.each do |vector|
|
292
|
-
|
293
|
-
|
294
|
-
|
295
|
-
|
293
|
+
# avoids matching indexes of vectors if all the supplied vectors
|
294
|
+
# have the same index.
|
295
|
+
if vectors_have_same_index
|
296
|
+
v = source[vector].dup
|
297
|
+
else
|
298
|
+
v = Daru::Vector.new([], name: vector, index: @index)
|
299
|
+
|
300
|
+
@index.each do |idx|
|
301
|
+
if source[vector].index.include? idx
|
302
|
+
v[idx] = source[vector][idx]
|
303
|
+
else
|
304
|
+
v[idx] = nil
|
305
|
+
end
|
306
|
+
end
|
296
307
|
end
|
308
|
+
@data << v
|
297
309
|
end
|
298
310
|
else
|
299
311
|
@data.concat source.values
|
300
312
|
end
|
301
313
|
else
|
302
|
-
@index =
|
314
|
+
@index = try_create_index(index || source.values[0].size)
|
303
315
|
|
304
316
|
@vectors.each do |name|
|
305
317
|
@data << Daru::Vector.new(source[name].dup, name: set_name(name), index: @index)
|
@@ -313,6 +325,11 @@ module Daru
|
|
313
325
|
update
|
314
326
|
end
|
315
327
|
|
328
|
+
# Deprecated accessor kept for backwards compatibility; prints a warning to
# $stderr and forwards all arguments to #[].
#
# @deprecated Use #[] instead.
# @param args Arguments passed through unchanged to #[].
# @return whatever #[] returns for the given arguments.
def vector *args
  $stderr.puts "#vector has been deprecated in favour of #[]. Please use that."
  # The parameter is +args+; forwarding the undefined +names+ raised NameError.
  self[*args]
end
|
332
|
+
|
316
333
|
# Access row or vector. Specify name of row/vector followed by axis(:row, :vector).
|
317
334
|
# Defaults to *:vector*. Use of this method is not recommended for accessing
|
318
335
|
# rows or vectors. Use df.row[:a] for accessing row with index ':a' or
|
@@ -324,7 +341,6 @@ module Daru
|
|
324
341
|
else
|
325
342
|
axis = :vector
|
326
343
|
end
|
327
|
-
names.map! { |e| e.respond_to?(:to_sym) ? e.to_sym : e }
|
328
344
|
|
329
345
|
if axis == :vector
|
330
346
|
access_vector *names
|
@@ -349,7 +365,6 @@ module Daru
|
|
349
365
|
|
350
366
|
name = args[0..-2]
|
351
367
|
vector = args[-1]
|
352
|
-
name.map! { |e| e.respond_to?(:to_sym) ? e.to_sym : e }
|
353
368
|
|
354
369
|
if axis == :vector
|
355
370
|
insert_or_modify_vector name, vector
|
@@ -360,15 +375,6 @@ module Daru
|
|
360
375
|
end
|
361
376
|
end
|
362
377
|
|
363
|
-
# Access a vector or set/create a vector. Refer #[] and #[]= docs for details.
|
364
|
-
#
|
365
|
-
# == Usage
|
366
|
-
# df.vector[:a] # access vector named ':a'
|
367
|
-
# df.vector[:b] = [1,2,3] # set vector ':b' to [1,2,3]
|
368
|
-
def vector
|
369
|
-
Daru::Accessors::DataFrameByVector.new(self)
|
370
|
-
end
|
371
|
-
|
372
378
|
# Access a vector by name.
|
373
379
|
def column name
|
374
380
|
vector[name]
|
@@ -398,24 +404,13 @@ module Daru
|
|
398
404
|
# * +vectors_to_dup+ - An Array specifying the names of Vectors to
|
399
405
|
# be duplicated. Will duplicate the entire DataFrame if not specified.
|
400
406
|
def dup vectors_to_dup=nil
|
401
|
-
vectors_to_dup = @vectors unless vectors_to_dup
|
407
|
+
vectors_to_dup = @vectors.to_a unless vectors_to_dup
|
402
408
|
|
403
|
-
|
404
|
-
|
405
|
-
src
|
406
|
-
vectors_to_dup.each do |vec|
|
407
|
-
src << @data[@vectors[vec]].dup
|
408
|
-
end
|
409
|
-
|
410
|
-
Daru::MultiIndex.new(vectors_to_dup)
|
411
|
-
else
|
412
|
-
src = {}
|
413
|
-
vectors_to_dup.each do |vector|
|
414
|
-
src[vector] = @data[@vectors[vector]].dup
|
415
|
-
end
|
416
|
-
|
417
|
-
Daru::Index.new(vectors_to_dup)
|
409
|
+
src = []
|
410
|
+
vectors_to_dup.each do |vec|
|
411
|
+
src << @data[@vectors[vec]].to_a
|
418
412
|
end
|
413
|
+
new_order = Daru::Index.new(vectors_to_dup)
|
419
414
|
|
420
415
|
Daru::DataFrame.new src, order: new_order, index: @index.dup, name: @name, clone: true
|
421
416
|
end
|
@@ -465,6 +460,14 @@ module Daru
|
|
465
460
|
(vecs.nil? ? self : dup(vecs)).row[*(row_indexes - rows_with_nil)]
|
466
461
|
end
|
467
462
|
|
463
|
+
# Iterate over each index of the DataFrame.
|
464
|
+
def each_index &block
|
465
|
+
return to_enum(:each_index) unless block_given?
|
466
|
+
|
467
|
+
@index.each(&block)
|
468
|
+
self
|
469
|
+
end
|
470
|
+
|
468
471
|
# Iterate over each vector
|
469
472
|
def each_vector(&block)
|
470
473
|
return to_enum(:each_vector) unless block_given?
|
@@ -608,7 +611,7 @@ module Daru
|
|
608
611
|
#
|
609
612
|
# Recode works similarly to #map, but an important difference between
|
610
613
|
# the two is that recode returns a modified Daru::DataFrame instead
|
611
|
-
# of an Array. For this reason, #
|
614
|
+
# of an Array. For this reason, #recode expects that every run of the
|
612
615
|
# block to return a Daru::Vector.
|
613
616
|
#
|
614
617
|
# Just like map and each, recode also accepts an optional _axis_ argument.
|
@@ -667,7 +670,8 @@ module Daru
|
|
667
670
|
df = self.dup
|
668
671
|
df.each_vector_with_index do |v, i|
|
669
672
|
ret = yield v
|
670
|
-
ret.is_a?(Daru::Vector) or
|
673
|
+
ret.is_a?(Daru::Vector) or
|
674
|
+
raise TypeError, "Every iteration must return Daru::Vector not #{ret.class}"
|
671
675
|
df[*i] = ret
|
672
676
|
end
|
673
677
|
|
@@ -841,7 +845,7 @@ module Daru
|
|
841
845
|
idx = named_index_for index
|
842
846
|
|
843
847
|
if @index.include? idx
|
844
|
-
@index =
|
848
|
+
@index = Daru::Index.new(@index.to_a - [idx])
|
845
849
|
self.each_vector do |vector|
|
846
850
|
vector.delete_at idx
|
847
851
|
end
|
@@ -1015,7 +1019,7 @@ module Daru
|
|
1015
1019
|
number_of_missing << row.missing_positions.size
|
1016
1020
|
end
|
1017
1021
|
|
1018
|
-
Daru::Vector.new number_of_missing, index: @index, name: "#{@name}_missing_rows"
|
1022
|
+
Daru::Vector.new number_of_missing, index: @index, name: "#{@name}_missing_rows"
|
1019
1023
|
end
|
1020
1024
|
|
1021
1025
|
# TODO: remove next version
|
@@ -1087,9 +1091,18 @@ module Daru
|
|
1087
1091
|
|
1088
1092
|
# Check if a vector is present
|
1089
1093
|
def has_vector? vector
|
1090
|
-
|
1094
|
+
@vectors.include? vector
|
1091
1095
|
end
|
1092
1096
|
|
1097
|
+
# Works like Array#any?.
|
1098
|
+
#
|
1099
|
+
# @param [Symbol] axis (:vector) The axis to iterate over. Can be :vector or
|
1100
|
+
# :row. A Daru::Vector object is yielded in the block.
|
1101
|
+
# @example Using any?
|
1102
|
+
# df = Daru::DataFrame.new({a: [1,2,3,4,5], b: ['a', 'b', 'c', 'd', 'e']})
|
1103
|
+
# df.any?(:row) do |row|
|
1104
|
+
# row[:a] < 3 and row[:b] == 'b'
|
1105
|
+
# end #=> true
|
1093
1106
|
def any? axis=:vector, &block
|
1094
1107
|
if axis == :vector or axis == :column
|
1095
1108
|
@data.any?(&block)
|
@@ -1103,6 +1116,15 @@ module Daru
|
|
1103
1116
|
end
|
1104
1117
|
end
|
1105
1118
|
|
1119
|
+
# Works like Array#all?
|
1120
|
+
#
|
1121
|
+
# @param [Symbol] axis (:vector) The axis to iterate over. Can be :vector or
|
1122
|
+
# :row. A Daru::Vector object is yielded in the block.
|
1123
|
+
# @example Using all?
|
1124
|
+
# df = Daru::DataFrame.new({a: [1,2,3,4,5], b: ['a', 'b', 'c', 'd', 'e']})
|
1125
|
+
# df.all?(:row) do |row|
|
1126
|
+
# row[:a] < 10
|
1127
|
+
# end #=> true
|
1106
1128
|
def all? axis=:vector, &block
|
1107
1129
|
if axis == :vector or axis == :column
|
1108
1130
|
@data.all?(&block)
|
@@ -1183,38 +1205,126 @@ module Daru
|
|
1183
1205
|
# # ["foo", "one", 3]=>[6],
|
1184
1206
|
# # ["foo", "three", 8]=>[7],
|
1185
1207
|
# # ["foo", "two", 3]=>[2, 4]}
|
1186
|
-
def group_by vectors
|
1187
|
-
vectors
|
1208
|
+
def group_by *vectors
|
1209
|
+
vectors.flatten!
|
1188
1210
|
vectors.each { |v| raise(ArgumentError, "Vector #{v} does not exist") unless
|
1189
1211
|
has_vector?(v) }
|
1190
1212
|
|
1191
1213
|
Daru::Core::GroupBy.new(self, vectors)
|
1192
1214
|
end
|
1193
1215
|
|
1194
|
-
def reindex_vectors
|
1195
|
-
raise ArgumentError, "
|
1196
|
-
|
1216
|
+
# Build a new DataFrame whose vectors follow +new_vectors+. Vectors already
# present in this DataFrame are copied over via #[]; names that do not exist
# yet become vectors filled with nil.
#
# @param [Daru::Index] new_vectors The new vector ordering.
# @return [Daru::DataFrame] a new DataFrame; the receiver is not reassigned.
# @raise [ArgumentError] if +new_vectors+ is not a Daru::Index (or subclass).
def reindex_vectors new_vectors
  # The message must interpolate the argument itself; it previously referred
  # to an undefined local and so raised NameError instead of ArgumentError.
  raise ArgumentError, "Must pass the new index of type Index or its "\
    "subclasses, not #{new_vectors.class}" unless new_vectors.kind_of?(Daru::Index)

  cl = Daru::DataFrame.new({}, order: new_vectors, index: @index, name: @name)
  new_vectors.each do |vec|
    if @vectors.include?(vec)
      cl[vec] = self[vec]
    else
      cl[vec] = [nil]*nrows
    end
  end

  cl
end
|
1231
|
+
|
1232
|
+
# Concatenate another DataFrame along corresponding columns.
|
1233
|
+
# Very premature implementation. Use with caution.
|
1234
|
+
# Concatenate another DataFrame along corresponding columns: for every vector
# of this DataFrame, the values of the same-named vector in +other_df+ are
# appended. The result is built with the default index.
#
# @param [Daru::DataFrame] other_df Must respond to #[] for each vector name
#   of this DataFrame.
# @return [Daru::DataFrame] a new DataFrame ordered like the receiver.
def concat other_df
  # Array#+ builds a fresh array. Array#concat would append in place, which
  # modifies the receiver's data whenever #to_a hands back internal storage.
  combined = @vectors.map do |v|
    self[v].to_a + other_df[v].to_a
  end

  Daru::DataFrame.new(combined, order: @vectors)
end
|
1242
|
+
|
1243
|
+
# Set a particular column as the new index of the DataFrame.
|
1244
|
+
def set_index new_index, opts={}
|
1245
|
+
raise ArgumentError, "All elements in new index must be unique." if
|
1246
|
+
@size != self[new_index].uniq.size
|
1247
|
+
|
1248
|
+
self.index = Daru::Index.new(self[new_index].to_a)
|
1249
|
+
self.delete_vector(new_index) unless opts[:keep]
|
1197
1250
|
|
1198
|
-
|
1251
|
+
self
|
1199
1252
|
end
|
1200
1253
|
|
1201
|
-
# Change the index of the DataFrame and
|
1254
|
+
# Change the index of the DataFrame and preserve the labels of the previous
|
1255
|
+
# indexing. New index can be Daru::Index or any of its subclasses.
|
1202
1256
|
#
|
1203
|
-
# @param [
|
1204
|
-
|
1205
|
-
|
1257
|
+
# @param [Daru::Index] new_index The new Index for reindexing the DataFrame.
|
1258
|
+
# @example Reindexing DataFrame
|
1259
|
+
# df = Daru::DataFrame.new({a: [1,2,3,4], b: [11,22,33,44]},
|
1260
|
+
# index: ['a','b','c','d'])
|
1261
|
+
# #=>
|
1262
|
+
# ##<Daru::DataFrame:83278130 @name = b19277b8-c548-41da-ad9a-2ad8c060e273 @size = 4>
|
1263
|
+
# # a b
|
1264
|
+
# # a 1 11
|
1265
|
+
# # b 2 22
|
1266
|
+
# # c 3 33
|
1267
|
+
# # d 4 44
|
1268
|
+
# df.reindex Daru::Index.new(['b', 0, 'a', 'g'])
|
1269
|
+
# #=>
|
1270
|
+
# ##<Daru::DataFrame:83177070 @name = b19277b8-c548-41da-ad9a-2ad8c060e273 @size = 4>
|
1271
|
+
# # a b
|
1272
|
+
# # b 2 22
|
1273
|
+
# # 0 nil nil
|
1274
|
+
# # a 1 11
|
1275
|
+
# # g nil nil
|
1276
|
+
def reindex new_index
|
1277
|
+
raise ArgumentError, "Must pass the new index of type Index or its "\
|
1278
|
+
"subclasses, not #{new_index.class}" unless new_index.kind_of?(Daru::Index)
|
1206
1279
|
|
1207
|
-
|
1208
|
-
|
1209
|
-
|
1280
|
+
cl = Daru::DataFrame.new({}, order: @vectors, index: new_index, name: @name)
|
1281
|
+
new_index.each do |idx|
|
1282
|
+
if @index.include?(idx)
|
1283
|
+
cl.row[idx] = self.row[idx]
|
1284
|
+
else
|
1285
|
+
cl.row[idx] = [nil]*ncols
|
1286
|
+
end
|
1210
1287
|
end
|
1211
1288
|
|
1289
|
+
cl
|
1290
|
+
end
|
1291
|
+
|
1292
|
+
# Reassign index with a new index of type Daru::Index or any of its subclasses.
|
1293
|
+
#
|
1294
|
+
# @param [Daru::Index] idx New index object on which the rows of the dataframe
|
1295
|
+
# are to be indexed.
|
1296
|
+
# @example Reassigning index of a DataFrame
|
1297
|
+
# df = Daru::DataFrame.new({a: [1,2,3,4], b: [11,22,33,44]})
|
1298
|
+
# df.index.to_a #=> [0,1,2,3]
|
1299
|
+
#
|
1300
|
+
# df.index = Daru::Index.new(['a','b','c','d'])
|
1301
|
+
# df.index.to_a #=> ['a','b','c','d']
|
1302
|
+
# df.row['a'].to_a #=> [1,11]
|
1303
|
+
def index= idx
|
1304
|
+
@data.each { |vec| vec.index = idx}
|
1305
|
+
@index = idx
|
1306
|
+
|
1212
1307
|
self
|
1213
1308
|
end
|
1214
1309
|
|
1215
|
-
#
|
1216
|
-
|
1217
|
-
|
1310
|
+
# Reassign vectors with a new index of type Daru::Index or any of its subclasses.
|
1311
|
+
#
|
1312
|
+
# @param [Daru::Index] idx The new index object on which the vectors are to
|
1313
|
+
# be indexed. Must be of the same size as ncols.
|
1314
|
+
# @example Reassigning vectors of a DataFrame
|
1315
|
+
# df = Daru::DataFrame.new({a: [1,2,3,4], b: [:a,:b,:c,:d], c: [11,22,33,44]})
|
1316
|
+
# df.vectors.to_a #=> [:a, :b, :c]
|
1317
|
+
#
|
1318
|
+
# df.vectors = Daru::Index.new([:foo, :bar, :baz])
|
1319
|
+
# df.vectors.to_a #=> [:foo, :bar, :baz]
|
1320
|
+
# Replace the vector labels of the DataFrame.
#
# @param [Daru::Index] idx New labels; must contain exactly ncols entries.
# @return [Daru::DataFrame] self
# @raise [ArgumentError] if +idx+ is not a Daru::Index (or subclass), or if
#   its size differs from the number of vectors.
def vectors= idx
  # Validate the argument itself. Testing +index+ (the DataFrame's own row
  # index) meant the guard could never reject a bad argument.
  raise ArgumentError, "Can only reindex with Index and its subclasses" unless
    idx.kind_of?(Daru::Index)
  raise ArgumentError, "Specified index length #{idx.size} not equal to "\
    "dataframe size #{ncols}" if idx.size != ncols

  @vectors = idx
  self
end
|
1219
1329
|
|
1220
1330
|
# Return the indexes of all the numeric vectors. Will include vectors with nils
|
@@ -1231,8 +1341,8 @@ module Daru
|
|
1231
1341
|
def numeric_vector_names
|
1232
1342
|
numerics = []
|
1233
1343
|
|
1234
|
-
|
1235
|
-
numerics <<
|
1344
|
+
@vectors.each do |v|
|
1345
|
+
numerics << v if (self[v].type == :numeric)
|
1236
1346
|
end
|
1237
1347
|
numerics
|
1238
1348
|
end
|
@@ -1248,7 +1358,7 @@ module Daru
|
|
1248
1358
|
arr
|
1249
1359
|
end
|
1250
1360
|
|
1251
|
-
order =
|
1361
|
+
order = Index.new(nv)
|
1252
1362
|
Daru::DataFrame.new(arry, clone: cln, order: order, index: @index)
|
1253
1363
|
end
|
1254
1364
|
|
@@ -1302,9 +1412,11 @@ module Daru
|
|
1302
1412
|
|
1303
1413
|
opts[:by] = create_logic_blocks vector_order, opts[:by]
|
1304
1414
|
opts[:ascending] = sort_order_array vector_order, opts[:ascending]
|
1305
|
-
|
1306
|
-
send(opts[:type], vector_order,
|
1307
|
-
|
1415
|
+
idx = @index.to_a
|
1416
|
+
send(opts[:type], vector_order, idx, opts[:by], opts[:ascending])
|
1417
|
+
self.index = Daru::Index.new(idx)
|
1418
|
+
|
1419
|
+
self
|
1308
1420
|
end
|
1309
1421
|
|
1310
1422
|
# Non-destructive version of #sort!
|
@@ -1347,7 +1459,8 @@ module Daru
|
|
1347
1459
|
# # [:bar] 18 26
|
1348
1460
|
# # [:foo] 10 12
|
1349
1461
|
def pivot_table opts={}
|
1350
|
-
raise ArgumentError,
|
1462
|
+
raise ArgumentError,
|
1463
|
+
"Specify grouping index" if !opts[:index] or opts[:index].empty?
|
1351
1464
|
|
1352
1465
|
index = opts[:index]
|
1353
1466
|
vectors = opts[:vectors] || []
|
@@ -1389,18 +1502,20 @@ module Daru
|
|
1389
1502
|
end
|
1390
1503
|
end
|
1391
1504
|
|
1392
|
-
df_index = Daru::MultiIndex.
|
1505
|
+
df_index = Daru::MultiIndex.from_tuples super_hash.keys
|
1393
1506
|
|
1394
1507
|
vector_indexes = []
|
1395
1508
|
super_hash.each_value do |sub_hash|
|
1396
1509
|
vector_indexes.concat sub_hash.keys
|
1397
1510
|
end
|
1398
|
-
|
1511
|
+
|
1512
|
+
df_vectors = Daru::MultiIndex.from_tuples vector_indexes.uniq
|
1399
1513
|
pivoted_dataframe = Daru::DataFrame.new({}, index: df_index, order: df_vectors)
|
1400
1514
|
|
1401
1515
|
super_hash.each do |row_index, sub_h|
|
1402
1516
|
sub_h.each do |vector_index, val|
|
1403
|
-
pivoted_dataframe[symbolize(vector_index)][symbolize(row_index)] = val
|
1517
|
+
# pivoted_dataframe[symbolize(vector_index)][symbolize(row_index)] = val
|
1518
|
+
pivoted_dataframe[vector_index][row_index] = val
|
1404
1519
|
end
|
1405
1520
|
end
|
1406
1521
|
return pivoted_dataframe
|
@@ -1430,47 +1545,33 @@ module Daru
|
|
1430
1545
|
df_new
|
1431
1546
|
end
|
1432
1547
|
|
1433
|
-
# Join 2 DataFrames
|
1434
|
-
#
|
1435
|
-
#
|
1436
|
-
#
|
1437
|
-
#
|
1438
|
-
# @
|
1439
|
-
|
1440
|
-
|
1441
|
-
|
1442
|
-
|
1443
|
-
|
1444
|
-
|
1445
|
-
|
1446
|
-
|
1447
|
-
|
1448
|
-
|
1449
|
-
|
1450
|
-
|
1451
|
-
|
1452
|
-
|
1453
|
-
|
1454
|
-
|
1455
|
-
|
1456
|
-
|
1457
|
-
|
1458
|
-
|
1459
|
-
|
1460
|
-
|
1461
|
-
if other_ds_hash[key].nil?
|
1462
|
-
if type == :left
|
1463
|
-
fields_new.each{|field| new_case[field] = nil}
|
1464
|
-
new_ds.add_row(Daru::Vector.new(new_case))
|
1465
|
-
end
|
1466
|
-
else
|
1467
|
-
other_ds_hash[key].each do |new_values|
|
1468
|
-
new_ds.add_row(Daru::Vector.new(new_case.merge(new_values)))
|
1469
|
-
end
|
1470
|
-
end
|
1471
|
-
end
|
1472
|
-
|
1473
|
-
new_ds
|
1548
|
+
# Join 2 DataFrames with SQL style joins. Currently supports inner, left
|
1549
|
+
# outer, right outer and full outer joins.
|
1550
|
+
#
|
1551
|
+
# @param [Daru::DataFrame] other_df Another DataFrame on which the join is
|
1552
|
+
# to be performed.
|
1553
|
+
# @param [Hash] opts Options Hash
|
1554
|
+
# @option :how [Symbol] Can be one of :inner, :left, :right or :outer.
|
1555
|
+
# @option :on [Array] The columns on which the join is to be performed.
|
1556
|
+
# Column names specified here must be common to both DataFrames.
|
1557
|
+
# @return [Daru::DataFrame]
|
1558
|
+
# @example Inner Join
|
1559
|
+
# left = Daru::DataFrame.new({
|
1560
|
+
# :id => [1,2,3,4],
|
1561
|
+
# :name => ['Pirate', 'Monkey', 'Ninja', 'Spaghetti']
|
1562
|
+
# })
|
1563
|
+
# right = Daru::DataFrame.new({
|
1564
|
+
# :id => [1,2,3,4],
|
1565
|
+
# :name => ['Rutabaga', 'Pirate', 'Darth Vader', 'Ninja']
|
1566
|
+
# })
|
1567
|
+
# left.join(right, how: :inner, on: [:name])
|
1568
|
+
# #=>
|
1569
|
+
# ##<Daru::DataFrame:82416700 @name = 74c0811b-76c6-4c42-ac93-e6458e82afb0 @size = 2>
|
1570
|
+
# # id_1 name id_2
|
1571
|
+
# # 0 1 Pirate 2
|
1572
|
+
# # 1 3 Ninja 4
|
1573
|
+
def join(other_df,opts={})
|
1574
|
+
Daru::Core::Merge.join(self, other_df, opts)
|
1474
1575
|
end
|
1475
1576
|
|
1476
1577
|
|
@@ -1486,7 +1587,7 @@ module Daru
|
|
1486
1587
|
# to new dataset, and fields which responds to second
|
1487
1588
|
# pattern will be added one case for each different %n.
|
1488
1589
|
#
|
1489
|
-
#
|
1590
|
+
# @example
|
1490
1591
|
# cases=[
|
1491
1592
|
# ['1','george','red',10,'blue',20,nil,nil],
|
1492
1593
|
# ['2','fred','green',15,'orange',30,'white',20],
|
@@ -1494,17 +1595,16 @@ module Daru
|
|
1494
1595
|
# ]
|
1495
1596
|
# ds=Daru::DataFrame.rows(cases, order: [:id, :name, :car_color1, :car_value1, :car_color2, :car_value2, :car_color3, :car_value3])
|
1496
1597
|
# ds.one_to_many([:id],'car_%v%n').to_matrix
|
1497
|
-
#
|
1498
|
-
#
|
1499
|
-
#
|
1500
|
-
#
|
1501
|
-
#
|
1502
|
-
#
|
1503
|
-
#
|
1504
|
-
#
|
1598
|
+
# #=> Matrix[
|
1599
|
+
# # ["red", "1", 10],
|
1600
|
+
# # ["blue", "1", 20],
|
1601
|
+
# # ["green", "2", 15],
|
1602
|
+
# # ["orange", "2", 30],
|
1603
|
+
# # ["white", "2", 20]
|
1604
|
+
# # ]
|
1505
1605
|
def one_to_many(parent_fields, pattern)
|
1506
1606
|
re = Regexp.new pattern.gsub("%v","(.+?)").gsub("%n","(\\d+?)")
|
1507
|
-
ds_vars = parent_fields
|
1607
|
+
ds_vars = parent_fields.dup
|
1508
1608
|
vars = []
|
1509
1609
|
max_n = 0
|
1510
1610
|
h = parent_fields.inject({}) { |a,v|
|
@@ -1512,8 +1612,8 @@ module Daru
|
|
1512
1612
|
a
|
1513
1613
|
}
|
1514
1614
|
# Adding _col_id
|
1515
|
-
h[
|
1516
|
-
ds_vars.push(
|
1615
|
+
h['_col_id'] = Daru::Vector.new([])
|
1616
|
+
ds_vars.push('_col_id')
|
1517
1617
|
|
1518
1618
|
@vectors.each do |f|
|
1519
1619
|
if f =~ re
|
@@ -1529,20 +1629,20 @@ module Daru
|
|
1529
1629
|
each_row do |row|
|
1530
1630
|
row_out = {}
|
1531
1631
|
parent_fields.each do |f|
|
1532
|
-
row_out[f]=row[f]
|
1632
|
+
row_out[f] = row[f]
|
1533
1633
|
end
|
1534
1634
|
|
1535
1635
|
max_n.times do |n1|
|
1536
1636
|
n = n1+1
|
1537
1637
|
any_data = false
|
1538
1638
|
vars.each do |v|
|
1539
|
-
data = row[pattern.gsub("%v",v.to_s).gsub("%n",n.to_s)
|
1639
|
+
data = row[pattern.gsub("%v",v.to_s).gsub("%n",n.to_s)]
|
1540
1640
|
row_out[v] = data
|
1541
1641
|
any_data = true if !data.nil?
|
1542
1642
|
end
|
1543
1643
|
|
1544
1644
|
if any_data
|
1545
|
-
row_out[
|
1645
|
+
row_out['_col_id'] = n
|
1546
1646
|
ds.add_row(row_out)
|
1547
1647
|
end
|
1548
1648
|
end
|
@@ -1569,14 +1669,14 @@ module Daru
|
|
1569
1669
|
# * table - String specifying name of the table that will created in SQL.
|
1570
1670
|
# * charset - Character set. Default is "UTF8".
|
1571
1671
|
#
|
1572
|
-
#
|
1672
|
+
# @example
|
1573
1673
|
#
|
1574
1674
|
# ds = Daru::DataFrame.new({
|
1575
1675
|
# :id => Daru::Vector.new([1,2,3,4,5]),
|
1576
1676
|
# :name => Daru::Vector.new(%w{Alex Peter Susan Mary John})
|
1577
1677
|
# })
|
1578
1678
|
# ds.create_sql('names')
|
1579
|
-
#
|
1679
|
+
# #=>"CREATE TABLE names (id INTEGER,\n name VARCHAR (255)) CHARACTER SET=UTF8;"
|
1580
1680
|
#
|
1581
1681
|
def create_sql(table,charset="UTF8")
|
1582
1682
|
sql = "CREATE TABLE #{table} ("
|
@@ -1639,6 +1739,8 @@ module Daru
|
|
1639
1739
|
arry
|
1640
1740
|
end
|
1641
1741
|
|
1742
|
+
# Convert to json. If no_index is true then the index will NOT be included
|
1743
|
+
# in the JSON thus created.
|
1642
1744
|
def to_json no_index=true
|
1643
1745
|
if no_index
|
1644
1746
|
self.to_a[0].to_json
|
@@ -1681,7 +1783,7 @@ module Daru
|
|
1681
1783
|
html += '</tr>'
|
1682
1784
|
if num > threshold
|
1683
1785
|
html += '<tr>'
|
1684
|
-
(@vectors + 1).
|
1786
|
+
(@vectors.size + 1).times { html += '<td>...</td>' }
|
1685
1787
|
html += '</tr>'
|
1686
1788
|
|
1687
1789
|
last_index = @index.to_a.last
|
@@ -1713,12 +1815,9 @@ module Daru
|
|
1713
1815
|
@data.each { |v| v.update } if Daru.lazy_update
|
1714
1816
|
end
|
1715
1817
|
|
1818
|
+
# Rename the DataFrame.
|
1716
1819
|
def rename new_name
|
1717
|
-
|
1718
|
-
@name = new_name
|
1719
|
-
return
|
1720
|
-
end
|
1721
|
-
@name = new_name.to_sym
|
1820
|
+
@name = new_name
|
1722
1821
|
end
|
1723
1822
|
|
1724
1823
|
# Write this DataFrame to a CSV file.
|
@@ -1792,7 +1891,7 @@ module Daru
|
|
1792
1891
|
# df.recast a: :nmatrix, c: :nmatrix
|
1793
1892
|
def recast opts={}
|
1794
1893
|
opts.each do |vector_name, dtype|
|
1795
|
-
|
1894
|
+
self[vector_name].cast(dtype: dtype)
|
1796
1895
|
end
|
1797
1896
|
end
|
1798
1897
|
|
@@ -1840,16 +1939,24 @@ module Daru
|
|
1840
1939
|
content
|
1841
1940
|
end
|
1842
1941
|
|
1942
|
+
# Query a DataFrame by passing a Daru::Core::Query::BoolArray object.
|
1943
|
+
def where bool_array
|
1944
|
+
Daru::Core::Query.df_where self, bool_array
|
1945
|
+
end
|
1946
|
+
|
1843
1947
|
def == other
|
1844
|
-
|
1845
|
-
@
|
1948
|
+
self.class == other.class and
|
1949
|
+
@size == other.size and
|
1950
|
+
@index == other.index and
|
1951
|
+
@vectors == other.vectors and
|
1952
|
+
@vectors.to_a.all? { |v| self[v] == other[v] }
|
1846
1953
|
end
|
1847
1954
|
|
1848
1955
|
def method_missing(name, *args, &block)
|
1849
1956
|
if md = name.match(/(.+)\=/)
|
1850
1957
|
insert_or_modify_vector name[/(.+)\=/].delete("=").to_sym, args[0]
|
1851
1958
|
elsif self.has_vector? name
|
1852
|
-
self[name
|
1959
|
+
self[name]
|
1853
1960
|
else
|
1854
1961
|
super(name, *args, &block)
|
1855
1962
|
end
|
@@ -1859,7 +1966,7 @@ module Daru
|
|
1859
1966
|
|
1860
1967
|
def possibly_multi_index? index
|
1861
1968
|
if @index.is_a?(MultiIndex)
|
1862
|
-
Daru::MultiIndex.
|
1969
|
+
Daru::MultiIndex.from_tuples(index)
|
1863
1970
|
else
|
1864
1971
|
Daru::Index.new(index)
|
1865
1972
|
end
|
@@ -1892,7 +1999,7 @@ module Daru
|
|
1892
1999
|
|
1893
2000
|
def partition vector_order, index, by, ascending, left_lower, right_upper
|
1894
2001
|
mindex = (left_lower + right_upper) / 2
|
1895
|
-
mvalues = vector_order.inject([]) { |a, vector_name| a <<
|
2002
|
+
mvalues = vector_order.inject([]) { |a, vector_name| a << self[vector_name][mindex]; a }
|
1896
2003
|
i = left_lower
|
1897
2004
|
j = right_upper
|
1898
2005
|
descending = ascending.map { |a| !a }
|
@@ -1929,7 +2036,7 @@ module Daru
|
|
1929
2036
|
def keep? current_index, mvalues, vector_order, sort_order, by, vector_order_index
|
1930
2037
|
vector_name = vector_order[vector_order_index]
|
1931
2038
|
if vector_name
|
1932
|
-
vec =
|
2039
|
+
vec = self[vector_name]
|
1933
2040
|
eval = by[vector_name].call(vec[current_index], mvalues[vector_order_index])
|
1934
2041
|
|
1935
2042
|
if sort_order[vector_order_index] # sort in ascending order
|
@@ -1980,28 +2087,41 @@ module Daru
|
|
1980
2087
|
|
1981
2088
|
return dup(@vectors[location]) if location.is_a?(Range)
|
1982
2089
|
if @vectors.is_a?(MultiIndex)
|
1983
|
-
pos =
|
2090
|
+
pos = @vectors[names]
|
1984
2091
|
|
1985
2092
|
if pos.is_a?(Integer)
|
1986
2093
|
return @data[pos]
|
1987
2094
|
else # MultiIndex
|
1988
2095
|
new_vectors = pos.map do |tuple|
|
1989
|
-
@data[
|
2096
|
+
@data[@vectors[tuple]]
|
1990
2097
|
end
|
1991
|
-
|
2098
|
+
|
2099
|
+
if !location.is_a?(Range) and names.size < @vectors.width
|
2100
|
+
pos = pos.drop_left_level names.size
|
2101
|
+
end
|
2102
|
+
|
2103
|
+
Daru::DataFrame.new(
|
2104
|
+
new_vectors, index: @index, order: pos)
|
1992
2105
|
end
|
1993
2106
|
else
|
1994
2107
|
unless names[1]
|
1995
|
-
pos =
|
1996
|
-
|
2108
|
+
pos = @vectors[location]
|
2109
|
+
|
2110
|
+
if pos.is_a?(Numeric)
|
2111
|
+
return @data[pos]
|
2112
|
+
else
|
2113
|
+
names = pos
|
2114
|
+
end
|
1997
2115
|
end
|
1998
2116
|
|
1999
|
-
new_vcs =
|
2117
|
+
new_vcs = []
|
2000
2118
|
names.each do |name|
|
2001
|
-
|
2002
|
-
new_vcs[name] = @data[@vectors[name]]
|
2119
|
+
new_vcs << @data[@vectors[name]].to_a
|
2003
2120
|
end
|
2004
|
-
|
2121
|
+
|
2122
|
+
order = names.is_a?(Array) ? Daru::Index.new(names) : names
|
2123
|
+
Daru::DataFrame.new(new_vcs, order: order,
|
2124
|
+
index: @index, name: @name)
|
2005
2125
|
end
|
2006
2126
|
end
|
2007
2127
|
|
@@ -2009,82 +2129,55 @@ module Daru
|
|
2009
2129
|
location = names[0]
|
2010
2130
|
|
2011
2131
|
if @index.is_a?(MultiIndex)
|
2012
|
-
pos =
|
2132
|
+
pos = @index[names]
|
2013
2133
|
if pos.is_a?(Integer)
|
2014
2134
|
return Daru::Vector.new(populate_row_for(pos), index: @vectors, name: pos)
|
2015
2135
|
else
|
2016
|
-
new_rows =
|
2017
|
-
if location.is_a?(Range)
|
2018
|
-
pos.map { |tuple| populate_row_for(tuple) }
|
2019
|
-
else
|
2020
|
-
pos.map { |tuple| populate_row_for(names + tuple) }
|
2021
|
-
end
|
2136
|
+
new_rows = pos.map { |tuple| populate_row_for(tuple) }
|
2022
2137
|
|
2023
|
-
|
2024
|
-
|
2138
|
+
if !location.is_a?(Range) and names.size < @index.width
|
2139
|
+
pos = pos.drop_left_level names.size
|
2140
|
+
end
|
2141
|
+
|
2142
|
+
Daru::DataFrame.rows(
|
2143
|
+
new_rows, order: @vectors, name: @name, index: pos)
|
2025
2144
|
end
|
2026
2145
|
else
|
2027
2146
|
if names[1].nil?
|
2028
|
-
|
2029
|
-
|
2030
|
-
|
2031
|
-
|
2032
|
-
|
2033
|
-
location
|
2034
|
-
else
|
2035
|
-
first_index = index_arry.index location.first
|
2036
|
-
last_index = index_arry.index location.last
|
2037
|
-
|
2038
|
-
first_index..last_index
|
2147
|
+
names = @index[location]
|
2148
|
+
if names.is_a?(Numeric)
|
2149
|
+
row = []
|
2150
|
+
@data.each do |vector|
|
2151
|
+
row << vector[location]
|
2039
2152
|
end
|
2040
2153
|
|
2041
|
-
|
2042
|
-
else
|
2043
|
-
row = []
|
2044
|
-
name = named_index_for names[0]
|
2045
|
-
@vectors.each do |vector|
|
2046
|
-
row << @data[@vectors[vector]][name]
|
2047
|
-
end
|
2048
|
-
|
2049
|
-
return Daru::Vector.new(row, index: @vectors, name: set_name(name))
|
2154
|
+
return Daru::Vector.new(row, index: @vectors, name: set_name(location))
|
2050
2155
|
end
|
2051
2156
|
end
|
2052
2157
|
# Access multiple rows
|
2053
2158
|
rows = []
|
2054
2159
|
names.each do |name|
|
2055
|
-
rows << self.row[name]
|
2160
|
+
rows << self.row[name].to_a
|
2056
2161
|
end
|
2057
2162
|
|
2058
|
-
Daru::DataFrame.rows rows, name: @name
|
2059
|
-
end
|
2060
|
-
end
|
2061
|
-
|
2062
|
-
def row_index_for location
|
2063
|
-
if @index.include?(location) or location[0].is_a?(Range)
|
2064
|
-
@index[location]
|
2065
|
-
elsif location[0].is_a?(Integer)
|
2066
|
-
location[0]
|
2163
|
+
Daru::DataFrame.rows rows, index: names ,name: @name, order: @vectors
|
2067
2164
|
end
|
2068
2165
|
end
|
2069
2166
|
|
2070
2167
|
def populate_row_for pos
|
2071
|
-
@
|
2072
|
-
|
2168
|
+
@data.map do |vector|
|
2169
|
+
vector[pos]
|
2073
2170
|
end
|
2074
2171
|
end
|
2075
2172
|
|
2076
2173
|
def insert_or_modify_vector name, vector
|
2077
|
-
|
2078
|
-
|
2079
|
-
end
|
2174
|
+
name = name[0] unless @vectors.is_a?(MultiIndex)
|
2175
|
+
v = nil
|
2080
2176
|
|
2081
|
-
@vectors = @vectors + name if !@vectors.include?(name)
|
2082
|
-
v = nil
|
2083
|
-
|
2084
2177
|
if @index.empty?
|
2085
2178
|
v = vector.is_a?(Daru::Vector) ? vector : Daru::Vector.new(vector.to_a)
|
2086
2179
|
@index = v.index
|
2087
|
-
|
2180
|
+
assign_or_add_vector name, v
|
2088
2181
|
set_size
|
2089
2182
|
|
2090
2183
|
@data.map! do |v|
|
@@ -2096,21 +2189,47 @@ module Daru
|
|
2096
2189
|
end
|
2097
2190
|
else
|
2098
2191
|
if vector.is_a?(Daru::Vector)
|
2099
|
-
|
2100
|
-
|
2101
|
-
|
2192
|
+
if vector.index == @index # so that index-by-index assignment is avoided when possible.
|
2193
|
+
v = vector.dup
|
2194
|
+
else
|
2195
|
+
v = Daru::Vector.new [], name: set_name(name), index: @index
|
2196
|
+
@index.each do |idx|
|
2197
|
+
if vector.index.include? idx
|
2198
|
+
v[idx] = vector[idx]
|
2199
|
+
else
|
2200
|
+
v[idx] = nil
|
2201
|
+
end
|
2202
|
+
end
|
2102
2203
|
end
|
2103
2204
|
else
|
2104
|
-
raise
|
2205
|
+
raise SizeError,
|
2206
|
+
"Specified vector of length #{vector.size} cannot be inserted in DataFrame of size #{@size}" if
|
2105
2207
|
@size != vector.size
|
2106
2208
|
|
2107
2209
|
v = Daru::Vector.new(vector, name: set_name(name), index: @index)
|
2108
2210
|
end
|
2109
2211
|
|
2110
|
-
|
2212
|
+
assign_or_add_vector name, v
|
2111
2213
|
end
|
2112
2214
|
end
|
2113
2215
|
|
2216
|
+
def assign_or_add_vector name, v
|
2217
|
+
# FIXME: fix this hack (jugaad). Need to make changes in Indexing itself.
|
2218
|
+
pos = @vectors[name]
|
2219
|
+
|
2220
|
+
if !pos.kind_of?(Daru::Index) and pos == name and
|
2221
|
+
(@vectors.include?(name) or (pos.is_a?(Integer) and pos < @data.size))
|
2222
|
+
@data[pos] = v
|
2223
|
+
elsif pos.kind_of?(Daru::Index)
|
2224
|
+
pos.each do |p|
|
2225
|
+
@data[@vectors[p]] = v
|
2226
|
+
end
|
2227
|
+
else
|
2228
|
+
@vectors = @vectors | [name] if !@vectors.include?(name)
|
2229
|
+
@data[@vectors[name]] = v
|
2230
|
+
end
|
2231
|
+
end
|
2232
|
+
|
2114
2233
|
def insert_or_modify_row name, vector
|
2115
2234
|
if index.is_a?(MultiIndex)
|
2116
2235
|
# TODO
|
@@ -2124,13 +2243,13 @@ module Daru
|
|
2124
2243
|
end
|
2125
2244
|
|
2126
2245
|
if @index.include? name
|
2127
|
-
|
2128
|
-
|
2246
|
+
self.each_vector_with_index do |vector,i|
|
2247
|
+
vector[name] = v.index.include?(i) ? v[i] : nil
|
2129
2248
|
end
|
2130
2249
|
else
|
2131
|
-
@index =
|
2132
|
-
|
2133
|
-
|
2250
|
+
@index = @index | [name]
|
2251
|
+
self.each_vector_with_index do |vector,i|
|
2252
|
+
vector.concat((v.index.include?(i) ? v[i] : nil), name)
|
2134
2253
|
end
|
2135
2254
|
end
|
2136
2255
|
|
@@ -2184,14 +2303,11 @@ module Daru
|
|
2184
2303
|
end
|
2185
2304
|
|
2186
2305
|
def create_vectors_index_with vectors, source
|
2187
|
-
vectors = source.keys.
|
2306
|
+
vectors = source.keys.sort_by { |a| a.to_s } if vectors.nil?
|
2188
2307
|
|
2189
2308
|
@vectors =
|
2190
2309
|
unless vectors.is_a?(Index) or vectors.is_a?(MultiIndex)
|
2191
|
-
Daru::Index.new((vectors + (source.keys - vectors))
|
2192
|
-
.uniq
|
2193
|
-
.map { |e| e.respond_to?(:to_sym) ? e.to_sym : e }
|
2194
|
-
)
|
2310
|
+
Daru::Index.new((vectors + (source.keys - vectors)).uniq)
|
2195
2311
|
else
|
2196
2312
|
vectors
|
2197
2313
|
end
|
@@ -2200,21 +2316,17 @@ module Daru
|
|
2200
2316
|
def all_vectors_have_equal_indexes? source
|
2201
2317
|
idx = source.values[0].index
|
2202
2318
|
|
2203
|
-
source.all? do |
|
2319
|
+
source.values.all? do |vector|
|
2204
2320
|
idx == vector.index
|
2205
2321
|
end
|
2206
2322
|
end
|
2207
2323
|
|
2208
|
-
def
|
2209
|
-
Daru::Index.new
|
2210
|
-
end
|
2211
|
-
|
2212
|
-
def create_index index
|
2213
|
-
index.is_a?(MultiIndex) ? index : Daru::Index.new(index)
|
2324
|
+
def try_create_index index
|
2325
|
+
index.kind_of?(Index) ? index : Daru::Index.new(index)
|
2214
2326
|
end
|
2215
2327
|
|
2216
2328
|
def set_name potential_name
|
2217
|
-
potential_name.is_a?(Array) ? potential_name.join
|
2329
|
+
potential_name.is_a?(Array) ? potential_name.join : potential_name
|
2218
2330
|
end
|
2219
2331
|
|
2220
2332
|
def symbolize arry
|