daru 0.1.4.1 → 0.1.5
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.rubocop.yml +3 -0
- data/.travis.yml +3 -0
- data/CONTRIBUTING.md +27 -3
- data/Guardfile +7 -0
- data/History.md +39 -1
- data/README.md +1 -1
- data/daru.gemspec +9 -2
- data/lib/daru.rb +4 -1
- data/lib/daru/accessors/gsl_wrapper.rb +93 -91
- data/lib/daru/accessors/nmatrix_wrapper.rb +109 -107
- data/lib/daru/category.rb +22 -15
- data/lib/daru/core/group_by.rb +13 -2
- data/lib/daru/core/merge.rb +37 -31
- data/lib/daru/core/query.rb +10 -2
- data/lib/daru/dataframe.rb +95 -34
- data/lib/daru/date_time/index.rb +15 -16
- data/lib/daru/date_time/offsets.rb +14 -11
- data/lib/daru/formatters/table.rb +2 -2
- data/lib/daru/index/categorical_index.rb +201 -0
- data/lib/daru/index/index.rb +289 -0
- data/lib/daru/index/multi_index.rb +266 -0
- data/lib/daru/maths/statistics/vector.rb +13 -9
- data/lib/daru/monkeys.rb +0 -7
- data/lib/daru/plotting/gruff/category.rb +1 -0
- data/lib/daru/plotting/gruff/dataframe.rb +3 -3
- data/lib/daru/plotting/nyaplot/dataframe.rb +1 -1
- data/lib/daru/vector.rb +36 -21
- data/lib/daru/version.rb +1 -1
- data/spec/accessors/array_wrapper_spec.rb +3 -0
- data/spec/accessors/{wrappers_spec.rb → gsl_wrapper_spec.rb} +0 -35
- data/spec/accessors/nmatrix_wrapper_spec.rb +32 -0
- data/spec/{categorical_spec.rb → category_spec.rb} +3 -0
- data/spec/core/group_by_spec.rb +17 -1
- data/spec/core/merge_spec.rb +38 -1
- data/spec/core/query_spec.rb +5 -0
- data/spec/dataframe_spec.rb +230 -57
- data/spec/date_time/offsets_spec.rb +84 -3
- data/spec/formatters/table_formatter_spec.rb +9 -0
- data/spec/index/categorical_index_spec.rb +2 -0
- data/spec/index/index_spec.rb +17 -2
- data/spec/{math → maths}/arithmetic/dataframe_spec.rb +0 -0
- data/spec/{math → maths}/arithmetic/vector_spec.rb +0 -0
- data/spec/{math → maths}/statistics/dataframe_spec.rb +1 -1
- data/spec/{math → maths}/statistics/vector_spec.rb +7 -12
- data/spec/plotting/gruff/category_spec.rb +44 -0
- data/spec/plotting/gruff/dataframe_spec.rb +84 -0
- data/spec/plotting/gruff/vector_spec.rb +70 -0
- data/spec/plotting/nyaplot/category_spec.rb +51 -0
- data/spec/plotting/{dataframe_spec.rb → nyaplot/dataframe_spec.rb} +0 -83
- data/spec/plotting/nyaplot/vector_spec.rb +66 -0
- data/spec/spec_helper.rb +3 -2
- data/spec/vector_spec.rb +68 -1
- metadata +53 -24
- data/lib/daru/index.rb +0 -761
- data/spec/plotting/vector_spec.rb +0 -230
data/lib/daru/category.rb
CHANGED
@@ -3,9 +3,6 @@ module Daru
|
|
3
3
|
attr_accessor :base_category
|
4
4
|
attr_reader :index, :coding_scheme, :name
|
5
5
|
|
6
|
-
# For debugging. To be removed
|
7
|
-
attr_reader :array, :cat_hash, :map_int_cat
|
8
|
-
|
9
6
|
# Initializes a vector to store categorical data.
|
10
7
|
# @note Base category is set to the first category encountered in the vector.
|
11
8
|
# @param [Array] data the categorical data
|
@@ -64,11 +61,13 @@ module Daru
|
|
64
61
|
case lib
|
65
62
|
when :gruff, :nyaplot
|
66
63
|
@plotting_library = lib
|
67
|
-
|
68
|
-
|
69
|
-
|
64
|
+
if Daru.send("has_#{lib}?".to_sym)
|
65
|
+
extend Module.const_get(
|
66
|
+
"Daru::Plotting::Category::#{lib.to_s.capitalize}Library"
|
67
|
+
)
|
68
|
+
end
|
70
69
|
else
|
71
|
-
raise
|
70
|
+
raise ArgumentError, "Plotting library #{lib} not supported. "\
|
72
71
|
'Supported libraries are :nyaplot and :gruff'
|
73
72
|
end
|
74
73
|
end
|
@@ -160,6 +159,9 @@ module Daru
|
|
160
159
|
counts.map { |c| c / size.to_f }
|
161
160
|
when :percentage
|
162
161
|
counts.map { |c| c / size.to_f * 100 }
|
162
|
+
else
|
163
|
+
raise ArgumentError, 'Type should be either :count, :fraction or'\
|
164
|
+
" :percentage. #{type} not supported."
|
163
165
|
end
|
164
166
|
Daru::Vector.new values, index: categories, name: name
|
165
167
|
end
|
@@ -463,12 +465,13 @@ module Daru
|
|
463
465
|
|
464
466
|
# Contrast code the vector according to the coding scheme set.
|
465
467
|
# @note To set the coding scheme use #coding_scheme=
|
466
|
-
# @param [
|
467
|
-
#
|
468
|
+
# @param [Hash] opts The options to pass for coding.
|
469
|
+
# @option opts [TrueClass, FalseClass] :full (false) True if you want k variables
|
470
|
+
# for k categories, false if you want k-1 variables for k categories.
|
468
471
|
# @return [Daru::DataFrame] dataframe containing all coded variables
|
469
472
|
# @example
|
470
473
|
# dv = Daru::Vector.new [:a, 1, :a, 1, :c], type: :category
|
471
|
-
# dv.contrast_code
|
474
|
+
# dv.contrast_code full: false
|
472
475
|
# # => #<Daru::DataFrame(5x2)>
|
473
476
|
# # daru_1 daru_c
|
474
477
|
# # 0 0 0
|
@@ -771,9 +774,10 @@ module Daru
|
|
771
774
|
|
772
775
|
def assert_ordered operation
|
773
776
|
# TODO: Change ArgumentError to something more expressive
|
777
|
+
return if ordered?
|
778
|
+
|
774
779
|
raise ArgumentError, "Can not apply #{operation} when vector is unordered. "\
|
775
780
|
'To make the categorical data ordered, use #ordered = true'\
|
776
|
-
unless ordered?
|
777
781
|
end
|
778
782
|
|
779
783
|
def dummy_coding full
|
@@ -896,14 +900,17 @@ module Daru
|
|
896
900
|
|
897
901
|
def validate_index index
|
898
902
|
# Change to SizeError
|
903
|
+
return unless size != index.size
|
904
|
+
|
899
905
|
raise ArgumentError, "Size of index (#{index.size}) does not matches"\
|
900
|
-
"size of vector (#{size})"
|
906
|
+
"size of vector (#{size})"
|
901
907
|
end
|
902
908
|
|
903
909
|
def modify_category_at pos, category
|
904
|
-
|
905
|
-
|
906
|
-
|
910
|
+
unless categories.include? category
|
911
|
+
raise ArgumentError, "Invalid category #{category}, "\
|
912
|
+
'to add a new category use #add_category'
|
913
|
+
end
|
907
914
|
old_category = category_from_position pos
|
908
915
|
@array[pos] = int_from_cat category
|
909
916
|
@cat_hash[old_category].delete pos
|
data/lib/daru/core/group_by.rb
CHANGED
@@ -25,6 +25,13 @@ module Daru
|
|
25
25
|
@context = context
|
26
26
|
vectors = names.map { |vec| context[vec].to_a }
|
27
27
|
tuples = vectors[0].zip(*vectors[1..-1])
|
28
|
+
# FIXME: It feels like we don't want to sort here. Ruby's #group_by
|
29
|
+
# never sorts:
|
30
|
+
#
|
31
|
+
# ['test', 'me', 'please'].group_by(&:size)
|
32
|
+
# # => {4=>["test"], 2=>["me"], 6=>["please"]}
|
33
|
+
#
|
34
|
+
# - zverok, 2016-09-12
|
28
35
|
keys = tuples.uniq.sort(&TUPLE_SORTER)
|
29
36
|
|
30
37
|
keys.each do |key|
|
@@ -295,12 +302,16 @@ module Daru
|
|
295
302
|
indexes << index
|
296
303
|
end
|
297
304
|
end
|
298
|
-
indexes
|
305
|
+
if indexes.count == 1
|
306
|
+
[@context.index.at(*indexes)]
|
307
|
+
else
|
308
|
+
@context.index.at(*indexes).to_a
|
309
|
+
end
|
299
310
|
end
|
300
311
|
|
301
312
|
def multi_indexed_grouping?
|
302
313
|
return false unless @groups.keys[0]
|
303
|
-
@groups.keys[0].size > 1
|
314
|
+
@groups.keys[0].size > 1
|
304
315
|
end
|
305
316
|
end
|
306
317
|
end
|
data/lib/daru/core/merge.rb
CHANGED
@@ -18,11 +18,8 @@ module Daru
|
|
18
18
|
end
|
19
19
|
|
20
20
|
def initialize left_df, right_df, opts={}
|
21
|
-
|
22
|
-
@keep_left, @keep_right = extract_left_right(opts[:how])
|
23
|
-
|
21
|
+
init_opts(opts)
|
24
22
|
validate_on!(left_df, right_df)
|
25
|
-
|
26
23
|
key_sanitizer = ->(h) { sanitize_merge_keys(h.values_at(*on)) }
|
27
24
|
|
28
25
|
@left = df_to_a(left_df)
|
@@ -46,12 +43,12 @@ module Daru
|
|
46
43
|
row(lkey, rkey).tap { |r| res << r if r }
|
47
44
|
end
|
48
45
|
|
49
|
-
Daru::DataFrame.new(res, order:
|
46
|
+
Daru::DataFrame.new(res, order: dataframe_vector_names)
|
50
47
|
end
|
51
48
|
|
52
49
|
private
|
53
50
|
|
54
|
-
attr_reader :on,
|
51
|
+
attr_reader :on, :indicator,
|
55
52
|
:left, :left_key_values, :keep_left, :left_keys,
|
56
53
|
:right, :right_key_values, :keep_right, :right_keys
|
57
54
|
|
@@ -65,13 +62,23 @@ module Daru
|
|
65
62
|
outer: [true, true]
|
66
63
|
}.freeze
|
67
64
|
|
65
|
+
def init_opts(opts)
|
66
|
+
@on = opts[:on]
|
67
|
+
@keep_left, @keep_right = extract_left_right(opts[:how])
|
68
|
+
@indicator = opts[:indicator]
|
69
|
+
end
|
70
|
+
|
71
|
+
def dataframe_vector_names
|
72
|
+
left_keys.values + on + right_keys.values + Array(indicator)
|
73
|
+
end
|
74
|
+
|
68
75
|
def extract_left_right(how)
|
69
76
|
LEFT_RIGHT_COMBINATIONS[how] or
|
70
77
|
raise ArgumentError, "Unrecognized join option: #{how}"
|
71
78
|
end
|
72
79
|
|
73
80
|
def sanitize_merge_keys(merge_keys)
|
74
|
-
merge_keys.map { |v| v
|
81
|
+
merge_keys.map { |v| v.nil? ? NilSorter.new : v }
|
75
82
|
end
|
76
83
|
|
77
84
|
def df_to_a df
|
@@ -110,19 +117,27 @@ module Daru
|
|
110
117
|
# :nocov:
|
111
118
|
when lkey == rkey
|
112
119
|
self.merge_key = lkey
|
113
|
-
merge_matching_rows
|
120
|
+
add_indicator(merge_matching_rows, :both)
|
114
121
|
when !rkey || lt(lkey, rkey)
|
115
|
-
left_row_missing_right
|
122
|
+
add_indicator(left_row_missing_right, :left_only)
|
116
123
|
else # !lkey || lt(rkey, lkey)
|
117
|
-
right_row_missing_left
|
124
|
+
add_indicator(right_row_missing_left, :right_only)
|
118
125
|
end
|
119
126
|
end
|
120
127
|
|
128
|
+
def add_indicator(row, indicator_value)
|
129
|
+
return row unless indicator
|
130
|
+
row[indicator] = indicator_value
|
131
|
+
row
|
132
|
+
end
|
133
|
+
|
121
134
|
def merge_matching_rows
|
122
135
|
if one_to_one_merge?
|
123
136
|
merge_rows(one_to_one_left_row, one_to_one_right_row)
|
124
137
|
elsif one_to_many_merge?
|
125
|
-
merge_rows(
|
138
|
+
result = merge_rows(left.first, right.first)
|
139
|
+
one_to_many_shift
|
140
|
+
result
|
126
141
|
else
|
127
142
|
result = cartesian_product.shift
|
128
143
|
end_cartesian_product if cartesian_product.empty?
|
@@ -130,6 +145,13 @@ module Daru
|
|
130
145
|
end
|
131
146
|
end
|
132
147
|
|
148
|
+
def one_to_many_shift
|
149
|
+
shift_left = first_right_key != next_right_key
|
150
|
+
shift_right = first_left_key != next_left_key
|
151
|
+
one_to_one_left_row if shift_left
|
152
|
+
one_to_one_right_row if shift_right
|
153
|
+
end
|
154
|
+
|
133
155
|
def one_to_one_merge?
|
134
156
|
merge_key != next_left_key && merge_key != next_right_key
|
135
157
|
end
|
@@ -143,29 +165,11 @@ module Daru
|
|
143
165
|
left.shift
|
144
166
|
end
|
145
167
|
|
146
|
-
def one_to_many_left_row
|
147
|
-
if next_right_key && first_right_key == next_right_key
|
148
|
-
left.first
|
149
|
-
else
|
150
|
-
left_key_values.shift
|
151
|
-
left.shift
|
152
|
-
end
|
153
|
-
end
|
154
|
-
|
155
168
|
def one_to_one_right_row
|
156
169
|
right_key_values.shift
|
157
170
|
right.shift
|
158
171
|
end
|
159
172
|
|
160
|
-
def one_to_many_right_row
|
161
|
-
if next_left_key && first_left_key == next_left_key
|
162
|
-
right.first
|
163
|
-
else
|
164
|
-
right_key_values.shift
|
165
|
-
right.shift
|
166
|
-
end
|
167
|
-
end
|
168
|
-
|
169
173
|
def left_row_missing_right
|
170
174
|
val = one_to_one_left_row
|
171
175
|
expand_row(val, left_keys) if keep_left
|
@@ -184,6 +188,7 @@ module Daru
|
|
184
188
|
left_keys
|
185
189
|
.map { |from, to| [to, lrow[from]] }.to_h
|
186
190
|
.merge(on.map { |col| [col, lrow[col]] }.to_h)
|
191
|
+
.merge(indicator ? {indicator => nil} : {})
|
187
192
|
.merge(right_keys.map { |from, to| [to, rrow[from]] }.to_h)
|
188
193
|
end
|
189
194
|
|
@@ -191,6 +196,7 @@ module Daru
|
|
191
196
|
renamings
|
192
197
|
.map { |from, to| [to, row[from]] }.to_h
|
193
198
|
.merge(on.map { |col| [col, row[col]] }.to_h)
|
199
|
+
.merge(indicator ? {indicator => nil} : {})
|
194
200
|
end
|
195
201
|
|
196
202
|
def first_right_key
|
@@ -198,7 +204,7 @@ module Daru
|
|
198
204
|
end
|
199
205
|
|
200
206
|
def next_right_key
|
201
|
-
right_key_values
|
207
|
+
right_key_values[1]
|
202
208
|
end
|
203
209
|
|
204
210
|
def first_left_key
|
@@ -206,7 +212,7 @@ module Daru
|
|
206
212
|
end
|
207
213
|
|
208
214
|
def next_left_key
|
209
|
-
left_key_values
|
215
|
+
left_key_values[1]
|
210
216
|
end
|
211
217
|
|
212
218
|
def left_rows_at_merge_key
|
data/lib/daru/core/query.rb
CHANGED
@@ -75,8 +75,16 @@ module Daru
|
|
75
75
|
def fetch_new_data_and_index dv, bool_array
|
76
76
|
barry = bool_array.to_a
|
77
77
|
positions = dv.size.times.select { |i| barry[i] }
|
78
|
-
|
79
|
-
|
78
|
+
# FIXME: The below 4 lines should be replaced with values_at when the
|
79
|
+
# stack error is fixed in Ruby.
|
80
|
+
# eg - new_data = dv.data.data.values_at(*positions)
|
81
|
+
|
82
|
+
data = dv.type == :category ? dv.to_a : dv.data.data # non-cat optimize
|
83
|
+
new_data = positions.map { |i| data[i] }
|
84
|
+
|
85
|
+
idx = dv.index.to_a
|
86
|
+
new_index = positions.map { |i| idx[i] }
|
87
|
+
|
80
88
|
[new_data, new_index]
|
81
89
|
end
|
82
90
|
end
|
data/lib/daru/dataframe.rb
CHANGED
@@ -120,7 +120,7 @@ module Daru
|
|
120
120
|
|
121
121
|
opts[:order] ||= guess_order(source)
|
122
122
|
|
123
|
-
if ArrayHelper.array_of?(source, Array)
|
123
|
+
if ArrayHelper.array_of?(source, Array) || source.empty?
|
124
124
|
DataFrame.new(source.transpose, opts)
|
125
125
|
elsif ArrayHelper.array_of?(source, Vector)
|
126
126
|
from_vector_rows(source, opts)
|
@@ -265,9 +265,11 @@ module Daru
|
|
265
265
|
case lib
|
266
266
|
when :gruff, :nyaplot
|
267
267
|
@plotting_library = lib
|
268
|
-
|
269
|
-
|
270
|
-
|
268
|
+
if Daru.send("has_#{lib}?".to_sym)
|
269
|
+
extend Module.const_get(
|
270
|
+
"Daru::Plotting::DataFrame::#{lib.to_s.capitalize}Library"
|
271
|
+
)
|
272
|
+
end
|
271
273
|
else
|
272
274
|
raise ArguementError, "Plotting library #{lib} not supported. "\
|
273
275
|
'Supported libraries are :nyaplot and :gruff'
|
@@ -974,6 +976,26 @@ module Daru
|
|
974
976
|
Daru::Vector.new a, index: @index
|
975
977
|
end
|
976
978
|
|
979
|
+
# Reorder the vectors in a dataframe
|
980
|
+
# @param [Array] order_array new order of the vectors
|
981
|
+
# @example
|
982
|
+
#   df = Daru::DataFrame.new({
|
983
|
+
# a: [1, 2, 3],
|
984
|
+
# b: [4, 5, 6]
|
985
|
+
# }, order: [:a, :b])
|
986
|
+
# df.order = [:b, :a]
|
987
|
+
# df
|
988
|
+
# # => #<Daru::DataFrame(3x2)>
|
989
|
+
# # b a
|
990
|
+
# # 0 4 1
|
991
|
+
# # 1 5 2
|
992
|
+
# # 2 6 3
|
993
|
+
def order=(order_array)
|
994
|
+
raise ArgumentError, 'Invalid order' unless
|
995
|
+
order_array.sort == vectors.to_a.sort
|
996
|
+
initialize(to_h, order: order_array)
|
997
|
+
end
|
998
|
+
|
977
999
|
# Returns a vector, based on a string with a calculation based
|
978
1000
|
# on vector.
|
979
1001
|
#
|
@@ -1103,7 +1125,7 @@ module Daru
|
|
1103
1125
|
each_row do |row|
|
1104
1126
|
return true if yield(row)
|
1105
1127
|
end
|
1106
|
-
|
1128
|
+
false
|
1107
1129
|
else
|
1108
1130
|
raise ArgumentError, "Unidentified axis #{axis}"
|
1109
1131
|
end
|
@@ -1204,12 +1226,16 @@ module Daru
|
|
1204
1226
|
raise(ArgumentError, "Vector #{v} does not exist") unless has_vector?(v)
|
1205
1227
|
}
|
1206
1228
|
|
1229
|
+
vectors = [@vectors.first] if vectors.empty?
|
1230
|
+
|
1207
1231
|
Daru::Core::GroupBy.new(self, vectors)
|
1208
1232
|
end
|
1209
1233
|
|
1210
1234
|
def reindex_vectors new_vectors
|
1211
|
-
|
1212
|
-
|
1235
|
+
unless new_vectors.is_a?(Daru::Index)
|
1236
|
+
raise ArgumentError, 'Must pass the new index of type Index or its '\
|
1237
|
+
"subclasses, not #{new_index.class}"
|
1238
|
+
end
|
1213
1239
|
|
1214
1240
|
cl = Daru::DataFrame.new({}, order: new_vectors, index: @index, name: @name)
|
1215
1241
|
new_vectors.each_with_object(cl) do |vec, memo|
|
@@ -1233,6 +1259,19 @@ module Daru
|
|
1233
1259
|
Daru::DataFrame.new(data, order: vectors)
|
1234
1260
|
end
|
1235
1261
|
|
1262
|
+
# Concatenates another DataFrame as #concat.
|
1263
|
+
# Additionally it tries to preserve the index. If the indices contain
|
1264
|
+
# common elements, #union will overwrite the corresponding rows in the
|
1265
|
+
# first dataframe.
|
1266
|
+
def union other_df
|
1267
|
+
index = (@index.to_a + other_df.index.to_a).uniq
|
1268
|
+
df = row[*(@index.to_a - other_df.index.to_a)]
|
1269
|
+
|
1270
|
+
df = df.concat(other_df)
|
1271
|
+
df.index = Daru::Index.new(index)
|
1272
|
+
df
|
1273
|
+
end
|
1274
|
+
|
1236
1275
|
# Set a particular column as the new index of the DF
|
1237
1276
|
def set_index new_index, opts={}
|
1238
1277
|
raise ArgumentError, 'All elements in new index must be unique.' if
|
@@ -1267,8 +1306,10 @@ module Daru
|
|
1267
1306
|
# # a 1 11
|
1268
1307
|
# # g nil nil
|
1269
1308
|
def reindex new_index
|
1270
|
-
|
1271
|
-
|
1309
|
+
unless new_index.is_a?(Daru::Index)
|
1310
|
+
raise ArgumentError, 'Must pass the new index of type Index or its '\
|
1311
|
+
"subclasses, not #{new_index.class}"
|
1312
|
+
end
|
1272
1313
|
|
1273
1314
|
cl = Daru::DataFrame.new({}, order: @vectors, index: new_index, name: @name)
|
1274
1315
|
new_index.each_with_object(cl) do |idx, memo|
|
@@ -1304,13 +1345,20 @@ module Daru
|
|
1304
1345
|
#
|
1305
1346
|
# df.vectors = Daru::Index.new([:foo, :bar, :baz])
|
1306
1347
|
# df.vectors.to_a #=> [:foo, :bar, :baz]
|
1307
|
-
def vectors=
|
1308
|
-
|
1309
|
-
|
1310
|
-
|
1311
|
-
"dataframe size #{ncols}" if idx.size != ncols
|
1348
|
+
def vectors= new_index
|
1349
|
+
unless new_index.is_a?(Daru::Index)
|
1350
|
+
raise ArgumentError, 'Can only reindex with Index and its subclasses'
|
1351
|
+
end
|
1312
1352
|
|
1313
|
-
|
1353
|
+
if new_index.size != ncols
|
1354
|
+
raise ArgumentError, "Specified index length #{new_index.size} not equal to"\
|
1355
|
+
"dataframe size #{ncols}"
|
1356
|
+
end
|
1357
|
+
|
1358
|
+
@vectors = new_index
|
1359
|
+
@data.zip(new_index.to_a).each do |vect, name|
|
1360
|
+
vect.name = name
|
1361
|
+
end
|
1314
1362
|
self
|
1315
1363
|
end
|
1316
1364
|
|
@@ -1521,7 +1569,7 @@ module Daru
|
|
1521
1569
|
# # [:bar] 18 26
|
1522
1570
|
# # [:foo] 10 12
|
1523
1571
|
def pivot_table opts={}
|
1524
|
-
raise ArgumentError, 'Specify grouping index' if opts[:index].
|
1572
|
+
raise ArgumentError, 'Specify grouping index' if Array(opts[:index]).empty?
|
1525
1573
|
|
1526
1574
|
index = opts[:index]
|
1527
1575
|
vectors = opts[:vectors] || []
|
@@ -1542,9 +1590,10 @@ module Daru
|
|
1542
1590
|
#
|
1543
1591
|
# @return [Daru::DataFrame]
|
1544
1592
|
def merge other_df # rubocop:disable Metrics/AbcSize
|
1545
|
-
|
1546
|
-
|
1547
|
-
|
1593
|
+
unless nrows == other_df.nrows
|
1594
|
+
raise ArgumentError,
|
1595
|
+
"Number of rows must be equal in this: #{nrows} and other: #{other_df.nrows}"
|
1596
|
+
end
|
1548
1597
|
|
1549
1598
|
new_fields = (@vectors.to_a + other_df.vectors.to_a)
|
1550
1599
|
new_fields = ArrayHelper.recode_repeated(new_fields)
|
@@ -1567,6 +1616,9 @@ module Daru
|
|
1567
1616
|
# @option :how [Symbol] Can be one of :inner, :left, :right or :outer.
|
1568
1617
|
# @option :on [Array] The columns on which the join is to be performed.
|
1569
1618
|
# Column names specified here must be common to both DataFrames.
|
1619
|
+
# @option :indicator [Symbol] The name of a vector to add to the resultant
|
1620
|
+
# dataframe that indicates whether the record was in the left (:left_only),
|
1621
|
+
# right (:right_only), or both (:both) joining dataframes.
|
1570
1622
|
# @return [Daru::DataFrame]
|
1571
1623
|
# @example Inner Join
|
1572
1624
|
# left = Daru::DataFrame.new({
|
@@ -1892,10 +1944,15 @@ module Daru
|
|
1892
1944
|
end
|
1893
1945
|
|
1894
1946
|
def method_missing(name, *args, &block)
|
1895
|
-
|
1896
|
-
|
1897
|
-
|
1947
|
+
case
|
1948
|
+
when name =~ /(.+)\=/
|
1949
|
+
name = name[/(.+)\=/].delete('=')
|
1950
|
+
name = name.to_sym unless has_vector?(name)
|
1951
|
+
insert_or_modify_vector [name], args[0]
|
1952
|
+
when has_vector?(name)
|
1898
1953
|
self[name]
|
1954
|
+
when has_vector?(name.to_s)
|
1955
|
+
self[name.to_s]
|
1899
1956
|
else
|
1900
1957
|
super
|
1901
1958
|
end
|
@@ -2060,7 +2117,7 @@ module Daru
|
|
2060
2117
|
end
|
2061
2118
|
|
2062
2119
|
def populate_row_for pos
|
2063
|
-
@data.map { |vector| vector
|
2120
|
+
@data.map { |vector| vector.at(*pos) }
|
2064
2121
|
end
|
2065
2122
|
|
2066
2123
|
def insert_or_modify_vector name, vector
|
@@ -2128,9 +2185,10 @@ module Daru
|
|
2128
2185
|
}
|
2129
2186
|
else
|
2130
2187
|
# FIXME: No spec checks this case... And SizeError is not a thing - zverok, 2016-05-08
|
2131
|
-
|
2132
|
-
|
2133
|
-
|
2188
|
+
if @size != vector.size
|
2189
|
+
raise SizeError,
|
2190
|
+
"Specified vector of length #{vector.size} cannot be inserted in DataFrame of size #{@size}"
|
2191
|
+
end
|
2134
2192
|
|
2135
2193
|
Daru::Vector.new(vector, name: coerce_name(name), index: @index)
|
2136
2194
|
end
|
@@ -2157,12 +2215,13 @@ module Daru
|
|
2157
2215
|
end
|
2158
2216
|
|
2159
2217
|
def validate_labels
|
2160
|
-
|
2161
|
-
"
|
2162
|
-
|
2218
|
+
if @vectors && @vectors.size != @data.size
|
2219
|
+
raise IndexError, "Expected equal number of vector names (#{@vectors.size}) " \
|
2220
|
+
"for number of vectors (#{@data.size})."
|
2221
|
+
end
|
2163
2222
|
|
2164
|
-
|
2165
|
-
|
2223
|
+
return unless @index && @data[0] && @index.size != @data[0].size
|
2224
|
+
raise IndexError, 'Expected number of indexes same as number of rows'
|
2166
2225
|
end
|
2167
2226
|
|
2168
2227
|
def validate_vector_sizes
|
@@ -2228,8 +2287,10 @@ module Daru
|
|
2228
2287
|
end
|
2229
2288
|
|
2230
2289
|
def initialize_from_array_of_arrays source, vectors, index, _opts
|
2231
|
-
|
2232
|
-
|
2290
|
+
if source.size != vectors.size
|
2291
|
+
raise ArgumentError, "Number of vectors (#{vectors.size}) should " \
|
2292
|
+
"equal order size (#{source.size})"
|
2293
|
+
end
|
2233
2294
|
|
2234
2295
|
@index = Index.coerce(index || source[0].size)
|
2235
2296
|
@vectors = Index.coerce(vectors)
|
@@ -2258,7 +2319,7 @@ module Daru
|
|
2258
2319
|
@index = Daru::Index.new(index || source.size)
|
2259
2320
|
|
2260
2321
|
@data = @vectors.map do |name|
|
2261
|
-
v = source.map { |h| h
|
2322
|
+
v = source.map { |h| h.fetch(name) { h[name.to_s] } }
|
2262
2323
|
Daru::Vector.new(v, name: coerce_name(name), index: @index)
|
2263
2324
|
end
|
2264
2325
|
end
|