red_amber 0.2.2 → 0.3.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (41) hide show
  1. checksums.yaml +4 -4
  2. data/.rubocop.yml +114 -39
  3. data/CHANGELOG.md +203 -31
  4. data/Gemfile +5 -2
  5. data/README.md +62 -29
  6. data/benchmark/basic.yml +86 -0
  7. data/benchmark/combine.yml +62 -0
  8. data/benchmark/dataframe.yml +62 -0
  9. data/benchmark/drop_nil.yml +15 -3
  10. data/benchmark/group.yml +39 -0
  11. data/benchmark/reshape.yml +31 -0
  12. data/benchmark/{csv_load_penguins.yml → rover/csv_load_penguins.yml} +3 -3
  13. data/benchmark/rover/flights.yml +23 -0
  14. data/benchmark/rover/penguins.yml +23 -0
  15. data/benchmark/rover/planes.yml +23 -0
  16. data/benchmark/rover/weather.yml +23 -0
  17. data/benchmark/vector.yml +60 -0
  18. data/doc/DataFrame.md +335 -53
  19. data/doc/Vector.md +91 -0
  20. data/doc/image/dataframe/join.png +0 -0
  21. data/doc/image/dataframe/set_and_bind.png +0 -0
  22. data/doc/image/dataframe_model.png +0 -0
  23. data/lib/red_amber/data_frame.rb +167 -51
  24. data/lib/red_amber/data_frame_combinable.rb +486 -0
  25. data/lib/red_amber/data_frame_displayable.rb +6 -4
  26. data/lib/red_amber/data_frame_indexable.rb +2 -2
  27. data/lib/red_amber/data_frame_loadsave.rb +4 -1
  28. data/lib/red_amber/data_frame_reshaping.rb +35 -10
  29. data/lib/red_amber/data_frame_selectable.rb +221 -116
  30. data/lib/red_amber/data_frame_variable_operation.rb +146 -82
  31. data/lib/red_amber/group.rb +108 -18
  32. data/lib/red_amber/helper.rb +53 -43
  33. data/lib/red_amber/refinements.rb +199 -0
  34. data/lib/red_amber/vector.rb +56 -46
  35. data/lib/red_amber/vector_functions.rb +23 -83
  36. data/lib/red_amber/vector_selectable.rb +116 -69
  37. data/lib/red_amber/vector_updatable.rb +189 -65
  38. data/lib/red_amber/version.rb +1 -1
  39. data/lib/red_amber.rb +3 -0
  40. data/red_amber.gemspec +4 -3
  41. metadata +24 -10
@@ -0,0 +1,486 @@
1
+ # frozen_string_literal: true
2
+
3
+ module RedAmber
4
+ # mix-in for the class DataFrame
5
+ module DataFrameCombinable
6
+ # Refinements for Arrow::Table
7
+ using RefineArrowTable
8
+
9
+ # Concatenate other dataframe onto the bottom.
10
+ #
11
+ # @param other [DataFrame, Arrow::Table, Array<DataFrame, Arrow::Table>]
12
+ # DataFrame/Table to concatenate onto the bottom of self.
13
+ # @return [DataFrame]
14
+ # Concatenated dataframe.
15
def concatenate(*other)
  # Accept either a splat of objects or one Array holding them.
  # No argument, a lone nil or a lone empty Array leaves self as it is.
  sources = other
  case other
  in [] | [nil] | [[]]
    return self
  in [Array => nested]
    sources = nested
  else
    # Keep the splatted arguments untouched.
  end

  # Every source is reduced to its Arrow::Table before concatenation.
  tables = sources.map do |source|
    case source
    when Arrow::Table then source
    when DataFrame then source.table
    else raise DataFrameArgumentError, "#{source} is not a Table or a DataFrame"
    end
  end

  DataFrame.create(table.concatenate(tables))
end
38
+
39
+ alias_method :concat, :concatenate
40
+ alias_method :bind_rows, :concatenate
41
+
42
+ # Merge columns of other DataFrames or Tables into self.
43
+ # - Self and other must have same size.
44
+ # - Self and other do not share the same key.
45
+ # - If they share any keys, raise Error.
46
+ # @param other [DataFrame, Arrow::Table, Array<DataFrame, Arrow::Table>]
47
+ # DataFrame/Table to merge.
48
+ # @return [DataFrame]
49
+ # Merged dataframe.
50
def merge(*other)
  # Accept either a splat of objects or one Array holding them.
  # No argument, a lone nil or a lone empty Array leaves self as it is.
  case other
  in [] | [nil] | [[]]
    return self
  in [Array => array]
    # Nop
  else
    array = other
  end

  # Collect the columns of every source into one Hash, validating each
  # source against self before its columns are accepted.
  hash = array.each_with_object({}) do |e, h|
    df =
      case e
      when Arrow::Table
        DataFrame.create(e)
      when DataFrame
        e
      else
        raise DataFrameArgumentError, "#{e} is not a Table or a DataFrame"
      end

    if size != df.size
      raise DataFrameArgumentError, "#{e} do not have same size as self"
    end

    # Keep the intersection itself (not a boolean) so that the error
    # message names the offending keys.
    shared_keys = keys.intersection(df.keys)
    unless shared_keys.empty?
      raise DataFrameArgumentError, "There are some shared keys: #{shared_keys}"
    end

    # NOTE(review): keys shared between two `other` sources are not checked;
    # the later source overwrites the earlier one here.
    h.merge!(df.to_h)
  end

  assign(hash)
end
83
+
84
+ alias_method :bind_cols, :merge
85
+
86
+ # Mutating joins (#inner_join, #full_join, #left_join, #right_join)
87
+
88
+ # Join another DataFrame or Table, leaving only the matching records.
89
+ # - Same as `#join` with `type: :inner`
90
+ # - A kind of mutating join.
91
+ #
92
+ # @!macro join_before
93
+ # @param other [DataFrame, Arrow::Table]
94
+ # A DataFrame or a Table to be joined with self.
95
+ #
96
+ # @!macro join_after
97
+ # @param suffix [#succ]
98
+ # a suffix to rename keys when key names conflict as a result of join.
99
+ # `suffix` must respond to `#succ`.
100
+ # @return [DataFrame]
101
+ # Joined dataframe.
102
+ #
103
+ # @!macro join_key_in_array
104
+ # @param join_keys [String, Symbol, Array<String, Symbol>]
105
+ # A key or keys to match.
106
+ #
107
+ # @!macro join_key_in_hash
108
+ # @param join_key_pairs [Hash]
109
+ # Pairs of a key name or key names to match in left and right.
110
+ # @option join_key_pairs [String, Symbol, Array<String, Symbol>] :left
111
+ # Join keys in `self`.
112
+ # @option join_key_pairs [String, Symbol, Array<String, Symbol>] :right
113
+ # Join keys in `other`.
114
+ #
115
+ # @overload inner_join(other, suffix: '.1')
116
+ # If `join_keys` is not specified, common keys in self and other are used
117
+ # (natural keys). Returns joined dataframe.
118
+ #
119
+ # @macro join_before
120
+ # @macro join_after
121
+ #
122
+ # @overload inner_join(other, join_keys, suffix: '.1')
123
+ #
124
+ # @macro join_before
125
+ # @macro join_key_in_array
126
+ # @macro join_after
127
+ #
128
+ # @overload inner_join(other, join_key_pairs, suffix: '.1')
129
+ #
130
+ # @macro join_before
131
+ # @macro join_key_in_hash
132
+ # @macro join_after
133
+ #
134
def inner_join(other, join_keys = nil, suffix: '.1')
  # Thin wrapper: the join type is fixed, everything else is forwarded.
  join(other, join_keys, suffix: suffix, type: :inner)
end
137
+
138
+ # Join another DataFrame or Table, leaving all records.
139
+ # - Same as `#join` with `type: :full_outer`
140
+ # - A kind of mutating join.
141
+ #
142
+ # @overload full_join(other, suffix: '.1')
143
+ # If `join_keys` is not specified, common keys in self and other are used
144
+ # (natural keys). Returns joined dataframe.
145
+ #
146
+ # @macro join_before
147
+ # @macro join_after
148
+ #
149
+ # @overload full_join(other, join_keys, suffix: '.1')
150
+ #
151
+ # @macro join_before
152
+ # @macro join_key_in_array
153
+ # @macro join_after
154
+ #
155
+ # @overload full_join(other, join_key_pairs, suffix: '.1')
156
+ #
157
+ # @macro join_before
158
+ # @macro join_key_in_hash
159
+ # @macro join_after
160
+ #
161
def full_join(other, join_keys = nil, suffix: '.1')
  # Thin wrapper: the join type is fixed, everything else is forwarded.
  join(other, join_keys, suffix: suffix, type: :full_outer)
end
164
+
165
+ alias_method :outer_join, :full_join
166
+
167
+ # Join matching values to self from other.
168
+ # - Same as `#join` with `type: :left_outer`
169
+ # - A kind of mutating join.
170
+ #
171
+ # @overload left_join(other, suffix: '.1')
172
+ # If `join_keys` is not specified, common keys in self and other are used
173
+ # (natural keys). Returns joined dataframe.
174
+ #
175
+ # @macro join_before
176
+ # @macro join_after
177
+ #
178
+ # @overload left_join(other, join_keys, suffix: '.1')
179
+ #
180
+ # @macro join_before
181
+ # @macro join_key_in_array
182
+ # @macro join_after
183
+ #
184
+ # @overload left_join(other, join_key_pairs, suffix: '.1')
185
+ #
186
+ # @macro join_before
187
+ # @macro join_key_in_hash
188
+ # @macro join_after
189
+ #
190
def left_join(other, join_keys = nil, suffix: '.1')
  # Thin wrapper: the join type is fixed, everything else is forwarded.
  join(other, join_keys, suffix: suffix, type: :left_outer)
end
193
+
194
+ # Join matching values from self to other.
195
+ # - Same as `#join` with `type: :right_outer`
196
+ # - A kind of mutating join.
197
+ #
198
+ # @overload right_join(other, suffix: '.1')
199
+ # If `join_keys` is not specified, common keys in self and other are used
200
+ # (natural keys). Returns joined dataframe.
201
+ #
202
+ # @macro join_before
203
+ # @macro join_after
204
+ #
205
+ # @overload right_join(other, join_keys, suffix: '.1')
206
+ #
207
+ # @macro join_before
208
+ # @macro join_key_in_array
209
+ # @macro join_after
210
+ #
211
+ # @overload right_join(other, join_key_pairs, suffix: '.1')
212
+ #
213
+ # @macro join_before
214
+ # @macro join_key_in_hash
215
+ # @macro join_after
216
+ #
217
def right_join(other, join_keys = nil, suffix: '.1')
  # Thin wrapper: the join type is fixed, everything else is forwarded.
  join(other, join_keys, suffix: suffix, type: :right_outer)
end
220
+
221
+ # Filtering joins (#semi_join, #anti_join)
222
+
223
+ # Return records of self that have a match in other.
224
+ # - Same as `#join` with `type: :left_semi`
225
+ # - A kind of filtering join.
226
+ #
227
+ # @overload semi_join(other, suffix: '.1')
228
+ # If `join_keys` is not specified, common keys in self and other are used
229
+ # (natural keys). Returns joined dataframe.
230
+ #
231
+ # @macro join_before
232
+ # @macro join_after
233
+ #
234
+ # @overload semi_join(other, join_keys, suffix: '.1')
235
+ #
236
+ # @macro join_before
237
+ # @macro join_key_in_array
238
+ # @macro join_after
239
+ #
240
+ # @overload semi_join(other, join_key_pairs, suffix: '.1')
241
+ #
242
+ # @macro join_before
243
+ # @macro join_key_in_hash
244
+ # @macro join_after
245
+ #
246
def semi_join(other, join_keys = nil, suffix: '.1')
  # Thin wrapper: the join type is fixed, everything else is forwarded.
  join(other, join_keys, suffix: suffix, type: :left_semi)
end
249
+
250
+ # Return records of self that do not have a match in other.
251
+ # - Same as `#join` with `type: :left_anti`
252
+ # - A kind of filtering join.
253
+ #
254
+ # @overload anti_join(other, suffix: '.1')
255
+ # If `join_keys` is not specified, common keys in self and other are used
256
+ # (natural keys). Returns joined dataframe.
257
+ #
258
+ # @macro join_before
259
+ # @macro join_after
260
+ #
261
+ # @overload anti_join(other, join_keys, suffix: '.1')
262
+ #
263
+ # @macro join_before
264
+ # @macro join_key_in_array
265
+ # @macro join_after
266
+ #
267
+ # @overload anti_join(other, join_key_pairs, suffix: '.1')
268
+ #
269
+ # @macro join_before
270
+ # @macro join_key_in_hash
271
+ # @macro join_after
272
+ #
273
def anti_join(other, join_keys = nil, suffix: '.1')
  # Thin wrapper: the join type is fixed, everything else is forwarded.
  join(other, join_keys, suffix: suffix, type: :left_anti)
end
276
+
277
+ # Set operations (#intersect, #union, #difference, #set_operable?)
278
+
279
+ # Check if set operation with self and other is possible.
280
+ #
281
+ # @macro join_before
282
+ #
283
+ # @return [Boolean] true if set operation is possible.
284
+ #
285
def set_operable?(other) # rubocop:disable Naming/AccessorMethodName
  # Key names of other are converted to Symbols, then compared with
  # the keys of self using ==.
  other_keys = other.keys.map(&:to_sym)
  keys == other_keys
end
288
+
289
+ # Select records appearing in both self and other.
290
+ # - Same as `#join` with `type: :inner` when keys in self are same with other.
291
+ # - A kind of set operations.
292
+ #
293
+ # @macro join_before
294
+ #
295
+ # @return [DataFrame] Joined dataframe.
296
+ #
297
def intersect(other)
  # Validate with the shared predicate so that every set operation
  # applies exactly the same key check.
  unless set_operable?(other)
    raise DataFrameArgumentError, 'keys are not same with self and other'
  end

  # All keys take part in the match, so only identical records remain.
  join(other, keys, type: :inner)
end
304
+
305
+ # Select records appearing in self or other.
306
+ # - Same as `#join` with `type: :full_outer` when keys in self are same with other.
307
+ # - A kind of set operations.
308
+ #
309
+ # @macro join_before
310
+ #
311
+ # @return [DataFrame] Joined dataframe.
312
+ #
313
def union(other)
  # Validate with the shared predicate so that every set operation
  # applies exactly the same key check.
  unless set_operable?(other)
    raise DataFrameArgumentError, 'keys are not same with self and other'
  end

  # All keys take part in the match; a full outer join keeps both sides.
  join(other, keys, type: :full_outer)
end
320
+
321
+ # Select records appearing in self but not in other.
322
+ # - Same as `#join` with `type: :left_anti` when keys in self are same with other.
323
+ # - A kind of set operations.
324
+ #
325
+ # @macro join_before
326
+ #
327
+ # @return [DataFrame] Joined dataframe.
328
+ #
329
def difference(other)
  # Validate with the shared predicate so that every set operation
  # applies exactly the same key check.
  unless set_operable?(other)
    raise DataFrameArgumentError, 'keys are not same with self and other'
  end

  # All keys take part in the match; the anti join keeps unmatched records of self.
  join(other, keys, type: :left_anti)
end
336
+
337
+ alias_method :setdiff, :difference
338
+
339
+ # Join another DataFrame or Table to self.
340
+ #
341
+ # @overload join(other, type: :inner, suffix: '.1')
342
+ #
343
+ # If `join_keys` is not specified, common keys in self and other are used
344
+ # (natural keys). Returns joined dataframe.
345
+ #
346
+ # @!macro join_common_type
347
+ # @param type [:left_semi, :right_semi, :left_anti, :right_anti, :inner,
348
+ # :left_outer, :right_outer, :full_outer] type of join.
349
+ #
350
+ # @macro join_before
351
+ # @macro join_common_type
352
+ # @macro join_after
353
+ #
354
+ # @overload join(other, join_keys, type: :inner, suffix: '.1')
355
+ #
356
+ # @macro join_before
357
+ # @macro join_key_in_array
358
+ # @macro join_common_type
359
+ # @macro join_after
360
+ #
361
+ # @overload join(other, join_key_pairs, type: :inner, suffix: '.1')
362
+ #
363
+ # @macro join_before
364
+ # @macro join_key_in_hash
365
+ # @macro join_common_type
366
+ # @macro join_after
367
+ #
368
def join(other, join_keys = nil, type: :inner, suffix: '.1')
  # Reduce `other` to an Arrow::Table; anything else is rejected.
  case other
  when DataFrame
    other = other.table
  when Arrow::Table
    # Nop
  else
    raise DataFrameArgumentError, 'other must be a DataFrame or an Arrow::Table'
  end

  table_keys = table.keys
  other_keys = other.keys
  type = type.to_sym

  # natural keys (implicit common keys)
  join_keys ||= table_keys.intersection(other_keys)

  # This is not necessary if additional procedure is contributed to Red Arrow.
  if join_keys.is_a?(Hash)
    left_keys = join_keys[:left]
    right_keys = join_keys[:right]
  else
    left_keys = join_keys
    right_keys = join_keys
  end
  left_keys = Array(left_keys).map(&:to_s)
  right_keys = Array(right_keys).map(&:to_s)

  # Choose which columns each side contributes. nil leaves the choice to
  # Arrow::Table#join; otherwise the join keys of one side are left out
  # so they do not appear twice in the result.
  case type
  when :full_outer, :left_semi, :left_anti, :right_semi, :right_anti
    left_outputs = nil
    right_outputs = nil
  when :inner, :left_outer
    left_outputs = table_keys
    right_outputs = other_keys - right_keys
  when :right_outer
    left_outputs = table_keys - left_keys
    right_outputs = other_keys
  end

  # Should we rescue errors in Arrow::Table#join for usability ?
  joined_table =
    table.join(other, join_keys,
               type: type,
               left_outputs: left_outputs,
               right_outputs: right_outputs)

  case type
  when :inner, :left_outer, :left_semi, :left_anti, :right_semi, :right_anti
    # uniq! returns nil when there is no duplicated key, so renaming
    # happens only when key names actually collide.
    if joined_table.keys.uniq!
      DataFrame.create(rename_table(joined_table, n_keys, suffix))
    else
      DataFrame.create(joined_table)
    end
  when :full_outer
    renamed_table = rename_table(joined_table, n_keys, suffix)
    renamed_keys = renamed_table.keys
    dropper = []
    DataFrame.create(renamed_table).assign do |df|
      left_keys.zip(right_keys).map do |left_key, right_key|
        # Columns of other start at offset table_keys.size (the same
        # layout rename_table relies on). Locate the partner column by the
        # position of right_key in other, because the join key is not
        # necessarily at the same position in both tables.
        i_right_key = table_keys.size + other_keys.index(right_key)
        renamed_right_key = renamed_keys[i_right_key]
        dropper << renamed_right_key
        # Fill nulls of the left key column with the right key column,
        # then drop the now redundant right key column.
        [left_key.to_sym,
         merge_array(df[left_key].data, df[renamed_right_key].data)]
      end
    end.drop(dropper)
  when :right_outer
    if joined_table.keys.uniq!
      DataFrame.create(rename_table(joined_table, left_outputs.size, suffix))
    else
      DataFrame.create(joined_table)
    end.pick do
      # Put the join keys first, followed by the remaining keys.
      [right_keys, keys.map(&:to_s) - right_keys]
    end
  end
end
444
+
445
+ private
446
+
447
+ # Rename duplicate keys by suffix
448
def rename_table(joined_table, n_keys, suffix)
  all_keys = joined_table.keys
  repeated = all_keys.tally.reject { |_, count| count == 1 }.keys

  # Only keys at index n_keys and beyond are candidates for renaming.
  # The new names are computed against the untouched key list first and
  # written back afterwards.
  renamed_tail =
    all_keys[n_keys..].map do |key|
      next key unless repeated.include?(key)

      candidate = "#{key}#{suffix}"
      while all_keys.include?(candidate)
        # Advance the suffix until the candidate no longer collides.
        # A suffix whose #succ is itself can never make progress.
        bumped = suffix.succ
        raise DataFrameArgumentError, "suffix #{suffix} is invalid" if bumped == suffix

        suffix = bumped
        candidate = "#{key}#{suffix}"
      end
      candidate
    end
  all_keys[n_keys..] = renamed_tail

  # Rebuild the table with the new names, keeping each column's data type.
  fields =
    all_keys.each_with_index.map do |name, index|
      Arrow::Field.new(name, joined_table[index].data_type)
    end
  Arrow::Table.new(Arrow::Schema.new(fields), joined_table.columns)
end
479
+
480
+ # Merge two Arrow::Arrays
481
def merge_array(array1, array2)
  # Elements of array1 that are null are taken from array2 instead.
  null_mask = Arrow::Function.find(:is_null).execute([array1])
  chooser = Arrow::Function.find(:if_else)
  chooser.execute([null_mask, array2, array1]).value
end
485
+ end
486
+ end
@@ -93,7 +93,8 @@ module RedAmber
93
93
  levels = tallys.map(&:size)
94
94
  type_groups = @table.columns.map { |column| type_group(column.data_type) }
95
95
  quoted_keys = keys.map(&:inspect)
96
- headers = { idx: '#', key: 'key', type: 'type', levels: 'level', data: 'data_preview' }
96
+ headers = { idx: '#', key: 'key', type: 'type', levels: 'level',
97
+ data: 'data_preview' }
97
98
  header_format = make_header_format(levels, headers, quoted_keys)
98
99
 
99
100
  sio = StringIO.new # output string buffer
@@ -174,6 +175,8 @@ module RedAmber
174
175
  end
175
176
 
176
177
  def format_table(width: 80, head: 5, tail: 3, n_digit: 2)
178
+ return " #{keys.join(' ')}\n (Empty Vectors)\n" if size.zero?
179
+
177
180
  original = self
178
181
  indices = size > head + tail ? [*0..head, *(size - tail)...size] : [*0...size]
179
182
  df = slice(indices).assign do
@@ -199,7 +202,8 @@ module RedAmber
199
202
  vectors.each_with_object({}) do |v, assigner|
200
203
  vec = v.replace(0, v.key == INDEX_KEY ? '' : v.key.to_s)
201
204
  .replace(1, v.key == INDEX_KEY ? '' : "<#{original[v.key].type}>")
202
- assigner[v.key] = original.size > head + tail + 1 ? vec.replace(head + 2, ':') : vec
205
+ assigner[v.key] =
206
+ original.size > head + tail + 1 ? vec.replace(head + 2, ':') : vec
203
207
  end
204
208
  end
205
209
 
@@ -263,8 +267,6 @@ module RedAmber
263
267
  format('%g', element)
264
268
  in Integer
265
269
  format('%d', element)
266
- else
267
- element
268
270
  end
269
271
  end
270
272
  end
@@ -18,7 +18,7 @@ module RedAmber
18
18
  # @return [RedAmber::Vector] Sorted indices in Vector
19
19
  def sort_indices(*sort_keys)
20
20
  indices = @table.sort_indices(sort_keys.flatten)
21
- Vector.new(indices)
21
+ Vector.create(indices)
22
22
  end
23
23
 
24
24
  # @return [RedAmber::DataFrame] Sorted DataFrame
@@ -32,7 +32,7 @@ module RedAmber
32
32
 
33
33
  def new_dataframe_by(index_array)
34
34
  t = Arrow::Function.find(:take).execute([@table, index_array]).value
35
- RedAmber::DataFrame.new(t)
35
+ DataFrame.create(t)
36
36
  end
37
37
  end
38
38
  end
@@ -17,14 +17,17 @@ module RedAmber
17
17
  end
18
18
 
19
19
  # Save DataFrame
20
+ #
21
+ # @return [DataFrame] self.
20
22
  def save(output, options = {})
21
23
  @table.save(output, options)
24
+ self
22
25
  end
23
26
 
24
27
  # Save and reload to cast automatically
25
28
  # Via tsv format file temporarily as default
26
29
  #
27
- # experimental feature
30
+ # @note experimental feature
28
31
  def auto_cast(format: :tsv)
29
32
  return self if empty?
30
33
 
@@ -8,11 +8,14 @@ module RedAmber
8
8
  # @param key [Symbol] key of the index column
9
9
  # to transpose into keys.
10
10
  # If it is not specified, keys[0] is used.
11
- # @param new_key [Symbol] key name of transposed index column.
12
- # If it is not specified, :NAME is used. If it already exists, :NAME1 or :NAME1.succ is used.
11
+ # @param name [Symbol] key name of transposed index column.
12
+ # If it is not specified, :NAME is used.
13
+ # If it already exists, :NAME1 or :NAME1.succ is used.
13
14
  # @return [DataFrame] transposed DataFrame
14
15
  def transpose(key: keys.first, name: :NAME)
15
- raise DataFrameArgumentError, "Self does not include: #{key}" unless keys.include?(key)
16
+ unless keys.include?(key)
17
+ raise DataFrameArgumentError, "Self does not include: #{key}"
18
+ end
16
19
 
17
20
  # Find unused name
18
21
  new_keys = self[key].to_a.map { |e| e.to_s.to_sym }
@@ -35,14 +38,24 @@ module RedAmber
35
38
  # @param value [Symbol, String] key of the column which is come **from values**.
36
39
  # @return [DataFrame] long DataFrame.
37
40
  def to_long(*keep_keys, name: :NAME, value: :VALUE)
41
+ warn('[Info] No key to keep is specified.') if keep_keys.empty?
42
+
38
43
  not_included = keep_keys - keys
39
- raise DataFrameArgumentError, "Not have keys #{not_included}" unless not_included.empty?
44
+ unless not_included.empty?
45
+ raise DataFrameArgumentError, "Not have keys #{not_included}"
46
+ end
40
47
 
41
48
  name = name.to_sym
42
- raise DataFrameArgumentError, "Invalid key: #{name}" if keep_keys.include?(name)
49
+ if keep_keys.include?(name)
50
+ raise DataFrameArgumentError,
51
+ "Can't specify the key: #{name} for the column from keys."
52
+ end
43
53
 
44
54
  value = value.to_sym
45
- raise DataFrameArgumentError, "Invalid key: #{value}" if keep_keys.include?(value)
55
+ if keep_keys.include?(value)
56
+ raise DataFrameArgumentError,
57
+ "Can't specify the key: #{value} for the column from values."
58
+ end
46
59
 
47
60
  hash = Hash.new { |h, k| h[k] = [] }
48
61
  l = keys.size - keep_keys.size
@@ -62,15 +75,27 @@ module RedAmber
62
75
 
63
76
  # Reshape long DataFrame to a wide DataFrame.
64
77
  #
65
- # @param name [Symbol, String] key of the column which will be expanded **to key names**.
66
- # @param value [Symbol, String] key of the column which will be expanded **to values**.
78
+ # @param name [Symbol, String]
79
+ # key of the column which will be expanded **to key names**.
80
+ # @param value [Symbol, String]
81
+ # key of the column which will be expanded **to values**.
67
82
  # @return [DataFrame] wide DataFrame.
68
83
  def to_wide(name: :NAME, value: :VALUE)
69
84
  name = name.to_sym
70
- raise DataFrameArgumentError, "Invalid key: #{name}" unless keys.include?(name)
85
+ unless keys.include?(name)
86
+ raise DataFrameArgumentError,
87
+ "You are going to keep the key: #{name}. " \
88
+ 'You may need to specify the column name ' \
89
+ 'that gives the new keys by `:name` option.'
90
+ end
71
91
 
72
92
  value = value.to_sym
73
- raise DataFrameArgumentError, "Invalid key: #{value}" unless keys.include?(value)
93
+ unless keys.include?(value)
94
+ raise DataFrameArgumentError,
95
+ "You are going to keep the key: #{value}. " \
96
+ 'You may need to specify the column name ' \
97
+ 'that gives the new values by `:value` option.'
98
+ end
74
99
 
75
100
  hash = Hash.new { |h, k| h[k] = {} }
76
101
  keep_keys = keys - [name, value]