red_amber 0.2.2 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (41) hide show
  1. checksums.yaml +4 -4
  2. data/.rubocop.yml +114 -39
  3. data/CHANGELOG.md +203 -31
  4. data/Gemfile +5 -2
  5. data/README.md +62 -29
  6. data/benchmark/basic.yml +86 -0
  7. data/benchmark/combine.yml +62 -0
  8. data/benchmark/dataframe.yml +62 -0
  9. data/benchmark/drop_nil.yml +15 -3
  10. data/benchmark/group.yml +39 -0
  11. data/benchmark/reshape.yml +31 -0
  12. data/benchmark/{csv_load_penguins.yml → rover/csv_load_penguins.yml} +3 -3
  13. data/benchmark/rover/flights.yml +23 -0
  14. data/benchmark/rover/penguins.yml +23 -0
  15. data/benchmark/rover/planes.yml +23 -0
  16. data/benchmark/rover/weather.yml +23 -0
  17. data/benchmark/vector.yml +60 -0
  18. data/doc/DataFrame.md +335 -53
  19. data/doc/Vector.md +91 -0
  20. data/doc/image/dataframe/join.png +0 -0
  21. data/doc/image/dataframe/set_and_bind.png +0 -0
  22. data/doc/image/dataframe_model.png +0 -0
  23. data/lib/red_amber/data_frame.rb +167 -51
  24. data/lib/red_amber/data_frame_combinable.rb +486 -0
  25. data/lib/red_amber/data_frame_displayable.rb +6 -4
  26. data/lib/red_amber/data_frame_indexable.rb +2 -2
  27. data/lib/red_amber/data_frame_loadsave.rb +4 -1
  28. data/lib/red_amber/data_frame_reshaping.rb +35 -10
  29. data/lib/red_amber/data_frame_selectable.rb +221 -116
  30. data/lib/red_amber/data_frame_variable_operation.rb +146 -82
  31. data/lib/red_amber/group.rb +108 -18
  32. data/lib/red_amber/helper.rb +53 -43
  33. data/lib/red_amber/refinements.rb +199 -0
  34. data/lib/red_amber/vector.rb +56 -46
  35. data/lib/red_amber/vector_functions.rb +23 -83
  36. data/lib/red_amber/vector_selectable.rb +116 -69
  37. data/lib/red_amber/vector_updatable.rb +189 -65
  38. data/lib/red_amber/version.rb +1 -1
  39. data/lib/red_amber.rb +3 -0
  40. data/red_amber.gemspec +4 -3
  41. metadata +24 -10
@@ -0,0 +1,486 @@
1
+ # frozen_string_literal: true
2
+
3
+ module RedAmber
4
+ # mix-in for the class DataFrame
5
+ module DataFrameCombinable
6
+ # Refinements for Arrow::Table
7
+ using RefineArrowTable
8
+
# Stack other dataframes/tables below self (row-wise).
#
# @param other [DataFrame, Arrow::Table, Array<DataFrame, Arrow::Table>]
#   DataFrame(s)/Table(s), or a single Array of them, to append
#   to the bottom of self.
# @return [DataFrame]
#   self when nothing is given (no argument, `nil` or `[]`),
#   otherwise a DataFrame created from the concatenated table.
# @raise [DataFrameArgumentError]
#   if an element is neither an Arrow::Table nor a DataFrame.
def concatenate(*other)
  case other
  in [] | [nil] | [[]] then return self
  in [Array => frames] then nil # a single Array argument is unwrapped
  else frames = other
  end

  tables =
    frames.map do |frame|
      next frame if frame.is_a?(Arrow::Table)
      next frame.table if frame.is_a?(DataFrame)

      raise DataFrameArgumentError, "#{frame} is not a Table or a DataFrame"
    end

  DataFrame.create(table.concatenate(tables))
end
38
+
39
+ alias_method :concat, :concatenate
40
+ alias_method :bind_rows, :concatenate
41
+
# Merge columns of other DataFrame(s) or Table(s) into self.
# - Self and other must have same size.
# - Self and other must not share any key;
#   DataFrameArgumentError is raised if they do.
#
# @param other [DataFrame, Arrow::Table, Array<DataFrame, Arrow::Table>]
#   DataFrame(s)/Table(s), or a single Array of them, whose columns
#   are added to self.
# @return [DataFrame]
#   self when nothing is given (no argument, `nil` or `[]`),
#   otherwise the result of `assign` with the collected columns.
# @raise [DataFrameArgumentError]
#   if an element is neither an Arrow::Table nor a DataFrame,
#   if its size differs from self, or if it shares a key with self.
def merge(*other)
  case other
  in [] | [nil] | [[]]
    return self
  in [Array => array]
    # Nop
  else
    array = other
  end

  hash = array.each_with_object({}) do |e, h|
    df =
      case e
      when Arrow::Table
        DataFrame.create(e)
      when DataFrame
        e
      else
        raise DataFrameArgumentError, "#{e} is not a Table or a DataFrame"
      end

    if size != df.size
      raise DataFrameArgumentError, "#{e} do not have same size as self"
    end

    # Keep the shared keys themselves (not a boolean) so that
    # the error message names the offending keys.
    shared_keys = keys.intersection(df.keys)
    unless shared_keys.empty?
      raise DataFrameArgumentError, "There are some shared keys: #{shared_keys}"
    end

    # NOTE(review): keys are only checked against self; a key shared between
    # two elements of `other` is overwritten by the later one here.
    h.merge!(df.to_h)
  end

  assign(hash)
end
83
+
84
+ alias_method :bind_cols, :merge
85
+
86
+ # Mutating joins (#inner_join, #full_join, #left_join, #right_join)
87
+
# Join another DataFrame or Table, keeping only the matching records.
# - Delegates to `#join` with `type: :inner`.
# - A kind of mutating join.
#
# @!macro join_before
#   @param other [DataFrame, Arrow::Table]
#     A DataFrame or a Table to be joined with self.
#
# @!macro join_after
#   @param suffix [#succ]
#     a suffix to rename keys when key names conflict as a result of join.
#     `suffix` must respond to `#succ`.
#   @return [DataFrame]
#     Joined dataframe.
#
# @!macro join_key_in_array
#   @param join_keys [String, Symbol, Array<String, Symbol>]
#     A key or keys to match.
#
# @!macro join_key_in_hash
#   @param join_key_pairs [Hash]
#     Pairs of a key name or key names to match in left and right.
#   @option join_key_pairs [String, Symbol, Array<String, Symbol>] :left
#     Join keys in `self`.
#   @option join_key_pairs [String, Symbol, Array<String, Symbol>] :right
#     Join keys in `other`.
#
# @overload inner_join(other, suffix: '.1')
#   If `join_key` is not specified, common keys in self and other are used
#   (natural keys). Returns joined dataframe.
#
#   @macro join_before
#   @macro join_after
#
# @overload inner_join(other, join_keys, suffix: '.1')
#
#   @macro join_before
#   @macro join_key_in_array
#   @macro join_after
#
# @overload inner_join(other, join_key_pairs, suffix: '.1')
#
#   @macro join_before
#   @macro join_key_in_hash
#   @macro join_after
#
def inner_join(other, join_keys = nil, suffix: '.1')
  join(other, join_keys, suffix: suffix, type: :inner)
end
137
+
# Join another DataFrame or Table, keeping all records of both.
# - Delegates to `#join` with `type: :full_outer`.
# - A kind of mutating join.
#
# @overload full_join(other, suffix: '.1')
#   If `join_key` is not specified, common keys in self and other are used
#   (natural keys). Returns joined dataframe.
#
#   @macro join_before
#   @macro join_after
#
# @overload full_join(other, join_keys, suffix: '.1')
#
#   @macro join_before
#   @macro join_key_in_array
#   @macro join_after
#
# @overload full_join(other, join_key_pairs, suffix: '.1')
#
#   @macro join_before
#   @macro join_key_in_hash
#   @macro join_after
#
def full_join(other, join_keys = nil, suffix: '.1')
  join(other, join_keys, suffix: suffix, type: :full_outer)
end
164
+
165
+ alias_method :outer_join, :full_join
166
+
# Join matching values from other onto self.
# - Delegates to `#join` with `type: :left_outer`.
# - A kind of mutating join.
#
# @overload left_join(other, suffix: '.1')
#   If `join_key` is not specified, common keys in self and other are used
#   (natural keys). Returns joined dataframe.
#
#   @macro join_before
#   @macro join_after
#
# @overload left_join(other, join_keys, suffix: '.1')
#
#   @macro join_before
#   @macro join_key_in_array
#   @macro join_after
#
# @overload left_join(other, join_key_pairs, suffix: '.1')
#
#   @macro join_before
#   @macro join_key_in_hash
#   @macro join_after
#
def left_join(other, join_keys = nil, suffix: '.1')
  join(other, join_keys, suffix: suffix, type: :left_outer)
end
193
+
# Join matching values from self onto other.
# - Delegates to `#join` with `type: :right_outer`.
# - A kind of mutating join.
#
# @overload right_join(other, suffix: '.1')
#   If `join_key` is not specified, common keys in self and other are used
#   (natural keys). Returns joined dataframe.
#
#   @macro join_before
#   @macro join_after
#
# @overload right_join(other, join_keys, suffix: '.1')
#
#   @macro join_before
#   @macro join_key_in_array
#   @macro join_after
#
# @overload right_join(other, join_key_pairs, suffix: '.1')
#
#   @macro join_before
#   @macro join_key_in_hash
#   @macro join_after
#
def right_join(other, join_keys = nil, suffix: '.1')
  join(other, join_keys, suffix: suffix, type: :right_outer)
end
220
+
221
+ # Filtering joins (#semi_join, #anti_join)
222
+
# Return the records of self that have a match in other.
# - Delegates to `#join` with `type: :left_semi`.
# - A kind of filtering join.
#
# @overload semi_join(other, suffix: '.1')
#   If `join_key` is not specified, common keys in self and other are used
#   (natural keys). Returns joined dataframe.
#
#   @macro join_before
#   @macro join_after
#
# @overload semi_join(other, join_keys, suffix: '.1')
#
#   @macro join_before
#   @macro join_key_in_array
#   @macro join_after
#
# @overload semi_join(other, join_key_pairs, suffix: '.1')
#
#   @macro join_before
#   @macro join_key_in_hash
#   @macro join_after
#
def semi_join(other, join_keys = nil, suffix: '.1')
  join(other, join_keys, suffix: suffix, type: :left_semi)
end
249
+
# Return the records of self that have no match in other.
# - Delegates to `#join` with `type: :left_anti`.
# - A kind of filtering join.
#
# @overload anti_join(other, suffix: '.1')
#   If `join_key` is not specified, common keys in self and other are used
#   (natural keys). Returns joined dataframe.
#
#   @macro join_before
#   @macro join_after
#
# @overload anti_join(other, join_keys, suffix: '.1')
#
#   @macro join_before
#   @macro join_key_in_array
#   @macro join_after
#
# @overload anti_join(other, join_key_pairs, suffix: '.1')
#
#   @macro join_before
#   @macro join_key_in_hash
#   @macro join_after
#
def anti_join(other, join_keys = nil, suffix: '.1')
  join(other, join_keys, suffix: suffix, type: :left_anti)
end
276
+
277
+ # Set operations (#intersect, #union, #difference, #set_operable?)
278
+
# Tell whether a set operation between self and other is possible,
# i.e. whether the keys of other (converted to Symbols) equal
# the keys of self, in the same order.
#
# @macro join_before
#
# @return [Boolean] true if set operation is possible.
#
def set_operable?(other) # rubocop:disable Naming/AccessorMethodName
  other_keys = other.keys.map(&:to_sym)
  keys == other_keys
end
288
+
# Select records appearing in both self and other.
# - Delegates to `#join` with `type: :inner`, using all keys of self
#   as join keys.
# - A kind of set operations.
#
# @macro join_before
#
# @return [DataFrame] Joined dataframe.
# @raise [DataFrameArgumentError]
#   if the keys of self and other are not the same.
#
def intersect(other)
  return join(other, keys, type: :inner) if set_operable?(other)

  raise DataFrameArgumentError, 'keys are not same with self and other'
end
304
+
# Select records appearing in self or other.
# - Delegates to `#join` with `type: :full_outer`, using all keys of self
#   as join keys.
# - A kind of set operations.
#
# @macro join_before
#
# @return [DataFrame] Joined dataframe.
# @raise [DataFrameArgumentError]
#   if the keys of self and other are not the same.
#
def union(other)
  return join(other, keys, type: :full_outer) if set_operable?(other)

  raise DataFrameArgumentError, 'keys are not same with self and other'
end
320
+
# Select records appearing in self but not in other.
# - Delegates to `#join` with `type: :left_anti`, using all keys of self
#   as join keys.
# - A kind of set operations.
#
# @macro join_before
#
# @return [DataFrame] Joined dataframe.
# @raise [DataFrameArgumentError]
#   if the keys of self and other are not the same.
#
def difference(other)
  return join(other, keys, type: :left_anti) if set_operable?(other)

  raise DataFrameArgumentError, 'keys are not same with self and other'
end
336
+
337
+ alias_method :setdiff, :difference
338
+
# Join another DataFrame or Table to self.
#
# @overload join(other, type: :inner, suffix: '.1')
#
#   If `join_key` is not specified, common keys in self and other are used
#   (natural keys). Returns joined dataframe.
#
#   @!macro join_common_type
#     @param type [:left_semi, :right_semi, :left_anti, :right_anti, :inner,
#       :left_outer, :right_outer, :full_outer] type of join.
#
#   @macro join_before
#   @macro join_common_type
#   @macro join_after
#
# @overload join(other, join_keys, type: :inner, suffix: '.1')
#
#   @macro join_before
#   @macro join_key_in_array
#   @macro join_common_type
#   @macro join_after
#
# @overload join(other, join_key_pairs, type: :inner, suffix: '.1')
#
#   @macro join_before
#   @macro join_key_in_hash
#   @macro join_common_type
#   @macro join_after
#
# @raise [DataFrameArgumentError]
#   if `other` is neither a DataFrame nor an Arrow::Table.
#
def join(other, join_keys = nil, type: :inner, suffix: '.1')
  case other
  when DataFrame
    other = other.table
  when Arrow::Table
    # Nop
  else
    raise DataFrameArgumentError, 'other must be a DataFrame or an Arrow::Table'
  end

  table_keys = table.keys
  other_keys = other.keys
  type = type.to_sym

  # natural keys (implicit common keys)
  join_keys ||= table_keys.intersection(other_keys)

  # This is not necessary if additional procedure is contributed to Red Arrow.
  left_keys, right_keys =
    if join_keys.is_a?(Hash)
      join_keys.values_at(:left, :right)
    else
      [join_keys, join_keys]
    end
  left_keys = Array(left_keys).map(&:to_s)
  right_keys = Array(right_keys).map(&:to_s)

  # For :inner/:left_outer the join keys of other are left out of the output,
  # for :right_outer the join keys of self are left out.
  # Every other type passes nil for both outputs to Arrow::Table#join.
  left_outputs, right_outputs =
    case type
    when :inner, :left_outer
      [table_keys, other_keys - right_keys]
    when :right_outer
      [table_keys - left_keys, other_keys]
    end

  # Should we rescue errors in Arrow::Table#join for usability ?
  joined_table =
    table.join(other, join_keys,
               type: type,
               left_outputs: left_outputs,
               right_outputs: right_outputs)

  case type
  when :inner, :left_outer, :left_semi, :left_anti, :right_semi, :right_anti
    # Array#uniq! returns nil when there is no duplicated key.
    if joined_table.keys.uniq!
      DataFrame.create(rename_table(joined_table, n_keys, suffix))
    else
      DataFrame.create(joined_table)
    end
  when :full_outer
    renamed_table = rename_table(joined_table, n_keys, suffix)
    renamed_keys = renamed_table.keys
    dropper = []
    DataFrame.create(renamed_table).assign do |df|
      left_keys.zip(right_keys).map do |left_key, right_key|
        # Locate the right join key by its own position in other.
        # Using the position of the left key instead picks a wrong column
        # when the join key sits at different positions in self and other.
        # Assumes the joined table lists all columns of self followed by
        # all columns of other (the original code relied on the same layout).
        i_right_key = table_keys.size + other_keys.index(right_key)
        renamed_right_key = renamed_keys[i_right_key]
        dropper << renamed_right_key
        [
          left_key.to_sym,
          merge_array(df[left_key].data, df[renamed_right_key].data),
        ]
      end
    end.drop(dropper)
  when :right_outer
    if joined_table.keys.uniq!
      DataFrame.create(rename_table(joined_table, left_outputs.size, suffix))
    else
      DataFrame.create(joined_table)
    end.pick do
      # Put the join keys first, followed by the remaining keys.
      [right_keys, keys.map(&:to_s) - right_keys]
    end
  end
end
444
+
445
+ private
446
+
# Rename duplicated keys of a joined table by appending a suffix.
#
# Only keys at index `n_keys` or later are renamed; the first `n_keys`
# keys are kept as they are. When the suffixed name already exists,
# the suffix is advanced by `#succ` until the name is unused, and the
# advanced suffix is kept for the keys that follow.
#
# @param joined_table [Arrow::Table] table which may have duplicated keys.
# @param n_keys [Integer] index of the first key allowed to be renamed.
# @param suffix [#succ] suffix to append to duplicated keys.
# @return [Arrow::Table] table with the same columns and renamed fields.
# @raise [DataFrameArgumentError]
#   if `suffix.succ` returns a value equal to `suffix`.
def rename_table(joined_table, n_keys, suffix)
  all_keys = joined_table.keys
  duplicated = all_keys.tally.select { |_, count| count > 1 }.keys

  renamed_tail =
    all_keys[n_keys..].map do |key|
      next key unless duplicated.include?(key)

      candidate = "#{key}#{suffix}"
      while all_keys.include?(candidate)
        succeeded = suffix.succ
        raise DataFrameArgumentError, "suffix #{suffix} is invalid" if succeeded == suffix

        suffix = succeeded
        candidate = "#{key}#{suffix}"
      end
      candidate
    end
  all_keys[n_keys..] = renamed_tail

  fields =
    all_keys.each_with_index.map do |name, index|
      Arrow::Field.new(name, joined_table[index].data_type)
    end
  Arrow::Table.new(Arrow::Schema.new(fields), joined_table.columns)
end
479
+
# Merge two Arrow::Arrays into one.
#
# Runs Arrow's `is_null` on array1 and feeds the result to `if_else`
# as the condition, with array2 and array1 as the two candidates;
# i.e. array2 fills in where array1 is null.
#
# @param array1 [Arrow::Array] primary values.
# @param array2 [Arrow::Array] values used where array1 is null.
# @return the `value` of the datum returned by `if_else`.
def merge_array(array1, array2)
  null_mask = Arrow::Function.find(:is_null).execute([array1])
  chooser = Arrow::Function.find(:if_else)
  chooser.execute([null_mask, array2, array1]).value
end
485
+ end
486
+ end
@@ -93,7 +93,8 @@ module RedAmber
93
93
  levels = tallys.map(&:size)
94
94
  type_groups = @table.columns.map { |column| type_group(column.data_type) }
95
95
  quoted_keys = keys.map(&:inspect)
96
- headers = { idx: '#', key: 'key', type: 'type', levels: 'level', data: 'data_preview' }
96
+ headers = { idx: '#', key: 'key', type: 'type', levels: 'level',
97
+ data: 'data_preview' }
97
98
  header_format = make_header_format(levels, headers, quoted_keys)
98
99
 
99
100
  sio = StringIO.new # output string buffer
@@ -174,6 +175,8 @@ module RedAmber
174
175
  end
175
176
 
176
177
  def format_table(width: 80, head: 5, tail: 3, n_digit: 2)
178
+ return " #{keys.join(' ')}\n (Empty Vectors)\n" if size.zero?
179
+
177
180
  original = self
178
181
  indices = size > head + tail ? [*0..head, *(size - tail)...size] : [*0...size]
179
182
  df = slice(indices).assign do
@@ -199,7 +202,8 @@ module RedAmber
199
202
  vectors.each_with_object({}) do |v, assigner|
200
203
  vec = v.replace(0, v.key == INDEX_KEY ? '' : v.key.to_s)
201
204
  .replace(1, v.key == INDEX_KEY ? '' : "<#{original[v.key].type}>")
202
- assigner[v.key] = original.size > head + tail + 1 ? vec.replace(head + 2, ':') : vec
205
+ assigner[v.key] =
206
+ original.size > head + tail + 1 ? vec.replace(head + 2, ':') : vec
203
207
  end
204
208
  end
205
209
 
@@ -263,8 +267,6 @@ module RedAmber
263
267
  format('%g', element)
264
268
  in Integer
265
269
  format('%d', element)
266
- else
267
- element
268
270
  end
269
271
  end
270
272
  end
@@ -18,7 +18,7 @@ module RedAmber
18
18
  # @return [RedAmber::Vector] Sorted indices in Vector
19
19
  def sort_indices(*sort_keys)
20
20
  indices = @table.sort_indices(sort_keys.flatten)
21
- Vector.new(indices)
21
+ Vector.create(indices)
22
22
  end
23
23
 
24
24
  # @return [RedAmber::DataFrame] Sorted DataFrame
@@ -32,7 +32,7 @@ module RedAmber
32
32
 
33
33
  def new_dataframe_by(index_array)
34
34
  t = Arrow::Function.find(:take).execute([@table, index_array]).value
35
- RedAmber::DataFrame.new(t)
35
+ DataFrame.create(t)
36
36
  end
37
37
  end
38
38
  end
@@ -17,14 +17,17 @@ module RedAmber
17
17
  end
18
18
 
19
19
  # Save DataFrame
20
+ #
21
+ # @return [DataFrame] self.
20
22
  def save(output, options = {})
21
23
  @table.save(output, options)
24
+ self
22
25
  end
23
26
 
24
27
  # Save and reload to cast automatically
25
28
  # Via a temporary tsv format file by default
26
29
  #
27
- # experimental feature
30
+ # @note experimental feature
28
31
  def auto_cast(format: :tsv)
29
32
  return self if empty?
30
33
 
@@ -8,11 +8,14 @@ module RedAmber
8
8
  # @param key [Symbol] key of the index column
9
9
  # to transpose into keys.
10
10
  # If it is not specified, keys[0] is used.
11
- # @param new_key [Symbol] key name of transposed index column.
12
- # If it is not specified, :NAME is used. If it already exists, :NAME1 or :NAME1.succ is used.
11
+ # @param name [Symbol] key name of transposed index column.
12
+ # If it is not specified, :NAME is used.
13
+ # If it already exists, :NAME1 or :NAME1.succ is used.
13
14
  # @return [DataFrame] transposed DataFrame
14
15
  def transpose(key: keys.first, name: :NAME)
15
- raise DataFrameArgumentError, "Self does not include: #{key}" unless keys.include?(key)
16
+ unless keys.include?(key)
17
+ raise DataFrameArgumentError, "Self does not include: #{key}"
18
+ end
16
19
 
17
20
  # Find unused name
18
21
  new_keys = self[key].to_a.map { |e| e.to_s.to_sym }
@@ -35,14 +38,24 @@ module RedAmber
35
38
  # @param value [Symbol, String] key of the column which is come **from values**.
36
39
  # @return [DataFrame] long DataFrame.
37
40
  def to_long(*keep_keys, name: :NAME, value: :VALUE)
41
+ warn('[Info] No key to keep is specified.') if keep_keys.empty?
42
+
38
43
  not_included = keep_keys - keys
39
- raise DataFrameArgumentError, "Not have keys #{not_included}" unless not_included.empty?
44
+ unless not_included.empty?
45
+ raise DataFrameArgumentError, "Not have keys #{not_included}"
46
+ end
40
47
 
41
48
  name = name.to_sym
42
- raise DataFrameArgumentError, "Invalid key: #{name}" if keep_keys.include?(name)
49
+ if keep_keys.include?(name)
50
+ raise DataFrameArgumentError,
51
+ "Can't specify the key: #{name} for the column from keys."
52
+ end
43
53
 
44
54
  value = value.to_sym
45
- raise DataFrameArgumentError, "Invalid key: #{value}" if keep_keys.include?(value)
55
+ if keep_keys.include?(value)
56
+ raise DataFrameArgumentError,
57
+ "Can't specify the key: #{value} for the column from values."
58
+ end
46
59
 
47
60
  hash = Hash.new { |h, k| h[k] = [] }
48
61
  l = keys.size - keep_keys.size
@@ -62,15 +75,27 @@ module RedAmber
62
75
 
63
76
  # Reshape long DataFrame to a wide DataFrame.
64
77
  #
65
- # @param name [Symbol, String] key of the column which will be expanded **to key names**.
66
- # @param value [Symbol, String] key of the column which will be expanded **to values**.
78
+ # @param name [Symbol, String]
79
+ # key of the column which will be expanded **to key names**.
80
+ # @param value [Symbol, String]
81
+ # key of the column which will be expanded **to values**.
67
82
  # @return [DataFrame] wide DataFrame.
68
83
  def to_wide(name: :NAME, value: :VALUE)
69
84
  name = name.to_sym
70
- raise DataFrameArgumentError, "Invalid key: #{name}" unless keys.include?(name)
85
+ unless keys.include?(name)
86
+ raise DataFrameArgumentError,
87
+ "You are going to keep the key: #{name}. " \
88
+ 'You may need to specify the column name ' \
89
+ 'that gives the new keys by `:name` option.'
90
+ end
71
91
 
72
92
  value = value.to_sym
73
- raise DataFrameArgumentError, "Invalid key: #{value}" unless keys.include?(value)
93
+ unless keys.include?(value)
94
+ raise DataFrameArgumentError,
95
+ "You are going to keep the key: #{value}. " \
96
+ 'You may need to specify the column name ' \
97
+ 'that gives the new values by `:value` option.'
98
+ end
74
99
 
75
100
  hash = Hash.new { |h, k| h[k] = {} }
76
101
  keep_keys = keys - [name, value]