red_amber 0.2.3 → 0.4.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (42) hide show
  1. checksums.yaml +4 -4
  2. data/.rubocop.yml +133 -51
  3. data/.yardopts +2 -0
  4. data/CHANGELOG.md +203 -1
  5. data/Gemfile +2 -1
  6. data/LICENSE +1 -1
  7. data/README.md +61 -45
  8. data/benchmark/basic.yml +11 -4
  9. data/benchmark/combine.yml +3 -4
  10. data/benchmark/dataframe.yml +62 -0
  11. data/benchmark/group.yml +7 -1
  12. data/benchmark/reshape.yml +6 -2
  13. data/benchmark/vector.yml +63 -0
  14. data/doc/DataFrame.md +35 -12
  15. data/doc/DataFrame_Comparison.md +65 -0
  16. data/doc/SubFrames.md +11 -0
  17. data/doc/Vector.md +295 -1
  18. data/doc/yard-templates/default/fulldoc/html/css/common.css +6 -0
  19. data/lib/red_amber/data_frame.rb +537 -68
  20. data/lib/red_amber/data_frame_combinable.rb +776 -123
  21. data/lib/red_amber/data_frame_displayable.rb +248 -18
  22. data/lib/red_amber/data_frame_indexable.rb +122 -19
  23. data/lib/red_amber/data_frame_loadsave.rb +81 -10
  24. data/lib/red_amber/data_frame_reshaping.rb +216 -21
  25. data/lib/red_amber/data_frame_selectable.rb +781 -120
  26. data/lib/red_amber/data_frame_variable_operation.rb +561 -85
  27. data/lib/red_amber/group.rb +195 -21
  28. data/lib/red_amber/helper.rb +114 -32
  29. data/lib/red_amber/refinements.rb +206 -0
  30. data/lib/red_amber/subframes.rb +1066 -0
  31. data/lib/red_amber/vector.rb +435 -58
  32. data/lib/red_amber/vector_aggregation.rb +312 -0
  33. data/lib/red_amber/vector_binary_element_wise.rb +387 -0
  34. data/lib/red_amber/vector_selectable.rb +321 -69
  35. data/lib/red_amber/vector_unary_element_wise.rb +436 -0
  36. data/lib/red_amber/vector_updatable.rb +397 -24
  37. data/lib/red_amber/version.rb +2 -1
  38. data/lib/red_amber.rb +15 -1
  39. data/red_amber.gemspec +4 -3
  40. metadata +19 -11
  41. data/doc/image/dataframe/reshaping_DataFrames.png +0 -0
  42. data/lib/red_amber/vector_functions.rb +0 -294
@@ -1,18 +1,98 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module RedAmber
4
- # mix-ins for the class DataFrame
4
+ # Mix-in for the class DataFrame
5
5
  module DataFrameReshaping
6
- # Transpose a wide DataFrame.
6
+ # Create a transposed DataFrame for the wide (messy) DataFrame.
7
7
  #
8
- # @param key [Symbol] key of the index column
8
+ # @param key [Symbol]
9
+ # key of the index column
9
10
  # to transpose into keys.
10
11
  # If it is not specified, keys[0] is used.
11
- # @param new_key [Symbol] key name of transposed index column.
12
- # If it is not specified, :NAME is used. If it already exists, :NAME1 or :NAME1.succ is used.
13
- # @return [DataFrame] trnsposed DataFrame
12
+ # @param name [Symbol]
13
+ # key name of transposed index column.
14
+ # If it is not specified, :NAME is used.
15
+ # If it already exists, :NAME1 or :NAME1.succ is used.
16
+ # @return [DataFrame]
17
+ # transposed DataFrame
18
+ #
19
+ # @example Transpose a DataFrame without options
20
+ #
21
+ # import_cars
22
+ #
23
+ # # =>
24
+ # #<RedAmber::DataFrame : 5 x 6 Vectors, 0x000000000000d520>
25
+ # Year Audi BMW BMW_MINI Mercedes-Benz VW
26
+ # <int64> <int64> <int64> <int64> <int64> <int64>
27
+ # 0 2017 28336 52527 25427 68221 49040
28
+ # 1 2018 26473 50982 25984 67554 51961
29
+ # 2 2019 24222 46814 23813 66553 46794
30
+ # 3 2020 22304 35712 20196 57041 36576
31
+ # 4 2021 22535 35905 18211 51722 35215
32
+ #
33
+ # import_cars.transpose
34
+ #
35
+ # # =>
36
+ # #<RedAmber::DataFrame : 5 x 6 Vectors, 0x0000000000010a2c>
37
+ # NAME 2017 2018 2019 2020 2021
38
+ # <string> <uint32> <uint32> <uint32> <uint16> <uint16>
39
+ # 0 Audi 28336 26473 24222 22304 22535
40
+ # 1 BMW 52527 50982 46814 35712 35905
41
+ # 2 BMW_MINI 25427 25984 23813 20196 18211
42
+ # 3 Mercedes-Benz 68221 67554 66553 57041 51722
43
+ # 4 VW 49040 51961 46794 36576 35215
44
+ #
45
+ # The leftmost column is created by original keys and
46
+ # `:NAME` is automatically used for the column name.
47
+ #
48
+ # @example Transpose a DataFrame with `:name` option
49
+ #
50
+ # import_cars.transpose(name: :Manufacturer)
51
+ #
52
+ # # =>
53
+ # #<RedAmber::DataFrame : 5 x 6 Vectors, 0x0000000000010a2c>
54
+ # Manufacturer 2017 2018 2019 2020 2021
55
+ # <string> <uint32> <uint32> <uint32> <uint16> <uint16>
56
+ # 0 Audi 28336 26473 24222 22304 22535
57
+ # 1 BMW 52527 50982 46814 35712 35905
58
+ # 2 BMW_MINI 25427 25984 23813 20196 18211
59
+ # 3 Mercedes-Benz 68221 67554 66553 57041 51722
60
+ # 4 VW 49040 51961 46794 36576 35215
61
+ #
62
+ # `:name` option can specify column name.
63
+ #
64
+ # @example Transpose a DataFrame by the :key in the middle of the DataFrame
65
+ #
66
+ # import_cars_middle = import_cars.pick(1..2, 0, 3..)
67
+ #
68
+ # # =>
69
+ # #<RedAmber::DataFrame : 5 x 6 Vectors, 0x000000000000f244>
70
+ # Audi BMW Year BMW_MINI Mercedes-Benz VW
71
+ # <int64> <int64> <int64> <int64> <int64> <int64>
72
+ # 0 28336 52527 2017 25427 68221 49040
73
+ # 1 26473 50982 2018 25984 67554 51961
74
+ # 2 24222 46814 2019 23813 66553 46794
75
+ # 3 22304 35712 2020 20196 57041 36576
76
+ # 4 22535 35905 2021 18211 51722 35215
77
+ #
78
+ # import_cars_middle.transpose(key: :Year)
79
+ #
80
+ # # =>
81
+ # #<RedAmber::DataFrame : 5 x 6 Vectors, 0x0000000000010a2c>
82
+ # NAME 2017 2018 2019 2020 2021
83
+ # <string> <uint32> <uint32> <uint32> <uint16> <uint16>
84
+ # 0 Audi 28336 26473 24222 22304 22535
85
+ # 1 BMW 52527 50982 46814 35712 35905
86
+ # 2 BMW_MINI 25427 25984 23813 20196 18211
87
+ # 3 Mercedes-Benz 68221 67554 66553 57041 51722
88
+ # 4 VW 49040 51961 46794 36576 35215
89
+ #
90
+ # @since 0.2.0
91
+ #
14
92
  def transpose(key: keys.first, name: :NAME)
15
- raise DataFrameArgumentError, "Self does not include: #{key}" unless keys.include?(key)
93
+ unless keys.include?(key)
94
+ raise DataFrameArgumentError, "Self does not include: #{key}"
95
+ end
16
96
 
17
97
  # Find unused name
18
98
  new_keys = self[key].to_a.map { |e| e.to_s.to_sym }
@@ -28,21 +108,86 @@ module RedAmber
28
108
  DataFrame.new(hash)
29
109
  end
30
110
 
31
- # Reshape wide DataFrame to a longer DataFrame.
111
+ # Create a 'long' (may be tidy) DataFrame from a 'wide' DataFrame.
112
+ #
113
+ # @param keep_keys [<Symbol>]
114
+ # keys to keep.
115
+ # @param name [Symbol, String]
116
+ # a new key name of the column which comes from key names.
117
+ # @param value [Symbol, String]
118
+ # a new key name of the column which comes from values.
119
+ # @return [DataFrame]
120
+ # long DataFrame.
121
+ #
122
+ # @example `to_long` without options
123
+ #
124
+ # import_cars
125
+ #
126
+ # # =>
127
+ # #<RedAmber::DataFrame : 5 x 6 Vectors, 0x000000000000d520>
128
+ # Year Audi BMW BMW_MINI Mercedes-Benz VW
129
+ # <int64> <int64> <int64> <int64> <int64> <int64>
130
+ # 0 2017 28336 52527 25427 68221 49040
131
+ # 1 2018 26473 50982 25984 67554 51961
132
+ # 2 2019 24222 46814 23813 66553 46794
133
+ # 3 2020 22304 35712 20196 57041 36576
134
+ # 4 2021 22535 35905 18211 51722 35215
135
+ #
136
+ # import_cars.to_long(:Year)
137
+ #
138
+ # # =>
139
+ # #<RedAmber::DataFrame : 25 x 3 Vectors, 0x0000000000011864>
140
+ # Year NAME VALUE
141
+ # <uint16> <string> <uint32>
142
+ # 0 2017 Audi 28336
143
+ # 1 2017 BMW 52527
144
+ # 2 2017 BMW_MINI 25427
145
+ # 3 2017 Mercedes-Benz 68221
146
+ # 4 2017 VW 49040
147
+ # : : : :
148
+ # 22 2021 BMW_MINI 18211
149
+ # 23 2021 Mercedes-Benz 51722
150
+ # 24 2021 VW 35215
151
+ #
152
+ # @example `to_long` with options `:name` and `:value`
153
+ #
154
+ # import_cars.to_long(:Year, name: :Manufacturer, value: :Num_of_imported)
155
+ #
156
+ # # =>
157
+ # #<RedAmber::DataFrame : 25 x 3 Vectors, 0x000000000001359c>
158
+ # Year Manufacturer Num_of_imported
159
+ # <uint16> <string> <uint32>
160
+ # 0 2017 Audi 28336
161
+ # 1 2017 BMW 52527
162
+ # 2 2017 BMW_MINI 25427
163
+ # 3 2017 Mercedes-Benz 68221
164
+ # 4 2017 VW 49040
165
+ # : : : :
166
+ # 22 2021 BMW_MINI 18211
167
+ # 23 2021 Mercedes-Benz 51722
168
+ # 24 2021 VW 35215
169
+ #
170
+ # @since 0.2.0
32
171
  #
33
- # @param keep_keys [Array] keys to keep.
34
- # @param name [Symbol, String] key of the column which is come **from values**.
35
- # @param value [Symbol, String] key of the column which is come **from values**.
36
- # @return [DataFrame] long DataFrame.
37
172
  def to_long(*keep_keys, name: :NAME, value: :VALUE)
173
+ warn('[Info] No key to keep is specified.') if keep_keys.empty?
174
+
38
175
  not_included = keep_keys - keys
39
- raise DataFrameArgumentError, "Not have keys #{not_included}" unless not_included.empty?
176
+ unless not_included.empty?
177
+ raise DataFrameArgumentError, "Not have keys #{not_included}"
178
+ end
40
179
 
41
180
  name = name.to_sym
42
- raise DataFrameArgumentError, "Invalid key: #{name}" if keep_keys.include?(name)
181
+ if keep_keys.include?(name)
182
+ raise DataFrameArgumentError,
183
+ "Can't specify the key: #{name} for the column from keys."
184
+ end
43
185
 
44
186
  value = value.to_sym
45
- raise DataFrameArgumentError, "Invalid key: #{value}" if keep_keys.include?(value)
187
+ if keep_keys.include?(value)
188
+ raise DataFrameArgumentError,
189
+ "Can't specify the key: #{value} for the column from values."
190
+ end
46
191
 
47
192
  hash = Hash.new { |h, k| h[k] = [] }
48
193
  l = keys.size - keep_keys.size
@@ -60,17 +205,67 @@ module RedAmber
60
205
  DataFrame.new(hash)
61
206
  end
62
207
 
63
- # Reshape long DataFrame to a wide DataFrame.
208
+ # Create a 'wide' (may be messy) DataFrame from a 'long' DataFrame.
209
+ #
210
+ # @param name [Symbol, String]
211
+ # a new key name of the column which will be expanded to key names.
212
+ # @param value [Symbol, String]
213
+ # a new key name of the column which will be expanded to values.
214
+ # @return [DataFrame]
215
+ # wide DataFrame.
216
+ #
217
+ # @example `to_wide` without options
218
+ #
219
+ # import_cars_long = import_cars.to_long(:Year)
220
+ #
221
+ # # =>
222
+ # #<RedAmber::DataFrame : 25 x 3 Vectors, 0x0000000000011864>
223
+ # Year NAME VALUE
224
+ # <uint16> <string> <uint32>
225
+ # 0 2017 Audi 28336
226
+ # 1 2017 BMW 52527
227
+ # 2 2017 BMW_MINI 25427
228
+ # 3 2017 Mercedes-Benz 68221
229
+ # 4 2017 VW 49040
230
+ # : : : :
231
+ # 22 2021 BMW_MINI 18211
232
+ # 23 2021 Mercedes-Benz 51722
233
+ # 24 2021 VW 35215
234
+ #
235
+ # import_cars_long.to_wide
236
+ # # or same as `import_cars_long.to_wide(name: :NAME, value: :VALUE)`
237
+ #
238
+ # # =>
239
+ # #<RedAmber::DataFrame : 5 x 6 Vectors, 0x000000000000d520>
240
+ # Year Audi BMW BMW_MINI Mercedes-Benz VW
241
+ # <int64> <int64> <int64> <int64> <int64> <int64>
242
+ # 0 2017 28336 52527 25427 68221 49040
243
+ # 1 2018 26473 50982 25984 67554 51961
244
+ # 2 2019 24222 46814 23813 66553 46794
245
+ # 3 2020 22304 35712 20196 57041 36576
246
+ # 4 2021 22535 35905 18211 51722 35215
247
+ #
248
+ # Columns other than `NAME` and `VALUE` (it is `Year` for this case) will be
249
+ # automatically processed and do not need to be specified.
250
+ #
251
+ # @since 0.2.0
64
252
  #
65
- # @param name [Symbol, String] key of the column which will be expanded **to key names**.
66
- # @param value [Symbol, String] key of the column which will be expanded **to values**.
67
- # @return [DataFrame] wide DataFrame.
68
253
  def to_wide(name: :NAME, value: :VALUE)
69
254
  name = name.to_sym
70
- raise DataFrameArgumentError, "Invalid key: #{name}" unless keys.include?(name)
255
+ unless keys.include?(name)
256
+ raise DataFrameArgumentError,
257
+ "You are going to keep the key: #{name}. " \
258
+ 'You may need to specify the column name ' \
259
+ 'that gives the new keys by `:name` option.'
260
+ end
71
261
 
72
262
  value = value.to_sym
73
- raise DataFrameArgumentError, "Invalid key: #{value}" unless keys.include?(value)
263
+ unless keys.include?(value)
264
+ raise DataFrameArgumentError,
265
+ "You are going to keep the key: #{value}. " \
266
+ 'You may need to specify the column name ' \
267
+ 'that gives the new values by `:value` option.'
268
+ end
74
269
 
75
270
  hash = Hash.new { |h, k| h[k] = {} }
76
271
  keep_keys = keys - [name, value]