red_amber 0.2.3 → 0.4.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.rubocop.yml +133 -51
- data/.yardopts +2 -0
- data/CHANGELOG.md +203 -1
- data/Gemfile +2 -1
- data/LICENSE +1 -1
- data/README.md +61 -45
- data/benchmark/basic.yml +11 -4
- data/benchmark/combine.yml +3 -4
- data/benchmark/dataframe.yml +62 -0
- data/benchmark/group.yml +7 -1
- data/benchmark/reshape.yml +6 -2
- data/benchmark/vector.yml +63 -0
- data/doc/DataFrame.md +35 -12
- data/doc/DataFrame_Comparison.md +65 -0
- data/doc/SubFrames.md +11 -0
- data/doc/Vector.md +295 -1
- data/doc/yard-templates/default/fulldoc/html/css/common.css +6 -0
- data/lib/red_amber/data_frame.rb +537 -68
- data/lib/red_amber/data_frame_combinable.rb +776 -123
- data/lib/red_amber/data_frame_displayable.rb +248 -18
- data/lib/red_amber/data_frame_indexable.rb +122 -19
- data/lib/red_amber/data_frame_loadsave.rb +81 -10
- data/lib/red_amber/data_frame_reshaping.rb +216 -21
- data/lib/red_amber/data_frame_selectable.rb +781 -120
- data/lib/red_amber/data_frame_variable_operation.rb +561 -85
- data/lib/red_amber/group.rb +195 -21
- data/lib/red_amber/helper.rb +114 -32
- data/lib/red_amber/refinements.rb +206 -0
- data/lib/red_amber/subframes.rb +1066 -0
- data/lib/red_amber/vector.rb +435 -58
- data/lib/red_amber/vector_aggregation.rb +312 -0
- data/lib/red_amber/vector_binary_element_wise.rb +387 -0
- data/lib/red_amber/vector_selectable.rb +321 -69
- data/lib/red_amber/vector_unary_element_wise.rb +436 -0
- data/lib/red_amber/vector_updatable.rb +397 -24
- data/lib/red_amber/version.rb +2 -1
- data/lib/red_amber.rb +15 -1
- data/red_amber.gemspec +4 -3
- metadata +19 -11
- data/doc/image/dataframe/reshaping_DataFrames.png +0 -0
- data/lib/red_amber/vector_functions.rb +0 -294
@@ -1,18 +1,98 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
|
3
3
|
module RedAmber
|
4
|
-
#
|
4
|
+
# Mix-in for the class DataFrame
|
5
5
|
module DataFrameReshaping
|
6
|
-
#
|
6
|
+
# Create a transposed DataFrame for the wide (messy) DataFrame.
|
7
7
|
#
|
8
|
-
# @param key [Symbol]
|
8
|
+
# @param key [Symbol]
|
9
|
+
# key of the index column
|
9
10
|
# to transpose into keys.
|
10
11
|
# If it is not specified, keys[0] is used.
|
11
|
-
# @param
|
12
|
-
#
|
13
|
-
#
|
12
|
+
# @param name [Symbol]
|
13
|
+
# key name of transposed index column.
|
14
|
+
# If it is not specified, :NAME is used.
|
15
|
+
# If it already exists, :NAME1 or :NAME1.succ is used.
|
16
|
+
# @return [DataFrame]
|
17
|
+
# transposed DataFrame
|
18
|
+
#
|
19
|
+
# @example Transpose a DataFrame without options
|
20
|
+
#
|
21
|
+
# import_cars
|
22
|
+
#
|
23
|
+
# # =>
|
24
|
+
# #<RedAmber::DataFrame : 5 x 6 Vectors, 0x000000000000d520>
|
25
|
+
# Year Audi BMW BMW_MINI Mercedes-Benz VW
|
26
|
+
# <int64> <int64> <int64> <int64> <int64> <int64>
|
27
|
+
# 0 2017 28336 52527 25427 68221 49040
|
28
|
+
# 1 2018 26473 50982 25984 67554 51961
|
29
|
+
# 2 2019 24222 46814 23813 66553 46794
|
30
|
+
# 3 2020 22304 35712 20196 57041 36576
|
31
|
+
# 4 2021 22535 35905 18211 51722 35215
|
32
|
+
#
|
33
|
+
# import_cars.transpose
|
34
|
+
#
|
35
|
+
# # =>
|
36
|
+
# #<RedAmber::DataFrame : 5 x 6 Vectors, 0x0000000000010a2c>
|
37
|
+
# NAME 2017 2018 2019 2020 2021
|
38
|
+
# <string> <uint32> <uint32> <uint32> <uint16> <uint16>
|
39
|
+
# 0 Audi 28336 26473 24222 22304 22535
|
40
|
+
# 1 BMW 52527 50982 46814 35712 35905
|
41
|
+
# 2 BMW_MINI 25427 25984 23813 20196 18211
|
42
|
+
# 3 Mercedes-Benz 68221 67554 66553 57041 51722
|
43
|
+
# 4 VW 49040 51961 46794 36576 35215
|
44
|
+
#
|
45
|
+
# The leftmost column is created from the original keys and
|
46
|
+
# `:NAME` is automatically used for the column name.
|
47
|
+
#
|
48
|
+
# @example Transpose a DataFrame with `:name` option
|
49
|
+
#
|
50
|
+
# import_cars.transpose(name: :Manufacturer)
|
51
|
+
#
|
52
|
+
# # =>
|
53
|
+
# #<RedAmber::DataFrame : 5 x 6 Vectors, 0x0000000000010a2c>
|
54
|
+
# Manufacturer 2017 2018 2019 2020 2021
|
55
|
+
# <string> <uint32> <uint32> <uint32> <uint16> <uint16>
|
56
|
+
# 0 Audi 28336 26473 24222 22304 22535
|
57
|
+
# 1 BMW 52527 50982 46814 35712 35905
|
58
|
+
# 2 BMW_MINI 25427 25984 23813 20196 18211
|
59
|
+
# 3 Mercedes-Benz 68221 67554 66553 57041 51722
|
60
|
+
# 4 VW 49040 51961 46794 36576 35215
|
61
|
+
#
|
62
|
+
# `:name` option can specify column name.
|
63
|
+
#
|
64
|
+
# @example Transpose a DataFrame by the :key in the middle of the DataFrame
|
65
|
+
#
|
66
|
+
# import_cars_middle = import_cars.pick(1..2, 0, 3..)
|
67
|
+
#
|
68
|
+
# # =>
|
69
|
+
# #<RedAmber::DataFrame : 5 x 6 Vectors, 0x000000000000f244>
|
70
|
+
# Audi BMW Year BMW_MINI Mercedes-Benz VW
|
71
|
+
# <int64> <int64> <int64> <int64> <int64> <int64>
|
72
|
+
# 0 28336 52527 2017 25427 68221 49040
|
73
|
+
# 1 26473 50982 2018 25984 67554 51961
|
74
|
+
# 2 24222 46814 2019 23813 66553 46794
|
75
|
+
# 3 22304 35712 2020 20196 57041 36576
|
76
|
+
# 4 22535 35905 2021 18211 51722 35215
|
77
|
+
#
|
78
|
+
# import_cars_middle.transpose(key: :Year)
|
79
|
+
#
|
80
|
+
# # =>
|
81
|
+
# #<RedAmber::DataFrame : 5 x 6 Vectors, 0x0000000000010a2c>
|
82
|
+
# NAME 2017 2018 2019 2020 2021
|
83
|
+
# <string> <uint32> <uint32> <uint32> <uint16> <uint16>
|
84
|
+
# 0 Audi 28336 26473 24222 22304 22535
|
85
|
+
# 1 BMW 52527 50982 46814 35712 35905
|
86
|
+
# 2 BMW_MINI 25427 25984 23813 20196 18211
|
87
|
+
# 3 Mercedes-Benz 68221 67554 66553 57041 51722
|
88
|
+
# 4 VW 49040 51961 46794 36576 35215
|
89
|
+
#
|
90
|
+
# @since 0.2.0
|
91
|
+
#
|
14
92
|
def transpose(key: keys.first, name: :NAME)
|
15
|
-
|
93
|
+
unless keys.include?(key)
|
94
|
+
raise DataFrameArgumentError, "Self does not include: #{key}"
|
95
|
+
end
|
16
96
|
|
17
97
|
# Find unused name
|
18
98
|
new_keys = self[key].to_a.map { |e| e.to_s.to_sym }
|
@@ -28,21 +108,86 @@ module RedAmber
|
|
28
108
|
DataFrame.new(hash)
|
29
109
|
end
|
30
110
|
|
31
|
-
#
|
111
|
+
# Create a 'long' (may be tidy) DataFrame from a 'wide' DataFrame.
|
112
|
+
#
|
113
|
+
# @param keep_keys [<Symbol>]
|
114
|
+
# keys to keep.
|
115
|
+
# @param name [Symbol, String]
|
116
|
+
# a new key name for the column which comes from key names.
|
117
|
+
# @param value [Symbol, String]
|
118
|
+
# a new key name for the column which comes from values.
|
119
|
+
# @return [DataFrame]
|
120
|
+
# long DataFrame.
|
121
|
+
#
|
122
|
+
# @example `to_long` without options
|
123
|
+
#
|
124
|
+
# import_cars
|
125
|
+
#
|
126
|
+
# # =>
|
127
|
+
# #<RedAmber::DataFrame : 5 x 6 Vectors, 0x000000000000d520>
|
128
|
+
# Year Audi BMW BMW_MINI Mercedes-Benz VW
|
129
|
+
# <int64> <int64> <int64> <int64> <int64> <int64>
|
130
|
+
# 0 2017 28336 52527 25427 68221 49040
|
131
|
+
# 1 2018 26473 50982 25984 67554 51961
|
132
|
+
# 2 2019 24222 46814 23813 66553 46794
|
133
|
+
# 3 2020 22304 35712 20196 57041 36576
|
134
|
+
# 4 2021 22535 35905 18211 51722 35215
|
135
|
+
#
|
136
|
+
# import_cars.to_long(:Year)
|
137
|
+
#
|
138
|
+
# # =>
|
139
|
+
# #<RedAmber::DataFrame : 25 x 3 Vectors, 0x0000000000011864>
|
140
|
+
# Year NAME VALUE
|
141
|
+
# <uint16> <string> <uint32>
|
142
|
+
# 0 2017 Audi 28336
|
143
|
+
# 1 2017 BMW 52527
|
144
|
+
# 2 2017 BMW_MINI 25427
|
145
|
+
# 3 2017 Mercedes-Benz 68221
|
146
|
+
# 4 2017 VW 49040
|
147
|
+
# : : : :
|
148
|
+
# 22 2021 BMW_MINI 18211
|
149
|
+
# 23 2021 Mercedes-Benz 51722
|
150
|
+
# 24 2021 VW 35215
|
151
|
+
#
|
152
|
+
# @example `to_long` with options `:name` and `:value`
|
153
|
+
#
|
154
|
+
# import_cars.to_long(:Year, name: :Manufacturer, value: :Num_of_imported)
|
155
|
+
#
|
156
|
+
# # =>
|
157
|
+
# #<RedAmber::DataFrame : 25 x 3 Vectors, 0x000000000001359c>
|
158
|
+
# Year Manufacturer Num_of_imported
|
159
|
+
# <uint16> <string> <uint32>
|
160
|
+
# 0 2017 Audi 28336
|
161
|
+
# 1 2017 BMW 52527
|
162
|
+
# 2 2017 BMW_MINI 25427
|
163
|
+
# 3 2017 Mercedes-Benz 68221
|
164
|
+
# 4 2017 VW 49040
|
165
|
+
# : : : :
|
166
|
+
# 22 2021 BMW_MINI 18211
|
167
|
+
# 23 2021 Mercedes-Benz 51722
|
168
|
+
# 24 2021 VW 35215
|
169
|
+
#
|
170
|
+
# @since 0.2.0
|
32
171
|
#
|
33
|
-
# @param keep_keys [Array] keys to keep.
|
34
|
-
# @param name [Symbol, String] key of the column which is come **from values**.
|
35
|
-
# @param value [Symbol, String] key of the column which is come **from values**.
|
36
|
-
# @return [DataFrame] long DataFrame.
|
37
172
|
def to_long(*keep_keys, name: :NAME, value: :VALUE)
|
173
|
+
warn('[Info] No key to keep is specified.') if keep_keys.empty?
|
174
|
+
|
38
175
|
not_included = keep_keys - keys
|
39
|
-
|
176
|
+
unless not_included.empty?
|
177
|
+
raise DataFrameArgumentError, "Not have keys #{not_included}"
|
178
|
+
end
|
40
179
|
|
41
180
|
name = name.to_sym
|
42
|
-
|
181
|
+
if keep_keys.include?(name)
|
182
|
+
raise DataFrameArgumentError,
|
183
|
+
"Can't specify the key: #{name} for the column from keys."
|
184
|
+
end
|
43
185
|
|
44
186
|
value = value.to_sym
|
45
|
-
|
187
|
+
if keep_keys.include?(value)
|
188
|
+
raise DataFrameArgumentError,
|
189
|
+
"Can't specify the key: #{value} for the column from values."
|
190
|
+
end
|
46
191
|
|
47
192
|
hash = Hash.new { |h, k| h[k] = [] }
|
48
193
|
l = keys.size - keep_keys.size
|
@@ -60,17 +205,67 @@ module RedAmber
|
|
60
205
|
DataFrame.new(hash)
|
61
206
|
end
|
62
207
|
|
63
|
-
#
|
208
|
+
# Create a 'wide' (may be messy) DataFrame from a 'long' DataFrame.
|
209
|
+
#
|
210
|
+
# @param name [Symbol, String]
|
211
|
+
# key name of the existing column which will be expanded to key names.
|
212
|
+
# @param value [Symbol, String]
|
213
|
+
# key name of the existing column which will be expanded to values.
|
214
|
+
# @return [DataFrame]
|
215
|
+
# wide DataFrame.
|
216
|
+
#
|
217
|
+
# @example `to_wide` without options
|
218
|
+
#
|
219
|
+
# import_cars_long = import_cars.to_long(:Year)
|
220
|
+
#
|
221
|
+
# # =>
|
222
|
+
# #<RedAmber::DataFrame : 25 x 3 Vectors, 0x0000000000011864>
|
223
|
+
# Year NAME VALUE
|
224
|
+
# <uint16> <string> <uint32>
|
225
|
+
# 0 2017 Audi 28336
|
226
|
+
# 1 2017 BMW 52527
|
227
|
+
# 2 2017 BMW_MINI 25427
|
228
|
+
# 3 2017 Mercedes-Benz 68221
|
229
|
+
# 4 2017 VW 49040
|
230
|
+
# : : : :
|
231
|
+
# 22 2021 BMW_MINI 18211
|
232
|
+
# 23 2021 Mercedes-Benz 51722
|
233
|
+
# 24 2021 VW 35215
|
234
|
+
#
|
235
|
+
# import_cars_long.to_wide
|
236
|
+
# # or same as `import_cars_long.to_wide(name: :NAME, value: :VALUE)`
|
237
|
+
#
|
238
|
+
# # =>
|
239
|
+
# #<RedAmber::DataFrame : 5 x 6 Vectors, 0x000000000000d520>
|
240
|
+
# Year Audi BMW BMW_MINI Mercedes-Benz VW
|
241
|
+
# <int64> <int64> <int64> <int64> <int64> <int64>
|
242
|
+
# 0 2017 28336 52527 25427 68221 49040
|
243
|
+
# 1 2018 26473 50982 25984 67554 51961
|
244
|
+
# 2 2019 24222 46814 23813 66553 46794
|
245
|
+
# 3 2020 22304 35712 20196 57041 36576
|
246
|
+
# 4 2021 22535 35905 18211 51722 35215
|
247
|
+
#
|
248
|
+
# Columns other than `NAME` and `VALUE` (it is `Year` for this case) will be
|
249
|
+
# automatically processed and do not need to be specified.
|
250
|
+
#
|
251
|
+
# @since 0.2.0
|
64
252
|
#
|
65
|
-
# @param name [Symbol, String] key of the column which will be expanded **to key names**.
|
66
|
-
# @param value [Symbol, String] key of the column which will be expanded **to values**.
|
67
|
-
# @return [DataFrame] wide DataFrame.
|
68
253
|
def to_wide(name: :NAME, value: :VALUE)
|
69
254
|
name = name.to_sym
|
70
|
-
|
255
|
+
unless keys.include?(name)
|
256
|
+
raise DataFrameArgumentError,
|
257
|
+
"You are going to keep the key: #{name}. " \
|
258
|
+
'You may need to specify the column name ' \
|
259
|
+
'that gives the new keys by `:name` option.'
|
260
|
+
end
|
71
261
|
|
72
262
|
value = value.to_sym
|
73
|
-
|
263
|
+
unless keys.include?(value)
|
264
|
+
raise DataFrameArgumentError,
|
265
|
+
"You are going to keep the key: #{value}. " \
|
266
|
+
'You may need to specify the column name ' \
|
267
|
+
'that gives the new values by `:value` option.'
|
268
|
+
end
|
74
269
|
|
75
270
|
hash = Hash.new { |h, k| h[k] = {} }
|
76
271
|
keep_keys = keys - [name, value]
|