red_amber 0.3.0 → 0.4.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.rubocop.yml +56 -22
- data/.yardopts +2 -0
- data/CHANGELOG.md +178 -0
- data/Gemfile +1 -1
- data/LICENSE +1 -1
- data/README.md +29 -30
- data/benchmark/basic.yml +7 -7
- data/benchmark/combine.yml +3 -3
- data/benchmark/dataframe.yml +15 -9
- data/benchmark/group.yml +6 -6
- data/benchmark/reshape.yml +6 -6
- data/benchmark/vector.yml +6 -3
- data/doc/DataFrame.md +32 -12
- data/doc/DataFrame_Comparison.md +65 -0
- data/doc/SubFrames.md +11 -0
- data/doc/Vector.md +207 -1
- data/doc/yard-templates/default/fulldoc/html/css/common.css +6 -0
- data/lib/red_amber/data_frame.rb +454 -85
- data/lib/red_amber/data_frame_combinable.rb +609 -115
- data/lib/red_amber/data_frame_displayable.rb +313 -34
- data/lib/red_amber/data_frame_indexable.rb +122 -19
- data/lib/red_amber/data_frame_loadsave.rb +78 -10
- data/lib/red_amber/data_frame_reshaping.rb +184 -14
- data/lib/red_amber/data_frame_selectable.rb +623 -70
- data/lib/red_amber/data_frame_variable_operation.rb +452 -35
- data/lib/red_amber/group.rb +186 -22
- data/lib/red_amber/helper.rb +74 -14
- data/lib/red_amber/refinements.rb +26 -6
- data/lib/red_amber/subframes.rb +1101 -0
- data/lib/red_amber/vector.rb +362 -11
- data/lib/red_amber/vector_aggregation.rb +312 -0
- data/lib/red_amber/vector_binary_element_wise.rb +506 -0
- data/lib/red_amber/vector_selectable.rb +265 -23
- data/lib/red_amber/vector_unary_element_wise.rb +529 -0
- data/lib/red_amber/vector_updatable.rb +278 -34
- data/lib/red_amber/version.rb +2 -1
- data/lib/red_amber.rb +13 -1
- data/red_amber.gemspec +2 -2
- metadata +13 -8
- data/doc/image/dataframe/reshaping_DataFrames.png +0 -0
- data/lib/red_amber/vector_functions.rb +0 -242
@@ -1,7 +1,7 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
|
3
3
|
module RedAmber
|
4
|
-
#
|
4
|
+
# Mix-in for the class DataFrame
|
5
5
|
module DataFrameLoadSave
|
6
6
|
# Enable `self.load` as class method of DataFrame
|
7
7
|
def self.included(klass)
|
@@ -10,30 +10,98 @@ module RedAmber
|
|
10
10
|
|
11
11
|
# Enable `self.load` as class method of DataFrame
|
12
12
|
module ClassMethods
|
13
|
-
# Load DataFrame via Arrow::Table.load
|
14
|
-
|
15
|
-
|
13
|
+
# Load DataFrame via Arrow::Table.load.
|
14
|
+
#
|
15
|
+
# Format is automatically detected by extension.
|
16
|
+
# @!method load(input, format: nil, compression: nil, schema: nil, skip_lines: nil)
|
17
|
+
# @param input [path]
|
18
|
+
# source path.
|
19
|
+
# @param format [:arrow_file, :batch, :arrows, :arrow_stream, :stream, :csv, :tsv]
|
20
|
+
# format specifier.
|
21
|
+
# @param compression [:gzip, nil]
|
22
|
+
# compression type.
|
23
|
+
# @param schema [Arrow::Schema]
|
24
|
+
# schema of table.
|
25
|
+
# @param skip_lines [Regexp]
|
26
|
+
# pattern of rows to skip.
|
27
|
+
# @return [DataFrame]
|
28
|
+
# loaded DataFrame.
|
29
|
+
# @example Load a tsv file
|
30
|
+
# DataFrame.load("file.tsv")
|
31
|
+
#
|
32
|
+
# @example Load a csv.gz file
|
33
|
+
# DataFrame.load("file.csv.gz")
|
34
|
+
#
|
35
|
+
# @example Load from URI
|
36
|
+
# DataFrame.load(URI("https://some_uri/file.csv"))
|
37
|
+
#
|
38
|
+
# @example Load from a Buffer
|
39
|
+
# DataFrame.load(Arrow::Buffer.new(<<~BUFFER), format: :csv)
|
40
|
+
# name,age
|
41
|
+
# Yasuko,68
|
42
|
+
# Rui,49
|
43
|
+
# Hinata,28
|
44
|
+
# BUFFER
|
45
|
+
#
|
46
|
+
# @example Load from a Buffer skipping comment line
|
47
|
+
# DataFrame.load(Arrow::Buffer.new(<<~BUFFER), format: :csv, skip_lines: /^#/)
|
48
|
+
# # comment
|
49
|
+
# name,age
|
50
|
+
# Yasuko,68
|
51
|
+
# Rui,49
|
52
|
+
# Hinata,28
|
53
|
+
# BUFFER
|
54
|
+
#
|
55
|
+
def load(input, **options)
|
56
|
+
DataFrame.new(Arrow::Table.load(input, options))
|
16
57
|
end
|
17
58
|
end
|
18
59
|
|
19
60
|
# Save DataFrame
|
20
61
|
#
|
21
|
-
#
|
22
|
-
|
62
|
+
# Format is automatically detected by extension.
|
63
|
+
# @!method save(output, format: nil, compression: nil, schema: nil, skip_lines: nil)
|
64
|
+
# @param output [path]
|
65
|
+
# output path.
|
66
|
+
# @param format [:arrow_file, :batch, :arrows, :arrow_stream, :stream, :csv, :tsv]
|
67
|
+
# format specifier.
|
68
|
+
# @param compression [:gzip, nil]
|
69
|
+
# compression type.
|
70
|
+
# @param schema [Arrow::Schema]
|
71
|
+
# schema of table.
|
72
|
+
# @param skip_lines [Regexp]
|
73
|
+
# pattern of rows to skip.
|
74
|
+
# @return [DataFrame]
|
75
|
+
# self.
|
76
|
+
# @example Save a csv file
|
77
|
+
# dataframe.save("file.csv")
|
78
|
+
#
|
79
|
+
# @example Save a csv.gz file
|
80
|
+
# dataframe.save("file.csv.gz")
|
81
|
+
#
|
82
|
+
# @example Save an arrow file
|
83
|
+
# dataframe.save("file.arrow")
|
84
|
+
#
|
85
|
+
def save(output, **options)
|
23
86
|
@table.save(output, options)
|
24
87
|
self
|
25
88
|
end
|
26
89
|
|
27
90
|
# Save and reload to cast automatically
|
28
|
-
#
|
91
|
+
# via tsv format file temporarily as default.
|
92
|
+
#
|
93
|
+
# @param format [Symbol]
|
94
|
+
# format specifier.
|
95
|
+
# @return [DataFrame]
|
96
|
+
# reloaded DataFrame.
|
29
97
|
#
|
30
98
|
# @note experimental feature
|
31
99
|
def auto_cast(format: :tsv)
|
32
100
|
return self if empty?
|
33
101
|
|
34
|
-
|
35
|
-
save(
|
36
|
-
DataFrame.load(
|
102
|
+
buffer = Arrow::ResizableBuffer.new(1024)
|
103
|
+
save(buffer, format: format)
|
104
|
+
DataFrame.load(buffer, format: format)
|
37
105
|
end
|
38
106
|
end
|
39
107
|
end
|
@@ -1,17 +1,94 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
|
3
3
|
module RedAmber
|
4
|
-
#
|
4
|
+
# Mix-in for the class DataFrame
|
5
5
|
module DataFrameReshaping
|
6
|
-
#
|
6
|
+
# Create a transposed DataFrame for the wide (may be messy) DataFrame.
|
7
7
|
#
|
8
|
-
# @param key [Symbol]
|
8
|
+
# @param key [Symbol]
|
9
|
+
# key of the index column
|
9
10
|
# to transpose into keys.
|
10
11
|
# If it is not specified, keys[0] is used.
|
11
|
-
# @param name [Symbol]
|
12
|
+
# @param name [Symbol]
|
13
|
+
# key name of transposed index column.
|
12
14
|
# If it is not specified, :NAME is used.
|
13
15
|
# If it already exists, :NAME1 or :NAME1.succ is used.
|
14
|
-
# @return [DataFrame]
|
16
|
+
# @return [DataFrame]
|
17
|
+
# transposed DataFrame
|
18
|
+
#
|
19
|
+
# @example Transpose a DataFrame without options
|
20
|
+
#
|
21
|
+
# import_cars
|
22
|
+
#
|
23
|
+
# # =>
|
24
|
+
# #<RedAmber::DataFrame : 5 x 6 Vectors, 0x000000000000d520>
|
25
|
+
# Year Audi BMW BMW_MINI Mercedes-Benz VW
|
26
|
+
# <int64> <int64> <int64> <int64> <int64> <int64>
|
27
|
+
# 0 2017 28336 52527 25427 68221 49040
|
28
|
+
# 1 2018 26473 50982 25984 67554 51961
|
29
|
+
# 2 2019 24222 46814 23813 66553 46794
|
30
|
+
# 3 2020 22304 35712 20196 57041 36576
|
31
|
+
# 4 2021 22535 35905 18211 51722 35215
|
32
|
+
#
|
33
|
+
# import_cars.transpose
|
34
|
+
#
|
35
|
+
# # =>
|
36
|
+
# #<RedAmber::DataFrame : 5 x 6 Vectors, 0x0000000000010a2c>
|
37
|
+
# NAME 2017 2018 2019 2020 2021
|
38
|
+
# <string> <uint32> <uint32> <uint32> <uint16> <uint16>
|
39
|
+
# 0 Audi 28336 26473 24222 22304 22535
|
40
|
+
# 1 BMW 52527 50982 46814 35712 35905
|
41
|
+
# 2 BMW_MINI 25427 25984 23813 20196 18211
|
42
|
+
# 3 Mercedes-Benz 68221 67554 66553 57041 51722
|
43
|
+
# 4 VW 49040 51961 46794 36576 35215
|
44
|
+
#
|
45
|
+
# The leftmost column is created by original keys and
|
46
|
+
# `:NAME` is automatically used for the column name.
|
47
|
+
#
|
48
|
+
# @example Transpose a DataFrame with `:name` option
|
49
|
+
#
|
50
|
+
# import_cars.transpose(name: :Manufacturer)
|
51
|
+
#
|
52
|
+
# # =>
|
53
|
+
# #<RedAmber::DataFrame : 5 x 6 Vectors, 0x0000000000010a2c>
|
54
|
+
# Manufacturer 2017 2018 2019 2020 2021
|
55
|
+
# <string> <uint32> <uint32> <uint32> <uint16> <uint16>
|
56
|
+
# 0 Audi 28336 26473 24222 22304 22535
|
57
|
+
# 1 BMW 52527 50982 46814 35712 35905
|
58
|
+
# 2 BMW_MINI 25427 25984 23813 20196 18211
|
59
|
+
# 3 Mercedes-Benz 68221 67554 66553 57041 51722
|
60
|
+
# 4 VW 49040 51961 46794 36576 35215
|
61
|
+
#
|
62
|
+
# `:name` option can specify column name.
|
63
|
+
#
|
64
|
+
# @example Transpose a DataFrame by the :key in the middle of the DataFrame
|
65
|
+
#
|
66
|
+
# import_cars_middle = import_cars.pick(1..2, 0, 3..)
|
67
|
+
#
|
68
|
+
# # =>
|
69
|
+
# #<RedAmber::DataFrame : 5 x 6 Vectors, 0x000000000000f244>
|
70
|
+
# Audi BMW Year BMW_MINI Mercedes-Benz VW
|
71
|
+
# <int64> <int64> <int64> <int64> <int64> <int64>
|
72
|
+
# 0 28336 52527 2017 25427 68221 49040
|
73
|
+
# 1 26473 50982 2018 25984 67554 51961
|
74
|
+
# 2 24222 46814 2019 23813 66553 46794
|
75
|
+
# 3 22304 35712 2020 20196 57041 36576
|
76
|
+
# 4 22535 35905 2021 18211 51722 35215
|
77
|
+
#
|
78
|
+
# import_cars_middle.transpose(key: :Year)
|
79
|
+
#
|
80
|
+
# # =>
|
81
|
+
# #<RedAmber::DataFrame : 5 x 6 Vectors, 0x0000000000010a2c>
|
82
|
+
# NAME 2017 2018 2019 2020 2021
|
83
|
+
# <string> <uint32> <uint32> <uint32> <uint16> <uint16>
|
84
|
+
# 0 Audi 28336 26473 24222 22304 22535
|
85
|
+
# 1 BMW 52527 50982 46814 35712 35905
|
86
|
+
# 2 BMW_MINI 25427 25984 23813 20196 18211
|
87
|
+
# 3 Mercedes-Benz 68221 67554 66553 57041 51722
|
88
|
+
# 4 VW 49040 51961 46794 36576 35215
|
89
|
+
#
|
90
|
+
# @since 0.2.0
|
91
|
+
#
|
15
92
|
def transpose(key: keys.first, name: :NAME)
|
16
93
|
unless keys.include?(key)
|
17
94
|
raise DataFrameArgumentError, "Self does not include: #{key}"
|
@@ -31,12 +108,67 @@ module RedAmber
|
|
31
108
|
DataFrame.new(hash)
|
32
109
|
end
|
33
110
|
|
34
|
-
#
|
111
|
+
# Create a 'long' (may be tidy) DataFrame from a 'wide' DataFrame.
|
112
|
+
#
|
113
|
+
# @param keep_keys [<Symbol>]
|
114
|
+
# keys to keep.
|
115
|
+
# @param name [Symbol, String]
|
116
|
+
# a new key name of the column which comes from key names.
|
117
|
+
# @param value [Symbol, String]
|
118
|
+
# a new key name of the column which comes from values.
|
119
|
+
# @return [DataFrame]
|
120
|
+
# long DataFrame.
|
121
|
+
#
|
122
|
+
# @example `to_long` without options
|
123
|
+
#
|
124
|
+
# import_cars
|
125
|
+
#
|
126
|
+
# # =>
|
127
|
+
# #<RedAmber::DataFrame : 5 x 6 Vectors, 0x000000000000d520>
|
128
|
+
# Year Audi BMW BMW_MINI Mercedes-Benz VW
|
129
|
+
# <int64> <int64> <int64> <int64> <int64> <int64>
|
130
|
+
# 0 2017 28336 52527 25427 68221 49040
|
131
|
+
# 1 2018 26473 50982 25984 67554 51961
|
132
|
+
# 2 2019 24222 46814 23813 66553 46794
|
133
|
+
# 3 2020 22304 35712 20196 57041 36576
|
134
|
+
# 4 2021 22535 35905 18211 51722 35215
|
135
|
+
#
|
136
|
+
# import_cars.to_long(:Year)
|
137
|
+
#
|
138
|
+
# # =>
|
139
|
+
# #<RedAmber::DataFrame : 25 x 3 Vectors, 0x0000000000011864>
|
140
|
+
# Year NAME VALUE
|
141
|
+
# <uint16> <string> <uint32>
|
142
|
+
# 0 2017 Audi 28336
|
143
|
+
# 1 2017 BMW 52527
|
144
|
+
# 2 2017 BMW_MINI 25427
|
145
|
+
# 3 2017 Mercedes-Benz 68221
|
146
|
+
# 4 2017 VW 49040
|
147
|
+
# : : : :
|
148
|
+
# 22 2021 BMW_MINI 18211
|
149
|
+
# 23 2021 Mercedes-Benz 51722
|
150
|
+
# 24 2021 VW 35215
|
151
|
+
#
|
152
|
+
# @example `to_long` with options `:name` and `:value`
|
153
|
+
#
|
154
|
+
# import_cars.to_long(:Year, name: :Manufacturer, value: :Num_of_imported)
|
155
|
+
#
|
156
|
+
# # =>
|
157
|
+
# #<RedAmber::DataFrame : 25 x 3 Vectors, 0x000000000001359c>
|
158
|
+
# Year Manufacturer Num_of_imported
|
159
|
+
# <uint16> <string> <uint32>
|
160
|
+
# 0 2017 Audi 28336
|
161
|
+
# 1 2017 BMW 52527
|
162
|
+
# 2 2017 BMW_MINI 25427
|
163
|
+
# 3 2017 Mercedes-Benz 68221
|
164
|
+
# 4 2017 VW 49040
|
165
|
+
# : : : :
|
166
|
+
# 22 2021 BMW_MINI 18211
|
167
|
+
# 23 2021 Mercedes-Benz 51722
|
168
|
+
# 24 2021 VW 35215
|
169
|
+
#
|
170
|
+
# @since 0.2.0
|
35
171
|
#
|
36
|
-
# @param keep_keys [Array] keys to keep.
|
37
|
-
# @param name [Symbol, String] key of the column which is come **from values**.
|
38
|
-
# @param value [Symbol, String] key of the column which is come **from values**.
|
39
|
-
# @return [DataFrame] long DataFrame.
|
40
172
|
def to_long(*keep_keys, name: :NAME, value: :VALUE)
|
41
173
|
warn('[Info] No key to keep is specified.') if keep_keys.empty?
|
42
174
|
|
@@ -73,13 +205,51 @@ module RedAmber
|
|
73
205
|
DataFrame.new(hash)
|
74
206
|
end
|
75
207
|
|
76
|
-
#
|
208
|
+
# Create a 'wide' (may be messy) DataFrame from a 'long' DataFrame.
|
77
209
|
#
|
78
210
|
# @param name [Symbol, String]
|
79
|
-
# key of the
|
211
|
+
# a new key name of the column which will be expanded to key names.
|
80
212
|
# @param value [Symbol, String]
|
81
|
-
# key of the column which will be expanded
|
82
|
-
# @return [DataFrame]
|
213
|
+
# a new key name of the column which will be expanded to values.
|
214
|
+
# @return [DataFrame]
|
215
|
+
# wide DataFrame.
|
216
|
+
#
|
217
|
+
# @example `to_wide` without options
|
218
|
+
#
|
219
|
+
# import_cars_long = import_cars.to_long(:Year)
|
220
|
+
#
|
221
|
+
# # =>
|
222
|
+
# #<RedAmber::DataFrame : 25 x 3 Vectors, 0x0000000000011864>
|
223
|
+
# Year NAME VALUE
|
224
|
+
# <uint16> <string> <uint32>
|
225
|
+
# 0 2017 Audi 28336
|
226
|
+
# 1 2017 BMW 52527
|
227
|
+
# 2 2017 BMW_MINI 25427
|
228
|
+
# 3 2017 Mercedes-Benz 68221
|
229
|
+
# 4 2017 VW 49040
|
230
|
+
# : : : :
|
231
|
+
# 22 2021 BMW_MINI 18211
|
232
|
+
# 23 2021 Mercedes-Benz 51722
|
233
|
+
# 24 2021 VW 35215
|
234
|
+
#
|
235
|
+
# import_cars_long.to_wide
|
236
|
+
# # or same as `import_cars_long.to_wide(name: :NAME, value: :VALUE)`
|
237
|
+
#
|
238
|
+
# # =>
|
239
|
+
# #<RedAmber::DataFrame : 5 x 6 Vectors, 0x000000000000d520>
|
240
|
+
# Year Audi BMW BMW_MINI Mercedes-Benz VW
|
241
|
+
# <int64> <int64> <int64> <int64> <int64> <int64>
|
242
|
+
# 0 2017 28336 52527 25427 68221 49040
|
243
|
+
# 1 2018 26473 50982 25984 67554 51961
|
244
|
+
# 2 2019 24222 46814 23813 66553 46794
|
245
|
+
# 3 2020 22304 35712 20196 57041 36576
|
246
|
+
# 4 2021 22535 35905 18211 51722 35215
|
247
|
+
#
|
248
|
+
# Columns other than `NAME` and `VALUE` (it is `Year` for this case) will be
|
249
|
+
# automatically processed and do not need to be specified.
|
250
|
+
#
|
251
|
+
# @since 0.2.0
|
252
|
+
#
|
83
253
|
def to_wide(name: :NAME, value: :VALUE)
|
84
254
|
name = name.to_sym
|
85
255
|
unless keys.include?(name)
|