red_amber 0.3.0 → 0.4.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.rubocop.yml +56 -22
- data/.yardopts +2 -0
- data/CHANGELOG.md +178 -0
- data/Gemfile +1 -1
- data/LICENSE +1 -1
- data/README.md +29 -30
- data/benchmark/basic.yml +7 -7
- data/benchmark/combine.yml +3 -3
- data/benchmark/dataframe.yml +15 -9
- data/benchmark/group.yml +6 -6
- data/benchmark/reshape.yml +6 -6
- data/benchmark/vector.yml +6 -3
- data/doc/DataFrame.md +32 -12
- data/doc/DataFrame_Comparison.md +65 -0
- data/doc/SubFrames.md +11 -0
- data/doc/Vector.md +207 -1
- data/doc/yard-templates/default/fulldoc/html/css/common.css +6 -0
- data/lib/red_amber/data_frame.rb +454 -85
- data/lib/red_amber/data_frame_combinable.rb +609 -115
- data/lib/red_amber/data_frame_displayable.rb +313 -34
- data/lib/red_amber/data_frame_indexable.rb +122 -19
- data/lib/red_amber/data_frame_loadsave.rb +78 -10
- data/lib/red_amber/data_frame_reshaping.rb +184 -14
- data/lib/red_amber/data_frame_selectable.rb +623 -70
- data/lib/red_amber/data_frame_variable_operation.rb +452 -35
- data/lib/red_amber/group.rb +186 -22
- data/lib/red_amber/helper.rb +74 -14
- data/lib/red_amber/refinements.rb +26 -6
- data/lib/red_amber/subframes.rb +1101 -0
- data/lib/red_amber/vector.rb +362 -11
- data/lib/red_amber/vector_aggregation.rb +312 -0
- data/lib/red_amber/vector_binary_element_wise.rb +506 -0
- data/lib/red_amber/vector_selectable.rb +265 -23
- data/lib/red_amber/vector_unary_element_wise.rb +529 -0
- data/lib/red_amber/vector_updatable.rb +278 -34
- data/lib/red_amber/version.rb +2 -1
- data/lib/red_amber.rb +13 -1
- data/red_amber.gemspec +2 -2
- metadata +13 -8
- data/doc/image/dataframe/reshaping_DataFrames.png +0 -0
- data/lib/red_amber/vector_functions.rb +0 -242
@@ -1,7 +1,7 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
|
3
3
|
module RedAmber
|
4
|
-
#
|
4
|
+
# Mix-in for the class DataFrame
|
5
5
|
module DataFrameLoadSave
|
6
6
|
# Enable `self.load` as class method of DataFrame
|
7
7
|
def self.included(klass)
|
@@ -10,30 +10,98 @@ module RedAmber
|
|
10
10
|
|
11
11
|
# Enable `self.load` as class method of DataFrame
|
12
12
|
module ClassMethods
|
13
|
-
# Load DataFrame via Arrow::Table.load
|
14
|
-
|
15
|
-
|
13
|
+
# Load DataFrame via Arrow::Table.load.
|
14
|
+
#
|
15
|
+
# Format is automatically detected by extension.
|
16
|
+
# @!method load(input, format: nil, compression: nil, schema: nil, skip_lines: nil)
|
17
|
+
# @param input [path]
|
18
|
+
# source path.
|
19
|
+
# @param format [:arrow_file, :batch, :arrows, :arrow_stream, :stream, :csv, :tsv]
|
20
|
+
# format specifier.
|
21
|
+
# @param compression [:gzip, nil]
|
22
|
+
# compression type.
|
23
|
+
# @param schema [Arrow::Schema]
|
24
|
+
# schema of table.
|
25
|
+
# @param skip_lines [Regexp]
|
26
|
+
# pattern of rows to skip.
|
27
|
+
# @return [DataFrame]
|
28
|
+
# loaded DataFrame.
|
29
|
+
# @example Load a tsv file
|
30
|
+
# DataFrame.load("file.tsv")
|
31
|
+
#
|
32
|
+
# @example Load a csv.gz file
|
33
|
+
# DataFrame.load("file.csv.gz")
|
34
|
+
#
|
35
|
+
# @example Load from URI
|
36
|
+
# DataFrame.load(URI("https://some_uri/file.csv"))
|
37
|
+
#
|
38
|
+
# @example Load from a Buffer
|
39
|
+
# DataFrame.load(Arrow::Buffer.new(<<~BUFFER), format: :csv)
|
40
|
+
# name,age
|
41
|
+
# Yasuko,68
|
42
|
+
# Rui,49
|
43
|
+
# Hinata,28
|
44
|
+
# BUFFER
|
45
|
+
#
|
46
|
+
# @example Load from a Buffer skipping comment line
|
47
|
+
# DataFrame.load(Arrow::Buffer.new(<<~BUFFER), format: :csv, skip_lines: /^#/)
|
48
|
+
# # comment
|
49
|
+
# name,age
|
50
|
+
# Yasuko,68
|
51
|
+
# Rui,49
|
52
|
+
# Hinata,28
|
53
|
+
# BUFFER
|
54
|
+
#
|
55
|
+
def load(input, **options)
|
56
|
+
DataFrame.new(Arrow::Table.load(input, options))
|
16
57
|
end
|
17
58
|
end
|
18
59
|
|
19
60
|
# Save DataFrame
|
20
61
|
#
|
21
|
-
#
|
22
|
-
|
62
|
+
# Format is automatically detected by extension.
|
63
|
+
# @!method save(output, format: nil, compression: nil, schema: nil, skip_lines: nil)
|
64
|
+
# @param output [path]
|
65
|
+
# output path.
|
66
|
+
# @param format [:arrow_file, :batch, :arrows, :arrow_stream, :stream, :csv, :tsv]
|
67
|
+
# format specifier.
|
68
|
+
# @param compression [:gzip, nil]
|
69
|
+
# compression type.
|
70
|
+
# @param schema [Arrow::Schema]
|
71
|
+
# schema of table.
|
72
|
+
# @param skip_lines [Regexp]
|
73
|
+
# pattern of rows to skip.
|
74
|
+
# @return [DataFrame]
|
75
|
+
# self.
|
76
|
+
# @example Save a csv file
|
77
|
+
# dataframe.save("file.csv")
|
78
|
+
#
|
79
|
+
# @example Save a csv.gz file
|
80
|
+
# dataframe.save("file.csv.gz")
|
81
|
+
#
|
82
|
+
# @example Save an arrow file
|
83
|
+
# dataframe.save("file.arrow")
|
84
|
+
#
|
85
|
+
def save(output, **options)
|
23
86
|
@table.save(output, options)
|
24
87
|
self
|
25
88
|
end
|
26
89
|
|
27
90
|
# Save and reload to cast automatically
|
28
|
-
#
|
91
|
+
# via tsv format file temporarily as default.
|
92
|
+
#
|
93
|
+
# @param format [Symbol]
|
94
|
+
# format specifier.
|
95
|
+
# @return [DataFrame]
|
96
|
+
# reloaded DataFrame.
|
29
97
|
#
|
30
98
|
# @note experimental feature
|
31
99
|
def auto_cast(format: :tsv)
|
32
100
|
return self if empty?
|
33
101
|
|
34
|
-
|
35
|
-
save(
|
36
|
-
DataFrame.load(
|
102
|
+
buffer = Arrow::ResizableBuffer.new(1024)
|
103
|
+
save(buffer, format: format)
|
104
|
+
DataFrame.load(buffer, format: format)
|
37
105
|
end
|
38
106
|
end
|
39
107
|
end
|
@@ -1,17 +1,94 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
|
3
3
|
module RedAmber
|
4
|
-
#
|
4
|
+
# Mix-in for the class DataFrame
|
5
5
|
module DataFrameReshaping
|
6
|
-
#
|
6
|
+
# Create a transposed DataFrame for the wide (may be messy) DataFrame.
|
7
7
|
#
|
8
|
-
# @param key [Symbol]
|
8
|
+
# @param key [Symbol]
|
9
|
+
# key of the index column
|
9
10
|
# to transpose into keys.
|
10
11
|
# If it is not specified, keys[0] is used.
|
11
|
-
# @param name [Symbol]
|
12
|
+
# @param name [Symbol]
|
13
|
+
# key name of transposed index column.
|
12
14
|
# If it is not specified, :NAME is used.
|
13
15
|
# If it already exists, :NAME1 or :NAME1.succ is used.
|
14
|
-
# @return [DataFrame]
|
16
|
+
# @return [DataFrame]
|
17
|
+
# transposed DataFrame
|
18
|
+
#
|
19
|
+
# @example Transpose a DataFrame without options
|
20
|
+
#
|
21
|
+
# import_cars
|
22
|
+
#
|
23
|
+
# # =>
|
24
|
+
# #<RedAmber::DataFrame : 5 x 6 Vectors, 0x000000000000d520>
|
25
|
+
# Year Audi BMW BMW_MINI Mercedes-Benz VW
|
26
|
+
# <int64> <int64> <int64> <int64> <int64> <int64>
|
27
|
+
# 0 2017 28336 52527 25427 68221 49040
|
28
|
+
# 1 2018 26473 50982 25984 67554 51961
|
29
|
+
# 2 2019 24222 46814 23813 66553 46794
|
30
|
+
# 3 2020 22304 35712 20196 57041 36576
|
31
|
+
# 4 2021 22535 35905 18211 51722 35215
|
32
|
+
#
|
33
|
+
# import_cars.transpose
|
34
|
+
#
|
35
|
+
# # =>
|
36
|
+
# #<RedAmber::DataFrame : 5 x 6 Vectors, 0x0000000000010a2c>
|
37
|
+
# NAME 2017 2018 2019 2020 2021
|
38
|
+
# <string> <uint32> <uint32> <uint32> <uint16> <uint16>
|
39
|
+
# 0 Audi 28336 26473 24222 22304 22535
|
40
|
+
# 1 BMW 52527 50982 46814 35712 35905
|
41
|
+
# 2 BMW_MINI 25427 25984 23813 20196 18211
|
42
|
+
# 3 Mercedes-Benz 68221 67554 66553 57041 51722
|
43
|
+
# 4 VW 49040 51961 46794 36576 35215
|
44
|
+
#
|
45
|
+
# The leftmost column is created by original keys and
|
46
|
+
# `:NAME` is automatically used for the column name.
|
47
|
+
#
|
48
|
+
# @example Transpose a DataFrame with `:name` option
|
49
|
+
#
|
50
|
+
# import_cars.transpose(name: :Manufacturer)
|
51
|
+
#
|
52
|
+
# # =>
|
53
|
+
# #<RedAmber::DataFrame : 5 x 6 Vectors, 0x0000000000010a2c>
|
54
|
+
# Manufacturer 2017 2018 2019 2020 2021
|
55
|
+
# <string> <uint32> <uint32> <uint32> <uint16> <uint16>
|
56
|
+
# 0 Audi 28336 26473 24222 22304 22535
|
57
|
+
# 1 BMW 52527 50982 46814 35712 35905
|
58
|
+
# 2 BMW_MINI 25427 25984 23813 20196 18211
|
59
|
+
# 3 Mercedes-Benz 68221 67554 66553 57041 51722
|
60
|
+
# 4 VW 49040 51961 46794 36576 35215
|
61
|
+
#
|
62
|
+
# `:name` option can specify column name.
|
63
|
+
#
|
64
|
+
# @example Transpose a DataFrame by the :key in the middle of the DataFrame
|
65
|
+
#
|
66
|
+
# import_cars_middle = import_cars.pick(1..2, 0, 3..)
|
67
|
+
#
|
68
|
+
# # =>
|
69
|
+
# #<RedAmber::DataFrame : 5 x 6 Vectors, 0x000000000000f244>
|
70
|
+
# Audi BMW Year BMW_MINI Mercedes-Benz VW
|
71
|
+
# <int64> <int64> <int64> <int64> <int64> <int64>
|
72
|
+
# 0 28336 52527 2017 25427 68221 49040
|
73
|
+
# 1 26473 50982 2018 25984 67554 51961
|
74
|
+
# 2 24222 46814 2019 23813 66553 46794
|
75
|
+
# 3 22304 35712 2020 20196 57041 36576
|
76
|
+
# 4 22535 35905 2021 18211 51722 35215
|
77
|
+
#
|
78
|
+
# import_cars_middle.transpose(key: :Year)
|
79
|
+
#
|
80
|
+
# # =>
|
81
|
+
# #<RedAmber::DataFrame : 5 x 6 Vectors, 0x0000000000010a2c>
|
82
|
+
# NAME 2017 2018 2019 2020 2021
|
83
|
+
# <string> <uint32> <uint32> <uint32> <uint16> <uint16>
|
84
|
+
# 0 Audi 28336 26473 24222 22304 22535
|
85
|
+
# 1 BMW 52527 50982 46814 35712 35905
|
86
|
+
# 2 BMW_MINI 25427 25984 23813 20196 18211
|
87
|
+
# 3 Mercedes-Benz 68221 67554 66553 57041 51722
|
88
|
+
# 4 VW 49040 51961 46794 36576 35215
|
89
|
+
#
|
90
|
+
# @since 0.2.0
|
91
|
+
#
|
15
92
|
def transpose(key: keys.first, name: :NAME)
|
16
93
|
unless keys.include?(key)
|
17
94
|
raise DataFrameArgumentError, "Self does not include: #{key}"
|
@@ -31,12 +108,67 @@ module RedAmber
|
|
31
108
|
DataFrame.new(hash)
|
32
109
|
end
|
33
110
|
|
34
|
-
#
|
111
|
+
# Create a 'long' (may be tidy) DataFrame from a 'wide' DataFrame.
|
112
|
+
#
|
113
|
+
# @param keep_keys [<Symbol>]
|
114
|
+
# keys to keep.
|
115
|
+
# @param name [Symbol, String]
|
116
|
+
# a new key name of the column which comes from key names.
|
117
|
+
# @param value [Symbol, String]
|
118
|
+
# a new key name of the column which comes from values.
|
119
|
+
# @return [DataFrame]
|
120
|
+
# long DataFrame.
|
121
|
+
#
|
122
|
+
# @example `to_long` without options
|
123
|
+
#
|
124
|
+
# import_cars
|
125
|
+
#
|
126
|
+
# # =>
|
127
|
+
# #<RedAmber::DataFrame : 5 x 6 Vectors, 0x000000000000d520>
|
128
|
+
# Year Audi BMW BMW_MINI Mercedes-Benz VW
|
129
|
+
# <int64> <int64> <int64> <int64> <int64> <int64>
|
130
|
+
# 0 2017 28336 52527 25427 68221 49040
|
131
|
+
# 1 2018 26473 50982 25984 67554 51961
|
132
|
+
# 2 2019 24222 46814 23813 66553 46794
|
133
|
+
# 3 2020 22304 35712 20196 57041 36576
|
134
|
+
# 4 2021 22535 35905 18211 51722 35215
|
135
|
+
#
|
136
|
+
# import_cars.to_long(:Year)
|
137
|
+
#
|
138
|
+
# # =>
|
139
|
+
# #<RedAmber::DataFrame : 25 x 3 Vectors, 0x0000000000011864>
|
140
|
+
# Year NAME VALUE
|
141
|
+
# <uint16> <string> <uint32>
|
142
|
+
# 0 2017 Audi 28336
|
143
|
+
# 1 2017 BMW 52527
|
144
|
+
# 2 2017 BMW_MINI 25427
|
145
|
+
# 3 2017 Mercedes-Benz 68221
|
146
|
+
# 4 2017 VW 49040
|
147
|
+
# : : : :
|
148
|
+
# 22 2021 BMW_MINI 18211
|
149
|
+
# 23 2021 Mercedes-Benz 51722
|
150
|
+
# 24 2021 VW 35215
|
151
|
+
#
|
152
|
+
# @example `to_long` with options `:name` and `:value`
|
153
|
+
#
|
154
|
+
# import_cars.to_long(:Year, name: :Manufacturer, value: :Num_of_imported)
|
155
|
+
#
|
156
|
+
# # =>
|
157
|
+
# #<RedAmber::DataFrame : 25 x 3 Vectors, 0x000000000001359c>
|
158
|
+
# Year Manufacturer Num_of_imported
|
159
|
+
# <uint16> <string> <uint32>
|
160
|
+
# 0 2017 Audi 28336
|
161
|
+
# 1 2017 BMW 52527
|
162
|
+
# 2 2017 BMW_MINI 25427
|
163
|
+
# 3 2017 Mercedes-Benz 68221
|
164
|
+
# 4 2017 VW 49040
|
165
|
+
# : : : :
|
166
|
+
# 22 2021 BMW_MINI 18211
|
167
|
+
# 23 2021 Mercedes-Benz 51722
|
168
|
+
# 24 2021 VW 35215
|
169
|
+
#
|
170
|
+
# @since 0.2.0
|
35
171
|
#
|
36
|
-
# @param keep_keys [Array] keys to keep.
|
37
|
-
# @param name [Symbol, String] key of the column which is come **from values**.
|
38
|
-
# @param value [Symbol, String] key of the column which is come **from values**.
|
39
|
-
# @return [DataFrame] long DataFrame.
|
40
172
|
def to_long(*keep_keys, name: :NAME, value: :VALUE)
|
41
173
|
warn('[Info] No key to keep is specified.') if keep_keys.empty?
|
42
174
|
|
@@ -73,13 +205,51 @@ module RedAmber
|
|
73
205
|
DataFrame.new(hash)
|
74
206
|
end
|
75
207
|
|
76
|
-
#
|
208
|
+
# Create a 'wide' (may be messy) DataFrame from a 'long' DataFrame.
|
77
209
|
#
|
78
210
|
# @param name [Symbol, String]
|
79
|
-
# key of the
|
211
|
+
# a new key name of the column which will be expanded to key names.
|
80
212
|
# @param value [Symbol, String]
|
81
|
-
# key of the column which will be expanded
|
82
|
-
# @return [DataFrame]
|
213
|
+
# a new key name of the column which will be expanded to values.
|
214
|
+
# @return [DataFrame]
|
215
|
+
# wide DataFrame.
|
216
|
+
#
|
217
|
+
# @example `to_wide` without options
|
218
|
+
#
|
219
|
+
# import_cars_long = import_cars.to_long(:Year)
|
220
|
+
#
|
221
|
+
# # =>
|
222
|
+
# #<RedAmber::DataFrame : 25 x 3 Vectors, 0x0000000000011864>
|
223
|
+
# Year NAME VALUE
|
224
|
+
# <uint16> <string> <uint32>
|
225
|
+
# 0 2017 Audi 28336
|
226
|
+
# 1 2017 BMW 52527
|
227
|
+
# 2 2017 BMW_MINI 25427
|
228
|
+
# 3 2017 Mercedes-Benz 68221
|
229
|
+
# 4 2017 VW 49040
|
230
|
+
# : : : :
|
231
|
+
# 22 2021 BMW_MINI 18211
|
232
|
+
# 23 2021 Mercedes-Benz 51722
|
233
|
+
# 24 2021 VW 35215
|
234
|
+
#
|
235
|
+
# import_cars_long.to_wide
|
236
|
+
# or same as `import_cars_long.to_wide(name: :NAME, value: :VALUE)`
|
237
|
+
#
|
238
|
+
# # =>
|
239
|
+
# #<RedAmber::DataFrame : 5 x 6 Vectors, 0x000000000000d520>
|
240
|
+
# Year Audi BMW BMW_MINI Mercedes-Benz VW
|
241
|
+
# <int64> <int64> <int64> <int64> <int64> <int64>
|
242
|
+
# 0 2017 28336 52527 25427 68221 49040
|
243
|
+
# 1 2018 26473 50982 25984 67554 51961
|
244
|
+
# 2 2019 24222 46814 23813 66553 46794
|
245
|
+
# 3 2020 22304 35712 20196 57041 36576
|
246
|
+
# 4 2021 22535 35905 18211 51722 35215
|
247
|
+
#
|
248
|
+
# Columns other than `NAME` and `VALUE` (it is `Year` for this case) will be
|
249
|
+
# automatically processed and do not need to be specified.
|
250
|
+
#
|
251
|
+
# @since 0.2.0
|
252
|
+
#
|
83
253
|
def to_wide(name: :NAME, value: :VALUE)
|
84
254
|
name = name.to_sym
|
85
255
|
unless keys.include?(name)
|