red_amber 0.3.0 → 0.4.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (42) hide show
  1. checksums.yaml +4 -4
  2. data/.rubocop.yml +56 -22
  3. data/.yardopts +2 -0
  4. data/CHANGELOG.md +178 -0
  5. data/Gemfile +1 -1
  6. data/LICENSE +1 -1
  7. data/README.md +29 -30
  8. data/benchmark/basic.yml +7 -7
  9. data/benchmark/combine.yml +3 -3
  10. data/benchmark/dataframe.yml +15 -9
  11. data/benchmark/group.yml +6 -6
  12. data/benchmark/reshape.yml +6 -6
  13. data/benchmark/vector.yml +6 -3
  14. data/doc/DataFrame.md +32 -12
  15. data/doc/DataFrame_Comparison.md +65 -0
  16. data/doc/SubFrames.md +11 -0
  17. data/doc/Vector.md +207 -1
  18. data/doc/yard-templates/default/fulldoc/html/css/common.css +6 -0
  19. data/lib/red_amber/data_frame.rb +454 -85
  20. data/lib/red_amber/data_frame_combinable.rb +609 -115
  21. data/lib/red_amber/data_frame_displayable.rb +313 -34
  22. data/lib/red_amber/data_frame_indexable.rb +122 -19
  23. data/lib/red_amber/data_frame_loadsave.rb +78 -10
  24. data/lib/red_amber/data_frame_reshaping.rb +184 -14
  25. data/lib/red_amber/data_frame_selectable.rb +623 -70
  26. data/lib/red_amber/data_frame_variable_operation.rb +452 -35
  27. data/lib/red_amber/group.rb +186 -22
  28. data/lib/red_amber/helper.rb +74 -14
  29. data/lib/red_amber/refinements.rb +26 -6
  30. data/lib/red_amber/subframes.rb +1101 -0
  31. data/lib/red_amber/vector.rb +362 -11
  32. data/lib/red_amber/vector_aggregation.rb +312 -0
  33. data/lib/red_amber/vector_binary_element_wise.rb +506 -0
  34. data/lib/red_amber/vector_selectable.rb +265 -23
  35. data/lib/red_amber/vector_unary_element_wise.rb +529 -0
  36. data/lib/red_amber/vector_updatable.rb +278 -34
  37. data/lib/red_amber/version.rb +2 -1
  38. data/lib/red_amber.rb +13 -1
  39. data/red_amber.gemspec +2 -2
  40. metadata +13 -8
  41. data/doc/image/dataframe/reshaping_DataFrames.png +0 -0
  42. data/lib/red_amber/vector_functions.rb +0 -242
@@ -1,7 +1,7 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module RedAmber
4
- # mix-ins for the class DataFrame
4
+ # Mix-in for the class DataFrame
5
5
  module DataFrameLoadSave
6
6
  # Enable `self.load` as class method of DataFrame
7
7
  def self.included(klass)
@@ -10,30 +10,98 @@ module RedAmber
10
10
 
11
11
  # Enable `self.load` as class method of DataFrame
12
12
  module ClassMethods
13
- # Load DataFrame via Arrow::Table.load
14
- def load(path, options = {})
15
- DataFrame.new(Arrow::Table.load(path, options))
13
+ # Load DataFrame via Arrow::Table.load.
14
+ #
15
+ # Format is automatically detected by extension.
16
+ # @!method load(input, format: nil, compression: nil, schema: nil, skip_lines: nil)
17
+ # @param input [path]
18
+ # source path.
19
+ # @param format [:arrow_file, :batch, :arrows, :arrow_stream, :stream, :csv, :tsv]
20
+ # format specifier.
21
+ # @param compression [:gzip, nil]
22
+ # compression type.
23
+ # @param schema [Arrow::Schema]
24
+ # schema of table.
25
+ # @param skip_lines [Regexp]
26
+ # pattern of rows to skip.
27
+ # @return [DataFrame]
28
+ # loaded DataFrame.
29
+ # @example Load a tsv file
30
+ # DataFrame.load("file.tsv")
31
+ #
32
+ # @example Load a csv.gz file
33
+ # DataFrame.load("file.csv.gz")
34
+ #
35
+ # @example Load from URI
36
+ # DataFrame.load(URI("https://some_uri/file.csv"))
37
+ #
38
+ # @example Load from a Buffer
39
+ # DataFrame.load(Arrow::Buffer.new(<<~BUFFER), format: :csv)
40
+ # name,age
41
+ # Yasuko,68
42
+ # Rui,49
43
+ # Hinata,28
44
+ # BUFFER
45
+ #
46
+ # @example Load from a Buffer skipping comment line
47
+ # DataFrame.load(Arrow::Buffer.new(<<~BUFFER), format: :csv, skip_lines: /^#/)
48
+ # # comment
49
+ # name,age
50
+ # Yasuko,68
51
+ # Rui,49
52
+ # Hinata,28
53
+ # BUFFER
54
+ #
55
+ def load(input, **options)
56
+ DataFrame.new(Arrow::Table.load(input, options))
16
57
  end
17
58
  end
18
59
 
19
60
  # Save DataFrame
20
61
  #
21
- # @return [DataFrame] self.
22
- def save(output, options = {})
62
+ # Format is automatically detected by extension.
63
+ # @!method save(output, format: nil, compression: nil, schema: nil, skip_lines: nil)
64
+ # @param output [path]
65
+ # output path.
66
+ # @param format [:arrow_file, :batch, :arrows, :arrow_stream, :stream, :csv, :tsv]
67
+ # format specifier.
68
+ # @param compression [:gzip, nil]
69
+ # compression type.
70
+ # @param schema [Arrow::Schema]
71
+ # schema of table.
72
+ # @param skip_lines [Regexp]
73
+ # pattern of rows to skip.
74
+ # @return [DataFrame]
75
+ # self.
76
+ # @example Save a csv file
77
+ # dataframe.save("file.csv")
78
+ #
79
+ # @example Save a csv.gz file
80
+ # dataframe.save("file.csv.gz")
81
+ #
82
+ # @example Save an arrow file
83
+ # dataframe.save("file.arrow")
84
+ #
85
+ def save(output, **options)
23
86
  @table.save(output, options)
24
87
  self
25
88
  end
26
89
 
27
90
  # Save and reload to cast automatically
28
- # Via tsv format file temporally as default
91
+ # via tsv format file temporarily as default.
92
+ #
93
+ # @param format [Symbol]
94
+ # format specifier.
95
+ # @return [DataFrame]
96
+ # reloaded DataFrame.
29
97
  #
30
98
  # @note experimental feature
31
99
  def auto_cast(format: :tsv)
32
100
  return self if empty?
33
101
 
34
- tempfile = Arrow::ResizableBuffer.new(1024)
35
- save(tempfile, format: format)
36
- DataFrame.load(tempfile, format: format)
102
+ buffer = Arrow::ResizableBuffer.new(1024)
103
+ save(buffer, format: format)
104
+ DataFrame.load(buffer, format: format)
37
105
  end
38
106
  end
39
107
  end
@@ -1,17 +1,94 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module RedAmber
4
- # mix-ins for the class DataFrame
4
+ # Mix-in for the class DataFrame
5
5
  module DataFrameReshaping
6
- # Transpose a wide DataFrame.
6
+ # Create a transposed DataFrame for the wide (may be messy) DataFrame.
7
7
  #
8
- # @param key [Symbol] key of the index column
8
+ # @param key [Symbol]
9
+ # key of the index column
9
10
  # to transpose into keys.
10
11
  # If it is not specified, keys[0] is used.
11
- # @param name [Symbol] key name of transposed index column.
12
+ # @param name [Symbol]
13
+ # key name of transposed index column.
12
14
  # If it is not specified, :NAME is used.
13
15
  # If it already exists, :NAME1 or :NAME1.succ is used.
14
- # @return [DataFrame] trnsposed DataFrame
16
+ # @return [DataFrame]
17
+ # transposed DataFrame
18
+ #
19
+ # @example Transpose a DataFrame without options
20
+ #
21
+ # import_cars
22
+ #
23
+ # # =>
24
+ # #<RedAmber::DataFrame : 5 x 6 Vectors, 0x000000000000d520>
25
+ # Year Audi BMW BMW_MINI Mercedes-Benz VW
26
+ # <int64> <int64> <int64> <int64> <int64> <int64>
27
+ # 0 2017 28336 52527 25427 68221 49040
28
+ # 1 2018 26473 50982 25984 67554 51961
29
+ # 2 2019 24222 46814 23813 66553 46794
30
+ # 3 2020 22304 35712 20196 57041 36576
31
+ # 4 2021 22535 35905 18211 51722 35215
32
+ #
33
+ # import_cars.transpose
34
+ #
35
+ # # =>
36
+ # #<RedAmber::DataFrame : 5 x 6 Vectors, 0x0000000000010a2c>
37
+ # NAME 2017 2018 2019 2020 2021
38
+ # <string> <uint32> <uint32> <uint32> <uint16> <uint16>
39
+ # 0 Audi 28336 26473 24222 22304 22535
40
+ # 1 BMW 52527 50982 46814 35712 35905
41
+ # 2 BMW_MINI 25427 25984 23813 20196 18211
42
+ # 3 Mercedes-Benz 68221 67554 66553 57041 51722
43
+ # 4 VW 49040 51961 46794 36576 35215
44
+ #
45
+ # The leftmost column is created by original keys and
46
+ # `:NAME` is automatically used for the column name.
47
+ #
48
+ # @example Transpose a DataFrame with `:name` option
49
+ #
50
+ # import_cars.transpose(name: :Manufacturer)
51
+ #
52
+ # # =>
53
+ # #<RedAmber::DataFrame : 5 x 6 Vectors, 0x0000000000010a2c>
54
+ # Manufacturer 2017 2018 2019 2020 2021
55
+ # <string> <uint32> <uint32> <uint32> <uint16> <uint16>
56
+ # 0 Audi 28336 26473 24222 22304 22535
57
+ # 1 BMW 52527 50982 46814 35712 35905
58
+ # 2 BMW_MINI 25427 25984 23813 20196 18211
59
+ # 3 Mercedes-Benz 68221 67554 66553 57041 51722
60
+ # 4 VW 49040 51961 46794 36576 35215
61
+ #
62
+ # `:name` option can specify column name.
63
+ #
64
+ # @example Transpose a DataFrame by the :key in the middle of the DataFrame
65
+ #
66
+ # import_cars_middle = import_cars.pick(1..2, 0, 3..)
67
+ #
68
+ # # =>
69
+ # #<RedAmber::DataFrame : 5 x 6 Vectors, 0x000000000000f244>
70
+ # Audi BMW Year BMW_MINI Mercedes-Benz VW
71
+ # <int64> <int64> <int64> <int64> <int64> <int64>
72
+ # 0 28336 52527 2017 25427 68221 49040
73
+ # 1 26473 50982 2018 25984 67554 51961
74
+ # 2 24222 46814 2019 23813 66553 46794
75
+ # 3 22304 35712 2020 20196 57041 36576
76
+ # 4 22535 35905 2021 18211 51722 35215
77
+ #
78
+ # import_cars_middle.transpose(key: :Year)
79
+ #
80
+ # # =>
81
+ # #<RedAmber::DataFrame : 5 x 6 Vectors, 0x0000000000010a2c>
82
+ # NAME 2017 2018 2019 2020 2021
83
+ # <string> <uint32> <uint32> <uint32> <uint16> <uint16>
84
+ # 0 Audi 28336 26473 24222 22304 22535
85
+ # 1 BMW 52527 50982 46814 35712 35905
86
+ # 2 BMW_MINI 25427 25984 23813 20196 18211
87
+ # 3 Mercedes-Benz 68221 67554 66553 57041 51722
88
+ # 4 VW 49040 51961 46794 36576 35215
89
+ #
90
+ # @since 0.2.0
91
+ #
15
92
  def transpose(key: keys.first, name: :NAME)
16
93
  unless keys.include?(key)
17
94
  raise DataFrameArgumentError, "Self does not include: #{key}"
@@ -31,12 +108,67 @@ module RedAmber
31
108
  DataFrame.new(hash)
32
109
  end
33
110
 
34
- # Reshape wide DataFrame to a longer DataFrame.
111
+ # Create a 'long' (may be tidy) DataFrame from a 'wide' DataFrame.
112
+ #
113
+ # @param keep_keys [<Symbol>]
114
+ # keys to keep.
115
+ # @param name [Symbol, String]
116
+ # a new key name of the column which comes from key names.
117
+ # @param value [Symbol, String]
118
+ # a new key name of the column which comes from values.
119
+ # @return [DataFrame]
120
+ # long DataFrame.
121
+ #
122
+ # @example `to_long` without options
123
+ #
124
+ # import_cars
125
+ #
126
+ # # =>
127
+ # #<RedAmber::DataFrame : 5 x 6 Vectors, 0x000000000000d520>
128
+ # Year Audi BMW BMW_MINI Mercedes-Benz VW
129
+ # <int64> <int64> <int64> <int64> <int64> <int64>
130
+ # 0 2017 28336 52527 25427 68221 49040
131
+ # 1 2018 26473 50982 25984 67554 51961
132
+ # 2 2019 24222 46814 23813 66553 46794
133
+ # 3 2020 22304 35712 20196 57041 36576
134
+ # 4 2021 22535 35905 18211 51722 35215
135
+ #
136
+ # import_cars.to_long(:Year)
137
+ #
138
+ # # =>
139
+ # #<RedAmber::DataFrame : 25 x 3 Vectors, 0x0000000000011864>
140
+ # Year NAME VALUE
141
+ # <uint16> <string> <uint32>
142
+ # 0 2017 Audi 28336
143
+ # 1 2017 BMW 52527
144
+ # 2 2017 BMW_MINI 25427
145
+ # 3 2017 Mercedes-Benz 68221
146
+ # 4 2017 VW 49040
147
+ # : : : :
148
+ # 22 2021 BMW_MINI 18211
149
+ # 23 2021 Mercedes-Benz 51722
150
+ # 24 2021 VW 35215
151
+ #
152
+ # @example `to_long` with options `:name` and `:value`
153
+ #
154
+ # import_cars.to_long(:Year, name: :Manufacturer, value: :Num_of_imported)
155
+ #
156
+ # # =>
157
+ # #<RedAmber::DataFrame : 25 x 3 Vectors, 0x000000000001359c>
158
+ # Year Manufacturer Num_of_imported
159
+ # <uint16> <string> <uint32>
160
+ # 0 2017 Audi 28336
161
+ # 1 2017 BMW 52527
162
+ # 2 2017 BMW_MINI 25427
163
+ # 3 2017 Mercedes-Benz 68221
164
+ # 4 2017 VW 49040
165
+ # : : : :
166
+ # 22 2021 BMW_MINI 18211
167
+ # 23 2021 Mercedes-Benz 51722
168
+ # 24 2021 VW 35215
169
+ #
170
+ # @since 0.2.0
35
171
  #
36
- # @param keep_keys [Array] keys to keep.
37
- # @param name [Symbol, String] key of the column which is come **from values**.
38
- # @param value [Symbol, String] key of the column which is come **from values**.
39
- # @return [DataFrame] long DataFrame.
40
172
  def to_long(*keep_keys, name: :NAME, value: :VALUE)
41
173
  warn('[Info] No key to keep is specified.') if keep_keys.empty?
42
174
 
@@ -73,13 +205,51 @@ module RedAmber
73
205
  DataFrame.new(hash)
74
206
  end
75
207
 
76
- # Reshape long DataFrame to a wide DataFrame.
208
+ # Create a 'wide' (may be messy) DataFrame from a 'long' DataFrame.
77
209
  #
78
210
  # @param name [Symbol, String]
79
- # key of the column which will be expanded **to key names**.
211
+ # a new key name of the column which will be expanded to key names.
80
212
  # @param value [Symbol, String]
81
- # key of the column which will be expanded **to values**.
82
- # @return [DataFrame] wide DataFrame.
213
+ # a new key name of the column which will be expanded to values.
214
+ # @return [DataFrame]
215
+ # wide DataFrame.
216
+ #
217
+ # @example `to_wide` without options
218
+ #
219
+ # import_cars_long = import_cars.to_long(:Year)
220
+ #
221
+ # # =>
222
+ # #<RedAmber::DataFrame : 25 x 3 Vectors, 0x0000000000011864>
223
+ # Year NAME VALUE
224
+ # <uint16> <string> <uint32>
225
+ # 0 2017 Audi 28336
226
+ # 1 2017 BMW 52527
227
+ # 2 2017 BMW_MINI 25427
228
+ # 3 2017 Mercedes-Benz 68221
229
+ # 4 2017 VW 49040
230
+ # : : : :
231
+ # 22 2021 BMW_MINI 18211
232
+ # 23 2021 Mercedes-Benz 51722
233
+ # 24 2021 VW 35215
234
+ #
235
+ # import_cars_long.to_wide
236
+ # # or same as `import_cars_long.to_wide(name: :NAME, value: :VALUE)`
237
+ #
238
+ # # =>
239
+ # #<RedAmber::DataFrame : 5 x 6 Vectors, 0x000000000000d520>
240
+ # Year Audi BMW BMW_MINI Mercedes-Benz VW
241
+ # <int64> <int64> <int64> <int64> <int64> <int64>
242
+ # 0 2017 28336 52527 25427 68221 49040
243
+ # 1 2018 26473 50982 25984 67554 51961
244
+ # 2 2019 24222 46814 23813 66553 46794
245
+ # 3 2020 22304 35712 20196 57041 36576
246
+ # 4 2021 22535 35905 18211 51722 35215
247
+ #
248
+ # Columns other than `NAME` and `VALUE` (it is `Year` for this case) will be
249
+ # automatically processed and do not need to be specified.
250
+ #
251
+ # @since 0.2.0
252
+ #
83
253
  def to_wide(name: :NAME, value: :VALUE)
84
254
  name = name.to_sym
85
255
  unless keys.include?(name)