red_amber 0.3.0 → 0.4.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (42) hide show
  1. checksums.yaml +4 -4
  2. data/.rubocop.yml +56 -22
  3. data/.yardopts +2 -0
  4. data/CHANGELOG.md +178 -0
  5. data/Gemfile +1 -1
  6. data/LICENSE +1 -1
  7. data/README.md +29 -30
  8. data/benchmark/basic.yml +7 -7
  9. data/benchmark/combine.yml +3 -3
  10. data/benchmark/dataframe.yml +15 -9
  11. data/benchmark/group.yml +6 -6
  12. data/benchmark/reshape.yml +6 -6
  13. data/benchmark/vector.yml +6 -3
  14. data/doc/DataFrame.md +32 -12
  15. data/doc/DataFrame_Comparison.md +65 -0
  16. data/doc/SubFrames.md +11 -0
  17. data/doc/Vector.md +207 -1
  18. data/doc/yard-templates/default/fulldoc/html/css/common.css +6 -0
  19. data/lib/red_amber/data_frame.rb +454 -85
  20. data/lib/red_amber/data_frame_combinable.rb +609 -115
  21. data/lib/red_amber/data_frame_displayable.rb +313 -34
  22. data/lib/red_amber/data_frame_indexable.rb +122 -19
  23. data/lib/red_amber/data_frame_loadsave.rb +78 -10
  24. data/lib/red_amber/data_frame_reshaping.rb +184 -14
  25. data/lib/red_amber/data_frame_selectable.rb +623 -70
  26. data/lib/red_amber/data_frame_variable_operation.rb +452 -35
  27. data/lib/red_amber/group.rb +186 -22
  28. data/lib/red_amber/helper.rb +74 -14
  29. data/lib/red_amber/refinements.rb +26 -6
  30. data/lib/red_amber/subframes.rb +1101 -0
  31. data/lib/red_amber/vector.rb +362 -11
  32. data/lib/red_amber/vector_aggregation.rb +312 -0
  33. data/lib/red_amber/vector_binary_element_wise.rb +506 -0
  34. data/lib/red_amber/vector_selectable.rb +265 -23
  35. data/lib/red_amber/vector_unary_element_wise.rb +529 -0
  36. data/lib/red_amber/vector_updatable.rb +278 -34
  37. data/lib/red_amber/version.rb +2 -1
  38. data/lib/red_amber.rb +13 -1
  39. data/red_amber.gemspec +2 -2
  40. metadata +13 -8
  41. data/doc/image/dataframe/reshaping_DataFrames.png +0 -0
  42. data/lib/red_amber/vector_functions.rb +0 -242
@@ -1,7 +1,7 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module RedAmber
4
- # mix-ins for the class DataFrame
4
+ # Mix-in for the class DataFrame
5
5
  module DataFrameLoadSave
6
6
  # Enable `self.load` as class method of DataFrame
7
7
  def self.included(klass)
@@ -10,30 +10,98 @@ module RedAmber
10
10
 
11
11
  # Enable `self.load` as class method of DataFrame
12
12
  module ClassMethods
13
- # Load DataFrame via Arrow::Table.load
14
- def load(path, options = {})
15
- DataFrame.new(Arrow::Table.load(path, options))
13
+ # Load DataFrame via Arrow::Table.load.
14
+ #
15
+ # Format is automatically detected by extension.
16
+ # @!method load(input, format: nil, compression: nil, schema: nil, skip_lines: nil)
17
+ # @param input [path]
18
+ # source path.
19
+ # @param format [:arrow_file, :batch, :arrows, :arrow_stream, :stream, :csv, :tsv]
20
+ # format specifier.
21
+ # @param compression [:gzip, nil]
22
+ # compression type.
23
+ # @param schema [Arrow::Schema]
24
+ # schema of table.
25
+ # @param skip_lines [Regexp]
26
+ # pattern of rows to skip.
27
+ # @return [DataFrame]
28
+ # loaded DataFrame.
29
+ # @example Load a tsv file
30
+ # DataFrame.load("file.tsv")
31
+ #
32
+ # @example Load a csv.gz file
33
+ # DataFrame.load("file.csv.gz")
34
+ #
35
+ # @example Load from URI
36
+ # DataFrame.load(URI("https://some_uri/file.csv"))
37
+ #
38
+ # @example Load from a Buffer
39
+ # DataFrame.load(Arrow::Buffer.new(<<~BUFFER), format: :csv)
40
+ # name,age
41
+ # Yasuko,68
42
+ # Rui,49
43
+ # Hinata,28
44
+ # BUFFER
45
+ #
46
+ # @example Load from a Buffer skipping comment line
47
+ # DataFrame.load(Arrow::Buffer.new(<<~BUFFER), format: :csv, skip_lines: /^#/)
48
+ # # comment
49
+ # name,age
50
+ # Yasuko,68
51
+ # Rui,49
52
+ # Hinata,28
53
+ # BUFFER
54
+ #
55
+ def load(input, **options)
56
+ DataFrame.new(Arrow::Table.load(input, options))
16
57
  end
17
58
  end
18
59
 
19
60
  # Save DataFrame
20
61
  #
21
- # @return [DataFrame] self.
22
- def save(output, options = {})
62
+ # Format is automatically detected by extension.
63
+ # @!method save(output, format: nil, compression: nil, schema: nil, skip_lines: nil)
64
+ # @param output [path]
65
+ # output path.
66
+ # @param format [:arrow_file, :batch, :arrows, :arrow_stream, :stream, :csv, :tsv]
67
+ # format specifier.
68
+ # @param compression [:gzip, nil]
69
+ # compression type.
70
+ # @param schema [Arrow::Schema]
71
+ # schema of table.
72
+ # @param skip_lines [Regexp]
73
+ # pattern of rows to skip.
74
+ # @return [DataFrame]
75
+ # self.
76
+ # @example Save a csv file
77
+ # df.save("file.csv")
78
+ #
79
+ # @example Save a csv.gz file
80
+ # df.save("file.csv.gz")
81
+ #
82
+ # @example Save an arrow file
83
+ # df.save("file.arrow")
84
+ #
85
+ def save(output, **options)
23
86
  @table.save(output, options)
24
87
  self
25
88
  end
26
89
 
27
90
  # Save and reload to cast automatically
28
- # Via tsv format file temporally as default
91
+ # via tsv format file temporarily as default.
92
+ #
93
+ # @param format [Symbol]
94
+ # format specifier.
95
+ # @return [DataFrame]
96
+ # reloaded DataFrame.
29
97
  #
30
98
  # @note experimental feature
31
99
  def auto_cast(format: :tsv)
32
100
  return self if empty?
33
101
 
34
- tempfile = Arrow::ResizableBuffer.new(1024)
35
- save(tempfile, format: format)
36
- DataFrame.load(tempfile, format: format)
102
+ buffer = Arrow::ResizableBuffer.new(1024)
103
+ save(buffer, format: format)
104
+ DataFrame.load(buffer, format: format)
37
105
  end
38
106
  end
39
107
  end
@@ -1,17 +1,94 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module RedAmber
4
- # mix-ins for the class DataFrame
4
+ # Mix-in for the class DataFrame
5
5
  module DataFrameReshaping
6
- # Transpose a wide DataFrame.
6
+ # Create a transposed DataFrame for the wide (may be messy) DataFrame.
7
7
  #
8
- # @param key [Symbol] key of the index column
8
+ # @param key [Symbol]
9
+ # key of the index column
9
10
  # to transpose into keys.
10
11
  # If it is not specified, keys[0] is used.
11
- # @param name [Symbol] key name of transposed index column.
12
+ # @param name [Symbol]
13
+ # key name of transposed index column.
12
14
  # If it is not specified, :NAME is used.
13
15
  # If it already exists, :NAME1 or :NAME1.succ is used.
14
- # @return [DataFrame] trnsposed DataFrame
16
+ # @return [DataFrame]
17
+ # transposed DataFrame
18
+ #
19
+ # @example Transpose a DataFrame without options
20
+ #
21
+ # import_cars
22
+ #
23
+ # # =>
24
+ # #<RedAmber::DataFrame : 5 x 6 Vectors, 0x000000000000d520>
25
+ # Year Audi BMW BMW_MINI Mercedes-Benz VW
26
+ # <int64> <int64> <int64> <int64> <int64> <int64>
27
+ # 0 2017 28336 52527 25427 68221 49040
28
+ # 1 2018 26473 50982 25984 67554 51961
29
+ # 2 2019 24222 46814 23813 66553 46794
30
+ # 3 2020 22304 35712 20196 57041 36576
31
+ # 4 2021 22535 35905 18211 51722 35215
32
+ #
33
+ # import_cars.transpose
34
+ #
35
+ # # =>
36
+ # #<RedAmber::DataFrame : 5 x 6 Vectors, 0x0000000000010a2c>
37
+ # NAME 2017 2018 2019 2020 2021
38
+ # <string> <uint32> <uint32> <uint32> <uint16> <uint16>
39
+ # 0 Audi 28336 26473 24222 22304 22535
40
+ # 1 BMW 52527 50982 46814 35712 35905
41
+ # 2 BMW_MINI 25427 25984 23813 20196 18211
42
+ # 3 Mercedes-Benz 68221 67554 66553 57041 51722
43
+ # 4 VW 49040 51961 46794 36576 35215
44
+ #
45
+ # The leftmost column is created by original keys and
46
+ # `:NAME` is automatically used for the column name.
47
+ #
48
+ # @example Transpose a DataFrame with `:name` option
49
+ #
50
+ # import_cars.transpose(name: :Manufacturer)
51
+ #
52
+ # # =>
53
+ # #<RedAmber::DataFrame : 5 x 6 Vectors, 0x0000000000010a2c>
54
+ # Manufacturer 2017 2018 2019 2020 2021
55
+ # <string> <uint32> <uint32> <uint32> <uint16> <uint16>
56
+ # 0 Audi 28336 26473 24222 22304 22535
57
+ # 1 BMW 52527 50982 46814 35712 35905
58
+ # 2 BMW_MINI 25427 25984 23813 20196 18211
59
+ # 3 Mercedes-Benz 68221 67554 66553 57041 51722
60
+ # 4 VW 49040 51961 46794 36576 35215
61
+ #
62
+ # `:name` option can specify column name.
63
+ #
64
+ # @example Transpose a DataFrame by the :key in the middle of the DataFrame
65
+ #
66
+ # import_cars_middle = import_cars.pick(1..2, 0, 3..)
67
+ #
68
+ # # =>
69
+ # #<RedAmber::DataFrame : 5 x 6 Vectors, 0x000000000000f244>
70
+ # Audi BMW Year BMW_MINI Mercedes-Benz VW
71
+ # <int64> <int64> <int64> <int64> <int64> <int64>
72
+ # 0 28336 52527 2017 25427 68221 49040
73
+ # 1 26473 50982 2018 25984 67554 51961
74
+ # 2 24222 46814 2019 23813 66553 46794
75
+ # 3 22304 35712 2020 20196 57041 36576
76
+ # 4 22535 35905 2021 18211 51722 35215
77
+ #
78
+ # import_cars_middle.transpose(key: :Year)
79
+ #
80
+ # # =>
81
+ # #<RedAmber::DataFrame : 5 x 6 Vectors, 0x0000000000010a2c>
82
+ # NAME 2017 2018 2019 2020 2021
83
+ # <string> <uint32> <uint32> <uint32> <uint16> <uint16>
84
+ # 0 Audi 28336 26473 24222 22304 22535
85
+ # 1 BMW 52527 50982 46814 35712 35905
86
+ # 2 BMW_MINI 25427 25984 23813 20196 18211
87
+ # 3 Mercedes-Benz 68221 67554 66553 57041 51722
88
+ # 4 VW 49040 51961 46794 36576 35215
89
+ #
90
+ # @since 0.2.0
91
+ #
15
92
  def transpose(key: keys.first, name: :NAME)
16
93
  unless keys.include?(key)
17
94
  raise DataFrameArgumentError, "Self does not include: #{key}"
@@ -31,12 +108,67 @@ module RedAmber
31
108
  DataFrame.new(hash)
32
109
  end
33
110
 
34
- # Reshape wide DataFrame to a longer DataFrame.
111
+ # Create a 'long' (may be tidy) DataFrame from a 'wide' DataFrame.
112
+ #
113
+ # @param keep_keys [<Symbol>]
114
+ # keys to keep.
115
+ # @param name [Symbol, String]
116
+ # a new key name of the column which comes from key names.
117
+ # @param value [Symbol, String]
118
+ # a new key name of the column which comes from values.
119
+ # @return [DataFrame]
120
+ # long DataFrame.
121
+ #
122
+ # @example `to_long` without options
123
+ #
124
+ # import_cars
125
+ #
126
+ # # =>
127
+ # #<RedAmber::DataFrame : 5 x 6 Vectors, 0x000000000000d520>
128
+ # Year Audi BMW BMW_MINI Mercedes-Benz VW
129
+ # <int64> <int64> <int64> <int64> <int64> <int64>
130
+ # 0 2017 28336 52527 25427 68221 49040
131
+ # 1 2018 26473 50982 25984 67554 51961
132
+ # 2 2019 24222 46814 23813 66553 46794
133
+ # 3 2020 22304 35712 20196 57041 36576
134
+ # 4 2021 22535 35905 18211 51722 35215
135
+ #
136
+ # import_cars.to_long(:Year)
137
+ #
138
+ # # =>
139
+ # #<RedAmber::DataFrame : 25 x 3 Vectors, 0x0000000000011864>
140
+ # Year NAME VALUE
141
+ # <uint16> <string> <uint32>
142
+ # 0 2017 Audi 28336
143
+ # 1 2017 BMW 52527
144
+ # 2 2017 BMW_MINI 25427
145
+ # 3 2017 Mercedes-Benz 68221
146
+ # 4 2017 VW 49040
147
+ # : : : :
148
+ # 22 2021 BMW_MINI 18211
149
+ # 23 2021 Mercedes-Benz 51722
150
+ # 24 2021 VW 35215
151
+ #
152
+ # @example `to_long` with options `:name` and `:value`
153
+ #
154
+ # import_cars.to_long(:Year, name: :Manufacturer, value: :Num_of_imported)
155
+ #
156
+ # # =>
157
+ # #<RedAmber::DataFrame : 25 x 3 Vectors, 0x000000000001359c>
158
+ # Year Manufacturer Num_of_imported
159
+ # <uint16> <string> <uint32>
160
+ # 0 2017 Audi 28336
161
+ # 1 2017 BMW 52527
162
+ # 2 2017 BMW_MINI 25427
163
+ # 3 2017 Mercedes-Benz 68221
164
+ # 4 2017 VW 49040
165
+ # : : : :
166
+ # 22 2021 BMW_MINI 18211
167
+ # 23 2021 Mercedes-Benz 51722
168
+ # 24 2021 VW 35215
169
+ #
170
+ # @since 0.2.0
35
171
  #
36
- # @param keep_keys [Array] keys to keep.
37
- # @param name [Symbol, String] key of the column which is come **from values**.
38
- # @param value [Symbol, String] key of the column which is come **from values**.
39
- # @return [DataFrame] long DataFrame.
40
172
  def to_long(*keep_keys, name: :NAME, value: :VALUE)
41
173
  warn('[Info] No key to keep is specified.') if keep_keys.empty?
42
174
 
@@ -73,13 +205,51 @@ module RedAmber
73
205
  DataFrame.new(hash)
74
206
  end
75
207
 
76
- # Reshape long DataFrame to a wide DataFrame.
208
+ # Create a 'wide' (may be messy) DataFrame from a 'long' DataFrame.
77
209
  #
78
210
  # @param name [Symbol, String]
79
- # key of the column which will be expanded **to key names**.
211
+ # a new key name of the column which will be expanded to key names.
80
212
  # @param value [Symbol, String]
81
- # key of the column which will be expanded **to values**.
82
- # @return [DataFrame] wide DataFrame.
213
+ # a new key name of the column which will be expanded to values.
214
+ # @return [DataFrame]
215
+ # wide DataFrame.
216
+ #
217
+ # @example `to_wide` without options
218
+ #
219
+ # import_cars_long = import_cars.to_long(:Year)
220
+ #
221
+ # # =>
222
+ # #<RedAmber::DataFrame : 25 x 3 Vectors, 0x0000000000011864>
223
+ # Year NAME VALUE
224
+ # <uint16> <string> <uint32>
225
+ # 0 2017 Audi 28336
226
+ # 1 2017 BMW 52527
227
+ # 2 2017 BMW_MINI 25427
228
+ # 3 2017 Mercedes-Benz 68221
229
+ # 4 2017 VW 49040
230
+ # : : : :
231
+ # 22 2021 BMW_MINI 18211
232
+ # 23 2021 Mercedes-Benz 51722
233
+ # 24 2021 VW 35215
234
+ #
235
+ # import_cars_long.to_wide
236
+ # # or same as `import_cars_long.to_wide(name: :NAME, value: :VALUE)`
237
+ #
238
+ # # =>
239
+ # #<RedAmber::DataFrame : 5 x 6 Vectors, 0x000000000000d520>
240
+ # Year Audi BMW BMW_MINI Mercedes-Benz VW
241
+ # <int64> <int64> <int64> <int64> <int64> <int64>
242
+ # 0 2017 28336 52527 25427 68221 49040
243
+ # 1 2018 26473 50982 25984 67554 51961
244
+ # 2 2019 24222 46814 23813 66553 46794
245
+ # 3 2020 22304 35712 20196 57041 36576
246
+ # 4 2021 22535 35905 18211 51722 35215
247
+ #
248
+ # Columns other than `NAME` and `VALUE` (it is `Year` for this case) will be
249
+ # automatically processed and do not need to be specified.
250
+ #
251
+ # @since 0.2.0
252
+ #
83
253
  def to_wide(name: :NAME, value: :VALUE)
84
254
  name = name.to_sym
85
255
  unless keys.include?(name)