red_amber 0.2.1 → 0.2.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (58) hide show
  1. checksums.yaml +4 -4
  2. data/.rubocop.yml +15 -0
  3. data/CHANGELOG.md +170 -20
  4. data/Gemfile +4 -2
  5. data/README.md +121 -302
  6. data/benchmark/basic.yml +79 -0
  7. data/benchmark/combine.yml +63 -0
  8. data/benchmark/drop_nil.yml +15 -3
  9. data/benchmark/group.yml +33 -0
  10. data/benchmark/reshape.yml +27 -0
  11. data/benchmark/{csv_load_penguins.yml → rover/csv_load_penguins.yml} +3 -3
  12. data/benchmark/rover/flights.yml +23 -0
  13. data/benchmark/rover/penguins.yml +23 -0
  14. data/benchmark/rover/planes.yml +23 -0
  15. data/benchmark/rover/weather.yml +23 -0
  16. data/doc/DataFrame.md +611 -318
  17. data/doc/Vector.md +31 -36
  18. data/doc/image/basic_verbs.png +0 -0
  19. data/doc/image/dataframe/assign.png +0 -0
  20. data/doc/image/dataframe/assign_operation.png +0 -0
  21. data/doc/image/dataframe/drop.png +0 -0
  22. data/doc/image/dataframe/join.png +0 -0
  23. data/doc/image/dataframe/pick.png +0 -0
  24. data/doc/image/dataframe/pick_operation.png +0 -0
  25. data/doc/image/dataframe/remove.png +0 -0
  26. data/doc/image/dataframe/rename.png +0 -0
  27. data/doc/image/dataframe/rename_operation.png +0 -0
  28. data/doc/image/dataframe/reshaping_DataFrames.png +0 -0
  29. data/doc/image/dataframe/set_and_bind.png +0 -0
  30. data/doc/image/dataframe/slice.png +0 -0
  31. data/doc/image/dataframe/slice_operation.png +0 -0
  32. data/doc/image/dataframe_model.png +0 -0
  33. data/doc/image/group_operation.png +0 -0
  34. data/doc/image/replace-if_then.png +0 -0
  35. data/doc/image/reshaping_dataframe.png +0 -0
  36. data/doc/image/screenshot.png +0 -0
  37. data/doc/image/vector/binary_element_wise.png +0 -0
  38. data/doc/image/vector/unary_aggregation.png +0 -0
  39. data/doc/image/vector/unary_aggregation_w_option.png +0 -0
  40. data/doc/image/vector/unary_element_wise.png +0 -0
  41. data/lib/red_amber/data_frame.rb +16 -42
  42. data/lib/red_amber/data_frame_combinable.rb +283 -0
  43. data/lib/red_amber/data_frame_displayable.rb +58 -3
  44. data/lib/red_amber/data_frame_loadsave.rb +36 -0
  45. data/lib/red_amber/data_frame_reshaping.rb +8 -6
  46. data/lib/red_amber/data_frame_selectable.rb +9 -9
  47. data/lib/red_amber/data_frame_variable_operation.rb +27 -21
  48. data/lib/red_amber/group.rb +100 -17
  49. data/lib/red_amber/helper.rb +20 -30
  50. data/lib/red_amber/vector.rb +56 -30
  51. data/lib/red_amber/vector_functions.rb +0 -8
  52. data/lib/red_amber/vector_selectable.rb +9 -1
  53. data/lib/red_amber/vector_updatable.rb +61 -63
  54. data/lib/red_amber/version.rb +1 -1
  55. data/lib/red_amber.rb +2 -0
  56. data/red_amber.gemspec +1 -1
  57. metadata +32 -11
  58. data/doc/examples_of_red_amber.ipynb +0 -8979
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: d239a3fa90e5796fb695f8d3c4995d0a2178ea7c8c2789bed157e688902585cb
4
- data.tar.gz: 968c02294d24a3dabaa6e5128be0bcfad713e131df15850ac0ceb64c2883dcd0
3
+ metadata.gz: 6f70451aad21c0750fb2a0bfe165baf5862ac3837541135cf9d58df4ecd732ac
4
+ data.tar.gz: c31f143278bf3792bc03e0e727e12df72fca5a001b3d6c098c3f028df456e2f0
5
5
  SHA512:
6
- metadata.gz: d1c5ffd9650dd8c9e825514cd7e2ff4914690bd731ac262fca6cc17e56c1e312679689351a05fb741dccfb59377214706a8bf6ca6fe3237ca46fb623ae1b9f10
7
- data.tar.gz: f37c4aff9170cd5105737a9d2b3d827051254dcca6968b697f5ed3a70e1b2c3cb14303e88a9c342870d1447450a538e445d6f3d37de53591d3f6d13b87aebc16
6
+ metadata.gz: c5026422e8f0c0b1b1b25f6baa97b540287937a3c0d224a16ce13c17c16a11ccd54682682f17fdf5e176190f7db40bfda7bbe5659e89a212eed8f1bf9b3567e9
7
+ data.tar.gz: 82b500a1570b8fc8925a7c988bd7d3f3db677588537ee9f8d75f965ca98f5d730a0a55be5fa6637e3beeb4aed11b55a9afd3e4dfd52aee687e123f390f0d8d2b
data/.rubocop.yml CHANGED
@@ -60,9 +60,11 @@ Layout/LineLength:
60
60
  Metrics/AbcSize:
61
61
  Max: 30
62
62
  Exclude:
63
+ - 'lib/red_amber/data_frame_combinable.rb' # Max: 43
63
64
  - 'lib/red_amber/data_frame_displayable.rb' # Max: 55
64
65
  - 'lib/red_amber/data_frame_reshaping.rb' # Max 40.91
65
66
  - 'lib/red_amber/data_frame_selectable.rb' # Max: 51
67
+ - 'lib/red_amber/data_frame_variable_operation.rb' # Max: 30.15
66
68
  - 'lib/red_amber/vector_updatable.rb' # Max: 36
67
69
  - 'lib/red_amber/vector_selectable.rb' # Max: 33
68
70
 
@@ -79,13 +81,16 @@ Metrics/ClassLength:
79
81
  - 'test/**/*'
80
82
  - 'lib/red_amber/data_frame.rb' #Max: 131
81
83
  - 'lib/red_amber/vector.rb' #Max: 102
84
+ - 'lib/red_amber/group.rb' #Max: 103
82
85
 
83
86
  # Max: 7
84
87
  Metrics/CyclomaticComplexity:
85
88
  Max: 12
86
89
  Exclude:
90
+ - 'lib/red_amber/data_frame_combinable.rb' # Max: 15
87
91
  - 'lib/red_amber/data_frame_displayable.rb' # Max: 18
88
92
  - 'lib/red_amber/data_frame_selectable.rb' # Max: 14
93
+ - 'lib/red_amber/helper.rb' # Max: 15
89
94
  - 'lib/red_amber/vector_selectable.rb' # Max: 13
90
95
  - 'lib/red_amber/vector_updatable.rb' # Max: 14
91
96
 
@@ -93,6 +98,7 @@ Metrics/CyclomaticComplexity:
93
98
  Metrics/MethodLength:
94
99
  Max: 30
95
100
  Exclude:
101
+ - 'lib/red_amber/data_frame_combinable.rb' # Max: 38
96
102
  - 'lib/red_amber/data_frame_displayable.rb' # Max: 33
97
103
  - 'lib/red_amber/data_frame_selectable.rb' # Max: 38
98
104
  - 'lib/red_amber/data_frame_variable_operation.rb' # Max: 35
@@ -101,16 +107,25 @@ Metrics/MethodLength:
101
107
  Metrics/ModuleLength:
102
108
  Max: 100
103
109
  Exclude:
110
+ - 'lib/red_amber/data_frame_combinable.rb' # Max: 108
104
111
  - 'lib/red_amber/data_frame_displayable.rb' # Max: 132
105
112
  - 'lib/red_amber/data_frame_selectable.rb' # Max: 141
106
113
  - 'lib/red_amber/data_frame_variable_operation.rb' # Max: 110
107
114
  - 'lib/red_amber/vector_functions.rb' # Max: 114
108
115
 
116
+ # Max: 5
117
+ Metrics/ParameterLists:
118
+ Max: 6
119
+ # Exclude:
120
+ # - 'lib/red_amber/data_frame_combinable.rb' # Max: 6
121
+
109
122
  # Max: 8
110
123
  Metrics/PerceivedComplexity:
111
124
  Max: 13
112
125
  Exclude:
126
+ - 'lib/red_amber/data_frame_combinable.rb' # Max: 14
113
127
  - 'lib/red_amber/data_frame_selectable.rb' # Max: 14
128
+ - 'lib/red_amber/helper.rb' # Max: 15
114
129
  - 'lib/red_amber/vector_updatable.rb' # Max: 15
115
130
  - 'lib/red_amber/data_frame_displayable.rb' # Max: 19
116
131
 
data/CHANGELOG.md CHANGED
@@ -1,42 +1,188 @@
1
+ ## [0.2.4] - 2022-12-25 (unreleased)
2
+
3
+ ## [0.2.3] - 2022-11-16
4
+
5
+ - Bug fixes
6
+
7
+ - Fix DataFrame#to_s when DataFrame.size == 0 (#125)
8
+ - Remove unused lines in funcs (#128)
9
+ - Remove unused methods in helper (#128)
10
+ - Add test for invalid arg in DataFrame.new (#128)
11
+ - Add test for Vector#shift(0) (#128)
12
+ - Fix bugs for DataFrame#[], #pick and #drop with Range of Symbols and Symbol (#135)
13
+
14
+ - New features and improvements
15
+
16
+ - Upgrade dependency to Arrow 10.0.0 (#132)
17
+
18
+ It is possible to initialize from objects that respond to `to_arrow` since 0.2.3.
19
+ Arrays in Numo::NArray respond to `to_arrow` with Red Arrow Numo::NArray 0.0.6.
20
+ This feature was proposed by the Red Data Tools member @kojix2 and implemented by @kou.
21
+ I also made Vector respond to `to_arrow` and `to_arrow_array`.
22
+ It becomes a member of ducks ('quack quack'). Thanks!
23
+
24
+ - Change dev dependency to red-dataset-arrow (#117)
25
+ - Add dev dependency for red-arrow-numo-narray (#132)
26
+ - Support Numo::NArray in Vector.new (#132)
27
+ - Support Vector#to_arrow_array (#132)
28
+
29
+ - Update group (#118)
30
+ - Introduce new DataFrame group support (experimental)
31
+
32
+ This additional API will treat a grouped DataFrame as a list of DataFrames.
33
+ I think this API has pros such as:
34
+ - API is easy to understand and flexible.
35
+ - It has good compatibility with Ruby's primitive Enumerables.
36
+ - We can only use non hash-ed aggregation functions.
37
+ - Do not need grouped DataFrame state, nor `#ungroup` method.
38
+ - May be useful for concurrent operations.
39
+
40
+ This feature is implemented by Ruby, so it is pretty slow and experimental.
41
+ Use original Group API for practical purpose.
42
+
43
+ - `include Enumerable` to Group (experimental)
44
+ - Add Group#each, #inspect
45
+ - Refactor Group to align with Arrow
46
+
47
+ - Introduce DataFrame combining methods (#125)
48
+ - Introduce DataFrame#concatenate method
49
+ - Add DataFrame#merge method
50
+ - Add DataFrame#inner_join method
51
+ - Add DataFrame#full_join method
52
+ - Add DataFrame#left_join method
53
+ - Add DataFrame#right_join method
54
+ - Add DataFrame#semi_join method
55
+ - Add DataFrame#anti_join method
56
+ - Add DataFrame#intersect method
57
+ - Add DataFrame#union method
58
+ - Add DataFrame#setdiff method
59
+ - Rename #setdiff to #difference
60
+ - Support natural join in DataFrame#join
61
+ - Support partial join_key and renaming
62
+ - Fix DataFrame#join to merge key columns
63
+ - Add DataFrame#set_operable? method
64
+ - Add join/set/bind image to DataFrame.md
65
+ - Fix DataFrame#join, #right_semi, #right_anti (#128)
66
+
67
+ - Miscellaneous
68
+ - Return Vector in DataFrame#indices (#118)
69
+
70
+ - Improve tests/ci
71
+
72
+ - Improve CI
73
+ - Add CI test on macOS (#133)
74
+ - Enable bundler-cache on macOS (#128)
75
+ - Add install gobject introspection prior to glib in CI (#133)
76
+ This will stabilize CI system installation especially with cache.
77
+
78
+ - Rename workflows/test.yml to ci.yml (#133)
79
+ - Fix link in CI badge of README.md (#118)
80
+
81
+ - Add github action for coverage (#128)
82
+
83
+ - Add benchmark
84
+ - Add benchmarks with Rover (#118)
85
+ - Introduce benchmark suite (#134)
86
+ - Add benchmark for combining operations (#134)
87
+
88
+ - Measuring test coverage
89
+ - Add test coverage measurement (#128)
90
+
91
+ - Refactoring
92
+
93
+ - Remove redundant string escape in `test_vector_function` (#132)
94
+ - Refine tests to use `assert_equal_array` (#128)
95
+ - Rewrite Vector#replace (#128)
96
+
97
+ - Documentation
98
+
99
+ - Update README.md for installation (#126)
100
+ - Add clause that keys must be unique in doc. (#126)
101
+ - Rows should be called as 'records' (#126)
102
+ - Update Jupyter Notebook `83 examples of RedAmber` (#135)
103
+
104
+ - GitHub site
105
+
106
+ - Update Jupyter notebooks in Binder
107
+ - Change default branch name from 'master' to 'main' (#127)
108
+
109
+ - Thanks
110
+
111
+ Ruby Association Grant committee
112
+ It is a great honor that RedAmber was selected as a project of the Ruby Association Grant 2022.
113
+
114
+
115
+ ## [0.2.2] - 2022-10-04
116
+
117
+ - Bug fixes
118
+
119
+ - Return self when no replacement happens in Vector#replace. (#92)
120
+ - Limit n-digits in to_iruby. (#111)
121
+ - Fix displaying space in to_iruby. (#111)
122
+ - Raise error if key is duplicated. (#113)
123
+ - Fix DataFrame#pick/#drop with endless Range. (#113)
124
+ - Change type from dictionary to string in DataFrame reshaping methods. (#113)
125
+ - Fix arguments parser to accept Enumerator. (#114)
126
+
127
+ - New features and improvements
128
+
129
+ - Support making a data frame from an object that responds to `to_arrow`. (#106) [Patch by Kenta Murata]
130
+ - Introduce DataFrame#auto_cast (experimental feature) (#105)
131
+ - Change default name in DataFrame#transpose, #to_long, #to_wide. (#110)
132
+ - Add Vector#dictionary? method. (#113)
133
+ - Add display mode 'Plain' and 'Minimum'. (#113)
134
+ - Refactor code
135
+ - Refine test_vector_selectable. (#92)
136
+ - Refine test_vector_updatable. (#92)
137
+ - Refine Vector.new. (#113)
138
+ - Refine DataFrame#pick, #drop. (#113)
139
+
140
+ - Documents
141
+
142
+ - Update images. (#90, #105, #113)
143
+ - Update README to use simpler examples. (#112)
144
+ - Update README with a new screenshot example. (#113)
145
+
146
+ - GitHub site
147
+
148
+ - Update Jupyter notebooks in Binder (#88, #115)
149
+ - Move binder support to heronshoes/docker-stacks repository.
150
+ - Update README notebook on binder.
151
+ - Add examples_of_RedAmber notebook on binder.
152
+
153
+ - Start to use discussions.
154
+
155
+ - Thanks
156
+
157
+ - Kenta Murata
158
+
1
159
  ## [0.2.1] - 2022-09-07
2
160
 
3
- -Bug fixes
161
+ - Bug fixes
4
162
 
5
163
  - Fix `Vector#each` with block (#66)
6
164
  `Vector#each` will return value of each element with block.
7
-
8
165
  - Fix table format at size == 9 (#67)
9
-
10
166
  - Fix to support Vector in `DataFrame#assign` (#77)
11
-
12
167
  - Add `assert_delta` functionality for `assert_with_NaN` (#78)
13
-
14
168
  - Fix Vector#is_in when self is chunked (#79)
15
-
16
169
  - Fix Array type error (uint/int) (#79)
17
170
 
18
171
  - New features and improvements
19
172
 
20
173
  - Refine `DataFrame#indices` method (#67)
21
-
22
174
  - Update DataFrame reshaping methods (#73)
23
-
24
175
  - Change default option value of DataFrame reshaping
25
-
26
176
  - Change the order of import_cars example
27
177
 
28
178
  - Add `DataFrame#method_missing` to get column vector by method (#75)
29
-
30
179
  - Add `DataFrame#method_missing` to get column (#75)
31
180
 
32
181
  - Accept both args and block in `DataFrame#assign` (#75)
33
-
34
182
  - Accept indices in `DataFrame#pick` and `DataFrame#drop` (#76)
35
183
 
36
184
  - Add `DataFrame#slice_by` method (#77)
37
-
38
185
  - Add new Vector functions (#78)
39
-
40
186
  - Add inverse trigonometric function for Vector
41
187
  - `acos`
42
188
  - `asin`
@@ -49,12 +195,15 @@
49
195
 
50
196
  - Add binary function `Vector#logb`
51
197
 
52
- - Docker image and Jupyter Notebook (Thanks to @mrkn)
198
+ - Docker image and Jupyter Notebook [Thanks to Kenta Murata]
53
199
  - Add link to RubyData in README
54
200
  - Add link to interactive README by Binder
55
201
 
56
202
  - Update Jupyter Notebook `71 examples of RedAmber`
57
203
 
204
+ - Thanks
205
+
206
+ - Kenta Murata
58
207
 
59
208
  ## [0.2.0] - 2022-08-15
60
209
 
@@ -63,25 +212,19 @@
63
212
  - Bug fixes
64
213
 
65
214
  - Fix order of multiple group keys (#55)
66
-
67
215
  Only 1 group key comes to left. Other keys remain in right.
68
216
 
69
217
  - Remove optional `require` for rover (#55)
70
-
71
218
  Fix DataFrame.new for argument with Rover::DataFrame.
72
-
73
219
  - Fix occasional failure in CI (#59)
74
-
75
220
  Sometimes the CI test fails. I added -dev dependency
76
221
  in Arrow install by apt, not doing in bundler.
77
222
 
78
223
  - Fix calling :take in V#[] (#56)
79
-
80
224
  Fixed to call Arrow function :take instead of :array_take in Vector#take_by_vector. This will prevent the error below
81
225
  when called with Arrow::ChunkedArray.
82
226
 
83
227
  - Raise error renaming non existing key (#61)
84
-
85
228
  Add error when specified key does not exist.
86
229
 
87
230
  - Fix DataFrame#rename #assign by array (#65)
@@ -294,6 +437,13 @@
294
437
  - Documentation
295
438
  - Fix typo in DataFrame.md
296
439
 
440
+ - GitHub site
441
+ - Add gem and status badges in README. (#42) [Patch by kojix2]
442
+
443
+ - Thanks
444
+
445
+ - kojix2
446
+
297
447
  ## [0.1.5] - 2022-06-12 (experimental)
298
448
 
299
449
  - Bug fixes
data/Gemfile CHANGED
@@ -7,7 +7,7 @@ gemspec
7
7
  group :test do
8
8
  gem 'rake'
9
9
 
10
- gem 'red-parquet', '>= 9.0.0'
10
+ gem 'red-parquet', '~> 10.0.0'
11
11
  gem 'rover-df', '~> 0.3.0'
12
12
 
13
13
  gem 'rubocop'
@@ -21,5 +21,7 @@ group :test do
21
21
  gem 'yard'
22
22
 
23
23
  gem 'benchmark_driver'
24
- gem 'red-datasets'
24
+ gem 'red-arrow-numo-narray'
25
+ gem 'red-datasets-arrow'
26
+ gem 'simplecov'
25
27
  end