red_amber 0.2.1 → 0.2.3

Sign up to get free protection for your applications and to get access to all the features.
Files changed (58) hide show
  1. checksums.yaml +4 -4
  2. data/.rubocop.yml +15 -0
  3. data/CHANGELOG.md +170 -20
  4. data/Gemfile +4 -2
  5. data/README.md +121 -302
  6. data/benchmark/basic.yml +79 -0
  7. data/benchmark/combine.yml +63 -0
  8. data/benchmark/drop_nil.yml +15 -3
  9. data/benchmark/group.yml +33 -0
  10. data/benchmark/reshape.yml +27 -0
  11. data/benchmark/{csv_load_penguins.yml → rover/csv_load_penguins.yml} +3 -3
  12. data/benchmark/rover/flights.yml +23 -0
  13. data/benchmark/rover/penguins.yml +23 -0
  14. data/benchmark/rover/planes.yml +23 -0
  15. data/benchmark/rover/weather.yml +23 -0
  16. data/doc/DataFrame.md +611 -318
  17. data/doc/Vector.md +31 -36
  18. data/doc/image/basic_verbs.png +0 -0
  19. data/doc/image/dataframe/assign.png +0 -0
  20. data/doc/image/dataframe/assign_operation.png +0 -0
  21. data/doc/image/dataframe/drop.png +0 -0
  22. data/doc/image/dataframe/join.png +0 -0
  23. data/doc/image/dataframe/pick.png +0 -0
  24. data/doc/image/dataframe/pick_operation.png +0 -0
  25. data/doc/image/dataframe/remove.png +0 -0
  26. data/doc/image/dataframe/rename.png +0 -0
  27. data/doc/image/dataframe/rename_operation.png +0 -0
  28. data/doc/image/dataframe/reshaping_DataFrames.png +0 -0
  29. data/doc/image/dataframe/set_and_bind.png +0 -0
  30. data/doc/image/dataframe/slice.png +0 -0
  31. data/doc/image/dataframe/slice_operation.png +0 -0
  32. data/doc/image/dataframe_model.png +0 -0
  33. data/doc/image/group_operation.png +0 -0
  34. data/doc/image/replace-if_then.png +0 -0
  35. data/doc/image/reshaping_dataframe.png +0 -0
  36. data/doc/image/screenshot.png +0 -0
  37. data/doc/image/vector/binary_element_wise.png +0 -0
  38. data/doc/image/vector/unary_aggregation.png +0 -0
  39. data/doc/image/vector/unary_aggregation_w_option.png +0 -0
  40. data/doc/image/vector/unary_element_wise.png +0 -0
  41. data/lib/red_amber/data_frame.rb +16 -42
  42. data/lib/red_amber/data_frame_combinable.rb +283 -0
  43. data/lib/red_amber/data_frame_displayable.rb +58 -3
  44. data/lib/red_amber/data_frame_loadsave.rb +36 -0
  45. data/lib/red_amber/data_frame_reshaping.rb +8 -6
  46. data/lib/red_amber/data_frame_selectable.rb +9 -9
  47. data/lib/red_amber/data_frame_variable_operation.rb +27 -21
  48. data/lib/red_amber/group.rb +100 -17
  49. data/lib/red_amber/helper.rb +20 -30
  50. data/lib/red_amber/vector.rb +56 -30
  51. data/lib/red_amber/vector_functions.rb +0 -8
  52. data/lib/red_amber/vector_selectable.rb +9 -1
  53. data/lib/red_amber/vector_updatable.rb +61 -63
  54. data/lib/red_amber/version.rb +1 -1
  55. data/lib/red_amber.rb +2 -0
  56. data/red_amber.gemspec +1 -1
  57. metadata +32 -11
  58. data/doc/examples_of_red_amber.ipynb +0 -8979
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: d239a3fa90e5796fb695f8d3c4995d0a2178ea7c8c2789bed157e688902585cb
4
- data.tar.gz: 968c02294d24a3dabaa6e5128be0bcfad713e131df15850ac0ceb64c2883dcd0
3
+ metadata.gz: 6f70451aad21c0750fb2a0bfe165baf5862ac3837541135cf9d58df4ecd732ac
4
+ data.tar.gz: c31f143278bf3792bc03e0e727e12df72fca5a001b3d6c098c3f028df456e2f0
5
5
  SHA512:
6
- metadata.gz: d1c5ffd9650dd8c9e825514cd7e2ff4914690bd731ac262fca6cc17e56c1e312679689351a05fb741dccfb59377214706a8bf6ca6fe3237ca46fb623ae1b9f10
7
- data.tar.gz: f37c4aff9170cd5105737a9d2b3d827051254dcca6968b697f5ed3a70e1b2c3cb14303e88a9c342870d1447450a538e445d6f3d37de53591d3f6d13b87aebc16
6
+ metadata.gz: c5026422e8f0c0b1b1b25f6baa97b540287937a3c0d224a16ce13c17c16a11ccd54682682f17fdf5e176190f7db40bfda7bbe5659e89a212eed8f1bf9b3567e9
7
+ data.tar.gz: 82b500a1570b8fc8925a7c988bd7d3f3db677588537ee9f8d75f965ca98f5d730a0a55be5fa6637e3beeb4aed11b55a9afd3e4dfd52aee687e123f390f0d8d2b
data/.rubocop.yml CHANGED
@@ -60,9 +60,11 @@ Layout/LineLength:
60
60
  Metrics/AbcSize:
61
61
  Max: 30
62
62
  Exclude:
63
+ - 'lib/red_amber/data_frame_combinable.rb' # Max: 43
63
64
  - 'lib/red_amber/data_frame_displayable.rb' # Max: 55
64
65
  - 'lib/red_amber/data_frame_reshaping.rb' # Max 40.91
65
66
  - 'lib/red_amber/data_frame_selectable.rb' # Max: 51
67
+ - 'lib/red_amber/data_frame_variable_operation.rb' # Max: 30.15
66
68
  - 'lib/red_amber/vector_updatable.rb' # Max: 36
67
69
  - 'lib/red_amber/vector_selectable.rb' # Max: 33
68
70
 
@@ -79,13 +81,16 @@ Metrics/ClassLength:
79
81
  - 'test/**/*'
80
82
  - 'lib/red_amber/data_frame.rb' #Max: 131
81
83
  - 'lib/red_amber/vector.rb' #Max: 102
84
+ - 'lib/red_amber/group.rb' #Max: 103
82
85
 
83
86
  # Max: 7
84
87
  Metrics/CyclomaticComplexity:
85
88
  Max: 12
86
89
  Exclude:
90
+ - 'lib/red_amber/data_frame_combinable.rb' # Max: 15
87
91
  - 'lib/red_amber/data_frame_displayable.rb' # Max: 18
88
92
  - 'lib/red_amber/data_frame_selectable.rb' # Max: 14
93
+ - 'lib/red_amber/helper.rb' # Max: 15
89
94
  - 'lib/red_amber/vector_selectable.rb' # Max: 13
90
95
  - 'lib/red_amber/vector_updatable.rb' # Max: 14
91
96
 
@@ -93,6 +98,7 @@ Metrics/CyclomaticComplexity:
93
98
  Metrics/MethodLength:
94
99
  Max: 30
95
100
  Exclude:
101
+ - 'lib/red_amber/data_frame_combinable.rb' # Max: 38
96
102
  - 'lib/red_amber/data_frame_displayable.rb' # Max: 33
97
103
  - 'lib/red_amber/data_frame_selectable.rb' # Max: 38
98
104
  - 'lib/red_amber/data_frame_variable_operation.rb' # Max: 35
@@ -101,16 +107,25 @@ Metrics/MethodLength:
101
107
  Metrics/ModuleLength:
102
108
  Max: 100
103
109
  Exclude:
110
+ - 'lib/red_amber/data_frame_combinable.rb' # Max: 108
104
111
  - 'lib/red_amber/data_frame_displayable.rb' # Max: 132
105
112
  - 'lib/red_amber/data_frame_selectable.rb' # Max: 141
106
113
  - 'lib/red_amber/data_frame_variable_operation.rb' # Max: 110
107
114
  - 'lib/red_amber/vector_functions.rb' # Max: 114
108
115
 
116
+ # Max: 5
117
+ Metrics/ParameterLists:
118
+ Max: 6
119
+ # Exclude:
120
+ # - 'lib/red_amber/data_frame_combinable.rb' # Max: 6
121
+
109
122
  # Max: 8
110
123
  Metrics/PerceivedComplexity:
111
124
  Max: 13
112
125
  Exclude:
126
+ - 'lib/red_amber/data_frame_combinable.rb' # Max: 14
113
127
  - 'lib/red_amber/data_frame_selectable.rb' # Max: 14
128
+ - 'lib/red_amber/helper.rb' # Max: 15
114
129
  - 'lib/red_amber/vector_updatable.rb' # Max: 15
115
130
  - 'lib/red_amber/data_frame_displayable.rb' # Max: 19
116
131
 
data/CHANGELOG.md CHANGED
@@ -1,42 +1,188 @@
1
+ ## [0.2.4] - 2022-12-25 (unreleased)
2
+
3
+ ## [0.2.3] - 2022-11-16
4
+
5
+ - Bug fixes
6
+
7
+ - Fix DataFrame#to_s when DataFrame.size == 0 (#125)
8
+ - Remove unused lines in funcs (#128)
9
+ - Remove unused methods in helper (#128)
10
+ - Add test for invalid arg in DataFrame.new (#128)
11
+ - Add test for Vector#shift(0) (#128)
12
+ - Fix bugs for DataFrame#[], #pick and #drop with Range of Symbols and Symbol (#135)
13
+
14
+ - New features and improvements
15
+
16
+ - Upgrade dependency to Arrow 10.0.0 (#132)
17
+
18
+ Since 0.2.3, it is possible to initialize from objects that respond to `to_arrow`.
19
+ Arrays in Numo::NArray respond to `to_arrow` with Red Arrow Numo::NArray 0.0.6.
20
+ This feature is proposed by the Red Data Tools member @kojix2 and implemented by @kou.
21
+ I also made Vector respond to `to_arrow` and `to_arrow_array`.
22
+ It becomes a member of ducks ('quack quack'). Thanks!
23
+
24
+ - Change dev dependency to red-dataset-arrow (#117)
25
+ - Add dev dependency for red-arrow-numo-narray (#132)
26
+ - Support Numo::NArray in Vector.new (#132)
27
+ - Support Vector#to_arrow_array (#132)
28
+
29
+ - Update group (#118)
30
+ - Introduce new DataFrame group support (experimental)
31
+
32
+ This additional API will treat a grouped DataFrame as a list of DataFrames.
33
+ I think this API has pros such as:
34
+ - API is easy to understand and flexible.
35
+ - It has good compatibility with Ruby's primitive Enumerables.
36
+ - We can only use non hash-ed aggregation functions.
37
+ - Do not need grouped DataFrame state, nor `#ungroup` method.
38
+ - May be useful for concurrent operations.
39
+
40
+ This feature is implemented in Ruby, so it is pretty slow and experimental.
41
+ Use original Group API for practical purpose.
42
+
43
+ - `include Enumerable` to Group (experimental)
44
+ - Add Group#each, #inspect
45
+ - Refactor Group to align with Arrow
46
+
47
+ - Introduce DataFrame combining methods (#125)
48
+ - Introduce DataFrame#concatenate method
49
+ - Add DataFrame#merge method
50
+ - Add DataFrame#inner_join method
51
+ - Add DataFrame#full_join method
52
+ - Add DataFrame#left_join method
53
+ - Add DataFrame#right_join method
54
+ - Add DataFrame#semi_join method
55
+ - Add DataFrame#anti_join method
56
+ - Add DataFrame#intersect method
57
+ - Add DataFrame#union method
58
+ - Add DataFrame#setdiff method
59
+ - Rename #setdiff to #difference
60
+ - Support natural join in DataFrame#join
61
+ - Support partial join_key and renaming
62
+ - Fix DataFrame#join to merge key columns
63
+ - Add DataFrame#set_operable? method
64
+ - Add join/set/bind image to DataFrame.md
65
+ - Fix DataFrame#join, #right_semi, #right_anti (#128)
66
+
67
+ - Miscellaneous
68
+ - Return Vector in DataFrame#indices (#118)
69
+
70
+ - Improve tests/ci
71
+
72
+ - Improve CI
73
+ - Add CI test on macOS (#133)
74
+ - Enable bundler-cache on macOS (#128)
75
+ - Add install gobject introspection prior to glib in CI (#133)
76
+ This will stabilize CI system installation especially with cache.
77
+
78
+ - Rename workflows/test.yml to ci.yml (#133)
79
+ - Fix link in CI badge of README.md (#118)
80
+
81
+ - Add github action for coverage (#128)
82
+
83
+ - Add benchmark
84
+ - Add benchmarks with Rover (#118)
85
+ - Introduce benchmark suite (#134)
86
+ - Add benchmark for combining operations (#134)
87
+
88
+ - Measuring test coverage
89
+ - Add test coverage measurement (#128)
90
+
91
+ - Refactoring
92
+
93
+ - Remove redundant string escape in `test_vector_function` (#132)
94
+ - Refine tests to use `assert_equal_array` (#128)
95
+ - Rewrite Vector#replace (#128)
96
+
97
+ - Documentation
98
+
99
+ - Update README.md for installation (#126)
100
+ - Add clause that keys must be unique in doc. (#126)
101
+ - Rows should be called as 'records' (#126)
102
+ - Update Jupyter Notebook `83 examples of RedAmber` (#135)
103
+
104
+ - GitHub site
105
+
106
+ - Update Jupyter notebooks in Binder
107
+ - Change default branch name from 'master' to 'main' (#127)
108
+
109
+ - Thanks
110
+
111
+ Ruby Association Grant committee
112
+ It is a great honor that RedAmber was selected as a project of the Ruby Association Grant 2022.
113
+
114
+
115
+ ## [0.2.2] - 2022-10-04
116
+
117
+ - Bug fixes
118
+
119
+ - Return self when no replacement happens in Vector#replace. (#92)
120
+ - Limit n-digits in to_iruby. (#111)
121
+ - Fix displaying space in to_iruby. (#111)
122
+ - Raise error if key is duplicated. (#113)
123
+ - Fix DataFrame#pick/#drop with endless Range. (#113)
124
+ - Change type from dictionary to string in DataFrame reshaping methods. (#113)
125
+ - Fix arguments parser to accept Enumerator. (#114)
126
+
127
+ - New features and improvements
128
+
129
+ - Support making a data frame from an object that responds to `to_arrow`. (#106) [Patch by Kenta Murata]
130
+ - Introduce DataFrame#auto_cast (experimental feature) (#105)
131
+ - Change default name in DataFrame#transpose, #to_long, #to_wide. (#110)
132
+ - Add Vector#dictionary? method. (#113)
133
+ - Add display mode 'Plain' and 'Minimum'. (#113)
134
+ - Refactor code
135
+ - Refine test_vector_selectable. (#92)
136
+ - Refine test_vector_updatable. (#92)
137
+ - Refine Vector.new. (#113)
138
+ - Refine DataFrame#pick, #drop. (#113)
139
+
140
+ - Documents
141
+
142
+ - Update images. (#90, #105, #113)
143
+ - Update README to use simpler examples. (#112)
144
+ - Update README with a new screenshot example. (#113)
145
+
146
+ - GitHub site
147
+
148
+ - Update Jupyter notebooks in Binder (#88, #115)
149
+ - Move binder support to heronshoes/docker-stacks repository.
150
+ - Update README notebook on binder.
151
+ - Add examples_of_RedAmber notebook on binder.
152
+
153
+ - Start to use discussions.
154
+
155
+ - Thanks
156
+
157
+ - Kenta Murata
158
+
1
159
  ## [0.2.1] - 2022-09-07
2
160
 
3
- -Bug fixes
161
+ - Bug fixes
4
162
 
5
163
  - Fix `Vector#each` with block (#66)
6
164
  `Vector#each` will return value of each element with block.
7
-
8
165
  - Fix table format at size == 9 (#67)
9
-
10
166
  - Fix to support Vector in `DataFrame#assign` (#77)
11
-
12
167
  - Add `assert_delta` functionality for `assert_with_NaN` (#78)
13
-
14
168
  - Fix Vector#is_in when self is chunked (#79)
15
-
16
169
  - Fix Array type error (uint/int) (#79)
17
170
 
18
171
  - New features and improvements
19
172
 
20
173
  - Refine `DataFrame#indices` method (#67)
21
-
22
174
  - Update DataFrame reshaping methods (#73)
23
-
24
175
  - Change default option value of DataFrame reshaping
25
-
26
176
  - Change the order of import_cars example
27
177
 
28
178
  - Add `DataFrame#method_missing` to get column vector by method (#75)
29
-
30
179
  - Add `DataFrame#method_missing` to get column (#75)
31
180
 
32
181
  - Accept both args and block in `DataFrame#assign` (#75)
33
-
34
182
  - Accept indices in `DataFrame#pick` and `DataFrame#drop` (#76)
35
183
 
36
184
  - Add `DataFrame#slice_by` method (#77)
37
-
38
185
  - Add new Vector functions (#78)
39
-
40
186
  - Add inverse trigonometric function for Vector
41
187
  - `acos`
42
188
  - `asin`
@@ -49,12 +195,15 @@
49
195
 
50
196
  - Add binary function `Vector#logb`
51
197
 
52
- - Docker image and Jupyter Notebook (Thanks to @mrkn)
198
+ - Docker image and Jupyter Notebook [Thanks to Kenta Murata]
53
199
  - Add link to RubyData in README
54
200
  - Add link to interactive README by Binder
55
201
 
56
202
  - Update Jupyter Notebook `71 examples of RedAmber`
57
203
 
204
+ - Thanks
205
+
206
+ - Kenta Murata
58
207
 
59
208
  ## [0.2.0] - 2022-08-15
60
209
 
@@ -63,25 +212,19 @@
63
212
  - Bug fixes
64
213
 
65
214
  - Fix order of multiple group keys (#55)
66
-
67
215
  Only 1 group key comes to left. Other keys remain in right.
68
216
 
69
217
  - Remove optional `require` for rover (#55)
70
-
71
218
  Fix DataFrame.new for argument with Rover::DataFrame.
72
-
73
219
  - Fix occasional failure in CI (#59)
74
-
75
220
  Sometimes the CI test fails. I added -dev dependency
76
221
  in Arrow install by apt, not doing in bundler.
77
222
 
78
223
  - Fix calling :take in V#[] (#56)
79
-
80
224
  Fixed to call Arrow function :take instead of :array_take in Vector#take_by_vector. This will prevent the error below
81
225
  when called with Arrow::ChunkedArray.
82
226
 
83
227
  - Raise error renaming non existing key (#61)
84
-
85
228
  Add error when specified key does not exist.
86
229
 
87
230
  - Fix DataFrame#rename #assign by array (#65)
@@ -294,6 +437,13 @@
294
437
  - Documentation
295
438
  - Fix typo in DataFrame.md
296
439
 
440
+ - GitHub site
441
+ - Add gem and status badges in README. (#42) [Patch by kojix2]
442
+
443
+ - Thanks
444
+
445
+ - kojix2
446
+
297
447
  ## [0.1.5] - 2022-06-12 (experimental)
298
448
 
299
449
  - Bug fixes
data/Gemfile CHANGED
@@ -7,7 +7,7 @@ gemspec
7
7
  group :test do
8
8
  gem 'rake'
9
9
 
10
- gem 'red-parquet', '>= 9.0.0'
10
+ gem 'red-parquet', '~> 10.0.0'
11
11
  gem 'rover-df', '~> 0.3.0'
12
12
 
13
13
  gem 'rubocop'
@@ -21,5 +21,7 @@ group :test do
21
21
  gem 'yard'
22
22
 
23
23
  gem 'benchmark_driver'
24
- gem 'red-datasets'
24
+ gem 'red-arrow-numo-narray'
25
+ gem 'red-datasets-arrow'
26
+ gem 'simplecov'
25
27
  end