red_amber 0.2.1 → 0.2.3
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.rubocop.yml +15 -0
- data/CHANGELOG.md +170 -20
- data/Gemfile +4 -2
- data/README.md +121 -302
- data/benchmark/basic.yml +79 -0
- data/benchmark/combine.yml +63 -0
- data/benchmark/drop_nil.yml +15 -3
- data/benchmark/group.yml +33 -0
- data/benchmark/reshape.yml +27 -0
- data/benchmark/{csv_load_penguins.yml → rover/csv_load_penguins.yml} +3 -3
- data/benchmark/rover/flights.yml +23 -0
- data/benchmark/rover/penguins.yml +23 -0
- data/benchmark/rover/planes.yml +23 -0
- data/benchmark/rover/weather.yml +23 -0
- data/doc/DataFrame.md +611 -318
- data/doc/Vector.md +31 -36
- data/doc/image/basic_verbs.png +0 -0
- data/doc/image/dataframe/assign.png +0 -0
- data/doc/image/dataframe/assign_operation.png +0 -0
- data/doc/image/dataframe/drop.png +0 -0
- data/doc/image/dataframe/join.png +0 -0
- data/doc/image/dataframe/pick.png +0 -0
- data/doc/image/dataframe/pick_operation.png +0 -0
- data/doc/image/dataframe/remove.png +0 -0
- data/doc/image/dataframe/rename.png +0 -0
- data/doc/image/dataframe/rename_operation.png +0 -0
- data/doc/image/dataframe/reshaping_DataFrames.png +0 -0
- data/doc/image/dataframe/set_and_bind.png +0 -0
- data/doc/image/dataframe/slice.png +0 -0
- data/doc/image/dataframe/slice_operation.png +0 -0
- data/doc/image/dataframe_model.png +0 -0
- data/doc/image/group_operation.png +0 -0
- data/doc/image/replace-if_then.png +0 -0
- data/doc/image/reshaping_dataframe.png +0 -0
- data/doc/image/screenshot.png +0 -0
- data/doc/image/vector/binary_element_wise.png +0 -0
- data/doc/image/vector/unary_aggregation.png +0 -0
- data/doc/image/vector/unary_aggregation_w_option.png +0 -0
- data/doc/image/vector/unary_element_wise.png +0 -0
- data/lib/red_amber/data_frame.rb +16 -42
- data/lib/red_amber/data_frame_combinable.rb +283 -0
- data/lib/red_amber/data_frame_displayable.rb +58 -3
- data/lib/red_amber/data_frame_loadsave.rb +36 -0
- data/lib/red_amber/data_frame_reshaping.rb +8 -6
- data/lib/red_amber/data_frame_selectable.rb +9 -9
- data/lib/red_amber/data_frame_variable_operation.rb +27 -21
- data/lib/red_amber/group.rb +100 -17
- data/lib/red_amber/helper.rb +20 -30
- data/lib/red_amber/vector.rb +56 -30
- data/lib/red_amber/vector_functions.rb +0 -8
- data/lib/red_amber/vector_selectable.rb +9 -1
- data/lib/red_amber/vector_updatable.rb +61 -63
- data/lib/red_amber/version.rb +1 -1
- data/lib/red_amber.rb +2 -0
- data/red_amber.gemspec +1 -1
- metadata +32 -11
- data/doc/examples_of_red_amber.ipynb +0 -8979
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 6f70451aad21c0750fb2a0bfe165baf5862ac3837541135cf9d58df4ecd732ac
|
4
|
+
data.tar.gz: c31f143278bf3792bc03e0e727e12df72fca5a001b3d6c098c3f028df456e2f0
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: c5026422e8f0c0b1b1b25f6baa97b540287937a3c0d224a16ce13c17c16a11ccd54682682f17fdf5e176190f7db40bfda7bbe5659e89a212eed8f1bf9b3567e9
|
7
|
+
data.tar.gz: 82b500a1570b8fc8925a7c988bd7d3f3db677588537ee9f8d75f965ca98f5d730a0a55be5fa6637e3beeb4aed11b55a9afd3e4dfd52aee687e123f390f0d8d2b
|
data/.rubocop.yml
CHANGED
@@ -60,9 +60,11 @@ Layout/LineLength:
|
|
60
60
|
Metrics/AbcSize:
|
61
61
|
Max: 30
|
62
62
|
Exclude:
|
63
|
+
- 'lib/red_amber/data_frame_combinable.rb' # Max: 43
|
63
64
|
- 'lib/red_amber/data_frame_displayable.rb' # Max: 55
|
64
65
|
- 'lib/red_amber/data_frame_reshaping.rb' # Max 40.91
|
65
66
|
- 'lib/red_amber/data_frame_selectable.rb' # Max: 51
|
67
|
+
- 'lib/red_amber/data_frame_variable_operation.rb' # Max: 30.15
|
66
68
|
- 'lib/red_amber/vector_updatable.rb' # Max: 36
|
67
69
|
- 'lib/red_amber/vector_selectable.rb' # Max: 33
|
68
70
|
|
@@ -79,13 +81,16 @@ Metrics/ClassLength:
|
|
79
81
|
- 'test/**/*'
|
80
82
|
- 'lib/red_amber/data_frame.rb' #Max: 131
|
81
83
|
- 'lib/red_amber/vector.rb' #Max: 102
|
84
|
+
- 'lib/red_amber/group.rb' #Max: 103
|
82
85
|
|
83
86
|
# Max: 7
|
84
87
|
Metrics/CyclomaticComplexity:
|
85
88
|
Max: 12
|
86
89
|
Exclude:
|
90
|
+
- 'lib/red_amber/data_frame_combinable.rb' # Max: 15
|
87
91
|
- 'lib/red_amber/data_frame_displayable.rb' # Max: 18
|
88
92
|
- 'lib/red_amber/data_frame_selectable.rb' # Max: 14
|
93
|
+
- 'lib/red_amber/helper.rb' # Max: 15
|
89
94
|
- 'lib/red_amber/vector_selectable.rb' # Max: 13
|
90
95
|
- 'lib/red_amber/vector_updatable.rb' # Max: 14
|
91
96
|
|
@@ -93,6 +98,7 @@ Metrics/CyclomaticComplexity:
|
|
93
98
|
Metrics/MethodLength:
|
94
99
|
Max: 30
|
95
100
|
Exclude:
|
101
|
+
- 'lib/red_amber/data_frame_combinable.rb' # Max: 38
|
96
102
|
- 'lib/red_amber/data_frame_displayable.rb' # Max: 33
|
97
103
|
- 'lib/red_amber/data_frame_selectable.rb' # Max: 38
|
98
104
|
- 'lib/red_amber/data_frame_variable_operation.rb' # Max: 35
|
@@ -101,16 +107,25 @@ Metrics/MethodLength:
|
|
101
107
|
Metrics/ModuleLength:
|
102
108
|
Max: 100
|
103
109
|
Exclude:
|
110
|
+
- 'lib/red_amber/data_frame_combinable.rb' # Max: 108
|
104
111
|
- 'lib/red_amber/data_frame_displayable.rb' # Max: 132
|
105
112
|
- 'lib/red_amber/data_frame_selectable.rb' # Max: 141
|
106
113
|
- 'lib/red_amber/data_frame_variable_operation.rb' # Max: 110
|
107
114
|
- 'lib/red_amber/vector_functions.rb' # Max: 114
|
108
115
|
|
116
|
+
# Max: 5
|
117
|
+
Metrics/ParameterLists:
|
118
|
+
Max: 6
|
119
|
+
# Exclude:
|
120
|
+
# - 'lib/red_amber/data_frame_combinable.rb' # Max: 6
|
121
|
+
|
109
122
|
# Max: 8
|
110
123
|
Metrics/PerceivedComplexity:
|
111
124
|
Max: 13
|
112
125
|
Exclude:
|
126
|
+
- 'lib/red_amber/data_frame_combinable.rb' # Max: 14
|
113
127
|
- 'lib/red_amber/data_frame_selectable.rb' # Max: 14
|
128
|
+
- 'lib/red_amber/helper.rb' # Max: 15
|
114
129
|
- 'lib/red_amber/vector_updatable.rb' # Max: 15
|
115
130
|
- 'lib/red_amber/data_frame_displayable.rb' # Max: 19
|
116
131
|
|
data/CHANGELOG.md
CHANGED
@@ -1,42 +1,188 @@
|
|
1
|
+
## [0.2.4] - 2022-12-25 (unreleased)
|
2
|
+
|
3
|
+
## [0.2.3] - 2022-11-16
|
4
|
+
|
5
|
+
- Bug fixes
|
6
|
+
|
7
|
+
- Fix DataFrame#to_s when DataFrame.size == 0 (#125)
|
8
|
+
- Remove unused lines in funcs (#128)
|
9
|
+
- Remove unused methods in helper (#128)
|
10
|
+
- Add test for invalid arg in DataFrame.new (#128)
|
11
|
+
- Add test for Vector#shift(0) (#128)
|
12
|
+
- Fix bugs for DataFrame#[], #pick and #drop with Range of Symbols and Symbol (#135)
|
13
|
+
|
14
|
+
- New features and improvements
|
15
|
+
|
16
|
+
- Upgrade dependency to Arrow 10.0.0 (#132)
|
17
|
+
|
18
|
+
It is possible to initialize from objects that respond to `to_arrow` since 0.2.3.
|
19
|
+
Arrays in Numo::NArray respond to `to_arrow` with Red Arrow Numo::NArray 0.0.6.
|
20
|
+
This feature is proposed by the Red Data Tools member @kojix2 and implemented by @kou.
|
21
|
+
I also made Vector respond to `to_arrow` and `to_arrow_array`.
|
22
|
+
It becomes a member of ducks ('quack quack'). Thanks!
|
23
|
+
|
24
|
+
- Change dev dependency to red-datasets-arrow (#117)
|
25
|
+
- Add dev dependency for red-arrow-numo-narray (#132)
|
26
|
+
- Support Numo::NArray in Vector.new (#132)
|
27
|
+
- Support Vector#to_arrow_array (#132)
|
28
|
+
|
29
|
+
- Update group (#118)
|
30
|
+
- Introduce new DataFrame group support (experimental)
|
31
|
+
|
32
|
+
This additional API will treat a grouped DataFrame as a list of DataFrames.
|
33
|
+
I think this API has pros such as:
|
34
|
+
- API is easy to understand and flexible.
|
35
|
+
- It has good compatibility with Ruby's primitive Enumerables.
|
36
|
+
- We can only use non hash-ed aggregation functions.
|
37
|
+
- Do not need grouped DataFrame state, nor `#ungroup` method.
|
38
|
+
- May be useful for concurrent operations.
|
39
|
+
|
40
|
+
This feature is implemented by Ruby, so it is pretty slow and experimental.
|
41
|
+
Use original Group API for practical purpose.
|
42
|
+
|
43
|
+
- `include Enumerable` to Group (experimental)
|
44
|
+
- Add Group#each, #inspect
|
45
|
+
- Refactor Group to align with Arrow
|
46
|
+
|
47
|
+
- Introduce DataFrame combining methods (#125)
|
48
|
+
- Introduce DataFrame#concatenate method
|
49
|
+
- Add DataFrame#merge method
|
50
|
+
- Add DataFrame#inner_join method
|
51
|
+
- Add DataFrame#full_join method
|
52
|
+
- Add DataFrame#left_join method
|
53
|
+
- Add DataFrame#right_join method
|
54
|
+
- Add DataFrame#semi_join method
|
55
|
+
- Add DataFrame#anti_join method
|
56
|
+
- Add DataFrame#intersect method
|
57
|
+
- Add DataFrame#union method
|
58
|
+
- Add DataFrame#setdiff method
|
59
|
+
- Rename #setdiff to #difference
|
60
|
+
- Support natural join in DataFrame#join
|
61
|
+
- Support partial join_key and renaming
|
62
|
+
- Fix DataFrame#join to merge key columns
|
63
|
+
- Add DataFrame#set_operable? method
|
64
|
+
- Add join/set/bind image to DataFrame.md
|
65
|
+
- Fix DataFrame#join, #right_semi, #right_anti (#128)
|
66
|
+
|
67
|
+
- Miscellaneous
|
68
|
+
- Return Vector in DataFrame#indices (#118)
|
69
|
+
|
70
|
+
- Improve tests/ci
|
71
|
+
|
72
|
+
- Improve CI
|
73
|
+
- Add CI test on macOS (#133)
|
74
|
+
- Enable bundler-cache on macOS (#128)
|
75
|
+
- Add install gobject introspection prior to glib in CI (#133)
|
76
|
+
This will stabilize CI system installation especially with cache.
|
77
|
+
|
78
|
+
- Rename workflows/test.yml to ci.yml (#133)
|
79
|
+
- Fix link in CI badge of README.md (#118)
|
80
|
+
|
81
|
+
- Add github action for coverage (#128)
|
82
|
+
|
83
|
+
- Add benchmark
|
84
|
+
- Add benchmarks with Rover (#118)
|
85
|
+
- Introduce benchmark suite (#134)
|
86
|
+
- Add benchmark for combining operations (#134)
|
87
|
+
|
88
|
+
- Measuring test coverage
|
89
|
+
- Add test coverage measurement (#128)
|
90
|
+
|
91
|
+
- Refactoring
|
92
|
+
|
93
|
+
- Remove redundant string escape in `test_vector_function` (#132)
|
94
|
+
- Refine tests to use `assert_equal_array` (#128)
|
95
|
+
- Rewrite Vector#replace (#128)
|
96
|
+
|
97
|
+
- Documentation
|
98
|
+
|
99
|
+
- Update README.md for installation (#126)
|
100
|
+
- Add clause that keys must be unique in doc. (#126)
|
101
|
+
- Rows should be called as 'records' (#126)
|
102
|
+
- Update Jupyter Notebook `83 examples of RedAmber` (#135)
|
103
|
+
|
104
|
+
- GitHub site
|
105
|
+
|
106
|
+
- Update Jupyter notebooks in Binder
|
107
|
+
- Change default branch name from 'master' to 'main' (#127)
|
108
|
+
|
109
|
+
- Thanks
|
110
|
+
|
111
|
+
Ruby Association Grant committee
|
112
|
+
It is a great honor that RedAmber was selected as a project of the Ruby Association Grant 2022.
|
113
|
+
|
114
|
+
|
115
|
+
## [0.2.2] - 2022-10-04
|
116
|
+
|
117
|
+
- Bug fixes
|
118
|
+
|
119
|
+
- Return self when no replacement happens in Vector#replace. (#92)
|
120
|
+
- Limit n-digits in to_iruby. (#111)
|
121
|
+
- Fix displaying space in to_iruby. (#111)
|
122
|
+
- Raise error if key is duplicated. (#113)
|
123
|
+
- Fix DataFrame#pick/#drop with endless Range. (#113)
|
124
|
+
- Change type from dictionary to string in DataFrame reshaping methods. (#113)
|
125
|
+
- Fix arguments parser to accept Enumerator. (#114)
|
126
|
+
|
127
|
+
- New features and improvements
|
128
|
+
|
129
|
+
- Support making a data frame from an object that responds to `to_arrow`. (#106) [Patch by Kenta Murata]
|
130
|
+
- Introduce DataFrame#auto_cast (experimental feature) (#105)
|
131
|
+
- Change default name in DataFrame#transpose, #to_long, #to_wide. (#110)
|
132
|
+
- Add Vector#dictionary? method. (#113)
|
133
|
+
- Add display mode 'Plain' and 'Minimum'. (#113)
|
134
|
+
- Refactor code
|
135
|
+
- Refine test_vector_selectable. (#92)
|
136
|
+
- Refine test_vector_updatable. (#92)
|
137
|
+
- Refine Vector.new. (#113)
|
138
|
+
- Refine DataFrame#pick, #drop. (#113)
|
139
|
+
|
140
|
+
- Documents
|
141
|
+
|
142
|
+
- Update images. (#90, #105, #113)
|
143
|
+
- Update README to use simpler examples. (#112)
|
144
|
+
- Update README with a new screenshot example. (#113)
|
145
|
+
|
146
|
+
- GitHub site
|
147
|
+
|
148
|
+
- Update Jupyter notebooks in Binder (#88, #115)
|
149
|
+
- Move binder support to heronshoes/docker-stacks repository.
|
150
|
+
- Update README notebook on binder.
|
151
|
+
- Add examples_of_RedAmber notebook on binder.
|
152
|
+
|
153
|
+
- Start to use discussions.
|
154
|
+
|
155
|
+
- Thanks
|
156
|
+
|
157
|
+
- Kenta Murata
|
158
|
+
|
1
159
|
## [0.2.1] - 2022-09-07
|
2
160
|
|
3
|
-
-Bug fixes
|
161
|
+
- Bug fixes
|
4
162
|
|
5
163
|
- Fix `Vector#each` with block (#66)
|
6
164
|
`Vector#each` will return value of each element with block.
|
7
|
-
|
8
165
|
- Fix table format at size == 9 (#67)
|
9
|
-
|
10
166
|
- Fix to support Vector in `DataFrame#assign` (#77)
|
11
|
-
|
12
167
|
- Add `assert_delta` functionality for `assert_with_NaN` (#78)
|
13
|
-
|
14
168
|
- Fix Vector#is_in when self is chunked (#79)
|
15
|
-
|
16
169
|
- Fix Array type error (uint/int) (#79)
|
17
170
|
|
18
171
|
- New features and improvements
|
19
172
|
|
20
173
|
- Refine `DataFrame#indices` method (#67)
|
21
|
-
|
22
174
|
- Update DataFrame reshaping methods (#73)
|
23
|
-
|
24
175
|
- Change default option value of DataFrame reshaping
|
25
|
-
|
26
176
|
- Change the order of import_cars example
|
27
177
|
|
28
178
|
- Add `DataFrame#method_missing` to get column vector by method (#75)
|
29
|
-
|
30
179
|
- Add `DataFrame#method_missing` to get column (#75)
|
31
180
|
|
32
181
|
- Accept both args and block in `DataFrame#assign` (#75)
|
33
|
-
|
34
182
|
- Accept indices in `DataFrame#pick` and `DataFrame#drop` (#76)
|
35
183
|
|
36
184
|
- Add `DataFrame#slice_by` method (#77)
|
37
|
-
|
38
185
|
- Add new Vector functions (#78)
|
39
|
-
|
40
186
|
- Add inverse trigonometric function for Vector
|
41
187
|
- `acos`
|
42
188
|
- `asin`
|
@@ -49,12 +195,15 @@
|
|
49
195
|
|
50
196
|
- Add binary function `Vector#logb`
|
51
197
|
|
52
|
-
- Docker image and Jupyter Notebook
|
198
|
+
- Docker image and Jupyter Notebook [Thanks to Kenta Murata]
|
53
199
|
- Add link to RubyData in README
|
54
200
|
- Add link to interactive README by Binder
|
55
201
|
|
56
202
|
- Update Jupyter Notebook `71 examples of RedAmber`
|
57
203
|
|
204
|
+
- Thanks
|
205
|
+
|
206
|
+
- Kenta Murata
|
58
207
|
|
59
208
|
## [0.2.0] - 2022-08-15
|
60
209
|
|
@@ -63,25 +212,19 @@
|
|
63
212
|
- Bug fixes
|
64
213
|
|
65
214
|
- Fix order of multiple group keys (#55)
|
66
|
-
|
67
215
|
Only 1 group key comes to left. Other keys remain in right.
|
68
216
|
|
69
217
|
- Remove optional `require` for rover (#55)
|
70
|
-
|
71
218
|
Fix DataFrame.new for argument with Rover::DataFrame.
|
72
|
-
|
73
219
|
- Fix occasional failure in CI (#59)
|
74
|
-
|
75
220
|
Sometimes the CI test fails. I added -dev dependency
|
76
221
|
in Arrow install by apt, not doing in bundler.
|
77
222
|
|
78
223
|
- Fix calling :take in V#[] (#56)
|
79
|
-
|
80
224
|
Fixed to call Arrow function :take instead of :array_take in Vector#take_by_vector. This will prevent the error below
|
81
225
|
when called with Arrow::ChunkedArray.
|
82
226
|
|
83
227
|
- Raise error renaming non existing key (#61)
|
84
|
-
|
85
228
|
Add error when specified key does not exist.
|
86
229
|
|
87
230
|
- Fix DataFrame#rename #assign by array (#65)
|
@@ -294,6 +437,13 @@
|
|
294
437
|
- Documentation
|
295
438
|
- Fix typo in DataFrame.md
|
296
439
|
|
440
|
+
- GitHub site
|
441
|
+
- Add gem and status badges in README. (#42) [Patch by kojix2]
|
442
|
+
|
443
|
+
- Thanks
|
444
|
+
|
445
|
+
- kojix2
|
446
|
+
|
297
447
|
## [0.1.5] - 2022-06-12 (experimental)
|
298
448
|
|
299
449
|
- Bug fixes
|
data/Gemfile
CHANGED
@@ -7,7 +7,7 @@ gemspec
|
|
7
7
|
group :test do
|
8
8
|
gem 'rake'
|
9
9
|
|
10
|
-
gem 'red-parquet', '
|
10
|
+
gem 'red-parquet', '~> 10.0.0'
|
11
11
|
gem 'rover-df', '~> 0.3.0'
|
12
12
|
|
13
13
|
gem 'rubocop'
|
@@ -21,5 +21,7 @@ group :test do
|
|
21
21
|
gem 'yard'
|
22
22
|
|
23
23
|
gem 'benchmark_driver'
|
24
|
-
gem 'red-
|
24
|
+
gem 'red-arrow-numo-narray'
|
25
|
+
gem 'red-datasets-arrow'
|
26
|
+
gem 'simplecov'
|
25
27
|
end
|