red_amber 0.2.1 → 0.2.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.rubocop.yml +15 -0
- data/CHANGELOG.md +170 -20
- data/Gemfile +4 -2
- data/README.md +121 -302
- data/benchmark/basic.yml +79 -0
- data/benchmark/combine.yml +63 -0
- data/benchmark/drop_nil.yml +15 -3
- data/benchmark/group.yml +33 -0
- data/benchmark/reshape.yml +27 -0
- data/benchmark/{csv_load_penguins.yml → rover/csv_load_penguins.yml} +3 -3
- data/benchmark/rover/flights.yml +23 -0
- data/benchmark/rover/penguins.yml +23 -0
- data/benchmark/rover/planes.yml +23 -0
- data/benchmark/rover/weather.yml +23 -0
- data/doc/DataFrame.md +611 -318
- data/doc/Vector.md +31 -36
- data/doc/image/basic_verbs.png +0 -0
- data/doc/image/dataframe/assign.png +0 -0
- data/doc/image/dataframe/assign_operation.png +0 -0
- data/doc/image/dataframe/drop.png +0 -0
- data/doc/image/dataframe/join.png +0 -0
- data/doc/image/dataframe/pick.png +0 -0
- data/doc/image/dataframe/pick_operation.png +0 -0
- data/doc/image/dataframe/remove.png +0 -0
- data/doc/image/dataframe/rename.png +0 -0
- data/doc/image/dataframe/rename_operation.png +0 -0
- data/doc/image/dataframe/reshaping_DataFrames.png +0 -0
- data/doc/image/dataframe/set_and_bind.png +0 -0
- data/doc/image/dataframe/slice.png +0 -0
- data/doc/image/dataframe/slice_operation.png +0 -0
- data/doc/image/dataframe_model.png +0 -0
- data/doc/image/group_operation.png +0 -0
- data/doc/image/replace-if_then.png +0 -0
- data/doc/image/reshaping_dataframe.png +0 -0
- data/doc/image/screenshot.png +0 -0
- data/doc/image/vector/binary_element_wise.png +0 -0
- data/doc/image/vector/unary_aggregation.png +0 -0
- data/doc/image/vector/unary_aggregation_w_option.png +0 -0
- data/doc/image/vector/unary_element_wise.png +0 -0
- data/lib/red_amber/data_frame.rb +16 -42
- data/lib/red_amber/data_frame_combinable.rb +283 -0
- data/lib/red_amber/data_frame_displayable.rb +58 -3
- data/lib/red_amber/data_frame_loadsave.rb +36 -0
- data/lib/red_amber/data_frame_reshaping.rb +8 -6
- data/lib/red_amber/data_frame_selectable.rb +9 -9
- data/lib/red_amber/data_frame_variable_operation.rb +27 -21
- data/lib/red_amber/group.rb +100 -17
- data/lib/red_amber/helper.rb +20 -30
- data/lib/red_amber/vector.rb +56 -30
- data/lib/red_amber/vector_functions.rb +0 -8
- data/lib/red_amber/vector_selectable.rb +9 -1
- data/lib/red_amber/vector_updatable.rb +61 -63
- data/lib/red_amber/version.rb +1 -1
- data/lib/red_amber.rb +2 -0
- data/red_amber.gemspec +1 -1
- metadata +32 -11
- data/doc/examples_of_red_amber.ipynb +0 -8979
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 6f70451aad21c0750fb2a0bfe165baf5862ac3837541135cf9d58df4ecd732ac
|
4
|
+
data.tar.gz: c31f143278bf3792bc03e0e727e12df72fca5a001b3d6c098c3f028df456e2f0
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: c5026422e8f0c0b1b1b25f6baa97b540287937a3c0d224a16ce13c17c16a11ccd54682682f17fdf5e176190f7db40bfda7bbe5659e89a212eed8f1bf9b3567e9
|
7
|
+
data.tar.gz: 82b500a1570b8fc8925a7c988bd7d3f3db677588537ee9f8d75f965ca98f5d730a0a55be5fa6637e3beeb4aed11b55a9afd3e4dfd52aee687e123f390f0d8d2b
|
data/.rubocop.yml
CHANGED
@@ -60,9 +60,11 @@ Layout/LineLength:
|
|
60
60
|
Metrics/AbcSize:
|
61
61
|
Max: 30
|
62
62
|
Exclude:
|
63
|
+
- 'lib/red_amber/data_frame_combinable.rb' # Max: 43
|
63
64
|
- 'lib/red_amber/data_frame_displayable.rb' # Max: 55
|
64
65
|
- 'lib/red_amber/data_frame_reshaping.rb' # Max 40.91
|
65
66
|
- 'lib/red_amber/data_frame_selectable.rb' # Max: 51
|
67
|
+
- 'lib/red_amber/data_frame_variable_operation.rb' # Max: 30.15
|
66
68
|
- 'lib/red_amber/vector_updatable.rb' # Max: 36
|
67
69
|
- 'lib/red_amber/vector_selectable.rb' # Max: 33
|
68
70
|
|
@@ -79,13 +81,16 @@ Metrics/ClassLength:
|
|
79
81
|
- 'test/**/*'
|
80
82
|
- 'lib/red_amber/data_frame.rb' #Max: 131
|
81
83
|
- 'lib/red_amber/vector.rb' #Max: 102
|
84
|
+
- 'lib/red_amber/group.rb' #Max: 103
|
82
85
|
|
83
86
|
# Max: 7
|
84
87
|
Metrics/CyclomaticComplexity:
|
85
88
|
Max: 12
|
86
89
|
Exclude:
|
90
|
+
- 'lib/red_amber/data_frame_combinable.rb' # Max: 15
|
87
91
|
- 'lib/red_amber/data_frame_displayable.rb' # Max: 18
|
88
92
|
- 'lib/red_amber/data_frame_selectable.rb' # Max: 14
|
93
|
+
- 'lib/red_amber/helper.rb' # Max: 15
|
89
94
|
- 'lib/red_amber/vector_selectable.rb' # Max: 13
|
90
95
|
- 'lib/red_amber/vector_updatable.rb' # Max: 14
|
91
96
|
|
@@ -93,6 +98,7 @@ Metrics/CyclomaticComplexity:
|
|
93
98
|
Metrics/MethodLength:
|
94
99
|
Max: 30
|
95
100
|
Exclude:
|
101
|
+
- 'lib/red_amber/data_frame_combinable.rb' # Max: 38
|
96
102
|
- 'lib/red_amber/data_frame_displayable.rb' # Max: 33
|
97
103
|
- 'lib/red_amber/data_frame_selectable.rb' # Max: 38
|
98
104
|
- 'lib/red_amber/data_frame_variable_operation.rb' # Max: 35
|
@@ -101,16 +107,25 @@ Metrics/MethodLength:
|
|
101
107
|
Metrics/ModuleLength:
|
102
108
|
Max: 100
|
103
109
|
Exclude:
|
110
|
+
- 'lib/red_amber/data_frame_combinable.rb' # Max: 108
|
104
111
|
- 'lib/red_amber/data_frame_displayable.rb' # Max: 132
|
105
112
|
- 'lib/red_amber/data_frame_selectable.rb' # Max: 141
|
106
113
|
- 'lib/red_amber/data_frame_variable_operation.rb' # Max: 110
|
107
114
|
- 'lib/red_amber/vector_functions.rb' # Max: 114
|
108
115
|
|
116
|
+
# Max: 5
|
117
|
+
Metrics/ParameterLists:
|
118
|
+
Max: 6
|
119
|
+
# Exclude:
|
120
|
+
# - 'lib/red_amber/data_frame_combinable.rb' # Max: 6
|
121
|
+
|
109
122
|
# Max: 8
|
110
123
|
Metrics/PerceivedComplexity:
|
111
124
|
Max: 13
|
112
125
|
Exclude:
|
126
|
+
- 'lib/red_amber/data_frame_combinable.rb' # Max: 14
|
113
127
|
- 'lib/red_amber/data_frame_selectable.rb' # Max: 14
|
128
|
+
- 'lib/red_amber/helper.rb' # Max: 15
|
114
129
|
- 'lib/red_amber/vector_updatable.rb' # Max: 15
|
115
130
|
- 'lib/red_amber/data_frame_displayable.rb' # Max: 19
|
116
131
|
|
data/CHANGELOG.md
CHANGED
@@ -1,42 +1,188 @@
|
|
1
|
+
## [0.2.4] - 2022-12-25 (unreleased)
|
2
|
+
|
3
|
+
## [0.2.3] - 2022-11-16
|
4
|
+
|
5
|
+
- Bug fixes
|
6
|
+
|
7
|
+
- Fix DataFrame#to_s when DataFrame.size == 0 (#125)
|
8
|
+
- Remove unused lines in funcs (#128)
|
9
|
+
- Remove unused methods in helper (#128)
|
10
|
+
- Add test for invalid arg in DataFrame.new (#128)
|
11
|
+
- Add test for Vector#shift(0) (#128)
|
12
|
+
- Fix bugs for DataFrame#[], #pick and #drop with Range of Symbols and Symbol (#135)
|
13
|
+
|
14
|
+
- New features and improvements
|
15
|
+
|
16
|
+
- Upgrade dependency to Arrow 10.0.0 (#132)
|
17
|
+
|
18
|
+
It is possible to initialize from objects that respond to `to_arrow` since 0.2.3.
|
19
|
+
Arrays in Numo::NArray respond to `to_arrow` with Red Arrow Numo::NArray 0.0.6.
|
20
|
+
This feature is proposed by the Red Data Tools member @kojix2 and implemented by @kou.
|
21
|
+
I also made Vector respond to `to_arrow` and `to_arrow_array`.
|
22
|
+
It becomes a member of ducks ('quack quack'). Thanks!
|
23
|
+
|
24
|
+
- Change dev dependency to red-dataset-arrow (#117)
|
25
|
+
- Add dev dependency for red-arrow-numo-narray (#132)
|
26
|
+
- Support Numo::NArray in Vector.new (#132)
|
27
|
+
- Support Vector#to_arrow_array (#132)
|
28
|
+
|
29
|
+
- Update group (#118)
|
30
|
+
- Introduce new DataFrame group support (experimental)
|
31
|
+
|
32
|
+
This additional API will treat a grouped DataFrame as a list of DataFrames.
|
33
|
+
I think this API has pros such as:
|
34
|
+
- API is easy to understand and flexible.
|
35
|
+
- It has good compatibility with Ruby's primitive Enumerables.
|
36
|
+
- We can only use non hash-ed aggregation functions.
|
37
|
+
- Do not need grouped DataFrame state, nor `#ungroup` method.
|
38
|
+
- May be useful for concurrent operations.
|
39
|
+
|
40
|
+
This feature is implemented by Ruby, so it is pretty slow and experimental.
|
41
|
+
Use original Group API for practical purpose.
|
42
|
+
|
43
|
+
- `include Enumerable` to Group (experimental)
|
44
|
+
- Add Group#each, #inspect
|
45
|
+
- Refactor Group to align with Arrow
|
46
|
+
|
47
|
+
- Introduce DataFrame combining methods (#125)
|
48
|
+
- Introduce DataFrame#concatenate method
|
49
|
+
- Add DataFrame#merge method
|
50
|
+
- Add DataFrame#inner_join method
|
51
|
+
- Add DataFrame#full_join method
|
52
|
+
- Add DataFrame#left_join method
|
53
|
+
- Add DataFrame#right_join method
|
54
|
+
- Add DataFrame#semi_join method
|
55
|
+
- Add DataFrame#anti_join method
|
56
|
+
- Add DataFrame#intersect method
|
57
|
+
- Add DataFrame#union method
|
58
|
+
- Add DataFrame#setdiff method
|
59
|
+
- Rename #setdiff to #difference
|
60
|
+
- Support natural join in DataFrame#join
|
61
|
+
- Support partial join_key and renaming
|
62
|
+
- Fix DataFrame#join to merge key columns
|
63
|
+
- Add DataFrame#set_operable? method
|
64
|
+
- Add join/set/bind image to DataFrame.md
|
65
|
+
- Fix DataFrame#join, #right_semi, #right_anti (#128)
|
66
|
+
|
67
|
+
- Miscellaneous
|
68
|
+
- Return Vector in DataFrame#indices (#118)
|
69
|
+
|
70
|
+
- Improve tests/ci
|
71
|
+
|
72
|
+
- Improve CI
|
73
|
+
- Add CI test on macOS (#133)
|
74
|
+
- Enable bundler-cache on macOS (#128)
|
75
|
+
- Add install gobject introspection prior to glib in CI (#133)
|
76
|
+
This will stabilize CI system installation especially with cache.
|
77
|
+
|
78
|
+
- Rename workflows/test.yml to ci.yml (#133)
|
79
|
+
- Fix link in CI badge of README.md (#118)
|
80
|
+
|
81
|
+
- Add github action for coverage (#128)
|
82
|
+
|
83
|
+
- Add benchmark
|
84
|
+
- Add benchmarks with Rover (#118)
|
85
|
+
- Introduce benchmark suite (#134)
|
86
|
+
- Add benchmark for combining operations (#134)
|
87
|
+
|
88
|
+
- Measuring test coverage
|
89
|
+
- Add test coverage measurement (#128)
|
90
|
+
|
91
|
+
- Refactoring
|
92
|
+
|
93
|
+
- Remove redundant string escape in `test_vector_function` (#132)
|
94
|
+
- Refine tests to use `assert_equal_array` (#128)
|
95
|
+
- Rewrite Vector#replace (#128)
|
96
|
+
|
97
|
+
- Documentation
|
98
|
+
|
99
|
+
- Update README.md for installation (#126)
|
100
|
+
- Add clause that keys must be unique in doc. (#126)
|
101
|
+
- Rows should be called as 'records' (#126)
|
102
|
+
- Update Jupyter Notebook `83 examples of RedAmber` (#135)
|
103
|
+
|
104
|
+
- GitHub site
|
105
|
+
|
106
|
+
- Update Jupyter notebooks in Binder
|
107
|
+
- Change default branch name from 'master' to 'main' (#127)
|
108
|
+
|
109
|
+
- Thanks
|
110
|
+
|
111
|
+
Ruby Association Grant committee
|
112
|
+
It is a great honor that RedAmber was selected as a project of the Ruby Association Grant 2022.
|
113
|
+
|
114
|
+
|
115
|
+
## [0.2.2] - 2022-10-04
|
116
|
+
|
117
|
+
- Bug fixes
|
118
|
+
|
119
|
+
- Return self when no replacement happens in Vector#replace. (#92)
|
120
|
+
- Limit n-digits in to_iruby. (#111)
|
121
|
+
- Fix displaying space in to_iruby. (#111)
|
122
|
+
- Raise error if key is duplicated. (#113)
|
123
|
+
- Fix DataFrame#pick/#drop with endless Range. (#113)
|
124
|
+
- Change type from dictionary to string in DataFrame reshaping methods. (#113)
|
125
|
+
- Fix arguments parser to accept Enumerator. (#114)
|
126
|
+
|
127
|
+
- New features and improvements
|
128
|
+
|
129
|
+
- Support making a data frame from an object that responds to `to_arrow`. (#106) [Patch by Kenta Murata]
|
130
|
+
- Introduce DataFrame#auto_cast (experimental feature) (#105)
|
131
|
+
- Change default name in DataFrame#transpose, #to_long, #to_wide. (#110)
|
132
|
+
- Add Vector#dictionary? method. (#113)
|
133
|
+
- Add display mode 'Plain' and 'Minimum'. (#113)
|
134
|
+
- Refactor code
|
135
|
+
- Refine test_vector_selectable. (#92)
|
136
|
+
- Refine test_vector_updatable. (#92)
|
137
|
+
- Refine Vector.new. (#113)
|
138
|
+
- Refine DataFrame#pick, #drop. (#113)
|
139
|
+
|
140
|
+
- Documents
|
141
|
+
|
142
|
+
- Update images. (#90, #105, #113)
|
143
|
+
- Update README to use simpler examples. (#112)
|
144
|
+
- Update README with a new screenshot example. (#113)
|
145
|
+
|
146
|
+
- GitHub site
|
147
|
+
|
148
|
+
- Update Jupyter notebooks in Binder (#88, #115)
|
149
|
+
- Move binder support to heronshoes/docker-stacks repository.
|
150
|
+
- Update README notebook on binder.
|
151
|
+
- Add examples_of_RedAmber notebook on binder.
|
152
|
+
|
153
|
+
- Start to use discussions.
|
154
|
+
|
155
|
+
- Thanks
|
156
|
+
|
157
|
+
- Kenta Murata
|
158
|
+
|
1
159
|
## [0.2.1] - 2022-09-07
|
2
160
|
|
3
|
-
-Bug fixes
|
161
|
+
- Bug fixes
|
4
162
|
|
5
163
|
- Fix `Vector#each` with block (#66)
|
6
164
|
`Vector#each` will return value of each element with block.
|
7
|
-
|
8
165
|
- Fix table format at size == 9 (#67)
|
9
|
-
|
10
166
|
- Fix to support Vector in `DataFrame#assign` (#77)
|
11
|
-
|
12
167
|
- Add `assert_delta` functionality for `assert_with_NaN` (#78)
|
13
|
-
|
14
168
|
- Fix Vector#is_in when self is chunked (#79)
|
15
|
-
|
16
169
|
- Fix Array type error (uint/int) (#79)
|
17
170
|
|
18
171
|
- New features and improvements
|
19
172
|
|
20
173
|
- Refine `DataFrame#indices` method (#67)
|
21
|
-
|
22
174
|
- Update DataFrame reshaping methods (#73)
|
23
|
-
|
24
175
|
- Change default option value of DataFrame reshaping
|
25
|
-
|
26
176
|
- Change the order of import_cars example
|
27
177
|
|
28
178
|
- Add `DataFrame#method_missing` to get column vector by method (#75)
|
29
|
-
|
30
179
|
- Add `DataFrame#method_missing` to get column (#75)
|
31
180
|
|
32
181
|
- Accept both args and block in `DataFrame#assign` (#75)
|
33
|
-
|
34
182
|
- Accept indices in `DataFrame#pick` and `DataFrame#drop` (#76)
|
35
183
|
|
36
184
|
- Add `DataFrame#slice_by` method (#77)
|
37
|
-
|
38
185
|
- Add new Vector functions (#78)
|
39
|
-
|
40
186
|
- Add inverse trigonometric function for Vector
|
41
187
|
- `acos`
|
42
188
|
- `asin`
|
@@ -49,12 +195,15 @@
|
|
49
195
|
|
50
196
|
- Add binary function `Vector#logb`
|
51
197
|
|
52
|
-
- Docker image and Jupyter Notebook
|
198
|
+
- Docker image and Jupyter Notebook [Thanks to Kenta Murata]
|
53
199
|
- Add link to RubyData in README
|
54
200
|
- Add link to interactive README by Binder
|
55
201
|
|
56
202
|
- Update Jupyter Notebook `71 examples of RedAmber`
|
57
203
|
|
204
|
+
- Thanks
|
205
|
+
|
206
|
+
- Kenta Murata
|
58
207
|
|
59
208
|
## [0.2.0] - 2022-08-15
|
60
209
|
|
@@ -63,25 +212,19 @@
|
|
63
212
|
- Bug fixes
|
64
213
|
|
65
214
|
- Fix order of multiple group keys (#55)
|
66
|
-
|
67
215
|
Only 1 group key comes to left. Other keys remain in right.
|
68
216
|
|
69
217
|
- Remove optional `require` for rover (#55)
|
70
|
-
|
71
218
|
Fix DataFrame.new for argument with Rover::DataFrame.
|
72
|
-
|
73
219
|
- Fix occasional failure in CI (#59)
|
74
|
-
|
75
220
|
Sometimes the CI test fails. I added -dev dependency
|
76
221
|
in Arrow install by apt, not doing in bundler.
|
77
222
|
|
78
223
|
- Fix calling :take in V#[] (#56)
|
79
|
-
|
80
224
|
Fixed to call Arrow function :take instead of :array_take in Vector#take_by_vector. This will prevent the error below
|
81
225
|
when called with Arrow::ChunkedArray.
|
82
226
|
|
83
227
|
- Raise error renaming non existing key (#61)
|
84
|
-
|
85
228
|
Add error when the specified key does not exist.
|
86
229
|
|
87
230
|
- Fix DataFrame#rename #assign by array (#65)
|
@@ -294,6 +437,13 @@
|
|
294
437
|
- Documentation
|
295
438
|
- Fix typo in DataFrame.md
|
296
439
|
|
440
|
+
- GitHub site
|
441
|
+
- Add gem and status badges in README. (#42) [Patch by kojix2]
|
442
|
+
|
443
|
+
- Thanks
|
444
|
+
|
445
|
+
- kojix2
|
446
|
+
|
297
447
|
## [0.1.5] - 2022-06-12 (experimental)
|
298
448
|
|
299
449
|
- Bug fixes
|
data/Gemfile
CHANGED
@@ -7,7 +7,7 @@ gemspec
|
|
7
7
|
group :test do
|
8
8
|
gem 'rake'
|
9
9
|
|
10
|
-
gem 'red-parquet', '
|
10
|
+
gem 'red-parquet', '~> 10.0.0'
|
11
11
|
gem 'rover-df', '~> 0.3.0'
|
12
12
|
|
13
13
|
gem 'rubocop'
|
@@ -21,5 +21,7 @@ group :test do
|
|
21
21
|
gem 'yard'
|
22
22
|
|
23
23
|
gem 'benchmark_driver'
|
24
|
-
gem 'red-
|
24
|
+
gem 'red-arrow-numo-narray'
|
25
|
+
gem 'red-datasets-arrow'
|
26
|
+
gem 'simplecov'
|
25
27
|
end
|