red_amber 0.1.3 → 0.1.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (43) hide show
  1. checksums.yaml +4 -4
  2. data/.rubocop.yml +31 -7
  3. data/CHANGELOG.md +214 -10
  4. data/Gemfile +4 -0
  5. data/README.md +117 -342
  6. data/benchmark/csv_load_penguins.yml +15 -0
  7. data/benchmark/drop_nil.yml +11 -0
  8. data/doc/DataFrame.md +854 -0
  9. data/doc/Vector.md +449 -0
  10. data/doc/image/arrow_table_new.png +0 -0
  11. data/doc/image/dataframe/assign.png +0 -0
  12. data/doc/image/dataframe/drop.png +0 -0
  13. data/doc/image/dataframe/pick.png +0 -0
  14. data/doc/image/dataframe/remove.png +0 -0
  15. data/doc/image/dataframe/rename.png +0 -0
  16. data/doc/image/dataframe/slice.png +0 -0
  17. data/doc/image/dataframe_model.png +0 -0
  18. data/doc/image/example_in_red_arrow.png +0 -0
  19. data/doc/image/tdr.png +0 -0
  20. data/doc/image/tdr_and_table.png +0 -0
  21. data/doc/image/tidy_data_in_TDR.png +0 -0
  22. data/doc/image/vector/binary_element_wise.png +0 -0
  23. data/doc/image/vector/unary_aggregation.png +0 -0
  24. data/doc/image/vector/unary_aggregation_w_option.png +0 -0
  25. data/doc/image/vector/unary_element_wise.png +0 -0
  26. data/doc/tdr.md +56 -0
  27. data/doc/tdr_ja.md +56 -0
  28. data/lib/red-amber.rb +27 -0
  29. data/lib/red_amber/data_frame.rb +91 -37
  30. data/lib/red_amber/{data_frame_output.rb → data_frame_displayable.rb} +49 -41
  31. data/lib/red_amber/data_frame_indexable.rb +38 -0
  32. data/lib/red_amber/data_frame_observation_operation.rb +11 -0
  33. data/lib/red_amber/data_frame_selectable.rb +155 -48
  34. data/lib/red_amber/data_frame_variable_operation.rb +137 -0
  35. data/lib/red_amber/helper.rb +61 -0
  36. data/lib/red_amber/vector.rb +69 -16
  37. data/lib/red_amber/vector_functions.rb +80 -45
  38. data/lib/red_amber/vector_selectable.rb +124 -0
  39. data/lib/red_amber/vector_updatable.rb +104 -0
  40. data/lib/red_amber/version.rb +1 -1
  41. data/lib/red_amber.rb +1 -16
  42. data/red_amber.gemspec +3 -6
  43. metadata +38 -9
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: '0308ff686bf7b49b767b7cd28ddc068e02170c00c093dcd42c7187e438e0adf3'
4
- data.tar.gz: 98397e31bce1a440e951357d5d3b475814a6ecc08f21a0908c0fdf58c6189be4
3
+ metadata.gz: ae6a6696e0f01ae7d621d11542e203803ba117fc1ee3d286a1444b3c4ac746fc
4
+ data.tar.gz: 722d4ad538fe4f0c85db4911e773e1f87eb03f47fa63c954529bf04babc55d8c
5
5
  SHA512:
6
- metadata.gz: 7ad71d8259d04535d08567bde6ca0fc419e0d9de15d1e812dbc642fb3901f1c744c69766dbf409e876212e426e309ac0032968b767df3a960a8e6eb40d4f3c19
7
- data.tar.gz: eee78ae4316b007d95714d6e2920ad32518497942d9cd5adb373476321e6f9e6e8099f9c721ee8bac05df2617fb3f3c747ce92ec74b1cb84da0b0bd4664051cf
6
+ metadata.gz: 96887abfbdd44330e80a6a97f91597c00706fc99492d086683702f1d3e757331e90fb275e5796a2b7b3228b476f8c7799ab22727411baedb79ae39acafd2d3f0
7
+ data.tar.gz: c020bba60734fccdeb4a18efecb98260f70fa76c5b8ba2c7f2830d2dac6de66e9776a1fa2ffc5d396e7270b29c5e4dd9737dee70b5dcc47dd3113aaafe4f4d22
data/.rubocop.yml CHANGED
@@ -53,9 +53,12 @@ Layout/LineLength:
53
53
  # 18..30 unsatisfactory
54
54
  # > 30 dangerous
55
55
  Metrics/AbcSize:
56
- Max: 23
56
+ Max: 30
57
57
  Exclude:
58
- - 'lib/red_amber/data_frame_output.rb' # Max: 51
58
+ - 'lib/red_amber/data_frame_displayable.rb' # Max: 55
59
+ - 'lib/red_amber/data_frame_selectable.rb' # Max: 51
60
+ - 'lib/red_amber/vector_updatable.rb' # Max: 36
61
+ - 'lib/red_amber/vector_selectable.rb' # Max: 33
59
62
 
60
63
  # Max: 25
61
64
  Metrics/BlockLength:
@@ -68,33 +71,54 @@ Metrics/ClassLength:
68
71
  Max: 100
69
72
  Exclude:
70
73
  - 'test/**/*'
74
+ - 'lib/red_amber/data_frame.rb' #Max: 131
75
+ - 'lib/red_amber/vector.rb' #Max: 102
71
76
 
72
77
  # Max: 7
73
78
  Metrics/CyclomaticComplexity:
74
- Max: 10
79
+ Max: 12
80
+ Exclude:
81
+ - 'lib/red_amber/data_frame_selectable.rb' # Max: 14
82
+ - 'lib/red_amber/vector_updatable.rb' # Max: 14
75
83
 
76
84
  # Max: 10
77
85
  Metrics/MethodLength:
78
- Max: 18
86
+ Max: 30
79
87
  Exclude:
80
- - 'lib/red_amber/data_frame_output.rb' # Max: 31
88
+ - 'lib/red_amber/data_frame_displayable.rb' # Max: 33
81
89
 
82
90
  # Max: 100
83
91
  Metrics/ModuleLength:
84
92
  Max: 100
85
93
  Exclude:
94
+ - 'lib/red_amber/data_frame_selectable.rb' # Max: 141
86
95
  - 'lib/red_amber/vector_functions.rb' # Max: 114
87
96
 
88
97
  # Max: 8
89
98
  Metrics/PerceivedComplexity:
90
- Max: 11
99
+ Max: 13
100
+ Exclude:
101
+ - 'lib/red_amber/data_frame_selectable.rb' # Max: 14
102
+ - 'lib/red_amber/vector_updatable.rb' # Max: 15
103
+
104
+ Naming/FileName:
105
+ Exclude:
106
+ - 'lib/red-amber.rb'
91
107
 
92
- # Necessary to define is_na
108
+ # Necessary to define is_na, is_in, etc.
93
109
  Naming/PredicateName:
94
110
  Exclude:
95
111
  - 'lib/red_amber/vector_functions.rb'
112
+ - 'lib/red_amber/vector.rb'
113
+ - 'lib/red_amber/vector_selectable.rb'
96
114
 
97
115
  # Necessary to test when range.end == -1
98
116
  Style/SlicingWithRange:
99
117
  Exclude:
100
118
  - 'test/test_data_frame_selectable.rb'
119
+
120
+ # Necessary for element-wise comparison such as Vector < 0
121
+ Style/NumericPredicate:
122
+ Exclude:
123
+ - 'lib/red_amber/data_frame_selectable.rb'
124
+ - 'lib/red_amber/vector_selectable.rb'
data/CHANGELOG.md CHANGED
@@ -1,17 +1,221 @@
1
- ## [0.1.4] - Unreleased
1
+ ## - unreleased
2
2
 
3
- - Prepare documents for the 'Transposed DataFrame Representation'
4
- - Feedback to Red Arrow
5
- - Separate documents
3
+ - Document
4
+ - YARD support
5
+
6
+ - `datasets-red-amber` gem
7
+ - `red-amber` gem
8
+
9
+ - `Vector#divmod`
10
+ - Introduce if Arrow's function is ready
11
+
12
+ ## - Unreleased, will be after Arrow 9.0.0 released
6
13
 
7
14
  - `DataFrame`
8
- - Introduce updating capabilities
9
- - Introduce NA support
10
- - Add slice method
15
+ - Introduce `summary` or `describe`
16
+ - `Quantile` will be available
17
+
18
+ ## [0.1.7] - Unreleased, may be 2022-07-10
19
+
20
+ - Feedback something to Red Data Tools
21
+ - Support more functions
22
+ - Improve as more performant
23
+ - More examples of frequently needed tasks
24
+
25
+ - New `Group` API
26
+ - `DataFrame#join` features
27
+
28
+ ## [0.1.6] - 2022-06-26 (experimental)
29
+
30
+ - Bug fixes
31
+ - Fix mime-type of empty DataFrame in `#to_iruby` (#31)
32
+ - Fix mime setting in `DataFrame#to_iruby` (#36)
33
+ - Fix unmatched return val in Selectable (#34)
34
+ - Fix to return same error as `#[]` in `DataFrame#slice` (#34)
35
+
36
+ - New features and improvements
37
+ - Introduce Jupyter support (#29, #30, #31, #32)
38
+ - Add `DataFrame#to_html` (changed to use `#to_iruby`)
39
+ - Add feature to show nil in to_iruby
40
+ - nil is expressed as (nil)
41
+ - empty string('') is ""
42
+ - blank spaces are " "
43
+
44
+ - Enable to change DataFrame display mode by ENV (#36)
45
+ - Support ENV['RED_AMBER_OUTPUT_STYLE'] to change display mode in `#inspect` and `#to_iruby`
46
+ - ENV['RED_AMBER_OUTPUT_STYLE'] = 'table' # => Table mode
47
+ - ENV['RED_AMBER_OUTPUT_STYLE'] = nil or other than 'table' # => TDR mode
48
+
49
+ - Support `require 'red-amber'`, as well (#34)
50
+
51
+ - Refine Vector slicing methods (#31)
52
+ - Introduce `Vector#take` method
53
+ - Introduce `Vector#filter` method
54
+ - Improve `Vector#[]` to overload take and filter
55
+ - Introduce `Vector#drop_nil` method
56
+ - Introduce `Vector#if_else` method
57
+ - Introduce `Vector#is_in` method
58
+ - Add alias `Vector#all?`, `#any?` methods (#32)
59
+ - Add `Vector#has_nil?` method(#32)
60
+ - Add `Vector#empty?` method
61
+ - Add `Vector#primitive_invert` method
62
+ - Refactor `Vector#take`, `#filter`
63
+ - Move `Vector#if_else` from function to Updatable
64
+ - Move if_else test to updatable
65
+ - Rename updatable in test
66
+ - Remove method `Vector#take_out_element_wise`
67
+ - Rename inner method name
68
+
69
+ - Refine DataFrame slicing methods (#31)
70
+ - Introduce `DataFrame#take` method
71
+ - #take is implemented as vector calculation by #if_else
72
+ - Introduce `DataFrame#filter` method
73
+ - Change `DataFrame#[]` to use take and filter
74
+ - Float indices are acceptable (#10)
75
+ - Negative index (like Array) is also acceptable
76
+
77
+ - Further refinement in DataFrame slicing methods (#34)
78
+ - Improve `DataFrame#[]`, `#slice`, `#remove` by a new engine
79
+ - It parses arguments to Vector internally.
80
+ - Used Kernel#Array to simplify code (#16) .
81
+ - recycle: Move `DataFrame#slice`, `#remove` to Selectable
82
+ - Refine `DataFrame#take`, `#filter` (undocumented)
83
+
84
+ - Introduce coerce in Vector (#35)
85
+ - Introduce `Vector#coerce`
86
+ - Now we can `-1 * Vector.new([1, 2, 3])`
87
+ - Add `Vector#to_ary` method
88
+ - Now we can `[1, 2] + Vector.new([3, 4, 5])`
89
+
90
+ - Other new feature or refinements
91
+ - Common
92
+ - Refactor helper as common for DataFrame and Vector (#35)
93
+ - Change name row/col to obs/var (#34)
94
+ - Rename internal function name (#34)
95
+ - Delete unused methods (#34)
96
+ - DataFrame
97
+ - Change to return instance variable in `#to_arrow`, `#keys` and `#key_index` (#34)
98
+ - Change to return an Array in `DataFrame#indices` (#35)
99
+ - Vector
100
+ - Introduce `Vector#replace` method
101
+ - Accept Range and expanded Array in `Vector#new`
102
+ - Add `Vector#indices` method (#35)
103
+ - Add `Vector#index` method (#35)
104
+ - Rename VectorCompensable to *Updatable (#33)
105
+
106
+ - Documentation
107
+ - Fix typo in DataFrame.md
108
+
109
+ ## [0.1.5] - 2022-06-12 (experimental)
110
+
111
+ - Bug fixes
112
+ - Fix DataFrame#tdr to display timestamp type (#19)
113
+ - Add TZ setting in CI test to pass temporal tests (#19)
114
+ - Fix example in document of #load(csv_from_URI) (#23)
115
+
116
+ - New features and improvements
117
+ - Improve usability of DataFrame manipulating block (#19)
118
+ - Add `DataFrame#v` to select a Vector
119
+ - Add `DataFrame#variables` method
120
+ - Add `DataFrame#to_arrow`
121
+ - Add instance variables in DataFrame with lazy initialization
122
+ - Add `Vector#key` to get key name
123
+ - Add `Vector#temporal?` to check if temporal type
124
+ - Refine around DataFrame#variables
125
+ - Refine init of instance variables
126
+ - Refine DataFrame#type_classes, Vector#type_class
127
+ - Refine DataFrame#tdr to shorten temporal data
128
+
129
+ - Add supports to make up for missing values (#20)
130
+ - Add VectorArgumentError
131
+ - Add `Vector#replace_with`
132
+ - Add helper function to assert with NaN
133
+ - To assert NaN == NaN
134
+ - Add `Vector#fill_nil_backward`, `Vector#fill_nil_forward`
135
+ - Add `DataFrame#remove_nil` method
136
+ - Change to accept nil as replacement in Vector#replace_with
137
+
138
+ - Introduce index related methods (#22)
139
+ - Add `Vector#sort_indexes` method
140
+ - Add `Vector#uniq` method
141
+ - Add `Vector#tally` and `Vector#value_counts` methods
142
+ - Add `DataFrame#sort` method
143
+ - Add `DataFrame#group` method
144
+ - Change to use DataFrame#map_indices in #[]
145
+
146
+ - Add rounding functions with opts (#21)
147
+ - With options :mode and :n_digits
148
+ - :n_digits also can be specified with :multiple option in `Vector#round_to_multiple`
149
+ - `Vector#round`
150
+ - `Vector#ceil`
151
+ - `Vector#floor`
152
+ - `Vector#trunc`
153
+
154
+ - Documentation
155
+ - Update TDR, TDR_ja documents to latest (#18)
156
+ - Refinement and small fix in DataFrame.md (#18)
157
+ - Update README to use more effective example (#18)
158
+ - Delete expired TDR_operations.pdf (#23)
159
+ - Update README and dataframe_model image (#23)
160
+ - Update description about rover-df in README (#23)
161
+ - Add installation of Arrow in README (#23)
162
+
163
+ - Others
164
+ - Tried but cannot use bundler cache in ci test (#17)
165
+ - Bump up requirements to Arrow 8.0.0 (#25)
166
+ - Arrow 7.0.0 with Ubuntu 21.04 causes a fatal error in replace_with_mask function.
167
+ - Update the description of gem (#23)
168
+ - Add benchmark tests (#26)
169
+
170
+ ## [0.1.4] - 2022-05-29 (experimental)
171
+
172
+ - Bug fixes
173
+ - Fix missing support for scalar argument (#1)
174
+ - Fix type name of boolean in DataFrame#types to be same as Vector#type (#6, #7)
175
+ - Fix zero picking to return empty DataFrame (#8)
176
+ - Fix code at both args and a block given (#8)
177
+
178
+ - New features and improvements
179
+ - `DataFrame`
180
+ - Refine module name `Displayable`
181
+ - Rename nrow/ncol methods to `size`/`n_keys` to align with TDR concept (#4)
182
+ - Remain `n_row`/`n_col` for compatibility
183
+ - Rename `ls` method to `tdr` (#4)
184
+ - Add limit option to `tdr`
185
+ - Shorten option name (#11)
186
+ - Introduce `pick` method to create sub DataFrame (#8)
187
+ - Add boolean support (#8)
188
+ - Refactor `pick` (#9)
189
+ - Introduce `drop` method to create sub DataFrame (#8)
190
+ - Add boolean support (#8)
191
+ - Refactor `drop` (#9)
192
+ - Add boolean array support for `[]` (#9)
193
+ - Add `indexes`/`indices` to use with selecting observations (#9)
194
+ - Introduce `slice` method to create sub DataFrame (#8)
195
+ - Refactor `slice` (#9)
196
+ - Introduce `remove` method to create sub DataFrame (#9)
197
+ - Introduce `rename` method to create sub DataFrame (#14)
198
+ - Introduce `assign` method to create sub DataFrame (#14)
199
+ - Improve to call block by instance_eval (#13)
200
+
201
+ - `Vector`
202
+ - Refine `find(function)`
203
+ - Add `min_max` method (#2)
204
+ - Add `std`/`sd` method (ddof=0 version: `stddev`) (#2)
205
+ - Add `var` method (ddof=0 version: `variance`) (#2)
206
+ - Add `VectorFunctions.arrow_doc(func_name)` (temporarily)
11
207
 
12
- - `Vector`
13
- - Add NaN support for functions
14
- - Support more functions
208
+ - Documentation
209
+ - Show code in README
210
+ - Change row/column names for **TDR** concept (#4)
211
+ - Add documents about **TDR** concept (#4)
212
+ - Add example about TDR (#4)
213
+ - Separate README to create DataFrame and Vector documents (#12)
214
+ - Add DataFrame model concept image to README (#12)
215
+
216
+ - GitHub site
217
+ - Switched to use merge on GitHub (not to push merged master) (#1)
218
+ - Create lifetime issue #3 to show the goal of this project (#3)
15
219
 
16
220
  ## [0.1.3] - 2022-05-15 (experimental)
17
221
 
data/Gemfile CHANGED
@@ -12,6 +12,10 @@ group :test do
12
12
  gem 'rubocop-rake'
13
13
  gem 'rubocop-rubycw', require: false
14
14
 
15
+ gem 'iruby'
15
16
  gem 'test-unit'
16
17
  gem 'webrick'
18
+
19
+ gem 'benchmark_driver'
20
+ gem 'red-datasets-arrow'
17
21
  end