red_amber 0.1.5 → 0.1.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (41) hide show
  1. checksums.yaml +4 -4
  2. data/.rubocop.yml +33 -5
  3. data/.rubocop_todo.yml +2 -15
  4. data/.yardopts +1 -0
  5. data/CHANGELOG.md +164 -18
  6. data/Gemfile +6 -1
  7. data/README.md +247 -33
  8. data/Rakefile +1 -0
  9. data/benchmark/csv_load_penguins.yml +1 -1
  10. data/doc/DataFrame.md +383 -219
  11. data/doc/Vector.md +247 -37
  12. data/doc/examples_of_red_amber.ipynb +5454 -0
  13. data/doc/image/dataframe/assign.png +0 -0
  14. data/doc/image/dataframe/drop.png +0 -0
  15. data/doc/image/dataframe/pick.png +0 -0
  16. data/doc/image/dataframe/remove.png +0 -0
  17. data/doc/image/dataframe/rename.png +0 -0
  18. data/doc/image/dataframe/slice.png +0 -0
  19. data/doc/image/dataframe_model.png +0 -0
  20. data/doc/image/vector/binary_element_wise.png +0 -0
  21. data/doc/image/vector/unary_aggregation.png +0 -0
  22. data/doc/image/vector/unary_aggregation_w_option.png +0 -0
  23. data/doc/image/vector/unary_element_wise.png +0 -0
  24. data/lib/red-amber.rb +3 -0
  25. data/lib/red_amber/data_frame.rb +62 -10
  26. data/lib/red_amber/data_frame_displayable.rb +86 -9
  27. data/lib/red_amber/data_frame_selectable.rb +151 -32
  28. data/lib/red_amber/data_frame_variable_operation.rb +4 -0
  29. data/lib/red_amber/group.rb +59 -0
  30. data/lib/red_amber/helper.rb +61 -0
  31. data/lib/red_amber/vector.rb +59 -15
  32. data/lib/red_amber/vector_functions.rb +47 -38
  33. data/lib/red_amber/vector_selectable.rb +126 -0
  34. data/lib/red_amber/vector_updatable.rb +125 -0
  35. data/lib/red_amber/version.rb +1 -1
  36. data/lib/red_amber.rb +6 -3
  37. data/red_amber.gemspec +0 -2
  38. metadata +9 -33
  39. data/lib/red_amber/data_frame_helper.rb +0 -64
  40. data/lib/red_amber/data_frame_observation_operation.rb +0 -83
  41. data/lib/red_amber/vector_compensable.rb +0 -68
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 4d18eedf5de7fd06fe52e8a82ad38fe12d590dc10929c96872e557b9e946f785
4
- data.tar.gz: dda93f0af421096410e00ecf2261e8846a236634bd96ae9941d1b5cd49cd5eb2
3
+ metadata.gz: 3853e70f378cac65013a3bcfc51a2d55cb70cc494f3f3b70675bed944cc15b49
4
+ data.tar.gz: 3c65999cf978f1edf8c2c7fcce9a0ccb192d4da051f34fa0bf3f66ddc178eb1c
5
5
  SHA512:
6
- metadata.gz: 7c1b1edd6c1f6f3f275ea765c4bc8765327c88a36120a4c5a66dd8afa59f5913db4a5b436d80378554e03403bab823edf7467beea0f44e2803e36f3e9677a065
7
- data.tar.gz: 949fd15d2076d4e53fb141375bde282228c7f6566e137047344134c54964fe77fd2f9757b0bdc324eb3cfa14091f2ae928e0e844d28f3ebbcfa17fc7d388bbd0
6
+ metadata.gz: fac66ba0bf5955cfe0d21a51b90ec16407182b9053e9b586dfe9f8e2526de4e90efecdd8eba1e8b3c99b12fc44544c82fb2f6af4b666b97876a64a6ee4deedf1
7
+ data.tar.gz: 1a4cc526ce9f097438f2b7d018552a4cd6aaa2d900012297cd1777c4b9e39063cc2988af91c138e93f291a56175aefb6a6b00c211f9b9c5bd38d75d6bc40acb9
data/.rubocop.yml CHANGED
@@ -43,6 +43,11 @@ Lint/BinaryOperatorWithIdenticalOperands:
43
43
  Exclude:
44
44
  - 'test/test_vector_function.rb'
45
45
 
46
+ # Needed for test with empty block
47
+ Lint/EmptyBlock:
48
+ Exclude:
49
+ - 'test/test_group.rb'
50
+
46
51
  # Max: 120
47
52
  Layout/LineLength:
48
53
  Max: 118
@@ -56,7 +61,9 @@ Metrics/AbcSize:
56
61
  Max: 30
57
62
  Exclude:
58
63
  - 'lib/red_amber/data_frame_displayable.rb' # Max: 55
59
- - 'lib/red_amber/vector_compensable.rb' # Max: 36
64
+ - 'lib/red_amber/data_frame_selectable.rb' # Max: 51
65
+ - 'lib/red_amber/vector_updatable.rb' # Max: 36
66
+ - 'lib/red_amber/vector_selectable.rb' # Max: 33
60
67
 
61
68
  # Max: 25
62
69
  Metrics/BlockLength:
@@ -66,15 +73,20 @@ Metrics/BlockLength:
66
73
 
67
74
  # Max: 100
68
75
  Metrics/ClassLength:
69
- Max: 120
76
+ Max: 100
70
77
  Exclude:
71
78
  - 'test/**/*'
79
+ - 'lib/red_amber/data_frame.rb' #Max: 131
80
+ - 'lib/red_amber/vector.rb' #Max: 102
72
81
 
73
82
  # Max: 7
74
83
  Metrics/CyclomaticComplexity:
75
84
  Max: 12
76
85
  Exclude:
77
- - 'lib/red_amber/vector_compensable.rb' # Max: 14
86
+ - 'lib/red_amber/data_frame_displayable.rb' # Max: 18
87
+ - 'lib/red_amber/data_frame_selectable.rb' # Max: 14
88
+ - 'lib/red_amber/vector_selectable.rb' # Max: 13
89
+ - 'lib/red_amber/vector_updatable.rb' # Max: 14
78
90
 
79
91
  # Max: 10
80
92
  Metrics/MethodLength:
@@ -86,20 +98,36 @@ Metrics/MethodLength:
86
98
  Metrics/ModuleLength:
87
99
  Max: 100
88
100
  Exclude:
101
+ - 'lib/red_amber/data_frame_selectable.rb' # Max: 141
89
102
  - 'lib/red_amber/vector_functions.rb' # Max: 114
103
+ - 'lib/red_amber/data_frame_displayable.rb' # Max: 132
90
104
 
91
105
  # Max: 8
92
106
  Metrics/PerceivedComplexity:
93
107
  Max: 13
94
108
  Exclude:
95
- - 'lib/red_amber/vector_compensable.rb' # Max: 15
109
+ - 'lib/red_amber/data_frame_selectable.rb' # Max: 14
110
+ - 'lib/red_amber/vector_updatable.rb' # Max: 15
111
+ - 'lib/red_amber/data_frame_displayable.rb' # Max: 19
96
112
 
97
- # Necessary to define is_na
113
+ Naming/FileName:
114
+ Exclude:
115
+ - 'lib/red-amber.rb'
116
+
117
+ # Necessary to define is_na, is_in, etc.
98
118
  Naming/PredicateName:
99
119
  Exclude:
100
120
  - 'lib/red_amber/vector_functions.rb'
121
+ - 'lib/red_amber/vector.rb'
122
+ - 'lib/red_amber/vector_selectable.rb'
101
123
 
102
124
  # Necessary to test when range.end == -1
103
125
  Style/SlicingWithRange:
104
126
  Exclude:
105
127
  - 'test/test_data_frame_selectable.rb'
128
+
129
+ # Necessary for Vector < 0 element-wise comparison
130
+ Style/NumericPredicate:
131
+ Exclude:
132
+ - 'lib/red_amber/data_frame_selectable.rb'
133
+ - 'lib/red_amber/vector_selectable.rb'
data/.rubocop_todo.yml CHANGED
@@ -1,15 +1,2 @@
1
- # This configuration was generated by
2
- # `rubocop --auto-gen-config`
3
- # on 2022-05-08 02:37:36 UTC using RuboCop version 1.27.0.
4
- # The point is for the user to remove these configuration records
5
- # one by one as the offenses are removed from the code base.
6
- # Note that changes in the inspected code, or installation of new
7
- # versions of RuboCop, may require this file to be generated again.
8
-
9
- # Offense count: 1
10
- # This cop supports unsafe auto-correction (--auto-correct-all).
11
- # Configuration parameters: EnforcedStyle.
12
- # SupportedStyles: forbid_for_all_comparison_operators, forbid_for_equality_operators_only, require_for_all_comparison_operators, require_for_equality_operators_only
13
- Style/YodaCondition:
14
- Exclude:
15
- - 'lib/red_amber/data_frame.rb'
1
+ # We will use cops to detect bugs at an early stage
2
+ # Feel free to use .rubocop_todo.yml by --auto-gen-config
data/.yardopts ADDED
@@ -0,0 +1 @@
1
+ --output-dir doc/yard
data/CHANGELOG.md CHANGED
@@ -1,30 +1,176 @@
1
- ## [0.2.0] - unreleased
1
+ ## [0.1.9] - Unreleased
2
2
 
3
- - Document
4
- - YARD support
3
+ - Supports Arrow 9.0.0
5
4
 
6
- - DataFrame#join features
5
+ ## [0.1.8] - 2022-08-04 (experimental)
7
6
 
8
- ## [0.1.6] - Unreleased
7
+ - Bug fixes
8
+
9
+ - Fix unnamed column in table formatter (#52)
10
+ - Fix DataFrame#key?, DataFrame#key_index when @keys.nil? (#52)
11
+ - Align order of replacer in Vector#replace (#53, resolved #38)
12
+
13
+ - New features and improvements
14
+
15
+ - Refine DataFrame.new for empty arguments (#50)
16
+ - Delete .rubocop_todo.yml so as not to use yoda condition (#50)
17
+
18
+ - Refine Group (#52, resolved #28)
19
+ - Refine Group methods creation
20
+ - Make group key at first(left)
21
+ - Show only one group count when same counts
22
+ - Add block acceptability for group
23
+ - Rename empty key to :unnamed in DataFrame.new
24
+ - Rename Group#aggregated_by to #summarize (#54)
25
+
26
+ - Add Vector#shift (#51)
27
+
28
+ - Vector#[] accepts Range as an argument (#51)
29
+
30
+ - Update documents
31
+
32
+ - Add support for yard (#54)
9
33
 
10
- - Feedback something to Red Data Tools
34
+ - Renew jupyter notebook '53 examples' (#54)
11
35
 
12
- - `DataFrame`
13
- - Introduce `summary` or ``describe`
14
- - Add `Quantile` by own code?
15
- - Improve dataframe obs. manipuration methods to accept float as a index (#10)
16
- - Improve as more performant by benchmark check.
36
+ - Add more examples and images in README (#52)
37
+ - Add document of group manipulations in README (#52)
38
+ - Renew DF#group document in DataFrame.md (#52)
39
+
40
+ ## [0.1.7] - 2022-07-15 (experimental)
41
+
42
+ - Bug fixes
43
+
44
+ - Remove development dependency for red-dataset-arrow (#47)
45
+ - To avoid irregular fails in CI test
46
+ - Add red-datasets to development dependency instead (#49)
47
+
48
+ - Suppress useless log in tests (#46)
49
+ Suppress log of Webrick and iruby.
50
+
51
+ - New features and improvements
52
+
53
+ - Use Table mode as default preview mode in `inspect`/`to_s` (#40)
54
+ - Show examples in documents in Table
55
+ - Use the word rows/columns
56
+ - Update images of data processing in Table style
57
+
58
+ - Introduce a new Table formatter (#47)
59
+ - Migrate from the Arrow's formatter
60
+ - Do not use TAB, format by spaces only.
61
+ - Align column width with head rows and tail rows.
62
+ - Show nils.
63
+ - Show data types.
64
+ - Refine documents to use new formatter output
65
+
66
+ - Simplify options of Vector functions (#46)
67
+ Vector functions with options use optional argument opt in previous code.
17
68
 
18
- - `Vector`
19
- - Support more functions
20
- - Support coerece
69
+ - Add `#float?`, `#integer?` to Vector (#46)
70
+ - Add `#each` to Vector (#47)
21
71
 
22
- - More examples of frequently needed tasks
72
+ - Introduce class `Group` (#48)
73
+ - Refine `DataFrame#group` to use class Group
74
+ - Add methods to Group
75
+
76
+ - Move parquet and rover to development dependency (#49)
77
+
78
+ - Refine text in `DataFrame#to_iruby` (#40)
79
+
80
+ - Add badges in Github site
81
+ - Gitter badge for Red Data Tools (#42)
82
+ - Gem version and CI status badge (#45)
83
+
84
+ - Exchange containers in red-amber.rb and red_amber.rb (#47)
85
+ - Mainly use red_amber by consistency with the folder name
86
+
87
+ - Add Jupyter notebook '47 Examples of Red Amber' (#49)
88
+
89
+ ## [0.1.6] - 2022-06-26 (experimental)
90
+
91
+ - Bug fixes
92
+ - Fix mime-type of empty DataFrame in `#to_iruby` (#31)
93
+ - Fix mime setting in `DataFrame#to_iruby` (#36)
94
+ - Fix unmatched return val in Selectable (#34)
95
+ - Fix to return same error as `#[]` in `DataFrame#slice` (#34)
96
+
97
+ - New features and improvements
98
+ - Introduce Jupyter support (#29, #30, #31, #32)
99
+ - Add `DataFrame#to_html` (changed to use #to_iruby)
100
+ - Add feature to show nil in to_iruby
101
+ - nil is expressed as (nil)
102
+ - empty string('') is ""
103
+ - blank spaces are " "
104
+
105
+ - Enable to change DataFrame display mode by ENV (#36)
106
+ - Support ENV['RED_AMBER_OUTPUT_STYLE'] to change display mode in `#inspect` and `#to_iruby`
107
+ - ENV['RED_AMBER_OUTPUT_STYLE'] = 'table' # => Table mode
108
+ - ENV['RED_AMBER_OUTPUT_STYLE'] = nil or other than 'table' # => TDR mode
109
+
110
+ - Support `require 'red-amber'`, as well (#34)
111
+
112
+ - Refine Vector slicing methods (#31)
113
+ - Introduce `Vector#take` method
114
+ - Introduce `Vector#filter` method
115
+ - Improve `Vector#[]` to overload take and filter
116
+ - Introduce `Vector#drop_nil` method
117
+ - Introduce `Vector#if_else` method
118
+ - Introduce `Vector#is_in` method
119
+ - Add alias `Vector#all?`, `#any?` methods (#32)
120
+ - Add `Vector#has_nil?` method (#32)
121
+ - Add `Vector#empty?` method
122
+ - Add `Vector#primitive_invert` method
123
+ - Refactor `Vector#take`, `#filter`
124
+ - Move `Vector#if_else` from function to Updatable
125
+ - Move if_else test to updatable
126
+ - Rename updatable in test
127
+ - Remove method `Vector#take_out_element_wise`
128
+ - Rename inner method name
129
+
130
+ - Refine DataFrame slicing methods (#31)
131
+ - Introduce `DataFrame#take` method
132
+ - #take is implemented as vector calculation by #if_else
133
+ - Introduce `DataFrame#filter` method
134
+ - Change `DataFrame#[]` to use take and filter
135
+ - Float indices are acceptable (#10)
136
+ - Negative index (like Array) is also acceptable
137
+
138
+ - Further refinement in DataFrame slicing methods (#34)
139
+ - Improve `DataFrame#[]`, `#slice`, `#remove` by a new engine
140
+ - It parses arguments to Vector internally.
141
+ - Used Kernel#Array to simplify code (#16).
142
+ - recycle: Move `DataFrame#slice`, `#remove` to Selectable
143
+ - Refine `DataFrame#take`, `#filter` (undocumented)
144
+
145
+ - Introduce coerce in Vector (#35)
146
+ - Introduce `Vector#coerce`
147
+ - Now we can `-1 * Vector.new([1, 2, 3])`
148
+ - Add `Vector#to_ary` method
149
+ - Now we can `[1, 2] + Vector.new([3, 4, 5])`
150
+
151
+ - Other new feature or refinements
152
+ - Common
153
+ - Refactor helper as common for DataFrame and Vector (#35)
154
+ - Change name row/col to obs/var (#34)
155
+ - Rename internal function name (#34)
156
+ - Delete unused methods (#34)
157
+ - DataFrame
158
+ - Change to return instance variable in `#to_arrow`, `#keys` and `#key_index` (#34)
159
+ - Change to return an Array in `DataFrame#indices` (#35)
160
+ - Vector
161
+ - Introduce `Vector#replace` method
162
+ - Accept Range and expanded Array in `Vector#new`
163
+ - Add `Vector#indices` method (#35)
164
+ - Add `Vector#index` method (#35)
165
+ - Rename VectorCompensable to *Updatable (#33)
166
+
167
+ - Documentation
168
+ - Fix typo in DataFrame.md
23
169
 
24
170
  ## [0.1.5] - 2022-06-12 (experimental)
25
171
 
26
172
  - Bug fixes
27
- - Fix DF#tdr to display timestamp type (#19)
173
+ - Fix DataFrame#tdr to display timestamp type (#19)
28
174
  - Add TZ setting in CI test to pass temporal tests (#19)
29
175
  - Fix example in document of #load(csv_from_URI) (#23)
30
176
 
@@ -38,7 +184,7 @@
38
184
  - Add `Vector#temporal?` to check if temporal type
39
185
  - Refine around DataFrame#variables
40
186
  - Refine init of instance variables
41
- - Refine DataFrame#type_classes, V#ectortype_class
187
+ - Refine DataFrame#type_classes, Vector#type_class
42
188
  - Refine DataFrame#tdr to shorten temporal data
43
189
 
44
190
  - Add supports to make up for missing values (#20)
@@ -86,7 +232,7 @@
86
232
 
87
233
  - Bug fixes
88
234
  - Fix missing support for scalar argument (#1)
89
- - Fix type name of boolean in DF#types to be same as Vector#type (#6, #7)
235
+ - Fix type name of boolean in DataFrame#types to be same as Vector#type (#6, #7)
90
236
  - Fix zero picking to return empty DataFrame (#8)
91
237
  - Fix code at both args and a block given (#8)
92
238
 
data/Gemfile CHANGED
@@ -7,14 +7,19 @@ gemspec
7
7
  group :test do
8
8
  gem 'rake'
9
9
 
10
+ gem 'red-parquet', '>= 8.0.0'
11
+ gem 'rover-df', '~> 0.3.0'
12
+
10
13
  gem 'rubocop'
11
14
  gem 'rubocop-performance', require: false
12
15
  gem 'rubocop-rake'
13
16
  gem 'rubocop-rubycw', require: false
14
17
 
18
+ gem 'iruby'
15
19
  gem 'test-unit'
16
20
  gem 'webrick'
21
+ gem 'yard'
17
22
 
18
23
  gem 'benchmark_driver'
19
- gem 'red-datasets-arrow'
24
+ gem 'red-datasets'
20
25
  end