red_amber 0.4.0 → 0.4.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: aa6f3c47b47df7271d7d150a800013c7c9d8bd75ca6066f54506c922f12eea09
4
- data.tar.gz: 763f19f54a6508648fe9f1bdd0a11f678a86f554b58b71d7bed66aa5df7df2a7
3
+ metadata.gz: 264e7637475fd01946900335751a1592a3859e9bfa772ecc0800ab05c4d852f0
4
+ data.tar.gz: a57400445419698a66d6b5c94e15fa8c040f2f3930f9fbf75603ffb6e18bd9cf
5
5
  SHA512:
6
- metadata.gz: 433ca52f7a62f055f327e0426426cfd86f563009e4ec4811d7cf8297152309081271b7b7625d39ffa31ecf455d352ee305d76b6d09e4d1dab0d90aa6c2bffb3e
7
- data.tar.gz: 717d8618dd428d165c80420e7c35f1b7f870a059227a91bd5224f67b9cd3b8bdafcaed523fee170524738036cc9b43e914712fa01e88f7eb9ca1f0cc18c98dbf
6
+ metadata.gz: 0fdbcdb732e36bb866a8251800ab3fa1a714fa075234bf8cd516f2542ab6704ebfa429a7177da2bd8cd6fa6eb1158efb0d68f46f43d1dc088a9a0f0debdc5c54
7
+ data.tar.gz: f9c1dffaa157ecf34b0b4fec6c1d7972b4773bbf7a11101a345172d621753cd9fc3818753b329dd2906a506af294d6a96c0180a0fb4dc84c2b54bceef6b520f5
data/.rubocop.yml CHANGED
@@ -44,17 +44,16 @@ Layout/LineLength:
44
44
  Layout/MultilineMethodCallIndentation:
45
45
  EnforcedStyle: indented_relative_to_receiver
46
46
 
47
- # avoid unused variable asignment
48
- Rubycw/Rubycw:
49
- Exclude:
50
- - 'test/**/*'
51
-
52
47
  # Disabled to define Vector operators
53
48
  # Offense count: 38
54
49
  Lint/BinaryOperatorWithIdenticalOperands:
55
50
  Exclude:
56
51
  - 'test/test_vector_binary_element_wise.rb'
57
52
 
53
+ Lint/Debugger:
54
+ Exclude:
55
+ - 'bin/example'
56
+
58
57
  # Need for test with empty block
59
58
  # Offense count: 1
60
59
  # Configuration parameters: AllowComments, AllowEmptyLambdas.
@@ -87,6 +86,7 @@ Metrics/AbcSize:
87
86
  'drop', # 31.42
88
87
  '[]', # 33.76
89
88
  'split', # 37.35
89
+ 'aggregate', # 38.13
90
90
  ]
91
91
 
92
92
  # Max: 25
@@ -110,6 +110,7 @@ Metrics/ClassLength:
110
110
  - 'lib/red_amber/group.rb' # 105
111
111
  - 'lib/red_amber/subframes.rb' # 110
112
112
  - 'lib/red_amber/vector.rb' # 152
113
+ - 'lib/red_amber/vector_binary_element_wise.rb' # 109
113
114
 
114
115
  # Only for monitoring. I will measure by PerceivedComplexity.
115
116
  # Max: 7
@@ -127,6 +128,8 @@ Metrics/CyclomaticComplexity:
127
128
  'parse_range', # 14
128
129
  'remove', # 14
129
130
  '[]', # 13
131
+ 'drop', # 13
132
+ 'aggregate', # 13
130
133
  ]
131
134
 
132
135
  # Max: 10
@@ -140,6 +143,7 @@ Metrics/MethodLength:
140
143
  'format_table', # 53
141
144
  'slice_by', # 38
142
145
  'assign_update', # 35
146
+ 'drop', # 32
143
147
  'aggregate', # 31
144
148
  ]
145
149
 
@@ -187,6 +191,7 @@ Metrics/PerceivedComplexity:
187
191
  'filters', # 11
188
192
  'html_table', # 11
189
193
  'slice', # 11
194
+ 'pick', # 11
190
195
  ]
191
196
 
192
197
  # Offense count: 1
@@ -210,6 +215,12 @@ Naming/PredicateName:
210
215
  - 'lib/red_amber/vector_functions.rb'
211
216
  - 'lib/red_amber/vector_selectable.rb'
212
217
 
218
+ # avoid unused variable assignment
219
+ Rubycw/Rubycw:
220
+ Exclude:
221
+ - 'test/**/*'
222
+ - 'bin/example'
223
+
213
224
  # Offense count: 16
214
225
  # This cop supports safe autocorrection (--autocorrect).
215
226
  Style/OperatorMethodCall:
@@ -223,6 +234,10 @@ Style/SlicingWithRange:
223
234
  Exclude:
224
235
  - 'test/test_data_frame_selectable.rb'
225
236
 
237
+ Style/MixinUsage:
238
+ Exclude:
239
+ - 'bin/example'
240
+
226
241
  # Necessary to Vector < 0 element-wise comparison
227
242
  # Offense count: 5
228
243
  # This cop supports unsafe autocorrection (--autocorrect-all).
data/CHANGELOG.md CHANGED
@@ -1,6 +1,70 @@
1
- ## [0.4.0] - 2023-02-25
1
+ ## [0.4.1] - 2023-03-11
2
+
3
+ - Breaking change
4
+ - Remove Vector.aggregate? method (#200)
5
+
6
+ - Bug fixes
7
+ - Return self in DataFrame#drop when dropper is empty (reverts 746ac263) (#193)
8
+ - Return self in DataFrame#rename when renaming to same name (#193)
9
+ - Return self in DataFrame#pick when pick itself (#199)
10
+ - Fix column width for non-ascii elements in DataFrame#to_s (#193)
11
+ - This change uses String#width.
12
+ - Fix DataFrame#to_iruby when data is date32 type (#193)
13
+ - Fix DataFrame#shorthand to show temporal type data simply (#193)
14
+ - Fix Vector#rank when data is ChunkedArray (#198)
15
+ - Fix Vector element-wise functions with nil as scalar (#198)
16
+ - Support :force_order for all methods of join family (#199)
17
+ - Supports :force_order option to force sorting after join for all #join family.
18
+ - This will be valuable in some cases such as large dataframes.
19
+ - Ensure baseframe's schema for SubFrames (#200)
20
+
21
+ - New features and improvements
22
+ - Add Vector#first, #last method (#198)
23
+ - This method will be used in SubFrames feature.
24
+ - Add Vector#modulo method (#198)
25
+ - The divmod function in Arrow C++ is still in draft state.
26
+ This method was created by combining existing functions
27
+ - Add Vector#quotient method (#198)
28
+ - Add aliases #div, #mod, #mul, #pow, #quo and #sub for Vector (#198)
29
+ - Add Vector#*_checked functions (#198)
30
+ - These functions will check numeric range overflow.
31
+ - Add 'tdra' and 'plain' in display mode (#193)
32
+ - The plain mode and default inspect will show up to 128 rows and 128 columns.
33
+ - Add String#width method in refinements (#193)
34
+ - This will be used to update DataFrame#to_s.
35
+ - Introduce pre-loaded REPL environment (#199)
36
+ - This commit will add bin/example and it will start irb environment
37
+ with enabled commonly used datasets such as penguins, diamonds, etc.
38
+ - Upgrade SubFrames#aggregate to accept block (#200)
39
+
40
+ - Refactoring
41
+ - Use symbolized keys in refinements of Table#keys, #key? (#193)
42
+ - This allows Tables and DataFrames to be treated in the same manner.
43
+ - Use key_name.succ in suffix of DataFrame#join (#193)
44
+ - This makes it simple to get a name candidate.
45
+ - Use ||= to memoize instance variables (#193)
46
+ - Refine vector projection to use #variables (#193)
47
+ - #variables is fastest when picking Vectors.
48
+ - Refine Vector#is_in to avoid #pack (#198)
49
+ - Refine Vector#index (#198)
50
+
51
+ - Improve in tests/CI
52
+ - Tests
53
+ - Update benchmarks to test from older version (#193)
54
+ - Refine test of Vector function with scalar (#198)
55
+ - Refine test subframes and test_vector_selectable (#200)
56
+
57
+ - Cops
58
+ - CI
59
+
60
+ - Documentation
61
+ - Update documents(small fix) (#201)
2
62
 
3
- :memo: Update documents for consistency
63
+ - GitHub site
64
+
65
+ - Thanks
66
+
67
+ ## [0.4.0] - 2023-02-25
4
68
 
5
69
  - Breaking change
6
70
  - Upgrade dependency to Arrow 11.0.0 (#188)
@@ -73,7 +137,8 @@
73
137
  - CI
74
138
  - Fix setting up Arrow by homebrew in CI (#167)
75
139
  - Fix CI error on homebrew deleting python link (#167)
76
- - Set cache-version to get new C extensions in CI (#173) Thanks to @kou for suggestion.
140
+ - Set cache-version to get new C extensions in CI (#173)
141
+ - Thanks to @kou for suggestion.
77
142
 
78
143
  - Documentation
79
144
  - Update DataFrame.md about loading csv without headers (#165)
data/README.md CHANGED
@@ -18,7 +18,7 @@ A simple dataframe library for Ruby.
18
18
  ## Requirements
19
19
  ### Ruby
20
20
  Supported Ruby version is >= 3.0 (since RedAmber 0.3.0).
21
- - I decided to remove Ruby 2.7 without waiting for EOL. See [Release note for v0.3.0](https://github.com/heronshoes/red_amber/discussions/162) for details.
21
+ - I decided to remove support for Ruby 2.7 without waiting for its EOL. See [Release note for v0.3.0](https://github.com/heronshoes/red_amber/discussions/162) for details.
22
22
 
23
23
  ### Libraries
24
24
  ```ruby
@@ -29,7 +29,7 @@ gem 'rover-df', '~> 0.3.0' # Optional, if you use IO from/to Rover::DataFrame
29
29
 
30
30
  ## Installation
31
31
 
32
- Install requirements before you install Red Amber.
32
+ Install requirements before you install RedAmber.
33
33
 
34
34
  - Apache Arrow (~> 11.0.0)
35
35
  - Apache Arrow GLib (~> 11.0.0)
@@ -88,12 +88,12 @@ Also you can try the contents of this README interactively by [Binder](https://m
88
88
  Comparison of basic features of RedAmber with Python
89
89
  [pandas](https://pandas.pydata.org/),
90
90
  R [Tidyverse](https://www.tidyverse.org/) and
91
- Julia [Dataframes](https://dataframes.juliadata.org/stable/) is [here](doc/DataFrame_Comparison.md) (Thanks to Benson Muite).
91
+ Julia [Dataframes](https://dataframes.juliadata.org/stable/) is in [DataFrame_Comparison.md](doc/DataFrame_Comparison.md) (Thanks to Benson Muite).
92
92
 
93
93
  ## Data frame in `RedAmber`
94
94
 
95
95
  Class `RedAmber::DataFrame` represents a set of data in 2D-shape.
96
- The entity is a Red Arrow's Table object.
96
+ Its entity is a Red Arrow's Table object.
97
97
 
98
98
  ![dataframe model of RedAmber](https://raw.githubusercontent.com/heronshoes/red_amber/main/doc/image/dataframe_model.png)
99
99
 
@@ -115,7 +115,7 @@ then
115
115
  require 'datasets-arrow' # to load sample data
116
116
 
117
117
  dataset = Datasets::Diamonds.new
118
- diamonds = DataFrame.new(dataset) # from v0.2.2, should be `dataset.to_arrow` if older.
118
+ diamonds = DataFrame.new(dataset) # before v0.2.3, should be `dataset.to_arrow`
119
119
 
120
120
  # =>
121
121
  #<RedAmber::DataFrame : 53940 x 10 Vectors, 0x000000000000f668>
@@ -174,7 +174,7 @@ df.rename('mean(price)': :mean_price_USD)
174
174
 
175
175
  ### Example: starwars dataset
176
176
 
177
- Next example is `starwars` dataset reading from the downloaded CSV file. Followed by minimum data cleansing.
177
+ Next example is `starwars` dataset reading from the downloaded CSV file. Followed by minimum data cleaning.
178
178
 
179
179
  ```ruby
180
180
  uri = URI('https://vincentarelbundock.github.io/Rdatasets/csv/dplyr/starwars.csv')
data/benchmark/basic.yml CHANGED
@@ -1,18 +1,18 @@
1
1
  loop_count: 3
2
2
 
3
3
  contexts:
4
- - name: HEAD
5
- prelude: |
6
- $LOAD_PATH.unshift(File.expand_path('lib'))
7
- - name: 0.3.0
4
+ - name: 0.1.5
8
5
  gems:
9
- red_amber: 0.3.0
6
+ red_amber: 0.1.5
10
7
  - name: 0.2.0
11
8
  gems:
12
9
  red_amber: 0.2.0
13
- - name: 0.1.5
10
+ - name: 0.3.0
14
11
  gems:
15
- red_amber: 0.1.5
12
+ red_amber: 0.3.0
13
+ - name: HEAD
14
+ prelude: |
15
+ $LOAD_PATH.unshift(File.expand_path('lib'))
16
16
 
17
17
  prelude: |
18
18
  require 'red_amber'
@@ -1,12 +1,12 @@
1
1
  loop_count: 3
2
2
 
3
3
  contexts:
4
- - name: HEAD
5
- prelude: |
6
- $LOAD_PATH.unshift(File.expand_path('lib'))
7
4
  - name: 0.3.0
8
5
  gems:
9
6
  red_amber: 0.3.0
7
+ - name: HEAD
8
+ prelude: |
9
+ $LOAD_PATH.unshift(File.expand_path('lib'))
10
10
 
11
11
  prelude: |
12
12
  require 'red_amber'
@@ -1,15 +1,15 @@
1
1
  loop_count: 3
2
2
 
3
3
  contexts:
4
- - name: HEAD
5
- prelude: |
6
- $LOAD_PATH.unshift(File.expand_path('lib'))
7
- - name: 0.3.0
8
- gems:
9
- red_amber: 0.3.0
10
4
  - name: 0.2.0
11
5
  gems:
12
6
  red_amber: 0.2.0
7
+ - name: 0.3.0
8
+ gems:
9
+ red_amber: 0.3.0
10
+ - name: HEAD
11
+ prelude: |
12
+ $LOAD_PATH.unshift(File.expand_path('lib'))
13
13
 
14
14
  prelude: |
15
15
  require 'red_amber'
@@ -19,8 +19,14 @@ prelude: |
19
19
 
20
20
  starwars = RedAmber::DataFrame.new(Datasets::Rdataset.new('dplyr', 'starwars').to_arrow)
21
21
 
22
- uri = URI("https://raw.githubusercontent.com/heronshoes/red_amber/master/test/entity/import_cars.tsv")
23
- import_cars = RedAmber::DataFrame.load(uri)
22
+ import_cars = RedAmber::DataFrame.load(Arrow::Buffer.new(<<~TSV), format: :tsv)
23
+ Year Audi BMW BMW_MINI Mercedes-Benz VW
24
+ 2017 28336 52527 25427 68221 49040
25
+ 2018 26473 50982 25984 67554 51961
26
+ 2019 24222 46814 23813 66553 46794
27
+ 2020 22304 35712 20196 57041 36576
28
+ 2021 22535 35905 18211 51722 35215
29
+ TSV
24
30
 
25
31
  ds = Datasets::Rdataset.new('openintro', 'simpsons_paradox_covid')
26
32
  simpsons_paradox_covid = RedAmber::DataFrame.new(ds.to_arrow)
@@ -43,7 +49,7 @@ benchmark:
43
49
  .group(:species) { [count(:species), mean(:height, :mass)] }
44
50
  .slice { v(:count) > 1 }
45
51
 
46
- 'D03: Inport cars test': |
52
+ 'D03: Import cars test': |
47
53
  import_cars
48
54
  .to_long(:Year, name: :Manufacturer, value: :Num_of_imported)
49
55
  .to_wide(name: :Manufacturer, value: :Num_of_imported)
data/benchmark/group.yml CHANGED
@@ -1,15 +1,15 @@
1
1
  loop_count: 3
2
2
 
3
3
  contexts:
4
- - name: HEAD
5
- prelude: |
6
- $LOAD_PATH.unshift(File.expand_path('lib'))
7
- - name: 0.3.0
8
- gems:
9
- red_amber: 0.3.0
10
4
  - name: 0.2.2
11
5
  gems:
12
6
  red_amber: 0.2.2
7
+ - name: 0.3.0
8
+ gems:
9
+ red_amber: 0.3.0
10
+ - name: HEAD
11
+ prelude: |
12
+ $LOAD_PATH.unshift(File.expand_path('lib'))
13
13
 
14
14
  prelude: |
15
15
  require 'red_amber'
@@ -1,15 +1,15 @@
1
1
  loop_count: 3
2
2
 
3
3
  contexts:
4
- - name: HEAD
5
- prelude: |
6
- $LOAD_PATH.unshift(File.expand_path('lib'))
7
- - name: 0.3.0
8
- gems:
9
- red_amber: 0.3.0
10
4
  - name: 0.2.2
11
5
  gems:
12
6
  red_amber: 0.2.2
7
+ - name: 0.3.0
8
+ gems:
9
+ red_amber: 0.3.0
10
+ - name: HEAD
11
+ prelude: |
12
+ $LOAD_PATH.unshift(File.expand_path('lib'))
13
13
 
14
14
  prelude: |
15
15
  require 'red_amber'
data/benchmark/vector.yml CHANGED
@@ -1,15 +1,15 @@
1
1
  loop_count: 10
2
2
 
3
3
  contexts:
4
- - name: HEAD
5
- prelude: |
6
- $LOAD_PATH.unshift(File.expand_path('lib'))
7
- - name: 0.3.0
8
- gems:
9
- red_amber: 0.3.0
10
4
  - name: 0.2.0
11
5
  gems:
12
6
  red_amber: 0.2.0
7
+ - name: 0.3.0
8
+ gems:
9
+ red_amber: 0.3.0
10
+ - name: HEAD
11
+ prelude: |
12
+ $LOAD_PATH.unshift(File.expand_path('lib'))
13
13
 
14
14
  prelude: |
15
15
  require 'red_amber'
@@ -33,6 +33,23 @@ module RedAmber
33
33
  instance.instance_variable_set(:@table, table)
34
34
  instance
35
35
  end
36
+
37
+ # Return new DataFrame for specified schema and value.
38
+ #
39
+ # @param dataframe_for_schema [DataFrame]
40
+ # schema of this dataframe will be used.
41
+ # @param dataframe_for_value [DataFrame]
42
+ # column values of this dataframe will be used.
43
+ # @return [DataFrame]
44
+ # created DataFrame.
45
+ # @since 0.4.1
46
+ #
47
+ def new_dataframe_with_schema(dataframe_for_schema, dataframe_for_value)
48
+ DataFrame.create(
49
+ Arrow::Table.new(dataframe_for_schema.table.schema,
50
+ dataframe_for_value.table.columns)
51
+ )
52
+ end
36
53
  end
37
54
 
38
55
  # Creates a new DataFrame.
@@ -194,7 +211,7 @@ module RedAmber
194
211
  # `key => Vector` pairs for each columns.
195
212
  #
196
213
  def variables
197
- @variables || @variables = init_instance_vars(:variables)
214
+ @variables ||= init_instance_vars(:variables)
198
215
  end
199
216
  alias_method :vars, :variables
200
217
 
@@ -204,7 +221,7 @@ module RedAmber
204
221
  # keys in an Array.
205
222
  #
206
223
  def keys
207
- @keys || @keys = init_instance_vars(:keys)
224
+ @keys ||= init_instance_vars(:keys)
208
225
  end
209
226
  alias_method :column_names, :keys
210
227
  alias_method :var_names, :keys
@@ -240,7 +257,7 @@ module RedAmber
240
257
  # abbreviated Red Arrow data type names.
241
258
  #
242
259
  def types
243
- @types || @types = @table.columns.map do |column|
260
+ @types ||= @table.columns.map do |column|
244
261
  column.data.value_type.nick.to_sym
245
262
  end
246
263
  end
@@ -251,7 +268,7 @@ module RedAmber
251
268
  # an Array of Red Arrow data type Classes.
252
269
  #
253
270
  def type_classes
254
- @data_types || @data_types = @table.columns.map { |column| column.data_type.class }
271
+ @type_classes ||= @table.columns.map { |column| column.data_type.class }
255
272
  end
256
273
 
257
274
  # Returns Vectors in an Array.
@@ -260,7 +277,7 @@ module RedAmber
260
277
  # an Array of Vector.
261
278
  #
262
279
  def vectors
263
- @vectors || @vectors = init_instance_vars(:vectors)
280
+ @vectors ||= init_instance_vars(:vectors)
264
281
  end
265
282
 
266
283
  # Returns column-oriented data in a Hash.
@@ -682,7 +699,7 @@ module RedAmber
682
699
 
683
700
  # Catch variable (column) key as method name.
684
701
  def method_missing(name, *args, &block)
685
- return v(name) if args.empty? && key?(name)
702
+ return variables[name] if args.empty? && key?(name)
686
703
 
687
704
  super
688
705
  end
@@ -723,11 +740,9 @@ module RedAmber
723
740
  end
724
741
 
725
742
  def name_unnamed_keys
726
- return unless @table.key?('')
743
+ return unless @table.key?(:'')
727
744
 
728
- # We can't use #keys because it causes mismatch of @table and @keys
729
- keys = @table.schema.fields.map { |f| f.name.to_sym }
730
- unnamed = (:unnamed1..).find { |e| !keys.include?(e) }
745
+ unnamed = (:unnamed1..).find { |name| !@table.key?(name) }
731
746
  fields =
732
747
  @table.schema.fields.map do |field|
733
748
  if field.name.empty?