red_amber 0.4.0 → 0.4.1

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: aa6f3c47b47df7271d7d150a800013c7c9d8bd75ca6066f54506c922f12eea09
4
- data.tar.gz: 763f19f54a6508648fe9f1bdd0a11f678a86f554b58b71d7bed66aa5df7df2a7
3
+ metadata.gz: 264e7637475fd01946900335751a1592a3859e9bfa772ecc0800ab05c4d852f0
4
+ data.tar.gz: a57400445419698a66d6b5c94e15fa8c040f2f3930f9fbf75603ffb6e18bd9cf
5
5
  SHA512:
6
- metadata.gz: 433ca52f7a62f055f327e0426426cfd86f563009e4ec4811d7cf8297152309081271b7b7625d39ffa31ecf455d352ee305d76b6d09e4d1dab0d90aa6c2bffb3e
7
- data.tar.gz: 717d8618dd428d165c80420e7c35f1b7f870a059227a91bd5224f67b9cd3b8bdafcaed523fee170524738036cc9b43e914712fa01e88f7eb9ca1f0cc18c98dbf
6
+ metadata.gz: 0fdbcdb732e36bb866a8251800ab3fa1a714fa075234bf8cd516f2542ab6704ebfa429a7177da2bd8cd6fa6eb1158efb0d68f46f43d1dc088a9a0f0debdc5c54
7
+ data.tar.gz: f9c1dffaa157ecf34b0b4fec6c1d7972b4773bbf7a11101a345172d621753cd9fc3818753b329dd2906a506af294d6a96c0180a0fb4dc84c2b54bceef6b520f5
data/.rubocop.yml CHANGED
@@ -44,17 +44,16 @@ Layout/LineLength:
44
44
  Layout/MultilineMethodCallIndentation:
45
45
  EnforcedStyle: indented_relative_to_receiver
46
46
 
47
- # avoid unused variable asignment
48
- Rubycw/Rubycw:
49
- Exclude:
50
- - 'test/**/*'
51
-
52
47
  # Disabled to define Vector operators
53
48
  # Offense count: 38
54
49
  Lint/BinaryOperatorWithIdenticalOperands:
55
50
  Exclude:
56
51
  - 'test/test_vector_binary_element_wise.rb'
57
52
 
53
+ Lint/Debugger:
54
+ Exclude:
55
+ - 'bin/example'
56
+
58
57
  # Need for test with empty block
59
58
  # Offense count: 1
60
59
  # Configuration parameters: AllowComments, AllowEmptyLambdas.
@@ -87,6 +86,7 @@ Metrics/AbcSize:
87
86
  'drop', # 31.42
88
87
  '[]', # 33.76
89
88
  'split', # 37.35
89
+ 'aggregate', # 38.13
90
90
  ]
91
91
 
92
92
  # Max: 25
@@ -110,6 +110,7 @@ Metrics/ClassLength:
110
110
  - 'lib/red_amber/group.rb' # 105
111
111
  - 'lib/red_amber/subframes.rb' # 110
112
112
  - 'lib/red_amber/vector.rb' # 152
113
+ - 'lib/red_amber/vector_binary_element_wise.rb' # 109
113
114
 
114
115
  # Only for monitoring. I will measure by PerceivedComplexity.
115
116
  # Max: 7
@@ -127,6 +128,8 @@ Metrics/CyclomaticComplexity:
127
128
  'parse_range', # 14
128
129
  'remove', # 14
129
130
  '[]', # 13
131
+ 'drop', # 13
132
+ 'aggregate', # 13
130
133
  ]
131
134
 
132
135
  # Max: 10
@@ -140,6 +143,7 @@ Metrics/MethodLength:
140
143
  'format_table', # 53
141
144
  'slice_by', # 38
142
145
  'assign_update', # 35
146
+ 'drop', # 32
143
147
  'aggregate', # 31
144
148
  ]
145
149
 
@@ -187,6 +191,7 @@ Metrics/PerceivedComplexity:
187
191
  'filters', # 11
188
192
  'html_table', # 11
189
193
  'slice', # 11
194
+ 'pick', # 11
190
195
  ]
191
196
 
192
197
  # Offense count: 1
@@ -210,6 +215,12 @@ Naming/PredicateName:
210
215
  - 'lib/red_amber/vector_functions.rb'
211
216
  - 'lib/red_amber/vector_selectable.rb'
212
217
 
218
+ # avoid unused variable assignment
219
+ Rubycw/Rubycw:
220
+ Exclude:
221
+ - 'test/**/*'
222
+ - 'bin/example'
223
+
213
224
  # Offense count: 16
214
225
  # This cop supports safe autocorrection (--autocorrect).
215
226
  Style/OperatorMethodCall:
@@ -223,6 +234,10 @@ Style/SlicingWithRange:
223
234
  Exclude:
224
235
  - 'test/test_data_frame_selectable.rb'
225
236
 
237
+ Style/MixinUsage:
238
+ Exclude:
239
+ - 'bin/example'
240
+
226
241
  # Necessary to Vector < 0 element-wise comparison
227
242
  # Offense count: 5
228
243
  # This cop supports unsafe autocorrection (--autocorrect-all).
data/CHANGELOG.md CHANGED
@@ -1,6 +1,70 @@
1
- ## [0.4.0] - 2023-02-25
1
+ ## [0.4.1] - 2023-03-11
2
+
3
+ - Breaking change
4
+ - Remove Vector.aggregate? method (#200)
5
+
6
+ - Bug fixes
7
+ - Return self in DataFrame#drop when dropper is empty (reverts 746ac263) (#193)
8
+ - Return self in DataFrame#rename when renaming to same name (#193)
9
+ - Return self in DataFrame#pick when pick itself (#199)
10
+ - Fix column width for non-ascii elements in DataFrame#to_s (#193)
11
+ - This change uses String#width.
12
+ - Fix DataFrame#to_iruby when data is date32 type (#193)
13
+ - Fix DataFrame#shorthand to show temporal type data simply (#193)
14
+ - Fix Vector#rank when data is ChunkedArray (#198)
15
+ - Fix Vector element-wise functions with nil as scalar (#198)
16
+ - Support :force_order for all methods of join family (#199)
17
+ - Support :force_order option to force sorting after join for the whole #join family.
18
+ - This will be valuable in some cases, such as with large dataframes.
19
+ - Ensure baseframe's schema for SubFrames (#200)
20
+
21
+ - New features and improvements
22
+ - Add Vector#first, #last method (#198)
23
+ - This method will be used in SubFrames feature.
24
+ - Add Vector#modulo method (#198)
25
+ - The divmod function in Arrow C++ is still in draft state.
26
+ This method was created by combining existing functions
27
+ - Add Vector#quotient method (#198)
28
+ - Add aliases #div, #mod, #mul, #pow, #quo and #sub for Vector (#198)
29
+ - Add Vector#*_checked functions (#198)
30
+ - These functions will check for numeric range overflow.
31
+ - Add 'tdra' and 'plain' in display mode (#193)
32
+ - The plain mode and default inspect will show up to 128 rows and 128 columns.
33
+ - Add String#width method in refinements (#193)
34
+ - This will be used to update DataFrame#to_s.
35
+ - Introduce pre-loaded REPL environment (#199)
36
+ - This commit will add bin/example and it will start irb environment
37
+ with enabled commonly used datasets such as penguins, diamonds, etc.
38
+ - Upgrade SubFrames#aggregate to accept block (#200)
39
+
40
+ - Refactoring
41
+ - Use symbolized keys in refinements of Table#keys, #key? (#193)
42
+ - This allows Tables and DataFrames to be treated in the same manner.
43
+ - Use key_name.succ in suffix of DataFrame#join (#193)
44
+ - This makes it simple to get a name candidate.
45
+ - Use ||= to memoize instance variables (#193)
46
+ - Refine vector projection to use #variables (#193)
47
+ - #variables is fastest when picking Vectors.
48
+ - Refine Vector#is_in to avoid #pack (#198)
49
+ - Refine Vector#index (#198)
50
+
51
+ - Improve in tests/CI
52
+ - Tests
53
+ - Update benchmarks to test from older version (#193)
54
+ - Refine test of Vector function with scalar (#198)
55
+ - Refine test subframes and test_vector_selectable (#200)
56
+
57
+ - Cops
58
+ - CI
59
+
60
+ - Documentation
61
+ - Update documents(small fix) (#201)
2
62
 
3
- :memo: Update documents for consistency
63
+ - GitHub site
64
+
65
+ - Thanks
66
+
67
+ ## [0.4.0] - 2023-02-25
4
68
 
5
69
  - Breaking change
6
70
  - Upgrade dependency to Arrow 11.0.0 (#188)
@@ -73,7 +137,8 @@
73
137
  - CI
74
138
  - Fix setting up Arrow by homebrew in CI (#167)
75
139
  - Fix CI error on homebrew deleting python link (#167)
76
- - Set cache-version to get new C extensions in CI (#173) Thanks to @kou for suggestion.
140
+ - Set cache-version to get new C extensions in CI (#173)
141
+ - Thanks to @kou for suggestion.
77
142
 
78
143
  - Documentation
79
144
  - Update DataFrame.md about loading csv without headers (#165)
data/README.md CHANGED
@@ -18,7 +18,7 @@ A simple dataframe library for Ruby.
18
18
  ## Requirements
19
19
  ### Ruby
20
20
  Supported Ruby version is >= 3.0 (since RedAmber 0.3.0).
21
- - I decided to remove Ruby 2.7 without waiting for EOL. See [Release note for v0.3.0](https://github.com/heronshoes/red_amber/discussions/162) for details.
21
+ - I decided to remove support for Ruby 2.7 without waiting for its EOL. See [Release note for v0.3.0](https://github.com/heronshoes/red_amber/discussions/162) for details.
22
22
 
23
23
  ### Libraries
24
24
  ```ruby
@@ -29,7 +29,7 @@ gem 'rover-df', '~> 0.3.0' # Optional, if you use IO from/to Rover::DataFrame
29
29
 
30
30
  ## Installation
31
31
 
32
- Install requirements before you install Red Amber.
32
+ Install requirements before you install RedAmber.
33
33
 
34
34
  - Apache Arrow (~> 11.0.0)
35
35
  - Apache Arrow GLib (~> 11.0.0)
@@ -88,12 +88,12 @@ Also you can try the contents of this README interactively by [Binder](https://m
88
88
  Comparison of basic features of RedAmber with Python
89
89
  [pandas](https://pandas.pydata.org/),
90
90
  R [Tidyverse](https://www.tidyverse.org/) and
91
- Julia [Dataframes](https://dataframes.juliadata.org/stable/) is [here](doc/DataFrame_Comparison.md) (Thanks to Benson Muite).
91
+ Julia [Dataframes](https://dataframes.juliadata.org/stable/) is in [DataFrame_Comparison.md](doc/DataFrame_Comparison.md) (Thanks to Benson Muite).
92
92
 
93
93
  ## Data frame in `RedAmber`
94
94
 
95
95
  Class `RedAmber::DataFrame` represents a set of data in 2D-shape.
96
- The entity is a Red Arrow's Table object.
96
+ Its entity is a Red Arrow's Table object.
97
97
 
98
98
  ![dataframe model of RedAmber](https://raw.githubusercontent.com/heronshoes/red_amber/main/doc/image/dataframe_model.png)
99
99
 
@@ -115,7 +115,7 @@ then
115
115
  require 'datasets-arrow' # to load sample data
116
116
 
117
117
  dataset = Datasets::Diamonds.new
118
- diamonds = DataFrame.new(dataset) # from v0.2.2, should be `dataset.to_arrow` if older.
118
+ diamonds = DataFrame.new(dataset) # before v0.2.3, should be `dataset.to_arrow`
119
119
 
120
120
  # =>
121
121
  #<RedAmber::DataFrame : 53940 x 10 Vectors, 0x000000000000f668>
@@ -174,7 +174,7 @@ df.rename('mean(price)': :mean_price_USD)
174
174
 
175
175
  ### Example: starwars dataset
176
176
 
177
- Next example is `starwars` dataset reading from the downloaded CSV file. Followed by minimum data cleansing.
177
+ Next example is `starwars` dataset reading from the downloaded CSV file. Followed by minimum data cleaning.
178
178
 
179
179
  ```ruby
180
180
  uri = URI('https://vincentarelbundock.github.io/Rdatasets/csv/dplyr/starwars.csv')
data/benchmark/basic.yml CHANGED
@@ -1,18 +1,18 @@
1
1
  loop_count: 3
2
2
 
3
3
  contexts:
4
- - name: HEAD
5
- prelude: |
6
- $LOAD_PATH.unshift(File.expand_path('lib'))
7
- - name: 0.3.0
4
+ - name: 0.1.5
8
5
  gems:
9
- red_amber: 0.3.0
6
+ red_amber: 0.1.5
10
7
  - name: 0.2.0
11
8
  gems:
12
9
  red_amber: 0.2.0
13
- - name: 0.1.5
10
+ - name: 0.3.0
14
11
  gems:
15
- red_amber: 0.1.5
12
+ red_amber: 0.3.0
13
+ - name: HEAD
14
+ prelude: |
15
+ $LOAD_PATH.unshift(File.expand_path('lib'))
16
16
 
17
17
  prelude: |
18
18
  require 'red_amber'
@@ -1,12 +1,12 @@
1
1
  loop_count: 3
2
2
 
3
3
  contexts:
4
- - name: HEAD
5
- prelude: |
6
- $LOAD_PATH.unshift(File.expand_path('lib'))
7
4
  - name: 0.3.0
8
5
  gems:
9
6
  red_amber: 0.3.0
7
+ - name: HEAD
8
+ prelude: |
9
+ $LOAD_PATH.unshift(File.expand_path('lib'))
10
10
 
11
11
  prelude: |
12
12
  require 'red_amber'
@@ -1,15 +1,15 @@
1
1
  loop_count: 3
2
2
 
3
3
  contexts:
4
- - name: HEAD
5
- prelude: |
6
- $LOAD_PATH.unshift(File.expand_path('lib'))
7
- - name: 0.3.0
8
- gems:
9
- red_amber: 0.3.0
10
4
  - name: 0.2.0
11
5
  gems:
12
6
  red_amber: 0.2.0
7
+ - name: 0.3.0
8
+ gems:
9
+ red_amber: 0.3.0
10
+ - name: HEAD
11
+ prelude: |
12
+ $LOAD_PATH.unshift(File.expand_path('lib'))
13
13
 
14
14
  prelude: |
15
15
  require 'red_amber'
@@ -19,8 +19,14 @@ prelude: |
19
19
 
20
20
  starwars = RedAmber::DataFrame.new(Datasets::Rdataset.new('dplyr', 'starwars').to_arrow)
21
21
 
22
- uri = URI("https://raw.githubusercontent.com/heronshoes/red_amber/master/test/entity/import_cars.tsv")
23
- import_cars = RedAmber::DataFrame.load(uri)
22
+ import_cars = RedAmber::DataFrame.load(Arrow::Buffer.new(<<~TSV), format: :tsv)
23
+ Year Audi BMW BMW_MINI Mercedes-Benz VW
24
+ 2017 28336 52527 25427 68221 49040
25
+ 2018 26473 50982 25984 67554 51961
26
+ 2019 24222 46814 23813 66553 46794
27
+ 2020 22304 35712 20196 57041 36576
28
+ 2021 22535 35905 18211 51722 35215
29
+ TSV
24
30
 
25
31
  ds = Datasets::Rdataset.new('openintro', 'simpsons_paradox_covid')
26
32
  simpsons_paradox_covid = RedAmber::DataFrame.new(ds.to_arrow)
@@ -43,7 +49,7 @@ benchmark:
43
49
  .group(:species) { [count(:species), mean(:height, :mass)] }
44
50
  .slice { v(:count) > 1 }
45
51
 
46
- 'D03: Inport cars test': |
52
+ 'D03: Import cars test': |
47
53
  import_cars
48
54
  .to_long(:Year, name: :Manufacturer, value: :Num_of_imported)
49
55
  .to_wide(name: :Manufacturer, value: :Num_of_imported)
data/benchmark/group.yml CHANGED
@@ -1,15 +1,15 @@
1
1
  loop_count: 3
2
2
 
3
3
  contexts:
4
- - name: HEAD
5
- prelude: |
6
- $LOAD_PATH.unshift(File.expand_path('lib'))
7
- - name: 0.3.0
8
- gems:
9
- red_amber: 0.3.0
10
4
  - name: 0.2.2
11
5
  gems:
12
6
  red_amber: 0.2.2
7
+ - name: 0.3.0
8
+ gems:
9
+ red_amber: 0.3.0
10
+ - name: HEAD
11
+ prelude: |
12
+ $LOAD_PATH.unshift(File.expand_path('lib'))
13
13
 
14
14
  prelude: |
15
15
  require 'red_amber'
@@ -1,15 +1,15 @@
1
1
  loop_count: 3
2
2
 
3
3
  contexts:
4
- - name: HEAD
5
- prelude: |
6
- $LOAD_PATH.unshift(File.expand_path('lib'))
7
- - name: 0.3.0
8
- gems:
9
- red_amber: 0.3.0
10
4
  - name: 0.2.2
11
5
  gems:
12
6
  red_amber: 0.2.2
7
+ - name: 0.3.0
8
+ gems:
9
+ red_amber: 0.3.0
10
+ - name: HEAD
11
+ prelude: |
12
+ $LOAD_PATH.unshift(File.expand_path('lib'))
13
13
 
14
14
  prelude: |
15
15
  require 'red_amber'
data/benchmark/vector.yml CHANGED
@@ -1,15 +1,15 @@
1
1
  loop_count: 10
2
2
 
3
3
  contexts:
4
- - name: HEAD
5
- prelude: |
6
- $LOAD_PATH.unshift(File.expand_path('lib'))
7
- - name: 0.3.0
8
- gems:
9
- red_amber: 0.3.0
10
4
  - name: 0.2.0
11
5
  gems:
12
6
  red_amber: 0.2.0
7
+ - name: 0.3.0
8
+ gems:
9
+ red_amber: 0.3.0
10
+ - name: HEAD
11
+ prelude: |
12
+ $LOAD_PATH.unshift(File.expand_path('lib'))
13
13
 
14
14
  prelude: |
15
15
  require 'red_amber'
@@ -33,6 +33,23 @@ module RedAmber
33
33
  instance.instance_variable_set(:@table, table)
34
34
  instance
35
35
  end
36
+
37
+ # Return new DataFrame for specified schema and value.
38
+ #
39
+ # @param dataframe_for_schema [DataFrame]
40
+ # schema of this dataframe will be used.
41
+ # @param dataframe_for_value [DataFrame]
42
+ # column values of this dataframe will be used.
43
+ # @return [DataFrame]
44
+ # created DataFrame.
45
+ # @since 0.4.1
46
+ #
47
+ def new_dataframe_with_schema(dataframe_for_schema, dataframe_for_value)
48
+ DataFrame.create(
49
+ Arrow::Table.new(dataframe_for_schema.table.schema,
50
+ dataframe_for_value.table.columns)
51
+ )
52
+ end
36
53
  end
37
54
 
38
55
  # Creates a new DataFrame.
@@ -194,7 +211,7 @@ module RedAmber
194
211
  # `key => Vector` pairs for each columns.
195
212
  #
196
213
  def variables
197
- @variables || @variables = init_instance_vars(:variables)
214
+ @variables ||= init_instance_vars(:variables)
198
215
  end
199
216
  alias_method :vars, :variables
200
217
 
@@ -204,7 +221,7 @@ module RedAmber
204
221
  # keys in an Array.
205
222
  #
206
223
  def keys
207
- @keys || @keys = init_instance_vars(:keys)
224
+ @keys ||= init_instance_vars(:keys)
208
225
  end
209
226
  alias_method :column_names, :keys
210
227
  alias_method :var_names, :keys
@@ -240,7 +257,7 @@ module RedAmber
240
257
  # abbreviated Red Arrow data type names.
241
258
  #
242
259
  def types
243
- @types || @types = @table.columns.map do |column|
260
+ @types ||= @table.columns.map do |column|
244
261
  column.data.value_type.nick.to_sym
245
262
  end
246
263
  end
@@ -251,7 +268,7 @@ module RedAmber
251
268
  # an Array of Red Arrow data type Classes.
252
269
  #
253
270
  def type_classes
254
- @data_types || @data_types = @table.columns.map { |column| column.data_type.class }
271
+ @type_classes ||= @table.columns.map { |column| column.data_type.class }
255
272
  end
256
273
 
257
274
  # Returns Vectors in an Array.
@@ -260,7 +277,7 @@ module RedAmber
260
277
  # an Array of Vector.
261
278
  #
262
279
  def vectors
263
- @vectors || @vectors = init_instance_vars(:vectors)
280
+ @vectors ||= init_instance_vars(:vectors)
264
281
  end
265
282
 
266
283
  # Returns column-oriented data in a Hash.
@@ -682,7 +699,7 @@ module RedAmber
682
699
 
683
700
  # Catch variable (column) key as method name.
684
701
  def method_missing(name, *args, &block)
685
- return v(name) if args.empty? && key?(name)
702
+ return variables[name] if args.empty? && key?(name)
686
703
 
687
704
  super
688
705
  end
@@ -723,11 +740,9 @@ module RedAmber
723
740
  end
724
741
 
725
742
  def name_unnamed_keys
726
- return unless @table.key?('')
743
+ return unless @table.key?(:'')
727
744
 
728
- # We can't use #keys because it causes mismatch of @table and @keys
729
- keys = @table.schema.fields.map { |f| f.name.to_sym }
730
- unnamed = (:unnamed1..).find { |e| !keys.include?(e) }
745
+ unnamed = (:unnamed1..).find { |name| !@table.key?(name) }
731
746
  fields =
732
747
  @table.schema.fields.map do |field|
733
748
  if field.name.empty?