red_amber 0.1.7 → 0.1.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -13,10 +13,7 @@ module RedAmber
13
13
 
14
14
  def initialize(*args)
15
15
  @variables = @keys = @vectors = @types = @data_types = nil
16
- # bug in gobject-introspection: ruby-gnome/ruby-gnome#1472
17
- # [Arrow::Table] == [nil] shows ArgumentError
18
- # temporary use yoda condition to workaround
19
- if args.empty? || args == [[]] || args == [{}] || [nil] == args
16
+ if args.empty? || args[0] == [] || args[0] == {} || args[0].nil?
20
17
  # DataFrame.new, DataFrame.new([]), DataFrame.new({}), DataFrame.new(nil)
21
18
  # returns empty DataFrame
22
19
  @table = Arrow::Table.new({}, [])
@@ -34,6 +31,7 @@ module RedAmber
34
31
  raise DataFrameTypeError, "invalid argument: #{arg}"
35
32
  end
36
33
  end
34
+ name_unnamed_keys
37
35
  end
38
36
 
39
37
  def self.load(path, options = {})
@@ -78,12 +76,12 @@ module RedAmber
78
76
  alias_method :var_names, :keys
79
77
 
80
78
  def key?(key)
81
- @keys.include?(key.to_sym)
79
+ keys.include?(key.to_sym)
82
80
  end
83
81
  alias_method :has_key?, :key?
84
82
 
85
83
  def key_index(key)
86
- @keys.find_index(key.to_sym)
84
+ keys.find_index(key.to_sym)
87
85
  end
88
86
  alias_method :find_index, :key_index
89
87
  alias_method :index, :key_index
@@ -144,8 +142,10 @@ module RedAmber
144
142
  end
145
143
  end
146
144
 
147
- def group(*group_keys)
148
- Group.new(self, group_keys)
145
+ def group(*group_keys, &block)
146
+ g = Group.new(self, group_keys)
147
+ g = g.summarize(&block) if block
148
+ g
149
149
  end
150
150
 
151
151
  private
@@ -182,5 +182,23 @@ module RedAmber
182
182
  html = IRuby::HTML.table(converted.to_h, maxrows: 8, maxcols: 15)
183
183
  "#{self.class} <#{size} x #{n_keys} vector#{pl(n_keys)}> #{html}"
184
184
  end
185
+
186
+ def name_unnamed_keys
187
+ return unless @table[:'']
188
+
189
+ # We can't use #keys here because it would cause a mismatch between @table and @keys
190
+ keys = @table.schema.fields.map { |f| f.name.to_sym }
191
+ unnamed = (:unnamed1..).find { |e| !keys.include?(e) }
192
+ fields =
193
+ @table.schema.fields.map do |field|
194
+ if field.name.empty?
195
+ Arrow::Field.new(unnamed, field.data_type)
196
+ else
197
+ field
198
+ end
199
+ end
200
+ schema = Arrow::Schema.new(fields)
201
+ @table = Arrow::Table.new(schema, @table.columns)
202
+ end
185
203
  end
186
204
  end
@@ -5,6 +5,8 @@ require 'stringio'
5
5
  module RedAmber
6
6
  # mix-ins for the class DataFrame
7
7
  module DataFrameDisplayable
8
+ INDEX_KEY = :index_key_for_format_table
9
+
8
10
  def to_s
9
11
  return '' if empty?
10
12
 
@@ -139,7 +141,7 @@ module RedAmber
139
141
  original = self
140
142
  indices = size > head + tail ? [*0...head, *(size - tail)...size] : [*0...size]
141
143
  df = slice(indices).assign do
142
- assigner = { '': indices.map { |i| (i + 1).to_s } }
144
+ assigner = { INDEX_KEY => indices.map { |i| (i + 1).to_s } }
143
145
  vectors.each_with_object(assigner) do |v, a|
144
146
  a[v.key] = v.to_a.map do |e|
145
147
  if e.nil?
@@ -155,12 +157,12 @@ module RedAmber
155
157
  end
156
158
  end
157
159
 
158
- df = df.pick { [keys[-1], keys[0..-2]] }
160
+ df = df.pick { [INDEX_KEY, keys - [INDEX_KEY]] }
159
161
  df = size > head + tail ? df[0, 0, 0...head, 0, -tail..-1] : df[0, 0, 0..-1]
160
162
  df = df.assign do
161
163
  vectors.each_with_object({}) do |v, assigner|
162
- vec = v.replace(0, v.key.to_s)
163
- .replace(1, v.key == :'' ? '' : "<#{original[v.key].type}>")
164
+ vec = v.replace(0, v.key == INDEX_KEY ? '' : v.key.to_s)
165
+ .replace(1, v.key == INDEX_KEY ? '' : "<#{original[v.key].type}>")
164
166
  assigner[v.key] = size > head + tail ? vec.replace(head + 2, ':') : vec
165
167
  end
166
168
  end
@@ -197,7 +199,7 @@ module RedAmber
197
199
  end
198
200
 
199
201
  def format_for_column(vector, original, width)
200
- if vector.key != :'' && !original[vector.key].numeric?
202
+ if vector.key != INDEX_KEY && !original[vector.key].numeric?
201
203
  "%-#{width}s"
202
204
  else
203
205
  "%#{width}s"
@@ -16,36 +16,30 @@ module RedAmber
16
16
  @group = @table.group(*@group_keys)
17
17
  end
18
18
 
19
- def count(*summary_keys)
20
- by(:count, summary_keys)
19
+ functions = %i[count sum product mean min max stddev variance]
20
+ functions.each do |function|
21
+ define_method(function) do |*summary_keys|
22
+ by(function, summary_keys)
23
+ end
21
24
  end
22
25
 
23
- def sum(*summary_keys)
24
- by(:sum, summary_keys)
26
+ def inspect
27
+ tallys = @dataframe.pick(@group_keys).vectors.map.with_object({}) do |v, h|
28
+ h[v.key] = v.tally
29
+ end
30
+ "#<#{self.class}:#{format('0x%016x', object_id)}\n#{tallys}>"
25
31
  end
26
32
 
27
- def product(*summary_keys)
28
- by(:product, summary_keys)
29
- end
30
-
31
- def mean(*summary_keys)
32
- by(:mean, summary_keys)
33
- end
34
-
35
- def min(*summary_keys)
36
- by(:min, summary_keys)
37
- end
38
-
39
- def max(*summary_keys)
40
- by(:max, summary_keys)
41
- end
42
-
43
- def stddev(*summary_keys)
44
- by(:stddev, summary_keys)
45
- end
46
-
47
- def variance(*summary_keys)
48
- by(:variance, summary_keys)
33
+ def summarize(&block)
34
+ agg = instance_eval(&block)
35
+ case agg
36
+ when DataFrame
37
+ agg
38
+ when Array
39
+ agg.reduce { |aggregated, df| aggregated.assign(df.to_h) }
40
+ else
41
+ raise GroupArgumentError, "Unknown argument: #{agg}"
42
+ end
49
43
  end
50
44
 
51
45
  private
@@ -55,7 +49,11 @@ module RedAmber
55
49
  d = summary_keys - @dataframe.keys
56
50
  raise GroupArgumentError, "#{d} is not a key of\n #{@dataframe}." unless summary_keys.empty? || d.empty?
57
51
 
58
- RedAmber::DataFrame.new(@group.send(func, *summary_keys))
52
+ df = RedAmber::DataFrame.new(@group.send(func, *summary_keys))
53
+ df = df[df.keys[-1], df.keys[0...-1]]
54
+ # If all count columns are identical (no nil included), collapse them into a single :count column.
55
+ df = df[df.keys[0..1]].rename(df.keys[1], :count) if func == :count && df.to_h.values[1..].uniq.size == 1
56
+ df
59
57
  end
60
58
  end
61
59
  end
@@ -64,6 +64,8 @@ module RedAmber
64
64
  return filter_by_array(arg)
65
65
  when Arrow::Array
66
66
  array = arg
67
+ when Range
68
+ array = normalize_element(arg)
67
69
  else
68
70
  unless arg.is_a?(Numeric) || booleans?([arg])
69
71
  raise VectorArgumentError, "Argument must be numeric or boolean: #{args}"
@@ -12,7 +12,15 @@ module RedAmber
12
12
  # @param replacer [Array, Vector, Arrow::Array] new data to replace with.
13
13
  # @return [Vector] a new Vector with the replaced values
14
14
  def replace(args, replacer)
15
- args = args.is_a?(Array) ? args : Array(args)
15
+ args =
16
+ case args
17
+ when Array
18
+ args
19
+ when Range
20
+ normalize_element(args)
21
+ else
22
+ Array(args)
23
+ end
16
24
  replacer = Array(replacer)
17
25
  return self if args.empty? || args[0].nil?
18
26
 
@@ -22,6 +30,7 @@ module RedAmber
22
30
  if vector.boolean?
23
31
  vector
24
32
  elsif vector.numeric?
33
+ replacer.sort_by! { |x| args[replacer.index(x)] } if replacer # rubocop:disable Style/SafeNavigation
25
34
  Vector.new(indices).is_in(vector)
26
35
  else
27
36
  raise VectorArgumentError, "Invalid data type #{args}"
@@ -50,6 +59,18 @@ module RedAmber
50
59
  is_nil.if_else(false, self).invert
51
60
  end
52
61
 
62
+ def shift(amount = 1, fill: nil)
63
+ raise VectorArgumentError, 'Shift amount is too large' if amount.abs > size
64
+
65
+ if amount.positive?
66
+ replace(amount..-1, self[0...-amount]).replace(0...amount, fill)
67
+ elsif amount.negative?
68
+ replace(0...amount, self[-amount..]).replace(amount..-1, fill)
69
+ else # amount == 0
70
+ self
71
+ end
72
+ end
73
+
53
74
  private
54
75
 
55
76
  # [Ternary]: replace_with(booleans, replacements) => vector
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module RedAmber
4
- VERSION = '0.1.7'
4
+ VERSION = '0.1.8'
5
5
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: red_amber
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.7
4
+ version: 0.1.8
5
5
  platform: ruby
6
6
  authors:
7
7
  - Hirokazu SUZUKI (heronshoes)
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2022-07-15 00:00:00.000000000 Z
11
+ date: 2022-08-03 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: red-arrow
@@ -34,6 +34,7 @@ extra_rdoc_files: []
34
34
  files:
35
35
  - ".rubocop.yml"
36
36
  - ".rubocop_todo.yml"
37
+ - ".yardopts"
37
38
  - CHANGELOG.md
38
39
  - Gemfile
39
40
  - LICENSE
@@ -41,10 +42,10 @@ files:
41
42
  - Rakefile
42
43
  - benchmark/csv_load_penguins.yml
43
44
  - benchmark/drop_nil.yml
44
- - doc/47_examples_of_red_amber.ipynb
45
45
  - doc/CODE_OF_CONDUCT.md
46
46
  - doc/DataFrame.md
47
47
  - doc/Vector.md
48
+ - doc/examples_of_red_amber.ipynb
48
49
  - doc/image/arrow_table_new.png
49
50
  - doc/image/dataframe/assign.png
50
51
  - doc/image/dataframe/drop.png