red_amber 0.1.7 → 0.1.8

Sign up to get free protection for your applications and to get access to all the features.
@@ -13,10 +13,7 @@ module RedAmber
13
13
 
14
14
  def initialize(*args)
15
15
  @variables = @keys = @vectors = @types = @data_types = nil
16
- # bug in gobject-introspection: ruby-gnome/ruby-gnome#1472
17
- # [Arrow::Table] == [nil] shows ArgumentError
18
- # temporary use yoda condition to workaround
19
- if args.empty? || args == [[]] || args == [{}] || [nil] == args
16
+ if args.empty? || args[0] == [] || args[0] == {} || args[0].nil?
20
17
  # DataFrame.new, DataFrame.new([]), DataFrame.new({}), DataFrame.new(nil)
21
18
  # returns empty DataFrame
22
19
  @table = Arrow::Table.new({}, [])
@@ -34,6 +31,7 @@ module RedAmber
34
31
  raise DataFrameTypeError, "invalid argument: #{arg}"
35
32
  end
36
33
  end
34
+ name_unnamed_keys
37
35
  end
38
36
 
39
37
  def self.load(path, options = {})
@@ -78,12 +76,12 @@ module RedAmber
78
76
  alias_method :var_names, :keys
79
77
 
80
78
  def key?(key)
81
- @keys.include?(key.to_sym)
79
+ keys.include?(key.to_sym)
82
80
  end
83
81
  alias_method :has_key?, :key?
84
82
 
85
83
  def key_index(key)
86
- @keys.find_index(key.to_sym)
84
+ keys.find_index(key.to_sym)
87
85
  end
88
86
  alias_method :find_index, :key_index
89
87
  alias_method :index, :key_index
@@ -144,8 +142,10 @@ module RedAmber
144
142
  end
145
143
  end
146
144
 
147
- def group(*group_keys)
148
- Group.new(self, group_keys)
145
+ def group(*group_keys, &block)
146
+ g = Group.new(self, group_keys)
147
+ g = g.summarize(&block) if block
148
+ g
149
149
  end
150
150
 
151
151
  private
@@ -182,5 +182,23 @@ module RedAmber
182
182
  html = IRuby::HTML.table(converted.to_h, maxrows: 8, maxcols: 15)
183
183
  "#{self.class} <#{size} x #{n_keys} vector#{pl(n_keys)}> #{html}"
184
184
  end
185
+
186
+ def name_unnamed_keys
187
+ return unless @table[:'']
188
+
189
+ # We can't use #keys because it causes mismatch of @table and @keys
190
+ keys = @table.schema.fields.map { |f| f.name.to_sym }
191
+ unnamed = (:unnamed1..).find { |e| !keys.include?(e) }
192
+ fields =
193
+ @table.schema.fields.map do |field|
194
+ if field.name.empty?
195
+ Arrow::Field.new(unnamed, field.data_type)
196
+ else
197
+ field
198
+ end
199
+ end
200
+ schema = Arrow::Schema.new(fields)
201
+ @table = Arrow::Table.new(schema, @table.columns)
202
+ end
185
203
  end
186
204
  end
@@ -5,6 +5,8 @@ require 'stringio'
5
5
  module RedAmber
6
6
  # mix-ins for the class DataFrame
7
7
  module DataFrameDisplayable
8
+ INDEX_KEY = :index_key_for_format_table
9
+
8
10
  def to_s
9
11
  return '' if empty?
10
12
 
@@ -139,7 +141,7 @@ module RedAmber
139
141
  original = self
140
142
  indices = size > head + tail ? [*0...head, *(size - tail)...size] : [*0...size]
141
143
  df = slice(indices).assign do
142
- assigner = { '': indices.map { |i| (i + 1).to_s } }
144
+ assigner = { INDEX_KEY => indices.map { |i| (i + 1).to_s } }
143
145
  vectors.each_with_object(assigner) do |v, a|
144
146
  a[v.key] = v.to_a.map do |e|
145
147
  if e.nil?
@@ -155,12 +157,12 @@ module RedAmber
155
157
  end
156
158
  end
157
159
 
158
- df = df.pick { [keys[-1], keys[0..-2]] }
160
+ df = df.pick { [INDEX_KEY, keys - [INDEX_KEY]] }
159
161
  df = size > head + tail ? df[0, 0, 0...head, 0, -tail..-1] : df[0, 0, 0..-1]
160
162
  df = df.assign do
161
163
  vectors.each_with_object({}) do |v, assigner|
162
- vec = v.replace(0, v.key.to_s)
163
- .replace(1, v.key == :'' ? '' : "<#{original[v.key].type}>")
164
+ vec = v.replace(0, v.key == INDEX_KEY ? '' : v.key.to_s)
165
+ .replace(1, v.key == INDEX_KEY ? '' : "<#{original[v.key].type}>")
164
166
  assigner[v.key] = size > head + tail ? vec.replace(head + 2, ':') : vec
165
167
  end
166
168
  end
@@ -197,7 +199,7 @@ module RedAmber
197
199
  end
198
200
 
199
201
  def format_for_column(vector, original, width)
200
- if vector.key != :'' && !original[vector.key].numeric?
202
+ if vector.key != INDEX_KEY && !original[vector.key].numeric?
201
203
  "%-#{width}s"
202
204
  else
203
205
  "%#{width}s"
@@ -16,36 +16,30 @@ module RedAmber
16
16
  @group = @table.group(*@group_keys)
17
17
  end
18
18
 
19
- def count(*summary_keys)
20
- by(:count, summary_keys)
19
+ functions = %i[count sum product mean min max stddev variance]
20
+ functions.each do |function|
21
+ define_method(function) do |*summary_keys|
22
+ by(function, summary_keys)
23
+ end
21
24
  end
22
25
 
23
- def sum(*summary_keys)
24
- by(:sum, summary_keys)
26
+ def inspect
27
+ tallys = @dataframe.pick(@group_keys).vectors.map.with_object({}) do |v, h|
28
+ h[v.key] = v.tally
29
+ end
30
+ "#<#{self.class}:#{format('0x%016x', object_id)}\n#{tallys}>"
25
31
  end
26
32
 
27
- def product(*summary_keys)
28
- by(:product, summary_keys)
29
- end
30
-
31
- def mean(*summary_keys)
32
- by(:mean, summary_keys)
33
- end
34
-
35
- def min(*summary_keys)
36
- by(:min, summary_keys)
37
- end
38
-
39
- def max(*summary_keys)
40
- by(:max, summary_keys)
41
- end
42
-
43
- def stddev(*summary_keys)
44
- by(:stddev, summary_keys)
45
- end
46
-
47
- def variance(*summary_keys)
48
- by(:variance, summary_keys)
33
+ def summarize(&block)
34
+ agg = instance_eval(&block)
35
+ case agg
36
+ when DataFrame
37
+ agg
38
+ when Array
39
+ agg.reduce { |aggregated, df| aggregated.assign(df.to_h) }
40
+ else
41
+ raise GroupArgumentError, "Unknown argument: #{agg}"
42
+ end
49
43
  end
50
44
 
51
45
  private
@@ -55,7 +49,11 @@ module RedAmber
55
49
  d = summary_keys - @dataframe.keys
56
50
  raise GroupArgumentError, "#{d} is not a key of\n #{@dataframe}." unless summary_keys.empty? || d.empty?
57
51
 
58
- RedAmber::DataFrame.new(@group.send(func, *summary_keys))
52
+ df = RedAmber::DataFrame.new(@group.send(func, *summary_keys))
53
+ df = df[df.keys[-1], df.keys[0...-1]]
54
+ # if counts are the same (no nil included), aggregate count columns.
55
+ df = df[df.keys[0..1]].rename(df.keys[1], :count) if func == :count && df.to_h.values[1..].uniq.size == 1
56
+ df
59
57
  end
60
58
  end
61
59
  end
@@ -64,6 +64,8 @@ module RedAmber
64
64
  return filter_by_array(arg)
65
65
  when Arrow::Array
66
66
  array = arg
67
+ when Range
68
+ array = normalize_element(arg)
67
69
  else
68
70
  unless arg.is_a?(Numeric) || booleans?([arg])
69
71
  raise VectorArgumentError, "Argument must be numeric or boolean: #{args}"
@@ -12,7 +12,15 @@ module RedAmber
12
12
  # @param replacer [Array, Vector, Arrow::Array] new data to replace for.
13
13
  # @return [Vector] Replaced new Vector
14
14
  def replace(args, replacer)
15
- args = args.is_a?(Array) ? args : Array(args)
15
+ args =
16
+ case args
17
+ when Array
18
+ args
19
+ when Range
20
+ normalize_element(args)
21
+ else
22
+ Array(args)
23
+ end
16
24
  replacer = Array(replacer)
17
25
  return self if args.empty? || args[0].nil?
18
26
 
@@ -22,6 +30,7 @@ module RedAmber
22
30
  if vector.boolean?
23
31
  vector
24
32
  elsif vector.numeric?
33
+ replacer.sort_by! { |x| args[replacer.index(x)] } if replacer # rubocop:disable Style/SafeNavigation
25
34
  Vector.new(indices).is_in(vector)
26
35
  else
27
36
  raise VectorArgumentError, "Invalid data type #{args}"
@@ -50,6 +59,18 @@ module RedAmber
50
59
  is_nil.if_else(false, self).invert
51
60
  end
52
61
 
62
+ def shift(amount = 1, fill: nil)
63
+ raise VectorArgumentError, 'Shift amount is too large' if amount.abs > size
64
+
65
+ if amount.positive?
66
+ replace(amount..-1, self[0...-amount]).replace(0...amount, fill)
67
+ elsif amount.negative?
68
+ replace(0...amount, self[-amount..]).replace(amount..-1, fill)
69
+ else # amount == 0
70
+ self
71
+ end
72
+ end
73
+
53
74
  private
54
75
 
55
76
  # [Ternary]: replace_with(booleans, replacements) => vector
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module RedAmber
4
- VERSION = '0.1.7'
4
+ VERSION = '0.1.8'
5
5
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: red_amber
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.7
4
+ version: 0.1.8
5
5
  platform: ruby
6
6
  authors:
7
7
  - Hirokazu SUZUKI (heronshoes)
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2022-07-15 00:00:00.000000000 Z
11
+ date: 2022-08-03 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: red-arrow
@@ -34,6 +34,7 @@ extra_rdoc_files: []
34
34
  files:
35
35
  - ".rubocop.yml"
36
36
  - ".rubocop_todo.yml"
37
+ - ".yardopts"
37
38
  - CHANGELOG.md
38
39
  - Gemfile
39
40
  - LICENSE
@@ -41,10 +42,10 @@ files:
41
42
  - Rakefile
42
43
  - benchmark/csv_load_penguins.yml
43
44
  - benchmark/drop_nil.yml
44
- - doc/47_examples_of_red_amber.ipynb
45
45
  - doc/CODE_OF_CONDUCT.md
46
46
  - doc/DataFrame.md
47
47
  - doc/Vector.md
48
+ - doc/examples_of_red_amber.ipynb
48
49
  - doc/image/arrow_table_new.png
49
50
  - doc/image/dataframe/assign.png
50
51
  - doc/image/dataframe/drop.png