red_amber 0.1.7 → 0.1.8
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.rubocop.yml +7 -1
- data/.rubocop_todo.yml +2 -15
- data/.yardopts +1 -0
- data/CHANGELOG.md +35 -0
- data/Gemfile +1 -0
- data/README.md +206 -16
- data/doc/DataFrame.md +63 -73
- data/doc/Vector.md +25 -0
- data/doc/{47_examples_of_red_amber.ipynb → examples_of_red_amber.ipynb} +693 -111
- data/lib/red_amber/data_frame.rb +26 -8
- data/lib/red_amber/data_frame_displayable.rb +7 -5
- data/lib/red_amber/group.rb +25 -27
- data/lib/red_amber/vector_selectable.rb +2 -0
- data/lib/red_amber/vector_updatable.rb +22 -1
- data/lib/red_amber/version.rb +1 -1
- metadata +4 -3
data/lib/red_amber/data_frame.rb
CHANGED
@@ -13,10 +13,7 @@ module RedAmber
|
|
13
13
|
|
14
14
|
def initialize(*args)
|
15
15
|
@variables = @keys = @vectors = @types = @data_types = nil
|
16
|
-
|
17
|
-
# [Arrow::Table] == [nil] shows ArgumentError
|
18
|
-
# temporary use yoda condition to workaround
|
19
|
-
if args.empty? || args == [[]] || args == [{}] || [nil] == args
|
16
|
+
if args.empty? || args[0] == [] || args[0] == {} || args[0].nil?
|
20
17
|
# DataFrame.new, DataFrame.new([]), DataFrame.new({}), DataFrame.new(nil)
|
21
18
|
# returns empty DataFrame
|
22
19
|
@table = Arrow::Table.new({}, [])
|
@@ -34,6 +31,7 @@ module RedAmber
|
|
34
31
|
raise DataFrameTypeError, "invalid argument: #{arg}"
|
35
32
|
end
|
36
33
|
end
|
34
|
+
name_unnamed_keys
|
37
35
|
end
|
38
36
|
|
39
37
|
def self.load(path, options = {})
|
@@ -78,12 +76,12 @@ module RedAmber
|
|
78
76
|
alias_method :var_names, :keys
|
79
77
|
|
80
78
|
def key?(key)
|
81
|
-
|
79
|
+
keys.include?(key.to_sym)
|
82
80
|
end
|
83
81
|
alias_method :has_key?, :key?
|
84
82
|
|
85
83
|
def key_index(key)
|
86
|
-
|
84
|
+
keys.find_index(key.to_sym)
|
87
85
|
end
|
88
86
|
alias_method :find_index, :key_index
|
89
87
|
alias_method :index, :key_index
|
@@ -144,8 +142,10 @@ module RedAmber
|
|
144
142
|
end
|
145
143
|
end
|
146
144
|
|
147
|
-
def group(*group_keys)
|
148
|
-
Group.new(self, group_keys)
|
145
|
+
def group(*group_keys, &block)
|
146
|
+
g = Group.new(self, group_keys)
|
147
|
+
g = g.summarize(&block) if block
|
148
|
+
g
|
149
149
|
end
|
150
150
|
|
151
151
|
private
|
@@ -182,5 +182,23 @@ module RedAmber
|
|
182
182
|
html = IRuby::HTML.table(converted.to_h, maxrows: 8, maxcols: 15)
|
183
183
|
"#{self.class} <#{size} x #{n_keys} vector#{pl(n_keys)}> #{html}"
|
184
184
|
end
|
185
|
+
|
186
|
+
def name_unnamed_keys
|
187
|
+
return unless @table[:'']
|
188
|
+
|
189
|
+
# We can't use #keys because it causes mismatch of @table and @keys
|
190
|
+
keys = @table.schema.fields.map { |f| f.name.to_sym }
|
191
|
+
unnamed = (:unnamed1..).find { |e| !keys.include?(e) }
|
192
|
+
fields =
|
193
|
+
@table.schema.fields.map do |field|
|
194
|
+
if field.name.empty?
|
195
|
+
Arrow::Field.new(unnamed, field.data_type)
|
196
|
+
else
|
197
|
+
field
|
198
|
+
end
|
199
|
+
end
|
200
|
+
schema = Arrow::Schema.new(fields)
|
201
|
+
@table = Arrow::Table.new(schema, @table.columns)
|
202
|
+
end
|
185
203
|
end
|
186
204
|
end
|
data/lib/red_amber/data_frame_displayable.rb
CHANGED
@@ -5,6 +5,8 @@ require 'stringio'
|
|
5
5
|
module RedAmber
|
6
6
|
# mix-ins for the class DataFrame
|
7
7
|
module DataFrameDisplayable
|
8
|
+
INDEX_KEY = :index_key_for_format_table
|
9
|
+
|
8
10
|
def to_s
|
9
11
|
return '' if empty?
|
10
12
|
|
@@ -139,7 +141,7 @@ module RedAmber
|
|
139
141
|
original = self
|
140
142
|
indices = size > head + tail ? [*0...head, *(size - tail)...size] : [*0...size]
|
141
143
|
df = slice(indices).assign do
|
142
|
-
assigner = {
|
144
|
+
assigner = { INDEX_KEY => indices.map { |i| (i + 1).to_s } }
|
143
145
|
vectors.each_with_object(assigner) do |v, a|
|
144
146
|
a[v.key] = v.to_a.map do |e|
|
145
147
|
if e.nil?
|
@@ -155,12 +157,12 @@ module RedAmber
|
|
155
157
|
end
|
156
158
|
end
|
157
159
|
|
158
|
-
df = df.pick { [keys
|
160
|
+
df = df.pick { [INDEX_KEY, keys - [INDEX_KEY]] }
|
159
161
|
df = size > head + tail ? df[0, 0, 0...head, 0, -tail..-1] : df[0, 0, 0..-1]
|
160
162
|
df = df.assign do
|
161
163
|
vectors.each_with_object({}) do |v, assigner|
|
162
|
-
vec = v.replace(0, v.key.to_s)
|
163
|
-
.replace(1, v.key ==
|
164
|
+
vec = v.replace(0, v.key == INDEX_KEY ? '' : v.key.to_s)
|
165
|
+
.replace(1, v.key == INDEX_KEY ? '' : "<#{original[v.key].type}>")
|
164
166
|
assigner[v.key] = size > head + tail ? vec.replace(head + 2, ':') : vec
|
165
167
|
end
|
166
168
|
end
|
@@ -197,7 +199,7 @@ module RedAmber
|
|
197
199
|
end
|
198
200
|
|
199
201
|
def format_for_column(vector, original, width)
|
200
|
-
if vector.key !=
|
202
|
+
if vector.key != INDEX_KEY && !original[vector.key].numeric?
|
201
203
|
"%-#{width}s"
|
202
204
|
else
|
203
205
|
"%#{width}s"
|
data/lib/red_amber/group.rb
CHANGED
@@ -16,36 +16,30 @@ module RedAmber
|
|
16
16
|
@group = @table.group(*@group_keys)
|
17
17
|
end
|
18
18
|
|
19
|
-
|
20
|
-
|
19
|
+
functions = %i[count sum product mean min max stddev variance]
|
20
|
+
functions.each do |function|
|
21
|
+
define_method(function) do |*summary_keys|
|
22
|
+
by(function, summary_keys)
|
23
|
+
end
|
21
24
|
end
|
22
25
|
|
23
|
-
def
|
24
|
-
|
26
|
+
def inspect
|
27
|
+
tallys = @dataframe.pick(@group_keys).vectors.map.with_object({}) do |v, h|
|
28
|
+
h[v.key] = v.tally
|
29
|
+
end
|
30
|
+
"#<#{self.class}:#{format('0x%016x', object_id)}\n#{tallys}>"
|
25
31
|
end
|
26
32
|
|
27
|
-
def
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
end
|
38
|
-
|
39
|
-
def max(*summary_keys)
|
40
|
-
by(:max, summary_keys)
|
41
|
-
end
|
42
|
-
|
43
|
-
def stddev(*summary_keys)
|
44
|
-
by(:stddev, summary_keys)
|
45
|
-
end
|
46
|
-
|
47
|
-
def variance(*summary_keys)
|
48
|
-
by(:variance, summary_keys)
|
33
|
+
def summarize(&block)
|
34
|
+
agg = instance_eval(&block)
|
35
|
+
case agg
|
36
|
+
when DataFrame
|
37
|
+
agg
|
38
|
+
when Array
|
39
|
+
agg.reduce { |aggregated, df| aggregated.assign(df.to_h) }
|
40
|
+
else
|
41
|
+
raise GroupArgumentError, "Unknown argument: #{agg}"
|
42
|
+
end
|
49
43
|
end
|
50
44
|
|
51
45
|
private
|
@@ -55,7 +49,11 @@ module RedAmber
|
|
55
49
|
d = summary_keys - @dataframe.keys
|
56
50
|
raise GroupArgumentError, "#{d} is not a key of\n #{@dataframe}." unless summary_keys.empty? || d.empty?
|
57
51
|
|
58
|
-
RedAmber::DataFrame.new(@group.send(func, *summary_keys))
|
52
|
+
df = RedAmber::DataFrame.new(@group.send(func, *summary_keys))
|
53
|
+
df = df[df.keys[-1], df.keys[0...-1]]
|
54
|
+
# if counts are the same (no nil included), aggregate count columns.
|
55
|
+
df = df[df.keys[0..1]].rename(df.keys[1], :count) if func == :count && df.to_h.values[1..].uniq.size == 1
|
56
|
+
df
|
59
57
|
end
|
60
58
|
end
|
61
59
|
end
|
data/lib/red_amber/vector_selectable.rb
CHANGED
@@ -64,6 +64,8 @@ module RedAmber
|
|
64
64
|
return filter_by_array(arg)
|
65
65
|
when Arrow::Array
|
66
66
|
array = arg
|
67
|
+
when Range
|
68
|
+
array = normalize_element(arg)
|
67
69
|
else
|
68
70
|
unless arg.is_a?(Numeric) || booleans?([arg])
|
69
71
|
raise VectorArgumentError, "Argument must be numeric or boolean: #{args}"
|
data/lib/red_amber/vector_updatable.rb
CHANGED
@@ -12,7 +12,15 @@ module RedAmber
|
|
12
12
|
# @param replacer [Array, Vector, Arrow::Array] new data to replace for.
|
13
13
|
# @return [Vector] Replaced new Vector
|
14
14
|
def replace(args, replacer)
|
15
|
-
args =
|
15
|
+
args =
|
16
|
+
case args
|
17
|
+
when Array
|
18
|
+
args
|
19
|
+
when Range
|
20
|
+
normalize_element(args)
|
21
|
+
else
|
22
|
+
Array(args)
|
23
|
+
end
|
16
24
|
replacer = Array(replacer)
|
17
25
|
return self if args.empty? || args[0].nil?
|
18
26
|
|
@@ -22,6 +30,7 @@ module RedAmber
|
|
22
30
|
if vector.boolean?
|
23
31
|
vector
|
24
32
|
elsif vector.numeric?
|
33
|
+
replacer.sort_by! { |x| args[replacer.index(x)] } if replacer # rubocop:disable Style/SafeNavigation
|
25
34
|
Vector.new(indices).is_in(vector)
|
26
35
|
else
|
27
36
|
raise VectorArgumentError, "Invalid data type #{args}"
|
@@ -50,6 +59,18 @@ module RedAmber
|
|
50
59
|
is_nil.if_else(false, self).invert
|
51
60
|
end
|
52
61
|
|
62
|
+
def shift(amount = 1, fill: nil)
|
63
|
+
raise VectorArgumentError, 'Shift amount is too large' if amount.abs > size
|
64
|
+
|
65
|
+
if amount.positive?
|
66
|
+
replace(amount..-1, self[0...-amount]).replace(0...amount, fill)
|
67
|
+
elsif amount.negative?
|
68
|
+
replace(0...amount, self[-amount..]).replace(amount..-1, fill)
|
69
|
+
else # amount == 0
|
70
|
+
self
|
71
|
+
end
|
72
|
+
end
|
73
|
+
|
53
74
|
private
|
54
75
|
|
55
76
|
# [Ternary]: replace_with(booleans, replacements) => vector
|
data/lib/red_amber/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: red_amber
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.7
|
4
|
+
version: 0.1.8
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Hirokazu SUZUKI (heronshoes)
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2022-
|
11
|
+
date: 2022-08-03 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: red-arrow
|
@@ -34,6 +34,7 @@ extra_rdoc_files: []
|
|
34
34
|
files:
|
35
35
|
- ".rubocop.yml"
|
36
36
|
- ".rubocop_todo.yml"
|
37
|
+
- ".yardopts"
|
37
38
|
- CHANGELOG.md
|
38
39
|
- Gemfile
|
39
40
|
- LICENSE
|
@@ -41,10 +42,10 @@ files:
|
|
41
42
|
- Rakefile
|
42
43
|
- benchmark/csv_load_penguins.yml
|
43
44
|
- benchmark/drop_nil.yml
|
44
|
-
- doc/47_examples_of_red_amber.ipynb
|
45
45
|
- doc/CODE_OF_CONDUCT.md
|
46
46
|
- doc/DataFrame.md
|
47
47
|
- doc/Vector.md
|
48
|
+
- doc/examples_of_red_amber.ipynb
|
48
49
|
- doc/image/arrow_table_new.png
|
49
50
|
- doc/image/dataframe/assign.png
|
50
51
|
- doc/image/dataframe/drop.png
|