red_amber 0.1.7 → 0.1.8
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.rubocop.yml +7 -1
- data/.rubocop_todo.yml +2 -15
- data/.yardopts +1 -0
- data/CHANGELOG.md +35 -0
- data/Gemfile +1 -0
- data/README.md +206 -16
- data/doc/DataFrame.md +63 -73
- data/doc/Vector.md +25 -0
- data/doc/{47_examples_of_red_amber.ipynb → examples_of_red_amber.ipynb} +693 -111
- data/lib/red_amber/data_frame.rb +26 -8
- data/lib/red_amber/data_frame_displayable.rb +7 -5
- data/lib/red_amber/group.rb +25 -27
- data/lib/red_amber/vector_selectable.rb +2 -0
- data/lib/red_amber/vector_updatable.rb +22 -1
- data/lib/red_amber/version.rb +1 -1
- metadata +4 -3
data/lib/red_amber/data_frame.rb
CHANGED
@@ -13,10 +13,7 @@ module RedAmber
|
|
13
13
|
|
14
14
|
def initialize(*args)
|
15
15
|
@variables = @keys = @vectors = @types = @data_types = nil
|
16
|
-
|
17
|
-
# [Arrow::Table] == [nil] shows ArgumentError
|
18
|
-
# temporary use yoda condition to workaround
|
19
|
-
if args.empty? || args == [[]] || args == [{}] || [nil] == args
|
16
|
+
if args.empty? || args[0] == [] || args[0] == {} || args[0].nil?
|
20
17
|
# DataFrame.new, DataFrame.new([]), DataFrame.new({}), DataFrame.new(nil)
|
21
18
|
# returns empty DataFrame
|
22
19
|
@table = Arrow::Table.new({}, [])
|
@@ -34,6 +31,7 @@ module RedAmber
|
|
34
31
|
raise DataFrameTypeError, "invalid argument: #{arg}"
|
35
32
|
end
|
36
33
|
end
|
34
|
+
name_unnamed_keys
|
37
35
|
end
|
38
36
|
|
39
37
|
def self.load(path, options = {})
|
@@ -78,12 +76,12 @@ module RedAmber
|
|
78
76
|
alias_method :var_names, :keys
|
79
77
|
|
80
78
|
def key?(key)
|
81
|
-
|
79
|
+
keys.include?(key.to_sym)
|
82
80
|
end
|
83
81
|
alias_method :has_key?, :key?
|
84
82
|
|
85
83
|
def key_index(key)
|
86
|
-
|
84
|
+
keys.find_index(key.to_sym)
|
87
85
|
end
|
88
86
|
alias_method :find_index, :key_index
|
89
87
|
alias_method :index, :key_index
|
@@ -144,8 +142,10 @@ module RedAmber
|
|
144
142
|
end
|
145
143
|
end
|
146
144
|
|
147
|
-
def group(*group_keys)
|
148
|
-
Group.new(self, group_keys)
|
145
|
+
def group(*group_keys, &block)
|
146
|
+
g = Group.new(self, group_keys)
|
147
|
+
g = g.summarize(&block) if block
|
148
|
+
g
|
149
149
|
end
|
150
150
|
|
151
151
|
private
|
@@ -182,5 +182,23 @@ module RedAmber
|
|
182
182
|
html = IRuby::HTML.table(converted.to_h, maxrows: 8, maxcols: 15)
|
183
183
|
"#{self.class} <#{size} x #{n_keys} vector#{pl(n_keys)}> #{html}"
|
184
184
|
end
|
185
|
+
|
186
|
+
def name_unnamed_keys
|
187
|
+
return unless @table[:'']
|
188
|
+
|
189
|
+
# We can't use #keys because it causes mismatch of @table and @keys
|
190
|
+
keys = @table.schema.fields.map { |f| f.name.to_sym }
|
191
|
+
unnamed = (:unnamed1..).find { |e| !keys.include?(e) }
|
192
|
+
fields =
|
193
|
+
@table.schema.fields.map do |field|
|
194
|
+
if field.name.empty?
|
195
|
+
Arrow::Field.new(unnamed, field.data_type)
|
196
|
+
else
|
197
|
+
field
|
198
|
+
end
|
199
|
+
end
|
200
|
+
schema = Arrow::Schema.new(fields)
|
201
|
+
@table = Arrow::Table.new(schema, @table.columns)
|
202
|
+
end
|
185
203
|
end
|
186
204
|
end
|
@@ -5,6 +5,8 @@ require 'stringio'
|
|
5
5
|
module RedAmber
|
6
6
|
# mix-ins for the class DataFrame
|
7
7
|
module DataFrameDisplayable
|
8
|
+
INDEX_KEY = :index_key_for_format_table
|
9
|
+
|
8
10
|
def to_s
|
9
11
|
return '' if empty?
|
10
12
|
|
@@ -139,7 +141,7 @@ module RedAmber
|
|
139
141
|
original = self
|
140
142
|
indices = size > head + tail ? [*0...head, *(size - tail)...size] : [*0...size]
|
141
143
|
df = slice(indices).assign do
|
142
|
-
assigner = {
|
144
|
+
assigner = { INDEX_KEY => indices.map { |i| (i + 1).to_s } }
|
143
145
|
vectors.each_with_object(assigner) do |v, a|
|
144
146
|
a[v.key] = v.to_a.map do |e|
|
145
147
|
if e.nil?
|
@@ -155,12 +157,12 @@ module RedAmber
|
|
155
157
|
end
|
156
158
|
end
|
157
159
|
|
158
|
-
df = df.pick { [keys
|
160
|
+
df = df.pick { [INDEX_KEY, keys - [INDEX_KEY]] }
|
159
161
|
df = size > head + tail ? df[0, 0, 0...head, 0, -tail..-1] : df[0, 0, 0..-1]
|
160
162
|
df = df.assign do
|
161
163
|
vectors.each_with_object({}) do |v, assigner|
|
162
|
-
vec = v.replace(0, v.key.to_s)
|
163
|
-
.replace(1, v.key ==
|
164
|
+
vec = v.replace(0, v.key == INDEX_KEY ? '' : v.key.to_s)
|
165
|
+
.replace(1, v.key == INDEX_KEY ? '' : "<#{original[v.key].type}>")
|
164
166
|
assigner[v.key] = size > head + tail ? vec.replace(head + 2, ':') : vec
|
165
167
|
end
|
166
168
|
end
|
@@ -197,7 +199,7 @@ module RedAmber
|
|
197
199
|
end
|
198
200
|
|
199
201
|
def format_for_column(vector, original, width)
|
200
|
-
if vector.key !=
|
202
|
+
if vector.key != INDEX_KEY && !original[vector.key].numeric?
|
201
203
|
"%-#{width}s"
|
202
204
|
else
|
203
205
|
"%#{width}s"
|
data/lib/red_amber/group.rb
CHANGED
@@ -16,36 +16,30 @@ module RedAmber
|
|
16
16
|
@group = @table.group(*@group_keys)
|
17
17
|
end
|
18
18
|
|
19
|
-
|
20
|
-
|
19
|
+
functions = %i[count sum product mean min max stddev variance]
|
20
|
+
functions.each do |function|
|
21
|
+
define_method(function) do |*summary_keys|
|
22
|
+
by(function, summary_keys)
|
23
|
+
end
|
21
24
|
end
|
22
25
|
|
23
|
-
def
|
24
|
-
|
26
|
+
def inspect
|
27
|
+
tallys = @dataframe.pick(@group_keys).vectors.map.with_object({}) do |v, h|
|
28
|
+
h[v.key] = v.tally
|
29
|
+
end
|
30
|
+
"#<#{self.class}:#{format('0x%016x', object_id)}\n#{tallys}>"
|
25
31
|
end
|
26
32
|
|
27
|
-
def
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
end
|
38
|
-
|
39
|
-
def max(*summary_keys)
|
40
|
-
by(:max, summary_keys)
|
41
|
-
end
|
42
|
-
|
43
|
-
def stddev(*summary_keys)
|
44
|
-
by(:stddev, summary_keys)
|
45
|
-
end
|
46
|
-
|
47
|
-
def variance(*summary_keys)
|
48
|
-
by(:variance, summary_keys)
|
33
|
+
def summarize(&block)
|
34
|
+
agg = instance_eval(&block)
|
35
|
+
case agg
|
36
|
+
when DataFrame
|
37
|
+
agg
|
38
|
+
when Array
|
39
|
+
agg.reduce { |aggregated, df| aggregated.assign(df.to_h) }
|
40
|
+
else
|
41
|
+
raise GroupArgumentError, "Unknown argument: #{agg}"
|
42
|
+
end
|
49
43
|
end
|
50
44
|
|
51
45
|
private
|
@@ -55,7 +49,11 @@ module RedAmber
|
|
55
49
|
d = summary_keys - @dataframe.keys
|
56
50
|
raise GroupArgumentError, "#{d} is not a key of\n #{@dataframe}." unless summary_keys.empty? || d.empty?
|
57
51
|
|
58
|
-
RedAmber::DataFrame.new(@group.send(func, *summary_keys))
|
52
|
+
df = RedAmber::DataFrame.new(@group.send(func, *summary_keys))
|
53
|
+
df = df[df.keys[-1], df.keys[0...-1]]
|
54
|
+
# if counts are the same (no nil included), aggregate count columns.
|
55
|
+
df = df[df.keys[0..1]].rename(df.keys[1], :count) if func == :count && df.to_h.values[1..].uniq.size == 1
|
56
|
+
df
|
59
57
|
end
|
60
58
|
end
|
61
59
|
end
|
@@ -64,6 +64,8 @@ module RedAmber
|
|
64
64
|
return filter_by_array(arg)
|
65
65
|
when Arrow::Array
|
66
66
|
array = arg
|
67
|
+
when Range
|
68
|
+
array = normalize_element(arg)
|
67
69
|
else
|
68
70
|
unless arg.is_a?(Numeric) || booleans?([arg])
|
69
71
|
raise VectorArgumentError, "Argument must be numeric or boolean: #{args}"
|
@@ -12,7 +12,15 @@ module RedAmber
|
|
12
12
|
# @param replacer [Array, Vector, Arrow::Array] new data to replace for.
|
13
13
|
# @return [Vector] Replaced new Vector
|
14
14
|
def replace(args, replacer)
|
15
|
-
args =
|
15
|
+
args =
|
16
|
+
case args
|
17
|
+
when Array
|
18
|
+
args
|
19
|
+
when Range
|
20
|
+
normalize_element(args)
|
21
|
+
else
|
22
|
+
Array(args)
|
23
|
+
end
|
16
24
|
replacer = Array(replacer)
|
17
25
|
return self if args.empty? || args[0].nil?
|
18
26
|
|
@@ -22,6 +30,7 @@ module RedAmber
|
|
22
30
|
if vector.boolean?
|
23
31
|
vector
|
24
32
|
elsif vector.numeric?
|
33
|
+
replacer.sort_by! { |x| args[replacer.index(x)] } if replacer # rubocop:disable Style/SafeNavigation
|
25
34
|
Vector.new(indices).is_in(vector)
|
26
35
|
else
|
27
36
|
raise VectorArgumentError, "Invalid data type #{args}"
|
@@ -50,6 +59,18 @@ module RedAmber
|
|
50
59
|
is_nil.if_else(false, self).invert
|
51
60
|
end
|
52
61
|
|
62
|
+
def shift(amount = 1, fill: nil)
|
63
|
+
raise VectorArgumentError, 'Shift amount is too large' if amount.abs > size
|
64
|
+
|
65
|
+
if amount.positive?
|
66
|
+
replace(amount..-1, self[0...-amount]).replace(0...amount, fill)
|
67
|
+
elsif amount.negative?
|
68
|
+
replace(0...amount, self[-amount..]).replace(amount..-1, fill)
|
69
|
+
else # amount == 0
|
70
|
+
self
|
71
|
+
end
|
72
|
+
end
|
73
|
+
|
53
74
|
private
|
54
75
|
|
55
76
|
# [Ternary]: replace_with(booleans, replacements) => vector
|
data/lib/red_amber/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: red_amber
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.
|
4
|
+
version: 0.1.8
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Hirokazu SUZUKI (heronshoes)
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2022-
|
11
|
+
date: 2022-08-03 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: red-arrow
|
@@ -34,6 +34,7 @@ extra_rdoc_files: []
|
|
34
34
|
files:
|
35
35
|
- ".rubocop.yml"
|
36
36
|
- ".rubocop_todo.yml"
|
37
|
+
- ".yardopts"
|
37
38
|
- CHANGELOG.md
|
38
39
|
- Gemfile
|
39
40
|
- LICENSE
|
@@ -41,10 +42,10 @@ files:
|
|
41
42
|
- Rakefile
|
42
43
|
- benchmark/csv_load_penguins.yml
|
43
44
|
- benchmark/drop_nil.yml
|
44
|
-
- doc/47_examples_of_red_amber.ipynb
|
45
45
|
- doc/CODE_OF_CONDUCT.md
|
46
46
|
- doc/DataFrame.md
|
47
47
|
- doc/Vector.md
|
48
|
+
- doc/examples_of_red_amber.ipynb
|
48
49
|
- doc/image/arrow_table_new.png
|
49
50
|
- doc/image/dataframe/assign.png
|
50
51
|
- doc/image/dataframe/drop.png
|