red_amber 0.1.5 → 0.1.8
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.rubocop.yml +33 -5
- data/.rubocop_todo.yml +2 -15
- data/.yardopts +1 -0
- data/CHANGELOG.md +164 -18
- data/Gemfile +6 -1
- data/README.md +247 -33
- data/Rakefile +1 -0
- data/benchmark/csv_load_penguins.yml +1 -1
- data/doc/DataFrame.md +383 -219
- data/doc/Vector.md +247 -37
- data/doc/examples_of_red_amber.ipynb +5454 -0
- data/doc/image/dataframe/assign.png +0 -0
- data/doc/image/dataframe/drop.png +0 -0
- data/doc/image/dataframe/pick.png +0 -0
- data/doc/image/dataframe/remove.png +0 -0
- data/doc/image/dataframe/rename.png +0 -0
- data/doc/image/dataframe/slice.png +0 -0
- data/doc/image/dataframe_model.png +0 -0
- data/doc/image/vector/binary_element_wise.png +0 -0
- data/doc/image/vector/unary_aggregation.png +0 -0
- data/doc/image/vector/unary_aggregation_w_option.png +0 -0
- data/doc/image/vector/unary_element_wise.png +0 -0
- data/lib/red-amber.rb +3 -0
- data/lib/red_amber/data_frame.rb +62 -10
- data/lib/red_amber/data_frame_displayable.rb +86 -9
- data/lib/red_amber/data_frame_selectable.rb +151 -32
- data/lib/red_amber/data_frame_variable_operation.rb +4 -0
- data/lib/red_amber/group.rb +59 -0
- data/lib/red_amber/helper.rb +61 -0
- data/lib/red_amber/vector.rb +59 -15
- data/lib/red_amber/vector_functions.rb +47 -38
- data/lib/red_amber/vector_selectable.rb +126 -0
- data/lib/red_amber/vector_updatable.rb +125 -0
- data/lib/red_amber/version.rb +1 -1
- data/lib/red_amber.rb +6 -3
- data/red_amber.gemspec +0 -2
- metadata +9 -33
- data/lib/red_amber/data_frame_helper.rb +0 -64
- data/lib/red_amber/data_frame_observation_operation.rb +0 -83
- data/lib/red_amber/vector_compensable.rb +0 -68
@@ -1,64 +0,0 @@
|
|
1
|
-
# frozen_string_literal: true
|
2
|
-
|
3
|
-
module RedAmber
  # Private helpers mixed into the class DataFrame.
  #
  # The including class must respond to `size` and `keys`, and keep the
  # underlying table in `@table`.
  module DataFrameHelper
    private

    # Expand every Range in +args+ into its elements, then shift negative
    # Integers by `size` so that they count from the tail.
    #
    # @param args [Array] mixture of Ranges and any other objects
    # @return [Array] flat array; other elements are passed through untouched
    # @raise [DataFrameArgumentError] when an Integer range end is out of range
    def expand_range(args)
      expanded = args.each_with_object([]) do |e, a|
        e.is_a?(Range) ? a.concat(normalized_array(e)) : a.append(e)
      end
      expanded.map do |e|
        e.is_a?(Integer) && e.negative? ? e + size : e
      end
    end

    # Convert a Range into an Array of its elements.
    #
    # A range with at least one Integer end (or with both ends nil) is treated
    # as a range of indices and resolved against `0...size`; any other range is
    # expanded with Range#to_a.
    #
    # @param range [Range]
    # @return [Array]
    # @raise [DataFrameArgumentError] when an end is >= size or < -size
    def normalized_array(range)
      both_end = [range.begin, range.end]
      # compare against the last included index for `a...b`
      both_end[1] -= 1 if range.exclude_end? && range.end.is_a?(Integer)

      if both_end.any?(Integer) || both_end.all?(&:nil?)
        # nil ends (beginless / endless ranges) are skipped by the safe navigation
        if both_end.any? { |e| e&.>=(size) || e&.<(-size) }
          raise DataFrameArgumentError, "Index out of range: #{range} for 0..#{size - 1}"
        end

        (0...size).to_a[range]
      else
        range.to_a
      end
    end

    # Tell whether any index lies outside of `-size...size`.
    #
    # An empty collection has no offending index, so it is never out of range
    # (`max`/`min` would return nil there and could not be compared).
    #
    # @param indeces [Enumerable<Integer>]
    # @return [Boolean]
    def out_of_range?(indeces)
      return false if indeces.none?

      indeces.max >= size || indeces.min < -size
    end

    # @return [Boolean] true when every element is an Integer
    def integers?(enum)
      enum.all?(Integer)
    end

    # @return [Boolean] true when every element is a Symbol or a String
    def sym_or_str?(enum)
      enum.all? { |e| e.is_a?(Symbol) || e.is_a?(String) }
    end

    # @return [Boolean] true when every element is true, false or nil
    def booleans?(enum)
      enum.all? { |e| e.is_a?(TrueClass) || e.is_a?(FalseClass) || e.is_a?(NilClass) }
    end

    # Build a DataFrame with a single variable +key+ from `vector.data`.
    def create_dataframe_from_vector(key, vector)
      DataFrame.new(key => vector.data)
    end

    # Build a DataFrame from `@table` filtered by +array+.
    def select_obs_by_boolean(array)
      DataFrame.new(@table.filter(array))
    end

    # Pick the keys whose position holds a truthy value in +booleans+.
    #
    # @param booleans [#[]] object indexed by key position
    # @return [Array] selected keys, in the original order
    def keys_by_booleans(booleans)
      keys.select.with_index { |_, i| booleans[i] }
    end
  end
end
|
@@ -1,83 +0,0 @@
|
|
1
|
-
# frozen_string_literal: true
|
2
|
-
|
3
|
-
module RedAmber
  # Mix-in for the class DataFrame: operations on observations (rows).
  #
  # Relies on methods supplied by the including class or by sibling mix-ins:
  # `size`, `keys`, `table`, `indexes`, `map_indices`, `expand_range`,
  # `integers?`, `booleans?` and `select_obs_by_boolean`.
  module DataFrameObservationOperation
    # Slice and select some observations to create a sub DataFrame.
    #
    # The selector is given either as arguments or as the value of a block
    # evaluated in the context of self (never both). It is interpreted as
    # - a boolean filter, when its first element is a Vector or an
    #   Arrow::BooleanArray, or when it is `size` elements of true/false/nil;
    # - otherwise indices (Integers and Ranges, negative values count from the tail).
    # An empty selector, or one starting with nil, selects nothing.
    #
    # @return [DataFrame]
    # @raise [DataFrameArgumentError] for arguments plus block, or an invalid selector
    def slice(*args, &block)
      slicer = args
      if block
        raise DataFrameArgumentError, 'Must not specify both arguments and block.' unless args.empty?

        slicer = instance_eval(&block)
      end
      slicer = [slicer].flatten
      return remove_all_values if slicer.empty? || slicer[0].nil?

      # filter with same length
      booleans = boolean_filter_for(slicer)
      return select_obs_by_boolean(booleans) if booleans

      # filter with indexes
      slicer = expand_range(slicer)
      return map_indices(*slicer) if integers?(slicer)

      raise DataFrameArgumentError, "Invalid argument #{args}"
    end

    # Remove the selected observations to create a sub DataFrame.
    #
    # Accepts the same selector forms as #slice. With a boolean filter each
    # element is negated by `!`, so observations marked nil are kept.
    # An empty selector returns self.
    #
    # @return [DataFrame]
    # @raise [DataFrameArgumentError] for arguments plus block, or an invalid selector
    def remove(*args, &block)
      remover = args
      if block
        raise DataFrameArgumentError, 'Must not specify both arguments and block.' unless args.empty?

        remover = instance_eval(&block)
      end
      remover = [remover].flatten

      return self if remover.empty?

      # filter with same length
      booleans = boolean_filter_for(remover)
      return select_obs_by_boolean(booleans.map(&:!)) if booleans

      # filter with indexes
      targets = expand_range(remover)
      # Validate what was requested, not what is left after the subtraction:
      # the remainder of `indexes` can never reveal a bad argument.
      # nil entries stay tolerated (they match no index), as before.
      raise DataFrameArgumentError, "Invalid argument #{args}" unless integers?(targets.compact)

      slicer = indexes.to_a - targets
      return remove_all_values if slicer.empty?
      return map_indices(*slicer) if integers?(slicer)

      raise DataFrameArgumentError, "Invalid argument #{args}"
    end

    # Create a DataFrame from the result of Arrow's `drop_null` function
    # applied to the table.
    #
    # @return [DataFrame]
    def remove_nil
      func = Arrow::Function.find(:drop_null)
      DataFrame.new(func.execute([table]).value)
    end
    alias_method :drop_nil, :remove_nil

    # Group the table by +aggregating_keys+ and aggregate +target_keys+.
    #
    # @param aggregating_keys [Array] keys passed to `table.group`
    # @param func [Symbol, String] public method called on the grouped table
    # @param target_keys [Array] arguments passed to that method
    # @return [DataFrame]
    def group(aggregating_keys, func, target_keys)
      t = table.group(*aggregating_keys)
      # public_send: a caller-supplied name must not reach private methods
      RedAmber::DataFrame.new(t.public_send(func, *target_keys))
    end

    private

    # Return the boolean filter described by +selector+ (a non-empty Array),
    # or nil when the selector is not a boolean filter.
    def boolean_filter_for(selector)
      head = selector[0]
      return head.to_a if head.is_a?(Vector) || head.is_a?(Arrow::BooleanArray)

      selector if selector.size == size && booleans?(selector)
    end

    # return a DataFrame with same keys as self without values
    def remove_all_values
      DataFrame.new(keys.each_with_object({}) { |key, h| h[key] = [] })
    end
  end
end
|
@@ -1,68 +0,0 @@
|
|
1
|
-
# frozen_string_literal: true
|
2
|
-
|
3
|
-
# Available functions in Arrow are shown by `Arrow::Function.all.map(&:name)`
|
4
|
-
# reference: https://arrow.apache.org/docs/cpp/compute.html
|
5
|
-
|
6
|
-
module RedAmber
  # Mix-in for the class Vector.
  # Functions that fill in or substitute data (missing values in particular).
  module VectorCompensable
    # [Ternary]: replace_with(booleans, replacements) => vector
    # Replace the items selected by a boolean mask.
    #
    # (from Arrow C++ inline doc.)
    # Given an array and a boolean mask (either scalar or of equal length),
    # along with replacement values (either scalar or array),
    # each element of the array for which the corresponding mask element is
    # true will be replaced by the next value from the replacements,
    # or with null if the mask is null.
    # Hence, for replacement arrays, len(replacements) == sum(mask == true).
    #
    # booleans: an Arrow::BooleanArray, a boolean Vector, or an Array of
    # true/false/nil; it must have the same length as self and at least one
    # truthy element.
    # replacements: a scalar (repeated once per true in the mask), an Array,
    # an Arrow::Array or a Vector; nil or empty stands for a single nil.
    def replace_with(booleans, replacements = nil)
      mask =
        case booleans
        when Arrow::BooleanArray
          booleans
        when Vector
          booleans.data if booleans.boolean?
        when Array
          Arrow::BooleanArray.new(booleans) if booleans?(booleans)
        end
      raise VectorTypeError, 'Not a valid type' unless mask
      raise VectorArgumentError, 'Booleans size unmatch' unless mask.length == size
      raise VectorArgumentError, 'Booleans not have any `true`' unless mask.any?

      # a scalar becomes [scalar]; nothing at all becomes [nil]
      candidates = Array(replacements)
      candidates = [nil] if candidates.empty?

      substitutes =
        if candidates.size != 1
          Arrow::Array.new(candidates)
        elsif replacements.is_a?(Arrow::Array)
          replacements
        elsif replacements.is_a?(Vector)
          replacements.data
        else
          # broadcast the single value over every true in the mask
          Arrow::Array.new(candidates * mask.to_a.count(true))
        end
      # rebuild all-nil replacements with the class of self's data
      substitutes = data.class.new(substitutes) if substitutes.uniq == [nil]

      unless Array(mask).count(true) == substitutes.length
        raise VectorArgumentError, 'Replacements size unmatch'
      end

      # rebuild self's data with the class of the replacements
      source = substitutes.class.new(data)

      take_out_element_wise(
        find('replace_with_mask').execute([source, mask, substitutes])
      )
    end

    # (related functions)
    # fill_null_backward, fill_null_forward

    private

    # True when every element of +enum+ is true, false or nil.
    def booleans?(enum)
      enum.all? do |item|
        [TrueClass, FalseClass, NilClass].any? { |klass| item.is_a?(klass) }
      end
    end
  end
end
|