red_amber 0.1.2 → 0.1.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.rubocop.yml +21 -10
- data/CHANGELOG.md +162 -6
- data/Gemfile +3 -0
- data/README.md +89 -303
- data/benchmark/csv_load_penguins.yml +15 -0
- data/benchmark/drop_nil.yml +11 -0
- data/doc/DataFrame.md +840 -0
- data/doc/Vector.md +317 -0
- data/doc/image/arrow_table_new.png +0 -0
- data/doc/image/dataframe/assign.png +0 -0
- data/doc/image/dataframe/drop.png +0 -0
- data/doc/image/dataframe/pick.png +0 -0
- data/doc/image/dataframe/remove.png +0 -0
- data/doc/image/dataframe/rename.png +0 -0
- data/doc/image/dataframe/slice.png +0 -0
- data/doc/image/dataframe_model.png +0 -0
- data/doc/image/example_in_red_arrow.png +0 -0
- data/doc/image/tdr.png +0 -0
- data/doc/image/tdr_and_table.png +0 -0
- data/doc/image/tidy_data_in_TDR.png +0 -0
- data/doc/image/vector/binary_element_wise.png +0 -0
- data/doc/image/vector/unary_aggregation.png +0 -0
- data/doc/image/vector/unary_aggregation_w_option.png +0 -0
- data/doc/image/vector/unary_element_wise.png +0 -0
- data/doc/tdr.md +56 -0
- data/doc/tdr_ja.md +56 -0
- data/lib/red_amber/data_frame.rb +68 -35
- data/lib/red_amber/data_frame_displayable.rb +132 -0
- data/lib/red_amber/data_frame_helper.rb +64 -0
- data/lib/red_amber/data_frame_indexable.rb +38 -0
- data/lib/red_amber/data_frame_observation_operation.rb +83 -0
- data/lib/red_amber/data_frame_selectable.rb +34 -43
- data/lib/red_amber/data_frame_variable_operation.rb +133 -0
- data/lib/red_amber/vector.rb +58 -6
- data/lib/red_amber/vector_compensable.rb +68 -0
- data/lib/red_amber/vector_functions.rb +147 -68
- data/lib/red_amber/version.rb +1 -1
- data/lib/red_amber.rb +9 -1
- data/red_amber.gemspec +3 -6
- metadata +36 -9
- data/lib/red_amber/data_frame_output.rb +0 -116
@@ -0,0 +1,68 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
# Available functions in Arrow are shown by `Arrow::Function.all.map(&:name)`
|
4
|
+
# reference: https://arrow.apache.org/docs/cpp/compute.html
|
5
|
+
|
6
|
+
module RedAmber
|
7
|
+
# mix-ins for class Vector
|
8
|
+
# Functions to make up some data (especially missing) for new data.
|
9
|
+
module VectorCompensable
|
10
|
+
# [Ternary]: replace_with(booleans, replacements) => vector
|
11
|
+
# Replace items selected with a boolean mask
|
12
|
+
#
|
13
|
+
# (from Arrow C++ inline doc.)
|
14
|
+
# Given an array and a boolean mask (either scalar or of equal length),
|
15
|
+
# along with replacement values (either scalar or array),
|
16
|
+
# each element of the array for which the corresponding mask element is
|
17
|
+
# true will be replaced by the next value from the replacements,
|
18
|
+
# or with null if the mask is null.
|
19
|
+
# Hence, for replacement arrays, len(replacements) == sum(mask == true).
|
20
|
+
|
21
|
+
def replace_with(booleans, replacements = nil)
|
22
|
+
specifier =
|
23
|
+
if booleans.is_a?(Arrow::BooleanArray)
|
24
|
+
booleans
|
25
|
+
elsif booleans.is_a?(Vector) && booleans.boolean?
|
26
|
+
booleans.data
|
27
|
+
elsif booleans.is_a?(Array) && booleans?(booleans)
|
28
|
+
Arrow::BooleanArray.new(booleans)
|
29
|
+
else
|
30
|
+
raise VectorTypeError, 'Not a valid type'
|
31
|
+
end
|
32
|
+
raise VectorArgumentError, 'Booleans size unmatch' if specifier.length != size
|
33
|
+
raise VectorArgumentError, 'Booleans not have any `true`' unless specifier.any?
|
34
|
+
|
35
|
+
r = Array(replacements) # scalar to [scalar]
|
36
|
+
r = [nil] if r.empty?
|
37
|
+
|
38
|
+
replacer =
|
39
|
+
if r.size == 1
|
40
|
+
case replacements
|
41
|
+
when Arrow::Array then replacements
|
42
|
+
when Vector then replacements.data
|
43
|
+
else
|
44
|
+
Arrow::Array.new(r * specifier.to_a.count(true)) # broadcast
|
45
|
+
end
|
46
|
+
else
|
47
|
+
Arrow::Array.new(r)
|
48
|
+
end
|
49
|
+
replacer = data.class.new(replacer) if replacer.uniq == [nil]
|
50
|
+
|
51
|
+
raise VectorArgumentError, 'Replacements size unmatch' if Array(specifier).count(true) != replacer.length
|
52
|
+
|
53
|
+
values = replacer.class.new(data)
|
54
|
+
|
55
|
+
datum = find('replace_with_mask').execute([values, specifier, replacer])
|
56
|
+
take_out_element_wise(datum)
|
57
|
+
end
|
58
|
+
|
59
|
+
# (related functions)
|
60
|
+
# fill_null_backward, fill_null_forward
|
61
|
+
|
62
|
+
private
|
63
|
+
|
64
|
+
def booleans?(enum)
|
65
|
+
enum.all? { |e| e.is_a?(TrueClass) || e.is_a?(FalseClass) || e.is_a?(NilClass) }
|
66
|
+
end
|
67
|
+
end
|
68
|
+
end
|
@@ -1,69 +1,128 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
|
3
|
+
# Available functions in Arrow are shown by `Arrow::Function.all.map(&:name)`
|
4
|
+
# reference: https://arrow.apache.org/docs/cpp/compute.html
|
5
|
+
|
6
|
+
# Not implemented in Red Arrow 8.0.0
|
7
|
+
# divmod, # '%',
|
8
|
+
# true_unless_null
|
9
|
+
|
3
10
|
module RedAmber
|
4
11
|
# mix-ins for class Vector
|
5
12
|
module VectorFunctions
|
6
|
-
#
|
7
|
-
# reference: https://arrow.apache.org/docs/cpp/compute.html
|
8
|
-
|
9
|
-
# [Unary aggregations]: vector.func => Scalar
|
13
|
+
# [Unary aggregations]: vector.func => scalar
|
10
14
|
unary_aggregations =
|
11
|
-
%i[all any approximate_median count count_distinct max mean min
|
12
|
-
product stddev sum variance]
|
15
|
+
%i[all any approximate_median count count_distinct max mean min min_max product stddev sum variance]
|
13
16
|
unary_aggregations.each do |function|
|
14
|
-
define_method(function)
|
17
|
+
define_method(function) do |opts: nil|
|
18
|
+
datum = exec_func_unary(function, options: opts)
|
19
|
+
take_out_scalar(datum)
|
20
|
+
end
|
15
21
|
end
|
22
|
+
alias_method :median, :approximate_median
|
16
23
|
alias_method :count_uniq, :count_distinct
|
17
24
|
|
25
|
+
def unbiased_variance
|
26
|
+
variance(opts: { ddof: 1 })
|
27
|
+
end
|
28
|
+
alias_method :var, :unbiased_variance
|
29
|
+
|
30
|
+
def sd
|
31
|
+
stddev(opts: { ddof: 1 })
|
32
|
+
end
|
33
|
+
alias_method :std, :sd
|
34
|
+
|
18
35
|
# option(s) required
|
19
|
-
# index
|
36
|
+
# - index
|
20
37
|
|
21
38
|
# Returns other than value
|
22
|
-
#
|
23
|
-
#
|
24
|
-
#
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
39
|
+
# - mode
|
40
|
+
# - quantile
|
41
|
+
# - tdigest
|
42
|
+
|
43
|
+
# [Unary element-wise]: vector.func => vector
|
44
|
+
unary_element_wise =
|
45
|
+
%i[abs array_sort_indices atan bit_wise_not ceil cos fill_null_backward fill_null_forward floor is_finite
|
46
|
+
is_inf is_nan is_null is_valid round round_to_multiple sign sin tan trunc unique]
|
29
47
|
unary_element_wise.each do |function|
|
30
|
-
define_method(function)
|
48
|
+
define_method(function) do |opts: nil|
|
49
|
+
datum = exec_func_unary(function, options: opts)
|
50
|
+
take_out_element_wise(datum)
|
51
|
+
end
|
31
52
|
end
|
53
|
+
alias_method :is_nil, :is_null
|
32
54
|
|
33
|
-
|
55
|
+
def is_na
|
56
|
+
numeric? ? (is_nil | is_nan) : is_nil
|
57
|
+
end
|
58
|
+
|
59
|
+
alias_method :fill_nil_backward, :fill_null_backward
|
60
|
+
alias_method :fill_nil_forward, :fill_null_forward
|
61
|
+
|
62
|
+
alias_method :sort_indexes, :array_sort_indices
|
63
|
+
alias_method :sort_indices, :array_sort_indices
|
64
|
+
|
65
|
+
alias_method :uniq, :unique
|
66
|
+
|
67
|
+
# [Unary element-wise with operator]: vector.func => vector, op vector
|
34
68
|
unary_element_wise_op = {
|
69
|
+
invert: '!',
|
35
70
|
negate: '-@',
|
36
71
|
}
|
37
72
|
unary_element_wise_op.each do |function, operator|
|
38
|
-
define_method(function)
|
39
|
-
|
40
|
-
|
73
|
+
define_method(function) do |opts: nil|
|
74
|
+
datum = exec_func_unary(function, options: opts)
|
75
|
+
take_out_element_wise(datum)
|
76
|
+
end
|
41
77
|
|
42
|
-
|
78
|
+
define_method(operator) do |opts: nil|
|
79
|
+
datum = exec_func_unary(function, options: opts)
|
80
|
+
take_out_element_wise(datum)
|
81
|
+
end
|
82
|
+
end
|
83
|
+
alias_method :not, :invert
|
43
84
|
|
44
85
|
# NaN support needed
|
45
|
-
#
|
86
|
+
# - acos asin ln log10 log1p log2
|
46
87
|
|
47
|
-
#
|
48
|
-
#
|
49
|
-
#
|
88
|
+
# Functions with numerical range check
|
89
|
+
# - abs_checked acos_checked asin_checked cos_checked ln_checked
|
90
|
+
# log10_checked log1p_checked log2_checked sin_checked tan_checked
|
50
91
|
|
51
|
-
# [Binary element-wise]: vector.func(other) =>
|
52
|
-
binary_element_wise =
|
92
|
+
# [Binary element-wise]: vector.func(other) => vector
|
93
|
+
binary_element_wise =
|
94
|
+
%i[atan2 and_not and_not_kleene bit_wise_and bit_wise_or bit_wise_xor]
|
53
95
|
binary_element_wise.each do |function|
|
54
|
-
define_method(function) do |other|
|
55
|
-
|
96
|
+
define_method(function) do |other, opts: nil|
|
97
|
+
datum = exec_func_binary(function, other, options: opts)
|
98
|
+
take_out_element_wise(datum)
|
99
|
+
end
|
100
|
+
end
|
101
|
+
|
102
|
+
# [Logical binary element-wise]: vector.func(other) => vector
|
103
|
+
logical_binary_element_wise = {
|
104
|
+
'&': :and_kleene,
|
105
|
+
and_kleene: :and_kleene,
|
106
|
+
and_org: :and,
|
107
|
+
'|': :or_kleene,
|
108
|
+
or_kleene: :or_kleene,
|
109
|
+
or_org: :or,
|
110
|
+
}
|
111
|
+
logical_binary_element_wise.each do |method, function|
|
112
|
+
define_method(method) do |other, opts: nil|
|
113
|
+
datum = exec_func_binary(function, other, options: opts)
|
114
|
+
take_out_element_wise(datum)
|
56
115
|
end
|
57
116
|
end
|
58
117
|
|
59
118
|
# NaN support needed
|
60
|
-
# logb
|
119
|
+
# - logb
|
61
120
|
|
62
|
-
#
|
63
|
-
#
|
64
|
-
#
|
121
|
+
# Functions with numerical range check
|
122
|
+
# - add_checked divide_checked logb_checked multiply_checked power_checked subtract_checked
|
123
|
+
# shift_left_checked shift_right_checked
|
65
124
|
|
66
|
-
# [Binary element-wise with operator]: vector.func(other) =>
|
125
|
+
# [Binary element-wise with operator]: vector.func(other) => vector
|
67
126
|
binary_element_wise_op = {
|
68
127
|
add: '+',
|
69
128
|
divide: '/',
|
@@ -71,9 +130,7 @@ module RedAmber
|
|
71
130
|
power: '**',
|
72
131
|
subtract: '-',
|
73
132
|
|
74
|
-
|
75
|
-
bit_wise_or: '|',
|
76
|
-
bit_wise_xor: '^',
|
133
|
+
xor: '^',
|
77
134
|
shift_left: '<<',
|
78
135
|
shift_right: '>>',
|
79
136
|
|
@@ -85,11 +142,14 @@ module RedAmber
|
|
85
142
|
not_equal: '!=',
|
86
143
|
}
|
87
144
|
binary_element_wise_op.each do |function, operator|
|
88
|
-
define_method(function) do |other|
|
89
|
-
|
145
|
+
define_method(function) do |other, opts: nil|
|
146
|
+
datum = exec_func_binary(function, other, options: opts)
|
147
|
+
take_out_element_wise(datum)
|
90
148
|
end
|
91
|
-
|
92
|
-
|
149
|
+
|
150
|
+
define_method(operator) do |other, opts: nil|
|
151
|
+
datum = exec_func_binary(function, other, options: opts)
|
152
|
+
take_out_element_wise(datum)
|
93
153
|
end
|
94
154
|
end
|
95
155
|
alias_method :eq, :equal
|
@@ -99,16 +159,11 @@ module RedAmber
|
|
99
159
|
alias_method :lt, :less
|
100
160
|
alias_method :ne, :not_equal
|
101
161
|
|
102
|
-
# mod: '%',
|
103
|
-
|
104
162
|
# (array functions)
|
105
|
-
# array_filter,
|
106
|
-
# dictionary_encode,
|
107
|
-
# hash_count, hash_count_distinct, hash_distinct, hash_max, hash_mean, hash_min,
|
108
|
-
# hash_min_max, hash_product, hash_stddev, hash_sum, hash_tdigest, hash_variance,
|
163
|
+
# array_filter, array_take
|
164
|
+
# dictionary_encode,
|
109
165
|
# partition_nth_indices,
|
110
|
-
# quarter, quarters_between,
|
111
|
-
# value_counts
|
166
|
+
# quarter, quarters_between,
|
112
167
|
|
113
168
|
# (strings)
|
114
169
|
# ascii_capitalize, ascii_center, ascii_is_alnum, ascii_is_alpha, ascii_is_decimal,
|
@@ -143,30 +198,54 @@ module RedAmber
|
|
143
198
|
# choose, index_in, index_in_meta_binary, indices_nonzero
|
144
199
|
|
145
200
|
# (others)
|
146
|
-
# coalesce, drop_null,
|
147
|
-
# filter,
|
201
|
+
# coalesce, drop_null,
|
202
|
+
# filter, is_in, is_in_meta_binary,
|
148
203
|
# list_element, list_flatten, list_parent_indices, list_value_length, make_struct,
|
149
|
-
# max_element_wise, min_element_wise, random,
|
204
|
+
# max_element_wise, min_element_wise, random, select_k_unstable,
|
150
205
|
# sort_indices, struct_field, take
|
151
206
|
|
152
207
|
private # =======
|
153
208
|
|
154
|
-
def
|
155
|
-
|
156
|
-
|
157
|
-
|
158
|
-
|
159
|
-
|
160
|
-
|
161
|
-
|
162
|
-
|
163
|
-
|
164
|
-
|
165
|
-
|
166
|
-
|
167
|
-
|
168
|
-
|
169
|
-
|
209
|
+
def exec_func_unary(function, options: nil)
|
210
|
+
find(function).execute([data], options)
|
211
|
+
end
|
212
|
+
|
213
|
+
def exec_func_binary(function, other, options: nil)
|
214
|
+
case other
|
215
|
+
when Vector
|
216
|
+
find(function).execute([data, other.data], options)
|
217
|
+
when Arrow::Array, Arrow::ChunkedArray, Arrow::Scalar, Array, Numeric, String, TrueClass, FalseClass
|
218
|
+
find(function).execute([data, other], options)
|
219
|
+
else
|
220
|
+
raise VectorArgumentError, "Operand is not supported: #{other.class}"
|
221
|
+
end
|
222
|
+
end
|
223
|
+
|
224
|
+
def take_out_scalar(datum)
|
225
|
+
output = datum.value
|
226
|
+
case output
|
227
|
+
when Arrow::StringScalar then output.to_s
|
228
|
+
when Arrow::StructScalar
|
229
|
+
output.value.map { |s| s.is_a?(Arrow::StringScalar) ? s.to_s : s.value }
|
230
|
+
else
|
231
|
+
output.value
|
232
|
+
end
|
233
|
+
end
|
234
|
+
|
235
|
+
def take_out_element_wise(datum)
|
236
|
+
Vector.new(datum.value)
|
237
|
+
end
|
238
|
+
|
239
|
+
module_function # ======
|
240
|
+
|
241
|
+
def find(function_name)
|
242
|
+
Arrow::Function.find(function_name)
|
243
|
+
end
|
244
|
+
|
245
|
+
# temporary API until RedAmber document prepared.
|
246
|
+
def arrow_doc(function_name)
|
247
|
+
f = find(function_name)
|
248
|
+
"#{f}\n#{'-' * function_name.size}\n#{f.doc.description}"
|
170
249
|
end
|
171
250
|
end
|
172
251
|
end
|
data/lib/red_amber/version.rb
CHANGED
data/lib/red_amber.rb
CHANGED
@@ -3,9 +3,14 @@
|
|
3
3
|
require 'arrow'
|
4
4
|
require 'rover-df'
|
5
5
|
|
6
|
-
require_relative 'red_amber/
|
6
|
+
require_relative 'red_amber/data_frame_displayable'
|
7
|
+
require_relative 'red_amber/data_frame_helper'
|
8
|
+
require_relative 'red_amber/data_frame_indexable'
|
7
9
|
require_relative 'red_amber/data_frame_selectable'
|
10
|
+
require_relative 'red_amber/data_frame_observation_operation'
|
11
|
+
require_relative 'red_amber/data_frame_variable_operation'
|
8
12
|
require_relative 'red_amber/data_frame'
|
13
|
+
require_relative 'red_amber/vector_compensable'
|
9
14
|
require_relative 'red_amber/vector_functions'
|
10
15
|
require_relative 'red_amber/vector'
|
11
16
|
require_relative 'red_amber/version'
|
@@ -15,4 +20,7 @@ module RedAmber
|
|
15
20
|
|
16
21
|
class DataFrameArgumentError < ArgumentError; end
|
17
22
|
class DataFrameTypeError < TypeError; end
|
23
|
+
|
24
|
+
class VectorArgumentError < ArgumentError; end
|
25
|
+
class VectorTypeError < TypeError; end
|
18
26
|
end
|
data/red_amber.gemspec
CHANGED
@@ -9,7 +9,7 @@ Gem::Specification.new do |spec|
|
|
9
9
|
spec.email = ['heronshoes877@gmail.com']
|
10
10
|
|
11
11
|
spec.summary = 'Simple dataframe library for Ruby'
|
12
|
-
spec.description = 'RedAmber is a simple dataframe library
|
12
|
+
spec.description = 'RedAmber is a simple dataframe library inspired by Rover-df and powered by Red Arrow.'
|
13
13
|
spec.homepage = 'https://github.com/heronshoes/red_amber'
|
14
14
|
spec.license = 'MIT'
|
15
15
|
spec.required_ruby_version = '>= 2.7'
|
@@ -30,14 +30,11 @@ Gem::Specification.new do |spec|
|
|
30
30
|
spec.executables = spec.files.grep(%r{\Aexe/}) { |f| File.basename(f) }
|
31
31
|
spec.require_paths = ['lib']
|
32
32
|
|
33
|
-
spec.add_dependency 'red-arrow', '>=
|
34
|
-
spec.add_dependency 'red-parquet', '>=
|
33
|
+
spec.add_dependency 'red-arrow', '>= 8.0.0'
|
34
|
+
spec.add_dependency 'red-parquet', '>= 8.0.0'
|
35
35
|
spec.add_dependency 'rover-df', '~> 0.3.0'
|
36
36
|
|
37
37
|
# Development dependency has gone to the Gemfile (rubygems/bundler#7237)
|
38
38
|
|
39
39
|
spec.metadata['rubygems_mfa_required'] = 'true'
|
40
|
-
|
41
|
-
# For more information and examples about making a new gem, check out our
|
42
|
-
# guide at: https://bundler.io/guides/creating_gem.html
|
43
40
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: red_amber
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.
|
4
|
+
version: 0.1.5
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Hirokazu SUZUKI (heronshoes)
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2022-
|
11
|
+
date: 2022-06-12 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: red-arrow
|
@@ -16,28 +16,28 @@ dependencies:
|
|
16
16
|
requirements:
|
17
17
|
- - ">="
|
18
18
|
- !ruby/object:Gem::Version
|
19
|
-
version:
|
19
|
+
version: 8.0.0
|
20
20
|
type: :runtime
|
21
21
|
prerelease: false
|
22
22
|
version_requirements: !ruby/object:Gem::Requirement
|
23
23
|
requirements:
|
24
24
|
- - ">="
|
25
25
|
- !ruby/object:Gem::Version
|
26
|
-
version:
|
26
|
+
version: 8.0.0
|
27
27
|
- !ruby/object:Gem::Dependency
|
28
28
|
name: red-parquet
|
29
29
|
requirement: !ruby/object:Gem::Requirement
|
30
30
|
requirements:
|
31
31
|
- - ">="
|
32
32
|
- !ruby/object:Gem::Version
|
33
|
-
version:
|
33
|
+
version: 8.0.0
|
34
34
|
type: :runtime
|
35
35
|
prerelease: false
|
36
36
|
version_requirements: !ruby/object:Gem::Requirement
|
37
37
|
requirements:
|
38
38
|
- - ">="
|
39
39
|
- !ruby/object:Gem::Version
|
40
|
-
version:
|
40
|
+
version: 8.0.0
|
41
41
|
- !ruby/object:Gem::Dependency
|
42
42
|
name: rover-df
|
43
43
|
requirement: !ruby/object:Gem::Requirement
|
@@ -52,8 +52,8 @@ dependencies:
|
|
52
52
|
- - "~>"
|
53
53
|
- !ruby/object:Gem::Version
|
54
54
|
version: 0.3.0
|
55
|
-
description: RedAmber is a simple dataframe library
|
56
|
-
|
55
|
+
description: RedAmber is a simple dataframe library inspired by Rover-df and powered
|
56
|
+
by Red Arrow.
|
57
57
|
email:
|
58
58
|
- heronshoes877@gmail.com
|
59
59
|
executables: []
|
@@ -67,12 +67,39 @@ files:
|
|
67
67
|
- LICENSE
|
68
68
|
- README.md
|
69
69
|
- Rakefile
|
70
|
+
- benchmark/csv_load_penguins.yml
|
71
|
+
- benchmark/drop_nil.yml
|
70
72
|
- doc/CODE_OF_CONDUCT.md
|
73
|
+
- doc/DataFrame.md
|
74
|
+
- doc/Vector.md
|
75
|
+
- doc/image/arrow_table_new.png
|
76
|
+
- doc/image/dataframe/assign.png
|
77
|
+
- doc/image/dataframe/drop.png
|
78
|
+
- doc/image/dataframe/pick.png
|
79
|
+
- doc/image/dataframe/remove.png
|
80
|
+
- doc/image/dataframe/rename.png
|
81
|
+
- doc/image/dataframe/slice.png
|
82
|
+
- doc/image/dataframe_model.png
|
83
|
+
- doc/image/example_in_red_arrow.png
|
84
|
+
- doc/image/tdr.png
|
85
|
+
- doc/image/tdr_and_table.png
|
86
|
+
- doc/image/tidy_data_in_TDR.png
|
87
|
+
- doc/image/vector/binary_element_wise.png
|
88
|
+
- doc/image/vector/unary_aggregation.png
|
89
|
+
- doc/image/vector/unary_aggregation_w_option.png
|
90
|
+
- doc/image/vector/unary_element_wise.png
|
91
|
+
- doc/tdr.md
|
92
|
+
- doc/tdr_ja.md
|
71
93
|
- lib/red_amber.rb
|
72
94
|
- lib/red_amber/data_frame.rb
|
73
|
-
- lib/red_amber/
|
95
|
+
- lib/red_amber/data_frame_displayable.rb
|
96
|
+
- lib/red_amber/data_frame_helper.rb
|
97
|
+
- lib/red_amber/data_frame_indexable.rb
|
98
|
+
- lib/red_amber/data_frame_observation_operation.rb
|
74
99
|
- lib/red_amber/data_frame_selectable.rb
|
100
|
+
- lib/red_amber/data_frame_variable_operation.rb
|
75
101
|
- lib/red_amber/vector.rb
|
102
|
+
- lib/red_amber/vector_compensable.rb
|
76
103
|
- lib/red_amber/vector_functions.rb
|
77
104
|
- lib/red_amber/version.rb
|
78
105
|
- red_amber.gemspec
|
@@ -1,116 +0,0 @@
|
|
1
|
-
# frozen_string_literal: true
|
2
|
-
|
3
|
-
require 'stringio'
|
4
|
-
|
5
|
-
module RedAmber
|
6
|
-
# mix-ins for the class DataFrame
|
7
|
-
module DataFrameOutput
|
8
|
-
def to_s
|
9
|
-
@table.to_s
|
10
|
-
end
|
11
|
-
|
12
|
-
# def describe() end
|
13
|
-
|
14
|
-
# def summary() end
|
15
|
-
|
16
|
-
def inspect_raw
|
17
|
-
format "#<#{self.class}:0x%016x>\n#{self}", object_id
|
18
|
-
end
|
19
|
-
|
20
|
-
# - tally_level: max level to use tally mode
|
21
|
-
# - max_element: max element to show values in each row
|
22
|
-
# TODO: Is it better to change name other than `inspect` ?
|
23
|
-
# TODO: Add na count capability
|
24
|
-
# TODO: Fall back to inspect_raw when treating large dataset
|
25
|
-
# TODO: Refactor code to smaller methods
|
26
|
-
def inspect(tally_level: 5, max_element: 5)
|
27
|
-
return '#<RedAmber::DataFrame (empty)>' if empty?
|
28
|
-
|
29
|
-
stringio = StringIO.new # output string buffer
|
30
|
-
|
31
|
-
# 1st row: show shape of the dataframe
|
32
|
-
r = pl(nrow)
|
33
|
-
c = pl(ncol)
|
34
|
-
stringio.puts \
|
35
|
-
"#{self.class} : #{nrow} observation#{r}(row#{r}) of #{ncol} variable#{c}(column#{c})"
|
36
|
-
|
37
|
-
# 2nd row: show var counts by type
|
38
|
-
type_groups = data_types.map { |t| type_group(t) }
|
39
|
-
|
40
|
-
stringio.puts "Variable#{pl(ncol)} : #{var_type_count(type_groups).join(', ')}"
|
41
|
-
|
42
|
-
# 3rd row: print header of rows
|
43
|
-
levels = vectors.map { |v| v.to_a.uniq.size }
|
44
|
-
row_headers = { idx: '#', key: 'key', type: 'type', levels: 'level', data: 'data_preview' }
|
45
|
-
# find longest word to adjust column width
|
46
|
-
w_idx = ncol.to_s.size
|
47
|
-
w_key = (keys.map { |key| key.size + 1 } << row_headers[:key].size).max
|
48
|
-
w_type = (types.map(&:size) << row_headers[:type].size).max
|
49
|
-
w_row = (levels.map { |l| l.to_s.size } << row_headers[:levels].size).max
|
50
|
-
stringio.printf("%-#{w_idx}s %-#{w_key}s %-#{w_type}s %-#{w_row}s %s\n", *row_headers.values)
|
51
|
-
|
52
|
-
# (4) show details for each column (vector)
|
53
|
-
vectors.each.with_index(1) do |vector, i|
|
54
|
-
key = keys[i - 1]
|
55
|
-
type = types[i - 1]
|
56
|
-
type_group = type_groups[i - 1]
|
57
|
-
data_tally = vector.tally
|
58
|
-
|
59
|
-
str = format("%#{w_row}d ", data_tally.size)
|
60
|
-
str <<
|
61
|
-
case type_group
|
62
|
-
when :numeric, :string, :boolean
|
63
|
-
if data_tally.size <= tally_level && data_tally.size != nrow
|
64
|
-
data_tally.to_s
|
65
|
-
else
|
66
|
-
reduced_vector_presentation(vector, nrow, max_element)
|
67
|
-
end
|
68
|
-
# c = vector.is_na.tally[1] # release when `#is_na` impremented
|
69
|
-
# str << " #{c} NaN#{pl(c)}" if c&.>(0) # safely call c>0
|
70
|
-
else
|
71
|
-
reduced_vector_presentation(vector, nrow, max_element)
|
72
|
-
end
|
73
|
-
|
74
|
-
stringio.printf("%#{w_idx}d %-#{w_key}s %-#{w_type}s %s\n", i, ":#{key}", type, str)
|
75
|
-
end
|
76
|
-
|
77
|
-
stringio.string
|
78
|
-
end
|
79
|
-
|
80
|
-
private # =====
|
81
|
-
|
82
|
-
def pl(num)
|
83
|
-
num > 1 ? 's' : ''
|
84
|
-
end
|
85
|
-
|
86
|
-
def type_group(type)
|
87
|
-
if Arrow::NumericDataType >= type
|
88
|
-
:numeric
|
89
|
-
elsif Arrow::StringDataType >= type
|
90
|
-
:string
|
91
|
-
elsif Arrow::BooleanDataType >= type
|
92
|
-
:boolean
|
93
|
-
elsif Arrow::TemporalDataType >= type
|
94
|
-
:temporal
|
95
|
-
else
|
96
|
-
:other
|
97
|
-
end
|
98
|
-
end
|
99
|
-
|
100
|
-
def var_type_count(type_groups)
|
101
|
-
tg = type_groups.tally
|
102
|
-
a = []
|
103
|
-
a << "#{tg[:numeric]} numeric" if tg[:numeric]
|
104
|
-
a << "#{tg[:string]} string#{pl(tg[:string])}" if tg[:string]
|
105
|
-
a << "#{tg[:boolean]} boolean" if tg[:boolean]
|
106
|
-
a << "#{tg[:temporal]} temporal" if tg[:temporal]
|
107
|
-
a
|
108
|
-
end
|
109
|
-
|
110
|
-
def reduced_vector_presentation(vector, nrow, max_element)
|
111
|
-
a = vector.to_a.take(max_element)
|
112
|
-
a << '...' if nrow > max_element
|
113
|
-
"[#{a.join(', ')}]"
|
114
|
-
end
|
115
|
-
end
|
116
|
-
end
|