red_amber 0.1.2 → 0.1.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (42)
  1. checksums.yaml +4 -4
  2. data/.rubocop.yml +21 -10
  3. data/CHANGELOG.md +162 -6
  4. data/Gemfile +3 -0
  5. data/README.md +89 -303
  6. data/benchmark/csv_load_penguins.yml +15 -0
  7. data/benchmark/drop_nil.yml +11 -0
  8. data/doc/DataFrame.md +840 -0
  9. data/doc/Vector.md +317 -0
  10. data/doc/image/arrow_table_new.png +0 -0
  11. data/doc/image/dataframe/assign.png +0 -0
  12. data/doc/image/dataframe/drop.png +0 -0
  13. data/doc/image/dataframe/pick.png +0 -0
  14. data/doc/image/dataframe/remove.png +0 -0
  15. data/doc/image/dataframe/rename.png +0 -0
  16. data/doc/image/dataframe/slice.png +0 -0
  17. data/doc/image/dataframe_model.png +0 -0
  18. data/doc/image/example_in_red_arrow.png +0 -0
  19. data/doc/image/tdr.png +0 -0
  20. data/doc/image/tdr_and_table.png +0 -0
  21. data/doc/image/tidy_data_in_TDR.png +0 -0
  22. data/doc/image/vector/binary_element_wise.png +0 -0
  23. data/doc/image/vector/unary_aggregation.png +0 -0
  24. data/doc/image/vector/unary_aggregation_w_option.png +0 -0
  25. data/doc/image/vector/unary_element_wise.png +0 -0
  26. data/doc/tdr.md +56 -0
  27. data/doc/tdr_ja.md +56 -0
  28. data/lib/red_amber/data_frame.rb +68 -35
  29. data/lib/red_amber/data_frame_displayable.rb +132 -0
  30. data/lib/red_amber/data_frame_helper.rb +64 -0
  31. data/lib/red_amber/data_frame_indexable.rb +38 -0
  32. data/lib/red_amber/data_frame_observation_operation.rb +83 -0
  33. data/lib/red_amber/data_frame_selectable.rb +34 -43
  34. data/lib/red_amber/data_frame_variable_operation.rb +133 -0
  35. data/lib/red_amber/vector.rb +58 -6
  36. data/lib/red_amber/vector_compensable.rb +68 -0
  37. data/lib/red_amber/vector_functions.rb +147 -68
  38. data/lib/red_amber/version.rb +1 -1
  39. data/lib/red_amber.rb +9 -1
  40. data/red_amber.gemspec +3 -6
  41. metadata +36 -9
  42. data/lib/red_amber/data_frame_output.rb +0 -116
@@ -0,0 +1,68 @@
1
+ # frozen_string_literal: true
2
+
3
+ # Available functions in Arrow are shown by `Arrow::Function.all.map(&:name)`
4
+ # reference: https://arrow.apache.org/docs/cpp/compute.html
5
+
6
+ module RedAmber
7
+ # mix-ins for class Vector
8
+ # Functions to fill in or replace data (especially missing values) with new data.
9
+ module VectorCompensable
10
+ # [Ternary]: replace_with(booleans, replacements) => vector
11
+ # Replace items selected with a boolean mask
12
+ #
13
+ # (from Arrow C++ inline doc.)
14
+ # Given an array and a boolean mask (either scalar or of equal length),
15
+ # along with replacement values (either scalar or array),
16
+ # each element of the array for which the corresponding mask element is
17
+ # true will be replaced by the next value from the replacements,
18
+ # or with null if the mask is null.
19
+ # Hence, for replacement arrays, len(replacements) == sum(mask == true).
20
+
21
+ def replace_with(booleans, replacements = nil)
22
+ specifier =
23
+ if booleans.is_a?(Arrow::BooleanArray)
24
+ booleans
25
+ elsif booleans.is_a?(Vector) && booleans.boolean?
26
+ booleans.data
27
+ elsif booleans.is_a?(Array) && booleans?(booleans)
28
+ Arrow::BooleanArray.new(booleans)
29
+ else
30
+ raise VectorTypeError, 'Not a valid type'
31
+ end
32
+ raise VectorArgumentError, 'Booleans size unmatch' if specifier.length != size
33
+ raise VectorArgumentError, 'Booleans not have any `true`' unless specifier.any?
34
+
35
+ r = Array(replacements) # scalar to [scalar]
36
+ r = [nil] if r.empty?
37
+
38
+ replacer =
39
+ if r.size == 1
40
+ case replacements
41
+ when Arrow::Array then replacements
42
+ when Vector then replacements.data
43
+ else
44
+ Arrow::Array.new(r * specifier.to_a.count(true)) # broadcast
45
+ end
46
+ else
47
+ Arrow::Array.new(r)
48
+ end
49
+ replacer = data.class.new(replacer) if replacer.uniq == [nil]
50
+
51
+ raise VectorArgumentError, 'Replacements size unmatch' if Array(specifier).count(true) != replacer.length
52
+
53
+ values = replacer.class.new(data)
54
+
55
+ datum = find('replace_with_mask').execute([values, specifier, replacer])
56
+ take_out_element_wise(datum)
57
+ end
58
+
59
+ # (related functions)
60
+ # fill_null_backward, fill_null_forward
61
+
62
+ private
63
+
64
+ def booleans?(enum)
65
+ enum.all? { |e| e.is_a?(TrueClass) || e.is_a?(FalseClass) || e.is_a?(NilClass) }
66
+ end
67
+ end
68
+ end
@@ -1,69 +1,128 @@
1
1
  # frozen_string_literal: true
2
2
 
3
+ # Available functions in Arrow are shown by `Arrow::Function.all.map(&:name)`
4
+ # reference: https://arrow.apache.org/docs/cpp/compute.html
5
+
6
+ # Not implemented in Red Arrow 8.0.0
7
+ # divmod, # '%',
8
+ # true_unless_null
9
+
3
10
  module RedAmber
4
11
  # mix-ins for class Vector
5
12
  module VectorFunctions
6
- # Available functions in Arrow are shown by `Arrow::Function.all.map(&:name)`
7
- # reference: https://arrow.apache.org/docs/cpp/compute.html
8
-
9
- # [Unary aggregations]: vector.func => Scalar
13
+ # [Unary aggregations]: vector.func => scalar
10
14
  unary_aggregations =
11
- %i[all any approximate_median count count_distinct max mean min \
12
- product stddev sum variance]
15
+ %i[all any approximate_median count count_distinct max mean min min_max product stddev sum variance]
13
16
  unary_aggregations.each do |function|
14
- define_method(function) { exec_func(function, other: nil, options: { aggregate: true }) }
17
+ define_method(function) do |opts: nil|
18
+ datum = exec_func_unary(function, options: opts)
19
+ take_out_scalar(datum)
20
+ end
15
21
  end
22
+ alias_method :median, :approximate_median
16
23
  alias_method :count_uniq, :count_distinct
17
24
 
25
+ def unbiased_variance
26
+ variance(opts: { ddof: 1 })
27
+ end
28
+ alias_method :var, :unbiased_variance
29
+
30
+ def sd
31
+ stddev(opts: { ddof: 1 })
32
+ end
33
+ alias_method :std, :sd
34
+
18
35
  # option(s) required
19
- # index
36
+ # - index
20
37
 
21
38
  # Returns other than value
22
- # min_max
23
- # mode
24
- # quantile
25
- # tdigest
26
-
27
- # [Unary element-wise]: vector.func => Vector
28
- unary_element_wise = %i[abs atan ceil cos floor sign sin tan trunc]
39
+ # - mode
40
+ # - quantile
41
+ # - tdigest
42
+
43
+ # [Unary element-wise]: vector.func => vector
44
+ unary_element_wise =
45
+ %i[abs array_sort_indices atan bit_wise_not ceil cos fill_null_backward fill_null_forward floor is_finite
46
+ is_inf is_nan is_null is_valid round round_to_multiple sign sin tan trunc unique]
29
47
  unary_element_wise.each do |function|
30
- define_method(function) { exec_func(function, other: nil, options: {}) }
48
+ define_method(function) do |opts: nil|
49
+ datum = exec_func_unary(function, options: opts)
50
+ take_out_element_wise(datum)
51
+ end
31
52
  end
53
+ alias_method :is_nil, :is_null
32
54
 
33
- # [Unary element-wise with operator]: vector.func => Vector
55
+ def is_na
56
+ numeric? ? (is_nil | is_nan) : is_nil
57
+ end
58
+
59
+ alias_method :fill_nil_backward, :fill_null_backward
60
+ alias_method :fill_nil_forward, :fill_null_forward
61
+
62
+ alias_method :sort_indexes, :array_sort_indices
63
+ alias_method :sort_indices, :array_sort_indices
64
+
65
+ alias_method :uniq, :unique
66
+
67
+ # [Unary element-wise with operator]: vector.func => vector, op vector
34
68
  unary_element_wise_op = {
69
+ invert: '!',
35
70
  negate: '-@',
36
71
  }
37
72
  unary_element_wise_op.each do |function, operator|
38
- define_method(function) { exec_func(function, other: nil, options: {}) }
39
- define_method(operator) { exec_func(function, other: nil, options: {}) }
40
- end
73
+ define_method(function) do |opts: nil|
74
+ datum = exec_func_unary(function, options: opts)
75
+ take_out_element_wise(datum)
76
+ end
41
77
 
42
- # bit_wise_not => '!', invert, round, round_to_multiple
78
+ define_method(operator) do |opts: nil|
79
+ datum = exec_func_unary(function, options: opts)
80
+ take_out_element_wise(datum)
81
+ end
82
+ end
83
+ alias_method :not, :invert
43
84
 
44
85
  # NaN support needed
45
- # %i[acos asin ln log10 log1p log2]
86
+ # - acos asin ln log10 log1p log2
46
87
 
47
- # With numerical range check
48
- # %i[abs_checked acos_checked asin_checked cos_checked ln_checked \
49
- # log10_checked log1p_checked log2_checked sin_checked tan_checked]
88
+ # Functions with numerical range check
89
+ # - abs_checked acos_checked asin_checked cos_checked ln_checked
90
+ # log10_checked log1p_checked log2_checked sin_checked tan_checked
50
91
 
51
- # [Binary element-wise]: vector.func(other) => Vector
52
- binary_element_wise = %i[atan2 and and_kleene and_not and_not_kleene or or_kleene xor]
92
+ # [Binary element-wise]: vector.func(other) => vector
93
+ binary_element_wise =
94
+ %i[atan2 and_not and_not_kleene bit_wise_and bit_wise_or bit_wise_xor]
53
95
  binary_element_wise.each do |function|
54
- define_method(function) do |other|
55
- exec_func(function, other: other, options: {})
96
+ define_method(function) do |other, opts: nil|
97
+ datum = exec_func_binary(function, other, options: opts)
98
+ take_out_element_wise(datum)
99
+ end
100
+ end
101
+
102
+ # [Logical binary element-wise]: vector.func(other) => vector
103
+ logical_binary_element_wise = {
104
+ '&': :and_kleene,
105
+ and_kleene: :and_kleene,
106
+ and_org: :and,
107
+ '|': :or_kleene,
108
+ or_kleene: :or_kleene,
109
+ or_org: :or,
110
+ }
111
+ logical_binary_element_wise.each do |method, function|
112
+ define_method(method) do |other, opts: nil|
113
+ datum = exec_func_binary(function, other, options: opts)
114
+ take_out_element_wise(datum)
56
115
  end
57
116
  end
58
117
 
59
118
  # NaN support needed
60
- # logb
119
+ # - logb
61
120
 
62
- # With numerical range check
63
- # %i[add_checked divide_checked logb_checked multiply_checked power_checked subtract_checked \
64
- # shift_left_checked shift_right_checked]
121
+ # Functions with numerical range check
122
+ # - add_checked divide_checked logb_checked multiply_checked power_checked subtract_checked
123
+ # shift_left_checked shift_right_checked
65
124
 
66
- # [Binary element-wise with operator]: vector.func(other) => Vector
125
+ # [Binary element-wise with operator]: vector.func(other) => vector
67
126
  binary_element_wise_op = {
68
127
  add: '+',
69
128
  divide: '/',
@@ -71,9 +130,7 @@ module RedAmber
71
130
  power: '**',
72
131
  subtract: '-',
73
132
 
74
- bit_wise_and: '&',
75
- bit_wise_or: '|',
76
- bit_wise_xor: '^',
133
+ xor: '^',
77
134
  shift_left: '<<',
78
135
  shift_right: '>>',
79
136
 
@@ -85,11 +142,14 @@ module RedAmber
85
142
  not_equal: '!=',
86
143
  }
87
144
  binary_element_wise_op.each do |function, operator|
88
- define_method(function) do |other|
89
- exec_func(function, other: other, options: {})
145
+ define_method(function) do |other, opts: nil|
146
+ datum = exec_func_binary(function, other, options: opts)
147
+ take_out_element_wise(datum)
90
148
  end
91
- define_method(operator) do |other|
92
- exec_func(function, other: other, options: {})
149
+
150
+ define_method(operator) do |other, opts: nil|
151
+ datum = exec_func_binary(function, other, options: opts)
152
+ take_out_element_wise(datum)
93
153
  end
94
154
  end
95
155
  alias_method :eq, :equal
@@ -99,16 +159,11 @@ module RedAmber
99
159
  alias_method :lt, :less
100
160
  alias_method :ne, :not_equal
101
161
 
102
- # mod: '%',
103
-
104
162
  # (array functions)
105
- # array_filter, array_sort_indices, array_take
106
- # dictionary_encode, hash_all, hash_any, hash_approximate_median,
107
- # hash_count, hash_count_distinct, hash_distinct, hash_max, hash_mean, hash_min,
108
- # hash_min_max, hash_product, hash_stddev, hash_sum, hash_tdigest, hash_variance,
163
+ # array_filter, array_take
164
+ # dictionary_encode,
109
165
  # partition_nth_indices,
110
- # quarter, quarters_between, unique,
111
- # value_counts
166
+ # quarter, quarters_between,
112
167
 
113
168
  # (strings)
114
169
  # ascii_capitalize, ascii_center, ascii_is_alnum, ascii_is_alpha, ascii_is_decimal,
@@ -143,30 +198,54 @@ module RedAmber
143
198
  # choose, index_in, index_in_meta_binary, indices_nonzero
144
199
 
145
200
  # (others)
146
- # coalesce, drop_null, fill_null_backward, fill_null_forward,
147
- # filter, is_finite, is_in, is_in_meta_binary, is_inf, is_nan, is_null, is_valid,
201
+ # coalesce, drop_null,
202
+ # filter, is_in, is_in_meta_binary,
148
203
  # list_element, list_flatten, list_parent_indices, list_value_length, make_struct,
149
- # max_element_wise, min_element_wise, random, replace_with_mask, select_k_unstable,
204
+ # max_element_wise, min_element_wise, random, select_k_unstable,
150
205
  # sort_indices, struct_field, take
151
206
 
152
207
  private # =======
153
208
 
154
- def exec_func(function, other: nil, options: {})
155
- func = Arrow::Function.find(function)
156
- output =
157
- case other
158
- when nil
159
- func.execute([data])
160
- when Arrow::Array, Arrow::ChunkedArray, Arrow::Scalar, Numeric
161
- func.execute([data, other])
162
- when Vector
163
- func.execute([data, other.data])
164
- when Rover::Vector
165
- func.execute([data, other.to_a])
166
- else
167
- raise ArgumentError, "Operand is not supported: #{other.class}"
168
- end
169
- options[:aggregate] ? output.value : Vector.new(output.value)
209
+ def exec_func_unary(function, options: nil)
210
+ find(function).execute([data], options)
211
+ end
212
+
213
+ def exec_func_binary(function, other, options: nil)
214
+ case other
215
+ when Vector
216
+ find(function).execute([data, other.data], options)
217
+ when Arrow::Array, Arrow::ChunkedArray, Arrow::Scalar, Array, Numeric, String, TrueClass, FalseClass
218
+ find(function).execute([data, other], options)
219
+ else
220
+ raise VectorArgumentError, "Operand is not supported: #{other.class}"
221
+ end
222
+ end
223
+
224
+ def take_out_scalar(datum)
225
+ output = datum.value
226
+ case output
227
+ when Arrow::StringScalar then output.to_s
228
+ when Arrow::StructScalar
229
+ output.value.map { |s| s.is_a?(Arrow::StringScalar) ? s.to_s : s.value }
230
+ else
231
+ output.value
232
+ end
233
+ end
234
+
235
+ def take_out_element_wise(datum)
236
+ Vector.new(datum.value)
237
+ end
238
+
239
+ module_function # ======
240
+
241
+ def find(function_name)
242
+ Arrow::Function.find(function_name)
243
+ end
244
+
245
+ # Temporary API until the RedAmber documentation is prepared.
246
+ def arrow_doc(function_name)
247
+ f = find(function_name)
248
+ "#{f}\n#{'-' * function_name.size}\n#{f.doc.description}"
170
249
  end
171
250
  end
172
251
  end
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module RedAmber
4
- VERSION = '0.1.2'
4
+ VERSION = '0.1.5'
5
5
  end
data/lib/red_amber.rb CHANGED
@@ -3,9 +3,14 @@
3
3
  require 'arrow'
4
4
  require 'rover-df'
5
5
 
6
- require_relative 'red_amber/data_frame_output'
6
+ require_relative 'red_amber/data_frame_displayable'
7
+ require_relative 'red_amber/data_frame_helper'
8
+ require_relative 'red_amber/data_frame_indexable'
7
9
  require_relative 'red_amber/data_frame_selectable'
10
+ require_relative 'red_amber/data_frame_observation_operation'
11
+ require_relative 'red_amber/data_frame_variable_operation'
8
12
  require_relative 'red_amber/data_frame'
13
+ require_relative 'red_amber/vector_compensable'
9
14
  require_relative 'red_amber/vector_functions'
10
15
  require_relative 'red_amber/vector'
11
16
  require_relative 'red_amber/version'
@@ -15,4 +20,7 @@ module RedAmber
15
20
 
16
21
  class DataFrameArgumentError < ArgumentError; end
17
22
  class DataFrameTypeError < TypeError; end
23
+
24
+ class VectorArgumentError < ArgumentError; end
25
+ class VectorTypeError < TypeError; end
18
26
  end
data/red_amber.gemspec CHANGED
@@ -9,7 +9,7 @@ Gem::Specification.new do |spec|
9
9
  spec.email = ['heronshoes877@gmail.com']
10
10
 
11
11
  spec.summary = 'Simple dataframe library for Ruby'
12
- spec.description = 'RedAmber is a simple dataframe library powered by Red Arrow with simple API similar to Rover-df.'
12
+ spec.description = 'RedAmber is a simple dataframe library inspired by Rover-df and powered by Red Arrow.'
13
13
  spec.homepage = 'https://github.com/heronshoes/red_amber'
14
14
  spec.license = 'MIT'
15
15
  spec.required_ruby_version = '>= 2.7'
@@ -30,14 +30,11 @@ Gem::Specification.new do |spec|
30
30
  spec.executables = spec.files.grep(%r{\Aexe/}) { |f| File.basename(f) }
31
31
  spec.require_paths = ['lib']
32
32
 
33
- spec.add_dependency 'red-arrow', '>= 7.0.0'
34
- spec.add_dependency 'red-parquet', '>= 7.0.0'
33
+ spec.add_dependency 'red-arrow', '>= 8.0.0'
34
+ spec.add_dependency 'red-parquet', '>= 8.0.0'
35
35
  spec.add_dependency 'rover-df', '~> 0.3.0'
36
36
 
37
37
  # Development dependency has gone to the Gemfile (rubygems/bundler#7237)
38
38
 
39
39
  spec.metadata['rubygems_mfa_required'] = 'true'
40
-
41
- # For more information and examples about making a new gem, check out our
42
- # guide at: https://bundler.io/guides/creating_gem.html
43
40
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: red_amber
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.2
4
+ version: 0.1.5
5
5
  platform: ruby
6
6
  authors:
7
7
  - Hirokazu SUZUKI (heronshoes)
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2022-05-08 00:00:00.000000000 Z
11
+ date: 2022-06-12 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: red-arrow
@@ -16,28 +16,28 @@ dependencies:
16
16
  requirements:
17
17
  - - ">="
18
18
  - !ruby/object:Gem::Version
19
- version: 7.0.0
19
+ version: 8.0.0
20
20
  type: :runtime
21
21
  prerelease: false
22
22
  version_requirements: !ruby/object:Gem::Requirement
23
23
  requirements:
24
24
  - - ">="
25
25
  - !ruby/object:Gem::Version
26
- version: 7.0.0
26
+ version: 8.0.0
27
27
  - !ruby/object:Gem::Dependency
28
28
  name: red-parquet
29
29
  requirement: !ruby/object:Gem::Requirement
30
30
  requirements:
31
31
  - - ">="
32
32
  - !ruby/object:Gem::Version
33
- version: 7.0.0
33
+ version: 8.0.0
34
34
  type: :runtime
35
35
  prerelease: false
36
36
  version_requirements: !ruby/object:Gem::Requirement
37
37
  requirements:
38
38
  - - ">="
39
39
  - !ruby/object:Gem::Version
40
- version: 7.0.0
40
+ version: 8.0.0
41
41
  - !ruby/object:Gem::Dependency
42
42
  name: rover-df
43
43
  requirement: !ruby/object:Gem::Requirement
@@ -52,8 +52,8 @@ dependencies:
52
52
  - - "~>"
53
53
  - !ruby/object:Gem::Version
54
54
  version: 0.3.0
55
- description: RedAmber is a simple dataframe library powered by Red Arrow with simple
56
- API similar to Rover-df.
55
+ description: RedAmber is a simple dataframe library inspired by Rover-df and powered
56
+ by Red Arrow.
57
57
  email:
58
58
  - heronshoes877@gmail.com
59
59
  executables: []
@@ -67,12 +67,39 @@ files:
67
67
  - LICENSE
68
68
  - README.md
69
69
  - Rakefile
70
+ - benchmark/csv_load_penguins.yml
71
+ - benchmark/drop_nil.yml
70
72
  - doc/CODE_OF_CONDUCT.md
73
+ - doc/DataFrame.md
74
+ - doc/Vector.md
75
+ - doc/image/arrow_table_new.png
76
+ - doc/image/dataframe/assign.png
77
+ - doc/image/dataframe/drop.png
78
+ - doc/image/dataframe/pick.png
79
+ - doc/image/dataframe/remove.png
80
+ - doc/image/dataframe/rename.png
81
+ - doc/image/dataframe/slice.png
82
+ - doc/image/dataframe_model.png
83
+ - doc/image/example_in_red_arrow.png
84
+ - doc/image/tdr.png
85
+ - doc/image/tdr_and_table.png
86
+ - doc/image/tidy_data_in_TDR.png
87
+ - doc/image/vector/binary_element_wise.png
88
+ - doc/image/vector/unary_aggregation.png
89
+ - doc/image/vector/unary_aggregation_w_option.png
90
+ - doc/image/vector/unary_element_wise.png
91
+ - doc/tdr.md
92
+ - doc/tdr_ja.md
71
93
  - lib/red_amber.rb
72
94
  - lib/red_amber/data_frame.rb
73
- - lib/red_amber/data_frame_output.rb
95
+ - lib/red_amber/data_frame_displayable.rb
96
+ - lib/red_amber/data_frame_helper.rb
97
+ - lib/red_amber/data_frame_indexable.rb
98
+ - lib/red_amber/data_frame_observation_operation.rb
74
99
  - lib/red_amber/data_frame_selectable.rb
100
+ - lib/red_amber/data_frame_variable_operation.rb
75
101
  - lib/red_amber/vector.rb
102
+ - lib/red_amber/vector_compensable.rb
76
103
  - lib/red_amber/vector_functions.rb
77
104
  - lib/red_amber/version.rb
78
105
  - red_amber.gemspec
@@ -1,116 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- require 'stringio'
4
-
5
- module RedAmber
6
- # mix-ins for the class DataFrame
7
- module DataFrameOutput
8
- def to_s
9
- @table.to_s
10
- end
11
-
12
- # def describe() end
13
-
14
- # def summary() end
15
-
16
- def inspect_raw
17
- format "#<#{self.class}:0x%016x>\n#{self}", object_id
18
- end
19
-
20
- # - tally_level: max level to use tally mode
21
- # - max_element: max element to show values in each row
22
- # TODO: Is it better to change name other than `inspect` ?
23
- # TODO: Add na count capability
24
- # TODO: Fall back to inspect_raw when treating large dataset
25
- # TODO: Refactor code to smaller methods
26
- def inspect(tally_level: 5, max_element: 5)
27
- return '#<RedAmber::DataFrame (empty)>' if empty?
28
-
29
- stringio = StringIO.new # output string buffer
30
-
31
- # 1st row: show shape of the dataframe
32
- r = pl(nrow)
33
- c = pl(ncol)
34
- stringio.puts \
35
- "#{self.class} : #{nrow} observation#{r}(row#{r}) of #{ncol} variable#{c}(column#{c})"
36
-
37
- # 2nd row: show var counts by type
38
- type_groups = data_types.map { |t| type_group(t) }
39
-
40
- stringio.puts "Variable#{pl(ncol)} : #{var_type_count(type_groups).join(', ')}"
41
-
42
- # 3rd row: print header of rows
43
- levels = vectors.map { |v| v.to_a.uniq.size }
44
- row_headers = { idx: '#', key: 'key', type: 'type', levels: 'level', data: 'data_preview' }
45
- # find longest word to adjust column width
46
- w_idx = ncol.to_s.size
47
- w_key = (keys.map { |key| key.size + 1 } << row_headers[:key].size).max
48
- w_type = (types.map(&:size) << row_headers[:type].size).max
49
- w_row = (levels.map { |l| l.to_s.size } << row_headers[:levels].size).max
50
- stringio.printf("%-#{w_idx}s %-#{w_key}s %-#{w_type}s %-#{w_row}s %s\n", *row_headers.values)
51
-
52
- # (4) show details for each column (vector)
53
- vectors.each.with_index(1) do |vector, i|
54
- key = keys[i - 1]
55
- type = types[i - 1]
56
- type_group = type_groups[i - 1]
57
- data_tally = vector.tally
58
-
59
- str = format("%#{w_row}d ", data_tally.size)
60
- str <<
61
- case type_group
62
- when :numeric, :string, :boolean
63
- if data_tally.size <= tally_level && data_tally.size != nrow
64
- data_tally.to_s
65
- else
66
- reduced_vector_presentation(vector, nrow, max_element)
67
- end
68
- # c = vector.is_na.tally[1] # release when `#is_na` impremented
69
- # str << " #{c} NaN#{pl(c)}" if c&.>(0) # safely call c>0
70
- else
71
- reduced_vector_presentation(vector, nrow, max_element)
72
- end
73
-
74
- stringio.printf("%#{w_idx}d %-#{w_key}s %-#{w_type}s %s\n", i, ":#{key}", type, str)
75
- end
76
-
77
- stringio.string
78
- end
79
-
80
- private # =====
81
-
82
- def pl(num)
83
- num > 1 ? 's' : ''
84
- end
85
-
86
- def type_group(type)
87
- if Arrow::NumericDataType >= type
88
- :numeric
89
- elsif Arrow::StringDataType >= type
90
- :string
91
- elsif Arrow::BooleanDataType >= type
92
- :boolean
93
- elsif Arrow::TemporalDataType >= type
94
- :temporal
95
- else
96
- :other
97
- end
98
- end
99
-
100
- def var_type_count(type_groups)
101
- tg = type_groups.tally
102
- a = []
103
- a << "#{tg[:numeric]} numeric" if tg[:numeric]
104
- a << "#{tg[:string]} string#{pl(tg[:string])}" if tg[:string]
105
- a << "#{tg[:boolean]} boolean" if tg[:boolean]
106
- a << "#{tg[:temporal]} temporal" if tg[:temporal]
107
- a
108
- end
109
-
110
- def reduced_vector_presentation(vector, nrow, max_element)
111
- a = vector.to_a.take(max_element)
112
- a << '...' if nrow > max_element
113
- "[#{a.join(', ')}]"
114
- end
115
- end
116
- end