red_amber 0.4.0 → 0.4.2

Sign up to get free protection for your applications and to get access to all the features.
@@ -5,6 +5,9 @@ require 'stringio'
5
5
  module RedAmber
6
6
  # Mix-in for the class DataFrame
7
7
  module DataFrameDisplayable
8
+ # Refine class String
9
+ using RefineString
10
+
8
11
  # Used internally to display table.
9
12
  INDEX_KEY = :index_key_for_format_table
10
13
  private_constant :INDEX_KEY
@@ -25,19 +28,20 @@ module RedAmber
25
28
  # puts penguins.to_s
26
29
  #
27
30
  # # =>
28
- # species island bill_length_mm bill_depth_mm flipper_length_mm ... year
29
- # <string> <string> <double> <double> <uint8> ... <uint16>
30
- # 0 Adelie Torgersen 39.1 18.7 181 ... 2007
31
- # 1 Adelie Torgersen 39.5 17.4 186 ... 2007
32
- # 2 Adelie Torgersen 40.3 18.0 195 ... 2007
33
- # 3 Adelie Torgersen (nil) (nil) (nil) ... 2007
34
- # 4 Adelie Torgersen 36.7 19.3 193 ... 2007
35
- # : : : : : : ... :
36
- # 341 Gentoo Biscoe 50.4 15.7 222 ... 2009
37
- # 342 Gentoo Biscoe 45.2 14.8 212 ... 2009
38
- # 343 Gentoo Biscoe 49.9 16.1 213 ... 2009
39
- #
40
- def to_s(width: 80, head: 5, tail: 3)
31
+ # species island bill_length_mm bill_depth_mm flipper_length_mm body_mass_g ... year
32
+ # <string> <string> <double> <double> <uint8> <uint16> ... <uint16>
33
+ # 0 Adelie Torgersen 39.1 18.7 181 3750 ... 2007
34
+ # 1 Adelie Torgersen 39.5 17.4 186 3800 ... 2007
35
+ # 2 Adelie Torgersen 40.3 18.0 195 3250 ... 2007
36
+ # 3 Adelie Torgersen (nil) (nil) (nil) (nil) ... 2007
37
+ # 4 Adelie Torgersen 36.7 19.3 193 3450 ... 2007
38
+ # : : : : : : : ... :
39
+ # 340 Gentoo Biscoe 46.8 14.3 215 4850 ... 2009
40
+ # 341 Gentoo Biscoe 50.4 15.7 222 5750 ... 2009
41
+ # 342 Gentoo Biscoe 45.2 14.8 212 5200 ... 2009
42
+ # 343 Gentoo Biscoe 49.9 16.1 213 5400 ... 2009
43
+ #
44
+ def to_s(width: 90, head: 5, tail: 4)
41
45
  return '' if empty?
42
46
 
43
47
  format_table(width: width, head: head, tail: tail)
@@ -52,8 +56,7 @@ module RedAmber
52
56
  # @return [DataFrame]
53
57
  # a new dataframe.
54
58
  # @example Statistical summary of penguins dataset
55
- # # needs more width to show all stats in this example
56
- # puts penguins.summary.to_s(width: 82)
59
+ # puts penguins.summary.to_s
57
60
  #
58
61
  # # =>
59
62
  # variables count mean std min 25% median 75% max
@@ -84,13 +87,19 @@ module RedAmber
84
87
  # Show information of self.
85
88
  #
86
89
  # According to `ENV['RED_AMBER_OUTPUT_MODE'].upcase`,
87
- # - If it is 'TDR', returns class, shape and transposed preview by 3 rows.
88
- # - If it is 'MINIMUM', returns class and shape.
89
- # - If it is 'TABLE' or otherwise, returns class, shape and Table preview.
90
+ # - If it is 'TDR', returns class name, shape, object id
91
+ # and transposed preview for up to 10 variables.
92
+ # - If it is 'TDRA', returns class name, shape, object id
93
+ # and transposed preview for all variables.
94
+ # - If it is 'MINIMUM', returns class name and shape.
95
+ # - If it is 'PLAIN', returns class name, shape and Table preview
96
+ # limited to a width of 128 characters and the first 128 rows.
97
+ # - If it is 'TABLE' or otherwise, returns class name, shape, object id
98
+ # and Table preview limited to a width of 100 characters and the first 20 rows.
90
99
  # Default value of the ENV is 'Table'.
91
100
  # @return [String]
92
101
  # information of self.
93
- # @example Default (ENV ['RED_AMBER_OUTPUT_MODE'] == 'Table')
102
+ # @example Default for ENV ['RED_AMBER_OUTPUT_MODE'] == 'Table'
94
103
  # puts df.inspect
95
104
  #
96
105
  # # =>
@@ -121,11 +130,15 @@ module RedAmber
121
130
  mode = ENV.fetch('RED_AMBER_OUTPUT_MODE', 'Table')
122
131
  case mode.upcase
123
132
  when 'TDR'
124
- "#<#{shape_str(with_id: true)}>\n#{dataframe_info(3)}"
133
+ "#<#{shape_str(with_id: true)}>\n#{dataframe_info(10)}"
134
+ when 'TDRA'
135
+ "#<#{shape_str(with_id: true)}>\n#{dataframe_info(:all)}"
125
136
  when 'MINIMUM'
126
137
  shape_str
138
+ when 'PLAIN'
139
+ "#<#{shape_str}>\n#{to_s(width: 128, head: 128)}"
127
140
  else
128
- "#<#{shape_str(with_id: true)}>\n#{self}"
141
+ "#<#{shape_str(with_id: true)}>\n#{to_s(width: 100, head: 20)}"
129
142
  end
130
143
  end
131
144
 
@@ -147,18 +160,34 @@ module RedAmber
147
160
  # diamonds
148
161
  #
149
162
  # # =>
150
- # #<RedAmber::DataFrame : 53940 x 11 Vectors, 0x000000000000c314>
151
- # index carat cut color clarity depth table price ... z
152
- # <uint16> <double> <string> <string> <string> <double> <double> <uint16> ... <double>
153
- # 0 0 0.23 Ideal E SI2 61.5 55.0 326 ... 2.43
154
- # 1 1 0.21 Premium E SI1 59.8 61.0 326 ... 2.31
155
- # 2 2 0.23 Good E VS1 56.9 65.0 327 ... 2.31
156
- # 3 3 0.29 Premium I VS2 62.4 58.0 334 ... 2.63
157
- # 4 4 0.31 Good J SI2 63.3 58.0 335 ... 2.75
158
- # : : : : : : : : : ... :
159
- # 53937 53937 0.7 Very Good D SI1 62.8 60.0 2757 ... 3.56
160
- # 53938 53938 0.86 Premium H SI2 61.0 58.0 2757 ... 3.74
161
- # 53939 53939 0.75 Ideal D SI2 62.2 55.0 2757 ... 3.64
163
+ # #<RedAmber::DataFrame : 53940 x 11 Vectors, 0x0000000000035084>
164
+ # index carat cut color clarity depth table price x y z
165
+ # <uint16> <double> <string> <string> <string> <double> <double> <uint16> <double> <double> <double>
166
+ # 0 0 0.23 Ideal E SI2 61.5 55.0 326 3.95 3.98 2.43
167
+ # 1 1 0.21 Premium E SI1 59.8 61.0 326 3.89 3.84 2.31
168
+ # 2 2 0.23 Good E VS1 56.9 65.0 327 4.05 4.07 2.31
169
+ # 3 3 0.29 Premium I VS2 62.4 58.0 334 4.2 4.23 2.63
170
+ # 4 4 0.31 Good J SI2 63.3 58.0 335 4.34 4.35 2.75
171
+ # 5 5 0.24 Very Good J VVS2 62.8 57.0 336 3.94 3.96 2.48
172
+ # 6 6 0.24 Very Good I VVS1 62.3 57.0 336 3.95 3.98 2.47
173
+ # 7 7 0.26 Very Good H SI1 61.9 55.0 337 4.07 4.11 2.53
174
+ # 8 8 0.22 Fair E VS2 65.1 61.0 337 3.87 3.78 2.49
175
+ # 9 9 0.23 Very Good H VS1 59.4 61.0 338 4.0 4.05 2.39
176
+ # 10 10 0.3 Good J SI1 64.0 55.0 339 4.25 4.28 2.73
177
+ # 11 11 0.23 Ideal J VS1 62.8 56.0 340 3.93 3.9 2.46
178
+ # 12 12 0.22 Premium F SI1 60.4 61.0 342 3.88 3.84 2.33
179
+ # 13 13 0.31 Ideal J SI2 62.2 54.0 344 4.35 4.37 2.71
180
+ # 14 14 0.2 Premium E SI2 60.2 62.0 345 3.79 3.75 2.27
181
+ # 15 15 0.32 Premium E I1 60.9 58.0 345 4.38 4.42 2.68
182
+ # 16 16 0.3 Ideal I SI2 62.0 54.0 348 4.31 4.34 2.68
183
+ # 17 17 0.3 Good J SI1 63.4 54.0 351 4.23 4.29 2.7
184
+ # 18 18 0.3 Good J SI1 63.8 56.0 351 4.23 4.26 2.71
185
+ # 19 19 0.3 Very Good J SI1 62.7 59.0 351 4.21 4.27 2.66
186
+ # : : : : : : : : : : : :
187
+ # 53936 53936 0.72 Good D SI1 63.1 55.0 2757 5.69 5.75 3.61
188
+ # 53937 53937 0.7 Very Good D SI1 62.8 60.0 2757 5.66 5.68 3.56
189
+ # 53938 53938 0.86 Premium H SI2 61.0 58.0 2757 6.15 6.12 3.74
190
+ # 53939 53939 0.75 Ideal D SI2 62.2 55.0 2757 5.83 5.87 3.64
162
191
  #
163
192
  # diamonds.tdr
164
193
  #
@@ -266,6 +295,7 @@ module RedAmber
266
295
  # - If it is 'MINIMUM', returns shape by plain text.
267
296
  # - If it is 'PLAIN', returns `#inspect` value by plain text.
268
297
  # - If it is 'TDR', returns shape and transposed preview by plain text.
298
+ # - If it is 'TDRA', returns shape and transposed preview of all variables by plain text.
269
299
  # - If it is 'TABLE' or otherwise, returns Table preview by html format.
270
300
  # Default value of the ENV is 'TABLE'.
271
301
  # @return [String]
@@ -283,6 +313,8 @@ module RedAmber
283
313
  ['text/plain', shape_str]
284
314
  when 'TDR'
285
315
  size <= 5 ? ['text/plain', tdr_str(tally: 0)] : ['text/plain', tdr_str]
316
+ when 'TDRA'
317
+ ['text/plain', tdr_str(:all)]
286
318
  else # 'TABLE'
287
319
  ['text/html', html_table]
288
320
  end
@@ -325,7 +357,7 @@ module RedAmber
325
357
  quoted_keys = keys.map(&:inspect)
326
358
  headers = { idx: '#', key: 'key', type: 'type', levels: 'level',
327
359
  data: 'data_preview' }
328
- header_format = make_header_format(levels, headers, quoted_keys)
360
+ header_format = make_header_format(levels, headers, quoted_keys, limit)
329
361
 
330
362
  sio = StringIO.new # output string buffer
331
363
  sio.puts "Vector#{pl(n_keys)} : #{var_type_count(type_groups).join(', ')}"
@@ -355,9 +387,9 @@ module RedAmber
355
387
  sio.string
356
388
  end
357
389
 
358
- def make_header_format(levels, headers, quoted_keys)
390
+ def make_header_format(levels, headers, quoted_keys, limit)
359
391
  # find longest word to adjust width
360
- w_idx = n_keys.to_s.size
392
+ w_idx = ([n_keys, limit].min - 1).to_s.size
361
393
  w_key = [quoted_keys.map(&:size).max, headers[:key].size].max
362
394
  w_type = [types.map(&:size).max, headers[:type].size].max
363
395
  w_level = [levels.map { |l| l.to_s.size }.max, headers[:levels].size].max
@@ -386,10 +418,17 @@ module RedAmber
386
418
  end
387
419
 
388
420
  def shorthand(vector, size, max_element)
389
- max = vector.temporal? ? 2 : max_element
390
- a = vector.to_a.take(max)
391
- a.map! { |e| e.nil? ? 'nil' : e.inspect }
392
- a << '... ' if size > max
421
+ a = vector.to_a.take(max_element)
422
+ a.map! do |e|
423
+ if e.nil?
424
+ 'nil'
425
+ elsif vector.temporal?
426
+ e.to_s.inspect
427
+ else
428
+ e.inspect
429
+ end
430
+ end
431
+ a << '... ' if size > max_element
393
432
  "[#{a.join(', ')}]"
394
433
  end
395
434
 
@@ -437,7 +476,7 @@ module RedAmber
437
476
  end
438
477
  end
439
478
 
440
- width_list = df.vectors.map { |v| v.to_a.map(&:length).max }
479
+ width_list = df.vectors.map { |v| v.to_a.map(&:width).max }
441
480
  total_length = width_list[-1] # reserved for last column
442
481
 
443
482
  formats = []
@@ -446,14 +485,13 @@ module RedAmber
446
485
  w = width_list[i]
447
486
  if total_length + w > width && i < df.n_keys - 1
448
487
  row_ellipsis = i
449
- formats << '%3s'
450
- formats << format_for_column(df.vectors[-1], original, width_list[-1])
488
+ formats << 3
489
+ formats << format_width(df.vectors[-1], original, width_list[-1])
451
490
  break
452
491
  end
453
- formats << format_for_column(v, original, w)
492
+ formats << format_width(v, original, w)
454
493
  total_length += w
455
494
  end
456
- format_str = formats.join(' ')
457
495
 
458
496
  str = StringIO.new
459
497
  if row_ellipsis
@@ -462,22 +500,31 @@ module RedAmber
462
500
  end
463
501
 
464
502
  df.to_a.each do |row|
465
- str.puts format(format_str, *row).rstrip
503
+ a =
504
+ row.zip(formats).map do |elem, format|
505
+ non_ascii_diff = elem.ascii_only? ? 0 : elem.width - elem.size
506
+ if format.negative?
507
+ elem.ljust(-format + non_ascii_diff)
508
+ else
509
+ elem.rjust(format + non_ascii_diff)
510
+ end
511
+ end
512
+ str.puts a.join(' ').rstrip
466
513
  end
467
514
 
468
515
  str.string
469
516
  end
470
517
 
471
- def format_for_column(vector, original, width)
518
+ def format_width(vector, original, width)
472
519
  if vector.key != INDEX_KEY && !original[vector.key].numeric?
473
- "%-#{width}s"
520
+ -width
474
521
  else
475
- "%#{width}s"
522
+ width
476
523
  end
477
524
  end
478
525
 
479
526
  def html_table
480
- reduced = size > 8 ? self[0..4, -4..-1] : self
527
+ reduced = size > 10 ? self[0..5, -5..-1] : self
481
528
 
482
529
  converted = reduced.assign do
483
530
  vectors.select.with_object({}) do |vector, assigner|
@@ -497,12 +544,14 @@ module RedAmber
497
544
  format('%g', element)
498
545
  in Integer
499
546
  format('%d', element)
547
+ else
548
+ element
500
549
  end
501
550
  end
502
551
  end
503
552
  end
504
553
 
505
- html = IRuby::HTML.table(converted.to_h, maxrows: 8, maxcols: 15)
554
+ html = IRuby::HTML.table(converted.to_h, maxrows: 10, maxcols: 15)
506
555
  "#{self.class} <#{size} x #{n_keys} vector#{pl(n_keys)}> #{html}"
507
556
  end
508
557
  end
@@ -49,9 +49,9 @@ module RedAmber
49
49
  #
50
50
  # @param sort_keys [Arrow::SortKey]
51
51
  # :key, "key" or "+key" denotes ascending,
52
- # "-key" denotes descending order
52
+ # :"-key" or "-key" denotes descending order.
53
53
  # @return [RedAmber::Vector]
54
- # sorted indices in Vector
54
+ # sorted indices in Vector.
55
55
  # @example
56
56
  # df
57
57
  #
@@ -79,9 +79,9 @@ module RedAmber
79
79
  #
80
80
  # @param sort_keys [Arrow::SortKey]
81
81
  # :key, "key" or "+key" denotes ascending,
82
- # "-key" denotes descending order
82
+ # :"-key" or "-key" denotes descending order.
83
83
  # @return [RedAmber::DataFrame]
84
- # sorted DataFrame
84
+ # sorted DataFrame.
85
85
  # @example Sort by a key
86
86
  # df
87
87
  #
@@ -3,7 +3,7 @@
3
3
  module RedAmber
4
4
  # Mix-in for the class DataFrame
5
5
  module DataFrameReshaping
6
- # Create a transposed DataFrame for the wide (messy) DataFrame.
6
+ # Create a transposed DataFrame for the wide (may be messy) DataFrame.
7
7
  #
8
8
  # @param key [Symbol]
9
9
  # key of the index column
@@ -177,10 +177,7 @@ module RedAmber
177
177
  # [39.1, 39.5, 40.3, nil, 36.7, 39.3, 38.9, 39.2, 34.1, 42.0, 37.8, 37.8, 41.1, ... ]
178
178
  #
179
179
  def v(key)
180
- unless key.is_a?(Symbol) || key.is_a?(String)
181
- raise DataFrameArgumentError, "Key is not a Symbol or a String: [#{key}]"
182
- end
183
- raise DataFrameArgumentError, "Key does not exist: [#{key}]" unless key? key
180
+ raise DataFrameArgumentError, "Key does not exist: [#{key}]" unless key?(key)
184
181
 
185
182
  variables[key.to_sym]
186
183
  end
@@ -146,6 +146,8 @@ module RedAmber
146
146
  picker.compact!
147
147
  raise DataFrameArgumentError, "some keys are duplicated: #{args}" if picker.uniq!
148
148
 
149
+ return self if picker == keys
150
+
149
151
  DataFrame.create(@table.select_columns(*picker))
150
152
  end
151
153
 
@@ -254,7 +256,7 @@ module RedAmber
254
256
 
255
257
  args = [instance_eval(&block)]
256
258
  end
257
- return self if args.empty? || empty?
259
+ return self if args.compact.empty? || empty?
258
260
 
259
261
  picker =
260
262
  if args.symbol?
@@ -265,7 +267,9 @@ module RedAmber
265
267
  keys.reject_by_indices(args)
266
268
  else
267
269
  dropper = parse_args(args, n_keys)
268
- if dropper.boolean?
270
+ if dropper.compact.empty?
271
+ return self
272
+ elsif dropper.boolean?
269
273
  keys.reject_by_booleans(dropper)
270
274
  elsif dropper.symbol?
271
275
  keys - dropper
@@ -646,6 +650,7 @@ module RedAmber
646
650
  unless not_existing_keys.empty?
647
651
  raise DataFrameArgumentError, "Not existing: #{not_existing_keys}"
648
652
  end
653
+ return self if key_pairs.all? { |k, v| k == v }
649
654
 
650
655
  fields =
651
656
  keys.map do |key|
@@ -42,8 +42,7 @@ module RedAmber
42
42
 
43
43
  table = @group.aggregate(*build_aggregation_keys("hash_#{function}",
44
44
  summary_keys))
45
- g = @group_keys.map(&:to_s)
46
- DataFrame.new(table[g + (table.keys - g)])
45
+ DataFrame.new(table[@group_keys + (table.keys - @group_keys)])
47
46
  end
48
47
  end
49
48
  end
@@ -61,11 +60,11 @@ module RedAmber
61
60
  #
62
61
  # # =>
63
62
  # #<RedAmber::Group : 0x000000000000f410>
64
- # species group_count
65
- # <string> <uint8>
66
- # 0 Adelie 152
67
- # 1 Chinstrap 68
68
- # 2 Gentoo 124
63
+ # species count
64
+ # <string> <uint8>
65
+ # 0 Adelie 152
66
+ # 1 Chinstrap 68
67
+ # 2 Gentoo 124
69
68
  #
70
69
  def initialize(dataframe, *group_keys)
71
70
  @dataframe = dataframe
@@ -187,14 +186,14 @@ module RedAmber
187
186
  #
188
187
  # # =>
189
188
  # #<RedAmber::Group : 0x0000000000003a98>
190
- # species group_count
191
- # <string> <uint8>
192
- # 0 Adelie 152
193
- # 1 Chinstrap 68
194
- # 2 Gentoo 124
189
+ # species count
190
+ # <string> <uint8>
191
+ # 0 Adelie 152
192
+ # 1 Chinstrap 68
193
+ # 2 Gentoo 124
195
194
  #
196
195
  def inspect
197
- "#<#{self.class} : #{format('0x%016x', object_id)}>\n#{group_count}"
196
+ "#<#{self.class} : #{format('0x%016x', object_id)}>\n#{count(@group_keys)}"
198
197
  end
199
198
 
200
199
  # Summarize Group by aggregation functions from the block.
@@ -211,11 +210,11 @@ module RedAmber
211
210
  #
212
211
  # # =>
213
212
  # #<RedAmber::Group : 0x000000000000c314>
214
- # species group_count
215
- # <string> <uint8>
216
- # 0 Adelie 152
217
- # 1 Chinstrap 68
218
- # 2 Gentoo 124
213
+ # species count
214
+ # <string> <uint8>
215
+ # 0 Adelie 152
216
+ # 1 Chinstrap 68
217
+ # 2 Gentoo 124
219
218
  #
220
219
  # group.summarize { mean(:bill_length_mm) }
221
220
  #
@@ -28,19 +28,19 @@ module RedAmber
28
28
  # parsed flat Array.
29
29
  # @note This method is recursively called to parse.
30
30
  #
31
- def parse_args(args, array_size)
31
+ def parse_args(args, array_size, symbolize: true)
32
32
  args.flat_map do |elem|
33
33
  case elem
34
34
  when Integer, Symbol, NilClass, TrueClass, FalseClass
35
35
  elem
36
36
  when Array
37
- parse_args(elem, array_size)
37
+ parse_args(elem, array_size, symbolize: symbolize)
38
38
  when Range
39
39
  parse_range(elem, array_size)
40
40
  when Enumerator
41
- parse_args(Array(elem), array_size)
41
+ parse_args(Array(elem), array_size, symbolize: symbolize)
42
42
  when String
43
- elem.to_sym
43
+ symbolize ? elem.to_sym : elem
44
44
  when Float
45
45
  elem.floor.to_i
46
46
  else
@@ -143,7 +143,7 @@ module RedAmber
143
143
  module RefineArrowTable
144
144
  refine Arrow::Table do
145
145
  def keys
146
- columns.map(&:name)
146
+ columns.map { |column| column.name.to_sym }
147
147
  end
148
148
 
149
149
  def key?(key)
@@ -202,5 +202,18 @@ module RedAmber
202
202
  end
203
203
  end
204
204
 
205
- private_constant :RefineArray, :RefineArrayLike, :RefineArrowTable, :RefineHash
205
+ # Add additional capabilities to String
206
+ module RefineString
207
+ refine String do
208
+ def width
209
+ chars
210
+ .partition(&:ascii_only?)
211
+ .map.with_index(1) { |a, i| a.size * i }
212
+ .sum
213
+ end
214
+ end
215
+ end
216
+
217
+ private_constant :RefineArray, :RefineArrayLike, :RefineArrowTable,
218
+ :RefineHash, :RefineString
206
219
  end