red_amber 0.4.0 → 0.4.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -5,6 +5,9 @@ require 'stringio'
5
5
  module RedAmber
6
6
  # Mix-in for the class DataFrame
7
7
  module DataFrameDisplayable
8
+ # Refine class String
9
+ using RefineString
10
+
8
11
  # Used internally to display table.
9
12
  INDEX_KEY = :index_key_for_format_table
10
13
  private_constant :INDEX_KEY
@@ -25,19 +28,20 @@ module RedAmber
25
28
  # puts penguins.to_s
26
29
  #
27
30
  # # =>
28
- # species island bill_length_mm bill_depth_mm flipper_length_mm ... year
29
- # <string> <string> <double> <double> <uint8> ... <uint16>
30
- # 0 Adelie Torgersen 39.1 18.7 181 ... 2007
31
- # 1 Adelie Torgersen 39.5 17.4 186 ... 2007
32
- # 2 Adelie Torgersen 40.3 18.0 195 ... 2007
33
- # 3 Adelie Torgersen (nil) (nil) (nil) ... 2007
34
- # 4 Adelie Torgersen 36.7 19.3 193 ... 2007
35
- # : : : : : : ... :
36
- # 341 Gentoo Biscoe 50.4 15.7 222 ... 2009
37
- # 342 Gentoo Biscoe 45.2 14.8 212 ... 2009
38
- # 343 Gentoo Biscoe 49.9 16.1 213 ... 2009
39
- #
40
- def to_s(width: 80, head: 5, tail: 3)
31
+ # species island bill_length_mm bill_depth_mm flipper_length_mm body_mass_g ... year
32
+ # <string> <string> <double> <double> <uint8> <uint16> ... <uint16>
33
+ # 0 Adelie Torgersen 39.1 18.7 181 3750 ... 2007
34
+ # 1 Adelie Torgersen 39.5 17.4 186 3800 ... 2007
35
+ # 2 Adelie Torgersen 40.3 18.0 195 3250 ... 2007
36
+ # 3 Adelie Torgersen (nil) (nil) (nil) (nil) ... 2007
37
+ # 4 Adelie Torgersen 36.7 19.3 193 3450 ... 2007
38
+ # : : : : : : : ... :
39
+ # 340 Gentoo Biscoe 46.8 14.3 215 4850 ... 2009
40
+ # 341 Gentoo Biscoe 50.4 15.7 222 5750 ... 2009
41
+ # 342 Gentoo Biscoe 45.2 14.8 212 5200 ... 2009
42
+ # 343 Gentoo Biscoe 49.9 16.1 213 5400 ... 2009
43
+ #
44
+ def to_s(width: 90, head: 5, tail: 4)
41
45
  return '' if empty?
42
46
 
43
47
  format_table(width: width, head: head, tail: tail)
@@ -52,8 +56,7 @@ module RedAmber
52
56
  # @return [DataFrame]
53
57
  # a new dataframe.
54
58
  # @example Statistical summary of penguins dataset
55
- # # needs more width to show all stats in this example
56
- # puts penguins.summary.to_s(width: 82)
59
+ # puts penguins.summary.to_s
57
60
  #
58
61
  # # =>
59
62
  # variables count mean std min 25% median 75% max
@@ -84,13 +87,19 @@ module RedAmber
84
87
  # Show information of self.
85
88
  #
86
89
  # According to `ENV ['RED_AMBER_OUTPUT_MODE'].upcase`,
87
- # - If it is 'TDR', returns class, shape and transposed preview by 3 rows.
88
- # - If it is 'MINIMUM', returns class and shape.
89
- # - If it is 'TABLE' or otherwise, returns class, shape and Table preview.
90
+ # - If it is 'TDR', returns class name, shape, object id
91
+ # and transposed preview for up to 10 variables.
92
+ # - If it is 'TDRA', returns class name, shape, object id
93
+ # and transposed preview for all variables.
94
+ # - If it is 'MINIMUM', returns class name and shape.
95
+ # - If it is 'PLAIN', returns class name, shape and Table preview
96
+ # (up to 128 leading rows, within a display width of 128).
97
+ # - If it is 'TABLE' or otherwise, returns class name, shape, object id
98
+ # and Table preview for up to 512 rows and 512 columns.
90
99
  # Default value of the ENV is 'Table'.
91
100
  # @return [String]
92
101
  # information of self.
93
- # @example Default (ENV ['RED_AMBER_OUTPUT_MODE'] == 'Table')
102
+ # @example Default for ENV ['RED_AMBER_OUTPUT_MODE'] == 'Table'
94
103
  # puts df.inspect
95
104
  #
96
105
  # # =>
@@ -121,11 +130,15 @@ module RedAmber
121
130
  mode = ENV.fetch('RED_AMBER_OUTPUT_MODE', 'Table')
122
131
  case mode.upcase
123
132
  when 'TDR'
124
- "#<#{shape_str(with_id: true)}>\n#{dataframe_info(3)}"
133
+ "#<#{shape_str(with_id: true)}>\n#{dataframe_info(10)}"
134
+ when 'TDRA'
135
+ "#<#{shape_str(with_id: true)}>\n#{dataframe_info(:all)}"
125
136
  when 'MINIMUM'
126
137
  shape_str
138
+ when 'PLAIN'
139
+ "#<#{shape_str}>\n#{to_s(width: 128, head: 128)}"
127
140
  else
128
- "#<#{shape_str(with_id: true)}>\n#{self}"
141
+ "#<#{shape_str(with_id: true)}>\n#{to_s(width: 100, head: 20)}"
129
142
  end
130
143
  end
131
144
 
@@ -147,18 +160,34 @@ module RedAmber
147
160
  # diamonds
148
161
  #
149
162
  # # =>
150
- # #<RedAmber::DataFrame : 53940 x 11 Vectors, 0x000000000000c314>
151
- # index carat cut color clarity depth table price ... z
152
- # <uint16> <double> <string> <string> <string> <double> <double> <uint16> ... <double>
153
- # 0 0 0.23 Ideal E SI2 61.5 55.0 326 ... 2.43
154
- # 1 1 0.21 Premium E SI1 59.8 61.0 326 ... 2.31
155
- # 2 2 0.23 Good E VS1 56.9 65.0 327 ... 2.31
156
- # 3 3 0.29 Premium I VS2 62.4 58.0 334 ... 2.63
157
- # 4 4 0.31 Good J SI2 63.3 58.0 335 ... 2.75
158
- # : : : : : : : : : ... :
159
- # 53937 53937 0.7 Very Good D SI1 62.8 60.0 2757 ... 3.56
160
- # 53938 53938 0.86 Premium H SI2 61.0 58.0 2757 ... 3.74
161
- # 53939 53939 0.75 Ideal D SI2 62.2 55.0 2757 ... 3.64
163
+ # #<RedAmber::DataFrame : 53940 x 11 Vectors, 0x0000000000035084>
164
+ # index carat cut color clarity depth table price x y z
165
+ # <uint16> <double> <string> <string> <string> <double> <double> <uint16> <double> <double> <double>
166
+ # 0 0 0.23 Ideal E SI2 61.5 55.0 326 3.95 3.98 2.43
167
+ # 1 1 0.21 Premium E SI1 59.8 61.0 326 3.89 3.84 2.31
168
+ # 2 2 0.23 Good E VS1 56.9 65.0 327 4.05 4.07 2.31
169
+ # 3 3 0.29 Premium I VS2 62.4 58.0 334 4.2 4.23 2.63
170
+ # 4 4 0.31 Good J SI2 63.3 58.0 335 4.34 4.35 2.75
171
+ # 5 5 0.24 Very Good J VVS2 62.8 57.0 336 3.94 3.96 2.48
172
+ # 6 6 0.24 Very Good I VVS1 62.3 57.0 336 3.95 3.98 2.47
173
+ # 7 7 0.26 Very Good H SI1 61.9 55.0 337 4.07 4.11 2.53
174
+ # 8 8 0.22 Fair E VS2 65.1 61.0 337 3.87 3.78 2.49
175
+ # 9 9 0.23 Very Good H VS1 59.4 61.0 338 4.0 4.05 2.39
176
+ # 10 10 0.3 Good J SI1 64.0 55.0 339 4.25 4.28 2.73
177
+ # 11 11 0.23 Ideal J VS1 62.8 56.0 340 3.93 3.9 2.46
178
+ # 12 12 0.22 Premium F SI1 60.4 61.0 342 3.88 3.84 2.33
179
+ # 13 13 0.31 Ideal J SI2 62.2 54.0 344 4.35 4.37 2.71
180
+ # 14 14 0.2 Premium E SI2 60.2 62.0 345 3.79 3.75 2.27
181
+ # 15 15 0.32 Premium E I1 60.9 58.0 345 4.38 4.42 2.68
182
+ # 16 16 0.3 Ideal I SI2 62.0 54.0 348 4.31 4.34 2.68
183
+ # 17 17 0.3 Good J SI1 63.4 54.0 351 4.23 4.29 2.7
184
+ # 18 18 0.3 Good J SI1 63.8 56.0 351 4.23 4.26 2.71
185
+ # 19 19 0.3 Very Good J SI1 62.7 59.0 351 4.21 4.27 2.66
186
+ # : : : : : : : : : : : :
187
+ # 53936 53936 0.72 Good D SI1 63.1 55.0 2757 5.69 5.75 3.61
188
+ # 53937 53937 0.7 Very Good D SI1 62.8 60.0 2757 5.66 5.68 3.56
189
+ # 53938 53938 0.86 Premium H SI2 61.0 58.0 2757 6.15 6.12 3.74
190
+ # 53939 53939 0.75 Ideal D SI2 62.2 55.0 2757 5.83 5.87 3.64
162
191
  #
163
192
  # diamonds.tdr
164
193
  #
@@ -266,6 +295,7 @@ module RedAmber
266
295
  # - If it is 'MINIMUM', returns shape by plain text.
267
296
  # - If it is 'PLAIN', returns `#inspect` value by plain text.
268
297
  # - If it is 'TDR', returns shape and transposed preview by plain text.
298
+ # - If it is 'TDRA', returns shape and transposed preview of all variables by plain text.
269
299
  # - If it is 'TABLE' or otherwise, returns Table preview by html format.
270
300
  # Default value of the ENV is 'TABLE'.
271
301
  # @return [String]
@@ -283,6 +313,8 @@ module RedAmber
283
313
  ['text/plain', shape_str]
284
314
  when 'TDR'
285
315
  size <= 5 ? ['text/plain', tdr_str(tally: 0)] : ['text/plain', tdr_str]
316
+ when 'TDRA'
317
+ ['text/plain', tdr_str(:all)]
286
318
  else # 'TABLE'
287
319
  ['text/html', html_table]
288
320
  end
@@ -325,7 +357,7 @@ module RedAmber
325
357
  quoted_keys = keys.map(&:inspect)
326
358
  headers = { idx: '#', key: 'key', type: 'type', levels: 'level',
327
359
  data: 'data_preview' }
328
- header_format = make_header_format(levels, headers, quoted_keys)
360
+ header_format = make_header_format(levels, headers, quoted_keys, limit)
329
361
 
330
362
  sio = StringIO.new # output string buffer
331
363
  sio.puts "Vector#{pl(n_keys)} : #{var_type_count(type_groups).join(', ')}"
@@ -355,9 +387,9 @@ module RedAmber
355
387
  sio.string
356
388
  end
357
389
 
358
- def make_header_format(levels, headers, quoted_keys)
390
+ def make_header_format(levels, headers, quoted_keys, limit)
359
391
  # find longest word to adjust width
360
- w_idx = n_keys.to_s.size
392
+ w_idx = ([n_keys, limit].min - 1).to_s.size
361
393
  w_key = [quoted_keys.map(&:size).max, headers[:key].size].max
362
394
  w_type = [types.map(&:size).max, headers[:type].size].max
363
395
  w_level = [levels.map { |l| l.to_s.size }.max, headers[:levels].size].max
@@ -386,10 +418,17 @@ module RedAmber
386
418
  end
387
419
 
388
420
  def shorthand(vector, size, max_element)
389
- max = vector.temporal? ? 2 : max_element
390
- a = vector.to_a.take(max)
391
- a.map! { |e| e.nil? ? 'nil' : e.inspect }
392
- a << '... ' if size > max
421
+ a = vector.to_a.take(max_element)
422
+ a.map! do |e|
423
+ if e.nil?
424
+ 'nil'
425
+ elsif vector.temporal?
426
+ e.to_s.inspect
427
+ else
428
+ e.inspect
429
+ end
430
+ end
431
+ a << '... ' if size > max_element
393
432
  "[#{a.join(', ')}]"
394
433
  end
395
434
 
@@ -437,7 +476,7 @@ module RedAmber
437
476
  end
438
477
  end
439
478
 
440
- width_list = df.vectors.map { |v| v.to_a.map(&:length).max }
479
+ width_list = df.vectors.map { |v| v.to_a.map(&:width).max }
441
480
  total_length = width_list[-1] # reserved for last column
442
481
 
443
482
  formats = []
@@ -446,14 +485,13 @@ module RedAmber
446
485
  w = width_list[i]
447
486
  if total_length + w > width && i < df.n_keys - 1
448
487
  row_ellipsis = i
449
- formats << '%3s'
450
- formats << format_for_column(df.vectors[-1], original, width_list[-1])
488
+ formats << 3
489
+ formats << format_width(df.vectors[-1], original, width_list[-1])
451
490
  break
452
491
  end
453
- formats << format_for_column(v, original, w)
492
+ formats << format_width(v, original, w)
454
493
  total_length += w
455
494
  end
456
- format_str = formats.join(' ')
457
495
 
458
496
  str = StringIO.new
459
497
  if row_ellipsis
@@ -462,22 +500,31 @@ module RedAmber
462
500
  end
463
501
 
464
502
  df.to_a.each do |row|
465
- str.puts format(format_str, *row).rstrip
503
+ a =
504
+ row.zip(formats).map do |elem, format|
505
+ non_ascii_diff = elem.ascii_only? ? 0 : elem.width - elem.size
506
+ if format.negative?
507
+ elem.ljust(-format + non_ascii_diff)
508
+ else
509
+ elem.rjust(format + non_ascii_diff)
510
+ end
511
+ end
512
+ str.puts a.join(' ').rstrip
466
513
  end
467
514
 
468
515
  str.string
469
516
  end
470
517
 
471
- def format_for_column(vector, original, width)
518
+ def format_width(vector, original, width)
472
519
  if vector.key != INDEX_KEY && !original[vector.key].numeric?
473
- "%-#{width}s"
520
+ -width
474
521
  else
475
- "%#{width}s"
522
+ width
476
523
  end
477
524
  end
478
525
 
479
526
  def html_table
480
- reduced = size > 8 ? self[0..4, -4..-1] : self
527
+ reduced = size > 10 ? self[0..5, -5..-1] : self
481
528
 
482
529
  converted = reduced.assign do
483
530
  vectors.select.with_object({}) do |vector, assigner|
@@ -497,12 +544,14 @@ module RedAmber
497
544
  format('%g', element)
498
545
  in Integer
499
546
  format('%d', element)
547
+ else
548
+ element
500
549
  end
501
550
  end
502
551
  end
503
552
  end
504
553
 
505
- html = IRuby::HTML.table(converted.to_h, maxrows: 8, maxcols: 15)
554
+ html = IRuby::HTML.table(converted.to_h, maxrows: 10, maxcols: 15)
506
555
  "#{self.class} <#{size} x #{n_keys} vector#{pl(n_keys)}> #{html}"
507
556
  end
508
557
  end
@@ -49,9 +49,9 @@ module RedAmber
49
49
  #
50
50
  # @param sort_keys [Arrow::SortKey]
51
51
  # :key, "key" or "+key" denotes ascending,
52
- # "-key" denotes descending order
52
+ # :"-key" or "-key" denotes descending order.
53
53
  # @return [RedAmber::Vector]
54
- # sorted indices in Vector
54
+ # sorted indices in Vector.
55
55
  # @example
56
56
  # df
57
57
  #
@@ -79,9 +79,9 @@ module RedAmber
79
79
  #
80
80
  # @param sort_keys [Arrow::SortKey]
81
81
  # :key, "key" or "+key" denotes ascending,
82
- # "-key" denotes descending order
82
+ # :"-key" or "-key" denotes descending order.
83
83
  # @return [RedAmber::DataFrame]
84
- # sorted DataFrame
84
+ # sorted DataFrame.
85
85
  # @example Sort by a key
86
86
  # df
87
87
  #
@@ -3,7 +3,7 @@
3
3
  module RedAmber
4
4
  # Mix-in for the class DataFrame
5
5
  module DataFrameReshaping
6
- # Create a transposed DataFrame for the wide (messy) DataFrame.
6
+ # Create a transposed DataFrame for the wide (possibly messy) DataFrame.
7
7
  #
8
8
  # @param key [Symbol]
9
9
  # key of the index column
@@ -177,10 +177,7 @@ module RedAmber
177
177
  # [39.1, 39.5, 40.3, nil, 36.7, 39.3, 38.9, 39.2, 34.1, 42.0, 37.8, 37.8, 41.1, ... ]
178
178
  #
179
179
  def v(key)
180
- unless key.is_a?(Symbol) || key.is_a?(String)
181
- raise DataFrameArgumentError, "Key is not a Symbol or a String: [#{key}]"
182
- end
183
- raise DataFrameArgumentError, "Key does not exist: [#{key}]" unless key? key
180
+ raise DataFrameArgumentError, "Key does not exist: [#{key}]" unless key?(key)
184
181
 
185
182
  variables[key.to_sym]
186
183
  end
@@ -146,6 +146,8 @@ module RedAmber
146
146
  picker.compact!
147
147
  raise DataFrameArgumentError, "some keys are duplicated: #{args}" if picker.uniq!
148
148
 
149
+ return self if picker == keys
150
+
149
151
  DataFrame.create(@table.select_columns(*picker))
150
152
  end
151
153
 
@@ -254,7 +256,7 @@ module RedAmber
254
256
 
255
257
  args = [instance_eval(&block)]
256
258
  end
257
- return self if args.empty? || empty?
259
+ return self if args.compact.empty? || empty?
258
260
 
259
261
  picker =
260
262
  if args.symbol?
@@ -265,7 +267,9 @@ module RedAmber
265
267
  keys.reject_by_indices(args)
266
268
  else
267
269
  dropper = parse_args(args, n_keys)
268
- if dropper.boolean?
270
+ if dropper.compact.empty?
271
+ return self
272
+ elsif dropper.boolean?
269
273
  keys.reject_by_booleans(dropper)
270
274
  elsif dropper.symbol?
271
275
  keys - dropper
@@ -646,6 +650,7 @@ module RedAmber
646
650
  unless not_existing_keys.empty?
647
651
  raise DataFrameArgumentError, "Not existing: #{not_existing_keys}"
648
652
  end
653
+ return self if key_pairs.all? { |k, v| k == v }
649
654
 
650
655
  fields =
651
656
  keys.map do |key|
@@ -42,8 +42,7 @@ module RedAmber
42
42
 
43
43
  table = @group.aggregate(*build_aggregation_keys("hash_#{function}",
44
44
  summary_keys))
45
- g = @group_keys.map(&:to_s)
46
- DataFrame.new(table[g + (table.keys - g)])
45
+ DataFrame.new(table[@group_keys + (table.keys - @group_keys)])
47
46
  end
48
47
  end
49
48
  end
@@ -28,19 +28,19 @@ module RedAmber
28
28
  # parsed flat Array.
29
29
  # @note This method is recursively called to parse.
30
30
  #
31
- def parse_args(args, array_size)
31
+ def parse_args(args, array_size, symbolize: true)
32
32
  args.flat_map do |elem|
33
33
  case elem
34
34
  when Integer, Symbol, NilClass, TrueClass, FalseClass
35
35
  elem
36
36
  when Array
37
- parse_args(elem, array_size)
37
+ parse_args(elem, array_size, symbolize: symbolize)
38
38
  when Range
39
39
  parse_range(elem, array_size)
40
40
  when Enumerator
41
- parse_args(Array(elem), array_size)
41
+ parse_args(Array(elem), array_size, symbolize: symbolize)
42
42
  when String
43
- elem.to_sym
43
+ symbolize ? elem.to_sym : elem
44
44
  when Float
45
45
  elem.floor.to_i
46
46
  else
@@ -143,7 +143,7 @@ module RedAmber
143
143
  module RefineArrowTable
144
144
  refine Arrow::Table do
145
145
  def keys
146
- columns.map(&:name)
146
+ columns.map { |column| column.name.to_sym }
147
147
  end
148
148
 
149
149
  def key?(key)
@@ -202,5 +202,18 @@ module RedAmber
202
202
  end
203
203
  end
204
204
 
205
- private_constant :RefineArray, :RefineArrayLike, :RefineArrowTable, :RefineHash
205
+ # Add additional capabilities to String
206
+ module RefineString
207
+ refine String do
208
+ def width
209
+ chars
210
+ .partition(&:ascii_only?)
211
+ .map.with_index(1) { |a, i| a.size * i }
212
+ .sum
213
+ end
214
+ end
215
+ end
216
+
217
+ private_constant :RefineArray, :RefineArrayLike, :RefineArrowTable,
218
+ :RefineHash, :RefineString
206
219
  end