fat_table 0.2.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,522 @@
1
module FatTable
  # Column objects are a thin wrapper around an Array to allow columns to be
  # summed and have other aggregate operations performed on them, but compacting
  # out nils before proceeding. They are characterized by a header, which gives
  # the Column a name, a type, which limits the kinds of items that can be
  # stored in the Column, and the items themselves, which all must either be nil
  # or objects compatible with the Column's type. The valid types are Boolean,
  # DateTime, Numeric, String, and NilClass, the last of which is used as the
  # initial type until items added to the Column fix its type as one of the
  # others.
  class Column
    include Enumerable

    # The symbol representing this Column.
    attr_reader :header

    # The header as provided by the caller before its conversion to a symbol.
    # You can use this to recover the original string form of the header.
    attr_reader :raw_header

    # A string representing the deduced type of this Column. One of
    # Column::TYPES.
    attr_reader :type

    # An Array of the items of this Column, all of which must be values of the
    # Column's type or a nil. This Array contains the value of the item after
    # conversion to a native Ruby type, such as TrueClass, Date, DateTime,
    # Integer, String, etc. Thus, you can perform operations on the items,
    # perhaps after removing nils with +.items.compact+.
    attr_reader :items

    # Valid Column types as strings.
    TYPES = %w[NilClass Boolean DateTime Numeric String].freeze

    # :category: Aggregates

    # The names, as symbols, of the known aggregate operations that can be
    # performed on a Column. This is an Array so that membership can be
    # tested with +include?+.
    VALID_AGGREGATES = %i[first last rng
                          sum count min max avg var pvar dev pdev
                          any? all? none? one?].freeze

    # :category: Constructors

    # Create a new Column with the given +header+ and initialized with the given
    # +items+, as an array of either strings or ruby objects that are one of the
    # permissible types or strings parsable as one of the permissible types. If
    # no +items+ are passed, returns an empty Column to which items may be added
    # with the Column#<< method. The item types must be one of the following
    # types or strings parseable as one of them:
    #
    # Boolean::
    #   an object of type TrueClass or FalseClass or a string that is either
    #   't', 'true', 'y', 'yes', 'f', 'false', 'n', or 'no', in each case,
    #   regardless of case.
    #
    # DateTime::
    #   an object of class Date, DateTime, or a string that matches
    #   +/\d\d\d\d[-\/]\d\d?[-\/]\d\d?/+ and is parseable by DateTime.parse.
    #
    # Numeric::
    #   an object that is of class Numeric, or a string that looks like an
    #   optionally signed number after removing the currency symbol, '+,+',
    #   and '+_+', as well as Rationals in the form <number>:<number> or
    #   <number>/<number>, where <number> is an integer and the second
    #   <number> is not zero.
    #
    # String::
    #   if the object is a non-blank string that does not parse as any
    #   of the foregoing, it is treated as a String type, and once a column
    #   is typed as such, blank strings represent blank strings rather than
    #   nil values.
    #
    # NilClass::
    #   until a Column sees an item that qualifies as one of the
    #   foregoing, it is typed as NilClass, meaning that the type is
    #   undetermined. Until a column obtains a type, blank strings are
    #   treated as nils and do not affect the type of the column. After a
    #   column acquires a type, blank strings are treated as nil values
    #   except in the case of String columns, which retain them as blank
    #   strings.
    #
    # Examples:
    #
    #   require 'fat_table'
    #   col = FatTable::Column.new(header: 'date')
    #   col << Date.today - 30
    #   col << '2017-05-04'
    #   col.type #=> 'DateTime'
    #   col.header #=> :date
    #   nums = [35.25, 18, '35:14', '$18_321']
    #   col = FatTable::Column.new(header: :prices, items: nums)
    #   col.type #=> 'Numeric'
    #   col.header #=> :prices
    #   col.sum #=> 18376.75
    def initialize(header:, items: [])
      @raw_header = header
      @header = @raw_header.is_a?(Symbol) ? @raw_header : @raw_header.to_s.as_sym
      @type = 'NilClass'
      raise UserError, "Unknown column type '#{type}'" unless TYPES.include?(@type.to_s)
      @items = []
      # Route every item through #<< so it is converted and type-checked.
      items.each { |itm| self << itm }
    end

    ##########################################################################
    # Attributes
    ##########################################################################

    # :category: Attributes

    # Return the item of the Column at the given index.
    def [](k)
      items[k]
    end

    # :category: Attributes

    # Return a dupped Array of this Column's items. To get the non-dupped items,
    # just use the .items accessor.
    def to_a
      items.deep_dup
    end

    # :category: Attributes

    # Return the size of the Column, including any nils.
    def size
      items.size
    end

    # :category: Attributes

    # Return true if there are no items in the Column.
    def empty?
      items.empty?
    end

    # :category: Attributes

    # Return the index of the last item in the Column.
    def last_i
      size - 1
    end

    ##########################################################################
    # Enumerable
    ##########################################################################

    # :category: Attributes

    # Yield each item in the Column in the order in which they appear in the
    # Column. This makes Columns Enumerable, so all the Enumerable methods are
    # available on a Column. When called without a block, return an Enumerator
    # over the items instead.
    def each(&block)
      return enum_for(:each) unless block_given?
      items.each(&block)
    end

    ##########################################################################
    # Aggregates
    ##########################################################################

    # :category: Aggregates

    # Return the first non-nil item in the Column. Works with any Column type.
    def first
      items.compact.first
    end

    # :category: Aggregates

    # Return the last non-nil item in the Column. Works with any Column type.
    def last
      items.compact.last
    end

    # :category: Aggregates

    # Return a string of the #first and #last non-nil values in the Column.
    # Works with any Column type.
    def rng
      "#{first}..#{last}"
    end

    # :category: Aggregates

    # Return the sum of the non-nil items in the Column. Works with numeric and
    # string Columns. For a string Column, it will return the concatenation of
    # the non-nil items. Raises UserError for any other Column type.
    def sum
      only_with('sum', 'Numeric', 'String')
      present = items.compact
      # Core Array#sum starts from the Integer 0 and so cannot add Strings;
      # join gives the documented concatenation without relying on a patched
      # Array#sum.
      type == 'String' ? present.join : present.sum
    end

    # :category: Aggregates

    # Return a count of the non-nil items in the Column, converted with +to_d+.
    # Works with any Column type.
    def count
      items.compact.count.to_d
    end

    # :category: Aggregates

    # Return the smallest non-nil item in the Column. Works with numeric,
    # string, and datetime Columns, as well as Columns whose type is still
    # open. Raises UserError for a boolean Column.
    def min
      only_with('min', 'NilClass', 'Numeric', 'String', 'DateTime')
      items.compact.min
    end

    # :category: Aggregates

    # Return the largest non-nil item in the Column. Works with numeric,
    # string, and datetime Columns, as well as Columns whose type is still
    # open. Raises UserError for a boolean Column.
    def max
      only_with('max', 'NilClass', 'Numeric', 'String', 'DateTime')
      items.compact.max
    end

    # :category: Aggregates

    # Return the average value of the non-nil items in the Column. Works with
    # numeric and datetime Columns. For datetime Columns, it converts each date
    # to its Julian day number, computes the average, and then converts the
    # average back to a DateTime. Raises UserError for any other Column type.
    def avg
      only_with('avg', 'DateTime', 'Numeric')
      present = items.compact
      if type == 'DateTime'
        DateTime.jd(present.map(&:jd).sum / present.size.to_d)
      else
        sum / present.size.to_d
      end
    end

    # :category: Aggregates

    # Return the sample variance (the unbiased estimator of the population
    # variance using a divisor of N-1) as the average squared deviation from the
    # mean, of the non-nil items in the Column. Works with numeric and datetime
    # Columns. For datetime Columns, it converts each date to its Julian day
    # number and computes the variance of those numbers. Returns zero when
    # there is at most one non-nil item.
    def var
      only_with('var', 'DateTime', 'Numeric')
      values = items.compact
      values = values.map(&:jd) if type == 'DateTime'
      n = count
      return BigDecimal('0.0') if n <= 1
      # Build a throwaway numeric Column so the mean is computed by #avg.
      mu = Column.new(header: :mu, items: values).avg
      sq_dev = values.inject(BigDecimal('0.0')) do |acc, itm|
        acc + (itm - mu) * (itm - mu)
      end
      sq_dev / (n - 1)
    end

    # :category: Aggregates

    # Return the population variance (the biased estimator of the population
    # variance using a divisor of N) as the average squared deviation from the
    # mean, of the non-nil items in the Column. Works with numeric and datetime
    # Columns. For datetime Columns, it converts each date to its Julian day
    # number and computes the variance of those numbers. Returns zero when
    # there is at most one non-nil item.
    def pvar
      only_with('pvar', 'DateTime', 'Numeric')
      n = items.compact.size.to_d
      return BigDecimal('0.0') if n <= 1
      # Rescale the N-1 divisor used by #var to a divisor of N.
      var * ((n - 1) / n)
    end

    # :category: Aggregates

    # Return the sample standard deviation (the unbiased estimator of the
    # population standard deviation using a divisor of N-1) as the square root
    # of the sample variance, of the non-nil items in the Column. Works with
    # numeric and datetime Columns. For datetime Columns, it converts each date
    # to its Julian day number and computes the standard deviation of those
    # numbers.
    def dev
      only_with('dev', 'DateTime', 'Numeric')
      var.sqrt(20)
    end

    # :category: Aggregates

    # Return the population standard deviation (the biased estimator of the
    # population standard deviation using a divisor of N) as the square root of
    # the population variance, of the non-nil items in the Column. Works with
    # numeric and datetime Columns. For datetime Columns, it converts each date
    # to its Julian day number and computes the standard deviation of those
    # numbers. The root is taken with Math.sqrt.
    def pdev
      only_with('pdev', 'DateTime', 'Numeric')
      Math.sqrt(pvar)
    end

    # :category: Aggregates

    # Return true if any of the items in the Column are true; otherwise return
    # false. Works only with boolean Columns.
    def any?
      only_with('any?', 'Boolean')
      items.compact.any?
    end

    # :category: Aggregates

    # Return true if all of the items in the Column are true; otherwise return
    # false. Works only with boolean Columns.
    def all?
      only_with('all?', 'Boolean')
      items.compact.all?
    end

    # :category: Aggregates

    # Return true if none of the items in the Column are true; otherwise return
    # false. Works only with boolean Columns.
    def none?
      only_with('none?', 'Boolean')
      items.compact.none?
    end

    # :category: Aggregates

    # Return true if precisely one of the items in the Column is true;
    # otherwise return false. Works only with boolean Columns.
    def one?
      only_with('one?', 'Boolean')
      items.compact.one?
    end

    ##########################################################################
    # Construction
    ##########################################################################

    # :category: Constructors

    # Append +itm+ to end of the Column after converting it to the Column's
    # type. If the Column's type is still open, i.e. NilClass, attempt to fix
    # the Column's type based on the type of +itm+ as with Column.new. Raises
    # UserError if +itm+ cannot be converted to an already-fixed type.
    def <<(itm)
      items << convert_to_type(itm)
    end

    # :category: Constructors

    # Return a new Column appending the items of other to this Column's items,
    # checking for type compatibility. Use the header of this Column as the
    # header of the new Column. Raises UserError if the types differ.
    def +(other)
      raise UserError, 'Cannot combine columns with different types' unless type == other.type
      Column.new(header: header, items: items + other.items)
    end

    private

    # Return self if this Column's type is one of +valid_types+; otherwise
    # raise a UserError naming the aggregate +agg+ that was attempted.
    def only_with(agg, *valid_types)
      return self if valid_types.include?(type)
      raise UserError, "Aggregate '#{agg}' cannot be applied to a #{type} column"
    end

    # Convert val to the type of this Column. If type is NilClass, the type is
    # open, and a non-blank val will attempt conversion to one of the allowed
    # types, typing it as a String if no other type is recognized. If the val
    # is blank, and the type is open, the Column type remains open. If the val
    # is nil or a blank and the type is already determined, the val is set to
    # nil, and should be filtered from any Column computations, except that a
    # String Column keeps blank strings as they are. If the val is non-blank
    # and the Column type determined, raise an error if the val cannot be
    # converted to the Column type. Otherwise, returns the converted val as an
    # object of the correct class.
    def convert_to_type(val)
      case type
      when 'NilClass'
        convert_and_fix_type(val)
      when 'Boolean'
        # Test for blank strings and nil explicitly: false also counts as
        # blank, but here it is a legitimate boolean value.
        if (val.is_a?(String) && val.blank?) || val.nil?
          nil
        else
          require_converted(val, convert_to_boolean(val))
        end
      when 'DateTime'
        val.blank? ? nil : require_converted(val, convert_to_date_time(val))
      when 'Numeric'
        val.blank? ? nil : require_converted(val, convert_to_numeric(val))
      when 'String'
        val.nil? ? nil : require_converted(val, convert_to_string(val))
      else
        raise UserError, "Mysteriously, column has unknown type '#{type}'"
      end
    end

    # Convert +val+ for a Column whose type is still open, fixing the Column's
    # type from the first non-blank value seen. Return the converted value, or
    # nil for a blank value, which leaves the type open.
    def convert_and_fix_type(val)
      # Unfortunately, false counts as blank and we don't want it to. It
      # should be classified as a boolean.
      return nil if val != false && val.blank?

      # Only non-blank values are allowed to set the type of the Column.
      new_val = convert_to_boolean(val)
      if new_val.nil?
        new_val = convert_to_date_time(val) ||
                  convert_to_numeric(val) ||
                  convert_to_string(val)
      end
      @type =
        case new_val
        when true, false then 'Boolean'
        when Date then 'DateTime' # DateTime is a subclass of Date
        when Numeric then 'Numeric'
        when String then 'String'
        else
          raise UserError, "Cannot add #{val} of type #{new_val.class.name} to a column"
        end
      new_val
    end

    # Return +converted+ unless it is nil, in which case the original +val+
    # could not be converted to this Column's fixed type, so raise a UserError.
    def require_converted(val, converted)
      if converted.nil?
        raise UserError, "Attempt to add '#{val}' to a column already typed as #{type}"
      end
      converted
    end

    # Convert the val to a boolean if it looks like one, otherwise return nil.
    # Any boolean or a string of t, f, true, false, y, n, yes, or no, regardless
    # of case is assumed to be a boolean.
    def convert_to_boolean(val)
      return val if val.is_a?(TrueClass) || val.is_a?(FalseClass)
      str = val.to_s.clean
      return nil if str.blank?
      if str =~ /\A(false|f|n|no)\z/i
        false
      elsif str =~ /\A(true|t|y|yes)\z/i
        true
      end
    end

    # Convert the val to a DateTime if it is either a DateTime, a Date, or a
    # String that can be parsed as a DateTime, otherwise return nil. It only
    # recognizes strings that contain something like '2016-01-14' or
    # '2016/1/14' within them, i.e., a four-digit year followed by a month and
    # a day, separated by '-' or '/'. Otherwise DateTime.parse would treat many
    # bare numbers as dates, such as '2841381', which it would recognize as a
    # valid date, but the user probably does not intend it to be so treated.
    # A parsed value with a zero time of day is returned as a Date.
    def convert_to_date_time(val)
      # DateTime is a subclass of Date, so this covers both.
      return val if val.is_a?(Date)
      str = val.to_s.clean
      return nil if str.blank?
      return nil unless str =~ %r{\b\d\d\d\d[-/]\d\d?[-/]\d\d?\b}
      parsed = DateTime.parse(str)
      parsed.seconds_since_midnight.zero? ? parsed.to_date : parsed
    rescue ArgumentError
      # DateTime.parse could not make a date out of the string.
      nil
    end

    # Convert the val to a Numeric if it is already a Numeric or is a String
    # that looks like one. Any Float is promoted to a BigDecimal. Strings may
    # carry a leading sign and may contain commas, underscores, and the
    # currency symbol, all of which are removed. A decimal string becomes a
    # BigDecimal, an integer string an Integer, and a string of the form
    # <integer>:<integer> or <integer>/<integer> a Rational, unless the
    # denominator is zero. Otherwise return nil.
    def convert_to_numeric(val)
      return BigDecimal(val, Float::DIG) if val.is_a?(Float)
      return val if val.is_a?(Numeric)
      # Eliminate any commas, $'s (or other currency symbol), or _'s.
      cursym = Regexp.quote(FatTable.currency_symbol)
      str = val.to_s.clean.gsub(/[,_#{cursym}]/, '')
      return nil if str.blank?
      case str
      when /\A([-+]?)(\d+\.\d*|\d*\.\d+)\z/
        sign, digits = Regexp.last_match.captures
        # Pad with zeros so that a bare leading or trailing decimal point,
        # as in '.5' or '5.', is acceptable to BigDecimal().
        BigDecimal("#{sign}0#{digits}0")
      when /\A[-+]?\d+\z/
        str.to_i
      when %r{\A([-+]?\d+)\s*[:/]\s*([-+]?\d+)\z}
        numer, denom = Regexp.last_match.captures.map(&:to_i)
        # A zero denominator is not a number; treat it like any other
        # unparseable string.
        denom.zero? ? nil : Rational(numer, denom)
      end
    end

    # Convert the val to a String with to_s.
    def convert_to_string(val)
      val.to_s
    end
  end
end