fat_table 0.2.2

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,522 @@
1
+ module FatTable
2
+ # Column objects are a thin wrapper around an Array to allow columns to be
3
+ # summed and have other aggregate operations performed on them, but compacting
4
+ # out nils before proceeding. They are characterized by a header, which gives
5
+ # the Column a name, a type, which limits the kinds of items that can be
6
+ # stored in the Column, and the items themselves, which all must either be nil
7
+ # or objects compatible with the Column's type. The valid types are Boolean,
8
+ # DateTime, Numeric, String, and NilClass, the last of which is used as the
9
+ # initial type until items added to the Column fix its type as one of the
10
+ # others.
11
+ class Column
12
+ # The symbol representing this Column.
13
+ attr_reader :header
14
+
15
+ # The header as provided by the caller before its conversion to a symbol.
16
+ # You can use this to recover the original string form of the header.
17
+ attr_reader :raw_header
18
+
19
+ # A string representing the deduced type of this Column. One of
20
+ # Column::TYPES.
21
+ attr_reader :type
22
+
23
+ # An Array of the items of this Column, all of which must be values of the
24
+ # Column's type or nil. This Array contains the value of the item after
25
+ # conversion to a native Ruby type, such as TrueClass, Date, DateTime,
26
+ # Integer, String, etc. Thus, you can perform operations on the items,
27
+ # perhaps after removing nils with +.items.compact+.
28
+ attr_reader :items
29
+
30
# The permissible Column type names, as frozen Array of Strings. A Column's
# +type+ is always one of these.
TYPES = %w[NilClass Boolean DateTime Numeric String].freeze
32
+
33
+ # :category: Constructors
34
+
35
# Create a new Column with the given +header+ and initialized with the given
# +items+, an array of either strings or ruby objects that are one of the
# permissible types or strings parseable as one of the permissible types. If
# no +items+ are passed, returns an empty Column to which items may be added
# with the Column#<< method. Each item must be one of the following types or
# a string parseable as one of them:
#
# Boolean::
#   an object of type TrueClass or FalseClass or a string that is either
#   't', 'true', 'y', 'yes', 'f', 'false', 'n', or 'no', in each case,
#   regardless of case.
#
# DateTime::
#   an object of class Date, DateTime, or a string that matches
#   +/\d\d\d\d[-\/]\d\d?[-\/]\d\d?/+ and is parseable by DateTime.parse.
#
# Numeric::
#   an object that is of class Numeric, or a string that looks like a
#   number after removing '+$+', '+,+', and '+_+', as well as Rationals in
#   the form <number>:<number> or <number>/<number>, where <number> is an
#   integer.
#
# String::
#   if the object is a non-blank string that does not parse as any of the
#   foregoing, it is treated as a String type, and once a column is typed
#   as such, blank strings represent blank strings rather than nil values.
#
# NilClass::
#   until a Column sees an item that qualifies as one of the foregoing, it
#   is typed as NilClass, meaning that the type is undetermined. Until a
#   column obtains a type, blank strings are treated as nils and do not
#   affect the type of the column. After a column acquires a type, blank
#   strings are treated as nil values except in the case of String columns,
#   which retain them as blank strings.
#
# The +header+ is kept verbatim in +raw_header+; +header+ is the same value
# if it is already a Symbol, otherwise the result of +to_s.as_sym+ on it.
#
# Examples:
#
#   require 'fat_table'
#   col = FatTable::Column.new(header: 'date')
#   col << Date.today - 30
#   col << '2017-05-04'
#   col.type #=> 'DateTime'
#   col.header #=> :date
#   nums = [35.25, 18, '35:14', '$18_321']
#   col = FatTable::Column.new(header: :prices, items: nums)
#   col.type #=> 'Numeric'
#   col.header #=> :prices
#   col.sum #=> 18376.75
def initialize(header:, items: [])
  @raw_header = header
  @header = @raw_header.is_a?(Symbol) ? @raw_header : @raw_header.to_s.as_sym
  # Every Column starts out untyped; the first non-blank item appended
  # through #<< fixes the type. (The former validity check on @type could
  # never fail here, since the type was assigned on the preceding line.)
  @type = 'NilClass'
  @items = []
  # Route every item through #<< so it is converted and the type deduced.
  items.each { |i| self << i }
end
98
+
99
+ ##########################################################################
100
+ # Attributes
101
+ ##########################################################################
102
+
103
+ # :category: Attributes
104
+
105
# Look up the Column's items with index +k+ and return the result, exactly
# as indexing the underlying items Array would.
def [](k)
  items.slice(k)
end
109
+
110
+ # :category: Attributes
111
+
112
# Return the result of +deep_dup+ on this Column's items, i.e. a dupped
# Array. Use the +items+ accessor to reach the non-dupped Array itself.
def to_a
  copy = items.deep_dup
  copy
end
117
+
118
+ # :category: Attributes
119
+
120
# Return the number of items held by the Column; nil items are counted.
def size
  items.length
end
124
+
125
+ # :category: Attributes
126
+
127
# Return true when the Column holds no items at all (not even nils).
def empty?
  items.length.zero?
end
131
+
132
+ # :category: Attributes
133
+
134
# Return the index of the final item in the Column, which is one less than
# its size (so -1 for a Column with no items).
def last_i
  size.pred
end
138
+
139
+ ##########################################################################
140
+ # Enumerable
141
+ ##########################################################################
142
+
143
+ include Enumerable
144
+
145
+ # :category: Attributes
146
+
147
# Yield each item in the Column, nils included, in the order in which they
# appear in the Column. This makes Columns Enumerable, so all the Enumerable
# methods are available on a Column.
#
# When called without a block, return an Enumerator over the items instead
# of failing on the yield, so that forms such as +col.each.with_index+ work.
def each
  return to_enum(:each) unless block_given?

  items.each { |itm| yield itm }
end
153
+
154
+ ##########################################################################
155
+ # Aggregates
156
+ ##########################################################################
157
+
158
+ # :category: Aggregates
159
+
160
# The names of the known aggregate operations that can be performed on a
# Column, as a frozen Array of Symbols. (The +%i+ literal builds an Array of
# Symbols; the +%s+ literal builds one single Symbol out of the whole text.)
VALID_AGGREGATES = %i[
  first last rng
  sum count min max avg var dev
  any? all? none? one?
].freeze
165
+
166
+ # :category: Aggregates
167
+
168
# Return the earliest item in the Column that is not nil, or nil if there is
# none. Works with any Column type.
def first
  items.find { |itm| !itm.nil? }
end
172
+
173
+ # :category: Aggregates
174
+
175
# Return the final item in the Column that is not nil, or nil if there is
# none. Works with any Column type.
def last
  items.reverse_each.find { |itm| !itm.nil? }
end
179
+
180
+ # :category: Aggregates
181
+
182
# Return a String made of the #first and #last non-nil values in the Column
# separated by '..'. Works with any Column type.
def rng
  lo = first
  hi = last
  "#{lo}..#{hi}"
end
187
+
188
+ # :category: Aggregates
189
+
190
# Return the sum of the non-nil items in the Column. Works with numeric and
# string Columns. For a string Column, it returns the concatenation of the
# non-nil items.
#
# Raises UserError (via only_with) for any other Column type.
def sum
  only_with('sum', 'Numeric', 'String')
  present = items.compact
  # Array#sum starts from the Integer 0, so summing Strings with it raises a
  # TypeError; concatenate String items explicitly instead.
  type == 'String' ? present.join : present.sum
end
197
+
198
+ # :category: Aggregates
199
+
200
# Return the number of non-nil items in the Column, converted with +to_d+.
# Works with any Column type.
def count
  items.compact.size.to_d
end
205
+
206
+ # :category: Aggregates
207
+
208
# Return the smallest of the non-nil items in the Column. Accepted for
# numeric, string, and datetime Columns, as well as Columns whose type is
# still NilClass; raises UserError (via only_with) otherwise.
def min
  only_with('min', 'NilClass', 'Numeric', 'String', 'DateTime')
  present = items.compact
  present.min
end
214
+
215
+ # :category: Aggregates
216
+
217
# Return the largest of the non-nil items in the Column. Accepted for
# numeric, string, and datetime Columns, as well as Columns whose type is
# still NilClass; raises UserError (via only_with) otherwise.
def max
  only_with('max', 'NilClass', 'Numeric', 'String', 'DateTime')
  present = items.compact
  present.max
end
223
+
224
+ # :category: Aggregates
225
+
226
# Return the average of the non-nil items in the Column. Works with numeric
# and datetime Columns. For a datetime Column, each item is mapped to its
# +jd+ (Julian day number), those are averaged, and the average is handed to
# DateTime.jd to produce the result.
def avg
  only_with('avg', 'DateTime', 'Numeric')
  present = items.compact
  divisor = present.size.to_d
  return sum / divisor unless type == 'DateTime'

  DateTime.jd(present.map(&:jd).sum / divisor)
end
239
+
240
+ # :category: Aggregates
241
+
242
# Return the sample variance of the non-nil items in the Column, that is,
# the sum of squared deviations from the mean divided by N-1 (the unbiased
# estimator of the population variance). Works with numeric and datetime
# Columns; for datetime Columns each item is first mapped to its +jd+
# (Julian day number). Returns a zero BigDecimal when there is at most one
# non-nil item.
def var
  only_with('var', 'DateTime', 'Numeric')
  values = items.compact
  values = values.map(&:jd) if type == 'DateTime'
  n = count
  return BigDecimal('0.0') if n <= 1

  # The mean is obtained from a scratch Column built over the same values.
  mu = Column.new(header: :mu, items: values).avg
  sq_dev = values.inject(BigDecimal('0.0')) do |acc, v|
    acc + (v - mu) * (v - mu)
  end
  sq_dev / (n - 1)
end
264
+
265
+ # :category: Aggregates
266
+
267
# Return the population variance (the biased estimator of the population
# variance using a divisor of N) of the non-nil items in the Column,
# computed by rescaling the sample variance #var by (N-1)/N. Works with
# numeric and datetime Columns. Returns a zero BigDecimal when there is at
# most one non-nil item.
#
# Raises UserError (via only_with) naming 'pvar' for any other Column type.
def pvar
  # Report this aggregate under its own name rather than as 'var'.
  only_with('pvar', 'DateTime', 'Numeric')
  n = items.compact.size.to_d
  return BigDecimal('0.0') if n <= 1

  var * ((n - 1) / n)
end
278
+
279
+ # :category: Aggregates
280
+
281
# Return the sample standard deviation of the non-nil items in the Column,
# computed as the square root (via +sqrt(20)+) of the sample variance #var,
# which uses a divisor of N-1. Works with numeric and datetime Columns; see
# #var for how datetime items are treated.
def dev
  only_with('dev', 'DateTime', 'Numeric')
  variance = var
  variance.sqrt(20)
end
291
+
292
+ # :category: Aggregates
293
+
294
# Return the population standard deviation (the biased estimator of the
# population standard deviation using a divisor of N) as the square root of
# the population variance #pvar of the non-nil items in the Column. Works
# with numeric and datetime Columns.
#
# Raises UserError (via only_with) naming 'pdev' for any other Column type.
#
# NOTE(review): this returns a Float from Math.sqrt, whereas #dev takes the
# root with +sqrt(20)+ on the variance itself; left as is so existing
# callers see the same return type.
def pdev
  # Report this aggregate under its own name rather than as 'dev'.
  only_with('pdev', 'DateTime', 'Numeric')
  Math.sqrt(pvar)
end
304
+
305
+ # :category: Aggregates
306
+
307
# Return true if at least one item in the Column is true; otherwise return
# false. Nil items are ignored. Works only with boolean Columns.
def any?
  only_with('any?', 'Boolean')
  known = items.compact
  known.any?
end
313
+
314
+ # :category: Aggregates
315
+
316
# Return true if every non-nil item in the Column is true; otherwise return
# false. Works only with boolean Columns.
def all?
  only_with('all?', 'Boolean')
  known = items.compact
  known.all?
end
322
+
323
+ # :category: Aggregates
324
+
325
# Return true if no item in the Column is true; otherwise return false. Nil
# items are ignored. Works only with boolean Columns.
def none?
  only_with('none?', 'Boolean')
  known = items.compact
  known.none?
end
331
+
332
+ # :category: Aggregates
333
+
334
# Return true if exactly one item in the Column is true; otherwise return
# false. Nil items are ignored. Works only with boolean Columns.
def one?
  only_with('one?', 'Boolean')
  known = items.compact
  known.one?
end
340
+
341
+ private
342
+
343
# Guard for the aggregate methods: return self when this Column's type is
# among +valid_types+; otherwise raise a UserError that names the aggregate
# +agg+ and the Column's type.
def only_with(agg, *valid_types)
  unless valid_types.include?(type)
    raise UserError, "Aggregate '#{agg}' cannot be applied to a #{type} column"
  end
  self
end
347
+
348
+ public
349
+
350
+ ##########################################################################
351
+ # Construction
352
+ ##########################################################################
353
+
354
+ # :category: Constructors
355
+
356
# Append +itm+ to the end of the Column after converting it to the Column's
# type. If the Column's type is still open, i.e. NilClass, the conversion
# attempts to fix the Column's type based on +itm+, as with Column.new.
#
# Returns the Column's items Array (not the Column), so a chained
# +col << a << b+ pushes +b+ onto that Array without conversion.
def <<(itm)
  converted = convert_to_type(itm)
  items.push(converted)
end
362
+
363
+ # :category: Constructors
364
+
365
# Return a new Column holding this Column's items followed by the items of
# +other+. The new Column takes its header from this Column. Raises
# UserError unless both Columns have the same type.
def +(other)
  if type != other.type
    raise UserError, 'Cannot combine columns with different types'
  end
  combined = items + other.items
  Column.new(header: header, items: combined)
end
372
+
373
+ private
374
+
375
# Convert +val+ to this Column's type and return the converted value.
#
# While the type is still NilClass (open), a blank +val+ other than false
# yields nil and leaves the type open. Any other +val+ is tried as a
# boolean, then a date-time, then a numeric, and finally a string, and the
# Column's type is fixed according to the class of the converted value.
#
# Once the type is determined, a missing +val+ yields nil: for Boolean that
# means nil or a blank String, for DateTime and Numeric anything blank, and
# for String only nil (blank strings are kept). Any other +val+ is converted
# to the Column's type, and a UserError is raised if it cannot be.
def convert_to_type(val)
  case type
  when 'NilClass'
    # false counts as blank, but it must be classified as a boolean rather
    # than ignored, so it is excluded from the blank test.
    return nil if val != false && val.blank?

    # Only non-blank values are allowed to set the type of the Column.
    as_bool = convert_to_boolean(val)
    new_val = as_bool
    if as_bool.nil?
      new_val = convert_to_date_time(val) ||
                convert_to_numeric(val) ||
                convert_to_string(val)
    end
    @type =
      if new_val == true || new_val == false
        'Boolean'
      elsif new_val.is_a?(Date) || new_val.is_a?(DateTime)
        'DateTime'
      elsif new_val.is_a?(Numeric)
        'Numeric'
      elsif new_val.is_a?(String)
        'String'
      else
        raise UserError, "Cannot add #{val} of type #{new_val.class.name} to a column"
      end
    new_val
  when 'Boolean'
    return nil if val.nil? || (val.is_a?(String) && val.blank?)

    converted_or_raise(val, convert_to_boolean(val))
  when 'DateTime'
    return nil if val.blank?

    converted_or_raise(val, convert_to_date_time(val))
  when 'Numeric'
    return nil if val.blank?

    converted_or_raise(val, convert_to_numeric(val))
  when 'String'
    return nil if val.nil?

    converted_or_raise(val, convert_to_string(val))
  else
    raise UserError, "Mysteriously, column has unknown type '#{type}'"
  end
end

# Return +converted+, the result of converting +val+ to the Column's type,
# or raise a UserError when the conversion produced nil.
def converted_or_raise(val, converted)
  if converted.nil?
    raise UserError, "Attempt to add '#{val}' to a column already typed as #{type}"
  end
  converted
end
461
+
462
# Convert +val+ to true or false if it looks like a boolean, otherwise
# return nil. An actual true or false is returned unchanged; otherwise the
# cleaned string form of +val+ is matched, regardless of case, against
# false/f/n/no (giving false) and true/t/y/yes (giving true).
def convert_to_boolean(val)
  return val if val.is_a?(TrueClass) || val.is_a?(FalseClass)

  text = val.to_s.clean
  return nil if text.blank?

  case text
  when /\A(false|f|n|no)\z/i then false
  when /\A(true|t|y|yes)\z/i then true
  end
end
475
+
476
# Convert +val+ to a date or date-time if it is a Date (which includes
# DateTime) or a string that can be parsed by DateTime.parse; otherwise
# return nil. A string is only considered when it contains something like
# '2016-01-14' or '2016/1/14' (a four-digit year, then month and day,
# separated by '-' or '/'), because DateTime.parse would otherwise treat
# many bare numbers, such as '2841381', as dates, which the user probably
# does not intend. A parsed value with zero seconds since midnight is
# reduced with +to_date+.
def convert_to_date_time(val)
  # DateTime is a subclass of Date, so this one test covers both.
  return val if val.is_a?(Date)

  text = val.to_s.clean
  return nil if text.blank?
  return nil unless text =~ %r{\b\d\d\d\d[-/]\d\d?[-/]\d\d?\b}

  parsed = DateTime.parse(text.clean)
  parsed.seconds_since_midnight.zero? ? parsed.to_date : parsed
rescue ArgumentError
  # DateTime.parse rejected the string: not a date after all.
  nil
end
496
+
497
# Convert +val+ to a Numeric if it is already a Numeric or is a String that
# looks like one; otherwise return nil.
#
# * A Float is promoted to a BigDecimal with Float::DIG significant digits.
# * Any other Numeric is returned unchanged.
# * Otherwise the cleaned string form of +val+ is stripped of commas,
#   underscores, and the currency symbol given by FatTable.currency_symbol,
#   and then recognized, with an optional leading sign, as a decimal
#   (BigDecimal), an integer (Integer), or a ratio written <int>:<int> or
#   <int>/<int> (Rational). A ratio with a zero denominator is not a number
#   and yields nil.
def convert_to_numeric(val)
  # BigDecimal.new was removed from the bigdecimal library; use the
  # Kernel#BigDecimal conversion method.
  return BigDecimal(val, Float::DIG) if val.is_a?(Float)
  return val if val.is_a?(Numeric)

  # Eliminate any commas, $'s (or other currency symbol), or _'s.
  cursym = Regexp.quote(FatTable.currency_symbol)
  str = val.to_s.clean.gsub(/[,_#{cursym}]/, '')
  return nil if str.blank?

  case str
  when /\A[-+]?(?:\d+\.\d*|\d*\.\d+)\z/
    # The alternation is grouped so that both anchors apply to each branch.
    BigDecimal(str)
  when /\A[-+]?\d+\z/
    str.to_i
  when %r{\A([-+]?\d+)\s*[:/]\s*(\d+)\z}
    numer = Regexp.last_match(1).to_i
    denom = Regexp.last_match(2).to_i
    denom.zero? ? nil : Rational(numer, denom)
  end
end
517
+
518
# Convert +val+ to a String by returning the result of its +to_s+ method.
def convert_to_string(val)
  val.to_s
end
521
+ end
522
+ end