fat_core 1.5.1 → 1.5.2

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: f6d9999db399e323a385070ca59d15deb692d65d
4
- data.tar.gz: 5bb8ade666435e0c67afeaa293e236082cb19906
3
+ metadata.gz: 4617dd301a8a4f6ea796c27cfff91364f72b79f2
4
+ data.tar.gz: e6eb1b4057994c9672712b28189066af2ebc56d8
5
5
  SHA512:
6
- metadata.gz: 806ba3e817c5899bd5cb3ffa7a13069d8710c2a29406ae9b6060cf2d30e5d28cc3fc1b983d8b8b3aa9beafc6ceab505695a218c39e8e605ec811bc0c671a3bf7
7
- data.tar.gz: e11602a00d01b81bd201092b4bf3cbe8780435d2da3bd3f708f83b2e94677e43a2993b0fc4066fdf8d5d45b258e2516c928c72790e93e84dc77f64714b202f59
6
+ metadata.gz: 478b2f046e8cfc211eaed97bcd20ddf80b9cbb7c38e022931ceaeade08ba91d39f94e4ad085145426b7789a3795648112149cc0e53256bce17c0cc6241fbee2c
7
+ data.tar.gz: 32a4dbf0406779b323a76f61319f9553b9442d8a5ebce0c8567720b3134c9894087b6f22d38720d08f0070884f11eb74d8655ec771b51c4ac8f8e47f6442975a
@@ -62,6 +62,10 @@ module FatCore
62
62
  items.compact.sum
63
63
  end
64
64
 
65
+ def count
66
+ items.compact.count
67
+ end
68
+
65
69
  def min
66
70
  only_with('min', 'NilClass', 'Numeric', 'String', 'DateTime')
67
71
  items.compact.min
@@ -46,6 +46,7 @@ module FatCore
46
46
  def initialize(input = nil, ext = '.csv')
47
47
  @columns = []
48
48
  @footers = {}
49
+ @boundaries = []
49
50
  return self if input.nil?
50
51
  case input
51
52
  when IO, StringIO
@@ -121,6 +122,11 @@ module FatCore
121
122
  columns.first.size
122
123
  end
123
124
 
125
+ # Return whether this table is empty.
126
+ def empty?
127
+ size.zero?
128
+ end
129
+
124
130
  # Return the rows of the table as an array of hashes, keyed by the headers.
125
131
  def rows
126
132
  rows = []
@@ -136,8 +142,121 @@ module FatCore
136
142
  rows
137
143
  end
138
144
 
139
- def empty?
140
- rows.empty?
145
+ protected
146
+
147
+ # Return the rows from first to last. We could just index #rows, but in a
148
+ # large table, that would require that we construct all the rows for a range
149
+ # of any size.
150
+ def rows_range(first = 0, last = size - 1)
151
+ raise ArgumentError, 'first must be <= last' unless first <= last
152
+ rows = []
153
+ unless columns.empty?
154
+ first.upto(last) do |rnum|
155
+ row = {}
156
+ columns.each do |col|
157
+ row[col.header] = col[rnum]
158
+ end
159
+ rows << row
160
+ end
161
+ end
162
+ rows
163
+ end
164
+
165
+ ## ###########################################################################
166
+ ## Group Boundaries
167
+ ##
168
+ ## Boundaries mark the last row in each "group" within the table. The last
169
+ ## row of the table is always an implicit boundary, and having the last row
170
+ ## as the sole boundary is the default for new tables unless mentioned
171
+ ## otherwise. Resetting the boundaries means to put it back in that default
172
+ ## state.
173
+ ##
174
+ ## Note that tables are, for the most part, immutable. That is, the data
175
+ ## rows of the table, once set, are never changed by methods on the
176
+ ## table. Any transformation of a table results in a new table. Boundaries
177
+ ## and footers are exceptions to immutability, but even they only affect
178
+ ## the boundary and footer attributes of the table, not the data rows.
179
+ ##
180
+ ## Boundaries can be added when a table is read in, for example, from the
181
+ ## text of an org table in which each hline (other than the one separating
182
+ ## the headers from the body) marks a boundary for the row immediately
183
+ ## preceding the hline.
184
+ ##
185
+ ## The #order_by method resets the boundaries then adds boundaries at the
186
+ ## last row of each group.
187
+ ##
188
+ ## The #union_all (but not #union since it deletes duplicates) method adds
189
+ ## a boundary between the constituent tables. #union_all also preserves any
190
+ ## boundary markers within the constituent tables. In doing so, the
191
+ ## boundaries of the second table in the #union_all are increased by the
192
+ ## size of the first table so that they refer to rows in the new table.
193
+ ##
194
+ ## The #select method preserves any boundaries from the parent table
195
+ ## without change, since it only selects columns for the output and deletes
196
+ ## no rows.
197
+ ##
198
+ ## All the other table-transforming methods reset the boundaries in the new
199
+ ## table. For example, #order_by and #where re-arrange and delete rows, so
200
+ ## the old boundaries would make no sense anyway. Likewise, #union,
201
+ ## #intersection, #except, and #join reset the boundaries to their default.
202
+ ## ###########################################################################
203
+
204
+ public
205
+
206
+ # Return an array of arrays of row hashes, one array per group in this Table.
207
+ def groups
208
+ normalize_boundaries
209
+ groups = []
210
+ (0..boundaries.size - 1).each do |k|
211
+ groups << group_rows(k)
212
+ end
213
+ groups
214
+ end
215
+
216
+ protected
217
+
218
+ # Reader for boundaries, but not public.
219
+ def boundaries
220
+ @boundaries
221
+ end
222
+
223
+ # Writer for boundaries, but not public.
224
+ def boundaries=(bounds)
225
+ @boundaries = bounds
226
+ end
227
+
228
+ # Make sure size - 1 is the last boundary and that the boundaries are unique and sorted.
229
+ def normalize_boundaries
230
+ unless empty?
231
+ boundaries.push(size - 1) unless boundaries.include?(size - 1)
232
+ self.boundaries = boundaries.uniq.sort
233
+ end
234
+ boundaries
235
+ end
236
+
237
+ # Mark a group boundary at row k or, if k is nil, at the last row in the
238
+ # table.
239
+ def mark_boundary(k = nil)
240
+ if k
241
+ boundaries.push(k)
242
+ else
243
+ boundaries.push(size - 1)
244
+ end
245
+ end
246
+
247
+ # Concatenate the array of argument bounds to this table's boundaries, but
248
+ # increase each of the indexes in bounds by shift. This is used in the
249
+ # #union_all method.
250
+ def append_boundaries(bounds, shift: 0)
251
+ @boundaries += bounds.map { |k| k + shift }
252
+ end
253
+
254
+ def group_rows(k)
255
+ normalize_boundaries
256
+ return [] unless k < boundaries.size
257
+ first = k.zero? ? 0 : boundaries[k - 1] + 1
258
+ last = boundaries[k]
259
+ rows_range(first, last)
141
260
  end
142
261
 
143
262
  ############################################################################
@@ -145,9 +264,11 @@ module FatCore
145
264
  # all return a new Table object rather than modifying the table in place.
146
265
  ############################################################################
147
266
 
267
+ public
268
+
148
269
  # Return a new Table sorted on the rows of this Table on the possibly
149
270
  # multiple keys given in the array of syms in headers. Append a ! to the
150
- # symbol name to indicate reverse sorting on that column.
271
+ # symbol name to indicate reverse sorting on that column. Resets groups.
151
272
  def order_by(*sort_heads)
152
273
  sort_heads = [sort_heads].flatten
153
274
  rev_heads = sort_heads.select { |h| h.to_s.ends_with?('!') }
@@ -158,65 +279,62 @@ module FatCore
158
279
  key2 = sort_heads.map { |h| rev_heads.include?(h) ? r1[h] : r2[h] }
159
280
  key1 <=> key2
160
281
  end
282
+ # Add the new rows to the table, but mark a group boundary at the points
283
+ # where the sort key changes value.
161
284
  new_tab = Table.new
162
- new_rows.each do |nrow|
285
+ last_key = nil
286
+ new_rows.each_with_index do |nrow, k|
163
287
  new_tab << nrow
288
+ key = nrow.fetch_values(*sort_heads)
289
+ new_tab.mark_boundary(k - 1) if last_key && key != last_key
290
+ last_key = key
164
291
  end
165
292
  new_tab
166
293
  end
167
294
 
168
295
  # Return a Table having the selected column expressions. Each expression can
169
- # be either a (1) symbol, (2) a hash of symbol => symbol, or (3) a hash of
170
- # symbol => 'string', though the bare symbol arguments (1) must precede any
171
- # hash arguments. Each expression results in a column in the resulting Table
172
- # in the order given. The expressions are evaluated in order as well.
173
- def select(*exps)
296
+ # be either a (1) symbol, :old_col, representing a column in the current
297
+ # table, (2) a hash of new_col: :old_col to rename an existing :old_col
298
+ # column as :new_col, or (3) a hash of new_col: 'expression', to add a new
299
+ # column that is computed as an arbitrary ruby expression of the existing
300
+ # columns (whether selected for the output table or not) or any new_col
301
+ # defined earlier in the argument list. The expression string can also
302
+ # access the instance variable @row as the row number of the row being
303
+ # evaluated. The bare symbol arguments (1) must precede any hash arguments
304
+ # (2) or (3). Each expression results in a column in the resulting Table in
305
+ # the order given. The expressions are evaluated in left-to-right order as
306
+ # well. The output table preserves any groups present in the input table.
307
+ def select(*cols, **new_cols)
174
308
  result = Table.new
175
- new_cols = {}
176
309
  ev = Evaluator.new(vars: { row: 0 }, before: '@row += 1')
177
310
  rows.each do |old_row|
178
- new_heads = []
179
- new_row ||= {}
180
- exps.each do |exp|
181
- case exp
182
- when Symbol, String
183
- h = exp.as_sym
184
- raise "Column '#{h}' in select does not exist" unless column?(h)
185
- new_row[h] = old_row[h]
186
- when Hash
187
- # Note that when one of the exps is a Hash, it will contain an
188
- # output expression for each member of the Hash, so we have to loop
189
- # through them here.
190
- exp.each_pair do |key, val|
191
- # Gather the new values computed so far for this row
192
- vars = old_row.merge(new_row)
193
- case val
194
- when Symbol
195
- h = val.as_sym
196
- raise "Column '#{h}' in select does not exist" unless vars.keys.include?(h)
197
- new_row[key] = vars[h]
198
- when String
199
- # Now we have a hash, vars, of all local variables we want to be
200
- # defined while evaluating expression xp as the value of column
201
- # key in the new column.
202
- h = key.as_sym
203
- new_row[h] = ev.evaluate(val, vars: vars)
204
- # Don't add this column to new_heads until after the eval so it
205
- # does not shadow the existing value of row[h].
206
- else
207
- raise 'Hash parameters to select must be a symbol or string'
208
- end
209
- end
311
+ new_row = {}
312
+ cols.each do |k|
313
+ h = k.as_sym
314
+ raise "Column '#{h}' in select does not exist" unless column?(h)
315
+ new_row[h] = old_row[h]
316
+ end
317
+ new_cols.each_pair do |key, val|
318
+ key = key.as_sym
319
+ vars = old_row.merge(new_row)
320
+ case val
321
+ when Symbol
322
+ raise "Column '#{val}' in select does not exist" unless vars.keys.include?(val)
323
+ new_row[key] = vars[val]
324
+ when String
325
+ new_row[key] = ev.evaluate(val, vars: vars)
210
326
  else
211
- raise 'Parameters to select must be a symbol, string, or hash'
327
+ raise 'Hash parameters to select must be a symbol or string'
212
328
  end
213
329
  end
214
330
  result << new_row
215
331
  end
332
+ result.boundaries = boundaries
216
333
  result
217
334
  end
218
335
 
219
- # Return a Table containing only rows matching the where expression.
336
+ # Return a Table containing only rows matching the where expression. Resets
337
+ # groups.
220
338
  def where(expr)
221
339
  expr = expr.to_s
222
340
  result = Table.new
@@ -227,7 +345,7 @@ module FatCore
227
345
  result
228
346
  end
229
347
 
230
- # Return this table with all duplicate rows eliminated.
348
+ # Return this table with all duplicate rows eliminated. Resets groups.
231
349
  def distinct
232
350
  result = Table.new
233
351
  uniq_rows = rows.uniq
@@ -237,6 +355,7 @@ module FatCore
237
355
  result
238
356
  end
239
357
 
358
+ # Return this table with all duplicate rows eliminated. Resets groups.
240
359
  def uniq
241
360
  distinct
242
361
  end
@@ -247,23 +366,31 @@ module FatCore
247
366
  # the same type in the two tables, or an exception will be thrown.
248
367
  # Duplicates are eliminated from the result.
249
368
  def union(other)
250
- set_operation(other, :+, true)
369
+ set_operation(other, :+,
370
+ distinct: true,
371
+ add_boundaries: true)
251
372
  end
252
373
 
253
374
  # Return a Table that combines this table with another table. In other
254
375
  # words, return the union of this table with the other. The headers of this
255
376
  # table are used in the result. There must be the same number of columns of
256
377
  # the same type in the two tables, or an exception will be thrown.
257
- # Duplicates are not eliminated from the result.
378
+ # Duplicates are not eliminated from the result. Adds group boundaries at
379
+ # boundaries of the constituent tables. Preserves and adjusts the group
380
+ # boundaries of the constituent tables.
258
381
  def union_all(other)
259
- set_operation(other, :+, false)
382
+ set_operation(other, :+,
383
+ distinct: false,
384
+ add_boundaries: true,
385
+ inherit_boundaries: true)
260
386
  end
261
387
 
262
388
  # Return a Table that includes the rows that appear in this table and in
263
389
  # another table. In other words, return the intersection of this table with
264
390
  # the other. The headers of this table are used in the result. There must be
265
391
  # the same number of columns of the same type in the two tables, or an
266
- # exception will be thrown. Duplicates are eliminated from the result.
392
+ # exception will be thrown. Duplicates are eliminated from the
393
+ # result. Resets groups.
267
394
  def intersect(other)
268
395
  set_operation(other, :intersect, true)
269
396
  end
@@ -272,7 +399,8 @@ module FatCore
272
399
  # another table. In other words, return the intersection of this table with
273
400
  # the other. The headers of this table are used in the result. There must be
274
401
  # the same number of columns of the same type in the two tables, or an
275
- # exception will be thrown. Duplicates are not eliminated from the result.
402
+ # exception will be thrown. Duplicates are not eliminated from the
403
+ # result. Resets groups.
276
404
  def intersect_all(other)
277
405
  set_operation(other, :intersect, false)
278
406
  end
@@ -282,7 +410,7 @@ module FatCore
282
410
  # set difference between this table and the other. The headers of this table
283
411
  # are used in the result. There must be the same number of columns of the
284
412
  # same type in the two tables, or an exception will be thrown. Duplicates
285
- # are eliminated from the result.
413
+ # are eliminated from the result. Resets groups.
286
414
  def except(other)
287
415
  set_operation(other, :difference, true)
288
416
  end
@@ -292,7 +420,7 @@ module FatCore
292
420
  # set difference between this table and the other. The headers of this table
293
421
  # are used in the result. There must be the same number of columns of the
294
422
  # same type in the two tables, or an exception will be thrown. Duplicates
295
- # are not eliminated from the result.
423
+ # are not eliminated from the result. Resets groups.
296
424
  def except_all(other)
297
425
  set_operation(other, :difference, false)
298
426
  end
@@ -302,7 +430,10 @@ module FatCore
302
430
  # Apply the set operation given by op between this table and the other table
303
431
  # given in the first argument. If distinct is true, eliminate duplicates
304
432
  # from the result.
305
- def set_operation(other, op = :+, distinct = true)
433
+ def set_operation(other, op = :+,
434
+ distinct: true,
435
+ add_boundaries: false,
436
+ inherit_boundaries: false)
306
437
  unless columns.size == other.columns.size
307
438
  raise 'Cannot apply a set operation to tables with a different number of columns.'
308
439
  end
@@ -312,8 +443,14 @@ module FatCore
312
443
  other_rows = other.rows.map { |r| r.replace_keys(headers) }
313
444
  result = Table.new
314
445
  new_rows = rows.send(op, other_rows)
315
- new_rows.each do |row|
446
+ new_rows.each_with_index do |row, k|
316
447
  result << row
448
+ result.mark_boundary if k == size - 1 && add_boundaries
449
+ end
450
+ if inherit_boundaries
451
+ result.boundaries = normalize_boundaries
452
+ other.normalize_boundaries
453
+ result.append_boundaries(other.boundaries, shift: size)
317
454
  end
318
455
  distinct ? result.distinct : result
319
456
  end
@@ -381,7 +518,7 @@ module FatCore
381
518
  # of all columns in T1 followed by all columns in T2. If the tables
382
519
  # have N and M rows respectively, the joined table will have N * M
383
520
  # rows.
384
- #
521
+ # Resets groups.
385
522
  JOIN_TYPES = [:inner, :left, :right, :full, :cross]
386
523
 
387
524
  def join(other, *exps, join_type: :inner)
@@ -449,6 +586,73 @@ module FatCore
449
586
  join(other, join_type: :cross)
450
587
  end
451
588
 
589
+ # Return a Table with a single row for each group of rows in the input table
590
+ # where the value of all columns named as simple symbols are equal. All
591
+ # other columns are set to the result of aggregating the values of that
592
+ # column within the group according to an aggregate function (:count, :sum,
593
+ # :min, :max, etc.), which defaults to the :first function, giving the value
594
+ # of that column for the first row in the group. You can specify a
595
+ # different aggregate function for a column by adding a hash parameter with
596
+ # the column as the key and a symbol for the aggregate function as the
597
+ # value. For example, consider the following call:
598
+ #
599
+ # tab.group_by(:date, :code, :price, shares: :sum, ).
600
+ #
601
+ # The first three parameters are simple symbols, so the table is divided
602
+ # into groups of rows in which the value of :date, :code, and :price are
603
+ # equal. The shares: hash parameter is set to the aggregate function :sum,
604
+ # so it will appear in the result as the sum of all the :shares values in
605
+ # each group. Any non-aggregate columns that have no aggregate function set
606
+ # default to using the aggregate function :first. Because of the way Ruby
607
+ # parses parameters to a method call, all the grouping symbols must appear
608
+ # first in the parameter list before any hash parameters.
609
+ def group_by(*group_cols, **agg_cols)
610
+ default_agg_func = :first
611
+ default_cols = headers - group_cols - agg_cols.keys
612
+ default_cols.each do |h|
613
+ agg_cols[h] = default_agg_func
614
+ end
615
+
616
+ sorted_tab = order_by(group_cols)
617
+ groups = sorted_tab.rows.group_by do |r|
618
+ group_cols.map { |k| r[k] }
619
+ end
620
+ result = Table.new
621
+ groups.each_pair do |_vals, grp_rows|
622
+ result << row_from_group(grp_rows, group_cols, agg_cols)
623
+ end
624
+ result
625
+ end
626
+
627
+ ############################################################################
628
+ # Footer methods
629
+ ############################################################################
630
+ def add_footer(label: 'Total', aggregate: :sum, heads: [])
631
+ foot = {}
632
+ heads.each do |h|
633
+ raise "No #{h} column in table to #{aggregate}" unless headers.include?(h)
634
+ foot[h] = column(h).send(aggregate)
635
+ end
636
+ @footers[label.as_sym] = foot
637
+ self
638
+ end
639
+
640
+ def add_sum_footer(cols, label = 'Total')
641
+ add_footer(heads: cols)
642
+ end
643
+
644
+ def add_avg_footer(cols, label = 'Average')
645
+ add_footer(label: label, aggregate: :avg, heads: cols)
646
+ end
647
+
648
+ def add_min_footer(cols, label = 'Minimum')
649
+ add_footer(label: label, aggregate: :min, heads: cols)
650
+ end
651
+
652
+ def add_max_footer(cols, label = 'Maximum')
653
+ add_footer(label: label, aggregate: :max, heads: cols)
654
+ end
655
+
452
656
  private
453
657
 
454
658
  # Return an output row appropriate to the given join type, including all the
@@ -585,64 +789,6 @@ module FatCore
585
789
  self
586
790
  end
587
791
 
588
- public
589
-
590
- # Return a Table in which all rows of the table are divided into groups
591
- # where the value of all columns named as simple symbols are equal. All
592
- # other columns are set to the result of aggregating the values of that
593
- # column within the group according to the Column aggregate function (:sum,
594
- # :min, :max, etc.) set in a hash parameter with the non-aggregate column
595
- # name as a key and the symbol for the aggregate function as a value. For
596
- # example, consider the following call:
597
- #
598
- # #+BEGIN_EXAMPLE
599
- # tab.group_by(:date, :code, :price, shares: :sum, ).
600
- # #+END_EXAMPLE
601
- #
602
- # The first three parameters are simple symbols, so the table is divided
603
- # into groups of rows in which the value of :date, :code, and :price are
604
- # equal. The :shares parameter is set to the aggregate function :sum, so it
605
- # will appear in the result as the sum of all the :shares values in each
606
- # group. Any non-aggregate columns that have no aggregate function set
607
- # default to using the aggregate function :first. Note that because of the
608
- # way Ruby parses parameters to a method call, all the grouping symbols must
609
- # appear first in the parameter list.
610
- def group_by(*exprs)
611
- group_cols = []
612
- agg_cols = {}
613
- exprs.each do |xp|
614
- case xp
615
- when Symbol
616
- group_cols << xp
617
- when Hash
618
- agg_cols = xp
619
- else
620
- raise "Cannot group by parameter '#{xp}'"
621
- end
622
- end
623
- default_agg_func = :first
624
- default_cols = headers - group_cols - agg_cols.keys
625
- default_cols.each do |h|
626
- agg_cols[h] = default_agg_func
627
- end
628
-
629
- sorted_tab = order_by(group_cols)
630
- groups = sorted_tab.rows.group_by do |r|
631
- group_cols.map { |k| r[k] }
632
- end
633
- result_rows = []
634
- groups.each_pair do |_vals, grp_rows|
635
- result_rows << row_from_group(grp_rows, group_cols, agg_cols)
636
- end
637
- result = Table.new
638
- result_rows.each do |row|
639
- result << row
640
- end
641
- result
642
- end
643
-
644
- private
645
-
646
792
  def row_from_group(rows, grp_cols, agg_cols)
647
793
  new_row = {}
648
794
  grp_cols.each do |h|
@@ -663,38 +809,14 @@ module FatCore
663
809
 
664
810
  public
665
811
 
666
- def add_footer(label: 'Total', aggregate: :sum, heads: [])
667
- foot = {}
668
- heads.each do |h|
669
- raise "No #{h} column in table to #{aggregate}" unless headers.include?(h)
670
- foot[h] = column(h).send(aggregate)
671
- end
672
- @footers[label.as_sym] = foot
673
- self
674
- end
675
-
676
- def add_sum_footer(cols, label = 'Total')
677
- add_footer(heads: cols)
678
- end
679
-
680
- def add_avg_footer(cols, label = 'Average')
681
- add_footer(label: label, aggregate: :avg, heads: cols)
682
- end
683
-
684
- def add_min_footer(cols, label = 'Minimum')
685
- add_footer(label: label, aggregate: :min, heads: cols)
686
- end
687
-
688
- def add_max_footer(cols, label = 'Maximum')
689
- add_footer(label: label, aggregate: :max, heads: cols)
690
- end
691
-
692
812
  # This returns the table as an Array of Arrays with formatting applied.
693
813
  # This would normally be called after all calculations on the table are done
694
814
  # and you want to return the results. The Array of Arrays structure is
695
815
  # what org-mode src blocks will render as an org table in the buffer.
696
816
  def to_org(formats: {})
697
817
  result = []
818
+
819
+ # Headers
698
820
  header_row = []
699
821
  headers.each do |hdr|
700
822
  header_row << hdr.entitle
@@ -703,6 +825,7 @@ module FatCore
703
825
  # This causes org to place an hline under the header row
704
826
  result << nil unless header_row.empty?
705
827
 
828
+ # Body
706
829
  rows.each do |row|
707
830
  out_row = []
708
831
  headers.each do |hdr|
@@ -710,6 +833,8 @@ module FatCore
710
833
  end
711
834
  result << out_row
712
835
  end
836
+
837
+ # Footers
713
838
  footers.each_pair do |label, footer|
714
839
  foot_row = []
715
840
  columns.each do |col|
@@ -726,17 +851,20 @@ module FatCore
726
851
  # Table construction methods.
727
852
  ############################################################################
728
853
 
729
- # Add a row represented by a Hash having the headers as keys. All tables
730
- # should be built ultimately using this method as a primitive.
731
- def add_row(row)
854
+ # Add a row represented by a Hash having the headers as keys. If mark is
855
+ # true, mark this row as a boundary. All tables should be built ultimately
856
+ # using this method as a primitive.
857
+ def add_row(row, mark: false)
732
858
  row.each_pair do |k, v|
733
859
  key = k.as_sym
734
860
  columns << Column.new(header: k) unless column?(k)
735
861
  column(key) << v
736
862
  end
863
+ @boundaries << (size - 1) if mark
737
864
  self
738
865
  end
739
866
 
867
+ # Add a row without marking.
740
868
  def <<(row)
741
869
  add_row(row)
742
870
  end
@@ -753,24 +881,41 @@ module FatCore
753
881
  # respond to #to_hash.
754
882
  def from_array_of_hashes(rows)
755
883
  rows.each do |row|
884
+ if row.nil?
885
+ mark_boundary
886
+ next
887
+ end
756
888
  add_row(row.to_hash)
757
889
  end
758
890
  self
759
891
  end
760
892
 
893
+ # Construct a new table from an array of arrays. If the second element of
894
+ # the array is a nil, a string that looks like an hrule, or an array whose
895
+ # first element is a string that looks like an hrule, interpret the first
896
+ # element of the array as a row of headers. Otherwise, synthesize headers of
897
+ # the form "col1", "col2", ... and so forth. The remaining elements are
898
+ # taken as the body of the table, except that if an element of the outer
899
+ # array is a nil or a string that looks like an hrule, mark the preceding
900
+ # row as a boundary.
761
901
  def from_array_of_arrays(rows)
902
+ hrule_re = /\A\s*\|[-+]+/
762
903
  headers = []
763
- if rows[0].any? { |itm| itm.to_s.number? }
764
- headers = (1..rows[0].size).to_a.map { |k| "col#{k}".as_sym }
765
- first_data_row = 0
766
- else
904
+ if rows[1].nil? || rows[1] =~ hrule_re || rows[1].first =~ hrule_re
905
+ # Take the first row as headers
767
906
  # Use first row 0 as headers
768
907
  headers = rows[0].map(&:as_sym)
769
- first_data_row = 1
908
+ first_data_row = 2
909
+ else
910
+ # Synthesize headers
911
+ headers = (1..rows[0].size).to_a.map { |k| "col#{k}".as_sym }
912
+ first_data_row = 0
770
913
  end
771
- hrule_re = /\A\s*\|[-+]+/
772
914
  rows[first_data_row..-1].each do |row|
773
- next if row[0] =~ hrule_re
915
+ if row.nil? || row[0] =~ hrule_re
916
+ mark_boundary
917
+ next
918
+ end
774
919
  row = row.map { |s| s.to_s.strip }
775
920
  hash_row = Hash[headers.zip(row)]
776
921
  add_row(hash_row)
@@ -797,18 +942,27 @@ module FatCore
797
942
  unless table_found
798
943
  # Skip through the file until a table is found
799
944
  next unless line =~ table_re
945
+ unless line =~ hrule_re
946
+ line = line.sub(/\A\s*\|/, '').sub(/\|\s*\z/, '')
947
+ rows << line.split('|').map(&:clean)
948
+ end
800
949
  table_found = true
950
+ next
801
951
  end
802
952
  break unless line =~ table_re
803
953
  if !header_found && line =~ hrule_re
954
+ rows << nil
804
955
  header_found = true
805
956
  next
806
957
  elsif header_found && line =~ hrule_re
958
+ # Mark the boundary with a nil
959
+ rows << nil
960
+ elsif line !~ table_re
807
961
  # Stop reading at the second hline
808
962
  break
809
963
  else
810
964
  line = line.sub(/\A\s*\|/, '').sub(/\|\s*\z/, '')
811
- rows << line.split('|')
965
+ rows << line.split('|').map(&:clean)
812
966
  end
813
967
  end
814
968
  from_array_of_arrays(rows)
@@ -1,7 +1,7 @@
1
1
  module FatCore
2
2
  MAJOR = 1
3
3
  MINOR = 5
4
- PATCH = 1
4
+ PATCH = 2
5
5
 
6
6
  VERSION = [MAJOR, MINOR, PATCH].compact.join('.')
7
7
  end
@@ -124,6 +124,33 @@ EOS
124
124
  | 42 | 2013-05-30 | S | 6,679 | 18 | 25.04710 | ZMEAC |
125
125
 
126
126
  * Another Heading
127
+ EOS
128
+
129
+ @org_file_body_with_groups = <<EOS
130
+
131
+ #+TBLNAME: morgan_tab
132
+ |-----+------------+------+---------+--------+----------+--------|
133
+ | Ref | Date | Code | Raw | Shares | Price | Info |
134
+ |-----+------------+------+---------+--------+----------+--------|
135
+ | 29 | 2013-05-02 | P | 795,546 | 2,609 | 1.18500 | ZMPEF1 |
136
+ |-----+------------+------+---------+--------+----------+--------|
137
+ | 30 | 2013-05-02 | P | 118,186 | 388 | 11.85000 | ZMPEF1 |
138
+ | 31 | 2013-05-02 | P | 340,948 | 1,926 | 1.18500 | ZMPEF2 |
139
+ | 32 | 2013-05-02 | P | 50,651 | 286 | 11.85000 | ZMPEF2 |
140
+ |-----+------------+------+---------+--------+----------+--------|
141
+ | 33 | 2013-05-20 | S | 12,000 | 32 | 28.28040 | ZMEAC |
142
+ | 34 | 2013-05-20 | S | 85,000 | 226 | 28.32240 | ZMEAC |
143
+ | 35 | 2013-05-20 | S | 33,302 | 88 | 28.63830 | ZMEAC |
144
+ | 36 | 2013-05-23 | S | 8,000 | 21 | 27.10830 | ZMEAC |
145
+ | 37 | 2013-05-23 | S | 23,054 | 61 | 26.80150 | ZMEAC |
146
+ | 38 | 2013-05-23 | S | 39,906 | 106 | 25.17490 | ZMEAC |
147
+ | 39 | 2013-05-29 | S | 13,459 | 36 | 24.74640 | ZMEAC |
148
+ |-----+------------+------+---------+--------+----------+--------|
149
+ | 40 | 2013-05-29 | S | 15,700 | 42 | 24.77900 | ZMEAC |
150
+ | 41 | 2013-05-29 | S | 15,900 | 42 | 24.58020 | ZMEAC |
151
+ | 42 | 2013-05-30 | S | 6,679 | 18 | 25.04710 | ZMEAC |
152
+ |-----+------------+------+---------+--------+----------+--------|
153
+
127
154
  EOS
128
155
  end
129
156
 
@@ -171,6 +198,28 @@ EOS
171
198
  end
172
199
  end
173
200
 
201
+ it 'should be create-able from an Org IO object with groups' do
202
+ tab = Table.new(StringIO.new(@org_file_body), '.org')
203
+ expect(tab.class).to eq(Table)
204
+ expect(tab.rows.size).to be > 10
205
+ expect(tab.headers.sort)
206
+ .to eq [:code, :date, :info, :price, :raw, :ref, :shares]
207
+ tab.rows.each do |row|
208
+ row.each_pair do |k, _v|
209
+ expect(k.class).to eq Symbol
210
+ end
211
+ expect(row[:code].class).to eq String
212
+ expect(row[:date].class).to eq Date
213
+ expect(row[:shares].is_a?(Numeric)).to be true
214
+ unless row[:rawshares].nil?
215
+ expect(row[:rawshares].is_a?(Numeric)).to be true
216
+ end
217
+ expect(row[:price].is_a?(BigDecimal)).to be true
218
+ expect([Numeric, String].any? { |t| row[:ref].is_a?(t) }).to be true
219
+ expect(row[:info].class).to eq String
220
+ end
221
+ end
222
+
174
223
  it 'should be create-able from a CSV file' do
175
224
  File.open('/tmp/junk.csv', 'w') { |f| f.write(@csv_file_body) }
176
225
  tab = Table.new('/tmp/junk.csv')
@@ -241,9 +290,10 @@ EOS
241
290
  end
242
291
  end
243
292
 
244
- it 'should be create-able from an Array of Arrays with header no hrule' do
293
+ it 'should be create-able from an Array of Arrays with nil-marked header' do
245
294
  aoa = [
246
295
  ['First', 'Second', 'Third'],
296
+ nil,
247
297
  ['1', '2', '3.2'],
248
298
  ['4', '5', '6.4'],
249
299
  ['7', '8', '9.0'],
@@ -633,6 +683,7 @@ EOS
633
683
  it 'should select by boolean columns' do
634
684
  tab =
635
685
  [['Ref', 'Date', 'Code', 'Raw', 'Shares', 'Price', 'Info', 'Bool'],
686
+ nil,
636
687
  [1, '2013-05-02', 'P', 795_546.20, 795_546.2, 1.1850, 'ZMPEF1', 'T'],
637
688
  [2, '2013-05-02', 'P', 118_186.40, 118_186.4, 11.8500, 'ZMPEF1', 'T'],
638
689
  [7, '2013-05-20', 'S', 12_000.00, 5046.00, 28.2804, 'ZMEAC', 'F'],
@@ -770,6 +821,168 @@ EOS
770
821
  end
771
822
  end
772
823
 
824
+ describe 'group boundaries' do
825
+ before :all do
826
+ @tab_a = Table.new([
827
+ { id: 1, name: 'Paul', age: 32, address: 'California', salary: 20000, join_date: '2001-07-13' },
828
+ { id: 3, name: 'Teddy', age: 23, address: 'Norway', salary: 20000},
829
+ { id: 4, name: 'Mark', age: 25, address: 'Rich-Mond', salary: 65000, join_date: '2007-12-13' },
830
+ { id: 5, name: 'David', age: 27, address: 'Texas', salary: 85000, join_date: '2007-12-13' },
831
+ { id: 2, name: 'Allen', age: 25, address: 'Texas', salary: nil, join_date: '2007-12-13' },
832
+ { id: 8, name: 'Paul', age: 24, address: 'Houston', salary: 20000, join_date: '2005-07-13' },
833
+ { id: 9, name: 'James', age: 44, address: 'Norway', salary: 5000, join_date: '2005-07-13' },
834
+ { id: 10, name: 'James', age: 45, address: 'Texas', salary: 5000, join_date: '2005-07-13' }
835
+ ])
836
+ # Union compatible with tab_a
837
+ @tab_a1 = Table.new([
838
+ { id: 21, name: 'Paula', age: 23, address: 'Kansas', salary: 20000, join_date: '2001-07-13' },
839
+ { id: 23, name: 'Jenny', age: 32, address: 'Missouri', salary: 20000},
840
+ { id: 24, name: 'Forrest', age: 52, address: 'Richmond', salary: 65000, join_date: '2007-12-13' },
841
+ { id: 25, name: 'Syrano', age: 72, address: 'Nebraska', salary: 85000, join_date: '2007-12-13' },
842
+ # Next four are the same rows as in @tab_a
843
+ { id: 2, name: 'Allen', age: 25, address: 'Texas', salary: nil, join_date: '2007-12-13' },
844
+ { id: 8, name: 'Paul', age: 24, address: 'Houston', salary: 20000, join_date: '2005-07-13' },
845
+ { id: 9, name: 'James', age: 44, address: 'Norway', salary: 5000, join_date: '2005-07-13' },
846
+ { id: 10, name: 'James', age: 45, address: 'Texas', salary: 5000, join_date: '2005-07-13' },
847
+ { id: 22, name: 'Paula', age: 52, address: 'Iowa', salary: nil, join_date: '2007-12-13' },
848
+ { id: 28, name: 'Paula', age: 42, address: 'Oklahoma', salary: 20000, join_date: '2005-07-13' },
849
+ { id: 29, name: 'Patrick', age: 44, address: 'Lindsbourg', salary: 5000, join_date: '2005-07-13' },
850
+ { id: 30, name: 'James', age: 54, address: 'Ottawa', salary: 5000, join_date: '2005-07-13' }
851
+ ])
852
+ @tab_b = Table.new([
853
+ { id: 1, dept: 'IT Billing', emp_id: 1 },
854
+ { id: 2, dept: 'Engineering', emp_id: 2 },
855
+ { id: 3, dept: 'Finance', emp_id: 7 }
856
+ ])
857
+ @aoa =
858
+ [['Ref', 'Date', 'Code', 'Raw', 'Shares', 'Price', 'Info', 'Bool'],
859
+ nil,
860
+ [1, '2013-05-02', 'P', 795_546.20, 795_546.2, 1.1850, 'ZMPEF1', 'T'],
861
+ nil,
862
+ [2, '2013-05-02', 'P', 118_186.40, 118_186.4, 11.8500, 'ZMPEF1', 'T'],
863
+ [7, '2013-05-20', 'S', 12_000.00, 5046.00, 28.2804, 'ZMEAC', 'F'],
864
+ [8, '2013-05-20', 'S', 85_000.00, 35_742.50, 28.3224, 'ZMEAC', 'T'],
865
+ nil,
866
+ [9, '2013-05-20', 'S', 33_302.00, 14_003.49, 28.6383, 'ZMEAC', 'T'],
867
+ [10, '2013-05-23', 'S', 8000.00, 3364.00, 27.1083, 'ZMEAC', 'T'],
868
+ [11, '2013-05-23', 'S', 23_054.00, 9694.21, 26.8015, 'ZMEAC', 'F'],
869
+ [12, '2013-05-23', 'S', 39_906.00, 16_780.47, 25.1749, 'ZMEAC', 'T'],
870
+ [13, '2013-05-29', 'S', 13_459.00, 5659.51, 24.7464, 'ZMEAC', 'T'],
871
+ [14, '2013-05-29', 'S', 15_700.00, 6601.85, 24.7790, 'ZMEAC', 'F'],
872
+ [15, '2013-05-29', 'S', 15_900.00, 6685.95, 24.5802, 'ZMEAC', 'T'],
873
+ nil,
874
+ [16, '2013-05-30', 'S', 6_679.00, 2808.52, 25.0471, 'ZMEAC', 'T']]
875
+ @aoh = [
876
+ { id: 1, name: 'Paul', age: 32, address: 'California', salary: 20000, join_date: '2001-07-13' },
877
+ nil,
878
+ { id: 3, name: 'Teddy', age: 23, address: 'Norway', salary: 20000},
879
+ { id: 4, name: 'Mark', age: 25, address: 'Rich-Mond', salary: 65000, join_date: '2007-12-13' },
880
+ { id: 5, name: 'David', age: 27, address: 'Texas', salary: 85000, join_date: '2007-12-13' },
881
+ nil,
882
+ { id: 2, name: 'Allen', age: 25, address: 'Texas', salary: nil, join_date: '2007-12-13' },
883
+ { id: 8, name: 'Paul', age: 24, address: 'Houston', salary: 20000, join_date: '2005-07-13' },
884
+ { id: 9, name: 'James', age: 44, address: 'Norway', salary: 5000, join_date: '2005-07-13' },
885
+ nil,
886
+ { id: 10, name: 'James', age: 45, address: 'Texas', salary: 5000, join_date: '2005-07-13' }
887
+ ]
888
+ end
889
+
890
+ it 'an empty table should have no groups' do
891
+ expect(Table.new.groups.size).to eq(0)
892
+ end
893
+
894
+ it 'default group boundaries of whole table' do
895
+ expect(@tab_a.groups.size).to eq(1)
896
+ end
897
+
898
+ it 'add group boundaries on reading from org text' do
899
+ tab = Table.new(StringIO.new(@org_file_body_with_groups), '.org')
900
+ expect(tab.groups.size).to eq(4)
901
+ expect(tab.groups[0].size).to eq(1)
902
+ expect(tab.groups[1].size).to eq(3)
903
+ expect(tab.groups[2].size).to eq(7)
904
+ expect(tab.groups[3].size).to eq(3)
905
+ end
906
+
907
+ it 'add group boundaries on reading from aoa' do
908
+ tab = Table.new(@aoa)
909
+ expect(tab.groups.size).to eq(4)
910
+ expect(tab.groups[0].size).to eq(1)
911
+ expect(tab.groups[1].size).to eq(3)
912
+ expect(tab.groups[2].size).to eq(7)
913
+ expect(tab.groups[3].size).to eq(1)
914
+ end
915
+
916
+ it 'add group boundaries on reading from aoh' do
917
+ tab = Table.new(@aoh)
918
+ expect(tab.groups.size).to eq(4)
919
+ expect(tab.groups[0].size).to eq(1)
920
+ expect(tab.groups[1].size).to eq(3)
921
+ expect(tab.groups[2].size).to eq(3)
922
+ expect(tab.groups[3].size).to eq(1)
923
+ end
924
+
925
+ it 'add group boundaries on order_by' do
926
+ tab = @tab_a.order_by(:name)
927
+ # Now the table is ordered by name, and the names are: Allen, David,
928
+ # James, James, Mark, Paul, Paul, Teddy. So there are groups of size 1,
929
+ # 1, 2, 1, 2, and 1. Six groups in all.
930
+ expect(tab.groups.size).to eq(6)
931
+ expect(tab.groups[0].size).to eq(1)
932
+ expect(tab.groups[1].size).to eq(1)
933
+ expect(tab.groups[2].size).to eq(2)
934
+ tab.groups[2].each do |row|
935
+ expect(row[:name]).to eq('James')
936
+ end
937
+ expect(tab.groups[3].size).to eq(1)
938
+ expect(tab.groups[4].size).to eq(2)
939
+ tab.groups[4].each do |row|
940
+ expect(row[:name]).to eq('Paul')
941
+ end
942
+ expect(tab.groups[5].size).to eq(1)
943
+ end
944
+
945
+ it 'add group boundaries on union_all' do
946
+ tab = @tab_a.union_all(@tab_a1)
947
+ expect(tab.size).to eq(20)
948
+ expect(tab.groups.size).to eq(2)
949
+ expect(tab.groups[0].size).to eq(8)
950
+ expect(tab.groups[1].size).to eq(12)
951
+ end
952
+
953
+ it 'inherit group boundaries on union_all' do
954
+ tab1 = @tab_a.order_by(:name)
955
+ tab2 = @tab_a1.order_by(:name)
956
+ tab = tab1.union_all(tab2)
957
+ expect(tab.size).to eq(20)
958
+ expect(tab.groups.size).to eq(tab1.groups.size + tab2.groups.size)
959
+ tab.groups.each do |grp|
960
+ names = grp.map {|r| r[:name]}
961
+ expect(names.uniq.size).to eq(1)
962
+ end
963
+ end
964
+
965
+ it 'inherit group boundaries on select' do
966
+ tab = @tab_a.order_by(:name).select(:name, :age, :join_date)
967
+ # Now the table is ordered by name, and the names are: Allen, David,
968
+ # James, James, Mark, Paul, Paul, Teddy. So there are groups of size 1,
969
+ # 1, 2, 1, 2, and 1. Six groups in all.
970
+ expect(tab.groups.size).to eq(6)
971
+ expect(tab.groups[0].size).to eq(1)
972
+ expect(tab.groups[1].size).to eq(1)
973
+ expect(tab.groups[2].size).to eq(2)
974
+ tab.groups[2].each do |row|
975
+ expect(row[:name]).to eq('James')
976
+ end
977
+ expect(tab.groups[3].size).to eq(1)
978
+ expect(tab.groups[4].size).to eq(2)
979
+ tab.groups[4].each do |row|
980
+ expect(row[:name]).to eq('Paul')
981
+ end
982
+ expect(tab.groups[5].size).to eq(1)
983
+ end
984
+ end
985
+
773
986
  describe 'output' do
774
987
  it 'should be able to return itself as an array of arrays' do
775
988
  aoh = [
@@ -789,6 +1002,7 @@ EOS
789
1002
  # blocks.
790
1003
  tab =
791
1004
  [['Ref', 'Date', 'Code', 'Raw', 'Shares', 'Price', 'Info', 'Bool'],
1005
+ nil,
792
1006
  [1, '2013-05-02', 'P', 795_546.20, 795_546.2, 1.1850, 'ZMPEF1', 'T'],
793
1007
  [2, '2013-05-02', 'P', 118_186.40, 118_186.4, 11.8500, 'ZMPEF1', 'T'],
794
1008
  [7, '2013-05-20', 'S', 12_000.00, 5046.00, 28.2804, 'ZMEAC', 'F'],
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: fat_core
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.5.1
4
+ version: 1.5.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - Daniel E. Doherty
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2017-03-03 00:00:00.000000000 Z
11
+ date: 2017-03-07 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: simplecov