fat_core 1.5.1 → 1.5.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: f6d9999db399e323a385070ca59d15deb692d65d
4
- data.tar.gz: 5bb8ade666435e0c67afeaa293e236082cb19906
3
+ metadata.gz: 4617dd301a8a4f6ea796c27cfff91364f72b79f2
4
+ data.tar.gz: e6eb1b4057994c9672712b28189066af2ebc56d8
5
5
  SHA512:
6
- metadata.gz: 806ba3e817c5899bd5cb3ffa7a13069d8710c2a29406ae9b6060cf2d30e5d28cc3fc1b983d8b8b3aa9beafc6ceab505695a218c39e8e605ec811bc0c671a3bf7
7
- data.tar.gz: e11602a00d01b81bd201092b4bf3cbe8780435d2da3bd3f708f83b2e94677e43a2993b0fc4066fdf8d5d45b258e2516c928c72790e93e84dc77f64714b202f59
6
+ metadata.gz: 478b2f046e8cfc211eaed97bcd20ddf80b9cbb7c38e022931ceaeade08ba91d39f94e4ad085145426b7789a3795648112149cc0e53256bce17c0cc6241fbee2c
7
+ data.tar.gz: 32a4dbf0406779b323a76f61319f9553b9442d8a5ebce0c8567720b3134c9894087b6f22d38720d08f0070884f11eb74d8655ec771b51c4ac8f8e47f6442975a
@@ -62,6 +62,10 @@ module FatCore
62
62
  items.compact.sum
63
63
  end
64
64
 
65
+ def count
66
+ items.compact.count
67
+ end
68
+
65
69
  def min
66
70
  only_with('min', 'NilClass', 'Numeric', 'String', 'DateTime')
67
71
  items.compact.min
@@ -46,6 +46,7 @@ module FatCore
46
46
  def initialize(input = nil, ext = '.csv')
47
47
  @columns = []
48
48
  @footers = {}
49
+ @boundaries = []
49
50
  return self if input.nil?
50
51
  case input
51
52
  when IO, StringIO
@@ -121,6 +122,11 @@ module FatCore
121
122
  columns.first.size
122
123
  end
123
124
 
125
+ # Return whether this table is empty.
126
+ def empty?
127
+ size.zero?
128
+ end
129
+
124
130
  # Return the rows of the table as an array of hashes, keyed by the headers.
125
131
  def rows
126
132
  rows = []
@@ -136,8 +142,121 @@ module FatCore
136
142
  rows
137
143
  end
138
144
 
139
- def empty?
140
- rows.empty?
145
+ protected
146
+
147
+ # Return the rows from first to last. We could just index #rows, but in a
148
+ # large table, that would require that we construct all the rows for a range
149
+ # of any size.
150
+ def rows_range(first = 0, last = size - 1)
151
+ raise ArgumentError, 'first must be <= last' unless first <= last
152
+ rows = []
153
+ unless columns.empty?
154
+ first.upto(last) do |rnum|
155
+ row = {}
156
+ columns.each do |col|
157
+ row[col.header] = col[rnum]
158
+ end
159
+ rows << row
160
+ end
161
+ end
162
+ rows
163
+ end
164
+
165
+ ## ###########################################################################
166
+ ## Group Boundaries
167
+ ##
168
+ ## Boundaries mark the last row in each "group" within the table. The last
169
+ ## row of the table is always an implicit boundary, and having the last row
170
+ ## as the sole boundary is the default for new tables unless mentioned
171
+ ## otherwise. Resetting the boundaries means putting them back in that default
172
+ ## state.
173
+ ##
174
+ ## Note that tables are, for the most part, immutable. That is, the data
175
+ ## rows of the table, once set, are never changed by methods on the
176
+ ## table. Any transformation of a table results in a new table. Boundaries
177
+ ## and footers are exceptions to immutability, but even they only affect
178
+ ## the boundary and footer attributes of the table, not the data rows.
179
+ ##
180
+ ## Boundaries can be added when a table is read in, for example, from the
181
+ ## text of an org table in which each hline (other than the one separating
182
+ ## the headers from the body) marks a boundary for the row immediately
183
+ ## preceding the hline.
184
+ ##
185
+ ## The #order_by method resets the boundaries, then marks the last row
186
+ ## of each group as a boundary.
187
+ ##
188
+ ## The #union_all (but not #union since it deletes duplicates) method adds
189
+ ## a boundary between the constituent tables. #union_all also preserves any
190
+ ## boundary markers within the constituent tables. In doing so, the
191
+ ## boundaries of the second table in the #union_all are increased by the
192
+ ## size of the first table so that they refer to rows in the new table.
193
+ ##
194
+ ## The #select method preserves any boundaries from the parent table
195
+ ## without change, since it only selects columns for the output and deletes
196
+ ## no rows.
197
+ ##
198
+ ## All the other table-transforming methods reset the boundaries in the new
199
+ ## table. For example, #order_by and #where re-arrange and delete rows, so
200
+ ## the old boundaries would make no sense anyway. Likewise, #union,
201
+ ## #intersect, #except, and #join reset the boundaries to their default.
202
+ ## ###########################################################################
203
+
204
+ public
205
+
206
+ # Return an array of arrays of row hashes, one inner array per group in this Table.
207
+ def groups
208
+ normalize_boundaries
209
+ groups = []
210
+ (0..boundaries.size - 1).each do |k|
211
+ groups << group_rows(k)
212
+ end
213
+ groups
214
+ end
215
+
216
+ protected
217
+
218
+ # Reader for boundaries, but not public.
219
+ def boundaries
220
+ @boundaries
221
+ end
222
+
223
+ # Writer for boundaries, but not public.
224
+ def boundaries=(bounds)
225
+ @boundaries = bounds
226
+ end
227
+
228
+ # Make sure size - 1 is the last boundary and that the boundaries are unique and sorted.
229
+ def normalize_boundaries
230
+ unless empty?
231
+ boundaries.push(size - 1) unless boundaries.include?(size - 1)
232
+ self.boundaries = boundaries.uniq.sort
233
+ end
234
+ boundaries
235
+ end
236
+
237
+ # Mark row k as a group boundary; if k is nil, mark the last row in the
238
+ # table instead.
239
+ def mark_boundary(k = nil)
240
+ if k
241
+ boundaries.push(k)
242
+ else
243
+ boundaries.push(size - 1)
244
+ end
245
+ end
246
+
247
+ # Concatenate the array of argument bounds to this table's boundaries, but
248
+ # increase each of the indexes in bounds by shift. This is used in the
249
+ # #union_all method.
250
+ def append_boundaries(bounds, shift: 0)
251
+ @boundaries += bounds.map { |k| k + shift }
252
+ end
253
+
254
+ def group_rows(k)
255
+ normalize_boundaries
256
+ return [] unless k < boundaries.size
257
+ first = k.zero? ? 0 : boundaries[k - 1] + 1
258
+ last = boundaries[k]
259
+ rows_range(first, last)
141
260
  end
142
261
 
143
262
  ############################################################################
@@ -145,9 +264,11 @@ module FatCore
145
264
  # all return a new Table object rather than modifying the table in place.
146
265
  ############################################################################
147
266
 
267
+ public
268
+
148
269
  # Return a new Table sorted on the rows of this Table on the possibly
149
270
  # multiple keys given in the array of syms in headers. Append a ! to the
150
- # symbol name to indicate reverse sorting on that column.
271
+ # symbol name to indicate reverse sorting on that column. Resets groups.
151
272
  def order_by(*sort_heads)
152
273
  sort_heads = [sort_heads].flatten
153
274
  rev_heads = sort_heads.select { |h| h.to_s.ends_with?('!') }
@@ -158,65 +279,62 @@ module FatCore
158
279
  key2 = sort_heads.map { |h| rev_heads.include?(h) ? r1[h] : r2[h] }
159
280
  key1 <=> key2
160
281
  end
282
+ # Add the new rows to the table, but mark a group boundary at the points
283
+ # where the sort key changes value.
161
284
  new_tab = Table.new
162
- new_rows.each do |nrow|
285
+ last_key = nil
286
+ new_rows.each_with_index do |nrow, k|
163
287
  new_tab << nrow
288
+ key = nrow.fetch_values(*sort_heads)
289
+ new_tab.mark_boundary(k - 1) if last_key && key != last_key
290
+ last_key = key
164
291
  end
165
292
  new_tab
166
293
  end
167
294
 
168
295
  # Return a Table having the selected column expressions. Each expression can
169
- # be either a (1) symbol, (2) a hash of symbol => symbol, or (3) a hash of
170
- # symbol => 'string', though the bare symbol arguments (1) must precede any
171
- # hash arguments. Each expression results in a column in the resulting Table
172
- # in the order given. The expressions are evaluated in order as well.
173
- def select(*exps)
296
+ # be either a (1) symbol, :old_col, representing a column in the current
297
+ # table, (2) a hash of new_col: :old_col to rename an existing :old_col
298
+ # column as :new_col, or (3) a hash of new_col: 'expression', to add a new
299
+ # column that is computed as an arbitrary ruby expression of the existing
300
+ # columns (whether selected for the output table or not) or any new_col
301
+ # defined earlier in the argument list. The expression string can also
302
+ # access the instance variable @row as the row number of the row being
303
+ # evaluated. The bare symbol arguments (1) must precede any hash arguments
304
+ # (2) or (3). Each expression results in a column in the resulting Table in
305
+ # the order given. The expressions are evaluated in left-to-right order as
306
+ # well. The output table preserves any groups present in the input table.
307
+ def select(*cols, **new_cols)
174
308
  result = Table.new
175
- new_cols = {}
176
309
  ev = Evaluator.new(vars: { row: 0 }, before: '@row += 1')
177
310
  rows.each do |old_row|
178
- new_heads = []
179
- new_row ||= {}
180
- exps.each do |exp|
181
- case exp
182
- when Symbol, String
183
- h = exp.as_sym
184
- raise "Column '#{h}' in select does not exist" unless column?(h)
185
- new_row[h] = old_row[h]
186
- when Hash
187
- # Note that when one of the exps is a Hash, it will contain an
188
- # output expression for each member of the Hash, so we have to loop
189
- # through them here.
190
- exp.each_pair do |key, val|
191
- # Gather the new values computed so far for this row
192
- vars = old_row.merge(new_row)
193
- case val
194
- when Symbol
195
- h = val.as_sym
196
- raise "Column '#{h}' in select does not exist" unless vars.keys.include?(h)
197
- new_row[key] = vars[h]
198
- when String
199
- # Now we have a hash, vars, of all local variables we want to be
200
- # defined while evaluating expression xp as the value of column
201
- # key in the new column.
202
- h = key.as_sym
203
- new_row[h] = ev.evaluate(val, vars: vars)
204
- # Don't add this column to new_heads until after the eval so it
205
- # does not shadow the existing value of row[h].
206
- else
207
- raise 'Hash parameters to select must be a symbol or string'
208
- end
209
- end
311
+ new_row = {}
312
+ cols.each do |k|
313
+ h = k.as_sym
314
+ raise "Column '#{h}' in select does not exist" unless column?(h)
315
+ new_row[h] = old_row[h]
316
+ end
317
+ new_cols.each_pair do |key, val|
318
+ key = key.as_sym
319
+ vars = old_row.merge(new_row)
320
+ case val
321
+ when Symbol
322
+ raise "Column '#{val}' in select does not exist" unless vars.keys.include?(val)
323
+ new_row[key] = vars[val]
324
+ when String
325
+ new_row[key] = ev.evaluate(val, vars: vars)
210
326
  else
211
- raise 'Parameters to select must be a symbol, string, or hash'
327
+ raise 'Hash parameters to select must be a symbol or string'
212
328
  end
213
329
  end
214
330
  result << new_row
215
331
  end
332
+ result.boundaries = boundaries
216
333
  result
217
334
  end
218
335
 
219
- # Return a Table containing only rows matching the where expression.
336
+ # Return a Table containing only rows matching the where expression. Resets
337
+ # groups.
220
338
  def where(expr)
221
339
  expr = expr.to_s
222
340
  result = Table.new
@@ -227,7 +345,7 @@ module FatCore
227
345
  result
228
346
  end
229
347
 
230
- # Return this table with all duplicate rows eliminated.
348
+ # Return this table with all duplicate rows eliminated. Resets groups.
231
349
  def distinct
232
350
  result = Table.new
233
351
  uniq_rows = rows.uniq
@@ -237,6 +355,7 @@ module FatCore
237
355
  result
238
356
  end
239
357
 
358
+ # Return this table with all duplicate rows eliminated. Resets groups.
240
359
  def uniq
241
360
  distinct
242
361
  end
@@ -247,23 +366,31 @@ module FatCore
247
366
  # the same type in the two tables, or an exception will be thrown.
248
367
  # Duplicates are eliminated from the result.
249
368
  def union(other)
250
- set_operation(other, :+, true)
369
+ set_operation(other, :+,
370
+ distinct: true,
371
+ add_boundaries: true)
251
372
  end
252
373
 
253
374
  # Return a Table that combines this table with another table. In other
254
375
  # words, return the union of this table with the other. The headers of this
255
376
  # table are used in the result. There must be the same number of columns of
256
377
  # the same type in the two tables, or an exception will be thrown.
257
- # Duplicates are not eliminated from the result.
378
+ # Duplicates are not eliminated from the result. Adds group boundaries at
379
+ # boundaries of the constituent tables. Preserves and adjusts the group
380
+ # boundaries of the constituent tables.
258
381
  def union_all(other)
259
- set_operation(other, :+, false)
382
+ set_operation(other, :+,
383
+ distinct: false,
384
+ add_boundaries: true,
385
+ inherit_boundaries: true)
260
386
  end
261
387
 
262
388
  # Return a Table that includes the rows that appear in this table and in
263
389
  # another table. In other words, return the intersection of this table with
264
390
  # the other. The headers of this table are used in the result. There must be
265
391
  # the same number of columns of the same type in the two tables, or an
266
- # exception will be thrown. Duplicates are eliminated from the result.
392
+ # exception will be thrown. Duplicates are eliminated from the
393
+ # result. Resets groups.
267
394
  def intersect(other)
268
395
  set_operation(other, :intersect, true)
269
396
  end
@@ -272,7 +399,8 @@ module FatCore
272
399
  # another table. In other words, return the intersection of this table with
273
400
  # the other. The headers of this table are used in the result. There must be
274
401
  # the same number of columns of the same type in the two tables, or an
275
- # exception will be thrown. Duplicates are not eliminated from the result.
402
+ # exception will be thrown. Duplicates are not eliminated from the
403
+ # result. Resets groups.
276
404
  def intersect_all(other)
277
405
  set_operation(other, :intersect, false)
278
406
  end
@@ -282,7 +410,7 @@ module FatCore
282
410
  # set difference between this table and the other. The headers of this table
283
411
  # are used in the result. There must be the same number of columns of the
284
412
  # same type in the two tables, or an exception will be thrown. Duplicates
285
- # are eliminated from the result.
413
+ # are eliminated from the result. Resets groups.
286
414
  def except(other)
287
415
  set_operation(other, :difference, true)
288
416
  end
@@ -292,7 +420,7 @@ module FatCore
292
420
  # set difference between this table and the other. The headers of this table
293
421
  # are used in the result. There must be the same number of columns of the
294
422
  # same type in the two tables, or an exception will be thrown. Duplicates
295
- # are not eliminated from the result.
423
+ # are not eliminated from the result. Resets groups.
296
424
  def except_all(other)
297
425
  set_operation(other, :difference, false)
298
426
  end
@@ -302,7 +430,10 @@ module FatCore
302
430
  # Apply the set operation given by op between this table and the other table
303
431
  # given in the first argument. If distinct is true, eliminate duplicates
304
432
  # from the result.
305
- def set_operation(other, op = :+, distinct = true)
433
+ def set_operation(other, op = :+,
434
+ distinct: true,
435
+ add_boundaries: false,
436
+ inherit_boundaries: false)
306
437
  unless columns.size == other.columns.size
307
438
  raise 'Cannot apply a set operation to tables with a different number of columns.'
308
439
  end
@@ -312,8 +443,14 @@ module FatCore
312
443
  other_rows = other.rows.map { |r| r.replace_keys(headers) }
313
444
  result = Table.new
314
445
  new_rows = rows.send(op, other_rows)
315
- new_rows.each do |row|
446
+ new_rows.each_with_index do |row, k|
316
447
  result << row
448
+ result.mark_boundary if k == size - 1 && add_boundaries
449
+ end
450
+ if inherit_boundaries
451
+ result.boundaries = normalize_boundaries
452
+ other.normalize_boundaries
453
+ result.append_boundaries(other.boundaries, shift: size)
317
454
  end
318
455
  distinct ? result.distinct : result
319
456
  end
@@ -381,7 +518,7 @@ module FatCore
381
518
  # of all columns in T1 followed by all columns in T2. If the tables
382
519
  # have N and M rows respectively, the joined table will have N * M
383
520
  # rows.
384
- #
521
+ # Resets groups.
385
522
  JOIN_TYPES = [:inner, :left, :right, :full, :cross]
386
523
 
387
524
  def join(other, *exps, join_type: :inner)
@@ -449,6 +586,73 @@ module FatCore
449
586
  join(other, join_type: :cross)
450
587
  end
451
588
 
589
+ # Return a Table with a single row for each group of rows in the input table
590
+ # where the value of all columns named as simple symbols are equal. All
591
+ # other columns are set to the result of aggregating the values of that
592
+ # column within the group according to an aggregate function (:count, :sum,
593
+ # :min, :max, etc.), which defaults to the :first function, giving the value
594
+ # of that column for the first row in the group. You can specify a
595
+ # different aggregate function for a column by adding a hash parameter with
596
+ # the column as the key and a symbol for the aggregate function as the
597
+ # value. For example, consider the following call:
598
+ #
599
+ # tab.group_by(:date, :code, :price, shares: :sum, ).
600
+ #
601
+ # The first three parameters are simple symbols, so the table is divided
602
+ # into groups of rows in which the value of :date, :code, and :price are
603
+ # equal. The shares: hash parameter is set to the aggregate function :sum,
604
+ # so it will appear in the result as the sum of all the :shares values in
605
+ # each group. Any non-aggregate columns that have no aggregate function set
606
+ # default to using the aggregate function :first. Because of the way Ruby
607
+ # parses parameters to a method call, all the grouping symbols must appear
608
+ # first in the parameter list before any hash parameters.
609
+ def group_by(*group_cols, **agg_cols)
610
+ default_agg_func = :first
611
+ default_cols = headers - group_cols - agg_cols.keys
612
+ default_cols.each do |h|
613
+ agg_cols[h] = default_agg_func
614
+ end
615
+
616
+ sorted_tab = order_by(group_cols)
617
+ groups = sorted_tab.rows.group_by do |r|
618
+ group_cols.map { |k| r[k] }
619
+ end
620
+ result = Table.new
621
+ groups.each_pair do |_vals, grp_rows|
622
+ result << row_from_group(grp_rows, group_cols, agg_cols)
623
+ end
624
+ result
625
+ end
626
+
627
+ ############################################################################
628
+ # Footer methods
629
+ ############################################################################
630
+ def add_footer(label: 'Total', aggregate: :sum, heads: [])
631
+ foot = {}
632
+ heads.each do |h|
633
+ raise "No #{h} column in table to #{aggregate}" unless headers.include?(h)
634
+ foot[h] = column(h).send(aggregate)
635
+ end
636
+ @footers[label.as_sym] = foot
637
+ self
638
+ end
639
+
640
+ def add_sum_footer(cols, label = 'Total')
641
+ add_footer(heads: cols)
642
+ end
643
+
644
+ def add_avg_footer(cols, label = 'Average')
645
+ add_footer(label: label, aggregate: :avg, heads: cols)
646
+ end
647
+
648
+ def add_min_footer(cols, label = 'Minimum')
649
+ add_footer(label: label, aggregate: :min, heads: cols)
650
+ end
651
+
652
+ def add_max_footer(cols, label = 'Maximum')
653
+ add_footer(label: label, aggregate: :max, heads: cols)
654
+ end
655
+
452
656
  private
453
657
 
454
658
  # Return an output row appropriate to the given join type, including all the
@@ -585,64 +789,6 @@ module FatCore
585
789
  self
586
790
  end
587
791
 
588
- public
589
-
590
- # Return a Table in which all rows of the table are divided into groups
591
- # where the value of all columns named as simple symbols are equal. All
592
- # other columns are set to the result of aggregating the values of that
593
- # column within the group according to the Column aggregate function (:sum,
594
- # :min, :max, etc.) set in a hash parameter with the non-aggregate column
595
- # name as a key and the symbol for the aggregate function as a value. For
596
- # example, consider the following call:
597
- #
598
- # #+BEGIN_EXAMPLE
599
- # tab.group_by(:date, :code, :price, shares: :sum, ).
600
- # #+END_EXAMPLE
601
- #
602
- # The first three parameters are simple symbols, so the table is divided
603
- # into groups of rows in which the value of :date, :code, and :price are
604
- # equal. The :shares parameter is set to the aggregate function :sum, so it
605
- # will appear in the result as the sum of all the :shares values in each
606
- # group. Any non-aggregate columns that have no aggregate function set
607
- # default to using the aggregate function :first. Note that because of the
608
- # way Ruby parses parameters to a method call, all the grouping symbols must
609
- # appear first in the parameter list.
610
- def group_by(*exprs)
611
- group_cols = []
612
- agg_cols = {}
613
- exprs.each do |xp|
614
- case xp
615
- when Symbol
616
- group_cols << xp
617
- when Hash
618
- agg_cols = xp
619
- else
620
- raise "Cannot group by parameter '#{xp}'"
621
- end
622
- end
623
- default_agg_func = :first
624
- default_cols = headers - group_cols - agg_cols.keys
625
- default_cols.each do |h|
626
- agg_cols[h] = default_agg_func
627
- end
628
-
629
- sorted_tab = order_by(group_cols)
630
- groups = sorted_tab.rows.group_by do |r|
631
- group_cols.map { |k| r[k] }
632
- end
633
- result_rows = []
634
- groups.each_pair do |_vals, grp_rows|
635
- result_rows << row_from_group(grp_rows, group_cols, agg_cols)
636
- end
637
- result = Table.new
638
- result_rows.each do |row|
639
- result << row
640
- end
641
- result
642
- end
643
-
644
- private
645
-
646
792
  def row_from_group(rows, grp_cols, agg_cols)
647
793
  new_row = {}
648
794
  grp_cols.each do |h|
@@ -663,38 +809,14 @@ module FatCore
663
809
 
664
810
  public
665
811
 
666
- def add_footer(label: 'Total', aggregate: :sum, heads: [])
667
- foot = {}
668
- heads.each do |h|
669
- raise "No #{h} column in table to #{aggregate}" unless headers.include?(h)
670
- foot[h] = column(h).send(aggregate)
671
- end
672
- @footers[label.as_sym] = foot
673
- self
674
- end
675
-
676
- def add_sum_footer(cols, label = 'Total')
677
- add_footer(heads: cols)
678
- end
679
-
680
- def add_avg_footer(cols, label = 'Average')
681
- add_footer(label: label, aggregate: :avg, heads: cols)
682
- end
683
-
684
- def add_min_footer(cols, label = 'Minimum')
685
- add_footer(label: label, aggregate: :min, heads: cols)
686
- end
687
-
688
- def add_max_footer(cols, label = 'Maximum')
689
- add_footer(label: label, aggregate: :max, heads: cols)
690
- end
691
-
692
812
  # This returns the table as an Array of Arrays with formatting applied.
693
813
  # This would normally be called after all calculations on the table are done
694
814
  # and you want to return the results. The Array of Arrays structure is
695
815
  # what org-mode src blocks will render as an org table in the buffer.
696
816
  def to_org(formats: {})
697
817
  result = []
818
+
819
+ # Headers
698
820
  header_row = []
699
821
  headers.each do |hdr|
700
822
  header_row << hdr.entitle
@@ -703,6 +825,7 @@ module FatCore
703
825
  # This causes org to place an hline under the header row
704
826
  result << nil unless header_row.empty?
705
827
 
828
+ # Body
706
829
  rows.each do |row|
707
830
  out_row = []
708
831
  headers.each do |hdr|
@@ -710,6 +833,8 @@ module FatCore
710
833
  end
711
834
  result << out_row
712
835
  end
836
+
837
+ # Footers
713
838
  footers.each_pair do |label, footer|
714
839
  foot_row = []
715
840
  columns.each do |col|
@@ -726,17 +851,20 @@ module FatCore
726
851
  # Table construction methods.
727
852
  ############################################################################
728
853
 
729
- # Add a row represented by a Hash having the headers as keys. All tables
730
- # should be built ultimately using this method as a primitive.
731
- def add_row(row)
854
+ # Add a row represented by a Hash having the headers as keys. If mark is
855
+ # true, mark this row as a boundary. All tables should be built ultimately
856
+ # using this method as a primitive.
857
+ def add_row(row, mark: false)
732
858
  row.each_pair do |k, v|
733
859
  key = k.as_sym
734
860
  columns << Column.new(header: k) unless column?(k)
735
861
  column(key) << v
736
862
  end
863
+ @boundaries << (size - 1) if mark
737
864
  self
738
865
  end
739
866
 
867
+ # Add a row without marking.
740
868
  def <<(row)
741
869
  add_row(row)
742
870
  end
@@ -753,24 +881,41 @@ module FatCore
753
881
  # respond to #to_hash.
754
882
  def from_array_of_hashes(rows)
755
883
  rows.each do |row|
884
+ if row.nil?
885
+ mark_boundary
886
+ next
887
+ end
756
888
  add_row(row.to_hash)
757
889
  end
758
890
  self
759
891
  end
760
892
 
893
+ # Construct a new table from an array of arrays. If the second element of
894
+ # the array is a nil, a string that looks like an hrule, or an array whose
895
+ # first element is a string that looks like an hrule, interpret the first
896
+ # element of the array as a row of headers. Otherwise, synthesize headers of
897
+ # the form "col1", "col2", ... and so forth. The remaining elements are
898
+ # taken as the body of the table, except that if an element of the outer
899
+ # array is a nil or a string that looks like an hrule, mark the preceding
900
+ # row as a boundary.
761
901
  def from_array_of_arrays(rows)
902
+ hrule_re = /\A\s*\|[-+]+/
762
903
  headers = []
763
- if rows[0].any? { |itm| itm.to_s.number? }
764
- headers = (1..rows[0].size).to_a.map { |k| "col#{k}".as_sym }
765
- first_data_row = 0
766
- else
904
+ if rows[1].nil? || rows[1] =~ hrule_re || rows[1].first =~ hrule_re
905
+ # Take the first row as headers
767
906
  # Use first row 0 as headers
768
907
  headers = rows[0].map(&:as_sym)
769
- first_data_row = 1
908
+ first_data_row = 2
909
+ else
910
+ # Synthesize headers
911
+ headers = (1..rows[0].size).to_a.map { |k| "col#{k}".as_sym }
912
+ first_data_row = 0
770
913
  end
771
- hrule_re = /\A\s*\|[-+]+/
772
914
  rows[first_data_row..-1].each do |row|
773
- next if row[0] =~ hrule_re
915
+ if row.nil? || row[0] =~ hrule_re
916
+ mark_boundary
917
+ next
918
+ end
774
919
  row = row.map { |s| s.to_s.strip }
775
920
  hash_row = Hash[headers.zip(row)]
776
921
  add_row(hash_row)
@@ -797,18 +942,27 @@ module FatCore
797
942
  unless table_found
798
943
  # Skip through the file until a table is found
799
944
  next unless line =~ table_re
945
+ unless line =~ hrule_re
946
+ line = line.sub(/\A\s*\|/, '').sub(/\|\s*\z/, '')
947
+ rows << line.split('|').map(&:clean)
948
+ end
800
949
  table_found = true
950
+ next
801
951
  end
802
952
  break unless line =~ table_re
803
953
  if !header_found && line =~ hrule_re
954
+ rows << nil
804
955
  header_found = true
805
956
  next
806
957
  elsif header_found && line =~ hrule_re
958
+ # Mark the boundary with a nil
959
+ rows << nil
960
+ elsif line !~ table_re
807
961
  # Stop reading at the first line that is not part of the table
808
962
  break
809
963
  else
810
964
  line = line.sub(/\A\s*\|/, '').sub(/\|\s*\z/, '')
811
- rows << line.split('|')
965
+ rows << line.split('|').map(&:clean)
812
966
  end
813
967
  end
814
968
  from_array_of_arrays(rows)
@@ -1,7 +1,7 @@
1
1
  module FatCore
2
2
  MAJOR = 1
3
3
  MINOR = 5
4
- PATCH = 1
4
+ PATCH = 2
5
5
 
6
6
  VERSION = [MAJOR, MINOR, PATCH].compact.join('.')
7
7
  end
@@ -124,6 +124,33 @@ EOS
124
124
  | 42 | 2013-05-30 | S | 6,679 | 18 | 25.04710 | ZMEAC |
125
125
 
126
126
  * Another Heading
127
+ EOS
128
+
129
+ @org_file_body_with_groups = <<EOS
130
+
131
+ #+TBLNAME: morgan_tab
132
+ |-----+------------+------+---------+--------+----------+--------|
133
+ | Ref | Date | Code | Raw | Shares | Price | Info |
134
+ |-----+------------+------+---------+--------+----------+--------|
135
+ | 29 | 2013-05-02 | P | 795,546 | 2,609 | 1.18500 | ZMPEF1 |
136
+ |-----+------------+------+---------+--------+----------+--------|
137
+ | 30 | 2013-05-02 | P | 118,186 | 388 | 11.85000 | ZMPEF1 |
138
+ | 31 | 2013-05-02 | P | 340,948 | 1,926 | 1.18500 | ZMPEF2 |
139
+ | 32 | 2013-05-02 | P | 50,651 | 286 | 11.85000 | ZMPEF2 |
140
+ |-----+------------+------+---------+--------+----------+--------|
141
+ | 33 | 2013-05-20 | S | 12,000 | 32 | 28.28040 | ZMEAC |
142
+ | 34 | 2013-05-20 | S | 85,000 | 226 | 28.32240 | ZMEAC |
143
+ | 35 | 2013-05-20 | S | 33,302 | 88 | 28.63830 | ZMEAC |
144
+ | 36 | 2013-05-23 | S | 8,000 | 21 | 27.10830 | ZMEAC |
145
+ | 37 | 2013-05-23 | S | 23,054 | 61 | 26.80150 | ZMEAC |
146
+ | 38 | 2013-05-23 | S | 39,906 | 106 | 25.17490 | ZMEAC |
147
+ | 39 | 2013-05-29 | S | 13,459 | 36 | 24.74640 | ZMEAC |
148
+ |-----+------------+------+---------+--------+----------+--------|
149
+ | 40 | 2013-05-29 | S | 15,700 | 42 | 24.77900 | ZMEAC |
150
+ | 41 | 2013-05-29 | S | 15,900 | 42 | 24.58020 | ZMEAC |
151
+ | 42 | 2013-05-30 | S | 6,679 | 18 | 25.04710 | ZMEAC |
152
+ |-----+------------+------+---------+--------+----------+--------|
153
+
127
154
  EOS
128
155
  end
129
156
 
@@ -171,6 +198,28 @@ EOS
171
198
  end
172
199
  end
173
200
 
201
+ it 'should be create-able from an Org IO object with groups' do
202
+ tab = Table.new(StringIO.new(@org_file_body), '.org')
203
+ expect(tab.class).to eq(Table)
204
+ expect(tab.rows.size).to be > 10
205
+ expect(tab.headers.sort)
206
+ .to eq [:code, :date, :info, :price, :raw, :ref, :shares]
207
+ tab.rows.each do |row|
208
+ row.each_pair do |k, _v|
209
+ expect(k.class).to eq Symbol
210
+ end
211
+ expect(row[:code].class).to eq String
212
+ expect(row[:date].class).to eq Date
213
+ expect(row[:shares].is_a?(Numeric)).to be true
214
+ unless row[:rawshares].nil?
215
+ expect(row[:rawshares].is_a?(Numeric)).to be true
216
+ end
217
+ expect(row[:price].is_a?(BigDecimal)).to be true
218
+ expect([Numeric, String].any? { |t| row[:ref].is_a?(t) }).to be true
219
+ expect(row[:info].class).to eq String
220
+ end
221
+ end
222
+
174
223
  it 'should be create-able from a CSV file' do
175
224
  File.open('/tmp/junk.csv', 'w') { |f| f.write(@csv_file_body) }
176
225
  tab = Table.new('/tmp/junk.csv')
@@ -241,9 +290,10 @@ EOS
241
290
  end
242
291
  end
243
292
 
244
- it 'should be create-able from an Array of Arrays with header no hrule' do
293
+ it 'should be create-able from an Array of Arrays with nil-marked header' do
245
294
  aoa = [
246
295
  ['First', 'Second', 'Third'],
296
+ nil,
247
297
  ['1', '2', '3.2'],
248
298
  ['4', '5', '6.4'],
249
299
  ['7', '8', '9.0'],
@@ -633,6 +683,7 @@ EOS
633
683
  it 'should select by boolean columns' do
634
684
  tab =
635
685
  [['Ref', 'Date', 'Code', 'Raw', 'Shares', 'Price', 'Info', 'Bool'],
686
+ nil,
636
687
  [1, '2013-05-02', 'P', 795_546.20, 795_546.2, 1.1850, 'ZMPEF1', 'T'],
637
688
  [2, '2013-05-02', 'P', 118_186.40, 118_186.4, 11.8500, 'ZMPEF1', 'T'],
638
689
  [7, '2013-05-20', 'S', 12_000.00, 5046.00, 28.2804, 'ZMEAC', 'F'],
@@ -770,6 +821,168 @@ EOS
770
821
  end
771
822
  end
772
823
 
824
+ describe 'group boundaries' do
825
+ before :all do
826
+ @tab_a = Table.new([
827
+ { id: 1, name: 'Paul', age: 32, address: 'California', salary: 20000, join_date: '2001-07-13' },
828
+ { id: 3, name: 'Teddy', age: 23, address: 'Norway', salary: 20000},
829
+ { id: 4, name: 'Mark', age: 25, address: 'Rich-Mond', salary: 65000, join_date: '2007-12-13' },
830
+ { id: 5, name: 'David', age: 27, address: 'Texas', salary: 85000, join_date: '2007-12-13' },
831
+ { id: 2, name: 'Allen', age: 25, address: 'Texas', salary: nil, join_date: '2007-12-13' },
832
+ { id: 8, name: 'Paul', age: 24, address: 'Houston', salary: 20000, join_date: '2005-07-13' },
833
+ { id: 9, name: 'James', age: 44, address: 'Norway', salary: 5000, join_date: '2005-07-13' },
834
+ { id: 10, name: 'James', age: 45, address: 'Texas', salary: 5000, join_date: '2005-07-13' }
835
+ ])
836
+ # Union compatible with tab_a
837
+ @tab_a1 = Table.new([
838
+ { id: 21, name: 'Paula', age: 23, address: 'Kansas', salary: 20000, join_date: '2001-07-13' },
839
+ { id: 23, name: 'Jenny', age: 32, address: 'Missouri', salary: 20000},
840
+ { id: 24, name: 'Forrest', age: 52, address: 'Richmond', salary: 65000, join_date: '2007-12-13' },
841
+ { id: 25, name: 'Syrano', age: 72, address: 'Nebraska', salary: 85000, join_date: '2007-12-13' },
842
+ # The next four rows are the same as the corresponding rows in @tab_a
843
+ { id: 2, name: 'Allen', age: 25, address: 'Texas', salary: nil, join_date: '2007-12-13' },
844
+ { id: 8, name: 'Paul', age: 24, address: 'Houston', salary: 20000, join_date: '2005-07-13' },
845
+ { id: 9, name: 'James', age: 44, address: 'Norway', salary: 5000, join_date: '2005-07-13' },
846
+ { id: 10, name: 'James', age: 45, address: 'Texas', salary: 5000, join_date: '2005-07-13' },
847
+ { id: 22, name: 'Paula', age: 52, address: 'Iowa', salary: nil, join_date: '2007-12-13' },
848
+ { id: 28, name: 'Paula', age: 42, address: 'Oklahoma', salary: 20000, join_date: '2005-07-13' },
849
+ { id: 29, name: 'Patrick', age: 44, address: 'Lindsbourg', salary: 5000, join_date: '2005-07-13' },
850
+ { id: 30, name: 'James', age: 54, address: 'Ottawa', salary: 5000, join_date: '2005-07-13' }
851
+ ])
852
+ @tab_b = Table.new([
853
+ { id: 1, dept: 'IT Billing', emp_id: 1 },
854
+ { id: 2, dept: 'Engineering', emp_id: 2 },
855
+ { id: 3, dept: 'Finance', emp_id: 7 }
856
+ ])
857
+ @aoa =
858
+ [['Ref', 'Date', 'Code', 'Raw', 'Shares', 'Price', 'Info', 'Bool'],
859
+ nil,
860
+ [1, '2013-05-02', 'P', 795_546.20, 795_546.2, 1.1850, 'ZMPEF1', 'T'],
861
+ nil,
862
+ [2, '2013-05-02', 'P', 118_186.40, 118_186.4, 11.8500, 'ZMPEF1', 'T'],
863
+ [7, '2013-05-20', 'S', 12_000.00, 5046.00, 28.2804, 'ZMEAC', 'F'],
864
+ [8, '2013-05-20', 'S', 85_000.00, 35_742.50, 28.3224, 'ZMEAC', 'T'],
865
+ nil,
866
+ [9, '2013-05-20', 'S', 33_302.00, 14_003.49, 28.6383, 'ZMEAC', 'T'],
867
+ [10, '2013-05-23', 'S', 8000.00, 3364.00, 27.1083, 'ZMEAC', 'T'],
868
+ [11, '2013-05-23', 'S', 23_054.00, 9694.21, 26.8015, 'ZMEAC', 'F'],
869
+ [12, '2013-05-23', 'S', 39_906.00, 16_780.47, 25.1749, 'ZMEAC', 'T'],
870
+ [13, '2013-05-29', 'S', 13_459.00, 5659.51, 24.7464, 'ZMEAC', 'T'],
871
+ [14, '2013-05-29', 'S', 15_700.00, 6601.85, 24.7790, 'ZMEAC', 'F'],
872
+ [15, '2013-05-29', 'S', 15_900.00, 6685.95, 24.5802, 'ZMEAC', 'T'],
873
+ nil,
874
+ [16, '2013-05-30', 'S', 6_679.00, 2808.52, 25.0471, 'ZMEAC', 'T']]
875
+ @aoh = [
876
+ { id: 1, name: 'Paul', age: 32, address: 'California', salary: 20000, join_date: '2001-07-13' },
877
+ nil,
878
+ { id: 3, name: 'Teddy', age: 23, address: 'Norway', salary: 20000},
879
+ { id: 4, name: 'Mark', age: 25, address: 'Rich-Mond', salary: 65000, join_date: '2007-12-13' },
880
+ { id: 5, name: 'David', age: 27, address: 'Texas', salary: 85000, join_date: '2007-12-13' },
881
+ nil,
882
+ { id: 2, name: 'Allen', age: 25, address: 'Texas', salary: nil, join_date: '2007-12-13' },
883
+ { id: 8, name: 'Paul', age: 24, address: 'Houston', salary: 20000, join_date: '2005-07-13' },
884
+ { id: 9, name: 'James', age: 44, address: 'Norway', salary: 5000, join_date: '2005-07-13' },
885
+ nil,
886
+ { id: 10, name: 'James', age: 45, address: 'Texas', salary: 5000, join_date: '2005-07-13' }
887
+ ]
888
+ end
889
+
890
+ it 'an empty table should have no groups' do
891
+ expect(Table.new.groups.size).to eq(0)
892
+ end
893
+
894
+ it 'default group boundaries of whole table' do
895
+ expect(@tab_a.groups.size).to eq(1)
896
+ end
897
+
898
+ it 'add group boundaries on reading from org text' do
899
+ tab = Table.new(StringIO.new(@org_file_body_with_groups), '.org')
900
+ expect(tab.groups.size).to eq(4)
901
+ expect(tab.groups[0].size).to eq(1)
902
+ expect(tab.groups[1].size).to eq(3)
903
+ expect(tab.groups[2].size).to eq(7)
904
+ expect(tab.groups[3].size).to eq(3)
905
+ end
906
+
907
+ it 'add group boundaries on reading from aoa' do
908
+ tab = Table.new(@aoa)
909
+ expect(tab.groups.size).to eq(4)
910
+ expect(tab.groups[0].size).to eq(1)
911
+ expect(tab.groups[1].size).to eq(3)
912
+ expect(tab.groups[2].size).to eq(7)
913
+ expect(tab.groups[3].size).to eq(1)
914
+ end
915
+
916
+ it 'add group boundaries on reading from aoh' do
917
+ tab = Table.new(@aoh)
918
+ expect(tab.groups.size).to eq(4)
919
+ expect(tab.groups[0].size).to eq(1)
920
+ expect(tab.groups[1].size).to eq(3)
921
+ expect(tab.groups[2].size).to eq(3)
922
+ expect(tab.groups[3].size).to eq(1)
923
+ end
924
+
925
+ it 'add group boundaries on order_by' do
926
+ tab = @tab_a.order_by(:name)
927
+ # Now the table is ordered by name, and the names are: Allen, David,
928
+ # James, James, Mark, Paul, Paul, Teddy. So there are groups of size 1,
929
+ # 1, 2, 1, 2, and 1. Six groups in all.
930
+ expect(tab.groups.size).to eq(6)
931
+ expect(tab.groups[0].size).to eq(1)
932
+ expect(tab.groups[1].size).to eq(1)
933
+ expect(tab.groups[2].size).to eq(2)
934
+ tab.groups[2].each do |row|
935
+ expect(row[:name]).to eq('James')
936
+ end
937
+ expect(tab.groups[3].size).to eq(1)
938
+ expect(tab.groups[4].size).to eq(2)
939
+ tab.groups[4].each do |row|
940
+ expect(row[:name]).to eq('Paul')
941
+ end
942
+ expect(tab.groups[5].size).to eq(1)
943
+ end
944
+
945
+ it 'add group boundaries on union_all' do
946
+ tab = @tab_a.union_all(@tab_a1)
947
+ expect(tab.size).to eq(20)
948
+ expect(tab.groups.size).to eq(2)
949
+ expect(tab.groups[0].size).to eq(8)
950
+ expect(tab.groups[1].size).to eq(12)
951
+ end
952
+
953
+ it 'inherit group boundaries on union_all' do
954
+ tab1 = @tab_a.order_by(:name)
955
+ tab2 = @tab_a1.order_by(:name)
956
+ tab = tab1.union_all(tab2)
957
+ expect(tab.size).to eq(20)
958
+ expect(tab.groups.size).to eq(tab1.groups.size + tab2.groups.size)
959
+ tab.groups.each do |grp|
960
+ names = grp.map {|r| r[:name]}
961
+ expect(names.uniq.size).to eq(1)
962
+ end
963
+ end
964
+
965
+ it 'inherit group boundaries on select' do
966
+ tab = @tab_a.order_by(:name).select(:name, :age, :join_date)
967
+ # Now the table is ordered by name, and the names are: Allen, David,
968
+ # James, James, Mark, Paul, Paul, Teddy. So there are groups of size 1,
969
+ # 1, 2, 1, 2, and 1. Six groups in all.
970
+ expect(tab.groups.size).to eq(6)
971
+ expect(tab.groups[0].size).to eq(1)
972
+ expect(tab.groups[1].size).to eq(1)
973
+ expect(tab.groups[2].size).to eq(2)
974
+ tab.groups[2].each do |row|
975
+ expect(row[:name]).to eq('James')
976
+ end
977
+ expect(tab.groups[3].size).to eq(1)
978
+ expect(tab.groups[4].size).to eq(2)
979
+ tab.groups[4].each do |row|
980
+ expect(row[:name]).to eq('Paul')
981
+ end
982
+ expect(tab.groups[5].size).to eq(1)
983
+ end
984
+ end
985
+
773
986
  describe 'output' do
774
987
  it 'should be able to return itself as an array of arrays' do
775
988
  aoh = [
@@ -789,6 +1002,7 @@ EOS
789
1002
  # blocks.
790
1003
  tab =
791
1004
  [['Ref', 'Date', 'Code', 'Raw', 'Shares', 'Price', 'Info', 'Bool'],
1005
+ nil,
792
1006
  [1, '2013-05-02', 'P', 795_546.20, 795_546.2, 1.1850, 'ZMPEF1', 'T'],
793
1007
  [2, '2013-05-02', 'P', 118_186.40, 118_186.4, 11.8500, 'ZMPEF1', 'T'],
794
1008
  [7, '2013-05-20', 'S', 12_000.00, 5046.00, 28.2804, 'ZMEAC', 'F'],
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: fat_core
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.5.1
4
+ version: 1.5.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - Daniel E. Doherty
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2017-03-03 00:00:00.000000000 Z
11
+ date: 2017-03-07 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: simplecov