fat_table 0.4.0 → 0.5.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -53,7 +53,17 @@ module FatTable
53
53
  class Table
54
54
  # An Array of FatTable::Columns that constitute the table.
55
55
  attr_reader :columns
56
- attr_accessor :boundaries
56
+
57
+ # Record boundaries set explicitly with mark_boundary or from reading
58
+ # hlines from input. When we want to access boundaries, however, we want
59
+ # to add an implicit boundary at the last row of the table. Since, as the
60
+ # table grows, the implicit boundary changes index, we synthesize the
61
+ # boundaries by dynamically adding the final boundary with the #boundaries
62
+ # method call.
63
+ attr_accessor :explicit_boundaries
64
+
65
+ # An Array of header symbols naming the columns that should be tolerant.
66
+ attr_reader :tolerant_columns
57
67
 
58
68
  ###########################################################################
59
69
  # Constructors
@@ -61,19 +71,71 @@ module FatTable
61
71
 
62
72
  # :category: Constructors
63
73
 
64
- # Return an empty FatTable::Table object.
65
- def initialize
74
+ # Return an empty FatTable::Table object. Specifying headers is optional.
75
+ # Any headers ending with a ! are marked as tolerant, in that, if an
76
+ # incompatible type is added to it, the column is re-typed as a String
77
+ # column, and construction proceeds. The ! is stripped from the header to
78
+ # form the column key, though. You can also provide the names of columns
79
+ # that should be tolerant by using the +tolerant_columns+ keyword to
80
+ # provide an array of headers that should be tolerant. The special string
81
+ # '*' or the symbol :* indicates that all columns should be created
82
+ # tolerant.
83
+ def initialize(*heads, tolerant_columns: [])
84
+ @columns = []
85
+ @explicit_boundaries = []
86
+ @tolerant_columns =
87
+ case tolerant_columns
88
+ when Array
89
+ tolerant_columns.map { |h| h.to_s.as_sym }
90
+ when String
91
+ if tolerant_columns.strip == '*'
92
+ ['*'.to_sym]
93
+ else
94
+ [tolerant_columns.as_sym]
95
+ end
96
+ when Symbol
97
+ if tolerant_columns.to_s.strip == '*'
98
+ ['*'.to_sym]
99
+ else
100
+ [tolerant_columns.to_s.as_sym]
101
+ end
102
+ else
103
+ raise ArgumentError, "set tolerant_columns to String, Symbol, or an Array of either"
104
+ end
105
+ unless heads.empty?
106
+ heads.each do |h|
107
+ if h.to_s.end_with?('!') || @tolerant_columns.include?(h)
108
+ @columns << Column.new(header: h.to_s.sub(/!\s*\z/, ''), tolerant: true)
109
+ else
110
+ @columns << Column.new(header: h)
111
+ end
112
+ end
113
+ end
114
+ end
115
+
116
+ # :category: Constructors
117
+
118
+ # Return an empty duplicate of self. This allows the library to create an
119
+ # empty table that preserves all the instance variables from self. Even
120
+ # though FatTable::Table objects have no instance variables, a class that
121
+ # inherits from it might.
122
+ def empty_dup
123
+ self.dup.__empty!
124
+ end
125
+
126
+ def __empty!
66
127
  @columns = []
67
- @boundaries = []
128
+ @explicit_boundaries = []
129
+ self
68
130
  end
69
131
 
70
132
  # :category: Constructors
71
133
 
72
134
  # Construct a Table from the contents of a CSV file named +fname+. Headers
73
135
  # will be taken from the first CSV row and converted to symbols.
74
- def self.from_csv_file(fname)
136
+ def self.from_csv_file(fname, tolerant_columns: [])
75
137
  File.open(fname, 'r') do |io|
76
- from_csv_io(io)
138
+ from_csv_io(io, tolerant_columns: tolerant_columns)
77
139
  end
78
140
  end
79
141
 
@@ -81,8 +143,8 @@ module FatTable
81
143
 
82
144
  # Construct a Table from a CSV string +str+, treated in the same manner as
83
145
  # the input from a CSV file in ::from_csv_file.
84
- def self.from_csv_string(str)
85
- from_csv_io(StringIO.new(str))
146
+ def self.from_csv_string(str, tolerant_columns: [])
147
+ from_csv_io(StringIO.new(str), tolerant_columns: tolerant_columns)
86
148
  end
87
149
 
88
150
  # :category: Constructors
@@ -91,9 +153,9 @@ module FatTable
91
153
  # file named +fname+. Headers are taken from the first row if the second row
92
154
  # is an hrule. Otherwise, synthetic headers of the form +:col_1+, +:col_2+,
93
155
  # etc. are created.
94
- def self.from_org_file(fname)
156
+ def self.from_org_file(fname, tolerant_columns: [])
95
157
  File.open(fname, 'r') do |io|
96
- from_org_io(io)
158
+ from_org_io(io, tolerant_columns: tolerant_columns)
97
159
  end
98
160
  end
99
161
 
@@ -101,8 +163,8 @@ module FatTable
101
163
 
102
164
  # Construct a Table from a string +str+, treated in the same manner as the
103
165
  # contents of an org-mode file in ::from_org_file.
104
- def self.from_org_string(str)
105
- from_org_io(StringIO.new(str))
166
+ def self.from_org_string(str, tolerant_columns: [])
167
+ from_org_io(StringIO.new(str), tolerant_columns: tolerant_columns)
106
168
  end
107
169
 
108
170
  # :category: Constructors
@@ -121,8 +183,8 @@ module FatTable
121
183
  # :hlines no +) org-mode strips all hrules from the table; otherwise (+
122
184
  # HEADER: :hlines yes +) they are indicated with nil elements in the outer
123
185
  # array.
124
- def self.from_aoa(aoa, hlines: false)
125
- from_array_of_arrays(aoa, hlines: hlines)
186
+ def self.from_aoa(aoa, hlines: false, tolerant_columns: [])
187
+ from_array_of_arrays(aoa, hlines: hlines, tolerant_columns: tolerant_columns)
126
188
  end
127
189
 
128
190
  # :category: Constructors
@@ -132,9 +194,9 @@ module FatTable
132
194
  # keys, which, when converted to symbols will become the headers for the
133
195
  # Table. If hlines is set true, mark a group boundary whenever a nil, rather
134
196
  # than a hash appears in the outer array.
135
- def self.from_aoh(aoh, hlines: false)
197
+ def self.from_aoh(aoh, hlines: false, tolerant_columns: [])
136
198
  if aoh.first.respond_to?(:to_h)
137
- from_array_of_hashes(aoh, hlines: hlines)
199
+ from_array_of_hashes(aoh, hlines: hlines, tolerant_columns: tolerant_columns)
138
200
  else
139
201
  raise UserError,
140
202
  "Cannot initialize Table with an array of #{input[0].class}"
@@ -153,7 +215,7 @@ module FatTable
153
215
 
154
216
  # Construct a Table by running a SQL +query+ against the database set up
155
217
  # with FatTable.connect, with the rows of the query result as rows.
156
- def self.from_sql(query)
218
+ def self.from_sql(query, tolerant_columns: [])
157
219
  msg = 'FatTable.db must be set with FatTable.connect'
158
220
  raise UserError, msg if FatTable.db.nil?
159
221
 
@@ -175,8 +237,8 @@ module FatTable
175
237
  # Construct table from an array of hashes or an array of any object that
176
238
  # can respond to #to_h. If an array element is a nil, mark it as a group
177
239
  # boundary in the Table.
178
- def from_array_of_hashes(hashes, hlines: false)
179
- result = new
240
+ def from_array_of_hashes(hashes, hlines: false, tolerant_columns: [])
241
+ result = new(tolerant_columns: tolerant_columns)
180
242
  hashes.each do |hsh|
181
243
  if hsh.nil?
182
244
  unless hlines
@@ -188,6 +250,7 @@ module FatTable
188
250
  end
189
251
  result << hsh.to_h
190
252
  end
253
+ result.normalize_boundaries
191
254
  result
192
255
  end
193
256
 
@@ -203,8 +266,8 @@ module FatTable
203
266
  # hlines are stripped from the table, otherwise (:hlines yes) they are
204
267
  # indicated with nil elements in the outer array as expected by this
205
268
  # method when hlines is set true.
206
- def from_array_of_arrays(rows, hlines: false)
207
- result = new
269
+ def from_array_of_arrays(rows, hlines: false, tolerant_columns: [])
270
+ result = new(tolerant_columns: tolerant_columns)
208
271
  headers = []
209
272
  if !hlines
210
273
  # Take the first row as headers
@@ -236,15 +299,17 @@ module FatTable
236
299
  hash_row = Hash[headers.zip(row)]
237
300
  result << hash_row
238
301
  end
302
+ result.normalize_boundaries
239
303
  result
240
304
  end
241
305
 
242
- def from_csv_io(io)
243
- result = new
306
+ def from_csv_io(io, tolerant_columns: [])
307
+ result = new(tolerant_columns: tolerant_columns)
244
308
  ::CSV.new(io, headers: true, header_converters: :symbol,
245
309
  skip_blanks: true).each do |row|
246
310
  result << row.to_h
247
311
  end
312
+ result.normalize_boundaries
248
313
  result
249
314
  end
250
315
 
@@ -252,7 +317,7 @@ module FatTable
252
317
  # header row must be marked with an hline (i.e, a row that looks like
253
318
  # '|---+--...--|') and groups of rows may be marked with hlines to
254
319
  # indicate group boundaries.
255
- def from_org_io(io)
320
+ def from_org_io(io, tolerant_columns: [])
256
321
  table_re = /\A\s*\|/
257
322
  hrule_re = /\A\s*\|[-+]+/
258
323
  rows = []
@@ -287,7 +352,7 @@ module FatTable
287
352
  rows << line.split('|').map(&:clean)
288
353
  end
289
354
  end
290
- from_array_of_arrays(rows, hlines: true)
355
+ from_array_of_arrays(rows, hlines: true, tolerant_columns: tolerant_columns)
291
356
  end
292
357
  end
293
358
 
@@ -317,8 +382,11 @@ module FatTable
317
382
  # Set the column type for Column with the given +key+ as a String type,
318
383
  # but only if empty. Otherwise, we would have to worry about converting
319
384
  # existing items in the column to String. Perhaps that's a TODO.
320
- def set_column_to_string_type(key)
321
- column(key).force_to_string_type
385
+ def force_string!(*keys)
386
+ keys.each do |h|
387
+ column(h).force_string!
388
+ end
389
+ self
322
390
  end
323
391
 
324
392
  # :category: Attributes
@@ -378,6 +446,15 @@ module FatTable
378
446
 
379
447
  # :category: Attributes
380
448
 
449
+ # Return whether the column with the given head should be made tolerant.
450
+ def tolerant_col?(h)
451
+ return true if tolerant_columns.include?(:'*')
452
+
453
+ tolerant_columns.include?(h)
454
+ end
455
+
456
+ # :category: Attributes
457
+
381
458
  # Return the number of rows in the Table.
382
459
  def size
383
460
  return 0 if columns.empty?
@@ -426,8 +503,6 @@ module FatTable
426
503
  # large table, that would require that we construct all the rows for a range
427
504
  # of any size.
428
505
  def rows_range(first = 0, last = nil) # :nodoc:
429
- last ||= size - 1
430
- last = [last, 0].max
431
506
  raise UserError, 'first must be <= last' unless first <= last
432
507
 
433
508
  rows = []
@@ -473,6 +548,8 @@ module FatTable
473
548
  # the headers from the body) marks a boundary for the row immediately
474
549
  # preceding the hline.
475
550
  #
551
+ # Boundaries can also be added manually with the +mark_boundary+ method.
552
+ #
476
553
  # The #order_by method resets the boundaries then adds boundaries at the
477
554
  # last row of each group of rows on which the sort keys were equal as a
478
555
  # boundary.
@@ -506,6 +583,43 @@ module FatTable
506
583
  groups
507
584
  end
508
585
 
586
+ # Return the number of groups in the table.
587
+ def number_of_groups
588
+ empty? ? 0 : boundaries.size
589
+ end
590
+
591
+ # Return the range of row indexes for boundary number +k+
592
+ def group_row_range(k)
593
+ last_k = boundaries.size - 1
594
+ if k < 0 || k > last_k
595
+ raise ArgumentError, "boundary number '#{k}' out of range in boundary_row_range"
596
+ end
597
+
598
+ if boundaries.size == 1
599
+ (0..boundaries.first)
600
+ elsif k.zero?
601
+ # Keep index at or above zero
602
+ (0..boundaries[k])
603
+ else
604
+ ((boundaries[k - 1] + 1)..boundaries[k])
605
+ end
606
+ end
607
+
608
+ # Return an Array of Column objects for header +col+ representing a
609
+ # sub-column for each group in the table under that header.
610
+ def group_cols(col)
611
+ normalize_boundaries
612
+ cols = []
613
+ (0..boundaries.size - 1).each do |k|
614
+ range = group_row_range(k)
615
+ tab_col = column(col)
616
+ gitems = tab_col.items[range]
617
+ cols << Column.new(header: col, items: gitems,
618
+ type: tab_col.type, tolerant: tab_col.tolerant?)
619
+ end
620
+ cols
621
+ end
622
+
509
623
  # :category: Operators
510
624
 
511
625
  # Return this table mutated with all groups removed. Useful after something
@@ -513,56 +627,99 @@ module FatTable
513
627
  # the groups displayed in the output. This modifies the input table, so is a
514
628
  # departure from the otherwise immutability of Tables.
515
629
  def degroup!
516
- @boundaries = []
630
+ self.explicit_boundaries = []
517
631
  self
518
632
  end
519
633
 
520
634
  # Mark a group boundary at row +row+, and if +row+ is +nil+, mark the last
521
- # row in the table as a group boundary. This is mainly used for internal
522
- # purposes.
523
- def mark_boundary(row = nil) # :nodoc:
524
- if row
525
- boundaries.push(row)
526
- else
527
- boundaries.push(size - 1)
635
+ # row in the table as a group boundary. An attempt to add a boundary to
636
+ # an empty table has no effect. We adopt the convention that the last row
637
+ # of the table always marks an implicit boundary even if it is not in the
638
+ # @explicit_boundaries array. When we "mark" a boundary, we intend it to
639
+ # be an explicit boundary, even if it marks the last row of the table.
640
+ def mark_boundary(row_num = nil)
641
+ return self if empty?
642
+
643
+ if row_num
644
+ unless row_num < size
645
+ raise ArgumentError, "can't mark boundary at row #{row_num}, last row is #{size - 1}"
646
+ end
647
+ unless row_num >= 0
648
+ raise ArgumentError, "can't mark boundary at non-positive row #{row_num}"
649
+ end
650
+ explicit_boundaries.push(row_num)
651
+ elsif size > 0
652
+ explicit_boundaries.push(size - 1)
528
653
  end
654
+ normalize_boundaries
655
+ self
529
656
  end
530
657
 
531
- protected
532
-
533
658
  # :stopdoc:
534
659
 
535
660
  # Make sure the explicit boundaries are unique and sorted.
536
661
  def normalize_boundaries
537
662
  unless empty?
538
- boundaries.push(size - 1) unless boundaries.include?(size - 1)
539
- self.boundaries = boundaries.uniq.sort
663
+ self.explicit_boundaries = explicit_boundaries.uniq.sort
664
+ end
665
+ explicit_boundaries
666
+ end
667
+
668
+ # Return the explicit_boundaries, augmented by an implicit boundary for
669
+ # the end of the table, unless it's already an explicit boundary.
670
+ def boundaries
671
+ return [] if empty?
672
+
673
+ if explicit_boundaries.last == size - 1
674
+ explicit_boundaries
675
+ else
676
+ explicit_boundaries + [size - 1]
540
677
  end
541
- boundaries
542
678
  end
543
679
 
680
+ protected
681
+
544
682
  # Concatenate the array of argument bounds to this table's boundaries, but
545
683
  # increase each of the indexes in bounds by shift. This is used in the
546
684
  # #union_all method.
547
685
  def append_boundaries(bounds, shift: 0)
548
- @boundaries += bounds.map { |k| k + shift }
686
+ @explicit_boundaries += bounds.map { |k| k + shift }
549
687
  end
550
688
 
551
- # Return the group number to which row ~row~ belongs. Groups, from the
552
- # user's point of view are indexed starting at 1.
553
- def row_index_to_group_index(row)
689
+ # Return the group number to which row ~row_num~ belongs. Groups, from the
690
+ # user's point of view are indexed starting at 1; 0 means no group matched.
691
+ def row_index_to_group_index(row_num)
554
692
  boundaries.each_with_index do |b_last, g_num|
555
- return (g_num + 1) if row <= b_last
693
+ return (g_num + 1) if row_num <= b_last
694
+ end
695
+ 0
696
+ end
697
+
698
+ # Return the index of the first row in group number +grp_num+
699
+ def first_row_num_in_group(grp_num)
700
+ if grp_num >= boundaries.size || grp_num < 0
701
+ raise ArgumentError, "group number #{grp_num} out of bounds"
702
+ end
703
+
704
+ grp_num.zero? ? 0 : boundaries[grp_num - 1] + 1
705
+ end
706
+
707
+ # Return the index of the last row in group number +grp_num+
708
+ def last_row_num_in_group(grp_num)
709
+ if grp_num > boundaries.size || grp_num < 0
710
+ raise ArgumentError, "group number #{grp_num} out of bounds"
711
+ else
712
+ boundaries[grp_num]
556
713
  end
557
- 1
558
714
  end
559
715
 
560
- def group_rows(row) # :nodoc:
716
+ # Return the rows for group number +grp_num+.
717
+ def group_rows(grp_num) # :nodoc:
561
718
  normalize_boundaries
562
- return [] unless row < boundaries.size
719
+ return [] unless grp_num < boundaries.size
563
720
 
564
- first = row.zero? ? 0 : boundaries[row - 1] + 1
565
- last = boundaries[row]
721
+ first = first_row_num_in_group(grp_num)
722
+ last = last_row_num_in_group(grp_num)
566
723
  rows_range(first, last)
567
724
  end
568
725
 
@@ -587,22 +744,43 @@ module FatTable
587
744
  # After sorting, the output Table will have group boundaries added after
588
745
  # each row where the sort key changes.
589
746
  def order_by(*sort_heads)
590
- sort_heads = [sort_heads].flatten
591
- rev_heads = sort_heads.select { |h| h.to_s.ends_with?('!') }
592
- sort_heads = sort_heads.map { |h| h.to_s.sub(/\!\z/, '').to_sym }
593
- rev_heads = rev_heads.map { |h| h.to_s.sub(/\!\z/, '').to_sym }
747
+ # Sort the rows in order and add to new_rows.
748
+ key_hash = partition_sort_keys(sort_heads)
594
749
  new_rows = rows.sort do |r1, r2|
595
- key1 = sort_heads.map { |h| rev_heads.include?(h) ? r2[h] : r1[h] }
596
- key2 = sort_heads.map { |h| rev_heads.include?(h) ? r1[h] : r2[h] }
597
- key1 <=> key2
750
+ # Set the sort keys based on direction
751
+ key1 = []
752
+ key2 = []
753
+ key_hash.each_pair do |h, dir|
754
+ if dir == :forward
755
+ key1 << r1[h]
756
+ key2 << r2[h]
757
+ else
758
+ key1 << r2[h]
759
+ key2 << r1[h]
760
+ end
761
+ end
762
+ # Make any booleans comparable with <=>
763
+ key1 = key1.map_booleans
764
+ key2 = key2.map_booleans
765
+
766
+ # If there are any nils, <=> will return nil, and we have to use the
767
+ # special comparison method, compare_with_nils, instead.
768
+ result = (key1 <=> key2)
769
+ result.nil? ? compare_with_nils(key1, key2) : result
598
770
  end
599
- # Add the new rows to the table, but mark a group boundary at the points
600
- # where the sort key changes value.
601
- new_tab = Table.new
771
+
772
+ # Add the new_rows to the table, but mark a group boundary at the points
773
+ # where the sort key changes value. NB: I use empty_dup here
774
+ # rather than Table.new because if this class is inherited, I want the
775
+ # new_tab to be an instance of the subclass. With Table.new, this
776
+ # method's result will be an instance of FatTable::Table rather than of
777
+ # the subclass.
778
+ new_tab = empty_dup
602
779
  last_key = nil
603
780
  new_rows.each_with_index do |nrow, k|
604
781
  new_tab << nrow
605
- key = nrow.fetch_values(*sort_heads)
782
+ # key = nrow.fetch_values(*sort_heads)
783
+ key = nrow.fetch_values(*key_hash.keys)
606
784
  new_tab.mark_boundary(k - 1) if last_key && key != last_key
607
785
  last_key = key
608
786
  end
@@ -610,6 +788,33 @@ module FatTable
610
788
  new_tab
611
789
  end
612
790
 
791
+ # :category: Operators
792
+
793
+ # Return a new Table sorting the rows of this Table on any expression
794
+ # +expr+ that is valid with the +select+ method, except that the
795
+ # expression may end with an exclamation mark +!+ to indicate a reverse
796
+ # sort. The new table will have an additional column called +sort_key+
797
+ # populated with the result of evaluating the given expression and will be
798
+ # sorted (or reverse sorted) on that column.
799
+ #
800
+ # tab.order_with('date.year') => table sorted by date's year
801
+ # tab.order_with('date.year!') => table reverse sorted by date's year
802
+ #
803
+ # After sorting, the output Table will have group boundaries added after
804
+ # each row where the sort key changes.
805
+ def order_with(expr)
806
+ unless expr.is_a?(String)
807
+ raise "must call FatTable::Table\#order_with with a single string expression"
808
+ end
809
+ rev = false
810
+ if expr.match?(/\s*!\s*\z/)
811
+ rev = true
812
+ expr = expr.sub(/\s*!\s*\z/, '')
813
+ end
814
+ sort_sym = rev ? :sort_key! : :sort_key
815
+ dup.select(*headers, sort_key: expr).order_by(sort_sym)
816
+ end
817
+
613
818
  # :category: Operators
614
819
  #
615
820
  # Return a Table having the selected column expressions. Each expression can
@@ -713,7 +918,7 @@ module FatTable
713
918
  before: before_hook,
714
919
  after: after_hook)
715
920
  # Compute the new Table from this Table
716
- result = Table.new
921
+ result = empty_dup
717
922
  normalize_boundaries
718
923
  rows.each_with_index do |old_row, old_k|
719
924
  # Set the group number in the before hook and run the hook with the
@@ -723,7 +928,15 @@ module FatTable
723
928
  ev.eval_before_hook(locals: old_row)
724
929
  # Compute the new row.
725
930
  new_row = {}
726
- cols.each do |k|
931
+ # Allow the :omni col to stand for all columns if it is alone and
932
+ # first.
933
+ cols_to_include =
934
+ if cols.size == 1 && cols.first.as_sym == :omni
935
+ headers
936
+ else
937
+ cols
938
+ end
939
+ cols_to_include.each do |k|
727
940
  h = k.as_sym
728
941
  msg = "Column '#{h}' in select does not exist"
729
942
  raise UserError, msg unless column?(h)
@@ -752,7 +965,7 @@ module FatTable
752
965
  ev.eval_after_hook(locals: new_row)
753
966
  result << new_row
754
967
  end
755
- result.boundaries = boundaries
968
+ result.explicit_boundaries = explicit_boundaries
756
969
  result.normalize_boundaries
757
970
  result
758
971
  end
@@ -770,9 +983,14 @@ module FatTable
770
983
  # tab.where('@row.even? && shares > 500') => even rows with lots of shares
771
984
  def where(expr)
772
985
  expr = expr.to_s
773
- result = Table.new
986
+ result = empty_dup
774
987
  headers.each do |h|
775
- col = Column.new(header: h)
988
+ col =
989
+ if tolerant_col?(h)
990
+ Column.new(header: h, tolerant: true)
991
+ else
992
+ Column.new(header: h)
993
+ end
776
994
  result.add_column(col)
777
995
  end
778
996
  ev = Evaluator.new(ivars: { row: 0, group: 0 })
@@ -792,7 +1010,7 @@ module FatTable
792
1010
  # Return a new table with all duplicate rows eliminated. Resets groups. Same
793
1011
  # as #uniq.
794
1012
  def distinct
795
- result = Table.new
1013
+ result = empty_dup
796
1014
  uniq_rows = rows.uniq
797
1015
  uniq_rows.each do |row|
798
1016
  result << row
@@ -889,38 +1107,6 @@ module FatTable
889
1107
  set_operation(other, :difference, distinct: false)
890
1108
  end
891
1109
 
892
- private
893
-
894
- # Apply the set operation given by ~oper~ between this table and the other
895
- # table given in the first argument. If distinct is true, eliminate
896
- # duplicates from the result.
897
- def set_operation(other, oper = :+, distinct: true, add_boundaries: true, inherit_boundaries: false)
898
- unless columns.size == other.columns.size
899
- msg = "can't apply set ops to tables with a different number of columns"
900
- raise UserError, msg
901
- end
902
- unless columns.map(&:type) == other.columns.map(&:type)
903
- msg = "can't apply a set ops to tables with different column types."
904
- raise UserError, msg
905
- end
906
- other_rows = other.rows.map { |r| r.replace_keys(headers) }
907
- result = Table.new
908
- new_rows = rows.send(oper, other_rows)
909
- new_rows.each_with_index do |row, k|
910
- result << row
911
- result.mark_boundary if k == size - 1 && add_boundaries
912
- end
913
- if inherit_boundaries
914
- result.boundaries = normalize_boundaries
915
- other.normalize_boundaries
916
- result.append_boundaries(other.boundaries, shift: size)
917
- end
918
- result.normalize_boundaries
919
- distinct ? result.distinct : result
920
- end
921
-
922
- public
923
-
924
1110
  # An Array of symbols for the valid join types.
925
1111
  JOIN_TYPES = %i[inner left right full cross].freeze
926
1112
 
@@ -1011,7 +1197,7 @@ module FatTable
1011
1197
  join_exp, other_common_heads =
1012
1198
  build_join_expression(exps, other, join_type)
1013
1199
  ev = Evaluator.new
1014
- result = Table.new
1200
+ result = empty_dup
1015
1201
  other_rows = other.rows
1016
1202
  other_row_matches = Array.new(other_rows.size, false)
1017
1203
  rows.each do |self_row|
@@ -1029,14 +1215,14 @@ module FatTable
1029
1215
  type: join_type)
1030
1216
  result << out_row
1031
1217
  end
1032
- next unless %i[left full].include?(join_type)
1218
+ next unless [:left, :full].include?(join_type)
1033
1219
  next if self_row_matched
1034
1220
 
1035
1221
  result << build_out_row(row_a: self_row,
1036
1222
  row_b: other_row_nils,
1037
1223
  type: join_type)
1038
1224
  end
1039
- if %i[right full].include?(join_type)
1225
+ if [:right, :full].include?(join_type)
1040
1226
  other_rows.each_with_index do |other_row, k|
1041
1227
  next if other_row_matches[k]
1042
1228
 
@@ -1165,7 +1351,7 @@ module FatTable
1165
1351
  partial_result = nil
1166
1352
  else
1167
1353
  # First of a pair of _a or _b
1168
- partial_result = String.new("(#{a_head}_a == ")
1354
+ partial_result = +"(#{a_head}_a == "
1169
1355
  end
1170
1356
  last_sym = a_head
1171
1357
  when /\A(?<sy>.*)_b\z/
@@ -1184,7 +1370,7 @@ module FatTable
1184
1370
  partial_result = nil
1185
1371
  else
1186
1372
  # First of a pair of _a or _b
1187
- partial_result = String.new("(#{b_head}_b == ")
1373
+ partial_result = +"(#{b_head}_b == "
1188
1374
  end
1189
1375
  b_common_heads << b_head
1190
1376
  last_sym = b_head
@@ -1259,7 +1445,7 @@ module FatTable
1259
1445
  groups = sorted_tab.rows.group_by do |r|
1260
1446
  group_cols.map { |k| r[k] }
1261
1447
  end
1262
- result = Table.new
1448
+ result = empty_dup
1263
1449
  groups.each_pair do |_vals, grp_rows|
1264
1450
  result << row_from_group(grp_rows, group_cols, agg_cols)
1265
1451
  end
@@ -1269,6 +1455,9 @@ module FatTable
1269
1455
 
1270
1456
  private
1271
1457
 
1458
+ # Collapse a group of rows to a single row by applying the aggregator from
1459
+ # the +agg_cols+ to the items in that column and the presumably identical
1460
+ # value in the +grp_cols+ to those columns.
1272
1461
  def row_from_group(rows, grp_cols, agg_cols)
1273
1462
  new_row = {}
1274
1463
  grp_cols.each do |h|
@@ -1291,15 +1480,6 @@ module FatTable
1291
1480
 
1292
1481
  # :category: Constructors
1293
1482
 
1294
- # Add a group boundary mark at the given row, or at the end of the table
1295
- # by default.
1296
- def add_boundary(at_row = nil)
1297
- row = at_row || (size - 1)
1298
- @boundaries << row
1299
- end
1300
-
1301
- # :category: Constructors
1302
-
1303
1483
  # Add a +row+ represented by a Hash having the headers as keys. If +mark:+
1304
1484
  # is set true, mark this row as a boundary. All tables should be built
1305
1485
  # ultimately using this method as a primitive.
@@ -1312,7 +1492,7 @@ module FatTable
1312
1492
  # This column is new, so it needs nil items for all prior rows lest
1313
1493
  # the value be added to a prior row.
1314
1494
  items = Array.new(size, nil)
1315
- columns << Column.new(header: h, items: items)
1495
+ columns << Column.new(header: h, items: items, tolerant: tolerant_col?(h))
1316
1496
  end
1317
1497
  headers.each do |h|
1318
1498
  # NB: This adds a nil if h is not in row.
@@ -1486,5 +1666,74 @@ module FatTable
1486
1666
  yield fmt if block_given?
1487
1667
  fmt.output
1488
1668
  end
1669
+
1670
+ private
1671
+
1672
+ # Apply the set operation given by ~oper~ between this table and the other
1673
+ # table given in the first argument. If distinct is true, eliminate
1674
+ # duplicates from the result.
1675
+ def set_operation(other, oper = :+, distinct: true, add_boundaries: true, inherit_boundaries: false)
1676
+ unless columns.size == other.columns.size
1677
+ msg = "can't apply set ops to tables with a different number of columns"
1678
+ raise UserError, msg
1679
+ end
1680
+ unless columns.map(&:type) == other.columns.map(&:type)
1681
+ msg = "can't apply a set ops to tables with different column types."
1682
+ raise UserError, msg
1683
+ end
1684
+ other_rows = other.rows.map { |r| r.replace_keys(headers) }
1685
+ result = empty_dup
1686
+ new_rows = rows.send(oper, other_rows)
1687
+ new_rows.each_with_index do |row, k|
1688
+ result << row
1689
+ result.mark_boundary if k == size - 1 && add_boundaries
1690
+ end
1691
+ if inherit_boundaries
1692
+ result.explicit_boundaries = boundaries
1693
+ result.append_boundaries(other.boundaries, shift: size)
1694
+ end
1695
+ result.normalize_boundaries
1696
+ distinct ? result.distinct : result
1697
+ end
1698
+
1699
+ # Return a hash with the key being the header to sort on and the value
1700
+ # being either :forward or :reverse to indicate the sort order on that
1701
+ # key.
1702
+ def partition_sort_keys(keys)
1703
+ result = {}
1704
+ [keys].flatten.each do |h|
1705
+ if h.to_s.match?(/\s*!\s*\z/)
1706
+ result[h.to_s.sub(/\s*!\s*\z/, '').to_sym] = :reverse
1707
+ else
1708
+ result[h] = :forward
1709
+ end
1710
+ end
1711
+ result
1712
+ end
1713
+
1714
+ # The <=> operator cannot handle nils without some help. Treat a nil as
1715
+ # smaller than any other value, but equal to other nils. The two keys are assumed to be arrays of values to be
1716
+ # compared with <=>.
1717
+ def compare_with_nils(key1, key2)
1718
+ result = nil
1719
+ key1.zip(key2) do |k1, k2|
1720
+ if k1.nil? && k2.nil?
1721
+ result = 0
1722
+ next
1723
+ elsif k1.nil?
1724
+ result = -1
1725
+ break
1726
+ elsif k2.nil?
1727
+ result = 1
1728
+ break
1729
+ elsif (k1 <=> k2) == 0
1730
+ next
1731
+ else
1732
+ result = (k1 <=> k2)
1733
+ break
1734
+ end
1735
+ end
1736
+ result
1737
+ end
1489
1738
  end
1490
1739
  end