fat_table 0.4.0 → 0.5.3

Sign up to get free protection for your applications and to get access to all the features.
@@ -53,7 +53,17 @@ module FatTable
53
53
  class Table
54
54
  # An Array of FatTable::Columns that constitute the table.
55
55
  attr_reader :columns
56
- attr_accessor :boundaries
56
+
57
+ # Record boundaries set explicitly with mark_boundaries or from reading
58
+ # hlines from input. When we want to access boundaries, however, we want
59
+ # to add an implicit boundary at the last row of the table. Since, as the
60
+ # table grows, the implicit boundary changes index, we synthesize the
61
+ # boundaries by dynamically adding the final boundary with the #boundaries
62
+ # method call.
63
+ attr_accessor :explicit_boundaries
64
+
65
+ # An Array of FatTable::Columns that should be tolerant.
66
+ attr_reader :tolerant_columns
57
67
 
58
68
  ###########################################################################
59
69
  # Constructors
@@ -61,19 +71,71 @@ module FatTable
61
71
 
62
72
  # :category: Constructors
63
73
 
64
- # Return an empty FatTable::Table object.
65
- def initialize
74
+ # Return an empty FatTable::Table object. Specifying headers is optional.
75
+ # Any headers ending with a ! are marked as tolerant, in that, if an
76
+ # incompatible type is added to it, the column is re-typed as a String
77
+ # column, and construction proceeds. The ! is stripped from the header to
78
+ # form the column key, though. You can also provide the names of columns
79
+ # that should be tolerant by using the +tolerant_columns+ keyword to
80
+ # provide an array of headers that should be tolerant. The special string
81
+ # '*' or the symbol :* indicates that all columns should be created
82
+ # tolerant.
83
+ def initialize(*heads, tolerant_columns: [])
84
+ @columns = []
85
+ @explicit_boundaries = []
86
+ @tolerant_columns =
87
+ case tolerant_columns
88
+ when Array
89
+ tolerant_columns.map { |h| h.to_s.as_sym }
90
+ when String
91
+ if tolerant_columns.strip == '*'
92
+ ['*'.to_sym]
93
+ else
94
+ [tolerant_columns.as_sym]
95
+ end
96
+ when Symbol
97
+ if tolerant_columns.to_s.strip == '*'
98
+ ['*'.to_sym]
99
+ else
100
+ [tolerant_columns.to_s.as_sym]
101
+ end
102
+ else
103
+ raise ArgumentError, "set tolerant_columns to String, Symbol, or an Array of either"
104
+ end
105
+ unless heads.empty?
106
+ heads.each do |h|
107
+ if h.to_s.end_with?('!') || @tolerant_columns.include?(h)
108
+ @columns << Column.new(header: h.to_s.sub(/!\s*\z/, ''), tolerant: true)
109
+ else
110
+ @columns << Column.new(header: h)
111
+ end
112
+ end
113
+ end
114
+ end
115
+
116
+ # :category: Constructors
117
+
118
+ # Return an empty duplicate of self. This allows the library to create an
119
+ # empty table that preserves all the instance variables from self. Even
120
+ # though FatTable::Table objects have no instance variables, a class that
121
+ # inherits from it might.
122
+ def empty_dup
123
+ self.dup.__empty!
124
+ end
125
+
126
+ def __empty!
66
127
  @columns = []
67
- @boundaries = []
128
+ @explicit_boundaries = []
129
+ self
68
130
  end
69
131
 
70
132
  # :category: Constructors
71
133
 
72
134
  # Construct a Table from the contents of a CSV file named +fname+. Headers
73
135
  # will be taken from the first CSV row and converted to symbols.
74
- def self.from_csv_file(fname)
136
+ def self.from_csv_file(fname, tolerant_columns: [])
75
137
  File.open(fname, 'r') do |io|
76
- from_csv_io(io)
138
+ from_csv_io(io, tolerant_columns: tolerant_columns)
77
139
  end
78
140
  end
79
141
 
@@ -81,8 +143,8 @@ module FatTable
81
143
 
82
144
  # Construct a Table from a CSV string +str+, treated in the same manner as
83
145
  # the input from a CSV file in ::from_csv_file.
84
- def self.from_csv_string(str)
85
- from_csv_io(StringIO.new(str))
146
+ def self.from_csv_string(str, tolerant_columns: [])
147
+ from_csv_io(StringIO.new(str), tolerant_columns: tolerant_columns)
86
148
  end
87
149
 
88
150
  # :category: Constructors
@@ -91,9 +153,9 @@ module FatTable
91
153
  # file named +fname+. Headers are taken from the first row if the second row
92
154
  # is an hrule. Otherwise, synthetic headers of the form +:col_1+, +:col_2+,
93
155
  # etc. are created.
94
- def self.from_org_file(fname)
156
+ def self.from_org_file(fname, tolerant_columns: [])
95
157
  File.open(fname, 'r') do |io|
96
- from_org_io(io)
158
+ from_org_io(io, tolerant_columns: tolerant_columns)
97
159
  end
98
160
  end
99
161
 
@@ -101,8 +163,8 @@ module FatTable
101
163
 
102
164
  # Construct a Table from a string +str+, treated in the same manner as the
103
165
  # contents of an org-mode file in ::from_org_file.
104
- def self.from_org_string(str)
105
- from_org_io(StringIO.new(str))
166
+ def self.from_org_string(str, tolerant_columns: [])
167
+ from_org_io(StringIO.new(str), tolerant_columns: tolerant_columns)
106
168
  end
107
169
 
108
170
  # :category: Constructors
@@ -121,8 +183,8 @@ module FatTable
121
183
  # :hlines no +) org-mode strips all hrules from the table; otherwise (+
122
184
  # HEADER: :hlines yes +) they are indicated with nil elements in the outer
123
185
  # array.
124
- def self.from_aoa(aoa, hlines: false)
125
- from_array_of_arrays(aoa, hlines: hlines)
186
+ def self.from_aoa(aoa, hlines: false, tolerant_columns: [])
187
+ from_array_of_arrays(aoa, hlines: hlines, tolerant_columns: tolerant_columns)
126
188
  end
127
189
 
128
190
  # :category: Constructors
@@ -132,9 +194,9 @@ module FatTable
132
194
  # keys, which, when converted to symbols will become the headers for the
133
195
  # Table. If hlines is set true, mark a group boundary whenever a nil, rather
134
196
  # than a hash appears in the outer array.
135
- def self.from_aoh(aoh, hlines: false)
197
+ def self.from_aoh(aoh, hlines: false, tolerant_columns: [])
136
198
  if aoh.first.respond_to?(:to_h)
137
- from_array_of_hashes(aoh, hlines: hlines)
199
+ from_array_of_hashes(aoh, hlines: hlines, tolerant_columns: tolerant_columns)
138
200
  else
139
201
  raise UserError,
140
202
  "Cannot initialize Table with an array of #{input[0].class}"
@@ -153,7 +215,7 @@ module FatTable
153
215
 
154
216
  # Construct a Table by running a SQL +query+ against the database set up
155
217
  # with FatTable.connect, with the rows of the query result as rows.
156
- def self.from_sql(query)
218
+ def self.from_sql(query, tolerant_columns: [])
157
219
  msg = 'FatTable.db must be set with FatTable.connect'
158
220
  raise UserError, msg if FatTable.db.nil?
159
221
 
@@ -175,8 +237,8 @@ module FatTable
175
237
  # Construct table from an array of hashes or an array of any object that
176
238
  # can respond to #to_h. If an array element is a nil, mark it as a group
177
239
  # boundary in the Table.
178
- def from_array_of_hashes(hashes, hlines: false)
179
- result = new
240
+ def from_array_of_hashes(hashes, hlines: false, tolerant_columns: [])
241
+ result = new(tolerant_columns: tolerant_columns)
180
242
  hashes.each do |hsh|
181
243
  if hsh.nil?
182
244
  unless hlines
@@ -188,6 +250,7 @@ module FatTable
188
250
  end
189
251
  result << hsh.to_h
190
252
  end
253
+ result.normalize_boundaries
191
254
  result
192
255
  end
193
256
 
@@ -203,8 +266,8 @@ module FatTable
203
266
  # hlines are stripped from the table, otherwise (:hlines yes) they are
204
267
  # indicated with nil elements in the outer array as expected by this
205
268
  # method when hlines is set true.
206
- def from_array_of_arrays(rows, hlines: false)
207
- result = new
269
+ def from_array_of_arrays(rows, hlines: false, tolerant_columns: [])
270
+ result = new(tolerant_columns: tolerant_columns)
208
271
  headers = []
209
272
  if !hlines
210
273
  # Take the first row as headers
@@ -236,15 +299,17 @@ module FatTable
236
299
  hash_row = Hash[headers.zip(row)]
237
300
  result << hash_row
238
301
  end
302
+ result.normalize_boundaries
239
303
  result
240
304
  end
241
305
 
242
- def from_csv_io(io)
243
- result = new
306
+ def from_csv_io(io, tolerant_columns: [])
307
+ result = new(tolerant_columns: tolerant_columns)
244
308
  ::CSV.new(io, headers: true, header_converters: :symbol,
245
309
  skip_blanks: true).each do |row|
246
310
  result << row.to_h
247
311
  end
312
+ result.normalize_boundaries
248
313
  result
249
314
  end
250
315
 
@@ -252,7 +317,7 @@ module FatTable
252
317
  # header row must be marked with an hline (i.e, a row that looks like
253
318
  # '|---+--...--|') and groups of rows may be marked with hlines to
254
319
  # indicate group boundaries.
255
- def from_org_io(io)
320
+ def from_org_io(io, tolerant_columns: [])
256
321
  table_re = /\A\s*\|/
257
322
  hrule_re = /\A\s*\|[-+]+/
258
323
  rows = []
@@ -287,7 +352,7 @@ module FatTable
287
352
  rows << line.split('|').map(&:clean)
288
353
  end
289
354
  end
290
- from_array_of_arrays(rows, hlines: true)
355
+ from_array_of_arrays(rows, hlines: true, tolerant_columns: tolerant_columns)
291
356
  end
292
357
  end
293
358
 
@@ -317,8 +382,11 @@ module FatTable
317
382
  # Set the column type for Column with the given +key+ as a String type,
318
383
  # but only if empty. Otherwise, we would have to worry about converting
319
384
  # existing items in the column to String. Perhaps that's a TODO.
320
- def set_column_to_string_type(key)
321
- column(key).force_to_string_type
385
+ def force_string!(*keys)
386
+ keys.each do |h|
387
+ column(h).force_string!
388
+ end
389
+ self
322
390
  end
323
391
 
324
392
  # :category: Attributes
@@ -378,6 +446,15 @@ module FatTable
378
446
 
379
447
  # :category: Attributes
380
448
 
449
+ # Return whether the column with the given head should be made tolerant.
450
+ def tolerant_col?(h)
451
+ return true if tolerant_columns.include?(:'*')
452
+
453
+ tolerant_columns.include?(h)
454
+ end
455
+
456
+ # :category: Attributes
457
+
381
458
  # Return the number of rows in the Table.
382
459
  def size
383
460
  return 0 if columns.empty?
@@ -426,8 +503,6 @@ module FatTable
426
503
  # large table, that would require that we construct all the rows for a range
427
504
  # of any size.
428
505
  def rows_range(first = 0, last = nil) # :nodoc:
429
- last ||= size - 1
430
- last = [last, 0].max
431
506
  raise UserError, 'first must be <= last' unless first <= last
432
507
 
433
508
  rows = []
@@ -473,6 +548,8 @@ module FatTable
473
548
  # the headers from the body) marks a boundary for the row immediately
474
549
  # preceding the hline.
475
550
  #
551
+ # Boundaries can also be added manually with the +mark_boundary+ method.
552
+ #
476
553
  # The #order_by method resets the boundaries then adds boundaries at the
477
554
  # last row of each group of rows on which the sort keys were equal as a
478
555
  # boundary.
@@ -506,6 +583,43 @@ module FatTable
506
583
  groups
507
584
  end
508
585
 
586
+ # Return the number of groups in the table.
587
+ def number_of_groups
588
+ empty? ? 0 : boundaries.size
589
+ end
590
+
591
+ # Return the range of row indexes for boundary number +k+
592
+ def group_row_range(k)
593
+ last_k = boundaries.size - 1
594
+ if k < 0 || k > last_k
595
+ raise ArgumentError, "boundary number '#{k}' out of range in boundary_row_range"
596
+ end
597
+
598
+ if boundaries.size == 1
599
+ (0..boundaries.first)
600
+ elsif k.zero?
601
+ # Keep index at or above zero
602
+ (0..boundaries[k])
603
+ else
604
+ ((boundaries[k - 1] + 1)..boundaries[k])
605
+ end
606
+ end
607
+
608
+ # Return an Array of Column objects for header +col+ representing a
609
+ # sub-column for each group in the table under that header.
610
+ def group_cols(col)
611
+ normalize_boundaries
612
+ cols = []
613
+ (0..boundaries.size - 1).each do |k|
614
+ range = group_row_range(k)
615
+ tab_col = column(col)
616
+ gitems = tab_col.items[range]
617
+ cols << Column.new(header: col, items: gitems,
618
+ type: tab_col.type, tolerant: tab_col.tolerant?)
619
+ end
620
+ cols
621
+ end
622
+
509
623
  # :category: Operators
510
624
 
511
625
  # Return this table mutated with all groups removed. Useful after something
@@ -513,56 +627,99 @@ module FatTable
513
627
  # the groups displayed in the output. This modifies the input table, so is a
514
628
  # departure from the otherwise immutability of Tables.
515
629
  def degroup!
516
- @boundaries = []
630
+ self.explicit_boundaries = []
517
631
  self
518
632
  end
519
633
 
520
634
  # Mark a group boundary at row +row+, and if +row+ is +nil+, mark the last
521
- # row in the table as a group boundary. This is mainly used for internal
522
- # purposes.
523
- def mark_boundary(row = nil) # :nodoc:
524
- if row
525
- boundaries.push(row)
526
- else
527
- boundaries.push(size - 1)
635
+ # row in the table as a group boundary. An attempt to add a boundary to
636
+ # an empty table has no effect. We adopt the convention that the last row
637
+ # of the table always marks an implicit boundary even if it is not in the
638
+ # @explicit_boundaries array. When we "mark" a boundary, we intend it to
639
+ # be an explicit boundary, even if it marks the last row of the table.
640
+ def mark_boundary(row_num = nil)
641
+ return self if empty?
642
+
643
+ if row_num
644
+ unless row_num < size
645
+ raise ArgumentError, "can't mark boundary at row #{row_num}, last row is #{size - 1}"
646
+ end
647
+ unless row_num >= 0
648
+ raise ArgumentError, "can't mark boundary at non-positive row #{row_num}"
649
+ end
650
+ explicit_boundaries.push(row_num)
651
+ elsif size > 0
652
+ explicit_boundaries.push(size - 1)
528
653
  end
654
+ normalize_boundaries
655
+ self
529
656
  end
530
657
 
531
- protected
532
-
533
658
  # :stopdoc:
534
659
 
535
660
  # Make sure the explicit boundaries are unique and sorted (size - 1 is added by #boundaries).
536
661
  def normalize_boundaries
537
662
  unless empty?
538
- boundaries.push(size - 1) unless boundaries.include?(size - 1)
539
- self.boundaries = boundaries.uniq.sort
663
+ self.explicit_boundaries = explicit_boundaries.uniq.sort
664
+ end
665
+ explicit_boundaries
666
+ end
667
+
668
+ # Return the explicit_boundaries, augmented by an implicit boundary for
669
+ # the end of the table, unless it's already an explicit boundary.
670
+ def boundaries
671
+ return [] if empty?
672
+
673
+ if explicit_boundaries.last == size - 1
674
+ explicit_boundaries
675
+ else
676
+ explicit_boundaries + [size - 1]
540
677
  end
541
- boundaries
542
678
  end
543
679
 
680
+ protected
681
+
544
682
  # Concatenate the array of argument bounds to this table's boundaries, but
545
683
  # increase each of the indexes in bounds by shift. This is used in the
546
684
  # #union_all method.
547
685
  def append_boundaries(bounds, shift: 0)
548
- @boundaries += bounds.map { |k| k + shift }
686
+ @explicit_boundaries += bounds.map { |k| k + shift }
549
687
  end
550
688
 
551
- # Return the group number to which row ~row~ belongs. Groups, from the
552
- # user's point of view are indexed starting at 1.
553
- def row_index_to_group_index(row)
689
+ # Return the group number to which row ~row_num~ belongs. Groups, from the
690
+ # user's point of view, are indexed starting at 1; 0 means no group was found.
691
+ def row_index_to_group_index(row_num)
554
692
  boundaries.each_with_index do |b_last, g_num|
555
- return (g_num + 1) if row <= b_last
693
+ return (g_num + 1) if row_num <= b_last
694
+ end
695
+ 0
696
+ end
697
+
698
+ # Return the index of the first row in group number +grp_num+
699
+ def first_row_num_in_group(grp_num)
700
+ if grp_num >= boundaries.size || grp_num < 0
701
+ raise ArgumentError, "group number #{grp_num} out of bounds"
702
+ end
703
+
704
+ grp_num.zero? ? 0 : boundaries[grp_num - 1] + 1
705
+ end
706
+
707
+ # Return the index of the last row in group number +grp_num+
708
+ def last_row_num_in_group(grp_num)
709
+ if grp_num > boundaries.size || grp_num < 0
710
+ raise ArgumentError, "group number #{grp_num} out of bounds"
711
+ else
712
+ boundaries[grp_num]
556
713
  end
557
- 1
558
714
  end
559
715
 
560
- def group_rows(row) # :nodoc:
716
+ # Return the rows for group number +grp_num+.
717
+ def group_rows(grp_num) # :nodoc:
561
718
  normalize_boundaries
562
- return [] unless row < boundaries.size
719
+ return [] unless grp_num < boundaries.size
563
720
 
564
- first = row.zero? ? 0 : boundaries[row - 1] + 1
565
- last = boundaries[row]
721
+ first = first_row_num_in_group(grp_num)
722
+ last = last_row_num_in_group(grp_num)
566
723
  rows_range(first, last)
567
724
  end
568
725
 
@@ -587,22 +744,43 @@ module FatTable
587
744
  # After sorting, the output Table will have group boundaries added after
588
745
  # each row where the sort key changes.
589
746
  def order_by(*sort_heads)
590
- sort_heads = [sort_heads].flatten
591
- rev_heads = sort_heads.select { |h| h.to_s.ends_with?('!') }
592
- sort_heads = sort_heads.map { |h| h.to_s.sub(/\!\z/, '').to_sym }
593
- rev_heads = rev_heads.map { |h| h.to_s.sub(/\!\z/, '').to_sym }
747
+ # Sort the rows in order and add to new_rows.
748
+ key_hash = partition_sort_keys(sort_heads)
594
749
  new_rows = rows.sort do |r1, r2|
595
- key1 = sort_heads.map { |h| rev_heads.include?(h) ? r2[h] : r1[h] }
596
- key2 = sort_heads.map { |h| rev_heads.include?(h) ? r1[h] : r2[h] }
597
- key1 <=> key2
750
+ # Set the sort keys based on direction
751
+ key1 = []
752
+ key2 = []
753
+ key_hash.each_pair do |h, dir|
754
+ if dir == :forward
755
+ key1 << r1[h]
756
+ key2 << r2[h]
757
+ else
758
+ key1 << r2[h]
759
+ key2 << r1[h]
760
+ end
761
+ end
762
+ # Make any booleans comparable with <=>
763
+ key1 = key1.map_booleans
764
+ key2 = key2.map_booleans
765
+
766
+ # If there are any nils, <=> will return nil, and we have to use the
767
+ # special comparison method, compare_with_nils, instead.
768
+ result = (key1 <=> key2)
769
+ result.nil? ? compare_with_nils(key1, key2) : result
598
770
  end
599
- # Add the new rows to the table, but mark a group boundary at the points
600
- # where the sort key changes value.
601
- new_tab = Table.new
771
+
772
+ # Add the new_rows to the table, but mark a group boundary at the points
773
+ # where the sort key changes value. NB: I use empty_dup here
774
+ # rather than Table.new because if this class is inherited, I want the
775
+ # new_tab to be an instance of the subclass. With Table.new, this
776
+ # method's result will be an instance of FatTable::Table rather than of
777
+ # the subclass.
778
+ new_tab = empty_dup
602
779
  last_key = nil
603
780
  new_rows.each_with_index do |nrow, k|
604
781
  new_tab << nrow
605
- key = nrow.fetch_values(*sort_heads)
782
+ # key = nrow.fetch_values(*sort_heads)
783
+ key = nrow.fetch_values(*key_hash.keys)
606
784
  new_tab.mark_boundary(k - 1) if last_key && key != last_key
607
785
  last_key = key
608
786
  end
@@ -610,6 +788,33 @@ module FatTable
610
788
  new_tab
611
789
  end
612
790
 
791
+ # :category: Operators
792
+
793
+ # Return a new Table sorting the rows of this Table on any expression
794
+ # +expr+ that is valid with the +select+ method, except that the
795
+ # expression may end with an exclamation mark +!+ to indicate a reverse
796
+ # sort. The new table will have an additional column called +sort_key+
797
+ # populated with the result of evaluating the given expression and will be
798
+ # sorted (or reverse sorted) on that column.
799
+ #
800
+ # tab.order_with('date.year') => table sorted by date's year
801
+ # tab.order_with('date.year!') => table reverse sorted by date's year
802
+ #
803
+ # After sorting, the output Table will have group boundaries added after
804
+ # each row where the sort key changes.
805
+ def order_with(expr)
806
+ unless expr.is_a?(String)
807
+ raise "must call FatTable::Table\#order_with with a single string expression"
808
+ end
809
+ rev = false
810
+ if expr.match?(/\s*!\s*\z/)
811
+ rev = true
812
+ expr = expr.sub(/\s*!\s*\z/, '')
813
+ end
814
+ sort_sym = rev ? :sort_key! : :sort_key
815
+ dup.select(*headers, sort_key: expr).order_by(sort_sym)
816
+ end
817
+
613
818
  # :category: Operators
614
819
  #
615
820
  # Return a Table having the selected column expressions. Each expression can
@@ -713,7 +918,7 @@ module FatTable
713
918
  before: before_hook,
714
919
  after: after_hook)
715
920
  # Compute the new Table from this Table
716
- result = Table.new
921
+ result = empty_dup
717
922
  normalize_boundaries
718
923
  rows.each_with_index do |old_row, old_k|
719
924
  # Set the group number in the before hook and run the hook with the
@@ -723,7 +928,15 @@ module FatTable
723
928
  ev.eval_before_hook(locals: old_row)
724
929
  # Compute the new row.
725
930
  new_row = {}
726
- cols.each do |k|
931
+ # Allow the :omni col to stand for all columns if it is alone and
932
+ # first.
933
+ cols_to_include =
934
+ if cols.size == 1 && cols.first.as_sym == :omni
935
+ headers
936
+ else
937
+ cols
938
+ end
939
+ cols_to_include.each do |k|
727
940
  h = k.as_sym
728
941
  msg = "Column '#{h}' in select does not exist"
729
942
  raise UserError, msg unless column?(h)
@@ -752,7 +965,7 @@ module FatTable
752
965
  ev.eval_after_hook(locals: new_row)
753
966
  result << new_row
754
967
  end
755
- result.boundaries = boundaries
968
+ result.explicit_boundaries = explicit_boundaries
756
969
  result.normalize_boundaries
757
970
  result
758
971
  end
@@ -770,9 +983,14 @@ module FatTable
770
983
  # tab.where('@row.even? && shares > 500') => even rows with lots of shares
771
984
  def where(expr)
772
985
  expr = expr.to_s
773
- result = Table.new
986
+ result = empty_dup
774
987
  headers.each do |h|
775
- col = Column.new(header: h)
988
+ col =
989
+ if tolerant_col?(h)
990
+ Column.new(header: h, tolerant: true)
991
+ else
992
+ Column.new(header: h)
993
+ end
776
994
  result.add_column(col)
777
995
  end
778
996
  ev = Evaluator.new(ivars: { row: 0, group: 0 })
@@ -792,7 +1010,7 @@ module FatTable
792
1010
  # Return a new table with all duplicate rows eliminated. Resets groups. Same
793
1011
  # as #uniq.
794
1012
  def distinct
795
- result = Table.new
1013
+ result = empty_dup
796
1014
  uniq_rows = rows.uniq
797
1015
  uniq_rows.each do |row|
798
1016
  result << row
@@ -889,38 +1107,6 @@ module FatTable
889
1107
  set_operation(other, :difference, distinct: false)
890
1108
  end
891
1109
 
892
- private
893
-
894
- # Apply the set operation given by ~oper~ between this table and the other
895
- # table given in the first argument. If distinct is true, eliminate
896
- # duplicates from the result.
897
- def set_operation(other, oper = :+, distinct: true, add_boundaries: true, inherit_boundaries: false)
898
- unless columns.size == other.columns.size
899
- msg = "can't apply set ops to tables with a different number of columns"
900
- raise UserError, msg
901
- end
902
- unless columns.map(&:type) == other.columns.map(&:type)
903
- msg = "can't apply a set ops to tables with different column types."
904
- raise UserError, msg
905
- end
906
- other_rows = other.rows.map { |r| r.replace_keys(headers) }
907
- result = Table.new
908
- new_rows = rows.send(oper, other_rows)
909
- new_rows.each_with_index do |row, k|
910
- result << row
911
- result.mark_boundary if k == size - 1 && add_boundaries
912
- end
913
- if inherit_boundaries
914
- result.boundaries = normalize_boundaries
915
- other.normalize_boundaries
916
- result.append_boundaries(other.boundaries, shift: size)
917
- end
918
- result.normalize_boundaries
919
- distinct ? result.distinct : result
920
- end
921
-
922
- public
923
-
924
1110
  # An Array of symbols for the valid join types.
925
1111
  JOIN_TYPES = %i[inner left right full cross].freeze
926
1112
 
@@ -1011,7 +1197,7 @@ module FatTable
1011
1197
  join_exp, other_common_heads =
1012
1198
  build_join_expression(exps, other, join_type)
1013
1199
  ev = Evaluator.new
1014
- result = Table.new
1200
+ result = empty_dup
1015
1201
  other_rows = other.rows
1016
1202
  other_row_matches = Array.new(other_rows.size, false)
1017
1203
  rows.each do |self_row|
@@ -1029,14 +1215,14 @@ module FatTable
1029
1215
  type: join_type)
1030
1216
  result << out_row
1031
1217
  end
1032
- next unless %i[left full].include?(join_type)
1218
+ next unless [:left, :full].include?(join_type)
1033
1219
  next if self_row_matched
1034
1220
 
1035
1221
  result << build_out_row(row_a: self_row,
1036
1222
  row_b: other_row_nils,
1037
1223
  type: join_type)
1038
1224
  end
1039
- if %i[right full].include?(join_type)
1225
+ if [:right, :full].include?(join_type)
1040
1226
  other_rows.each_with_index do |other_row, k|
1041
1227
  next if other_row_matches[k]
1042
1228
 
@@ -1165,7 +1351,7 @@ module FatTable
1165
1351
  partial_result = nil
1166
1352
  else
1167
1353
  # First of a pair of _a or _b
1168
- partial_result = String.new("(#{a_head}_a == ")
1354
+ partial_result = +"(#{a_head}_a == "
1169
1355
  end
1170
1356
  last_sym = a_head
1171
1357
  when /\A(?<sy>.*)_b\z/
@@ -1184,7 +1370,7 @@ module FatTable
1184
1370
  partial_result = nil
1185
1371
  else
1186
1372
  # First of a pair of _a or _b
1187
- partial_result = String.new("(#{b_head}_b == ")
1373
+ partial_result = +"(#{b_head}_b == "
1188
1374
  end
1189
1375
  b_common_heads << b_head
1190
1376
  last_sym = b_head
@@ -1259,7 +1445,7 @@ module FatTable
1259
1445
  groups = sorted_tab.rows.group_by do |r|
1260
1446
  group_cols.map { |k| r[k] }
1261
1447
  end
1262
- result = Table.new
1448
+ result = empty_dup
1263
1449
  groups.each_pair do |_vals, grp_rows|
1264
1450
  result << row_from_group(grp_rows, group_cols, agg_cols)
1265
1451
  end
@@ -1269,6 +1455,9 @@ module FatTable
1269
1455
 
1270
1456
  private
1271
1457
 
1458
+ # Collapse a group of rows to a single row by applying the aggregator from
1459
+ # the +agg_cols+ to the items in that column and the presumably identical
1460
+ # value in the +grp_cols+ to those columns.
1272
1461
  def row_from_group(rows, grp_cols, agg_cols)
1273
1462
  new_row = {}
1274
1463
  grp_cols.each do |h|
@@ -1291,15 +1480,6 @@ module FatTable
1291
1480
 
1292
1481
  # :category: Constructors
1293
1482
 
1294
- # Add a group boundary mark at the given row, or at the end of the table
1295
- # by default.
1296
- def add_boundary(at_row = nil)
1297
- row = at_row || (size - 1)
1298
- @boundaries << row
1299
- end
1300
-
1301
- # :category: Constructors
1302
-
1303
1483
  # Add a +row+ represented by a Hash having the headers as keys. If +mark:+
1304
1484
  # is set true, mark this row as a boundary. All tables should be built
1305
1485
  # ultimately using this method as a primitive.
@@ -1312,7 +1492,7 @@ module FatTable
1312
1492
  # This column is new, so it needs nil items for all prior rows lest
1313
1493
  # the value be added to a prior row.
1314
1494
  items = Array.new(size, nil)
1315
- columns << Column.new(header: h, items: items)
1495
+ columns << Column.new(header: h, items: items, tolerant: tolerant_col?(h))
1316
1496
  end
1317
1497
  headers.each do |h|
1318
1498
  # NB: This adds a nil if h is not in row.
@@ -1486,5 +1666,74 @@ module FatTable
1486
1666
  yield fmt if block_given?
1487
1667
  fmt.output
1488
1668
  end
1669
+
1670
+ private
1671
+
1672
+ # Apply the set operation given by ~oper~ between this table and the other
1673
+ # table given in the first argument. If distinct is true, eliminate
1674
+ # duplicates from the result.
1675
+ def set_operation(other, oper = :+, distinct: true, add_boundaries: true, inherit_boundaries: false)
1676
+ unless columns.size == other.columns.size
1677
+ msg = "can't apply set ops to tables with a different number of columns"
1678
+ raise UserError, msg
1679
+ end
1680
+ unless columns.map(&:type) == other.columns.map(&:type)
1681
+ msg = "can't apply a set ops to tables with different column types."
1682
+ raise UserError, msg
1683
+ end
1684
+ other_rows = other.rows.map { |r| r.replace_keys(headers) }
1685
+ result = empty_dup
1686
+ new_rows = rows.send(oper, other_rows)
1687
+ new_rows.each_with_index do |row, k|
1688
+ result << row
1689
+ result.mark_boundary if k == size - 1 && add_boundaries
1690
+ end
1691
+ if inherit_boundaries
1692
+ result.explicit_boundaries = boundaries
1693
+ result.append_boundaries(other.boundaries, shift: size)
1694
+ end
1695
+ result.normalize_boundaries
1696
+ distinct ? result.distinct : result
1697
+ end
1698
+
1699
+ # Return a hash with the key being the header to sort on and the value
1700
+ # being either :forward or :reverse to indicate the sort order on that
1701
+ # key.
1702
+ def partition_sort_keys(keys)
1703
+ result = {}
1704
+ [keys].flatten.each do |h|
1705
+ if h.to_s.match?(/\s*!\s*\z/)
1706
+ result[h.to_s.sub(/\s*!\s*\z/, '').to_sym] = :reverse
1707
+ else
1708
+ result[h] = :forward
1709
+ end
1710
+ end
1711
+ result
1712
+ end
1713
+
1714
+ # The <=> operator cannot handle nils without some help. Treat a nil as
1715
+ # smaller than any other value, but equal to other nils. The two keys are assumed to be arrays of values to be
1716
+ # compared with <=>.
1717
+ def compare_with_nils(key1, key2)
1718
+ result = nil
1719
+ key1.zip(key2) do |k1, k2|
1720
+ if k1.nil? && k2.nil?
1721
+ result = 0
1722
+ next
1723
+ elsif k1.nil?
1724
+ result = -1
1725
+ break
1726
+ elsif k2.nil?
1727
+ result = 1
1728
+ break
1729
+ elsif (k1 <=> k2) == 0
1730
+ next
1731
+ else
1732
+ result = (k1 <=> k2)
1733
+ break
1734
+ end
1735
+ end
1736
+ result
1737
+ end
1489
1738
  end
1490
1739
  end