fat_table 0.4.0 → 0.5.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.rspec +2 -1
- data/README.org +1426 -447
- data/README.rdoc +1 -2
- data/TODO.org +17 -10
- data/examples/create_trans.sql +14 -0
- data/examples/quick.pdf +0 -0
- data/examples/quick.png +0 -0
- data/examples/quick.ppm +0 -0
- data/examples/quick.tex +8 -0
- data/examples/quick_small.png +0 -0
- data/examples/quicktable.tex +123 -0
- data/examples/trades.db +0 -0
- data/examples/trans.csv +13 -0
- data/fat_table.gemspec +1 -0
- data/lib/ext/array.rb +15 -0
- data/lib/fat_table/column.rb +89 -208
- data/lib/fat_table/convert.rb +174 -0
- data/lib/fat_table/errors.rb +4 -0
- data/lib/fat_table/evaluator.rb +7 -0
- data/lib/fat_table/footer.rb +228 -0
- data/lib/fat_table/formatters/formatter.rb +200 -166
- data/lib/fat_table/formatters/latex_formatter.rb +9 -7
- data/lib/fat_table/table.rb +366 -117
- data/lib/fat_table/version.rb +1 -1
- data/lib/fat_table.rb +19 -16
- data/md/README.md +1 -2
- metadata +31 -5
data/lib/fat_table/table.rb
CHANGED
@@ -53,7 +53,17 @@ module FatTable
|
|
53
53
|
class Table
|
54
54
|
# An Array of FatTable::Columns that constitute the table.
|
55
55
|
attr_reader :columns
|
56
|
-
|
56
|
+
|
57
|
+
# Record boundaries set explicitly with mark_boundary or from reading
|
58
|
+
# hlines from input. When we want to access boundaries, however, we want
|
59
|
+
# to add an implicit boundary at the last row of the table. Since, as the
|
60
|
+
# table grows, the implicit boundary changes index, we synthesize the
|
61
|
+
# boundaries by dynamically adding the final boundary with the #boundaries
|
62
|
+
# method call.
|
63
|
+
attr_accessor :explicit_boundaries
|
64
|
+
|
65
|
+
# An Array of FatTable::Columns that should be tolerant.
|
66
|
+
attr_reader :tolerant_columns
|
57
67
|
|
58
68
|
###########################################################################
|
59
69
|
# Constructors
|
@@ -61,19 +71,71 @@ module FatTable
|
|
61
71
|
|
62
72
|
# :category: Constructors
|
63
73
|
|
64
|
-
# Return an empty FatTable::Table object.
|
65
|
-
|
74
|
+
# Return an empty FatTable::Table object. Specifying headers is optional.
|
75
|
+
# Any headers ending with a ! are marked as tolerant, in that, if an
|
76
|
+
# incompatible type is added to it, the column is re-typed as a String
|
77
|
+
# column, and construction proceeds. The ! is stripped from the header to
|
78
|
+
# form the column key, though. You can also provide the names of columns
|
79
|
+
# that should be tolerant by using the +tolerant_columns+ keyword to
|
80
|
+
# provide an array of headers that should be tolerant. The special string
|
81
|
+
# '*' or the symbol :* indicates that all columns should be created
|
82
|
+
# tolerant.
|
83
|
+
def initialize(*heads, tolerant_columns: [])
|
84
|
+
@columns = []
|
85
|
+
@explicit_boundaries = []
|
86
|
+
@tolerant_columns =
|
87
|
+
case tolerant_columns
|
88
|
+
when Array
|
89
|
+
tolerant_columns.map { |h| h.to_s.as_sym }
|
90
|
+
when String
|
91
|
+
if tolerant_columns.strip == '*'
|
92
|
+
['*'.to_sym]
|
93
|
+
else
|
94
|
+
[tolerant_columns.as_sym]
|
95
|
+
end
|
96
|
+
when Symbol
|
97
|
+
if tolerant_columns.to_s.strip == '*'
|
98
|
+
['*'.to_sym]
|
99
|
+
else
|
100
|
+
[tolerant_columns.to_s.as_sym]
|
101
|
+
end
|
102
|
+
else
|
103
|
+
raise ArgumentError, "set tolerant_columns to String, Symbol, or an Array of either"
|
104
|
+
end
|
105
|
+
unless heads.empty?
|
106
|
+
heads.each do |h|
|
107
|
+
if h.to_s.end_with?('!') || @tolerant_columns.include?(h)
|
108
|
+
@columns << Column.new(header: h.to_s.sub(/!\s*\z/, ''), tolerant: true)
|
109
|
+
else
|
110
|
+
@columns << Column.new(header: h)
|
111
|
+
end
|
112
|
+
end
|
113
|
+
end
|
114
|
+
end
|
115
|
+
|
116
|
+
# :category: Constructors
|
117
|
+
|
118
|
+
# Return an empty duplicate of self. This allows the library to create an
|
119
|
+
# empty table that preserves all the instance variables from self. Even
|
120
|
+
# though FatTable::Table objects have no instance variables, a class that
|
121
|
+
# inherits from it might.
|
122
|
+
def empty_dup
|
123
|
+
self.dup.__empty!
|
124
|
+
end
|
125
|
+
|
126
|
+
def __empty!
|
66
127
|
@columns = []
|
67
|
-
@
|
128
|
+
@explicit_boundaries = []
|
129
|
+
self
|
68
130
|
end
|
69
131
|
|
70
132
|
# :category: Constructors
|
71
133
|
|
72
134
|
# Construct a Table from the contents of a CSV file named +fname+. Headers
|
73
135
|
# will be taken from the first CSV row and converted to symbols.
|
74
|
-
def self.from_csv_file(fname)
|
136
|
+
def self.from_csv_file(fname, tolerant_columns: [])
|
75
137
|
File.open(fname, 'r') do |io|
|
76
|
-
from_csv_io(io)
|
138
|
+
from_csv_io(io, tolerant_columns: tolerant_columns)
|
77
139
|
end
|
78
140
|
end
|
79
141
|
|
@@ -81,8 +143,8 @@ module FatTable
|
|
81
143
|
|
82
144
|
# Construct a Table from a CSV string +str+, treated in the same manner as
|
83
145
|
# the input from a CSV file in ::from_csv_file.
|
84
|
-
def self.from_csv_string(str)
|
85
|
-
from_csv_io(StringIO.new(str))
|
146
|
+
def self.from_csv_string(str, tolerant_columns: [])
|
147
|
+
from_csv_io(StringIO.new(str), tolerant_columns: tolerant_columns)
|
86
148
|
end
|
87
149
|
|
88
150
|
# :category: Constructors
|
@@ -91,9 +153,9 @@ module FatTable
|
|
91
153
|
# file named +fname+. Headers are taken from the first row if the second row
|
92
154
|
# is an hrule. Otherwise, synthetic headers of the form +:col_1+, +:col_2+,
|
93
155
|
# etc. are created.
|
94
|
-
def self.from_org_file(fname)
|
156
|
+
def self.from_org_file(fname, tolerant_columns: [])
|
95
157
|
File.open(fname, 'r') do |io|
|
96
|
-
from_org_io(io)
|
158
|
+
from_org_io(io, tolerant_columns: tolerant_columns)
|
97
159
|
end
|
98
160
|
end
|
99
161
|
|
@@ -101,8 +163,8 @@ module FatTable
|
|
101
163
|
|
102
164
|
# Construct a Table from a string +str+, treated in the same manner as the
|
103
165
|
# contents of an org-mode file in ::from_org_file.
|
104
|
-
def self.from_org_string(str)
|
105
|
-
from_org_io(StringIO.new(str))
|
166
|
+
def self.from_org_string(str, tolerant_columns: [])
|
167
|
+
from_org_io(StringIO.new(str), tolerant_columns: tolerant_columns)
|
106
168
|
end
|
107
169
|
|
108
170
|
# :category: Constructors
|
@@ -121,8 +183,8 @@ module FatTable
|
|
121
183
|
# :hlines no +) org-mode strips all hrules from the table; otherwise (+
|
122
184
|
# HEADER: :hlines yes +) they are indicated with nil elements in the outer
|
123
185
|
# array.
|
124
|
-
def self.from_aoa(aoa, hlines: false)
|
125
|
-
from_array_of_arrays(aoa, hlines: hlines)
|
186
|
+
def self.from_aoa(aoa, hlines: false, tolerant_columns: [])
|
187
|
+
from_array_of_arrays(aoa, hlines: hlines, tolerant_columns: tolerant_columns)
|
126
188
|
end
|
127
189
|
|
128
190
|
# :category: Constructors
|
@@ -132,9 +194,9 @@ module FatTable
|
|
132
194
|
# keys, which, when converted to symbols will become the headers for the
|
133
195
|
# Table. If hlines is set true, mark a group boundary whenever a nil, rather
|
134
196
|
# than a hash appears in the outer array.
|
135
|
-
def self.from_aoh(aoh, hlines: false)
|
197
|
+
def self.from_aoh(aoh, hlines: false, tolerant_columns: [])
|
136
198
|
if aoh.first.respond_to?(:to_h)
|
137
|
-
from_array_of_hashes(aoh, hlines: hlines)
|
199
|
+
from_array_of_hashes(aoh, hlines: hlines, tolerant_columns: tolerant_columns)
|
138
200
|
else
|
139
201
|
raise UserError,
|
140
202
|
"Cannot initialize Table with an array of #{input[0].class}"
|
@@ -153,7 +215,7 @@ module FatTable
|
|
153
215
|
|
154
216
|
# Construct a Table by running a SQL +query+ against the database set up
|
155
217
|
# with FatTable.connect, with the rows of the query result as rows.
|
156
|
-
def self.from_sql(query)
|
218
|
+
def self.from_sql(query, tolerant_columns: [])
|
157
219
|
msg = 'FatTable.db must be set with FatTable.connect'
|
158
220
|
raise UserError, msg if FatTable.db.nil?
|
159
221
|
|
@@ -175,8 +237,8 @@ module FatTable
|
|
175
237
|
# Construct table from an array of hashes or an array of any object that
|
176
238
|
# can respond to #to_h. If an array element is a nil, mark it as a group
|
177
239
|
# boundary in the Table.
|
178
|
-
def from_array_of_hashes(hashes, hlines: false)
|
179
|
-
result = new
|
240
|
+
def from_array_of_hashes(hashes, hlines: false, tolerant_columns: [])
|
241
|
+
result = new(tolerant_columns: tolerant_columns)
|
180
242
|
hashes.each do |hsh|
|
181
243
|
if hsh.nil?
|
182
244
|
unless hlines
|
@@ -188,6 +250,7 @@ module FatTable
|
|
188
250
|
end
|
189
251
|
result << hsh.to_h
|
190
252
|
end
|
253
|
+
result.normalize_boundaries
|
191
254
|
result
|
192
255
|
end
|
193
256
|
|
@@ -203,8 +266,8 @@ module FatTable
|
|
203
266
|
# hlines are stripped from the table, otherwise (:hlines yes) they are
|
204
267
|
# indicated with nil elements in the outer array as expected by this
|
205
268
|
# method when hlines is set true.
|
206
|
-
def from_array_of_arrays(rows, hlines: false)
|
207
|
-
result = new
|
269
|
+
def from_array_of_arrays(rows, hlines: false, tolerant_columns: [])
|
270
|
+
result = new(tolerant_columns: tolerant_columns)
|
208
271
|
headers = []
|
209
272
|
if !hlines
|
210
273
|
# Take the first row as headers
|
@@ -236,15 +299,17 @@ module FatTable
|
|
236
299
|
hash_row = Hash[headers.zip(row)]
|
237
300
|
result << hash_row
|
238
301
|
end
|
302
|
+
result.normalize_boundaries
|
239
303
|
result
|
240
304
|
end
|
241
305
|
|
242
|
-
def from_csv_io(io)
|
243
|
-
result = new
|
306
|
+
def from_csv_io(io, tolerant_columns: [])
|
307
|
+
result = new(tolerant_columns: tolerant_columns)
|
244
308
|
::CSV.new(io, headers: true, header_converters: :symbol,
|
245
309
|
skip_blanks: true).each do |row|
|
246
310
|
result << row.to_h
|
247
311
|
end
|
312
|
+
result.normalize_boundaries
|
248
313
|
result
|
249
314
|
end
|
250
315
|
|
@@ -252,7 +317,7 @@ module FatTable
|
|
252
317
|
# header row must be marked with an hline (i.e, a row that looks like
|
253
318
|
# '|---+--...--|') and groups of rows may be marked with hlines to
|
254
319
|
# indicate group boundaries.
|
255
|
-
def from_org_io(io)
|
320
|
+
def from_org_io(io, tolerant_columns: [])
|
256
321
|
table_re = /\A\s*\|/
|
257
322
|
hrule_re = /\A\s*\|[-+]+/
|
258
323
|
rows = []
|
@@ -287,7 +352,7 @@ module FatTable
|
|
287
352
|
rows << line.split('|').map(&:clean)
|
288
353
|
end
|
289
354
|
end
|
290
|
-
from_array_of_arrays(rows, hlines: true)
|
355
|
+
from_array_of_arrays(rows, hlines: true, tolerant_columns: tolerant_columns)
|
291
356
|
end
|
292
357
|
end
|
293
358
|
|
@@ -317,8 +382,11 @@ module FatTable
|
|
317
382
|
# Set the column type for Column with the given +key+ as a String type,
|
318
383
|
# but only if empty. Otherwise, we would have to worry about converting
|
319
384
|
# existing items in the column to String. Perhaps that's a TODO.
|
320
|
-
def
|
321
|
-
|
385
|
+
def force_string!(*keys)
|
386
|
+
keys.each do |h|
|
387
|
+
column(h).force_string!
|
388
|
+
end
|
389
|
+
self
|
322
390
|
end
|
323
391
|
|
324
392
|
# :category: Attributes
|
@@ -378,6 +446,15 @@ module FatTable
|
|
378
446
|
|
379
447
|
# :category: Attributes
|
380
448
|
|
449
|
+
# Return whether the column with the given head should be made tolerant.
|
450
|
+
def tolerant_col?(h)
|
451
|
+
return true if tolerant_columns.include?(:'*')
|
452
|
+
|
453
|
+
tolerant_columns.include?(h)
|
454
|
+
end
|
455
|
+
|
456
|
+
# :category: Attributes
|
457
|
+
|
381
458
|
# Return the number of rows in the Table.
|
382
459
|
def size
|
383
460
|
return 0 if columns.empty?
|
@@ -426,8 +503,6 @@ module FatTable
|
|
426
503
|
# large table, that would require that we construct all the rows for a range
|
427
504
|
# of any size.
|
428
505
|
def rows_range(first = 0, last = nil) # :nodoc:
|
429
|
-
last ||= size - 1
|
430
|
-
last = [last, 0].max
|
431
506
|
raise UserError, 'first must be <= last' unless first <= last
|
432
507
|
|
433
508
|
rows = []
|
@@ -473,6 +548,8 @@ module FatTable
|
|
473
548
|
# the headers from the body) marks a boundary for the row immediately
|
474
549
|
# preceding the hline.
|
475
550
|
#
|
551
|
+
# Boundaries can also be added manually with the +mark_boundary+ method.
|
552
|
+
#
|
476
553
|
# The #order_by method resets the boundaries then adds boundaries at the
|
477
554
|
# last row of each group of rows on which the sort keys were equal as a
|
478
555
|
# boundary.
|
@@ -506,6 +583,43 @@ module FatTable
|
|
506
583
|
groups
|
507
584
|
end
|
508
585
|
|
586
|
+
# Return the number of groups in the table.
|
587
|
+
def number_of_groups
|
588
|
+
empty? ? 0 : boundaries.size
|
589
|
+
end
|
590
|
+
|
591
|
+
# Return the range of row indexes for boundary number +k+
|
592
|
+
def group_row_range(k)
|
593
|
+
last_k = boundaries.size - 1
|
594
|
+
if k < 0 || k > last_k
|
595
|
+
raise ArgumentError, "boundary number '#{k}' out of range in boundary_row_range"
|
596
|
+
end
|
597
|
+
|
598
|
+
if boundaries.size == 1
|
599
|
+
(0..boundaries.first)
|
600
|
+
elsif k.zero?
|
601
|
+
# Keep index at or above zero
|
602
|
+
(0..boundaries[k])
|
603
|
+
else
|
604
|
+
((boundaries[k - 1] + 1)..boundaries[k])
|
605
|
+
end
|
606
|
+
end
|
607
|
+
|
608
|
+
# Return an Array of Column objects for header +col+ representing a
|
609
|
+
# sub-column for each group in the table under that header.
|
610
|
+
def group_cols(col)
|
611
|
+
normalize_boundaries
|
612
|
+
cols = []
|
613
|
+
(0..boundaries.size - 1).each do |k|
|
614
|
+
range = group_row_range(k)
|
615
|
+
tab_col = column(col)
|
616
|
+
gitems = tab_col.items[range]
|
617
|
+
cols << Column.new(header: col, items: gitems,
|
618
|
+
type: tab_col.type, tolerant: tab_col.tolerant?)
|
619
|
+
end
|
620
|
+
cols
|
621
|
+
end
|
622
|
+
|
509
623
|
# :category: Operators
|
510
624
|
|
511
625
|
# Return this table mutated with all groups removed. Useful after something
|
@@ -513,56 +627,99 @@ module FatTable
|
|
513
627
|
# the groups displayed in the output. This modifies the input table, so is a
|
514
628
|
# departure from the otherwise immutability of Tables.
|
515
629
|
def degroup!
|
516
|
-
|
630
|
+
self.explicit_boundaries = []
|
517
631
|
self
|
518
632
|
end
|
519
633
|
|
520
634
|
# Mark a group boundary at row +row+, and if +row+ is +nil+, mark the last
|
521
|
-
# row in the table as a group boundary.
|
522
|
-
#
|
523
|
-
|
524
|
-
|
525
|
-
|
526
|
-
|
527
|
-
|
635
|
+
# row in the table as a group boundary. An attempt to add a boundary to
|
636
|
+
# an empty table has no effect. We adopt the convention that the last row
|
637
|
+
# of the table always marks an implicit boundary even if it is not in the
|
638
|
+
# @explicit_boundaries array. When we "mark" a boundary, we intend it to
|
639
|
+
# be an explicit boundary, even if it marks the last row of the table.
|
640
|
+
def mark_boundary(row_num = nil)
|
641
|
+
return self if empty?
|
642
|
+
|
643
|
+
if row_num
|
644
|
+
unless row_num < size
|
645
|
+
raise ArgumentError, "can't mark boundary at row #{row_num}, last row is #{size - 1}"
|
646
|
+
end
|
647
|
+
unless row_num >= 0
|
648
|
+
raise ArgumentError, "can't mark boundary at non-positive row #{row_num}"
|
649
|
+
end
|
650
|
+
explicit_boundaries.push(row_num)
|
651
|
+
elsif size > 0
|
652
|
+
explicit_boundaries.push(size - 1)
|
528
653
|
end
|
654
|
+
normalize_boundaries
|
655
|
+
self
|
529
656
|
end
|
530
657
|
|
531
|
-
protected
|
532
|
-
|
533
658
|
# :stopdoc:
|
534
659
|
|
535
660
|
# Make sure size - 1 is last boundary and that they are unique and sorted.
|
536
661
|
def normalize_boundaries
|
537
662
|
unless empty?
|
538
|
-
|
539
|
-
|
663
|
+
self.explicit_boundaries = explicit_boundaries.uniq.sort
|
664
|
+
end
|
665
|
+
explicit_boundaries
|
666
|
+
end
|
667
|
+
|
668
|
+
# Return the explicit_boundaries, augmented by an implicit boundary for
|
669
|
+
# the end of the table, unless it's already an explicit boundary.
|
670
|
+
def boundaries
|
671
|
+
return [] if empty?
|
672
|
+
|
673
|
+
if explicit_boundaries.last == size - 1
|
674
|
+
explicit_boundaries
|
675
|
+
else
|
676
|
+
explicit_boundaries + [size - 1]
|
540
677
|
end
|
541
|
-
boundaries
|
542
678
|
end
|
543
679
|
|
680
|
+
protected
|
681
|
+
|
544
682
|
# Concatenate the array of argument bounds to this table's boundaries, but
|
545
683
|
# increase each of the indexes in bounds by shift. This is used in the
|
546
684
|
# #union_all method.
|
547
685
|
def append_boundaries(bounds, shift: 0)
|
548
|
-
@
|
686
|
+
@explicit_boundaries += bounds.map { |k| k + shift }
|
549
687
|
end
|
550
688
|
|
551
|
-
# Return the group number to which row ~
|
552
|
-
# user's point of view are indexed starting at
|
553
|
-
def row_index_to_group_index(
|
689
|
+
# Return the group number to which row ~row_num~ belongs. Groups, from the
|
690
|
+
# user's point of view are indexed starting at 0.
|
691
|
+
def row_index_to_group_index(row_num)
|
554
692
|
boundaries.each_with_index do |b_last, g_num|
|
555
|
-
return (g_num + 1) if
|
693
|
+
return (g_num + 1) if row_num <= b_last
|
694
|
+
end
|
695
|
+
0
|
696
|
+
end
|
697
|
+
|
698
|
+
# Return the index of the first row in group number +grp_num+
|
699
|
+
def first_row_num_in_group(grp_num)
|
700
|
+
if grp_num >= boundaries.size || grp_num < 0
|
701
|
+
raise ArgumentError, "group number #{grp_num} out of bounds"
|
702
|
+
end
|
703
|
+
|
704
|
+
grp_num.zero? ? 0 : boundaries[grp_num - 1] + 1
|
705
|
+
end
|
706
|
+
|
707
|
+
# Return the index of the last row in group number +grp_num+
|
708
|
+
def last_row_num_in_group(grp_num)
|
709
|
+
if grp_num > boundaries.size || grp_num < 0
|
710
|
+
raise ArgumentError, "group number #{grp_num} out of bounds"
|
711
|
+
else
|
712
|
+
boundaries[grp_num]
|
556
713
|
end
|
557
|
-
1
|
558
714
|
end
|
559
715
|
|
560
|
-
|
716
|
+
# Return the rows for group number +grp_num+.
|
717
|
+
def group_rows(grp_num) # :nodoc:
|
561
718
|
normalize_boundaries
|
562
|
-
return [] unless
|
719
|
+
return [] unless grp_num < boundaries.size
|
563
720
|
|
564
|
-
first =
|
565
|
-
last =
|
721
|
+
first = first_row_num_in_group(grp_num)
|
722
|
+
last = last_row_num_in_group(grp_num)
|
566
723
|
rows_range(first, last)
|
567
724
|
end
|
568
725
|
|
@@ -587,22 +744,43 @@ module FatTable
|
|
587
744
|
# After sorting, the output Table will have group boundaries added after
|
588
745
|
# each row where the sort key changes.
|
589
746
|
def order_by(*sort_heads)
|
590
|
-
|
591
|
-
|
592
|
-
sort_heads = sort_heads.map { |h| h.to_s.sub(/\!\z/, '').to_sym }
|
593
|
-
rev_heads = rev_heads.map { |h| h.to_s.sub(/\!\z/, '').to_sym }
|
747
|
+
# Sort the rows in order and add to new_rows.
|
748
|
+
key_hash = partition_sort_keys(sort_heads)
|
594
749
|
new_rows = rows.sort do |r1, r2|
|
595
|
-
|
596
|
-
|
597
|
-
|
750
|
+
# Set the sort keys based on direction
|
751
|
+
key1 = []
|
752
|
+
key2 = []
|
753
|
+
key_hash.each_pair do |h, dir|
|
754
|
+
if dir == :forward
|
755
|
+
key1 << r1[h]
|
756
|
+
key2 << r2[h]
|
757
|
+
else
|
758
|
+
key1 << r2[h]
|
759
|
+
key2 << r1[h]
|
760
|
+
end
|
761
|
+
end
|
762
|
+
# Make any booleans comparable with <=>
|
763
|
+
key1 = key1.map_booleans
|
764
|
+
key2 = key2.map_booleans
|
765
|
+
|
766
|
+
# If there are any nils, <=> will return nil, and we have to use the
|
767
|
+
# special comparison method, compare_with_nils, instead.
|
768
|
+
result = (key1 <=> key2)
|
769
|
+
result.nil? ? compare_with_nils(key1, key2) : result
|
598
770
|
end
|
599
|
-
|
600
|
-
#
|
601
|
-
|
771
|
+
|
772
|
+
# Add the new_rows to the table, but mark a group boundary at the points
|
773
|
+
# where the sort key changes value. NB: I use self.class.new here
|
774
|
+
# rather than Table.new because if this class is inherited, I want the
|
775
|
+
# new_tab to be an instance of the subclass. With Table.new, this
|
776
|
+
# method's result will be an instance of FatTable::Table rather than of
|
777
|
+
# the subclass.
|
778
|
+
new_tab = empty_dup
|
602
779
|
last_key = nil
|
603
780
|
new_rows.each_with_index do |nrow, k|
|
604
781
|
new_tab << nrow
|
605
|
-
key = nrow.fetch_values(*sort_heads)
|
782
|
+
# key = nrow.fetch_values(*sort_heads)
|
783
|
+
key = nrow.fetch_values(*key_hash.keys)
|
606
784
|
new_tab.mark_boundary(k - 1) if last_key && key != last_key
|
607
785
|
last_key = key
|
608
786
|
end
|
@@ -610,6 +788,33 @@ module FatTable
|
|
610
788
|
new_tab
|
611
789
|
end
|
612
790
|
|
791
|
+
# :category: Operators
|
792
|
+
|
793
|
+
# Return a new Table sorting the rows of this Table on any expression
|
794
|
+
# +expr+ that is valid with the +select+ method, except that the
|
795
|
+
# expression may end with an exclamation mark +!+ to indicate a reverse
|
796
|
+
# sort. The new table will have an additional column called +sort_key+
|
797
|
+
# populated with the result of evaluating the given expression and will be
|
798
|
+
# sorted (or reverse sorted) on that column.
|
799
|
+
#
|
800
|
+
# tab.order_with('date.year') => table sorted by date's year
|
801
|
+
# tab.order_with('date.year!') => table reverse sorted by date's year
|
802
|
+
#
|
803
|
+
# After sorting, the output Table will have group boundaries added after
|
804
|
+
# each row where the sort key changes.
|
805
|
+
def order_with(expr)
|
806
|
+
unless expr.is_a?(String)
|
807
|
+
raise "must call FatTable::Table\#order_with with a single string expression"
|
808
|
+
end
|
809
|
+
rev = false
|
810
|
+
if expr.match?(/\s*!\s*\z/)
|
811
|
+
rev = true
|
812
|
+
expr = expr.sub(/\s*!\s*\z/, '')
|
813
|
+
end
|
814
|
+
sort_sym = rev ? :sort_key! : :sort_key
|
815
|
+
dup.select(*headers, sort_key: expr).order_by(sort_sym)
|
816
|
+
end
|
817
|
+
|
613
818
|
# :category: Operators
|
614
819
|
#
|
615
820
|
# Return a Table having the selected column expressions. Each expression can
|
@@ -713,7 +918,7 @@ module FatTable
|
|
713
918
|
before: before_hook,
|
714
919
|
after: after_hook)
|
715
920
|
# Compute the new Table from this Table
|
716
|
-
result =
|
921
|
+
result = empty_dup
|
717
922
|
normalize_boundaries
|
718
923
|
rows.each_with_index do |old_row, old_k|
|
719
924
|
# Set the group number in the before hook and run the hook with the
|
@@ -723,7 +928,15 @@ module FatTable
|
|
723
928
|
ev.eval_before_hook(locals: old_row)
|
724
929
|
# Compute the new row.
|
725
930
|
new_row = {}
|
726
|
-
|
931
|
+
# Allow the :omni col to stand for all columns if it is alone and
|
932
|
+
# first.
|
933
|
+
cols_to_include =
|
934
|
+
if cols.size == 1 && cols.first.as_sym == :omni
|
935
|
+
headers
|
936
|
+
else
|
937
|
+
cols
|
938
|
+
end
|
939
|
+
cols_to_include.each do |k|
|
727
940
|
h = k.as_sym
|
728
941
|
msg = "Column '#{h}' in select does not exist"
|
729
942
|
raise UserError, msg unless column?(h)
|
@@ -752,7 +965,7 @@ module FatTable
|
|
752
965
|
ev.eval_after_hook(locals: new_row)
|
753
966
|
result << new_row
|
754
967
|
end
|
755
|
-
result.
|
968
|
+
result.explicit_boundaries = explicit_boundaries
|
756
969
|
result.normalize_boundaries
|
757
970
|
result
|
758
971
|
end
|
@@ -770,9 +983,14 @@ module FatTable
|
|
770
983
|
# tab.where('@row.even? && shares > 500') => even rows with lots of shares
|
771
984
|
def where(expr)
|
772
985
|
expr = expr.to_s
|
773
|
-
result =
|
986
|
+
result = empty_dup
|
774
987
|
headers.each do |h|
|
775
|
-
col =
|
988
|
+
col =
|
989
|
+
if tolerant_col?(h)
|
990
|
+
Column.new(header: h, tolerant: true)
|
991
|
+
else
|
992
|
+
Column.new(header: h)
|
993
|
+
end
|
776
994
|
result.add_column(col)
|
777
995
|
end
|
778
996
|
ev = Evaluator.new(ivars: { row: 0, group: 0 })
|
@@ -792,7 +1010,7 @@ module FatTable
|
|
792
1010
|
# Return a new table with all duplicate rows eliminated. Resets groups. Same
|
793
1011
|
# as #uniq.
|
794
1012
|
def distinct
|
795
|
-
result =
|
1013
|
+
result = empty_dup
|
796
1014
|
uniq_rows = rows.uniq
|
797
1015
|
uniq_rows.each do |row|
|
798
1016
|
result << row
|
@@ -889,38 +1107,6 @@ module FatTable
|
|
889
1107
|
set_operation(other, :difference, distinct: false)
|
890
1108
|
end
|
891
1109
|
|
892
|
-
private
|
893
|
-
|
894
|
-
# Apply the set operation given by ~oper~ between this table and the other
|
895
|
-
# table given in the first argument. If distinct is true, eliminate
|
896
|
-
# duplicates from the result.
|
897
|
-
def set_operation(other, oper = :+, distinct: true, add_boundaries: true, inherit_boundaries: false)
|
898
|
-
unless columns.size == other.columns.size
|
899
|
-
msg = "can't apply set ops to tables with a different number of columns"
|
900
|
-
raise UserError, msg
|
901
|
-
end
|
902
|
-
unless columns.map(&:type) == other.columns.map(&:type)
|
903
|
-
msg = "can't apply a set ops to tables with different column types."
|
904
|
-
raise UserError, msg
|
905
|
-
end
|
906
|
-
other_rows = other.rows.map { |r| r.replace_keys(headers) }
|
907
|
-
result = Table.new
|
908
|
-
new_rows = rows.send(oper, other_rows)
|
909
|
-
new_rows.each_with_index do |row, k|
|
910
|
-
result << row
|
911
|
-
result.mark_boundary if k == size - 1 && add_boundaries
|
912
|
-
end
|
913
|
-
if inherit_boundaries
|
914
|
-
result.boundaries = normalize_boundaries
|
915
|
-
other.normalize_boundaries
|
916
|
-
result.append_boundaries(other.boundaries, shift: size)
|
917
|
-
end
|
918
|
-
result.normalize_boundaries
|
919
|
-
distinct ? result.distinct : result
|
920
|
-
end
|
921
|
-
|
922
|
-
public
|
923
|
-
|
924
1110
|
# An Array of symbols for the valid join types.
|
925
1111
|
JOIN_TYPES = %i[inner left right full cross].freeze
|
926
1112
|
|
@@ -1011,7 +1197,7 @@ module FatTable
|
|
1011
1197
|
join_exp, other_common_heads =
|
1012
1198
|
build_join_expression(exps, other, join_type)
|
1013
1199
|
ev = Evaluator.new
|
1014
|
-
result =
|
1200
|
+
result = empty_dup
|
1015
1201
|
other_rows = other.rows
|
1016
1202
|
other_row_matches = Array.new(other_rows.size, false)
|
1017
1203
|
rows.each do |self_row|
|
@@ -1029,14 +1215,14 @@ module FatTable
|
|
1029
1215
|
type: join_type)
|
1030
1216
|
result << out_row
|
1031
1217
|
end
|
1032
|
-
next unless
|
1218
|
+
next unless [:left, :full].include?(join_type)
|
1033
1219
|
next if self_row_matched
|
1034
1220
|
|
1035
1221
|
result << build_out_row(row_a: self_row,
|
1036
1222
|
row_b: other_row_nils,
|
1037
1223
|
type: join_type)
|
1038
1224
|
end
|
1039
|
-
if
|
1225
|
+
if [:right, :full].include?(join_type)
|
1040
1226
|
other_rows.each_with_index do |other_row, k|
|
1041
1227
|
next if other_row_matches[k]
|
1042
1228
|
|
@@ -1165,7 +1351,7 @@ module FatTable
|
|
1165
1351
|
partial_result = nil
|
1166
1352
|
else
|
1167
1353
|
# First of a pair of _a or _b
|
1168
|
-
partial_result =
|
1354
|
+
partial_result = +"(#{a_head}_a == "
|
1169
1355
|
end
|
1170
1356
|
last_sym = a_head
|
1171
1357
|
when /\A(?<sy>.*)_b\z/
|
@@ -1184,7 +1370,7 @@ module FatTable
|
|
1184
1370
|
partial_result = nil
|
1185
1371
|
else
|
1186
1372
|
# First of a pair of _a or _b
|
1187
|
-
partial_result =
|
1373
|
+
partial_result = +"(#{b_head}_b == "
|
1188
1374
|
end
|
1189
1375
|
b_common_heads << b_head
|
1190
1376
|
last_sym = b_head
|
@@ -1259,7 +1445,7 @@ module FatTable
|
|
1259
1445
|
groups = sorted_tab.rows.group_by do |r|
|
1260
1446
|
group_cols.map { |k| r[k] }
|
1261
1447
|
end
|
1262
|
-
result =
|
1448
|
+
result = empty_dup
|
1263
1449
|
groups.each_pair do |_vals, grp_rows|
|
1264
1450
|
result << row_from_group(grp_rows, group_cols, agg_cols)
|
1265
1451
|
end
|
@@ -1269,6 +1455,9 @@ module FatTable
|
|
1269
1455
|
|
1270
1456
|
private
|
1271
1457
|
|
1458
|
+
# Collapse a group of rows to a single row by applying the aggregator from
|
1459
|
+
# the +agg_cols+ to the items in that column and the presumably identical
|
1460
|
+
# value in the +grp_cols+ to those columns.
|
1272
1461
|
def row_from_group(rows, grp_cols, agg_cols)
|
1273
1462
|
new_row = {}
|
1274
1463
|
grp_cols.each do |h|
|
@@ -1291,15 +1480,6 @@ module FatTable
|
|
1291
1480
|
|
1292
1481
|
# :category: Constructors
|
1293
1482
|
|
1294
|
-
# Add a group boundary mark at the given row, or at the end of the table
|
1295
|
-
# by default.
|
1296
|
-
def add_boundary(at_row = nil)
|
1297
|
-
row = at_row || (size - 1)
|
1298
|
-
@boundaries << row
|
1299
|
-
end
|
1300
|
-
|
1301
|
-
# :category: Constructors
|
1302
|
-
|
1303
1483
|
# Add a +row+ represented by a Hash having the headers as keys. If +mark:+
|
1304
1484
|
# is set true, mark this row as a boundary. All tables should be built
|
1305
1485
|
# ultimately using this method as a primitive.
|
@@ -1312,7 +1492,7 @@ module FatTable
|
|
1312
1492
|
# This column is new, so it needs nil items for all prior rows lest
|
1313
1493
|
# the value be added to a prior row.
|
1314
1494
|
items = Array.new(size, nil)
|
1315
|
-
columns << Column.new(header: h, items: items)
|
1495
|
+
columns << Column.new(header: h, items: items, tolerant: tolerant_col?(h))
|
1316
1496
|
end
|
1317
1497
|
headers.each do |h|
|
1318
1498
|
# NB: This adds a nil if h is not in row.
|
@@ -1486,5 +1666,74 @@ module FatTable
|
|
1486
1666
|
yield fmt if block_given?
|
1487
1667
|
fmt.output
|
1488
1668
|
end
|
1669
|
+
|
1670
|
+
private
|
1671
|
+
|
1672
|
+
# Apply the set operation given by ~oper~ between this table and the other
|
1673
|
+
# table given in the first argument. If distinct is true, eliminate
|
1674
|
+
# duplicates from the result.
|
1675
|
+
def set_operation(other, oper = :+, distinct: true, add_boundaries: true, inherit_boundaries: false)
|
1676
|
+
unless columns.size == other.columns.size
|
1677
|
+
msg = "can't apply set ops to tables with a different number of columns"
|
1678
|
+
raise UserError, msg
|
1679
|
+
end
|
1680
|
+
unless columns.map(&:type) == other.columns.map(&:type)
|
1681
|
+
msg = "can't apply a set ops to tables with different column types."
|
1682
|
+
raise UserError, msg
|
1683
|
+
end
|
1684
|
+
other_rows = other.rows.map { |r| r.replace_keys(headers) }
|
1685
|
+
result = empty_dup
|
1686
|
+
new_rows = rows.send(oper, other_rows)
|
1687
|
+
new_rows.each_with_index do |row, k|
|
1688
|
+
result << row
|
1689
|
+
result.mark_boundary if k == size - 1 && add_boundaries
|
1690
|
+
end
|
1691
|
+
if inherit_boundaries
|
1692
|
+
result.explicit_boundaries = boundaries
|
1693
|
+
result.append_boundaries(other.boundaries, shift: size)
|
1694
|
+
end
|
1695
|
+
result.normalize_boundaries
|
1696
|
+
distinct ? result.distinct : result
|
1697
|
+
end
|
1698
|
+
|
1699
|
+
# Return a hash with the key being the header to sort on and the value
|
1700
|
+
# being either :forward or :reverse to indicate the sort order on that
|
1701
|
+
# key.
|
1702
|
+
def partition_sort_keys(keys)
|
1703
|
+
result = {}
|
1704
|
+
[keys].flatten.each do |h|
|
1705
|
+
if h.to_s.match?(/\s*!\s*\z/)
|
1706
|
+
result[h.to_s.sub(/\s*!\s*\z/, '').to_sym] = :reverse
|
1707
|
+
else
|
1708
|
+
result[h] = :forward
|
1709
|
+
end
|
1710
|
+
end
|
1711
|
+
result
|
1712
|
+
end
|
1713
|
+
|
1714
|
+
# The <=> operator cannot handle nils without some help. Treat a nil as
|
1715
|
+
# smaller than any other value, but equal to other nils. The two keys are assumed to be arrays of values to be
|
1716
|
+
# compared with <=>.
|
1717
|
+
def compare_with_nils(key1, key2)
|
1718
|
+
result = nil
|
1719
|
+
key1.zip(key2) do |k1, k2|
|
1720
|
+
if k1.nil? && k2.nil?
|
1721
|
+
result = 0
|
1722
|
+
next
|
1723
|
+
elsif k1.nil?
|
1724
|
+
result = -1
|
1725
|
+
break
|
1726
|
+
elsif k2.nil?
|
1727
|
+
result = 1
|
1728
|
+
break
|
1729
|
+
elsif (k1 <=> k2) == 0
|
1730
|
+
next
|
1731
|
+
else
|
1732
|
+
result = (k1 <=> k2)
|
1733
|
+
break
|
1734
|
+
end
|
1735
|
+
end
|
1736
|
+
result
|
1737
|
+
end
|
1489
1738
|
end
|
1490
1739
|
end
|