fat_table 0.4.0 → 0.5.3
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.rspec +2 -1
- data/README.org +1426 -447
- data/README.rdoc +1 -2
- data/TODO.org +17 -10
- data/examples/create_trans.sql +14 -0
- data/examples/quick.pdf +0 -0
- data/examples/quick.png +0 -0
- data/examples/quick.ppm +0 -0
- data/examples/quick.tex +8 -0
- data/examples/quick_small.png +0 -0
- data/examples/quicktable.tex +123 -0
- data/examples/trades.db +0 -0
- data/examples/trans.csv +13 -0
- data/fat_table.gemspec +1 -0
- data/lib/ext/array.rb +15 -0
- data/lib/fat_table/column.rb +89 -208
- data/lib/fat_table/convert.rb +174 -0
- data/lib/fat_table/errors.rb +4 -0
- data/lib/fat_table/evaluator.rb +7 -0
- data/lib/fat_table/footer.rb +228 -0
- data/lib/fat_table/formatters/formatter.rb +200 -166
- data/lib/fat_table/formatters/latex_formatter.rb +9 -7
- data/lib/fat_table/table.rb +366 -117
- data/lib/fat_table/version.rb +1 -1
- data/lib/fat_table.rb +19 -16
- data/md/README.md +1 -2
- metadata +31 -5
data/lib/fat_table/table.rb
CHANGED
@@ -53,7 +53,17 @@ module FatTable
|
|
53
53
|
class Table
|
54
54
|
# An Array of FatTable::Columns that constitute the table.
|
55
55
|
attr_reader :columns
|
56
|
-
|
56
|
+
|
57
|
+
# Record boundaries set explicitly with mark_boundaries or from reading
|
58
|
+
# hlines from input. When we want to access boundaries, however, we want
|
59
|
+
# to add an implicit boundary at the last row of the table. Since, as the
|
60
|
+
# table grows, the implicit boundary changes index, we synthesize the
|
61
|
+
# boundaries by dynamically adding the final boundary with the #boundaries
|
62
|
+
# method call.
|
63
|
+
attr_accessor :explicit_boundaries
|
64
|
+
|
65
|
+
# An Array of FatTable::Columns that should be tolerant.
|
66
|
+
attr_reader :tolerant_columns
|
57
67
|
|
58
68
|
###########################################################################
|
59
69
|
# Constructors
|
@@ -61,19 +71,71 @@ module FatTable
|
|
61
71
|
|
62
72
|
# :category: Constructors
|
63
73
|
|
64
|
-
# Return an empty FatTable::Table object.
|
65
|
-
|
74
|
+
# Return an empty FatTable::Table object. Specifying headers is optional.
|
75
|
+
# Any headers ending with a ! are marked as tolerant, in that, if an
|
76
|
+
# incompatible type is added to it, the column is re-typed as a String
|
77
|
+
# column, and construction proceeds. The ! is stripped from the header to
|
78
|
+
# form the column key, though. You can also provide the names of columns
|
79
|
+
# that should be tolerant by using the +tolerant_columns+ keyword to
|
80
|
+
# provide an array of headers that should be tolerant. The special string
|
81
|
+
# '*' or the symbol :* indicates that all columns should be created
|
82
|
+
# tolerant.
|
83
|
+
def initialize(*heads, tolerant_columns: [])
|
84
|
+
@columns = []
|
85
|
+
@explicit_boundaries = []
|
86
|
+
@tolerant_columns =
|
87
|
+
case tolerant_columns
|
88
|
+
when Array
|
89
|
+
tolerant_columns.map { |h| h.to_s.as_sym }
|
90
|
+
when String
|
91
|
+
if tolerant_columns.strip == '*'
|
92
|
+
['*'.to_sym]
|
93
|
+
else
|
94
|
+
[tolerant_columns.as_sym]
|
95
|
+
end
|
96
|
+
when Symbol
|
97
|
+
if tolerant_columns.to_s.strip == '*'
|
98
|
+
['*'.to_sym]
|
99
|
+
else
|
100
|
+
[tolerant_columns.to_s.as_sym]
|
101
|
+
end
|
102
|
+
else
|
103
|
+
raise ArgumentError, "set tolerant_columns to String, Symbol, or an Array of either"
|
104
|
+
end
|
105
|
+
unless heads.empty?
|
106
|
+
heads.each do |h|
|
107
|
+
if h.to_s.end_with?('!') || @tolerant_columns.include?(h)
|
108
|
+
@columns << Column.new(header: h.to_s.sub(/!\s*\z/, ''), tolerant: true)
|
109
|
+
else
|
110
|
+
@columns << Column.new(header: h)
|
111
|
+
end
|
112
|
+
end
|
113
|
+
end
|
114
|
+
end
|
115
|
+
|
116
|
+
# :category: Constructors
|
117
|
+
|
118
|
+
# Return an empty duplicate of self. This allows the library to create an
|
119
|
+
# empty table that preserves all the instance variables from self. Even
|
120
|
+
# though FatTable::Table objects have no instance variables, a class that
|
121
|
+
# inherits from it might.
|
122
|
+
def empty_dup
|
123
|
+
self.dup.__empty!
|
124
|
+
end
|
125
|
+
|
126
|
+
def __empty!
|
66
127
|
@columns = []
|
67
|
-
@
|
128
|
+
@explicit_boundaries = []
|
129
|
+
self
|
68
130
|
end
|
69
131
|
|
70
132
|
# :category: Constructors
|
71
133
|
|
72
134
|
# Construct a Table from the contents of a CSV file named +fname+. Headers
|
73
135
|
# will be taken from the first CSV row and converted to symbols.
|
74
|
-
def self.from_csv_file(fname)
|
136
|
+
def self.from_csv_file(fname, tolerant_columns: [])
|
75
137
|
File.open(fname, 'r') do |io|
|
76
|
-
from_csv_io(io)
|
138
|
+
from_csv_io(io, tolerant_columns: tolerant_columns)
|
77
139
|
end
|
78
140
|
end
|
79
141
|
|
@@ -81,8 +143,8 @@ module FatTable
|
|
81
143
|
|
82
144
|
# Construct a Table from a CSV string +str+, treated in the same manner as
|
83
145
|
# the input from a CSV file in ::from_csv_file.
|
84
|
-
def self.from_csv_string(str)
|
85
|
-
from_csv_io(StringIO.new(str))
|
146
|
+
def self.from_csv_string(str, tolerant_columns: [])
|
147
|
+
from_csv_io(StringIO.new(str), tolerant_columns: tolerant_columns)
|
86
148
|
end
|
87
149
|
|
88
150
|
# :category: Constructors
|
@@ -91,9 +153,9 @@ module FatTable
|
|
91
153
|
# file named +fname+. Headers are taken from the first row if the second row
|
92
154
|
# is an hrule. Otherwise, synthetic headers of the form +:col_1+, +:col_2+,
|
93
155
|
# etc. are created.
|
94
|
-
def self.from_org_file(fname)
|
156
|
+
def self.from_org_file(fname, tolerant_columns: [])
|
95
157
|
File.open(fname, 'r') do |io|
|
96
|
-
from_org_io(io)
|
158
|
+
from_org_io(io, tolerant_columns: tolerant_columns)
|
97
159
|
end
|
98
160
|
end
|
99
161
|
|
@@ -101,8 +163,8 @@ module FatTable
|
|
101
163
|
|
102
164
|
# Construct a Table from a string +str+, treated in the same manner as the
|
103
165
|
# contents of an org-mode file in ::from_org_file.
|
104
|
-
def self.from_org_string(str)
|
105
|
-
from_org_io(StringIO.new(str))
|
166
|
+
def self.from_org_string(str, tolerant_columns: [])
|
167
|
+
from_org_io(StringIO.new(str), tolerant_columns: tolerant_columns)
|
106
168
|
end
|
107
169
|
|
108
170
|
# :category: Constructors
|
@@ -121,8 +183,8 @@ module FatTable
|
|
121
183
|
# :hlines no +) org-mode strips all hrules from the table; otherwise (+
|
122
184
|
# HEADER: :hlines yes +) they are indicated with nil elements in the outer
|
123
185
|
# array.
|
124
|
-
def self.from_aoa(aoa, hlines: false)
|
125
|
-
from_array_of_arrays(aoa, hlines: hlines)
|
186
|
+
def self.from_aoa(aoa, hlines: false, tolerant_columns: [])
|
187
|
+
from_array_of_arrays(aoa, hlines: hlines, tolerant_columns: tolerant_columns)
|
126
188
|
end
|
127
189
|
|
128
190
|
# :category: Constructors
|
@@ -132,9 +194,9 @@ module FatTable
|
|
132
194
|
# keys, which, when converted to symbols will become the headers for the
|
133
195
|
# Table. If hlines is set true, mark a group boundary whenever a nil, rather
|
134
196
|
# than a hash appears in the outer array.
|
135
|
-
def self.from_aoh(aoh, hlines: false)
|
197
|
+
def self.from_aoh(aoh, hlines: false, tolerant_columns: [])
|
136
198
|
if aoh.first.respond_to?(:to_h)
|
137
|
-
from_array_of_hashes(aoh, hlines: hlines)
|
199
|
+
from_array_of_hashes(aoh, hlines: hlines, tolerant_columns: tolerant_columns)
|
138
200
|
else
|
139
201
|
raise UserError,
|
140
202
|
"Cannot initialize Table with an array of #{input[0].class}"
|
@@ -153,7 +215,7 @@ module FatTable
|
|
153
215
|
|
154
216
|
# Construct a Table by running a SQL +query+ against the database set up
|
155
217
|
# with FatTable.connect, with the rows of the query result as rows.
|
156
|
-
def self.from_sql(query)
|
218
|
+
def self.from_sql(query, tolerant_columns: [])
|
157
219
|
msg = 'FatTable.db must be set with FatTable.connect'
|
158
220
|
raise UserError, msg if FatTable.db.nil?
|
159
221
|
|
@@ -175,8 +237,8 @@ module FatTable
|
|
175
237
|
# Construct table from an array of hashes or an array of any object that
|
176
238
|
# can respond to #to_h. If an array element is a nil, mark it as a group
|
177
239
|
# boundary in the Table.
|
178
|
-
def from_array_of_hashes(hashes, hlines: false)
|
179
|
-
result = new
|
240
|
+
def from_array_of_hashes(hashes, hlines: false, tolerant_columns: [])
|
241
|
+
result = new(tolerant_columns: tolerant_columns)
|
180
242
|
hashes.each do |hsh|
|
181
243
|
if hsh.nil?
|
182
244
|
unless hlines
|
@@ -188,6 +250,7 @@ module FatTable
|
|
188
250
|
end
|
189
251
|
result << hsh.to_h
|
190
252
|
end
|
253
|
+
result.normalize_boundaries
|
191
254
|
result
|
192
255
|
end
|
193
256
|
|
@@ -203,8 +266,8 @@ module FatTable
|
|
203
266
|
# hlines are stripped from the table, otherwise (:hlines yes) they are
|
204
267
|
# indicated with nil elements in the outer array as expected by this
|
205
268
|
# method when hlines is set true.
|
206
|
-
def from_array_of_arrays(rows, hlines: false)
|
207
|
-
result = new
|
269
|
+
def from_array_of_arrays(rows, hlines: false, tolerant_columns: [])
|
270
|
+
result = new(tolerant_columns: tolerant_columns)
|
208
271
|
headers = []
|
209
272
|
if !hlines
|
210
273
|
# Take the first row as headers
|
@@ -236,15 +299,17 @@ module FatTable
|
|
236
299
|
hash_row = Hash[headers.zip(row)]
|
237
300
|
result << hash_row
|
238
301
|
end
|
302
|
+
result.normalize_boundaries
|
239
303
|
result
|
240
304
|
end
|
241
305
|
|
242
|
-
def from_csv_io(io)
|
243
|
-
result = new
|
306
|
+
def from_csv_io(io, tolerant_columns: [])
|
307
|
+
result = new(tolerant_columns: tolerant_columns)
|
244
308
|
::CSV.new(io, headers: true, header_converters: :symbol,
|
245
309
|
skip_blanks: true).each do |row|
|
246
310
|
result << row.to_h
|
247
311
|
end
|
312
|
+
result.normalize_boundaries
|
248
313
|
result
|
249
314
|
end
|
250
315
|
|
@@ -252,7 +317,7 @@ module FatTable
|
|
252
317
|
# header row must be marked with an hline (i.e., a row that looks like
|
253
318
|
# '|---+--...--|') and groups of rows may be marked with hlines to
|
254
319
|
# indicate group boundaries.
|
255
|
-
def from_org_io(io)
|
320
|
+
def from_org_io(io, tolerant_columns: [])
|
256
321
|
table_re = /\A\s*\|/
|
257
322
|
hrule_re = /\A\s*\|[-+]+/
|
258
323
|
rows = []
|
@@ -287,7 +352,7 @@ module FatTable
|
|
287
352
|
rows << line.split('|').map(&:clean)
|
288
353
|
end
|
289
354
|
end
|
290
|
-
from_array_of_arrays(rows, hlines: true)
|
355
|
+
from_array_of_arrays(rows, hlines: true, tolerant_columns: tolerant_columns)
|
291
356
|
end
|
292
357
|
end
|
293
358
|
|
@@ -317,8 +382,11 @@ module FatTable
|
|
317
382
|
# Set the column type for Column with the given +key+ as a String type,
|
318
383
|
# but only if empty. Otherwise, we would have to worry about converting
|
319
384
|
# existing items in the column to String. Perhaps that's a TODO.
|
320
|
-
def
|
321
|
-
|
385
|
+
def force_string!(*keys)
|
386
|
+
keys.each do |h|
|
387
|
+
column(h).force_string!
|
388
|
+
end
|
389
|
+
self
|
322
390
|
end
|
323
391
|
|
324
392
|
# :category: Attributes
|
@@ -378,6 +446,15 @@ module FatTable
|
|
378
446
|
|
379
447
|
# :category: Attributes
|
380
448
|
|
449
|
+
# Return whether the column with the given head should be made tolerant.
|
450
|
+
def tolerant_col?(h)
|
451
|
+
return true if tolerant_columns.include?(:'*')
|
452
|
+
|
453
|
+
tolerant_columns.include?(h)
|
454
|
+
end
|
455
|
+
|
456
|
+
# :category: Attributes
|
457
|
+
|
381
458
|
# Return the number of rows in the Table.
|
382
459
|
def size
|
383
460
|
return 0 if columns.empty?
|
@@ -426,8 +503,6 @@ module FatTable
|
|
426
503
|
# large table, that would require that we construct all the rows for a range
|
427
504
|
# of any size.
|
428
505
|
def rows_range(first = 0, last = nil) # :nodoc:
|
429
|
-
last ||= size - 1
|
430
|
-
last = [last, 0].max
|
431
506
|
raise UserError, 'first must be <= last' unless first <= last
|
432
507
|
|
433
508
|
rows = []
|
@@ -473,6 +548,8 @@ module FatTable
|
|
473
548
|
# the headers from the body) marks a boundary for the row immediately
|
474
549
|
# preceding the hline.
|
475
550
|
#
|
551
|
+
# Boundaries can also be added manually with the +mark_boundary+ method.
|
552
|
+
#
|
476
553
|
# The #order_by method resets the boundaries then adds boundaries at the
|
477
554
|
# last row of each group of rows on which the sort keys were equal as a
|
478
555
|
# boundary.
|
@@ -506,6 +583,43 @@ module FatTable
|
|
506
583
|
groups
|
507
584
|
end
|
508
585
|
|
586
|
+
# Return the number of groups in the table.
|
587
|
+
def number_of_groups
|
588
|
+
empty? ? 0 : boundaries.size
|
589
|
+
end
|
590
|
+
|
591
|
+
# Return the range of row indexes for boundary number +k+
|
592
|
+
def group_row_range(k)
|
593
|
+
last_k = boundaries.size - 1
|
594
|
+
if k < 0 || k > last_k
|
595
|
+
raise ArgumentError, "boundary number '#{k}' out of range in boundary_row_range"
|
596
|
+
end
|
597
|
+
|
598
|
+
if boundaries.size == 1
|
599
|
+
(0..boundaries.first)
|
600
|
+
elsif k.zero?
|
601
|
+
# Keep index at or above zero
|
602
|
+
(0..boundaries[k])
|
603
|
+
else
|
604
|
+
((boundaries[k - 1] + 1)..boundaries[k])
|
605
|
+
end
|
606
|
+
end
|
607
|
+
|
608
|
+
# Return an Array of Column objects for header +col+ representing a
|
609
|
+
# sub-column for each group in the table under that header.
|
610
|
+
def group_cols(col)
|
611
|
+
normalize_boundaries
|
612
|
+
cols = []
|
613
|
+
(0..boundaries.size - 1).each do |k|
|
614
|
+
range = group_row_range(k)
|
615
|
+
tab_col = column(col)
|
616
|
+
gitems = tab_col.items[range]
|
617
|
+
cols << Column.new(header: col, items: gitems,
|
618
|
+
type: tab_col.type, tolerant: tab_col.tolerant?)
|
619
|
+
end
|
620
|
+
cols
|
621
|
+
end
|
622
|
+
|
509
623
|
# :category: Operators
|
510
624
|
|
511
625
|
# Return this table mutated with all groups removed. Useful after something
|
@@ -513,56 +627,99 @@ module FatTable
|
|
513
627
|
# the groups displayed in the output. This modifies the input table, so is a
|
514
628
|
# departure from the otherwise immutability of Tables.
|
515
629
|
def degroup!
|
516
|
-
|
630
|
+
self.explicit_boundaries = []
|
517
631
|
self
|
518
632
|
end
|
519
633
|
|
520
634
|
# Mark a group boundary at row +row+, and if +row+ is +nil+, mark the last
|
521
|
-
# row in the table as a group boundary.
|
522
|
-
#
|
523
|
-
|
524
|
-
|
525
|
-
|
526
|
-
|
527
|
-
|
635
|
+
# row in the table as a group boundary. An attempt to add a boundary to
|
636
|
+
# an empty table has no effect. We adopt the convention that the last row
|
637
|
+
# of the table always marks an implicit boundary even if it is not in the
|
638
|
+
# @explicit_boundaries array. When we "mark" a boundary, we intend it to
|
639
|
+
# be an explicit boundary, even if it marks the last row of the table.
|
640
|
+
def mark_boundary(row_num = nil)
|
641
|
+
return self if empty?
|
642
|
+
|
643
|
+
if row_num
|
644
|
+
unless row_num < size
|
645
|
+
raise ArgumentError, "can't mark boundary at row #{row_num}, last row is #{size - 1}"
|
646
|
+
end
|
647
|
+
unless row_num >= 0
|
648
|
+
raise ArgumentError, "can't mark boundary at non-positive row #{row_num}"
|
649
|
+
end
|
650
|
+
explicit_boundaries.push(row_num)
|
651
|
+
elsif size > 0
|
652
|
+
explicit_boundaries.push(size - 1)
|
528
653
|
end
|
654
|
+
normalize_boundaries
|
655
|
+
self
|
529
656
|
end
|
530
657
|
|
531
|
-
protected
|
532
|
-
|
533
658
|
# :stopdoc:
|
534
659
|
|
535
660
|
# Make sure the explicit boundaries are unique and sorted.
|
536
661
|
def normalize_boundaries
|
537
662
|
unless empty?
|
538
|
-
|
539
|
-
|
663
|
+
self.explicit_boundaries = explicit_boundaries.uniq.sort
|
664
|
+
end
|
665
|
+
explicit_boundaries
|
666
|
+
end
|
667
|
+
|
668
|
+
# Return the explicit_boundaries, augmented by an implicit boundary for
|
669
|
+
# the end of the table, unless it's already an explicit boundary.
|
670
|
+
def boundaries
|
671
|
+
return [] if empty?
|
672
|
+
|
673
|
+
if explicit_boundaries.last == size - 1
|
674
|
+
explicit_boundaries
|
675
|
+
else
|
676
|
+
explicit_boundaries + [size - 1]
|
540
677
|
end
|
541
|
-
boundaries
|
542
678
|
end
|
543
679
|
|
680
|
+
protected
|
681
|
+
|
544
682
|
# Concatenate the array of argument bounds to this table's boundaries, but
|
545
683
|
# increase each of the indexes in bounds by shift. This is used in the
|
546
684
|
# #union_all method.
|
547
685
|
def append_boundaries(bounds, shift: 0)
|
548
|
-
@
|
686
|
+
@explicit_boundaries += bounds.map { |k| k + shift }
|
549
687
|
end
|
550
688
|
|
551
|
-
# Return the group number to which row ~
|
552
|
-
# user's point of view are indexed starting at
|
553
|
-
def row_index_to_group_index(
|
689
|
+
# Return the group number to which row ~row_num~ belongs. Groups, from the
|
690
|
+
# user's point of view are indexed starting at 0.
|
691
|
+
def row_index_to_group_index(row_num)
|
554
692
|
boundaries.each_with_index do |b_last, g_num|
|
555
|
-
return (g_num + 1) if
|
693
|
+
return (g_num + 1) if row_num <= b_last
|
694
|
+
end
|
695
|
+
0
|
696
|
+
end
|
697
|
+
|
698
|
+
# Return the index of the first row in group number +grp_num+
|
699
|
+
def first_row_num_in_group(grp_num)
|
700
|
+
if grp_num >= boundaries.size || grp_num < 0
|
701
|
+
raise ArgumentError, "group number #{grp_num} out of bounds"
|
702
|
+
end
|
703
|
+
|
704
|
+
grp_num.zero? ? 0 : boundaries[grp_num - 1] + 1
|
705
|
+
end
|
706
|
+
|
707
|
+
# Return the index of the last row in group number +grp_num+
|
708
|
+
def last_row_num_in_group(grp_num)
|
709
|
+
if grp_num > boundaries.size || grp_num < 0
|
710
|
+
raise ArgumentError, "group number #{grp_num} out of bounds"
|
711
|
+
else
|
712
|
+
boundaries[grp_num]
|
556
713
|
end
|
557
|
-
1
|
558
714
|
end
|
559
715
|
|
560
|
-
|
716
|
+
# Return the rows for group number +grp_num+.
|
717
|
+
def group_rows(grp_num) # :nodoc:
|
561
718
|
normalize_boundaries
|
562
|
-
return [] unless
|
719
|
+
return [] unless grp_num < boundaries.size
|
563
720
|
|
564
|
-
first =
|
565
|
-
last =
|
721
|
+
first = first_row_num_in_group(grp_num)
|
722
|
+
last = last_row_num_in_group(grp_num)
|
566
723
|
rows_range(first, last)
|
567
724
|
end
|
568
725
|
|
@@ -587,22 +744,43 @@ module FatTable
|
|
587
744
|
# After sorting, the output Table will have group boundaries added after
|
588
745
|
# each row where the sort key changes.
|
589
746
|
def order_by(*sort_heads)
|
590
|
-
|
591
|
-
|
592
|
-
sort_heads = sort_heads.map { |h| h.to_s.sub(/\!\z/, '').to_sym }
|
593
|
-
rev_heads = rev_heads.map { |h| h.to_s.sub(/\!\z/, '').to_sym }
|
747
|
+
# Sort the rows in order and add to new_rows.
|
748
|
+
key_hash = partition_sort_keys(sort_heads)
|
594
749
|
new_rows = rows.sort do |r1, r2|
|
595
|
-
|
596
|
-
|
597
|
-
|
750
|
+
# Set the sort keys based on direction
|
751
|
+
key1 = []
|
752
|
+
key2 = []
|
753
|
+
key_hash.each_pair do |h, dir|
|
754
|
+
if dir == :forward
|
755
|
+
key1 << r1[h]
|
756
|
+
key2 << r2[h]
|
757
|
+
else
|
758
|
+
key1 << r2[h]
|
759
|
+
key2 << r1[h]
|
760
|
+
end
|
761
|
+
end
|
762
|
+
# Make any booleans comparable with <=>
|
763
|
+
key1 = key1.map_booleans
|
764
|
+
key2 = key2.map_booleans
|
765
|
+
|
766
|
+
# If there are any nils, <=> will return nil, and we have to use the
|
767
|
+
# special comparison method, compare_with_nils, instead.
|
768
|
+
result = (key1 <=> key2)
|
769
|
+
result.nil? ? compare_with_nils(key1, key2) : result
|
598
770
|
end
|
599
|
-
|
600
|
-
#
|
601
|
-
|
771
|
+
|
772
|
+
# Add the new_rows to the table, but mark a group boundary at the points
|
773
|
+
# where the sort key changes value. NB: I use self.class.new here
|
774
|
+
# rather than Table.new because if this class is inherited, I want the
|
775
|
+
# new_tab to be an instance of the subclass. With Table.new, this
|
776
|
+
# method's result will be an instance of FatTable::Table rather than of
|
777
|
+
# the subclass.
|
778
|
+
new_tab = empty_dup
|
602
779
|
last_key = nil
|
603
780
|
new_rows.each_with_index do |nrow, k|
|
604
781
|
new_tab << nrow
|
605
|
-
key = nrow.fetch_values(*sort_heads)
|
782
|
+
# key = nrow.fetch_values(*sort_heads)
|
783
|
+
key = nrow.fetch_values(*key_hash.keys)
|
606
784
|
new_tab.mark_boundary(k - 1) if last_key && key != last_key
|
607
785
|
last_key = key
|
608
786
|
end
|
@@ -610,6 +788,33 @@ module FatTable
|
|
610
788
|
new_tab
|
611
789
|
end
|
612
790
|
|
791
|
+
# :category: Operators
|
792
|
+
|
793
|
+
# Return a new Table sorting the rows of this Table on any expression
|
794
|
+
# +expr+ that is valid with the +select+ method, except that the
|
795
|
+
# expression may end with an exclamation mark +!+ to indicate a reverse
|
796
|
+
# sort. The new table will have an additional column called +sort_key+
|
797
|
+
# populated with the result of evaluating the given expression and will be
|
798
|
+
# sorted (or reverse sorted) on that column.
|
799
|
+
#
|
800
|
+
# tab.order_with('date.year') => table sorted by date's year
|
801
|
+
# tab.order_with('date.year!') => table reverse sorted by date's year
|
802
|
+
#
|
803
|
+
# After sorting, the output Table will have group boundaries added after
|
804
|
+
# each row where the sort key changes.
|
805
|
+
def order_with(expr)
|
806
|
+
unless expr.is_a?(String)
|
807
|
+
raise "must call FatTable::Table\#order_with with a single string expression"
|
808
|
+
end
|
809
|
+
rev = false
|
810
|
+
if expr.match?(/\s*!\s*\z/)
|
811
|
+
rev = true
|
812
|
+
expr = expr.sub(/\s*!\s*\z/, '')
|
813
|
+
end
|
814
|
+
sort_sym = rev ? :sort_key! : :sort_key
|
815
|
+
dup.select(*headers, sort_key: expr).order_by(sort_sym)
|
816
|
+
end
|
817
|
+
|
613
818
|
# :category: Operators
|
614
819
|
#
|
615
820
|
# Return a Table having the selected column expressions. Each expression can
|
@@ -713,7 +918,7 @@ module FatTable
|
|
713
918
|
before: before_hook,
|
714
919
|
after: after_hook)
|
715
920
|
# Compute the new Table from this Table
|
716
|
-
result =
|
921
|
+
result = empty_dup
|
717
922
|
normalize_boundaries
|
718
923
|
rows.each_with_index do |old_row, old_k|
|
719
924
|
# Set the group number in the before hook and run the hook with the
|
@@ -723,7 +928,15 @@ module FatTable
|
|
723
928
|
ev.eval_before_hook(locals: old_row)
|
724
929
|
# Compute the new row.
|
725
930
|
new_row = {}
|
726
|
-
|
931
|
+
# Allow the :omni col to stand for all columns if it is alone and
|
932
|
+
# first.
|
933
|
+
cols_to_include =
|
934
|
+
if cols.size == 1 && cols.first.as_sym == :omni
|
935
|
+
headers
|
936
|
+
else
|
937
|
+
cols
|
938
|
+
end
|
939
|
+
cols_to_include.each do |k|
|
727
940
|
h = k.as_sym
|
728
941
|
msg = "Column '#{h}' in select does not exist"
|
729
942
|
raise UserError, msg unless column?(h)
|
@@ -752,7 +965,7 @@ module FatTable
|
|
752
965
|
ev.eval_after_hook(locals: new_row)
|
753
966
|
result << new_row
|
754
967
|
end
|
755
|
-
result.
|
968
|
+
result.explicit_boundaries = explicit_boundaries
|
756
969
|
result.normalize_boundaries
|
757
970
|
result
|
758
971
|
end
|
@@ -770,9 +983,14 @@ module FatTable
|
|
770
983
|
# tab.where('@row.even? && shares > 500') => even rows with lots of shares
|
771
984
|
def where(expr)
|
772
985
|
expr = expr.to_s
|
773
|
-
result =
|
986
|
+
result = empty_dup
|
774
987
|
headers.each do |h|
|
775
|
-
col =
|
988
|
+
col =
|
989
|
+
if tolerant_col?(h)
|
990
|
+
Column.new(header: h, tolerant: true)
|
991
|
+
else
|
992
|
+
Column.new(header: h)
|
993
|
+
end
|
776
994
|
result.add_column(col)
|
777
995
|
end
|
778
996
|
ev = Evaluator.new(ivars: { row: 0, group: 0 })
|
@@ -792,7 +1010,7 @@ module FatTable
|
|
792
1010
|
# Return a new table with all duplicate rows eliminated. Resets groups. Same
|
793
1011
|
# as #uniq.
|
794
1012
|
def distinct
|
795
|
-
result =
|
1013
|
+
result = empty_dup
|
796
1014
|
uniq_rows = rows.uniq
|
797
1015
|
uniq_rows.each do |row|
|
798
1016
|
result << row
|
@@ -889,38 +1107,6 @@ module FatTable
|
|
889
1107
|
set_operation(other, :difference, distinct: false)
|
890
1108
|
end
|
891
1109
|
|
892
|
-
private
|
893
|
-
|
894
|
-
# Apply the set operation given by ~oper~ between this table and the other
|
895
|
-
# table given in the first argument. If distinct is true, eliminate
|
896
|
-
# duplicates from the result.
|
897
|
-
def set_operation(other, oper = :+, distinct: true, add_boundaries: true, inherit_boundaries: false)
|
898
|
-
unless columns.size == other.columns.size
|
899
|
-
msg = "can't apply set ops to tables with a different number of columns"
|
900
|
-
raise UserError, msg
|
901
|
-
end
|
902
|
-
unless columns.map(&:type) == other.columns.map(&:type)
|
903
|
-
msg = "can't apply a set ops to tables with different column types."
|
904
|
-
raise UserError, msg
|
905
|
-
end
|
906
|
-
other_rows = other.rows.map { |r| r.replace_keys(headers) }
|
907
|
-
result = Table.new
|
908
|
-
new_rows = rows.send(oper, other_rows)
|
909
|
-
new_rows.each_with_index do |row, k|
|
910
|
-
result << row
|
911
|
-
result.mark_boundary if k == size - 1 && add_boundaries
|
912
|
-
end
|
913
|
-
if inherit_boundaries
|
914
|
-
result.boundaries = normalize_boundaries
|
915
|
-
other.normalize_boundaries
|
916
|
-
result.append_boundaries(other.boundaries, shift: size)
|
917
|
-
end
|
918
|
-
result.normalize_boundaries
|
919
|
-
distinct ? result.distinct : result
|
920
|
-
end
|
921
|
-
|
922
|
-
public
|
923
|
-
|
924
1110
|
# An Array of symbols for the valid join types.
|
925
1111
|
JOIN_TYPES = %i[inner left right full cross].freeze
|
926
1112
|
|
@@ -1011,7 +1197,7 @@ module FatTable
|
|
1011
1197
|
join_exp, other_common_heads =
|
1012
1198
|
build_join_expression(exps, other, join_type)
|
1013
1199
|
ev = Evaluator.new
|
1014
|
-
result =
|
1200
|
+
result = empty_dup
|
1015
1201
|
other_rows = other.rows
|
1016
1202
|
other_row_matches = Array.new(other_rows.size, false)
|
1017
1203
|
rows.each do |self_row|
|
@@ -1029,14 +1215,14 @@ module FatTable
|
|
1029
1215
|
type: join_type)
|
1030
1216
|
result << out_row
|
1031
1217
|
end
|
1032
|
-
next unless
|
1218
|
+
next unless [:left, :full].include?(join_type)
|
1033
1219
|
next if self_row_matched
|
1034
1220
|
|
1035
1221
|
result << build_out_row(row_a: self_row,
|
1036
1222
|
row_b: other_row_nils,
|
1037
1223
|
type: join_type)
|
1038
1224
|
end
|
1039
|
-
if
|
1225
|
+
if [:right, :full].include?(join_type)
|
1040
1226
|
other_rows.each_with_index do |other_row, k|
|
1041
1227
|
next if other_row_matches[k]
|
1042
1228
|
|
@@ -1165,7 +1351,7 @@ module FatTable
|
|
1165
1351
|
partial_result = nil
|
1166
1352
|
else
|
1167
1353
|
# First of a pair of _a or _b
|
1168
|
-
partial_result =
|
1354
|
+
partial_result = +"(#{a_head}_a == "
|
1169
1355
|
end
|
1170
1356
|
last_sym = a_head
|
1171
1357
|
when /\A(?<sy>.*)_b\z/
|
@@ -1184,7 +1370,7 @@ module FatTable
|
|
1184
1370
|
partial_result = nil
|
1185
1371
|
else
|
1186
1372
|
# First of a pair of _a or _b
|
1187
|
-
partial_result =
|
1373
|
+
partial_result = +"(#{b_head}_b == "
|
1188
1374
|
end
|
1189
1375
|
b_common_heads << b_head
|
1190
1376
|
last_sym = b_head
|
@@ -1259,7 +1445,7 @@ module FatTable
|
|
1259
1445
|
groups = sorted_tab.rows.group_by do |r|
|
1260
1446
|
group_cols.map { |k| r[k] }
|
1261
1447
|
end
|
1262
|
-
result =
|
1448
|
+
result = empty_dup
|
1263
1449
|
groups.each_pair do |_vals, grp_rows|
|
1264
1450
|
result << row_from_group(grp_rows, group_cols, agg_cols)
|
1265
1451
|
end
|
@@ -1269,6 +1455,9 @@ module FatTable
|
|
1269
1455
|
|
1270
1456
|
private
|
1271
1457
|
|
1458
|
+
# Collapse a group of rows to a single row by applying the aggregator from
|
1459
|
+
# the +agg_cols+ to the items in that column and the presumably identical
|
1460
|
+
# value in the +grp_cols+ to those columns.
|
1272
1461
|
def row_from_group(rows, grp_cols, agg_cols)
|
1273
1462
|
new_row = {}
|
1274
1463
|
grp_cols.each do |h|
|
@@ -1291,15 +1480,6 @@ module FatTable
|
|
1291
1480
|
|
1292
1481
|
# :category: Constructors
|
1293
1482
|
|
1294
|
-
# Add a group boundary mark at the given row, or at the end of the table
|
1295
|
-
# by default.
|
1296
|
-
def add_boundary(at_row = nil)
|
1297
|
-
row = at_row || (size - 1)
|
1298
|
-
@boundaries << row
|
1299
|
-
end
|
1300
|
-
|
1301
|
-
# :category: Constructors
|
1302
|
-
|
1303
1483
|
# Add a +row+ represented by a Hash having the headers as keys. If +mark:+
|
1304
1484
|
# is set true, mark this row as a boundary. All tables should be built
|
1305
1485
|
# ultimately using this method as a primitive.
|
@@ -1312,7 +1492,7 @@ module FatTable
|
|
1312
1492
|
# This column is new, so it needs nil items for all prior rows lest
|
1313
1493
|
# the value be added to a prior row.
|
1314
1494
|
items = Array.new(size, nil)
|
1315
|
-
columns << Column.new(header: h, items: items)
|
1495
|
+
columns << Column.new(header: h, items: items, tolerant: tolerant_col?(h))
|
1316
1496
|
end
|
1317
1497
|
headers.each do |h|
|
1318
1498
|
# NB: This adds a nil if h is not in row.
|
@@ -1486,5 +1666,74 @@ module FatTable
|
|
1486
1666
|
yield fmt if block_given?
|
1487
1667
|
fmt.output
|
1488
1668
|
end
|
1669
|
+
|
1670
|
+
private
|
1671
|
+
|
1672
|
+
# Apply the set operation given by ~oper~ between this table and the other
|
1673
|
+
# table given in the first argument. If distinct is true, eliminate
|
1674
|
+
# duplicates from the result.
|
1675
|
+
def set_operation(other, oper = :+, distinct: true, add_boundaries: true, inherit_boundaries: false)
|
1676
|
+
unless columns.size == other.columns.size
|
1677
|
+
msg = "can't apply set ops to tables with a different number of columns"
|
1678
|
+
raise UserError, msg
|
1679
|
+
end
|
1680
|
+
unless columns.map(&:type) == other.columns.map(&:type)
|
1681
|
+
msg = "can't apply a set ops to tables with different column types."
|
1682
|
+
raise UserError, msg
|
1683
|
+
end
|
1684
|
+
other_rows = other.rows.map { |r| r.replace_keys(headers) }
|
1685
|
+
result = empty_dup
|
1686
|
+
new_rows = rows.send(oper, other_rows)
|
1687
|
+
new_rows.each_with_index do |row, k|
|
1688
|
+
result << row
|
1689
|
+
result.mark_boundary if k == size - 1 && add_boundaries
|
1690
|
+
end
|
1691
|
+
if inherit_boundaries
|
1692
|
+
result.explicit_boundaries = boundaries
|
1693
|
+
result.append_boundaries(other.boundaries, shift: size)
|
1694
|
+
end
|
1695
|
+
result.normalize_boundaries
|
1696
|
+
distinct ? result.distinct : result
|
1697
|
+
end
|
1698
|
+
|
1699
|
+
# Return a hash with the key being the header to sort on and the value
|
1700
|
+
# being either :forward or :reverse to indicate the sort order on that
|
1701
|
+
# key.
|
1702
|
+
def partition_sort_keys(keys)
|
1703
|
+
result = {}
|
1704
|
+
[keys].flatten.each do |h|
|
1705
|
+
if h.to_s.match?(/\s*!\s*\z/)
|
1706
|
+
result[h.to_s.sub(/\s*!\s*\z/, '').to_sym] = :reverse
|
1707
|
+
else
|
1708
|
+
result[h] = :forward
|
1709
|
+
end
|
1710
|
+
end
|
1711
|
+
result
|
1712
|
+
end
|
1713
|
+
|
1714
|
+
# The <=> operator cannot handle nils without some help. Treat a nil as
|
1715
|
+
# smaller than any other value, but equal to other nils. The two keys are assumed to be arrays of values to be
|
1716
|
+
# compared with <=>.
|
1717
|
+
def compare_with_nils(key1, key2)
|
1718
|
+
result = nil
|
1719
|
+
key1.zip(key2) do |k1, k2|
|
1720
|
+
if k1.nil? && k2.nil?
|
1721
|
+
result = 0
|
1722
|
+
next
|
1723
|
+
elsif k1.nil?
|
1724
|
+
result = -1
|
1725
|
+
break
|
1726
|
+
elsif k2.nil?
|
1727
|
+
result = 1
|
1728
|
+
break
|
1729
|
+
elsif (k1 <=> k2) == 0
|
1730
|
+
next
|
1731
|
+
else
|
1732
|
+
result = (k1 <=> k2)
|
1733
|
+
break
|
1734
|
+
end
|
1735
|
+
end
|
1736
|
+
result
|
1737
|
+
end
|
1489
1738
|
end
|
1490
1739
|
end
|