fat_table 0.2.2
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.gitignore +22 -0
- data/.rspec +2 -0
- data/.travis.yml +5 -0
- data/Gemfile +4 -0
- data/LICENSE.txt +22 -0
- data/README.org +2106 -0
- data/README.rdoc +1965 -0
- data/Rakefile +12 -0
- data/TODO.org +31 -0
- data/bin/ft_console +119 -0
- data/bin/setup +8 -0
- data/fat_table.gemspec +80 -0
- data/lib/fat_table.rb +225 -0
- data/lib/fat_table/column.rb +522 -0
- data/lib/fat_table/db_handle.rb +81 -0
- data/lib/fat_table/errors.rb +13 -0
- data/lib/fat_table/evaluator.rb +55 -0
- data/lib/fat_table/formatters.rb +7 -0
- data/lib/fat_table/formatters/aoa_formatter.rb +91 -0
- data/lib/fat_table/formatters/aoh_formatter.rb +91 -0
- data/lib/fat_table/formatters/formatter.rb +1248 -0
- data/lib/fat_table/formatters/latex_formatter.rb +208 -0
- data/lib/fat_table/formatters/org_formatter.rb +72 -0
- data/lib/fat_table/formatters/term_formatter.rb +297 -0
- data/lib/fat_table/formatters/text_formatter.rb +92 -0
- data/lib/fat_table/table.rb +1322 -0
- data/lib/fat_table/version.rb +4 -0
- metadata +331 -0
@@ -0,0 +1,92 @@
|
|
1
|
+
module FatTable
  # Output the table as plain text. This is almost identical to OrgFormatter
  # except that dates do not get formatted as inactive timestamps and the
  # connector at the beginning of hlines is a '+' rather than a '|' as for org
  # tables.
  #
  # Every method below is private and returns a String fragment (or a
  # boolean for #aligned?); presumably the Formatter superclass calls them
  # while assembling the output -- confirm against formatter.rb.
  class TextFormatter < Formatter
    private

    # Does this Formatter require a second pass over the cells to align the
    # columns according to the alignment formatting instruction to the width of
    # the widest cell in each column?
    def aligned?
      true
    end

    # Rule made of '=' characters, e.g. "+====+===+\n".
    def pre_header(widths)
      text_rule(widths, '=')
    end

    # String emitted at the start of every row.
    def pre_row
      '|'
    end

    # Nothing is emitted before a cell; the argument is ignored.
    def pre_cell(_h)
      ''
    end

    # Plain text needs no quoting, so the cell value is returned untouched.
    def quote_cell(v)
      v
    end

    # Nothing is emitted after a cell.
    def post_cell
      ''
    end

    # Separator emitted between adjacent cells.
    def inter_cell
      '|'
    end

    # String emitted at the end of every row, including the newline.
    def post_row
      "|\n"
    end

    # Rule made of '-' characters, e.g. "+----+---+\n".
    def hline(widths)
      text_rule(widths, '-')
    end

    # The group and footer hooks below all contribute nothing to the output.
    def pre_group
      ''
    end

    def post_group
      ''
    end

    def pre_gfoot
      ''
    end

    def post_gfoot
      ''
    end

    def pre_foot
      ''
    end

    def post_foot
      ''
    end

    # Rule made of '=' characters, identical in form to #pre_header.
    def post_footers(widths)
      text_rule(widths, '=')
    end

    # Build one horizontal rule: a leading '+', then for each value +w+ in
    # +widths.values+ a run of (w + 2) +fill+ characters followed by '+',
    # and finally a newline. With no widths the result is "+\n".
    #
    # widths:: object responding to #values with Integer widths (presumably
    #          a Hash keyed by column header -- confirm against Formatter).
    # fill::   the single-character String used to draw the rule.
    def text_rule(widths, fill)
      segments = widths.values.map { |w| fill * (w + 2) + '+' }
      "+#{segments.join}\n"
    end
  end
end
|
@@ -0,0 +1,1322 @@
|
|
1
|
+
module FatTable
|
2
|
+
# A container for a two-dimensional table. All cells in the table must be a
|
3
|
+
# String, a DateTime (or Date), a Numeric (Bignum, Integer, or BigDecimal), or
|
4
|
+
# a Boolean (TrueClass or FalseClass). All columns must be of one of those
|
5
|
+
# types or be a string convertible into one of them. It is considered an error
|
6
|
+
# if a single column contains cells of different types. Any cell that cannot
|
7
|
+
# be parsed as one of the Numeric, DateTime, or Boolean types will be treated
|
8
|
+
# as a String and have #to_s applied. Until the column type is determined, it
|
9
|
+
# will have the type NilClass.
|
10
|
+
#
|
11
|
+
# You can initialize a Table in several ways:
|
12
|
+
#
|
13
|
+
# 1. ::new, which will return an empty table to which rows or
|
14
|
+
# columns can be added later,
|
15
|
+
#
|
16
|
+
# 2. ::from_csv_file('table.csv'), where the argument is the
|
17
|
+
# name of a .csv file, in which case, the headers will be taken from the
|
18
|
+
# first row of the data.
|
19
|
+
#
|
20
|
+
# 3. ::from_org_file('table.org'), where the argument is the
|
21
|
+
# name of an .org file and the first Emacs org mode table found in the file
|
22
|
+
# will be read. The headers will be taken from the first row of the table
|
23
|
+
# if it is followed by an hrule, otherwise the headers will be synthesized
|
24
|
+
# as +:col_1+, +:col_2+, etc.
|
25
|
+
#
|
26
|
+
# 4. ::from_csv_string('csv_string'), where +csv_string+ is a
|
27
|
+
# string in the same form as a .csv file, and it will be parsed in the same
|
28
|
+
# way.
|
29
|
+
#
|
30
|
+
# 5. ::from_org_string('org_string'), where +org_string+ is a
|
31
|
+
# string in the same form as an Emacs org mode table, and it will be parsed
|
32
|
+
# in the same way.
|
33
|
+
#
|
34
|
+
# 6. ::from_aoa(+aoa+), where +aoa+ is an Array of elements that
|
35
|
+
# are either Arrays or nil. The headers will be taken from the first Array
|
36
|
+
# if it is followed by a nil, otherwise the headers will be synthesized as
|
37
|
+
# +:col_1+, +:col_2+, etc. Each inner Array will be read as a row of the
|
38
|
+
# table and each nil, after the first, will be taken as a group boundary.
|
39
|
+
#
|
40
|
+
# 7. ::from_aoh(+aoh+), where +aoh+ is an Array of elements each
|
41
|
+
# of which is either (1) a Hash (or any object that responds to #to_h) or
|
42
|
+
# (2) a nil. All Hashes must have the same keys, which become the headers
|
43
|
+
# for the table. Each nil will be taken as marking a group boundary.
|
44
|
+
#
|
45
|
+
# 8. ::from_table(+table+), where +table+ is another FatTable::Table
|
46
|
+
# object.
|
47
|
+
#
|
48
|
+
# In the resulting Table, the headers are converted into symbols, with all
|
49
|
+
# spaces converted to underscore and everything down-cased. So, the heading,
|
50
|
+
# 'Two Words' becomes the header +:two_words+.
|
51
|
+
class Table
|
52
|
+
|
53
|
+
# An Array of FatTable::Columns that constitute the table.
|
54
|
+
attr_reader :columns
|
55
|
+
|
56
|
+
###########################################################################
|
57
|
+
# Constructors
|
58
|
+
###########################################################################
|
59
|
+
|
60
|
+
# :category: Constructors
|
61
|
+
# Return an empty FatTable::Table object.
|
62
|
+
# Start with no columns and no group boundaries.
def initialize
  @columns, @boundaries = [], []
end
|
66
|
+
|
67
|
+
# :category: Constructors
|
68
|
+
# Construct a Table from the contents of a CSV file. Headers will be taken
|
69
|
+
# from the first row and converted to symbols.
|
70
|
+
# Open +fname+ for reading and pass the IO to from_csv_io, returning
# whatever it returns. The block form of File.open closes the file when
# the block exits.
def self.from_csv_file(fname)
  File.open(fname, 'r') { |csv_io| from_csv_io(csv_io) }
end
|
75
|
+
|
76
|
+
# :category: Constructors
|
77
|
+
# Construct a Table from a string, treated as the input from a CSV file.
|
78
|
+
# Wrap +str+ in a StringIO so it can be read through from_csv_io exactly
# like an open file.
def self.from_csv_string(str)
  csv_io = StringIO.new(str)
  from_csv_io(csv_io)
end
|
81
|
+
|
82
|
+
# :category: Constructors
|
83
|
+
|
84
|
+
# Construct a Table from the first table found in the given Emacs org-mode
|
85
|
+
# file. Headers are taken from the first row if the second row is an hrule.
|
86
|
+
# Otherwise, synthetic headers of the form +:col_1+, +:col_2+, etc. are
|
87
|
+
# created.
|
88
|
+
# Open +fname+ for reading and pass the IO to from_org_io, returning
# whatever it returns. The block form of File.open closes the file when
# the block exits.
def self.from_org_file(fname)
  File.open(fname, 'r') { |org_io| from_org_io(org_io) }
end
|
93
|
+
|
94
|
+
# :category: Constructors
|
95
|
+
# Construct a Table from a string, treated as the contents of an org-mode
|
96
|
+
# file.
|
97
|
+
# Wrap +str+ in a StringIO so it can be read through from_org_io exactly
# like an open file.
def self.from_org_string(str)
  org_io = StringIO.new(str)
  from_org_io(org_io)
end
|
100
|
+
|
101
|
+
# :category: Constructors
|
102
|
+
|
103
|
+
# Construct a new table from an array of arrays. By default, with +hlines+
|
104
|
+
# false, do not look for separators, i.e. nil or a string of dashes, just
|
105
|
+
# treat the first row as headers. With +hlines+ true, expect separators to
|
106
|
+
# mark the header row and any boundaries. If the second element of the array
|
107
|
+
# is a +nil+, interpret the first element of the array as a row of headers.
|
108
|
+
# Otherwise, synthesize headers of the form +:col_1+, +:col_2+, ... and so
|
109
|
+
# forth. The remaining elements are taken as the body of the table, except
|
110
|
+
# that if an element of the outer array is a +nil+, mark the preceding row
|
111
|
+
# as a boundary. Note: In org mode code blocks, by default (+:hlines no+)
|
112
|
+
# all hlines are stripped from the table, otherwise (+:hlines yes+) they are
|
113
|
+
# indicated with nil elements in the outer array.
|
114
|
+
# Public entry point that forwards +aoa+ and the +hlines+ flag unchanged to
# the private helper from_array_of_arrays.
def self.from_aoa(aoa, hlines: false)
  options = { hlines: hlines }
  from_array_of_arrays(aoa, **options)
end
|
117
|
+
|
118
|
+
# :category: Constructors
|
119
|
+
|
120
|
+
# Construct a Table from an array of hashes, or any objects that respond to
|
121
|
+
# the #to_h method. All hashes must have the same keys, which, when
|
122
|
+
# converted to symbols will become the headers for the Table. If hlines is
|
123
|
+
# set true, mark a group boundary whenever a nil, rather than a hash
|
124
|
+
# appears in the outer array.
|
125
|
+
# Build a Table from +aoh+ by delegating to from_array_of_hashes, provided
# the first element responds to #to_h.
#
# aoh::    Array of Hashes (or objects responding to #to_h) and nils.
# hlines:: passed through to from_array_of_hashes.
#
# Raises UserError naming the class of the first element when that element
# does not respond to #to_h. The message previously interpolated an
# undefined local (+input+), which surfaced as a NameError instead.
def self.from_aoh(aoh, hlines: false)
  unless aoh.first.respond_to?(:to_h)
    raise UserError,
          "Cannot initialize Table with an array of #{aoh.first.class}"
  end
  from_array_of_hashes(aoh, hlines: hlines)
end
|
133
|
+
|
134
|
+
# :category: Constructors
|
135
|
+
|
136
|
+
# Construct a Table from another Table. Inherit any group boundaries from
|
137
|
+
# the input table.
|
138
|
+
# Return the result of calling #deep_dup on +table+.
def self.from_table(table)
  copy = table.deep_dup
  copy
end
|
141
|
+
|
142
|
+
# :category: Constructors
|
143
|
+
|
144
|
+
# Construct a Table by running a SQL query against the database set up with
|
145
|
+
# FatTable.set_db. Return the Table with the query results as rows.
|
146
|
+
# Run +query+ against FatTable.db and return a new Table holding one row
# per hash yielded by the statement handle's #fetch_hash.
#
# query:: SQL text passed unmodified to FatTable.db.prepare.
#
# Raises UserError when FatTable.db is nil.
#
# The statement handle is now released in an +ensure+ so it is not leaked
# when execution or fetching raises. NOTE(review): the
# prepare/execute/fetch_hash calls look like the Ruby DBI StatementHandle
# API, whose handles are closed with #finish -- confirm against
# db_handle.rb. The respond_to? guard keeps other handle types working.
def self.from_sql(query)
  raise UserError, 'FatTable.db must be set with FatTable.set_db' if FatTable.db.nil?
  result = Table.new
  sth = FatTable.db.prepare(query)
  begin
    sth.execute
    sth.fetch_hash { |h| result << h }
  ensure
    sth.finish if sth.respond_to?(:finish)
  end
  result
end
|
156
|
+
|
157
|
+
############################################################################
|
158
|
+
# Class-level constructor helpers
|
159
|
+
############################################################################
|
160
|
+
|
161
|
+
class << self
|
162
|
+
private
|
163
|
+
|
164
|
+
# Construct table from an array of hashes or an array of any object that can
|
165
|
+
# respond to #to_h. If an array element is a nil, mark it as a group
|
166
|
+
# boundary in the Table.
|
167
|
+
# Append each element of +hashes+ (converted with #to_h) to a fresh table
# and return it. A nil element marks a group boundary at the current last
# row, and raises UserError unless +hlines+ is true.
def from_array_of_hashes(hashes, hlines: false)
  hashes.each_with_object(new) do |item, table|
    if item.nil?
      raise UserError, 'found an hline in input with hlines false; try setting hlines true' unless hlines
      table.mark_boundary
    else
      table << item.to_h
    end
  end
end
|
181
|
+
|
182
|
+
# Construct a new table from an array of arrays. By default, with hlines
|
183
|
+
# false, do not look for separators, i.e. nils, just treat the first row
|
184
|
+
# as headers. With hlines true, expect nil separators to mark the header
|
185
|
+
# row and any boundaries. If the second element of the array is a nil,
|
186
|
+
# interpret the first element of the array as a row of headers. Otherwise,
|
187
|
+
# synthesize headers of the form :col_1, :col_2, ... and so forth. The
|
188
|
+
# remaining elements are taken as the body of the table, except that if an
|
189
|
+
# element of the outer array is a nil, mark the preceding row as a group
|
190
|
+
# boundary. Note: In org mode code blocks, by default (:hlines no) all
|
191
|
+
# hlines are stripped from the table, otherwise (:hlines yes) they are
|
192
|
+
# indicated with nil elements in the outer array as expected by this
|
193
|
+
# method when hlines is set true.
|
194
|
+
# Build a table from +rows+, an Array whose elements are row Arrays or nil
# (a nil stands for an hline).
#
# rows::   Array of Arrays and nils.
# hlines:: when false, row 0 supplies the headers and any nil row is an
#          error; when true, row 0 supplies the headers only if row 1 is an
#          actual nil element, otherwise headers :col_1, :col_2, ... are
#          synthesized and every row is data.
#
# Returns the new table. Raises UserError on a nil row when +hlines+ is
# false.
#
# Two crashes in the previous version are handled here:
# * an empty +rows+ now yields an empty table instead of NoMethodError;
# * with +hlines+ true, a single-row input is no longer mistaken for
#   "header followed by hline" merely because rows[1] is nil when out of
#   range; that row is now data under synthesized headers.
def from_array_of_arrays(rows, hlines: false)
  result = new
  return result if rows.empty?

  if !hlines
    # Take the first row as headers; second row et seq. are data.
    headers = rows[0].map(&:to_s).map(&:as_sym)
    first_data_row = 1
  elsif rows.size > 1 && rows[1].nil?
    # Row 0 is headers, row 1 is an hline, row 2 et seq. are data.
    headers = rows[0].map(&:to_s).map(&:as_sym)
    first_data_row = 2
  else
    # Synthesize headers; row 0 et seq. are data.
    headers = (1..rows[0].size).map { |k| "col_#{k}".as_sym }
    first_data_row = 0
  end
  # first_data_row never exceeds rows.size here, so the slice is an Array.
  rows[first_data_row..-1].each do |row|
    if row.nil?
      unless hlines
        raise UserError, 'found an hline in input with hlines false; try setting hlines true'
      end
      result.mark_boundary
      next
    end
    cells = row.map { |s| s.to_s.strip }
    result << Hash[headers.zip(cells)]
  end
  result
end
|
228
|
+
|
229
|
+
# Read CSV from +io+ with the first row as headers (converted by CSV's
# :symbol header converter), skipping blank lines, and append every row as
# a Hash to a fresh table, which is returned.
def from_csv_io(io)
  table = new
  options = { headers: true, header_converters: :symbol, skip_blanks: true }
  ::CSV.new(io, **options).each { |csv_row| table << csv_row.to_h }
  table
end
|
237
|
+
|
238
|
+
# Form rows of table by reading the first table found in the org file. The
|
239
|
+
# header row must be marked with an hline (i.e, a row that looks like
|
240
|
+
# '|---+--...--|') and groups of rows may be marked with hlines to
|
241
|
+
# indicate group boundaries.
|
242
|
+
# Collect the first org-mode table found in +io+ and build a Table from it
# via from_array_of_arrays(rows, hlines: true).
#
# Lines are skipped until one starts (after optional whitespace) with '|'.
# If that first table line is an hrule it is dropped; otherwise it becomes
# the first row. Each later table line contributes either nil (hrule) or an
# Array of cleaned cell strings, and reading stops at the first line that
# is not a table line.
#
# Behavior is unchanged from the previous version. Removed: an unreachable
# `line !~ table_re` branch, the +header_found+ flag (both of its branches
# pushed nil), and the duplicated row-splitting code.
def from_org_io(io)
  table_re = /\A\s*\|/
  hrule_re = /\A\s*\|[-+]+/
  # Strip the outer '|' delimiters, split on the inner ones, clean each cell.
  parse_row = lambda do |text|
    text.sub(/\A\s*\|/, '').sub(/\|\s*\z/, '').split('|').map(&:clean)
  end
  rows = []
  table_found = false
  io.each do |line|
    unless table_found
      # Skip through the input until a table is found.
      next unless line =~ table_re
      rows << parse_row.call(line) unless line =~ hrule_re
      table_found = true
      next
    end
    break unless line =~ table_re
    # An hrule is recorded as nil; from_array_of_arrays decides whether it
    # marks the header row or a group boundary.
    rows << (line =~ hrule_re ? nil : parse_row.call(line))
  end
  from_array_of_arrays(rows, hlines: true)
end
|
277
|
+
end
|
278
|
+
|
279
|
+
###########################################################################
|
280
|
+
# Attributes
|
281
|
+
###########################################################################
|
282
|
+
|
283
|
+
# :category: Attributes
|
284
|
+
# Return the Column with the given header.
|
285
|
+
# Return the first column whose header equals key.as_sym, or nil when no
# column matches.
def column(key)
  columns.find { |col| col.header == key.as_sym }
end
|
288
|
+
|
289
|
+
# :category: Attributes
|
290
|
+
# Return the type of the Column with the given header
|
291
|
+
# Look up the column for +key+ and return its #type.
def type(key)
  col = column(key)
  col.type
end
|
294
|
+
|
295
|
+
# :category: Attributes
|
296
|
+
|
297
|
+
# Return the array of items of the column with the given header symbol
|
298
|
+
# +key+, or if +key+ is an Integer, return that row number. So a table's
|
299
|
+
# rows can be accessed by number, and its columns can be accessed by column
|
300
|
+
# header. Also, double indexing works in either row-major or column-major
|
301
|
+
# order: \tab\[:id\]\[8\] returns the 9th item in the column headed :id and
|
302
|
+
# so does \tab\[8\]\[:id\].
|
303
|
+
# Index the table by row number or by column header.
#
# key:: an Integer row index (negative values count from the end, as with
#       Array#[]), or a String or Symbol naming a column.
#
# Returns the row Hash for an Integer key, or the column's items for a
# header key.
#
# Raises UserError when the index is out of range, the header is not in
# the table, or +key+ is of any other class.
#
# Fixes:
# * String keys were compared directly with the header list, which #headers
#   documents as an array of symbols, so a String key always raised. They
#   are now converted with #as_sym first, as #column and #column? do.
# * The range check used key.abs, which rejected the valid index -size.
def [](key)
  case key
  when Integer
    raise UserError, "index '#{key}' out of range" unless (-size...size).cover?(key)
    rows[key]
  when String
    raise UserError, "header '#{key}' not in table" unless headers.include?(key.as_sym)
    column(key).items
  when Symbol
    raise UserError, "header ':#{key}' not in table" unless headers.include?(key)
    column(key).items
  else
    raise UserError, "cannot index table with a #{key.class}"
  end
end
|
318
|
+
|
319
|
+
# :category: Attributes
|
320
|
+
|
321
|
+
# Return true if the table has a Column with the given +key+ as a header.
|
322
|
+
# True when key.as_sym is one of this table's headers.
def column?(key)
  wanted = key.as_sym
  headers.include?(wanted)
end
|
325
|
+
|
326
|
+
# :category: Attributes
|
327
|
+
|
328
|
+
# Return a Hash of the Table's Column header symbols to types.
|
329
|
+
# Build a Hash mapping each column's header to that column's type, in
# column order.
def types
  columns.each_with_object({}) do |col, by_header|
    by_header[col.header] = col.type
  end
end
|
336
|
+
|
337
|
+
# :category: Attributes
|
338
|
+
|
339
|
+
# Return the headers for the Table as an array of symbols.
|
340
|
+
# Collect the header of every column, in column order.
def headers
  columns.map { |col| col.header }
end
|
343
|
+
|
344
|
+
# :category: Attributes
|
345
|
+
|
346
|
+
# Return the number of rows in the Table.
|
347
|
+
# Row count: the size of the first column, or 0 when there are no columns.
def size
  columns.empty? ? 0 : columns.first.size
end
|
351
|
+
|
352
|
+
# :category: Attributes
|
353
|
+
|
354
|
+
# Return the number of Columns in the Table.
|
355
|
+
# Column count, or 0 when there are no columns.
def width
  columns.empty? ? 0 : columns.size
end
|
359
|
+
|
360
|
+
# :category: Attributes
|
361
|
+
|
362
|
+
# Return whether this Table is empty.
|
363
|
+
# True when the table has no rows, i.e. #size is zero.
def empty?
  row_count = size
  row_count.zero?
end
|
366
|
+
|
367
|
+
# :category: Attributes
|
368
|
+
|
369
|
+
# Return the rows of the Table as an Array of Hashes, keyed by the headers.
|
370
|
+
# Assemble every row as a Hash of header => item by reading the same index
# from each column. Row indexes run from 0 up to the value returned by
# #last_i on the first column's items (presumably the last valid index --
# confirm against the Array extension that defines it).
def rows
  return [] if columns.empty?

  final_index = columns.first.items.last_i
  0.upto(final_index).map do |rnum|
    columns.each_with_object({}) { |col, row| row[col.header] = col[rnum] }
  end
end
|
383
|
+
|
384
|
+
protected
|
385
|
+
|
386
|
+
# :category: Attributes
|
387
|
+
|
388
|
+
# Return the rows from first to last. We could just index #rows, but in a
|
389
|
+
# large table, that would require that we construct all the rows for a range
|
390
|
+
# of any size.
|
391
|
+
# Build only the rows numbered +first+ through +last+ inclusive. +last+
# defaults to size - 1 and is never taken below 0. Raises UserError when
# +first+ exceeds +last+; returns [] when there are no columns.
def rows_range(first = 0, last = nil) # :nodoc:
  last = [last || size - 1, 0].max
  raise UserError, 'first must be <= last' unless first <= last
  return [] if columns.empty?

  first.upto(last).map do |rnum|
    columns.each_with_object({}) { |col, row| row[col.header] = col[rnum] }
  end
end
|
407
|
+
|
408
|
+
#############################################################################
|
409
|
+
# Enumerable
|
410
|
+
#############################################################################
|
411
|
+
|
412
|
+
public
|
413
|
+
|
414
|
+
include Enumerable
|
415
|
+
|
416
|
+
# :category: Attributes
|
417
|
+
|
418
|
+
# Yield each row of the table as a Hash with the column symbols as keys.
|
419
|
+
# Yield each row of the table as a Hash with the column symbols as keys.
#
# With a block, returns the Array of rows that was iterated, as before.
# Without a block, now returns an Enumerator over the rows instead of
# raising LocalJumpError on the first row, matching the convention of
# Ruby's own #each methods.
def each
  return enum_for(:each) unless block_given?

  rows.each { |row| yield row }
end
|
424
|
+
|
425
|
+
|
426
|
+
public
|
427
|
+
|
428
|
+
# :category: Attributes
|
429
|
+
|
430
|
+
# Boundaries mark the last row in each "group" within the table. The last
|
431
|
+
# row of the table is always an implicit boundary, and having the last row
|
432
|
+
# as the sole boundary is the default for new tables unless mentioned
|
433
|
+
# otherwise. Resetting the boundaries means to put it back in that default
|
434
|
+
# state.
|
435
|
+
#
|
436
|
+
# Boundaries can be added when a table is read in, for example, from the
|
437
|
+
# text of an org table in which each hline (other than the one separating
|
438
|
+
# the headers from the body) marks a boundary for the row immediately
|
439
|
+
# preceding the hline.
|
440
|
+
#
|
441
|
+
# The #order_by method resets the boundaries then adds boundaries at the
|
442
|
+
# last row of each group of rows on which the sort keys were equal as a
|
443
|
+
# boundary.
|
444
|
+
#
|
445
|
+
# The #union_all (but not #union since it deletes duplicates) method adds a
|
446
|
+
# boundary between the constituent tables. #union_all also preserves any
|
447
|
+
# boundary markers within the constituent tables. In doing so, the
|
448
|
+
# boundaries of the second table in the #union_all are increased by the size
|
449
|
+
# of the first table so that they refer to rows in the new table.
|
450
|
+
#
|
451
|
+
# The #select method preserves any boundaries from the input table without
|
452
|
+
# change, since it only selects columns for the output and deletes no rows.
|
453
|
+
#
|
454
|
+
# Perhaps surprisingly, the #group_by method does /not/ result in any groups
|
455
|
+
# in the output table since the result of #group_by is to reduce all groups
|
456
|
+
# it finds into a single row, and having a group for each row of the output
|
457
|
+
# table would have no use.
|
458
|
+
#
|
459
|
+
# All the other table-transforming methods reset the boundaries in the new
|
460
|
+
# table. For example, #where re-arranges and deletes rows, so the old
|
461
|
+
# boundaries would make no sense anyway. Likewise, #union, #intersection,
|
462
|
+
# #except, and #join reset the boundaries to their default.
|
463
|
+
#
|
464
|
+
# Return an array of an array of row hashes for the groups in this Table.
|
465
|
+
# Normalize the boundaries, then return one Array of row Hashes per
# boundary, obtained from group_rows for each boundary position in order.
def groups
  normalize_boundaries
  (0...boundaries.size).map { |k| group_rows(k) }
end
|
473
|
+
|
474
|
+
# :category: Operators
|
475
|
+
|
476
|
+
# Return this table mutated with all groups removed. Useful after something
|
477
|
+
# like #order_by, which adds groups as a side-effect, when you do not want
|
478
|
+
# the groups displayed in the output. This modifies the input table, so is a
|
479
|
+
# departure from the otherwise immutability of Tables.
|
480
|
+
# Discard every group boundary in place and return this same table.
def degroup!
  tap { @boundaries = [] }
end
|
484
|
+
|
485
|
+
# Mark a boundary at k, and if k is nil, mark the last row in the table as a
|
486
|
+
# group boundary. This is used for internal purposes.
|
487
|
+
# Push a boundary onto the boundaries array: +k+ when given, otherwise the
# index of the current last row (size - 1). Returns the boundaries array.
def mark_boundary(k = nil) # :nodoc:
  boundaries.push(k || size - 1)
end
|
494
|
+
|
495
|
+
protected
|
496
|
+
|
497
|
+
# :stopdoc:
|
498
|
+
|
499
|
+
# Reader for boundaries, but not public.
|
500
|
+
# Return the @boundaries instance variable itself, not a copy.
def boundaries
  return @boundaries
end
|
503
|
+
|
504
|
+
# Writer for boundaries, but not public.
|
505
|
+
# Replace the @boundaries instance variable with +bounds+; the object is
# stored as given, not copied.
def boundaries=(bounds)
  instance_variable_set(:@boundaries, bounds)
end
|
508
|
+
|
509
|
+
# Make sure size - 1 is last boundary and that they are unique and sorted.
|
510
|
+
# For a non-empty table, make sure size - 1 is among the boundaries (it is
# pushed onto the existing array if missing), then replace the boundaries
# with a de-duplicated, sorted copy. An empty table is left untouched.
# Returns the boundaries either way.
def normalize_boundaries
  return boundaries if empty?

  last_row = size - 1
  boundaries.push(last_row) unless boundaries.include?(last_row)
  self.boundaries = boundaries.uniq.sort
  boundaries
end
|
517
|
+
|
518
|
+
# Concatenate the array of argument bounds to this table's boundaries, but
|
519
|
+
# increase each of the indexes in bounds by shift. This is used in the
|
520
|
+
# #union_all method.
|
521
|
+
# Add +shift+ to every index in +bounds+ and append the results to
# @boundaries, which is replaced by a new Array holding old and new entries.
def append_boundaries(bounds, shift: 0)
  shifted = bounds.map { |k| k + shift }
  @boundaries = @boundaries + shifted
end
|
524
|
+
|
525
|
+
# Return the group number to which row k belongs. Groups, from the user's
|
526
|
+
# point of view are indexed starting at 1.
|
527
|
+
# Return the 1-based number of the first boundary that is >= row index +k+.
# When +k+ lies beyond every boundary, 1 is returned.
def row_index_to_group_index(k)
  position = boundaries.index { |b_last| k <= b_last }
  position ? position + 1 : 1
end
|
533
|
+
|
534
|
+
# Return the rows of the group at boundary position +k+ (0-based): from the
# row after the previous boundary (or row 0 for the first group) through
# the row at boundary +k+. Returns [] when +k+ is past the last boundary.
def group_rows(k) # :nodoc:
  normalize_boundaries
  return [] if k >= boundaries.size

  first =
    if k.zero?
      0
    else
      boundaries[k - 1] + 1
    end
  rows_range(first, boundaries[k])
end
|
541
|
+
|
542
|
+
# :startdoc:
|
543
|
+
|
544
|
+
############################################################################
|
545
|
+
# SQL look-alikes. The following methods are based on SQL equivalents and
|
546
|
+
# all return a new Table object rather than modifying the table in place.
|
547
|
+
############################################################################
|
548
|
+
|
549
|
+
public
|
550
|
+
|
551
|
+
# :category: Operators
|
552
|
+
|
553
|
+
# Return a new Table sorting the rows of this Table on the possibly multiple
|
554
|
+
# keys given in the array of syms in headers. Append a ! to the symbol name
|
555
|
+
# to indicate reverse sorting on that column.
|
556
|
+
#
|
557
|
+
# tab.order_by(:ref, :date) => sorted table
|
558
|
+
# tab.order_by(:date!) => reverse sort on :date
|
559
|
+
#
|
560
|
+
# After sorting, the output Table will have group boundaries added after
|
561
|
+
# each row where the sort key changes.
|
562
|
+
# Sort the rows on +sort_heads+ and return them in a new Table. A head whose
# name ends in '!' sorts in reverse; this is done by swapping the two rows'
# values for that head inside the comparison keys. While the sorted rows
# are appended, a boundary is marked on the previous row each time the
# sort-key values change, and the new table's boundaries are normalized.
def order_by(*sort_heads)
  names = [sort_heads].flatten.map(&:to_s)
  strip_bang = ->(name) { name.sub(/\!\z/, '').to_sym }
  descending = names.select { |name| name.end_with?('!') }.map(&strip_bang)
  sort_heads = names.map(&strip_bang)

  sorted = rows.sort do |left, right|
    pairs = sort_heads.map do |h|
      descending.include?(h) ? [right[h], left[h]] : [left[h], right[h]]
    end
    pairs.map(&:first) <=> pairs.map(&:last)
  end

  sorted_tab = Table.new
  prev_key = nil
  sorted.each_with_index do |row, idx|
    sorted_tab << row
    cur_key = row.fetch_values(*sort_heads)
    sorted_tab.mark_boundary(idx - 1) if prev_key && cur_key != prev_key
    prev_key = cur_key
  end
  sorted_tab.normalize_boundaries
  sorted_tab
end
|
585
|
+
|
586
|
+
# :category: Operators
|
587
|
+
|
588
|
+
# Return a Table having the selected column expressions. Each expression can
|
589
|
+
# be one of the following:
|
590
|
+
#
|
591
|
+
# 1. a symbol, +:old_col+, representing a column in the current table,
|
592
|
+
#
|
593
|
+
# 2. a hash of +new_col: :old_col+ to rename an existing +:old_col+ column as
|
594
|
+
# +:new_col+, or
|
595
|
+
#
|
596
|
+
# 3. a hash of +new_col: 'expression'+, to add a new column that is computed
|
597
|
+
# as an arbitrary ruby expression of the existing columns (whether
|
598
|
+
# selected for the output table or not) or any new_col defined earlier in
|
599
|
+
# the argument list defined as local variables in the expression. The
|
600
|
+
# expression string can also access the instance variable @row, as the row
|
601
|
+
# number of the row being evaluated, and @group, as the group number of
|
602
|
+
# the row being evaluated.
|
603
|
+
#
|
604
|
+
# The bare symbol arguments (1) must precede any hash arguments (2) or (3).
|
605
|
+
# Each expression results in a column in the resulting Table in the order
|
606
|
+
# given. The expressions are evaluated in left-to-right order as well. The
|
607
|
+
# output table preserves any groups present in the input table.
|
608
|
+
#
|
609
|
+
# tab.select(:ref, :date, :shares) => table with only 3 columns selected
|
610
|
+
# tab.select(:ref, :date, shares: :quantity) => rename :shares->:quantity
|
611
|
+
# tab.select(:ref, :date, :shares, cost: 'price * shares') => new column
|
612
|
+
# tab.select(:ref, :date, :shares, seq: '@row') => add sequential nums
|
613
|
+
# Build a new Table row by row. For each input row, the bare +cols+ are
# copied first; then each +new_cols+ pair is resolved against the input row
# merged with the values produced so far, plus :__row (1-based row number)
# and :__group (from row_index_to_group_index). A Symbol value copies that
# variable and a String value is passed to the Evaluator.
#
# Raises UserError for an unknown column or for a +new_cols+ value that is
# neither a Symbol nor a String. The result is assigned this table's
# (normalized) boundaries array.
def select(*cols, **new_cols)
  result = Table.new
  normalize_boundaries
  ev = Evaluator.new(vars: { row: 0, group: 1 },
                     before: '@row = __row; @group = __group')
  rows.each_with_index do |src_row, idx|
    out_row = {}
    cols.each do |col|
      head = col.as_sym
      unless column?(head)
        raise UserError, "Column '#{head}' in select does not exist"
      end
      out_row[head] = src_row[head]
    end
    new_cols.each_pair do |new_head, spec|
      new_head = new_head.as_sym
      locals = src_row.merge(out_row)
      locals[:__row] = idx + 1
      locals[:__group] = row_index_to_group_index(idx)
      if spec.is_a?(Symbol)
        unless locals.key?(spec)
          raise UserError, "Column '#{spec}' in select does not exist"
        end
        out_row[new_head] = locals[spec]
      elsif spec.is_a?(String)
        out_row[new_head] = ev.evaluate(spec, vars: locals)
      else
        raise UserError, 'Hash parameters to select must be a symbol or string'
      end
    end
    result << out_row
  end
  result.boundaries = boundaries
  result.normalize_boundaries
  result
end
|
646
|
+
|
647
|
+
# :category: Operators
|
648
|
+
|
649
|
+
# Return a Table containing only rows for which the Ruby where expression,
|
650
|
+
# +exp+, evaluates to a truthy value. Within the string expression +exp+,
|
651
|
+
# each header is a local variable bound to the value of the current row in
|
652
|
+
# that column, and the instance variables @row and @group are available as
|
653
|
+
# the row and group number of the row being evaluated. Any groups present in
|
654
|
+
# the input Table are eliminated in the output Table.
|
655
|
+
#
|
656
|
+
# tab.where('date > Date.today - 30') => rows with recent dates
|
657
|
+
# tab.where('@row.even? && shares > 500') => even rows with lots of shares
|
658
|
+
# Return a new Table holding the rows for which +expr+ evaluates truthy.
#
# expr:: converted with #to_s and evaluated once per row by the Evaluator,
#        with the row's values plus :__row (1-based row number) and
#        :__group (from row_index_to_group_index) supplied as vars.
#
# The result starts with one empty Column per header of this table, so the
# headers are present even if no row matches.
#
# Fix: this table's boundaries are now normalized before group numbers are
# computed, as #select already does. Without it, rows after the last
# explicit boundary fell through row_index_to_group_index and were all
# reported as group 1.
#
# NOTE(review): #select seeds its Evaluator with { row: 0, group: 1 } while
# this method passes only { row: 0 }; left unchanged because Evaluator is
# not visible here -- confirm whether +group+ should be seeded too.
def where(expr)
  expr = expr.to_s
  result = Table.new
  headers.each do |h|
    result.add_column(Column.new(header: h))
  end
  normalize_boundaries
  ev = Evaluator.new(vars: { row: 0 },
                     before: '@row = __row; @group = __group')
  rows.each_with_index do |row, k|
    vars = row.dup
    vars[:__row] = k + 1
    vars[:__group] = row_index_to_group_index(k)
    result << row if ev.evaluate(expr, vars: vars)
  end
  result.normalize_boundaries
  result
end
|
676
|
+
|
677
|
+
# :category: Operators
|
678
|
+
|
679
|
+
# Return this table with all duplicate rows eliminated. Resets groups. Same
|
680
|
+
# as #uniq.
|
681
|
+
# Append the de-duplicated rows (Array#uniq on #rows) to a fresh Table and
# return it. No boundaries are copied to the new table.
def distinct
  rows.uniq.each_with_object(Table.new) { |row, table| table << row }
end
|
689
|
+
|
690
|
+
# :category: Operators
|
691
|
+
|
692
|
+
# Return this table with all duplicate rows eliminated. Resets groups. Same
|
693
|
+
# as #distinct.
|
694
|
+
# Synonym for #distinct; returns whatever #distinct returns.
def uniq
  deduped = distinct
  deduped
end
|
697
|
+
|
698
|
+
# :category: Operators
|
699
|
+
|
700
|
+
# Return a Table that combines this table with +other+ table. In other
|
701
|
+
# words, return the union of this table with the other. The headers of this
|
702
|
+
# table are used in the result. There must be the same number of columns of
|
703
|
+
# the same type in the two tables, or an exception will be thrown.
|
704
|
+
# Duplicates are eliminated from the result. Any groups present in either
|
705
|
+
# Table are eliminated in the output Table.
|
706
|
+
# Delegate to set_operation with the :+ operator, distinct: true and
# add_boundaries: true.
def union(other)
  set_operation(other, :+, distinct: true, add_boundaries: true)
end
|
711
|
+
|
712
|
+
# :category: Operators
|
713
|
+
|
714
|
+
# Return a Table that combines this table with +other+ table. In other
|
715
|
+
# words, return the union of this table with the other. The headers of this
|
716
|
+
# table are used in the result. There must be the same number of columns of
|
717
|
+
# the same type in the two tables, or an exception will be thrown.
|
718
|
+
# Duplicates are not eliminated from the result. Adds group boundaries at
|
719
|
+
# boundaries of the constituent tables. Preserves and adjusts the group
|
720
|
+
# boundaries of the constituent table.
|
721
|
+
# Union of this table with +other+ keeping duplicates: rows are combined
# with :+, a boundary is requested between the two tables, and the
# boundaries of both tables are carried over into the result.
def union_all(other)
  opts = { distinct: false, add_boundaries: true, inherit_boundaries: true }
  set_operation(other, :+, **opts)
end
|
727
|
+
|
728
|
+
# :category: Operators
|
729
|
+
|
730
|
+
# Return a Table that includes the rows that appear in this table and in
|
731
|
+
# +other+ table. In other words, return the intersection of this table with
|
732
|
+
# the other. The headers of this table are used in the result. There must be
|
733
|
+
# the same number of columns of the same type in the two tables, or an
|
734
|
+
# exception will be thrown. Duplicates are eliminated from the result. Any
|
735
|
+
# groups present in either Table are eliminated in the output Table.
|
736
|
+
# Intersection of this table with +other+, with duplicate rows removed.
def intersect(other)
  set_operation(other, :intersect, distinct: true)
end
|
739
|
+
|
740
|
+
# :category: Operators
|
741
|
+
|
742
|
+
# Return a Table that includes all the rows in this table that also occur in
|
743
|
+
# +other+ table. Note that the order of the operands matters. Duplicates in
|
744
|
+
# this table will be included in the output, but duplicates in other will
|
745
|
+
# not. The headers of this table are used in the result. There must be the
|
746
|
+
# same number of columns of the same type in the two tables, or an exception
|
747
|
+
# will be thrown. Duplicates are not eliminated from the result. Resets
|
748
|
+
# groups.
|
749
|
+
# Intersection of this table with +other+ without the final duplicate
# elimination pass.
def intersect_all(other)
  set_operation(other, :intersect, distinct: false)
end
|
752
|
+
|
753
|
+
# :category: Operators
|
754
|
+
|
755
|
+
# Return a Table that includes the rows of this table except for any rows
|
756
|
+
# that are the same as those in another table. In other words, return the
|
757
|
+
# set difference between this table and the other. The headers of this table
|
758
|
+
# are used in the result. There must be the same number of columns of the
|
759
|
+
# same type in the two tables, or an exception will be thrown. Duplicates
|
760
|
+
# are eliminated from the result. Any groups present in either Table are
|
761
|
+
# eliminated in the output Table.
|
762
|
+
# Set difference of this table less +other+, with duplicate rows removed.
def except(other)
  set_operation(other, :difference, distinct: true)
end
|
765
|
+
|
766
|
+
# :category: Operators
|
767
|
+
|
768
|
+
# Return a Table that includes the rows of this table except for any rows
|
769
|
+
# that are the same as those in +other+ Table. In other words, return the
|
770
|
+
# set difference between this table and the other. The headers of this table
|
771
|
+
# are used in the result. There must be the same number of columns of the
|
772
|
+
# same type in the two tables, or an exception will be thrown. Duplicates
|
773
|
+
# are not eliminated from the result. Any groups present in either Table are
|
774
|
+
# eliminated in the output Table.
|
775
|
+
# Set difference of this table less +other+ without the final duplicate
# elimination pass.
def except_all(other)
  set_operation(other, :difference, distinct: false)
end
|
778
|
+
|
779
|
+
private
|
780
|
+
|
781
|
+
# Apply the set operation given by op between this table and the other table
|
782
|
+
# given in the first argument. If distinct is true, eliminate duplicates
|
783
|
+
# from the result.
|
784
|
+
# Combine the rows of this table with those of +other+ by sending +op+ to
# this table's row Array with other's rows (re-keyed to this table's
# headers) as the argument.
#
# distinct::           when true, return result.distinct instead of result.
# add_boundaries::     when true, mark a boundary after the combined row
#                      whose index equals this table's last row index.
# inherit_boundaries:: when true, copy this table's normalized boundaries
#                      into the result and append other's boundaries
#                      shifted by this table's size.
#
# Raises UserError when the two tables differ in column count or in the
# sequence of column types.
def set_operation(other, op = :+,
                  distinct: true,
                  add_boundaries: true,
                  inherit_boundaries: false)
  if columns.size != other.columns.size
    raise UserError, 'Cannot apply a set operation to tables with a different number of columns.'
  end
  if columns.map(&:type) != other.columns.map(&:type)
    raise UserError, 'Cannot apply a set operation to tables with different column types.'
  end

  rekeyed_rows = other.rows.map { |r| r.replace_keys(headers) }
  result = Table.new
  combined = rows.send(op, rekeyed_rows)
  combined.each_with_index do |row, idx|
    result << row
    next unless idx == size - 1 && add_boundaries

    result.mark_boundary
  end
  if inherit_boundaries
    result.boundaries = normalize_boundaries
    other.normalize_boundaries
    result.append_boundaries(other.boundaries, shift: size)
  end
  result.normalize_boundaries
  return result.distinct if distinct

  result
end
|
809
|
+
|
810
|
+
public
|
811
|
+
|
812
|
+
# An Array of symbols for the valid join types.
|
813
|
+
JOIN_TYPES = %i[inner left right full cross].freeze
|
814
|
+
|
815
|
+
# :category: Operators
|
816
|
+
#
|
817
|
+
# Return a table that joins this table to another based on one or more join
|
818
|
+
# expressions. There are several possibilities for the join expressions:
|
819
|
+
#
|
820
|
+
# 1. If no join expressions are given, the tables will be joined when all
|
821
|
+
# values with the same name in both tables have the same value, a
|
822
|
+
# "natural" join. However, if the join type is :cross, the join
|
823
|
+
# expression will be taken to be 'true'. Otherwise, if there are no
|
824
|
+
# common column names, an exception will be raised.
|
825
|
+
#
|
826
|
+
# 2. If the join expressions are one or more symbols, the join condition
|
827
|
+
# requires that the values of both tables are equal for all columns named
|
828
|
+
# by the symbols. A column that appears in both tables can be given
|
829
|
+
# without modification and will be assumed to require equality on that
|
830
|
+
# column. If an unmodified symbol is not a name that appears in both
|
831
|
+
# tables, an exception will be raised. Column names that are unique to
|
832
|
+
# the first table must have a '_a' appended to the column name and column
|
833
|
+
# names that are unique to the other table must have a '_b' appended to
|
834
|
+
# the column name. These disambiguated column names must come in pairs,
|
835
|
+
# one for the first table and one for the second, and they will imply a
|
836
|
+
# join condition that the columns must be equal on those columns. Several
|
837
|
+
# such symbol expressions will require that all such implied pairs are
|
838
|
+
# equal in order for the join condition to be met.
|
839
|
+
#
|
840
|
+
# 3. Finally, a string expression can be given that contains an arbitrary
|
841
|
+
# ruby expression that will be evaluated for truthiness. Within the
|
842
|
+
# string, all column names must be disambiguated with the '_a' or '_b'
|
843
|
+
# modifiers whether they are common to both tables or not. The names of
|
844
|
+
# the columns in both tables (without the leading ':' for symbols) are
|
845
|
+
# available as variables within the expression.
|
846
|
+
#
|
847
|
+
# The join_type parameter specifies what sort of join is performed, :inner,
|
848
|
+
# :left, :right, :full, or :cross. The default is an :inner join. The types
|
849
|
+
# of joins are defined as follows where T1 means this table, the receiver,
|
850
|
+
# and T2 means other. These descriptions are taken from the Postgresql
|
851
|
+
# documentation.
|
852
|
+
#
|
853
|
+
# :inner:: For each row R1 of T1, the joined table has a row for each row in
|
854
|
+
# T2 that satisfies the join condition with R1.
|
855
|
+
#
|
856
|
+
# :left:: First, an inner join is performed. Then, for each row in T1 that
|
857
|
+
# does not satisfy the join condition with any row in T2, a joined
|
858
|
+
# row is added with null values in columns of T2. Thus, the joined
|
859
|
+
# table always has at least one row for each row in T1.
|
860
|
+
#
|
861
|
+
# :right:: First, an inner join is performed. Then, for each row in T2 that
|
862
|
+
# does not satisfy the join condition with any row in T1, a joined
|
863
|
+
# row is added with null values in columns of T1. This is the
|
864
|
+
# converse of a left join: the result table will always have a row
|
865
|
+
# for each row in T2.
|
866
|
+
#
|
867
|
+
# :full:: First, an inner join is performed. Then, for each row in T1 that
|
868
|
+
# does not satisfy the join condition with any row in T2, a joined
|
869
|
+
# row is added with null values in columns of T2. Also, for each row
|
870
|
+
# of T2 that does not satisfy the join condition with any row in T1,
|
871
|
+
# a joined row with null values in the columns of T1 is added.
|
872
|
+
#
|
873
|
+
# :cross:: For every possible combination of rows from T1 and T2 (i.e., a
|
874
|
+
# Cartesian product), the joined table will contain a row
|
875
|
+
# consisting of all columns in T1 followed by all columns in T2. If
|
876
|
+
# the tables have N and M rows respectively, the joined table will
|
877
|
+
# have N * M rows.
|
878
|
+
#
|
879
|
+
# Any groups present in either Table are eliminated in the output Table.
|
880
|
+
# See the README for examples.
|
881
|
+
# Join this table (rows "a") to +other+ (rows "b") using the join
# expression built from +exps+ and the given +join_type+. Every pair of
# rows is tested with the expression; matching pairs produce an output row.
# For :left and :full joins, unmatched rows of this table are emitted padded
# with nils for other's headers; for :right and :full joins, unmatched rows
# of other are emitted padded with nils for this table's headers.
#
# Raises UserError if +other+ is not a Table or +join_type+ is not one of
# JOIN_TYPES.
def join(other, *exps, join_type: :inner)
  raise UserError, 'need other table as first argument to join' unless other.is_a?(Table)
  unless JOIN_TYPES.include?(join_type)
    raise UserError, "join_type may only be: #{JOIN_TYPES.join(', ')}"
  end

  # All-nil stand-in rows, used to pad the missing side of an outer join.
  self_row_nils = headers.map { |h| [h, nil] }.to_h
  other_row_nils = other.headers.map { |h| [h, nil] }.to_h
  join_expression, other_common_heads = build_join_expression(exps, other, join_type)
  pad_unmatched_self = %i[left full].include?(join_type)
  pad_unmatched_other = %i[right full].include?(join_type)
  ev = Evaluator.new
  result = Table.new
  other_rows = other.rows
  # other_row_matches[k] becomes true once other_rows[k] matches any self row.
  other_row_matches = Array.new(other_rows.size, false)
  rows.each do |self_row|
    self_row_matched = false
    other_rows.each_with_index do |other_row, k|
      locals = build_locals_hash(row_a: self_row, row_b: other_row)
      next unless ev.evaluate(join_expression, vars: locals)

      self_row_matched = other_row_matches[k] = true
      result << build_out_row(row_a: self_row, row_b: other_row,
                              common_heads: other_common_heads,
                              type: join_type)
    end
    if pad_unmatched_self && !self_row_matched
      result << build_out_row(row_a: self_row, row_b: other_row_nils, type: join_type)
    end
  end
  if pad_unmatched_other
    other_rows.each_with_index do |other_row, k|
      next if other_row_matches[k]

      result << build_out_row(row_a: self_row_nils, row_b: other_row, type: join_type)
    end
  end
  result.normalize_boundaries
  result
end
|
928
|
+
|
929
|
+
# :category: Operators
|
930
|
+
# Perform an inner join as described in FatTable::Table.join.
|
931
|
+
# Shorthand for #join with its default join type.
def inner_join(other, *exps)
  join(other, *exps)
end
|
934
|
+
|
935
|
+
# :category: Operators
|
936
|
+
# Perform a left join as described in FatTable::Table.join.
|
937
|
+
# Shorthand for #join with join_type: :left.
def left_join(other, *exps)
  join(other, *exps, join_type: :left)
end
|
940
|
+
|
941
|
+
# :category: Operators
|
942
|
+
# Perform a right join as described in FatTable::Table.join.
|
943
|
+
# Shorthand for #join with join_type: :right.
def right_join(other, *exps)
  join(other, *exps, join_type: :right)
end
|
946
|
+
|
947
|
+
# :category: Operators
|
948
|
+
# Perform a full join as described in FatTable::Table.join.
|
949
|
+
# Shorthand for #join with join_type: :full.
def full_join(other, *exps)
  join(other, *exps, join_type: :full)
end
|
952
|
+
|
953
|
+
# :category: Operators
|
954
|
+
# Perform a cross join as described in FatTable::Table.join.
|
955
|
+
# Shorthand for #join with join_type: :cross and no join expressions.
def cross_join(other)
  join(other, join_type: :cross)
end
|
958
|
+
|
959
|
+
private
|
960
|
+
|
961
|
+
# Return an output row appropriate to the given join type, including all the
|
962
|
+
# keys of row_a, the non-common keys of row_b for an :inner join, or all the
|
963
|
+
# keys of row_b for other joins. If any of the row_b keys are also row_a
|
964
|
+
# keys, change the key name by appending a '_b' so the keys will not repeat.
|
965
|
+
# Merge row_a and row_b (both Hashes keyed by header) into one output row.
#
# row_a::        row from the receiver table; its keys are kept unchanged.
# row_b::        row from the other table.
# common_heads:: row_b keys to drop when type is :inner.
# type::         the join type; only :inner triggers the dropping above.
#
# Any remaining row_b key that is also a row_a key is renamed by appending
# '_b' so that it does not overwrite the row_a value. Returns a new Hash;
# neither argument is mutated.
def build_out_row(row_a:, row_b:, common_heads: [], type: :inner)
  # For an inner join, drop the keys listed in common_heads so the output
  # does not repeat them.
  row_b = row_b.reject { |k, _| common_heads.include?(k) } if type == :inner
  renamed_b = row_b.each_with_object({}) do |(k, v), acc|
    acc[row_a.key?(k) ? "#{k}_b".to_sym : k] = v
  end
  row_a.merge(renamed_b)
end
|
979
|
+
|
980
|
+
# Return a hash for the local variables of a join expression in which all
|
981
|
+
# the keys in row_a have an '_a' appended and all the keys in row_b have a
|
982
|
+
# '_b' appended.
|
983
|
+
# Build the Hash of local variables for a join expression: every key of
# row_a gets '_a' appended and every key of row_b gets '_b' appended, and
# the two renamed Hashes are merged. Returns a new Hash with Symbol keys;
# neither argument is mutated.
def build_locals_hash(row_a:, row_b:)
  locals = {}
  row_a.each_pair { |k, v| locals["#{k}_a".to_sym] = v }
  row_b.each_pair { |k, v| locals["#{k}_b".to_sym] = v }
  locals
end
|
988
|
+
|
989
|
+
# Return an array of two elements: (1) a ruby expression that expresses the
|
990
|
+
# AND of all join conditions as described in the comment to the #join method
|
991
|
+
# and (2) the heads from other table that (a) are known to be tested for
|
992
|
+
# equality with a head in self table and (b) have the same name. Assume that
|
993
|
+
# the expression will be evaluated in the context of a binding in which the
|
994
|
+
# local variables are all the headers in the self table with '_a' appended
|
995
|
+
# and all the headers in the other table with '_b' appended.
|
996
|
+
# Translate the join expressions +exps+ into a two-element Array:
#
# 1. a String holding a ruby expression that ANDs together all the join
#    conditions, written in terms of this table's headers with '_a'
#    appended and +other+'s headers with '_b' appended, and
# 2. an Array of headers of +other+ that took part in an equality test
#    (the caller drops these from inner-join output rows).
#
# exps::  Symbols (unqualified common headers, or '_a'/'_b'-qualified
#         headers given in pairs, one from each table) and/or Strings
#         (arbitrary expressions, used verbatim inside parentheses).
# other:: the table being joined to this one.
# type::  the join type; :cross short-circuits to ['true', []].
#
# Raises UserError when a natural join has no common headers, when a named
# column does not exist, when qualified symbols are not properly paired
# (including a dangling qualified symbol at the end of +exps+ or a pair
# drawn from the same table), or when an expression is neither a Symbol
# nor a String.
def build_join_expression(exps, other, type)
  return ['true', []] if type == :cross

  a_heads = headers
  b_heads = other.headers
  common_heads = a_heads & b_heads
  if exps.empty?
    if common_heads.empty?
      raise UserError,
            'A non-cross join with no common column names requires join expressions'
    end
    # A natural join on all common heads.
    common_heads.each do |h|
      ensure_common_types!(self_h: h, other_h: h, other: other)
    end
    nat_exp = common_heads.map { |h| "(#{h}_a == #{h}_b)" }.join(' && ')
    return [nat_exp, common_heads]
  end

  and_conds = []
  b_common_heads = []
  # Head and side ('a' or 'b') of a qualified symbol still waiting for its
  # partner from the other table; both nil when no pair is open.
  pending_head = nil
  pending_side = nil
  exps.each do |exp|
    case exp
    when Symbol
      qualified = /\A(.*)_([ab])\z/.match(exp.to_s.clean)
      if qualified
        head = qualified[1].to_sym
        side = qualified[2]
        if side == 'a'
          raise UserError, "no column '#{head}' in table" unless a_heads.include?(head)
        else
          raise UserError, "no column '#{head}' in second table" unless b_heads.include?(head)
        end
        if pending_head
          # Second of a pair: it must come from the opposite table, otherwise
          # the type check below would look the column up in the wrong table.
          if pending_side == side
            raise UserError,
                  "must follow '#{pending_head}' by qualified exp from the other table"
          end
          self_h, other_h = side == 'a' ? [head, pending_head] : [pending_head, head]
          ensure_common_types!(self_h: self_h, other_h: other_h, other: other)
          and_conds << "(#{pending_head}_#{pending_side} == #{head}_#{side})"
          pending_head = pending_side = nil
        else
          # First of a pair of _a or _b.
          pending_head = head
          pending_side = side
        end
        b_common_heads << head if side == 'b'
      else
        # No modifier, so must be one of the common columns.
        if pending_head
          # We were expecting the second of a modified pair, but got an
          # unmodified symbol instead.
          raise UserError,
                "must follow '#{pending_head}' by qualified exp from the other table"
        end
        unless common_heads.include?(exp)
          raise UserError, "unqualified column '#{exp}' must occur in both tables"
        end
        ensure_common_types!(self_h: exp, other_h: exp, other: other)
        and_conds << "(#{exp}_a == #{exp}_b)"
        b_common_heads << exp
      end
    when String
      # A string expression in which all column references must be
      # qualified; it is used as given.
      and_conds << "(#{exp})"
    else
      raise UserError, "invalid join expression '#{exp}' of class #{exp.class}"
    end
  end
  if pending_head
    # A qualified symbol was never paired; silently ignoring it would drop a
    # join condition the caller asked for.
    raise UserError,
          "must follow '#{pending_head}' by qualified exp from the other table"
  end
  [and_conds.join(' && '), b_common_heads]
end
|
1084
|
+
|
1085
|
+
# Raise an exception unless self_h in this table and other_h in other table
|
1086
|
+
# have the same types.
|
1087
|
+
# Verify that column +self_h+ of this table and column +other_h+ of the
# +other+ table report the same type.
#
# Returns self when the types are equal; raises UserError otherwise.
def ensure_common_types!(self_h:, other_h:, other:)
  return self if column(self_h).type == other.column(other_h).type

  # The closing quote after other_h was missing in the original message.
  raise UserError,
        "type of column '#{self_h}' does not match type of column '#{other_h}'"
end
|
1094
|
+
|
1095
|
+
###################################################################################
|
1096
|
+
# Group By
|
1097
|
+
###################################################################################
|
1098
|
+
|
1099
|
+
public
|
1100
|
+
|
1101
|
+
# :category: Operators
|
1102
|
+
# Return a Table with a single row for each group of rows in the input table
|
1103
|
+
# where the value of all columns named as simple symbols are equal. All
|
1104
|
+
# other columns are set to the result of aggregating the values of that
|
1105
|
+
# column within the group according to an aggregate function (:count, :sum,
|
1106
|
+
# :min, :max, etc.) that you can specify by adding a hash parameter with the
|
1107
|
+
# column as the key and a symbol for the aggregate function as the value.
|
1108
|
+
# For example, consider the following call:
|
1109
|
+
#
|
1110
|
+
# tab.group_by(:date, :code, :price, shares: :sum).
|
1111
|
+
#
|
1112
|
+
# The first three parameters are simple symbols, so the table is divided
|
1113
|
+
# into groups of rows in which the value of :date, :code, and :price are
|
1114
|
+
# equal. The shares: hash parameter is set to the aggregate function :sum,
|
1115
|
+
# so it will appear in the result as the sum of all the :shares values in
|
1116
|
+
# each group. Because of the way Ruby parses parameters to a method call,
|
1117
|
+
# all the grouping symbols must appear first in the parameter list before
|
1118
|
+
# any hash parameters.
|
1119
|
+
# Collapse this table to one row per distinct combination of the values in
# +group_cols+. The table is first ordered by the grouping columns, its rows
# are bucketed by their grouping values, and each bucket is reduced to a
# single row by row_from_group using the aggregates named in +agg_cols+.
def group_by(*group_cols, **agg_cols)
  buckets = order_by(group_cols).rows.group_by do |row|
    group_cols.map { |col| row[col] }
  end
  result = Table.new
  buckets.each_value do |bucket_rows|
    result << row_from_group(bucket_rows, group_cols, agg_cols)
  end
  result.normalize_boundaries
  result
end
|
1131
|
+
|
1132
|
+
private
|
1133
|
+
|
1134
|
+
# Reduce the Array of +rows+ forming one group to a single Hash row. Each
# header in +grp_cols+ takes its value from the first row. Each
# header => aggregate pair in +agg_cols+ adds a key named
# "<aggregate>_<header>" whose value is the result of sending the aggregate
# to a Column built from that header's values across the group.
def row_from_group(rows, grp_cols, agg_cols)
  lead = rows.first
  out = grp_cols.map { |head| [head, lead[head]] }.to_h
  agg_cols.each_pair do |head, agg|
    values = rows.map { |r| r[head] }
    out_head = "#{agg}_#{head}".as_sym
    out[out_head] = Column.new(header: head, items: values).send(agg)
  end
  out
end
|
1147
|
+
|
1148
|
+
############################################################################
|
1149
|
+
# Table construction methods.
|
1150
|
+
############################################################################
|
1151
|
+
|
1152
|
+
public
|
1153
|
+
|
1154
|
+
# :category: Constructors
|
1155
|
+
# Add a row represented by a Hash having the headers as keys. If mark is
|
1156
|
+
# true, mark this row as a boundary. All tables should be built ultimately
|
1157
|
+
# using this method as a primitive.
|
1158
|
+
# Append +row+, a Hash of header => value, to the table. A Column is created
# for any header the table does not yet have, then the value is appended to
# the matching column. When +mark+ is true, the index of the last row is
# recorded in @boundaries. Returns self.
def add_row(row, mark: false)
  row.each_pair do |head, value|
    sym = head.as_sym
    columns << Column.new(header: head) unless column?(head)
    column(sym) << value
  end
  if mark
    @boundaries << (size - 1)
  end
  self
end
|
1167
|
+
|
1168
|
+
# :category: Constructors
|
1169
|
+
# Add a row without marking.
|
1170
|
+
# Append +row+ via add_row without marking it as a boundary.
def <<(row)
  add_row(row, mark: false)
end
|
1173
|
+
|
1174
|
+
# :category: Constructors
|
1175
|
+
# Add a FatTable::Column object +col+ to the table.
|
1176
|
+
# Append the column object +col+ to this table's columns and return self.
# Raises a RuntimeError if the table already has a column with col's header.
def add_column(col)
  if column?(col.header)
    raise "Table already has a column with header '#{col.header}'"
  end

  columns << col
  self
end
|
1181
|
+
|
1182
|
+
############################################################################
|
1183
|
+
# Convenience output methods
|
1184
|
+
############################################################################
|
1185
|
+
|
1186
|
+
# :category: Output
|
1187
|
+
|
1188
|
+
# In the same spirit as the FatTable module-level functions, the following
|
1189
|
+
# simply tee-up a Formatter for self so that the user need not instantiate
|
1190
|
+
# actual Formatter objects. Thus, one of these methods can be invoked as the
|
1191
|
+
# last method in a chain of Table operations.
|
1192
|
+
|
1193
|
+
# :category: Output
|
1194
|
+
|
1195
|
+
# Return a string or ruby object according to the format specified in
|
1196
|
+
# FatTable.format. If a block is given, it will yield a Formatter of the
|
1197
|
+
# appropriate type to which format and footers can be applied. Otherwise, the
|
1198
|
+
# default format for the type will be used.
|
1199
|
+
#
|
1200
|
+
# :call-seq:
|
1201
|
+
# to_format(options = {}) { |fmt| ... }
|
1202
|
+
#
|
1203
|
+
# Render this table in the format currently returned by FatTable.format by
# delegating to #to_any.
#
# options:: Hash of options passed through to the formatter.
# block::   optional; forwarded so the caller can configure the formatter.
#
# Returns whatever #to_any returns for that format.
def to_format(options = {}, &block)
  # #to_any is an instance method taking (fmt_type, options); the receiver
  # must not be passed as an argument. An explicit &block replaces the
  # block-less Proc.new idiom, which raises on Ruby 3.0 and later.
  to_any(FatTable.format, options, &block)
end
|
1210
|
+
|
1211
|
+
# :category: Output
|
1212
|
+
|
1213
|
+
# Return a string or ruby object according to the format type +fmt_type+
|
1214
|
+
# given in the first argument. Valid format types are :psv, :aoa, :aoh,
|
1215
|
+
# :latex, :org, :term, :text, or their string equivalents. If a block is
|
1216
|
+
# given, it will yield a Formatter of the appropriate type to which format
|
1217
|
+
# and footers can be applied. Otherwise, the default format for the type
|
1218
|
+
# will be used.
|
1219
|
+
#
|
1220
|
+
# :call-seq: to_any(fmt_type, options = {}) { |fmt| ... }
|
1221
|
+
#
|
1222
|
+
# Render this table in the format named by +fmt_type+ by dispatching to the
# matching to_<format> method.
#
# fmt_type:: a format name; converted with as_sym and checked against
#            FatTable::FORMATS.
# options::  Hash of options passed through to the to_<format> method.
# block::    optional; forwarded to the to_<format> method.
#
# Raises UserError if the format is not in FatTable::FORMATS.
def to_any(fmt_type, options = {}, &block)
  fmt = fmt_type.as_sym
  raise UserError, "unknown format '#{fmt}'" unless FatTable::FORMATS.include?(fmt)

  # Forward the block explicitly: block-less Proc.new raises on Ruby 3.0
  # and later. A nil block behaves like a call without a block.
  send("to_#{fmt}", options, &block)
end
|
1232
|
+
|
1233
|
+
# :category: Output
|
1234
|
+
|
1235
|
+
# Return the table as a string formatted as a pipe-separated values. If no
|
1236
|
+
# block is given, default formatting is applied to the table's cells. If a
|
1237
|
+
# block is given, it yields a Formatter to the block to which formatting
|
1238
|
+
# instructions and footers can be added by calling methods on it. Since the
|
1239
|
+
# pipe-separated format is the default format for Formatter, there is no
|
1240
|
+
# class PsvFormatter as you might expect.
|
1241
|
+
# Build a base Formatter for this table with +options+, yield it to the
# caller's block (if any) for configuration, and return its output.
def to_psv(options = {})
  Formatter.new(self, options).tap { |f| yield f if block_given? }.output
end
|
1246
|
+
|
1247
|
+
# :category: Output
|
1248
|
+
|
1249
|
+
# Return the table as an Array of Array of Strings. If no block is given,
|
1250
|
+
# default formatting is applied to the table's cells. If a block is given,
|
1251
|
+
# it yields an AoaFormatter to the block to which formatting instructions
|
1252
|
+
# and footers can be added by calling methods on it.
|
1253
|
+
# Build a FatTable::AoaFormatter for this table with +options+, yield it to
# the caller's block (if any) for configuration, and return its output.
def to_aoa(options = {})
  FatTable::AoaFormatter.new(self, options).tap { |f| yield f if block_given? }.output
end
|
1258
|
+
|
1259
|
+
# :category: Output
|
1260
|
+
|
1261
|
+
# Return the table as an Array of Hashes. Each inner hash uses the Table's
|
1262
|
+
# columns as keys and its values are strings representing the cells of the
|
1263
|
+
# table. If no block is given, default formatting is applied to the table's
|
1264
|
+
# cells. If a block is given, it yields an AohFormatter to the block to
|
1265
|
+
# which formatting instructions and footers can be added by calling methods
|
1266
|
+
# on it.
|
1267
|
+
# Build an AohFormatter for this table with +options+, yield it to the
# caller's block (if any) for configuration, and return its output.
def to_aoh(options = {})
  AohFormatter.new(self, options).tap { |f| yield f if block_given? }.output
end
|
1272
|
+
|
1273
|
+
# :category: Output
|
1274
|
+
|
1275
|
+
# Return the table as a string containing a LaTeX table. If no block is
|
1276
|
+
# given, default formatting applies to the table's cells. If a block is
|
1277
|
+
# given, it yields a LaTeXFormatter to the block to which formatting
|
1278
|
+
# instructions and footers can be added by calling methods on it.
|
1279
|
+
# Build a LaTeXFormatter for this table with +options+, yield it to the
# caller's block (if any) for configuration, and return its output.
def to_latex(options = {})
  LaTeXFormatter.new(self, options).tap { |f| yield f if block_given? }.output
end
|
1284
|
+
|
1285
|
+
# :category: Output
|
1286
|
+
|
1287
|
+
# Return the table as a string containing an Emacs org-mode table. If no
|
1288
|
+
# block is given, default formatting applies to the table's cells. If a
|
1289
|
+
# block is given, it yields a OrgFormatter to the block to which formatting
|
1290
|
+
# instructions and footers can be added by calling methods on it.
|
1291
|
+
# Build an OrgFormatter for this table with +options+, yield it to the
# caller's block (if any) for configuration, and return its output.
def to_org(options = {})
  OrgFormatter.new(self, options).tap { |f| yield f if block_given? }.output
end
|
1296
|
+
|
1297
|
+
# :category: Output
|
1298
|
+
|
1299
|
+
# Return the table as a string containing ANSI terminal text representing
|
1300
|
+
# table. If no block is given, default formatting applies to the table's
|
1301
|
+
# cells. If a block is given, it yields a TermFormatter to the block to
|
1302
|
+
# which formatting instructions and footers can be added by calling methods
|
1303
|
+
# on it.
|
1304
|
+
# Build a TermFormatter for this table with +options+, yield it to the
# caller's block (if any) for configuration, and return its output.
def to_term(options = {})
  TermFormatter.new(self, options).tap { |f| yield f if block_given? }.output
end
|
1309
|
+
|
1310
|
+
# :category: Output
|
1311
|
+
|
1312
|
+
# Return the table as a string containing ordinary text representing table.
|
1313
|
+
# If no block is given, default formatting applies to the table's cells. If
|
1314
|
+
# a block is given, it yields a TextFormatter to the block to which
|
1315
|
+
# formatting instructions and footers can be added by calling methods on it.
|
1316
|
+
# Build a TextFormatter for this table with +options+, yield it to the
# caller's block (if any) for configuration, and return its output.
def to_text(options = {})
  TextFormatter.new(self, options).tap { |f| yield f if block_given? }.output
end
|
1321
|
+
end
|
1322
|
+
end
|