fat_table 0.2.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.gitignore +22 -0
- data/.rspec +2 -0
- data/.travis.yml +5 -0
- data/Gemfile +4 -0
- data/LICENSE.txt +22 -0
- data/README.org +2106 -0
- data/README.rdoc +1965 -0
- data/Rakefile +12 -0
- data/TODO.org +31 -0
- data/bin/ft_console +119 -0
- data/bin/setup +8 -0
- data/fat_table.gemspec +80 -0
- data/lib/fat_table.rb +225 -0
- data/lib/fat_table/column.rb +522 -0
- data/lib/fat_table/db_handle.rb +81 -0
- data/lib/fat_table/errors.rb +13 -0
- data/lib/fat_table/evaluator.rb +55 -0
- data/lib/fat_table/formatters.rb +7 -0
- data/lib/fat_table/formatters/aoa_formatter.rb +91 -0
- data/lib/fat_table/formatters/aoh_formatter.rb +91 -0
- data/lib/fat_table/formatters/formatter.rb +1248 -0
- data/lib/fat_table/formatters/latex_formatter.rb +208 -0
- data/lib/fat_table/formatters/org_formatter.rb +72 -0
- data/lib/fat_table/formatters/term_formatter.rb +297 -0
- data/lib/fat_table/formatters/text_formatter.rb +92 -0
- data/lib/fat_table/table.rb +1322 -0
- data/lib/fat_table/version.rb +4 -0
- metadata +331 -0
@@ -0,0 +1,92 @@
|
|
1
|
+
module FatTable
  # Output the table as plain text. This is almost identical to OrgFormatter
  # except that dates do not get formatted as inactive timestamps and the
  # connector at the beginning of hlines is a '+' rather than a '|' as for org
  # tables.
  #
  # Every method below is a private hook returning a String fragment; how and
  # when each hook is invoked is decided by the Formatter superclass
  # (NOTE(review): not visible here -- confirm against formatter.rb).
  class TextFormatter < Formatter
    private

    # Does this Formatter require a second pass over the cells to align the
    # columns according to the alignment formatting instruction to the width of
    # the widest cell in each column?
    def aligned?
      true
    end

    # Rule of '=' characters returned for the position ahead of the header.
    def pre_header(widths)
      rule_line(widths, '=')
    end

    # Text that opens a row.
    def pre_row
      '|'
    end

    # Text placed before a cell; the argument is ignored.
    def pre_cell(_h)
      ''
    end

    # Plain text needs no quoting, so the cell value is returned untouched.
    def quote_cell(v)
      v
    end

    # Text placed after a cell.
    def post_cell
      ''
    end

    # Separator between adjacent cells of a row.
    def inter_cell
      '|'
    end

    # Text that closes a row, including the newline.
    def post_row
      "|\n"
    end

    # Rule of '-' characters used as a horizontal line.
    def hline(widths)
      rule_line(widths, '-')
    end

    # The group, group-footer and footer hooks add nothing in plain text.
    def pre_group
      ''
    end

    def post_group
      ''
    end

    def pre_gfoot
      ''
    end

    def post_gfoot
      ''
    end

    def pre_foot
      ''
    end

    def post_foot
      ''
    end

    # Rule of '=' characters returned for the position after the footers.
    def post_footers(widths)
      rule_line(widths, '=')
    end

    # Build one horizontal rule. +widths+ must respond to #values with the
    # integer width of each column; every column contributes +fill+ repeated
    # (width + 2) times followed by a '+' connector. The rule starts with a
    # '+' and ends with a newline, so an empty +widths+ yields "+\n".
    def rule_line(widths, fill)
      segments = widths.values.map { |w| "#{fill * (w + 2)}+" }
      "+#{segments.join}\n"
    end
  end
end
|
@@ -0,0 +1,1322 @@
|
|
1
|
+
module FatTable
|
2
|
+
# A container for a two-dimensional table. All cells in the table must be a
|
3
|
+
# String, a DateTime (or Date), a Numeric (Bignum, Integer, or BigDecimal), or
|
4
|
+
# a Boolean (TrueClass or FalseClass). All columns must be of one of those
|
5
|
+
# types or be a string convertible into one of them. It is considered an error
|
6
|
+
# if a single column contains cells of different types. Any cell that cannot
|
7
|
+
# be parsed as one of the Numeric, DateTime, or Boolean types will be treated
|
8
|
+
# as a String and have #to_s applied. Until the column type is determined, it
|
9
|
+
# will have the type NilClass.
|
10
|
+
#
|
11
|
+
# You can initialize a Table in several ways:
|
12
|
+
#
|
13
|
+
# 1. ::new, which will return an empty table to which rows or
|
14
|
+
# columns can be added later,
|
15
|
+
#
|
16
|
+
# 2. ::from_csv_file('table.csv'), where the argument is the
|
17
|
+
# name of a .csv file, in which case, the headers will be taken from the
|
18
|
+
# first row of the data.
|
19
|
+
#
|
20
|
+
# 3. ::from_org_file('table.org'), where the argument is the
|
21
|
+
# name of an .org file and the first Emacs org mode table found in the file
|
22
|
+
# will be read. The headers will be taken from the first row of the table
|
23
|
+
# if it is followed by an hrule, otherwise the headers will be synthesized
|
24
|
+
# as +:col_1+, +:col_2+, etc.
|
25
|
+
#
|
26
|
+
# 4. ::from_csv_string('csv_string'), where +csv_string+ is a
|
27
|
+
# string in the same form as a .csv file, and it will be parsed in the same
|
28
|
+
# way.
|
29
|
+
#
|
30
|
+
# 5. ::from_org_string('org_string'), where +org_string+ is a
|
31
|
+
# string in the same form as an Emacs org mode table, and it will be parsed
|
32
|
+
# in the same way.
|
33
|
+
#
|
34
|
+
# 6. ::from_aoa(+aoa+), where +aoa+ is an Array of elements that
|
35
|
+
# are either Arrays or nil. The headers will be taken from the first Array
|
36
|
+
# if it is followed by a nil, otherwise the headers will be synthesized as
|
37
|
+
# +:col_1+, +:col_2+, etc. Each inner Array will be read as a row of the
|
38
|
+
# table and each nil after the first will be taken as a group boundary.
|
39
|
+
#
|
40
|
+
# 7. ::from_aoh(+aoh+), where +aoh+ is an Array of elements each
|
41
|
+
# of which is either (1) a Hash (or any object that responds to #to_h) or
|
42
|
+
# (2) a nil. All Hashes must have the same keys, which become the headers
|
43
|
+
# for the table. Each nil will be taken as marking a group boundary.
|
44
|
+
#
|
45
|
+
# 8. ::from_table(+table+), where +table+ is another FatTable::Table
|
46
|
+
# object.
|
47
|
+
#
|
48
|
+
# In the resulting Table, the headers are converted into symbols, with all
|
49
|
+
# spaces converted to underscore and everything down-cased. So, the heading,
|
50
|
+
# 'Two Words' becomes the header +:two_words+.
|
51
|
+
class Table
|
52
|
+
|
53
|
+
# An Array of FatTable::Columns that constitute the table.
|
54
|
+
attr_reader :columns
|
55
|
+
|
56
|
+
###########################################################################
|
57
|
+
# Constructors
|
58
|
+
###########################################################################
|
59
|
+
|
60
|
+
# :category: Constructors
|
61
|
+
# Return an empty FatTable::Table object.
|
62
|
+
def initialize
|
63
|
+
@columns = []
|
64
|
+
@boundaries = []
|
65
|
+
end
|
66
|
+
|
67
|
+
# :category: Constructors
|
68
|
+
# Construct a Table from the contents of a CSV file. Headers will be taken
|
69
|
+
# from the first row and converted to symbols.
|
70
|
+
def self.from_csv_file(fname)
|
71
|
+
File.open(fname, 'r') do |io|
|
72
|
+
from_csv_io(io)
|
73
|
+
end
|
74
|
+
end
|
75
|
+
|
76
|
+
# :category: Constructors
|
77
|
+
# Construct a Table from a string, treated as the input from a CSV file.
|
78
|
+
def self.from_csv_string(str)
|
79
|
+
from_csv_io(StringIO.new(str))
|
80
|
+
end
|
81
|
+
|
82
|
+
# :category: Constructors
|
83
|
+
|
84
|
+
# Construct a Table from the first table found in the given Emacs org-mode
|
85
|
+
# file. Headers are taken from the first row if the second row is an hrule.
|
86
|
+
# Otherwise, synthetic headers of the form +:col_1+, +:col_2+, etc. are
|
87
|
+
# created.
|
88
|
+
def self.from_org_file(fname)
|
89
|
+
File.open(fname, 'r') do |io|
|
90
|
+
from_org_io(io)
|
91
|
+
end
|
92
|
+
end
|
93
|
+
|
94
|
+
# :category: Constructors
|
95
|
+
# Construct a Table from a string, treated as the contents of an org-mode
|
96
|
+
# file.
|
97
|
+
def self.from_org_string(str)
|
98
|
+
from_org_io(StringIO.new(str))
|
99
|
+
end
|
100
|
+
|
101
|
+
# :category: Constructors
|
102
|
+
|
103
|
+
# Construct a new table from an array of arrays. By default, with +hlines+
|
104
|
+
# false, do not look for separators, i.e. nil or a string of dashes, just
|
105
|
+
# treat the first row as headers. With +hlines+ true, expect separators to
|
106
|
+
# mark the header row and any boundaries. If the second element of the array
|
107
|
+
# is a +nil+, interpret the first element of the array as a row of headers.
|
108
|
+
# Otherwise, synthesize headers of the form +:col_1+, +:col_2+, ... and so
|
109
|
+
# forth. The remaining elements are taken as the body of the table, except
|
110
|
+
# that if an element of the outer array is a +nil+, mark the preceding row
|
111
|
+
# as a boundary. Note: In org mode code blocks, by default (+:hlines no+)
|
112
|
+
# all hlines are stripped from the table, otherwise (+:hlines yes+) they are
|
113
|
+
# indicated with nil elements in the outer array.
|
114
|
+
def self.from_aoa(aoa, hlines: false)
|
115
|
+
from_array_of_arrays(aoa, hlines: hlines)
|
116
|
+
end
|
117
|
+
|
118
|
+
# :category: Constructors
|
119
|
+
|
120
|
+
# Construct a Table from an array of hashes, or any objects that respond to
# the #to_h method. All hashes must have the same keys, which, when
# converted to symbols will become the headers for the Table. If hlines is
# set true, mark a group boundary whenever a nil, rather than a hash
# appears in the outer array.
#
# Raises UserError when the first element of +aoh+ does not respond to
# #to_h. Only the first element is checked here; the rest are handed to
# from_array_of_hashes as they are.
def self.from_aoh(aoh, hlines: false)
  unless aoh.first.respond_to?(:to_h)
    # Name the offending element class in the message.
    raise UserError,
          "Cannot initialize Table with an array of #{aoh.first.class}"
  end
  from_array_of_hashes(aoh, hlines: hlines)
end
|
133
|
+
|
134
|
+
# :category: Constructors
|
135
|
+
|
136
|
+
# Construct a Table from another Table. Inherit any group boundaries from
|
137
|
+
# the input table.
|
138
|
+
def self.from_table(table)
|
139
|
+
table.deep_dup
|
140
|
+
end
|
141
|
+
|
142
|
+
# :category: Constructors
|
143
|
+
|
144
|
+
# Construct a Table by running a SQL query against the database set up with
|
145
|
+
# FatTable.set_db. Return the Table with the query results as rows.
|
146
|
+
def self.from_sql(query)
|
147
|
+
raise UserError, 'FatTable.db must be set with FatTable.set_db' if FatTable.db.nil?
|
148
|
+
result = Table.new
|
149
|
+
sth = FatTable.db.prepare(query)
|
150
|
+
sth.execute
|
151
|
+
sth.fetch_hash do |h|
|
152
|
+
result << h
|
153
|
+
end
|
154
|
+
result
|
155
|
+
end
|
156
|
+
|
157
|
+
############################################################################
|
158
|
+
# Class-level constructor helpers
|
159
|
+
############################################################################
|
160
|
+
|
161
|
+
class << self
|
162
|
+
private
|
163
|
+
|
164
|
+
# Construct table from an array of hashes or an array of any object that can
|
165
|
+
# respond to #to_h. If an array element is a nil, mark it as a group
|
166
|
+
# boundary in the Table.
|
167
|
+
def from_array_of_hashes(hashes, hlines: false)
|
168
|
+
result = new
|
169
|
+
hashes.each do |hsh|
|
170
|
+
if hsh.nil?
|
171
|
+
unless hlines
|
172
|
+
raise UserError, 'found an hline in input with hlines false; try setting hlines true'
|
173
|
+
end
|
174
|
+
result.mark_boundary
|
175
|
+
next
|
176
|
+
end
|
177
|
+
result << hsh.to_h
|
178
|
+
end
|
179
|
+
result
|
180
|
+
end
|
181
|
+
|
182
|
+
# Construct a new table from an array of arrays. By default, with hlines
# false, do not look for separators, i.e. nils, just treat the first row
# as headers. With hlines true, expect nil separators to mark the header
# row and any boundaries. If the second element of the array is a nil,
# interpret the first element of the array as a row of headers. Otherwise,
# synthesize headers of the form :col_1, :col_2, ... and so forth. The
# remaining elements are taken as the body of the table, except that if an
# element of the outer array is a nil, mark the preceding row as a group
# boundary. Note: In org mode code blocks, by default (:hlines no) all
# hlines are stripped from the table, otherwise (:hlines yes) they are
# indicated with nil elements in the outer array as expected by this
# method when hlines is set true.
#
# An empty +rows+ yields an empty table. With hlines true, a lone row has
# no hline after it, so it is read as data under synthesized headers.
# Raises UserError if a nil row is met while hlines is false.
def from_array_of_arrays(rows, hlines: false)
  result = new
  # Nothing to read; every branch below needs rows[0].
  return result if rows.empty?

  if !hlines
    # Take the first row as headers
    # Second row et seq as data
    headers = rows[0].map(&:to_s).map(&:as_sym)
    first_data_row = 1
  elsif rows.size > 1 && rows[1].nil?
    # Use first row 0 as headers
    # Row 1 is an hline
    # Row 2 et seq are data
    headers = rows[0].map(&:to_s).map(&:as_sym)
    first_data_row = 2
  else
    # Synthesize headers
    # Row 0 et seq are data
    headers = (1..rows[0].size).map { |k| "col_#{k}".as_sym }
    first_data_row = 0
  end
  # #drop returns [] (never nil) when there are no data rows left.
  rows.drop(first_data_row).each do |row|
    if row.nil?
      unless hlines
        raise UserError, 'found an hline in input with hlines false; try setting hlines true'
      end
      result.mark_boundary
      next
    end
    cells = row.map { |s| s.to_s.strip }
    result << Hash[headers.zip(cells)]
  end
  result
end
|
228
|
+
|
229
|
+
def from_csv_io(io)
|
230
|
+
result = new
|
231
|
+
::CSV.new(io, headers: true, header_converters: :symbol,
|
232
|
+
skip_blanks: true).each do |row|
|
233
|
+
result << row.to_h
|
234
|
+
end
|
235
|
+
result
|
236
|
+
end
|
237
|
+
|
238
|
+
# Form rows of table by reading the first table found in the org file. The
# header row must be marked with an hline (i.e, a row that looks like
# '|---+--...--|') and groups of rows may be marked with hlines to
# indicate group boundaries.
#
# Lines before the first table line (one starting with optional blanks and
# a '|') are skipped, and reading stops at the first non-table line after
# it. An hline that opens the table is dropped; each later hline is passed
# on as a nil row. The collected rows go to from_array_of_arrays with
# hlines true.
def from_org_io(io)
  table_re = /\A\s*\|/
  hrule_re = /\A\s*\|[-+]+/
  rows = []
  table_found = false
  io.each do |line|
    unless line =~ table_re
      # Still looking for the table: keep scanning. Past it: we are done.
      next unless table_found
      break
    end
    if line =~ hrule_re
      # Mark the hline with a nil, unless it is the table's very first line.
      rows << nil if table_found
    else
      # Strip the outer bars, then split the remainder into cleaned cells.
      inner = line.sub(/\A\s*\|/, '').sub(/\|\s*\z/, '')
      rows << inner.split('|').map(&:clean)
    end
    table_found = true
  end
  from_array_of_arrays(rows, hlines: true)
end
|
277
|
+
end
|
278
|
+
|
279
|
+
###########################################################################
|
280
|
+
# Attributes
|
281
|
+
###########################################################################
|
282
|
+
|
283
|
+
# :category: Attributes
|
284
|
+
# Return the Column with the given header.
|
285
|
+
def column(key)
|
286
|
+
columns.detect { |c| c.header == key.as_sym }
|
287
|
+
end
|
288
|
+
|
289
|
+
# :category: Attributes
|
290
|
+
# Return the type of the Column with the given header
|
291
|
+
def type(key)
|
292
|
+
column(key).type
|
293
|
+
end
|
294
|
+
|
295
|
+
# :category: Attributes
|
296
|
+
|
297
|
+
# Return the array of items of the column with the given header +key+ (a
# Symbol or a String), or if +key+ is an Integer, return that row number. So
# a table's rows can be accessed by number, and its columns can be accessed
# by column header. Also, double indexing works in either row-major or
# column-major order: tab[:id][8] returns the 9th item in the column headed
# :id and so does tab[8][:id].
#
# Integer keys follow Array#[] conventions: 0...size counts from the first
# row and -size..-1 counts back from the last row.
#
# Raises UserError if an Integer key is out of range, if a header key names
# no column, or if +key+ is of any other class.
def [](key)
  case key
  when Integer
    raise UserError, "index '#{key}' out of range" unless (-size...size).cover?(key)
    rows[key]
  when String
    # Headers are symbols, so the String must be normalized before the
    # membership test; #column? does that the same way #column does.
    raise UserError, "header '#{key}' not in table" unless column?(key)
    column(key).items
  when Symbol
    raise UserError, "header ':#{key}' not in table" unless headers.include?(key)
    column(key).items
  else
    raise UserError, "cannot index table with a #{key.class}"
  end
end
|
318
|
+
|
319
|
+
# :category: Attributes
|
320
|
+
|
321
|
+
# Return true if the table has a Column with the given +key+ as a header.
|
322
|
+
def column?(key)
|
323
|
+
headers.include?(key.as_sym)
|
324
|
+
end
|
325
|
+
|
326
|
+
# :category: Attributes
|
327
|
+
|
328
|
+
# Map every Column header symbol of the Table to that Column's type,
# returned as a Hash in column order.
def types
  columns.each_with_object({}) do |col, by_header|
    by_header[col.header] = col.type
  end
end
|
336
|
+
|
337
|
+
# :category: Attributes
|
338
|
+
|
339
|
+
# Return the headers for the Table as an array of symbols.
|
340
|
+
def headers
|
341
|
+
columns.map(&:header)
|
342
|
+
end
|
343
|
+
|
344
|
+
# :category: Attributes
|
345
|
+
|
346
|
+
# Return the number of rows in the Table.
|
347
|
+
def size
|
348
|
+
return 0 if columns.empty?
|
349
|
+
columns.first.size
|
350
|
+
end
|
351
|
+
|
352
|
+
# :category: Attributes
|
353
|
+
|
354
|
+
# Number of Columns in the Table; zero for a table without columns.
def width
  columns.empty? ? 0 : columns.size
end
|
359
|
+
|
360
|
+
# :category: Attributes
|
361
|
+
|
362
|
+
# Return whether this Table is empty.
|
363
|
+
def empty?
|
364
|
+
size.zero?
|
365
|
+
end
|
366
|
+
|
367
|
+
# :category: Attributes
|
368
|
+
|
369
|
+
# Return the rows of the Table as an Array of Hashes, keyed by the headers.
|
370
|
+
def rows
|
371
|
+
rows = []
|
372
|
+
unless columns.empty?
|
373
|
+
0.upto(columns.first.items.last_i) do |rnum|
|
374
|
+
row = {}
|
375
|
+
columns.each do |col|
|
376
|
+
row[col.header] = col[rnum]
|
377
|
+
end
|
378
|
+
rows << row
|
379
|
+
end
|
380
|
+
end
|
381
|
+
rows
|
382
|
+
end
|
383
|
+
|
384
|
+
protected
|
385
|
+
|
386
|
+
# :category: Attributes
|
387
|
+
|
388
|
+
# Return the rows from first to last. We could just index #rows, but in a
|
389
|
+
# large table, that would require that we construct all the rows for a range
|
390
|
+
# of any size.
|
391
|
+
def rows_range(first = 0, last = nil) # :nodoc:
|
392
|
+
last ||= size - 1
|
393
|
+
last = [last, 0].max
|
394
|
+
raise UserError, 'first must be <= last' unless first <= last
|
395
|
+
rows = []
|
396
|
+
unless columns.empty?
|
397
|
+
first.upto(last) do |rnum|
|
398
|
+
row = {}
|
399
|
+
columns.each do |col|
|
400
|
+
row[col.header] = col[rnum]
|
401
|
+
end
|
402
|
+
rows << row
|
403
|
+
end
|
404
|
+
end
|
405
|
+
rows
|
406
|
+
end
|
407
|
+
|
408
|
+
#############################################################################
|
409
|
+
# Enumerable
|
410
|
+
#############################################################################
|
411
|
+
|
412
|
+
public
|
413
|
+
|
414
|
+
include Enumerable
|
415
|
+
|
416
|
+
# :category: Attributes
|
417
|
+
|
418
|
+
# Yield each row of the table as a Hash with the column symbols as keys.
|
419
|
+
def each
|
420
|
+
rows.each do |row|
|
421
|
+
yield row
|
422
|
+
end
|
423
|
+
end
|
424
|
+
|
425
|
+
|
426
|
+
public
|
427
|
+
|
428
|
+
# :category: Attributes
|
429
|
+
|
430
|
+
# Boundaries mark the last row in each "group" within the table. The last
|
431
|
+
# row of the table is always an implicit boundary, and having the last row
|
432
|
+
# as the sole boundary is the default for new tables unless mentioned
|
433
|
+
# otherwise. Resetting the boundaries means to put it back in that default
|
434
|
+
# state.
|
435
|
+
#
|
436
|
+
# Boundaries can be added when a table is read in, for example, from the
|
437
|
+
# text of an org table in which each hline (other than the one separating
|
438
|
+
# the headers from the body) marks a boundary for the row immediately
|
439
|
+
# preceding the hline.
|
440
|
+
#
|
441
|
+
# The #order_by method resets the boundaries then adds boundaries at the
|
442
|
+
# last row of each group of rows on which the sort keys were equal as a
|
443
|
+
# boundary.
|
444
|
+
#
|
445
|
+
# The #union_all (but not #union since it deletes duplicates) method adds a
|
446
|
+
# boundary between the constituent tables. #union_all also preserves any
|
447
|
+
# boundary markers within the constituent tables. In doing so, the
|
448
|
+
# boundaries of the second table in the #union_all are increased by the size
|
449
|
+
# of the first table so that they refer to rows in the new table.
|
450
|
+
#
|
451
|
+
# The #select method preserves any boundaries from the input table without
|
452
|
+
# change, since it only selects columns for the output and deletes no rows.
|
453
|
+
#
|
454
|
+
# Perhaps surprisingly, the #group_by method does /not/ result in any groups
|
455
|
+
# in the output table since the result of #group_by is to reduce all groups
|
456
|
+
# it finds into a single row, and having a group for each row of the output
|
457
|
+
# table would have no use.
|
458
|
+
#
|
459
|
+
# All the other table-transforming methods reset the boundaries in the new
|
460
|
+
# table. For example, #where re-arranges and deletes rows, so the old
|
461
|
+
# boundaries would make no sense anyway. Likewise, #union, #intersection,
|
462
|
+
# #except, and #join reset the boundaries to their default.
|
463
|
+
#
|
464
|
+
# Return the groups of this Table as an Array with one element per
# boundary, each element being the Array of row hashes in that group. The
# boundaries are normalized first.
def groups
  normalize_boundaries
  Array.new(boundaries.size) { |idx| group_rows(idx) }
end
|
473
|
+
|
474
|
+
# :category: Operators
|
475
|
+
|
476
|
+
# Return this table mutated with all groups removed. Useful after something
|
477
|
+
# like #order_by, which adds groups as a side-effect, when you do not want
|
478
|
+
# the groups displayed in the output. This modifies the input table, so is a
|
479
|
+
# departure from the otherwise immutability of Tables.
|
480
|
+
def degroup!
|
481
|
+
@boundaries = []
|
482
|
+
self
|
483
|
+
end
|
484
|
+
|
485
|
+
# Record a group boundary at row index k; without k, the boundary goes on
# the row that is currently last in the table (size - 1). Internal use only.
def mark_boundary(k = nil) # :nodoc:
  boundaries.push(k || size - 1)
end
|
494
|
+
|
495
|
+
protected
|
496
|
+
|
497
|
+
# :stopdoc:
|
498
|
+
|
499
|
+
# Reader for boundaries, but not public.
|
500
|
+
def boundaries
|
501
|
+
@boundaries
|
502
|
+
end
|
503
|
+
|
504
|
+
# Writer for boundaries, but not public.
|
505
|
+
def boundaries=(bounds)
|
506
|
+
@boundaries = bounds
|
507
|
+
end
|
508
|
+
|
509
|
+
# Make sure size - 1 is last boundary and that they are unique and sorted.
|
510
|
+
def normalize_boundaries
|
511
|
+
unless empty?
|
512
|
+
boundaries.push(size - 1) unless boundaries.include?(size - 1)
|
513
|
+
self.boundaries = boundaries.uniq.sort
|
514
|
+
end
|
515
|
+
boundaries
|
516
|
+
end
|
517
|
+
|
518
|
+
# Concatenate the array of argument bounds to this table's boundaries, but
|
519
|
+
# increase each of the indexes in bounds by shift. This is used in the
|
520
|
+
# #union_all method.
|
521
|
+
def append_boundaries(bounds, shift: 0)
|
522
|
+
@boundaries += bounds.map { |k| k + shift }
|
523
|
+
end
|
524
|
+
|
525
|
+
# Return the group number to which row k belongs. Groups, from the user's
# point of view are indexed starting at 1.
#
# A boundary is the index of the last row of a group, so row k lies in the
# group after every boundary that is strictly below k. Counting the
# distinct boundaries below k gives the right answer even when the
# boundaries are unsorted, hold duplicates, or lack the implicit boundary
# on the table's last row: rows past the last explicit boundary fall in
# the group after it rather than in group 1.
def row_index_to_group_index(k)
  boundaries.uniq.count { |b_last| b_last < k } + 1
end
|
533
|
+
|
534
|
+
def group_rows(k) # :nodoc:
|
535
|
+
normalize_boundaries
|
536
|
+
return [] unless k < boundaries.size
|
537
|
+
first = k.zero? ? 0 : boundaries[k - 1] + 1
|
538
|
+
last = boundaries[k]
|
539
|
+
rows_range(first, last)
|
540
|
+
end
|
541
|
+
|
542
|
+
# :startdoc:
|
543
|
+
|
544
|
+
############################################################################
|
545
|
+
# SQL look-alikes. The following methods are based on SQL equivalents and
|
546
|
+
# all return a new Table object rather than modifying the table in place.
|
547
|
+
############################################################################
|
548
|
+
|
549
|
+
public
|
550
|
+
|
551
|
+
# :category: Operators
|
552
|
+
|
553
|
+
# Return a new Table sorting the rows of this Table on the possibly multiple
|
554
|
+
# keys given in the array of syms in headers. Append a ! to the symbol name
|
555
|
+
# to indicate reverse sorting on that column.
|
556
|
+
#
|
557
|
+
# tab.order_by(:ref, :date) => sorted table
|
558
|
+
# tab.order_by(:date!) => reverse sort on :date
|
559
|
+
#
|
560
|
+
# After sorting, the output Table will have group boundaries added after
|
561
|
+
# each row where the sort key changes.
|
562
|
+
def order_by(*sort_heads)
|
563
|
+
sort_heads = [sort_heads].flatten
|
564
|
+
rev_heads = sort_heads.select { |h| h.to_s.ends_with?('!') }
|
565
|
+
sort_heads = sort_heads.map { |h| h.to_s.sub(/\!\z/, '').to_sym }
|
566
|
+
rev_heads = rev_heads.map { |h| h.to_s.sub(/\!\z/, '').to_sym }
|
567
|
+
new_rows = rows.sort do |r1, r2|
|
568
|
+
key1 = sort_heads.map { |h| rev_heads.include?(h) ? r2[h] : r1[h] }
|
569
|
+
key2 = sort_heads.map { |h| rev_heads.include?(h) ? r1[h] : r2[h] }
|
570
|
+
key1 <=> key2
|
571
|
+
end
|
572
|
+
# Add the new rows to the table, but mark a group boundary at the points
|
573
|
+
# where the sort key changes value.
|
574
|
+
new_tab = Table.new
|
575
|
+
last_key = nil
|
576
|
+
new_rows.each_with_index do |nrow, k|
|
577
|
+
new_tab << nrow
|
578
|
+
key = nrow.fetch_values(*sort_heads)
|
579
|
+
new_tab.mark_boundary(k - 1) if last_key && key != last_key
|
580
|
+
last_key = key
|
581
|
+
end
|
582
|
+
new_tab.normalize_boundaries
|
583
|
+
new_tab
|
584
|
+
end
|
585
|
+
|
586
|
+
# :category: Operators
|
587
|
+
|
588
|
+
# Return a Table having the selected column expressions. Each expression can
|
589
|
+
# be one of the following:
|
590
|
+
#
|
591
|
+
# 1. a symbol, +:old_col+, representing a column in the current table,
|
592
|
+
#
|
593
|
+
# 2. a hash of +new_col: :old_col+ to rename an existing +:old_col+ column as
|
594
|
+
# +:new_col+, or
|
595
|
+
#
|
596
|
+
# 3. a hash of +new_col: 'expression'+, to add a new column that is computed
|
597
|
+
# as an arbitrary ruby expression of the existing columns (whether
|
598
|
+
# selected for the output table or not) or any new_col defined earlier in
|
599
|
+
# the argument list defined as local variables in the expression. The
|
600
|
+
# expression string can also access the instance variable @row, as the row
|
601
|
+
# number of the row being evaluated, and @group, as the group number of
|
602
|
+
# the row being evaluated.
|
603
|
+
#
|
604
|
+
# The bare symbol arguments (1) must precede any hash arguments (2) or (3).
|
605
|
+
# Each expression results in a column in the resulting Table in the order
|
606
|
+
# given. The expressions are evaluated in left-to-right order as well. The
|
607
|
+
# output table preserves any groups present in the input table.
|
608
|
+
#
|
609
|
+
# tab.select(:ref, :date, :shares) => table with only 3 columns selected
|
610
|
+
# tab.select(:ref, :date, shares: :quantity) => rename :shares->:quantity
|
611
|
+
# tab.select(:ref, :date, :shares, cost: 'price * shares') => new column
|
612
|
+
# tab.select(:ref, :date, :shares, seq: '@row') => add sequential nums
|
613
|
+
def select(*cols, **new_cols)
|
614
|
+
result = Table.new
|
615
|
+
normalize_boundaries
|
616
|
+
ev = Evaluator.new(vars: { row: 0, group: 1 },
|
617
|
+
before: '@row = __row; @group = __group')
|
618
|
+
rows.each_with_index do |old_row, old_k|
|
619
|
+
new_row = {}
|
620
|
+
cols.each do |k|
|
621
|
+
h = k.as_sym
|
622
|
+
raise UserError, "Column '#{h}' in select does not exist" unless column?(h)
|
623
|
+
new_row[h] = old_row[h]
|
624
|
+
end
|
625
|
+
new_cols.each_pair do |key, val|
|
626
|
+
key = key.as_sym
|
627
|
+
vars = old_row.merge(new_row)
|
628
|
+
vars[:__row] = old_k + 1
|
629
|
+
vars[:__group] = row_index_to_group_index(old_k)
|
630
|
+
case val
|
631
|
+
when Symbol
|
632
|
+
raise UserError, "Column '#{val}' in select does not exist" unless vars.keys.include?(val)
|
633
|
+
new_row[key] = vars[val]
|
634
|
+
when String
|
635
|
+
new_row[key] = ev.evaluate(val, vars: vars)
|
636
|
+
else
|
637
|
+
raise UserError, 'Hash parameters to select must be a symbol or string'
|
638
|
+
end
|
639
|
+
end
|
640
|
+
result << new_row
|
641
|
+
end
|
642
|
+
result.boundaries = boundaries
|
643
|
+
result.normalize_boundaries
|
644
|
+
result
|
645
|
+
end
|
646
|
+
|
647
|
+
# :category: Operators
|
648
|
+
|
649
|
+
# Return a Table containing only rows for which the Ruby where expression,
|
650
|
+
# +exp+, evaluates to a truthy value. Within the string expression +exp+,
|
651
|
+
# each header is a local variable bound to the value of the current row in
|
652
|
+
# that column, and the instance variables @row and @group are available as
|
653
|
+
# the row and group number of the row being evaluated. Any groups present in
|
654
|
+
# the input Table are eliminated in the output Table.
|
655
|
+
#
|
656
|
+
# tab.where('date > Date.today - 30') => rows with recent dates
|
657
|
+
# tab.where('@row.even? && shares > 500') => even rows with lots of shares
|
658
|
+
def where(expr)
|
659
|
+
expr = expr.to_s
|
660
|
+
result = Table.new
|
661
|
+
headers.each do |h|
|
662
|
+
col = Column.new(header: h)
|
663
|
+
result.add_column(col)
|
664
|
+
end
|
665
|
+
ev = Evaluator.new(vars: { row: 0 },
|
666
|
+
before: '@row = __row; @group = __group')
|
667
|
+
rows.each_with_index do |row, k|
|
668
|
+
vars = row.dup
|
669
|
+
vars[:__row] = k + 1
|
670
|
+
vars[:__group] = row_index_to_group_index(k)
|
671
|
+
result << row if ev.evaluate(expr, vars: vars)
|
672
|
+
end
|
673
|
+
result.normalize_boundaries
|
674
|
+
result
|
675
|
+
end
|
676
|
+
|
677
|
+
# :category: Operators
|
678
|
+
|
679
|
+
# Build a fresh Table holding each distinct row of this table once, in
# order of first appearance. The new table starts without any of this
# table's groups. Same as #uniq.
def distinct
  deduped = Table.new
  rows.uniq.each { |row| deduped << row }
  deduped
end
|
689
|
+
|
690
|
+
# :category: Operators
|
691
|
+
|
692
|
+
# Return this table with all duplicate rows eliminated. Resets groups. Same
|
693
|
+
# as #distinct.
|
694
|
+
def uniq
|
695
|
+
distinct
|
696
|
+
end
|
697
|
+
|
698
|
+
# :category: Operators
|
699
|
+
|
700
|
+
# Return a Table that combines this table with +other+ table. In other
|
701
|
+
# words, return the union of this table with the other. The headers of this
|
702
|
+
# table are used in the result. There must be the same number of columns of
|
703
|
+
# the same type in the two tables, or an exception will be thrown.
|
704
|
+
# Duplicates are eliminated from the result. Any groups present in either
|
705
|
+
# Table are eliminated in the output Table.
|
706
|
+
def union(other)
  # Row concatenation (:+) through set_operation, with duplicates removed.
  set_operation(other, :+, distinct: true, add_boundaries: true)
end
|
711
|
+
|
712
|
+
# :category: Operators
|
713
|
+
|
714
|
+
# Return a Table that combines this table with +other+ table. In other
|
715
|
+
# words, return the union of this table with the other. The headers of this
|
716
|
+
# table are used in the result. There must be the same number of columns of
|
717
|
+
# the same type in the two tables, or an exception will be thrown.
|
718
|
+
# Duplicates are not eliminated from the result. Adds group boundaries at
|
719
|
+
# boundaries of the constituent tables. Preserves and adjusts the group
|
720
|
+
# boundaries of the constituent table.
|
721
|
+
def union_all(other)
  # Row concatenation (:+) through set_operation, keeping duplicates and
  # asking it to carry over the boundaries of both tables.
  set_operation(other, :+, distinct: false, add_boundaries: true, inherit_boundaries: true)
end
|
727
|
+
|
728
|
+
# :category: Operators
|
729
|
+
|
730
|
+
# Return a Table that includes the rows that appear in this table and in
|
731
|
+
# +other+ table. In other words, return the intersection of this table with
|
732
|
+
# the other. The headers of this table are used in the result. There must be
|
733
|
+
# the same number of columns of the same type in the two tables, or an
|
734
|
+
# exception will be thrown. Duplicates are eliminated from the result. Any
|
735
|
+
# groups present in either Table are eliminated in the output Table.
|
736
|
+
def intersect(other)
  # Delegates to set_operation with the :intersect row operation and
  # duplicate elimination switched on.
  set_operation(other, :intersect, distinct: true)
end
|
739
|
+
|
740
|
+
# :category: Operators
|
741
|
+
|
742
|
+
# Return a Table that includes all the rows in this table that also occur in
|
743
|
+
# +other+ table. Note that the order of the operands matters. Duplicates in
|
744
|
+
# this table will be included in the output, but duplicates in other will
|
745
|
+
# not. The headers of this table are used in the result. There must be the
|
746
|
+
# same number of columns of the same type in the two tables, or an exception
|
747
|
+
# will be thrown. Duplicates are not eliminated from the result. Resets
|
748
|
+
# groups.
|
749
|
+
def intersect_all(other)
  # Delegates to set_operation with the :intersect row operation and
  # duplicate elimination switched off.
  set_operation(other, :intersect, distinct: false)
end
|
752
|
+
|
753
|
+
# :category: Operators
|
754
|
+
|
755
|
+
# Return a Table that includes the rows of this table except for any rows
|
756
|
+
# that are the same as those in another table. In other words, return the
|
757
|
+
# set difference between this table and the other. The headers of this table
|
758
|
+
# are used in the result. There must be the same number of columns of the
|
759
|
+
# same type in the two tables, or an exception will be thrown. Duplicates
|
760
|
+
# are eliminated from the result. Any groups present in either Table are
|
761
|
+
# eliminated in the output Table.
|
762
|
+
def except(other)
  # Delegates to set_operation with the :difference row operation and
  # duplicate elimination switched on.
  set_operation(other, :difference, distinct: true)
end
|
765
|
+
|
766
|
+
# :category: Operators
|
767
|
+
|
768
|
+
# Return a Table that includes the rows of this table except for any rows
|
769
|
+
# that are the same as those in +other+ Table. In other words, return the
|
770
|
+
# set difference between this table and the other. The headers of this table
|
771
|
+
# are used in the result. There must be the same number of columns of the
|
772
|
+
# same type in the two tables, or an exception will be thrown. Duplicates
|
773
|
+
# are not eliminated from the result. Any groups present in either Table are
|
774
|
+
# eliminated in the output Table.
|
775
|
+
def except_all(other)
  # Delegates to set_operation with the :difference row operation and
  # duplicate elimination switched off.
  set_operation(other, :difference, distinct: false)
end
|
778
|
+
|
779
|
+
private
|
780
|
+
|
781
|
+
# Apply the set operation given by op between this table and the other table
|
782
|
+
# given in the first argument. If distinct is true, eliminate duplicates
|
783
|
+
# from the result.
|
784
|
+
def set_operation(other, op = :+,
                  distinct: true,
                  add_boundaries: true,
                  inherit_boundaries: false)
  # Combines this table's rows with +other+'s rows by sending +op+ to the
  # row Array, and returns the outcome as a new Table.
  #
  # other::              table whose rows are re-keyed to this table's headers
  # op::                 Array method applied as rows.send(op, other_rows)
  # distinct::           when true, return result.distinct
  # add_boundaries::     when true, mark a boundary after the result row whose
  #                      index equals this table's last row index
  # inherit_boundaries:: when true, copy this table's normalized boundaries
  #                      and append +other+'s, shifted by this table's size
  #
  # Raises UserError when the column counts or column types differ.
  if columns.size != other.columns.size
    raise UserError, 'Cannot apply a set operation to tables with a different number of columns.'
  end
  if columns.map(&:type) != other.columns.map(&:type)
    raise UserError, 'Cannot apply a set operation to tables with different column types.'
  end

  rekeyed_rows = other.rows.map { |row| row.replace_keys(headers) }
  combined = Table.new
  rows.send(op, rekeyed_rows).each_with_index do |row, idx|
    combined << row
    combined.mark_boundary if add_boundaries && idx == size - 1
  end
  if inherit_boundaries
    combined.boundaries = normalize_boundaries
    other.normalize_boundaries
    combined.append_boundaries(other.boundaries, shift: size)
  end
  combined.normalize_boundaries
  return combined unless distinct

  combined.distinct
end
|
809
|
+
|
810
|
+
public
|
811
|
+
|
812
|
+
# An Array of symbols for the valid join types.
|
813
|
+
JOIN_TYPES = %i[inner left right full cross].freeze
|
814
|
+
|
815
|
+
# :category: Operators
|
816
|
+
#
|
817
|
+
# Return a table that joins this table to another based on one or more join
|
818
|
+
# expressions. There are several possibilities for the join expressions:
|
819
|
+
#
|
820
|
+
# 1. If no join expressions are given, the tables will be joined when all
|
821
|
+
# values with the same name in both tables have the same value, a
|
822
|
+
# "natural" join. However, if the join type is :cross, the join
|
823
|
+
# expression will be taken to be 'true'. Otherwise, if there are no
|
824
|
+
# common column names, an exception will be raised.
|
825
|
+
#
|
826
|
+
# 2. If the join expressions are one or more symbols, the join condition
|
827
|
+
# requires that the values of both tables are equal for all columns named
|
828
|
+
# by the symbols. A column that appears in both tables can be given
|
829
|
+
# without modification and will be assumed to require equality on that
|
830
|
+
# column. If an unmodified symbol is not a name that appears in both
|
831
|
+
# tables, an exception will be raised. Column names that are unique to
|
832
|
+
# the first table must have a '_a' appended to the column name and column
|
833
|
+
# names that are unique to the other table must have a '_b' appended to
|
834
|
+
# the column name. These disambiguated column names must come in pairs,
|
835
|
+
# one for the first table and one for the second, and they will imply a
|
836
|
+
# join condition that the columns must be equal on those columns. Several
|
837
|
+
# such symbol expressions will require that all such implied pairs are
|
838
|
+
# equal in order for the join condition to be met.
|
839
|
+
#
|
840
|
+
# 3. Finally, a string expression can be given that contains an arbitrary
|
841
|
+
# ruby expression that will be evaluated for truthiness. Within the
|
842
|
+
# string, all column names must be disambiguated with the '_a' or '_b'
|
843
|
+
# modifiers whether they are common to both tables or not. The names of
|
844
|
+
# the columns in both tables (without the leading ':' for symbols) are
|
845
|
+
# available as variables within the expression.
|
846
|
+
#
|
847
|
+
# The join_type parameter specifies what sort of join is performed, :inner,
|
848
|
+
# :left, :right, :full, or :cross. The default is an :inner join. The types
|
849
|
+
# of joins are defined as follows where T1 means this table, the receiver,
|
850
|
+
# and T2 means other. These descriptions are taken from the Postgresql
|
851
|
+
# documentation.
|
852
|
+
#
|
853
|
+
# :inner:: For each row R1 of T1, the joined table has a row for each row in
|
854
|
+
# T2 that satisfies the join condition with R1.
|
855
|
+
#
|
856
|
+
# :left:: First, an inner join is performed. Then, for each row in T1 that
|
857
|
+
# does not satisfy the join condition with any row in T2, a joined
|
858
|
+
# row is added with null values in columns of T2. Thus, the joined
|
859
|
+
# table always has at least one row for each row in T1.
|
860
|
+
#
|
861
|
+
# :right:: First, an inner join is performed. Then, for each row in T2 that
|
862
|
+
# does not satisfy the join condition with any row in T1, a joined
|
863
|
+
# row is added with null values in columns of T1. This is the
|
864
|
+
# converse of a left join: the result table will always have a row
|
865
|
+
# for each row in T2.
|
866
|
+
#
|
867
|
+
# :full:: First, an inner join is performed. Then, for each row in T1 that
|
868
|
+
# does not satisfy the join condition with any row in T2, a joined
|
869
|
+
# row is added with null values in columns of T2. Also, for each row
|
870
|
+
# of T2 that does not satisfy the join condition with any row in T1,
|
871
|
+
# a joined row with null values in the columns of T1 is added.
|
872
|
+
#
|
873
|
+
# :cross:: For every possible combination of rows from T1 and T2 (i.e., a
|
874
|
+
# Cartesian product), the joined table will contain a row
|
875
|
+
# consisting of all columns in T1 followed by all columns in T2. If
|
876
|
+
# the tables have N and M rows respectively, the joined table will
|
877
|
+
# have N * M rows.
|
878
|
+
#
|
879
|
+
# Any groups present in either Table are eliminated in the output Table.
|
880
|
+
# See the README for examples.
|
881
|
+
def join(other, *exps, join_type: :inner)
  # Joins this table (rows "a") with +other+ (rows "b") and returns the
  # joined rows as a new Table. The join condition comes from
  # build_join_expression and is evaluated once per (a, b) row pair.
  #
  # Raises UserError when +other+ is not a Table or +join_type+ is not one of
  # JOIN_TYPES.
  raise UserError, 'need other table as first argument to join' unless other.is_a?(Table)
  unless JOIN_TYPES.include?(join_type)
    raise UserError, "join_type may only be: #{JOIN_TYPES.join(', ')}"
  end

  # All-nil stand-in rows used for the unmatched side of outer joins.
  nil_row_a = headers.each_with_object({}) { |head, acc| acc[head] = nil }
  nil_row_b = other.headers.each_with_object({}) { |head, acc| acc[head] = nil }
  condition, shared_heads = build_join_expression(exps, other, join_type)
  keep_unmatched_a = %i[left full].include?(join_type)
  keep_unmatched_b = %i[right full].include?(join_type)
  evaluator = Evaluator.new
  joined = Table.new
  b_rows = other.rows
  # b_matched[i] records whether b_rows[i] satisfied the condition with any
  # row of this table.
  b_matched = Array.new(b_rows.size, false)
  rows.each do |row_a|
    a_matched = false
    b_rows.each_with_index do |row_b, idx|
      locals = build_locals_hash(row_a: row_a, row_b: row_b)
      next unless evaluator.evaluate(condition, vars: locals)

      a_matched = true
      b_matched[idx] = true
      joined << build_out_row(row_a: row_a, row_b: row_b,
                              common_heads: shared_heads,
                              type: join_type)
    end
    next unless keep_unmatched_a && !a_matched

    joined << build_out_row(row_a: row_a, row_b: nil_row_b, type: join_type)
  end
  if keep_unmatched_b
    b_rows.each_with_index do |row_b, idx|
      next if b_matched[idx]

      joined << build_out_row(row_a: nil_row_a, row_b: row_b, type: join_type)
    end
  end
  joined.normalize_boundaries
  joined
end
|
928
|
+
|
929
|
+
# :category: Operators
|
930
|
+
# Perform an inner join as described in FatTable::Table.join.
|
931
|
+
def inner_join(other, *exps)
  # Forwards to #join with the :inner join type spelled out explicitly.
  join(other, *exps, join_type: :inner)
end
|
934
|
+
|
935
|
+
# :category: Operators
|
936
|
+
# Perform a left join as described in FatTable::Table.join.
|
937
|
+
def left_join(other, *exps)
  # Forwards to #join, fixing the join type to :left.
  join(other, *exps, join_type: :left)
end
|
940
|
+
|
941
|
+
# :category: Operators
|
942
|
+
# Perform a right join as described in FatTable::Table.join.
|
943
|
+
def right_join(other, *exps)
  # Forwards to #join, fixing the join type to :right.
  join(other, *exps, join_type: :right)
end
|
946
|
+
|
947
|
+
# :category: Operators
|
948
|
+
# Perform a full join as described in FatTable::Table.join.
|
949
|
+
def full_join(other, *exps)
  # Forwards to #join, fixing the join type to :full.
  join(other, *exps, join_type: :full)
end
|
952
|
+
|
953
|
+
# :category: Operators
|
954
|
+
# Perform a cross join as described in FatTable::Table.join.
|
955
|
+
def cross_join(other)
  # Forwards to #join with no join expressions and the :cross join type.
  join(other, join_type: :cross)
end
|
958
|
+
|
959
|
+
private
|
960
|
+
|
961
|
+
# Return an output row appropriate to the given join type, including all the
|
962
|
+
# keys of row_a, the non-common keys of row_b for an :inner join, or all the
|
963
|
+
# keys of row_b for other joins. If any of the row_b keys are also row_a
|
964
|
+
# keys, change the key name by appending a '_b' so the keys will not repeat.
|
965
|
+
def build_out_row(row_a:, row_b:, common_heads: [], type: :inner)
  # Merges row_b into row_a to form one output row of a join.
  #
  # For an :inner join the row_b keys listed in +common_heads+ are dropped
  # first. Any remaining row_b key that also occurs in row_a gets '_b'
  # appended so it cannot overwrite the row_a entry.
  b_part = type == :inner ? row_b.reject { |head, _| common_heads.include?(head) } : row_b
  taken_heads = row_a.keys
  renamed_b = b_part.each_with_object({}) do |(head, value), acc|
    acc[taken_heads.include?(head) ? :"#{head}_b" : head] = value
  end
  row_a.merge(renamed_b)
end
|
979
|
+
|
980
|
+
# Return a hash for the local variables of a join expression in which all
|
981
|
+
# the keys in row_a have an '_a' appended and all the keys in row_b have a
|
982
|
+
# '_b' appended.
|
983
|
+
def build_locals_hash(row_a:, row_b:)
  # Produces the variable Hash for evaluating a join expression: every key of
  # row_a with '_a' appended, followed by every key of row_b with '_b'
  # appended, each converted to a Symbol.
  locals = {}
  row_a.each { |head, value| locals[:"#{head}_a"] = value }
  row_b.each { |head, value| locals[:"#{head}_b"] = value }
  locals
end
|
988
|
+
|
989
|
+
# Return an array of two elements: (1) a ruby expression that expresses the
|
990
|
+
# AND of all join conditions as described in the comment to the #join method
|
991
|
+
# and (2) the heads from other table that (a) are known to be tested for
|
992
|
+
# equality with a head in self table and (b) have the same name. Assume that
|
993
|
+
# the expression will be evaluated in the context of a binding in which the
|
994
|
+
# local variables are all the headers in the self table with '_a' appended
|
995
|
+
# and all the headers in the other table with '_b' appended.
|
996
|
+
def build_join_expression(exps, other, type)
  # Translates the join expressions given to #join into a single Ruby
  # expression String and collects the heads of +other+ involved in equality
  # tests.
  #
  # exps::  Array of Symbols and/or Strings (may be empty)
  # other:: the table being joined to this one
  # type::  the join type; :cross short-circuits to ['true', []]
  #
  # Returns [expression, heads]. The expression refers to this table's
  # columns as '<head>_a' and to +other+'s columns as '<head>_b'.
  #
  # Raises UserError when: no expressions are given and the tables share no
  # headers; a symbol names a missing column; a qualified symbol (ending in
  # '_a' or '_b') is not paired with a qualified symbol from the other table;
  # or an expression is neither a Symbol nor a String.
  return ['true', []] if type == :cross

  a_heads = headers
  b_heads = other.headers
  common_heads = a_heads & b_heads
  if exps.empty?
    if common_heads.empty?
      raise UserError,
            'A non-cross join with no common column names requires join expressions'
    end
    # A natural join on all common heads.
    common_heads.each do |h|
      ensure_common_types!(self_h: h, other_h: h, other: other)
    end
    nat_exp = common_heads.map { |h| "(#{h}_a == #{h}_b)" }.join(' && ')
    return [nat_exp, common_heads]
  end

  and_conds = []
  b_common_heads = []
  # Holds [head, side] for the first half of a qualified pair until its
  # partner from the other table arrives; side is 'a' or 'b'.
  pending = nil
  exps.each do |exp|
    case exp
    when Symbol
      qualified = /\A(.*)_([ab])\z/.match(exp.to_s.clean)
      if qualified
        head = qualified[1].to_sym
        side = qualified[2]
        if side == 'a'
          raise UserError, "no column '#{head}' in table" unless a_heads.include?(head)
        else
          raise UserError, "no column '#{head}' in second table" unless b_heads.include?(head)
        end
        if pending
          prev_head, prev_side = pending
          # A pair must name one column from each table; two symbols from
          # the same table cannot form a join condition.
          if prev_side == side
            raise UserError, "must follow '#{prev_head}' by qualified exp from the other table"
          end
          self_h, other_h = side == 'a' ? [head, prev_head] : [prev_head, head]
          ensure_common_types!(self_h: self_h, other_h: other_h, other: other)
          and_conds << "(#{prev_head}_#{prev_side} == #{head}_#{side})"
          pending = nil
        else
          pending = [head, side]
        end
        b_common_heads << head if side == 'b'
      else
        # No modifier, so it must be one of the common columns.
        if pending
          # We were expecting the second of a modified pair, but got an
          # unmodified symbol instead.
          raise UserError, "must follow '#{pending.first}' by qualified exp from the other table"
        end
        unless common_heads.include?(exp)
          raise UserError, "unqualified column '#{exp}' must occur in both tables"
        end
        ensure_common_types!(self_h: exp, other_h: exp, other: other)
        and_conds << "(#{exp}_a == #{exp}_b)"
        b_common_heads << exp
      end
    when String
      # A string expression in which all column references must be
      # qualified; it is ANDed in verbatim.
      and_conds << "(#{exp})"
    else
      raise UserError, "invalid join expression '#{exp}' of class #{exp.class}"
    end
  end
  # A qualified symbol left without its partner would otherwise be dropped
  # silently from the join condition.
  if pending
    raise UserError, "must follow '#{pending.first}' by qualified exp from the other table"
  end
  [and_conds.join(' && '), b_common_heads]
end
|
1084
|
+
|
1085
|
+
# Raise an exception unless self_h in this table and other_h in other table
|
1086
|
+
# have the same types.
|
1087
|
+
def ensure_common_types!(self_h:, other_h:, other:)
  # Verifies that column +self_h+ of this table and column +other_h+ of
  # +other+ report the same type.
  #
  # Returns self when the types are equal; raises UserError otherwise.
  return self if column(self_h).type == other.column(other_h).type

  raise UserError,
        "type of column '#{self_h}' does not match type of column '#{other_h}'"
end
|
1094
|
+
|
1095
|
+
###################################################################################
|
1096
|
+
# Group By
|
1097
|
+
###################################################################################
|
1098
|
+
|
1099
|
+
public
|
1100
|
+
|
1101
|
+
# :category: Operators
|
1102
|
+
# Return a Table with a single row for each group of rows in the input table
|
1103
|
+
# where the value of all columns named as simple symbols are equal. All
|
1104
|
+
# other columns are set to the result of aggregating the values of that
|
1105
|
+
# column within the group according to an aggregate function (:count, :sum,
|
1106
|
+
# :min, :max, etc.) that you can specify by adding a hash parameter with the
|
1107
|
+
# column as the key and a symbol for the aggregate function as the value.
|
1108
|
+
# For example, consider the following call:
|
1109
|
+
#
|
1110
|
+
# tab.group_by(:date, :code, :price, shares: :sum).
|
1111
|
+
#
|
1112
|
+
# The first three parameters are simple symbols, so the table is divided
|
1113
|
+
# into groups of rows in which the value of :date, :code, and :price are
|
1114
|
+
# equal. The shares: hash parameter is set to the aggregate function :sum,
|
1115
|
+
# so it will appear in the result as the sum of all the :shares values in
|
1116
|
+
# each group. Because of the way Ruby parses parameters to a method call,
|
1117
|
+
# all the grouping symbols must appear first in the parameter list before
|
1118
|
+
# any hash parameters.
|
1119
|
+
def group_by(*group_cols, **agg_cols)
  # Sorts this table by +group_cols+ (via order_by), partitions the sorted
  # rows by their values in those columns, and returns a new Table with one
  # row per partition, each built by row_from_group.
  partitions = order_by(group_cols).rows.group_by do |row|
    group_cols.map { |col| row[col] }
  end
  summary = Table.new
  partitions.each_value do |members|
    summary << row_from_group(members, group_cols, agg_cols)
  end
  summary.normalize_boundaries
  summary
end
|
1131
|
+
|
1132
|
+
private
|
1133
|
+
|
1134
|
+
def row_from_group(rows, grp_cols, agg_cols)
  # Collapses the rows of one group into a single Hash row.
  #
  # rows::     the rows (Hashes) belonging to the group
  # grp_cols:: heads copied from the group's first row
  # agg_cols:: Hash of head => aggregate method name; each produces a key
  #            "<agg>_<head>" whose value is the result of sending the
  #            aggregate to a Column built from the group's values
  first_row = rows.first
  seed = grp_cols.each_with_object({}) { |head, acc| acc[head] = first_row[head] }
  agg_cols.each_with_object(seed) do |(head, agg), acc|
    values = rows.map { |row| row[head] }
    acc["#{agg}_#{head}".as_sym] = Column.new(header: head, items: values).send(agg)
  end
end
|
1147
|
+
|
1148
|
+
############################################################################
|
1149
|
+
# Table construction methods.
|
1150
|
+
############################################################################
|
1151
|
+
|
1152
|
+
public
|
1153
|
+
|
1154
|
+
# :category: Constructors
|
1155
|
+
# Add a row represented by a Hash having the headers as keys. If mark is
|
1156
|
+
# true, mark this row as a boundary. All tables should be built ultimately
|
1157
|
+
# using this method as a primitive.
|
1158
|
+
def add_row(row, mark: false)
  # Appends one row, given as a Hash of header => value, to the table.
  # A Column is created for any header the table does not have yet. When
  # +mark+ is true the index of the newly added row is recorded in
  # @boundaries. Returns self.
  row.each_pair do |head, value|
    sym_head = head.as_sym
    unless column?(head)
      columns << Column.new(header: head)
    end
    column(sym_head) << value
  end
  @boundaries << (size - 1) if mark
  self
end
|
1167
|
+
|
1168
|
+
# :category: Constructors
|
1169
|
+
# Add a row without marking.
|
1170
|
+
def <<(row)
  # Appends +row+ through add_row with boundary marking switched off.
  add_row(row, mark: false)
end
|
1173
|
+
|
1174
|
+
# :category: Constructors
|
1175
|
+
# Add a FatTable::Column object +col+ to the table.
|
1176
|
+
def add_column(col)
  # Appends +col+ to the table's columns and returns self. Raises a
  # RuntimeError when a column with the same header is already present.
  if column?(col.header)
    raise "Table already has a column with header '#{col.header}'"
  end

  columns << col
  self
end
|
1181
|
+
|
1182
|
+
############################################################################
|
1183
|
+
# Convenience output methods
|
1184
|
+
############################################################################
|
1185
|
+
|
1186
|
+
# :category: Output
|
1187
|
+
|
1188
|
+
# In the same spirit as the FatTable module-level functions, the following
|
1189
|
+
# simply tee-up a Formatter for self so that the user need not instantiate
|
1190
|
+
# actual Formatter objects. Thus, one of these methods can be invoked as the
|
1191
|
+
# last method in a chain of Table operations.
|
1192
|
+
|
1193
|
+
# :category: Output
|
1194
|
+
|
1195
|
+
# Return a string or ruby object according to the format specified in
|
1196
|
+
# FatTable.format. If a block is given, it will yield a Formatter of the
|
1197
|
+
# appropriate type to which format and footers can be applied. Otherwise, the
|
1198
|
+
# default format for the type will be used.
|
1199
|
+
#
|
1200
|
+
# :call-seq:
|
1201
|
+
# to_format(options = {}) { |fmt| ... }
|
1202
|
+
#
|
1203
|
+
def to_format(options = {}, &block)
  # Formats this table using the format type returned by FatTable.format.
  #
  # options:: Hash passed through to #to_any
  # block::   optional; forwarded to #to_any, which hands it on to the
  #           matching to_<format> method
  #
  # Returns whatever #to_any returns.
  to_any(FatTable.format, options, &block)
end
|
1210
|
+
|
1211
|
+
# :category: Output
|
1212
|
+
|
1213
|
+
# Return a string or ruby object according to the format type +fmt_type+
|
1214
|
+
# given in the first argument. Valid format types are :psv, :aoa, :aoh,
|
1215
|
+
# :latex, :org, :term, :text, or their string equivalents. If a block is
|
1216
|
+
# given, it will yield a Formatter of the appropriate type to which format
|
1217
|
+
# and footers can be applied. Otherwise, the default format for the type
|
1218
|
+
# will be used.
|
1219
|
+
#
|
1220
|
+
# :call-seq: to_any(fmt_type, options = {}) { |fmt| ... }
|
1221
|
+
#
|
1222
|
+
def to_any(fmt_type, options = {}, &block)
  # Formats this table with the to_<fmt_type> method.
  #
  # fmt_type:: format name; converted with as_sym and checked against
  #            FatTable::FORMATS
  # options::  Hash passed through to the to_<fmt_type> method
  # block::    optional; forwarded to the to_<fmt_type> method
  #
  # Returns the result of the to_<fmt_type> call. Raises UserError when the
  # format is not listed in FatTable::FORMATS.
  fmt = fmt_type.as_sym
  raise UserError, "unknown format '#{fmt}'" unless FatTable::FORMATS.include?(fmt)

  send("to_#{fmt}", options, &block)
end
|
1232
|
+
|
1233
|
+
# :category: Output
|
1234
|
+
|
1235
|
+
# Return the table as a string formatted as a pipe-separated values. If no
|
1236
|
+
# block is given, default formatting is applied to the table's cells. If a
|
1237
|
+
# block is given, it yields a Formatter to the block to which formatting
|
1238
|
+
# instructions and footers can be added by calling methods on it. Since the
|
1239
|
+
# pipe-separated format is the default format for Formatter, there is no
|
1240
|
+
# class PsvFormatter as you might expect.
|
1241
|
+
def to_psv(options = {})
  # Wraps this table in a Formatter built with +options+, yields the
  # formatter to the caller's block when one is given, and returns the
  # formatter's output.
  Formatter.new(self, options).tap { |formatter| yield formatter if block_given? }.output
end
|
1246
|
+
|
1247
|
+
# :category: Output
|
1248
|
+
|
1249
|
+
# Return the table as an Array of Array of Strings. If no block is given,
|
1250
|
+
# default formatting is applied to the table's cells. If a block is given,
|
1251
|
+
# it yields an AoaFormatter to the block to which formatting instructions
|
1252
|
+
# and footers can be added by calling methods on it.
|
1253
|
+
def to_aoa(options = {})
  # Wraps this table in a FatTable::AoaFormatter built with +options+,
  # yields the formatter to the caller's block when one is given, and
  # returns the formatter's output.
  FatTable::AoaFormatter.new(self, options).tap { |formatter| yield formatter if block_given? }.output
end
|
1258
|
+
|
1259
|
+
# :category: Output
|
1260
|
+
|
1261
|
+
# Return the table as an Array of Hashes. Each inner hash uses the Table's
|
1262
|
+
# columns as keys and its values are strings representing the cells of the
|
1263
|
+
# table. If no block is given, default formatting is applied to the table's
|
1264
|
+
# cells. If a block is given, it yields an AohFormatter to the block to
|
1265
|
+
# which formatting instructions and footers can be added by calling methods
|
1266
|
+
# on it.
|
1267
|
+
def to_aoh(options = {})
  # Wraps this table in an AohFormatter built with +options+, yields the
  # formatter to the caller's block when one is given, and returns the
  # formatter's output.
  AohFormatter.new(self, options).tap { |formatter| yield formatter if block_given? }.output
end
|
1272
|
+
|
1273
|
+
# :category: Output
|
1274
|
+
|
1275
|
+
# Return the table as a string containing a LaTeX table. If no block is
|
1276
|
+
# given, default formatting applies to the table's cells. If a block is
|
1277
|
+
# given, it yields a LaTeXFormatter to the block to which formatting
|
1278
|
+
# instructions and footers can be added by calling methods on it.
|
1279
|
+
def to_latex(options = {})
  # Wraps this table in a LaTeXFormatter built with +options+, yields the
  # formatter to the caller's block when one is given, and returns the
  # formatter's output.
  LaTeXFormatter.new(self, options).tap { |formatter| yield formatter if block_given? }.output
end
|
1284
|
+
|
1285
|
+
# :category: Output
|
1286
|
+
|
1287
|
+
# Return the table as a string containing an Emacs org-mode table. If no
|
1288
|
+
# block is given, default formatting applies to the table's cells. If a
|
1289
|
+
# block is given, it yields a OrgFormatter to the block to which formatting
|
1290
|
+
# instructions and footers can be added by calling methods on it.
|
1291
|
+
def to_org(options = {})
  # Wraps this table in an OrgFormatter built with +options+, yields the
  # formatter to the caller's block when one is given, and returns the
  # formatter's output.
  OrgFormatter.new(self, options).tap { |formatter| yield formatter if block_given? }.output
end
|
1296
|
+
|
1297
|
+
# :category: Output
|
1298
|
+
|
1299
|
+
# Return the table as a string containing ANSI terminal text representing
|
1300
|
+
# table. If no block is given, default formatting applies to the table's
|
1301
|
+
# cells. If a block is given, it yields a TermFormatter to the block to
|
1302
|
+
# which formatting instructions and footers can be added by calling methods
|
1303
|
+
# on it.
|
1304
|
+
def to_term(options = {})
  # Wraps this table in a TermFormatter built with +options+, yields the
  # formatter to the caller's block when one is given, and returns the
  # formatter's output.
  TermFormatter.new(self, options).tap { |formatter| yield formatter if block_given? }.output
end
|
1309
|
+
|
1310
|
+
# :category: Output
|
1311
|
+
|
1312
|
+
# Return the table as a string containing ordinary text representing table.
|
1313
|
+
# If no block is given, default formatting applies to the table's cells. If
|
1314
|
+
# a block is given, it yields a TextFormatter to the block to which
|
1315
|
+
# formatting instructions and footers can be added by calling methods on it.
|
1316
|
+
def to_text(options = {})
  # Wraps this table in a TextFormatter built with +options+, yields the
  # formatter to the caller's block when one is given, and returns the
  # formatter's output.
  TextFormatter.new(self, options).tap { |formatter| yield formatter if block_given? }.output
end
|
1321
|
+
end
|
1322
|
+
end
|