fat_core 1.3.1 → 1.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: c3022e21efd8b8e152772e4e7c02ef38561f2fdb
4
- data.tar.gz: f79d325c96f94aa4d961f2315dcc6901533c6b46
3
+ metadata.gz: b85c4931e6e06fcb0b7bf0359f67242fa06034b0
4
+ data.tar.gz: fc263e3393af22aae09423a94430461f1c07d476
5
5
  SHA512:
6
- metadata.gz: 431ecd4c693f038bdf70b026a5f9445e45b497f36658180d6f7db12a653a85e7ce55770b7245e3ed5cec064ca185d8318423bc5ce835650cc6d0be2e5dacef4e
7
- data.tar.gz: a2de458910bda7ffe1841684ffe128cd012455280dc7efcc21c0b3f8c48d9f847b458a68542c1ba5fec7ee3af8f54cdc1038d9496ce3dfa0b1da59f3949e7eef
6
+ metadata.gz: a69a5d129cc9135c87b628f45e1ca32869fca906bab23986ac4b0e93fb2bc75d52aaaf7b1da93828971b7d9fc9a5b8606c6daf45b22bcf3de976c5e68c90b639
7
+ data.tar.gz: 15f2a2de60a73586a8c6c0b60f919a66dfd523f58a72adf55a995d93e2b13b52427b190e21a76ad3002567e47167ca44b88d162c04371c0a6f8613bbaafa60a1
data/TODO.org ADDED
@@ -0,0 +1,10 @@
1
+ * Conversion to Spreadsheets
2
+ This is a [[https://github.com/westonganger/spreadsheet_architect][gem]] that I can include into the Table model to convert a table into
3
+ a spreadsheet, or even a sheet in a multi-sheet spreadsheet file.
4
+ * Formatters
5
+ Need to think about ways to define formatters for Table for different output
6
+ types, including tty, color-tty, latex, csv, spreadsheet?
7
+ * Add a Group Boundary concept
8
+ If I want a table to perform sub-totals at various break points, need to have a
9
+ way for a table to record its grouping boundaries. Maybe an array of row
10
+ numbers? Automatically injected by the group-by method?
@@ -115,6 +115,12 @@ module FatCore
115
115
  columns.map(&:header)
116
116
  end
117
117
 
118
+ # Return the number of rows in the table, or 0 if it has no columns.
119
+ def size
120
+ return 0 if columns.empty?
121
+ columns.first.size # row count is taken from the first column
122
+ end
123
+
118
124
  # Return the rows of the table as an array of hashes, keyed by the headers.
119
125
  def rows
120
126
  rows = []
@@ -154,12 +160,12 @@ module FatCore
154
160
  end
155
161
  new_tab = Table.new
156
162
  new_rows.each do |nrow|
157
- new_tab.add_row(nrow)
163
+ new_tab << nrow
158
164
  end
159
165
  new_tab
160
166
  end
161
167
 
162
- # Return a Table having the selected column expression. Each expression can
168
+ # Return a Table having the selected column expressions. Each expression can
163
169
  # be either a (1) symbol, (2) a hash of symbol => symbol, or (3) a hash of
164
170
  # symbol => 'string', though the bare symbol arguments (1) must precede any
165
171
  # hash arguments. Each expression results in a column in the resulting Table
@@ -205,7 +211,7 @@ module FatCore
205
211
  raise 'Parameters to select must be a symbol, string, or hash'
206
212
  end
207
213
  end
208
- result.add_row(new_row)
214
+ result << new_row
209
215
  end
210
216
  result
211
217
  end
@@ -216,7 +222,7 @@ module FatCore
216
222
  result = Table.new
217
223
  ev = Evaluator.new(vars: { row: 0 }, before: '@row += 1')
218
224
  rows.each do |row|
219
- result.add_row(row) if ev.evaluate(expr, vars: row)
225
+ result << row if ev.evaluate(expr, vars: row)
220
226
  end
221
227
  result
222
228
  end
@@ -236,6 +242,273 @@ module FatCore
236
242
  result
237
243
  end
238
244
 
245
+ # Return a table that joins this table to another based on one or more join
246
+ # expressions. There are several possibilities for the join expressions:
247
+ #
248
+ # 1. If no join expressions are given, the tables will be joined when all
249
+ # columns with the same name in both tables have the same value, a
250
+ # "natural" join. However, if the join type is :cross, the join
251
+ # expression will be taken to be 'true'. Otherwise, if there are no
252
+ # common column names, an exception will be raised.
253
+ #
254
+ # 2. If the join expressions are one or more symbols, the join condition
255
+ # requires that the values of both tables are equal for all columns named
256
+ # by the symbols. A column that appears in both tables can be given
257
+ # without modification and will be assumed to require equality on that
258
+ # column. If an unmodified symbol is not a name that appears in both
259
+ # tables, an exception will be raised. Column names that are unique to
260
+ # the first table must have a '_a' appended to the column name and column
261
+ # names that are unique to the other table must have a '_b' appended to
262
+ # the column name. These disambiguated column names must come in pairs,
263
+ # one for the first table and one for the second, and they will imply a
264
+ # join condition that the columns must be equal on those columns. Several
265
+ # such symbol expressions will require that all such implied pairs are
266
+ # equal in order for the join condition to be met.
267
+ #
268
+ # 3. Finally, a string expression can be given that contains an arbitrary
269
+ # ruby expression that will be evaluated for truthiness. Within the
270
+ # string, all column names must be disambiguated with the '_a' or '_b'
271
+ # modifiers whether they are common to both tables or not. The names of
272
+ # the columns in both tables (without the leading ':' for symbols) are
273
+ # available as variables within the expression.
274
+ #
275
+ # The join_type parameter specifies what sort of join is performed, :inner,
276
+ # :left, :right, :full, or :cross. The default is an :inner join. The types
277
+ # of joins are defined as follows where T1 means this table, the receiver,
278
+ # and T2 means other. These descriptions are taken from the PostgreSQL
279
+ # documentation.
280
+ #
281
+ # - :inner :: For each row R1 of T1, the joined table has a row for each row
282
+ # in T2 that satisfies the join condition with R1.
283
+ #
284
+ # - :left :: First, an inner join is performed. Then, for each row in T1
285
+ # that does not satisfy the join condition with any row in T2, a joined
286
+ # row is added with null values in columns of T2. Thus, the joined
287
+ # table always has at least one row for each row in T1.
288
+ #
289
+ # - :right :: First, an inner join is performed. Then, for each row in T2
290
+ # that does not satisfy the join condition with any row in T1, a joined
291
+ # row is added with null values in columns of T1. This is the converse
292
+ # of a left join: the result table will always have a row for each row
293
+ # in T2.
294
+ #
295
+ # - :full :: First, an inner join is performed. Then, for each row in T1
296
+ # that does not satisfy the join condition with any row in T2, a joined
297
+ # row is added with null values in columns of T2. Also, for each row of
298
+ # T2 that does not satisfy the join condition with any row in T1, a
299
+ # joined row with null values in the columns of T1 is added.
300
+ #
301
+ # - :cross :: For every possible combination of rows from T1 and T2 (i.e.,
302
+ # a Cartesian product), the joined table will contain a row consisting
303
+ # of all columns in T1 followed by all columns in T2. If the tables
304
+ # have N and M rows respectively, the joined table will have N * M
305
+ # rows.
306
+ #
307
+ JOIN_TYPES = [ :inner, :left, :right, :full, :cross ].freeze # join types accepted by #join
308
+
309
+ def join(other, *exps, join_type: :inner)
310
+ raise ArgumentError, 'need other table as first argument to join' unless other.is_a?(Table)
311
+ unless JOIN_TYPES.include?(join_type)
312
+ raise ArgumentError, "join_type may only be: #{JOIN_TYPES.join(', ')}"
313
+ end
314
+ # All-nil rows for each table, used to pad unmatched rows in outer joins.
315
+ self_row_nils = headers.map { |h| [h, nil] }.to_h
316
+ other_row_nils = other.headers.map { |h| [h, nil] }.to_h
317
+ join_expression, other_common_heads = build_join_expression(exps, other, join_type)
318
+ ev = Evaluator.new
319
+ result = Table.new
320
+ other_rows = other.rows
321
+ other_row_matches = Array.new(other_rows.size, false)
322
+ rows.each do |self_row|
323
+ self_row_matched = false
324
+ other_rows.each_with_index do |other_row, k|
325
+ # Bind the values of both rows to local variables named after their
326
+ # headers with '_a' and '_b' appended, then test the join expression.
327
+ locals = build_locals_hash(row_a: self_row, row_b: other_row)
328
+ matches = ev.evaluate(join_expression, vars: locals)
329
+ next unless matches
330
+ self_row_matched = other_row_matches[k] = true # remember the match for the outer-join passes
331
+ out_row = build_out_row(row_a: self_row, row_b: other_row,
332
+ common_heads: other_common_heads,
333
+ type: join_type)
334
+ result << out_row
335
+ end
336
+ if join_type == :left || join_type == :full
337
+ unless self_row_matched
338
+ out_row = build_out_row(row_a: self_row, row_b: other_row_nils, type: join_type)
339
+ result << out_row
340
+ end
341
+ end
342
+ end
343
+ if join_type == :right || join_type == :full
344
+ other_rows.each_with_index do |other_row, k|
345
+ unless other_row_matches[k]
346
+ out_row = build_out_row(row_a: self_row_nils, row_b: other_row, type: join_type)
347
+ result << out_row
348
+ end
349
+ end
350
+ end
351
+ result
352
+ end
353
+
354
+ def inner_join(other, *exps)
355
+ join(other, *exps) # join_type is left at its :inner default
356
+ end
357
+
358
+ def left_join(other, *exps)
359
+ join(other, *exps, join_type: :left) # also keeps unmatched rows of this table
360
+ end
361
+
362
+ def right_join(other, *exps)
363
+ join(other, *exps, join_type: :right) # also keeps unmatched rows of other
364
+ end
365
+
366
+ def full_join(other, *exps)
367
+ join(other, *exps, join_type: :full) # also keeps unmatched rows of both tables
368
+ end
369
+
370
+ def cross_join(other)
371
+ join(other, join_type: :cross) # pairs every row of this table with every row of other
372
+ end
373
+
374
+ private
375
+
376
+ # Return the output row (a Hash) for one joined pair: all the keys of row_a
377
+ # followed by the keys of row_b, minus those in common_heads when type is
378
+ # :inner. Any remaining row_b key that is also a row_a key is renamed by
379
+ # appending '_b', so that it cannot overwrite the row_a value in the merge.
380
+ def build_out_row(row_a:, row_b:, common_heads: [], type: :inner)
381
+ if type == :inner
382
+ # Drop the row_b keys listed in common_heads; they were already matched
383
+ # for equality with a row_a column, so repeating them adds nothing.
384
+ row_b = row_b.reject { |k, _| common_heads.include?(k) }
385
+ end
386
+ # Rename any remaining row_b key that collides with a row_a key by
387
+ # appending '_b' to it.
388
+ a_heads = row_a.keys
389
+ row_b = row_b.to_a.each.map do |k, v|
390
+ [a_heads.include?(k) ? "#{k}_b".to_sym : k, v]
391
+ end.to_h
392
+ row_a.merge(row_b)
393
+ end
394
+
395
+ # Return the hash of local variables against which a join expression is
396
+ # evaluated: every key of row_a with '_a' appended and every key of row_b
397
+ # with '_b' appended, each mapped to that row's value for the key.
398
+ def build_locals_hash(row_a:, row_b:)
399
+ row_a = row_a.to_a.each.map { |k, v| ["#{k}_a".to_sym, v] }.to_h
400
+ row_b = row_b.to_a.each.map { |k, v| ["#{k}_b".to_sym, v] }.to_h
401
+ row_a.merge(row_b) # the differing suffixes keep the two key sets disjoint
402
+ end
403
+
404
+ # Return a two-element array: (1) a string of ruby code that ANDs together
404
+ # all the join conditions described in the comment to the #join method and
405
+ # (2) the heads of the other table known to be tested for equality with a
406
+ # head of this table. NOTE(review): every '_b' symbol lands in (2) even if
407
+ # its '_a' partner has a different name -- confirm this is intended. The
408
+ # expression expects each header of this table bound as a local with '_a'
409
+ # appended and each header of the other table with '_b' appended.
410
+ def build_join_expression(exps, other, type)
411
+ return ['true', []] if type == :cross
412
+ a_heads = headers
413
+ b_heads = other.headers
414
+ common_heads = a_heads & b_heads
415
+ b_common_heads = []
416
+ if exps.empty?
417
+ if common_heads.empty?
418
+ raise ArgumentError,
419
+ 'A non-cross join with no common column names requires join expressions'
420
+ else
421
+ # A natural join: require equality on every common head.
422
+ common_heads.each do |h|
423
+ ensure_common_types!(self_h: h, other_h: h, other: other)
424
+ end
425
+ nat_exp = common_heads.map { |h| "(#{h}_a == #{h}_b)" }.join(' && ')
426
+ [nat_exp, common_heads]
427
+ end
428
+ else
429
+ # Build one condition per expression and AND them together.
430
+ and_conds = []
431
+ partial_result = nil
432
+ last_sym = nil
433
+ exps.each do |exp|
434
+ case exp
435
+ when Symbol
436
+ case exp.to_s.clean
437
+ when /\A(.*)_a\z/
438
+ a_head = $1.to_sym
439
+ unless a_heads.include?(a_head)
440
+ raise ArgumentError, "no column '#{a_head}' in table"
441
+ end
442
+ if partial_result
443
+ # Second of a pair; NOTE(review): assumes the first was a '_b' symbol.
444
+ ensure_common_types!(self_h: a_head, other_h: last_sym, other: other)
445
+ partial_result << "#{a_head}_a)"
446
+ and_conds << partial_result
447
+ partial_result = nil
448
+ else
449
+ # First of a pair: open the condition, to be closed by a '_b' symbol.
450
+ partial_result = "(#{a_head}_a == "
451
+ end
452
+ last_sym = a_head
453
+ when /\A(.*)_b\z/
454
+ b_head = $1.to_sym
455
+ unless b_heads.include?(b_head)
456
+ raise ArgumentError, "no column '#{b_head}' in second table"
457
+ end
458
+ if partial_result
459
+ # Second of a pair; NOTE(review): assumes the first was an '_a' symbol.
460
+ ensure_common_types!(self_h: last_sym, other_h: b_head, other: other)
461
+ partial_result << "#{b_head}_b)"
462
+ and_conds << partial_result
463
+ partial_result = nil
464
+ else
465
+ # First of a pair: open the condition, to be closed by an '_a' symbol.
466
+ partial_result = "(#{b_head}_b == "
467
+ end
468
+ b_common_heads << b_head
469
+ last_sym = b_head
470
+ else
471
+ # No modifier, so must be one of the common columns
472
+ unless partial_result.nil?
473
+ # We were expecting the second of a modified pair, but got an
474
+ # unmodified symbol instead.
475
+ msg =
476
+ "must follow '#{last_sym}' by qualified exp from the other table"
477
+ raise ArgumentError, msg
478
+ end
479
+ # We have an unqualified symbol that must appear in both tables
480
+ unless common_heads.include?(exp)
481
+ raise ArgumentError, "unqualified column '#{exp}' must occur in both tables"
482
+ end
483
+ ensure_common_types!(self_h: exp, other_h: exp, other: other)
484
+ and_conds << "(#{exp}_a == #{exp}_b)"
485
+ b_common_heads << exp
486
+ end
487
+ when String
488
+ # A string is taken as ruby code verbatim; all of its column references
489
+ # must already carry the '_a' or '_b' suffix.
490
+ and_conds << "(#{exp})"
491
+ else
492
+ raise ArgumentError, "invalid join expression '#{exp}' of class #{exp.class}"
493
+ end
494
+ end
495
+ [and_conds.join(' && '), b_common_heads] # NOTE(review): a trailing unpaired '_a'/'_b' symbol is silently dropped
496
+ end
497
+ end
499
+
500
+ # Raise an exception unless self_h in this table and other_h in other table
501
+ # have the same types.
502
+ def ensure_common_types!(self_h:, other_h:, other:)
503
+ unless column(self_h).type == other.column(other_h).type
504
+ raise ArgumentError,
505
+ "type of column '#{self_h}' does not match type of column '#{other_h}"
506
+ end
507
+ self
508
+ end
509
+
510
+ public
511
+
239
512
  # Return a Table in which all rows of the table are divided into groups
240
513
  # where the value of all columns named as simple symbols are equal. All
241
514
  # other columns are set to the result of aggregating the values of that
@@ -266,7 +539,7 @@ module FatCore
266
539
  when Hash
267
540
  agg_cols = xp
268
541
  else
269
- raise "Cannot group by parameter '#{xp}"
542
+ raise "Cannot group by parameter '#{xp}'"
270
543
  end
271
544
  end
272
545
  default_agg_func = :first
@@ -285,7 +558,7 @@ module FatCore
285
558
  end
286
559
  result = Table.new
287
560
  result_rows.each do |row|
288
- result.add_row(row)
561
+ result << row
289
562
  end
290
563
  result
291
564
  end
@@ -1,7 +1,7 @@
1
1
  module FatCore
2
2
  MAJOR = 1
3
- MINOR = 3
4
- PATCH = 1
3
+ MINOR = 4
4
+ PATCH = 0
5
5
 
6
6
  VERSION = [MAJOR, MINOR, PATCH].compact.join('.')
7
7
  end
@@ -668,6 +668,107 @@ EOS
668
668
  end
669
669
  end
670
670
 
671
+ describe 'join' do
672
+ before :all do
673
+ @tab_a = Table.new([ # eight employee rows; Teddy (id 3) has no join_date
674
+ { id: 1, name: 'Paul', age: 32, address: 'California', salary: 20000, join_date: '2001-07-13' },
675
+ { id: 3, name: 'Teddy', age: 23, address: 'Norway', salary: 20000},
676
+ { id: 4, name: 'Mark', age: 25, address: 'Rich-Mond', salary: 65000, join_date: '2007-12-13' },
677
+ { id: 5, name: 'David', age: 27, address: 'Texas', salary: 85000, join_date: '2007-12-13' },
678
+ { id: 2, name: 'Allen', age: 25, address: 'Texas', salary: nil, join_date: '2007-12-13' },
679
+ { id: 8, name: 'Paul', age: 24, address: 'Houston', salary: 20000, join_date: '2005-07-13' },
680
+ { id: 9, name: 'James', age: 44, address: 'Norway', salary: 5000, join_date: '2005-07-13' },
681
+ { id: 10, name: 'James', age: 45, address: 'Texas', salary: 5000, join_date: '2005-07-13' }
682
+ ])
683
+ @tab_b = Table.new([ # three department rows; emp_id 7 matches no id in @tab_a
684
+ { id: 1, dept: 'IT Billing', emp_id: 1 },
685
+ { id: 2, dept: 'Engineering', emp_id: 2 },
686
+ { id: 3, dept: 'Finance', emp_id: 7 }
687
+ ])
688
+ end
689
+
690
+ it 'should be able to do an inner join' do
691
+ join_tab = @tab_a.join(@tab_b, :id_a, :emp_id_b)
692
+ expect(join_tab.class).to eq Table
693
+ expect(join_tab.size).to eq(2) # only ids 1 and 2 of @tab_a equal an emp_id of @tab_b
694
+ expect(join_tab[:name].items).to include('Paul')
695
+ expect(join_tab[:name].items).to include('Allen')
696
+ expect(join_tab.headers).to eq([:id, :name, :age, :address, :salary,
697
+ :join_date, :id_b, :dept]) # :emp_id is dropped: the symbol pair matched it against :id
698
+ end
699
+
700
+ it 'should be able to do an inner join on a string exp' do
701
+ join_tab = @tab_a.join(@tab_b, 'id_a == emp_id_b')
702
+ expect(join_tab.class).to eq Table
703
+ expect(join_tab.size).to eq(2) # same two matches as the symbol form
704
+ expect(join_tab[:name].items).to include('Paul')
705
+ expect(join_tab[:name].items).to include('Allen')
706
+ expect(join_tab.headers).to eq([:id, :name, :age, :address, :salary,
707
+ :join_date, :id_b, :dept, :emp_id]) # a string expression drops no columns, so :emp_id stays
708
+ end
709
+
710
+ it 'should be able to do a left join' do
711
+ join_tab = @tab_a.left_join(@tab_b, :id_a, :emp_id_b)
712
+ expect(join_tab.class).to eq Table
713
+ expect(join_tab.size).to eq(8) # every @tab_a row appears once: 2 matched, 6 padded with nils
714
+ expect(join_tab[:name].items).to include('Paul')
715
+ expect(join_tab[:name].items).to include('Allen')
716
+ expect(join_tab[:name].items).to include('Teddy')
717
+ expect(join_tab[:name].items).to include('Mark')
718
+ expect(join_tab[:name].items).to include('David')
719
+ expect(join_tab[:name].items).to include('James')
720
+ expect(join_tab.headers).to eq([:id, :name, :age, :address, :salary,
721
+ :join_date, :id_b, :dept, :emp_id]) # non-inner joins keep all @tab_b columns
722
+ end
723
+
724
+ it 'should be able to do a right join' do
725
+ join_tab = @tab_a.right_join(@tab_b, :id_a, :emp_id_b)
726
+ expect(join_tab.class).to eq Table
727
+ expect(join_tab.size).to eq(3) # 2 matched rows plus the unmatched emp_id 7 row of @tab_b
728
+ expect(join_tab[:name].items).to include('Paul')
729
+ expect(join_tab[:name].items).to include('Allen')
730
+ expect(join_tab[:dept].items).to include('IT Billing')
731
+ expect(join_tab[:dept].items).to include('Engineering')
732
+ expect(join_tab[:dept].items).to include('Finance')
733
+ expect(join_tab.headers).to eq([:id, :name, :age, :address, :salary,
734
+ :join_date, :id_b, :dept, :emp_id]) # non-inner joins keep all @tab_b columns
735
+ end
736
+
737
+ it 'should be able to do a full join' do
738
+ join_tab = @tab_a.full_join(@tab_b, :id_a, :emp_id_b)
739
+ expect(join_tab.class).to eq Table
740
+ expect(join_tab.size).to eq(9) # 2 matched + 6 unmatched @tab_a rows + 1 unmatched @tab_b row
741
+ expect(join_tab[:name].items).to include('Paul')
742
+ expect(join_tab[:name].items).to include('Allen')
743
+ expect(join_tab[:name].items).to include('Teddy')
744
+ expect(join_tab[:name].items).to include('Mark')
745
+ expect(join_tab[:name].items).to include('David')
746
+ expect(join_tab[:name].items).to include('James')
747
+ expect(join_tab[:dept].items).to include('IT Billing')
748
+ expect(join_tab[:dept].items).to include('Engineering')
749
+ expect(join_tab[:dept].items).to include('Finance')
750
+ expect(join_tab.headers).to eq([:id, :name, :age, :address, :salary,
751
+ :join_date, :id_b, :dept, :emp_id]) # non-inner joins keep all @tab_b columns
752
+ end
753
+
754
+ it 'should be able to do a cross join' do
755
+ join_tab = @tab_a.cross_join(@tab_b)
756
+ expect(join_tab.class).to eq Table
757
+ expect(join_tab.size).to eq(24) # 8 rows of @tab_a times 3 rows of @tab_b
758
+ expect(join_tab[:name].items).to include('Paul')
759
+ expect(join_tab[:name].items).to include('Allen')
760
+ expect(join_tab[:name].items).to include('Teddy')
761
+ expect(join_tab[:name].items).to include('Mark')
762
+ expect(join_tab[:name].items).to include('David')
763
+ expect(join_tab[:name].items).to include('James')
764
+ expect(join_tab[:dept].items).to include('IT Billing')
765
+ expect(join_tab[:dept].items).to include('Engineering')
766
+ expect(join_tab[:dept].items).to include('Finance')
767
+ expect(join_tab.headers).to eq([:id, :name, :age, :address, :salary,
768
+ :join_date, :id_b, :dept, :emp_id]) # @tab_b's :id is renamed :id_b to avoid a clash
769
+ end
770
+ end
771
+
671
772
  describe 'output' do
672
773
  it 'should be able to return itself as an array of arrays' do
673
774
  aoh = [
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: fat_core
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.3.1
4
+ version: 1.4.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Daniel E. Doherty
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2017-02-26 00:00:00.000000000 Z
11
+ date: 2017-03-02 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: simplecov
@@ -196,6 +196,7 @@ files:
196
196
  - LICENSE.txt
197
197
  - README.md
198
198
  - Rakefile
199
+ - TODO.org
199
200
  - bin/easters
200
201
  - fat_core.gemspec
201
202
  - lib/core_extensions/date/fat_core.rb