object_table 0.3.4 → 0.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (40) hide show
  1. checksums.yaml +4 -4
  2. data/.travis.yml +0 -1
  3. data/README.md +206 -108
  4. data/lib/object_table/basic_grid.rb +1 -1
  5. data/lib/object_table/column.rb +6 -7
  6. data/lib/object_table/factory.rb +46 -0
  7. data/lib/object_table/grouping/grid.rb +47 -0
  8. data/lib/object_table/grouping.rb +109 -0
  9. data/lib/object_table/joining.rb +71 -0
  10. data/lib/object_table/masked_column.rb +2 -2
  11. data/lib/object_table/printing.rb +69 -0
  12. data/lib/object_table/stacking.rb +66 -0
  13. data/lib/object_table/static_view.rb +2 -5
  14. data/lib/object_table/table_methods.rb +35 -22
  15. data/lib/object_table/util.rb +19 -0
  16. data/lib/object_table/version.rb +1 -1
  17. data/lib/object_table/view.rb +7 -5
  18. data/lib/object_table/view_methods.rb +3 -2
  19. data/lib/object_table.rb +8 -19
  20. data/object_table.gemspec +2 -0
  21. data/spec/object_table/column_spec.rb +2 -2
  22. data/spec/object_table/grouping_spec.rb +475 -0
  23. data/spec/object_table/static_view_spec.rb +2 -2
  24. data/spec/object_table/util_spec.rb +43 -0
  25. data/spec/object_table/view_spec.rb +6 -16
  26. data/spec/object_table_spec.rb +45 -3
  27. data/spec/subclassing_spec.rb +44 -5
  28. data/spec/support/joining_example.rb +171 -0
  29. data/spec/support/object_table_example.rb +124 -29
  30. data/spec/support/stacking_example.rb +111 -0
  31. data/spec/support/utils.rb +8 -0
  32. data/spec/support/view_example.rb +10 -13
  33. metadata +20 -12
  34. data/lib/object_table/group.rb +0 -10
  35. data/lib/object_table/grouped.rb +0 -93
  36. data/lib/object_table/printable.rb +0 -72
  37. data/lib/object_table/stacker.rb +0 -59
  38. data/lib/object_table/table_child.rb +0 -19
  39. data/spec/object_table/grouped_spec.rb +0 -351
  40. data/spec/support/stacker_example.rb +0 -158
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 689d1d35ab1e6a33a345241e866169c7ddcf40fe
4
- data.tar.gz: 11b9d772d9ddf5e2f32feb0b7250910f36be3007
3
+ metadata.gz: 02f7e1642a2f1f8106f32e3b04f8cfebf8676d52
4
+ data.tar.gz: 0986af221eab29e6654f511cebd5534962d82412
5
5
  SHA512:
6
- metadata.gz: f93998847f29f3926d9d956098b3f6b29f6392c14bfa15b1300465d8f9d45cb05876e6a45a9db4e073ef3a10884f3c9c6f00d6365ef3dfff569eea4a13eb41f6
7
- data.tar.gz: 669f9ed3791dcc71a29480a1be3073d700ae8bcba306fae78e4596da805f7b2c31830e23d43640f1d3b2360379194250a527cb366a6553fcd1253b5a4dd52d26
6
+ metadata.gz: d8ee1c4350da59156c2f17e23431969d126a5f8bac0a21348d048fb18486c765d1fe6f4f551a53ef18e03a9642e0704da93e997bb16c2d700003f5cde4f64079
7
+ data.tar.gz: d15636d515d1d5439f5233e58738b5ffdb273a4df77000a4fbdc5ba6ff5018e16e382eb487c6aa0d27e6a9b390e1d6b551a3010ca843123a227c4ccbdee0278d
data/.travis.yml CHANGED
@@ -1,6 +1,5 @@
1
1
  language: ruby
2
2
  rvm:
3
- - 1.9.3
4
3
  - 2.0.0
5
4
  - 2.1.0
6
5
  - 2.2.0
data/README.md CHANGED
@@ -1,25 +1,17 @@
1
1
  ruby-object-table
2
2
  =================
3
3
 
4
- [![Gem Version][GV img]][Gem Version]
5
- [![Build Status][BS img]][Build Status]
6
- [![Code Climate][CC img]][Code Climate]
7
- [![Coverage Status][CS img]][Coverage Status]
8
-
9
- [Gem Version]: https://rubygems.org/gems/object_table
10
- [Build Status]: https://travis-ci.org/lincheney/ruby-object-table
11
- [Code Climate]: https://codeclimate.com/github/lincheney/ruby-object-table
12
- [Coverage Status]: https://coveralls.io/r/lincheney/ruby-object-table
13
-
14
- [GV img]: https://badge.fury.io/rb/object_table.png
15
- [BS img]: https://travis-ci.org/lincheney/ruby-object-table.png
16
- [CC img]: https://codeclimate.com/github/lincheney/ruby-object-table.png
17
- [CS img]: https://coveralls.io/repos/lincheney/ruby-object-table/badge.png?branch=master
18
-
19
- Simple data table/frame implementation in ruby
4
+ [![Gem Version](https://badge.fury.io/rb/object_table.svg)](http://badge.fury.io/rb/object_table)
5
+ [![Build Status](https://travis-ci.org/lincheney/ruby-object-table.svg?branch=master)](https://travis-ci.org/lincheney/ruby-object-table)
6
+ [![Code Climate](https://codeclimate.com/github/lincheney/ruby-object-table/badges/gpa.svg)](https://codeclimate.com/github/lincheney/ruby-object-table)
7
+ [![Coverage Status](https://coveralls.io/repos/lincheney/ruby-object-table/badge.svg?branch=master)](https://coveralls.io/r/lincheney/ruby-object-table?branch=master)
8
+
9
+ Simple data table/frame implementation in ruby.
20
10
  Probably slow and extremely inefficient, but it works and that's all that matters.
21
11
  Uses NArrays (https://github.com/masa16/narray) for storing data.
22
12
 
13
+ Be sure to check out the [release notes](https://github.com/lincheney/ruby-object-table/releases).
14
+
23
15
  ## Creating a table
24
16
 
25
17
  Just pass a hash of columns into the constructor.
@@ -69,7 +61,7 @@ Otherwise the scalars are extended to match the length of the vector columns
69
61
  - `#nrows` returns the number of rows
70
62
  - `#colnames` returns an array of the column names
71
63
  - `#clone` make a copy of the table
72
- - `#stack(table1, table2, ...)` appends then supplied tables
64
+ - `#stack(table1, table2, ...)` appends the supplied tables
73
65
  - `#apply(&block)` evaluates `block` in the context of the table
74
66
  - `#where(&block)` filters the table
75
67
  - `#group_by(&block)` splits the table into groups
@@ -369,104 +361,223 @@ If you want to filter a table and keep that data (i.e. without it syncing with t
369
361
  ## Grouping (and aggregating)
370
362
 
371
363
  Use the `#group_by` method and pass column names or a block that returns grouping keys.
372
- Then call `#each` to iterate through the groups or `#apply` to aggregate the results.
373
364
 
374
- The argument to `#group_by` should be a hash mapping key name => key. See the below example.
365
+ ```ruby
366
+ # group by column_1
367
+ >>> data.group_by(:column_1)
368
+ # or group by a dynamically calculated value
369
+ # note that the double braces are actually a hash literal inside a block
370
+ >>> data.group_by{{ key: column_1.round }}
371
+ ```
372
+
373
+ This gives you an `ObjectTable::Grouping`.
374
+ There are two ways to perform aggregation with a grouping: using `apply`/`each` or using `reduce`.
375
+
376
+ Using `apply`/`each` is the most flexible and powerful.
377
+ It iterates through each group and calls a supplied block for each group.
378
+
379
+ `reduce` instead iterates through each *row* and keeps track of which group the row belongs to.
380
+ It can only be used with [online algorithms](http://en.wikipedia.org/wiki/Online_algorithm)
381
+ but can be much faster if there is a large number of groups (relative to the number of rows).
382
+
383
+ ### Using `apply`/`each`
384
+
385
+ `each` enumerates through the groups.
386
+ `apply` is similar to doing `grouping.each.map` but instead of collecting results in an `Array`
387
+ the results are stacked into a new table.
375
388
 
376
389
  ```ruby
377
- >>> data = ObjectTable.new(name: ['John', 'Tom', 'John', 'Tom', 'Jim'], value: 1..5)
378
- => ObjectTable(5, 2)
379
- name value
380
- 0: "John" 1
381
- 1: "Tom" 2
382
- 2: "John" 3
383
- 3: "Tom" 4
384
- 4: "Jim" 5
385
- name value
386
-
387
- # group by the name and get the no. of rows in each group
388
- >>> num_rows = []
389
- >>> data.group_by(:name).each{ num_rows.push(nrows) }
390
- >>> num_rows
391
- => [2, 2, 1]
392
-
393
- # or group with a block
394
- >>> num_rows = []
395
- # let's group by initial letter of the name
396
- >>> data.group_by{ {initial: name.map{|n| n[0]}} }.each{ num_rows.push(nrows) }
397
- >>> num_rows
398
- => [3, 2]
390
+ # let's create some data
391
+ >>> data = ObjectTable.new(col1: 1..10, col2: (1..20).step(2).to_a)
392
+ => ObjectTable(10, 2)
393
+ col1 col2
394
+ 0: 1 1
395
+ 1: 2 3
396
+ 2: 3 5
397
+ 3: 4 7
398
+ 4: 5 9
399
+ 5: 6 11
400
+ 6: 7 13
401
+ 7: 8 15
402
+ 8: 9 17
403
+ 9: 10 19
404
+ col1 col2
405
+
406
+ # print sum of col2 for col1 remainder 3
407
+ >>> data.group_by{{ rem: col1 % 3 }}.each{ p col2.sum }; nil
408
+ 40
409
+ 27
410
+ 33
411
+
412
+ # which sum is which group?
413
+ # we can access the group keys through @K
414
+ >>> data.group_by{{ rem: col1 % 3 }}.each{ p [@K.rem, col2.sum] }; nil
415
+ [1, 40]
416
+ [2, 27]
417
+ [0, 33]
418
+
419
+ # collect results into an array
420
+ # note that we need an argument to the map block
421
+ >>> data.group_by{{ rem: col1 % 3 }}.each.map{|grp| [grp.K.rem, grp.col2.sum] }
422
+ => [[1, 40], [2, 27], [0, 33]]
423
+
424
+ # collect the results into a new table using apply()
425
+ >>> data.group_by{{ rem: col1 % 3 }}.apply{ col2.sum }
426
+ => ObjectTable(3, 2)
427
+ rem v_0
428
+ 0: 1 40
429
+ 1: 2 27
430
+ 2: 0 33
431
+ rem v_0
432
+
433
+ # aggregated columns are given default names of v_0, v_1, etc.
434
+ # let's set the names ourselves
435
+ >>> data.group_by{{ rem: col1 % 3 }}.apply{ @R[sum: col2.sum] }
436
+ => ObjectTable(3, 2)
437
+ rem sum
438
+ 0: 1 40
439
+ 1: 2 27
440
+ 2: 0 33
441
+ rem sum
399
442
  ```
400
443
 
401
- The group keys are accessible through the `@K` shortcut
444
+ We can also assign new columns based on the group (you cannot do this with `reduce`).
402
445
 
403
446
  ```ruby
404
- >>> data = ObjectTable.new(name: ['John', 'Tom', 'John', 'Tom', 'Jim'], value: 1..5)
405
- >>> data.group_by(:name).each{ p @K }
406
- {:name=>"John"}
407
- {:name=>"Tom"}
408
- {:name=>"Jim"}
409
-
410
- # or if you are using a block with args
411
- >>> data.group_by(:name).each{|grp| p grp.K }
412
- {:name=>"John"}
413
- {:name=>"Tom"}
414
- {:name=>"Jim"}
447
+ >>> data.group_by{{ rem: col1 % 3 }}.each{ self[:sum] = col2.sum }
448
+ >>> data
449
+ => ObjectTable(10, 3)
450
+ col1 col2 sum
451
+ 0: 1 1 40
452
+ 1: 2 3 27
453
+ 2: 3 5 33
454
+ 3: 4 7 40
455
+ 4: 5 9 27
456
+ 5: 6 11 33
457
+ 6: 7 13 40
458
+ 7: 8 15 27
459
+ 8: 9 17 33
460
+ 9: 10 19 40
461
+ col1 col2 sum
415
462
  ```
416
463
 
464
+ ### Using `reduce`
417
465
 
418
- ### Aggregation
466
+ `reduce` returns a new table like `apply`
467
+ (and there is no equivalent for `each`, i.e. iterating through groups).
419
468
 
420
- Call `#apply` and the results are stored into a table.
469
+ Pass a block to `reduce`; you will have access to the `@R` variable
470
+ which is a group-specific hash where you can accumulate results.
471
+ See the examples below.
421
472
 
422
473
  ```ruby
423
- >>> data = ObjectTable.new(name: ['John', 'Tom', 'John', 'Tom', 'Jim'], value: 1..5)
424
- >>> data.group_by(:name).apply{ value.mean }
474
+ # sum of column 2
475
+ >>> data.group_by{{ rem: col1 % 3 }}.reduce{ @R[:sum] += col2 }
476
+ => ObjectTable(3, 2)
477
+ rem sum
478
+ 0: 1 40
479
+ 1: 2 27
480
+ 2: 0 33
481
+ rem sum
482
+
483
+ # we can supply initial values, e.g. if we wish to calculate product
484
+ >>> data.group_by{{ rem: col1 % 3 }}.reduce(prod: 1){ @R[:prod] *= col2 }
425
485
  => ObjectTable(3, 2)
426
- name v_0
427
- 0: "John" 2.0
428
- 1: "Tom" 3.0
429
- 2: "Jim" 5.0
430
- name v_0
486
+ rem prod
487
+ 0: 1 1729
488
+ 1: 2 405
489
+ 2: 0 935
490
+ rem prod
491
+ ```
492
+
493
+ You should avoid reduce unless your aggregating operation is simple
494
+ and you have a relatively large number of groups
495
+ (`reduce` is slower than `apply` with few groups).
496
+
497
+ ### Comparison of `apply` and `reduce`
498
+
499
+ The `reduce` version is more complicated because we must implement the
500
+ online algorithm ourselves.
501
+
502
+ #### Sum
503
+
504
+ ```ruby
505
+ >>> data.group_by{{ rem: col1 % 3 }}.apply{ @R[sum: col2.sum] }
506
+ >>> data.group_by{{ rem: col1 % 3 }}.reduce{ @R[:sum] += col2 }
431
507
  ```
432
508
 
433
- Normally you can only have one aggregated column with a default name of v_0.
434
- You can have more columns and set column names by making a `ObjectTable` or using the @R shortcut.
509
+ #### Product
435
510
 
436
511
  ```ruby
437
- >>> data.group_by(:name).apply{ @R[ mean: value.mean, sum: value.sum] }
438
- => ObjectTable(3, 3)
439
- name mean sum
440
- 0: "John" 2.0 4
441
- 1: "Tom" 3.0 6
442
- 2: "Jim" 5.0 5
443
- name mean sum
444
-
445
- # or if you are using a block with args
446
- >>> data.group_by(:name).apply{|grp| grp.R[ mean: grp.value.mean, sum: grp.value.sum] }
447
- => ObjectTable(3, 3)
448
- name mean sum
449
- 0: "John" 2.0 4
450
- 1: "Tom" 3.0 6
451
- 2: "Jim" 5.0 5
452
- name mean sum
512
+ >>> data.group_by{{ rem: col1 % 3 }}.apply{ @R[prod: col2.prod] }
513
+ >>> data.group_by{{ rem: col1 % 3 }}.reduce(prod: 1){ @R[:prod] *= col2 }
514
+ ```
515
+
516
+ #### Variance
517
+
518
+ Online algorithm for variance taken from:
519
+ http://en.wikipedia.org/wiki/Algorithms_for_calculating_variance#Online_algorithm
520
+
521
+ ```ruby
522
+ >>> data.group_by{{ rem: col1 % 3 }}.apply{ @R[var: col2.stddev**2] }
523
+ >>> data.group_by{{ rem: col1 % 3 }}.reduce(n: 0, mean: 0.0, m2: 0) do
524
+ @R[:n] += 1
525
+ delta = col2 - @R[:mean]
526
+ @R[:mean] += delta / @R[:n]
527
+ @R[:m2] += delta * (col2 - @R[:mean])
528
+ end.apply{ @R[rem: rem, variance: m2 / (n - 1)] }
453
529
  ```
454
530
 
455
- ### Assigning to columns
531
+ ## Joining
456
532
 
457
- Assigning to columns will assign by group.
533
+ Note the current joining algorithm is quite slow.
458
534
 
459
535
  ```ruby
460
- # every row with the same name will get the same group_values
461
- >>> data.group_by(:name).each{|grp| grp[:group_values] = grp.value.to_a.join(',') }
536
+ # let's create some data
537
+ >>> left = ObjectTable.new( key: [1, 2, 3, 5, 7], val_1: 1..5 )
538
+ >>> right = ObjectTable.new( key: [2, 3, 4, 5], val_2: 'a'..'d')
539
+
540
+ # inner join
541
+ >>> left.join(right, :key)
542
+ => ObjectTable(3, 3)
543
+ key val_1 val_2
544
+ 0: 2 2 "a"
545
+ 1: 3 3 "b"
546
+ 2: 5 4 "d"
547
+ key val_1 val_2
548
+
549
+ # left join
550
+ >>> left.join(right, :key, type: 'left')
462
551
  => ObjectTable(5, 3)
463
- name value group_values
464
- 0: "John" 1 "1,3"
465
- 1: "Tom" 2 "2,4"
466
- 2: "John" 3 "1,3"
467
- 3: "Tom" 4 "2,4"
468
- 4: "Jim" 5 "5"
469
- name value group_values
552
+ key val_1 val_2
553
+ 0: 1 1 nil
554
+ 1: 2 2 "a"
555
+ 2: 3 3 "b"
556
+ 3: 5 4 "d"
557
+ 4: 7 5 nil
558
+ key val_1 val_2
559
+
560
+ # right join
561
+ >>> left.join(right, :key, type: 'right')
562
+ => ObjectTable(4, 3)
563
+ key val_1 val_2
564
+ 0: 2 2 "a"
565
+ 1: 3 3 "b"
566
+ 2: 5 4 "d"
567
+ 3: 4 0 "c"
568
+ key val_1 val_2
569
+
570
+ # outer join
571
+ >>> left.join(right, :key, type: 'outer')
572
+ => ObjectTable(6, 3)
573
+ key val_1 val_2
574
+ 0: 1 1 nil
575
+ 1: 2 2 "a"
576
+ 2: 3 3 "b"
577
+ 3: 5 4 "d"
578
+ 4: 7 5 nil
579
+ 5: 4 0 "c"
580
+ key val_1 val_2
470
581
  ```
471
582
 
472
583
  ## Subclassing ObjectTable
@@ -491,8 +602,8 @@ The act of subclassing itself is easy, but any methods you add won't be availabl
491
602
  NoMethodError: undefined method `a_plus_b' for #<ObjectTable::View:0x000000011d4dd0>
492
603
  ```
493
604
 
494
- To make it work, you'll need to subclass `View`, `StaticView` and `Group` too and assign those subclasses under your ObjectTable subclass.
495
- The easiest way is just to include a module with your common methods.
605
+ The easiest way to make it work is to put your methods into a mixin
606
+ and use the `fully_include` class method.
496
607
 
497
608
  ```ruby
498
609
  >>> class WorkingTable < ObjectTable
@@ -502,12 +613,7 @@ The easiest way is just to include a module with your common methods.
502
613
  end
503
614
  end
504
615
 
505
- include Mixin
506
-
507
- # subclass each of these and include the Mixin too
508
- class StaticView < StaticView; include Mixin; end
509
- class View < View; include Mixin; end
510
- class Group < Group; include Mixin; end
616
+ fully_include Mixin
511
617
  end
512
618
  ...
513
619
 
@@ -518,15 +624,7 @@ The easiest way is just to include a module with your common methods.
518
624
 
519
625
  # hurrah!
520
626
  >>> data.where{ a > 1 }.a_plus_b
521
- => NArray.int(2):
627
+ => ObjectTable::MaskedColumn.int(2):
522
628
  [ 7, 9 ]
523
629
 
524
- # also works in groups!
525
- >>> data.group_by{{odd: a % 2}}.each do
526
- p "when a % 2 == #{@K[:odd]}, a + b == #{a_plus_b.to_a}"
527
- end
528
- ...
529
-
530
- "when a % 2 == 1, a + b == [5, 9]"
531
- "when a % 2 == 0, a + b == [7]"
532
630
  ```
@@ -8,7 +8,7 @@ class ObjectTable::BasicGrid < Hash
8
8
 
9
9
  def _get_number_rows!
10
10
  each{|k, v| self[k] = v.to_a if v.is_a?(Range)}
11
- rows = map{|k, v| ObjectTable::Column.length_of(v) rescue nil}.compact.uniq
11
+ rows = map{|k, v| ObjectTable::Column.length_of(v)}.compact.uniq
12
12
  end
13
13
 
14
14
  def _ensure_uniform_columns!(rows = nil)
@@ -4,17 +4,16 @@ module ObjectTable::Column
4
4
 
5
5
  def self.length_of(array)
6
6
  case array
7
- when Array
8
- array.length
9
- when NArray
10
- array.shape.last or 0
11
- else
12
- raise "Expected Array or NArray, got #{array}"
7
+ when Array then array.length
8
+ when NArray then (array.shape.last or 0)
9
+ else nil
13
10
  end
14
11
  end
15
12
 
16
13
 
17
- def self.stack(*columns)
14
+ def self.stack(*columns); _stack(columns); end
15
+
16
+ def self._stack(columns)
18
17
  columns = columns.reject(&:empty?)
19
18
  return NArray[] if columns.empty?
20
19
  return columns[0].clone if columns.length == 1
@@ -0,0 +1,46 @@
1
+ require 'forwardable'
2
+
3
+ module ObjectTable::Factory
4
+
5
+ CLASS_MAP = {
6
+ '__static_view_cls__' => 'StaticView',
7
+ '__view_cls__' => 'View',
8
+ '__group_cls__' => 'Group',
9
+ }.freeze
10
+ FACTORIES = (CLASS_MAP.keys + ['__table_cls__']).freeze
11
+
12
+ module ClassMethods
13
+ CLASS_MAP.each do |name, const|
14
+ eval "def #{name}; self::#{const}; end"
15
+ end
16
+
17
+ def __table_cls__
18
+ self
19
+ end
20
+
21
+ def fully_include(mixin)
22
+ include(mixin)
23
+ constants = constants(false)
24
+ CLASS_MAP.each do |name, const|
25
+ child_cls = send(name)
26
+ # create a new subclass if there isn't already one
27
+ child_cls = const_set(const, Class.new(child_cls)) unless constants.include?(child_cls)
28
+ child_cls.send(:include, mixin)
29
+ end
30
+ end
31
+ end
32
+
33
+ extend Forwardable
34
+ def_delegators 'self.class', *FACTORIES
35
+
36
+ def self.included(base)
37
+ base.extend(ClassMethods)
38
+ end
39
+
40
+ module SubFactory
41
+ FACTORIES.each do |name|
42
+ eval "def #{name}; @#{name} ||= @parent.#{name}; end"
43
+ end
44
+ end
45
+
46
+ end
@@ -0,0 +1,47 @@
1
+ require_relative '../util'
2
+
3
+ class ObjectTable::Grouping
4
+
5
+ class Grid
6
+ attr_reader :values, :index
7
+
8
+ def initialize(keys, defaults)
9
+ unless defaults.is_a?(Hash)
10
+ raise "Expected defaults to be a hash, got: #{defaults.inspect}"
11
+ end
12
+ defaults.default = 0
13
+ @defaults = defaults
14
+
15
+ @values = {}
16
+ @index = {}
17
+ @ids = keys.map{|k| @index[k] ||= @index.length}
18
+ @keys = keys
19
+ @length = @index.length
20
+ end
21
+
22
+ def [](k)
23
+ (@values[k] ||= Array.new(@length, @defaults[k]))[@id]
24
+ end
25
+
26
+ def []=(k, v)
27
+ @values[k][@id] = v
28
+ end
29
+
30
+ module RowFactory
31
+ def self.new(*args)
32
+ Struct.new(*args){ attr_accessor :K, :R }
33
+ end
34
+ end
35
+
36
+ def apply_to_rows(rows, key_struct, block)
37
+ @ids.zip(@keys, rows) do |id, key, row|
38
+ @id = id
39
+ row.K = key_struct.new(*key)
40
+ row.R = self
41
+ ObjectTable::Util.apply_block(row, block)
42
+ end
43
+ end
44
+
45
+ end
46
+
47
+ end
@@ -0,0 +1,109 @@
1
+ require_relative 'factory'
2
+ require_relative 'util'
3
+ require_relative 'static_view'
4
+ require_relative 'grouping/grid'
5
+
6
+ class ObjectTable
7
+ class Group < StaticView
8
+ attr_reader :K
9
+ def initialize(parent, keys, value)
10
+ super(parent, value)
11
+ @K = keys
12
+ end
13
+ end
14
+
15
+ class Grouping
16
+ DEFAULT_VALUE_PREFIX = 'v_'.freeze
17
+ include Factory::SubFactory
18
+
19
+ def initialize(parent, *columns, &grouper)
20
+ @parent = parent
21
+ @grouper = grouper
22
+ @columns = columns
23
+ @names = columns
24
+ end
25
+
26
+ def _keys
27
+ return Util.get_rows(@parent, @columns) unless @columns.empty?
28
+
29
+ keys = @parent.apply(&@grouper)
30
+ raise 'Group keys must be hashes' unless keys.is_a?(Hash)
31
+ keys = BasicGrid.new.replace keys
32
+ keys._ensure_uniform_columns!(@parent.nrows)
33
+
34
+ @names = keys.keys
35
+ keys.values.map(&:to_a).transpose
36
+ end
37
+
38
+ def each(&block)
39
+ groups = Util.group_indices(_keys)
40
+ return to_enum(:_make_groups, groups) unless block
41
+ _make_groups(groups){|grp| Util.apply_block(grp, block)}
42
+ end
43
+
44
+ def apply(&block)
45
+ groups = Util.group_indices(_keys)
46
+ return empty_aggregation if groups.empty?
47
+
48
+ value_key = self.class.generate_name(DEFAULT_VALUE_PREFIX, @names).to_sym
49
+ keys = []
50
+
51
+ data = groups.keys.zip(to_enum(:_make_groups, groups)).map do |key, group|
52
+ value = Util.apply_block(group, block)
53
+
54
+ case value
55
+ when TableMethods
56
+ nrows = value.nrows
57
+ when BasicGrid
58
+ nrows = value._ensure_uniform_columns!
59
+ else
60
+ nrows = (Column.length_of(value) or 1)
61
+ value = BasicGrid[value_key, value]
62
+ end
63
+
64
+ keys.concat( Array.new(nrows, key) )
65
+ value
66
+ end
67
+
68
+ keys = BasicGrid[@names.zip(keys.transpose)]
69
+ result = __table_cls__._stack(data)
70
+ __table_cls__.new(keys.merge!(result.columns))
71
+ end
72
+
73
+ def reduce(defaults={}, &block)
74
+ keys = _keys()
75
+ return empty_aggregation if keys.empty?
76
+
77
+ grid = Grid.new(keys, defaults)
78
+ rows = @parent.each_row(row_factory: Grid::RowFactory)
79
+ grid.apply_to_rows(rows, self.class.key_struct(@names), block)
80
+
81
+ keys = BasicGrid[@names.zip(grid.index.keys.transpose)]
82
+ __table_cls__.new(keys.merge!(grid.values))
83
+ end
84
+
85
+ def _make_groups(groups)
86
+ key_struct = self.class.key_struct(@names)
87
+ groups.each do |k, v|
88
+ yield __group_cls__.new(@parent, key_struct.new(*k), NArray.to_na(v))
89
+ end
90
+ @parent
91
+ end
92
+
93
+ def self.generate_name(prefix, names)
94
+ regex = Regexp.new(Regexp.quote(prefix) + '(\d+)')
95
+ i = names.map{|n| n =~ regex and $1.to_i}.compact.max || -1
96
+ "#{prefix}#{i + 1}"
97
+ end
98
+
99
+ def self.key_struct(names)
100
+ Struct.new(*names.map(&:to_sym))
101
+ end
102
+
103
+ def empty_aggregation
104
+ __table_cls__.new(@names.map{|n| [n, []]})
105
+ end
106
+
107
+ end
108
+
109
+ end