namo 0.18.0 → 0.20.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG +52 -0
- data/README.md +29 -4
- data/lib/Namo/Enumerable.rb +9 -0
- data/lib/Namo/VERSION.rb +1 -1
- data/lib/namo.rb +7 -7
- data/test/namo_test.rb +156 -0
- metadata +1 -1
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: ae1731b55ec8d0de8dedfb35fc643f6fb6a68e72bb3464c4139242c1bc6bb91e
|
|
4
|
+
data.tar.gz: 161a1842a4daf8a4365a8420600f2824f5f15fd2ea3951e18d00d811eec22010
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 4a563c97ca83b91e578375af09e533f633d2ed2d6bca1152297d3bf4b24dc60c5f25a5679e0e87023e3031f5019b3c32e825f35f23b75b0259cee6fb765a02b5
|
|
7
|
+
data.tar.gz: 140f9b808a8f406ca1ca16611ab776544c332830353f1d9dba58d4cf3578ff746d2ff7788671fd9eda0c082d6cc3f1457f29ee38473d88ec2c1f0aa457a0dace
|
data/CHANGELOG
CHANGED
|
@@ -1,6 +1,58 @@
|
|
|
1
1
|
CHANGELOG
|
|
2
2
|
_________
|
|
3
3
|
|
|
4
|
+
20260615
|
|
5
|
+
0.20.0: + group_by(dimension) — splits a Namo into a Namo::Collection, one member per distinct value, completing the Enumerable coherence pass begun at 0.11.0.
|
|
6
|
+
|
|
7
|
+
1. ~ lib/Namo/Enumerable.rb: + Namo#group_by(dimension). Returns a Namo::Collection
|
|
8
|
+
whose members are the groups — one member per distinct value, each a Namo of the
|
|
9
|
+
matching rows, named by its group value, carrying the surviving formulae, preserving
|
|
10
|
+
subclass type via self.class.new. The grouping dimension is retained (the split is
|
|
11
|
+
along the axis, not consuming it), so namo.group_by(d).as_detail(d) inverts it. A
|
|
12
|
+
nil-valued group produces a nil-named member that holds its rows and round-trips, the
|
|
13
|
+
use-site naming decision (0.18.0) paying off. A ternary makes the materialise
|
|
14
|
+
decision: when dimension is derived, the group source is the projection
|
|
15
|
+
(self[*data_dimensions, dimension], 0.16.0) so the grouped-by formula becomes a stored
|
|
16
|
+
column and is dropped as a formula, while every other formula carries through live; when it is data, the
|
|
17
|
+
source is self untouched. A parameterised formula raises the 0.17.0 ArgumentError at
|
|
18
|
+
the materialisation site, group_by needing no enforcement of its own.
|
|
19
|
+
2. ~ test/namo_test.rb: + "#group_by" describe — return type, one-member-per-value,
|
|
20
|
+
row partitioning, grouping-dimension retention, find-by-name, formula carry-through,
|
|
21
|
+
subclass preservation, non-mutation, empty-Namo; the uniform inversion law
|
|
22
|
+
(data: exact; formula: against the materialised form); grouping by a formula
|
|
23
|
+
(partition by value, materialise-and-drop, only-grouped-formula-altered with a live
|
|
24
|
+
dependent, parameterised raise); the nil-valued grouping dimension (nil-named member,
|
|
25
|
+
no rows dropped, round-trip); and consistency with assembly (summary/detail on a
|
|
26
|
+
partitioned Collection).
|
|
27
|
+
3. ~ README.md: + group_by subsection under Collections — the partition constructor as
|
|
28
|
+
the mirror of <<, the round-trip example, the materialise-on-formula behaviour, and
|
|
29
|
+
the cross-reference to the assembly side.
|
|
30
|
+
4. ~ ROADMAP.md: Promote 0.20.0 to shipped; Current state -> 0.20.0; + the materialisation
|
|
31
|
+
rationale (unified-treatment requires it; materialise-and-drop is the only invertible
|
|
32
|
+
choice; the uniform inversion law); Summary folds in group_by; next phase -> 1.0.0.
|
|
33
|
+
5. ~ COMPARISON.md: Aggregation entry — group_by repointed from planned (0.20.0) to
|
|
34
|
+
shipped, the partition constructor now realised; + the grouping-by-formula comparison
|
|
35
|
+
(computed-key grouping is parity, but only Namo partitions on a named, attached formula
|
|
36
|
+
through the data-dimension interface). Date bumped.
|
|
37
|
+
6. ~ Namo::VERSION: /0.19.0/0.20.0/
|
|
38
|
+
|
|
39
|
+
20260614
|
|
40
|
+
0.19.0: ~ row-multiset equality — replace the sorted canonical form with a row multiset, so ==, eql?, hash, and the subset operators compare without sorting: nil/NaN-safe, row- and dimension-order blind, and consistently type-strict.
|
|
41
|
+
|
|
42
|
+
1. ~ lib/namo.rb: replace canonical_data (which sorted @data by row.values_at(*data_dimensions.sort))
|
|
43
|
+
with row_multiset (@data.tally). ==, eql?, hash, and subset_of_rows? now compare row
|
|
44
|
+
multisets keyed by Hash#hash/#eql?. The sort was only ever a mechanism for reaching the
|
|
45
|
+
already-decided multiset semantics (0.6.0); a multiset reaches them directly and needs no
|
|
46
|
+
ordering, so the latent ArgumentError when a dimension mixed nil (or NaN) with present
|
|
47
|
+
values — neither has a <=> against the other — is gone. The relation is now uniformly
|
|
48
|
+
eql?-based, so == agrees with the subset operators, which already tallied: {a: 1} and
|
|
49
|
+
{a: 1.0} are consistently distinct (== previously called them equal via Hash#==, while <=
|
|
50
|
+
called them distinct).
|
|
51
|
+
2. ~ test/namo_test.rb: + four "#==" cases — equality across row order and across dimension
|
|
52
|
+
(key) order when a dimension mixes nil and non-nil values; inequality when such rows
|
|
53
|
+
differ; and type-strictness on values, consistent with <=.
|
|
54
|
+
3. ~ Namo::VERSION: /0.18.0/0.19.0/
|
|
55
|
+
|
|
4
56
|
20260613
|
|
5
57
|
0.18.0: + Namo::Collection — hierarchical aggregate of named Namos with summary/detail views.
|
|
6
58
|
|
data/README.md
CHANGED
|
@@ -1006,7 +1006,7 @@ gt.as_detail(:assembly) # gt's data becomes the detail; retur
|
|
|
1006
1006
|
- If `by` is **already a dimension** in a member's rows, the row passes through untouched — the dimension is intrinsic.
|
|
1007
1007
|
- If `by` is **not** present, `detail` injects it (`row.merge(by => member.name)`), promoting the member's name into a dimension.
|
|
1008
1008
|
|
|
1009
|
-
This single conditional is where assembly (`<<`, members named extrinsically) and partition (`group_by`, members named by an intrinsic value — 0.
|
|
1009
|
+
This single conditional is where assembly (`<<`, members named extrinsically) and partition (`group_by`, members named by an intrinsic value — 0.20.0) meet. For an assembled Collection, `as_detail(:assembly)` is the dimension-creating step: it promotes the member name into real data and **retains** it. From then on the structure is intrinsic and round-trips are exact; the promoted dimension is removed only by explicit contraction (`gt[-:assembly]`), never automatically.
|
|
1010
1010
|
|
|
1011
1011
|
#### `<<` and unnamed members
|
|
1012
1012
|
|
|
@@ -1038,11 +1038,36 @@ gt << front_suspension # re-materialises detail
|
|
|
1038
1038
|
gt.values(:weight) # => [200, 80, 150, 60, ...] (line items again)
|
|
1039
1039
|
```
|
|
1040
1040
|
|
|
1041
|
-
Freeze-gated memoisation is a 2.x optimisation — opt-in via `freeze`, transparent, and never changing this observable behaviour. `group_by` (0.
|
|
1041
|
+
Freeze-gated memoisation is a 2.x optimisation — opt-in via `freeze`, transparent, and never changing this observable behaviour. `group_by` (0.20.0) is the partition-side constructor for the same type: it splits a Namo into a `Collection`, the mirror of assembling one with `<<`.
|
|
1042
1042
|
|
|
1043
|
-
|
|
1043
|
+
#### Partitioning with `group_by`
|
|
1044
1044
|
|
|
1045
|
-
|
|
1045
|
+
`group_by(dimension)` is the partition-side constructor for a `Collection` — the mirror of assembling one with `<<`. It splits a Namo into one member per distinct value of the dimension, each a Namo holding that group's rows, named by its group value:
|
|
1046
|
+
|
|
1047
|
+
```ruby
|
|
1048
|
+
prices.group_by(:symbol)
|
|
1049
|
+
# => #<Namo::Collection members: [:BHP, :RIO, :CBA]>
|
|
1050
|
+
|
|
1051
|
+
prices.group_by(:symbol).summary(:close, reducer: :mean)
|
|
1052
|
+
# => Namo with {member:, close:} rows — mean close per symbol
|
|
1053
|
+
```
|
|
1054
|
+
|
|
1055
|
+
The grouping dimension is retained in every member — the split runs *along* the axis, it doesn't consume it — so the partition inverts exactly through `as_detail` on the same dimension:
|
|
1056
|
+
|
|
1057
|
+
```ruby
|
|
1058
|
+
prices.group_by(:symbol).as_detail(:symbol) == prices
|
|
1059
|
+
# => true
|
|
1060
|
+
```
|
|
1061
|
+
|
|
1062
|
+
Because data and derived dimensions are treated alike, you can group by a formula as readily as by a stored column. Grouping by a derived dimension materialises it first — the grouped-by formula's values become a stored column in each member and the formula itself is dropped, while every other formula carries through live:
|
|
1063
|
+
|
|
1064
|
+
```ruby
|
|
1065
|
+
prices[:value_score] = proc{|r| r[:pe] < 10 ? 2 : r[:pe] < 15 ? 1 : 0}
|
|
1066
|
+
prices.group_by(:value_score)
|
|
1067
|
+
# => one member per score; :value_score is now a data column in each
|
|
1068
|
+
```
|
|
1069
|
+
|
|
1070
|
+
This gives a single inversion law over the whole namespace — `namo.group_by(d).as_detail(d) == namo[*namo.data_dimensions, d]` for any `d`. When `d` is a data dimension the right-hand side is equal to `namo` itself, so the exact-original round-trip above is simply the data-dimension instance of the law. A nil-valued group produces a nil-named member, holding its rows and round-tripping like any other.
|
|
1046
1071
|
|
|
1047
1072
|
## Name
|
|
1048
1073
|
|
data/lib/Namo/Enumerable.rb
CHANGED
|
@@ -68,5 +68,14 @@ class Namo
|
|
|
68
68
|
self.class.new(non_matches, formulae: @formulae.dup),
|
|
69
69
|
]
|
|
70
70
|
end
|
|
71
|
+
|
|
72
|
+
def group_by(dimension)
|
|
73
|
+
collection = Collection.new
|
|
74
|
+
source = derived_dimensions.include?(dimension) ? self[*data_dimensions, dimension] : self
|
|
75
|
+
source.data.group_by{|row_data| row_data[dimension]}.each do |value, rows|
|
|
76
|
+
collection << self.class.new(rows, formulae: source.formulae.dup, name: value)
|
|
77
|
+
end
|
|
78
|
+
collection
|
|
79
|
+
end
|
|
71
80
|
end
|
|
72
81
|
end
|
data/lib/Namo/VERSION.rb
CHANGED
data/lib/namo.rb
CHANGED
|
@@ -164,7 +164,7 @@ class Namo
|
|
|
164
164
|
|
|
165
165
|
def ==(other)
|
|
166
166
|
return false unless other.is_a?(Namo)
|
|
167
|
-
|
|
167
|
+
row_multiset == other.row_multiset
|
|
168
168
|
end
|
|
169
169
|
|
|
170
170
|
def ===(other)
|
|
@@ -175,12 +175,12 @@ class Namo
|
|
|
175
175
|
|
|
176
176
|
def eql?(other)
|
|
177
177
|
self.class == other.class &&
|
|
178
|
-
|
|
178
|
+
row_multiset == other.row_multiset &&
|
|
179
179
|
@formulae.keys.sort == other.formulae.keys.sort
|
|
180
180
|
end
|
|
181
181
|
|
|
182
182
|
def hash
|
|
183
|
-
[self.class,
|
|
183
|
+
[self.class, row_multiset, @formulae.keys.sort].hash
|
|
184
184
|
end
|
|
185
185
|
|
|
186
186
|
def <(other)
|
|
@@ -217,13 +217,13 @@ class Namo
|
|
|
217
217
|
|
|
218
218
|
protected
|
|
219
219
|
|
|
220
|
-
def
|
|
221
|
-
@data.
|
|
220
|
+
def row_multiset
|
|
221
|
+
@data.tally
|
|
222
222
|
end
|
|
223
223
|
|
|
224
224
|
def subset_of_rows?(other)
|
|
225
|
-
self_counts =
|
|
226
|
-
other_counts = other.
|
|
225
|
+
self_counts = row_multiset
|
|
226
|
+
other_counts = other.row_multiset
|
|
227
227
|
self_counts.all?{|row, count| (other_counts[row] || 0) >= count}
|
|
228
228
|
end
|
|
229
229
|
|
data/test/namo_test.rb
CHANGED
|
@@ -2312,6 +2312,31 @@ describe Namo do
|
|
|
2312
2312
|
_(a == 'string').must_equal false
|
|
2313
2313
|
_(a == nil).must_equal false
|
|
2314
2314
|
end
|
|
2315
|
+
|
|
2316
|
+
it "compares rows whose dimension mixes nil and non-nil values" do
|
|
2317
|
+
a = Namo.new([{symbol: 'BHP', sector: 'Mining'}, {symbol: 'CBA', sector: nil}])
|
|
2318
|
+
b = Namo.new([{symbol: 'CBA', sector: nil}, {symbol: 'BHP', sector: 'Mining'}])
|
|
2319
|
+
_(a == b).must_equal true
|
|
2320
|
+
end
|
|
2321
|
+
|
|
2322
|
+
it "is false for differing rows when a dimension contains nil" do
|
|
2323
|
+
a = Namo.new([{symbol: 'BHP', sector: 'Mining'}, {symbol: 'CBA', sector: nil}])
|
|
2324
|
+
b = Namo.new([{symbol: 'BHP', sector: 'Mining'}, {symbol: 'CBA', sector: 'Banking'}])
|
|
2325
|
+
_(a == b).must_equal false
|
|
2326
|
+
end
|
|
2327
|
+
|
|
2328
|
+
it "ignores dimension (key) order within a row" do
|
|
2329
|
+
a = Namo.new([{a: 1, b: 2, c: 3}])
|
|
2330
|
+
b = Namo.new([{c: 3, b: 2, a: 1}])
|
|
2331
|
+
_(a == b).must_equal true
|
|
2332
|
+
end
|
|
2333
|
+
|
|
2334
|
+
it "is type-strict on values, consistent with the subset operators" do
|
|
2335
|
+
a = Namo.new([{x: 1}])
|
|
2336
|
+
b = Namo.new([{x: 1.0}])
|
|
2337
|
+
_(a == b).must_equal false
|
|
2338
|
+
_(a <= b).must_equal false
|
|
2339
|
+
end
|
|
2315
2340
|
end
|
|
2316
2341
|
|
|
2317
2342
|
describe "#===" do
|
|
@@ -2548,4 +2573,135 @@ describe Namo do
|
|
|
2548
2573
|
_(sales.to_a).must_equal sample_data
|
|
2549
2574
|
end
|
|
2550
2575
|
end
|
|
2576
|
+
|
|
2577
|
+
describe "#group_by" do
|
|
2578
|
+
let(:price_data) do
|
|
2579
|
+
[
|
|
2580
|
+
{symbol: 'BHP', date: 1, close: 42.5, pe: 12.0},
|
|
2581
|
+
{symbol: 'BHP', date: 2, close: 43.0, pe: 12.0},
|
|
2582
|
+
{symbol: 'RIO', date: 1, close: 118.3, pe: 9.0},
|
|
2583
|
+
{symbol: 'CBA', date: 1, close: 100.0, pe: 22.0}
|
|
2584
|
+
]
|
|
2585
|
+
end
|
|
2586
|
+
let(:prices) { Namo.new(price_data) }
|
|
2587
|
+
|
|
2588
|
+
it "returns a Namo::Collection" do
|
|
2589
|
+
_(prices.group_by(:symbol)).must_be_kind_of Namo::Collection
|
|
2590
|
+
end
|
|
2591
|
+
|
|
2592
|
+
it "has one member per distinct value of the dimension" do
|
|
2593
|
+
collection = prices.group_by(:symbol)
|
|
2594
|
+
_(collection.members.length).must_equal 3
|
|
2595
|
+
_(collection.members.map(&:name)).must_equal ['BHP', 'RIO', 'CBA']
|
|
2596
|
+
end
|
|
2597
|
+
|
|
2598
|
+
it "gives each member exactly the rows matching its group value" do
|
|
2599
|
+
collection = prices.group_by(:symbol)
|
|
2600
|
+
_(collection.find('BHP').values(:date)).must_equal [1, 2]
|
|
2601
|
+
_(collection.find('RIO').values(:date)).must_equal [1]
|
|
2602
|
+
end
|
|
2603
|
+
|
|
2604
|
+
it "retains the grouping dimension in each member (it is not consumed)" do
|
|
2605
|
+
_(prices.group_by(:symbol).find('BHP').data_dimensions).must_include :symbol
|
|
2606
|
+
end
|
|
2607
|
+
|
|
2608
|
+
it "names each member by its group value, found via find" do
|
|
2609
|
+
_(prices.group_by(:symbol).find('CBA').values(:close)).must_equal [100.0]
|
|
2610
|
+
end
|
|
2611
|
+
|
|
2612
|
+
it "carries the parent's formulae into each member" do
|
|
2613
|
+
prices[:cheap] = proc{|r| r[:pe] < 15}
|
|
2614
|
+
_(prices.group_by(:symbol).find('BHP').values(:cheap)).must_equal [true, true]
|
|
2615
|
+
end
|
|
2616
|
+
|
|
2617
|
+
it "preserves the receiver's class in each member" do
|
|
2618
|
+
subclass = Class.new(Namo)
|
|
2619
|
+
collection = subclass.new(price_data).group_by(:symbol)
|
|
2620
|
+
_(collection.members.first).must_be_instance_of subclass
|
|
2621
|
+
end
|
|
2622
|
+
|
|
2623
|
+
it "does not mutate the receiver" do
|
|
2624
|
+
prices.group_by(:symbol)
|
|
2625
|
+
_(prices.data).must_equal price_data
|
|
2626
|
+
end
|
|
2627
|
+
|
|
2628
|
+
it "returns an empty Collection for an empty Namo" do
|
|
2629
|
+
collection = Namo.new.group_by(:symbol)
|
|
2630
|
+
_(collection).must_be_kind_of Namo::Collection
|
|
2631
|
+
_(collection.members).must_equal []
|
|
2632
|
+
end
|
|
2633
|
+
|
|
2634
|
+
context "the uniform inversion law" do
|
|
2635
|
+
it "round-trips exactly on a data dimension" do
|
|
2636
|
+
_(prices.group_by(:symbol).as_detail(:symbol)).must_equal prices
|
|
2637
|
+
end
|
|
2638
|
+
|
|
2639
|
+
it "round-trips against the materialised form on a formula dimension" do
|
|
2640
|
+
prices[:value_score] = proc{|r| r[:pe] < 10 ? 2 : r[:pe] < 15 ? 1 : 0}
|
|
2641
|
+
recovered = prices.group_by(:value_score).as_detail(:value_score)
|
|
2642
|
+
_(recovered).must_equal prices[*prices.data_dimensions, :value_score]
|
|
2643
|
+
end
|
|
2644
|
+
end
|
|
2645
|
+
|
|
2646
|
+
context "grouping by a formula" do
|
|
2647
|
+
before do
|
|
2648
|
+
prices[:value_score] = proc{|r| r[:pe] < 10 ? 2 : r[:pe] < 15 ? 1 : 0}
|
|
2649
|
+
end
|
|
2650
|
+
|
|
2651
|
+
it "partitions by the formula's value" do
|
|
2652
|
+
collection = prices.group_by(:value_score)
|
|
2653
|
+
_(collection.members.map(&:name).sort).must_equal [0, 1, 2]
|
|
2654
|
+
_(collection.find(1).values(:symbol)).must_equal ['BHP', 'BHP']
|
|
2655
|
+
end
|
|
2656
|
+
|
|
2657
|
+
it "materialises the grouped-by formula into a data column in each member" do
|
|
2658
|
+
member = prices.group_by(:value_score).find(1)
|
|
2659
|
+
_(member.data_dimensions).must_include :value_score
|
|
2660
|
+
_(member.derived_dimensions).wont_include :value_score
|
|
2661
|
+
end
|
|
2662
|
+
|
|
2663
|
+
it "alters only the grouped-by formula, carrying the rest through live" do
|
|
2664
|
+
prices[:tier] = proc{|r| r[:value_score] >= 1 ? 'good' : 'poor'}
|
|
2665
|
+
member = prices.group_by(:value_score).find(1)
|
|
2666
|
+
_(member.derived_dimensions).must_include :tier
|
|
2667
|
+
_(member.derived_dimensions).wont_include :value_score
|
|
2668
|
+
_(member.values(:tier)).must_equal ['good', 'good']
|
|
2669
|
+
end
|
|
2670
|
+
|
|
2671
|
+
it "raises for a parameterised formula it cannot materialise" do
|
|
2672
|
+
prices[:sma] = proc{|row, namo, field, period| 0}
|
|
2673
|
+
_(proc{prices.group_by(:sma)}).must_raise ArgumentError
|
|
2674
|
+
end
|
|
2675
|
+
end
|
|
2676
|
+
|
|
2677
|
+
context "a nil-valued grouping dimension" do
|
|
2678
|
+
let(:sector_data) do
|
|
2679
|
+
[
|
|
2680
|
+
{symbol: 'BHP', sector: 'Mining'},
|
|
2681
|
+
{symbol: 'RIO', sector: 'Mining'},
|
|
2682
|
+
{symbol: 'XYZ', sector: nil}
|
|
2683
|
+
]
|
|
2684
|
+
end
|
|
2685
|
+
let(:stocks) { Namo.new(sector_data) }
|
|
2686
|
+
|
|
2687
|
+
it "produces a nil-named member holding the nil-valued rows" do
|
|
2688
|
+
collection = stocks.group_by(:sector)
|
|
2689
|
+
nil_member = collection.members.find{|m| m.name.nil?}
|
|
2690
|
+
_(nil_member).wont_be_nil
|
|
2691
|
+
_(nil_member.values(:symbol)).must_equal ['XYZ']
|
|
2692
|
+
end
|
|
2693
|
+
|
|
2694
|
+
it "drops no rows and still round-trips" do
|
|
2695
|
+
_(stocks.group_by(:sector).as_detail(:sector)).must_equal stocks
|
|
2696
|
+
end
|
|
2697
|
+
end
|
|
2698
|
+
|
|
2699
|
+
context "consistency with assembly" do
|
|
2700
|
+
it "produces a Collection on which summary and detail behave as on an assembled one" do
|
|
2701
|
+
collection = prices.group_by(:symbol)
|
|
2702
|
+
_(collection.summary(:close, reducer: :sum).values(:member)).must_equal ['BHP', 'RIO', 'CBA']
|
|
2703
|
+
_(collection.detail.values(:close)).must_equal [42.5, 43.0, 118.3, 100.0]
|
|
2704
|
+
end
|
|
2705
|
+
end
|
|
2706
|
+
end
|
|
2551
2707
|
end
|