tdigest 0.0.4 → 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/tdigest/tdigest.rb +28 -1
- data/lib/tdigest/version.rb +1 -1
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 13d323c00e8ecc6fc72db137c6160f28c23a8615
|
4
|
+
data.tar.gz: efad7cb62f79a512e4fc68dceca2ae5a55a693c3
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: a948d7d63a22957a34e9e1cf71d4e6904a325b1b7e4cd93de12611332c41c62be932a7fd69a6f356122c7da033bc24abaa86db29db5b03a8d1dc79031f603e59
|
7
|
+
data.tar.gz: 6fb668b7bbe9f1885af98036843095f03d0a74034462c7fdd8fdde970c95b606be556074c41c042a1e02ce9c60bb29fddd0177d4eefdc6da5a32bac857e391ff
|
data/lib/tdigest/tdigest.rb
CHANGED
@@ -16,8 +16,19 @@ module TDigest
|
|
16
16
|
reset!
|
17
17
|
end
|
18
18
|
|
19
|
+
def +(other)
|
20
|
+
# Uses delta, k and cx from the caller
|
21
|
+
t = self.class.new(@delta, @k, @cx)
|
22
|
+
data = self.centroids.values + other.centroids.values
|
23
|
+
while data.length > 0
|
24
|
+
t.push_centroid(data.delete_at(rand(data.length)))
|
25
|
+
end
|
26
|
+
t
|
27
|
+
end
|
28
|
+
|
19
29
|
def as_bytes
|
20
30
|
# compression as defined by Java implementation
|
31
|
+
size = @centroids.size
|
21
32
|
output = [VERBOSE_ENCODING, compression, size]
|
22
33
|
output += @centroids.map { |_, c| c.mean }
|
23
34
|
output += @centroids.map { |_, c| c.n }
|
@@ -25,6 +36,7 @@ module TDigest
|
|
25
36
|
end
|
26
37
|
|
27
38
|
def as_small_bytes
|
39
|
+
size = @centroids.size
|
28
40
|
output = [SMALL_ENCODING, compression, size]
|
29
41
|
x = 0
|
30
42
|
# delta encoding allows saving 4-bytes floats
|
@@ -115,6 +127,13 @@ module TDigest
|
|
115
127
|
end
|
116
128
|
end
|
117
129
|
|
130
|
+
def merge!(other)
|
131
|
+
# Uses delta, k and cx from the caller
|
132
|
+
t = self + other
|
133
|
+
@centroids = t.centroids
|
134
|
+
compress!
|
135
|
+
end
|
136
|
+
|
118
137
|
def p_rank(x)
|
119
138
|
is_array = x.is_a? Array
|
120
139
|
x = [x] unless is_array
|
@@ -188,7 +207,7 @@ module TDigest
|
|
188
207
|
end
|
189
208
|
|
190
209
|
def size
|
191
|
-
@centroids.size
|
210
|
+
@n || 0
|
192
211
|
end
|
193
212
|
|
194
213
|
def to_a
|
@@ -310,6 +329,14 @@ module TDigest
|
|
310
329
|
|
311
330
|
_cumulate(false)
|
312
331
|
|
332
|
+
# If the number of centroids has grown to a very large size,
|
333
|
+
# it may be due to values being inserted in sorted order.
|
334
|
+
# We combat that by replaying the centroids in random order,
|
335
|
+
# which is what compress! does
|
336
|
+
if @centroids.size > (@k / @delta)
|
337
|
+
compress!
|
338
|
+
end
|
339
|
+
|
313
340
|
nil
|
314
341
|
end
|
315
342
|
|
data/lib/tdigest/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: tdigest
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.4
|
4
|
+
version: 0.1.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Sebastian Wallin
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2016-01-
|
11
|
+
date: 2016-01-19 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rbtree
|