tdigest 0.0.4 → 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/lib/tdigest/tdigest.rb +28 -1
- data/lib/tdigest/version.rb +1 -1
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 13d323c00e8ecc6fc72db137c6160f28c23a8615
|
4
|
+
data.tar.gz: efad7cb62f79a512e4fc68dceca2ae5a55a693c3
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: a948d7d63a22957a34e9e1cf71d4e6904a325b1b7e4cd93de12611332c41c62be932a7fd69a6f356122c7da033bc24abaa86db29db5b03a8d1dc79031f603e59
|
7
|
+
data.tar.gz: 6fb668b7bbe9f1885af98036843095f03d0a74034462c7fdd8fdde970c95b606be556074c41c042a1e02ce9c60bb29fddd0177d4eefdc6da5a32bac857e391ff
|
data/lib/tdigest/tdigest.rb
CHANGED
@@ -16,8 +16,19 @@ module TDigest
|
|
16
16
|
reset!
|
17
17
|
end
|
18
18
|
|
19
|
+
def +(other)
|
20
|
+
# Uses delta, k and cx from the caller
|
21
|
+
t = self.class.new(@delta, @k, @cx)
|
22
|
+
data = self.centroids.values + other.centroids.values
|
23
|
+
while data.length > 0
|
24
|
+
t.push_centroid(data.delete_at(rand(data.length)))
|
25
|
+
end
|
26
|
+
t
|
27
|
+
end
|
28
|
+
|
19
29
|
def as_bytes
|
20
30
|
# compression as defined by Java implementation
|
31
|
+
size = @centroids.size
|
21
32
|
output = [VERBOSE_ENCODING, compression, size]
|
22
33
|
output += @centroids.map { |_, c| c.mean }
|
23
34
|
output += @centroids.map { |_, c| c.n }
|
@@ -25,6 +36,7 @@ module TDigest
|
|
25
36
|
end
|
26
37
|
|
27
38
|
def as_small_bytes
|
39
|
+
size = @centroids.size
|
28
40
|
output = [SMALL_ENCODING, compression, size]
|
29
41
|
x = 0
|
30
42
|
# delta encoding allows saving 4-bytes floats
|
@@ -115,6 +127,13 @@ module TDigest
|
|
115
127
|
end
|
116
128
|
end
|
117
129
|
|
130
|
+
def merge!(other)
|
131
|
+
# Uses delta, k and cx from the caller
|
132
|
+
t = self + other
|
133
|
+
@centroids = t.centroids
|
134
|
+
compress!
|
135
|
+
end
|
136
|
+
|
118
137
|
def p_rank(x)
|
119
138
|
is_array = x.is_a? Array
|
120
139
|
x = [x] unless is_array
|
@@ -188,7 +207,7 @@ module TDigest
|
|
188
207
|
end
|
189
208
|
|
190
209
|
def size
|
191
|
-
@centroids.size
|
210
|
+
@n || 0
|
192
211
|
end
|
193
212
|
|
194
213
|
def to_a
|
@@ -310,6 +329,14 @@ module TDigest
|
|
310
329
|
|
311
330
|
_cumulate(false)
|
312
331
|
|
332
|
+
# If the number of centroids has grown to a very large size,
|
333
|
+
# it may be due to values being inserted in sorted order.
|
334
|
+
# We combat that by replaying the centroids in random order,
|
335
|
+
# which is what compress! does
|
336
|
+
if @centroids.size > (@k / @delta)
|
337
|
+
compress!
|
338
|
+
end
|
339
|
+
|
313
340
|
nil
|
314
341
|
end
|
315
342
|
|
data/lib/tdigest/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: tdigest
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.4
|
4
|
+
version: 0.1.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Sebastian Wallin
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2016-01-
|
11
|
+
date: 2016-01-19 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rbtree
|