tdigest 0.0.4 → 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: d6b00ddf3a1a0b0a5989002fb3cf8fd51b352c6a
4
- data.tar.gz: 60d1e7f4b42e3e38300f2f9380f9a91077bbd02f
3
+ metadata.gz: 13d323c00e8ecc6fc72db137c6160f28c23a8615
4
+ data.tar.gz: efad7cb62f79a512e4fc68dceca2ae5a55a693c3
5
5
  SHA512:
6
- metadata.gz: d7154db5857ee2b184ff16c1f2afb7ea110be15cb1d4253e5287b0bf6e656232fccdffecf1f0a3ba6f52626039f0498b030a56cc5e71b51f0a6c82c60f15c7f2
7
- data.tar.gz: 7265929cc8f77b8973cd9d60240c38f3ae3ce29e9af6644e18041c7a56f72ec5980bccf1696cb232865d294853821ce31d8582e57bd26806f79785610b1aa3d9
6
+ metadata.gz: a948d7d63a22957a34e9e1cf71d4e6904a325b1b7e4cd93de12611332c41c62be932a7fd69a6f356122c7da033bc24abaa86db29db5b03a8d1dc79031f603e59
7
+ data.tar.gz: 6fb668b7bbe9f1885af98036843095f03d0a74034462c7fdd8fdde970c95b606be556074c41c042a1e02ce9c60bb29fddd0177d4eefdc6da5a32bac857e391ff
@@ -16,8 +16,19 @@ module TDigest
16
16
  reset!
17
17
  end
18
18
 
19
+ def +(other)
20
+ # Uses delta, k and cx from the caller
21
+ t = self.class.new(@delta, @k, @cx)
22
+ data = self.centroids.values + other.centroids.values
23
+ while data.length > 0
24
+ t.push_centroid(data.delete_at(rand(data.length)))
25
+ end
26
+ t
27
+ end
28
+
19
29
  def as_bytes
20
30
  # compression as defined by Java implementation
31
+ size = @centroids.size
21
32
  output = [VERBOSE_ENCODING, compression, size]
22
33
  output += @centroids.map { |_, c| c.mean }
23
34
  output += @centroids.map { |_, c| c.n }
@@ -25,6 +36,7 @@ module TDigest
25
36
  end
26
37
 
27
38
  def as_small_bytes
39
+ size = @centroids.size
28
40
  output = [SMALL_ENCODING, compression, size]
29
41
  x = 0
30
42
  # delta encoding allows saving 4-bytes floats
@@ -115,6 +127,13 @@ module TDigest
115
127
  end
116
128
  end
117
129
 
130
+ def merge!(other)
131
+ # Uses delta, k and cx from the caller
132
+ t = self + other
133
+ @centroids = t.centroids
134
+ compress!
135
+ end
136
+
118
137
  def p_rank(x)
119
138
  is_array = x.is_a? Array
120
139
  x = [x] unless is_array
@@ -188,7 +207,7 @@ module TDigest
188
207
  end
189
208
 
190
209
  def size
191
- @centroids.size
210
+ @n || 0
192
211
  end
193
212
 
194
213
  def to_a
@@ -310,6 +329,14 @@ module TDigest
310
329
 
311
330
  _cumulate(false)
312
331
 
332
+ # If the number of centroids has grown to a very large size,
333
+ # it may be due to values being inserted in sorted order.
334
+ # We combat that by replaying the centroids in random order,
335
+ # which is what compress! does
336
+ if @centroids.size > (@k / @delta)
337
+ compress!
338
+ end
339
+
313
340
  nil
314
341
  end
315
342
 
@@ -1,3 +1,3 @@
1
1
  module TDigest
2
- VERSION = "0.0.4"
2
+ VERSION = "0.1.0"
3
3
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: tdigest
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.4
4
+ version: 0.1.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Sebastian Wallin
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2016-01-09 00:00:00.000000000 Z
11
+ date: 2016-01-19 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: rbtree