tdigest 0.0.4 → 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: d6b00ddf3a1a0b0a5989002fb3cf8fd51b352c6a
4
- data.tar.gz: 60d1e7f4b42e3e38300f2f9380f9a91077bbd02f
3
+ metadata.gz: 13d323c00e8ecc6fc72db137c6160f28c23a8615
4
+ data.tar.gz: efad7cb62f79a512e4fc68dceca2ae5a55a693c3
5
5
  SHA512:
6
- metadata.gz: d7154db5857ee2b184ff16c1f2afb7ea110be15cb1d4253e5287b0bf6e656232fccdffecf1f0a3ba6f52626039f0498b030a56cc5e71b51f0a6c82c60f15c7f2
7
- data.tar.gz: 7265929cc8f77b8973cd9d60240c38f3ae3ce29e9af6644e18041c7a56f72ec5980bccf1696cb232865d294853821ce31d8582e57bd26806f79785610b1aa3d9
6
+ metadata.gz: a948d7d63a22957a34e9e1cf71d4e6904a325b1b7e4cd93de12611332c41c62be932a7fd69a6f356122c7da033bc24abaa86db29db5b03a8d1dc79031f603e59
7
+ data.tar.gz: 6fb668b7bbe9f1885af98036843095f03d0a74034462c7fdd8fdde970c95b606be556074c41c042a1e02ce9c60bb29fddd0177d4eefdc6da5a32bac857e391ff
@@ -16,8 +16,19 @@ module TDigest
16
16
  reset!
17
17
  end
18
18
 
19
+ def +(other)
20
+ # Uses delta, k and cx from the caller
21
+ t = self.class.new(@delta, @k, @cx)
22
+ data = self.centroids.values + other.centroids.values
23
+ while data.length > 0
24
+ t.push_centroid(data.delete_at(rand(data.length)))
25
+ end
26
+ t
27
+ end
28
+
19
29
  def as_bytes
20
30
  # compression as defined by Java implementation
31
+ size = @centroids.size
21
32
  output = [VERBOSE_ENCODING, compression, size]
22
33
  output += @centroids.map { |_, c| c.mean }
23
34
  output += @centroids.map { |_, c| c.n }
@@ -25,6 +36,7 @@ module TDigest
25
36
  end
26
37
 
27
38
  def as_small_bytes
39
+ size = @centroids.size
28
40
  output = [SMALL_ENCODING, compression, size]
29
41
  x = 0
30
42
  # delta encoding allows saving 4-bytes floats
@@ -115,6 +127,13 @@ module TDigest
115
127
  end
116
128
  end
117
129
 
130
+ def merge!(other)
131
+ # Uses delta, k and cx from the caller
132
+ t = self + other
133
+ @centroids = t.centroids
134
+ compress!
135
+ end
136
+
118
137
  def p_rank(x)
119
138
  is_array = x.is_a? Array
120
139
  x = [x] unless is_array
@@ -188,7 +207,7 @@ module TDigest
188
207
  end
189
208
 
190
209
  def size
191
- @centroids.size
210
+ @n || 0
192
211
  end
193
212
 
194
213
  def to_a
@@ -310,6 +329,14 @@ module TDigest
310
329
 
311
330
  _cumulate(false)
312
331
 
332
+ # If the number of centroids has grown to a very large size,
333
+ # it may be due to values being inserted in sorted order.
334
+ # We combat that by replaying the centroids in random order,
335
+ # which is what compress! does
336
+ if @centroids.size > (@k / @delta)
337
+ compress!
338
+ end
339
+
313
340
  nil
314
341
  end
315
342
 
@@ -1,3 +1,3 @@
1
1
  module TDigest
2
- VERSION = "0.0.4"
2
+ VERSION = "0.1.0"
3
3
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: tdigest
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.4
4
+ version: 0.1.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Sebastian Wallin
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2016-01-09 00:00:00.000000000 Z
11
+ date: 2016-01-19 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: rbtree