tdigest 0.0.4 → 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/tdigest/tdigest.rb +28 -1
- data/lib/tdigest/version.rb +1 -1
- metadata +2 -2
    
        checksums.yaml
    CHANGED
    
    | @@ -1,7 +1,7 @@ | |
| 1 1 | 
             
            ---
         | 
| 2 2 | 
             
            SHA1:
         | 
| 3 | 
            -
              metadata.gz:  | 
| 4 | 
            -
              data.tar.gz:  | 
| 3 | 
            +
              metadata.gz: 13d323c00e8ecc6fc72db137c6160f28c23a8615
         | 
| 4 | 
            +
              data.tar.gz: efad7cb62f79a512e4fc68dceca2ae5a55a693c3
         | 
| 5 5 | 
             
            SHA512:
         | 
| 6 | 
            -
              metadata.gz:  | 
| 7 | 
            -
              data.tar.gz:  | 
| 6 | 
            +
              metadata.gz: a948d7d63a22957a34e9e1cf71d4e6904a325b1b7e4cd93de12611332c41c62be932a7fd69a6f356122c7da033bc24abaa86db29db5b03a8d1dc79031f603e59
         | 
| 7 | 
            +
              data.tar.gz: 6fb668b7bbe9f1885af98036843095f03d0a74034462c7fdd8fdde970c95b606be556074c41c042a1e02ce9c60bb29fddd0177d4eefdc6da5a32bac857e391ff
         | 
    
        data/lib/tdigest/tdigest.rb
    CHANGED
    
    | @@ -16,8 +16,19 @@ module TDigest | |
| 16 16 | 
             
                  reset!
         | 
| 17 17 | 
             
                end
         | 
| 18 18 |  | 
| 19 | 
            +
                def +(other)
         | 
| 20 | 
            +
                  # Uses delta, k and cx from the caller
         | 
| 21 | 
            +
                  t = self.class.new(@delta, @k, @cx)
         | 
| 22 | 
            +
                  data = self.centroids.values + other.centroids.values
         | 
| 23 | 
            +
                  while data.length > 0
         | 
| 24 | 
            +
                    t.push_centroid(data.delete_at(rand(data.length)))
         | 
| 25 | 
            +
                  end
         | 
| 26 | 
            +
                  t
         | 
| 27 | 
            +
                end
         | 
| 28 | 
            +
             | 
| 19 29 | 
             
                def as_bytes
         | 
| 20 30 | 
             
                  # compression as defined by Java implementation
         | 
| 31 | 
            +
                  size = @centroids.size
         | 
| 21 32 | 
             
                  output = [VERBOSE_ENCODING, compression, size]
         | 
| 22 33 | 
             
                  output += @centroids.map { |_, c| c.mean }
         | 
| 23 34 | 
             
                  output += @centroids.map { |_, c| c.n }
         | 
| @@ -25,6 +36,7 @@ module TDigest | |
| 25 36 | 
             
                end
         | 
| 26 37 |  | 
| 27 38 | 
             
                def as_small_bytes
         | 
| 39 | 
            +
                  size = @centroids.size
         | 
| 28 40 | 
             
                  output = [SMALL_ENCODING, compression, size]
         | 
| 29 41 | 
             
                  x = 0
         | 
| 30 42 | 
             
                  # delta encoding allows saving 4-bytes floats
         | 
| @@ -115,6 +127,13 @@ module TDigest | |
| 115 127 | 
             
                  end
         | 
| 116 128 | 
             
                end
         | 
| 117 129 |  | 
| 130 | 
            +
                def merge!(other)
         | 
| 131 | 
            +
                  # Uses delta, k and cx from the caller
         | 
| 132 | 
            +
                  t = self + other
         | 
| 133 | 
            +
                  @centroids = t.centroids
         | 
| 134 | 
            +
                  compress!
         | 
| 135 | 
            +
                end
         | 
| 136 | 
            +
             | 
| 118 137 | 
             
                def p_rank(x)
         | 
| 119 138 | 
             
                  is_array = x.is_a? Array
         | 
| 120 139 | 
             
                  x = [x] unless is_array
         | 
| @@ -188,7 +207,7 @@ module TDigest | |
| 188 207 | 
             
                end
         | 
| 189 208 |  | 
| 190 209 | 
             
                def size
         | 
| 191 | 
            -
                  @centroids.size | 
| 210 | 
            +
                  @n || 0
         | 
| 192 211 | 
             
                end
         | 
| 193 212 |  | 
| 194 213 | 
             
                def to_a
         | 
| @@ -310,6 +329,14 @@ module TDigest | |
| 310 329 |  | 
| 311 330 | 
             
                  _cumulate(false)
         | 
| 312 331 |  | 
| 332 | 
            +
                  # If the number of centroids has grown to a very large size,
         | 
| 333 | 
            +
                  # it may be due to values being inserted in sorted order.
         | 
| 334 | 
            +
                  # We combat that by replaying the centroids in random order,
         | 
| 335 | 
            +
                  # which is what compress! does
         | 
| 336 | 
            +
                  if @centroids.size > (@k / @delta)
         | 
| 337 | 
            +
                    compress!
         | 
| 338 | 
            +
                  end
         | 
| 339 | 
            +
             | 
| 313 340 | 
             
                  nil
         | 
| 314 341 | 
             
                end
         | 
| 315 342 |  | 
    
        data/lib/tdigest/version.rb
    CHANGED
    
    
    
        metadata
    CHANGED
    
    | @@ -1,14 +1,14 @@ | |
| 1 1 | 
             
            --- !ruby/object:Gem::Specification
         | 
| 2 2 | 
             
            name: tdigest
         | 
| 3 3 | 
             
            version: !ruby/object:Gem::Version
         | 
| 4 | 
            -
              version: 0.0.4 | 
| 4 | 
            +
              version: 0.1.0
         | 
| 5 5 | 
             
            platform: ruby
         | 
| 6 6 | 
             
            authors:
         | 
| 7 7 | 
             
            - Sebastian Wallin
         | 
| 8 8 | 
             
            autorequire: 
         | 
| 9 9 | 
             
            bindir: exe
         | 
| 10 10 | 
             
            cert_chain: []
         | 
| 11 | 
            -
            date: 2016-01- | 
| 11 | 
            +
            date: 2016-01-19 00:00:00.000000000 Z
         | 
| 12 12 | 
             
            dependencies:
         | 
| 13 13 | 
             
            - !ruby/object:Gem::Dependency
         | 
| 14 14 | 
             
              name: rbtree
         |