tdigest 0.0.1

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: e55c5378cfd6bb3f02ff04c45e4a8303f2d983da
4
+ data.tar.gz: 860b7f13d3d16cef3fa4b351a5e44fb56d4c987a
5
+ SHA512:
6
+ metadata.gz: 1c4883e5bbcca571dc7215fa6b5884d03fce2850e06f0892653c6285eabfaa9af54a2d0c162a5dbdc7ba32c50314aa82c44ad98bf95391dedf0ad81fd8afbc8f
7
+ data.tar.gz: 7814344890f5b5d21012723f5291cb60ce0564f7792fb6e7ce0f7a9285097c636f90e0036d099ba79ac77b99346ad366f13176ee71aa6a5ad2160a5361c00949
@@ -0,0 +1,9 @@
1
+ /.bundle/
2
+ /.yardoc
3
+ /Gemfile.lock
4
+ /_yardoc/
5
+ /coverage/
6
+ /doc/
7
+ /pkg/
8
+ /spec/reports/
9
+ /tmp/
@@ -0,0 +1,4 @@
1
+ language: ruby
2
+ rvm:
3
+ - 2.2.3
4
+ before_install: gem install bundler -v 1.10.6
data/Gemfile ADDED
@@ -0,0 +1,4 @@
1
+ source 'https://rubygems.org'
2
+
3
+ # Specify your gem's dependencies in tdigest.gemspec
4
+ gemspec
@@ -0,0 +1,21 @@
1
+ The MIT License (MIT)
2
+
3
+ Copyright (c) 2015 Sebastian Wallin
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in
13
+ all copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
21
+ THE SOFTWARE.
@@ -0,0 +1,48 @@
1
+ # Tdigest
2
+
3
+ Ruby implementation of Ted Dunning's [t-digest](https://github.com/tdunning/t-digest) data structure.
4
+
5
+ Inspired by the [JavaScript implementation](https://github.com/welch/tdigest) by [Will Welch](https://github.com/welch).
6
+
7
+ ## Installation
8
+
9
+ Add this line to your application's Gemfile:
10
+
11
+ ```ruby
12
+ gem 'tdigest'
13
+ ```
14
+
15
+ And then execute:
16
+
17
+ $ bundle
18
+
19
+ Or install it yourself as:
20
+
21
+ $ gem install tdigest
22
+
23
+ ## Usage
24
+
25
+ ```ruby
26
+ td = ::TDigest::TDigest.new
27
+ 1_000.times { td.push(rand) }
28
+ td.compress!
29
+
30
+ puts td.percentile(0.5)
31
+ puts td.p_rank(0.95)
32
+ ```
33
+
34
+ ## Development
35
+
36
+ After checking out the repo, run `bin/setup` to install dependencies. Then, run `rake test` to run the tests. You can also run `bin/console` for an interactive prompt that will allow you to experiment.
37
+
38
+ To install this gem onto your local machine, run `bundle exec rake install`. To release a new version, update the version number in `version.rb`, and then run `bundle exec rake release`, which will create a git tag for the version, push git commits and tags, and push the `.gem` file to [rubygems.org](https://rubygems.org).
39
+
40
+ ## Contributing
41
+
42
+ Bug reports and pull requests are welcome on GitHub at https://github.com/castle/tdigest. This project is intended to be a safe, welcoming space for collaboration, and contributors are expected to adhere to the [Contributor Covenant](http://contributor-covenant.org) code of conduct.
43
+
44
+
45
+ ## License
46
+
47
+ The gem is available as open source under the terms of the [MIT License](http://opensource.org/licenses/MIT).
48
+
@@ -0,0 +1,10 @@
1
+ require "bundler/gem_tasks"
2
+ require "rake/testtask"
3
+
4
+ Rake::TestTask.new(:test) do |t|
5
+ t.libs << "test"
6
+ t.libs << "lib"
7
+ t.test_files = FileList['test/**/*_test.rb']
8
+ end
9
+
10
+ task :default => :test
@@ -0,0 +1,14 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ require "bundler/setup"
4
+ require "tdigest"
5
+
6
+ # You can add fixtures and/or initialization code here to make experimenting
7
+ # with your gem easier. You can also use a different console, if you like.
8
+
9
+ # (If you use this, don't forget to add pry to your Gemfile!)
10
+ # require "pry"
11
+ # Pry.start
12
+
13
+ require "irb"
14
+ IRB.start
@@ -0,0 +1,7 @@
1
+ #!/bin/bash
2
+ set -euo pipefail
3
+ IFS=$'\n\t'
4
+
5
+ bundle install
6
+
7
+ # Do any other automated setup that you need to do here
@@ -0,0 +1,6 @@
1
+ require "tdigest/version"
2
+ require "tdigest/tdigest"
3
+
4
+ module TDigest
5
+ # Namespace for the t-digest implementation (see TDigest::TDigest and TDigest::Centroid).
6
+ end
@@ -0,0 +1,10 @@
1
module TDigest
  # One cluster of a digest: a location plus weight bookkeeping.
  #
  # All four attributes are plain read/write accessors and stay nil until
  # something assigns them:
  #
  #   mean      - location of the centroid
  #   n         - weight accumulated in the centroid
  #   cumn      - cumulative weight counter maintained by the owning digest
  #   mean_cumn - cumulative weight counter maintained by the owning digest
  #               (presumably the cumulative weight at the centroid's
  #               midpoint -- confirm against the digest code)
  class Centroid
    attr_accessor :mean, :n, :cumn, :mean_cumn

    # Builds a centroid from a hash of attribute values.
    #
    # @param params [Hash] attribute name (Symbol or String) => initial value;
    #   each pair is applied through the matching public writer
    # @raise [NoMethodError] when a key does not name a public writer
    def initialize(params = {})
      params.each do |attribute, value|
        # public_send (rather than send) so a params hash can never reach a
        # private or protected writer.
        public_send("#{attribute}=", value)
      end
    end
  end
end
@@ -0,0 +1,210 @@
1
+ require 'rbtree'
2
+ require 'tdigest/centroid'
3
+
4
module TDigest
  # Approximate quantile sketch built from weighted centroids kept in an
  # RBTree ordered by the value each centroid was created at.
  #
  # Values are added with #push; #percentile and #p_rank query the sketch.
  class TDigest
    # @param delta [Numeric] scale factor for the maximum weight a centroid
    #   may absorb (see #_digest); smaller values produce more centroids
    # @param k [Numeric] stored in @k; not read anywhere else in this class
    # @param cx [Numeric] stored in @cx; not read anywhere else in this class
    def initialize(delta = 0.01, k = 25, cx = 1.1)
      @delta = delta
      @k = k
      @cx = cx
      @centroids = RBTree.new
      @nreset = 0
      reset!
    end

    # Looks up the centroids stored around +x+.
    #
    # @param x [Numeric] value to look up
    # @return [Array] two elements: the centroid found by the tree's
    #   +lower_bound(x)+ followed by the one found by +upper_bound(x)+.
    #   An element is nil when the tree has no such entry.
    def bound_mean(x)
      upper = @centroids.upper_bound(x)
      lower = @centroids.lower_bound(x)
      [lower && lower[1], upper && upper[1]]
    end

    # Finds the centroids whose +mean_cumn+ values bracket +cumn+, walking the
    # centroids in tree order.
    #
    # @param cumn [Numeric] cumulative weight to locate
    # @return [Array] +[lower, upper]+:
    #   * +[centroid, nil]+ on an exact +mean_cumn+ match
    #   * +[previous, centroid]+ for the first centroid whose +mean_cumn+
    #     exceeds +cumn+ (+previous+ is nil if that is the first centroid)
    #   * +[last_centroid, nil]+ when every +mean_cumn+ is below +cumn+
    #   * +[nil, nil]+ when there are no centroids
    def bound_mean_cumn(cumn)
      previous = nil
      @centroids.each do |_, centroid|
        return [centroid, nil] if centroid.mean_cumn == cumn
        return [previous, centroid] if centroid.mean_cumn > cumn
        previous = centroid
      end
      # Nothing reached +cumn+: report the last centroid instead of an empty
      # pair so callers asking for the very top of the distribution still get
      # a centroid back.
      [previous, nil]
    end

    # Rebuilds the digest by re-inserting the current centroids in random
    # order.
    #
    # @return [nil]
    def compress!
      points = to_a
      reset!
      until points.empty?
        push_centroid(points.delete_at(rand(points.length)))
      end
      _cumulate(true)
      nil
    end

    # Returns the centroid whose tree key is closest to +x+.
    #
    # @param x [Numeric]
    # @return [Centroid, nil] nil when the digest is empty. When both
    #   neighbours are equally far away the +upper_bound+ entry wins.
    def find_nearest(x)
      return nil if size == 0

      ceil = @centroids.upper_bound(x)
      floor = @centroids.lower_bound(x)

      return floor[1] if ceil.nil?
      return ceil[1] if floor.nil?

      if (floor[0] - x).abs < (ceil[0] - x).abs
        floor[1]
      else
        ceil[1]
      end
    end

    # Estimates the fraction of the recorded weight lying at or below a value.
    #
    # @param x [Numeric, Array<Numeric>] one value or a list of values; the
    #   argument itself is never modified
    # @return [Float, nil, Array] one result per input value (an Array when an
    #   Array was given). A result is nil for an empty digest, 0.0 below the
    #   smallest centroid mean and 1.0 above the largest.
    def p_rank(x)
      is_array = x.is_a? Array
      values = is_array ? x : [x]
      ranks = values.map { |item| _p_rank_of(item) }
      is_array ? ranks : ranks.first
    end

    # Estimates the value found at a given quantile.
    #
    # @param p [Numeric, Array<Numeric>] one quantile or a list of quantiles
    #   (multiplied by the total weight, so 0.5 asks for the middle); the
    #   argument itself is never modified
    # @return [Numeric, nil, Array] the mean of the selected centroid per
    #   quantile (an Array when an Array was given); nil for an empty digest
    def percentile(p)
      is_array = p.is_a? Array
      quantiles = is_array ? p : [p]
      values = quantiles.map { |item| _percentile_of(item) }
      is_array ? values : values.first
    end

    # Adds one value, or each value of an Array, with weight +n+.
    #
    # @param x [Numeric, Array<Numeric>]
    # @param n [Numeric] weight applied to every value
    def push(x, n = 1)
      x = [x] unless x.is_a? Array
      x.each { |value| _digest(value, n) }
    end

    # Adds one centroid, or each centroid of an Array, using its +mean+ and
    # +n+.
    #
    # @param c [Centroid, Array<Centroid>]
    def push_centroid(c)
      c = [c] unless c.is_a? Array
      c.each { |centroid| _digest(centroid.mean, centroid.n) }
    end

    # Drops every centroid, zeroes the weight counters and bumps the reset
    # counter.
    def reset!
      @centroids.clear
      @n = 0
      @nreset += 1
      @last_cumulate = 0
    end

    # @return [Integer] number of centroids currently stored
    def size
      @centroids.count
    end

    # @return [Array<Centroid>] the centroids in tree order
    def to_a
      @centroids.map { |_, c| c }
    end

    private

    # Computes the rank of a single value for #p_rank.
    def _p_rank_of(item)
      return nil if size == 0
      return 0.0 if item < @centroids.min[1].mean
      return 1.0 if item > @centroids.max[1].mean

      _cumulate(true)
      lower, upper = bound_mean(item)
      mean_cumn = lower.mean_cumn
      if lower != upper
        # Linear interpolation between the two neighbouring centroids.
        mean_cumn += (item - lower.mean) * (upper.mean_cumn - lower.mean_cumn) / (upper.mean - lower.mean)
      end
      # to_f keeps the result fractional even for integer weights.
      mean_cumn.to_f / @n
    end

    # Computes the value for a single quantile for #percentile.
    def _percentile_of(item)
      return nil if size == 0

      _cumulate(true)
      h = @n * item
      lower, upper = bound_mean_cumn(h)
      if upper == lower || lower.nil? || upper.nil?
        (lower || upper).mean
      elsif h == lower.mean_cumn
        lower.mean
      else
        upper.mean
      end
    end

    # Merges weight +n+ at location +x+ into +nearest+, moving its mean towards
    # +x+ as a weighted average.
    #
    # NOTE: only the centroid object changes; the key it is stored under in
    # the tree is left as it was.
    def _add_weight(nearest, x, n)
      unless x == nearest.mean
        # Float division so integer inputs do not truncate the new mean.
        nearest.mean += n * (x - nearest.mean) / (nearest.n + n).to_f
      end

      nearest.cumn += n
      nearest.mean_cumn += n / 2.0
      nearest.n += n
      @n += n

      nil
    end

    # Recomputes +cumn+ and +mean_cumn+ for every centroid, unless the total
    # weight has not changed since the previous pass.
    #
    # @param exact [Boolean] accepted for call-site compatibility; this
    #   implementation does not read it
    # @return [nil]
    def _cumulate(exact = false)
      return if @n == @last_cumulate

      cumn = 0
      @centroids.each do |_, c|
        # Half of the centroid's own weight counts towards its midpoint; 2.0
        # keeps that half fractional for odd integer weights.
        c.mean_cumn = cumn + c.n / 2.0
        cumn = c.cumn = cumn + c.n
      end
      @n = @last_cumulate = cumn
      nil
    end

    # Inserts weight +n+ at +x+: either merged into the nearest centroid or
    # stored as a new centroid.
    def _digest(x, n)
      min = @centroids.min
      max = @centroids.max
      min = min.nil? ? nil : min[1]
      max = max.nil? ? nil : max[1]
      nearest = find_nearest(x)

      if nearest && nearest.mean == x
        _add_weight(nearest, x, n)
      elsif nearest == min
        # Also covers the empty digest, where both are nil.
        _new_centroid(x, n, 0)
      elsif nearest == max
        _new_centroid(x, n, @n)
      else
        # Interior centroid: it may only grow up to a bound that shrinks
        # towards both ends of the distribution.
        p = nearest.mean_cumn.to_f / @n
        max_n = (4 * @n * @delta * p * (1 - p)).floor
        if max_n - nearest.n >= n
          _add_weight(nearest, x, n)
        else
          _new_centroid(x, n, nearest.cumn)
        end
      end

      _cumulate(false)

      nil
    end

    # Stores a new centroid under key +x+ and adds its weight to the total.
    #
    # @return [Centroid] the centroid that was created
    def _new_centroid(x, n, cumn)
      c = Centroid.new({ mean: x, n: n, cumn: cumn })
      @centroids[x] = c
      @n += n
      c
    end
  end
end
210
+
@@ -0,0 +1,3 @@
1
module TDigest
  # Gem version string; frozen so the shared constant cannot be mutated in
  # place by code that reads it.
  VERSION = "0.0.1".freeze
end
@@ -0,0 +1,27 @@
1
+ # coding: utf-8
2
+ lib = File.expand_path('../lib', __FILE__)
3
+ $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
4
+ require 'tdigest/version'
5
+
6
+ Gem::Specification.new do |spec|
7
+ spec.name = "tdigest"
8
+ spec.version = TDigest::VERSION
9
+ spec.authors = ["Sebastian Wallin"]
10
+ spec.email = ["sebastian.wallin@gmail.com"]
11
+
12
+ spec.summary = %q{Ruby implementation of Dunning's T-Digest for streaming quantile approximation}
13
+ spec.description = %q{Ruby implementation of Dunning's T-Digest for streaming quantile approximation}
14
+ spec.homepage = "https://github.com/castle/tdigest"
15
+ spec.license = "MIT"
16
+
17
+ spec.files = `git ls-files -z`.split("\x0").reject { |f| f.match(%r{^(test|spec|features)/}) }
18
+ spec.bindir = "exe"
19
+ spec.executables = spec.files.grep(%r{^exe/}) { |f| File.basename(f) }
20
+ spec.require_paths = ["lib"]
21
+
22
+ spec.add_runtime_dependency 'rbtree', '~> 0.4.2'
23
+
24
+ spec.add_development_dependency "bundler", "~> 1.10"
25
+ spec.add_development_dependency "rake", "~> 10.0"
26
+ spec.add_development_dependency "minitest"
27
+ end
metadata ADDED
@@ -0,0 +1,113 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: tdigest
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.1
5
+ platform: ruby
6
+ authors:
7
+ - Sebastian Wallin
8
+ autorequire:
9
+ bindir: exe
10
+ cert_chain: []
11
+ date: 2015-11-11 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: rbtree
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - "~>"
18
+ - !ruby/object:Gem::Version
19
+ version: 0.4.2
20
+ type: :runtime
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - "~>"
25
+ - !ruby/object:Gem::Version
26
+ version: 0.4.2
27
+ - !ruby/object:Gem::Dependency
28
+ name: bundler
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - "~>"
32
+ - !ruby/object:Gem::Version
33
+ version: '1.10'
34
+ type: :development
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - "~>"
39
+ - !ruby/object:Gem::Version
40
+ version: '1.10'
41
+ - !ruby/object:Gem::Dependency
42
+ name: rake
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - "~>"
46
+ - !ruby/object:Gem::Version
47
+ version: '10.0'
48
+ type: :development
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - "~>"
53
+ - !ruby/object:Gem::Version
54
+ version: '10.0'
55
+ - !ruby/object:Gem::Dependency
56
+ name: minitest
57
+ requirement: !ruby/object:Gem::Requirement
58
+ requirements:
59
+ - - ">="
60
+ - !ruby/object:Gem::Version
61
+ version: '0'
62
+ type: :development
63
+ prerelease: false
64
+ version_requirements: !ruby/object:Gem::Requirement
65
+ requirements:
66
+ - - ">="
67
+ - !ruby/object:Gem::Version
68
+ version: '0'
69
+ description: Ruby implementation of Dunning's T-Digest for streaming quantile approximation
70
+ email:
71
+ - sebastian.wallin@gmail.com
72
+ executables: []
73
+ extensions: []
74
+ extra_rdoc_files: []
75
+ files:
76
+ - ".gitignore"
77
+ - ".travis.yml"
78
+ - Gemfile
79
+ - LICENSE.txt
80
+ - README.md
81
+ - Rakefile
82
+ - bin/console
83
+ - bin/setup
84
+ - lib/tdigest.rb
85
+ - lib/tdigest/centroid.rb
86
+ - lib/tdigest/tdigest.rb
87
+ - lib/tdigest/version.rb
88
+ - tdigest.gemspec
89
+ homepage: https://github.com/castle/tdigest
90
+ licenses:
91
+ - MIT
92
+ metadata: {}
93
+ post_install_message:
94
+ rdoc_options: []
95
+ require_paths:
96
+ - lib
97
+ required_ruby_version: !ruby/object:Gem::Requirement
98
+ requirements:
99
+ - - ">="
100
+ - !ruby/object:Gem::Version
101
+ version: '0'
102
+ required_rubygems_version: !ruby/object:Gem::Requirement
103
+ requirements:
104
+ - - ">="
105
+ - !ruby/object:Gem::Version
106
+ version: '0'
107
+ requirements: []
108
+ rubyforge_project:
109
+ rubygems_version: 2.4.5.1
110
+ signing_key:
111
+ specification_version: 4
112
+ summary: Ruby implementation of Dunning's T-Digest for streaming quantile approximation
113
+ test_files: []