pg_histogram 0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.gitignore +18 -0
- data/.ruby-version +1 -0
- data/Gemfile +4 -0
- data/LICENSE.txt +22 -0
- data/README.md +52 -0
- data/Rakefile +9 -0
- data/lib/pg_histogram/histogram.rb +86 -0
- data/lib/pg_histogram/version.rb +3 -0
- data/lib/pg_histogram.rb +5 -0
- data/pg_histogram.gemspec +25 -0
- data/test/database.yml +6 -0
- data/test/histogram_test.rb +75 -0
- data/test/test_helper.rb +27 -0
- metadata +116 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: f1fd000101bac4dc9af44882ab5dd641b3062430
|
4
|
+
data.tar.gz: dc532f7711f65c11aa57706edb3ac6a17124f2e1
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: dd6ef99ccd68ce5bf02bcf75c79a76bc95b61b91c0a44417cc4283ccd659b71af989b80f3e225bfd1b720695c79df0cbb8f00b045134a2bf01e91d7348af3c18
|
7
|
+
data.tar.gz: 096ae0c70711f89ca3a3f9d9a5dc0c7f4585a14248504244859a49e24f6e2ff2edb34017d4f21f4e1853e70d644eb793caa346b0a3a03e8f2875d1120e0e9b33
|
data/.gitignore
ADDED
data/.ruby-version
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
2.1.3
|
data/Gemfile
ADDED
data/LICENSE.txt
ADDED
@@ -0,0 +1,22 @@
|
|
1
|
+
Copyright (c) 2014 David Roberts
|
2
|
+
|
3
|
+
MIT License
|
4
|
+
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining
|
6
|
+
a copy of this software and associated documentation files (the
|
7
|
+
"Software"), to deal in the Software without restriction, including
|
8
|
+
without limitation the rights to use, copy, modify, merge, publish,
|
9
|
+
distribute, sublicense, and/or sell copies of the Software, and to
|
10
|
+
permit persons to whom the Software is furnished to do so, subject to
|
11
|
+
the following conditions:
|
12
|
+
|
13
|
+
The above copyright notice and this permission notice shall be
|
14
|
+
included in all copies or substantial portions of the Software.
|
15
|
+
|
16
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
17
|
+
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
18
|
+
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
19
|
+
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
20
|
+
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
21
|
+
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
22
|
+
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
data/README.md
ADDED
@@ -0,0 +1,52 @@
|
|
1
|
+
# PostgreSQL Histogram (for ActiveRecord)
|
2
|
+
|
3
|
+
This gem allows you to efficiently create a histogram from large data sets in your Rails applications.
|
4
|
+
|
5
|
+
It uses PostgreSQL's [width_bucket](http://www.postgresql.org/docs/9.3/static/functions-math.html) function to handle the majority of the processing in the database, and only requires 3 database queries.
|
6
|
+
|
7
|
+
|
8
|
+
|
9
|
+
## Installation
|
10
|
+
|
11
|
+
Add this line to your application's Gemfile:
|
12
|
+
|
13
|
+
gem 'pg_histogram'
|
14
|
+
|
15
|
+
And then execute:
|
16
|
+
|
17
|
+
$ bundle
|
18
|
+
|
19
|
+
Or install it yourself as:
|
20
|
+
|
21
|
+
$ gem install pg_histogram
|
22
|
+
|
23
|
+
## Usage
|
24
|
+
|
25
|
+
Create a Histogram object using the following three parameters:
|
26
|
+
|
27
|
+
1. ActiveRecord query to use
|
28
|
+
2. Name of column to count frequency of
|
29
|
+
3. Bucket size (OPTIONAL - default is 0.5)
|
30
|
+
|
31
|
+
<!-- -->
|
32
|
+
histogram = PgHistogram::Histogram.new(Widget.all, 'price', 0.5)
|
33
|
+
|
34
|
+
|
35
|
+
Call the results method to retrieve a Hash of bucket minimums and frequency counts
|
36
|
+
|
37
|
+
# create sample data
|
38
|
+
5.times { Widget.create(price: 1.2) }
|
39
|
+
10.times { Widget.create(price: 2.9) }
|
40
|
+
|
41
|
+
# get the results
|
42
|
+
@histogram_data = histogram.results
|
43
|
+
=> {1.0=>5, 2.5=>10}
|
44
|
+
|
45
|
+
|
46
|
+
The results can be used by your favorite charting library, such as [Chartkick](https://github.com/ankane/chartkick), to plot the data.
|
47
|
+
|
48
|
+
<%= column_chart @histogram_data %>
|
49
|
+
|
50
|
+
## Dependencies
|
51
|
+
|
52
|
+
This gem has been tested with Ruby 2.1.3 and ActiveRecord 4.1.6. Please open an issue or PR if you experience issues with other versions.
|
data/Rakefile
ADDED
@@ -0,0 +1,86 @@
|
|
1
|
+
module PgHistogram
  # Builds a frequency histogram for one column of an ActiveRecord query,
  # letting PostgreSQL's width_bucket function do the bucketing.
  #
  # Bucket boundaries are multiples of bucket_size. Each key of #results is
  # the lowest value that can fall into a bucket, each value is the number of
  # rows counted in that bucket.
  class Histogram
    attr_reader :query, :column, :bucket_size

    # Column aliases used in the generated SQL and when reading result rows.
    BUCKET_COL = 'bucket'.freeze
    FREQUENCY_COL = 'frequency'.freeze

    # Maps the optional rounding direction to the Numeric method that
    # implements it (nil means "round to nearest").
    ROUND_METHODS_BY_DIRECTION = {
      nil => :round,
      down: :floor,
      up: :ceil
    }.freeze

    # query       - ActiveRecord query whose rows are counted.
    # column_name - name of the column to bucket. It is interpolated into
    #               SQL verbatim, so it must never come from untrusted input.
    # bucket_size - width of every bucket (Integer or Float, default 0.5).
    def initialize(query, column_name, bucket_size = 0.5)
      @query = query
      @column = column_name.to_s
      @bucket_size = bucket_size
    end

    # Returns the histogram as a Hash of bucket minimum => frequency.
    def results
      # When both bounds coincide the bucket count would be zero, which
      # width_bucket cannot handle; count the single value directly instead.
      if max == min
        { min => query.where("#{column} = ?", min).count }
      else
        labeled_histogram
      end
    end

    # Smallest column value, rounded down to a bucket_size increment
    # (0 when the query yields no minimum).
    def min
      @min ||= round_to_increment(query.minimum(column), :down)
    end

    # Largest column value, rounded up to a bucket_size increment
    # (0 when the query yields no maximum).
    def max
      @max ||= round_to_increment(query.maximum(column), :up)
    end

    private

    # Number of bucket_size-wide buckets between min and max.
    def num_buckets
      # min and max are both multiples of bucket_size, so the quotient is a
      # whole number up to floating point error (0.3 / 0.1 is
      # 2.9999999999999996). Round instead of truncating so no bucket is lost.
      @num_buckets ||= ((max - min) / bucket_size).round
    end

    # Returns the bucket label (minimum which can be in the bucket) for a
    # 1-based bucket number.
    def bucket_num_to_label(bucket_num)
      min + bucket_size * (bucket_num - 1)
    end

    # Rounds num to the nearest bucket_size increment.
    #
    # direction - nil (nearest), :up or :down to always round one way.
    #
    # Returns a Float, or 0 when num is nil.
    # Raises ArgumentError for any other direction.
    def round_to_increment(num, direction = nil)
      return 0 if num.nil?

      round_method = ROUND_METHODS_BY_DIRECTION.fetch(direction) do
        raise ArgumentError, "unknown rounding direction: #{direction.inspect}"
      end
      # 1.0 (not 1) keeps this a float division: with an Integer bucket_size
      # such as 2, `1 / 2` would be 0 and every result would become NaN.
      denominator = 1.0 / bucket_size
      (num * denominator).public_send(round_method) / denominator.to_f
    end

    # Executes the bucket query and converts each bucket number into the
    # minimum value of that bucket.
    def labeled_histogram
      query_for_buckets.each_with_object({}) do |row, histogram|
        bucket = row[BUCKET_COL]
        next if bucket.nil? # rows without a bucket are left out of the result

        histogram[bucket_num_to_label(bucket.to_i)] = row[FREQUENCY_COL].to_i
      end
    end

    # Runs the width_bucket aggregation and returns the raw result rows, one
    # per non-empty bucket, ordered by bucket number.
    def query_for_buckets
      ActiveRecord::Base.connection.execute(
        <<-SQL
          SELECT width_bucket(#{column}, #{min}, #{max}, #{num_buckets}) as #{BUCKET_COL},
            count(*) as #{FREQUENCY_COL}
          FROM (#{subquery.to_sql}) as subq_results
          GROUP BY #{BUCKET_COL}
          ORDER BY #{BUCKET_COL}
        SQL
      )
    end

    # Uses the passed AR query as a subquery so it does not interfere with
    # the GROUP BY clause of the bucket query.
    def subquery
      # select only the histogram column and order by it
      query.select(column).order('1')
    end
  end
end
|
data/lib/pg_histogram.rb
ADDED
@@ -0,0 +1,25 @@
|
|
1
|
+
# coding: utf-8
# Gem specification for pg_histogram.

# Put this gem's lib directory on the load path so the version file can be
# required before the gem is installed.
lib = File.expand_path('../lib', __FILE__)
$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
require 'pg_histogram/version'

Gem::Specification.new do |spec|
  spec.name          = "pg_histogram"
  spec.version       = PgHistogram::VERSION
  spec.authors       = ["David Roberts"]
  spec.email         = ["david.roberts@elocal.com"]
  spec.description   = %q{Creates a Histogram from an ActiveRecord query}
  spec.summary       = %q{Histograms using PostgreSQL and ActiveRecord}
  spec.homepage      = "https://github.com/eLocal/pg_histogram"
  spec.license       = "MIT"

  # Package every file tracked by git; executables and test files are
  # picked out of that list by path prefix.
  spec.files         = `git ls-files`.split($/)
  spec.executables   = spec.files.grep(%r{^bin/}) { |f| File.basename(f) }
  spec.test_files    = spec.files.grep(%r{^(test|spec|features)/})
  spec.require_paths = ["lib"]

  # Runtime dependencies
  spec.add_dependency "activerecord", "~> 4.0"
  spec.add_dependency "pg", "~> 0.1"

  # Development-only dependencies
  spec.add_development_dependency "bundler", "~> 1.3"
  spec.add_development_dependency "rake"
end
|
data/test/database.yml
ADDED
@@ -0,0 +1,75 @@
|
|
1
|
+
require_relative 'test_helper'

# Tests for PgHistogram::Histogram, exercised through Widget records and
# their price column.
class HistogramTest < Minitest::Test

  # Every test starts with no widgets and a histogram of 0.5-wide buckets.
  def setup
    Widget.delete_all
    @hist = PgHistogram::Histogram.new(Widget.all, 'price', 0.5)
  end

  # A single row takes the min == max path of Histogram#results.
  def test_with_1_result
    Widget.create!(price: 2.00)

    assert_equal 2.0, @hist.min, 'Minimum is the single price'
    assert_equal 2.0, @hist.max, 'Maximum is the single price'
    assert_equal 1, @hist.results[2.0], 'Frequency of 2.0 bucket'
  end

  # A nil price must not create a bucket of its own.
  def test_ignores_nils_with_1_result
    Widget.create!(price: 3.00)
    Widget.create!(price: nil)

    results = @hist.results
    assert_equal 3.0, @hist.min, 'Minimum is the single price'
    assert_equal 3.0, @hist.max, 'Maximum is the single price'
    assert_equal 1, results.count, 'Histogram bucket count'
    assert_equal 1, results[3.0], 'Frequency of 3.0 bucket'
  end

  # Same as above, but through the width_bucket query path.
  def test_ignores_nils_with_multiple_results
    Widget.create!(price: 3.00)
    Widget.create!(price: 2.25)
    Widget.create!(price: nil)

    results = @hist.results
    assert_equal 2.0, @hist.min, 'Minimum'
    assert_equal 3.0, @hist.max, 'Maximum'
    assert_equal 2, results.count, 'Histogram bucket count'
    assert_equal 1, results[3.0], 'Frequency of 3.0 bucket'
    assert_equal 1, results[2.0], 'Frequency of 2.0 bucket'
  end

  def test_with_many_results
    # use a different bucket size
    hist = PgHistogram::Histogram.new(Widget.all, 'price', 0.25)

    10.times { Widget.create!(price: 3.0) }
    8.times { Widget.create!(price: 5.76) }
    Widget.create!(price: 0.98) # lowest price, rounds down to 0.75
    Widget.create!(price: 6.0)  # highest price, already on an increment
    results = hist.results

    assert_equal 0.75, hist.min, 'Histogram minimum price'
    assert_equal 6.0, hist.max, 'Histogram maximum price'
    assert_equal 21, hist.send(:num_buckets), 'Histogram buckets'
    assert_equal 4, results.size, 'Histogram buckets with results'
    assert_equal 1, results[0.75], 'Frequency of 0.75 bucket'
    assert_equal 10, results[3.0], 'Frequency of 3.0 bucket'
    assert_equal 8, results[5.75], 'Frequency of 5.75 bucket'
    assert_equal 1, results[6.0], 'Frequency of 6.0 bucket'
  end

  # round_to_increment is private and needs neither query nor column.
  def test_rounding_to_bucket_size
    hist = PgHistogram::Histogram.new(nil, nil, 0.25)

    assert_equal 0.5, hist.send(:round_to_increment, 0.478), '0.478 rounded to 0.25 interval'
    assert_equal 1.0, hist.send(:round_to_increment, 1.1), '1.1 rounded to 0.25 interval'
    assert_equal 0.5, hist.send(:round_to_increment, 0.5), '0.5 rounded to 0.25 interval'
    assert_equal 0.25, hist.send(:round_to_increment, 0.478, :down), '0.478 rounded down to 0.25 interval'
    assert_equal 1.0, hist.send(:round_to_increment, 1.1, :down), '1.1 rounded down to 0.25 interval'
    assert_equal 0.5, hist.send(:round_to_increment, 0.5, :down), '0.5 rounded down to 0.25 interval'
    assert_equal 0.5, hist.send(:round_to_increment, 0.478, :up), '0.478 rounded up to 0.25 interval'
    assert_equal 1.25, hist.send(:round_to_increment, 1.1, :up), '1.1 rounded up to 0.25 interval'
    assert_equal 0.5, hist.send(:round_to_increment, 0.5, :up), '0.5 rounded up to 0.25 interval'
  end
end
|
data/test/test_helper.rb
ADDED
@@ -0,0 +1,27 @@
|
|
1
|
+
require 'minitest/autorun'
require 'minitest/spec'
require 'logger'
require 'active_record'
require 'yaml'
require 'pg_histogram'

# Resolve paths from this file rather than the current working directory so
# the tests can be started from anywhere inside the project.
test_dir = File.expand_path('..', __FILE__)
log_dir = File.expand_path('../tmp', test_dir)
# Logger does not create missing directories; make sure the log dir exists.
Dir.mkdir(log_dir) unless File.directory?(log_dir)

config = YAML.load(File.read(File.join(test_dir, 'database.yml')))
ActiveRecord::Base.establish_connection config['test']
ActiveRecord::Base.logger = Logger.new File.join(log_dir, 'test.log')
ActiveRecord::Base.logger.level = Logger::DEBUG
ActiveRecord::Migration.verbose = false


# Set up the database that we require
ActiveRecord::Schema.define do
  # force: true recreates the table on every run
  create_table :widgets, force: true do |t|
    t.float :price
    t.timestamps
  end
end


# Minimal model backing the histogram tests.
class Widget < ActiveRecord::Base
end
|
27
|
+
|
metadata
ADDED
@@ -0,0 +1,116 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: pg_histogram
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: '0.1'
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- David Roberts
|
8
|
+
autorequire:
|
9
|
+
bindir: bin
|
10
|
+
cert_chain: []
|
11
|
+
date: 2014-10-09 00:00:00.000000000 Z
|
12
|
+
dependencies:
|
13
|
+
- !ruby/object:Gem::Dependency
|
14
|
+
name: activerecord
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
16
|
+
requirements:
|
17
|
+
- - "~>"
|
18
|
+
- !ruby/object:Gem::Version
|
19
|
+
version: '4.0'
|
20
|
+
type: :runtime
|
21
|
+
prerelease: false
|
22
|
+
version_requirements: !ruby/object:Gem::Requirement
|
23
|
+
requirements:
|
24
|
+
- - "~>"
|
25
|
+
- !ruby/object:Gem::Version
|
26
|
+
version: '4.0'
|
27
|
+
- !ruby/object:Gem::Dependency
|
28
|
+
name: pg
|
29
|
+
requirement: !ruby/object:Gem::Requirement
|
30
|
+
requirements:
|
31
|
+
- - "~>"
|
32
|
+
- !ruby/object:Gem::Version
|
33
|
+
version: '0.1'
|
34
|
+
type: :runtime
|
35
|
+
prerelease: false
|
36
|
+
version_requirements: !ruby/object:Gem::Requirement
|
37
|
+
requirements:
|
38
|
+
- - "~>"
|
39
|
+
- !ruby/object:Gem::Version
|
40
|
+
version: '0.1'
|
41
|
+
- !ruby/object:Gem::Dependency
|
42
|
+
name: bundler
|
43
|
+
requirement: !ruby/object:Gem::Requirement
|
44
|
+
requirements:
|
45
|
+
- - "~>"
|
46
|
+
- !ruby/object:Gem::Version
|
47
|
+
version: '1.3'
|
48
|
+
type: :development
|
49
|
+
prerelease: false
|
50
|
+
version_requirements: !ruby/object:Gem::Requirement
|
51
|
+
requirements:
|
52
|
+
- - "~>"
|
53
|
+
- !ruby/object:Gem::Version
|
54
|
+
version: '1.3'
|
55
|
+
- !ruby/object:Gem::Dependency
|
56
|
+
name: rake
|
57
|
+
requirement: !ruby/object:Gem::Requirement
|
58
|
+
requirements:
|
59
|
+
- - ">="
|
60
|
+
- !ruby/object:Gem::Version
|
61
|
+
version: '0'
|
62
|
+
type: :development
|
63
|
+
prerelease: false
|
64
|
+
version_requirements: !ruby/object:Gem::Requirement
|
65
|
+
requirements:
|
66
|
+
- - ">="
|
67
|
+
- !ruby/object:Gem::Version
|
68
|
+
version: '0'
|
69
|
+
description: Creates a Histogram fron an ActiveRecord query
|
70
|
+
email:
|
71
|
+
- david.roberts@elocal.com
|
72
|
+
executables: []
|
73
|
+
extensions: []
|
74
|
+
extra_rdoc_files: []
|
75
|
+
files:
|
76
|
+
- ".gitignore"
|
77
|
+
- ".ruby-version"
|
78
|
+
- Gemfile
|
79
|
+
- LICENSE.txt
|
80
|
+
- README.md
|
81
|
+
- Rakefile
|
82
|
+
- lib/pg_histogram.rb
|
83
|
+
- lib/pg_histogram/histogram.rb
|
84
|
+
- lib/pg_histogram/version.rb
|
85
|
+
- pg_histogram.gemspec
|
86
|
+
- test/database.yml
|
87
|
+
- test/histogram_test.rb
|
88
|
+
- test/test_helper.rb
|
89
|
+
homepage: https://github.com/eLocal/pg_histogram
|
90
|
+
licenses:
|
91
|
+
- MIT
|
92
|
+
metadata: {}
|
93
|
+
post_install_message:
|
94
|
+
rdoc_options: []
|
95
|
+
require_paths:
|
96
|
+
- lib
|
97
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
98
|
+
requirements:
|
99
|
+
- - ">="
|
100
|
+
- !ruby/object:Gem::Version
|
101
|
+
version: '0'
|
102
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
103
|
+
requirements:
|
104
|
+
- - ">="
|
105
|
+
- !ruby/object:Gem::Version
|
106
|
+
version: '0'
|
107
|
+
requirements: []
|
108
|
+
rubyforge_project:
|
109
|
+
rubygems_version: 2.2.2
|
110
|
+
signing_key:
|
111
|
+
specification_version: 4
|
112
|
+
summary: Histograms using PostgreSQL and ActiveRecord
|
113
|
+
test_files:
|
114
|
+
- test/database.yml
|
115
|
+
- test/histogram_test.rb
|
116
|
+
- test/test_helper.rb
|