pg_histogram 0.1

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: f1fd000101bac4dc9af44882ab5dd641b3062430
4
+ data.tar.gz: dc532f7711f65c11aa57706edb3ac6a17124f2e1
5
+ SHA512:
6
+ metadata.gz: dd6ef99ccd68ce5bf02bcf75c79a76bc95b61b91c0a44417cc4283ccd659b71af989b80f3e225bfd1b720695c79df0cbb8f00b045134a2bf01e91d7348af3c18
7
+ data.tar.gz: 096ae0c70711f89ca3a3f9d9a5dc0c7f4585a14248504244859a49e24f6e2ff2edb34017d4f21f4e1853e70d644eb793caa346b0a3a03e8f2875d1120e0e9b33
data/.gitignore ADDED
@@ -0,0 +1,18 @@
1
+ *.gem
2
+ *.rbc
3
+ .bundle
4
+ .config
5
+ .yardoc
6
+ Gemfile.lock
7
+ InstalledFiles
8
+ _yardoc
9
+ coverage
10
+ doc/
11
+ lib/bundler/man
12
+ pkg
13
+ rdoc
14
+ spec/reports
15
+ test/tmp
16
+ test/version_tmp
17
+ tmp
18
+ .DS_Store
data/.ruby-version ADDED
@@ -0,0 +1 @@
1
+ 2.1.3
data/Gemfile ADDED
@@ -0,0 +1,4 @@
1
+ source 'https://rubygems.org'
2
+
3
+ # Specify your gem's dependencies in pg_histogram.gemspec
4
+ gemspec
data/LICENSE.txt ADDED
@@ -0,0 +1,22 @@
1
+ Copyright (c) 2014 David Roberts
2
+
3
+ MIT License
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining
6
+ a copy of this software and associated documentation files (the
7
+ "Software"), to deal in the Software without restriction, including
8
+ without limitation the rights to use, copy, modify, merge, publish,
9
+ distribute, sublicense, and/or sell copies of the Software, and to
10
+ permit persons to whom the Software is furnished to do so, subject to
11
+ the following conditions:
12
+
13
+ The above copyright notice and this permission notice shall be
14
+ included in all copies or substantial portions of the Software.
15
+
16
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
19
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
20
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
21
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
22
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/README.md ADDED
@@ -0,0 +1,52 @@
1
+ # PostgreSQL Histogram (for ActiveRecord)
2
+
3
+ This gem lets you efficiently create a histogram from large data sets in your Rails applications.
4
+
5
+ It uses PostgreSQL's [width_bucket](http://www.postgresql.org/docs/9.3/static/functions-math.html) function to handle the majority of the processing in the database, and only requires 3 database queries.
6
+
7
+
8
+
9
+ ## Installation
10
+
11
+ Add this line to your application's Gemfile:
12
+
13
+ gem 'pg_histogram'
14
+
15
+ And then execute:
16
+
17
+ $ bundle
18
+
19
+ Or install it yourself as:
20
+
21
+ $ gem install pg_histogram
22
+
23
+ ## Usage
24
+
25
+ Create a Histogram object using the following three parameters:
26
+
27
+ 1. ActiveRecord query to use
28
+ 2. Name of column to count frequency of
29
+ 3. Bucket size (OPTIONAL - default is 0.5)
30
+
31
+ <!-- -->
32
+ histogram = PgHistogram::Histogram.new(Widget.all, 'price', 0.5)
33
+
34
+
35
+ Call the `results` method to retrieve a Hash of bucket minimums and frequency counts:
36
+
37
+ # create sample data
38
+ 5.times { Widget.create(price: 1.2) }
39
+ 10.times { Widget.create(price: 2.9) }
40
+
41
+ # get the results
42
+ @histogram_data = histogram.results
43
+ => {1.0=>5, 2.5=>10}
44
+
45
+
46
+ The results can be used by your favorite charting library, such as [Chartkick](https://github.com/ankane/chartkick), to plot the data.
47
+
48
+ <%= column_chart @histogram_data %>
49
+
50
+ ## Dependencies
51
+
52
+ This gem has been tested with Ruby 2.1.3 and ActiveRecord 4.1.6. Please open an issue or PR if you experience issues with other versions.
data/Rakefile ADDED
@@ -0,0 +1,9 @@
1
+ require 'rubygems'
2
+ require "bundler/gem_tasks"
3
+ require 'rake/testtask'
4
+
5
+ Rake::TestTask.new(:test) do |t|
6
+ t.test_files = FileList['test/**/*_test.rb']
7
+ t.verbose = !!ENV['VERBOSE_TESTS']
8
+ t.warning = !!ENV['WARNINGS']
9
+ end
@@ -0,0 +1,86 @@
module PgHistogram
  # Builds a frequency histogram for one column of an ActiveRecord query.
  #
  # The range of the data is found with `minimum`/`maximum`, snapped outwards
  # to multiples of `bucket_size`, and the rows are then grouped with the SQL
  # `width_bucket` function.
  class Histogram
    attr_reader :query, :column, :bucket_size

    BUCKET_COL = 'bucket'
    FREQUENCY_COL = 'frequency'
    ROUND_METHODS_BY_DIRECTION = {
      nil => :round,
      down: :floor,
      up: :ceil
    }.freeze

    # query       - the ActiveRecord query whose rows are counted
    # column_name - name of the column to bucket. It is interpolated verbatim
    #               into SQL, so it must be safe for SQL injection.
    # bucket_size - width of each bucket (Integer or Float), default 0.5
    def initialize(query, column_name, bucket_size = 0.5)
      @query = query
      @column = column_name.to_s
      @bucket_size = bucket_size
    end

    # Returns the histogram as a Hash:
    #   bucket minimum as a key
    #   frequency as value
    def results
      # Error handling case: when min and max coincide there is no range to
      # split (num_buckets would be 0), so just count the rows equal to it.
      return { min => query.where("#{column} = ?", min).count } if max == min

      labeled_histogram
    end

    # Smallest column value, rounded down to a bucket_size increment.
    # Memoized; 0 when the query yields no minimum.
    def min
      @min ||= round_to_increment(query.minimum(column), :down)
    end

    # Largest column value, rounded up to a bucket_size increment.
    # Memoized; 0 when the query yields no maximum.
    def max
      @max ||= round_to_increment(query.maximum(column), :up)
    end

    private

    # Number of bucket_size-wide buckets between min and max.
    def num_buckets
      # min and max are both multiples of bucket_size, so the quotient is a
      # whole number up to float error; round instead of truncating so that
      # e.g. 0.3 / 0.1 == 2.9999999999999996 still counts as 3 buckets.
      @num_buckets ||= ((max - min) / bucket_size).round
    end

    # returns the bucket label (minimum which can be in bucket) based on bucket #
    def bucket_num_to_label(bucket_num)
      # Snap back onto the increment grid so float drift such as
      # 0.30000000000000004 does not leak into the result keys.
      round_to_increment(min + bucket_size * (bucket_num - 1))
    end

    # rounds to the nearest bucket_size increment
    # can optionally pass :up or :down to always round in one direction
    # returns 0 when num is nil
    def round_to_increment(num, direction = nil)
      return 0 if num.nil?

      round_method = ROUND_METHODS_BY_DIRECTION[direction]
      # Float numerator: with an Integer bucket_size, `1 / bucket_size` would
      # be integer division (1 / 2 == 0) and every result would become NaN.
      denominator = 1.0 / bucket_size
      (num * denominator).send(round_method) / denominator.to_f
    end

    # executes the query and converts bucket numbers to minimum step in bucket
    def labeled_histogram
      query_for_buckets.each_with_object({}) do |row, histogram|
        bucket = row[BUCKET_COL]
        # rows without a bucket number are left out of the histogram
        next if bucket.nil?

        histogram[bucket_num_to_label(bucket.to_i)] = row[FREQUENCY_COL].to_i
      end
    end

    # Runs the grouped width_bucket query; each returned row carries the
    # bucket number (BUCKET_COL) and its row count (FREQUENCY_COL).
    def query_for_buckets
      ActiveRecord::Base.connection.execute(
        <<-SQL
          SELECT width_bucket(#{column}, #{min}, #{max}, #{num_buckets}) as #{BUCKET_COL},
            count(*) as #{FREQUENCY_COL}
          FROM (#{subquery.to_sql}) as subq_results
          GROUP BY #{BUCKET_COL}
          ORDER BY #{BUCKET_COL}
        SQL
      )
    end

    # use passed AR query as a subquery to not interfere with group clause
    def subquery
      # order by the single selected column
      # NOTE(review): the original comment says this overrides the default
      # order; `order` normally appends to existing orderings - confirm intent.
      query.select(column).order('1')
    end
  end
end
@@ -0,0 +1,3 @@
1
+ module PgHistogram
2
+ VERSION = "0.1"
3
+ end
@@ -0,0 +1,5 @@
1
+ require 'pg_histogram/version'
2
+ require 'pg_histogram/histogram'
3
+
4
+ module PgHistogram
5
+ end
@@ -0,0 +1,25 @@
1
+ # coding: utf-8
2
+ lib = File.expand_path('../lib', __FILE__)
3
+ $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
4
+ require 'pg_histogram/version'
5
+
6
+ Gem::Specification.new do |spec|
7
+ spec.name = "pg_histogram"
8
+ spec.version = PgHistogram::VERSION
9
+ spec.authors = ["David Roberts"]
10
+ spec.email = ["david.roberts@elocal.com"]
11
+ spec.description = %q{Creates a Histogram fron an ActiveRecord query}
12
+ spec.summary = %q{Histograms using PostgreSQL and ActiveRecord}
13
+ spec.homepage = "https://github.com/eLocal/pg_histogram"
14
+ spec.license = "MIT"
15
+
16
+ spec.files = `git ls-files`.split($/)
17
+ spec.executables = spec.files.grep(%r{^bin/}) { |f| File.basename(f) }
18
+ spec.test_files = spec.files.grep(%r{^(test|spec|features)/})
19
+ spec.require_paths = ["lib"]
20
+
21
+ spec.add_dependency "activerecord", "~> 4.0"
22
+ spec.add_dependency "pg", "~> 0.1"
23
+ spec.add_development_dependency "bundler", "~> 1.3"
24
+ spec.add_development_dependency "rake"
25
+ end
data/test/database.yml ADDED
@@ -0,0 +1,6 @@
1
+ test:
2
+ adapter: postgresql
3
+ database: histogram_test
4
+ username: histogram
5
+ password: 5q3KwfCR
6
+ host: localhost
@@ -0,0 +1,75 @@
1
+ require_relative 'test_helper'
2
+
3
+ class HistogramTest < Minitest::Test
4
+
5
+ def setup
6
+ Widget.delete_all
7
+ @hist = PgHistogram::Histogram.new(Widget.all, 'price', 0.5)
8
+ end
9
+
10
+ def test_with_1_result
11
+ Widget.create!(price: 2.00)
12
+
13
+ assert_equal 2.0, @hist.min, 'Minimum is the single price'
14
+ assert_equal 2.0, @hist.max, 'Minimum is the single price'
15
+ assert_equal 1, @hist.results[2.0], 'Frequency of 2.0 bucket'
16
+ end
17
+
18
+ def test_ignores_nils_with_1_result
19
+ Widget.create!(price: 3.00)
20
+ Widget.create!(price: nil)
21
+
22
+ results = @hist.results
23
+ assert_equal 3.0, @hist.min, 'Minimum is the single price'
24
+ assert_equal 3.0, @hist.max, 'Minimum is the single price'
25
+ assert_equal 1, results.count, 'Histogram bucket count'
26
+ assert_equal 1, results[3.0], 'Frequency of 3.0 bucket'
27
+ end
28
+
29
+ def test_ignores_nils_with_multiple_results
30
+ Widget.create!(price: 3.00)
31
+ Widget.create!(price: 2.25)
32
+ Widget.create!(price: nil)
33
+
34
+ results = @hist.results
35
+ assert_equal 2.0, @hist.min, 'Minimum'
36
+ assert_equal 3.0, @hist.max, 'Maximum'
37
+ assert_equal 2, results.count, 'Histogram bucket count'
38
+ assert_equal 1, results[3.0], 'Frequency of 3.0 bucket'
39
+ assert_equal 1, results[2.0], 'Frequency of 2.0 bucket'
40
+ end
41
+
42
+ def test_with_many_results
43
+ # use a different bucket size
44
+ hist = PgHistogram::Histogram.new(Widget.all, 'price', 0.25)
45
+
46
+ 10.times { Widget.create!(price: 3.0) }
47
+ 8.times { Widget.create!(price: 5.76) }
48
+ min_price = Widget.create!(price: 0.98).price
49
+ max_price = Widget.create!(price: 6.0).price
50
+ results = hist.results
51
+
52
+ assert_equal 0.75, hist.min, 'Histogram minimum price'
53
+ assert_equal 6.0, hist.max, 'Histogram maximum price'
54
+ assert_equal 21, hist.send(:num_buckets), 'Histogram buckets'
55
+ assert_equal 4, results.size, 'Histogram buckets with results'
56
+ assert_equal 1, results[0.75], 'Frequency of 0.75 bucket'
57
+ assert_equal 10, results[3.0], 'Frequency of 3.0 bucket'
58
+ assert_equal 8, results[5.75], 'Frequency of 5.75 bucket'
59
+ assert_equal 1, results[6.0], 'Frequency of 6.0 bucket'
60
+ end
61
+
62
+ def test_rounding_to_bucket_size
63
+ hist = PgHistogram::Histogram.new(nil, nil, 0.25)
64
+
65
+ assert_equal 0.5, hist.send(:round_to_increment, 0.478), '0.478 rounded to 0.25 interval'
66
+ assert_equal 1.0, hist.send(:round_to_increment, 1.1), '1.1 rounded to 0.25 interval'
67
+ assert_equal 0.5, hist.send(:round_to_increment, 0.5), '0.5 rounded to 0.25 interval'
68
+ assert_equal 0.25, hist.send(:round_to_increment, 0.478, :down), '0.478 rounded down to 0.25 interval'
69
+ assert_equal 1.0, hist.send(:round_to_increment, 1.1, :down), '1.1 rounded down to 0.25 interval'
70
+ assert_equal 0.5, hist.send(:round_to_increment, 0.5, :down), '0.5 rounded down to 0.25 interval'
71
+ assert_equal 0.5, hist.send(:round_to_increment, 0.478, :up), '0.478 rounded up to 0.25 interval'
72
+ assert_equal 1.25, hist.send(:round_to_increment, 1.1, :up), '1.1 rounded up to 0.25 interval'
73
+ assert_equal 0.5, hist.send(:round_to_increment, 0.5, :up), '0.5 rounded up to 0.25 interval'
74
+ end
75
+ end
@@ -0,0 +1,27 @@
1
+ require 'minitest/autorun'
2
+ require 'minitest/spec'
3
+ require 'logger'
4
+ require 'active_record'
5
+ require 'yaml'
6
+ require 'pg_histogram'
7
+
8
+ config = YAML.load(File.read('test/database.yml'))
9
+ ActiveRecord::Base.establish_connection config['test']
10
+ ActiveRecord::Base.logger = Logger.new 'tmp/test.log'
11
+ ActiveRecord::Base.logger.level = Logger::DEBUG
12
+ ActiveRecord::Migration.verbose = false
13
+
14
+
15
+ # Set up the database that we require
16
+ ActiveRecord::Schema.define do
17
+ create_table :widgets, force: true do |t|
18
+ t.float :price
19
+ t.timestamps
20
+ end
21
+ end
22
+
23
+
24
+
25
+ class Widget < ActiveRecord::Base
26
+ end
27
+
metadata ADDED
@@ -0,0 +1,116 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: pg_histogram
3
+ version: !ruby/object:Gem::Version
4
+ version: '0.1'
5
+ platform: ruby
6
+ authors:
7
+ - David Roberts
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2014-10-09 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: activerecord
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - "~>"
18
+ - !ruby/object:Gem::Version
19
+ version: '4.0'
20
+ type: :runtime
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - "~>"
25
+ - !ruby/object:Gem::Version
26
+ version: '4.0'
27
+ - !ruby/object:Gem::Dependency
28
+ name: pg
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - "~>"
32
+ - !ruby/object:Gem::Version
33
+ version: '0.1'
34
+ type: :runtime
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - "~>"
39
+ - !ruby/object:Gem::Version
40
+ version: '0.1'
41
+ - !ruby/object:Gem::Dependency
42
+ name: bundler
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - "~>"
46
+ - !ruby/object:Gem::Version
47
+ version: '1.3'
48
+ type: :development
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - "~>"
53
+ - !ruby/object:Gem::Version
54
+ version: '1.3'
55
+ - !ruby/object:Gem::Dependency
56
+ name: rake
57
+ requirement: !ruby/object:Gem::Requirement
58
+ requirements:
59
+ - - ">="
60
+ - !ruby/object:Gem::Version
61
+ version: '0'
62
+ type: :development
63
+ prerelease: false
64
+ version_requirements: !ruby/object:Gem::Requirement
65
+ requirements:
66
+ - - ">="
67
+ - !ruby/object:Gem::Version
68
+ version: '0'
69
+ description: Creates a Histogram fron an ActiveRecord query
70
+ email:
71
+ - david.roberts@elocal.com
72
+ executables: []
73
+ extensions: []
74
+ extra_rdoc_files: []
75
+ files:
76
+ - ".gitignore"
77
+ - ".ruby-version"
78
+ - Gemfile
79
+ - LICENSE.txt
80
+ - README.md
81
+ - Rakefile
82
+ - lib/pg_histogram.rb
83
+ - lib/pg_histogram/histogram.rb
84
+ - lib/pg_histogram/version.rb
85
+ - pg_histogram.gemspec
86
+ - test/database.yml
87
+ - test/histogram_test.rb
88
+ - test/test_helper.rb
89
+ homepage: https://github.com/eLocal/pg_histogram
90
+ licenses:
91
+ - MIT
92
+ metadata: {}
93
+ post_install_message:
94
+ rdoc_options: []
95
+ require_paths:
96
+ - lib
97
+ required_ruby_version: !ruby/object:Gem::Requirement
98
+ requirements:
99
+ - - ">="
100
+ - !ruby/object:Gem::Version
101
+ version: '0'
102
+ required_rubygems_version: !ruby/object:Gem::Requirement
103
+ requirements:
104
+ - - ">="
105
+ - !ruby/object:Gem::Version
106
+ version: '0'
107
+ requirements: []
108
+ rubyforge_project:
109
+ rubygems_version: 2.2.2
110
+ signing_key:
111
+ specification_version: 4
112
+ summary: Histograms using PostgreSQL and ActiveRecord
113
+ test_files:
114
+ - test/database.yml
115
+ - test/histogram_test.rb
116
+ - test/test_helper.rb