pg_histogram 0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: f1fd000101bac4dc9af44882ab5dd641b3062430
4
+ data.tar.gz: dc532f7711f65c11aa57706edb3ac6a17124f2e1
5
+ SHA512:
6
+ metadata.gz: dd6ef99ccd68ce5bf02bcf75c79a76bc95b61b91c0a44417cc4283ccd659b71af989b80f3e225bfd1b720695c79df0cbb8f00b045134a2bf01e91d7348af3c18
7
+ data.tar.gz: 096ae0c70711f89ca3a3f9d9a5dc0c7f4585a14248504244859a49e24f6e2ff2edb34017d4f21f4e1853e70d644eb793caa346b0a3a03e8f2875d1120e0e9b33
data/.gitignore ADDED
@@ -0,0 +1,18 @@
1
+ *.gem
2
+ *.rbc
3
+ .bundle
4
+ .config
5
+ .yardoc
6
+ Gemfile.lock
7
+ InstalledFiles
8
+ _yardoc
9
+ coverage
10
+ doc/
11
+ lib/bundler/man
12
+ pkg
13
+ rdoc
14
+ spec/reports
15
+ test/tmp
16
+ test/version_tmp
17
+ tmp
18
+ .DS_Store
data/.ruby-version ADDED
@@ -0,0 +1 @@
1
+ 2.1.3
data/Gemfile ADDED
@@ -0,0 +1,4 @@
1
+ source 'https://rubygems.org'
2
+
3
+ # Specify your gem's dependencies in pg_histogram.gemspec
4
+ gemspec
data/LICENSE.txt ADDED
@@ -0,0 +1,22 @@
1
+ Copyright (c) 2014 David Roberts
2
+
3
+ MIT License
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining
6
+ a copy of this software and associated documentation files (the
7
+ "Software"), to deal in the Software without restriction, including
8
+ without limitation the rights to use, copy, modify, merge, publish,
9
+ distribute, sublicense, and/or sell copies of the Software, and to
10
+ permit persons to whom the Software is furnished to do so, subject to
11
+ the following conditions:
12
+
13
+ The above copyright notice and this permission notice shall be
14
+ included in all copies or substantial portions of the Software.
15
+
16
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
19
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
20
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
21
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
22
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/README.md ADDED
@@ -0,0 +1,52 @@
1
+ # PostgreSQL Histogram (for ActiveRecord)
2
+
3
+ This gem allows you to efficiently create a histogram from large data sets in your Rails applications.
4
+
5
+ It uses PostgreSQL's [width_bucket](http://www.postgresql.org/docs/9.3/static/functions-math.html) function to handle the majority of the processing in the database, and only requires 3 database queries.
6
+
7
+
8
+
9
+ ## Installation
10
+
11
+ Add this line to your application's Gemfile:
12
+
13
+ gem 'pg_histogram'
14
+
15
+ And then execute:
16
+
17
+ $ bundle
18
+
19
+ Or install it yourself as:
20
+
21
+ $ gem install pg_histogram
22
+
23
+ ## Usage
24
+
25
+ Create a Histogram object using the following three parameters:
26
+
27
+ 1. ActiveRecord query to use
28
+ 2. Name of column to count frequency of
29
+ 3. Bucket size (OPTIONAL - default is 0.5)
30
+
31
+ <!-- -->
32
+ histogram = PgHistogram::Histogram.new(Widget.all, 'price', 0.5)
33
+
34
+
35
+ Call the results method to retrieve a Hash of bucket minimums and frequency counts
36
+
37
+ # create sample data
38
+ 5.times { Widget.create(price: 1.2) }
39
+ 10.times { Widget.create(price: 2.9) }
40
+
41
+ # get the results
42
+ @histogram_data = histogram.results
43
+ => {1.0=>5, 2.5=>10}
44
+
45
+
46
+ The results can be used by your favorite charting library, such as [Chartkick](https://github.com/ankane/chartkick), to plot the data.
47
+
48
+ <%= column_chart @histogram_data %>
49
+
50
+ ## Dependencies
51
+
52
+ This gem has been tested with Ruby 2.1.3 and ActiveRecord 4.1.6. Please open an issue or PR if you experience issues with other versions.
data/Rakefile ADDED
@@ -0,0 +1,9 @@
1
+ require 'rubygems'
2
+ require "bundler/gem_tasks"
3
+ require 'rake/testtask'
4
+
5
# Defines the `test` task, which runs every file matching test/**/*_test.rb.
# Setting VERBOSE_TESTS and/or WARNINGS in the environment (to any value)
# switches on the task's verbose and warning options respectively.
Rake::TestTask.new(:test) do |test_task|
  test_task.test_files = FileList['test/**/*_test.rb']
  test_task.verbose    = ENV.key?('VERBOSE_TESTS')
  test_task.warning    = ENV.key?('WARNINGS')
end
@@ -0,0 +1,86 @@
1
module PgHistogram
  # Builds a frequency histogram for one numeric column of an ActiveRecord
  # query, letting PostgreSQL's width_bucket function do the grouping.
  class Histogram
    attr_reader :query, :column, :bucket_size

    # Column aliases used in the generated SQL and to read each result row.
    BUCKET_COL = 'bucket'.freeze
    FREQUENCY_COL = 'frequency'.freeze

    # Maps a rounding direction to the Numeric method that implements it.
    ROUND_METHODS_BY_DIRECTION = {
      nil => :round,
      down: :floor,
      up: :ceil
    }.freeze

    # @param query [#minimum, #maximum, #where, #select] ActiveRecord query
    #   whose rows are counted
    # @param column_name [#to_s] column to build the histogram for. It is
    #   interpolated into SQL unescaped, so it must never come from untrusted
    #   input.
    # @param bucket_size [Numeric] width of each bucket; must be positive
    # @raise [ArgumentError] if bucket_size is not a positive number
    def initialize(query, column_name, bucket_size = 0.5)
      unless bucket_size.is_a?(Numeric) && bucket_size > 0
        raise ArgumentError, 'bucket_size must be a positive number'
      end

      @query = query
      @column = column_name.to_s
      @bucket_size = bucket_size
    end

    # Returns the histogram as a Hash: bucket minimum => frequency.
    def results
      # When min == max the range is empty and num_buckets would be 0, so
      # there is nothing to hand to width_bucket; count the rows directly.
      if max == min
        { min => query.where("#{column} = ?", min).count }
      else
        labeled_histogram
      end
    end

    # Smallest column value rounded down to a bucket_size increment
    # (0 when query.minimum returns nil).
    def min
      @min ||= round_to_increment(query.minimum(column), :down)
    end

    # Largest column value rounded up to a bucket_size increment
    # (0 when query.maximum returns nil).
    def max
      @max ||= round_to_increment(query.maximum(column), :up)
    end

    private

    # Number of buckets between min and max. Both bounds are multiples of
    # bucket_size, so the quotient is a whole number up to float error;
    # rounding (instead of truncating) keeps e.g. 0.3 / 0.1 from becoming 2.
    def num_buckets
      @num_buckets ||= ((max - min) / bucket_size.to_f).round
    end

    # Returns the bucket label (minimum which can be in bucket) based on
    # bucket #. The label is snapped back onto the increment grid so that
    # accumulated float error (0.1 * 3 => 0.30000000000000004) does not leak
    # into the result keys.
    def bucket_num_to_label(bucket_num)
      round_to_increment(min + bucket_size * (bucket_num - 1))
    end

    # Rounds num to the nearest bucket_size increment and returns a Float.
    # Pass :up or :down as direction to always round one way.
    # Returns 0 when num is nil.
    def round_to_increment(num, direction = nil)
      return 0 if num.nil?

      round_method = ROUND_METHODS_BY_DIRECTION[direction]
      if bucket_size > 1
        # Work in units of whole buckets; multiplying back by the size avoids
        # dividing by an inexact reciprocal such as 1.0 / 5.
        (num / bucket_size.to_f).send(round_method) * bucket_size.to_f
      else
        # 1.0 (not 1) so that an Integer bucket_size never triggers integer
        # division.
        denominator = 1.0 / bucket_size
        (num * denominator).send(round_method) / denominator
      end
    end

    # Executes the bucket query and converts each bucket number to the
    # minimum value of that bucket; rows with a NULL bucket are skipped.
    def labeled_histogram
      query_for_buckets.each_with_object({}) do |row, histogram|
        bucket = row[BUCKET_COL]
        next if bucket.nil?

        histogram[bucket_num_to_label(bucket.to_i)] = row[FREQUENCY_COL].to_i
      end
    end

    # Runs the width_bucket aggregation over the subquery.
    # NOTE: column is interpolated as-is (see #initialize).
    def query_for_buckets
      ActiveRecord::Base.connection.execute(
        <<-SQL
          SELECT width_bucket(#{column}, #{min}, #{max}, #{num_buckets}) as #{BUCKET_COL},
            count(*) as #{FREQUENCY_COL}
          FROM (#{subquery.to_sql}) as subq_results
          GROUP BY #{BUCKET_COL}
          ORDER BY #{BUCKET_COL}
        SQL
      )
    end

    # Uses the passed AR query as a subquery so that the histogram's GROUP BY
    # does not interfere with the query's own clauses. Only the histogram
    # column is selected, and rows are ordered by that first selected column.
    def subquery
      query.select(column).order('1')
    end
  end
end
@@ -0,0 +1,3 @@
1
module PgHistogram
  # Gem version string; frozen so the constant cannot be mutated in place.
  VERSION = '0.1'.freeze
end
@@ -0,0 +1,5 @@
1
+ require 'pg_histogram/version'
2
+ require 'pg_histogram/histogram'
3
+
4
# Top-level namespace of the gem. It is intentionally empty here.
module PgHistogram; end
@@ -0,0 +1,25 @@
1
+ # coding: utf-8
2
# Put this gem's lib/ directory on the load path so the version constant can
# be required straight from the working tree.
lib = File.expand_path('../lib', __FILE__)
$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
require 'pg_histogram/version'

Gem::Specification.new do |spec|
  spec.name          = "pg_histogram"
  spec.version       = PgHistogram::VERSION
  spec.authors       = ["David Roberts"]
  spec.email         = ["david.roberts@elocal.com"]
  spec.description   = %q{Creates a Histogram from an ActiveRecord query}
  spec.summary       = %q{Histograms using PostgreSQL and ActiveRecord}
  spec.homepage      = "https://github.com/eLocal/pg_histogram"
  spec.license       = "MIT"

  # Package exactly the files tracked by git.
  spec.files         = `git ls-files`.split($/)
  spec.executables   = spec.files.grep(%r{^bin/}) { |f| File.basename(f) }
  spec.test_files    = spec.files.grep(%r{^(test|spec|features)/})
  spec.require_paths = ["lib"]

  spec.add_dependency "activerecord", "~> 4.0"
  spec.add_dependency "pg", "~> 0.1"
  spec.add_development_dependency "bundler", "~> 1.3"
  spec.add_development_dependency "rake"
end
data/test/database.yml ADDED
@@ -0,0 +1,6 @@
1
+ test:
2
+ adapter: postgresql
3
+ database: histogram_test
4
+ username: histogram
5
+ password: 5q3KwfCR
6
+ host: localhost
@@ -0,0 +1,75 @@
1
+ require_relative 'test_helper'
2
+
3
# Integration tests for PgHistogram::Histogram. They run against the widgets
# table and Widget model that test_helper sets up.
class HistogramTest < Minitest::Test
  # Start every test from an empty table with a 0.5-wide histogram.
  def setup
    Widget.delete_all
    @hist = PgHistogram::Histogram.new(Widget.all, 'price', 0.5)
  end

  def test_with_1_result
    Widget.create!(price: 2.00)

    assert_equal 2.0, @hist.min, 'Minimum is the single price'
    assert_equal 2.0, @hist.max, 'Maximum is the single price'
    assert_equal 1, @hist.results[2.0], 'Frequency of 2.0 bucket'
  end

  def test_ignores_nils_with_1_result
    Widget.create!(price: 3.00)
    Widget.create!(price: nil)

    results = @hist.results
    assert_equal 3.0, @hist.min, 'Minimum is the single price'
    assert_equal 3.0, @hist.max, 'Maximum is the single price'
    assert_equal 1, results.count, 'Histogram bucket count'
    assert_equal 1, results[3.0], 'Frequency of 3.0 bucket'
  end

  def test_ignores_nils_with_multiple_results
    Widget.create!(price: 3.00)
    Widget.create!(price: 2.25)
    Widget.create!(price: nil)

    results = @hist.results
    assert_equal 2.0, @hist.min, 'Minimum'
    assert_equal 3.0, @hist.max, 'Maximum'
    assert_equal 2, results.count, 'Histogram bucket count'
    assert_equal 1, results[3.0], 'Frequency of 3.0 bucket'
    assert_equal 1, results[2.0], 'Frequency of 2.0 bucket'
  end

  def test_with_many_results
    # use a different bucket size
    hist = PgHistogram::Histogram.new(Widget.all, 'price', 0.25)

    10.times { Widget.create!(price: 3.0) }
    8.times { Widget.create!(price: 5.76) }
    Widget.create!(price: 0.98) # lowest price, rounds down to the 0.75 bucket
    Widget.create!(price: 6.0)  # highest price, sits exactly on the upper bound
    results = hist.results

    assert_equal 0.75, hist.min, 'Histogram minimum price'
    assert_equal 6.0, hist.max, 'Histogram maximum price'
    assert_equal 21, hist.send(:num_buckets), 'Histogram buckets'
    assert_equal 4, results.size, 'Histogram buckets with results'
    assert_equal 1, results[0.75], 'Frequency of 0.75 bucket'
    assert_equal 10, results[3.0], 'Frequency of 3.0 bucket'
    assert_equal 8, results[5.75], 'Frequency of 5.75 bucket'
    assert_equal 1, results[6.0], 'Frequency of 6.0 bucket'
  end

  # Exercises the private rounding helper directly; no query is needed.
  def test_rounding_to_bucket_size
    hist = PgHistogram::Histogram.new(nil, nil, 0.25)

    assert_equal 0.5, hist.send(:round_to_increment, 0.478), '0.478 rounded to 0.25 interval'
    assert_equal 1.0, hist.send(:round_to_increment, 1.1), '1.1 rounded to 0.25 interval'
    assert_equal 0.5, hist.send(:round_to_increment, 0.5), '0.5 rounded to 0.25 interval'
    assert_equal 0.25, hist.send(:round_to_increment, 0.478, :down), '0.478 rounded down to 0.25 interval'
    assert_equal 1.0, hist.send(:round_to_increment, 1.1, :down), '1.1 rounded down to 0.25 interval'
    assert_equal 0.5, hist.send(:round_to_increment, 0.5, :down), '0.5 rounded down to 0.25 interval'
    assert_equal 0.5, hist.send(:round_to_increment, 0.478, :up), '0.478 rounded up to 0.25 interval'
    assert_equal 1.25, hist.send(:round_to_increment, 1.1, :up), '1.1 rounded up to 0.25 interval'
    assert_equal 0.5, hist.send(:round_to_increment, 0.5, :up), '0.5 rounded up to 0.25 interval'
  end
end
@@ -0,0 +1,27 @@
1
+ require 'minitest/autorun'
2
+ require 'minitest/spec'
3
+ require 'logger'
4
+ require 'active_record'
5
+ require 'yaml'
6
+ require 'pg_histogram'
7
+
8
# Resolve paths from the project root so the helper does not depend on the
# directory the tests are launched from.
project_root = File.expand_path('../..', __FILE__)

config = YAML.load(File.read(File.join(project_root, 'test', 'database.yml')))
ActiveRecord::Base.establish_connection config['test']

# tmp is git-ignored, so it is missing from a fresh checkout; create it before
# opening the log file, which would otherwise fail on the missing directory.
log_dir = File.join(project_root, 'tmp')
Dir.mkdir(log_dir) unless Dir.exist?(log_dir)
ActiveRecord::Base.logger = Logger.new File.join(log_dir, 'test.log')
ActiveRecord::Base.logger.level = Logger::DEBUG
ActiveRecord::Migration.verbose = false

# Set up the database that we require; force: true recreates the table on
# every run.
ActiveRecord::Schema.define do
  create_table :widgets, force: true do |t|
    t.float :price
    t.timestamps
  end
end

# Minimal model backing the widgets table used by the tests.
class Widget < ActiveRecord::Base
end
27
+
metadata ADDED
@@ -0,0 +1,116 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: pg_histogram
3
+ version: !ruby/object:Gem::Version
4
+ version: '0.1'
5
+ platform: ruby
6
+ authors:
7
+ - David Roberts
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2014-10-09 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: activerecord
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - "~>"
18
+ - !ruby/object:Gem::Version
19
+ version: '4.0'
20
+ type: :runtime
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - "~>"
25
+ - !ruby/object:Gem::Version
26
+ version: '4.0'
27
+ - !ruby/object:Gem::Dependency
28
+ name: pg
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - "~>"
32
+ - !ruby/object:Gem::Version
33
+ version: '0.1'
34
+ type: :runtime
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - "~>"
39
+ - !ruby/object:Gem::Version
40
+ version: '0.1'
41
+ - !ruby/object:Gem::Dependency
42
+ name: bundler
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - "~>"
46
+ - !ruby/object:Gem::Version
47
+ version: '1.3'
48
+ type: :development
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - "~>"
53
+ - !ruby/object:Gem::Version
54
+ version: '1.3'
55
+ - !ruby/object:Gem::Dependency
56
+ name: rake
57
+ requirement: !ruby/object:Gem::Requirement
58
+ requirements:
59
+ - - ">="
60
+ - !ruby/object:Gem::Version
61
+ version: '0'
62
+ type: :development
63
+ prerelease: false
64
+ version_requirements: !ruby/object:Gem::Requirement
65
+ requirements:
66
+ - - ">="
67
+ - !ruby/object:Gem::Version
68
+ version: '0'
69
+ description: Creates a Histogram from an ActiveRecord query
70
+ email:
71
+ - david.roberts@elocal.com
72
+ executables: []
73
+ extensions: []
74
+ extra_rdoc_files: []
75
+ files:
76
+ - ".gitignore"
77
+ - ".ruby-version"
78
+ - Gemfile
79
+ - LICENSE.txt
80
+ - README.md
81
+ - Rakefile
82
+ - lib/pg_histogram.rb
83
+ - lib/pg_histogram/histogram.rb
84
+ - lib/pg_histogram/version.rb
85
+ - pg_histogram.gemspec
86
+ - test/database.yml
87
+ - test/histogram_test.rb
88
+ - test/test_helper.rb
89
+ homepage: https://github.com/eLocal/pg_histogram
90
+ licenses:
91
+ - MIT
92
+ metadata: {}
93
+ post_install_message:
94
+ rdoc_options: []
95
+ require_paths:
96
+ - lib
97
+ required_ruby_version: !ruby/object:Gem::Requirement
98
+ requirements:
99
+ - - ">="
100
+ - !ruby/object:Gem::Version
101
+ version: '0'
102
+ required_rubygems_version: !ruby/object:Gem::Requirement
103
+ requirements:
104
+ - - ">="
105
+ - !ruby/object:Gem::Version
106
+ version: '0'
107
+ requirements: []
108
+ rubyforge_project:
109
+ rubygems_version: 2.2.2
110
+ signing_key:
111
+ specification_version: 4
112
+ summary: Histograms using PostgreSQL and ActiveRecord
113
+ test_files:
114
+ - test/database.yml
115
+ - test/histogram_test.rb
116
+ - test/test_helper.rb