pg_histogram 0.1 → 0.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +17 -16
- data/lib/pg_histogram/histogram.rb +59 -9
- data/lib/pg_histogram/version.rb +1 -1
- data/test/histogram_test.rb +50 -1
- metadata +2 -4
- data/test/database.yml +0 -6
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 8fa7ea082187e3b4ded20bbcd8580ad3ff06f7ee
|
4
|
+
data.tar.gz: 510d7260c0cff0c67e2cccacbb4b788859b7a58f
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: b27b4ef2219e53ee2b407743419dd5d4756ba7ddc0b21a916e69aa5c05a709ff0bbdf14424f8f6913078bd17fc812ec3af4692cbe635144595fcabe74c9ab4a1
|
7
|
+
data.tar.gz: 41fa94e1a691710b4b88d5f1656aea2fad5fca5e3a05d70caeb54c09fee4d2286c6416d0c61363c97821e7c8a1c0cdad6a57f2f7b3fe8f0c58005dd4babb3b40
|
data/README.md
CHANGED
@@ -2,9 +2,7 @@
|
|
2
2
|
|
3
3
|
This gem allows you to efficiently create a histogram from large data sets in your Rails applications.
|
4
4
|
|
5
|
-
It uses PostgreSQL's [width_bucket](http://www.postgresql.org/docs/9.3/static/functions-math.html) function to handle the majority of the processing in the database, and only requires 3 database queries.
|
6
|
-
|
7
|
-
|
5
|
+
It uses PostgreSQL's [width_bucket](http://www.postgresql.org/docs/9.3/static/functions-math.html) function to handle the majority of the processing in the database, and only requires 3 database queries (and only one query if min and max values are specified).
|
8
6
|
|
9
7
|
## Installation
|
10
8
|
|
@@ -22,28 +20,31 @@ Or install it yourself as:
|
|
22
20
|
|
23
21
|
## Usage
|
24
22
|
|
25
|
-
Create a Histogram object using the following
|
26
|
-
|
27
|
-
1. ActiveRecord query to use
|
28
|
-
2. Name of column to count frequency of
|
29
|
-
3. Bucket size (OPTIONAL - default is 0.5)
|
30
|
-
|
31
|
-
<!-- -->
|
32
|
-
histogram = PgHistogram::Histogram.new(Widget.all, 'price', 0.5)
|
23
|
+
Create a Histogram object using the following parameters:
|
33
24
|
|
25
|
+
1. ActiveRecord Relation (query) to use.
|
26
|
+
2. Name of column to count frequency of. Also allows for aliased queries such as `'price*discount as final_price'` to create histograms on expressions.
|
27
|
+
3. Options hash (optional). Not all combinations are allowed. For example, if `:buckets` is specified, `:min` and `:max` are required and `:bucket_size` is ignored (it is calculated instead). If `:buckets` is not specified, the number of buckets depends on `:bucket_size`, and `:min` and `:max` are optional.
|
28
|
+
- `:buckets`: number of buckets (integer)
|
29
|
+
- `:min` and `:max`: See [width_bucket](http://www.postgresql.org/docs/9.3/static/functions-math.html)'s docs for exact meaning (defaults to the min and max values of the column).
|
30
|
+
- `:bucket_size`: Width of each bucket (defaults to 1).
|
34
31
|
|
35
|
-
|
32
|
+
### Example
|
33
|
+
Create sample data:
|
36
34
|
|
37
|
-
# create sample data
|
38
35
|
5.times { Widget.create(price: 1.2) }
|
39
36
|
10.times { Widget.create(price: 2.9) }
|
40
37
|
|
41
|
-
|
38
|
+
Create the histogram object:
|
39
|
+
|
40
|
+
histogram = PgHistogram::Histogram.new(Widget.all, 'price', 0.5)
|
41
|
+
|
42
|
+
Call the results method to retrieve a Hash of bucket minimums and frequency counts:
|
43
|
+
|
42
44
|
@histogram_data = histogram.results
|
43
45
|
=> {1.0=>5, 2.5=>10}
|
44
46
|
|
45
|
-
|
46
|
-
The results can be used by your favorite charting library, such as [Chartkick](https://github.com/ankane/chartkick), to plot the data.
|
47
|
+
The results can be used by your favorite charting library, such as [Chartkick](https://github.com/ankane/chartkick), to plot the data:
|
47
48
|
|
48
49
|
<%= column_chart @histogram_data %>
|
49
50
|
|
@@ -11,10 +11,23 @@ module PgHistogram
|
|
11
11
|
}
|
12
12
|
|
13
13
|
# column_name must be safe from SQL injection
|
14
|
-
def initialize(query, column_name,
|
14
|
+
def initialize(query, column_name, options = {})
|
15
15
|
@query = query
|
16
16
|
@column = column_name.to_s
|
17
|
-
|
17
|
+
if options.is_a? Hash
|
18
|
+
if options[:buckets]
|
19
|
+
@min = options[:min] || 0
|
20
|
+
@max = options[:max]
|
21
|
+
@buckets = options[:buckets]
|
22
|
+
@bucket_size = calculate_bucket_size
|
23
|
+
else
|
24
|
+
@min = options[:min]
|
25
|
+
@max = options[:max]
|
26
|
+
@bucket_size = (options[:bucket_size] || 1).to_f
|
27
|
+
end
|
28
|
+
else
|
29
|
+
@bucket_size = options.to_f
|
30
|
+
end
|
18
31
|
end
|
19
32
|
|
20
33
|
# returns histogram as hash
|
@@ -23,22 +36,34 @@ module PgHistogram
|
|
23
36
|
def results
|
24
37
|
# error handling case
|
25
38
|
if max == min
|
26
|
-
{ min =>
|
39
|
+
{ min => subquery.where("#{pure_column} = ?", min).count }
|
27
40
|
else
|
28
41
|
labeled_histogram
|
29
42
|
end
|
30
43
|
end
|
31
44
|
|
32
45
|
def min
|
33
|
-
@min ||= round_to_increment(
|
46
|
+
@min ||= round_to_increment(source_min, :down)
|
34
47
|
end
|
35
48
|
|
36
49
|
def max
|
37
|
-
@max ||= round_to_increment(
|
50
|
+
@max ||= round_to_increment(source_max, :up)
|
38
51
|
end
|
39
52
|
|
40
53
|
private
|
41
54
|
|
55
|
+
def source_min
|
56
|
+
@source_min ||= subquery.minimum(pure_column(true))
|
57
|
+
end
|
58
|
+
|
59
|
+
def source_max
|
60
|
+
@source_max ||= subquery.maximum(pure_column(true))
|
61
|
+
end
|
62
|
+
|
63
|
+
def calculate_bucket_size
|
64
|
+
(source_max - source_min).to_f / @buckets
|
65
|
+
end
|
66
|
+
|
42
67
|
def num_buckets
|
43
68
|
@buckets ||= ((max - min) / bucket_size).to_i
|
44
69
|
end
|
@@ -68,19 +93,44 @@ module PgHistogram
|
|
68
93
|
def query_for_buckets
|
69
94
|
ActiveRecord::Base.connection.execute(
|
70
95
|
<<-SQL
|
71
|
-
SELECT width_bucket(#{
|
96
|
+
SELECT width_bucket(#{pure_column}, #{min}, #{max}, #{num_buckets}) as #{BUCKET_COL},
|
72
97
|
count(*) as #{FREQUENCY_COL}
|
73
|
-
FROM (#{
|
98
|
+
FROM (#{subquery_sql}) as subq_results
|
74
99
|
GROUP BY #{BUCKET_COL}
|
75
100
|
ORDER BY #{BUCKET_COL}
|
76
101
|
SQL
|
77
102
|
)
|
78
103
|
end
|
79
|
-
|
80
104
|
# use passed AR query as a subquery to not interfere with group clause
|
81
105
|
def subquery
|
82
106
|
# override default order
|
83
107
|
query.select(column).order('1')
|
84
108
|
end
|
109
|
+
|
110
|
+
# Use unprepared statement per https://github.com/rails/rails/issues/8743
|
111
|
+
def subquery_sql
|
112
|
+
ActiveRecord::Base.connection.unprepared_statement do
|
113
|
+
subquery.to_sql
|
114
|
+
end
|
115
|
+
end
|
116
|
+
|
117
|
+
# In case the column has an alias, the pure column is just the aliased name
|
118
|
+
# If expression is true, only the expression (before the 'AS') is returned
|
119
|
+
def pure_column(expression = false)
|
120
|
+
index = column =~ / as /i
|
121
|
+
# If AS is present, split and keep either side
|
122
|
+
if index
|
123
|
+
if expression
|
124
|
+
# Keep left side
|
125
|
+
column[0..index]
|
126
|
+
else
|
127
|
+
# Keep right side
|
128
|
+
column[index + 4..-1]
|
129
|
+
end
|
130
|
+
else
|
131
|
+
# Column was already good.
|
132
|
+
column
|
133
|
+
end
|
134
|
+
end
|
85
135
|
end
|
86
|
-
end
|
136
|
+
end
|
data/lib/pg_histogram/version.rb
CHANGED
data/test/histogram_test.rb
CHANGED
@@ -60,7 +60,7 @@ class HistogramTest < Minitest::Test
|
|
60
60
|
end
|
61
61
|
|
62
62
|
def test_rounding_to_bucket_size
|
63
|
-
hist = PgHistogram::Histogram.new(nil, nil, 0.25)
|
63
|
+
hist = PgHistogram::Histogram.new(nil, nil, bucket_size: 0.25)
|
64
64
|
|
65
65
|
assert_equal 0.5, hist.send(:round_to_increment, 0.478), '0.478 rounded to 0.25 interval'
|
66
66
|
assert_equal 1.0, hist.send(:round_to_increment, 1.1), '1.1 rounded to 0.25 interval'
|
@@ -72,4 +72,53 @@ class HistogramTest < Minitest::Test
|
|
72
72
|
assert_equal 1.25, hist.send(:round_to_increment, 1.1, :up), '1.1 rounded up to 0.25 interval'
|
73
73
|
assert_equal 0.5, hist.send(:round_to_increment, 0.5, :up), '0.5 rounded up to 0.25 interval'
|
74
74
|
end
|
75
|
+
|
76
|
+
def test_with_buckets_option
|
77
|
+
# Specify number of buckets
|
78
|
+
hist = PgHistogram::Histogram.new(Widget.all, 'price', {buckets: 5, min: 0, max: 10})
|
79
|
+
|
80
|
+
10.times { Widget.create!(price: 3.0) }
|
81
|
+
8.times { Widget.create!(price: 5.76) }
|
82
|
+
results = hist.results
|
83
|
+
|
84
|
+
assert_equal 0, hist.min, 'Histogram minimum price'
|
85
|
+
assert_equal 10, hist.max, 'Histogram maximum price'
|
86
|
+
assert_equal 5, hist.send(:num_buckets), 'Histogram buckets'
|
87
|
+
assert_equal 2, results.size, 'Histogram buckets with results'
|
88
|
+
assert_equal 10, results[2.0], 'Frequency of 2.0 bucket'
|
89
|
+
assert_equal 8, results[4.0], 'Frequency of 4.0 bucket'
|
90
|
+
end
|
91
|
+
|
92
|
+
def test_with_min_max_options
|
93
|
+
# Specify min and max values
|
94
|
+
hist = PgHistogram::Histogram.new(Widget.all, 'price', {min: 0, max: 10})
|
95
|
+
|
96
|
+
10.times { Widget.create!(price: 3.0) }
|
97
|
+
8.times { Widget.create!(price: 5.76) }
|
98
|
+
min_price = Widget.create!(price: 0.98).price
|
99
|
+
max_price = Widget.create!(price: 6.0).price
|
100
|
+
results = hist.results
|
101
|
+
|
102
|
+
assert_equal 0, hist.min, 'Histogram minimum price'
|
103
|
+
assert_equal 10, hist.max, 'Histogram maximum price'
|
104
|
+
assert_equal 10, hist.send(:num_buckets), 'Histogram buckets'
|
105
|
+
assert_equal 4, results.size, 'Histogram buckets with results'
|
106
|
+
assert_equal 10, results[3.0], 'Frequency of 3 bucket'
|
107
|
+
assert_equal nil, results[4.0], 'Frequency of 4.0 bucket'
|
108
|
+
end
|
109
|
+
def test_with_aliased_expression
|
110
|
+
# Use an aliased expression as the column
|
111
|
+
hist = PgHistogram::Histogram.new(Widget.all, 'price*2 as double_price')
|
112
|
+
|
113
|
+
5.times { Widget.create!(price: 3.0) }
|
114
|
+
6.times { Widget.create!(price: 6.0) }
|
115
|
+
results = hist.results
|
116
|
+
|
117
|
+
assert_equal 6, hist.min, 'Histogram minimum price'
|
118
|
+
assert_equal 12, hist.max, 'Histogram maximum price'
|
119
|
+
assert_equal 6, hist.send(:num_buckets), 'Histogram buckets'
|
120
|
+
assert_equal 2, results.size, 'Histogram buckets with results'
|
121
|
+
assert_equal 5, results[6.0], 'Frequency of 6.0 bucket'
|
122
|
+
assert_equal 6, results[12.0], 'Frequency of 12.0 bucket'
|
123
|
+
end
|
75
124
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: pg_histogram
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: '0.
|
4
|
+
version: '0.2'
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- David Roberts
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2016-02-12 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: activerecord
|
@@ -83,7 +83,6 @@ files:
|
|
83
83
|
- lib/pg_histogram/histogram.rb
|
84
84
|
- lib/pg_histogram/version.rb
|
85
85
|
- pg_histogram.gemspec
|
86
|
-
- test/database.yml
|
87
86
|
- test/histogram_test.rb
|
88
87
|
- test/test_helper.rb
|
89
88
|
homepage: https://github.com/eLocal/pg_histogram
|
@@ -111,6 +110,5 @@ signing_key:
|
|
111
110
|
specification_version: 4
|
112
111
|
summary: Histograms using PostgreSQL and ActiveRecord
|
113
112
|
test_files:
|
114
|
-
- test/database.yml
|
115
113
|
- test/histogram_test.rb
|
116
114
|
- test/test_helper.rb
|