pg_histogram 0.1 → 0.2
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +17 -16
- data/lib/pg_histogram/histogram.rb +59 -9
- data/lib/pg_histogram/version.rb +1 -1
- data/test/histogram_test.rb +50 -1
- metadata +2 -4
- data/test/database.yml +0 -6
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 8fa7ea082187e3b4ded20bbcd8580ad3ff06f7ee
|
4
|
+
data.tar.gz: 510d7260c0cff0c67e2cccacbb4b788859b7a58f
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: b27b4ef2219e53ee2b407743419dd5d4756ba7ddc0b21a916e69aa5c05a709ff0bbdf14424f8f6913078bd17fc812ec3af4692cbe635144595fcabe74c9ab4a1
|
7
|
+
data.tar.gz: 41fa94e1a691710b4b88d5f1656aea2fad5fca5e3a05d70caeb54c09fee4d2286c6416d0c61363c97821e7c8a1c0cdad6a57f2f7b3fe8f0c58005dd4babb3b40
|
data/README.md
CHANGED
@@ -2,9 +2,7 @@
|
|
2
2
|
|
3
3
|
This gem allows you to efficiently create a histogram from large data sets in your Rails applications.
|
4
4
|
|
5
|
-
It uses PostgreSQL's [width_bucket](http://www.postgresql.org/docs/9.3/static/functions-math.html) function to handle the majority of the processing in the database, and only requires 3 database queries.
|
6
|
-
|
7
|
-
|
5
|
+
It uses PostgreSQL's [width_bucket](http://www.postgresql.org/docs/9.3/static/functions-math.html) function to handle the majority of the processing in the database, and only requires 3 database queries (and only one query if min and max values are specified).
|
8
6
|
|
9
7
|
## Installation
|
10
8
|
|
@@ -22,28 +20,31 @@ Or install it yourself as:
|
|
22
20
|
|
23
21
|
## Usage
|
24
22
|
|
25
|
-
Create a Histogram object using the following
|
26
|
-
|
27
|
-
1. ActiveRecord query to use
|
28
|
-
2. Name of column to count frequency of
|
29
|
-
3. Bucket size (OPTIONAL - default is 0.5)
|
30
|
-
|
31
|
-
<!-- -->
|
32
|
-
histogram = PgHistogram::Histogram.new(Widget.all, 'price', 0.5)
|
23
|
+
Create a Histogram object using the following parameters:
|
33
24
|
|
25
|
+
1. ActiveRecord Relation (query) to use.
|
26
|
+
2. Name of column to count frequency of. Also allows for aliased queries such as `'price*discount as final_price'` to create histograms on expressions.
|
27
|
+
3. Options hash (optional). Not all combinations are allowed. For example, if `:buckets` is specified, `:min` and `:max` are required and any `:bucket_size` is ignored, because the bucket size is calculated instead. If `:buckets` is not specified, the number of buckets depends on `:bucket_size`, and `:min` and `:max` are optional.
|
28
|
+
- `:buckets`: number of buckets (integer)
|
29
|
+
- `:min` and `:max`: See [width_bucket](http://www.postgresql.org/docs/9.3/static/functions-math.html)'s docs for exact meaning (defaults to the min and max values of the column).
|
30
|
+
- `:bucket_size`: Width of each bucket (defaults to 1).
|
34
31
|
|
35
|
-
|
32
|
+
### Example
|
33
|
+
Create sample data:
|
36
34
|
|
37
|
-
# create sample data
|
38
35
|
5.times { Widget.create(price: 1.2) }
|
39
36
|
10.times { Widget.create(price: 2.9) }
|
40
37
|
|
41
|
-
|
38
|
+
Create the histogram object:
|
39
|
+
|
40
|
+
histogram = PgHistogram::Histogram.new(Widget.all, 'price', 0.5)
|
41
|
+
|
42
|
+
Call the results method to retrieve a Hash of bucket minimums and frequency counts:
|
43
|
+
|
42
44
|
@histogram_data = histogram.results
|
43
45
|
=> {1.0=>5, 2.5=>10}
|
44
46
|
|
45
|
-
|
46
|
-
The results can be used by your favorite charting library, such as [Chartkick](https://github.com/ankane/chartkick), to plot the data.
|
47
|
+
The results can be used by your favorite charting library, such as [Chartkick](https://github.com/ankane/chartkick), to plot the data:
|
47
48
|
|
48
49
|
<%= column_chart @histogram_data %>
|
49
50
|
|
@@ -11,10 +11,23 @@ module PgHistogram
|
|
11
11
|
}
|
12
12
|
|
13
13
|
# column_name must be safe from SQL injection (it is interpolated into raw SQL)
|
14
|
-
def initialize(query, column_name,
|
14
|
+
def initialize(query, column_name, options = {})
|
15
15
|
@query = query
|
16
16
|
@column = column_name.to_s
|
17
|
-
|
17
|
+
if options.is_a? Hash
|
18
|
+
if options[:buckets]
|
19
|
+
@min = options[:min] || 0
|
20
|
+
@max = options[:max]
|
21
|
+
@buckets = options[:buckets]
|
22
|
+
@bucket_size = calculate_bucket_size
|
23
|
+
else
|
24
|
+
@min = options[:min]
|
25
|
+
@max = options[:max]
|
26
|
+
@bucket_size = (options[:bucket_size] || 1).to_f
|
27
|
+
end
|
28
|
+
else
|
29
|
+
@bucket_size = options.to_f
|
30
|
+
end
|
18
31
|
end
|
19
32
|
|
20
33
|
# returns histogram as hash
|
@@ -23,22 +36,34 @@ module PgHistogram
|
|
23
36
|
def results
|
24
37
|
# error handling case
|
25
38
|
if max == min
|
26
|
-
{ min =>
|
39
|
+
{ min => subquery.where("#{pure_column} = ?", min).count }
|
27
40
|
else
|
28
41
|
labeled_histogram
|
29
42
|
end
|
30
43
|
end
|
31
44
|
|
32
45
|
def min
|
33
|
-
@min ||= round_to_increment(
|
46
|
+
@min ||= round_to_increment(source_min, :down)
|
34
47
|
end
|
35
48
|
|
36
49
|
def max
|
37
|
-
@max ||= round_to_increment(
|
50
|
+
@max ||= round_to_increment(source_max, :up)
|
38
51
|
end
|
39
52
|
|
40
53
|
private
|
41
54
|
|
55
|
+
def source_min
|
56
|
+
@source_min ||= subquery.minimum(pure_column(true))
|
57
|
+
end
|
58
|
+
|
59
|
+
def source_max
|
60
|
+
@source_max ||= subquery.maximum(pure_column(true))
|
61
|
+
end
|
62
|
+
|
63
|
+
def calculate_bucket_size
|
64
|
+
(source_max - source_min).to_f / @buckets
|
65
|
+
end
|
66
|
+
|
42
67
|
def num_buckets
|
43
68
|
@buckets ||= ((max - min) / bucket_size).to_i
|
44
69
|
end
|
@@ -68,19 +93,44 @@ module PgHistogram
|
|
68
93
|
def query_for_buckets
|
69
94
|
ActiveRecord::Base.connection.execute(
|
70
95
|
<<-SQL
|
71
|
-
SELECT width_bucket(#{
|
96
|
+
SELECT width_bucket(#{pure_column}, #{min}, #{max}, #{num_buckets}) as #{BUCKET_COL},
|
72
97
|
count(*) as #{FREQUENCY_COL}
|
73
|
-
FROM (#{
|
98
|
+
FROM (#{subquery_sql}) as subq_results
|
74
99
|
GROUP BY #{BUCKET_COL}
|
75
100
|
ORDER BY #{BUCKET_COL}
|
76
101
|
SQL
|
77
102
|
)
|
78
103
|
end
|
79
|
-
|
80
104
|
# use passed AR query as a subquery to not interfere with group clause
|
81
105
|
def subquery
|
82
106
|
# override default order
|
83
107
|
query.select(column).order('1')
|
84
108
|
end
|
109
|
+
|
110
|
+
# Use unprepared statement per https://github.com/rails/rails/issues/8743
|
111
|
+
def subquery_sql
|
112
|
+
ActiveRecord::Base.connection.unprepared_statement do
|
113
|
+
subquery.to_sql
|
114
|
+
end
|
115
|
+
end
|
116
|
+
|
117
|
+
# In case the column has an alias, the pure column is just the aliased name
|
118
|
+
# If expression is true, only the expression (before the 'AS') is returned
|
119
|
+
def pure_column(expression = false)
|
120
|
+
index = column =~ / as /i
|
121
|
+
# If AS is present, split and keep either side
|
122
|
+
if index
|
123
|
+
if expression
|
124
|
+
# Keep left side
|
125
|
+
column[0..index]
|
126
|
+
else
|
127
|
+
# Keep right side
|
128
|
+
column[index + 4..-1]
|
129
|
+
end
|
130
|
+
else
|
131
|
+
# Column was already good.
|
132
|
+
column
|
133
|
+
end
|
134
|
+
end
|
85
135
|
end
|
86
|
-
end
|
136
|
+
end
|
data/lib/pg_histogram/version.rb
CHANGED
data/test/histogram_test.rb
CHANGED
@@ -60,7 +60,7 @@ class HistogramTest < Minitest::Test
|
|
60
60
|
end
|
61
61
|
|
62
62
|
def test_rounding_to_bucket_size
|
63
|
-
hist = PgHistogram::Histogram.new(nil, nil, 0.25)
|
63
|
+
hist = PgHistogram::Histogram.new(nil, nil, bucket_size: 0.25)
|
64
64
|
|
65
65
|
assert_equal 0.5, hist.send(:round_to_increment, 0.478), '0.478 rounded to 0.25 interval'
|
66
66
|
assert_equal 1.0, hist.send(:round_to_increment, 1.1), '1.1 rounded to 0.25 interval'
|
@@ -72,4 +72,53 @@ class HistogramTest < Minitest::Test
|
|
72
72
|
assert_equal 1.25, hist.send(:round_to_increment, 1.1, :up), '1.1 rounded up to 0.25 interval'
|
73
73
|
assert_equal 0.5, hist.send(:round_to_increment, 0.5, :up), '0.5 rounded up to 0.25 interval'
|
74
74
|
end
|
75
|
+
|
76
|
+
def test_with_buckets_option
|
77
|
+
# Specify number of buckets
|
78
|
+
hist = PgHistogram::Histogram.new(Widget.all, 'price', {buckets: 5, min: 0, max: 10})
|
79
|
+
|
80
|
+
10.times { Widget.create!(price: 3.0) }
|
81
|
+
8.times { Widget.create!(price: 5.76) }
|
82
|
+
results = hist.results
|
83
|
+
|
84
|
+
assert_equal 0, hist.min, 'Histogram minimum price'
|
85
|
+
assert_equal 10, hist.max, 'Histogram maximum price'
|
86
|
+
assert_equal 5, hist.send(:num_buckets), 'Histogram buckets'
|
87
|
+
assert_equal 2, results.size, 'Histogram buckets with results'
|
88
|
+
assert_equal 10, results[2.0], 'Frequency of 2.0 bucket'
|
89
|
+
assert_equal 8, results[4.0], 'Frequency of 4.0 bucket'
|
90
|
+
end
|
91
|
+
|
92
|
+
def test_with_min_max_options
|
93
|
+
# Specify min and max, leaving the bucket size at its default
|
94
|
+
hist = PgHistogram::Histogram.new(Widget.all, 'price', {min: 0, max: 10})
|
95
|
+
|
96
|
+
10.times { Widget.create!(price: 3.0) }
|
97
|
+
8.times { Widget.create!(price: 5.76) }
|
98
|
+
min_price = Widget.create!(price: 0.98).price
|
99
|
+
max_price = Widget.create!(price: 6.0).price
|
100
|
+
results = hist.results
|
101
|
+
|
102
|
+
assert_equal 0, hist.min, 'Histogram minimum price'
|
103
|
+
assert_equal 10, hist.max, 'Histogram maximum price'
|
104
|
+
assert_equal 10, hist.send(:num_buckets), 'Histogram buckets'
|
105
|
+
assert_equal 4, results.size, 'Histogram buckets with results'
|
106
|
+
assert_equal 10, results[3.0], 'Frequency of 3 bucket'
|
107
|
+
assert_equal nil, results[4.0], 'Frequency of 4.0 bucket'
|
108
|
+
end
|
109
|
+
def test_with_aliased_expression
|
110
|
+
# Use an aliased expression as the column
|
111
|
+
hist = PgHistogram::Histogram.new(Widget.all, 'price*2 as double_price')
|
112
|
+
|
113
|
+
5.times { Widget.create!(price: 3.0) }
|
114
|
+
6.times { Widget.create!(price: 6.0) }
|
115
|
+
results = hist.results
|
116
|
+
|
117
|
+
assert_equal 6, hist.min, 'Histogram minimum price'
|
118
|
+
assert_equal 12, hist.max, 'Histogram maximum price'
|
119
|
+
assert_equal 6, hist.send(:num_buckets), 'Histogram buckets'
|
120
|
+
assert_equal 2, results.size, 'Histogram buckets with results'
|
121
|
+
assert_equal 5, results[6.0], 'Frequency of 6.0 bucket'
|
122
|
+
assert_equal 6, results[12.0], 'Frequency of 12.0 bucket'
|
123
|
+
end
|
75
124
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: pg_histogram
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: '0.
|
4
|
+
version: '0.2'
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- David Roberts
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2016-02-12 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: activerecord
|
@@ -83,7 +83,6 @@ files:
|
|
83
83
|
- lib/pg_histogram/histogram.rb
|
84
84
|
- lib/pg_histogram/version.rb
|
85
85
|
- pg_histogram.gemspec
|
86
|
-
- test/database.yml
|
87
86
|
- test/histogram_test.rb
|
88
87
|
- test/test_helper.rb
|
89
88
|
homepage: https://github.com/eLocal/pg_histogram
|
@@ -111,6 +110,5 @@ signing_key:
|
|
111
110
|
specification_version: 4
|
112
111
|
summary: Histograms using PostgreSQL and ActiveRecord
|
113
112
|
test_files:
|
114
|
-
- test/database.yml
|
115
113
|
- test/histogram_test.rb
|
116
114
|
- test/test_helper.rb
|