postgresql_cursor 0.4.3 → 0.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.gitignore +24 -0
- data/Gemfile +4 -0
- data/Gemfile.lock +41 -0
- data/README.md +185 -0
- data/Rakefile +17 -46
- data/lib/postgresql_cursor.rb +12 -180
- data/lib/postgresql_cursor/active_record/connection_adapters/postgresql_type_map.rb +17 -0
- data/lib/postgresql_cursor/active_record/relation/cursor_iterators.rb +64 -0
- data/lib/postgresql_cursor/active_record/sql_cursor.rb +92 -0
- data/lib/postgresql_cursor/cursor.rb +199 -0
- data/lib/postgresql_cursor/version.rb +3 -0
- data/postgresql_cursor.gemspec +22 -43
- data/test-app/Gemfile +14 -0
- data/test-app/Gemfile.lock +34 -0
- data/test-app/app.rb +30 -0
- data/test-app/run.sh +10 -0
- data/test/helper.rb +12 -15
- data/test/test_postgresql_cursor.rb +41 -16
- metadata +68 -12
- data/README.rdoc +0 -97
data/test-app/Gemfile
ADDED
@@ -0,0 +1,14 @@
|
|
1
|
+
# A sample Gemfile
|
2
|
+
source "https://rubygems.org"
|
3
|
+
|
4
|
+
gem 'activerecord', '~> 3.2.0'
|
5
|
+
#gem 'activerecord', '~> 4.0.0'
|
6
|
+
#gem 'activerecord', '~> 4.1.0'
|
7
|
+
#gem 'activerecord', '4.1.2.rc1'
|
8
|
+
|
9
|
+
# For testing against Edge Rails
|
10
|
+
#gem 'activerecord', github: 'rails/rails', branch: 'master'
|
11
|
+
#gem 'arel', github: 'rails/arel', branch: 'master'
|
12
|
+
|
13
|
+
gem 'pg'
|
14
|
+
gem 'postgresql_cursor', path:"#{ENV['HOME']}/src/postgresql_cursor"
|
@@ -0,0 +1,34 @@
|
|
1
|
+
PATH
|
2
|
+
remote: /Users/allen/src/postgresql_cursor
|
3
|
+
specs:
|
4
|
+
postgresql_cursor (0.5.0)
|
5
|
+
activerecord (>= 3.2.0)
|
6
|
+
|
7
|
+
GEM
|
8
|
+
remote: https://rubygems.org/
|
9
|
+
specs:
|
10
|
+
activemodel (3.2.18)
|
11
|
+
activesupport (= 3.2.18)
|
12
|
+
builder (~> 3.0.0)
|
13
|
+
activerecord (3.2.18)
|
14
|
+
activemodel (= 3.2.18)
|
15
|
+
activesupport (= 3.2.18)
|
16
|
+
arel (~> 3.0.2)
|
17
|
+
tzinfo (~> 0.3.29)
|
18
|
+
activesupport (3.2.18)
|
19
|
+
i18n (~> 0.6, >= 0.6.4)
|
20
|
+
multi_json (~> 1.0)
|
21
|
+
arel (3.0.3)
|
22
|
+
builder (3.0.4)
|
23
|
+
i18n (0.6.9)
|
24
|
+
multi_json (1.10.1)
|
25
|
+
pg (0.17.1)
|
26
|
+
tzinfo (0.3.39)
|
27
|
+
|
28
|
+
PLATFORMS
|
29
|
+
ruby
|
30
|
+
|
31
|
+
DEPENDENCIES
|
32
|
+
activerecord (~> 3.2.0)
|
33
|
+
pg
|
34
|
+
postgresql_cursor!
|
data/test-app/app.rb
ADDED
@@ -0,0 +1,30 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
################################################################################
|
3
|
+
# To run this "app", do a "rake setup" first.
|
4
|
+
# To work with this app, run the "rake console" task, which loads this file.
|
5
|
+
################################################################################
|
6
|
+
require 'rubygems'
|
7
|
+
require 'bundler/setup'
|
8
|
+
require 'pg'
|
9
|
+
require 'active_record'
|
10
|
+
require 'postgresql_cursor'
|
11
|
+
|
12
|
+
ActiveRecord::Base.establish_connection( adapter: 'postgresql',
|
13
|
+
database: ENV['TEST_DATABASE'] || 'postgresql_cursor_test',
|
14
|
+
username: ENV['TEST_USER'] || ENV['USER'] || 'postgresql_cursor')
|
15
|
+
|
16
|
+
class Product < ActiveRecord::Base
|
17
|
+
def self.generate(max=1_000)
|
18
|
+
Product.destroy_all
|
19
|
+
max.times do |i|
|
20
|
+
connection.execute("insert into products values (#{i})")
|
21
|
+
end
|
22
|
+
end
|
23
|
+
|
24
|
+
def tests
|
25
|
+
Product.where("id>0").each_row(block_size:100) { |r| p r["id"] } # Hash
|
26
|
+
Product.where("id>0").each_instance(block_size:100) { |r| p r.id } # Instance
|
27
|
+
end
|
28
|
+
end
|
29
|
+
|
30
|
+
#Product.generate
|
data/test-app/run.sh
ADDED
@@ -0,0 +1,10 @@
|
|
1
|
+
#!/bin/sh
|
2
|
+
#bundle install
|
3
|
+
if [ "$1" = "irb" ]; then
|
4
|
+
bundle exec irb -Ilib -r postgresql_cursor
|
5
|
+
elif [ "$1" = "setup" ]; then
|
6
|
+
createdb postgresql_cursor_test
|
7
|
+
echo "create table products ( id serial);" | psql postgresql_cursor_test
|
8
|
+
else
|
9
|
+
bundle exec ruby app.rb
|
10
|
+
fi
|
data/test/helper.rb
CHANGED
@@ -1,25 +1,22 @@
|
|
1
|
+
$LOAD_PATH.unshift(File.dirname(__FILE__))
|
2
|
+
$LOAD_PATH.unshift(File.join(File.dirname(__FILE__), '..', 'lib'))
|
1
3
|
require 'rubygems'
|
2
4
|
require 'minitest'
|
3
5
|
require 'active_record'
|
4
|
-
|
5
|
-
$LOAD_PATH.unshift(File.dirname(__FILE__))
|
6
|
-
$LOAD_PATH.unshift(File.join(File.dirname(__FILE__), '..', 'lib'))
|
7
6
|
require 'postgresql_cursor'
|
8
7
|
|
9
|
-
ActiveRecord::Base.establish_connection
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
8
|
+
ActiveRecord::Base.establish_connection(adapter: 'postgresql',
|
9
|
+
database: ENV['TEST_DATABASE'] || 'postgresql_cursor_test',
|
10
|
+
username: ENV['TEST_USER'] || ENV['USER'] || 'postgresql_cursor')
|
11
|
+
|
12
|
+
class Product < ActiveRecord::Base
|
14
13
|
# create table records (id serial primary key);
|
15
|
-
def self.generate(max=
|
16
|
-
max.times do
|
17
|
-
connection.execute("insert into
|
14
|
+
def self.generate(max=1_000)
|
15
|
+
max.times do |i|
|
16
|
+
connection.execute("insert into products values (#{i+1})")
|
18
17
|
end
|
19
18
|
end
|
20
19
|
end
|
21
20
|
|
22
|
-
|
23
|
-
|
24
|
-
class MiniTest::Unit::TestCase
|
25
|
-
end
|
21
|
+
Product.destroy_all
|
22
|
+
Product.generate(1000)
|
@@ -1,57 +1,82 @@
|
|
1
|
-
|
2
|
-
#
|
3
|
-
#
|
1
|
+
################################################################################
|
2
|
+
# Before running test, set up the test db & table with:
|
3
|
+
# rake setup
|
4
|
+
# or create the database manually if your environment doesn't permit
|
5
|
+
################################################################################
|
6
|
+
require_relative 'helper'
|
7
|
+
require 'minitest/autorun'
|
8
|
+
require 'minitest/pride'
|
4
9
|
|
5
10
|
class TestPostgresqlCursor < Minitest::Test
|
6
11
|
|
7
12
|
def test_each
|
8
|
-
c = PostgreSQLCursor.new("select * from
|
13
|
+
c = PostgreSQLCursor::Cursor.new("select * from products order by 1")
|
9
14
|
nn = 0
|
10
15
|
n = c.each { nn += 1}
|
11
16
|
assert_equal nn, n
|
12
17
|
end
|
13
18
|
|
14
19
|
def test_enumerables
|
15
|
-
assert_equal true, PostgreSQLCursor.new("select * from
|
16
|
-
assert_equal false, PostgreSQLCursor.new("select * from
|
20
|
+
assert_equal true, PostgreSQLCursor::Cursor.new("select * from products order by 1").any?
|
21
|
+
assert_equal false, PostgreSQLCursor::Cursor.new("select * from products where id<0").any?
|
17
22
|
end
|
18
23
|
|
19
24
|
def test_each_while_until
|
20
|
-
c = PostgreSQLCursor.new("select * from
|
25
|
+
c = PostgreSQLCursor::Cursor.new("select * from products order by 1", until:true)
|
21
26
|
n = c.each { |r| r[:id].to_i > 100 }
|
22
|
-
assert_equal
|
27
|
+
assert_equal 1000, n
|
23
28
|
|
24
|
-
c = PostgreSQLCursor.new("select * from
|
29
|
+
c = PostgreSQLCursor::Cursor.new("select * from products order by 1", while:true)
|
25
30
|
n = c.each { |r| r[:id].to_i < 100 }
|
26
|
-
assert_equal
|
31
|
+
assert_equal 1000, n
|
27
32
|
end
|
28
33
|
|
29
34
|
def test_relation
|
30
35
|
nn = 0
|
31
|
-
|
36
|
+
Product.where("id>0").each_row {|r| nn += 1 }
|
32
37
|
assert_equal 1000, nn
|
33
38
|
end
|
34
39
|
|
35
40
|
def test_activerecord
|
36
41
|
nn = 0
|
37
|
-
|
42
|
+
row = nil
|
43
|
+
Product.each_row_by_sql("select * from products") {|r| row = r; nn += 1 }
|
38
44
|
assert_equal 1000, nn
|
45
|
+
assert_equal Hash, row.class
|
39
46
|
|
40
47
|
nn = 0
|
41
|
-
row =
|
42
|
-
Model.each_instance_by_sql("select * from records") {|r| row = r; nn += 1 }
|
48
|
+
Product.each_instance_by_sql("select * from products") {|r| row = r; nn += 1 }
|
43
49
|
assert_equal 1000, nn
|
44
|
-
assert_equal
|
50
|
+
assert_equal Product, row.class
|
45
51
|
end
|
46
52
|
|
47
53
|
def test_exception
|
48
54
|
begin
|
49
|
-
|
55
|
+
Product.each_row_by_sql("select * from products") do |r|
|
50
56
|
raise "Oops"
|
51
57
|
end
|
52
58
|
rescue Exception => e
|
53
59
|
assert_equal e.message, 'Oops'
|
54
60
|
end
|
55
61
|
end
|
62
|
+
|
63
|
+
def test_cursor
|
64
|
+
cursor = Product.all.each_row
|
65
|
+
assert cursor.respond_to?(:each)
|
66
|
+
r = cursor.map { |row| row["id"] }
|
67
|
+
assert_equal 1000, r.size
|
68
|
+
cursor = Product.each_row_by_sql("select * from products")
|
69
|
+
assert cursor.respond_to?(:each)
|
70
|
+
r = cursor.map { |row| row["id"] }
|
71
|
+
assert_equal 1000, r.size
|
72
|
+
end
|
73
|
+
|
74
|
+
def test_pluck
|
75
|
+
r = Product.pluck_rows(:id)
|
76
|
+
assert_equal 1000, r.size
|
77
|
+
r = Product.all.pluck_instances(:id)
|
78
|
+
assert_equal 1000, r.size
|
79
|
+
assert_equal Fixnum, r.first.class
|
80
|
+
end
|
56
81
|
|
57
82
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: postgresql_cursor
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 0.5.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Allen Fair
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2014-06-
|
11
|
+
date: 2014-06-26 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: activerecord
|
@@ -16,9 +16,51 @@ dependencies:
|
|
16
16
|
requirements:
|
17
17
|
- - ">="
|
18
18
|
- !ruby/object:Gem::Version
|
19
|
-
version:
|
19
|
+
version: 3.2.0
|
20
20
|
type: :runtime
|
21
21
|
prerelease: false
|
22
|
+
version_requirements: !ruby/object:Gem::Requirement
|
23
|
+
requirements:
|
24
|
+
- - ">="
|
25
|
+
- !ruby/object:Gem::Version
|
26
|
+
version: 3.2.0
|
27
|
+
- !ruby/object:Gem::Dependency
|
28
|
+
name: pg
|
29
|
+
requirement: !ruby/object:Gem::Requirement
|
30
|
+
requirements:
|
31
|
+
- - ">="
|
32
|
+
- !ruby/object:Gem::Version
|
33
|
+
version: '0'
|
34
|
+
type: :development
|
35
|
+
prerelease: false
|
36
|
+
version_requirements: !ruby/object:Gem::Requirement
|
37
|
+
requirements:
|
38
|
+
- - ">="
|
39
|
+
- !ruby/object:Gem::Version
|
40
|
+
version: '0'
|
41
|
+
- !ruby/object:Gem::Dependency
|
42
|
+
name: rake
|
43
|
+
requirement: !ruby/object:Gem::Requirement
|
44
|
+
requirements:
|
45
|
+
- - ">="
|
46
|
+
- !ruby/object:Gem::Version
|
47
|
+
version: '0'
|
48
|
+
type: :development
|
49
|
+
prerelease: false
|
50
|
+
version_requirements: !ruby/object:Gem::Requirement
|
51
|
+
requirements:
|
52
|
+
- - ">="
|
53
|
+
- !ruby/object:Gem::Version
|
54
|
+
version: '0'
|
55
|
+
- !ruby/object:Gem::Dependency
|
56
|
+
name: minitest
|
57
|
+
requirement: !ruby/object:Gem::Requirement
|
58
|
+
requirements:
|
59
|
+
- - ">="
|
60
|
+
- !ruby/object:Gem::Version
|
61
|
+
version: '0'
|
62
|
+
type: :development
|
63
|
+
prerelease: false
|
22
64
|
version_requirements: !ruby/object:Gem::Requirement
|
23
65
|
requirements:
|
24
66
|
- - ">="
|
@@ -27,25 +69,37 @@ dependencies:
|
|
27
69
|
description: PostgreSQL Cursor is an extension to the ActiveRecord PostgreSQLAdapter
|
28
70
|
for very large result sets. It provides a cursor open/fetch/close interface to access
|
29
71
|
data without loading all rows into memory, and instead loads the result rows in
|
30
|
-
"chunks" (default of
|
31
|
-
email:
|
72
|
+
"chunks" (default of 1_000 rows), buffers them, and returns the rows one at a time.
|
73
|
+
email:
|
74
|
+
- allen.fair@gmail.com
|
32
75
|
executables: []
|
33
76
|
extensions: []
|
34
|
-
extra_rdoc_files:
|
35
|
-
- LICENSE
|
36
|
-
- README.rdoc
|
77
|
+
extra_rdoc_files: []
|
37
78
|
files:
|
38
79
|
- ".document"
|
80
|
+
- ".gitignore"
|
81
|
+
- Gemfile
|
82
|
+
- Gemfile.lock
|
39
83
|
- LICENSE
|
40
|
-
- README.
|
84
|
+
- README.md
|
41
85
|
- Rakefile
|
42
86
|
- VERSION
|
43
87
|
- lib/postgresql_cursor.rb
|
88
|
+
- lib/postgresql_cursor/active_record/connection_adapters/postgresql_type_map.rb
|
89
|
+
- lib/postgresql_cursor/active_record/relation/cursor_iterators.rb
|
90
|
+
- lib/postgresql_cursor/active_record/sql_cursor.rb
|
91
|
+
- lib/postgresql_cursor/cursor.rb
|
92
|
+
- lib/postgresql_cursor/version.rb
|
44
93
|
- postgresql_cursor.gemspec
|
94
|
+
- test-app/Gemfile
|
95
|
+
- test-app/Gemfile.lock
|
96
|
+
- test-app/app.rb
|
97
|
+
- test-app/run.sh
|
45
98
|
- test/helper.rb
|
46
99
|
- test/test_postgresql_cursor.rb
|
47
100
|
homepage: http://github.com/afair/postgresql_cursor
|
48
|
-
licenses:
|
101
|
+
licenses:
|
102
|
+
- MIT
|
49
103
|
metadata: {}
|
50
104
|
post_install_message:
|
51
105
|
rdoc_options: []
|
@@ -63,9 +117,11 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
63
117
|
version: '0'
|
64
118
|
requirements: []
|
65
119
|
rubyforge_project:
|
66
|
-
rubygems_version: 2.2.
|
120
|
+
rubygems_version: 2.2.2
|
67
121
|
signing_key:
|
68
122
|
specification_version: 4
|
69
123
|
summary: ActiveRecord PostgreSQL Adapter extension for using a cursor to return a
|
70
124
|
large result set
|
71
|
-
test_files:
|
125
|
+
test_files:
|
126
|
+
- test/helper.rb
|
127
|
+
- test/test_postgresql_cursor.rb
|
data/README.rdoc
DELETED
@@ -1,97 +0,0 @@
|
|
1
|
-
= PostgreSQLCursor for handling large Result Sets
|
2
|
-
|
3
|
-
{<img src="https://badge.fury.io/rb/postgresql_cursor.svg" alt="Gem Version" />}[http://badge.fury.io/rb/postgresql_cursor]
|
4
|
-
|
5
|
-
PostgreSQLCursor extends ActiveRecord to allow for efficient processing of queries
|
6
|
-
returning a large number of rows.
|
7
|
-
|
8
|
-
Note: Rails/ActiveRecord 4.x support has been reworked in the v0.5 Branch. For AR < 4.x, use the gem 0.4.x series.
|
9
|
-
|
10
|
-
== Why use this?
|
11
|
-
|
12
|
-
ActiveRecord is designed and optimized for web performance. In a web transaction, only a "page" of
|
13
|
-
around 20 rows is returned to the user. When you do this
|
14
|
-
|
15
|
-
Model.find(:all, :conditions=>["id>0"])
|
16
|
-
|
17
|
-
The database returns all matching result set rows to ActiveRecord, which instantiates each row with
|
18
|
-
the data returned. This function returns an array of all these rows to the caller.
|
19
|
-
|
20
|
-
Asynchronous, Background, or Offline processing may require processing a large amount of data.
|
21
|
-
When there is a very large number of rows, this requires a lot more memory to hold the data. Ruby
|
22
|
-
does not return that memory after processing the array, and this causes your process to "bloat". If you
|
23
|
-
don't have enough memory, it will cause an exception.
|
24
|
-
|
25
|
-
== Enter find_each
|
26
|
-
|
27
|
-
To solve this problem, ActiveRecord gives us two alternative methods that work in "chunks" of your data:
|
28
|
-
|
29
|
-
Model.where("id>0").find_each { |model| model.process! }
|
30
|
-
|
31
|
-
Model.where("id>0").find_in_batches do |batch|
|
32
|
-
batch.each { |model| model.process! }
|
33
|
-
end
|
34
|
-
|
35
|
-
Optionally, you can specify a :batch_size option as the size of the "chunk", which defaults to 1000.
|
36
|
-
|
37
|
-
There are drawbacks with these methods:
|
38
|
-
|
39
|
-
* You cannot specify the order, it will be ordered by the primary key (usually id)
|
40
|
-
* The primary key must be numeric
|
41
|
-
* The query is rerun for each chunk (1000 rows), starting at the next id sequence.
|
42
|
-
* You cannot use overly complex queries as that will be rerun and incur more overhead.
|
43
|
-
|
44
|
-
== PostgreSQLCursor FTW!
|
45
|
-
|
46
|
-
PostgreSQLCursor was developed to take advantage of PostgreSQL's cursors. Cursors allow the program
|
47
|
-
to declare a cursor to run a given query returning "chunks" of rows to the application program while
|
48
|
-
retaining the position of the full result set in the database. This overcomes all the disadvantages
|
49
|
-
of using find_each and find_in_batches.
|
50
|
-
|
51
|
-
Also, with PostgreSQL, you have an option to have raw hashes of the row returned instead of the
|
52
|
-
instantiated models. An informal benchmark showed that returning instances is a factor of 4 times
|
53
|
-
slower than returning hashes. If you can work with the data in this form, you will find better
|
54
|
-
performance.
|
55
|
-
|
56
|
-
With PostgreSQL, you can work with cursors as follows:
|
57
|
-
|
58
|
-
Model.where("id>0").each_row { |hash| Model.process(hash) }
|
59
|
-
|
60
|
-
Model.where("id>0").each_instance { |model| model.process! }
|
61
|
-
Model.where("id>0").each_instance(block_size:100000) { |model| model.process! }
|
62
|
-
|
63
|
-
Model.each_row_by_sql("select * from models") { |hash| Model.process(hash) }
|
64
|
-
|
65
|
-
Model.each_instance_by_sql("select * from models") { |model| model.process }
|
66
|
-
|
67
|
-
All these methods take an options hash to control things more:
|
68
|
-
|
69
|
-
block_size:n The number of rows to fetch from the database each time (default 1000)
|
70
|
-
while:value Continue looping as long as the block returns this value
|
71
|
-
until:value Continue looping until the block returns this value
|
72
|
-
connection:conn Use this connection instead of the current model connection
|
73
|
-
fraction:float A value to set for the cursor_tuple_fraction variable.
|
74
|
-
PostgreSQL uses 0.1 (optimize for 10% of result set)
|
75
|
-
This library uses 1.0 (Optimize for 100% of the result set)
|
76
|
-
Do not override this value unless you understand it.
|
77
|
-
|
78
|
-
==Authors
|
79
|
-
Allen Fair, allen.fair@gmail.com, http://github.com/afair
|
80
|
-
|
81
|
-
Thank you to:
|
82
|
-
* Iulian Dogariu, http://github.com/iulianu (Fixes)
|
83
|
-
* Julian Mehnle, julian@mehnle.net (Suggestions)
|
84
|
-
|
85
|
-
== Note on Patches/Pull Requests
|
86
|
-
|
87
|
-
* Fork the project.
|
88
|
-
* Make your feature addition or bug fix.
|
89
|
-
* Add tests for it. This is important so I don't break it in a
|
90
|
-
future version unintentionally.
|
91
|
-
* Commit, do not mess with rakefile, version, or history.
|
92
|
-
(if you want to have your own version, that is fine but bump version in a commit by itself I can ignore when I pull)
|
93
|
-
* Send me a pull request. Bonus points for topic branches.
|
94
|
-
|
95
|
-
== Copyright
|
96
|
-
|
97
|
-
Copyright (c) 2010 Allen Fair. See LICENSE for details.
|