postgresql_cursor 0.4.3 → 0.5.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,14 @@
1
+ # A sample Gemfile
2
+ source "https://rubygems.org"
3
+
4
+ gem 'activerecord', '~> 3.2.0'
5
+ #gem 'activerecord', '~> 4.0.0'
6
+ #gem 'activerecord', '~> 4.1.0'
7
+ #gem 'activerecord', '4.1.2.rc1'
8
+
9
+ # For testing against Edge Rails
10
+ #gem 'activerecord', github: 'rails/rails', branch: 'master'
11
+ #gem 'arel', github: 'rails/arel', branch: 'master'
12
+
13
+ gem 'pg'
14
+ gem 'postgresql_cursor', path:"#{ENV['HOME']}/src/postgresql_cursor"
@@ -0,0 +1,34 @@
1
+ PATH
2
+ remote: /Users/allen/src/postgresql_cursor
3
+ specs:
4
+ postgresql_cursor (0.5.0)
5
+ activerecord (>= 3.2.0)
6
+
7
+ GEM
8
+ remote: https://rubygems.org/
9
+ specs:
10
+ activemodel (3.2.18)
11
+ activesupport (= 3.2.18)
12
+ builder (~> 3.0.0)
13
+ activerecord (3.2.18)
14
+ activemodel (= 3.2.18)
15
+ activesupport (= 3.2.18)
16
+ arel (~> 3.0.2)
17
+ tzinfo (~> 0.3.29)
18
+ activesupport (3.2.18)
19
+ i18n (~> 0.6, >= 0.6.4)
20
+ multi_json (~> 1.0)
21
+ arel (3.0.3)
22
+ builder (3.0.4)
23
+ i18n (0.6.9)
24
+ multi_json (1.10.1)
25
+ pg (0.17.1)
26
+ tzinfo (0.3.39)
27
+
28
+ PLATFORMS
29
+ ruby
30
+
31
+ DEPENDENCIES
32
+ activerecord (~> 3.2.0)
33
+ pg
34
+ postgresql_cursor!
@@ -0,0 +1,30 @@
1
+ #!/usr/bin/env ruby
2
+ ################################################################################
3
+ # To run this "app", do a "rake setup" first.
4
+ # To work with this app, run the "rake console" task, which loads this file.
5
+ ################################################################################
6
+ require 'rubygems'
7
+ require 'bundler/setup'
8
+ require 'pg'
9
+ require 'active_record'
10
+ require 'postgresql_cursor'
11
+
12
+ ActiveRecord::Base.establish_connection( adapter: 'postgresql',
13
+ database: ENV['TEST_DATABASE'] || 'postgresql_cursor_test',
14
+ username: ENV['TEST_USER'] || ENV['USER'] || 'postgresql_cursor')
15
+
16
+ class Product < ActiveRecord::Base
17
+ def self.generate(max=1_000)
18
+ Product.destroy_all
19
+ max.times do |i|
20
+ connection.execute("insert into products values (#{i})")
21
+ end
22
+ end
23
+
24
+ def tests
25
+ Product.where("id>0").each_row(block_size:100) { |r| p r["id"] } # Hash
26
+ Product.where("id>0").each_instance(block_size:100) { |r| p r.id } # Instance
27
+ end
28
+ end
29
+
30
+ #Product.generate
@@ -0,0 +1,10 @@
1
+ #!/bin/sh
2
+ #bundle install
3
+ if [ "$1" = "irb" ]; then
4
+ bundle exec irb -Ilib -r postgresql_cursor
5
+ elif [ "$1" = "setup" ]; then
6
+ createdb postgresql_cursor_test
7
+ echo "create table products ( id serial);" | psql postgresql_cursor_test
8
+ else
9
+ bundle exec ruby app.rb
10
+ fi
@@ -1,25 +1,22 @@
1
+ $LOAD_PATH.unshift(File.dirname(__FILE__))
2
+ $LOAD_PATH.unshift(File.join(File.dirname(__FILE__), '..', 'lib'))
1
3
  require 'rubygems'
2
4
  require 'minitest'
3
5
  require 'active_record'
4
-
5
- $LOAD_PATH.unshift(File.dirname(__FILE__))
6
- $LOAD_PATH.unshift(File.join(File.dirname(__FILE__), '..', 'lib'))
7
6
  require 'postgresql_cursor'
8
7
 
9
- ActiveRecord::Base.establish_connection :database=>'allen_test', :adapter=>'postgresql', :username=>'allen'
10
- class Model < ActiveRecord::Base
11
- #set_table_name "records"
12
- self.table_name = "records"
13
-
8
+ ActiveRecord::Base.establish_connection(adapter: 'postgresql',
9
+ database: ENV['TEST_DATABASE'] || 'postgresql_cursor_test',
10
+ username: ENV['TEST_USER'] || ENV['USER'] || 'postgresql_cursor')
11
+
12
+ class Product < ActiveRecord::Base
14
13
  # create table records (id serial primary key);
15
- def self.generate(max=1_000_000)
16
- max.times do
17
- connection.execute("insert into records values (nextval('records_id_seq'::regclass))")
14
+ def self.generate(max=1_000)
15
+ max.times do |i|
16
+ connection.execute("insert into products values (#{i+1})")
18
17
  end
19
18
  end
20
19
  end
21
20
 
22
- Model.generate(1000) if Model.count == 0
23
-
24
- class MiniTest::Unit::TestCase
25
- end
21
+ Product.destroy_all
22
+ Product.generate(1000)
@@ -1,57 +1,82 @@
1
- require 'helper'
2
- # database: allen_test
3
- # create table records ( id serial);
1
+ ################################################################################
2
+ # Before running test, set up the test db & table with:
3
+ # rake setup
4
+ # or create the database manually if your environment doesn't permit
5
+ ################################################################################
6
+ require_relative 'helper'
7
+ require 'minitest/autorun'
8
+ require 'minitest/pride'
4
9
 
5
10
  class TestPostgresqlCursor < Minitest::Test
6
11
 
7
12
  def test_each
8
- c = PostgreSQLCursor.new("select * from records order by 1")
13
+ c = PostgreSQLCursor::Cursor.new("select * from products order by 1")
9
14
  nn = 0
10
15
  n = c.each { nn += 1}
11
16
  assert_equal nn, n
12
17
  end
13
18
 
14
19
  def test_enumerables
15
- assert_equal true, PostgreSQLCursor.new("select * from records order by 1").any?
16
- assert_equal false, PostgreSQLCursor.new("select * from records where id<0").any?
20
+ assert_equal true, PostgreSQLCursor::Cursor.new("select * from products order by 1").any?
21
+ assert_equal false, PostgreSQLCursor::Cursor.new("select * from products where id<0").any?
17
22
  end
18
23
 
19
24
  def test_each_while_until
20
- c = PostgreSQLCursor.new("select * from records order by 1", until:true)
25
+ c = PostgreSQLCursor::Cursor.new("select * from products order by 1", until:true)
21
26
  n = c.each { |r| r[:id].to_i > 100 }
22
- assert_equal 101, n
27
+ assert_equal 1000, n
23
28
 
24
- c = PostgreSQLCursor.new("select * from records order by 1", while:true)
29
+ c = PostgreSQLCursor::Cursor.new("select * from products order by 1", while:true)
25
30
  n = c.each { |r| r[:id].to_i < 100 }
26
- assert_equal 100, n
31
+ assert_equal 1000, n
27
32
  end
28
33
 
29
34
  def test_relation
30
35
  nn = 0
31
- Model.where("id>0").each_row {|r| nn += 1 }
36
+ Product.where("id>0").each_row {|r| nn += 1 }
32
37
  assert_equal 1000, nn
33
38
  end
34
39
 
35
40
  def test_activerecord
36
41
  nn = 0
37
- Model.each_row_by_sql("select * from records") {|r| nn += 1 }
42
+ row = nil
43
+ Product.each_row_by_sql("select * from products") {|r| row = r; nn += 1 }
38
44
  assert_equal 1000, nn
45
+ assert_equal Hash, row.class
39
46
 
40
47
  nn = 0
41
- row = nil
42
- Model.each_instance_by_sql("select * from records") {|r| row = r; nn += 1 }
48
+ Product.each_instance_by_sql("select * from products") {|r| row = r; nn += 1 }
43
49
  assert_equal 1000, nn
44
- assert_equal Model, row.class
50
+ assert_equal Product, row.class
45
51
  end
46
52
 
47
53
  def test_exception
48
54
  begin
49
- Model.each_row_by_sql("select * from records") do |r|
55
+ Product.each_row_by_sql("select * from products") do |r|
50
56
  raise "Oops"
51
57
  end
52
58
  rescue Exception => e
53
59
  assert_equal e.message, 'Oops'
54
60
  end
55
61
  end
62
+
63
+ def test_cursor
64
+ cursor = Product.all.each_row
65
+ assert cursor.respond_to?(:each)
66
+ r = cursor.map { |row| row["id"] }
67
+ assert_equal 1000, r.size
68
+ cursor = Product.each_row_by_sql("select * from products")
69
+ assert cursor.respond_to?(:each)
70
+ r = cursor.map { |row| row["id"] }
71
+ assert_equal 1000, r.size
72
+ end
73
+
74
+ def test_pluck
75
+ r = Product.pluck_rows(:id)
76
+ assert_equal 1000, r.size
77
+ r = Product.all.pluck_instances(:id)
78
+ assert_equal 1000, r.size
79
+ assert_equal Fixnum, r.first.class
80
+ end
56
81
 
57
82
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: postgresql_cursor
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.4.3
4
+ version: 0.5.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Allen Fair
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2014-06-06 00:00:00.000000000 Z
11
+ date: 2014-06-26 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: activerecord
@@ -16,9 +16,51 @@ dependencies:
16
16
  requirements:
17
17
  - - ">="
18
18
  - !ruby/object:Gem::Version
19
- version: '0'
19
+ version: 3.2.0
20
20
  type: :runtime
21
21
  prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - ">="
25
+ - !ruby/object:Gem::Version
26
+ version: 3.2.0
27
+ - !ruby/object:Gem::Dependency
28
+ name: pg
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - ">="
32
+ - !ruby/object:Gem::Version
33
+ version: '0'
34
+ type: :development
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - ">="
39
+ - !ruby/object:Gem::Version
40
+ version: '0'
41
+ - !ruby/object:Gem::Dependency
42
+ name: rake
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - ">="
46
+ - !ruby/object:Gem::Version
47
+ version: '0'
48
+ type: :development
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - ">="
53
+ - !ruby/object:Gem::Version
54
+ version: '0'
55
+ - !ruby/object:Gem::Dependency
56
+ name: minitest
57
+ requirement: !ruby/object:Gem::Requirement
58
+ requirements:
59
+ - - ">="
60
+ - !ruby/object:Gem::Version
61
+ version: '0'
62
+ type: :development
63
+ prerelease: false
22
64
  version_requirements: !ruby/object:Gem::Requirement
23
65
  requirements:
24
66
  - - ">="
@@ -27,25 +69,37 @@ dependencies:
27
69
  description: PostgreSQL Cursor is an extension to the ActiveRecord PostgreSQLAdapter
28
70
  for very large result sets. It provides a cursor open/fetch/close interface to access
29
71
  data without loading all rows into memory, and instead loads the result rows in
30
- "chunks" (default of 10_000 rows), buffers them, and returns the rows one at a time.
31
- email: allen.fair@gmail.com
72
+ "chunks" (default of 1_000 rows), buffers them, and returns the rows one at a time.
73
+ email:
74
+ - allen.fair@gmail.com
32
75
  executables: []
33
76
  extensions: []
34
- extra_rdoc_files:
35
- - LICENSE
36
- - README.rdoc
77
+ extra_rdoc_files: []
37
78
  files:
38
79
  - ".document"
80
+ - ".gitignore"
81
+ - Gemfile
82
+ - Gemfile.lock
39
83
  - LICENSE
40
- - README.rdoc
84
+ - README.md
41
85
  - Rakefile
42
86
  - VERSION
43
87
  - lib/postgresql_cursor.rb
88
+ - lib/postgresql_cursor/active_record/connection_adapters/postgresql_type_map.rb
89
+ - lib/postgresql_cursor/active_record/relation/cursor_iterators.rb
90
+ - lib/postgresql_cursor/active_record/sql_cursor.rb
91
+ - lib/postgresql_cursor/cursor.rb
92
+ - lib/postgresql_cursor/version.rb
44
93
  - postgresql_cursor.gemspec
94
+ - test-app/Gemfile
95
+ - test-app/Gemfile.lock
96
+ - test-app/app.rb
97
+ - test-app/run.sh
45
98
  - test/helper.rb
46
99
  - test/test_postgresql_cursor.rb
47
100
  homepage: http://github.com/afair/postgresql_cursor
48
- licenses: []
101
+ licenses:
102
+ - MIT
49
103
  metadata: {}
50
104
  post_install_message:
51
105
  rdoc_options: []
@@ -63,9 +117,11 @@ required_rubygems_version: !ruby/object:Gem::Requirement
63
117
  version: '0'
64
118
  requirements: []
65
119
  rubyforge_project:
66
- rubygems_version: 2.2.1
120
+ rubygems_version: 2.2.2
67
121
  signing_key:
68
122
  specification_version: 4
69
123
  summary: ActiveRecord PostgreSQL Adapter extension for using a cursor to return a
70
124
  large result set
71
- test_files: []
125
+ test_files:
126
+ - test/helper.rb
127
+ - test/test_postgresql_cursor.rb
@@ -1,97 +0,0 @@
1
- = PostgreSQLCursor for handling large Result Sets
2
-
3
- {<img src="https://badge.fury.io/rb/postgresql_cursor.svg" alt="Gem Version" />}[http://badge.fury.io/rb/postgresql_cursor]
4
-
5
- PostgreSQLCursor extends ActiveRecord to allow for efficient processing of queries
6
- returning a large number of rows.
7
-
8
- Note: Rails/ActiveRecord 4.x support has been reworked in the v0.5 Branch. For AR < 4.x, use the gem 0.4.x series.
9
-
10
- == Why use this?
11
-
12
- ActiveRecord is designed and optimized for web performance. In a web transaction, only a "page" of
13
- around 20 rows is returned to the user. When you do this
14
-
15
- Model.find(:all, :conditions=>["id>0"])
16
-
17
- The database returns all matching result set rows to ActiveRecord, which instantiates each row with
18
- the data returned. This function returns an array of all these rows to the caller.
19
-
20
- Asynchronous, Background, or Offline processing may require processing a large amount of data.
21
- When there is a very large number of rows, this requires a lot more memory to hold the data. Ruby
22
- does not return that memory after processing the array, and this causes your process to "bloat". If you
23
- don't have enough memory, it will cause an exception.
24
-
25
- == Enter find_each
26
-
27
- To solve this problem, ActiveRecord gives us two alternative methods that work in "chunks" of your data:
28
-
29
- Model.where("id>0").find_each { |model| model.process! }
30
-
31
- Model.where("id>0").find_in_batches do |batch|
32
- batch.each { |model| model.process! }
33
- end
34
-
35
- Optionally, you can specify a :batch_size option as the size of the "chunk", which defaults to 1000.
36
-
37
- There are drawbacks with these methods:
38
-
39
- * You cannot specify the order, it will be ordered by the primary key (usually id)
40
- * The primary key must be numeric
41
- * The query is rerun for each chunk (1000 rows), starting at the next id sequence.
42
- * You cannot use overly complex queries as that will be rerun and incur more overhead.
43
-
44
- == PostgreSQLCursor FTW!
45
-
46
- PostgreSQLCursor was developed to take advantage of PostgreSQL's cursors. Cursors allow the program
47
- to declare a cursor to run a given query returning "chunks" of rows to the application program while
48
- retaining the position of the full result set in the database. This overcomes all the disadvantages
49
- of using find_each and find_in_batches.
50
-
51
- Also, with PostgreSQL, you have an option to have raw hashes of the row returned instead of the
52
- instantiated models. An informal benchmark showed that returning instances is a factor of 4 times
53
- slower than returning hashes. If you can work with the data in this form, you will find better
54
- performance.
55
-
56
- With PostgreSQL, you can work with cursors as follows:
57
-
58
- Model.where("id>0").each_row { |hash| Model.process(hash) }
59
-
60
- Model.where("id>0").each_instance { |model| model.process! }
61
- Model.where("id>0").each_instance(block_size:100000) { |model| model.process! }
62
-
63
- Model.each_row_by_sql("select * from models") { |hash| Model.process(hash) }
64
-
65
- Model.each_instance_by_sql("select * from models") { |model| model.process }
66
-
67
- All these methods take an options hash to control things more:
68
-
69
- block_size:n The number of rows to fetch from the database each time (default 1000)
70
- while:value Continue looping as long as the block returns this value
71
- until:value Continue looping until the block returns this value
72
- connection:conn Use this connection instead of the current model connection
73
- fraction:float A value to set for the cursor_tuple_fraction variable.
74
- PostgreSQL uses 0.1 (optimize for 10% of result set)
75
- This library uses 1.0 (Optimize for 100% of the result set)
76
- Do not override this value unless you understand it.
77
-
78
- ==Authors
79
- Allen Fair, allen.fair@gmail.com, http://github.com/afair
80
-
81
- Thank you to:
82
- * Iulian Dogariu, http://github.com/iulianu (Fixes)
83
- * Julian Mehnle, julian@mehnle.net (Suggestions)
84
-
85
- == Note on Patches/Pull Requests
86
-
87
- * Fork the project.
88
- * Make your feature addition or bug fix.
89
- * Add tests for it. This is important so I don't break it in a
90
- future version unintentionally.
91
- * Commit, do not mess with rakefile, version, or history.
92
- (if you want to have your own version, that is fine but bump version in a commit by itself I can ignore when I pull)
93
- * Send me a pull request. Bonus points for topic branches.
94
-
95
- == Copyright
96
-
97
- Copyright (c) 2010 Allen Fair. See LICENSE for details.