postgresql_cursor 0.4.3 → 0.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,14 @@
1
+ # A sample Gemfile
2
+ source "https://rubygems.org"
3
+
4
+ gem 'activerecord', '~> 3.2.0'
5
+ #gem 'activerecord', '~> 4.0.0'
6
+ #gem 'activerecord', '~> 4.1.0'
7
+ #gem 'activerecord', '4.1.2.rc1'
8
+
9
+ # For testing against Edge Rails
10
+ #gem 'activerecord', github: 'rails/rails', branch: 'master'
11
+ #gem 'arel', github: 'rails/arel', branch: 'master'
12
+
13
+ gem 'pg'
14
+ gem 'postgresql_cursor', path:"#{ENV['HOME']}/src/postgresql_cursor"
@@ -0,0 +1,34 @@
1
+ PATH
2
+ remote: /Users/allen/src/postgresql_cursor
3
+ specs:
4
+ postgresql_cursor (0.5.0)
5
+ activerecord (>= 3.2.0)
6
+
7
+ GEM
8
+ remote: https://rubygems.org/
9
+ specs:
10
+ activemodel (3.2.18)
11
+ activesupport (= 3.2.18)
12
+ builder (~> 3.0.0)
13
+ activerecord (3.2.18)
14
+ activemodel (= 3.2.18)
15
+ activesupport (= 3.2.18)
16
+ arel (~> 3.0.2)
17
+ tzinfo (~> 0.3.29)
18
+ activesupport (3.2.18)
19
+ i18n (~> 0.6, >= 0.6.4)
20
+ multi_json (~> 1.0)
21
+ arel (3.0.3)
22
+ builder (3.0.4)
23
+ i18n (0.6.9)
24
+ multi_json (1.10.1)
25
+ pg (0.17.1)
26
+ tzinfo (0.3.39)
27
+
28
+ PLATFORMS
29
+ ruby
30
+
31
+ DEPENDENCIES
32
+ activerecord (~> 3.2.0)
33
+ pg
34
+ postgresql_cursor!
@@ -0,0 +1,30 @@
1
+ #!/usr/bin/env ruby
2
+ ################################################################################
3
+ # To run this "app", do a "rake setup" first.
4
+ # To work with this app, run the "rake console" task, which loads this file.
5
+ ################################################################################
6
+ require 'rubygems'
7
+ require 'bundler/setup'
8
+ require 'pg'
9
+ require 'active_record'
10
+ require 'postgresql_cursor'
11
+
12
+ ActiveRecord::Base.establish_connection( adapter: 'postgresql',
13
+ database: ENV['TEST_DATABASE'] || 'postgresql_cursor_test',
14
+ username: ENV['TEST_USER'] || ENV['USER'] || 'postgresql_cursor')
15
+
16
+ class Product < ActiveRecord::Base
17
+ def self.generate(max=1_000)
18
+ Product.destroy_all
19
+ max.times do |i|
20
+ connection.execute("insert into products values (#{i})")
21
+ end
22
+ end
23
+
24
+ def tests
25
+ Product.where("id>0").each_row(block_size:100) { |r| p r["id"] } # Hash
26
+ Product.where("id>0").each_instance(block_size:100) { |r| p r.id } # Instance
27
+ end
28
+ end
29
+
30
+ #Product.generate
@@ -0,0 +1,10 @@
1
+ #!/bin/sh
2
+ #bundle install
3
+ if [ "$1" = "irb" ]; then
4
+ bundle exec irb -Ilib -r postgresql_cursor
5
+ elif [ "$1" = "setup" ]; then
6
+ createdb postgresql_cursor_test
7
+ echo "create table products ( id serial);" | psql postgresql_cursor_test
8
+ else
9
+ bundle exec ruby app.rb
10
+ fi
@@ -1,25 +1,22 @@
1
+ $LOAD_PATH.unshift(File.dirname(__FILE__))
2
+ $LOAD_PATH.unshift(File.join(File.dirname(__FILE__), '..', 'lib'))
1
3
  require 'rubygems'
2
4
  require 'minitest'
3
5
  require 'active_record'
4
-
5
- $LOAD_PATH.unshift(File.dirname(__FILE__))
6
- $LOAD_PATH.unshift(File.join(File.dirname(__FILE__), '..', 'lib'))
7
6
  require 'postgresql_cursor'
8
7
 
9
- ActiveRecord::Base.establish_connection :database=>'allen_test', :adapter=>'postgresql', :username=>'allen'
10
- class Model < ActiveRecord::Base
11
- #set_table_name "records"
12
- self.table_name = "records"
13
-
8
+ ActiveRecord::Base.establish_connection(adapter: 'postgresql',
9
+ database: ENV['TEST_DATABASE'] || 'postgresql_cursor_test',
10
+ username: ENV['TEST_USER'] || ENV['USER'] || 'postgresql_cursor')
11
+
12
+ class Product < ActiveRecord::Base
14
13
  # create table records (id serial primary key);
15
- def self.generate(max=1_000_000)
16
- max.times do
17
- connection.execute("insert into records values (nextval('records_id_seq'::regclass))")
14
+ def self.generate(max=1_000)
15
+ max.times do |i|
16
+ connection.execute("insert into products values (#{i+1})")
18
17
  end
19
18
  end
20
19
  end
21
20
 
22
- Model.generate(1000) if Model.count == 0
23
-
24
- class MiniTest::Unit::TestCase
25
- end
21
+ Product.destroy_all
22
+ Product.generate(1000)
@@ -1,57 +1,82 @@
1
- require 'helper'
2
- # database: allen_test
3
- # create table records ( id serial);
1
+ ################################################################################
2
+ # Before running test, set up the test db & table with:
3
+ # rake setup
4
+ # or create the database manually if your environment doesn't permit
5
+ ################################################################################
6
+ require_relative 'helper'
7
+ require 'minitest/autorun'
8
+ require 'minitest/pride'
4
9
 
5
10
  class TestPostgresqlCursor < Minitest::Test
6
11
 
7
12
  def test_each
8
- c = PostgreSQLCursor.new("select * from records order by 1")
13
+ c = PostgreSQLCursor::Cursor.new("select * from products order by 1")
9
14
  nn = 0
10
15
  n = c.each { nn += 1}
11
16
  assert_equal nn, n
12
17
  end
13
18
 
14
19
  def test_enumerables
15
- assert_equal true, PostgreSQLCursor.new("select * from records order by 1").any?
16
- assert_equal false, PostgreSQLCursor.new("select * from records where id<0").any?
20
+ assert_equal true, PostgreSQLCursor::Cursor.new("select * from products order by 1").any?
21
+ assert_equal false, PostgreSQLCursor::Cursor.new("select * from products where id<0").any?
17
22
  end
18
23
 
19
24
  def test_each_while_until
20
- c = PostgreSQLCursor.new("select * from records order by 1", until:true)
25
+ c = PostgreSQLCursor::Cursor.new("select * from products order by 1", until:true)
21
26
  n = c.each { |r| r[:id].to_i > 100 }
22
- assert_equal 101, n
27
+ assert_equal 1000, n
23
28
 
24
- c = PostgreSQLCursor.new("select * from records order by 1", while:true)
29
+ c = PostgreSQLCursor::Cursor.new("select * from products order by 1", while:true)
25
30
  n = c.each { |r| r[:id].to_i < 100 }
26
- assert_equal 100, n
31
+ assert_equal 1000, n
27
32
  end
28
33
 
29
34
  def test_relation
30
35
  nn = 0
31
- Model.where("id>0").each_row {|r| nn += 1 }
36
+ Product.where("id>0").each_row {|r| nn += 1 }
32
37
  assert_equal 1000, nn
33
38
  end
34
39
 
35
40
  def test_activerecord
36
41
  nn = 0
37
- Model.each_row_by_sql("select * from records") {|r| nn += 1 }
42
+ row = nil
43
+ Product.each_row_by_sql("select * from products") {|r| row = r; nn += 1 }
38
44
  assert_equal 1000, nn
45
+ assert_equal Hash, row.class
39
46
 
40
47
  nn = 0
41
- row = nil
42
- Model.each_instance_by_sql("select * from records") {|r| row = r; nn += 1 }
48
+ Product.each_instance_by_sql("select * from products") {|r| row = r; nn += 1 }
43
49
  assert_equal 1000, nn
44
- assert_equal Model, row.class
50
+ assert_equal Product, row.class
45
51
  end
46
52
 
47
53
  def test_exception
48
54
  begin
49
- Model.each_row_by_sql("select * from records") do |r|
55
+ Product.each_row_by_sql("select * from products") do |r|
50
56
  raise "Oops"
51
57
  end
52
58
  rescue Exception => e
53
59
  assert_equal e.message, 'Oops'
54
60
  end
55
61
  end
62
+
63
+ def test_cursor
64
+ cursor = Product.all.each_row
65
+ assert cursor.respond_to?(:each)
66
+ r = cursor.map { |row| row["id"] }
67
+ assert_equal 1000, r.size
68
+ cursor = Product.each_row_by_sql("select * from products")
69
+ assert cursor.respond_to?(:each)
70
+ r = cursor.map { |row| row["id"] }
71
+ assert_equal 1000, r.size
72
+ end
73
+
74
+ def test_pluck
75
+ r = Product.pluck_rows(:id)
76
+ assert_equal 1000, r.size
77
+ r = Product.all.pluck_instances(:id)
78
+ assert_equal 1000, r.size
79
+ assert_equal Fixnum, r.first.class
80
+ end
56
81
 
57
82
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: postgresql_cursor
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.4.3
4
+ version: 0.5.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Allen Fair
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2014-06-06 00:00:00.000000000 Z
11
+ date: 2014-06-26 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: activerecord
@@ -16,9 +16,51 @@ dependencies:
16
16
  requirements:
17
17
  - - ">="
18
18
  - !ruby/object:Gem::Version
19
- version: '0'
19
+ version: 3.2.0
20
20
  type: :runtime
21
21
  prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - ">="
25
+ - !ruby/object:Gem::Version
26
+ version: 3.2.0
27
+ - !ruby/object:Gem::Dependency
28
+ name: pg
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - ">="
32
+ - !ruby/object:Gem::Version
33
+ version: '0'
34
+ type: :development
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - ">="
39
+ - !ruby/object:Gem::Version
40
+ version: '0'
41
+ - !ruby/object:Gem::Dependency
42
+ name: rake
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - ">="
46
+ - !ruby/object:Gem::Version
47
+ version: '0'
48
+ type: :development
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - ">="
53
+ - !ruby/object:Gem::Version
54
+ version: '0'
55
+ - !ruby/object:Gem::Dependency
56
+ name: minitest
57
+ requirement: !ruby/object:Gem::Requirement
58
+ requirements:
59
+ - - ">="
60
+ - !ruby/object:Gem::Version
61
+ version: '0'
62
+ type: :development
63
+ prerelease: false
22
64
  version_requirements: !ruby/object:Gem::Requirement
23
65
  requirements:
24
66
  - - ">="
@@ -27,25 +69,37 @@ dependencies:
27
69
  description: PostgreSQL Cursor is an extension to the ActiveRecord PostgreSQLAdapter
28
70
  for very large result sets. It provides a cursor open/fetch/close interface to access
29
71
  data without loading all rows into memory, and instead loads the result rows in
30
- "chunks" (default of 10_000 rows), buffers them, and returns the rows one at a time.
31
- email: allen.fair@gmail.com
72
+ "chunks" (default of 1_000 rows), buffers them, and returns the rows one at a time.
73
+ email:
74
+ - allen.fair@gmail.com
32
75
  executables: []
33
76
  extensions: []
34
- extra_rdoc_files:
35
- - LICENSE
36
- - README.rdoc
77
+ extra_rdoc_files: []
37
78
  files:
38
79
  - ".document"
80
+ - ".gitignore"
81
+ - Gemfile
82
+ - Gemfile.lock
39
83
  - LICENSE
40
- - README.rdoc
84
+ - README.md
41
85
  - Rakefile
42
86
  - VERSION
43
87
  - lib/postgresql_cursor.rb
88
+ - lib/postgresql_cursor/active_record/connection_adapters/postgresql_type_map.rb
89
+ - lib/postgresql_cursor/active_record/relation/cursor_iterators.rb
90
+ - lib/postgresql_cursor/active_record/sql_cursor.rb
91
+ - lib/postgresql_cursor/cursor.rb
92
+ - lib/postgresql_cursor/version.rb
44
93
  - postgresql_cursor.gemspec
94
+ - test-app/Gemfile
95
+ - test-app/Gemfile.lock
96
+ - test-app/app.rb
97
+ - test-app/run.sh
45
98
  - test/helper.rb
46
99
  - test/test_postgresql_cursor.rb
47
100
  homepage: http://github.com/afair/postgresql_cursor
48
- licenses: []
101
+ licenses:
102
+ - MIT
49
103
  metadata: {}
50
104
  post_install_message:
51
105
  rdoc_options: []
@@ -63,9 +117,11 @@ required_rubygems_version: !ruby/object:Gem::Requirement
63
117
  version: '0'
64
118
  requirements: []
65
119
  rubyforge_project:
66
- rubygems_version: 2.2.1
120
+ rubygems_version: 2.2.2
67
121
  signing_key:
68
122
  specification_version: 4
69
123
  summary: ActiveRecord PostgreSQL Adapter extension for using a cursor to return a
70
124
  large result set
71
- test_files: []
125
+ test_files:
126
+ - test/helper.rb
127
+ - test/test_postgresql_cursor.rb
@@ -1,97 +0,0 @@
1
- = PostgreSQLCursor for handling large Result Sets
2
-
3
- {<img src="https://badge.fury.io/rb/postgresql_cursor.svg" alt="Gem Version" />}[http://badge.fury.io/rb/postgresql_cursor]
4
-
5
- PostgreSQLCursor extends ActiveRecord to allow for efficient processing of queries
6
- returning a large number of rows.
7
-
8
- Note: Rails/ActiveRecord 4.x support has been reworked in the v0.5 Branch. For AR < 4.x, use the gem 0.4.x series.
9
-
10
- == Why use this?
11
-
12
- ActiveRecord is designed and optimized for web performance. In a web transaction, only a "page" of
13
- around 20 rows is returned to the user. When you do this
14
-
15
- Model.find(:all, :conditions=>["id>0"])
16
-
17
- The database returns all matching result set rows to ActiveRecord, which instantiates each row with
18
- the data returned. This function returns an array of all these rows to the caller.
19
-
20
- Asynchronous, Background, or Offline processing may require processing a large amount of data.
21
- When there is a very large number of rows, this requires a lot more memory to hold the data. Ruby
22
- does not return that memory after processing the array, and that causes your process to "bloat". If you
23
- don't have enough memory, it will cause an exception.
24
-
25
- == Enter find_each
26
-
27
- To solve this problem, ActiveRecord gives us two alternative methods that work in "chunks" of your data:
28
-
29
- Model.where("id>0").find_each { |model| model.process! }
30
-
31
- Model.where("id>0").find_in_batches do |batch|
32
- batch.each { |model| model.process! }
33
- end
34
-
35
- Optionally, you can specify a :batch_size option as the size of the "chunk", which defaults to 1000.
36
-
37
- There are drawbacks with these methods:
38
-
39
- * You cannot specify the order, it will be ordered by the primary key (usually id)
40
- * The primary key must be numeric
41
- * The query is rerun for each chunk (1000 rows), starting at the next id sequence.
42
- * You cannot use overly complex queries as that will be rerun and incur more overhead.
43
-
44
- == PostgreSQLCursor FTW!
45
-
46
- PostgreSQLCursor was developed to take advantage of PostgreSQL's cursors. Cursors allow the program
47
- to declare a cursor to run a given query returning "chunks" of rows to the application program while
48
- retaining the position of the full result set in the database. This overcomes all the disadvantages
49
- of using find_each and find_in_batches.
50
-
51
- Also, with PostgreSQL, you have an option to have raw hashes of the row returned instead of the
52
- instantiated models. An informal benchmark showed that returning instances is a factor of 4 times
53
- slower than returning hashes. If you can work with the data in this form, you will find better
54
- performance.
55
-
56
- With PostgreSQL, you can work with cursors as follows:
57
-
58
- Model.where("id>0").each_row { |hash| Model.process(hash) }
59
-
60
- Model.where("id>0").each_instance { |model| model.process! }
61
- Model.where("id>0").each_instance(block_size:100000) { |model| model.process! }
62
-
63
- Model.each_row_by_sql("select * from models") { |hash| Model.process(hash) }
64
-
65
- Model.each_instance_by_sql("select * from models") { |model| model.process }
66
-
67
- All these methods take an options hash to control things more:
68
-
69
- block_size:n The number of rows to fetch from the database each time (default 1000)
70
- while:value Continue looping as long as the block returns this value
71
- until:value Continue looping until the block returns this value
72
- connection:conn Use this connection instead of the current model connection
73
- fraction:float A value to set for the cursor_tuple_fraction variable.
74
- PostgreSQL uses 0.1 (optimize for 10% of result set)
75
- This library uses 1.0 (Optimize for 100% of the result set)
76
- Do not override this value unless you understand it.
77
-
78
- ==Authors
79
- Allen Fair, allen.fair@gmail.com, http://github.com/afair
80
-
81
- Thank you to:
82
- * Iulian Dogariu, http://github.com/iulianu (Fixes)
83
- * Julian Mehnle, julian@mehnle.net (Suggestions)
84
-
85
- == Note on Patches/Pull Requests
86
-
87
- * Fork the project.
88
- * Make your feature addition or bug fix.
89
- * Add tests for it. This is important so I don't break it in a
90
- future version unintentionally.
91
- * Commit, do not mess with rakefile, version, or history.
92
- (if you want to have your own version, that is fine but bump version in a commit by itself I can ignore when I pull)
93
- * Send me a pull request. Bonus points for topic branches.
94
-
95
- == Copyright
96
-
97
- Copyright (c) 2010 Allen Fair. See LICENSE for details.