postgresql_cursor 0.4.3 → 0.5.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.gitignore +24 -0
- data/Gemfile +4 -0
- data/Gemfile.lock +41 -0
- data/README.md +185 -0
- data/Rakefile +17 -46
- data/lib/postgresql_cursor.rb +12 -180
- data/lib/postgresql_cursor/active_record/connection_adapters/postgresql_type_map.rb +17 -0
- data/lib/postgresql_cursor/active_record/relation/cursor_iterators.rb +64 -0
- data/lib/postgresql_cursor/active_record/sql_cursor.rb +92 -0
- data/lib/postgresql_cursor/cursor.rb +199 -0
- data/lib/postgresql_cursor/version.rb +3 -0
- data/postgresql_cursor.gemspec +22 -43
- data/test-app/Gemfile +14 -0
- data/test-app/Gemfile.lock +34 -0
- data/test-app/app.rb +30 -0
- data/test-app/run.sh +10 -0
- data/test/helper.rb +12 -15
- data/test/test_postgresql_cursor.rb +41 -16
- metadata +68 -12
- data/README.rdoc +0 -97
@@ -0,0 +1,17 @@
|
|
1
|
+
# lib/postgresql_cursor/active_record/connection_adapters/postgresql_type_map
|
2
|
+
module PostgreSQLCursor
  module ActiveRecord
    module ConnectionAdapters
      # Mixin that exposes the adapter's type map to the cursor code.
      module PostgreSQLTypeMap
        # Returns the "type_map" object the cursor uses to look up column types.
        #
        # On ActiveRecord 4.0 the map is read from the adapter's OID::TYPE_MAP
        # constant; on every other version the receiver's own `type_map`
        # method is called instead.
        def get_type_map # :nodoc:
          ar_version = [::ActiveRecord::VERSION::MAJOR, ::ActiveRecord::VERSION::MINOR]
          return type_map unless ar_version == [4, 0]

          ::ActiveRecord::ConnectionAdapters::PostgreSQLAdapter::OID::TYPE_MAP
        end
      end
    end
  end
end
|
@@ -0,0 +1,64 @@
|
|
1
|
+
# Defines extension to ActiveRecord/AREL to use this library
|
2
|
+
module PostgreSQLCursor
  module ActiveRecord
    module Relation
      # Cursor-based iterators for a query object. The object this module is
      # mixed into must respond to #connection and #to_sql, since every method
      # here builds a PostgreSQLCursor::Cursor from those two values.
      module CursorIterators

        # Public: Executes the query, returning each row as a hash
        # to the given block.
        #
        # options - Hash to control
        #   fraction: 0.1..1.0 - The cursor_tuple_fraction (default 1.0)
        #   block_size: 1..n   - The number of rows to fetch per db block fetch
        #   while: value       - Exits loop when block does not return this value.
        #   until: value       - Exits loop when block returns this value.
        #   connection: conn   - Connection to use (default: self.connection)
        #
        # Example:
        #   Post.where(user_id:123).each_row { |hash| Post.process(hash) }
        #   Post.each_row.map {|r| r["id"].to_i }
        #
        # Returns the result of Cursor#each_row when a block is given,
        # otherwise the (not yet executed) cursor itself.
        def each_row(options={}, &block)
          # Caller-supplied options win over the default connection; the
          # caller's hash is never modified because merge builds a new one.
          options = {:connection => self.connection}.merge(options)
          cursor = PostgreSQLCursor::Cursor.new(to_sql, options)
          return cursor.each_row(&block) if block_given?
          cursor
        end
        alias :each_hash :each_row

        # Public: Like each_row, but returns an instantiated model object to the block
        #
        # Parameters: same as each_row
        #
        # Example:
        #   Post.where(user_id:123).each_instance { |post| post.process }
        #   Post.where(user_id:123).each_instance.map { |post| post.process }
        #
        # Returns the result of Cursor#each_instance when a block is given,
        # otherwise the cursor configured (via iterate_type) to instantiate
        # rows through self.
        def each_instance(options={}, &block)
          options = {:connection => self.connection}.merge(options)
          cursor = PostgreSQLCursor::Cursor.new(to_sql, options)
          return cursor.each_instance(self, &block) if block_given?
          cursor.iterate_type(self)
        end

        # Plucks the named columns from the rows and returns them in an array.
        #
        # cols - Column names, optionally followed by an options Hash
        #        (same keys as each_row).
        #
        # The options hash is handed to each_row untouched, so a caller-supplied
        # :connection is honored and the caller's hash is not modified.
        def pluck_rows(*cols)
          options = cols.last.is_a?(Hash) ? cols.pop : {}
          each_row(options).pluck(*cols)
        end
        alias :pluck_row :pluck_rows

        # Plucks the named columns from the instances and returns them in an array.
        #
        # cols - Column names, optionally followed by an options Hash
        #        (same keys as each_row).
        #
        # As with pluck_rows, the options hash is passed through unmodified.
        def pluck_instances(*cols)
          options = cols.last.is_a?(Hash) ? cols.pop : {}
          each_instance(options).pluck(*cols)
        end
        alias :pluck_instance :pluck_instances

      end
    end
  end
end
|
@@ -0,0 +1,92 @@
|
|
1
|
+
module PostgreSQLCursor
  module ActiveRecord
    # Model-level entry points for cursor iteration. The methods call
    # self.connection and a relation-returning finder on the receiver, so the
    # module is presumably meant to be extended onto ActiveRecord model
    # classes -- confirm against the file that wires it in.
    module SqlCursor
      # Public: Executes the query, returning each row as a hash
      # to the given block.
      #
      # options - Hash to control
      #   fraction: 0.1..1.0 - The cursor_tuple_fraction (default 1.0)
      #   block_size: 1..n   - The number of rows to fetch per db block fetch
      #   while: value       - Exits loop when block does not return this value.
      #   until: value       - Exits loop when block returns this value.
      #
      # Example:
      #   Post.each_row { |hash| Post.process(hash) }
      #
      # Returns whatever the relation's each_row returns (the number of rows
      # yielded when a block is given).
      def each_row(options={}, &block)
        options = {:connection => self.connection}.merge(options)
        cursor_relation.each_row(options, &block)
      end
      alias :each_hash :each_row

      # Public: Like each_row, but returns an instantiated model object to the block
      #
      # Parameters: same as each_row
      #
      # Example:
      #   Post.each_instance { |post| post.process }
      #
      # Returns whatever the relation's each_instance returns (the number of
      # rows yielded when a block is given).
      def each_instance(options={}, &block)
        options = {:connection => self.connection}.merge(options)
        cursor_relation.each_instance(options, &block)
      end

      # Public: Returns each row as a hash to the given block
      #
      # sql     - Full SQL statement, variables interpolated
      # options - Hash to control
      #   fraction: 0.1..1.0 - The cursor_tuple_fraction (default 1.0)
      #   block_size: 1..n   - The number of rows to fetch per db block fetch
      #   while: value       - Exits loop when block does not return this value.
      #   until: value       - Exits loop when block returns this value.
      #
      # Example:
      #   Post.each_row_by_sql("select * from posts") { |hash| Post.process(hash) }
      #   Post.each_row_by_sql("select * from posts").count
      #
      # Returns the number of rows yielded to the block, or the cursor itself
      # when no block is given.
      def each_row_by_sql(sql, options={}, &block)
        options = {:connection => self.connection}.merge(options)
        cursor = PostgreSQLCursor::Cursor.new(sql, options)
        return cursor.each_row(&block) if block_given?
        cursor
      end
      alias :each_hash_by_sql :each_row_by_sql

      # Public: Returns each row as a model instance to the given block
      # As this instantiates a model object, it is slower than each_row_by_sql
      #
      # Parameters: see each_row_by_sql
      #
      # Example:
      #   Post.each_instance_by_sql("select * from posts") { |post| post.process }
      #   Post.each_instance_by_sql("select * from posts").count
      #
      # Returns the number of rows yielded to the block, or the cursor
      # (set to instantiate through self) when no block is given.
      def each_instance_by_sql(sql, options={}, &block)
        options = {:connection => self.connection}.merge(options)
        cursor = PostgreSQLCursor::Cursor.new(sql, options)
        return cursor.each_instance(self, &block) if block_given?
        cursor.iterate_type(self)
      end

      # Returns an array of the given column names. Use if you need cursors and don't expect
      # this to consume too much memory. Values are strings. Like ActiveRecord's pluck.
      #
      # cols - Column names, optionally followed by an options Hash.
      def pluck_rows(*cols)
        options = cols.last.is_a?(Hash) ? cols.pop : {}
        cursor_relation.each_row(options).pluck(*cols)
      end
      alias :pluck_row :pluck_rows

      # Returns an array of the given column names. Use if you need cursors and don't expect
      # this to consume too much memory. Values are instance types. Like ActiveRecord's pluck.
      #
      # cols - Column names, optionally followed by an options Hash.
      def pluck_instances(*cols)
        options = cols.last.is_a?(Hash) ? cols.pop : {}
        cursor_relation.each_instance(options).pluck(*cols)
      end
      alias :pluck_instance :pluck_instances

      private

      # Internal: The relation covering every row of the model.
      #
      # Before ActiveRecord 4, `all` loads the records and returns an Array,
      # which has none of the cursor iterators; `scoped` is the call that
      # returns a relation there. From ActiveRecord 4 on, `all` returns the
      # relation.
      def cursor_relation
        ::ActiveRecord::VERSION::MAJOR < 4 ? scoped : all
      end
    end
  end
end
|
@@ -0,0 +1,199 @@
|
|
1
|
+
################################################################################
|
2
|
+
# PostgreSQLCursor: library class provides postgresql cursor for large result
|
3
|
+
# set processing. Requires ActiveRecord, but can be adapted to other DBI/ORM libraries.
|
4
|
+
# If you don't use AR, this assumes #connection and #instantiate methods are available.
|
5
|
+
#
|
6
|
+
# options - Hash to control operation and loop breaks
|
7
|
+
# connection: instance - ActiveRecord connection to use
|
8
|
+
# fraction: 0.1..1.0 - The cursor_tuple_fraction (default 1.0)
|
9
|
+
# block_size: 1..n - The number of rows to fetch per db block fetch
|
10
|
+
# while: value - Exits loop when block does not return this value.
|
11
|
+
# until: value - Exits loop when block returns this value.
|
12
|
+
#
|
13
|
+
# Examples:
|
14
|
+
# PostgreSQLCursor::Cursor.new("select ...").each { |hash| ... }
|
15
|
+
# ActiveRecordModel.where(...).each_row { |hash| ... }
|
16
|
+
# ActiveRecordModel.each_row_by_sql("select ...") { |hash| ... }
|
17
|
+
# ActiveRecordModel.each_instance_by_sql("select ...") { |model| ... }
|
18
|
+
#
|
19
|
+
module PostgreSQLCursor
  # Iterates over the result of a SQL statement through a PostgreSQL cursor,
  # fetching rows in blocks instead of loading the whole result at once.
  #
  # The connection object must provide #transaction and #execute; results of
  # #execute must be enumerable, and (for instance iteration on
  # ActiveRecord >= 4) respond to #fields, #ftype and #fmod.
  class Cursor
    include Enumerable
    # NOTE: the :count reader is defined on the class and therefore takes
    # precedence over Enumerable#count. It reports the number of rows yielded
    # by the most recent iteration (0 before any iteration has run).
    attr_reader :sql, :options, :connection, :count, :result
    # Sequence used to give every opened cursor a distinct name (cursor_N).
    @@cursor_seq = 0

    # Public: Start a new PostgreSQL cursor query
    # sql     - The SQL statement with interpolated values
    # options - hash of processing controls
    #   connection: conn    - Connection to use (default: ActiveRecord::Base.connection)
    #   instances: boolean  - Truthy to make #each iterate with each_instance
    #   symbolize_keys: boolean - Truthy to yield row hashes with symbol keys
    #   while: value        - Exits loop when block does not return this value.
    #   until: value        - Exits loop when block returns this value.
    #   fraction: 0.1..1.0  - The cursor_tuple_fraction (default 1.0)
    #   block_size: 1..n    - The number of rows to fetch per db block fetch
    #                         Defaults to 1000
    #
    # Examples
    #
    #   PostgreSQLCursor::Cursor.new("select ....")
    #
    # Returns the cursor object when called with new. No SQL is executed
    # until the cursor is iterated.
    def initialize(sql, options={})
      @sql        = sql
      @options    = options
      @connection = @options.fetch(:connection) { ::ActiveRecord::Base.connection }
      @count      = 0
      @iterate    = options[:instances] ? :each_instance : :each_row
    end

    # Specify the type to instantiate, or reset to return a Hash
    #
    # type - nil or Hash to iterate rows as hashes; any other value is kept
    #        as the class handed to each_instance by #each.
    #
    # Returns self, so calls can be chained.
    def iterate_type(type=nil)
      if type.nil? || type == Hash
        @iterate = :each_row
      else
        @iterate = :each_instance
        @type    = type
      end
      self
    end

    # Public: Yields each row of the result set to the passed block, either
    # as a hash (each_row) or as an instance (each_instance) depending on the
    # mode selected by the :instances option or #iterate_type.
    #
    # Returns the count of rows processed, or an Enumerator when no block
    # is given.
    def each(&block)
      return enum_for(:each) unless block
      if @iterate == :each_row
        each_row(&block)
      else
        each_instance(@type, &block)
      end
    end

    # Public: Yields each row as a hash of {'colname' => value, ...}.
    # Keys are symbolized (via Hash#symbolize_keys) when the :symbolize_keys
    # option is set.
    #
    # Returns the count of rows processed, or an Enumerator when no block
    # is given.
    def each_row(&block)
      return enum_for(:each_row) unless block
      each_tuple do |row|
        row = row.symbolize_keys if @options[:symbolize_keys]
        block.call(row)
      end
    end

    # Public: Yields each row converted to an object by calling the private
    # `instantiate` method of klass.
    #
    # klass - Class used to instantiate rows (default: the type given to
    #         #iterate_type).
    #
    # Returns the count of rows processed, or an Enumerator when no block
    # is given.
    def each_instance(klass=nil, &block)
      return enum_for(:each_instance, klass) unless block
      klass ||= @type
      each_tuple do |row|
        model =
          if ::ActiveRecord::VERSION::MAJOR < 4
            klass.send(:instantiate, row)
          else
            # column_types memoizes itself for the duration of one iteration.
            klass.send(:instantiate, row, column_types)
          end
        block.call(model)
      end
    end

    # Returns an array of columns plucked from the result rows.
    # Experimental function, as this could still use too much memory
    # and negate the purpose of this library.
    # Should this return a lazy enumerator instead?
    #
    # cols - Column names, optionally followed by an options Hash. The
    #        options are merged into this cursor's options; a :class entry
    #        switches the cursor to instance iteration with that class.
    #
    # With one column the result is a flat list of values; with several it
    # is a list of arrays, one per row. Values that are themselves arrays
    # are returned intact.
    def pluck(*cols)
      options = cols.last.is_a?(Hash) ? cols.pop : {}
      # Build a new hash rather than merge! so the hash the caller passed to
      # Cursor.new is left unmodified. :symbolize_keys stays set afterwards,
      # as before.
      @options = @options.merge(options).merge(:symbolize_keys => true)
      iterate_type(options[:class]) if options[:class]
      cols   = cols.map { |c| c.to_sym }
      single = cols.size == 1
      result = []

      each do |row|
        # Hash rows arrive with symbol keys because :symbolize_keys is set.
        result << (single ? row[cols.first] : cols.map { |c| row[c] })
      end

      result
    end

    # Runs the whole open / fetch / close cycle inside a transaction on the
    # connection and yields every fetched row to the block.
    #
    # Iteration stops early when the :until option is present and the block
    # returns that value, or when the :while option is present and the block
    # returns anything else.
    #
    # Returns the number of rows yielded.
    def each_tuple(&block) #:nodoc:
      has_do_until  = @options.has_key?(:until)
      has_do_while  = @options.has_key?(:while)
      @count        = 0
      @column_types = nil
      @connection.transaction do
        # open stays outside the begin/ensure: if declaring the cursor fails
        # there is nothing to close, and attempting to would raise a second
        # error that hides the real one.
        open
        begin
          while (row = fetch)
            break if row.size == 0
            @count += 1
            rc = block.call(row)
            break if has_do_until && rc == @options[:until]
            break if has_do_while && rc != @options[:while]
          end
        ensure
          close
        end
      end
      @count
    end

    # Placeholder hook: returns the row unchanged.
    def cast_types(row)
      row
    end

    # Builds (and memoizes) a hash of column name => type object for the
    # current result, looked up in the connection's get_type_map by each
    # field's ftype/fmod. Columns missing from the map get a warning and an
    # identity type.
    #
    # Returns nil on ActiveRecord < 4.
    def column_types
      return nil if ::ActiveRecord::VERSION::MAJOR < 4
      return @column_types if @column_types

      type_map = @connection.get_type_map
      types    = {}
      @result.fields.each_with_index do |fname, i|
        ftype = @result.ftype(i)
        fmod  = @result.fmod(i)
        types[fname] = type_map.fetch(ftype, fmod) do |oid, _mod|
          warn "unknown OID: #{fname}(#{oid}) (#{sql})"
          # Fully qualified: a bare OID constant cannot be resolved from
          # inside PostgreSQLCursor::Cursor.
          ::ActiveRecord::ConnectionAdapters::PostgreSQLAdapter::OID::Identity.new
        end
      end

      @column_types = types
    end

    # Public: Opens (actually, "declares") the cursor. Call this before fetching
    #
    # Applies the configured :fraction option (default 1.0) first, then
    # declares a uniquely named cursor for the SQL statement.
    def open
      set_cursor_tuple_fraction(@options.fetch(:fraction) { 1.0 })
      @cursor = @@cursor_seq += 1
      @result = @connection.execute("declare cursor_#{@cursor} cursor for #{@sql}")
      @block  = []
    end

    # Public: Returns the next row from the cursor, fetching a new block of
    # rows from the database whenever the local buffer is empty.
    #
    # Returns a row as a hash of {'colname'=>value,...}, or nil once the
    # cursor is exhausted.
    def fetch
      fetch_block if @block.size == 0
      @block.shift
    end

    # Private: Fetches the next block of rows into @block
    #
    # block_size - Number of rows to fetch; defaults to the :block_size
    #              option, or 1000.
    def fetch_block(block_size=nil)
      block_size ||= @block_size ||= @options.fetch(:block_size) { 1000 }
      @result = @connection.execute("fetch #{block_size} from cursor_#{@cursor}")
      @block  = @result.collect { |row| row } # Make our own
    end

    # Public: Closes the cursor
    def close
      @connection.execute("close cursor_#{@cursor}")
    end

    # Private: Sets the PostgreSQL cursor_tuple_fraction value on the connection.
    # This is a value between 0.1 and 1.0 (PostgreSQL defaults to 0.1, this library defaults to 1.0)
    # used to determine the expected fraction (percent) of result rows returned to the caller.
    # This value determines the access path by the query planner.
    #
    # The SET is always issued: the setting lives on the connection, so a
    # value remembered by this object could be stale if anything else
    # changed it in between.
    #
    # Returns frac.
    def set_cursor_tuple_fraction(frac=1.0)
      @cursor_tuple_fraction = frac
      @result = @connection.execute("set cursor_tuple_fraction to #{frac}")
      frac
    end
  end
end
|
data/postgresql_cursor.gemspec
CHANGED
@@ -1,48 +1,27 @@
|
|
1
|
-
#
|
2
|
-
|
3
|
-
|
4
|
-
|
5
|
-
# stub: postgresql_cursor 0.4.3 ruby lib
|
1
|
+
# coding: utf-8
|
2
|
+
lib = File.expand_path('../lib', __FILE__)
|
3
|
+
$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
|
4
|
+
require 'postgresql_cursor/version'
|
6
5
|
|
7
|
-
Gem::Specification.new do |
|
8
|
-
|
9
|
-
|
6
|
+
Gem::Specification.new do |spec|
|
7
|
+
spec.name = "postgresql_cursor"
|
8
|
+
spec.version = PostgresqlCursor::VERSION
|
9
|
+
spec.authors = ["Allen Fair"]
|
10
|
+
spec.email = ["allen.fair@gmail.com"]
|
11
|
+
spec.summary = "ActiveRecord PostgreSQL Adapter extension for using a cursor to return a large result set"
|
12
|
+
spec.description = "PostgreSQL Cursor is an extension to the ActiveRecord PostgreSQLAdapter for very large result sets. It provides a cursor open/fetch/close interface to access data without loading all rows into memory, and instead loads the result rows in \"chunks\" (default of 1_000 rows), buffers them, and returns the rows one at a time."
|
13
|
+
spec.homepage = "http://github.com/afair/postgresql_cursor"
|
14
|
+
spec.license = "MIT"
|
10
15
|
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
s.description = "PostgreSQL Cursor is an extension to the ActiveRecord PostgreSQLAdapter for very large result sets. It provides a cursor open/fetch/close interface to access data without loading all rows into memory, and instead loads the result rows in \"chunks\" (default of 10_000 rows), buffers them, and returns the rows one at a time."
|
16
|
-
s.email = "allen.fair@gmail.com"
|
17
|
-
s.extra_rdoc_files = [
|
18
|
-
"LICENSE",
|
19
|
-
"README.rdoc"
|
20
|
-
]
|
21
|
-
s.files = [
|
22
|
-
".document",
|
23
|
-
"LICENSE",
|
24
|
-
"README.rdoc",
|
25
|
-
"Rakefile",
|
26
|
-
"VERSION",
|
27
|
-
"lib/postgresql_cursor.rb",
|
28
|
-
"postgresql_cursor.gemspec",
|
29
|
-
"test/helper.rb",
|
30
|
-
"test/test_postgresql_cursor.rb"
|
31
|
-
]
|
32
|
-
s.homepage = "http://github.com/afair/postgresql_cursor"
|
33
|
-
s.rubygems_version = "2.2.1"
|
34
|
-
s.summary = "ActiveRecord PostgreSQL Adapter extension for using a cursor to return a large result set"
|
16
|
+
spec.files = `git ls-files -z`.split("\x0")
|
17
|
+
spec.executables = spec.files.grep(%r{^bin/}) { |f| File.basename(f) }
|
18
|
+
spec.test_files = spec.files.grep(%r{^(test|spec|features)/})
|
19
|
+
spec.require_paths = ["lib"]
|
35
20
|
|
36
|
-
|
37
|
-
|
21
|
+
#spec.add_dependency "pg" # Remove this for jruby, which should specify 'activerecord-jdbcpostgresql-adapter'
|
22
|
+
spec.add_dependency "activerecord", ">= 3.2.0"
|
38
23
|
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
s.add_dependency(%q<activerecord>, [">= 0"])
|
43
|
-
end
|
44
|
-
else
|
45
|
-
s.add_dependency(%q<activerecord>, [">= 0"])
|
46
|
-
end
|
24
|
+
spec.add_development_dependency "pg"
|
25
|
+
spec.add_development_dependency "rake"
|
26
|
+
spec.add_development_dependency "minitest"
|
47
27
|
end
|
48
|
-
|