postgresql_cursor 0.4.3 → 0.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.gitignore +24 -0
- data/Gemfile +4 -0
- data/Gemfile.lock +41 -0
- data/README.md +185 -0
- data/Rakefile +17 -46
- data/lib/postgresql_cursor.rb +12 -180
- data/lib/postgresql_cursor/active_record/connection_adapters/postgresql_type_map.rb +17 -0
- data/lib/postgresql_cursor/active_record/relation/cursor_iterators.rb +64 -0
- data/lib/postgresql_cursor/active_record/sql_cursor.rb +92 -0
- data/lib/postgresql_cursor/cursor.rb +199 -0
- data/lib/postgresql_cursor/version.rb +3 -0
- data/postgresql_cursor.gemspec +22 -43
- data/test-app/Gemfile +14 -0
- data/test-app/Gemfile.lock +34 -0
- data/test-app/app.rb +30 -0
- data/test-app/run.sh +10 -0
- data/test/helper.rb +12 -15
- data/test/test_postgresql_cursor.rb +41 -16
- metadata +68 -12
- data/README.rdoc +0 -97
@@ -0,0 +1,17 @@
|
|
1
|
+
# lib/postgresql_cursor/active_record/connection_adapters/postgresql_type_map
|
2
|
+
module PostgreSQLCursor
  module ActiveRecord
    module ConnectionAdapters
      # Mixin that exposes the adapter's OID type map to the cursor code.
      module PostgreSQLTypeMap
        # Returns the type map used to look up column types by OID.
        #
        # ActiveRecord 4.0 is answered with the OID::TYPE_MAP constant; every
        # other version is answered with the receiver's own #type_map.
        def get_type_map # :nodoc:
          version = ::ActiveRecord::VERSION
          return type_map unless version::MAJOR == 4 && version::MINOR == 0

          ::ActiveRecord::ConnectionAdapters::PostgreSQLAdapter::OID::TYPE_MAP
        end
      end
    end
  end
end
|
@@ -0,0 +1,64 @@
|
|
1
|
+
# Defines extension to ActiveRecord/AREL to use this library
|
2
|
+
module PostgreSQLCursor
  module ActiveRecord
    module Relation
      # Cursor-based iteration helpers. Every method builds a
      # PostgreSQLCursor::Cursor from the receiver's #to_sql and uses the
      # receiver's #connection unless the caller passes :connection.
      module CursorIterators

        # Public: Executes the query, returning each row as a hash
        # to the given block.
        #
        # options     - Hash to control
        #   connection: instance - Connection to use (default: self.connection)
        #   fraction: 0.1..1.0   - The cursor_tuple_fraction (default 1.0)
        #   block_size: 1..n     - The number of rows to fetch per db block fetch
        #   while: value         - Exits loop when block does not return this value.
        #   until: value         - Exits loop when block returns this value.
        #
        # Example:
        #   Post.where(user_id:123).each_row { |hash| Post.process(hash) }
        #   Post.each_row.map {|r| r["id"].to_i }
        #
        # Returns the number of rows yielded to the block, or the (not yet
        # executed) cursor itself when no block is given.
        def each_row(options={}, &block)
          options = {:connection => self.connection}.merge(options)
          cursor  = PostgreSQLCursor::Cursor.new(to_sql, options)
          return cursor.each_row(&block) if block_given?
          cursor
        end
        alias :each_hash :each_row

        # Public: Like each_row, but returns an instantiated model object to the block
        #
        # Parameters: same as each_row
        #
        # Example:
        #   Post.where(user_id:123).each_instance { |post| post.process }
        #   Post.where(user_id:123).each_instance.map { |post| post.process }
        #
        # Returns the number of rows yielded to the block, or the cursor
        # (switched to instance iteration for the receiver) when no block is given.
        def each_instance(options={}, &block)
          options = {:connection => self.connection}.merge(options)
          cursor  = PostgreSQLCursor::Cursor.new(to_sql, options)
          return cursor.each_instance(self, &block) if block_given?
          cursor.iterate_type(self)
        end

        # Plucks the column names from the rows, and return them in an array.
        #
        # cols - Column names, optionally followed by an options Hash that is
        #        forwarded to each_row.
        #
        # The options are passed through untouched: each_row supplies the
        # default connection, so a caller-provided :connection is honored and
        # the caller's hash is not modified.
        def pluck_rows(*cols)
          options = cols.last.is_a?(Hash) ? cols.pop : {}
          each_row(options).pluck(*cols)
        end
        alias :pluck_row :pluck_rows

        # Plucks the column names from the instances, and return them in an array.
        #
        # cols - Column names, optionally followed by an options Hash that is
        #        forwarded to each_instance (same handling as pluck_rows).
        def pluck_instances(*cols)
          options = cols.last.is_a?(Hash) ? cols.pop : {}
          each_instance(options).pluck(*cols)
        end
        alias :pluck_instance :pluck_instances

      end
    end
  end
end
|
@@ -0,0 +1,92 @@
|
|
1
|
+
module PostgreSQLCursor
  module ActiveRecord
    # Cursor helpers for the model itself. The relation-based methods delegate
    # to the model's base relation; the *_by_sql methods build a
    # PostgreSQLCursor::Cursor directly from the given SQL.
    module SqlCursor
      # Public: Executes the query, returning each row as a hash
      # to the given block.
      #
      # options     - Hash to control
      #   fraction: 0.1..1.0 - The cursor_tuple_fraction (default 1.0)
      #   block_size: 1..n   - The number of rows to fetch per db block fetch
      #   while: value       - Exits loop when block does not return this value.
      #   until: value       - Exits loop when block returns this value.
      #
      # Example:
      #   Post.each_row { |hash| Post.process(hash) }
      #
      # Returns whatever the relation's each_row returns for these arguments.
      def each_row(options={}, &block)
        options = {:connection => self.connection}.merge(options)
        cursor_base_relation.each_row(options, &block)
      end
      alias :each_hash :each_row

      # Public: Like each_row, but returns an instantiated model object to the block
      #
      # Parameters: same as each_row
      #
      # Example:
      #   Post.each_instance { |post| post.process }
      #
      # Returns whatever the relation's each_instance returns for these arguments.
      def each_instance(options={}, &block)
        options = {:connection => self.connection}.merge(options)
        cursor_base_relation.each_instance(options, &block)
      end

      # Public: Returns each row as a hash to the given block
      #
      # sql         - Full SQL statement, variables interpolated
      # options     - Hash to control
      #   fraction: 0.1..1.0 - The cursor_tuple_fraction (default 1.0)
      #   block_size: 1..n   - The number of rows to fetch per db block fetch
      #   while: value       - Exits loop when block does not return this value.
      #   until: value       - Exits loop when block returns this value.
      #
      # Example:
      #   Post.each_row_by_sql("select * from posts") { |hash| Post.process(hash) }
      #   Post.each_row_by_sql("select * from posts").count
      #
      # Returns the result of Cursor#each_row when a block is given, otherwise
      # the cursor itself.
      def each_row_by_sql(sql, options={}, &block)
        options = {:connection => self.connection}.merge(options)
        cursor  = PostgreSQLCursor::Cursor.new(sql, options)
        return cursor.each_row(&block) if block_given?
        cursor
      end
      alias :each_hash_by_sql :each_row_by_sql

      # Public: Returns each row as a model instance to the given block
      # As this instantiates a model object, it is slower than each_row_by_sql
      #
      # Parameters: see each_row_by_sql
      #
      # Example:
      #   Post.each_instance_by_sql("select * from posts") { |post| post.process }
      #   Post.each_instance_by_sql("select * from posts").count
      #
      # Returns the result of Cursor#each_instance when a block is given,
      # otherwise the cursor switched to instance iteration for this model.
      def each_instance_by_sql(sql, options={}, &block)
        options = {:connection => self.connection}.merge(options)
        cursor  = PostgreSQLCursor::Cursor.new(sql, options)
        return cursor.each_instance(self, &block) if block_given?
        cursor.iterate_type(self)
      end

      # Returns an array of the given column names. Use if you need cursors and don't expect
      # this to consume too much memory. Values are strings. Like ActiveRecord's pluck.
      def pluck_rows(*cols)
        options = cols.last.is_a?(Hash) ? cols.pop : {}
        cursor_base_relation.each_row(options).pluck(*cols)
      end
      alias :pluck_row :pluck_rows

      # Returns an array of the given column names. Use if you need cursors and don't expect
      # this to consume too much memory. Values are instance types. Like ActiveRecord's pluck.
      def pluck_instances(*cols)
        options = cols.last.is_a?(Hash) ? cols.pop : {}
        cursor_base_relation.each_instance(options).pluck(*cols)
      end
      alias :pluck_instance :pluck_instances

      private

      # Relation the cursor helpers delegate to. Before ActiveRecord 4,
      # `all` loads the records into an Array (which has no each_row /
      # each_instance), so `scoped` is used there to obtain a relation.
      def cursor_base_relation
        ::ActiveRecord::VERSION::MAJOR < 4 ? scoped : all
      end
    end
  end
end
|
@@ -0,0 +1,199 @@
|
|
1
|
+
################################################################################
|
2
|
+
# PostgreSQLCursor: library class provides postgresql cursor for large result
|
3
|
+
# set processing. Requires ActiveRecord, but can be adapted to other DBI/ORM libraries.
|
4
|
+
# If you don't use AR, this assumes #connection and #instantiate methods are available.
|
5
|
+
#
|
6
|
+
# options - Hash to control operation and loop breaks
|
7
|
+
# connection: instance - ActiveRecord connection to use
|
8
|
+
# fraction: 0.1..1.0 - The cursor_tuple_fraction (default 1.0)
|
9
|
+
# block_size: 1..n - The number of rows to fetch per db block fetch
|
10
|
+
# while: value - Exits loop when block does not return this value.
|
11
|
+
# until: value - Exits loop when block returns this value.
|
12
|
+
#
|
13
|
+
# Examples:
|
14
|
+
# PostgreSQLCursor::Cursor.new("select ...").each { |hash| ... }
|
15
|
+
# ActiveRecordModel.where(...).each_row { |hash| ... }
|
16
|
+
# ActiveRecordModel.each_row_by_sql("select ...") { |hash| ... }
|
17
|
+
# ActiveRecordModel.each_instance_by_sql("select ...") { |model| ... }
|
18
|
+
#
|
19
|
+
module PostgreSQLCursor
  # Runs a SQL statement through a PostgreSQL cursor (declare / fetch / close)
  # and yields its rows one at a time, fetching them from the server in blocks.
  class Cursor
    include Enumerable
    # NOTE: the :count reader is defined on the class itself and therefore
    # takes precedence over Enumerable#count. It reports the rows counted by
    # the most recent iteration (0 before any iteration); it does not run the
    # query.
    attr_reader :sql, :options, :connection, :count, :result
    @@cursor_seq = 0

    # Public: Start a new PostgreSQL cursor query
    # sql     - The SQL statement with interpolated values
    # options - hash of processing controls
    #   connection: instance - Connection to use (default: ActiveRecord::Base.connection)
    #   instances: value     - When truthy, #each iterates with each_instance
    #   symbolize_keys: value - When truthy, each_row yields hashes with symbol keys
    #   while: value         - Exits loop when block does not return this value.
    #   until: value         - Exits loop when block returns this value.
    #   fraction: 0.1..1.0   - The cursor_tuple_fraction (default 1.0)
    #   block_size: 1..n     - The number of rows to fetch per db block fetch
    #                          Defaults to 1000
    #
    # Examples
    #
    #   PostgreSQLCursor::Cursor.new("select ....")
    #
    # Returns the cursor object when called with new.
    def initialize(sql, options={})
      @sql        = sql
      @options    = options
      @connection = @options.fetch(:connection) { ::ActiveRecord::Base.connection }
      @count      = 0
      @iterate    = options[:instances] ? :each_instance : :each_row
    end

    # Specify the type to instantiate, or reset to return a Hash
    #
    # type - nil or Hash selects row hashes; anything else is remembered as
    #        the class handed to each_instance.
    #
    # Returns self.
    def iterate_type(type=nil)
      if type.nil? || type == Hash
        @iterate = :each_row
      else
        @iterate = :each_instance
        @type    = type
      end
      self
    end

    # Public: Yields each row of the result set to the passed block, either
    # as a hash (each_row) or as an instance (each_instance), depending on
    # the mode chosen through options[:instances] / iterate_type.
    #
    # Returns the count of rows processed, or an Enumerator when called
    # without a block.
    def each(&block)
      return to_enum(:each) unless block

      if @iterate == :each_row
        self.each_row(&block)
      else
        self.each_instance(@type, &block)
      end
    end

    # Public: Yields each row as a hash; keys are symbolized only when
    # options[:symbolize_keys] is set.
    #
    # Returns the count of rows processed.
    def each_row(&block)
      self.each_tuple do |row|
        row = row.symbolize_keys if @options[:symbolize_keys]
        block.call(row)
      end
    end

    # Public: Yields each row instantiated through klass.instantiate.
    #
    # klass - Class to instantiate (defaults to the type set by iterate_type).
    #
    # Returns the count of rows processed.
    def each_instance(klass=nil, &block)
      klass ||= @type
      self.each_tuple do |row|
        if ::ActiveRecord::VERSION::MAJOR < 4
          model = klass.send(:instantiate,row)
        else
          # From ActiveRecord 4 on, the column types are passed along too.
          @column_types ||= column_types
          model = klass.send(:instantiate, row, @column_types)
        end
        block.call(model)
      end
    end

    # Returns an array of columns plucked from the result rows.
    # Experimental function, as this could still use too much memory
    # and negate the purpose of this library.
    # Should this return a lazy enumerator instead?
    #
    # cols - Column names, optionally followed by an options Hash. The
    #        options are merged into the cursor's options; :class selects
    #        the type to instantiate (see iterate_type).
    #
    # Returns an Array with one entry per row: the bare value when a single
    # column was requested, otherwise an Array of the requested values.
    def pluck(*cols)
      options = cols.last.is_a?(Hash) ? cols.pop : {}
      # Build a new hash rather than merge!-ing in place: @options is the very
      # object the caller passed to #initialize and must not be modified.
      @options = @options.merge(options).merge(:symbolize_keys => true)
      self.iterate_type(options[:class]) if options[:class]
      cols   = cols.map {|c| c.to_sym }
      single = cols.size == 1
      result = []

      self.each do |row|
        row    = row.symbolize_keys if row.is_a?(Hash)
        values = cols.map { |c| row[c] }
        # Unwrap exactly one level for a single column; a recursive flatten
        # would also take apart values that are themselves arrays.
        result << (single ? values.first : values)
      end

      result
    end

    # Opens the cursor inside a transaction, yields every fetched row to the
    # block and closes the cursor afterwards. Iteration stops early when the
    # block's return value equals options[:until] or differs from
    # options[:while] (each only when that option key is present).
    #
    # Returns the number of rows yielded.
    def each_tuple(&block) #:nodoc:
      has_do_until  = @options.has_key?(:until)
      has_do_while  = @options.has_key?(:while)
      @count        = 0
      @column_types = nil
      @connection.transaction do
        # Declared outside the begin/ensure on purpose: if opening fails there
        # is no cursor to close, and an exception raised by a CLOSE issued
        # from the ensure clause would replace the original error.
        open
        begin
          while (row = fetch) do
            break if row.size==0
            @count += 1
            rc = block.call(row)
            break if has_do_until && rc == @options[:until]
            break if has_do_while && rc != @options[:while]
          end
        ensure
          close
        end
      end
      @count
    end

    # Returns the row unchanged.
    def cast_types(row)
      row
    end

    # Builds a hash of column name => type object for the current result,
    # looked up in the connection's type map by field type and modifier.
    #
    # Returns nil before ActiveRecord 4, otherwise the (memoized) hash.
    def column_types
      return nil if ::ActiveRecord::VERSION::MAJOR < 4
      return @column_types if @column_types

      types = {}
      fields = @result.fields
      fields.each_with_index do |fname, i|
        ftype = @result.ftype i
        fmod  = @result.fmod i
        types[fname] = @connection.get_type_map.fetch(ftype, fmod) { |oid, mod|
          warn "unknown OID: #{fname}(#{oid}) (#{sql})"
          # Spelled out in full: a bare OID constant cannot be resolved from
          # inside PostgreSQLCursor::Cursor.
          # NOTE(review): confirm this class exists in every supported
          # ActiveRecord 4.x release.
          ::ActiveRecord::ConnectionAdapters::PostgreSQLAdapter::OID::Identity.new
        }
      end

      @column_types = types
    end

    # Public: Opens (actually, "declares") the cursor. Call this before fetching
    def open
      set_cursor_tuple_fraction
      @cursor = @@cursor_seq += 1
      @result = @connection.execute("declare cursor_#{@cursor} cursor for #{@sql}")
      @block = []
    end

    # Public: Returns the next row from the cursor, fetching another block
    # from the server when the local buffer is empty.
    #
    # Returns a row as a hash of {'colname'=>value,...}, or nil when the
    # fetch delivered no further rows.
    def fetch
      fetch_block if @block.size==0
      @block.shift
    end

    # Private: Fetches the next block of rows into @block
    #
    # block_size - Rows to fetch; defaults to options[:block_size], or 1000.
    def fetch_block(block_size=nil)
      block_size ||= @block_size ||= @options.fetch(:block_size) { 1000 }
      @result = @connection.execute("fetch #{block_size} from cursor_#{@cursor}")
      @block = @result.collect {|row| row } # Make our own
    end

    # Public: Closes the cursor
    def close
      @connection.execute("close cursor_#{@cursor}")
    end

    # Private: Sets the PostgreSQL cursor_tuple_fraction value.
    # This is a value between 0.1 and 1.0 (PostgreSQL defaults to 0.1, this library defaults to 1.0)
    # used to determine the expected fraction (percent) of result rows returned to the caller.
    # This value determines the access path by the query planner.
    #
    # frac - Fraction to set; when omitted, options[:fraction] is used and,
    #        failing that, 1.0.
    #
    # The statement is always issued, so the :fraction option and the
    # library default of 1.0 both reach the server.
    #
    # Returns the fraction as a Float. Float() raises for non-numeric input.
    def set_cursor_tuple_fraction(frac=nil)
      frac = Float(frac || @options[:fraction] || 1.0)
      @cursor_tuple_fraction = frac
      @result = @connection.execute("set cursor_tuple_fraction to #{frac}")
      frac
    end
  end
end
|
data/postgresql_cursor.gemspec
CHANGED
@@ -1,48 +1,27 @@
|
|
1
|
-
#
|
2
|
-
|
3
|
-
|
4
|
-
|
5
|
-
# stub: postgresql_cursor 0.4.3 ruby lib
|
1
|
+
# coding: utf-8
|
2
|
+
lib = File.expand_path('../lib', __FILE__)
|
3
|
+
$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
|
4
|
+
require 'postgresql_cursor/version'
|
6
5
|
|
7
|
-
Gem::Specification.new do |
|
8
|
-
|
9
|
-
|
6
|
+
Gem::Specification.new do |spec|
|
7
|
+
spec.name = "postgresql_cursor"
|
8
|
+
spec.version = PostgresqlCursor::VERSION
|
9
|
+
spec.authors = ["Allen Fair"]
|
10
|
+
spec.email = ["allen.fair@gmail.com"]
|
11
|
+
spec.summary = "ActiveRecord PostgreSQL Adapter extension for using a cursor to return a large result set"
|
12
|
+
spec.description = "PostgreSQL Cursor is an extension to the ActiveRecord PostgreSQLAdapter for very large result sets. It provides a cursor open/fetch/close interface to access data without loading all rows into memory, and instead loads the result rows in \"chunks\" (default of 1_000 rows), buffers them, and returns the rows one at a time."
|
13
|
+
spec.homepage = "http://github.com/afair/postgresql_cursor"
|
14
|
+
spec.license = "MIT"
|
10
15
|
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
s.description = "PostgreSQL Cursor is an extension to the ActiveRecord PostgreSQLAdapter for very large result sets. It provides a cursor open/fetch/close interface to access data without loading all rows into memory, and instead loads the result rows in \"chunks\" (default of 10_000 rows), buffers them, and returns the rows one at a time."
|
16
|
-
s.email = "allen.fair@gmail.com"
|
17
|
-
s.extra_rdoc_files = [
|
18
|
-
"LICENSE",
|
19
|
-
"README.rdoc"
|
20
|
-
]
|
21
|
-
s.files = [
|
22
|
-
".document",
|
23
|
-
"LICENSE",
|
24
|
-
"README.rdoc",
|
25
|
-
"Rakefile",
|
26
|
-
"VERSION",
|
27
|
-
"lib/postgresql_cursor.rb",
|
28
|
-
"postgresql_cursor.gemspec",
|
29
|
-
"test/helper.rb",
|
30
|
-
"test/test_postgresql_cursor.rb"
|
31
|
-
]
|
32
|
-
s.homepage = "http://github.com/afair/postgresql_cursor"
|
33
|
-
s.rubygems_version = "2.2.1"
|
34
|
-
s.summary = "ActiveRecord PostgreSQL Adapter extension for using a cursor to return a large result set"
|
16
|
+
spec.files = `git ls-files -z`.split("\x0")
|
17
|
+
spec.executables = spec.files.grep(%r{^bin/}) { |f| File.basename(f) }
|
18
|
+
spec.test_files = spec.files.grep(%r{^(test|spec|features)/})
|
19
|
+
spec.require_paths = ["lib"]
|
35
20
|
|
36
|
-
|
37
|
-
|
21
|
+
#spec.add_dependency "pg" # Remove this for jruby, which should specify 'activerecord-jdbcpostgresql-adapter'
|
22
|
+
spec.add_dependency "activerecord", ">= 3.2.0"
|
38
23
|
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
s.add_dependency(%q<activerecord>, [">= 0"])
|
43
|
-
end
|
44
|
-
else
|
45
|
-
s.add_dependency(%q<activerecord>, [">= 0"])
|
46
|
-
end
|
24
|
+
spec.add_development_dependency "pg"
|
25
|
+
spec.add_development_dependency "rake"
|
26
|
+
spec.add_development_dependency "minitest"
|
47
27
|
end
|
48
|
-
|