postgresql_cursor 0.4.3 → 0.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,17 @@
# lib/postgresql_cursor/active_record/connection_adapters/postgresql_type_map
module PostgreSQLCursor
  module ActiveRecord
    module ConnectionAdapters
      # Mixin that exposes the type map the cursor needs for column casting.
      module PostgreSQLTypeMap
        # Returns the type map used by the cursor operation.
        #
        # On ActiveRecord 4.0 this is the OID::TYPE_MAP constant; on every
        # other version it is whatever the receiver's own (private) "type_map"
        # method returns.
        def get_type_map # :nodoc:
          version = ::ActiveRecord::VERSION
          return type_map unless version::MAJOR == 4 && version::MINOR == 0

          ::ActiveRecord::ConnectionAdapters::PostgreSQLAdapter::OID::TYPE_MAP
        end
      end
    end
  end
end
@@ -0,0 +1,64 @@
# Defines extension to ActiveRecord/AREL to use this library
module PostgreSQLCursor
  module ActiveRecord
    module Relation
      # Cursor-based iteration helpers. The receiver must respond to
      # #connection and #to_sql.
      module CursorIterators

        # Public: Executes the query, returning each row as a hash
        # to the given block.
        #
        # options     - Hash to control
        #   connection: instance - Connection to use (default: self.connection)
        #   fraction: 0.1..1.0   - The cursor_tuple_fraction (default 1.0)
        #   block_size: 1..n     - The number of rows to fetch per db block fetch
        #   while: value         - Exits loop when block does not return this value.
        #   until: value         - Exits loop when block returns this value.
        #
        # Example:
        #   Post.where(user_id:123).each_row { |hash| Post.process(hash) }
        #   Post.each_row.map {|r| r["id"].to_i }
        #
        # Returns the number of rows yielded to the block, or the
        # PostgreSQLCursor::Cursor itself when no block is given.
        def each_row(options={}, &block)
          options = {:connection => self.connection}.merge(options)
          cursor  = PostgreSQLCursor::Cursor.new(to_sql, options)
          return cursor.each_row(&block) if block_given?
          cursor
        end
        alias :each_hash :each_row

        # Public: Like each_row, but returns an instantiated model object to the block
        #
        # Parameters: same as each_row
        #
        # Example:
        #   Post.where(user_id:123).each_instance { |post| post.process }
        #   Post.where(user_id:123).each_instance.map { |post| post.process }
        #
        # Returns the number of rows yielded to the block, or the cursor
        # (set to iterate instances built through the receiver) when no block is given.
        def each_instance(options={}, &block)
          options = {:connection => self.connection}.merge(options)
          cursor  = PostgreSQLCursor::Cursor.new(to_sql, options)
          return cursor.each_instance(self, &block) if block_given?
          cursor.iterate_type(self)
        end

        # Plucks the column names from the rows, and returns them in an array.
        #
        # cols - Column names, optionally followed by an options Hash (see each_row).
        #
        # The options hash is handed to each_row untouched, so the caller's hash
        # is not modified and a caller-supplied :connection is honoured.
        def pluck_rows(*cols)
          options = cols.last.is_a?(Hash) ? cols.pop : {}
          each_row(options).pluck(*cols)
        end
        alias :pluck_row :pluck_rows

        # Plucks the column names from the instances, and returns them in an array.
        #
        # cols - Column names, optionally followed by an options Hash (see each_row).
        #
        # As with pluck_rows, the caller's options hash is left unmodified.
        def pluck_instances(*cols)
          options = cols.last.is_a?(Hash) ? cols.pop : {}
          each_instance(options).pluck(*cols)
        end
        alias :pluck_instance :pluck_instances

      end
    end
  end
end
@@ -0,0 +1,92 @@
module PostgreSQLCursor
  module ActiveRecord
    # Model-level cursor entry points. The receiver must respond to
    # #connection and to #all (or #scoped on ActiveRecord < 4).
    module SqlCursor
      # Public: Executes the query, returning each row as a hash
      # to the given block.
      #
      # options     - Hash to control
      #   fraction: 0.1..1.0 - The cursor_tuple_fraction (default 1.0)
      #   block_size: 1..n   - The number of rows to fetch per db block fetch
      #   while: value       - Exits loop when block does not return this value.
      #   until: value       - Exits loop when block returns this value.
      #
      # Example:
      #   Post.each_row { |hash| Post.process(hash) }
      #
      # Returns whatever the relation's each_row returns (per the examples,
      # the number of rows yielded to the block).
      def each_row(options={}, &block)
        options = {:connection => self.connection}.merge(options)
        postgresql_cursor_relation.each_row(options, &block)
      end
      alias :each_hash :each_row

      # Public: Like each_row, but returns an instantiated model object to the block
      #
      # Parameters: same as each_row
      #
      # Example:
      #   Post.each_instance { |post| post.process }
      #
      # Returns whatever the relation's each_instance returns (per the
      # examples, the number of rows yielded to the block).
      def each_instance(options={}, &block)
        options = {:connection => self.connection}.merge(options)
        postgresql_cursor_relation.each_instance(options, &block)
      end

      # Public: Returns each row as a hash to the given block
      #
      # sql         - Full SQL statement, variables interpolated
      # options     - Hash to control
      #   fraction: 0.1..1.0 - The cursor_tuple_fraction (default 1.0)
      #   block_size: 1..n   - The number of rows to fetch per db block fetch
      #   while: value       - Exits loop when block does not return this value.
      #   until: value       - Exits loop when block returns this value.
      #
      # Example:
      #   Post.each_row_by_sql("select * from posts") { |hash| Post.process(hash) }
      #   Post.each_row_by_sql("select * from posts").count
      #
      # Returns the number of rows yielded to the block, or the
      # PostgreSQLCursor::Cursor itself when no block is given.
      def each_row_by_sql(sql, options={}, &block)
        options = {:connection => self.connection}.merge(options)
        cursor  = PostgreSQLCursor::Cursor.new(sql, options)
        return cursor.each_row(&block) if block_given?
        cursor
      end
      alias :each_hash_by_sql :each_row_by_sql

      # Public: Returns each row as a model instance to the given block
      # As this instantiates a model object, it is slower than each_row_by_sql
      #
      # Parameters: see each_row_by_sql
      #
      # Example:
      #   Post.each_instance_by_sql("select * from posts") { |post| post.process }
      #   Post.each_instance_by_sql("select * from posts").count
      #
      # Returns the number of rows yielded to the block, or the cursor
      # (set to iterate instances of the receiver) when no block is given.
      def each_instance_by_sql(sql, options={}, &block)
        options = {:connection => self.connection}.merge(options)
        cursor  = PostgreSQLCursor::Cursor.new(sql, options)
        return cursor.each_instance(self, &block) if block_given?
        cursor.iterate_type(self)
      end

      # Returns an array of the given column names. Use if you need cursors and don't expect
      # this to consume too much memory. Values are strings. Like ActiveRecord's pluck.
      #
      # cols - Column names, optionally followed by an options Hash (see each_row).
      def pluck_rows(*cols)
        options = cols.last.is_a?(Hash) ? cols.pop : {}
        postgresql_cursor_relation.each_row(options).pluck(*cols)
      end
      alias :pluck_row :pluck_rows

      # Returns an array of the given column names. Use if you need cursors and don't expect
      # this to consume too much memory. Values are instance types. Like ActiveRecord's pluck.
      #
      # cols - Column names, optionally followed by an options Hash (see each_row).
      def pluck_instances(*cols)
        options = cols.last.is_a?(Hash) ? cols.pop : {}
        postgresql_cursor_relation.each_instance(options).pluck(*cols)
      end
      alias :pluck_instance :pluck_instances

      private

      # Internal: The relation the cursor methods delegate to.
      #
      # Before ActiveRecord 4, Model.all loads the records into an Array, which
      # has no cursor methods; "scoped" is the call that yields a relation there.
      def postgresql_cursor_relation
        ::ActiveRecord::VERSION::MAJOR < 4 ? scoped : all
      end
    end
  end
end
@@ -0,0 +1,199 @@
################################################################################
# PostgreSQLCursor: library class provides postgresql cursor for large result
# set processing. Requires ActiveRecord, but can be adapted to other DBI/ORM libraries.
# If you don't use AR, this assumes #connection and #instantiate methods are available.
#
# options     - Hash to control operation and loop breaks
#   connection: instance - ActiveRecord connection to use
#   fraction: 0.1..1.0   - The cursor_tuple_fraction (default 1.0)
#   block_size: 1..n     - The number of rows to fetch per db block fetch
#   while: value         - Exits loop when block does not return this value.
#   until: value         - Exits loop when block returns this value.
#
# Examples:
#   PostgreSQLCursor::Cursor.new("select ...").each { |hash| ... }
#   ActiveRecordModel.where(...).each_row { |hash| ... }
#   ActiveRecordModel.each_row_by_sql("select ...") { |hash| ... }
#   ActiveRecordModel.each_instance_by_sql("select ...") { |model| ... }
#
module PostgreSQLCursor
  class Cursor
    include Enumerable

    # NOTE: the :count reader takes precedence over Enumerable#count, so
    # cursor.count reports the rows yielded by the most recent iteration
    # (0 before any iteration) and does not itself run the query.
    attr_reader :sql, :options, :connection, :count, :result

    # Sequence used to give each declared cursor a distinct name.
    @@cursor_seq = 0

    # Public: Start a new PostgreSQL cursor query
    # sql     - The SQL statement with interpolated values
    # options - hash of processing controls
    #   connection: instance - Connection to use (default: ActiveRecord::Base.connection)
    #   instances: value     - When truthy, #each yields instances instead of hashes
    #   symbolize_keys: bool - When truthy, row hashes are yielded with symbol keys
    #   while: value         - Exits loop when block does not return this value.
    #   until: value         - Exits loop when block returns this value.
    #   fraction: 0.1..1.0   - The cursor_tuple_fraction (default 1.0)
    #   block_size: 1..n     - The number of rows to fetch per db block fetch
    #                          Defaults to 1000
    #
    # Examples
    #
    #   PostgreSQLCursor::Cursor.new("select ....")
    #
    # Returns the cursor object when called with new.
    def initialize(sql, options={})
      @sql        = sql
      # Private copy: #pluck merges into @options and must not alter the caller's hash.
      @options    = options.dup
      @connection = @options.fetch(:connection) { ::ActiveRecord::Base.connection }
      @count      = 0
      @iterate    = @options[:instances] ? :each_instance : :each_row
    end

    # Specify the type to instantiate, or reset to return a Hash
    #
    # type - Object used to instantiate rows (nil or Hash selects plain hashes)
    #
    # Returns self, so calls can be chained.
    def iterate_type(type=nil)
      if type.nil? || type == Hash
        @iterate = :each_row
      else
        @iterate = :each_instance
        @type = type
      end
      self
    end

    # Public: Yields each row of the result set to the passed block
    #
    # Yields a row hash (see each_row) or, when an instance type has been
    # selected, an instantiated object (see each_instance).
    #
    # Returns the count of rows processed
    def each(&block)
      if @iterate == :each_row
        self.each_row(&block)
      else
        self.each_instance(@type, &block)
      end
    end

    # Public: Yields each row as a hash of {'colname'=>value,...}. Keys are
    # symbolized only when the :symbolize_keys option is set.
    #
    # Returns the count of rows processed
    def each_row(&block)
      self.each_tuple do |row|
        row = row.symbolize_keys if @options[:symbolize_keys]
        block.call(row)
      end
    end

    # Public: Yields each row instantiated through klass (default: the type
    # given to iterate_type). On ActiveRecord 4+ the column types of the
    # fetched result are passed along to "instantiate".
    #
    # Returns the count of rows processed
    def each_instance(klass=nil, &block)
      klass ||= @type
      self.each_tuple do |row|
        model =
          if ::ActiveRecord::VERSION::MAJOR < 4
            klass.send(:instantiate, row)
          else
            klass.send(:instantiate, row, column_types)
          end
        block.call(model)
      end
    end

    # Returns an array of columns plucked from the result rows.
    # Experimental function, as this could still use too much memory
    # and negate the purpose of this library.
    # Should this return a lazy enumerator instead?
    #
    # cols - Column names, optionally followed by an options Hash that is
    #        merged into this cursor's options (class: selects the type to
    #        instantiate). This also switches the cursor to symbolized row keys.
    #
    # Returns one value per row for a single column, or one array of values
    # per row otherwise. Column values that are themselves arrays are kept intact.
    def pluck(*cols)
      options = cols.last.is_a?(Hash) ? cols.pop : {}
      @options.merge!(options)
      @options[:symbolize_keys] = true
      self.iterate_type(options[:class]) if options[:class]
      cols = cols.map {|c| c.to_sym }

      # A single column is unwrapped per row instead of flattening the whole
      # result afterwards, which would also flatten array-valued columns.
      if cols.size == 1
        col = cols.first
        map { |row| row[col] }
      else
        map { |row| cols.map { |c| row[c] } }
      end
    end

    # Runs the whole cursor lifecycle (declare, fetch loop, close) inside a
    # transaction, yielding each raw row to the block and honouring the
    # :while / :until options.
    #
    # Returns the count of rows yielded
    def each_tuple(&block) #:nodoc:
      has_do_until = @options.has_key?(:until)
      has_do_while = @options.has_key?(:while)
      @count = 0
      @column_types = nil
      @connection.transaction do
        opened   = false
        finished = false
        begin
          open
          opened = true
          while (row = fetch) do
            break if row.size==0
            @count += 1
            rc = block.call(row)
            break if has_do_until && rc == @options[:until]
            break if has_do_while && rc != @options[:while]
          end
          finished = true
        ensure
          # Nothing to close if the declare itself failed.
          close_after_iteration(finished) if opened
        end
      end
      @count
    end

    # Placeholder hook: returns the row unchanged.
    def cast_types(row)
      row
    end

    # Builds (and memoizes until the next iteration) a hash of column name =>
    # type for the most recent result, looked up in the connection's type map.
    #
    # Returns nil on ActiveRecord < 4.
    def column_types
      return nil if ::ActiveRecord::VERSION::MAJOR < 4
      return @column_types if @column_types

      type_map = @connection.get_type_map
      types = {}
      @result.fields.each_with_index do |fname, i|
        ftype = @result.ftype i
        fmod  = @result.fmod i
        types[fname] = type_map.fetch(ftype, fmod) { |oid, mod|
          warn "unknown OID: #{fname}(#{oid}) (#{sql})"
          unknown_oid_type
        }
      end

      @column_types = types
    end

    # Public: Opens (actually, "declares") the cursor. Call this before fetching
    def open
      set_cursor_tuple_fraction(@options[:fraction] || 1.0)
      @cursor = @@cursor_seq += 1
      @result = @connection.execute("declare cursor_#{@cursor} cursor for #{@sql}")
      @block = []
    end

    # Public: Returns the next row from the cursor, or nil at the end of results
    #
    # Returns a row as a hash of {'colname'=>value,...}
    def fetch
      fetch_block if @block.size==0
      @block.shift
    end

    # Private: Fetches the next block of rows into @block
    def fetch_block(block_size=nil)
      block_size ||= @block_size ||= @options.fetch(:block_size) { 1000 }
      @result = @connection.execute("fetch #{block_size} from cursor_#{@cursor}")
      @block = @result.map {|row| row } # Make our own
    end

    # Public: Closes the cursor
    def close
      @connection.execute("close cursor_#{@cursor}")
    end

    # Private: Sets the PostgreSQL cursor_tuple_fraction value.
    # This is a value between 0.1 and 1.0 (PostgreSQL defaults to 0.1, this library defaults to 1.0)
    # used to determine the expected fraction (percent) of result rows returned to the caller.
    # This value determines the access path by the query planner.
    #
    # The SET is issued on every call rather than remembered: #open runs
    # inside the iteration's transaction, and a SET made in a transaction that
    # is later rolled back is undone by PostgreSQL.
    #
    # Returns the fraction that was set.
    def set_cursor_tuple_fraction(frac=1.0)
      @cursor_tuple_fraction = frac
      @result = @connection.execute("set cursor_tuple_fraction to #{frac}")
      frac
    end

    private

    # Closes the cursor at the end of an iteration. When the iteration did not
    # run to completion, a failing close is swallowed so it cannot replace the
    # error that is already propagating.
    def close_after_iteration(finished)
      close
    rescue StandardError
      raise if finished
    end

    # Type used for a column whose OID is missing from the type map. The
    # adapter's OID::Identity is used when it exists; otherwise
    # ActiveRecord::Type::Value.
    def unknown_oid_type
      oid = ::ActiveRecord::ConnectionAdapters::PostgreSQLAdapter::OID
      return oid::Identity.new if oid.const_defined?(:Identity, false)
      ::ActiveRecord::Type::Value.new
    end
  end
end
@@ -0,0 +1,3 @@
# Version constant for the gem. Note the module name's casing
# (PostgresqlCursor, not PostgreSQLCursor): the gemspec reads
# PostgresqlCursor::VERSION, so the name must stay as it is.
module PostgresqlCursor
  # Frozen so the version string cannot be mutated in place at runtime.
  VERSION = "0.5.0".freeze
end
@@ -1,48 +1,27 @@
1
- # Generated by jeweler
2
- # DO NOT EDIT THIS FILE DIRECTLY
3
- # Instead, edit Jeweler::Tasks in Rakefile, and run 'rake gemspec'
4
- # -*- encoding: utf-8 -*-
5
- # stub: postgresql_cursor 0.4.3 ruby lib
1
+ # coding: utf-8
2
+ lib = File.expand_path('../lib', __FILE__)
3
+ $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
4
+ require 'postgresql_cursor/version'
6
5
 
7
- Gem::Specification.new do |s|
8
- s.name = "postgresql_cursor"
9
- s.version = "0.4.3"
6
+ Gem::Specification.new do |spec|
7
+ spec.name = "postgresql_cursor"
8
+ spec.version = PostgresqlCursor::VERSION
9
+ spec.authors = ["Allen Fair"]
10
+ spec.email = ["allen.fair@gmail.com"]
11
+ spec.summary = "ActiveRecord PostgreSQL Adapter extension for using a cursor to return a large result set"
12
+ spec.description = "PostgreSQL Cursor is an extension to the ActiveRecord PostgreSQLAdapter for very large result sets. It provides a cursor open/fetch/close interface to access data without loading all rows into memory, and instead loads the result rows in \"chunks\" (default of 1_000 rows), buffers them, and returns the rows one at a time."
13
+ spec.homepage = "http://github.com/afair/postgresql_cursor"
14
+ spec.license = "MIT"
10
15
 
11
- s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
12
- s.require_paths = ["lib"]
13
- s.authors = ["Allen Fair"]
14
- s.date = "2014-06-06"
15
- s.description = "PostgreSQL Cursor is an extension to the ActiveRecord PostgreSQLAdapter for very large result sets. It provides a cursor open/fetch/close interface to access data without loading all rows into memory, and instead loads the result rows in \"chunks\" (default of 10_000 rows), buffers them, and returns the rows one at a time."
16
- s.email = "allen.fair@gmail.com"
17
- s.extra_rdoc_files = [
18
- "LICENSE",
19
- "README.rdoc"
20
- ]
21
- s.files = [
22
- ".document",
23
- "LICENSE",
24
- "README.rdoc",
25
- "Rakefile",
26
- "VERSION",
27
- "lib/postgresql_cursor.rb",
28
- "postgresql_cursor.gemspec",
29
- "test/helper.rb",
30
- "test/test_postgresql_cursor.rb"
31
- ]
32
- s.homepage = "http://github.com/afair/postgresql_cursor"
33
- s.rubygems_version = "2.2.1"
34
- s.summary = "ActiveRecord PostgreSQL Adapter extension for using a cursor to return a large result set"
16
+ spec.files = `git ls-files -z`.split("\x0")
17
+ spec.executables = spec.files.grep(%r{^bin/}) { |f| File.basename(f) }
18
+ spec.test_files = spec.files.grep(%r{^(test|spec|features)/})
19
+ spec.require_paths = ["lib"]
35
20
 
36
- if s.respond_to? :specification_version then
37
- s.specification_version = 4
21
+ #spec.add_dependency "pg" # Remove this for jruby, which should specify 'activerecord-jdbcpostgresql-adapter'
22
+ spec.add_dependency "activerecord", ">= 3.2.0"
38
23
 
39
- if Gem::Version.new(Gem::VERSION) >= Gem::Version.new('1.2.0') then
40
- s.add_runtime_dependency(%q<activerecord>, [">= 0"])
41
- else
42
- s.add_dependency(%q<activerecord>, [">= 0"])
43
- end
44
- else
45
- s.add_dependency(%q<activerecord>, [">= 0"])
46
- end
24
+ spec.add_development_dependency "pg"
25
+ spec.add_development_dependency "rake"
26
+ spec.add_development_dependency "minitest"
47
27
  end
48
-