postgresql_cursor 0.4.3 → 0.5.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,17 @@
# lib/postgresql_cursor/active_record/connection_adapters/postgresql_type_map
module PostgreSQLCursor
  module ActiveRecord
    module ConnectionAdapters
      # Mixin exposing the OID type map that the cursor code needs.
      #
      # The including object must respond to `type_map` (it may be private);
      # presumably this is mixed into the PostgreSQL connection adapter --
      # confirm against the include site, which is not shown here.
      module PostgreSQLTypeMap
        # Returns the "type_map" needed for the cursor operation.
        #
        # ActiveRecord 4.0 is special-cased to the OID::TYPE_MAP constant;
        # every other version delegates to the receiver's own `type_map`.
        def get_type_map # :nodoc:
          if ::ActiveRecord::VERSION::MAJOR == 4 && ::ActiveRecord::VERSION::MINOR == 0
            ::ActiveRecord::ConnectionAdapters::PostgreSQLAdapter::OID::TYPE_MAP
          else
            type_map
          end
        end
      end
    end
  end
end
@@ -0,0 +1,64 @@
# Defines extension to ActiveRecord/AREL to use this library
module PostgreSQLCursor
  module ActiveRecord
    module Relation
      # Cursor-based iteration helpers. The receiver must provide
      # #connection and #to_sql.
      module CursorIterators

        # Public: Executes the query, returning each row as a hash
        # to the given block.
        #
        # options     - Hash to control
        #   connection: instance - Connection to use (defaults to self.connection)
        #   fraction: 0.1..1.0   - The cursor_tuple_fraction (default 1.0)
        #   block_size: 1..n     - The number of rows to fetch per db block fetch
        #   while: value         - Exits loop when block does not return this value.
        #   until: value         - Exits loop when block returns this value.
        #
        # Example:
        #   Post.where(user_id:123).each_row { |hash| Post.process(hash) }
        #   Post.each_row.map {|r| r["id"].to_i }
        #
        # Returns the number of rows yielded to the block, or the
        # PostgreSQLCursor::Cursor itself when no block is given.
        def each_row(options={}, &block)
          options = {:connection => self.connection}.merge(options)
          cursor  = PostgreSQLCursor::Cursor.new(to_sql, options)
          return cursor.each_row(&block) if block_given?
          cursor
        end
        alias :each_hash :each_row

        # Public: Like each_row, but returns an instantiated model object to the block
        #
        # Parameters: same as each_row
        #
        # Example:
        #   Post.where(user_id:123).each_instance { |post| post.process }
        #   Post.where(user_id:123).each_instance.map { |post| post.process }
        #
        # Returns the number of rows yielded to the block, or the cursor
        # (set to instantiate rows through self) when no block is given.
        def each_instance(options={}, &block)
          options = {:connection => self.connection}.merge(options)
          cursor  = PostgreSQLCursor::Cursor.new(to_sql, options)
          return cursor.each_instance(self, &block) if block_given?
          cursor.iterate_type(self)
        end

        # Plucks the column names from the rows, and return them in an array.
        #
        # cols - Column names; a trailing Hash is treated as each_row options.
        #
        # The options hash is handed to each_row untouched, so the caller's
        # hash is not modified and an explicit :connection is respected.
        def pluck_rows(*cols)
          options = cols.last.is_a?(Hash) ? cols.pop : {}
          each_row(options).pluck(*cols)
        end
        alias :pluck_row :pluck_rows

        # Plucks the column names from the instances, and return them in an array.
        #
        # cols - Column names; a trailing Hash is treated as each_instance options.
        #
        # The options hash is handed to each_instance untouched, so the caller's
        # hash is not modified and an explicit :connection is respected.
        def pluck_instances(*cols)
          options = cols.last.is_a?(Hash) ? cols.pop : {}
          each_instance(options).pluck(*cols)
        end
        alias :pluck_instance :pluck_instances

      end
    end
  end
end
@@ -0,0 +1,92 @@
module PostgreSQLCursor
  module ActiveRecord
    # Cursor entry points for model classes. The receiver must provide
    # #connection and #all; #all is expected to return an object responding
    # to each_row / each_instance (such as a relation with CursorIterators).
    module SqlCursor
      # Public: Executes the query, returning each row as a hash
      # to the given block.
      #
      # options     - Hash to control
      #   fraction: 0.1..1.0 - The cursor_tuple_fraction (default 1.0)
      #   block_size: 1..n   - The number of rows to fetch per db block fetch
      #   while: value       - Exits loop when block does not return this value.
      #   until: value       - Exits loop when block returns this value.
      #
      # Example:
      #   Post.each_row { |hash| Post.process(hash) }
      #
      # Returns whatever all.each_row returns (the number of rows yielded
      # when a block is given).
      def each_row(options={}, &block)
        options = {:connection => self.connection}.merge(options)
        all.each_row(options, &block)
      end
      alias :each_hash :each_row

      # Public: Like each_row, but returns an instantiated model object to the block
      #
      # Parameters: same as each_row
      #
      # Example:
      #   Post.each_instance { |post| post.process }
      #
      # Returns whatever all.each_instance returns (the number of rows
      # yielded when a block is given).
      def each_instance(options={}, &block)
        options = {:connection => self.connection}.merge(options)
        all.each_instance(options, &block)
      end

      # Public: Returns each row as a hash to the given block
      #
      # sql         - Full SQL statement, variables interpolated
      # options     - Hash to control
      #   fraction: 0.1..1.0 - The cursor_tuple_fraction (default 1.0)
      #   block_size: 1..n   - The number of rows to fetch per db block fetch
      #   while: value       - Exits loop when block does not return this value.
      #   until: value       - Exits loop when block returns this value.
      #
      # Example:
      #   Post.each_row_by_sql("select * from posts") { |hash| Post.process(hash) }
      #   Post.each_row_by_sql("select * from posts").count
      #
      # NOTE(review): without a block this returns the cursor, and Cursor
      # exposes its own `count` reader -- confirm the `.count` example above
      # actually runs the query.
      #
      # Returns the number of rows yielded to the block, or the
      # PostgreSQLCursor::Cursor when no block is given.
      def each_row_by_sql(sql, options={}, &block)
        options = {:connection => self.connection}.merge(options)
        cursor  = PostgreSQLCursor::Cursor.new(sql, options)
        return cursor.each_row(&block) if block_given?
        cursor
      end
      alias :each_hash_by_sql :each_row_by_sql

      # Public: Returns each row as a model instance to the given block
      # As this instantiates a model object, it is slower than each_row_by_sql
      #
      # Parameters: see each_row_by_sql
      #
      # Example:
      #   Post.each_instance_by_sql("select * from posts") { |post| post.process }
      #   Post.each_instance_by_sql("select * from posts").count
      #
      # Returns the number of rows yielded to the block, or the cursor
      # (set to instantiate rows through self) when no block is given.
      def each_instance_by_sql(sql, options={}, &block)
        options = {:connection => self.connection}.merge(options)
        cursor  = PostgreSQLCursor::Cursor.new(sql, options)
        return cursor.each_instance(self, &block) if block_given?
        cursor.iterate_type(self)
      end

      # Returns an array of the given column names. Use if you need cursors and don't expect
      # this to consume too much memory. Values are strings. Like ActiveRecord's pluck.
      #
      # cols - Column names; a trailing Hash is passed to each_row as options.
      def pluck_rows(*cols)
        options = cols.last.is_a?(Hash) ? cols.pop : {}
        all.each_row(options).pluck(*cols)
      end
      alias :pluck_row :pluck_rows

      # Returns an array of the given column names. Use if you need cursors and don't expect
      # this to consume too much memory. Values are instance types. Like ActiveRecord's pluck.
      #
      # cols - Column names; a trailing Hash is passed to each_instance as options.
      def pluck_instances(*cols)
        options = cols.last.is_a?(Hash) ? cols.pop : {}
        all.each_instance(options).pluck(*cols)
      end
      alias :pluck_instance :pluck_instances
    end
  end
end
@@ -0,0 +1,199 @@
################################################################################
# PostgreSQLCursor: library class provides postgresql cursor for large result
# set processing. Requires ActiveRecord, but can be adapted to other DBI/ORM libraries.
# If you don't use AR, this assumes #connection and #instantiate methods are available.
#
# options     - Hash to control operation and loop breaks
#   connection: instance - ActiveRecord connection to use
#   fraction: 0.1..1.0   - The cursor_tuple_fraction (default 1.0)
#   block_size: 1..n     - The number of rows to fetch per db block fetch
#   while: value         - Exits loop when block does not return this value.
#   until: value         - Exits loop when block returns this value.
#
# Examples:
#   PostgreSQLCursor::Cursor.new("select ...").each { |hash| ... }
#   ActiveRecordModel.where(...).each_row { |hash| ... }
#   ActiveRecordModel.each_row_by_sql("select ...") { |hash| ... }
#   ActiveRecordModel.each_instance_by_sql("select ...") { |model| ... }
#
module PostgreSQLCursor
  class Cursor
    include Enumerable

    # NOTE: the `count` reader takes precedence over Enumerable#count. It
    # reports the rows yielded by the most recent iteration (0 before any).
    attr_reader :sql, :options, :connection, :count, :result

    # Sequence used to give every opened cursor a distinct name (cursor_N).
    @@cursor_seq = 0

    # Public: Start a new PostgreSQL cursor query
    # sql     - The SQL statement with interpolated values
    # options - hash of processing controls
    #   connection: instance - Connection to use
    #                          (default: ::ActiveRecord::Base.connection)
    #   instances: value     - When truthy, #each iterates with each_instance
    #   symbolize_keys: bool - When truthy, each_row yields symbol-keyed hashes
    #   while: value         - Exits loop when block does not return this value.
    #   until: value         - Exits loop when block returns this value.
    #   fraction: 0.1..1.0   - The cursor_tuple_fraction (default 1.0)
    #   block_size: 1..n     - The number of rows to fetch per db block fetch
    #                          Defaults to 1000
    #
    # Examples
    #
    #   PostgreSQLCursor::Cursor.new("select ....")
    #
    # Returns the cursor object when called with new.
    def initialize(sql, options={})
      @sql        = sql
      @options    = options
      @connection = @options.fetch(:connection) { ::ActiveRecord::Base.connection }
      @count      = 0
      @iterate    = options[:instances] ? :each_instance : :each_row
    end

    # Specify the type to instantiate, or reset to return a Hash
    #
    # type - Class used by each_instance; nil or Hash switches back to rows.
    #
    # Returns self so calls can be chained.
    def iterate_type(type=nil)
      if type.nil? || type == Hash
        @iterate = :each_row
      else
        @iterate = :each_instance
        @type    = type
      end
      self
    end

    # Public: Yields each row of the result set to the passed block
    #
    # Yields a row hash (see each_row) or, after iterate_type(klass), an
    # instance built by klass (see each_instance).
    #
    # Returns the count of rows processed
    def each(&block)
      if @iterate == :each_row
        self.each_row(&block)
      else
        self.each_instance(@type, &block)
      end
    end

    # Public: Yields each row as a hash. Keys are passed through
    # symbolize_keys only when the :symbolize_keys option is set.
    #
    # Returns the count of rows processed
    def each_row(&block)
      self.each_tuple do |row|
        row = row.symbolize_keys if @options[:symbolize_keys]
        block.call(row)
      end
    end

    # Public: Yields each row as an object built by klass.instantiate.
    #
    # klass - Class to instantiate; defaults to the type set by iterate_type.
    #
    # On ActiveRecord 4+ the column types of the current result are passed
    # to instantiate as a second argument.
    #
    # Returns the count of rows processed
    def each_instance(klass=nil, &block)
      klass ||= @type
      self.each_tuple do |row|
        if ::ActiveRecord::VERSION::MAJOR < 4
          model = klass.send(:instantiate, row)
        else
          @column_types ||= column_types
          model = klass.send(:instantiate, row, @column_types)
        end
        block.call(model)
      end
    end

    # Returns an array of columns plucked from the result rows.
    # Experimental function, as this could still use too much memory
    # and negate the purpose of this library.
    # Should this return a lazy enumerator instead?
    #
    # cols - Column names (converted with to_sym). A trailing Hash is merged
    #        into the cursor options; its :class entry selects the type to
    #        instantiate.
    #
    # Returns one value per row for a single column, otherwise one array of
    # values per row. Column values that are themselves arrays are kept
    # intact.
    def pluck(*cols)
      options = cols.last.is_a?(Hash) ? cols.pop : {}
      @options.merge!(options)
      @options[:symbolize_keys] = true
      self.iterate_type(options[:class]) if options[:class]
      cols   = cols.map { |c| c.to_sym }
      single = cols.size == 1
      result = []

      self.each do |row|
        row = row.symbolize_keys if row.is_a?(Hash)
        # Unwrap only the per-row wrapper; an unbounded flatten would also
        # tear apart array-valued columns.
        result << (single ? row[cols.first] : cols.map { |c| row[c] })
      end

      result
    end

    # Runs the whole open/fetch/close cycle inside a transaction, yielding
    # each raw row and honouring the :while / :until options. The cursor is
    # closed in `ensure`, so errors raised by the block still propagate.
    #
    # Returns the number of rows yielded.
    def each_tuple(&block) #:nodoc:
      has_do_until  = @options.has_key?(:until)
      has_do_while  = @options.has_key?(:while)
      @count        = 0
      @column_types = nil
      @connection.transaction do
        begin
          open
          while (row = fetch) do
            break if row.size==0
            @count += 1
            rc = block.call(row)
            break if has_do_until && rc == @options[:until]
            break if has_do_while && rc != @options[:while]
          end
        ensure
          close
        end
      end
      @count
    end

    # Placeholder hook: returns the row unchanged.
    def cast_types(row)
      row
    end

    # Builds (and memoizes) a column-name => type map for the current result
    # using the connection's get_type_map. Unknown OIDs produce a warning and
    # fall back to the adapter's OID::Identity type.
    #
    # Returns nil on ActiveRecord versions before 4.
    def column_types
      return nil if ::ActiveRecord::VERSION::MAJOR < 4
      return @column_types if @column_types

      types  = {}
      fields = @result.fields
      fields.each_with_index do |fname, i|
        ftype = @result.ftype i
        fmod  = @result.fmod i
        types[fname] = @connection.get_type_map.fetch(ftype, fmod) { |oid, mod|
          warn "unknown OID: #{fname}(#{oid}) (#{sql})"
          # Fully qualified: a bare OID constant does not resolve from
          # inside PostgreSQLCursor::Cursor.
          ::ActiveRecord::ConnectionAdapters::PostgreSQLAdapter::OID::Identity.new
        }
      end

      @column_types = types
    end

    # Public: Opens (actually, "declares") the cursor. Call this before fetching
    def open
      set_cursor_tuple_fraction
      @cursor = @@cursor_seq += 1
      @result = @connection.execute("declare cursor_#{@cursor} cursor for #{@sql}")
      @block  = []
    end

    # Public: Returns the next row from the cursor, fetching another block
    # from the database when the local buffer is empty.
    #
    # Returns a row as a hash of {'colname'=>value,...}, or nil once a fetch
    # yields no more rows.
    def fetch
      fetch_block if @block.size==0
      @block.shift
    end

    # Private: Fetches the next block of rows into @block
    #
    # block_size - Rows to fetch; defaults to the :block_size option or 1000.
    def fetch_block(block_size=nil)
      block_size ||= @block_size ||= @options.fetch(:block_size) { 1000 }
      @result = @connection.execute("fetch #{block_size} from cursor_#{@cursor}")
      @block  = @result.collect {|row| row } # Make our own
    end

    # Public: Closes the cursor
    def close
      @connection.execute("close cursor_#{@cursor}")
    end

    # Private: Sets the PostgreSQL cursor_tuple_fraction for the session.
    # This is a value between 0.1 and 1.0 (PostgreSQL defaults to 0.1, this library defaults to 1.0)
    # used to determine the expected fraction (percent) of result rows returned to the caller.
    # This value determines the access path by the query planner.
    #
    # frac - Fraction to apply; when omitted, the :fraction option is used,
    #        falling back to 1.0 (assume all rows will be fetched).
    #
    # The SET is issued on every call so the configured value is always the
    # one in effect when the cursor is declared.
    #
    # Returns the fraction that was applied.
    def set_cursor_tuple_fraction(frac=nil)
      frac ||= @options.fetch(:fraction) { 1.0 }
      @cursor_tuple_fraction = frac
      @result = @connection.execute("set cursor_tuple_fraction to #{frac}")
      frac
    end
  end
end
@@ -0,0 +1,3 @@
# Gem version, read by the gemspec as PostgresqlCursor::VERSION.
# NOTE: this namespace is spelled PostgresqlCursor, not PostgreSQLCursor.
module PostgresqlCursor
  VERSION = "0.5.0".freeze
end
@@ -1,48 +1,27 @@
1
- # Generated by jeweler
2
- # DO NOT EDIT THIS FILE DIRECTLY
3
- # Instead, edit Jeweler::Tasks in Rakefile, and run 'rake gemspec'
4
- # -*- encoding: utf-8 -*-
5
- # stub: postgresql_cursor 0.4.3 ruby lib
1
+ # coding: utf-8
2
+ lib = File.expand_path('../lib', __FILE__)
3
+ $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
4
+ require 'postgresql_cursor/version'
6
5
 
7
- Gem::Specification.new do |s|
8
- s.name = "postgresql_cursor"
9
- s.version = "0.4.3"
6
+ Gem::Specification.new do |spec|
7
+ spec.name = "postgresql_cursor"
8
+ spec.version = PostgresqlCursor::VERSION
9
+ spec.authors = ["Allen Fair"]
10
+ spec.email = ["allen.fair@gmail.com"]
11
+ spec.summary = "ActiveRecord PostgreSQL Adapter extension for using a cursor to return a large result set"
12
+ spec.description = "PostgreSQL Cursor is an extension to the ActiveRecord PostgreSQLAdapter for very large result sets. It provides a cursor open/fetch/close interface to access data without loading all rows into memory, and instead loads the result rows in \"chunks\" (default of 1_000 rows), buffers them, and returns the rows one at a time."
13
+ spec.homepage = "http://github.com/afair/postgresql_cursor"
14
+ spec.license = "MIT"
10
15
 
11
- s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
12
- s.require_paths = ["lib"]
13
- s.authors = ["Allen Fair"]
14
- s.date = "2014-06-06"
15
- s.description = "PostgreSQL Cursor is an extension to the ActiveRecord PostgreSQLAdapter for very large result sets. It provides a cursor open/fetch/close interface to access data without loading all rows into memory, and instead loads the result rows in \"chunks\" (default of 10_000 rows), buffers them, and returns the rows one at a time."
16
- s.email = "allen.fair@gmail.com"
17
- s.extra_rdoc_files = [
18
- "LICENSE",
19
- "README.rdoc"
20
- ]
21
- s.files = [
22
- ".document",
23
- "LICENSE",
24
- "README.rdoc",
25
- "Rakefile",
26
- "VERSION",
27
- "lib/postgresql_cursor.rb",
28
- "postgresql_cursor.gemspec",
29
- "test/helper.rb",
30
- "test/test_postgresql_cursor.rb"
31
- ]
32
- s.homepage = "http://github.com/afair/postgresql_cursor"
33
- s.rubygems_version = "2.2.1"
34
- s.summary = "ActiveRecord PostgreSQL Adapter extension for using a cursor to return a large result set"
16
+ spec.files = `git ls-files -z`.split("\x0")
17
+ spec.executables = spec.files.grep(%r{^bin/}) { |f| File.basename(f) }
18
+ spec.test_files = spec.files.grep(%r{^(test|spec|features)/})
19
+ spec.require_paths = ["lib"]
35
20
 
36
- if s.respond_to? :specification_version then
37
- s.specification_version = 4
21
+ #spec.add_dependency "pg" # Remove this for jruby, which should specify 'activerecord-jdbcpostgresql-adapter'
22
+ spec.add_dependency "activerecord", ">= 3.2.0"
38
23
 
39
- if Gem::Version.new(Gem::VERSION) >= Gem::Version.new('1.2.0') then
40
- s.add_runtime_dependency(%q<activerecord>, [">= 0"])
41
- else
42
- s.add_dependency(%q<activerecord>, [">= 0"])
43
- end
44
- else
45
- s.add_dependency(%q<activerecord>, [">= 0"])
46
- end
24
+ spec.add_development_dependency "pg"
25
+ spec.add_development_dependency "rake"
26
+ spec.add_development_dependency "minitest"
47
27
  end
48
-