active_cursor 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.github/workflows/main.yml +29 -0
- data/.gitignore +11 -0
- data/.rspec +3 -0
- data/Gemfile +13 -0
- data/Gemfile.lock +66 -0
- data/README.md +72 -0
- data/Rakefile +8 -0
- data/active_cursor.gemspec +33 -0
- data/assets/objects.png +0 -0
- data/assets/time.png +0 -0
- data/bin/benchmark +94 -0
- data/bin/console +15 -0
- data/bin/setup +8 -0
- data/lib/active_cursor.rb +71 -0
- data/lib/active_cursor/version.rb +5 -0
- metadata +74 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA256:
|
3
|
+
metadata.gz: aa6257797955fe7432710d3282f7ec6b483ba7ec4b7ab594744ba651ab760299
|
4
|
+
data.tar.gz: 7584a37351f4f530adcc308ca173fcc64bc9051d1418d910177cfeb06f6ba5e9
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: 6880d4c1d207ccbbf5055f856850734f17953f948d06923e29bb8d5409de74e2c36deedb96888209c6dc017da940138d77cfc2ba159822c4ec3bdcd1b3417fed
|
7
|
+
data.tar.gz: 34b5f1209b38f6bd0dc58a271ab37866c8fc207000bd53a350f98153ac43bddc8ee74c959ab5c0f96eda5a7fe76bc190781794d1fd16de4cbb0f6c3d7656917b
|
@@ -0,0 +1,29 @@
|
|
1
|
+
name: Ruby
|
2
|
+
|
3
|
+
on: [push, pull_request]
|
4
|
+
|
5
|
+
jobs:
|
6
|
+
build:
|
7
|
+
runs-on: ubuntu-latest
|
8
|
+
services:
|
9
|
+
postgres:
|
10
|
+
image: postgres:13-alpine
|
11
|
+
ports:
|
12
|
+
- 5432:5432
|
13
|
+
env:
|
14
|
+
POSTGRES_HOST_AUTH_METHOD: trust
|
15
|
+
options: >-
|
16
|
+
--health-cmd pg_isready
|
17
|
+
--health-interval 10s
|
18
|
+
--health-timeout 5s
|
19
|
+
--health-retries 5
|
20
|
+
|
21
|
+
steps:
|
22
|
+
- uses: actions/checkout@v2
|
23
|
+
- name: Set up Ruby
|
24
|
+
uses: ruby/setup-ruby@v1
|
25
|
+
with:
|
26
|
+
ruby-version: 3.0.0
|
27
|
+
bundler-cache: true
|
28
|
+
- name: Run the default task
|
29
|
+
run: bundle exec rake
|
data/.gitignore
ADDED
data/.rspec
ADDED
data/Gemfile
ADDED
@@ -0,0 +1,13 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
source "https://rubygems.org"
|
4
|
+
|
5
|
+
# Specify your gem's dependencies in active_cursor.gemspec
|
6
|
+
gemspec
|
7
|
+
|
8
|
+
gem "rake", "~> 13.0"
|
9
|
+
gem "rspec", "~> 3.0"
|
10
|
+
gem "pg"
|
11
|
+
gem "sqlite3"
|
12
|
+
gem "gruff"
|
13
|
+
gem "memory_profiler"
|
data/Gemfile.lock
ADDED
@@ -0,0 +1,66 @@
|
|
1
|
+
PATH
|
2
|
+
remote: .
|
3
|
+
specs:
|
4
|
+
active_cursor (0.1.0)
|
5
|
+
activerecord (~> 6.0)
|
6
|
+
|
7
|
+
GEM
|
8
|
+
remote: https://rubygems.org/
|
9
|
+
specs:
|
10
|
+
activemodel (6.1.4)
|
11
|
+
activesupport (= 6.1.4)
|
12
|
+
activerecord (6.1.4)
|
13
|
+
activemodel (= 6.1.4)
|
14
|
+
activesupport (= 6.1.4)
|
15
|
+
activesupport (6.1.4)
|
16
|
+
concurrent-ruby (~> 1.0, >= 1.0.2)
|
17
|
+
i18n (>= 1.6, < 2)
|
18
|
+
minitest (>= 5.1)
|
19
|
+
tzinfo (~> 2.0)
|
20
|
+
zeitwerk (~> 2.3)
|
21
|
+
concurrent-ruby (1.1.9)
|
22
|
+
diff-lcs (1.4.4)
|
23
|
+
gruff (0.14.0)
|
24
|
+
histogram
|
25
|
+
rmagick
|
26
|
+
histogram (0.2.4.1)
|
27
|
+
i18n (1.8.10)
|
28
|
+
concurrent-ruby (~> 1.0)
|
29
|
+
memory_profiler (1.0.0)
|
30
|
+
minitest (5.14.4)
|
31
|
+
pg (1.2.3)
|
32
|
+
rake (13.0.6)
|
33
|
+
rmagick (4.2.2)
|
34
|
+
rspec (3.10.0)
|
35
|
+
rspec-core (~> 3.10.0)
|
36
|
+
rspec-expectations (~> 3.10.0)
|
37
|
+
rspec-mocks (~> 3.10.0)
|
38
|
+
rspec-core (3.10.1)
|
39
|
+
rspec-support (~> 3.10.0)
|
40
|
+
rspec-expectations (3.10.1)
|
41
|
+
diff-lcs (>= 1.2.0, < 2.0)
|
42
|
+
rspec-support (~> 3.10.0)
|
43
|
+
rspec-mocks (3.10.2)
|
44
|
+
diff-lcs (>= 1.2.0, < 2.0)
|
45
|
+
rspec-support (~> 3.10.0)
|
46
|
+
rspec-support (3.10.2)
|
47
|
+
sqlite3 (1.4.2)
|
48
|
+
tzinfo (2.0.4)
|
49
|
+
concurrent-ruby (~> 1.0)
|
50
|
+
zeitwerk (2.4.2)
|
51
|
+
|
52
|
+
PLATFORMS
|
53
|
+
ruby
|
54
|
+
x86_64-darwin-20
|
55
|
+
|
56
|
+
DEPENDENCIES
|
57
|
+
active_cursor!
|
58
|
+
gruff
|
59
|
+
memory_profiler
|
60
|
+
pg
|
61
|
+
rake (~> 13.0)
|
62
|
+
rspec (~> 3.0)
|
63
|
+
sqlite3
|
64
|
+
|
65
|
+
BUNDLED WITH
|
66
|
+
2.2.16
|
data/README.md
ADDED
@@ -0,0 +1,72 @@
|
|
1
|
+
# ActiveCursor
|
2
|
+
|
3
|
+
This gem adds support for cursors to Active Record. This library only supports
|
4
|
+
PostgreSQL.
|
5
|
+
|
6
|
+
## Installation
|
7
|
+
|
8
|
+
Add this line to your application's Gemfile:
|
9
|
+
|
10
|
+
```ruby
|
11
|
+
gem 'active_cursor'
|
12
|
+
```
|
13
|
+
|
14
|
+
And then execute:
|
15
|
+
|
16
|
+
$ bundle install
|
17
|
+
|
18
|
+
Or install it yourself as:
|
19
|
+
|
20
|
+
$ gem install active_cursor
|
21
|
+
|
22
|
+
## Why?
|
23
|
+
|
24
|
+
Sometimes you need to process a huge amount of data, but loading the entire
|
25
|
+
dataset into memory isn't possible.
|
26
|
+
|
27
|
+
In those cases, you'll usually reach for Active Record's `find_each` method, which
|
28
|
+
will only load records in batches.
|
29
|
+
|
30
|
+
Unfortunately, `find_each` requires that each record in the dataset has a unique,
|
31
|
+
integer ID. That's not always possible. Enter cursors.
|
32
|
+
|
33
|
+
## Usage
|
34
|
+
|
35
|
+
Extend your `ApplicationRecord` with `ActiveCursor::QueryMethods`:
|
36
|
+
|
37
|
+
```ruby
|
38
|
+
class ApplicationRecord < ActiveRecord::Base
|
39
|
+
extend ActiveCursor::QueryMethods
|
40
|
+
end
|
41
|
+
```
|
42
|
+
|
43
|
+
Now, you're ready to start cursing.
|
44
|
+
|
45
|
+
```ruby
|
46
|
+
User.cursor.each { |user| ... }
|
47
|
+
User.cursor.each_row { |attributes| ... }
|
48
|
+
User.select(:id, :name).cursor.each_tuple { |id, name| ... }
|
49
|
+
```
|
50
|
+
|
51
|
+
By default, this will load 1,000 records at a time from the database. You can
|
52
|
+
change that by specifying the batch size:
|
53
|
+
|
54
|
+
```ruby
|
55
|
+
User.cursor(batch_size: 10).each { |user| ... }
|
56
|
+
```
|
57
|
+
|
58
|
+
All methods return enumerables when no block is given, so you can use the full power of Ruby's Enumerable:
|
59
|
+
|
60
|
+
```ruby
|
61
|
+
User.cursor.each.find { |user| user.name == "Rick" }
|
62
|
+
```
|
63
|
+
|
64
|
+
## Development
|
65
|
+
|
66
|
+
After checking out the repo, run `bin/setup` to install dependencies. Then, run `rake spec` to run the tests. You can also run `bin/console` for an interactive prompt that will allow you to experiment.
|
67
|
+
|
68
|
+
To install this gem onto your local machine, run `bundle exec rake install`. To release a new version, update the version number in `version.rb`, and then run `bundle exec rake release`, which will create a git tag for the version, push git commits and the created tag, and push the `.gem` file to [rubygems.org](https://rubygems.org).
|
69
|
+
|
70
|
+
## Contributing
|
71
|
+
|
72
|
+
Bug reports and pull requests are welcome on GitHub at https://github.com/rzane/active_cursor.
|
data/Rakefile
ADDED
@@ -0,0 +1,33 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require_relative "lib/active_cursor/version"
|
4
|
+
|
5
|
+
Gem::Specification.new do |spec|
|
6
|
+
spec.name = "active_cursor"
|
7
|
+
spec.version = ActiveCursor::VERSION
|
8
|
+
spec.authors = ["Ray Zane"]
|
9
|
+
spec.email = ["raymondzane@gmail.com"]
|
10
|
+
|
11
|
+
spec.summary = "Adds support for cursors to Active Record."
|
12
|
+
spec.description = "Efficiently iterate through massive collections in your database."
|
13
|
+
spec.homepage = "https://github.com/rzane/active_cursor"
|
14
|
+
spec.required_ruby_version = Gem::Requirement.new(">= 3.0.0")
|
15
|
+
|
16
|
+
spec.metadata["homepage_uri"] = spec.homepage
|
17
|
+
spec.metadata["source_code_uri"] = "https://github.com/rzane/active_cursor"
|
18
|
+
spec.metadata["changelog_uri"] = "https://github.com/rzane/active_cursor/releases"
|
19
|
+
|
20
|
+
# Specify which files should be added to the gem when it is released.
|
21
|
+
# The `git ls-files -z` loads the files in the RubyGem that have been added into git.
|
22
|
+
spec.files = Dir.chdir(File.expand_path(__dir__)) do
|
23
|
+
`git ls-files -z`.split("\x0").reject { |f| f.match(%r{\A(?:test|spec|features)/}) }
|
24
|
+
end
|
25
|
+
spec.bindir = "exe"
|
26
|
+
spec.executables = spec.files.grep(%r{\Aexe/}) { |f| File.basename(f) }
|
27
|
+
spec.require_paths = ["lib"]
|
28
|
+
|
29
|
+
spec.add_dependency "activerecord", "~> 6.0"
|
30
|
+
|
31
|
+
# For more information and examples about making a new gem, checkout our
|
32
|
+
# guide at: https://bundler.io/guides/creating_gem.html
|
33
|
+
end
|
data/assets/objects.png
ADDED
Binary file
|
data/assets/time.png
ADDED
Binary file
|
data/bin/benchmark
ADDED
@@ -0,0 +1,94 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
|
3
|
+
require "bundler/setup"
|
4
|
+
require "gruff"
|
5
|
+
require "benchmark"
|
6
|
+
require "memory_profiler"
|
7
|
+
require_relative "../spec/support/database"
|
8
|
+
|
9
|
+
Database.create
|
10
|
+
Database.connect
|
11
|
+
Database.migrate
|
12
|
+
|
13
|
+
COUNTS = (1..5).map { _1 * 200_000 }
|
14
|
+
LABELS = (1..5).to_h { [_1 - 1, _1 * 2] }
|
15
|
+
TIME = Hash.new { |h, k| h[k] = [] }
|
16
|
+
OBJECTS = Hash.new { |h, k| h[k] = [] }
|
17
|
+
|
18
|
+
# Replaces the contents of the widgets table with exactly `count` generated rows.
#
# @param count [Integer] number of rows to insert
# @raise [ArgumentError, TypeError] if `count` cannot be converted by Integer()
def seed(count)
  # Coerce before touching the table: the value is interpolated into SQL below.
  rows = Integer(count)

  Widget.delete_all
  # generate_series is inclusive on both ends, so start at 1 to insert `rows`
  # rows rather than `rows + 1`.
  Widget.connection.execute(<<~SQL)
    INSERT INTO widgets (value, name, timestamp)
    SELECT generate_series(1, #{rows}), gen_random_uuid(), CURRENT_TIMESTAMP;
  SQL
end
|
25
|
+
|
26
|
+
# Records one timing sample and one allocation sample for the given block.
#
# The block is executed twice: once under Benchmark.realtime and once under
# MemoryProfiler.report, so that allocation tracing cannot inflate the
# wall-clock figure stored in TIME.
#
# @param name [String] series label used as the key in TIME and OBJECTS
# @yield the code under measurement
def measure(name, &block)
  TIME[name] << Benchmark.realtime(&block)
  OBJECTS[name] << MemoryProfiler.report(&block).total_allocated
end
|
33
|
+
|
34
|
+
# Renders a bar chart with one data series per entry in `data` and writes it
# to `filename`.
#
# @param data [Hash{String => Array<Numeric>}] series label => one value per label in LABELS
# @param name [String] chart title
# @param unit [String] y-axis label
# @param filename [String] path passed to Gruff's `write`
def chart(data:, name:, unit:, filename:)
  graph = Gruff::Bar.new
  graph.title = name
  graph.labels = LABELS
  graph.y_axis_label = unit
  graph.x_axis_label = "Records (x100,000)"

  # `series` rather than `name`, so the block does not shadow the title keyword.
  data.each { |series, values| graph.data(series, values) }

  graph.write(filename)
end
|
47
|
+
|
48
|
+
COUNTS.each do |count|
|
49
|
+
puts count
|
50
|
+
seed(count)
|
51
|
+
|
52
|
+
puts "* to_a"
|
53
|
+
measure "to_a" do
|
54
|
+
Widget.all.each {}
|
55
|
+
end
|
56
|
+
|
57
|
+
puts "* pluck"
|
58
|
+
measure "pluck" do
|
59
|
+
Widget.pluck(:id, :value, :name, :timestamp)
|
60
|
+
end
|
61
|
+
|
62
|
+
puts "* find_each"
|
63
|
+
measure "find_each" do
|
64
|
+
Widget.find_each {}
|
65
|
+
end
|
66
|
+
|
67
|
+
puts "* cursor.each"
|
68
|
+
measure "cursor.each" do
|
69
|
+
Widget.cursor.each {}
|
70
|
+
end
|
71
|
+
|
72
|
+
puts "* cursor.each_row"
|
73
|
+
measure "cursor.each_row" do
|
74
|
+
Widget.cursor.each_row {}
|
75
|
+
end
|
76
|
+
|
77
|
+
puts "* cursor.each_tuple"
|
78
|
+
measure "cursor.each_tuple" do
|
79
|
+
Widget.cursor.each_tuple {}
|
80
|
+
end
|
81
|
+
end
|
82
|
+
|
83
|
+
# Plot the wall-clock samples; `measure` stores them in TIME (there is no
# SECONDS constant in this script).
chart(
  data: TIME,
  name: "Time",
  unit: "Seconds",
  filename: "assets/time.png"
)
|
89
|
+
chart(
|
90
|
+
data: OBJECTS,
|
91
|
+
name: "Allocated Objects",
|
92
|
+
unit: "Objects",
|
93
|
+
filename: "assets/objects.png"
|
94
|
+
)
|
data/bin/console
ADDED
@@ -0,0 +1,15 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
# frozen_string_literal: true
|
3
|
+
|
4
|
+
require "bundler/setup"
|
5
|
+
require "active_cursor"
|
6
|
+
|
7
|
+
# You can add fixtures and/or initialization code here to make experimenting
|
8
|
+
# with your gem easier. You can also use a different console, if you like.
|
9
|
+
|
10
|
+
# (If you use this, don't forget to add pry to your Gemfile!)
|
11
|
+
# require "pry"
|
12
|
+
# Pry.start
|
13
|
+
|
14
|
+
require "irb"
|
15
|
+
IRB.start(__FILE__)
|
data/bin/setup
ADDED
@@ -0,0 +1,71 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require "securerandom"

require_relative "active_cursor/version"
require "active_support/core_ext/module/delegation"
|
5
|
+
|
6
|
+
# Iterates over the rows of a relation through a database cursor, requesting
# `batch_size` rows per FETCH instead of loading the whole result at once.
class ActiveCursor
  # Mixin that adds `cursor` to whatever extends it.
  module QueryMethods
    # Builds an ActiveCursor over `all`, forwarding every argument to
    # ActiveCursor.new (e.g. `batch_size:`).
    def cursor(...)
      ActiveCursor.new(all, ...)
    end
  end

  # @param relation [#model, #connection, #to_sql] relation to iterate over
  # @param batch_size [Integer] rows requested per FETCH; must be positive
  # @raise [ArgumentError] if `batch_size` is not a positive Integer
  def initialize(relation, batch_size: 1_000)
    # batch_size is interpolated into SQL and decides when #iterate stops:
    # with 0 the `fetched < batch_size` test could never succeed.
    unless batch_size.is_a?(Integer) && batch_size.positive?
      raise ArgumentError, "batch_size must be a positive Integer, got #{batch_size.inspect}"
    end

    @relation = relation
    @batch_size = batch_size
  end

  # Yields each element returned by `model.find_by_sql` for every batch.
  #
  # @return [Enumerator] when no block is given
  def each(&block)
    return enum_for(__method__) unless block_given?

    iterate do |name|
      records = model.find_by_sql(fetch_sql(name))
      records.each(&block)
      records.length
    end
  end

  # Yields whatever the raw result's `each` yields for every fetched row,
  # without building model instances.
  #
  # @return [Enumerator] when no block is given
  def each_row(&block)
    return enum_for(__method__) unless block_given?

    iterate do |name|
      result = connection.execute(fetch_sql(name))
      result.each(&block)
      result.ntuples
    end
  end

  # Yields whatever the raw result's `each_row` yields for every fetched row,
  # without building model instances.
  #
  # @return [Enumerator] when no block is given
  def each_tuple(&block)
    return enum_for(__method__) unless block_given?

    iterate do |name|
      result = connection.execute(fetch_sql(name))
      result.each_row(&block)
      result.ntuples
    end
  end

  private

  attr_reader :relation, :batch_size

  # NOTE(review): `delegate` generates its methods itself, so they are
  # presumably public despite the `private` above - confirm before relying on
  # `model`/`connection` being hidden.
  delegate :model, :connection, to: :relation

  # SQL of the relation, rendered inside `unprepared_statement` so it can be
  # embedded in the DECLARE statement.
  def sql
    connection.unprepared_statement { relation.to_sql }
  end

  # Statement that requests the next batch from the named cursor.
  def fetch_sql(name)
    "FETCH #{batch_size} FROM #{name}"
  end

  # Declares a uniquely named cursor inside a transaction and yields its name
  # repeatedly; the block must return how many rows it fetched. Iteration
  # stops at the first batch shorter than `batch_size`, and the cursor is
  # closed even when the block raises.
  def iterate
    name = "cursor_#{SecureRandom.uuid.tr("-", "_")}"

    connection.transaction do
      connection.execute("DECLARE #{name} NO SCROLL CURSOR FOR #{sql}")

      begin
        # Plain `until` rather than Kernel#loop, so a StopIteration raised by
        # the caller's block still propagates.
        exhausted = false
        exhausted = yield(name) < batch_size until exhausted
      ensure
        connection.execute("CLOSE #{name}")
      end
    end
  end
end
|
metadata
ADDED
@@ -0,0 +1,74 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: active_cursor
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.1.0
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- Ray Zane
|
8
|
+
autorequire:
|
9
|
+
bindir: exe
|
10
|
+
cert_chain: []
|
11
|
+
date: 2021-08-11 00:00:00.000000000 Z
|
12
|
+
dependencies:
|
13
|
+
- !ruby/object:Gem::Dependency
|
14
|
+
name: activerecord
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
16
|
+
requirements:
|
17
|
+
- - "~>"
|
18
|
+
- !ruby/object:Gem::Version
|
19
|
+
version: '6.0'
|
20
|
+
type: :runtime
|
21
|
+
prerelease: false
|
22
|
+
version_requirements: !ruby/object:Gem::Requirement
|
23
|
+
requirements:
|
24
|
+
- - "~>"
|
25
|
+
- !ruby/object:Gem::Version
|
26
|
+
version: '6.0'
|
27
|
+
description: Efficiently iterate through massive collections in your database.
|
28
|
+
email:
|
29
|
+
- raymondzane@gmail.com
|
30
|
+
executables: []
|
31
|
+
extensions: []
|
32
|
+
extra_rdoc_files: []
|
33
|
+
files:
|
34
|
+
- ".github/workflows/main.yml"
|
35
|
+
- ".gitignore"
|
36
|
+
- ".rspec"
|
37
|
+
- Gemfile
|
38
|
+
- Gemfile.lock
|
39
|
+
- README.md
|
40
|
+
- Rakefile
|
41
|
+
- active_cursor.gemspec
|
42
|
+
- assets/objects.png
|
43
|
+
- assets/time.png
|
44
|
+
- bin/benchmark
|
45
|
+
- bin/console
|
46
|
+
- bin/setup
|
47
|
+
- lib/active_cursor.rb
|
48
|
+
- lib/active_cursor/version.rb
|
49
|
+
homepage: https://github.com/rzane/active_cursor
|
50
|
+
licenses: []
|
51
|
+
metadata:
|
52
|
+
homepage_uri: https://github.com/rzane/active_cursor
|
53
|
+
source_code_uri: https://github.com/rzane/active_cursor
|
54
|
+
changelog_uri: https://github.com/rzane/active_cursor/releases
|
55
|
+
post_install_message:
|
56
|
+
rdoc_options: []
|
57
|
+
require_paths:
|
58
|
+
- lib
|
59
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
60
|
+
requirements:
|
61
|
+
- - ">="
|
62
|
+
- !ruby/object:Gem::Version
|
63
|
+
version: 3.0.0
|
64
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
65
|
+
requirements:
|
66
|
+
- - ">="
|
67
|
+
- !ruby/object:Gem::Version
|
68
|
+
version: '0'
|
69
|
+
requirements: []
|
70
|
+
rubygems_version: 3.2.3
|
71
|
+
signing_key:
|
72
|
+
specification_version: 4
|
73
|
+
summary: Adds support for cursors to Active Record.
|
74
|
+
test_files: []
|