active_cursor 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA256:
3
+ metadata.gz: aa6257797955fe7432710d3282f7ec6b483ba7ec4b7ab594744ba651ab760299
4
+ data.tar.gz: 7584a37351f4f530adcc308ca173fcc64bc9051d1418d910177cfeb06f6ba5e9
5
+ SHA512:
6
+ metadata.gz: 6880d4c1d207ccbbf5055f856850734f17953f948d06923e29bb8d5409de74e2c36deedb96888209c6dc017da940138d77cfc2ba159822c4ec3bdcd1b3417fed
7
+ data.tar.gz: 34b5f1209b38f6bd0dc58a271ab37866c8fc207000bd53a350f98153ac43bddc8ee74c959ab5c0f96eda5a7fe76bc190781794d1fd16de4cbb0f6c3d7656917b
@@ -0,0 +1,29 @@
1
+ name: Ruby
2
+
3
+ on: [push, pull_request]
4
+
5
+ jobs:
6
+ build:
7
+ runs-on: ubuntu-latest
8
+ services:
9
+ postgres:
10
+ image: postgres:13-alpine
11
+ ports:
12
+ - 5432:5432
13
+ env:
14
+ POSTGRES_HOST_AUTH_METHOD: trust
15
+ options: >-
16
+ --health-cmd pg_isready
17
+ --health-interval 10s
18
+ --health-timeout 5s
19
+ --health-retries 5
20
+
21
+ steps:
22
+ - uses: actions/checkout@v2
23
+ - name: Set up Ruby
24
+ uses: ruby/setup-ruby@v1
25
+ with:
26
+ ruby-version: 3.0.0
27
+ bundler-cache: true
28
+ - name: Run the default task
29
+ run: bundle exec rake
data/.gitignore ADDED
@@ -0,0 +1,11 @@
1
+ /.bundle/
2
+ /.yardoc
3
+ /_yardoc/
4
+ /coverage/
5
+ /doc/
6
+ /pkg/
7
+ /spec/reports/
8
+ /tmp/
9
+
10
+ # rspec failure tracking
11
+ .rspec_status
data/.rspec ADDED
@@ -0,0 +1,3 @@
1
+ --format documentation
2
+ --color
3
+ --require spec_helper
data/Gemfile ADDED
@@ -0,0 +1,13 @@
1
+ # frozen_string_literal: true
2
+
3
+ source "https://rubygems.org"
4
+
5
+ # Specify your gem's dependencies in active_cursor.gemspec
6
+ gemspec
7
+
8
+ gem "rake", "~> 13.0"
9
+ gem "rspec", "~> 3.0"
10
+ gem "pg"
11
+ gem "sqlite3"
12
+ gem "gruff"
13
+ gem "memory_profiler"
data/Gemfile.lock ADDED
@@ -0,0 +1,66 @@
1
+ PATH
2
+ remote: .
3
+ specs:
4
+ active_cursor (0.1.0)
5
+ activerecord (~> 6.0)
6
+
7
+ GEM
8
+ remote: https://rubygems.org/
9
+ specs:
10
+ activemodel (6.1.4)
11
+ activesupport (= 6.1.4)
12
+ activerecord (6.1.4)
13
+ activemodel (= 6.1.4)
14
+ activesupport (= 6.1.4)
15
+ activesupport (6.1.4)
16
+ concurrent-ruby (~> 1.0, >= 1.0.2)
17
+ i18n (>= 1.6, < 2)
18
+ minitest (>= 5.1)
19
+ tzinfo (~> 2.0)
20
+ zeitwerk (~> 2.3)
21
+ concurrent-ruby (1.1.9)
22
+ diff-lcs (1.4.4)
23
+ gruff (0.14.0)
24
+ histogram
25
+ rmagick
26
+ histogram (0.2.4.1)
27
+ i18n (1.8.10)
28
+ concurrent-ruby (~> 1.0)
29
+ memory_profiler (1.0.0)
30
+ minitest (5.14.4)
31
+ pg (1.2.3)
32
+ rake (13.0.6)
33
+ rmagick (4.2.2)
34
+ rspec (3.10.0)
35
+ rspec-core (~> 3.10.0)
36
+ rspec-expectations (~> 3.10.0)
37
+ rspec-mocks (~> 3.10.0)
38
+ rspec-core (3.10.1)
39
+ rspec-support (~> 3.10.0)
40
+ rspec-expectations (3.10.1)
41
+ diff-lcs (>= 1.2.0, < 2.0)
42
+ rspec-support (~> 3.10.0)
43
+ rspec-mocks (3.10.2)
44
+ diff-lcs (>= 1.2.0, < 2.0)
45
+ rspec-support (~> 3.10.0)
46
+ rspec-support (3.10.2)
47
+ sqlite3 (1.4.2)
48
+ tzinfo (2.0.4)
49
+ concurrent-ruby (~> 1.0)
50
+ zeitwerk (2.4.2)
51
+
52
+ PLATFORMS
53
+ ruby
54
+ x86_64-darwin-20
55
+
56
+ DEPENDENCIES
57
+ active_cursor!
58
+ gruff
59
+ memory_profiler
60
+ pg
61
+ rake (~> 13.0)
62
+ rspec (~> 3.0)
63
+ sqlite3
64
+
65
+ BUNDLED WITH
66
+ 2.2.16
data/README.md ADDED
@@ -0,0 +1,72 @@
1
+ # ActiveCursor
2
+
3
+ This gem adds support for cursors to Active Record. This library only supports
4
+ PostgreSQL.
5
+
6
+ ## Installation
7
+
8
+ Add this line to your application's Gemfile:
9
+
10
+ ```ruby
11
+ gem 'active_cursor'
12
+ ```
13
+
14
+ And then execute:
15
+
16
+ $ bundle install
17
+
18
+ Or install it yourself as:
19
+
20
+ $ gem install active_cursor
21
+
22
+ ## Why?
23
+
24
+ Sometimes you need to process a huge amount of data, but loading the entire
25
+ dataset into memory isn't possible.
26
+
27
+ In those cases, you'll usually reach for Active Record's `find_each` method, which
28
+ will only load records in batches.
29
+
30
+ Unfortunately, `find_each` requires that each record in the dataset has a unique,
31
+ integer ID. That's not always possible. Enter cursors.
32
+
33
+ ## Usage
34
+
35
+ Extend your `ApplicationRecord` with `ActiveCursor::QueryMethods`:
36
+
37
+ ```ruby
38
+ class ApplicationRecord < ActiveRecord::Base
39
+ extend ActiveCursor::QueryMethods
40
+ end
41
+ ```
42
+
43
+ Now, you're ready to start cursing.
44
+
45
+ ```ruby
46
+ User.cursor.each { |user| ... }
47
+ User.cursor.each_row { |attributes| ... }
48
+ User.select(:id, :name).cursor.each_tuple { |id, name| ... }
49
+ ```
50
+
51
+ By default, this will load 1,000 records at a time from the database. You can
52
+ change that by specifying the batch size:
53
+
54
+ ```ruby
55
+ User.cursor(batch_size: 10).each { |user| ... }
56
+ ```
57
+
58
+ All methods return an `Enumerator` when no block is given, so you can use the full power of Ruby's `Enumerable`:
59
+
60
+ ```ruby
61
+ User.cursor.each.find { |user| user.name == "Rick" }
62
+ ```
63
+
64
+ ## Development
65
+
66
+ After checking out the repo, run `bin/setup` to install dependencies. Then, run `rake spec` to run the tests. You can also run `bin/console` for an interactive prompt that will allow you to experiment.
67
+
68
+ To install this gem onto your local machine, run `bundle exec rake install`. To release a new version, update the version number in `version.rb`, and then run `bundle exec rake release`, which will create a git tag for the version, push git commits and the created tag, and push the `.gem` file to [rubygems.org](https://rubygems.org).
69
+
70
+ ## Contributing
71
+
72
+ Bug reports and pull requests are welcome on GitHub at https://github.com/rzane/active_cursor.
data/Rakefile ADDED
@@ -0,0 +1,8 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "bundler/gem_tasks"
4
+ require "rspec/core/rake_task"
5
+
6
+ RSpec::Core::RakeTask.new(:spec)
7
+
8
+ task default: :spec
@@ -0,0 +1,33 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative "lib/active_cursor/version"
4
+
5
+ Gem::Specification.new do |spec|
6
+ spec.name = "active_cursor"
7
+ spec.version = ActiveCursor::VERSION
8
+ spec.authors = ["Ray Zane"]
9
+ spec.email = ["raymondzane@gmail.com"]
10
+
11
+ spec.summary = "Adds support for cursors to Active Record."
12
+ spec.description = "Efficiently iterate through massive collections in your database."
13
+ spec.homepage = "https://github.com/rzane/active_cursor"
14
+ spec.required_ruby_version = Gem::Requirement.new(">= 3.0.0")
15
+
16
+ spec.metadata["homepage_uri"] = spec.homepage
17
+ spec.metadata["source_code_uri"] = "https://github.com/rzane/active_cursor"
18
+ spec.metadata["changelog_uri"] = "https://github.com/rzane/active_cursor/releases"
19
+
20
+ # Specify which files should be added to the gem when it is released.
21
+ # The `git ls-files -z` loads the files in the RubyGem that have been added into git.
22
+ spec.files = Dir.chdir(File.expand_path(__dir__)) do
23
+ `git ls-files -z`.split("\x0").reject { |f| f.match(%r{\A(?:test|spec|features)/}) }
24
+ end
25
+ spec.bindir = "exe"
26
+ spec.executables = spec.files.grep(%r{\Aexe/}) { |f| File.basename(f) }
27
+ spec.require_paths = ["lib"]
28
+
29
+ spec.add_dependency "activerecord", "~> 6.0"
30
+
31
+ # For more information and examples about making a new gem, checkout our
32
+ # guide at: https://bundler.io/guides/creating_gem.html
33
+ end
Binary file
data/assets/time.png ADDED
Binary file
data/bin/benchmark ADDED
@@ -0,0 +1,94 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ require "bundler/setup"
4
+ require "gruff"
5
+ require "benchmark"
6
+ require "memory_profiler"
7
+ require_relative "../spec/support/database"
8
+
9
+ Database.create
10
+ Database.connect
11
+ Database.migrate
12
+
13
+ COUNTS = (1..5).map { _1 * 200_000 }
14
+ LABELS = (1..5).to_h { [_1 - 1, _1 * 2] }
15
+ TIME = Hash.new { |h, k| h[k] = [] }
16
+ OBJECTS = Hash.new { |h, k| h[k] = [] }
17
+
18
+ def seed(count)
19
+ Widget.delete_all
20
+ Widget.connection.execute <<~SQL
21
+ INSERT INTO widgets (value, name, timestamp)
22
+ SELECT generate_series(0, #{count}), gen_random_uuid(), CURRENT_TIMESTAMP;
23
+ SQL
24
+ end
25
+
26
+ def measure(name, &block)
27
+ memory = MemoryProfiler.report do
28
+ TIME[name] << Benchmark.realtime(&block)
29
+ end
30
+
31
+ OBJECTS[name] << memory.total_allocated
32
+ end
33
+
34
+ def chart(data:, name:, unit:, filename:)
35
+ chart = Gruff::Bar.new
36
+ chart.title = name
37
+ chart.labels = LABELS
38
+ chart.y_axis_label = unit
39
+ chart.x_axis_label = "Records (x100,000)"
40
+
41
+ data.each do |name, values|
42
+ chart.data(name, values)
43
+ end
44
+
45
+ chart.write(filename)
46
+ end
47
+
48
+ COUNTS.each do |count|
49
+ puts count
50
+ seed(count)
51
+
52
+ puts "* to_a"
53
+ measure "to_a" do
54
+ Widget.all.each {}
55
+ end
56
+
57
+ puts "* pluck"
58
+ measure "pluck" do
59
+ Widget.pluck(:id, :value, :name, :timestamp)
60
+ end
61
+
62
+ puts "* find_each"
63
+ measure "find_each" do
64
+ Widget.find_each {}
65
+ end
66
+
67
+ puts "* cursor.each"
68
+ measure "cursor.each" do
69
+ Widget.cursor.each {}
70
+ end
71
+
72
+ puts "* cursor.each_row"
73
+ measure "cursor.each_row" do
74
+ Widget.cursor.each_row {}
75
+ end
76
+
77
+ puts "* cursor.each_tuple"
78
+ measure "cursor.each_tuple" do
79
+ Widget.cursor.each_tuple {}
80
+ end
81
+ end
82
+
83
+ chart(
84
+ data: SECONDS,
85
+ name: "Time",
86
+ unit: "Seconds",
87
+ filename: "assets/time.png"
88
+ )
89
+ chart(
90
+ data: OBJECTS,
91
+ name: "Allocated Objects",
92
+ unit: "Objects",
93
+ filename: "assets/objects.png"
94
+ )
data/bin/console ADDED
@@ -0,0 +1,15 @@
1
+ #!/usr/bin/env ruby
2
+ # frozen_string_literal: true
3
+
4
+ require "bundler/setup"
5
+ require "active_cursor"
6
+
7
+ # You can add fixtures and/or initialization code here to make experimenting
8
+ # with your gem easier. You can also use a different console, if you like.
9
+
10
+ # (If you use this, don't forget to add pry to your Gemfile!)
11
+ # require "pry"
12
+ # Pry.start
13
+
14
+ require "irb"
15
+ IRB.start(__FILE__)
data/bin/setup ADDED
@@ -0,0 +1,8 @@
1
+ #!/usr/bin/env bash
2
+ set -euo pipefail
3
+ IFS=$'\n\t'
4
+ set -vx
5
+
6
+ bundle install
7
+
8
+ # Do any other automated setup that you need to do here
@@ -0,0 +1,71 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative "active_cursor/version"
4
+ require "active_support/core_ext/module/delegation"
5
+
6
+ class ActiveCursor
7
+ module QueryMethods
8
+ def cursor(...)
9
+ ActiveCursor.new(all, ...)
10
+ end
11
+ end
12
+
13
+ def initialize(relation, batch_size: 1_000)
14
+ @relation = relation
15
+ @batch_size = batch_size
16
+ end
17
+
18
+ def each(&block)
19
+ return enum_for(__method__) unless block_given?
20
+
21
+ iterate do |name|
22
+ records = model.find_by_sql("FETCH #{batch_size} FROM #{name}")
23
+ records.each(&block)
24
+ records.length
25
+ end
26
+ end
27
+
28
+ def each_row(&block)
29
+ return enum_for(__method__) unless block_given?
30
+
31
+ iterate do |name|
32
+ result = connection.execute("FETCH #{batch_size} FROM #{name}")
33
+ result.each(&block)
34
+ result.ntuples
35
+ end
36
+ end
37
+
38
+ def each_tuple(&block)
39
+ return enum_for(__method__) unless block_given?
40
+
41
+ iterate do |name|
42
+ result = connection.execute("FETCH #{batch_size} FROM #{name}")
43
+ result.each_row(&block)
44
+ result.ntuples
45
+ end
46
+ end
47
+
48
+ private
49
+
50
+ attr_reader :relation, :batch_size
51
+
52
+ delegate :model, :connection, to: :relation
53
+
54
+ def sql
55
+ connection.unprepared_statement { relation.to_sql }
56
+ end
57
+
58
+ def iterate
59
+ name = "cursor_#{SecureRandom.uuid.tr("-", "_")}"
60
+
61
+ connection.transaction do
62
+ connection.execute("DECLARE #{name} NO SCROLL CURSOR FOR #{sql}")
63
+
64
+ begin
65
+ loop until yield(name) < batch_size
66
+ ensure
67
+ connection.execute("CLOSE #{name}")
68
+ end
69
+ end
70
+ end
71
+ end
@@ -0,0 +1,5 @@
1
+ # frozen_string_literal: true
2
+
3
+ class ActiveCursor
4
+ VERSION = "0.1.0"
5
+ end
metadata ADDED
@@ -0,0 +1,74 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: active_cursor
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.1.0
5
+ platform: ruby
6
+ authors:
7
+ - Ray Zane
8
+ autorequire:
9
+ bindir: exe
10
+ cert_chain: []
11
+ date: 2021-08-11 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: activerecord
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - "~>"
18
+ - !ruby/object:Gem::Version
19
+ version: '6.0'
20
+ type: :runtime
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - "~>"
25
+ - !ruby/object:Gem::Version
26
+ version: '6.0'
27
+ description: Efficiently iterate through massive collections in your database.
28
+ email:
29
+ - raymondzane@gmail.com
30
+ executables: []
31
+ extensions: []
32
+ extra_rdoc_files: []
33
+ files:
34
+ - ".github/workflows/main.yml"
35
+ - ".gitignore"
36
+ - ".rspec"
37
+ - Gemfile
38
+ - Gemfile.lock
39
+ - README.md
40
+ - Rakefile
41
+ - active_cursor.gemspec
42
+ - assets/objects.png
43
+ - assets/time.png
44
+ - bin/benchmark
45
+ - bin/console
46
+ - bin/setup
47
+ - lib/active_cursor.rb
48
+ - lib/active_cursor/version.rb
49
+ homepage: https://github.com/rzane/active_cursor
50
+ licenses: []
51
+ metadata:
52
+ homepage_uri: https://github.com/rzane/active_cursor
53
+ source_code_uri: https://github.com/rzane/active_cursor
54
+ changelog_uri: https://github.com/rzane/active_cursor/releases
55
+ post_install_message:
56
+ rdoc_options: []
57
+ require_paths:
58
+ - lib
59
+ required_ruby_version: !ruby/object:Gem::Requirement
60
+ requirements:
61
+ - - ">="
62
+ - !ruby/object:Gem::Version
63
+ version: 3.0.0
64
+ required_rubygems_version: !ruby/object:Gem::Requirement
65
+ requirements:
66
+ - - ">="
67
+ - !ruby/object:Gem::Version
68
+ version: '0'
69
+ requirements: []
70
+ rubygems_version: 3.2.3
71
+ signing_key:
72
+ specification_version: 4
73
+ summary: Adds support for cursors to Active Record.
74
+ test_files: []