active_cursor 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA256:
3
+ metadata.gz: aa6257797955fe7432710d3282f7ec6b483ba7ec4b7ab594744ba651ab760299
4
+ data.tar.gz: 7584a37351f4f530adcc308ca173fcc64bc9051d1418d910177cfeb06f6ba5e9
5
+ SHA512:
6
+ metadata.gz: 6880d4c1d207ccbbf5055f856850734f17953f948d06923e29bb8d5409de74e2c36deedb96888209c6dc017da940138d77cfc2ba159822c4ec3bdcd1b3417fed
7
+ data.tar.gz: 34b5f1209b38f6bd0dc58a271ab37866c8fc207000bd53a350f98153ac43bddc8ee74c959ab5c0f96eda5a7fe76bc190781794d1fd16de4cbb0f6c3d7656917b
@@ -0,0 +1,29 @@
1
---
# CI workflow: runs the default Rake task on every push and pull request.
name: Ruby

on: [push, pull_request]

jobs:
  build:
    runs-on: ubuntu-latest
    services:
      # Disposable PostgreSQL server for the job.
      postgres:
        image: postgres:13-alpine
        ports:
          # Quoted so the mapping is always read as a string.
          - "5432:5432"
        env:
          # Accept connections without a password; only acceptable because
          # this container is thrown away when the job ends.
          POSTGRES_HOST_AUTH_METHOD: trust
        # Container health check: the service is considered ready once
        # pg_isready succeeds.
        options: >-
          --health-cmd pg_isready
          --health-interval 10s
          --health-timeout 5s
          --health-retries 5

    steps:
      - uses: actions/checkout@v4
      - name: Set up Ruby
        uses: ruby/setup-ruby@v1
        with:
          # Quoted so the version can never be re-typed as a number.
          ruby-version: "3.0.0"
          bundler-cache: true
      - name: Run the default task
        run: bundle exec rake
data/.gitignore ADDED
@@ -0,0 +1,11 @@
1
+ /.bundle/
2
+ /.yardoc
3
+ /_yardoc/
4
+ /coverage/
5
+ /doc/
6
+ /pkg/
7
+ /spec/reports/
8
+ /tmp/
9
+
10
+ # rspec failure tracking
11
+ .rspec_status
data/.rspec ADDED
@@ -0,0 +1,3 @@
1
+ --format documentation
2
+ --color
3
+ --require spec_helper
data/Gemfile ADDED
@@ -0,0 +1,13 @@
1
# frozen_string_literal: true

source "https://rubygems.org"

# Runtime dependencies are declared in active_cursor.gemspec.
gemspec

# Build and test tooling.
gem "rake", "~> 13.0"
gem "rspec", "~> 3.0"

# Database drivers.
gem "pg"
gem "sqlite3"

# Required by bin/benchmark (charting and allocation profiling).
gem "gruff"
gem "memory_profiler"
data/Gemfile.lock ADDED
@@ -0,0 +1,66 @@
1
+ PATH
2
+ remote: .
3
+ specs:
4
+ active_cursor (0.1.0)
5
+ activerecord (~> 6.0)
6
+
7
+ GEM
8
+ remote: https://rubygems.org/
9
+ specs:
10
+ activemodel (6.1.4)
11
+ activesupport (= 6.1.4)
12
+ activerecord (6.1.4)
13
+ activemodel (= 6.1.4)
14
+ activesupport (= 6.1.4)
15
+ activesupport (6.1.4)
16
+ concurrent-ruby (~> 1.0, >= 1.0.2)
17
+ i18n (>= 1.6, < 2)
18
+ minitest (>= 5.1)
19
+ tzinfo (~> 2.0)
20
+ zeitwerk (~> 2.3)
21
+ concurrent-ruby (1.1.9)
22
+ diff-lcs (1.4.4)
23
+ gruff (0.14.0)
24
+ histogram
25
+ rmagick
26
+ histogram (0.2.4.1)
27
+ i18n (1.8.10)
28
+ concurrent-ruby (~> 1.0)
29
+ memory_profiler (1.0.0)
30
+ minitest (5.14.4)
31
+ pg (1.2.3)
32
+ rake (13.0.6)
33
+ rmagick (4.2.2)
34
+ rspec (3.10.0)
35
+ rspec-core (~> 3.10.0)
36
+ rspec-expectations (~> 3.10.0)
37
+ rspec-mocks (~> 3.10.0)
38
+ rspec-core (3.10.1)
39
+ rspec-support (~> 3.10.0)
40
+ rspec-expectations (3.10.1)
41
+ diff-lcs (>= 1.2.0, < 2.0)
42
+ rspec-support (~> 3.10.0)
43
+ rspec-mocks (3.10.2)
44
+ diff-lcs (>= 1.2.0, < 2.0)
45
+ rspec-support (~> 3.10.0)
46
+ rspec-support (3.10.2)
47
+ sqlite3 (1.4.2)
48
+ tzinfo (2.0.4)
49
+ concurrent-ruby (~> 1.0)
50
+ zeitwerk (2.4.2)
51
+
52
+ PLATFORMS
53
+ ruby
54
+ x86_64-darwin-20
55
+
56
+ DEPENDENCIES
57
+ active_cursor!
58
+ gruff
59
+ memory_profiler
60
+ pg
61
+ rake (~> 13.0)
62
+ rspec (~> 3.0)
63
+ sqlite3
64
+
65
+ BUNDLED WITH
66
+ 2.2.16
data/README.md ADDED
@@ -0,0 +1,72 @@
1
+ # ActiveCursor
2
+
3
+ This gem adds support for cursors to Active Record. This library only supports
4
+ PostgreSQL.
5
+
6
+ ## Installation
7
+
8
+ Add this line to your application's Gemfile:
9
+
10
+ ```ruby
11
+ gem 'active_cursor'
12
+ ```
13
+
14
+ And then execute:
15
+
16
+ $ bundle install
17
+
18
+ Or install it yourself as:
19
+
20
+ $ gem install active_cursor
21
+
22
+ ## Why?
23
+
24
+ Sometimes you need to process a huge amount of data, but loading the entire
25
+ dataset into memory isn't possible.
26
+
27
+ In those cases, you'll usually reach for Active Record's `find_each` method, which
28
+ will only load records in batches.
29
+
30
+ Unfortunately, `find_each` requires that each record in the dataset has a unique,
31
+ integer ID. That's not always possible. Enter cursors.
32
+
33
+ ## Usage
34
+
35
+ Extend your `ApplicationRecord` with `ActiveCursor::QueryMethods`:
36
+
37
+ ```ruby
38
+ class ApplicationRecord < ActiveRecord::Base
39
+ extend ActiveCursor::QueryMethods
40
+ end
41
+ ```
42
+
43
+ Now, you're ready to start cursing.
44
+
45
+ ```ruby
46
+ User.cursor.each { |user| ... }
47
+ User.cursor.each_row { |attributes| ... }
48
+ User.select(:id, :name).cursor.each_tuple { |id, name| ... }
49
+ ```
50
+
51
+ By default, this will load 1,000 records at a time from the database. You can
52
+ change that by specifying the batch size:
53
+
54
+ ```ruby
55
+ User.cursor(batch_size: 10).each { |user| ... }
56
+ ```
57
+
58
+ All methods return an enumerator when no block is given, so you can use the full power of Ruby's Enumerable:
59
+
60
+ ```ruby
61
+ User.cursor.each.find { |user| user.name == "Rick" }
62
+ ```
63
+
64
+ ## Development
65
+
66
+ After checking out the repo, run `bin/setup` to install dependencies. Then, run `rake spec` to run the tests. You can also run `bin/console` for an interactive prompt that will allow you to experiment.
67
+
68
+ To install this gem onto your local machine, run `bundle exec rake install`. To release a new version, update the version number in `version.rb`, and then run `bundle exec rake release`, which will create a git tag for the version, push git commits and the created tag, and push the `.gem` file to [rubygems.org](https://rubygems.org).
69
+
70
+ ## Contributing
71
+
72
+ Bug reports and pull requests are welcome on GitHub at https://github.com/rzane/active_cursor.
data/Rakefile ADDED
@@ -0,0 +1,8 @@
1
# frozen_string_literal: true

require "bundler/gem_tasks"
require "rspec/core/rake_task"

# Defines the `spec` task.
RSpec::Core::RakeTask.new(:spec)

# Running plain `rake` runs the `spec` task.
task default: %i[spec]
@@ -0,0 +1,33 @@
1
# frozen_string_literal: true

require_relative "lib/active_cursor/version"

# Gem specification for active_cursor.
Gem::Specification.new do |spec|
  spec.name = "active_cursor"
  spec.version = ActiveCursor::VERSION
  spec.authors = ["Ray Zane"]
  spec.email = ["raymondzane@gmail.com"]

  spec.summary = "Adds support for cursors to Active Record."
  spec.description = "Efficiently iterate through massive collections in your database."
  spec.homepage = "https://github.com/rzane/active_cursor"
  spec.required_ruby_version = Gem::Requirement.new(">= 3.0.0")

  spec.metadata["homepage_uri"] = spec.homepage
  spec.metadata["source_code_uri"] = "https://github.com/rzane/active_cursor"
  spec.metadata["changelog_uri"] = "https://github.com/rzane/active_cursor/releases"

  # Package every git-tracked file except those under test/, spec/ or
  # features/. `git ls-files -z` separates paths with NUL bytes.
  spec.files = Dir.chdir(File.expand_path(__dir__)) do
    tracked = `git ls-files -z`.split("\x0")
    tracked.reject { |path| path.start_with?("test/", "spec/", "features/") }
  end
  spec.bindir = "exe"
  # Executables are the basenames of the packaged files that live under exe/.
  spec.executables = spec.files
                         .select { |path| path.start_with?("exe/") }
                         .map { |path| File.basename(path) }
  spec.require_paths = ["lib"]

  spec.add_dependency "activerecord", "~> 6.0"

  # For more information and examples about making a new gem, check out the
  # guide at: https://bundler.io/guides/creating_gem.html
end
Binary file
data/assets/time.png ADDED
Binary file
data/bin/benchmark ADDED
@@ -0,0 +1,94 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ require "bundler/setup"
4
+ require "gruff"
5
+ require "benchmark"
6
+ require "memory_profiler"
7
+ require_relative "../spec/support/database"
8
+
9
# Run the create, connect and migrate steps of the Database helper loaded
# from spec/support/database before anything is measured.
Database.create
Database.connect
Database.migrate

# Row counts seeded for each benchmark round: 200,000 up to 1,000,000.
COUNTS = 1.upto(5).map { |step| step * 200_000 }
# Chart labels keyed by bar position; each value is the round's row count in
# units of 100,000 (2, 4, 6, 8, 10).
LABELS = COUNTS.each_with_index.to_h { |count, position| [position, count / 100_000] }
# Elapsed seconds per measurement name, one sample per round.
TIME = Hash.new { |samples, label| samples[label] = [] }
# Allocated-object totals per measurement name, one sample per round.
OBJECTS = Hash.new { |samples, label| samples[label] = [] }
17
+
18
# Replaces the contents of the widgets table with exactly `count` generated
# rows.
#
# count - number of rows to insert; coerced with Integer() before being
#         interpolated into the SQL, so a non-numeric value raises instead of
#         reaching the database.
def seed(count)
  Widget.delete_all
  # generate_series is inclusive at both ends, so the series starts at 1 to
  # insert `count` rows rather than `count + 1`.
  Widget.connection.execute <<~SQL
    INSERT INTO widgets (value, name, timestamp)
    SELECT generate_series(1, #{Integer(count)}), gen_random_uuid(), CURRENT_TIMESTAMP;
  SQL
end
25
+
26
# Runs the block once under MemoryProfiler, recording its wall-clock time in
# TIME[name] and the report's total allocated count in OBJECTS[name].
def measure(name, &block)
  # The timing sample is appended inside the profiled block, exactly where the
  # profiler can see it, so the recorded allocation totals stay comparable.
  report = MemoryProfiler.report { TIME[name] << Benchmark.realtime(&block) }

  OBJECTS[name].push(report.total_allocated)
end
33
+
34
# Writes a bar chart to `filename`.
#
# data     - enumerable of [series name, values] pairs; one data series is
#            added per pair.
# name     - chart title.
# unit     - y-axis label.
# filename - path the chart is written to.
def chart(data:, name:, unit:, filename:)
  graph = Gruff::Bar.new
  graph.title = name
  graph.labels = LABELS
  graph.y_axis_label = unit
  graph.x_axis_label = "Records (x100,000)"

  data.each { |series, values| graph.data(series, values) }

  graph.write(filename)
end
47
+
48
# Measurements to run each round, in execution order. Keys are the names the
# samples are recorded under in TIME and OBJECTS.
strategies = {
  "to_a" => -> { Widget.all.each {} },
  "pluck" => -> { Widget.pluck(:id, :value, :name, :timestamp) },
  "find_each" => -> { Widget.find_each {} },
  "cursor.each" => -> { Widget.cursor.each {} },
  "cursor.each_row" => -> { Widget.cursor.each_row {} },
  "cursor.each_tuple" => -> { Widget.cursor.each_tuple {} }
}

# For every row count: reseed the table, then measure each strategy against it.
COUNTS.each do |count|
  puts count
  seed(count)

  strategies.each do |label, work|
    puts "* #{label}"
    measure(label, &work)
  end
end
82
+
83
# Render one chart per collected metric.
# Timing samples are stored in TIME; this script defines no SECONDS constant,
# so referencing it raised NameError once every benchmark had already run.
chart(
  data: TIME,
  name: "Time",
  unit: "Seconds",
  filename: "assets/time.png"
)
chart(
  data: OBJECTS,
  name: "Allocated Objects",
  unit: "Objects",
  filename: "assets/objects.png"
)
data/bin/console ADDED
@@ -0,0 +1,15 @@
1
#!/usr/bin/env ruby
# frozen_string_literal: true

# Starts an IRB session with the gem already loaded.
require "bundler/setup"
require "active_cursor"
require "irb"

# Add fixtures and/or initialization code here to make experimenting with the
# gem easier.
#
# To use a different console such as Pry, add pry to the Gemfile and replace
# the IRB.start call below with:
#
#   require "pry"
#   Pry.start

IRB.start(__FILE__)
data/bin/setup ADDED
@@ -0,0 +1,8 @@
1
#!/usr/bin/env bash
# Installs the project's dependencies.

# Abort on command failure, on use of an unset variable, and on a failure
# anywhere in a pipeline.
set -o errexit -o nounset -o pipefail
# Split words on newlines and tabs only, never on spaces.
IFS=$'\n\t'
# Echo each line as it is read and each command as it is executed.
set -o verbose -o xtrace

bundle install

# Do any other automated setup that you need to do here
@@ -0,0 +1,71 @@
1
+ # frozen_string_literal: true
2
+
3
require "securerandom"

require_relative "active_cursor/version"
require "active_support/core_ext/module/delegation"
5
+
6
# Iterates over the rows of an Active Record relation through a database
# cursor, fetching +batch_size+ rows per round trip instead of loading the
# whole result set at once.
class ActiveCursor
  # Extend a model class with this module to gain the +cursor+ query method.
  module QueryMethods
    # Builds an ActiveCursor over the receiver's +all+ relation. Every
    # argument is forwarded to ActiveCursor.new.
    def cursor(...)
      ActiveCursor.new(all, ...)
    end
  end

  # relation   - the relation whose SQL the cursor is declared for.
  # batch_size - number of rows requested per FETCH. Must be a positive
  #              Integer.
  #
  # Raises ArgumentError for any other batch_size. Zero would never terminate
  # (an empty batch is never shorter than a batch size of zero), and
  # non-integers are either invalid in the FETCH statement or cannot be
  # compared with the number of fetched rows.
  def initialize(relation, batch_size: 1_000)
    unless batch_size.is_a?(Integer) && batch_size.positive?
      raise ArgumentError, "batch_size must be a positive integer, got #{batch_size.inspect}"
    end

    @relation = relation
    @batch_size = batch_size
  end

  # Yields each row instantiated as a model record (via +find_by_sql+).
  # Returns an Enumerator when no block is given.
  def each(&block)
    return enum_for(__method__) unless block_given?

    iterate do |name|
      records = model.find_by_sql(fetch_sql(name))
      records.each(&block)
      records.length
    end
  end

  # Yields each row as produced by the raw result's +each+, skipping model
  # instantiation. Returns an Enumerator when no block is given.
  def each_row(&block)
    return enum_for(__method__) unless block_given?

    iterate do |name|
      result = connection.execute(fetch_sql(name))
      result.each(&block)
      result.ntuples
    end
  end

  # Yields each row as produced by the raw result's +each_row+. Returns an
  # Enumerator when no block is given.
  def each_tuple(&block)
    return enum_for(__method__) unless block_given?

    iterate do |name|
      result = connection.execute(fetch_sql(name))
      result.each_row(&block)
      result.ntuples
    end
  end

  private

  attr_reader :relation, :batch_size

  # NOTE(review): methods generated by `delegate` may not pick up the bare
  # `private` above; if they must be private, confirm and pass `private: true`.
  delegate :model, :connection, to: :relation

  # SQL of the relation with bind values inlined, so that it can be embedded
  # in the DECLARE statement.
  def sql
    connection.unprepared_statement { relation.to_sql }
  end

  # Statement that requests the next batch from the cursor called +name+.
  def fetch_sql(name)
    "FETCH #{batch_size} FROM #{name}"
  end

  # Declares a uniquely named cursor inside a transaction and yields its name
  # repeatedly. The block must fetch one batch and return the number of rows
  # it received; iteration stops at the first batch shorter than +batch_size+.
  # The cursor is closed when iteration ends, including when the block raises.
  def iterate
    name = "cursor_#{SecureRandom.uuid.tr("-", "_")}"

    connection.transaction do
      connection.execute("DECLARE #{name} NO SCROLL CURSOR FOR #{sql}")

      begin
        until yield(name) < batch_size
          # A full batch means there may be more rows: fetch again.
        end
      ensure
        # NOTE(review): if a failed statement has already aborted the
        # transaction, this CLOSE may itself raise and hide the original
        # error -- confirm against the adapter's behavior.
        connection.execute("CLOSE #{name}")
      end
    end
  end
end
@@ -0,0 +1,5 @@
1
# frozen_string_literal: true

# Reopens the ActiveCursor class to attach the gem's version constant.
class ActiveCursor
  # Version string; read by the gemspec as `ActiveCursor::VERSION`.
  VERSION = "0.1.0"
end
metadata ADDED
@@ -0,0 +1,74 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: active_cursor
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.1.0
5
+ platform: ruby
6
+ authors:
7
+ - Ray Zane
8
+ autorequire:
9
+ bindir: exe
10
+ cert_chain: []
11
+ date: 2021-08-11 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: activerecord
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - "~>"
18
+ - !ruby/object:Gem::Version
19
+ version: '6.0'
20
+ type: :runtime
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - "~>"
25
+ - !ruby/object:Gem::Version
26
+ version: '6.0'
27
+ description: Efficiently iterate through massive collections in your database.
28
+ email:
29
+ - raymondzane@gmail.com
30
+ executables: []
31
+ extensions: []
32
+ extra_rdoc_files: []
33
+ files:
34
+ - ".github/workflows/main.yml"
35
+ - ".gitignore"
36
+ - ".rspec"
37
+ - Gemfile
38
+ - Gemfile.lock
39
+ - README.md
40
+ - Rakefile
41
+ - active_cursor.gemspec
42
+ - assets/objects.png
43
+ - assets/time.png
44
+ - bin/benchmark
45
+ - bin/console
46
+ - bin/setup
47
+ - lib/active_cursor.rb
48
+ - lib/active_cursor/version.rb
49
+ homepage: https://github.com/rzane/active_cursor
50
+ licenses: []
51
+ metadata:
52
+ homepage_uri: https://github.com/rzane/active_cursor
53
+ source_code_uri: https://github.com/rzane/active_cursor
54
+ changelog_uri: https://github.com/rzane/active_cursor/releases
55
+ post_install_message:
56
+ rdoc_options: []
57
+ require_paths:
58
+ - lib
59
+ required_ruby_version: !ruby/object:Gem::Requirement
60
+ requirements:
61
+ - - ">="
62
+ - !ruby/object:Gem::Version
63
+ version: 3.0.0
64
+ required_rubygems_version: !ruby/object:Gem::Requirement
65
+ requirements:
66
+ - - ">="
67
+ - !ruby/object:Gem::Version
68
+ version: '0'
69
+ requirements: []
70
+ rubygems_version: 3.2.3
71
+ signing_key:
72
+ specification_version: 4
73
+ summary: Adds support for cursors to Active Record.
74
+ test_files: []