pg_conduit 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA256:
3
+ metadata.gz: c3a6e9d910b4d945b126b1a769182899c3230db825b30a384dc13b4eef7b34e3
4
+ data.tar.gz: 8441234b1dd220fdb4b1fa8b8628dda8d5efe016f30bc196968e98421b3a9eb0
5
+ SHA512:
6
+ metadata.gz: '08e0fc8a6219b8c2ee4844ec56a415ea064c62415a53a1d89f4cdd5b09f847dea19396a1e93fb4c1f35fe26af35583a55129ea1999675c842461d328d195df90'
7
+ data.tar.gz: b82ed7a7d8d2dd8537600cf049c32eee574b8dcaed951c015578aab125025bc0e2cae155bd9d779f1513b5b007a493e80b918876ad09d3f71148a176f9f50a67
checksums.yaml.gz.sig ADDED
Binary file
@@ -0,0 +1,70 @@
1
+ # Ruby CircleCI 2.0 configuration file
2
+ #
3
+ # Check https://circleci.com/docs/2.0/language-ruby/ for more details
4
+ #
5
+ version: 2
6
+ jobs:
7
+ build:
8
+ docker:
9
+ # specify the version you desire here
10
+ - image: circleci/ruby:2.5.1-node-browsers
11
+ environment:
12
+ - TEST_DB_HOST=postgres://circleci@localhost/
13
+ - TEST_DB_SRC=postgres://circleci@localhost/pg_conduit_src_test
14
+ - TEST_DB_DEST=postgres://circleci@localhost/pg_conduit_dest_test
15
+
16
+ - image: circleci/postgres:9.6-alpine
17
+ environment:
18
+ POSTGRES_USER: circleci
19
+ POSTGRES_DB: circleci
20
+
21
+ working_directory: ~/src
22
+
23
+ steps:
24
+ - checkout
25
+
26
+ # Download and cache dependencies
27
+ - restore_cache:
28
+ keys:
29
+ - v1-dependencies-{{ checksum "Gemfile.lock" }}
30
+ # fallback to using the latest cache if no exact match is found
31
+ - v1-dependencies-
32
+
33
+ - run:
34
+ name: install dependencies
35
+ command: |
36
+ bundle install --jobs=4 --retry=3 --path vendor/bundle
37
+
38
+ - save_cache:
39
+ paths:
40
+ - ./vendor/bundle
41
+ key: v1-dependencies-{{ checksum "Gemfile.lock" }}
42
+
43
+ - run:
44
+ name: Wait for postgres container
45
+ command: dockerize -wait tcp://127.0.0.1:5432 -timeout 120s
46
+
47
+ - run:
48
+ name: create databases
49
+ command: bundle exec rake db:create
50
+
51
+ # run tests!
52
+ - run:
53
+ name: run tests
54
+ command: |
55
+ mkdir /tmp/test-results
56
+ TEST_FILES="$(circleci tests glob "spec/**/*_spec.rb" | circleci tests split --split-by=timings)"
57
+
58
+ bundle exec rspec --format progress \
59
+ --format RspecJunitFormatter \
60
+ --out /tmp/test-results/rspec.xml \
61
+ --format progress \
62
+ $TEST_FILES
63
+
64
+ # collect reports
65
+ - store_test_results:
66
+ path: /tmp/test-results
67
+
68
+ - store_artifacts:
69
+ path: /tmp/test-results
70
+ destination: test-results
data/.gitignore ADDED
@@ -0,0 +1,15 @@
1
+ /.bundle/
2
+ /.yardoc
3
+ /_yardoc/
4
+ /coverage/
5
+ /doc/
6
+ /pkg/
7
+ /spec/reports/
8
+ /tmp/
9
+
10
+ # rspec failure tracking
11
+ .rspec_status
12
+ .env
13
+
14
+ # simplecov results
15
+ coverage
data/.rspec ADDED
@@ -0,0 +1,3 @@
1
+ --format documentation
2
+ --color
3
+ --require spec_helper
data/.ruby-version ADDED
@@ -0,0 +1 @@
1
+ 2.5.1@pg_conduit
data/.travis.yml ADDED
@@ -0,0 +1,5 @@
1
+ sudo: false
2
+ language: ruby
3
+ rvm:
4
+ - 2.4.2
5
+ before_install: gem install bundler -v 1.16.0
data/Dockerfile ADDED
@@ -0,0 +1,24 @@
1
# Development image for pg_conduit: Ruby 2.5.1 on Alpine plus the native
# libraries and tools installed below.
FROM ruby:2.5.1-alpine

# --no-cache refreshes the package index and keeps no local cache, so no
# separate cleanup of /var/cache/apk is needed.
RUN apk add --no-cache \
      build-base \
      libxml2-dev \
      libxslt-dev \
      postgresql-dev \
      postgresql-client \
      tzdata \
      git

ENV APP_HOME=/src

# WORKDIR creates the directory when it does not exist yet.
WORKDIR $APP_HOME

# Install gems outside the source tree.
ENV BUNDLE_PATH=/bundle

# COPY is preferred over ADD for plain local files (no archive extraction or
# URL fetching is wanted here).
COPY . $APP_HOME

RUN bundle install

# Exec form must be a JSON array, which requires double quotes. With single
# quotes Docker falls back to shell form and tries to run the literal
# command ['ash'].
CMD ["ash"]
data/Gemfile ADDED
@@ -0,0 +1,6 @@
1
source 'https://rubygems.org'

# Expand the `github: 'owner/repo'` shorthand to an HTTPS URL. The string has
# to be double-quoted: inside single quotes Ruby performs no interpolation and
# every shorthand would resolve to the literal text '#{repo_name}'.
git_source(:github) { |repo_name| "https://github.com/#{repo_name}" }

# Specify your gem's dependencies in pg_conduit.gemspec
gemspec
data/Gemfile.lock ADDED
@@ -0,0 +1,90 @@
1
+ PATH
2
+ remote: .
3
+ specs:
4
+ pg_conduit (0.1.0)
5
+ connection_pool (~> 2.2)
6
+ pg (~> 1.0)
7
+
8
+ GEM
9
+ remote: https://rubygems.org/
10
+ specs:
11
+ coderay (1.1.2)
12
+ connection_pool (2.2.2)
13
+ diff-lcs (1.3)
14
+ docile (1.3.1)
15
+ ffi (1.9.25)
16
+ formatador (0.2.5)
17
+ guard (2.14.2)
18
+ formatador (>= 0.2.4)
19
+ listen (>= 2.7, < 4.0)
20
+ lumberjack (>= 1.0.12, < 2.0)
21
+ nenv (~> 0.1)
22
+ notiffany (~> 0.0)
23
+ pry (>= 0.9.12)
24
+ shellany (~> 0.0)
25
+ thor (>= 0.18.1)
26
+ guard-compat (1.2.1)
27
+ guard-rspec (4.7.3)
28
+ guard (~> 2.1)
29
+ guard-compat (~> 1.1)
30
+ rspec (>= 2.99.0, < 4.0)
31
+ json (2.1.0)
32
+ listen (3.1.5)
33
+ rb-fsevent (~> 0.9, >= 0.9.4)
34
+ rb-inotify (~> 0.9, >= 0.9.7)
35
+ ruby_dep (~> 1.2)
36
+ lumberjack (1.0.13)
37
+ method_source (0.9.0)
38
+ nenv (0.3.0)
39
+ notiffany (0.1.1)
40
+ nenv (~> 0.1)
41
+ shellany (~> 0.0)
42
+ pg (1.0.0)
43
+ pry (0.11.3)
44
+ coderay (~> 1.1.0)
45
+ method_source (~> 0.9.0)
46
+ rake (10.5.0)
47
+ rb-fsevent (0.10.3)
48
+ rb-inotify (0.9.10)
49
+ ffi (>= 0.5.0, < 2)
50
+ rspec (3.7.0)
51
+ rspec-core (~> 3.7.0)
52
+ rspec-expectations (~> 3.7.0)
53
+ rspec-mocks (~> 3.7.0)
54
+ rspec-core (3.7.1)
55
+ rspec-support (~> 3.7.0)
56
+ rspec-expectations (3.7.0)
57
+ diff-lcs (>= 1.2.0, < 2.0)
58
+ rspec-support (~> 3.7.0)
59
+ rspec-mocks (3.7.0)
60
+ diff-lcs (>= 1.2.0, < 2.0)
61
+ rspec-support (~> 3.7.0)
62
+ rspec-support (3.7.1)
63
+ rspec_junit_formatter (0.4.1)
64
+ rspec-core (>= 2, < 4, != 2.12.0)
65
+ ruby_dep (1.5.0)
66
+ shellany (0.0.1)
67
+ simplecov (0.16.1)
68
+ docile (~> 1.1)
69
+ json (>= 1.8, < 3)
70
+ simplecov-html (~> 0.10.0)
71
+ simplecov-html (0.10.2)
72
+ thor (0.20.0)
73
+ yard (0.9.14)
74
+
75
+ PLATFORMS
76
+ ruby
77
+
78
+ DEPENDENCIES
79
+ bundler (~> 1.16)
80
+ guard (~> 2.14)
81
+ guard-rspec (~> 4.7)
82
+ pg_conduit!
83
+ rake (~> 10.0)
84
+ rspec (~> 3.0)
85
+ rspec_junit_formatter (~> 0.4.1)
86
+ simplecov (~> 0.16.1)
87
+ yard (~> 0.9)
88
+
89
+ BUNDLED WITH
90
+ 1.16.2
data/Guardfile ADDED
@@ -0,0 +1,21 @@
1
+ # A sample Guardfile
2
+ # More info at https://github.com/guard/guard#readme
3
+
4
+ ## Uncomment and set this to only include directories you want to watch
5
+ # directories %w(app lib config test spec features) \
6
+ # .select{|d| Dir.exist?(d) ? d : UI.warning("Directory #{d} does not exist")}
7
+
8
+ ## Note: if you are using the `directories` clause above and you are not
9
+ ## watching the project directory ('.'), then you will want to move
10
+ ## the Guardfile to a watched dir and symlink it back, e.g.
11
+ #
12
+ # $ mkdir config
13
+ # $ mv Guardfile config/
14
+ # $ ln -s config/Guardfile .
15
+ #
16
+ # and, you'll have to watch "config/Guardfile" instead of "Guardfile"
17
+
18
+ guard :rspec, cmd: 'bundle exec rspec', all_on_start: true do
19
+ watch(%r{^spec/(.+)\.rb$}) { 'spec' }
20
+ watch(%r{^lib/(.+)\.rb$}) { |m| "spec/#{m[1]}_spec.rb" }
21
+ end
data/MIT-LICENSE ADDED
@@ -0,0 +1,20 @@
1
+ Copyright (c) 2018 James Brennan
2
+
3
+ Permission is hereby granted, free of charge, to any person obtaining
4
+ a copy of this software and associated documentation files (the
5
+ "Software"), to deal in the Software without restriction, including
6
+ without limitation the rights to use, copy, modify, merge, publish,
7
+ distribute, sublicense, and/or sell copies of the Software, and to
8
+ permit persons to whom the Software is furnished to do so, subject to
9
+ the following conditions:
10
+
11
+ The above copyright notice and this permission notice shall be
12
+ included in all copies or substantial portions of the Software.
13
+
14
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
15
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
16
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
17
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
18
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
19
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
20
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/README.md ADDED
@@ -0,0 +1,127 @@
1
+ # PgConduit
2
+
3
+ [![CircleCI](https://img.shields.io/circleci/project/github/jamesBrennan/pg_conduit.svg?style=svg)](https://circleci.com/gh/jamesBrennan/pg_conduit)
4
+
5
+ Stream data between two postgres databases. This is mostly an excuse for me to
6
+ play around with concurrency in Ruby.
7
+
8
+ This gem is in early development. As such I would advise against using it in any
9
+ environment where data integrity is important. I will release version 1.0 when
10
+ I feel confident that the code is sufficiently robust.
11
+
12
+ ## Installation
13
+
14
+ Add this line to your application's Gemfile:
15
+
16
+ ```ruby
17
+ gem 'pg_conduit'
18
+ ```
19
+
20
+ And then execute:
21
+
22
+ $ bundle
23
+
24
+ Or install it yourself as:
25
+
26
+ $ gem install pg_conduit
27
+
28
+ ## Quick Start
29
+
30
+ ### `PgConduit.db_to_db(source, destination)`
31
+
32
+ Returns an instance of `PgConduit::Pipe` that will execute queries passed to
33
+ `read` and `write` against the `source` and `destination` databases,
34
+ respectively.
35
+
36
+ The `source` and `destination` arguments are passed to
37
+ [`PG::Connection`](https://www.rubydoc.info/gems/pg/PG/Connection), so any
38
+ arguments that it accepts can be used.
39
+
40
+ #### Write one row at a time
41
+
42
+ ```ruby
43
+ source = 'postgres://user:pass@source/db'
44
+ destination = { dbname: 'my_local_db' }
45
+
46
+ pipe = PgConduit.db_to_db(source, destination)
47
+
48
+ pipe.read('SELECT id, full_name, email FROM users')
49
+ .write do |user|
50
+ <<-SQL
51
+ INSERT INTO customers(user_id, name, email)
52
+ VALUES ('#{user['id']}', '#{user['full_name']}', '#{user['email']}')
53
+ SQL
54
+ end
55
+ ```
56
+
57
+ #### Write in batches
58
+
59
+ ```ruby
60
+ source = 'postgres://user:pass@source/db'
61
+ destination = { dbname: 'my_local_db' }
62
+
63
+ pipe = PgConduit.db_to_db(source, destination)
64
+
65
+ pipe.read('SELECT id, full_name, email FROM users')
66
+ .transform do |user|
67
+ <<-SQL
68
+ ('#{user['id']}', '#{user['full_name']}', '#{user['email']}')
69
+ SQL
70
+ end
71
+ .write_batched(size: 100) do |values|
72
+ <<-SQL
73
+ INSERT INTO customers(user_id, name, email)
74
+ VALUES #{values.join(',')}
75
+ SQL
76
+ end
77
+ ```
78
+
79
+ ### `PgConduit.db_to_file(source, destination)`
80
+
81
+ Write output from source database to file.
82
+
83
+ ```ruby
84
+ source = 'postgres://user:pass@source/db'
85
+ destination = '/some/system/path/user_count.txt'
86
+
87
+ pipe = PgConduit.db_to_file(source, destination)
88
+
89
+ pipe.read('SELECT count(*) FROM users')
90
+ .write { |res| "Number of users: #{res['count']}" }
91
+ ```
92
+
93
+ ### `PgConduit.db_to_stdout(source)`
94
+
95
+ Write output from source database to stdout.
96
+
97
+ ```ruby
98
+ pipe = PgConduit.db_to_stdout('postgres://user:pass@source/db')
99
+
100
+ query = <<-SQL
101
+ SELECT posts.user_id, users.email, count(posts.*) FROM users
102
+ JOIN posts ON posts.user_id = users.id
103
+ GROUP BY posts.user_id, users.email
104
+ SQL
105
+
106
+ pipe.read(query)
107
+ .write do |post_count|
108
+ "#{post_count['user_id']} | #{post_count['email']} - #{post_count['count']}"
109
+ end
110
+ ```
111
+
112
+ ### `PgConduit.db_to_null(source)`
113
+
114
+ Swallow output from source database. Mostly useful for testing. `exec` is an
115
+ alias of `write`.
116
+
117
+ ```ruby
118
+ pipe = PgConduit.db_to_null('postgres://user:pass@source/db')
119
+ pipe.read('SELECT count(*) FROM users')
120
+ .exec { |res| raise 'fail' unless res['count'].to_i == 10 }
121
+ ```
122
+
123
+ ## Development
124
+
125
+ ## Contributing
126
+
127
+ Bug reports and pull requests are welcome on GitHub at https://github.com/jamesBrennan/pg_conduit.
data/Rakefile ADDED
@@ -0,0 +1,23 @@
1
require 'bundler/gem_tasks'
require 'rspec/core/rake_task'
require 'pg'

# Issue CREATE DATABASE for +name+ on +conn+. An already existing database is
# reported on stdout instead of failing the task.
#
# @param conn [PG::Connection] an open connection
# @param name [String] name of the database to create
def create_db(conn, name)
  conn.exec "CREATE DATABASE #{name}"
rescue PG::DuplicateDatabase
  puts "Create database skipped: '#{name}' already exists."
end

RSpec::Core::RakeTask.new(:spec)

task :default => :spec

namespace :db do
  desc 'Create the source and destination test databases'
  task :create do
    conn = nil
    # Explicit begin/ensure: a bare `ensure` inside a do...end block is only
    # valid syntax from Ruby 2.5 onwards.
    begin
      conn = PG::Connection.open(ENV['TEST_DB_HOST'])
      create_db conn, 'pg_conduit_src_test'
      create_db conn, 'pg_conduit_dest_test'
    ensure
      # conn is still nil when opening the connection failed; calling #close
      # on it would raise NoMethodError and hide the original error.
      conn.close if conn
    end
  end
end
data/bin/checksum.rb ADDED
@@ -0,0 +1,9 @@
1
#!/usr/bin/env ruby
# Writes the SHA-512 hex digest of pkg/pg_conduit-<VERSION>.gem to
# checksum/pg_conduit-<VERSION>.gem.sha512. Both paths are relative to the
# current working directory.
require 'digest/sha2'
require 'fileutils'
# Resolve the version file relative to this script so the script does not
# depend on lib/ already being on the load path.
require_relative '../lib/pg_conduit/version'

gem_name = "pg_conduit-#{PgConduit::VERSION}.gem"

# Digest::SHA512.file reads the file as binary data in chunks, which is the
# right way to hash a gem archive (no text-mode translation, no full read
# into memory).
checksum = Digest::SHA512.file("pkg/#{gem_name}").hexdigest

# Create the output directory on first use.
FileUtils.mkdir_p 'checksum'
File.write("checksum/#{gem_name}.sha512", checksum)
data/bin/console ADDED
@@ -0,0 +1,14 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ require 'bundler/setup'
4
+ require 'pg_conduit'
5
+
6
+ # You can add fixtures and/or initialization code here to make experimenting
7
+ # with your gem easier. You can also use a different console, if you like.
8
+
9
+ # (If you use this, don't forget to add pry to your Gemfile!)
10
+ # require 'pry'
11
+ # Pry.start
12
+
13
+ require 'irb'
14
+ IRB.start(__FILE__)
data/bin/setup ADDED
@@ -0,0 +1,8 @@
1
+ #!/usr/bin/env bash
2
+ set -euo pipefail
3
+ IFS=$'\n\t'
4
+ set -vx
5
+
6
+ bundle install
7
+
8
+ # Do any other automated setup that you need to do here
@@ -0,0 +1,21 @@
1
+ -----BEGIN CERTIFICATE-----
2
+ MIIDgDCCAmigAwIBAgIBATANBgkqhkiG9w0BAQUFADBDMRUwEwYDVQQDDAxicmVu
3
+ bmFubXVzaWMxFTATBgoJkiaJk/IsZAEZFgVnbWFpbDETMBEGCgmSJomT8ixkARkW
4
+ A2NvbTAeFw0xODA3MDYwMDA2MzRaFw0xOTA3MDYwMDA2MzRaMEMxFTATBgNVBAMM
5
+ DGJyZW5uYW5tdXNpYzEVMBMGCgmSJomT8ixkARkWBWdtYWlsMRMwEQYKCZImiZPy
6
+ LGQBGRYDY29tMIIBIjANBgkqhkiG9w0BAQEFAAOCAQ8AMIIBCgKCAQEAyxykNKqp
7
+ jD5gnQd2YHuRDNsLN5limGMDjSb38lMZAfJJugoWbSSeQM3+iVl26YWE5JeJpqDu
8
+ Rr/GOQ54bsev4IxCfPl8yIkhAao0i96+k1EeEb/i3pgZOfjXgGSTM71baCZzbqBH
9
+ A5ivcb0wqoHWyqrxNtc4XhSs5RtvtNl3IEe/bs5qqixt6PwsCorD6kAoUUNHNM9W
10
+ MLexivfiJooHOvVRmf1DQuJ2fDccKNHf7ZR4PuiguT4Z73bT2njCBm2o86o/clvR
11
+ WV0jasBOKvsgcg65U10klngBEG56nWSdIE9PrXGPqwBwA6jXttAwFLUNvuam5nGQ
12
+ V3MbCFDeHUUrtwIDAQABo38wfTAJBgNVHRMEAjAAMAsGA1UdDwQEAwIEsDAdBgNV
13
+ HQ4EFgQUPJGTSMevbRtDlY9WBrs21ae/+5YwIQYDVR0RBBowGIEWYnJlbm5hbm11
14
+ c2ljQGdtYWlsLmNvbTAhBgNVHRIEGjAYgRZicmVubmFubXVzaWNAZ21haWwuY29t
15
+ MA0GCSqGSIb3DQEBBQUAA4IBAQBr5Fm1ULBbLUYvX85iXI1Q7L7BZp53p9Q4TXrd
16
+ 0n0dE0qHJELwJQkErwZwMhOzaLqcctCwdzLkc9/VZSpLLuzv5bfEWP4EyTET5a+i
17
+ 4dn/Wko+6aNPVonmlHDhNYOPl3edxgxoD0WW08U7NJ4tGxJJURVv4yrCayT3xLCA
18
+ yzUUQaHKkLaXHSfH0yhha+pFpUlTsLeg9hrZ0jqSk4FzUrKbiieLY/f/p5Xr5QDg
19
+ fXe/xr/Sc+2wCjHPVE2J+auN5hk3KCp1I4s2fKqyLIwyhTEF3shuYfCpC8rt/YdN
20
+ cy9/lg5LCI3OvakzxL4Xt1Sq4h/xJZ06ydTVJ1wxfk6BXHrg
21
+ -----END CERTIFICATE-----
@@ -0,0 +1 @@
1
+ 2401cb5efe647705eee22f6ae64a183012e5552bb2417dcc1c78cf52c5c44901925b849443685e63402da82e97a8d8c88c1fe3c1c52d86d184862e900258af58
@@ -0,0 +1,34 @@
1
version: "3"
services:
  # Serves the YARD documentation; container port 8808 is published on host
  # port 9999.
  yard-docs:
    build: .
    command: bundle exec yard server --reload
    ports:
      # Port mappings are quoted so they are always read as strings.
      - "9999:8808"
    volumes:
      - .:/src
      - bundle:/bundle

  # Runs guard against the db service.
  test:
    build: .
    command: bundle exec guard --no-bundler-warning --no-interactions
    volumes:
      - .:/src
      - bundle:/bundle
    environment:
      - TEST_DB_SRC=postgres://postgres@db/pg_conduit_src_test
      - TEST_DB_DEST=postgres://postgres@db/pg_conduit_dest_test
      - TEST_DB_HOST=postgres://postgres@db/
    links:
      - db

  # PostgreSQL server used by the test service; also published on host port
  # 5499.
  db:
    image: postgres:9.6.6
    ports:
      - "5499:5432"
    volumes:
      - database:/var/lib/postgresql/data

volumes:
  bundle:
  database:
@@ -0,0 +1,24 @@
1
+ require 'connection_pool'
2
+
3
module PgConduit
  # Owns two connection pools: one for the source database and one for the
  # destination database.
  class Connections
    attr_reader :src_pool, :dest_pool

    class << self
      # Build a pool whose connections are opened with +params+.
      #
      # @param params [Object] handed unchanged to PG::Connection.open
      # @return [ConnectionPool]
      def init_pool(params)
        ConnectionPool.new do
          PG::Connection.open(params)
        end
      end
    end

    # @param source [Object] connection params for the source pool
    # @param destination [Object] connection params for the destination pool
    def initialize(source, destination)
      @src_pool  = self.class.init_pool(source)
      @dest_pool = self.class.init_pool(destination)
    end

    # Check a connection out of the source pool and yield it.
    #
    # @yield [conn] a connection from the source pool
    def with_source
      src_pool.with do |connection|
        yield connection
      end
    end

    # Check a connection out of the destination pool and yield it.
    #
    # @yield [conn] a connection from the destination pool
    def with_destination
      dest_pool.with do |connection|
        yield connection
      end
    end
  end
end
@@ -0,0 +1,11 @@
1
module PgConduit
  # Writer that executes the SQL produced by a block on a pooled connection.
  class DBWriter
    # @param pool [#with] pool that yields a connection responding to #exec
    def initialize(pool)
      @pool = pool
    end

    # Check out a connection, evaluate the block and execute its result.
    #
    # @yieldreturn [String] the statement to execute
    # @return whatever the connection's #exec returns
    def write
      @pool.with do |connection|
        statement = yield
        connection.exec(statement)
      end
    end
  end
end
@@ -0,0 +1,11 @@
1
module PgConduit
  # Writer that appends the value produced by a block to a file, one line
  # per call.
  class FileWriter
    # @param path [String,Pathname] file to append to
    def initialize(path)
      @path = path
    end

    # Open the file in append mode and write the block's value with #puts.
    #
    # File.open is used instead of Kernel#open on purpose: Kernel#open treats
    # a string starting with "|" as a command to run rather than a file name.
    #
    # @yieldreturn [Object] the value to append
    def write
      File.open(@path, 'a') { |f| f.puts yield }
    end
  end
end
@@ -0,0 +1,7 @@
1
module PgConduit
  # Writer that evaluates the block and discards its value.
  class NullWriter
    # Run the block for its side effects only.
    #
    # @return [nil] always
    def write
      yield
      nil
    end
  end
end
@@ -0,0 +1,60 @@
1
module PgConduit
  # A multi threaded stream reader: one thread pushes rows from the stream
  # onto a bounded queue while a fixed number of worker threads pop rows off
  # the queue and hand them to the caller's block.
  class ParallelStreamReader
    # @param query_stream [PgConduit::QueryStream]
    # @param threads [Integer] The number of threads to use for workers
    # @param queue_max_size [Integer] How many rows should be stored in memory
    #   in the work queue.
    def initialize(query_stream, threads: 5, queue_max_size: 1000)
      @queue_max_size = queue_max_size
      @queue = SizedQueue.new(queue_max_size)
      @workers = threads
      @stream = query_stream
    end

    # Read a QueryStream and yield its rows
    #
    # @yield [Hash] A single row from the QueryStream. Every row from the stream
    #   will be yielded but order is not guaranteed.
    # @return [Symbol] :ok once the stream is exhausted and all workers are done
    def read(&callback)
      # A queue is closed at the end of every read; start from a fresh one so
      # the same reader can be used for more than one read.
      @queue = SizedQueue.new(@queue_max_size) if @queue.closed?

      reader = read_stream(@stream)
      workers = dispatch_workers(&callback)
      reader.join
      workers.each(&:join)
      :ok
    end

    private

    # Start the thread that feeds the queue. The queue is closed in an ensure
    # clause so that workers are released even when the stream raises;
    # otherwise they would stay blocked in #deq forever.
    def read_stream(query_stream)
      Thread.new do
        begin
          query_stream.each_row { |row| @queue << row }
        ensure
          @queue.close
        end
      end
    end

    # Start the configured number of worker threads.
    def dispatch_workers(&callback)
      @workers.times.map { dispatch_worker(&callback) }
    end

    # A worker keeps processing rows until the queue is closed and drained.
    # NOTE(review): if every worker dies from an error in the callback while
    # the queue is full, the reader thread stays blocked on the push - confirm
    # whether callers need that case handled.
    def dispatch_worker(&callback)
      Thread.new do
        loop do
          processed = process_next(&callback)
          break if @queue.closed? && !processed
        end
      end
    end

    # Pop one row and pass it to the callback, directly on the worker thread
    # (no extra thread per row).
    #
    # @return [Boolean] true when a row was processed, false when #deq
    #   returned nothing
    def process_next(&callback)
      row = @queue.deq
      return false unless row

      callback.call row
      true
    end
  end
end
@@ -0,0 +1,62 @@
1
module PgConduit
  # Connects a row source to a writer: rows are read through a
  # ParallelStreamReader, passed through the registered transformers in
  # order, and handed to the writer.
  class Pipe
    # @param from [#query, #each_row] row source, e.g. a QueryStream
    # @param to [#write] writer that receives a block producing the output
    #
    # @example
    #   Pipe
    #     .new(from: query_stream, to: db_writer)
    #     .read('SELECT full_name FROM users')
    #     .write do |user|
    #       %(INSERT INTO friends (name) VALUES ('#{user["full_name"]}'))
    #     end
    def initialize(from:, to:)
      @stream = from
      @writer = to
      @reader = ParallelStreamReader.new(@stream)
      @transformers = []
    end

    # Set the query on the source stream.
    #
    # @param query [String]
    # @return [self]
    def read(query)
      self.tap { @stream.query(query) }
    end

    # Append a transformer; each one receives the previous one's result.
    #
    # @return [self]
    def transform(&transformer)
      self.tap { @transformers << transformer }
    end

    # Read every row and pass the transformed row to the writer.
    def write
      exec_read { |row| exec_write { exec_transform(row) } }
    end

    # Append a pass-through step: the given block is called with each row and
    # the row itself is passed on unchanged.
    #
    # @return [self]
    def peak
      self.tap { @transformers << ->(row) { row.tap { yield row } } }
    end

    # Correctly spelled name for #peak; the original name is kept so existing
    # callers keep working.
    alias_method :peek, :peak

    # Collect transformed rows into chunks and write once per chunk.
    #
    # @param size [Integer] number of rows per chunk
    # @yield [Array] the collected rows of one chunk; the block's value is
    #   what the writer receives
    def write_batched(size: 1000)
      collector = RowCollector.new(chunk_size: size)

      # Set callback to yield collected rows
      collector.on_chunk { |rows| exec_write { yield rows } }

      # Process each row
      exec_read { |row| collector << exec_transform(row) }

      # Yield any remaining rows
      collector.finish
    end

    alias_method :exec, :write

    private

    def exec_read(&b)
      @reader.read(&b)
    end

    def exec_write(&b)
      @writer.write(&b)
    end

    # Fold the row through all transformers in registration order.
    def exec_transform(row)
      @transformers.reduce(row) { |data, transform| transform.call data }
    end
  end
end
@@ -0,0 +1,41 @@
1
module PgConduit
  # Runs a SQL query on a pooled connection and hands back the result one
  # row at a time.
  #
  # @example Print username and email for all users
  #
  #   pool = ConnectionPool.new { PG::Connection.open }
  #   stream = PgConduit::QueryStream.new(pool)
  #
  #   stream.query('SELECT * FROM users').each_row do |row|
  #     puts "#{row['username']}, #{row['email']}"
  #   end
  #
  class QueryStream
    attr_reader :sql

    # @param pool [ConnectionPool] A pool of PG::Connections
    def initialize(pool)
      @pool = pool
    end

    # Remember the query to run; nothing is sent until #each_row is called.
    #
    # @param sql [String] The SQL query to execute
    # @return [self]
    def query(sql)
      @sql = sql
      self
    end

    # Send the stored query in single row mode and yield every row.
    #
    # @yield [Hash] A hash representing a single row from the result set
    def each_row
      @pool.with do |connection|
        # Single row mode is switched on right after the query is sent.
        connection.send_query(@sql)
        connection.set_single_row_mode

        loop do
          result = connection.get_result
          break unless result

          # Raise if the server reported an error for this result.
          result.check
          result.stream_each { |record| yield record }
        end
      end
    end
  end
end
@@ -0,0 +1,68 @@
1
module PgConduit
  # Mutex-guarded buffer that groups incoming rows into fixed-size chunks.
  class RowCollector
    # @param chunk_size [Integer] number of rows to buffer before the chunk
    #   is handed to the callback
    def initialize(chunk_size: 100)
      @mutex = Mutex.new
      @rows = []
      @chunk_size = chunk_size
      @finished = false
    end

    # Register the block that receives every completed chunk.
    #
    # @yield [Array] collected rows
    # @return [self]
    #
    # @example Print once every ten rows
    #
    #   collector = RowCollector.new(chunk_size: 10)
    #   collector.on_chunk { |rows| puts rows }
    #
    #   100.times { |n| collector << n }
    #
    #   #> [0,1,2,3,4,5,6,7,8,9]
    #   #> [10,11,12,13,14,15,16,17,18,19]
    #   #> ...etc
    #
    def on_chunk(&callback)
      @mutex.synchronize { @callback = callback }
      self
    end

    # Buffer one row; the callback runs (while the lock is held) as soon as
    # the buffer length is a multiple of the chunk size.
    #
    # @param row [Object] Row to add to the buffer
    # @raise [RuntimeError] when the collector has already been finished
    def <<(row)
      @mutex.synchronize do
        raise 'Data may not be added to a row collector that has been marked as finished' if @finished

        @rows.push(row)
        flush(&@callback) if (@rows.length % @chunk_size).zero?
      end
    end

    # Hand any buffered rows to the callback and mark the collector as
    # finished; later calls to :<< raise.
    def finish
      @mutex.synchronize do
        flush(&@callback)
        @finished = true
      end
    end

    private

    # Yield the buffered rows (if there are any) and start a new, empty buffer.
    #
    # @yield [Array<Hash>] The collected rows
    def flush
      yield @rows unless @rows.empty?
      @rows = []
      true
    end
  end
end
@@ -0,0 +1,7 @@
1
module PgConduit
  # Writer that prints the value produced by a block to standard output.
  class STDOUTWriter
    # Evaluate the block and print its value followed by a newline.
    def write
      $stdout.puts(yield)
    end
  end
end
@@ -0,0 +1,3 @@
1
+ module PgConduit
2
+ VERSION = '0.1.0'
3
+ end
data/lib/pg_conduit.rb ADDED
@@ -0,0 +1,67 @@
1
+ require 'pg'
2
+ require 'connection_pool'
3
+ require 'pg_conduit/version'
4
+
5
module PgConduit
  # Library classes are loaded lazily on first reference.
  autoload :Connections, 'pg_conduit/connections'
  autoload :DBWriter, 'pg_conduit/db_writer'
  autoload :FileWriter, 'pg_conduit/file_writer'
  autoload :NullWriter, 'pg_conduit/null_writer'
  autoload :ParallelStreamReader, 'pg_conduit/parallel_stream_reader'
  autoload :Pipe, 'pg_conduit/pipe'
  autoload :QueryStream, 'pg_conduit/query_stream'
  autoload :RowCollector, 'pg_conduit/row_collector'
  autoload :STDOUTWriter, 'pg_conduit/stdout_writer'

  class << self
    # Build a pipe that reads from one database and writes to another.
    #
    # @param src [String,Hash] Source database connection params
    # @param dest [String,Hash] Destination database connection params
    # @return [PgConduit::Pipe]
    def db_to_db(src, dest)
      pools = Connections.new(src, dest)

      Pipe.new(
        from: QueryStream.new(pools.src_pool),
        to: DBWriter.new(pools.dest_pool)
      )
    end

    # Build a pipe that reads from a database and appends to a file.
    #
    # @param src [String,Hash] Source database connection params
    # @param dest [String,Pathname] Path to destination file
    # @return [PgConduit::Pipe]
    def db_to_file(src, dest)
      Pipe.new(
        from: QueryStream.new(Connections.init_pool(src)),
        to: FileWriter.new(dest)
      )
    end

    # Build a pipe that reads from a database and prints to STDOUT.
    #
    # @param src [String,Hash] Source database connection params
    # @return [PgConduit::Pipe]
    def db_to_stdout(src)
      Pipe.new(
        from: QueryStream.new(Connections.init_pool(src)),
        to: STDOUTWriter.new
      )
    end

    # Build a pipe that reads from a database and discards the output.
    #
    # @param src [String,Hash] Source database connection params
    # @return [PgConduit::Pipe]
    def db_to_null(src)
      Pipe.new(
        from: QueryStream.new(Connections.init_pool(src)),
        to: NullWriter.new
      )
    end
  end
end
@@ -0,0 +1,37 @@
1
+
2
# Put lib/ on the load path so the version constant can be required below.
lib = File.expand_path('../lib', __FILE__)
$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
require 'pg_conduit/version'

Gem::Specification.new do |spec|
  spec.name          = 'pg_conduit'
  spec.version       = PgConduit::VERSION
  spec.authors       = ['James Brennan']
  spec.email         = ['brennanmusic@gmail.com']

  spec.summary       = 'Stream data from one postgres database to another'
  spec.homepage      = 'https://github.com/jamesBrennan/pg_conduit'

  spec.license       = 'MIT'

  # The library calls SizedQueue#close and #closed?, which exist from
  # Ruby 2.3 onwards.
  spec.required_ruby_version = '>= 2.3.0'

  spec.cert_chain    = ['certs/jamesbrennan.pem']
  # The signing key is only set when the running program's name ends in "gem".
  spec.signing_key   = File.expand_path('~/.ssh/gem-private_key.pem') if $0 =~ /gem\z/

  # Package every file tracked by git except the specs.
  spec.files         = `git ls-files -z`.split("\x0").reject do |f|
    f.match(%r{^spec/})
  end

  spec.require_paths = ['lib']

  spec.add_dependency 'pg', '~> 1.0'
  spec.add_dependency 'connection_pool', '~> 2.2'

  spec.add_development_dependency 'bundler', '~> 1.16'
  spec.add_development_dependency 'rake', '~> 10.0'
  spec.add_development_dependency 'rspec', '~> 3.0'
  spec.add_development_dependency 'rspec_junit_formatter', '~> 0.4.1'
  spec.add_development_dependency 'simplecov', '~> 0.16.1'
  spec.add_development_dependency 'yard', '~> 0.9'
  spec.add_development_dependency 'guard', '~> 2.14'
  spec.add_development_dependency 'guard-rspec', '~> 4.7'
end
data.tar.gz.sig ADDED
Binary file
metadata ADDED
@@ -0,0 +1,236 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: pg_conduit
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.1.0
5
+ platform: ruby
6
+ authors:
7
+ - James Brennan
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain:
11
+ - |
12
+ -----BEGIN CERTIFICATE-----
13
+ MIIDgDCCAmigAwIBAgIBATANBgkqhkiG9w0BAQUFADBDMRUwEwYDVQQDDAxicmVu
14
+ bmFubXVzaWMxFTATBgoJkiaJk/IsZAEZFgVnbWFpbDETMBEGCgmSJomT8ixkARkW
15
+ A2NvbTAeFw0xODA3MDYwMDA2MzRaFw0xOTA3MDYwMDA2MzRaMEMxFTATBgNVBAMM
16
+ DGJyZW5uYW5tdXNpYzEVMBMGCgmSJomT8ixkARkWBWdtYWlsMRMwEQYKCZImiZPy
17
+ LGQBGRYDY29tMIIBIjANBgkqhkiG9w0BAQEFAAOCAQ8AMIIBCgKCAQEAyxykNKqp
18
+ jD5gnQd2YHuRDNsLN5limGMDjSb38lMZAfJJugoWbSSeQM3+iVl26YWE5JeJpqDu
19
+ Rr/GOQ54bsev4IxCfPl8yIkhAao0i96+k1EeEb/i3pgZOfjXgGSTM71baCZzbqBH
20
+ A5ivcb0wqoHWyqrxNtc4XhSs5RtvtNl3IEe/bs5qqixt6PwsCorD6kAoUUNHNM9W
21
+ MLexivfiJooHOvVRmf1DQuJ2fDccKNHf7ZR4PuiguT4Z73bT2njCBm2o86o/clvR
22
+ WV0jasBOKvsgcg65U10klngBEG56nWSdIE9PrXGPqwBwA6jXttAwFLUNvuam5nGQ
23
+ V3MbCFDeHUUrtwIDAQABo38wfTAJBgNVHRMEAjAAMAsGA1UdDwQEAwIEsDAdBgNV
24
+ HQ4EFgQUPJGTSMevbRtDlY9WBrs21ae/+5YwIQYDVR0RBBowGIEWYnJlbm5hbm11
25
+ c2ljQGdtYWlsLmNvbTAhBgNVHRIEGjAYgRZicmVubmFubXVzaWNAZ21haWwuY29t
26
+ MA0GCSqGSIb3DQEBBQUAA4IBAQBr5Fm1ULBbLUYvX85iXI1Q7L7BZp53p9Q4TXrd
27
+ 0n0dE0qHJELwJQkErwZwMhOzaLqcctCwdzLkc9/VZSpLLuzv5bfEWP4EyTET5a+i
28
+ 4dn/Wko+6aNPVonmlHDhNYOPl3edxgxoD0WW08U7NJ4tGxJJURVv4yrCayT3xLCA
29
+ yzUUQaHKkLaXHSfH0yhha+pFpUlTsLeg9hrZ0jqSk4FzUrKbiieLY/f/p5Xr5QDg
30
+ fXe/xr/Sc+2wCjHPVE2J+auN5hk3KCp1I4s2fKqyLIwyhTEF3shuYfCpC8rt/YdN
31
+ cy9/lg5LCI3OvakzxL4Xt1Sq4h/xJZ06ydTVJ1wxfk6BXHrg
32
+ -----END CERTIFICATE-----
33
+ date: 2018-07-06 00:00:00.000000000 Z
34
+ dependencies:
35
+ - !ruby/object:Gem::Dependency
36
+ name: pg
37
+ requirement: !ruby/object:Gem::Requirement
38
+ requirements:
39
+ - - "~>"
40
+ - !ruby/object:Gem::Version
41
+ version: '1.0'
42
+ type: :runtime
43
+ prerelease: false
44
+ version_requirements: !ruby/object:Gem::Requirement
45
+ requirements:
46
+ - - "~>"
47
+ - !ruby/object:Gem::Version
48
+ version: '1.0'
49
+ - !ruby/object:Gem::Dependency
50
+ name: connection_pool
51
+ requirement: !ruby/object:Gem::Requirement
52
+ requirements:
53
+ - - "~>"
54
+ - !ruby/object:Gem::Version
55
+ version: '2.2'
56
+ type: :runtime
57
+ prerelease: false
58
+ version_requirements: !ruby/object:Gem::Requirement
59
+ requirements:
60
+ - - "~>"
61
+ - !ruby/object:Gem::Version
62
+ version: '2.2'
63
+ - !ruby/object:Gem::Dependency
64
+ name: bundler
65
+ requirement: !ruby/object:Gem::Requirement
66
+ requirements:
67
+ - - "~>"
68
+ - !ruby/object:Gem::Version
69
+ version: '1.16'
70
+ type: :development
71
+ prerelease: false
72
+ version_requirements: !ruby/object:Gem::Requirement
73
+ requirements:
74
+ - - "~>"
75
+ - !ruby/object:Gem::Version
76
+ version: '1.16'
77
+ - !ruby/object:Gem::Dependency
78
+ name: rake
79
+ requirement: !ruby/object:Gem::Requirement
80
+ requirements:
81
+ - - "~>"
82
+ - !ruby/object:Gem::Version
83
+ version: '10.0'
84
+ type: :development
85
+ prerelease: false
86
+ version_requirements: !ruby/object:Gem::Requirement
87
+ requirements:
88
+ - - "~>"
89
+ - !ruby/object:Gem::Version
90
+ version: '10.0'
91
+ - !ruby/object:Gem::Dependency
92
+ name: rspec
93
+ requirement: !ruby/object:Gem::Requirement
94
+ requirements:
95
+ - - "~>"
96
+ - !ruby/object:Gem::Version
97
+ version: '3.0'
98
+ type: :development
99
+ prerelease: false
100
+ version_requirements: !ruby/object:Gem::Requirement
101
+ requirements:
102
+ - - "~>"
103
+ - !ruby/object:Gem::Version
104
+ version: '3.0'
105
+ - !ruby/object:Gem::Dependency
106
+ name: rspec_junit_formatter
107
+ requirement: !ruby/object:Gem::Requirement
108
+ requirements:
109
+ - - "~>"
110
+ - !ruby/object:Gem::Version
111
+ version: 0.4.1
112
+ type: :development
113
+ prerelease: false
114
+ version_requirements: !ruby/object:Gem::Requirement
115
+ requirements:
116
+ - - "~>"
117
+ - !ruby/object:Gem::Version
118
+ version: 0.4.1
119
+ - !ruby/object:Gem::Dependency
120
+ name: simplecov
121
+ requirement: !ruby/object:Gem::Requirement
122
+ requirements:
123
+ - - "~>"
124
+ - !ruby/object:Gem::Version
125
+ version: 0.16.1
126
+ type: :development
127
+ prerelease: false
128
+ version_requirements: !ruby/object:Gem::Requirement
129
+ requirements:
130
+ - - "~>"
131
+ - !ruby/object:Gem::Version
132
+ version: 0.16.1
133
+ - !ruby/object:Gem::Dependency
134
+ name: yard
135
+ requirement: !ruby/object:Gem::Requirement
136
+ requirements:
137
+ - - "~>"
138
+ - !ruby/object:Gem::Version
139
+ version: '0.9'
140
+ type: :development
141
+ prerelease: false
142
+ version_requirements: !ruby/object:Gem::Requirement
143
+ requirements:
144
+ - - "~>"
145
+ - !ruby/object:Gem::Version
146
+ version: '0.9'
147
+ - !ruby/object:Gem::Dependency
148
+ name: guard
149
+ requirement: !ruby/object:Gem::Requirement
150
+ requirements:
151
+ - - "~>"
152
+ - !ruby/object:Gem::Version
153
+ version: '2.14'
154
+ type: :development
155
+ prerelease: false
156
+ version_requirements: !ruby/object:Gem::Requirement
157
+ requirements:
158
+ - - "~>"
159
+ - !ruby/object:Gem::Version
160
+ version: '2.14'
161
+ - !ruby/object:Gem::Dependency
162
+ name: guard-rspec
163
+ requirement: !ruby/object:Gem::Requirement
164
+ requirements:
165
+ - - "~>"
166
+ - !ruby/object:Gem::Version
167
+ version: '4.7'
168
+ type: :development
169
+ prerelease: false
170
+ version_requirements: !ruby/object:Gem::Requirement
171
+ requirements:
172
+ - - "~>"
173
+ - !ruby/object:Gem::Version
174
+ version: '4.7'
175
+ description:
176
+ email:
177
+ - brennanmusic@gmail.com
178
+ executables: []
179
+ extensions: []
180
+ extra_rdoc_files: []
181
+ files:
182
+ - ".circleci/config.yml"
183
+ - ".gitignore"
184
+ - ".rspec"
185
+ - ".ruby-version"
186
+ - ".travis.yml"
187
+ - Dockerfile
188
+ - Gemfile
189
+ - Gemfile.lock
190
+ - Guardfile
191
+ - MIT-LICENSE
192
+ - README.md
193
+ - Rakefile
194
+ - bin/checksum.rb
195
+ - bin/console
196
+ - bin/setup
197
+ - certs/jamesbrennan.pem
198
+ - checksum/pg_conduit-0.1.0.gem.sha512
199
+ - docker-compose.yml
200
+ - lib/pg_conduit.rb
201
+ - lib/pg_conduit/connections.rb
202
+ - lib/pg_conduit/db_writer.rb
203
+ - lib/pg_conduit/file_writer.rb
204
+ - lib/pg_conduit/null_writer.rb
205
+ - lib/pg_conduit/parallel_stream_reader.rb
206
+ - lib/pg_conduit/pipe.rb
207
+ - lib/pg_conduit/query_stream.rb
208
+ - lib/pg_conduit/row_collector.rb
209
+ - lib/pg_conduit/stdout_writer.rb
210
+ - lib/pg_conduit/version.rb
211
+ - pg_conduit.gemspec
212
+ homepage: https://github.com/jamesBrennan/pg_conduit
213
+ licenses:
214
+ - MIT
215
+ metadata: {}
216
+ post_install_message:
217
+ rdoc_options: []
218
+ require_paths:
219
+ - lib
220
+ required_ruby_version: !ruby/object:Gem::Requirement
221
+ requirements:
222
+ - - ">="
223
+ - !ruby/object:Gem::Version
224
+ version: '0'
225
+ required_rubygems_version: !ruby/object:Gem::Requirement
226
+ requirements:
227
+ - - ">="
228
+ - !ruby/object:Gem::Version
229
+ version: '0'
230
+ requirements: []
231
+ rubyforge_project:
232
+ rubygems_version: 2.7.6
233
+ signing_key:
234
+ specification_version: 4
235
+ summary: Stream data from one postgres database to another
236
+ test_files: []
metadata.gz.sig ADDED
Binary file