pg_conduit 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA256:
3
+ metadata.gz: c3a6e9d910b4d945b126b1a769182899c3230db825b30a384dc13b4eef7b34e3
4
+ data.tar.gz: 8441234b1dd220fdb4b1fa8b8628dda8d5efe016f30bc196968e98421b3a9eb0
5
+ SHA512:
6
+ metadata.gz: '08e0fc8a6219b8c2ee4844ec56a415ea064c62415a53a1d89f4cdd5b09f847dea19396a1e93fb4c1f35fe26af35583a55129ea1999675c842461d328d195df90'
7
+ data.tar.gz: b82ed7a7d8d2dd8537600cf049c32eee574b8dcaed951c015578aab125025bc0e2cae155bd9d779f1513b5b007a493e80b918876ad09d3f71148a176f9f50a67
checksums.yaml.gz.sig ADDED
Binary file
@@ -0,0 +1,70 @@
1
# Ruby CircleCI 2.0 configuration file
#
# Check https://circleci.com/docs/2.0/language-ruby/ for more details
#
version: 2
jobs:
  build:
    docker:
      # specify the version you desire here
      - image: circleci/ruby:2.5.1-node-browsers
        environment:
          - TEST_DB_HOST=postgres://circleci@localhost/
          - TEST_DB_SRC=postgres://circleci@localhost/pg_conduit_src_test
          - TEST_DB_DEST=postgres://circleci@localhost/pg_conduit_dest_test

      # Postgres instance the TEST_DB_* URLs above point at (user `circleci`).
      - image: circleci/postgres:9.6-alpine
        environment:
          POSTGRES_USER: circleci
          POSTGRES_DB: circleci

    working_directory: ~/src

    steps:
      - checkout

      # Download and cache dependencies
      - restore_cache:
          keys:
            - v1-dependencies-{{ checksum "Gemfile.lock" }}
            # fallback to using the latest cache if no exact match is found
            - v1-dependencies-

      - run:
          name: install dependencies
          command: |
            bundle install --jobs=4 --retry=3 --path vendor/bundle

      - save_cache:
          paths:
            - ./vendor/bundle
          key: v1-dependencies-{{ checksum "Gemfile.lock" }}

      - run:
          name: Wait for postgres container
          command: dockerize -wait tcp://127.0.0.1:5432 -timeout 120s

      # Creates pg_conduit_src_test and pg_conduit_dest_test (see Rakefile).
      - run:
          name: create databases
          command: bundle exec rake db:create

      # run tests!
      - run:
          name: run tests
          command: |
            # -p: do not fail when the directory already exists
            mkdir -p /tmp/test-results
            TEST_FILES="$(circleci tests glob "spec/**/*_spec.rb" | circleci tests split --split-by=timings)"

            # progress goes to the console, JUnit XML to the results directory.
            # $TEST_FILES is intentionally unquoted so it splits into one
            # argument per spec file.
            bundle exec rspec --format progress \
                              --format RspecJunitFormatter \
                              --out /tmp/test-results/rspec.xml \
                              $TEST_FILES

      # collect reports
      - store_test_results:
          path: /tmp/test-results

      - store_artifacts:
          path: /tmp/test-results
          destination: test-results
data/.gitignore ADDED
@@ -0,0 +1,15 @@
1
+ /.bundle/
2
+ /.yardoc
3
+ /_yardoc/
4
+ /coverage/
5
+ /doc/
6
+ /pkg/
7
+ /spec/reports/
8
+ /tmp/
9
+
10
+ # rspec failure tracking
11
+ .rspec_status
12
+ .env
13
+
14
+ # simplecov results
15
+ coverage
data/.rspec ADDED
@@ -0,0 +1,3 @@
1
+ --format documentation
2
+ --color
3
+ --require spec_helper
data/.ruby-version ADDED
@@ -0,0 +1 @@
1
+ 2.5.1@pg_conduit
data/.travis.yml ADDED
@@ -0,0 +1,5 @@
1
+ sudo: false
2
+ language: ruby
3
+ rvm:
4
+ - 2.4.2
5
+ before_install: gem install bundler -v 1.16.0
data/Dockerfile ADDED
@@ -0,0 +1,24 @@
1
FROM ruby:2.5.1-alpine

# System packages for building gems with native extensions (postgresql-dev is
# needed by the `pg` gem) plus the psql client, tzdata and git.
# --no-cache fetches a fresh index and leaves nothing in /var/cache/apk,
# replacing the former `--update ... && rm -rf /var/cache/apk/*` pair.
RUN apk add --no-cache \
      build-base \
      libxml2-dev \
      libxslt-dev \
      postgresql-dev \
      postgresql-client \
      tzdata \
      git

ENV APP_HOME=/src

RUN mkdir $APP_HOME
WORKDIR $APP_HOME

# Gems are installed outside the source tree (docker-compose mounts a named
# volume at /bundle).
ENV BUNDLE_PATH=/bundle

# COPY instead of ADD: a plain copy of the build context is all that is needed.
COPY . $APP_HOME

RUN bundle install

# Exec form must be a JSON array, i.e. double-quoted. With single quotes
# Docker falls back to shell form and tries to run the literal text.
CMD ["ash"]
data/Gemfile ADDED
@@ -0,0 +1,6 @@
1
source 'https://rubygems.org'

# Expands the `github: 'owner/repo'` shorthand into an HTTPS clone URL.
# The string must be double-quoted: inside single quotes `#{repo_name}` is not
# interpolated and every github source would resolve to the literal text.
git_source(:github) { |repo_name| "https://github.com/#{repo_name}" }

# Specify your gem's dependencies in pg_conduit.gemspec
gemspec
data/Gemfile.lock ADDED
@@ -0,0 +1,90 @@
1
+ PATH
2
+ remote: .
3
+ specs:
4
+ pg_conduit (0.1.0)
5
+ connection_pool (~> 2.2)
6
+ pg (~> 1.0)
7
+
8
+ GEM
9
+ remote: https://rubygems.org/
10
+ specs:
11
+ coderay (1.1.2)
12
+ connection_pool (2.2.2)
13
+ diff-lcs (1.3)
14
+ docile (1.3.1)
15
+ ffi (1.9.25)
16
+ formatador (0.2.5)
17
+ guard (2.14.2)
18
+ formatador (>= 0.2.4)
19
+ listen (>= 2.7, < 4.0)
20
+ lumberjack (>= 1.0.12, < 2.0)
21
+ nenv (~> 0.1)
22
+ notiffany (~> 0.0)
23
+ pry (>= 0.9.12)
24
+ shellany (~> 0.0)
25
+ thor (>= 0.18.1)
26
+ guard-compat (1.2.1)
27
+ guard-rspec (4.7.3)
28
+ guard (~> 2.1)
29
+ guard-compat (~> 1.1)
30
+ rspec (>= 2.99.0, < 4.0)
31
+ json (2.1.0)
32
+ listen (3.1.5)
33
+ rb-fsevent (~> 0.9, >= 0.9.4)
34
+ rb-inotify (~> 0.9, >= 0.9.7)
35
+ ruby_dep (~> 1.2)
36
+ lumberjack (1.0.13)
37
+ method_source (0.9.0)
38
+ nenv (0.3.0)
39
+ notiffany (0.1.1)
40
+ nenv (~> 0.1)
41
+ shellany (~> 0.0)
42
+ pg (1.0.0)
43
+ pry (0.11.3)
44
+ coderay (~> 1.1.0)
45
+ method_source (~> 0.9.0)
46
+ rake (10.5.0)
47
+ rb-fsevent (0.10.3)
48
+ rb-inotify (0.9.10)
49
+ ffi (>= 0.5.0, < 2)
50
+ rspec (3.7.0)
51
+ rspec-core (~> 3.7.0)
52
+ rspec-expectations (~> 3.7.0)
53
+ rspec-mocks (~> 3.7.0)
54
+ rspec-core (3.7.1)
55
+ rspec-support (~> 3.7.0)
56
+ rspec-expectations (3.7.0)
57
+ diff-lcs (>= 1.2.0, < 2.0)
58
+ rspec-support (~> 3.7.0)
59
+ rspec-mocks (3.7.0)
60
+ diff-lcs (>= 1.2.0, < 2.0)
61
+ rspec-support (~> 3.7.0)
62
+ rspec-support (3.7.1)
63
+ rspec_junit_formatter (0.4.1)
64
+ rspec-core (>= 2, < 4, != 2.12.0)
65
+ ruby_dep (1.5.0)
66
+ shellany (0.0.1)
67
+ simplecov (0.16.1)
68
+ docile (~> 1.1)
69
+ json (>= 1.8, < 3)
70
+ simplecov-html (~> 0.10.0)
71
+ simplecov-html (0.10.2)
72
+ thor (0.20.0)
73
+ yard (0.9.14)
74
+
75
+ PLATFORMS
76
+ ruby
77
+
78
+ DEPENDENCIES
79
+ bundler (~> 1.16)
80
+ guard (~> 2.14)
81
+ guard-rspec (~> 4.7)
82
+ pg_conduit!
83
+ rake (~> 10.0)
84
+ rspec (~> 3.0)
85
+ rspec_junit_formatter (~> 0.4.1)
86
+ simplecov (~> 0.16.1)
87
+ yard (~> 0.9)
88
+
89
+ BUNDLED WITH
90
+ 1.16.2
data/Guardfile ADDED
@@ -0,0 +1,21 @@
1
+ # A sample Guardfile
2
+ # More info at https://github.com/guard/guard#readme
3
+
4
+ ## Uncomment and set this to only include directories you want to watch
5
+ # directories %w(app lib config test spec features) \
6
+ # .select{|d| Dir.exists?(d) ? d : UI.warning("Directory #{d} does not exist")}
7
+
8
+ ## Note: if you are using the `directories` clause above and you are not
9
+ ## watching the project directory ('.'), then you will want to move
10
+ ## the Guardfile to a watched dir and symlink it back, e.g.
11
+ #
12
+ # $ mkdir config
13
+ # $ mv Guardfile config/
14
+ # $ ln -s config/Guardfile .
15
+ #
16
+ # and, you'll have to watch "config/Guardfile" instead of "Guardfile"
17
+
18
+ guard :rspec, cmd: 'bundle exec rspec', all_on_start: true do
19
+ watch(%r{^spec/(.+)\.rb$}) { 'spec' }
20
+ watch(%r{^lib/(.+)\.rb$}) { |m| "spec/#{m[1]}_spec.rb" }
21
+ end
data/MIT-LICENSE ADDED
@@ -0,0 +1,20 @@
1
+ Copyright (c) 2018 James Brennan
2
+
3
+ Permission is hereby granted, free of charge, to any person obtaining
4
+ a copy of this software and associated documentation files (the
5
+ "Software"), to deal in the Software without restriction, including
6
+ without limitation the rights to use, copy, modify, merge, publish,
7
+ distribute, sublicense, and/or sell copies of the Software, and to
8
+ permit persons to whom the Software is furnished to do so, subject to
9
+ the following conditions:
10
+
11
+ The above copyright notice and this permission notice shall be
12
+ included in all copies or substantial portions of the Software.
13
+
14
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
15
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
16
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
17
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
18
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
19
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
20
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/README.md ADDED
@@ -0,0 +1,127 @@
1
+ # PgConduit
2
+
3
+ [![CircleCI](https://img.shields.io/circleci/project/github/jamesBrennan/pg_conduit.svg?style=svg)](https://circleci.com/gh/jamesBrennan/pg_conduit)
4
+
5
+ Stream data between two postgres databases. This is mostly an excuse for me to
6
+ play around with concurrency in Ruby.
7
+
8
+ This gem is in early development. As such I would advise against using it in any
9
+ environment where data integrity is important. I will release version 1.0 when
10
+ I feel confident that the code is sufficiently robust.
11
+
12
+ ## Installation
13
+
14
+ Add this line to your application's Gemfile:
15
+
16
+ ```ruby
17
+ gem 'pg_conduit'
18
+ ```
19
+
20
+ And then execute:
21
+
22
+ $ bundle
23
+
24
+ Or install it yourself as:
25
+
26
+ $ gem install pg_conduit
27
+
28
+ ## Quick Start
29
+
30
+ ### `PgConduit.db_to_db(source, destination)`
31
+
32
+ Returns an instance of `PgConduit::Pipe` that will execute queries passed to
33
+ `read` and `write` against the `source` and `destination` databases,
34
+ respectively.
35
+
36
+ The `source` and `destination` arguments are passed to
37
+ [`PG::Connection`](https://www.rubydoc.info/gems/pg/PG/Connection), so any
38
+ arguments that it accepts can be used.
39
+
40
+ #### Write one row at a time
41
+
42
+ ```ruby
43
+ source = 'postgres://user:pass@source/db'
44
+ destination = { dbname: 'my_local_db' }
45
+
46
+ pipe = PgConduit.db_to_db(source, destination)
47
+
48
+ pipe.read('SELECT id, full_name, email FROM users')
49
+ .write do |user|
50
+ <<-SQL
51
+ INSERT INTO customers(user_id, name, email)
52
+ VALUES ('#{user['id']}', '#{user['full_name']}', '#{user['email']}')
53
+ SQL
54
+ end
55
+ ```
56
+
57
+ #### Write in batches
58
+
59
+ ```ruby
60
+ source = 'postgres://user:pass@source/db'
61
+ destination = { dbname: 'my_local_db' }
62
+
63
+ pipe = PgConduit.db_to_db(source, destination)
64
+
65
+ pipe.read('SELECT id, full_name, email FROM users')
66
+ .transform do |user|
67
+ <<-SQL
68
+ ('#{user['id']}', '#{user['full_name']}', '#{user['email']}')
69
+ SQL
70
+ end
71
+ .write_batched(size: 100) do |values|
72
+ <<-SQL
73
+ INSERT INTO customers(user_id, name, email)
74
+ VALUES #{values.join(',')}
75
+ SQL
76
+ end
77
+ ```
78
+
79
+ ### `PgConduit.db_to_file(source, destination)`
80
+
81
+ Write output from source database to file.
82
+
83
+ ```ruby
84
+ source = 'postgres://user:pass@source/db'
85
+ destination = '/some/system/path/user_count.txt'
86
+
87
+ pipe = PgConduit.db_to_file(source, destination)
88
+
89
+ pipe.read('SELECT count(*) FROM users')
90
+ .write { |res| "Number of users: #{res['count']}" }
91
+ ```
92
+
93
+ ### `PgConduit.db_to_stdout(source)`
94
+
95
+ Write output from source database to stdout.
96
+
97
+ ```ruby
98
+ pipe = PgConduit.db_to_stdout('postgres://user:pass@source/db')
99
+
100
+ query = <<-SQL
101
+ SELECT posts.user_id, users.email, count(posts.*) FROM users
102
+ JOIN posts ON posts.user_id = users.id
103
+ GROUP BY posts.user_id, users.email
104
+ SQL
105
+
106
+ pipe.read(query)
107
+ .write do |post_count|
108
+ "#{post_count['user_id']} | #{post_count['email']} - #{post_count['count']}"
109
+ end
110
+ ```
111
+
112
+ ### `PgConduit.db_to_null(source)`
113
+
114
+ Swallow output from source database. Mostly useful for testing. `exec` is an
115
+ alias of `write`.
116
+
117
+ ```ruby
118
+ pipe = PgConduit.db_to_null('postgres://user:pass@source/db')
119
+ pipe.read('SELECT count(*) FROM users')
120
+ .exec { |res| raise 'fail' unless res['count'].to_i == 10 }
121
+ ```
122
+
123
+ ## Development
124
+
125
+ ## Contributing
126
+
127
+ Bug reports and pull requests are welcome on GitHub at https://github.com/jamesBrennan/pg_conduit.
data/Rakefile ADDED
@@ -0,0 +1,23 @@
1
require 'bundler/gem_tasks'
require 'rspec/core/rake_task'
require 'pg'

# Create database +name+ over the open connection +conn+.
# An already existing database is reported on stdout and otherwise ignored.
#
# @param conn [PG::Connection]
# @param name [String] database name, interpolated into the statement as-is
def create_db(conn, name)
  conn.exec "CREATE DATABASE #{name}"
rescue PG::DuplicateDatabase
  puts "Create database skipped: '#{name}' already exists."
end

RSpec::Core::RakeTask.new(:spec)

task :default => :spec

namespace :db do
  desc 'Create the source and destination test databases on TEST_DB_HOST'
  task :create do
    # Open the connection outside the begin/ensure: if opening fails there is
    # nothing to close, and the original connection error must surface rather
    # than a NoMethodError on nil. An explicit begin block (instead of
    # `do ... ensure ... end`) also keeps the file parseable before Ruby 2.5.
    conn = PG::Connection.open(ENV['TEST_DB_HOST'])
    begin
      create_db conn, 'pg_conduit_src_test'
      create_db conn, 'pg_conduit_dest_test'
    ensure
      conn.close
    end
  end
end
data/bin/checksum.rb ADDED
@@ -0,0 +1,9 @@
1
#!/usr/bin/env ruby
# Writes the SHA-512 hex digest of pkg/pg_conduit-<version>.gem to
# checksum/pg_conduit-<version>.gem.sha512. Run from the project root.
require 'digest/sha2'
require 'fileutils'
require 'pg_conduit/version'

gem_name = "pg_conduit-#{PgConduit::VERSION}.gem"

# Digest the file as raw bytes. File.read opens in text mode, which can alter
# binary content on platforms that translate line endings.
checksum = Digest::SHA512.file("pkg/#{gem_name}").hexdigest

# The output directory may not exist on a fresh checkout.
FileUtils.mkdir_p 'checksum'
File.open("checksum/#{gem_name}.sha512", 'w') do |f|
  f.write checksum
end
data/bin/console ADDED
@@ -0,0 +1,14 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ require 'bundler/setup'
4
+ require 'pg_conduit'
5
+
6
+ # You can add fixtures and/or initialization code here to make experimenting
7
+ # with your gem easier. You can also use a different console, if you like.
8
+
9
+ # (If you use this, don't forget to add pry to your Gemfile!)
10
+ # require 'pry'
11
+ # Pry.start
12
+
13
+ require 'irb'
14
+ IRB.start(__FILE__)
data/bin/setup ADDED
@@ -0,0 +1,8 @@
1
+ #!/usr/bin/env bash
2
+ set -euo pipefail
3
+ IFS=$'\n\t'
4
+ set -vx
5
+
6
+ bundle install
7
+
8
+ # Do any other automated setup that you need to do here
@@ -0,0 +1,21 @@
1
+ -----BEGIN CERTIFICATE-----
2
+ MIIDgDCCAmigAwIBAgIBATANBgkqhkiG9w0BAQUFADBDMRUwEwYDVQQDDAxicmVu
3
+ bmFubXVzaWMxFTATBgoJkiaJk/IsZAEZFgVnbWFpbDETMBEGCgmSJomT8ixkARkW
4
+ A2NvbTAeFw0xODA3MDYwMDA2MzRaFw0xOTA3MDYwMDA2MzRaMEMxFTATBgNVBAMM
5
+ DGJyZW5uYW5tdXNpYzEVMBMGCgmSJomT8ixkARkWBWdtYWlsMRMwEQYKCZImiZPy
6
+ LGQBGRYDY29tMIIBIjANBgkqhkiG9w0BAQEFAAOCAQ8AMIIBCgKCAQEAyxykNKqp
7
+ jD5gnQd2YHuRDNsLN5limGMDjSb38lMZAfJJugoWbSSeQM3+iVl26YWE5JeJpqDu
8
+ Rr/GOQ54bsev4IxCfPl8yIkhAao0i96+k1EeEb/i3pgZOfjXgGSTM71baCZzbqBH
9
+ A5ivcb0wqoHWyqrxNtc4XhSs5RtvtNl3IEe/bs5qqixt6PwsCorD6kAoUUNHNM9W
10
+ MLexivfiJooHOvVRmf1DQuJ2fDccKNHf7ZR4PuiguT4Z73bT2njCBm2o86o/clvR
11
+ WV0jasBOKvsgcg65U10klngBEG56nWSdIE9PrXGPqwBwA6jXttAwFLUNvuam5nGQ
12
+ V3MbCFDeHUUrtwIDAQABo38wfTAJBgNVHRMEAjAAMAsGA1UdDwQEAwIEsDAdBgNV
13
+ HQ4EFgQUPJGTSMevbRtDlY9WBrs21ae/+5YwIQYDVR0RBBowGIEWYnJlbm5hbm11
14
+ c2ljQGdtYWlsLmNvbTAhBgNVHRIEGjAYgRZicmVubmFubXVzaWNAZ21haWwuY29t
15
+ MA0GCSqGSIb3DQEBBQUAA4IBAQBr5Fm1ULBbLUYvX85iXI1Q7L7BZp53p9Q4TXrd
16
+ 0n0dE0qHJELwJQkErwZwMhOzaLqcctCwdzLkc9/VZSpLLuzv5bfEWP4EyTET5a+i
17
+ 4dn/Wko+6aNPVonmlHDhNYOPl3edxgxoD0WW08U7NJ4tGxJJURVv4yrCayT3xLCA
18
+ yzUUQaHKkLaXHSfH0yhha+pFpUlTsLeg9hrZ0jqSk4FzUrKbiieLY/f/p5Xr5QDg
19
+ fXe/xr/Sc+2wCjHPVE2J+auN5hk3KCp1I4s2fKqyLIwyhTEF3shuYfCpC8rt/YdN
20
+ cy9/lg5LCI3OvakzxL4Xt1Sq4h/xJZ06ydTVJ1wxfk6BXHrg
21
+ -----END CERTIFICATE-----
@@ -0,0 +1 @@
1
+ 2401cb5efe647705eee22f6ae64a183012e5552bb2417dcc1c78cf52c5c44901925b849443685e63402da82e97a8d8c88c1fe3c1c52d86d184862e900258af58
@@ -0,0 +1,34 @@
1
version: "3"
services:
  # Serves the YARD documentation; reachable on host port 9999.
  yard-docs:
    build: .
    command: bundle exec yard server --reload
    ports:
      # Port mappings are quoted so they are always read as strings.
      - "9999:8808"
    volumes:
      - .:/src
      - bundle:/bundle

  # Runs guard against the `db` service.
  test:
    build: .
    command: bundle exec guard --no-bundler-warning --no-interactions
    volumes:
      - .:/src
      - bundle:/bundle
    environment:
      - TEST_DB_SRC=postgres://postgres@db/pg_conduit_src_test
      - TEST_DB_DEST=postgres://postgres@db/pg_conduit_dest_test
      - TEST_DB_HOST=postgres://postgres@db/
    links:
      - db

  db:
    image: postgres:9.6.6
    ports:
      - "5499:5432"
    volumes:
      - database:/var/lib/postgresql/data

# Named volumes with default settings.
volumes:
  bundle:
  database:
@@ -0,0 +1,24 @@
1
require 'connection_pool'

module PgConduit
  # Holds one connection pool for the source database and one for the
  # destination database.
  class Connections
    attr_reader :src_pool, :dest_pool

    class << self
      # Build a pool whose connections are opened with +params+.
      #
      # @param params [String,Hash] anything PG::Connection.open accepts
      # @return [ConnectionPool]
      def init_pool(params)
        ConnectionPool.new { PG::Connection.open(params) }
      end
    end

    # @param source [String,Hash] source database connection params
    # @param destination [String,Hash] destination database connection params
    def initialize(source, destination)
      @src_pool = self.class.init_pool(source)
      @dest_pool = self.class.init_pool(destination)
    end

    # Check out a source connection for the duration of the block.
    #
    # @yield [PG::Connection]
    def with_source(&block)
      @src_pool.with(&block)
    end

    # Check out a destination connection for the duration of the block.
    #
    # @yield [PG::Connection]
    def with_destination(&block)
      @dest_pool.with(&block)
    end
  end
end
@@ -0,0 +1,11 @@
1
module PgConduit
  # Writer that executes SQL against a pooled database connection.
  class DBWriter
    # @param pool [#with] pool that yields a connection responding to #exec
    def initialize(pool)
      @pool = pool
    end

    # Check out a connection, then execute the SQL produced by the block.
    #
    # @yieldreturn [String] the statement to execute
    # @return whatever the connection's #exec returns
    def write
      @pool.with do |connection|
        statement = yield
        connection.exec(statement)
      end
    end
  end
end
@@ -0,0 +1,11 @@
1
module PgConduit
  # Writer that appends one line per write to a file.
  class FileWriter
    # @param path [String,Pathname] file to append to; created on first write
    def initialize(path)
      @path = path
    end

    # Append the value returned by the block, followed by a newline.
    #
    # File.open is used rather than Kernel#open: the latter treats a path that
    # starts with "|" as a command to run.
    #
    # @yieldreturn [#to_s] the content to append
    def write
      File.open(@path, 'a') { |f| f.puts yield }
    end
  end
end
@@ -0,0 +1,7 @@
1
module PgConduit
  # Writer that evaluates the block and discards its result.
  class NullWriter
    # Run the block for its side effects only.
    #
    # @return [nil]
    def write
      yield
      nil
    end
  end
end
@@ -0,0 +1,60 @@
1
module PgConduit
  # A multi threaded stream reader.
  #
  # One thread pulls rows from the stream into a bounded queue while a fixed
  # number of worker threads pop rows off the queue and hand them to the
  # caller's block.
  class ParallelStreamReader
    # @param query_stream [PgConduit::QueryStream] any object responding to
    #   #each_row
    # @param threads [Integer] The number of threads to use for workers
    # @param queue_max_size [Integer] How many rows should be stored in memory
    #   in the work queue.
    def initialize(query_stream, threads: 5, queue_max_size: 1000)
      @queue_max_size = queue_max_size
      @workers = threads
      @stream = query_stream
    end

    # Read a QueryStream and yield its rows
    #
    # A new queue is created for every call, so the reader can be used more
    # than once (a closed queue cannot be reopened).
    #
    # @yield [Hash] A single row from the QueryStream. Every row from the stream
    #   will be yielded but order is not guaranteed.
    # @return [Symbol] :ok once the stream is exhausted and all workers finished
    # @raise whatever the stream or the callback raised
    def read(&callback)
      queue = SizedQueue.new(@queue_max_size)
      reader = read_stream(@stream, queue)
      workers = dispatch_workers(queue, &callback)
      reader.join
      workers.each(&:join)
      :ok
    end

    private

    # Start the producer thread that pushes every row of the stream onto the
    # queue.
    def read_stream(query_stream, queue)
      Thread.new do
        begin
          query_stream.each_row { |row| queue << row }
        ensure
          # Always close the queue, even when the stream raises; otherwise the
          # workers would wait on an empty queue forever.
          queue.close
        end
      end
    end

    # Start the configured number of worker threads.
    def dispatch_workers(queue, &callback)
      @workers.times.map { dispatch_worker(queue, &callback) }
    end

    # Start one worker that runs the callback directly in its own thread,
    # without spawning a thread per row.
    def dispatch_worker(queue, &callback)
      Thread.new do
        loop do
          # pop blocks while the queue is open and empty, and returns nil once
          # it is closed and drained.
          row = queue.pop
          if row
            callback.call row
          elsif queue.closed?
            break
          end
        end
      end
    end
  end
end
@@ -0,0 +1,62 @@
1
module PgConduit
  # Connects a row stream to a writer, optionally transforming each row on the
  # way.
  class Pipe
    # @param from [#query, #each_row] the stream rows are read from
    # @param to [#write] the writer each result is handed to
    #
    # @example
    #   Pipe
    #     .new(from: query_stream, to: db_writer)
    #     .read('SELECT full_name FROM users')
    #     .write do |user|
    #       %(INSERT INTO friends (name) VALUES ('#{user["full_name"]}'))
    #     end
    def initialize(from:, to:)
      @stream = from
      @writer = to
      @reader = ParallelStreamReader.new(@stream)
      @transformers = []
    end

    # Set the query the stream will execute.
    #
    # @param query [String]
    # @return [self]
    def read(query)
      self.tap { @stream.query(query) }
    end

    # Append a transformation step. Steps run in the order they were added and
    # each receives the previous step's result.
    #
    # @yield [Object] the row, or the previous transformer's output
    # @return [self]
    def transform(&transformer)
      self.tap { @transformers << transformer }
    end

    # Run the pipe: read every row, apply the transformers and pass the result
    # to the writer.
    #
    # When a block is given it is applied as a final formatting step, as in
    # `pipe.read(sql).write { |row| ... }`. Without a block the transformed
    # row itself is written.
    #
    # @yield [Object] the transformed row
    # @yieldreturn [Object] the value handed to the writer
    # @return whatever the reader's #read returns
    def write(&formatter)
      exec_read do |row|
        exec_write do
          data = exec_transform(row)
          formatter ? formatter.call(data) : data
        end
      end
    end

    # Observe each row at this point of the transformer chain without changing
    # it. (`peak` is the original spelling; `peek` is provided as an alias.)
    #
    # @yield [Object] the row as it looks at this step
    # @return [self]
    def peak
      self.tap { @transformers << ->(row) { row.tap { yield row } } }
    end

    alias_method :peek, :peak

    # Run the pipe, handing rows to the writer in batches.
    #
    # @param size [Integer] number of transformed rows per batch
    # @yield [Array] one batch of transformed rows
    # @yieldreturn [Object] the value handed to the writer for that batch
    def write_batched(size: 1000)
      collector = RowCollector.new(chunk_size: size)

      # Set callback to yield collected rows
      collector.on_chunk { |rows| exec_write { yield rows } }

      # Process each row
      exec_read { |row| collector << exec_transform(row) }

      # Yield any remaining rows
      collector.finish
    end

    alias_method :exec, :write

    private

    # Stream rows through the parallel reader.
    def exec_read(&b)
      @reader.read(&b)
    end

    # Hand the block's result to the writer.
    def exec_write(&b)
      @writer.write(&b)
    end

    # Fold the row through every registered transformer, in order.
    def exec_transform(row)
      @transformers.reduce(row) { |data, transform| transform.call data }
    end
  end
end
@@ -0,0 +1,41 @@
1
module PgConduit
  # Execute a SQL query and provide the results as a stream
  # @example Print username and email for all users
  #
  #   pool = ConnectionPool.new { PG::Connection.open }
  #   stream = PgConduit::QueryStream.new(pool)
  #
  #   stream.query('SELECT * FROM users').each_row do |row|
  #     puts "#{row['username']}, #{row['email']}"
  #   end
  #
  class QueryStream
    attr_reader :sql

    # @param pool [ConnectionPool] A pool of PG::Connections
    def initialize(pool)
      @pool = pool
    end

    # Remember the statement to run; nothing is executed until #each_row.
    #
    # @param sql [String] The SQL query to execute
    # @return [self]
    def query(sql)
      @sql = sql
      self
    end

    # Execute query and yield each row
    #
    # The statement is sent asynchronously and the connection is switched to
    # single row mode before results are fetched.
    #
    # @yield [Hash] A hash representing a single row from the result set
    def each_row
      @pool.with do |connection|
        connection.send_query @sql
        connection.set_single_row_mode
        loop do
          result = connection.get_result
          # get_result returns nil once every result has been consumed
          break unless result
          # raises if the server reported an error for this result
          result.check
          result.stream_each { |record| yield record }
        end
      end
    end
  end
end
@@ -0,0 +1,68 @@
1
module PgConduit
  # A thread safe accumulator, used to chunk an input stream
  class RowCollector
    # @param chunk_size [Integer] How many rows should be collected before
    #   yielding
    def initialize(chunk_size: 100)
      @chunk_size = chunk_size
      @rows = []
      @finished = false
      @mutex = Mutex.new
    end

    # Register the block that receives each accumulated chunk
    #
    # @yield [Array] collected rows
    # @return [self]
    #
    # @example Print once every ten rows
    #
    #   collector = RowCollector.new(chunk_size: 10)
    #   collector.on_chunk { |rows| puts rows }
    #
    #   100.times { |n| collector << n }
    #
    #   #> [0,1,2,3,4,5,6,7,8,9]
    #   #> [10,11,12,13,14,15,16,17,18,19]
    #   #> ...etc
    #
    def on_chunk(&callback)
      @mutex.synchronize { @callback = callback }
      self
    end

    # Add a row; when the buffer reaches a multiple of the chunk size it is
    # handed to the callback while the lock is still held.
    #
    # @param row [Object] Row to add to the buffer
    # @raise [RuntimeError] if the collector has already been finished
    def <<(row)
      @mutex.synchronize do
        raise 'Data may not be added to a row collector that has been marked as finished' if @finished

        @rows << row
        flush(&@callback) if (@rows.length % @chunk_size).zero?
      end
    end

    # Hands any buffered rows to the callback, then marks the collector as
    # finished. Any subsequent calls to :<< will raise an error.
    def finish
      @mutex.synchronize do
        flush(&@callback)
        @finished = true
      end
    end

    private

    # Yields the buffered rows (if any) and starts a new, empty buffer
    # @yield [Array<Hash>] The collected rows
    def flush
      yield @rows unless @rows.empty?
      @rows = []
      true
    end
  end
end
@@ -0,0 +1,7 @@
1
module PgConduit
  # Writer that prints each result to standard output.
  class STDOUTWriter
    # Print the value returned by the block, followed by a newline.
    #
    # @yieldreturn [Object] the value to print
    # @return [nil]
    def write
      $stdout.puts(yield)
    end
  end
end
@@ -0,0 +1,3 @@
1
module PgConduit
  # Gem version; read by pg_conduit.gemspec and bin/checksum.rb.
  VERSION = '0.1.0'
end
data/lib/pg_conduit.rb ADDED
@@ -0,0 +1,67 @@
1
require 'pg'
require 'connection_pool'
require 'pg_conduit/version'

# Entry point: factory methods that wire a database query stream to a writer.
module PgConduit
  autoload :Connections, 'pg_conduit/connections'
  autoload :DBWriter, 'pg_conduit/db_writer'
  autoload :FileWriter, 'pg_conduit/file_writer'
  autoload :NullWriter, 'pg_conduit/null_writer'
  autoload :ParallelStreamReader, 'pg_conduit/parallel_stream_reader'
  autoload :Pipe, 'pg_conduit/pipe'
  autoload :QueryStream, 'pg_conduit/query_stream'
  autoload :RowCollector, 'pg_conduit/row_collector'
  autoload :STDOUTWriter, 'pg_conduit/stdout_writer'

  class << self
    # Create a new DB -> DB Pipe
    #
    # @param src [String,Hash] Source database connection params
    # @param dest [String,Hash] Destination database connection params
    # @return [PgConduit::Pipe]
    def db_to_db(src, dest)
      pools = Connections.new(src, dest)
      reader = QueryStream.new(pools.src_pool)
      writer = DBWriter.new(pools.dest_pool)

      Pipe.new(from: reader, to: writer)
    end

    # Create a new DB -> File Pipe
    #
    # @param src [String,Hash] Source database connection params
    # @param dest [String,Pathname] Path to destination file
    # @return [PgConduit::Pipe]
    def db_to_file(src, dest)
      reader = QueryStream.new(Connections.init_pool(src))
      writer = FileWriter.new(dest)

      Pipe.new(from: reader, to: writer)
    end

    # Create a new DB -> STDOUT Pipe
    #
    # @param src [String,Hash] Source database connection params
    # @return [PgConduit::Pipe]
    def db_to_stdout(src)
      reader = QueryStream.new(Connections.init_pool(src))
      writer = STDOUTWriter.new

      Pipe.new(from: reader, to: writer)
    end

    # Create a new DB -> Null pipe
    #
    # @param src [String,Hash] Source database connection params
    # @return [PgConduit::Pipe]
    def db_to_null(src)
      reader = QueryStream.new(Connections.init_pool(src))
      writer = NullWriter.new

      Pipe.new(from: reader, to: writer)
    end
  end
end
@@ -0,0 +1,37 @@
1
+
2
# Make lib/ loadable so the version constant can be required below.
lib = File.expand_path('../lib', __FILE__)
$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
require 'pg_conduit/version'

Gem::Specification.new do |spec|
  spec.name          = 'pg_conduit'
  spec.version       = PgConduit::VERSION
  spec.authors       = ['James Brennan']
  spec.email         = ['brennanmusic@gmail.com']

  spec.summary       = 'Stream data from one postgres database to another'
  spec.homepage      = 'https://github.com/jamesBrennan/pg_conduit'

  spec.license       = 'MIT'

  # ParallelStreamReader relies on Queue#close / Queue#closed?, which are
  # available from Ruby 2.3 on.
  spec.required_ruby_version = '>= 2.3.0'

  spec.cert_chain    = ['certs/jamesbrennan.pem']
  # Only sign when run through the `gem` executable, so loading the gemspec
  # (e.g. via Bundler) does not need the private key.
  spec.signing_key   = File.expand_path('~/.ssh/gem-private_key.pem') if $0 =~ /gem\z/

  # Package every tracked file except the specs.
  spec.files         = `git ls-files -z`.split("\x0").reject do |f|
    f.match(%r{^spec/})
  end

  spec.require_paths = ['lib']

  spec.add_dependency 'pg', '~> 1.0'
  spec.add_dependency 'connection_pool', '~> 2.2'

  spec.add_development_dependency 'bundler', '~> 1.16'
  spec.add_development_dependency 'rake', '~> 10.0'
  spec.add_development_dependency 'rspec', '~> 3.0'
  spec.add_development_dependency 'rspec_junit_formatter', '~> 0.4.1'
  spec.add_development_dependency 'simplecov', '~> 0.16.1'
  spec.add_development_dependency 'yard', '~> 0.9'
  spec.add_development_dependency 'guard', '~> 2.14'
  spec.add_development_dependency 'guard-rspec', '~> 4.7'
end
data.tar.gz.sig ADDED
Binary file
metadata ADDED
@@ -0,0 +1,236 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: pg_conduit
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.1.0
5
+ platform: ruby
6
+ authors:
7
+ - James Brennan
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain:
11
+ - |
12
+ -----BEGIN CERTIFICATE-----
13
+ MIIDgDCCAmigAwIBAgIBATANBgkqhkiG9w0BAQUFADBDMRUwEwYDVQQDDAxicmVu
14
+ bmFubXVzaWMxFTATBgoJkiaJk/IsZAEZFgVnbWFpbDETMBEGCgmSJomT8ixkARkW
15
+ A2NvbTAeFw0xODA3MDYwMDA2MzRaFw0xOTA3MDYwMDA2MzRaMEMxFTATBgNVBAMM
16
+ DGJyZW5uYW5tdXNpYzEVMBMGCgmSJomT8ixkARkWBWdtYWlsMRMwEQYKCZImiZPy
17
+ LGQBGRYDY29tMIIBIjANBgkqhkiG9w0BAQEFAAOCAQ8AMIIBCgKCAQEAyxykNKqp
18
+ jD5gnQd2YHuRDNsLN5limGMDjSb38lMZAfJJugoWbSSeQM3+iVl26YWE5JeJpqDu
19
+ Rr/GOQ54bsev4IxCfPl8yIkhAao0i96+k1EeEb/i3pgZOfjXgGSTM71baCZzbqBH
20
+ A5ivcb0wqoHWyqrxNtc4XhSs5RtvtNl3IEe/bs5qqixt6PwsCorD6kAoUUNHNM9W
21
+ MLexivfiJooHOvVRmf1DQuJ2fDccKNHf7ZR4PuiguT4Z73bT2njCBm2o86o/clvR
22
+ WV0jasBOKvsgcg65U10klngBEG56nWSdIE9PrXGPqwBwA6jXttAwFLUNvuam5nGQ
23
+ V3MbCFDeHUUrtwIDAQABo38wfTAJBgNVHRMEAjAAMAsGA1UdDwQEAwIEsDAdBgNV
24
+ HQ4EFgQUPJGTSMevbRtDlY9WBrs21ae/+5YwIQYDVR0RBBowGIEWYnJlbm5hbm11
25
+ c2ljQGdtYWlsLmNvbTAhBgNVHRIEGjAYgRZicmVubmFubXVzaWNAZ21haWwuY29t
26
+ MA0GCSqGSIb3DQEBBQUAA4IBAQBr5Fm1ULBbLUYvX85iXI1Q7L7BZp53p9Q4TXrd
27
+ 0n0dE0qHJELwJQkErwZwMhOzaLqcctCwdzLkc9/VZSpLLuzv5bfEWP4EyTET5a+i
28
+ 4dn/Wko+6aNPVonmlHDhNYOPl3edxgxoD0WW08U7NJ4tGxJJURVv4yrCayT3xLCA
29
+ yzUUQaHKkLaXHSfH0yhha+pFpUlTsLeg9hrZ0jqSk4FzUrKbiieLY/f/p5Xr5QDg
30
+ fXe/xr/Sc+2wCjHPVE2J+auN5hk3KCp1I4s2fKqyLIwyhTEF3shuYfCpC8rt/YdN
31
+ cy9/lg5LCI3OvakzxL4Xt1Sq4h/xJZ06ydTVJ1wxfk6BXHrg
32
+ -----END CERTIFICATE-----
33
+ date: 2018-07-06 00:00:00.000000000 Z
34
+ dependencies:
35
+ - !ruby/object:Gem::Dependency
36
+ name: pg
37
+ requirement: !ruby/object:Gem::Requirement
38
+ requirements:
39
+ - - "~>"
40
+ - !ruby/object:Gem::Version
41
+ version: '1.0'
42
+ type: :runtime
43
+ prerelease: false
44
+ version_requirements: !ruby/object:Gem::Requirement
45
+ requirements:
46
+ - - "~>"
47
+ - !ruby/object:Gem::Version
48
+ version: '1.0'
49
+ - !ruby/object:Gem::Dependency
50
+ name: connection_pool
51
+ requirement: !ruby/object:Gem::Requirement
52
+ requirements:
53
+ - - "~>"
54
+ - !ruby/object:Gem::Version
55
+ version: '2.2'
56
+ type: :runtime
57
+ prerelease: false
58
+ version_requirements: !ruby/object:Gem::Requirement
59
+ requirements:
60
+ - - "~>"
61
+ - !ruby/object:Gem::Version
62
+ version: '2.2'
63
+ - !ruby/object:Gem::Dependency
64
+ name: bundler
65
+ requirement: !ruby/object:Gem::Requirement
66
+ requirements:
67
+ - - "~>"
68
+ - !ruby/object:Gem::Version
69
+ version: '1.16'
70
+ type: :development
71
+ prerelease: false
72
+ version_requirements: !ruby/object:Gem::Requirement
73
+ requirements:
74
+ - - "~>"
75
+ - !ruby/object:Gem::Version
76
+ version: '1.16'
77
+ - !ruby/object:Gem::Dependency
78
+ name: rake
79
+ requirement: !ruby/object:Gem::Requirement
80
+ requirements:
81
+ - - "~>"
82
+ - !ruby/object:Gem::Version
83
+ version: '10.0'
84
+ type: :development
85
+ prerelease: false
86
+ version_requirements: !ruby/object:Gem::Requirement
87
+ requirements:
88
+ - - "~>"
89
+ - !ruby/object:Gem::Version
90
+ version: '10.0'
91
+ - !ruby/object:Gem::Dependency
92
+ name: rspec
93
+ requirement: !ruby/object:Gem::Requirement
94
+ requirements:
95
+ - - "~>"
96
+ - !ruby/object:Gem::Version
97
+ version: '3.0'
98
+ type: :development
99
+ prerelease: false
100
+ version_requirements: !ruby/object:Gem::Requirement
101
+ requirements:
102
+ - - "~>"
103
+ - !ruby/object:Gem::Version
104
+ version: '3.0'
105
+ - !ruby/object:Gem::Dependency
106
+ name: rspec_junit_formatter
107
+ requirement: !ruby/object:Gem::Requirement
108
+ requirements:
109
+ - - "~>"
110
+ - !ruby/object:Gem::Version
111
+ version: 0.4.1
112
+ type: :development
113
+ prerelease: false
114
+ version_requirements: !ruby/object:Gem::Requirement
115
+ requirements:
116
+ - - "~>"
117
+ - !ruby/object:Gem::Version
118
+ version: 0.4.1
119
+ - !ruby/object:Gem::Dependency
120
+ name: simplecov
121
+ requirement: !ruby/object:Gem::Requirement
122
+ requirements:
123
+ - - "~>"
124
+ - !ruby/object:Gem::Version
125
+ version: 0.16.1
126
+ type: :development
127
+ prerelease: false
128
+ version_requirements: !ruby/object:Gem::Requirement
129
+ requirements:
130
+ - - "~>"
131
+ - !ruby/object:Gem::Version
132
+ version: 0.16.1
133
+ - !ruby/object:Gem::Dependency
134
+ name: yard
135
+ requirement: !ruby/object:Gem::Requirement
136
+ requirements:
137
+ - - "~>"
138
+ - !ruby/object:Gem::Version
139
+ version: '0.9'
140
+ type: :development
141
+ prerelease: false
142
+ version_requirements: !ruby/object:Gem::Requirement
143
+ requirements:
144
+ - - "~>"
145
+ - !ruby/object:Gem::Version
146
+ version: '0.9'
147
+ - !ruby/object:Gem::Dependency
148
+ name: guard
149
+ requirement: !ruby/object:Gem::Requirement
150
+ requirements:
151
+ - - "~>"
152
+ - !ruby/object:Gem::Version
153
+ version: '2.14'
154
+ type: :development
155
+ prerelease: false
156
+ version_requirements: !ruby/object:Gem::Requirement
157
+ requirements:
158
+ - - "~>"
159
+ - !ruby/object:Gem::Version
160
+ version: '2.14'
161
+ - !ruby/object:Gem::Dependency
162
+ name: guard-rspec
163
+ requirement: !ruby/object:Gem::Requirement
164
+ requirements:
165
+ - - "~>"
166
+ - !ruby/object:Gem::Version
167
+ version: '4.7'
168
+ type: :development
169
+ prerelease: false
170
+ version_requirements: !ruby/object:Gem::Requirement
171
+ requirements:
172
+ - - "~>"
173
+ - !ruby/object:Gem::Version
174
+ version: '4.7'
175
+ description:
176
+ email:
177
+ - brennanmusic@gmail.com
178
+ executables: []
179
+ extensions: []
180
+ extra_rdoc_files: []
181
+ files:
182
+ - ".circleci/config.yml"
183
+ - ".gitignore"
184
+ - ".rspec"
185
+ - ".ruby-version"
186
+ - ".travis.yml"
187
+ - Dockerfile
188
+ - Gemfile
189
+ - Gemfile.lock
190
+ - Guardfile
191
+ - MIT-LICENSE
192
+ - README.md
193
+ - Rakefile
194
+ - bin/checksum.rb
195
+ - bin/console
196
+ - bin/setup
197
+ - certs/jamesbrennan.pem
198
+ - checksum/pg_conduit-0.1.0.gem.sha512
199
+ - docker-compose.yml
200
+ - lib/pg_conduit.rb
201
+ - lib/pg_conduit/connections.rb
202
+ - lib/pg_conduit/db_writer.rb
203
+ - lib/pg_conduit/file_writer.rb
204
+ - lib/pg_conduit/null_writer.rb
205
+ - lib/pg_conduit/parallel_stream_reader.rb
206
+ - lib/pg_conduit/pipe.rb
207
+ - lib/pg_conduit/query_stream.rb
208
+ - lib/pg_conduit/row_collector.rb
209
+ - lib/pg_conduit/stdout_writer.rb
210
+ - lib/pg_conduit/version.rb
211
+ - pg_conduit.gemspec
212
+ homepage: https://github.com/jamesBrennan/pg_conduit
213
+ licenses:
214
+ - MIT
215
+ metadata: {}
216
+ post_install_message:
217
+ rdoc_options: []
218
+ require_paths:
219
+ - lib
220
+ required_ruby_version: !ruby/object:Gem::Requirement
221
+ requirements:
222
+ - - ">="
223
+ - !ruby/object:Gem::Version
224
+ version: '0'
225
+ required_rubygems_version: !ruby/object:Gem::Requirement
226
+ requirements:
227
+ - - ">="
228
+ - !ruby/object:Gem::Version
229
+ version: '0'
230
+ requirements: []
231
+ rubyforge_project:
232
+ rubygems_version: 2.7.6
233
+ signing_key:
234
+ specification_version: 4
235
+ summary: Stream data from one postgres database to another
236
+ test_files: []
metadata.gz.sig ADDED
Binary file