forklift_etl 1.0.8
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +15 -0
- data/.gitignore +10 -0
- data/.rbenv-version +1 -0
- data/.travis.yml +10 -0
- data/Gemfile +10 -0
- data/Gemfile.lock +74 -0
- data/Rakefile +13 -0
- data/bin/forklift +61 -0
- data/doc/EmailSuffix.html +228 -0
- data/doc/Forklift.html +187 -0
- data/doc/Forklift/Base.html +167 -0
- data/doc/Forklift/Base/Connection.html +590 -0
- data/doc/Forklift/Base/Logger.html +453 -0
- data/doc/Forklift/Base/Mailer.html +399 -0
- data/doc/Forklift/Base/Mailer/ERBBinding.html +256 -0
- data/doc/Forklift/Base/Pid.html +489 -0
- data/doc/Forklift/Base/Utils.html +252 -0
- data/doc/Forklift/Connection.html +164 -0
- data/doc/Forklift/Connection/Elasticsearch.html +419 -0
- data/doc/Forklift/Connection/Mysql.html +939 -0
- data/doc/Forklift/Patterns.html +164 -0
- data/doc/Forklift/Patterns/Elasticsearch.html +169 -0
- data/doc/Forklift/Patterns/Mysql.html +402 -0
- data/doc/Forklift/Plan.html +704 -0
- data/doc/Gemfile.html +132 -0
- data/doc/Object.html +326 -0
- data/doc/Rakefile.html +138 -0
- data/doc/SpecClient.html +291 -0
- data/doc/SpecPlan.html +253 -0
- data/doc/SpecSeeds.html +303 -0
- data/doc/created.rid +35 -0
- data/doc/example/Gemfile.html +129 -0
- data/doc/images/add.png +0 -0
- data/doc/images/brick.png +0 -0
- data/doc/images/brick_link.png +0 -0
- data/doc/images/bug.png +0 -0
- data/doc/images/bullet_black.png +0 -0
- data/doc/images/bullet_toggle_minus.png +0 -0
- data/doc/images/bullet_toggle_plus.png +0 -0
- data/doc/images/date.png +0 -0
- data/doc/images/delete.png +0 -0
- data/doc/images/find.png +0 -0
- data/doc/images/loadingAnimation.gif +0 -0
- data/doc/images/macFFBgHack.png +0 -0
- data/doc/images/package.png +0 -0
- data/doc/images/page_green.png +0 -0
- data/doc/images/page_white_text.png +0 -0
- data/doc/images/page_white_width.png +0 -0
- data/doc/images/plugin.png +0 -0
- data/doc/images/ruby.png +0 -0
- data/doc/images/tag_blue.png +0 -0
- data/doc/images/tag_green.png +0 -0
- data/doc/images/transparent.png +0 -0
- data/doc/images/wrench.png +0 -0
- data/doc/images/wrench_orange.png +0 -0
- data/doc/images/zoom.png +0 -0
- data/doc/index.html +122 -0
- data/doc/js/darkfish.js +155 -0
- data/doc/js/jquery.js +18 -0
- data/doc/js/navigation.js +142 -0
- data/doc/js/search.js +94 -0
- data/doc/js/search_index.js +1 -0
- data/doc/js/searcher.js +228 -0
- data/doc/rdoc.css +543 -0
- data/doc/table_of_contents.html +309 -0
- data/example/Gemfile +3 -0
- data/example/Gemfile.lock +55 -0
- data/example/config/connections/elasticsearch/source.yml +1 -0
- data/example/config/connections/mysql/destination.yml +6 -0
- data/example/config/connections/mysql/source.yml +6 -0
- data/example/config/email.yml +18 -0
- data/example/plan.rb +87 -0
- data/example/template/email.erb +6 -0
- data/example/transformations/cleanup.sql +1 -0
- data/example/transformations/combined_name.sql +7 -0
- data/example/transformations/email_suffix.rb +20 -0
- data/forklift.jpg +0 -0
- data/forklift_etl.gemspec +28 -0
- data/lib/forklift/base/connection.rb +72 -0
- data/lib/forklift/base/logger.rb +49 -0
- data/lib/forklift/base/mailer.rb +83 -0
- data/lib/forklift/base/pid.rb +55 -0
- data/lib/forklift/base/utils.rb +23 -0
- data/lib/forklift/forklift.rb +19 -0
- data/lib/forklift/patterns/elasticsearch_patterns.rb +7 -0
- data/lib/forklift/patterns/mysql_patterns.rb +87 -0
- data/lib/forklift/plan.rb +138 -0
- data/lib/forklift/transports/elasticsearch.rb +75 -0
- data/lib/forklift/transports/mysql.rb +241 -0
- data/lib/forklift/version.rb +3 -0
- data/readme.md +410 -0
- data/spec/config/connections/elasticsearch/forklift_test.yml +1 -0
- data/spec/config/connections/mysql/forklift_test_destination.yml +6 -0
- data/spec/config/connections/mysql/forklift_test_source_a.yml +6 -0
- data/spec/config/connections/mysql/forklift_test_source_b.yml +6 -0
- data/spec/config/connections/mysql/forklift_test_working.yml +6 -0
- data/spec/config/email.yml +4 -0
- data/spec/integration/basic_spec.rb +29 -0
- data/spec/integration/elasticsearch_patterns_spec.rb +5 -0
- data/spec/integration/elasticsearch_spec.rb +95 -0
- data/spec/integration/multi_transport_spec.rb +112 -0
- data/spec/integration/mysql_patterns_spec.rb +76 -0
- data/spec/integration/mysql_spec.rb +138 -0
- data/spec/spec_helper.rb +30 -0
- data/spec/support/dumps/elasticsearch/forklift_test.json +7 -0
- data/spec/support/dumps/mysql/forklift_test_source_a.sql +79 -0
- data/spec/support/dumps/mysql/forklift_test_source_b.sql +23 -0
- data/spec/support/spec_client.rb +30 -0
- data/spec/support/spec_plan.rb +15 -0
- data/spec/support/spec_seeds.rb +69 -0
- data/spec/template/spec_email_template.erb +4 -0
- data/spec/unit/connection/mysql_spec.rb +102 -0
- data/spec/unit/misc/email_spec.rb +37 -0
- data/spec/unit/misc/pid_spec.rb +25 -0
- data/spec/unit/misc/step_spec.rb +53 -0
- data/template/destination.yml +6 -0
- data/template/email.erb +1 -0
- data/template/email.yml +18 -0
- data/template/plan.rb +10 -0
- data/template/source.yml +6 -0
- metadata +289 -0
@@ -0,0 +1 @@
|
|
1
|
+
-- Example cleanup transformation: drops the derived column created by the
-- combined_name.sql transformation, returning `users` to its original shape.
ALTER TABLE `users` DROP `combined_name`;
|
@@ -0,0 +1,20 @@
|
|
1
|
+
# Example Forklift transformation: tallies the domain part of every user
# email address and logs the common ones.
#
# Loaded via Forklift's exec_ruby mechanism, which derives the class name
# from the file name (email_suffix.rb -> EmailSuffix) and invokes #do!.
class EmailSuffix

  # Public: Run the transformation.
  #
  # connection - a Forklift connection (must respond to #read).
  # forklift   - the Forklift runner (used here for its logger).
  #
  # Returns nothing.
  def do!(connection, forklift)
    forklift.logger.log "collecting email suffixes..."

    # Default every new key to 0 so we can increment unconditionally.
    suffixes = Hash.new(0)
    connection.read("select email from users") do |data|
      data.each do |row|
        email = row[:email]
        # Guard against NULL/empty addresses; the original crashed on nil.
        next if email.nil? || email.empty?
        suffixes[email.split('@').last] += 1
      end
    end

    # Only report suffixes common enough to be interesting.
    suffixes.each do |suffix, count|
      forklift.logger.log " > #{suffix}: #{count}" if count > 5
    end
  end

end
|
data/forklift.jpg
ADDED
Binary file
|
@@ -0,0 +1,28 @@
|
|
1
|
+
# -*- encoding: utf-8 -*-
lib = File.expand_path('../lib', __FILE__)
$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
require 'forklift/version'

# Gem packaging definition for forklift_etl.
Gem::Specification.new do |spec|
  spec.name        = "forklift_etl"
  spec.version     = Forklift::VERSION
  spec.authors     = ["Evan Tahler"]
  spec.email       = ["evan@taskrabbit.com"]
  spec.homepage    = "https://github.com/taskrabbit/forklift"
  spec.summary     = "Forklift: Moving big databases around. A ruby ETL tool."
  spec.description = "A collection of ETL tools and patterns for mysql and elasticsearch."
  spec.license     = "MIT"

  spec.rubyforge_project = "forklift_etl"

  # File lists come straight from git so packaging always tracks the repo.
  spec.files         = `git ls-files`.split("\n")
  spec.test_files    = `git ls-files -- {test,spec,features}/*`.split("\n")
  spec.executables   = `git ls-files -- bin/*`.split("\n").map { |f| File.basename(f) }
  spec.require_paths = ["lib"]

  spec.add_dependency "activesupport", '~> 4.0', ">= 4.0.0"
  spec.add_dependency "mysql2", '~> 0.0', ">= 0.0.1"
  spec.add_dependency "elasticsearch", '~> 1.0', ">= 1.0.0"
  spec.add_dependency "pony", '~> 1.0', ">= 1.0.0"
  spec.add_dependency "lumberjack", '~> 1.0', ">= 1.0.0"
end
|
@@ -0,0 +1,72 @@
|
|
1
|
+
module Forklift
  module Base
    # Abstract base for all Forklift transports (mysql, elasticsearch, ...).
    # Subclasses implement connect/disconnect/read/write (and optionally
    # pipe / exec_script).
    #
    # NOTE(review): the exec* methods below call `forklift`, which this class
    # does not define — subclasses are presumably expected to provide that
    # accessor. Confirm against the concrete transports.
    class Connection

      # Public: The configuration Hash this transport was built with.
      attr_reader :config

      # Public: The underlying driver client; populated by subclasses in #connect.
      attr_reader :client

      def initialize(config)
        @config = config
      end

      # Public: Establish the connection; must set @client. Abstract.
      def connect
        raise 'not implemented'
      end

      # Public: Tear down the connection. Abstract.
      def disconnect
        raise 'not implemented'
      end

      # Public: Yield/return an array of data rows for `query`. Abstract.
      def read(query)
        raise 'not implemented'
      end

      # Public: Write `data` (array of rows) to `collection` (table). Abstract.
      def write(data, collection)
        raise 'not implemented'
      end

      # Public: Fast in-server copy when source and destination share a
      # connection. Abstract.
      def pipe
        raise 'not implemented'
      end

      # Public: Run a script, logging (rather than raising) any error.
      def exec(path)
        exec!(path)
      rescue StandardError => e
        # Was `rescue Exception`, which would also swallow SignalException /
        # SystemExit; StandardError is the correct net for script failures.
        forklift.logger.log(e)
      end

      # Public: Run a script, raising on failure. Ruby files are loaded and
      # driven via #exec_ruby; anything else goes to the transport-specific
      # #exec_script.
      def exec!(path)
        forklift.logger.log "Running script: #{path}"
        extension = path.split(".").last
        if(extension == "rb" || extension == "ruby")
          exec_ruby(path)
        else
          exec_script(path)
        end
      end

      # Public: Load a ruby transformation file and invoke its #do! hook.
      # The class name is derived from the file name (email_suffix.rb ->
      # EmailSuffix) by forklift.utils.class_name_from_file.
      def exec_ruby(path)
        klass = forklift.utils.class_name_from_file(path)
        require path
        # const_get instead of eval: no code execution from a string built
        # out of a file path.
        model = Object.const_get(klass).new
        model.do!(self, forklift)
      end

      # Public: Run a non-ruby script. Abstract.
      def exec_script(path)
        raise 'not implemented'
      end

    end
  end
end
|
@@ -0,0 +1,49 @@
|
|
1
|
+
require 'lumberjack'
|
2
|
+
|
3
|
+
module Forklift
  module Base
    # Collects timestamped log lines in memory (used for email reports)
    # while also echoing them to stdout (when enabled) and writing them to
    # log/forklift.log via Lumberjack.
    class Logger

      attr_reader :forklift

      def initialize(forklift)
        @forklift = forklift
      end

      # All messages logged so far (timestamped), oldest first.
      def messages
        @messages ||= []
      end

      # Lazily-built Lumberjack logger writing to <project_root>/log/forklift.log.
      def logger
        @logger ||= begin
          dir = "#{forklift.config[:project_root]}/log"
          ::Lumberjack::Logger.new("#{dir}/forklift.log", :buffer_size => 0)
        end
      end

      # Record a message: stdout (only when config[:logger][:stdout] == true),
      # the file logger, and the in-memory buffer.
      def log(message, severity="info")
        stamped = "[Forklift @ #{Time.now}] #{message}"
        puts stamped if forklift.config[:logger][:stdout] == true
        logger.send(severity.to_sym, message) unless logger.nil?
        messages << stamped
      end

      # Log only when config[:logger][:debug] is enabled.
      def debug(message)
        log("[debug] #{message}") if forklift.config[:logger][:debug] == true
      end

      # Log a message with visual emphasis (blank lines + asterisks).
      def emphatically(message)
        log "" unless message.empty?
        log "*** #{message} ***"
        log ""
      end

      # Log a fatal-looking message (does not raise or exit).
      def fatal(message)
        log "!!! #{message} !!!"
      end

    end
  end
end
|
@@ -0,0 +1,83 @@
|
|
1
|
+
require 'pony'
|
2
|
+
require 'erb'
|
3
|
+
require 'active_support/core_ext/hash/keys'
|
4
|
+
|
5
|
+
module Forklift
  module Base
    # Sends status emails via Pony, optionally rendered from an ERB template
    # and/or carrying the collected log lines as a text attachment.
    class Mailer

      attr_reader :forklift

      def initialize(forklift)
        @forklift = forklift
      end

      # Public: Pull out the settings from config/email.yml.
      #
      # Returns a Hash with all symbolized keys.
      def config
        @config ||= begin
          path = "#{forklift.config[:project_root]}/config/email.yml"
          forklift.utils.load_yml(path).deep_symbolize_keys
        end
      end

      # Public: Fallback values for any message field the caller omits.
      def message_defaults
        {
          :from => "Forklift",
          :subject => "Forklift has moved your database @ #{Time.new}",
          :body => "Forklift has moved your database @ #{Time.new}",
        }
      end

      # Public: Render template_file (ERB) with `variables` exposed as
      # instance variables, then send the result as the message body.
      # Mutates args[:body].
      def send_template(args, template_file, variables, attachment_lines=[])
        scope = ERBBinding.new(variables)
        template = ERB.new(File.read(template_file))
        args[:body] = template.result(scope.get_binding)
        send(args, attachment_lines)
      end

      # Public: Send a message. args may carry :to/:from/:subject/:body;
      # missing fields fall back to message_defaults. attachment_lines, when
      # present, are attached as log.txt.
      # NOTE: this shadows Object#send for instances of this class.
      def send(args, attachment_lines=[])
        params = message_defaults
        [:to, :from, :subject, :body].each do |key|
          value = args[key]
          params[key] = value unless value.nil?
        end
        unless attachment_lines.empty?
          params[:attachments] = {"log.txt" => attachment_lines.join("\r\n")}
        end
        deliver(params)
      end

      private

      # Private: Actually deliver the message using Pony.
      #
      # Returns the raw email from Pony.
      def deliver(params)
        forklift.logger.log("Sending email via #{config[:via]}")
        # Pony wants :html_body; promote :body when no html body was given.
        if params[:html_body].nil?
          params[:html_body] = params.delete(:body)
        end
        params[:via] = config[:via].to_sym
        params[:via_options] = config[:via_options]
        Pony.mail(params)
      end

      # Exposes a Hash's entries as instance variables so an ERB template
      # can reference them as @key. Single quotes in string values are
      # replaced with spaces (original behavior — preserved as-is).
      class ERBBinding
        def initialize(hash)
          hash.each do |key, value|
            value = value.gsub("'", " ") if value.class == String
            instance_variable_set("@#{key}", value)
          end
        end

        def get_binding
          binding
        end
      end

    end
  end
end
|
@@ -0,0 +1,55 @@
|
|
1
|
+
require 'fileutils'

module Forklift
  module Base
    # Pidfile management: ensures only one Forklift run happens at a time.
    class Pid

      attr_reader :forklift

      def initialize(forklift)
        @forklift = forklift
      end

      # Directory holding the pidfile, under the project root.
      def pid_dir
        "#{forklift.config[:project_root]}/pid"
      end

      # Create the pid directory if needed. FileUtils.mkdir_p replaces the
      # original `mkdir -p` subshell: no process spawn, no /bin/sh dependency.
      def ensure_pid_dir
        FileUtils.mkdir_p(pid_dir)
      end

      def pidfile
        "#{pid_dir}/pidfile"
      end

      # Public: Record the current process id in the pidfile.
      def store!
        forklift.logger.debug "Creating pidfile @ #{pidfile}"
        ensure_pid_dir
        File.open(pidfile, 'w') { |f| f << Process.pid }
      end

      # Public: Read the previously stored pid.
      #
      # Returns the Integer pid, or nil when no pidfile exists or it is
      # unreadable.
      def recall
        ensure_pid_dir
        File.read(pidfile).to_i
      rescue StandardError
        nil
      end

      # Public: Remove the pidfile; missing files and permission errors are
      # ignored (rm_f replaces the original `rescue nil` modifier).
      def delete!
        forklift.logger.debug "Removing pidfile @ #{pidfile}"
        FileUtils.rm_f(pidfile)
      end

      # Public: Exit(1) if another Forklift run (per the pidfile) is still
      # alive; otherwise clear any stale pidfile. Returns nil when no pidfile
      # exists (original return semantics preserved).
      def safe_to_run?
        return if recall.nil?
        # `ps -p <pid>` prints a header line plus one line per live process,
        # so >= 2 lines means the recorded pid is still running.
        count = `ps -p #{recall} | wc -l`.to_i
        if count >= 2
          forklift.logger.fatal "This application is already running (pidfile) #{recall}. Exiting now"
          exit(1)
        else
          forklift.logger.log "Clearing old pidfile from previous process #{recall}"
          delete!
        end
      end

    end
  end
end
|
@@ -0,0 +1,23 @@
|
|
1
|
+
require 'yaml'
|
2
|
+
require 'erb'
|
3
|
+
|
4
|
+
module Forklift
  module Base
    # Small shared helpers used across Forklift.
    class Utils

      # Load a YAML file, first rendering it through ERB so configs can
      # embed ruby (e.g. ENV lookups).
      def load_yml(file)
        rendered = ERB.new(File.read(file)).result
        YAML.load(rendered)
      end

      # Derive a CamelCase class name from a snake_case file name:
      # "lib/email_suffix.rb" => "EmailSuffix".
      def class_name_from_file(file)
        base = file.split("/").last.split(".").first
        base.split("_").map(&:capitalize).join
      end

    end
  end
end
|
@@ -0,0 +1,19 @@
|
|
1
|
+
require 'rubygems'
|
2
|
+
|
3
|
+
module Forklift

  # Directory containing this file; all requires below are relative to it.
  lib = File.expand_path(File.dirname(__FILE__))

  # Core pieces, loaded in dependency order (utils first, plan last).
  require "#{lib}/base/utils.rb"
  require "#{lib}/base/pid.rb"
  require "#{lib}/base/logger.rb"
  require "#{lib}/base/mailer.rb"
  require "#{lib}/base/connection.rb"

  # Bundled transports and patterns shipped with the gem...
  Dir["#{lib}/transports/*.rb"].each {|file| require file }
  Dir["#{lib}/patterns/*.rb"].each {|file| require file }
  # ...plus any the host project defines in ./transports and ./patterns
  # under the current working directory.
  Dir["#{Dir.pwd}/transports/*.rb"].each {|file| require file } if File.directory?("#{Dir.pwd}/transports")
  Dir["#{Dir.pwd}/patterns/*.rb"].each {|file| require file } if File.directory?("#{Dir.pwd}/patterns")

  # The plan depends on everything above, so it comes last.
  require "#{lib}/plan.rb"
end
|
@@ -0,0 +1,87 @@
|
|
1
|
+
module Forklift
  module Patterns
    # Mysql-to-mysql copy patterns. `source` and `destination` are mysql
    # connection objects; the pipe variants use cross-database SQL, so they
    # assume both databases live on the same server.
    class Mysql

      # Public: Full copy — drop and recreate to_table, then insert every row.
      def self.pipe(source, from_table, destination, to_table)
        start = Time.new.to_i
        from_db = source.current_database
        to_db = destination.current_database
        source.forklift.logger.log("mysql pipe: `#{from_db}`.`#{from_table}` => `#{to_db}`.`#{to_table}`")
        source.q("drop table if exists `#{to_db}`.`#{to_table}`")
        source.q("create table `#{to_db}`.`#{to_table}` like `#{from_db}`.`#{from_table}`")
        source.q("insert into `#{to_db}`.`#{to_table}` select * from `#{from_db}`.`#{from_table}`")
        delta = Time.new.to_i - start
        source.forklift.logger.log(" ^ moved #{destination.count(to_table, to_db)} rows in #{delta}s")
      end

      # Public: Incremental copy — only rows whose `matcher` column is newer
      # than the newest row already in to_table; stale copies (rows updated
      # at the source since they were copied) are deleted first.
      def self.incremental_pipe(source, from_table, destination, to_table, matcher=source.default_matcher, primary_key='id')
        start = Time.new.to_i
        from_db = source.current_database
        to_db = destination.current_database
        source.forklift.logger.log("mysql incremental_pipe: `#{from_db}`.`#{from_table}` => `#{to_db}`.`#{to_table}`")
        source.q("create table if not exists `#{to_db}`.`#{to_table}` like `#{from_db}`.`#{from_table}`")

        # Count the number of rows in to_table
        original_count = source.count(to_table, to_db)

        # Find the latest/max/newest timestamp from the final table
        # in order to determine the last copied row.
        latest_timestamp = source.max_timestamp(to_table, matcher, to_db)

        # If to_table has existing rows, ensure none of them are "stale."
        # A stale row in to_table means a previously copied row was
        # updated in from_table, so let's delete it from the to_table
        # so we can get a fresh copy of that row.
        if original_count > 0
          # Get the ids of rows in from_table that are newer than the newest
          # row in to_table. Some of these rows could either be a) stale or b) new.
          source.read("select `#{primary_key}` from `#{from_db}`.`#{from_table}` where `#{matcher}` > \"#{latest_timestamp}\" order by `#{matcher}`") do |stale_rows|
            if stale_rows.length > 0
              # Delete these ids from to_table. If the ids are stale, they'll
              # be deleted; if they're new, they won't exist and nothing happens.
              stale_ids = stale_rows.map { |row| row[primary_key.to_sym] }.join(',')
              source.q("delete from `#{to_db}`.`#{to_table}` where `#{primary_key}` in (#{stale_ids})")
              source.forklift.logger.log(" ^ deleted up to #{stale_rows.length} stale rows from `#{to_db}`.`#{to_table}`")
            end
          end
        end

        # Do the insert into to_table
        destination.q("insert into `#{to_db}`.`#{to_table}` select * from `#{from_db}`.`#{from_table}` where `#{matcher}` > \"#{latest_timestamp}\" order by `#{matcher}`")
        delta = Time.new.to_i - start
        new_count = destination.count(to_table, to_db) - original_count
        source.forklift.logger.log(" ^ created #{new_count} new rows in #{delta}s")
      end

      # Public: Incremental copy when the table supports it, full pipe
      # otherwise.
      #
      # BUG FIX: previously this passed database-name Strings
      # (from_db/to_db) where connection objects are required, so either
      # branch raised NoMethodError as soon as it ran. Now the actual
      # source/destination connections (and the chosen matcher) are
      # forwarded.
      def self.optimistic_pipe(source, from_table, destination, to_table, matcher=source.default_matcher, primary_key='id')
        if self.can_incremental_pipe?(source, from_table, matcher)
          incremental_pipe(source, from_table, destination, to_table, matcher, primary_key)
        else
          pipe(source, from_table, destination, to_table)
        end
      end

      # Public: Can `table` on `conn` be copied incrementally? True when it
      # has the matcher (timestamp) column.
      def self.can_incremental_pipe?(conn, table, matcher=conn.default_matcher)
        conn.columns(table, conn.current_database).include?(matcher)
      end

      ## When you are copying data to and from mysql
      ## An implementation of "pipe" for remote databases: streams rows
      ## through the client instead of using cross-db SQL.
      def self.mysql_optimistic_import(source, destination)
        #TODO: allow passing in of matcher and primary_key
        source.tables.each do |table|
          if( source.columns(table).include?(source.default_matcher) && destination.tables.include?(table) )
            # Incremental: only rows newer than what destination already has.
            since = destination.max_timestamp(table)
            source.read_since(table, since){ |data| destination.write(data, table) }
          else
            # No timestamp column (or new table): full refresh.
            destination.truncate table
            source.read("select * from #{table}"){ |data| destination.write(data, table) }
          end
        end
      end

    end
  end
end
|