forklift_etl 1.1.8 → 1.1.9
Sign up to get free protection for your applications and to get access to all the features.
- data/Gemfile.lock +1 -1
- data/lib/forklift/patterns/mysql_patterns.rb +36 -4
- data/lib/forklift/plan.rb +1 -0
- data/lib/forklift/transports/csv.rb +1 -1
- data/lib/forklift/transports/elasticsearch.rb +1 -1
- data/lib/forklift/transports/mysql.rb +46 -24
- data/lib/forklift/version.rb +1 -1
- data/readme.md +4 -1
- data/spec/integration/mysql_patterns_spec.rb +2 -0
- data/spec/integration/mysql_spec.rb +3 -2
- data/spec/unit/connection/mysql_spec.rb +6 -4
- metadata +2 -2
data/Gemfile.lock
CHANGED
@@ -2,14 +2,18 @@ module Forklift
|
|
2
2
|
module Patterns
|
3
3
|
class Mysql
|
4
4
|
|
5
|
-
def self.pipe(source, from_table, destination, to_table)
|
5
|
+
def self.pipe(source, from_table, destination, to_table, tmp_table="_forklift_tmp")
|
6
6
|
start = Time.new.to_i
|
7
7
|
from_db = source.current_database
|
8
8
|
to_db = destination.current_database
|
9
9
|
source.forklift.logger.log("mysql pipe: `#{from_db}`.`#{from_table}` => `#{to_db}`.`#{to_table}`")
|
10
|
+
|
11
|
+
source.q("drop table if exists `#{to_db}`.`#{tmp_table}`")
|
12
|
+
source.q("create table `#{to_db}`.`#{tmp_table}` like `#{from_db}`.`#{from_table}`")
|
13
|
+
source.q("insert into `#{to_db}`.`#{tmp_table}` select * from `#{from_db}`.`#{from_table}`")
|
10
14
|
source.q("drop table if exists `#{to_db}`.`#{to_table}`")
|
11
|
-
source.q("
|
12
|
-
|
15
|
+
source.q("rename table `#{to_db}`.`#{tmp_table}` to `#{to_db}`.`#{to_table}`")
|
16
|
+
|
13
17
|
delta = Time.new.to_i - start
|
14
18
|
source.forklift.logger.log(" ^ moved #{destination.count(to_table, to_db)} rows in #{delta}s")
|
15
19
|
end
|
@@ -59,7 +63,9 @@ module Forklift
|
|
59
63
|
if self.can_incremental_pipe?(source, from_table, destination, to_table, matcher)
|
60
64
|
begin
|
61
65
|
incremental_pipe(source, from_table, destination, to_table, matcher, primary_key)
|
62
|
-
rescue
|
66
|
+
rescue Exception => e
|
67
|
+
source.forklift.logger.log("! incremental_pipe failure on #{from_table} => #{to_table}: #{e} ")
|
68
|
+
source.forklift.logger.log("! falling back to pipe...")
|
63
69
|
pipe(source, from_table, destination, to_table)
|
64
70
|
end
|
65
71
|
else
|
@@ -98,6 +104,32 @@ module Forklift
|
|
98
104
|
end
|
99
105
|
end
|
100
106
|
|
107
|
+
# Writes a "high water mark": for every table in `db` that has a `matcher`
# column, one placeholder row is written whose `matcher` value is `time`,
# pretending a record exists from that moment. This enables partial forklift
# runs which only extract data "later than X".
#
# Placeholder values per column type (taken from the column type string):
#   * the matcher column -> time.to_s(:db)
#   * text / varchar     -> "~~stub~~"
#   * float / int        -> 0
#   * anything else      -> nil
#
# db      - connection responding to `tables`, `columns(table, database, return_types)`,
#           `current_database`, `default_matcher` and `write(rows, table)`.
# time    - object responding to `to_s(:db)` (e.g. an ActiveSupport-extended Time).
# matcher - name of the timestamp column (String or Symbol); defaults to
#           `db.default_matcher`.
#
# Returns the result of `db.tables.each`.
#
# TODO: assumes every non-stubbed column accepts NULL
# NOTE(review): rows are keyed by Symbol and unknown types are nil because the
# connection's `safe_values` looks values up by symbolized column name and
# renders nil as NULL - confirm `db.write` still routes through it.
def self.write_high_water_mark(db, time, matcher=db.default_matcher)
  matcher_name = matcher.to_s

  db.tables.each do |table|
    # Ask for the current database explicitly; passing nil would override the
    # default and produce a query against an empty database name.
    columns, types = db.columns(table, db.current_database, true)
    names = columns.map(&:to_s)
    next unless names.include?(matcher_name)

    row = {}
    names.zip(types).each do |name, type|
      row[name.to_sym] =
        if name == matcher_name
          time.to_s(:db)
        elsif type =~ /text/ || type =~ /varchar/
          "~~stub~~"
        elsif type =~ /float/ || type =~ /int/
          0
        end # no branch matched => nil
    end

    db.write([row], table)
  end
end
|
132
|
+
|
101
133
|
end
|
102
134
|
end
|
103
135
|
end
|
data/lib/forklift/plan.rb
CHANGED
@@ -28,7 +28,11 @@ module Forklift
|
|
28
28
|
q("DROP table `#{database}`.`#{table}`");
|
29
29
|
end
|
30
30
|
|
31
|
-
def
|
31
|
+
def rename(table, new_table, database=current_database, new_database=current_database)
|
32
|
+
q("RENAME TABLE `#{database}`.`#{table}` TO `#{new_database}`.`#{new_table}`")
|
33
|
+
end
|
34
|
+
|
35
|
+
def read(query, database=current_database, looping=true, limit=forklift.config[:batch_size], offset=0)
|
32
36
|
loop_count = 0
|
33
37
|
# TODO: Detect limit/offset already present in query
|
34
38
|
q("USE `#{database}`")
|
@@ -77,14 +81,25 @@ module Forklift
|
|
77
81
|
end
|
78
82
|
end
|
79
83
|
end
|
84
|
+
end
|
80
85
|
|
86
|
+
insert_q = "INSERT INTO `#{database}`.`#{table}` (#{safe_columns(columns)}) VALUES "
|
87
|
+
delete_q = "DELETE FROM `#{database}`.`#{table}` WHERE `#{primary_key}` IN "
|
88
|
+
delete_keys = []
|
89
|
+
data.each do |d|
|
81
90
|
if(to_update == true && !d[primary_key.to_sym].nil?)
|
82
|
-
|
91
|
+
delete_keys << d[primary_key.to_sym]
|
83
92
|
end
|
84
|
-
|
85
|
-
insert_q
|
86
|
-
|
93
|
+
insert_q << safe_values(columns, d)
|
94
|
+
insert_q << ","
|
95
|
+
end
|
96
|
+
|
97
|
+
if delete_keys.length > 0
|
98
|
+
delete_q << "(#{delete_keys.join(',')})"
|
99
|
+
q(delete_q)
|
87
100
|
end
|
101
|
+
insert_q = insert_q[0...-1]
|
102
|
+
q(insert_q)
|
88
103
|
forklift.logger.log "wrote #{data.length} rows to `#{database}`.`#{table}`"
|
89
104
|
end
|
90
105
|
end
|
@@ -177,12 +192,15 @@ module Forklift
|
|
177
192
|
end
|
178
193
|
end
|
179
194
|
|
180
|
-
def columns(table, database=current_database)
|
195
|
+
def columns(table, database=current_database, return_types=false)
|
181
196
|
cols = []
|
197
|
+
types = []
|
182
198
|
read("describe `#{database}`.`#{table}`").each do |row|
|
183
|
-
cols
|
199
|
+
cols << row[:Field]
|
200
|
+
types << row[:Type]
|
184
201
|
end
|
185
|
-
cols
|
202
|
+
return cols if return_types == false
|
203
|
+
return cols, types
|
186
204
|
end
|
187
205
|
|
188
206
|
def dump(file, options=[])
|
@@ -257,27 +275,31 @@ module Forklift
|
|
257
275
|
return a.join(', ')
|
258
276
|
end
|
259
277
|
|
260
|
-
def safe_values(
|
278
|
+
# Renders one data row as a parenthesised SQL VALUES tuple, e.g. `( "a", 1, NULL )`.
#
# columns - Array of column names (Strings or Symbols); fixes the output order.
# d       - Hash of the row's values, keyed by Symbol column name.
#
# Value handling:
#   * String / Symbol    -> double-quoted, with backslashes escaped first and
#                           double quotes second, so the quote escaping is not
#                           itself doubled
#   * Integer            -> emitted as-is, at any magnitude
#   * Float              -> emitted as-is
#   * BigDecimal         -> converted with to_f
#   * Time / Date(Time)  -> double-quoted `to_s(:db)`
#   * nil, missing key, or any other class -> NULL
#
# The row's own strings are never modified: escaping works on copies, so the
# caller's data stays intact and frozen strings are accepted.
#
# Returns the tuple as a String.
def safe_values(columns, d)
  parts = columns.map do |column|
    v = d[column.to_sym]

    if v.is_a?(::String) || v.is_a?(::Symbol)
      # Block-form replacements avoid gsub's own backslash processing of
      # replacement strings.
      escaped = v.to_s.gsub('\\') { '\\\\' }.gsub('"') { '\\"' }
      "\"#{escaped}\""
    elsif v.is_a?(::Integer) || v.is_a?(::Float)
      v
    elsif defined?(::BigDecimal) && v.is_a?(::BigDecimal)
      v.to_f
    elsif v.is_a?(::Time) || (defined?(::Date) && v.is_a?(::Date))
      "\"#{v.to_s(:db)}\""
    else
      "NULL"
    end
  end

  "( #{parts.join(', ')} )"
end
|
282
304
|
|
283
305
|
#/private
|
data/lib/forklift/version.rb
CHANGED
data/readme.md
CHANGED
@@ -370,7 +370,8 @@ end
|
|
370
370
|
- max_timestamp(table, matcher=default_matcher, database=current_database)
|
371
371
|
- return the timestamp of the max(matcher) or 1970-01-01
|
372
372
|
- truncate!(table, database=current_database)
|
373
|
-
- columns(table, database=current_database)
|
373
|
+
- columns(table, database=current_database, return_types=false)
|
374
|
+
- rename(table, new_table, database, new_database)
|
374
375
|
- dump(file)
|
375
376
|
- mysqldump the database to `file` via gzip
|
376
377
|
|
@@ -384,6 +385,8 @@ end
|
|
384
385
|
- mysql_optimistic_import(source, destination)
|
385
386
|
- tries to do an incremental table copy, falls back to a full table copy
|
386
387
|
- this differs from `pipe`, as all data is loaded into forklift, rather than relying on mysql transfer methods
|
388
|
+
- write_high_water_mark(db, time, matcher)
|
389
|
+
- The high water method will stub a row in all tables with a `default_matcher` column pretending to have a record from `time`
|
387
390
|
|
388
391
|
### Elasticsearch
|
389
392
|
|
@@ -37,7 +37,8 @@ describe 'mysql' do
|
|
37
37
|
it "can write new data" do
|
38
38
|
table = "users"
|
39
39
|
data = [
|
40
|
-
{email: 'other@example.com', first_name: 'other', last_name: 'n', created_at: Time.new.to_s(:db), updated_at: Time.new.to_s(:db)}
|
40
|
+
{email: 'other@example.com', first_name: 'other', last_name: 'n', created_at: Time.new.to_s(:db), updated_at: Time.new.to_s(:db)},
|
41
|
+
{email: 'else@example.com', first_name: 'else', last_name: 'n', created_at: Time.new.to_s(:db), updated_at: Time.new.to_s(:db)}
|
41
42
|
]
|
42
43
|
plan = SpecPlan.new
|
43
44
|
plan.do! {
|
@@ -47,7 +48,7 @@ describe 'mysql' do
|
|
47
48
|
|
48
49
|
destination = SpecClient.mysql('forklift_test_source_a')
|
49
50
|
count = destination.query('select count(1) as "count" from users').first['count']
|
50
|
-
expect(count).to eql
|
51
|
+
expect(count).to eql 7
|
51
52
|
end
|
52
53
|
|
53
54
|
it "can update existing data" do
|
@@ -90,13 +90,15 @@ describe Forklift::Connection::Mysql do
|
|
90
90
|
subject { described_class.new({}, {}) }
|
91
91
|
|
92
92
|
it "escapes one trailing backslash" do
|
93
|
-
|
94
|
-
|
93
|
+
columns = ['col']
|
94
|
+
values = {:col => "foo\\"}
|
95
|
+
expect(subject.send(:safe_values, columns, values)).to eq("( \"foo\\\\\" )")
|
95
96
|
end
|
96
97
|
|
97
98
|
it "escapes two trailing backslashes" do
|
98
|
-
|
99
|
-
|
99
|
+
columns = ['col']
|
100
|
+
values = {:col => "foo\\\\" }
|
101
|
+
expect(subject.send(:safe_values, columns, values)).to eq("( \"foo\\\\\\\\\" )")
|
100
102
|
end
|
101
103
|
end
|
102
104
|
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: forklift_etl
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.1.
|
4
|
+
version: 1.1.9
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -9,7 +9,7 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2014-08-
|
12
|
+
date: 2014-08-29 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: activesupport
|