mosql 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- data/.gitignore +2 -0
- data/Gemfile +4 -0
- data/Gemfile.lock +48 -0
- data/README.md +168 -0
- data/Rakefile +12 -0
- data/bin/mosql +7 -0
- data/lib/mosql.rb +11 -0
- data/lib/mosql/cli.rb +305 -0
- data/lib/mosql/log.rb +7 -0
- data/lib/mosql/schema.rb +149 -0
- data/lib/mosql/sql.rb +59 -0
- data/lib/mosql/tailer.rb +36 -0
- data/lib/mosql/version.rb +3 -0
- data/mosql.gemspec +24 -0
- data/test/_lib.rb +18 -0
- data/test/functional/_lib.rb +65 -0
- data/test/functional/functional.rb +7 -0
- data/test/functional/schema.rb +66 -0
- data/test/functional/sql.rb +38 -0
- data/test/unit/lib/mongo-sql/schema.rb +102 -0
- metadata +232 -0
data/lib/mosql/log.rb
ADDED
data/lib/mosql/schema.rb
ADDED
@@ -0,0 +1,149 @@
|
|
1
|
+
module MoSQL
  class SchemaError < StandardError; end;

  # Parses the collection-map (Mongo db/collection -> SQL table + ordered
  # column list) and knows how to create the SQL schema and transform
  # Mongo documents into SQL rows and COPY lines.
  class Schema
    include MoSQL::Logging

    # Convert a list of single-key hashes ([{'_id' => 'TEXT'}, ...]) into
    # a BSON::OrderedHash, preserving the column order from the map file.
    # Raises if any entry is not exactly one key/value pair.
    def to_ordered_hash(lst)
      hash = BSON::OrderedHash.new
      lst.each do |ent|
        raise "Invalid ordered hash entry #{ent.inspect}" unless ent.is_a?(Hash) && ent.keys.length == 1
        field, type = ent.first
        hash[field] = type
      end
      hash
    end

    # Normalize one collection spec: returns a copy whose :columns is an
    # ordered hash of field name -> SQL type.
    def parse_spec(spec)
      out = spec.dup
      out[:columns] = to_ordered_hash(spec[:columns])
      out
    end

    # map is the parsed collection map: {dbname => {collection => spec}}.
    def initialize(map)
      @map = {}
      map.each do |dbname, db|
        @map[dbname] ||= {}
        db.each do |cname, spec|
          @map[dbname][cname] = parse_spec(spec)
        end
      end
    end

    # Create one SQL table per mapped collection on the Sequel handle
    # `db`. With clobber=true existing tables are dropped and recreated
    # (create_table!); otherwise they are created only if missing
    # (create_table?). _id is always the primary key.
    def create_schema(db, clobber=false)
      @map.values.map(&:values).flatten.each do |collection|
        meta = collection[:meta]
        log.info("Creating table '#{meta[:table]}'...")
        db.send(clobber ? :create_table! : :create_table?, meta[:table]) do
          collection[:columns].each do |field, type|
            column field, type
          end
          if meta[:extra_props]
            column '_extra_props', 'TEXT'
          end
          primary_key [:_id]
        end
      end
    end

    # Look up the spec for a "db.collection" namespace; nil if unmapped.
    def find_ns(ns)
      db, collection = ns.split(".")
      schema = (@map[db] || {})[collection]
      if schema.nil?
        log.debug("No mapping for ns: #{ns}")
        return nil
      end
      schema
    end

    # Like find_ns, but raises SchemaError for an unmapped namespace.
    def find_ns!(ns)
      schema = find_ns(ns)
      raise SchemaError.new("No mapping for namespace: #{ns}") if schema.nil?
      schema
    end

    # Transform a Mongo document into an array of column values in
    # schema order. BSON Binary/ObjectId values are stringified. If the
    # spec enables extra_props, leftover fields are appended as one JSON
    # object (minus binary blobs, which may not be valid UTF-8).
    def transform(ns, obj, schema=nil)
      schema ||= find_ns!(ns)

      obj = obj.dup
      row = []
      schema[:columns].each do |name, type|
        v = obj.delete(name)
        case v
        when BSON::Binary, BSON::ObjectId
          v = v.to_s
        end
        row << v
      end

      if schema[:meta][:extra_props]
        # Kludgily delete binary blobs from _extra_props -- they may
        # contain invalid UTF-8, which to_json will not properly encode.
        obj.each do |k,v|
          obj.delete(k) if v.is_a?(BSON::Binary)
        end
        row << obj.to_json
      end

      log.debug { "Transformed: #{row.inspect}" }

      row
    end

    # All SQL column names for a spec, including _extra_props if enabled.
    def all_columns(schema)
      cols = schema[:columns].keys
      if schema[:meta][:extra_props]
        cols << "_extra_props"
      end
      cols
    end

    # Bulk-load already-transformed rows (arrays from #transform) into
    # the table for `ns` using PostgreSQL COPY ... FROM STDIN.
    #
    # NOTE: the original also pre-joined all rows into an unused local
    # (`data = objs.map { ... }.join("\n")`), running the whole
    # transform-to-copy pass twice and discarding one result; that dead
    # work has been removed.
    def copy_data(db, ns, objs)
      schema = find_ns!(ns)
      db.synchronize do |pg|
        sql = "COPY \"#{schema[:meta][:table]}\" " +
          "(#{all_columns(schema).map {|c| "\"#{c}\""}.join(",")}) FROM STDIN"
        pg.execute(sql)
        objs.each do |o|
          pg.put_copy_data(transform_to_copy(ns, o, schema) + "\n")
        end
        pg.put_copy_end
        begin
          pg.get_result.check
        rescue PGError => e
          # Route the PG error through Sequel's error translation.
          db.send(:raise_error, e)
        end
      end
    end

    # Escape a single value for the COPY text format: NULL is \N,
    # booleans are t/f, and backslash/tab/newline/CR get a leading
    # backslash.
    def quote_copy(val)
      case val
      when nil
        "\\N"
      when true
        't'
      when false
        'f'
      else
        val.to_s.gsub(/([\\\t\n\r])/, '\\\\\\1')
      end
    end

    # Join one transformed row into a tab-separated COPY line.
    def transform_to_copy(ns, row, schema=nil)
      row.map { |c| quote_copy(c) }.join("\t")
    end

    # SQL table name mapped to the given Mongo namespace.
    def table_for_ns(ns)
      find_ns!(ns)[:meta][:table]
    end

    # All Mongo database names present in the map.
    def all_mongo_dbs
      @map.keys
    end

    # Mapped collection names for one Mongo database.
    def collections_for_mongo_db(db)
      (@map[db]||{}).keys
    end
  end
end
|
data/lib/mosql/sql.rb
ADDED
@@ -0,0 +1,59 @@
|
|
1
|
+
module MoSQL
  # Thin wrapper around a Sequel (PostgreSQL) connection that knows how
  # to upsert transformed Mongo documents into the mapped tables.
  class SQLAdapter
    include MoSQL::Logging

    attr_reader :db

    # schema:   a MoSQL::Schema instance
    # uri:      a Sequel connection URI
    # pgschema: optional PostgreSQL schema (namespace) to create and use
    def initialize(schema, uri, pgschema=nil)
      @schema = schema
      connect_db(uri, pgschema)
    end

    # Open the Sequel connection; on each new connection, optionally
    # create the requested PostgreSQL schema and point search_path at it.
    def connect_db(uri, pgschema)
      @db = Sequel.connect(uri, :after_connect => proc do |conn|
        if pgschema
          begin
            conn.execute("CREATE SCHEMA \"#{pgschema}\"")
          rescue PG::Error
            # Schema already exists -- that's fine.
          end
          conn.execute("SET search_path TO \"#{pgschema}\"")
        end
      end)
    end

    # Sequel dataset for the table mapped to the given Mongo namespace.
    def table_for_ns(ns)
      @db[@schema.table_for_ns(ns).intern]
    end

    # Transform a Mongo document and upsert it into its mapped table.
    def upsert_ns(ns, obj)
      h = {}
      # find_ns! (not find_ns) so an unmapped namespace raises a clear
      # SchemaError here instead of a NoMethodError inside all_columns;
      # this also matches what @schema.transform raises internally.
      cols = @schema.all_columns(@schema.find_ns!(ns))
      row = @schema.transform(ns, obj)
      cols.zip(row).each { |k,v| h[k] = v }
      upsert(table_for_ns(ns), h)
    end

    # Upsert, logging and skipping rows the server rejected (errors that
    # carry a PG result); purely client-side errors are re-raised.
    def upsert(table, item)
      begin
        upsert!(table, item)
      rescue Sequel::DatabaseError => e
        wrapped = e.wrapped_exception
        if wrapped.result
          log.warn("Ignoring row (_id=#{item['_id']}): #{e}")
        else
          raise e
        end
      end
    end

    # INSERT, falling back to an UPDATE by _id on duplicate-key errors.
    def upsert!(table, item)
      begin
        table.insert(item)
      rescue Sequel::DatabaseError => e
        raise e unless e.message =~ /duplicate key value violates unique constraint/
        table.where(:_id => item['_id']).update(item)
      end
    end
  end
end
|
59
|
+
|
data/lib/mosql/tailer.rb
ADDED
@@ -0,0 +1,36 @@
|
|
1
|
+
module MoSQL
  # Persists the oplog position in a SQL table so tailing can resume
  # where it left off, keyed by a service name.
  class Tailer < Mongoriver::AbstractPersistentTailer
    # Ensure the state table exists and return a dataset over it.
    def self.create_table(db, tablename)
      db.create_table?(tablename) do
        column :service, 'TEXT'
        column :timestamp, 'INTEGER'
        primary_key [:service]
      end
      db[tablename.to_sym]
    end

    def initialize(backends, type, table, opts)
      super(backends, type, opts)
      @table = table
      @service = opts[:service] || "mosql"
    end

    # Load the saved position for this service; a zero timestamp means
    # no position has been recorded yet.
    def read_timestamp
      row = @table.where(:service => @service).select([:timestamp]).first
      seconds = row ? row[:timestamp] : 0
      BSON::Timestamp.new(seconds, 0)
    end

    # Save the position: INSERT, falling back to UPDATE when this
    # service already has a row (duplicate-key error).
    def write_timestamp(ts)
      @table.insert({:service => @service, :timestamp => ts.seconds})
    rescue Sequel::DatabaseError => err
      raise unless err.message =~ /duplicate key value violates unique constraint/
      @table.where(:service => @service).update(:timestamp => ts.seconds)
    end
  end
end
|
data/mosql.gemspec
ADDED
@@ -0,0 +1,24 @@
|
|
1
|
+
# -*- coding: utf-8 -*-
# Gem specification for mosql.
$:.unshift(File.expand_path("lib", File.dirname(__FILE__)))
require 'mosql/version'

Gem::Specification.new do |gem|
  gem.authors       = ["Nelson Elhage"]
  gem.email         = ["nelhage@stripe.com"]
  gem.description   = %q{A library for streaming MongoDB to SQL}
  gem.summary       = %q{MongoDB -> SQL streaming bridge}
  gem.homepage      = "https://github.com/stripe/mosql"

  # Split `git ls-files` output on $/ (the input record separator,
  # "\n"). The old template's $\ is the *output* record separator,
  # which defaults to nil; split(nil) falls back to splitting on any
  # whitespace and would mangle file names containing spaces.
  gem.files         = `git ls-files`.split($/)
  gem.executables   = gem.files.grep(%r{^bin/}).map{ |f| File.basename(f) }
  gem.test_files    = gem.files.grep(%r{^(test|spec|features)/})
  gem.name          = "mosql"
  gem.require_paths = ["lib"]
  gem.version       = MoSQL::VERSION

  %w[sequel pg mongo bson_ext rake log4r json
     mongoriver].each { |dep| gem.add_runtime_dependency(dep) }

  gem.add_development_dependency "minitest"
  gem.add_development_dependency "mocha"
end
|
data/test/_lib.rb
ADDED
@@ -0,0 +1,18 @@
|
|
1
|
+
# Shared bootstrap for the test suite: loads bundler, minitest and
# mocha, puts lib/ on the load path, and defines the common base class.
require 'rubygems'
require 'bundler/setup'

require 'minitest/autorun'
require 'minitest/spec'
require 'mocha'

# Make lib/ requirable so the tests exercise the checked-out sources.
$:.unshift(File.expand_path(File.join(File.dirname(__FILE__), '../lib')))

require 'mosql'

module MoSQL
  # Base class for all MoSQL tests (minitest spec-style).
  class Test < ::MiniTest::Spec
    def setup
      # Put any stubs here that you want to apply globally
    end
  end
end
|
@@ -0,0 +1,65 @@
|
|
1
|
+
require File.join(File.dirname(__FILE__), "../_lib")

module MoSQL
  # Base class for functional tests: connects to real PostgreSQL and
  # MongoDB instances in setup. Connection targets are configurable
  # via $MONGOSQL_TEST_SQL, $MONGOSQL_TEST_MONGO and
  # $MONGOSQL_TEST_MONGO_DB; sensible local defaults otherwise.
  class Test::Functional < MoSQL::Test
    attr_reader :sequel, :mongo

    # Sequel URI of the test PostgreSQL database.
    def sql_test_uri
      ENV['MONGOSQL_TEST_SQL'] || 'postgres:///test'
    end
    # URI of the test MongoDB server.
    def mongo_test_uri
      ENV['MONGOSQL_TEST_MONGO'] || 'mongodb://localhost'
    end
    # Name of the Mongo database used by the tests.
    def mongo_test_dbname
      ENV['MONGOSQL_TEST_MONGO_DB'] || 'test'
    end

    # Connect to PostgreSQL and verify the connection works, or print
    # setup instructions to stderr and exit(1).
    def connect_sql
      begin
        conn = Sequel.connect(sql_test_uri)
        conn.test_connection
        conn
      rescue Sequel::DatabaseConnectionError
        $stderr.puts <<EOF

*********************************************************************
Unable to connect to PostgreSQL database at `#{sql_test_uri}'. Either
configure a PostgresSQL database running locally without
authentication with a 'test' database, or set \$MONGOSQL_TEST_SQL in
the environment.
*********************************************************************

EOF
        exit(1)
      end
    end

    # Connect to MongoDB, or print setup instructions and exit(1).
    def connect_mongo
      begin
        Mongo::Connection.from_uri(mongo_test_uri)
      rescue Mongo::ConnectionFailure, Mongo::ConnectionError
        $stderr.puts <<EOF

*********************************************************************
Unable to connect to MongoDB database at `#{mongo_test_uri}'. Either
configure a MongoDB database running on localhost without
authentication with a 'test' database, or set \$MONGOSQL_TEST_MONGO in
the environment.
*********************************************************************

EOF
        exit(1)
      end
    end

    # Handle on the test Mongo database.
    def mongo_db
      mongo.db(mongo_test_dbname)
    end

    def setup
      @sequel = connect_sql
      @mongo = connect_mongo
      super
    end
  end
end
|
@@ -0,0 +1,66 @@
|
|
1
|
+
require File.join(File.dirname(__FILE__), '_lib.rb')

# Functional tests for MoSQL::Schema: table creation, COPY loading, and
# BSON::ObjectId transformation against a live PostgreSQL.
class MoSQL::Test::Functional::SchemaTest < MoSQL::Test::Functional
  # Minimal two-collection map: a plain table and one with extra_props.
  # (Heredoc indentation reconstructed from the map's YAML structure.)
  TEST_MAP = <<EOF
---
db:
  collection:
    :meta:
      :table: sqltable
    :columns:
      - _id: TEXT
      - var: INTEGER
  with_extra_props:
    :meta:
      :table: sqltable2
      :extra_props: true
    :columns:
      - _id: INTEGER
EOF

  before do
    @map = MoSQL::Schema.new(YAML.load(TEST_MAP))

    @sequel.drop_table?(:sqltable)
    @sequel.drop_table?(:sqltable2)
    @map.create_schema(@sequel)
  end

  def table; @sequel[:sqltable]; end
  def table2; @sequel[:sqltable2]; end

  it 'Creates the tables with the right columns' do
    assert_equal(Set.new([:_id, :var]),
                 Set.new(table.columns))
    assert_equal(Set.new([:_id, :_extra_props]),
                 Set.new(table2.columns))
  end

  it 'Can COPY data' do
    objects = [
      {'_id' => "a", 'var' => 0},
      {'_id' => "b", 'var' => 1},
      {'_id' => "c"},
      {'_id' => "d", 'other_var' => "hello"}
    ]
    @map.copy_data(@sequel, 'db.collection', objects.map { |o| @map.transform('db.collection', o) } )
    assert_equal(4, table.count)
    rows = table.select.sort_by { |r| r[:_id] }
    assert_equal(%w[a b c d], rows.map { |r| r[:_id] })
    # assert_nil rather than assert_equal(nil, ...): the latter is
    # deprecated and an error in modern minitest.
    assert_nil(rows[2][:var])
    assert_nil(rows[3][:var])
  end

  it 'Can COPY BSON::ObjectIDs' do
    o = {'_id' => BSON::ObjectId.new, 'var' => 0}
    @map.copy_data(@sequel, 'db.collection', [ @map.transform('db.collection', o)] )
    assert_equal(o['_id'].to_s, table.select.first[:_id])
  end

  it 'Can transform BSON::ObjectIDs' do
    o = {'_id' => BSON::ObjectId.new, 'var' => 0}
    row = @map.transform('db.collection', o)
    table.insert(row)
    assert_equal(o['_id'].to_s, table.select.first[:_id])
  end
end
|