mosql 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.gitignore +2 -0
- data/Gemfile +4 -0
- data/Gemfile.lock +48 -0
- data/README.md +168 -0
- data/Rakefile +12 -0
- data/bin/mosql +7 -0
- data/lib/mosql.rb +11 -0
- data/lib/mosql/cli.rb +305 -0
- data/lib/mosql/log.rb +7 -0
- data/lib/mosql/schema.rb +149 -0
- data/lib/mosql/sql.rb +59 -0
- data/lib/mosql/tailer.rb +36 -0
- data/lib/mosql/version.rb +3 -0
- data/mosql.gemspec +24 -0
- data/test/_lib.rb +18 -0
- data/test/functional/_lib.rb +65 -0
- data/test/functional/functional.rb +7 -0
- data/test/functional/schema.rb +66 -0
- data/test/functional/sql.rb +38 -0
- data/test/unit/lib/mongo-sql/schema.rb +102 -0
- metadata +232 -0
data/lib/mosql/log.rb
ADDED
data/lib/mosql/schema.rb
ADDED
@@ -0,0 +1,149 @@
|
|
1
|
+
module MoSQL
  class SchemaError < StandardError; end

  # Parses the collection-map configuration and translates MongoDB
  # documents into SQL rows (and PostgreSQL COPY text) for each mapped
  # namespace ("db.collection").
  class Schema
    include MoSQL::Logging

    # Convert a list of one-key hashes ([{'_id' => 'TEXT'}, ...]) into a
    # single insertion-ordered field => type hash.
    #
    # Raises RuntimeError if any entry is not a single-key Hash.
    def to_ordered_hash(lst)
      hash = BSON::OrderedHash.new
      lst.each do |ent|
        raise "Invalid ordered hash entry #{ent.inspect}" unless ent.is_a?(Hash) && ent.keys.length == 1
        field, type = ent.first
        hash[field] = type
      end
      hash
    end

    # Normalize one collection spec: keep :meta as-is, but turn the
    # :columns list into an ordered hash for iteration in column order.
    def parse_spec(spec)
      out = spec.dup
      out[:columns] = to_ordered_hash(spec[:columns])
      out
    end

    # map is a nested hash: dbname => { collection_name => spec }.
    def initialize(map)
      @map = {}
      map.each do |dbname, db|
        @map[dbname] ||= {}
        db.each do |cname, spec|
          @map[dbname][cname] = parse_spec(spec)
        end
      end
    end

    # Create one SQL table per mapped collection. With clobber, drop and
    # recreate (create_table!); otherwise create only if missing
    # (create_table?). Every table gets a primary key on _id, plus an
    # _extra_props TEXT column when the spec requests it.
    def create_schema(db, clobber=false)
      @map.values.map(&:values).flatten.each do |collection|
        meta = collection[:meta]
        log.info("Creating table '#{meta[:table]}'...")
        db.send(clobber ? :create_table! : :create_table?, meta[:table]) do
          collection[:columns].each do |field, type|
            column field, type
          end
          if meta[:extra_props]
            column '_extra_props', 'TEXT'
          end
          primary_key [:_id]
        end
      end
    end

    # Look up the spec for a "db.collection" namespace; nil if unmapped.
    def find_ns(ns)
      db, collection = ns.split(".")
      schema = (@map[db] || {})[collection]
      if schema.nil?
        log.debug("No mapping for ns: #{ns}")
        return nil
      end
      schema
    end

    # Like find_ns, but raises SchemaError for unmapped namespaces.
    def find_ns!(ns)
      schema = find_ns(ns)
      raise SchemaError, "No mapping for namespace: #{ns}" if schema.nil?
      schema
    end

    # Translate a MongoDB document into an array of column values in
    # schema order. BSON binary/ObjectId values are stringified; when the
    # schema enables :extra_props, all remaining (unmapped) fields are
    # serialized to JSON and appended as the final value.
    def transform(ns, obj, schema=nil)
      schema ||= find_ns!(ns)

      obj = obj.dup
      row = []
      schema[:columns].each do |name, type|
        v = obj.delete(name)
        case v
        when BSON::Binary, BSON::ObjectId
          v = v.to_s
        end
        row << v
      end

      if schema[:meta][:extra_props]
        # Kludgily delete binary blobs from _extra_props -- they may
        # contain invalid UTF-8, which to_json will not properly encode.
        # delete_if is used instead of each+delete: deleting keys while
        # iterating the same hash with #each mutates it mid-iteration.
        obj.delete_if { |k, v| v.is_a?(BSON::Binary) }
        row << obj.to_json
      end

      log.debug { "Transformed: #{row.inspect}" }

      row
    end

    # All SQL column names for a spec, including _extra_props if enabled.
    def all_columns(schema)
      cols = schema[:columns].keys
      if schema[:meta][:extra_props]
        cols << "_extra_props"
      end
      cols
    end

    # Bulk-load pre-transformed rows (arrays, as produced by #transform)
    # into the namespace's table via PostgreSQL's COPY protocol.
    #
    # Note: the original also pre-joined all rows into an unused local
    # (`data`), transforming every row twice; that dead work is removed.
    def copy_data(db, ns, objs)
      schema = find_ns!(ns)
      db.synchronize do |pg|
        sql = "COPY \"#{schema[:meta][:table]}\" " +
          "(#{all_columns(schema).map {|c| "\"#{c}\""}.join(",")}) FROM STDIN"
        pg.execute(sql)
        objs.each do |o|
          pg.put_copy_data(transform_to_copy(ns, o, schema) + "\n")
        end
        pg.put_copy_end
        begin
          pg.get_result.check
        rescue PGError => e
          # Route the failure through Sequel so callers see a
          # consistently-wrapped database error.
          db.send(:raise_error, e)
        end
      end
    end

    # Escape a single value for COPY text format: NULL becomes \N,
    # booleans become t/f, and backslash/tab/newline/CR are
    # backslash-escaped so they survive the tab-delimited format.
    def quote_copy(val)
      case val
      when nil
        "\\N"
      when true
        't'
      when false
        'f'
      else
        val.to_s.gsub(/([\\\t\n\r])/, '\\\\\\1')
      end
    end

    # Render one row (array of values) as a tab-separated COPY line.
    # ns and schema are accepted for interface symmetry but unused.
    def transform_to_copy(ns, row, schema=nil)
      row.map { |c| quote_copy(c) }.join("\t")
    end

    # SQL table name for a namespace; raises SchemaError if unmapped.
    def table_for_ns(ns)
      find_ns!(ns)[:meta][:table]
    end

    # All MongoDB database names present in the map.
    def all_mongo_dbs
      @map.keys
    end

    # All mapped collection names for one MongoDB database.
    def collections_for_mongo_db(db)
      (@map[db]||{}).keys
    end
  end
end
|
data/lib/mosql/sql.rb
ADDED
@@ -0,0 +1,59 @@
|
|
1
|
+
module MoSQL
  # Thin wrapper around a Sequel database handle that knows how to
  # upsert transformed MongoDB documents into the mapped SQL tables.
  class SQLAdapter
    include MoSQL::Logging

    attr_reader :db

    # schema:   a MoSQL::Schema instance
    # uri:      a Sequel connection URI
    # pgschema: optional PostgreSQL schema (namespace) to create and use
    def initialize(schema, uri, pgschema=nil)
      @schema = schema
      connect_db(uri, pgschema)
    end

    # Connect via Sequel. When pgschema is given, every new connection
    # creates the schema (ignoring "already exists" failures) and points
    # search_path at it.
    def connect_db(uri, pgschema)
      @db = Sequel.connect(uri, :after_connect => proc do |conn|
        if pgschema
          begin
            conn.execute("CREATE SCHEMA \"#{pgschema}\"")
          rescue PG::Error
            # Schema already exists -- that's fine.
          end
          conn.execute("SET search_path TO \"#{pgschema}\"")
        end
      end)
    end

    # The Sequel dataset backing the given MongoDB namespace.
    def table_for_ns(ns)
      @db[@schema.table_for_ns(ns).intern]
    end

    # Transform a MongoDB document and upsert it into the table for ns.
    def upsert_ns(ns, obj)
      h = {}
      # find_ns! (not find_ns) so an unmapped namespace raises a
      # descriptive SchemaError instead of NoMethodError on nil,
      # matching how Schema#transform reports the same condition.
      cols = @schema.all_columns(@schema.find_ns!(ns))
      row = @schema.transform(ns, obj)
      cols.zip(row).each { |k,v| h[k] = v }
      upsert(table_for_ns(ns), h)
    end

    # Upsert, logging and skipping rows the server rejects (the wrapped
    # PG error carries a result) rather than aborting the whole stream.
    def upsert(table, item)
      begin
        upsert!(table, item)
      rescue Sequel::DatabaseError => e
        wrapped = e.wrapped_exception
        # respond_to? guards against non-PG wrapped exceptions (e.g.
        # connection failures), which have no #result; calling it
        # unguarded would raise NoMethodError and mask `e`.
        if wrapped.respond_to?(:result) && wrapped.result
          log.warn("Ignoring row (_id=#{item['_id']}): #{e}")
        else
          raise e
        end
      end
    end

    # Insert; on a duplicate-key violation, fall back to updating the
    # existing row by _id.
    def upsert!(table, item)
      begin
        table.insert(item)
      rescue Sequel::DatabaseError => e
        raise e unless e.message =~ /duplicate key value violates unique constraint/
        table.where(:_id => item['_id']).update(item)
      end
    end
  end
end
|
59
|
+
|
data/lib/mosql/tailer.rb
ADDED
@@ -0,0 +1,36 @@
|
|
1
|
+
module MoSQL
  # Persists the oplog resume position in a SQL table so tailing can
  # pick up where it left off across restarts. One row per :service.
  class Tailer < Mongoriver::AbstractPersistentTailer
    # Create the state table if it does not exist and return the Sequel
    # dataset for it. Only the seconds component of the BSON timestamp
    # is stored (see read_timestamp/write_timestamp).
    def self.create_table(db, tablename)
      db.create_table?(tablename) do
        column :service, 'TEXT'
        column :timestamp, 'INTEGER'
        primary_key [:service]
      end
      db[tablename.to_sym]
    end

    # table: the Sequel dataset returned by create_table.
    # opts[:service] names this tailer's row; defaults to "mosql".
    def initialize(backends, type, table, opts)
      super(backends, type, opts)
      @table = table
      @service = opts[:service] || "mosql"
    end

    # Load the saved position for this service. The increment part of
    # the timestamp is not persisted, so it is always restored as 0;
    # falls back to timestamp (0, 0) when no row exists yet.
    def read_timestamp
      row = @table.where(:service => @service).select([:timestamp]).first
      if row
        BSON::Timestamp.new(row[:timestamp], 0)
      else
        BSON::Timestamp.new(0, 0)
      end
    end

    # Save the position: insert first, and on a duplicate-key violation
    # (row already exists for this service) update it instead.
    def write_timestamp(ts)
      begin
        @table.insert({:service => @service, :timestamp => ts.seconds})
      rescue Sequel::DatabaseError => e
        raise unless e.message =~ /duplicate key value violates unique constraint/
        @table.where(:service => @service).update(:timestamp => ts.seconds)
      end
    end
  end
end
|
data/mosql.gemspec
ADDED
@@ -0,0 +1,24 @@
|
|
1
|
+
# -*- coding: utf-8 -*-
# Gem specification for mosql. Put lib/ on the load path first so we
# read the version from the checked-out source, not an installed gem.
$:.unshift(File.expand_path("lib", File.dirname(__FILE__)))
require 'mosql/version'

Gem::Specification.new do |gem|
  gem.authors       = ["Nelson Elhage"]
  gem.email         = ["nelhage@stripe.com"]
  gem.description   = %q{A library for streaming MongoDB to SQL}
  gem.summary       = %q{MongoDB -> SQL streaming bridge}
  gem.homepage      = "https://github.com/stripe/mosql"

  # NOTE(review): `$\` is the *output* record separator (nil by default),
  # so this splits on whitespace; `$/` (newline) was probably intended.
  # Works as long as no tracked filename contains spaces -- confirm.
  gem.files         = `git ls-files`.split($\)
  gem.executables   = gem.files.grep(%r{^bin/}).map{ |f| File.basename(f) }
  gem.test_files    = gem.files.grep(%r{^(test|spec|features)/})
  gem.name          = "mosql"
  gem.require_paths = ["lib"]
  gem.version       = MoSQL::VERSION

  # Runtime dependencies, unversioned.
  %w[sequel pg mongo bson_ext rake log4r json
     mongoriver].each { |dep| gem.add_runtime_dependency(dep) }

  gem.add_development_dependency "minitest"
  gem.add_development_dependency "mocha"
end
|
data/test/_lib.rb
ADDED
@@ -0,0 +1,18 @@
|
|
1
|
+
# Shared bootstrap for the test suite: loads bundler, minitest + mocha,
# and the mosql library from the local checkout.
require 'rubygems'
require 'bundler/setup'

require 'minitest/autorun'
require 'minitest/spec'
require 'mocha'

# Prefer the checked-out lib/ over any installed mosql gem.
$:.unshift(File.expand_path(File.join(File.dirname(__FILE__), '../lib')))

require 'mosql'

module MoSQL
  # Common base class for all MoSQL test cases.
  class Test < ::MiniTest::Spec
    def setup
      # Put any stubs here that you want to apply globally
    end
  end
end
|
@@ -0,0 +1,65 @@
|
|
1
|
+
require File.join(File.dirname(__FILE__), "../_lib")

module MoSQL
  # Base class for functional tests: connects to live PostgreSQL and
  # MongoDB instances before each test. Connection targets default to
  # local unauthenticated servers and can be overridden through the
  # MONGOSQL_TEST_SQL / MONGOSQL_TEST_MONGO / MONGOSQL_TEST_MONGO_DB
  # environment variables. If either backend is unreachable, the suite
  # prints setup instructions and exits rather than failing each test.
  class Test::Functional < MoSQL::Test
    attr_reader :sequel, :mongo

    def sql_test_uri
      ENV['MONGOSQL_TEST_SQL'] || 'postgres:///test'
    end
    def mongo_test_uri
      ENV['MONGOSQL_TEST_MONGO'] || 'mongodb://localhost'
    end
    def mongo_test_dbname
      ENV['MONGOSQL_TEST_MONGO_DB'] || 'test'
    end

    # Connect to PostgreSQL and verify the connection actually works;
    # on failure, print instructions and exit(1).
    # (Fixes the "PostgresSQL" typo in the original message.)
    def connect_sql
      begin
        conn = Sequel.connect(sql_test_uri)
        conn.test_connection
        conn
      rescue Sequel::DatabaseConnectionError
        $stderr.puts <<EOF

*********************************************************************
Unable to connect to PostgreSQL database at `#{sql_test_uri}'. Either
configure a PostgreSQL database running locally without
authentication with a 'test' database, or set \$MONGOSQL_TEST_SQL in
the environment.
*********************************************************************

EOF
        exit(1)
      end
    end

    # Connect to MongoDB; on failure, print instructions and exit(1).
    def connect_mongo
      begin
        Mongo::Connection.from_uri(mongo_test_uri)
      rescue Mongo::ConnectionFailure, Mongo::ConnectionError
        $stderr.puts <<EOF

*********************************************************************
Unable to connect to MongoDB database at `#{mongo_test_uri}'. Either
configure a MongoDB database running on localhost without
authentication with a 'test' database, or set \$MONGOSQL_TEST_MONGO in
the environment.
*********************************************************************

EOF
        exit(1)
      end
    end

    # The Mongo::DB handle for the configured test database.
    def mongo_db
      mongo.db(mongo_test_dbname)
    end

    def setup
      @sequel = connect_sql
      @mongo = connect_mongo
      super
    end
  end
end
|
@@ -0,0 +1,66 @@
|
|
1
|
+
require File.join(File.dirname(__FILE__), '_lib.rb')

# Functional tests for MoSQL::Schema against a live PostgreSQL: table
# creation, COPY loading, and BSON::ObjectId transformation.
class MoSQL::Test::Functional::SchemaTest < MoSQL::Test::Functional
  # Two mapped collections: a plain one and one with :extra_props.
  TEST_MAP = <<EOF
---
db:
  collection:
    :meta:
      :table: sqltable
    :columns:
      - _id: TEXT
      - var: INTEGER
  with_extra_props:
    :meta:
      :table: sqltable2
      :extra_props: true
    :columns:
      - _id: INTEGER
EOF

  before do
    @map = MoSQL::Schema.new(YAML.load(TEST_MAP))

    # Start from a clean slate so column assertions are deterministic.
    @sequel.drop_table?(:sqltable)
    @sequel.drop_table?(:sqltable2)
    @map.create_schema(@sequel)
  end

  def table; @sequel[:sqltable]; end
  def table2; @sequel[:sqltable2]; end

  it 'Creates the tables with the right columns' do
    assert_equal(Set.new([:_id, :var]),
                 Set.new(table.columns))
    # extra_props adds the JSON catch-all column.
    assert_equal(Set.new([:_id, :_extra_props]),
                 Set.new(table2.columns))
  end

  it 'Can COPY data' do
    objects = [
      {'_id' => "a", 'var' => 0},
      {'_id' => "b", 'var' => 1},
      {'_id' => "c"},
      {'_id' => "d", 'other_var' => "hello"}
    ]
    # copy_data takes pre-transformed rows (arrays), not raw documents.
    @map.copy_data(@sequel, 'db.collection', objects.map { |o| @map.transform('db.collection', o) } )
    assert_equal(4, table.count)
    rows = table.select.sort_by { |r| r[:_id] }
    assert_equal(%w[a b c d], rows.map { |r| r[:_id] })
    # Missing and unmapped fields both surface as SQL NULL.
    assert_equal(nil, rows[2][:var])
    assert_equal(nil, rows[3][:var])
  end

  it 'Can COPY BSON::ObjectIDs' do
    o = {'_id' => BSON::ObjectId.new, 'var' => 0}
    @map.copy_data(@sequel, 'db.collection', [ @map.transform('db.collection', o)] )
    # ObjectIds are stringified on the way into the TEXT column.
    assert_equal(o['_id'].to_s, table.select.first[:_id])
  end

  it 'Can transform BSON::ObjectIDs' do
    o = {'_id' => BSON::ObjectId.new, 'var' => 0}
    row = @map.transform('db.collection', o)
    table.insert(row)
    assert_equal(o['_id'].to_s, table.select.first[:_id])
  end
end
|