mosql 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,7 @@
1
module MoSQL
  # Mixin that gives including classes a +log+ accessor.
  module Logging
    # Returns the logger held in this module's class variable, creating a
    # Log4r logger named "Stripe::MoSQL" the first time it is requested.
    def log
      return @@logger if defined?(@@logger) && @@logger

      @@logger = Log4r::Logger.new("Stripe::MoSQL")
    end
  end
end
@@ -0,0 +1,149 @@
1
+ module MoSQL
2
# Error raised by Schema#find_ns! when a namespace has no mapping.
class SchemaError < StandardError
end
3
+
4
+ class Schema
5
+ include MoSQL::Logging
6
+
7
# Converts a list of single-pair hashes into one BSON::OrderedHash whose
# entries appear in list order.
#
# lst - Array of Hashes, each holding exactly one key/value pair.
#
# Raises RuntimeError if an entry is not a Hash with exactly one key.
def to_ordered_hash(lst)
  lst.each_with_object(BSON::OrderedHash.new) do |entry, ordered|
    unless entry.is_a?(Hash) && entry.keys.length == 1
      raise "Invalid ordered hash entry #{entry.inspect}"
    end

    key, value = entry.first
    ordered[key] = value
  end
end
16
+
17
# Returns a shallow copy of +spec+ whose :columns entry has been replaced
# by the result of to_ordered_hash. The caller's hash is left untouched.
def parse_spec(spec)
  spec.dup.tap do |parsed|
    parsed[:columns] = to_ordered_hash(spec[:columns])
  end
end
22
+
23
# Builds the internal lookup table from a nested mapping.
#
# map - Hash of database name => { collection name => spec }; every spec
#       is passed through parse_spec before being stored.
def initialize(map)
  @map = map.each_with_object({}) do |(dbname, collections), parsed|
    parsed[dbname] ||= {}
    collections.each do |cname, spec|
      parsed[dbname][cname] = parse_spec(spec)
    end
  end
end
32
+
33
# Creates one SQL table per mapped collection.
#
# db      - object responding to create_table? / create_table!.
# clobber - when true, tables are created with create_table!; otherwise
#           with create_table?.
#
# NOTE(review): `column` and `primary_key` are not defined here; presumably
# db evaluates the block against its own table-definition DSL (Sequel's
# create_table generator) -- confirm.
def create_schema(db, clobber=false)
  create_method = clobber ? :create_table! : :create_table?
  @map.values.flat_map(&:values).each do |spec|
    meta = spec[:meta]
    log.info("Creating table '#{meta[:table]}'...")
    db.send(create_method, meta[:table]) do
      spec[:columns].each { |field, type| column field, type }
      # Collections flagged with :extra_props get one extra TEXT column.
      column '_extra_props', 'TEXT' if meta[:extra_props]
      primary_key [:_id]
    end
  end
end
48
+
49
# Looks up the parsed spec for a "database.collection" namespace.
#
# ns - namespace String; everything before the first "." is the database
#      name and everything after it is the collection name.
#
# Returns the spec Hash, or nil (after logging at debug level) when the
# namespace is not mapped.
def find_ns(ns)
  # Split on the first dot only: a collection name may itself contain dots
  # (e.g. "db.foo.bar"), and a plain split would truncate it to "foo".
  db, collection = ns.split(".", 2)
  schema = (@map[db] || {})[collection]
  if schema.nil?
    log.debug("No mapping for ns: #{ns}")
    return nil
  end
  schema
end
58
+
59
# Strict variant of find_ns.
#
# Returns the spec for +ns+.
# Raises SchemaError when find_ns returns nil.
def find_ns!(ns)
  found = find_ns(ns)
  return found unless found.nil?

  raise SchemaError, "No mapping for namespace: #{ns}"
end
64
+
65
# Converts a Mongo document into a row Array ordered like the schema columns.
#
# ns     - namespace String, used to look up the schema when none is given.
# obj    - document Hash; it is duplicated, so the caller's hash is not
#          modified.
# schema - optional spec Hash (as returned by find_ns!); looked up via
#          find_ns!(ns) when nil.
#
# Returns an Array with one value per schema column (nil when the document
# lacks the field), followed by a JSON string of the remaining fields when
# the schema's :meta has :extra_props set.
def transform(ns, obj, schema=nil)
  schema ||= find_ns!(ns)

  obj = obj.dup
  row = []
  schema[:columns].each do |name, _type|
    v = obj.delete(name)
    case v
    when BSON::Binary, BSON::ObjectId
      v = v.to_s
    end
    row << v
  end

  if schema[:meta][:extra_props]
    # Kludgily delete binary blobs from _extra_props -- they may
    # contain invalid UTF-8, which to_json will not properly encode.
    # delete_if is used so the hash is not mutated from inside an `each`.
    obj.delete_if { |_key, value| value.is_a?(BSON::Binary) }
    row << obj.to_json
  end

  log.debug { "Transformed: #{row.inspect}" }

  row
end
92
+
93
# Lists the SQL column names for a spec, in schema order.
#
# schema - spec Hash with :columns (responding to #keys) and :meta.
#
# Returns a new Array of the column names, with "_extra_props" appended
# when the spec's :meta has :extra_props set.
def all_columns(schema)
  # Work on a copy: the array handed back by #keys is not guaranteed to be
  # private to us, and appending to it could alter (or fail on) a key list
  # owned by the columns object.
  cols = schema[:columns].keys.dup
  cols << "_extra_props" if schema[:meta][:extra_props]
  cols
end
100
+
101
# Bulk-loads already-transformed rows into the table mapped to +ns+ using
# a "COPY ... FROM STDIN" statement.
#
# db   - object whose #synchronize yields a raw connection responding to
#        execute, put_copy_data, put_copy_end and get_result.
# ns   - namespace String; must be mapped (find_ns! raises otherwise).
# objs - Enumerable of row Arrays, each serialised with transform_to_copy.
def copy_data(db, ns, objs)
  schema = find_ns!(ns)
  column_list = all_columns(schema).map { |c| "\"#{c}\"" }.join(",")
  db.synchronize do |pg|
    sql = "COPY \"#{schema[:meta][:table]}\" (#{column_list}) FROM STDIN"
    pg.execute(sql)
    # Rows are streamed one at a time; nothing is buffered up front.
    objs.each do |o|
      pg.put_copy_data(transform_to_copy(ns, o, schema) + "\n")
    end
    pg.put_copy_end
    begin
      pg.get_result.check
    rescue PGError => e
      # NOTE(review): presumably hands the driver error to db's own error
      # conversion (raise_error is invoked via send) -- confirm.
      db.send(:raise_error, e)
    end
  end
end
119
+
120
# Renders a single value as a field for COPY input.
#
# nil becomes "\N", true/false become 't'/'f', and anything else is
# stringified with each backslash, tab, newline and carriage return
# preceded by a backslash.
def quote_copy(val)
  return "\\N" if val.nil?
  return 't' if true.equal?(val)
  return 'f' if false.equal?(val)

  val.to_s.gsub(/([\\\t\n\r])/, '\\\\\\1')
end
132
+
133
# Serialises one row Array as a tab-separated COPY line (no trailing
# newline). +ns+ and +schema+ are accepted but not used.
def transform_to_copy(ns, row, schema=nil)
  fields = row.map { |cell| quote_copy(cell) }
  fields.join("\t")
end
136
+
137
# Returns the :table entry from the :meta of the spec mapped to +ns+.
# Raises (via find_ns!) when the namespace is not mapped.
def table_for_ns(ns)
  meta = find_ns!(ns)[:meta]
  meta[:table]
end
140
+
141
# Returns an Array of every database name present in the mapping.
def all_mongo_dbs
  @map.each_key.to_a
end
144
+
145
# Returns the collection names mapped under database +db+, or an empty
# Array when that database is not in the mapping.
def collections_for_mongo_db(db)
  collections = @map[db]
  collections ? collections.keys : []
end
148
+ end
149
+ end
@@ -0,0 +1,59 @@
1
+ module MoSQL
2
+ class SQLAdapter
3
+ include MoSQL::Logging
4
+
5
+ attr_reader :db
6
+
7
# Stores the schema and opens the database connection right away.
#
# schema   - object consulted for namespace lookups by the other methods.
# uri      - connection URI forwarded to connect_db.
# pgschema - optional schema name forwarded to connect_db (default: nil).
def initialize(schema, uri, pgschema = nil)
  @schema = schema
  connect_db(uri, pgschema)
end
11
+
12
# Connects through Sequel and stores the handle in @db.
#
# When +pgschema+ is given, an :after_connect hook issues CREATE SCHEMA for
# it and then points search_path at it on each connection the hook sees.
def connect_db(uri, pgschema)
  select_schema = proc { |conn|
    next unless pgschema

    begin
      conn.execute("CREATE SCHEMA \"#{pgschema}\"")
    rescue PG::Error
      # Deliberately ignored -- presumably the schema already exists.
    end
    conn.execute("SET search_path TO \"#{pgschema}\"")
  }
  @db = Sequel.connect(uri, :after_connect => select_schema)
end
23
+
24
# Returns the @db entry for the table mapped to +ns+; the table name from
# the schema is interned to a Symbol before indexing @db.
def table_for_ns(ns)
  table_name = @schema.table_for_ns(ns)
  @db[table_name.intern]
end
27
+
28
# Transforms a Mongo document and upserts it into the table mapped to +ns+.
#
# ns  - namespace String.
# obj - document Hash.
#
# Raises whatever @schema.find_ns! raises for an unmapped namespace, rather
# than failing later on a nil schema.
def upsert_ns(ns, obj)
  # Resolve the schema once, strictly, and reuse it for the transform.
  ns_schema = @schema.find_ns!(ns)
  cols = @schema.all_columns(ns_schema)
  row = @schema.transform(ns, obj, ns_schema)
  upsert(table_for_ns(ns), Hash[cols.zip(row)])
end
35
+
36
# Upserts +item+ into +table+, tolerating rows the database rejects.
#
# A Sequel::DatabaseError whose wrapped exception carries a truthy #result
# is logged as a warning and swallowed; any other database error is
# re-raised unchanged.
# NOTE(review): a present #result presumably means the server rejected
# this particular statement (as opposed to a connection-level failure)
# -- confirm against the driver.
def upsert(table, item)
  upsert!(table, item)
rescue Sequel::DatabaseError => e
  wrapped = e.wrapped_exception
  # Guard the #result call: the wrapped exception may be nil or of a type
  # without #result, and a NoMethodError here would mask the real error.
  raise unless wrapped.respond_to?(:result) && wrapped.result

  log.warn("Ignoring row (_id=#{item['_id']}): #{e}")
end
48
+
49
# Inserts +item+; if the insert fails with a duplicate-key error, updates
# the row whose :_id equals item['_id'] instead. Any other
# Sequel::DatabaseError is re-raised.
def upsert!(table, item)
  table.insert(item)
rescue Sequel::DatabaseError => e
  if e.message =~ /duplicate key value violates unique constraint/
    table.where(:_id => item['_id']).update(item)
  else
    raise e
  end
end
57
+ end
58
+ end
59
+
@@ -0,0 +1,36 @@
1
module MoSQL
  # Tailer whose saved position lives in a SQL table keyed by service name.
  class Tailer < Mongoriver::AbstractPersistentTailer
    class << self
      # Ensures the bookkeeping table exists (service TEXT primary key,
      # timestamp INTEGER) and returns db[tablename] for it.
      def create_table(db, tablename)
        db.create_table?(tablename) do
          column :service, 'TEXT'
          column :timestamp, 'INTEGER'
          primary_key [:service]
        end
        db[tablename.to_sym]
      end
    end

    # backends, type and opts are forwarded to the superclass; +table+ is
    # the dataset used to persist the position. opts[:service] names the
    # row to use and falls back to "mosql".
    def initialize(backends, type, table, opts)
      super(backends, type, opts)
      @table = table
      @service = opts[:service] || "mosql"
    end

    # Returns the stored position as a BSON::Timestamp, using 0 seconds
    # when no row exists for this service.
    def read_timestamp
      row = @table.where(:service => @service).select([:timestamp]).first
      seconds = row ? row[:timestamp] : 0
      BSON::Timestamp.new(seconds, 0)
    end

    # Persists ts.seconds for this service: insert first, then fall back
    # to an update when the insert hits the unique constraint.
    def write_timestamp(ts)
      @table.insert({:service => @service, :timestamp => ts.seconds})
    rescue Sequel::DatabaseError => e
      raise unless e.message =~ /duplicate key value violates unique constraint/
      @table.where(:service => @service).update(:timestamp => ts.seconds)
    end
  end
end
@@ -0,0 +1,3 @@
1
module MoSQL
  # Gem version string. Frozen so the shared constant cannot be mutated
  # in place.
  VERSION = "0.1.0".freeze
end
@@ -0,0 +1,24 @@
1
+ # -*- coding: utf-8 -*-
2
+ $:.unshift(File.expand_path("lib", File.dirname(__FILE__)))
3
+ require 'mosql/version'
4
+
5
# Gem packaging metadata for mosql.
Gem::Specification.new do |gem|
  gem.name = "mosql"
  gem.version = MoSQL::VERSION
  gem.authors = ["Nelson Elhage"]
  gem.email = ["nelhage@stripe.com"]
  gem.summary = %q{MongoDB -> SQL streaming bridge}
  gem.description = %q{A library for streaming MongoDB to SQL}
  gem.homepage = "https://github.com/stripe/mosql"

  # The file list is taken from git; executables and test files are
  # derived from that list, so it has to be assigned first.
  gem.files = `git ls-files`.split($\)
  gem.executables = gem.files.grep(%r{^bin/}).map { |path| File.basename(path) }
  gem.test_files = gem.files.grep(%r{^(test|spec|features)/})
  gem.require_paths = ["lib"]

  runtime_deps = %w[sequel pg mongo bson_ext rake log4r json mongoriver]
  runtime_deps.each { |dep| gem.add_runtime_dependency(dep) }

  %w[minitest mocha].each { |dep| gem.add_development_dependency(dep) }
end
@@ -0,0 +1,18 @@
1
+ require 'rubygems'
2
+ require 'bundler/setup'
3
+
4
+ require 'minitest/autorun'
5
+ require 'minitest/spec'
6
+ require 'mocha'
7
+
8
+ $:.unshift(File.expand_path(File.join(File.dirname(__FILE__), '../lib')))
9
+
10
+ require 'mosql'
11
+
12
module MoSQL
  # Common base class for the gem's specs.
  class Test < ::MiniTest::Spec
    # Intentionally empty hook; subclasses reach it through `super`.
    def setup
      # Put any stubs here that you want to apply globally
    end
  end
end
@@ -0,0 +1,65 @@
1
+ require File.join(File.dirname(__FILE__), "../_lib")
2
+
3
module MoSQL
  # Base class for functional specs: opens SQL and Mongo connections
  # before each test.
  class Test::Functional < MoSQL::Test
    attr_reader :sequel, :mongo

    # Connection settings; each can be overridden through the environment.
    def sql_test_uri
      ENV.fetch('MONGOSQL_TEST_SQL', 'postgres:///test')
    end

    def mongo_test_uri
      ENV.fetch('MONGOSQL_TEST_MONGO', 'mongodb://localhost')
    end

    def mongo_test_dbname
      ENV.fetch('MONGOSQL_TEST_MONGO_DB', 'test')
    end

    # Returns a verified Sequel connection. On connection failure, prints
    # setup instructions to stderr and exits with status 1.
    def connect_sql
      conn = Sequel.connect(sql_test_uri)
      conn.test_connection
      conn
    rescue Sequel::DatabaseConnectionError
      $stderr.puts <<EOF

*********************************************************************
Unable to connect to PostgreSQL database at `#{sql_test_uri}'. Either
configure a PostgresSQL database running locally without
authentication with a 'test' database, or set \$MONGOSQL_TEST_SQL in
the environment.
*********************************************************************

EOF
      exit(1)
    end

    # Returns a Mongo connection. On connection failure, prints setup
    # instructions to stderr and exits with status 1.
    def connect_mongo
      Mongo::Connection.from_uri(mongo_test_uri)
    rescue Mongo::ConnectionFailure, Mongo::ConnectionError
      $stderr.puts <<EOF

*********************************************************************
Unable to connect to MongoDB database at `#{mongo_test_uri}'. Either
configure a MongoDB database running on localhost without
authentication with a 'test' database, or set \$MONGOSQL_TEST_MONGO in
the environment.
*********************************************************************

EOF
      exit(1)
    end

    # Handle to the Mongo database used by the tests.
    def mongo_db
      mongo.db(mongo_test_dbname)
    end

    # Opens both connections, then runs the parent class's setup.
    def setup
      @sequel = connect_sql
      @mongo = connect_mongo
      super
    end
  end
end
@@ -0,0 +1,7 @@
1
+ require File.join(File.dirname(__FILE__), '_lib.rb')
2
+
3
class MoSQL::Test::Functional::SchemaTest < MoSQL::Test::Functional
  # Smoke test: passes whenever the functional setup itself succeeds.
  it('runs tests') { assert(true) }
end
@@ -0,0 +1,66 @@
1
+ require File.join(File.dirname(__FILE__), '_lib.rb')
2
+
3
# Functional tests for MoSQL::Schema against a live SQL database.
class MoSQL::Test::Functional::SchemaTest < MoSQL::Test::Functional
  # Mapping for database "db" with two collections: "collection"
  # (columns _id and var) and "with_extra_props" (column _id plus
  # :extra_props).
  TEST_MAP = <<EOF
---
db:
  collection:
    :meta:
      :table: sqltable
    :columns:
      - _id: TEXT
      - var: INTEGER
  with_extra_props:
    :meta:
      :table: sqltable2
      :extra_props: true
    :columns:
      - _id: INTEGER
EOF

  # Start every test from freshly created, empty tables.
  before do
    @map = MoSQL::Schema.new(YAML.load(TEST_MAP))

    @sequel.drop_table?(:sqltable)
    @sequel.drop_table?(:sqltable2)
    @map.create_schema(@sequel)
  end

  def table; @sequel[:sqltable]; end
  def table2; @sequel[:sqltable2]; end

  it 'Creates the tables with the right columns' do
    assert_equal(Set.new([:_id, :var]),
                 Set.new(table.columns))
    assert_equal(Set.new([:_id, :_extra_props]),
                 Set.new(table2.columns))
  end

  it 'Can COPY data' do
    objects = [
      {'_id' => "a", 'var' => 0},
      {'_id' => "b", 'var' => 1},
      {'_id' => "c"},
      {'_id' => "d", 'other_var' => "hello"}
    ]
    @map.copy_data(@sequel, 'db.collection', objects.map { |o| @map.transform('db.collection', o) } )
    assert_equal(4, table.count)
    rows = table.select.sort_by { |r| r[:_id] }
    assert_equal(%w[a b c d], rows.map { |r| r[:_id] })
    # assert_nil rather than assert_equal(nil, ...): the latter is
    # deprecated in Minitest 5 and fails in Minitest 6.
    assert_nil(rows[2][:var])
    assert_nil(rows[3][:var])
  end

  it 'Can COPY BSON::ObjectIDs' do
    o = {'_id' => BSON::ObjectId.new, 'var' => 0}
    @map.copy_data(@sequel, 'db.collection', [ @map.transform('db.collection', o)] )
    assert_equal(o['_id'].to_s, table.select.first[:_id])
  end

  it 'Can transform BSON::ObjectIDs' do
    o = {'_id' => BSON::ObjectId.new, 'var' => 0}
    row = @map.transform('db.collection', o)
    table.insert(row)
    assert_equal(o['_id'].to_s, table.select.first[:_id])
  end
end