oplogjam 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,13 @@
1
+ require 'oplogjam/operators/intermediate'
2
+
3
module Oplogjam
  module Operators
    # An intermediate (non-final) object field in a $set path, e.g. the 'a'
    # in $set: { 'a.b' => 1 }. Its job is to guarantee the field exists as a
    # JSONB object before any child nodes write into it.
    class IntermediateField < Intermediate
      # Return a JSONB expression in which the value at +path+ is defaulted
      # to an empty object when absent, with every child node's update
      # applied on top.
      def update(column)
        ensured = column.set(path, Sequel.function(:coalesce, column[path], EMPTY_OBJECT))

        nodes.inject(ensured, &UPDATE_COLUMN)
      end
    end
  end
end
@@ -0,0 +1,43 @@
1
+ require 'oplogjam/operators/intermediate'
2
+
3
module Oplogjam
  module Operators
    # An intermediate numeric path segment in a $set path, e.g. the '1' in
    # $set: { 'a.1.b' => 1 }.
    class IntermediateIndex < Intermediate
      # Return a JSONB expression ensuring this numeric segment exists, with
      # every child node's update applied on top.
      #
      # A numeric segment is ambiguous: it may be an index into an existing
      # array or a numeric field name on an object. We build expressions for
      # both interpretations and switch on the parent value's type at query
      # time.
      def update(column)
        # Array case: MongoDB requires all prior indexes down to 0 to exist,
        # so backfill indexes 0...index with null when absent.
        #
        # NOTE: the coalesce deliberately reads each prior value from the
        # ORIGINAL column, not the partially-built expression.
        backfilled = (0...index).inject(column) do |acc, i|
          earlier = parent_path + [i.to_s]

          acc.set(earlier, Sequel.function(:coalesce, column[earlier], NULL))
        end

        # Whichever interpretation wins, the value at +path+ itself defaults
        # to an empty object so children can write into it.
        array_branch = backfilled.set(path, Sequel.function(:coalesce, backfilled[path], EMPTY_OBJECT))
        object_branch = column.set(path, Sequel.function(:coalesce, column[path], EMPTY_OBJECT))

        populated = Sequel.pg_jsonb_op(
          Sequel.case(
            { ARRAY_TYPE => array_branch },
            object_branch,
            column[parent_path].typeof
          )
        )

        nodes.inject(populated, &UPDATE_COLUMN)
      end

      # All path segments except the final numeric one.
      def parent_path
        path[0...-1]
      end

      # The final path segment parsed as a base-10 integer.
      def index
        Integer(path.last, 10)
      end
    end
  end
end
@@ -0,0 +1,100 @@
1
+ require 'oplogjam/operators/field_assignment'
2
+ require 'oplogjam/operators/index_assignment'
3
+ require 'oplogjam/operators/intermediate_field'
4
+ require 'oplogjam/operators/intermediate_index'
5
+
6
module Oplogjam
  module Operators
    # A parsed MongoDB $set operation (e.g. $set: { 'a.1.b.0' => 'foo' })
    # held as a tree of nodes that is easier to convert to SQL.
    #
    # Dotted paths become FieldAssignment/IndexAssignment leaves (carrying
    # the final value) and IntermediateField/IntermediateIndex interior
    # nodes (one per segment before the last), e.g.
    #
    #   $set: { a: 1 }       => Set(['a'] => FieldAssignment(['a'], 1))
    #   $set: { a: 1, b: 2 } => Set(['a'] => FieldAssignment(['a'], 1),
    #                               ['b'] => FieldAssignment(['b'], 2))
    #   $set: { 'a.b' => 1 } => Set(['a'] => IntermediateField(['a'],
    #                               FieldAssignment(['a', 'b'], 1)))
    class Set
      # Parse a $set document into a Set tree, one entry at a time.
      def self.from(operation)
        operation.each_with_object(new) do |(dotted_path, value), set|
          # 'a.b.c' => ['a', 'b', 'c']
          segments = dotted_path.split(FIELD_SEPARATOR)

          # Walk every segment except the last, ensuring an intermediate
          # node exists for each successive prefix (['a'], then ['a', 'b'],
          # ...) and descending into the node just ensured.
          prefix = []
          parent = segments[0...-1].inject(set) do |node, segment|
            prefix += [segment]

            node.populate(prefix)
          end

          # The final segment carries the actual value assignment.
          parent.set(segments, value)
        end
      end

      attr_reader :tree

      def initialize(tree = {})
        @tree = tree
      end

      # Ensure an intermediate node exists for +path+, choosing an index or
      # field node depending on whether the final segment is numeric, and
      # return it.
      def populate(path)
        if path.last =~ NUMERIC_INDEX
          populate_index(path)
        else
          populate_field(path)
        end
      end

      # Record the final value assignment for +path+, again dispatching on
      # whether the final segment is numeric.
      def set(path, value)
        if path.last =~ NUMERIC_INDEX
          set_index(path, value)
        else
          set_field(path, value)
        end
      end

      def populate_field(path)
        tree[path] ||= IntermediateField.new(path)
      end

      def populate_index(path)
        tree[path] ||= IntermediateIndex.new(path)
      end

      def set_field(path, value)
        tree[path] = FieldAssignment.new(path, value)
      end

      def set_index(path, value)
        tree[path] = IndexAssignment.new(path, value)
      end

      # Fold every node's update into the given JSONB column expression.
      def update(column)
        nodes.inject(column, &UPDATE_COLUMN)
      end

      def nodes
        tree.values
      end
    end
  end
end
@@ -0,0 +1,40 @@
1
+ require 'oplogjam/operators/unset_field'
2
+ require 'oplogjam/operators/unset_index'
3
+
4
module Oplogjam
  module Operators
    # A parsed MongoDB $unset operation: a flat list of UnsetField and
    # UnsetIndex nodes, one per dotted path in the operation. The values in
    # a $unset document are ignored by MongoDB, so only the paths matter.
    class Unset
      # Parse a $unset document, dispatching each path to an index or field
      # node based on whether its final segment is numeric.
      def self.from(operation)
        operation.each_with_object(new) do |(dotted_path, _value), unset|
          segments = dotted_path.split(FIELD_SEPARATOR)

          if segments.last =~ NUMERIC_INDEX
            unset.unset_index(segments)
          else
            unset.unset_field(segments)
          end
        end
      end

      attr_reader :unsets

      def initialize(unsets = [])
        @unsets = unsets
      end

      def unset_field(path)
        unsets << UnsetField.new(path)
      end

      def unset_index(path)
        unsets << UnsetIndex.new(path)
      end

      # Fold every node's deletion into the given JSONB column expression.
      def delete(column)
        unsets.inject(column) { |subject, node| node.delete(subject) }
      end
    end
  end
end
@@ -0,0 +1,15 @@
1
module Oplogjam
  module Operators
    # Removes a single non-numeric field from a JSONB document as part of a
    # $unset operation.
    class UnsetField
      attr_reader :path

      # path - the full path to the field as an array of string segments.
      def initialize(path)
        @path = path
      end

      # Return a JSONB expression with the value at +path+ removed from the
      # given column expression.
      def delete(column)
        column.delete_path(path)
      end
    end
  end
end
@@ -0,0 +1,39 @@
1
module Oplogjam
  module Operators
    # Removes a numeric path segment from a JSONB document as part of a
    # $unset operation.
    class UnsetIndex
      attr_reader :path

      # path - the full path as an array of string segments; the final
      # segment is numeric.
      def initialize(path)
        @path = path
      end

      # Return a JSONB expression that mirrors MongoDB's $unset semantics
      # for numeric segments: unsetting an array element within bounds
      # nullifies it (so later elements keep their positions) rather than
      # removing it, while on any other parent type the key is deleted
      # outright. Both decisions are made at query time with CASE
      # expressions over the parent's type and length.
      def delete(column)
        nullify_or_unset = Sequel.case(
          [
            [
              column[parent_path].array_length > index,
              column.set(path, NULL)
            ]
          ],
          column.delete_path(path)
        )

        Sequel.pg_jsonb_op(
          Sequel.case(
            { ARRAY_TYPE => nullify_or_unset },
            column.delete_path(path),
            column[parent_path].typeof
          )
        )
      end

      # All path segments except the final numeric one.
      def parent_path
        path[0...-1]
      end

      # The final path segment parsed as a base-10 integer.
      def index
        Integer(path.last, 10)
      end
    end
  end
end
@@ -0,0 +1,21 @@
1
+ require 'oplogjam/operation'
2
+
3
module Oplogjam
  # Wraps a MongoDB client and exposes the replica set oplog as a stream of
  # parsed operations.
  class Oplog
    attr_reader :client

    # client - a connected MongoDB client.
    def initialize(client)
      @client = client
    end

    # Return a lazy Enumerator over the oplog, yielding each document as a
    # parsed Operation. Uses a tailable await cursor with no timeout, so
    # iterating blocks waiting for new oplog entries; nothing is queried
    # until the enumerator is consumed.
    #
    # query - an optional filter applied to the oplog collection.
    def operations(query = {})
      Enumerator.new do |yielder|
        cursor = client.use(LOCAL)[OPLOG].find(query, cursor_type: :tailable_await).no_cursor_timeout

        cursor.each { |document| yielder << Operation.from(document) }
      end
    end
  end
end
@@ -0,0 +1,19 @@
1
module Oplogjam
  # Cleans MongoDB documents before insertion into PostgreSQL.
  module Sanitizer
    # Recursively strip null bytes from an object, as PostgreSQL rejects
    # JSONB containing \u0000. Handles both plain Ruby containers and
    # Sequel's JSONB wrapper types; hashes and arrays are rebuilt as plain
    # Hash/Array with sanitized keys, values and elements. Any other value
    # is returned unchanged.
    def self.sanitize(obj)
      case obj
      when Sequel::Postgres::JSONBHash, Hash
        obj.each_with_object({}) do |(key, value), acc|
          acc[sanitize(key)] = sanitize(value)
        end
      when Sequel::Postgres::JSONBArray, Array
        obj.map { |element| sanitize(element) }
      when String
        # delete is the idiomatic (and equivalent) form of tr("\x00", '')
        # for removing characters.
        obj.delete("\x00")
      else
        obj
      end
    end
  end
end
@@ -0,0 +1,44 @@
1
module Oplogjam
  # Manages the PostgreSQL tables that mirror MongoDB collections: creating
  # them, indexing them and bulk-importing existing collection contents.
  class Schema
    # Columns populated by #import (uuid is database-generated, deleted_at
    # starts null).
    COLUMNS = %i[id document created_at updated_at].freeze

    attr_reader :db

    # db - a Sequel database connection.
    def initialize(db)
      @db = db
    end

    # Bulk-copy every document in a MongoDB collection into the table
    # +name+, in batches of +batch_size+ rows per INSERT. Uses a snapshot
    # cursor so documents moved during the scan are not imported twice.
    def import(collection, name, batch_size = 100)
      collection.find.snapshot(true).each_slice(batch_size) do |documents|
        # Capture a single timestamp per batch so created_at and updated_at
        # are identical for each imported row (previously Time.now.utc was
        # called twice per document, yielding slightly different values).
        now = Time.now.utc

        values = documents.map { |document|
          [
            Sequel.object_to_json(document.fetch(ID)),
            Sequel.pg_jsonb(document),
            now,
            now
          ]
        }

        db[name].import(COLUMNS, values)
      end
    end

    # Create the mirror table if it does not already exist. Rows are soft
    # deleted via deleted_at, so the MongoDB _id (stored as JSONB in :id)
    # is only unique among live rows.
    def create_table(name)
      db.create_table?(name) do
        uuid :uuid, default: Sequel.function(:uuid_generate_v1), primary_key: true
        jsonb :id, null: false
        jsonb :document, null: false
        timestamp :created_at, null: false
        timestamp :updated_at, null: false
        timestamp :deleted_at
      end
    end

    # Add the uniqueness indexes required by the soft-delete scheme.
    def add_indexes(name)
      db.alter_table(name) do
        add_index %i[id deleted_at], unique: true, if_not_exists: true
        add_index :id, unique: true, where: { deleted_at: nil }, if_not_exists: true
      end
    end
  end
end
@@ -0,0 +1,79 @@
1
+ require 'oplogjam/operators'
2
+
3
module Oplogjam
  InvalidUpdate = Class.new(ArgumentError)

  # An oplog update operation ('op' => 'u'): applies a MongoDB update
  # document (o) to the document matched by the query (o2).
  class Update
    attr_reader :h, :ts, :ns, :o2, :o

    # Build an Update from a BSON oplog document.
    #
    # Raises InvalidUpdate when any required field is missing.
    def self.from(bson)
      new(
        bson.fetch(H),
        bson.fetch(TS),
        bson.fetch(NS),
        bson.fetch(O2),
        bson.fetch(O)
      )
    rescue KeyError => e
      raise InvalidUpdate, "missing field: #{e}"
    end

    def initialize(h, ts, ns, o2, o)
      @h = Integer(h)
      @ts = Oplogjam::Timestamp(ts)
      @ns = String(ns)
      @o2 = Oplogjam::Document(o2)
      @o = Oplogjam::Document(o)
    end

    alias id h
    alias namespace ns
    alias query o2
    alias update o

    # The operation's timestamp as a Time.
    def timestamp
      Time.at(ts.seconds, ts.increment)
    end

    # Updates are equal when they share the same unique identifier.
    def ==(other)
      other.is_a?(Update) && id == other.id
    end

    # Apply this update to the mapped PostgreSQL table, if the namespace is
    # mapped at all. Only live (not soft-deleted) rows are touched.
    def apply(mapping)
      table = mapping[namespace]
      return unless table

      table
        .where(id: query.fetch(ID).to_json, deleted_at: nil)
        .update(document: jsonb_update, updated_at: Time.now.utc)
    end

    private

    # A full replacement swaps in the merged query + update document;
    # otherwise build a JSONB expression applying $set then $unset.
    def jsonb_update
      return Sequel.pg_jsonb(query.merge(update)) if replacement?

      unsets_to_jsonb(sets_to_jsonb(Sequel.pg_jsonb_op(:document)))
    end

    def sets_to_jsonb(column)
      return column unless update.key?(SET)

      Operators::Set.from(update.fetch(SET)).update(column)
    end

    def unsets_to_jsonb(column)
      return column unless update.key?(UNSET)

      Operators::Unset.from(update.fetch(UNSET)).delete(column)
    end

    # An update with neither $set nor $unset is a whole-document replacement.
    def replacement?
      !update.key?(SET) && !update.key?(UNSET)
    end
  end
end
@@ -0,0 +1,174 @@
1
+ require 'bson'
2
+ require 'oplogjam'
3
+
4
module Oplogjam
  RSpec.describe ApplyOps do
    # The single insert wrapped inside the applyOps command.
    let(:inner_insert_bson) do
      BSON::Document.new(
        ts: BSON::Timestamp.new(1_496_414_570, 11),
        t: 14,
        h: -3_028_027_288_268_436_781,
        v: 2,
        op: 'i',
        ns: 'foo.bar',
        o: BSON::Document.new(_id: 1, baz: 'quux')
      )
    end

    # A complete applyOps command document containing the insert above.
    let(:bson) do
      BSON::Document.new(
        ts: BSON::Timestamp.new(1_479_420_028, 1),
        t: 1,
        h: -1_789_557_309_812_000_233,
        v: 2,
        op: 'c',
        ns: 'foo.$cmd',
        o: BSON::Document.new(applyOps: [inner_insert_bson])
      )
    end

    describe '.from' do
      it 'converts a BSON applyOps to an ApplyOps' do
        expect(described_class.from(bson)).to be_a(described_class)
      end

      it 'raises an error if the operations are missing' do
        invalid_bson = BSON::Document.new(
          ts: BSON::Timestamp.new(1_479_420_028, 1),
          t: 1,
          h: -1_789_557_309_812_000_233,
          v: 2,
          op: 'c',
          ns: 'foo.$cmd'
        )

        expect { described_class.from(invalid_bson) }.to raise_error(InvalidApplyOps)
      end
    end

    describe '#timestamp' do
      it 'returns the timestamp as a Time' do
        expect(described_class.from(bson).timestamp).to eq(Time.at(1_479_420_028, 1))
      end
    end

    describe '#namespace' do
      it 'returns the namespace' do
        expect(described_class.from(bson).namespace).to eq('foo.$cmd')
      end
    end

    describe '#id' do
      it 'returns a unique identifier for the operation' do
        expect(described_class.from(bson).id).to eq(-1_789_557_309_812_000_233)
      end
    end

    describe '#operations' do
      it 'returns the operations' do
        insert = Insert.from(inner_insert_bson)

        expect(described_class.from(bson).operations).to contain_exactly(insert)
      end
    end
  end
end