oplogjam 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,13 @@
1
require 'oplogjam/operators/intermediate'

module Oplogjam
  module Operators
    # An intermediate (non-terminal) object field in a $set path, e.g. the `a`
    # in $set: { 'a.b': 1 }. Before any child node can write into this field
    # it must exist as an object on the JSONB column.
    class IntermediateField < Intermediate
      # Default this field to an empty object if it is currently absent, then
      # thread the resulting column expression through every child node's
      # update in turn.
      def update(column)
        defaulted_value = Sequel.function(:coalesce, column[path], EMPTY_OBJECT)
        with_field = column.set(path, defaulted_value)

        nodes.inject(with_field, &UPDATE_COLUMN)
      end
    end
  end
end
@@ -0,0 +1,43 @@
1
require 'oplogjam/operators/intermediate'

module Oplogjam
  module Operators
    # An intermediate (non-terminal) numeric path segment in a $set, e.g. the
    # `1` in $set: { 'a.1.b': 1 }. A numeric segment may address either an
    # array index or a numeric object field name; the generated SQL must
    # handle both.
    class IntermediateIndex < Intermediate
      # Ensure the path up to and including this index exists on the JSONB
      # column, then thread the result through every child node's update.
      def update(column)
        # Now for a not-so-fun bit!
        #
        # As this is a numeric index, it might either be an index into an existing array or a numeric field name on an
        # object.
        #
        # If it is an index into an array then we need to ensure that all prior indexes down to 0 are either set or null.
        # If it is anything else, it should be an empty object. In order to figure that out, we need to look at the parent
        # path and switch based on its type.
        filled_array_column = (0...index).inject(column) { |subject, i|
          prior_path = parent_path + [i.to_s]

          # Backfill each earlier index with its existing value or SQL null.
          subject.set(prior_path, Sequel.function(:coalesce, column[prior_path], NULL))
        }

        # CASE on the parent's JSONB type: arrays get the backfilled column
        # with this index defaulted to an empty object; anything else defaults
        # the path to an empty object on the original column.
        populated_column = Sequel.pg_jsonb_op(
          Sequel.case(
            {
              ARRAY_TYPE => filled_array_column.set(path, Sequel.function(:coalesce, filled_array_column[path], EMPTY_OBJECT))
            },
            column.set(path, Sequel.function(:coalesce, column[path], EMPTY_OBJECT)),
            column[parent_path].typeof
          )
        )

        nodes.inject(populated_column, &UPDATE_COLUMN)
      end

      # All path segments except the final numeric one.
      def parent_path
        path[0...-1]
      end

      # The final path segment parsed as a base-10 integer.
      def index
        Integer(path.last, 10)
      end
    end
  end
end
@@ -0,0 +1,100 @@
1
require 'oplogjam/operators/field_assignment'
require 'oplogjam/operators/index_assignment'
require 'oplogjam/operators/intermediate_field'
require 'oplogjam/operators/intermediate_index'

module Oplogjam
  module Operators
    class Set

      # Transform a MongoDB $set operation (e.g. $set: { 'a.1.b.0': 'foo' }) into a tree of nodes more amenable to
      # transforming into SQL.
      #
      # Specifically, parse nested field and index assignments into specific node types FieldAssignment, IndexAssignment
      # (for setting the final value) and IntermediateField, IntermediateIndex (for any intermediate fields and indexes).
      #
      # e.g.
      #
      # $set: { a: 1 } will become Set(['a'] => FieldAssignment(['a'], 1))
      # $set: { a: 1, b: 2 } will become Set(['a'] => FieldAssignment(['a'], 1), ['b'] => FieldAssignment(['b'], 2))
      # $set: { 'a.b': 1 } will become Set(['a'] => IntermediateField(['a'], FieldAssignment(['a', 'b'], 1)))
      def self.from(operation)
        # Fold every key of the $set operation into an initially empty Set.
        operation.each_with_object(new) do |(dotted_path, value), set|
          # Split the dotted path 'a.b.c' into segments ['a', 'b', 'c'].
          segments = dotted_path.split(FIELD_SEPARATOR)

          # Populate an intermediate node for every proper prefix of the path
          # (['a'], then ['a', 'b'], ...). Each populate returns the node on
          # which the next, longer prefix should be populated. Note that an
          # intermediate segment could be a numeric index (possibly into an
          # array) or an object field name; populate dispatches on that.
          parent = (1...segments.size).inject(set) { |node, prefix_length|
            node.populate(segments.take(prefix_length))
          }

          # Finally, assign the value itself on the full path.
          parent.set(segments, value)
        end
      end

      attr_reader :tree

      def initialize(tree = {})
        @tree = tree
      end

      # Populate an intermediate node at path, dispatching on whether the
      # final segment is numeric.
      def populate(path)
        path.last =~ NUMERIC_INDEX ? populate_index(path) : populate_field(path)
      end

      # Assign value at path, dispatching on whether the final segment is
      # numeric.
      def set(path, value)
        path.last =~ NUMERIC_INDEX ? set_index(path, value) : set_field(path, value)
      end

      # Ensure an intermediate field node exists for path, returning it.
      def populate_field(path)
        tree[path] ||= IntermediateField.new(path)
      end

      # Ensure an intermediate index node exists for path, returning it.
      def populate_index(path)
        tree[path] ||= IntermediateIndex.new(path)
      end

      # Record a terminal field assignment, replacing any node at path.
      def set_field(path, value)
        tree[path] = FieldAssignment.new(path, value)
      end

      # Record a terminal index assignment, replacing any node at path.
      def set_index(path, value)
        tree[path] = IndexAssignment.new(path, value)
      end

      # Thread the given JSONB column expression through every top-level node.
      def update(column)
        nodes.inject(column, &UPDATE_COLUMN)
      end

      # The top-level nodes of the tree.
      def nodes
        tree.values
      end
    end
  end
end
@@ -0,0 +1,40 @@
1
require 'oplogjam/operators/unset_field'
require 'oplogjam/operators/unset_index'

module Oplogjam
  module Operators
    # The collection of removals parsed from a MongoDB $unset operation.
    class Unset
      # Build an Unset from a raw $unset document, e.g. $unset: { 'a.1': '' }.
      # Each dotted path becomes an UnsetIndex when its final segment is
      # numeric and an UnsetField otherwise; the associated value is ignored.
      def self.from(operation)
        operation.each_with_object(new) do |(dotted_path, _), acc|
          segments = dotted_path.split(FIELD_SEPARATOR)

          segments.last =~ NUMERIC_INDEX ? acc.unset_index(segments) : acc.unset_field(segments)
        end
      end

      attr_reader :unsets

      def initialize(unsets = [])
        @unsets = unsets
      end

      # Record the removal of an object field at the given path.
      def unset_field(path)
        unsets << UnsetField.new(path)
      end

      # Record the removal of a numeric index at the given path.
      def unset_index(path)
        unsets << UnsetIndex.new(path)
      end

      # Thread the given JSONB column expression through every recorded unset.
      def delete(column)
        unsets.inject(column) { |subject, unset| unset.delete(subject) }
      end
    end
  end
end
@@ -0,0 +1,15 @@
1
module Oplogjam
  module Operators
    # Removal of a single object field at a (possibly nested) path as part of
    # a MongoDB $unset operation.
    class UnsetField
      attr_reader :path

      # path - an array of string path segments, e.g. ['a', 'b'].
      def initialize(path)
        @path = path
      end

      # Return a JSONB expression with the field at this path deleted from
      # the given column.
      def delete(column)
        column.delete_path(path)
      end
    end
  end
end
@@ -0,0 +1,39 @@
1
module Oplogjam
  module Operators
    # Removal of a numeric path segment as part of a MongoDB $unset. The
    # segment may address an array element or a numeric object field name and
    # the generated SQL must handle both.
    class UnsetIndex
      attr_reader :path

      # path - an array of string path segments ending in a numeric one.
      def initialize(path)
        @path = path
      end

      # Return a JSONB expression removing this index from the given column.
      #
      # When the parent is an array and this index is within its bounds, the
      # element is set to SQL null rather than removed (mirroring MongoDB,
      # which leaves a null in place of unset array elements so later indexes
      # keep their positions); otherwise the path is deleted outright.
      def delete(column)
        # For array parents: null out in-bounds elements, delete otherwise.
        nullify_or_unset = Sequel.case(
          [
            [
              column[parent_path].array_length > index,
              column.set(path, NULL)
            ]
          ],
          column.delete_path(path)
        )

        # Switch on the parent's JSONB type: only arrays get the
        # nullify-or-unset treatment; everything else is a plain delete.
        Sequel.pg_jsonb_op(
          Sequel.case(
            { ARRAY_TYPE => nullify_or_unset },
            column.delete_path(path),
            column[parent_path].typeof
          )
        )
      end

      # All path segments except the final numeric one.
      def parent_path
        path[0...-1]
      end

      # The final path segment parsed as a base-10 integer.
      def index
        Integer(path.last, 10)
      end
    end
  end
end
@@ -0,0 +1,21 @@
1
require 'oplogjam/operation'

module Oplogjam
  # Wraps a MongoDB client to expose the replica set oplog as a stream of
  # parsed operations.
  class Oplog
    attr_reader :client

    def initialize(client)
      @client = client
    end

    # Lazily enumerate operations from the local oplog collection, optionally
    # filtered by query. The cursor is tailable-await with no timeout, so the
    # enumerator blocks waiting for new entries instead of terminating.
    def operations(query = {})
      Enumerator.new do |consumer|
        collection = client.use(LOCAL)[OPLOG]
        cursor = collection.find(query, cursor_type: :tailable_await).no_cursor_timeout

        cursor.each { |document| consumer << Operation.from(document) }
      end
    end
  end
end
@@ -0,0 +1,19 @@
1
module Oplogjam
  module Sanitizer
    # Recursively strip null bytes from an object before it is written to
    # PostgreSQL, which rejects JSONB containing them. Hashes and arrays
    # (including their Sequel JSONB wrappers) are rebuilt with sanitized
    # members, strings have null bytes removed and anything else passes
    # through untouched.
    def self.sanitize(obj)
      case obj
      when Sequel::Postgres::JSONBHash, Hash
        obj.each_with_object({}) { |(key, value), clean| clean[sanitize(key)] = sanitize(value) }
      when Sequel::Postgres::JSONBArray, Array
        obj.map { |element| sanitize(element) }
      when String
        obj.delete("\x00")
      else
        obj
      end
    end
  end
end
@@ -0,0 +1,44 @@
1
module Oplogjam
  # Manages the PostgreSQL tables that mirror MongoDB collections: bulk
  # import of existing documents, table creation and indexing.
  class Schema
    COLUMNS = %i[id document created_at updated_at].freeze

    attr_reader :db

    # db - a Sequel database connection.
    def initialize(db)
      @db = db
    end

    # Bulk-import every document in the given MongoDB collection into the
    # table named name, batch_size documents at a time, using a snapshot
    # cursor.
    def import(collection, name, batch_size = 100)
      collection.find.snapshot(true).each_slice(batch_size) do |documents|
        # Read the clock once per batch so created_at and updated_at agree
        # (previously Time.now.utc was evaluated twice per document, allowing
        # the two timestamps of a single row to differ).
        now = Time.now.utc

        values = documents.map { |document|
          [
            Sequel.object_to_json(document.fetch(ID)),
            Sequel.pg_jsonb(document),
            now,
            now
          ]
        }

        db[name].import(COLUMNS, values)
      end
    end

    # Create the mirror table if it does not already exist: a surrogate UUID
    # primary key, the MongoDB _id and full document as JSONB, plus audit and
    # soft-delete timestamps.
    def create_table(name)
      db.create_table?(name) do
        uuid :uuid, default: Sequel.function(:uuid_generate_v1), primary_key: true
        jsonb :id, null: false
        jsonb :document, null: false
        timestamp :created_at, null: false
        timestamp :updated_at, null: false
        timestamp :deleted_at
      end
    end

    # Add uniqueness indexes on the MongoDB id: one across (id, deleted_at)
    # and a partial one guaranteeing a single live (non-deleted) row per id.
    def add_indexes(name)
      db.alter_table(name) do
        add_index %i[id deleted_at], unique: true, if_not_exists: true
        add_index :id, unique: true, where: { deleted_at: nil }, if_not_exists: true
      end
    end
  end
end
@@ -0,0 +1,79 @@
1
require 'oplogjam/operators'

module Oplogjam
  # Raised when a BSON update operation is missing a required field.
  InvalidUpdate = Class.new(ArgumentError)

  # An update operation from the MongoDB oplog, translated into a SQL UPDATE
  # against the mapped PostgreSQL table.
  class Update
    attr_reader :h, :ts, :ns, :o2, :o

    # Build an Update from a BSON oplog document.
    #
    # Raises InvalidUpdate if any required field (h, ts, ns, o2, o) is absent.
    def self.from(bson)
      h = bson.fetch(H)
      ts = bson.fetch(TS)
      ns = bson.fetch(NS)
      o2 = bson.fetch(O2)
      o = bson.fetch(O)

      new(h, ts, ns, o2, o)
    rescue KeyError => e
      raise InvalidUpdate, "missing field: #{e}"
    end

    # h  - the operation's unique identifier, coerced to Integer.
    # ts - the operation's BSON timestamp.
    # ns - the namespace, e.g. 'database.collection'.
    # o2 - the query selecting the document to update.
    # o  - the update itself ($set/$unset operators or a replacement document).
    def initialize(h, ts, ns, o2, o)
      @h = Integer(h)
      @ts = Oplogjam::Timestamp(ts)
      @ns = String(ns)
      @o2 = Oplogjam::Document(o2)
      @o = Oplogjam::Document(o)
    end

    alias id h
    alias namespace ns
    alias query o2
    alias update o

    # The operation's timestamp as a Time.
    #
    # NOTE(review): ts.increment is an ordinal counter within the second, yet
    # it is passed as Time.at's microseconds argument — presumably just to
    # keep operations ordered; confirm before relying on sub-second accuracy.
    def timestamp
      Time.at(ts.seconds, ts.increment)
    end

    # Updates are equal when they share the same unique identifier.
    def ==(other)
      return false unless other.is_a?(Update)

      id == other.id
    end

    # Apply this update to the table mapped to its namespace (a no-op when
    # the namespace is unmapped). Matches the live, non-soft-deleted row by
    # the query's _id and rewrites its document.
    def apply(mapping)
      table = mapping[namespace]
      return unless table

      row_id = query.fetch(ID).to_json

      table
        .where(id: row_id, deleted_at: nil)
        .update(document: jsonb_update, updated_at: Time.now.utc)
    end

    private

    # The SQL expression for the new document: a wholesale replacement when
    # the update carries no $set/$unset, otherwise the existing document
    # column threaded through the $set then $unset operators.
    def jsonb_update
      return Sequel.pg_jsonb(query.merge(update)) if replacement?

      unsets_to_jsonb(sets_to_jsonb(Sequel.pg_jsonb_op(:document)))
    end

    # Apply any $set in the update to the given JSONB column expression.
    def sets_to_jsonb(column)
      return column unless update.key?(SET)

      Operators::Set.from(update.fetch(SET)).update(column)
    end

    # Apply any $unset in the update to the given JSONB column expression.
    def unsets_to_jsonb(column)
      return column unless update.key?(UNSET)

      Operators::Unset.from(update.fetch(UNSET)).delete(column)
    end

    # A replacement update contains neither $set nor $unset operators.
    def replacement?
      !update.key?(SET) && !update.key?(UNSET)
    end
  end
end
@@ -0,0 +1,174 @@
1
require 'bson'
require 'oplogjam'

module Oplogjam
  RSpec.describe ApplyOps do
    # The nested insert operation shared by every applyOps fixture below.
    # Previously this document was copy-pasted into each example; extracting
    # it removes the duplication and keeps the fixtures in sync.
    def inner_insert_bson
      BSON::Document.new(
        ts: BSON::Timestamp.new(1_496_414_570, 11),
        t: 14,
        h: -3_028_027_288_268_436_781,
        v: 2,
        op: 'i',
        ns: 'foo.bar',
        o: BSON::Document.new(_id: 1, baz: 'quux')
      )
    end

    # A complete applyOps command document wrapping inner_insert_bson.
    def apply_ops_bson
      BSON::Document.new(
        ts: BSON::Timestamp.new(1_479_420_028, 1),
        t: 1,
        h: -1_789_557_309_812_000_233,
        v: 2,
        op: 'c',
        ns: 'foo.$cmd',
        o: BSON::Document.new(applyOps: [inner_insert_bson])
      )
    end

    describe '.from' do
      it 'converts a BSON applyOps to an ApplyOps' do
        expect(described_class.from(apply_ops_bson)).to be_a(described_class)
      end

      it 'raises an error if the operations are missing' do
        bson = BSON::Document.new(
          ts: BSON::Timestamp.new(1_479_420_028, 1),
          t: 1,
          h: -1_789_557_309_812_000_233,
          v: 2,
          op: 'c',
          ns: 'foo.$cmd'
        )

        expect { described_class.from(bson) }.to raise_error(InvalidApplyOps)
      end
    end

    describe '#timestamp' do
      it 'returns the timestamp as a Time' do
        apply_ops = described_class.from(apply_ops_bson)

        expect(apply_ops.timestamp).to eq(Time.at(1_479_420_028, 1))
      end
    end

    describe '#namespace' do
      it 'returns the namespace' do
        apply_ops = described_class.from(apply_ops_bson)

        expect(apply_ops.namespace).to eq('foo.$cmd')
      end
    end

    describe '#id' do
      it 'returns a unique identifier for the operation' do
        apply_ops = described_class.from(apply_ops_bson)

        expect(apply_ops.id).to eq(-1_789_557_309_812_000_233)
      end
    end

    describe '#operations' do
      it 'returns the operations' do
        apply_ops = described_class.from(apply_ops_bson)
        insert = Insert.from(inner_insert_bson)

        expect(apply_ops.operations).to contain_exactly(insert)
      end
    end
  end
end