oplogjam 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/LICENSE +21 -0
- data/README.md +738 -0
- data/lib/oplogjam.rb +69 -0
- data/lib/oplogjam/apply_ops.rb +49 -0
- data/lib/oplogjam/command.rb +41 -0
- data/lib/oplogjam/delete.rb +50 -0
- data/lib/oplogjam/insert.rb +62 -0
- data/lib/oplogjam/noop.rb +39 -0
- data/lib/oplogjam/operation.rb +31 -0
- data/lib/oplogjam/operators.rb +2 -0
- data/lib/oplogjam/operators/assignment.rb +12 -0
- data/lib/oplogjam/operators/field_assignment.rb +11 -0
- data/lib/oplogjam/operators/index_assignment.rb +39 -0
- data/lib/oplogjam/operators/intermediate.rb +47 -0
- data/lib/oplogjam/operators/intermediate_field.rb +13 -0
- data/lib/oplogjam/operators/intermediate_index.rb +43 -0
- data/lib/oplogjam/operators/set.rb +100 -0
- data/lib/oplogjam/operators/unset.rb +40 -0
- data/lib/oplogjam/operators/unset_field.rb +15 -0
- data/lib/oplogjam/operators/unset_index.rb +39 -0
- data/lib/oplogjam/oplog.rb +21 -0
- data/lib/oplogjam/sanitizer.rb +19 -0
- data/lib/oplogjam/schema.rb +44 -0
- data/lib/oplogjam/update.rb +79 -0
- data/spec/oplogjam/apply_ops_spec.rb +174 -0
- data/spec/oplogjam/command_spec.rb +103 -0
- data/spec/oplogjam/delete_spec.rb +163 -0
- data/spec/oplogjam/insert_spec.rb +289 -0
- data/spec/oplogjam/noop_spec.rb +123 -0
- data/spec/oplogjam/operation_spec.rb +110 -0
- data/spec/oplogjam/operators/set_spec.rb +53 -0
- data/spec/oplogjam/sanitizer_spec.rb +35 -0
- data/spec/oplogjam/update_spec.rb +406 -0
- data/spec/spec_helper.rb +19 -0
- metadata +199 -0
data/lib/oplogjam.rb
ADDED
@@ -0,0 +1,69 @@
|
|
1
|
+
require 'bson'
|
2
|
+
require 'oj'
|
3
|
+
require 'sequel'
|
4
|
+
require 'oplogjam/oplog'
|
5
|
+
require 'oplogjam/sanitizer'
|
6
|
+
require 'oplogjam/schema'
|
7
|
+
|
8
|
+
module Oplogjam
  # Load the Sequel extensions used for PostgreSQL arrays and JSON/JSONB
  Sequel.extension :pg_array, :pg_json, :pg_json_ops

  # Operation types
  APPLY_OPS = 'applyOps'.freeze
  C = 'c'.freeze
  D = 'd'.freeze
  U = 'u'.freeze
  I = 'i'.freeze
  N = 'n'.freeze
  UNKNOWN = 'unknown'.freeze

  # Operation fields
  H = 'h'.freeze
  MSG = 'msg'.freeze
  NS = 'ns'.freeze
  O = 'o'.freeze
  O2 = 'o2'.freeze
  OP = 'op'.freeze
  TS = 'ts'.freeze

  # BSON fields
  ID = '_id'.freeze
  SET = '$set'.freeze
  UNSET = '$unset'.freeze
  FIELD_SEPARATOR = '.'.freeze
  # Matches a path segment made up solely of decimal digits
  NUMERIC_INDEX = /\A\d+\z/

  # SQL
  ARRAY_TYPE = 'array'.freeze
  EMPTY_OBJECT = Sequel.pg_jsonb({}.freeze)
  NULL = 'null'.freeze
  TABLE = 'table'.freeze
  SCHEMA = 'schema'.freeze
  PUBLIC = 'public'.freeze

  # Recursive updates: asks a node to apply itself to the given column
  UPDATE_COLUMN = proc do |column, node|
    node.update(column)
  end

  # Database & collection names
  LOCAL = 'local'.freeze
  OPLOG = 'oplog.rs'.freeze

  # Strict type coercion for BSON types.
  #
  # Returns the argument untouched when it is a BSON::Timestamp and raises a
  # TypeError for anything else.
  def self.Timestamp(ts)
    return ts if ts.is_a?(BSON::Timestamp)

    raise TypeError, "#{ts} is not a BSON Timestamp"
  end

  # Returns the argument untouched when it is a BSON::Document and raises a
  # TypeError for anything else.
  def self.Document(document)
    return document if document.is_a?(BSON::Document)

    raise TypeError, "#{document} is not a BSON Document"
  end

  # Override JSONB serialization so that every object is passed through the
  # Sanitizer before being dumped by Oj. Any extra arguments and block given
  # by the caller are accepted but ignored.
  def Sequel.object_to_json(obj, *_args, &_blk)
    sanitized = Sanitizer.sanitize(obj)

    Oj.dump(sanitized, mode: :rails)
  end
end
|
@@ -0,0 +1,49 @@
|
|
1
|
+
module Oplogjam
  # Raised when an applyOps entry is missing one of its required fields.
  InvalidApplyOps = Class.new(ArgumentError)

  # An oplog entry whose "o" document carries a list of nested operations
  # under the applyOps key.
  class ApplyOps
    attr_reader :h, :ts, :ns, :apply_ops

    # Builds an ApplyOps from a raw oplog entry.
    #
    # Reads the H, TS, NS and O fields from +bson+ and the APPLY_OPS field from
    # the O document. Any KeyError raised along the way is re-raised as
    # InvalidApplyOps.
    def self.from(bson)
      h = bson.fetch(H)
      ts = bson.fetch(TS)
      ns = bson.fetch(NS)
      o = bson.fetch(O)
      apply_ops = o.fetch(APPLY_OPS)

      new(h, ts, ns, apply_ops)
    rescue KeyError => e
      raise InvalidApplyOps, "missing field: #{e}"
    end

    # Coerces each argument strictly: +h+ to an Integer, +ts+ through
    # Oplogjam::Timestamp, +ns+ to a String and +apply_ops+ to an Array.
    def initialize(h, ts, ns, apply_ops)
      @h = Integer(h)
      @ts = Oplogjam::Timestamp(ts)
      @ns = String(ns)
      @apply_ops = Array(apply_ops)
    end

    alias id h
    alias namespace ns

    # Converts the stored timestamp into a Time. The increment is handed to
    # Time.at as its second argument, which Time.at treats as microseconds.
    def timestamp
      Time.at(ts.seconds, ts.increment)
    end

    # Applies every nested operation, in order, against the given mapping.
    def apply(mapping)
      operations.each do |operation|
        operation.apply(mapping)
      end
    end

    # Parses each nested entry with Operation.from; re-parsed on every call.
    def operations
      apply_ops.map { |bson| Operation.from(bson) }
    end

    # Two ApplyOps are equal when their ids match. Always answers with a
    # strict boolean: anything that is not an ApplyOps yields false (not nil),
    # in line with the other operation classes.
    def ==(other)
      return false unless other.is_a?(ApplyOps)

      id == other.id
    end
  end
end
|
@@ -0,0 +1,41 @@
|
|
1
|
+
module Oplogjam
  # Raised when a command entry is missing one of its required fields.
  InvalidCommand = Class.new(ArgumentError)

  # An oplog entry describing a database command.
  class Command
    attr_reader :h, :ts, :ns, :o

    # Builds a Command from a raw oplog entry by reading its H, TS, NS and O
    # fields (in that order). A KeyError is re-raised as InvalidCommand.
    def self.from(bson)
      new(bson.fetch(H), bson.fetch(TS), bson.fetch(NS), bson.fetch(O))
    rescue KeyError => e
      raise InvalidCommand, "missing field: #{e}"
    end

    # Strictly coerces the id to an Integer, the namespace to a String and
    # validates the timestamp and command document via the Oplogjam coercers.
    def initialize(h, ts, ns, o)
      @h = Integer(h)
      @ts = Oplogjam::Timestamp(ts)
      @ns = String(ns)
      @o = Oplogjam::Document(o)
    end

    alias_method :id, :h
    alias_method :command, :o
    alias_method :namespace, :ns

    # Converts the stored timestamp into a Time, passing the increment as the
    # second argument of Time.at.
    def timestamp
      seconds = ts.seconds
      increment = ts.increment

      Time.at(seconds, increment)
    end

    # Commands are equal when the other object is a Command with the same id.
    def ==(other)
      other.is_a?(Command) && id == other.id
    end

    # Applying a command does nothing; the mapping is ignored.
    def apply(_mapping)
    end
  end
end
|
@@ -0,0 +1,50 @@
|
|
1
|
+
module Oplogjam
  # Raised when a delete entry is missing one of its required fields.
  InvalidDelete = Class.new(ArgumentError)

  # An oplog entry describing the deletion of a single document.
  class Delete
    attr_reader :h, :ts, :ns, :o

    # Builds a Delete from a raw oplog entry by reading its H, TS, NS and O
    # fields. A KeyError is re-raised as InvalidDelete.
    def self.from(bson)
      h = bson.fetch(H)
      ts = bson.fetch(TS)
      ns = bson.fetch(NS)
      o = bson.fetch(O)

      new(h, ts, ns, o)
    rescue KeyError => e
      raise InvalidDelete, "missing field: #{e}"
    end

    # Strictly coerces the id to an Integer, the namespace to a String and
    # validates the timestamp and query document via the Oplogjam coercers.
    def initialize(h, ts, ns, o)
      @h = Integer(h)
      @ts = Oplogjam::Timestamp(ts)
      @ns = String(ns)
      @o = Oplogjam::Document(o)
    end

    alias id h
    alias namespace ns
    alias query o

    # Converts the stored timestamp into a Time, passing the increment as the
    # second argument of Time.at.
    def timestamp
      Time.at(ts.seconds, ts.increment)
    end

    # Deletes are equal when the other object is a Delete with the same id.
    def ==(other)
      return false unless other.is_a?(Delete)

      id == other.id
    end

    # Soft-deletes the matching row in the table mapped to this namespace.
    #
    # Does nothing (returns nil) when +mapping+ has no table for the namespace.
    # Otherwise the row whose id equals the JSON-encoded _id of the query and
    # whose deleted_at is still NULL gets both updated_at and deleted_at set.
    # Raises KeyError if the query has no _id.
    def apply(mapping)
      table = mapping[namespace]
      return unless table

      row_id = query.fetch(ID).to_json
      # Capture the instant once so updated_at and deleted_at are identical.
      now = Time.now.utc

      table
        .where(id: row_id, deleted_at: nil)
        .update(updated_at: now, deleted_at: now)
    end
  end
end
|
@@ -0,0 +1,62 @@
|
|
1
|
+
module Oplogjam
  # Raised when an insert entry is missing one of its required fields.
  InvalidInsert = Class.new(ArgumentError)

  # An oplog entry describing the insertion of a single document.
  class Insert
    attr_reader :h, :ts, :ns, :o

    # Builds an Insert from a raw oplog entry by reading its H, TS, NS and O
    # fields. A KeyError is re-raised as InvalidInsert.
    def self.from(bson)
      h = bson.fetch(H)
      ts = bson.fetch(TS)
      ns = bson.fetch(NS)
      o = bson.fetch(O)

      new(h, ts, ns, o)
    rescue KeyError => e
      raise InvalidInsert, "missing field: #{e}"
    end

    # Strictly coerces the id to an Integer, the namespace to a String and
    # validates the timestamp and document via the Oplogjam coercers.
    def initialize(h, ts, ns, o)
      @h = Integer(h)
      @ts = Oplogjam::Timestamp(ts)
      @ns = String(ns)
      @o = Oplogjam::Document(o)
    end

    alias namespace ns
    alias id h
    alias document o

    # Converts the stored timestamp into a Time, passing the increment as the
    # second argument of Time.at.
    def timestamp
      Time.at(ts.seconds, ts.increment)
    end

    # Inserts are equal when the other object is an Insert with the same id.
    def ==(other)
      return false unless other.is_a?(Insert)

      id == other.id
    end

    # Writes the document into the table mapped to this namespace.
    #
    # Does nothing (returns nil) when +mapping+ has no table for the namespace.
    # Otherwise a row keyed by the JSON-encoded _id is inserted through
    # insert_conflict targeting id where deleted_at is NULL; on conflict the
    # stored document is replaced by the incoming one and updated_at is
    # refreshed. Raises KeyError if the document has no _id.
    def apply(mapping)
      table = mapping[namespace]
      return unless table

      row_id = document.fetch(ID).to_json
      # Capture the instant once so created_at and both updated_at values agree.
      now = Time.now.utc

      table
        .insert_conflict(
          target: :id,
          conflict_where: { deleted_at: nil },
          update: {
            document: Sequel[:excluded][:document],
            updated_at: now
          }
        )
        .insert(
          id: row_id,
          document: Sequel.pg_jsonb(document),
          created_at: now,
          updated_at: now
        )
    end
  end
end
|
@@ -0,0 +1,39 @@
|
|
1
|
+
module Oplogjam
  # Raised when a no-op entry is missing one of its required fields.
  InvalidNoop = Class.new(ArgumentError)

  # An oplog entry that carries only a message and changes no data.
  class Noop
    attr_reader :h, :ts, :msg

    # Builds a Noop from a raw oplog entry by reading its H and TS fields and
    # then the MSG field of its O document. A KeyError is re-raised as
    # InvalidNoop.
    def self.from(bson)
      new(bson.fetch(H), bson.fetch(TS), bson.fetch(O).fetch(MSG))
    rescue KeyError => e
      raise InvalidNoop, "missing field: #{e}"
    end

    # Strictly coerces the id to an Integer and the message to a String, and
    # validates the timestamp via Oplogjam::Timestamp.
    def initialize(h, ts, msg)
      @h = Integer(h)
      @ts = Oplogjam::Timestamp(ts)
      @msg = String(msg)
    end

    alias_method :message, :msg
    alias_method :id, :h

    # Converts the stored timestamp into a Time, passing the increment as the
    # second argument of Time.at.
    def timestamp
      seconds = ts.seconds
      increment = ts.increment

      Time.at(seconds, increment)
    end

    # No-ops are equal when the other object is a Noop with the same id.
    def ==(other)
      other.is_a?(Noop) && id == other.id
    end

    # Applying a no-op does nothing; the mapping is ignored.
    def apply(_mapping)
    end
  end
end
|
@@ -0,0 +1,31 @@
|
|
1
|
+
require 'oplogjam/noop'
|
2
|
+
require 'oplogjam/insert'
|
3
|
+
require 'oplogjam/update'
|
4
|
+
require 'oplogjam/delete'
|
5
|
+
require 'oplogjam/command'
|
6
|
+
require 'oplogjam/apply_ops'
|
7
|
+
|
8
|
+
module Oplogjam
  # Raised when an oplog entry has a missing or unrecognised operation type.
  InvalidOperation = Class.new(ArgumentError)

  # Factory that turns a raw oplog entry into the matching operation object.
  class Operation
    # Dispatches on the OP field of +bson+ (UNKNOWN when the field is absent)
    # and delegates to the +from+ constructor of the corresponding class.
    # Raises InvalidOperation for any type that is not recognised.
    def self.from(bson)
      case bson.fetch(OP, UNKNOWN)
      when N then Noop.from(bson)
      when I then Insert.from(bson)
      when U then Update.from(bson)
      when D then Delete.from(bson)
      when C then command_from(bson)
      else raise InvalidOperation, "invalid operation: #{bson}"
      end
    end

    # A command entry becomes an ApplyOps when its O document (an empty hash
    # if absent) has the APPLY_OPS key, and a plain Command otherwise.
    def self.command_from(bson)
      payload = bson.fetch(O, {})

      payload.key?(APPLY_OPS) ? ApplyOps.from(bson) : Command.from(bson)
    end
    private_class_method :command_from
  end
end
|
@@ -0,0 +1,39 @@
|
|
1
|
+
require 'oplogjam/operators/assignment'
|
2
|
+
|
3
|
+
module Oplogjam
  module Operators
    # Assignment whose final path segment is numeric.
    # NOTE(review): +path+ and +value+ are presumably readers inherited from
    # Assignment, which is not visible here - confirm.
    class IndexAssignment < Assignment
      # Returns an expression that sets +value+ (JSON-encoded) at +path+ on the
      # given column.
      #
      # The numeric segment may be either an index into an existing array or a
      # numeric field name on an object. When the parent turns out to be an
      # array, every earlier index from 0 up to (but excluding) the target is
      # first set to its current value or to null, so none of them is left
      # unset. For any other parent type the column is used unchanged.
      def update(column)
        array_column = (0...index).reduce(column) do |expression, position|
          earlier_path = parent_path + [position.to_s]
          current_or_null = Sequel.function(:coalesce, column[earlier_path], NULL)

          expression.set(earlier_path, current_or_null)
        end

        # Choose between the back-filled and the plain column by parent type.
        parent_type = column[parent_path].typeof
        chosen_column = Sequel.case({ ARRAY_TYPE => array_column }, column, parent_type)

        Sequel.pg_jsonb_op(chosen_column).set(path, value.to_json)
      end

      # The final path segment parsed strictly as a base-10 integer.
      def index
        Integer(path.last, 10)
      end

      # Every path segment except the last one.
      def parent_path
        path[0...-1]
      end
    end
  end
end
|
@@ -0,0 +1,47 @@
|
|
1
|
+
module Oplogjam
  module Operators
    # A node holding child nodes in +tree+, keyed by their path.
    class Intermediate
      attr_reader :path, :tree

      # +tree+ defaults to a fresh empty hash for every new instance.
      def initialize(path, tree = {})
        @path, @tree = path, tree
      end

      # Ensures an intermediate child exists for +path+ and returns it. The
      # child is index-flavoured when the last segment matches NUMERIC_INDEX
      # and field-flavoured otherwise.
      def populate(path)
        return populate_index(path) if path.last =~ NUMERIC_INDEX

        populate_field(path)
      end

      # Stores an assignment of +value+ at +path+ and returns it. The
      # assignment is index-flavoured when the last segment matches
      # NUMERIC_INDEX and field-flavoured otherwise.
      def set(path, value)
        return set_index(path, value) if path.last =~ NUMERIC_INDEX

        set_field(path, value)
      end

      # Returns the existing child at +path+, creating an IntermediateField
      # only when there is none yet.
      def populate_field(path)
        existing = tree[path]
        return existing if existing

        tree[path] = IntermediateField.new(path)
      end

      # Returns the existing child at +path+, creating an IntermediateIndex
      # only when there is none yet.
      def populate_index(path)
        existing = tree[path]
        return existing if existing

        tree[path] = IntermediateIndex.new(path)
      end

      # Replaces whatever is stored at +path+ with a FieldAssignment.
      def set_field(path, value)
        assignment = FieldAssignment.new(path, value)
        tree[path] = assignment
      end

      # Replaces whatever is stored at +path+ with an IndexAssignment.
      def set_index(path, value)
        assignment = IndexAssignment.new(path, value)
        tree[path] = assignment
      end

      # The child nodes, in the order their paths were first added.
      def nodes
        tree.values
      end
    end
  end
end
|