oplogjam 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,69 @@
1
+ require 'bson'
2
+ require 'oj'
3
+ require 'sequel'
4
+ require 'oplogjam/oplog'
5
+ require 'oplogjam/sanitizer'
6
+ require 'oplogjam/schema'
7
+
8
# Root namespace: shared string constants for reading oplog entries, strict
# BSON type checks and the JSON serialization hook installed on Sequel.
module Oplogjam
  # Sequel extensions needed for PostgreSQL arrays and JSON/JSONB support.
  Sequel.extension :pg_array, :pg_json, :pg_json_ops

  # Operation types
  APPLY_OPS = 'applyOps'.freeze
  C = 'c'.freeze
  D = 'd'.freeze
  U = 'u'.freeze
  I = 'i'.freeze
  N = 'n'.freeze
  UNKNOWN = 'unknown'.freeze

  # Field names found on an operation
  H = 'h'.freeze
  MSG = 'msg'.freeze
  NS = 'ns'.freeze
  O = 'o'.freeze
  O2 = 'o2'.freeze
  OP = 'op'.freeze
  TS = 'ts'.freeze

  # BSON field names, update operators and path handling
  ID = '_id'.freeze
  SET = '$set'.freeze
  UNSET = '$unset'.freeze
  FIELD_SEPARATOR = '.'.freeze
  # Matches a string made up entirely of decimal digits.
  NUMERIC_INDEX = /\A\d+\z/

  # SQL literals and identifiers
  ARRAY_TYPE = 'array'.freeze
  EMPTY_OBJECT = Sequel.pg_jsonb({}.freeze)
  NULL = 'null'.freeze
  TABLE = 'table'.freeze
  SCHEMA = 'schema'.freeze
  PUBLIC = 'public'.freeze

  # Recursive updates: given (column, node), asks the node to update the column.
  UPDATE_COLUMN = proc { |column, node| node.update(column) }

  # Database & collection names
  LOCAL = 'local'.freeze
  OPLOG = 'oplog.rs'.freeze

  class << self
    # Strict type check: hands back +ts+ untouched when it is a
    # BSON::Timestamp and raises TypeError for anything else.
    def Timestamp(ts)
      return ts if ts.is_a?(BSON::Timestamp)

      raise TypeError, "#{ts} is not a BSON Timestamp"
    end

    # Strict type check: hands back +document+ untouched when it is a
    # BSON::Document and raises TypeError for anything else.
    def Document(document)
      return document if document.is_a?(BSON::Document)

      raise TypeError, "#{document} is not a BSON Document"
    end
  end

  # Replaces Sequel's JSON serialization so that every object is passed through
  # Sanitizer.sanitize before being dumped by Oj in :rails mode. Any extra
  # arguments and the block are accepted but ignored.
  def Sequel.object_to_json(obj, *args, &blk)
    sanitized = Sanitizer.sanitize(obj)

    Oj.dump(sanitized, mode: :rails)
  end
end
@@ -0,0 +1,49 @@
1
module Oplogjam
  # Raised when an entry lacks a field required to build an ApplyOps.
  InvalidApplyOps = Class.new(ArgumentError)

  # An entry whose "o" document carries a list of nested operations under the
  # "applyOps" key.
  class ApplyOps
    attr_reader :h, :ts, :ns, :apply_ops

    # Builds an ApplyOps from a raw entry.
    #
    # bson - any object responding to #fetch that holds the H, TS, NS and O
    #        fields; the O value must itself respond to #fetch and hold
    #        APPLY_OPS.
    #
    # Raises InvalidApplyOps when a KeyError occurs while reading those fields.
    def self.from(bson)
      h = bson.fetch(H)
      ts = bson.fetch(TS)
      ns = bson.fetch(NS)
      o = bson.fetch(O)
      apply_ops = o.fetch(APPLY_OPS)

      new(h, ts, ns, apply_ops)
    rescue KeyError => e
      raise InvalidApplyOps, "missing field: #{e}"
    end

    # h         - coerced with Integer()
    # ts        - must pass Oplogjam::Timestamp's strict check
    # ns        - coerced with String()
    # apply_ops - coerced with Array()
    def initialize(h, ts, ns, apply_ops)
      @h = Integer(h)
      @ts = Oplogjam::Timestamp(ts)
      @ns = String(ns)
      @apply_ops = Array(apply_ops)
    end

    alias id h
    alias namespace ns

    # Returns a Time built from the timestamp's seconds, with its increment
    # passed as Time.at's sub-second argument.
    def timestamp
      Time.at(ts.seconds, ts.increment)
    end

    # Applies every nested operation, in order, to the given mapping.
    def apply(mapping)
      operations.each do |operation|
        operation.apply(mapping)
      end
    end

    # Returns the nested entries converted through Operation.from.
    def operations
      apply_ops.map { |bson| Operation.from(bson) }
    end

    # Two ApplyOps are equal when their ids match. Always answers with a real
    # boolean: comparing against any other kind of object yields false (rather
    # than nil), matching the other operation classes.
    def ==(other)
      return false unless other.is_a?(ApplyOps)

      id == other.id
    end
  end
end
@@ -0,0 +1,41 @@
1
module Oplogjam
  # Raised when an entry lacks a field required to build a Command.
  InvalidCommand = Class.new(ArgumentError)

  # A command entry; applying it to a mapping does nothing.
  class Command
    attr_reader :h, :ts, :ns, :o

    # Reads the H, TS, NS and O fields (in that order) from +bson+ and builds a
    # Command from them. A KeyError along the way is re-raised as
    # InvalidCommand.
    def self.from(bson)
      new(*[H, TS, NS, O].map { |field| bson.fetch(field) })
    rescue KeyError => e
      raise InvalidCommand, "missing field: #{e}"
    end

    # h is coerced with Integer(), ns with String(); ts and o must pass the
    # strict Oplogjam::Timestamp and Oplogjam::Document checks.
    def initialize(h, ts, ns, o)
      @h = Integer(h)
      @ts = Oplogjam::Timestamp(ts)
      @ns = String(ns)
      @o = Oplogjam::Document(o)
    end

    alias_method :id, :h
    alias_method :command, :o
    alias_method :namespace, :ns

    # Time built from the timestamp's seconds, with the increment passed as
    # Time.at's sub-second argument.
    def timestamp
      seconds = ts.seconds
      increment = ts.increment

      Time.at(seconds, increment)
    end

    # Commands are equal when the other object is a Command with the same id.
    def ==(other)
      other.is_a?(Command) && id == other.id
    end

    # Intentionally a no-op.
    def apply(_mapping)
      nil
    end
  end
end
@@ -0,0 +1,50 @@
1
module Oplogjam
  # Raised when an entry lacks a field required to build a Delete.
  InvalidDelete = Class.new(ArgumentError)

  # A delete entry. Applying it soft-deletes the matching row by stamping
  # deleted_at instead of removing the row.
  class Delete
    attr_reader :h, :ts, :ns, :o

    # Builds a Delete from a raw entry.
    #
    # bson - any object responding to #fetch that holds the H, TS, NS and O
    #        fields.
    #
    # Raises InvalidDelete when a KeyError occurs while reading those fields.
    def self.from(bson)
      h = bson.fetch(H)
      ts = bson.fetch(TS)
      ns = bson.fetch(NS)
      o = bson.fetch(O)

      new(h, ts, ns, o)
    rescue KeyError => e
      raise InvalidDelete, "missing field: #{e}"
    end

    # h  - coerced with Integer()
    # ts - must pass Oplogjam::Timestamp's strict check
    # ns - coerced with String()
    # o  - must pass Oplogjam::Document's strict check
    def initialize(h, ts, ns, o)
      @h = Integer(h)
      @ts = Oplogjam::Timestamp(ts)
      @ns = String(ns)
      @o = Oplogjam::Document(o)
    end

    alias id h
    alias namespace ns
    alias query o

    # Returns a Time built from the timestamp's seconds, with its increment
    # passed as Time.at's sub-second argument.
    def timestamp
      Time.at(ts.seconds, ts.increment)
    end

    # Two Deletes are equal when their ids match.
    def ==(other)
      return false unless other.is_a?(Delete)

      id == other.id
    end

    # Marks the row identified by the query's ID field as deleted.
    #
    # mapping - responds to #[]; looked up by namespace to find the target
    #           table. Nothing happens (nil is returned) when the namespace is
    #           not mapped.
    #
    # Only rows whose deleted_at is still NULL are touched. Raises KeyError if
    # the query has no ID field.
    def apply(mapping)
      table = mapping[namespace]
      return unless table

      row_id = query.fetch(ID).to_json
      # Read the clock once so updated_at and deleted_at carry the same instant.
      now = Time.now.utc

      table
        .where(id: row_id, deleted_at: nil)
        .update(updated_at: now, deleted_at: now)
    end
  end
end
@@ -0,0 +1,62 @@
1
module Oplogjam
  # Raised when an entry lacks a field required to build an Insert.
  InvalidInsert = Class.new(ArgumentError)

  # An insert entry. Applying it upserts the document into the mapped table.
  class Insert
    attr_reader :h, :ts, :ns, :o

    # Builds an Insert from a raw entry.
    #
    # bson - any object responding to #fetch that holds the H, TS, NS and O
    #        fields.
    #
    # Raises InvalidInsert when a KeyError occurs while reading those fields.
    def self.from(bson)
      h = bson.fetch(H)
      ts = bson.fetch(TS)
      ns = bson.fetch(NS)
      o = bson.fetch(O)

      new(h, ts, ns, o)
    rescue KeyError => e
      raise InvalidInsert, "missing field: #{e}"
    end

    # h  - coerced with Integer()
    # ts - must pass Oplogjam::Timestamp's strict check
    # ns - coerced with String()
    # o  - must pass Oplogjam::Document's strict check
    def initialize(h, ts, ns, o)
      @h = Integer(h)
      @ts = Oplogjam::Timestamp(ts)
      @ns = String(ns)
      @o = Oplogjam::Document(o)
    end

    alias namespace ns
    alias id h
    alias document o

    # Returns a Time built from the timestamp's seconds, with its increment
    # passed as Time.at's sub-second argument.
    def timestamp
      Time.at(ts.seconds, ts.increment)
    end

    # Two Inserts are equal when their ids match.
    def ==(other)
      return false unless other.is_a?(Insert)

      id == other.id
    end

    # Inserts the document into the table mapped to this namespace.
    #
    # mapping - responds to #[]; looked up by namespace to find the target
    #           table. Nothing happens (nil is returned) when the namespace is
    #           not mapped.
    #
    # On a conflict on id (restricted to rows whose deleted_at is NULL) the
    # existing row's document is replaced with the incoming one and updated_at
    # is refreshed. Raises KeyError if the document has no ID field.
    def apply(mapping)
      table = mapping[namespace]
      return unless table

      row_id = document.fetch(ID).to_json
      # Read the clock once so created_at and both updated_at values agree.
      now = Time.now.utc

      table
        .insert_conflict(
          target: :id,
          conflict_where: { deleted_at: nil },
          update: {
            document: Sequel[:excluded][:document],
            updated_at: now
          }
        )
        .insert(
          id: row_id,
          document: Sequel.pg_jsonb(document),
          created_at: now,
          updated_at: now
        )
    end
  end
end
+ end
@@ -0,0 +1,39 @@
1
module Oplogjam
  # Raised when an entry lacks a field required to build a Noop.
  InvalidNoop = Class.new(ArgumentError)

  # A no-op entry carrying only a message; applying it does nothing.
  class Noop
    attr_reader :h, :ts, :msg

    # Reads H, TS and O from +bson+, then MSG from the O value, and builds a
    # Noop. A KeyError along the way is re-raised as InvalidNoop.
    def self.from(bson)
      entry_id = bson.fetch(H)
      stamp = bson.fetch(TS)
      text = bson.fetch(O).fetch(MSG)

      new(entry_id, stamp, text)
    rescue KeyError => e
      raise InvalidNoop, "missing field: #{e}"
    end

    # h is coerced with Integer(), msg with String(); ts must pass the strict
    # Oplogjam::Timestamp check.
    def initialize(h, ts, msg)
      @h = Integer(h)
      @ts = Oplogjam::Timestamp(ts)
      @msg = String(msg)
    end

    alias_method :message, :msg
    alias_method :id, :h

    # Time built from the timestamp's seconds, with the increment passed as
    # Time.at's sub-second argument.
    def timestamp
      seconds = ts.seconds
      increment = ts.increment

      Time.at(seconds, increment)
    end

    # Noops are equal when the other object is a Noop with the same id.
    def ==(other)
      other.is_a?(Noop) && id == other.id
    end

    # Intentionally a no-op.
    def apply(_mapping)
      nil
    end
  end
end
@@ -0,0 +1,31 @@
1
+ require 'oplogjam/noop'
2
+ require 'oplogjam/insert'
3
+ require 'oplogjam/update'
4
+ require 'oplogjam/delete'
5
+ require 'oplogjam/command'
6
+ require 'oplogjam/apply_ops'
7
+
8
module Oplogjam
  # Raised when an entry's operation type is not recognised.
  InvalidOperation = Class.new(ArgumentError)

  # Factory that turns a raw entry into the matching operation object.
  class Operation
    # Picks the operation class from the entry's OP field (treated as UNKNOWN
    # when absent) and delegates to that class's .from. A command entry becomes
    # an ApplyOps when its O value (an empty hash when absent) has the
    # APPLY_OPS key, and a Command otherwise.
    #
    # Raises InvalidOperation for any other operation type.
    def self.from(bson)
      handler =
        case bson.fetch(OP, UNKNOWN)
        when N then Noop
        when I then Insert
        when U then Update
        when D then Delete
        when C then bson.fetch(O, {}).key?(APPLY_OPS) ? ApplyOps : Command
        else raise InvalidOperation, "invalid operation: #{bson}"
        end

      handler.from(bson)
    end
  end
end
@@ -0,0 +1,2 @@
1
+ require 'oplogjam/operators/set'
2
+ require 'oplogjam/operators/unset'
@@ -0,0 +1,12 @@
1
module Oplogjam
  module Operators
    # Plain holder pairing a path with the value to be assigned at it.
    class Assignment
      # Where the value goes; stored exactly as given to the constructor
      # (presumably a list of path segments - confirm against callers).
      attr_reader :path

      # What gets assigned; stored exactly as given to the constructor.
      attr_reader :value

      def initialize(path, value)
        @path = path
        @value = value
      end
    end
  end
end
@@ -0,0 +1,11 @@
1
+ require 'oplogjam/operators/assignment'
2
+
3
module Oplogjam
  module Operators
    # Assignment whose final path segment is treated as a field name.
    class FieldAssignment < Assignment
      # Returns the result of setting the JSON-encoded value at this
      # assignment's path on +column+.
      def update(column)
        serialized = value.to_json

        column.set(path, serialized)
      end
    end
  end
end
@@ -0,0 +1,39 @@
1
+ require 'oplogjam/operators/assignment'
2
+
3
module Oplogjam
  module Operators
    # Assignment whose final path segment is numeric.
    class IndexAssignment < Assignment
      # A numeric last segment is ambiguous: it may be an index into an
      # existing array or a numeric field name on an object.
      #
      # For the array case, every slot before the target index must already
      # exist, so each one is rewritten to its current value or to null. That
      # padded expression is used only when the parent's JSON type is an array;
      # otherwise the column is used untouched. The value is then set at the
      # full path.
      def update(column)
        padded = (0...index).inject(column) do |acc, position|
          slot = parent_path + [position.to_s]

          # Note: the existing value is read from the original column, not from
          # the accumulated expression.
          acc.set(slot, Sequel.function(:coalesce, column[slot], NULL))
        end

        parent_type = column[parent_path].typeof
        chosen = Sequel.case({ ARRAY_TYPE => padded }, column, parent_type)

        Sequel.pg_jsonb_op(chosen).set(path, value.to_json)
      end

      # The last path segment parsed strictly as a base-10 integer.
      def index
        Integer(path.last, 10)
      end

      # Every path segment except the last.
      def parent_path
        path[0...-1]
      end
    end
  end
end
@@ -0,0 +1,47 @@
1
module Oplogjam
  module Operators
    # A node holding child nodes in +tree+, keyed by path.
    class Intermediate
      attr_reader :path, :tree

      def initialize(path, tree = {})
        @path = path
        @tree = tree
      end

      # Ensures a child intermediate node exists at +path+, choosing the index
      # or field flavour by whether the last segment matches NUMERIC_INDEX.
      def populate(path)
        path.last =~ NUMERIC_INDEX ? populate_index(path) : populate_field(path)
      end

      # Stores an assignment of +value+ at +path+, choosing the index or field
      # flavour by whether the last segment matches NUMERIC_INDEX.
      def set(path, value)
        path.last =~ NUMERIC_INDEX ? set_index(path, value) : set_field(path, value)
      end

      # Keeps whatever truthy node is already stored at +path+; otherwise
      # stores a new IntermediateField there.
      def populate_field(path)
        tree[path] ||= IntermediateField.new(path)
      end

      # Keeps whatever truthy node is already stored at +path+; otherwise
      # stores a new IntermediateIndex there.
      def populate_index(path)
        tree[path] ||= IntermediateIndex.new(path)
      end

      # Overwrites the node at +path+ with a FieldAssignment.
      def set_field(path, value)
        tree[path] = FieldAssignment.new(path, value)
      end

      # Overwrites the node at +path+ with an IndexAssignment.
      def set_index(path, value)
        tree[path] = IndexAssignment.new(path, value)
      end

      # The child nodes, without their path keys.
      def nodes
        tree.values
      end
    end
  end
end