spinoza 0.1 → 0.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +66 -3
- data/lib/spinoza/calvin/executor.rb +107 -0
- data/lib/spinoza/calvin/node.rb +44 -0
- data/lib/spinoza/calvin/readcaster.rb +50 -0
- data/lib/spinoza/calvin/scheduler.rb +134 -0
- data/lib/spinoza/calvin/sequencer.rb +74 -0
- data/lib/spinoza/common.rb +3 -0
- data/lib/spinoza/system/link.rb +33 -0
- data/lib/spinoza/system/lock-manager.rb +22 -8
- data/lib/spinoza/system/log.rb +95 -0
- data/lib/spinoza/system/meta-log.rb +103 -0
- data/lib/spinoza/system/model.rb +14 -0
- data/lib/spinoza/system/node.rb +56 -7
- data/lib/spinoza/system/operation.rb +22 -6
- data/lib/spinoza/system/store.rb +15 -14
- data/lib/spinoza/system/{table-spec.rb → table.rb} +10 -6
- data/lib/spinoza/system/timeline.rb +81 -0
- data/lib/spinoza/transaction.rb +170 -39
- data/lib/spinoza/version.rb +1 -1
- data/test/test-executor.rb +110 -0
- data/test/test-link.rb +43 -0
- data/test/test-log.rb +47 -0
- data/test/test-meta-log.rb +63 -0
- data/test/test-node.rb +35 -14
- data/test/test-readcaster.rb +87 -0
- data/test/test-scheduler.rb +163 -0
- data/test/test-sequencer.rb +78 -0
- data/test/test-timeline.rb +58 -0
- data/test/test-transaction.rb +75 -18
- metadata +42 -3
data/lib/spinoza/common.rb
CHANGED
data/lib/spinoza/system/link.rb
CHANGED
@@ -0,0 +1,33 @@
+require 'spinoza/system/model'
+require 'spinoza/system/timeline'
+
+# Models a comm link between nodes, including the latency between sender and
+# receiver. The class is stateless: the state of the channnel (messages and
+# their scheduled arrivals) is part of the global timeline.
+class Spinoza::Link < Spinoza::Model
+  # Source and destination nodes.
+  attr_reader :src, :dst
+
+  # Delay between send by source and receive by destination.
+  attr_reader :latency
+
+  def initialize src: raise, dst: raise, latency: 0.100, **rest
+    super **rest
+    @src, @dst, @latency = src, dst, latency
+  end
+
+  class << self
+    alias [] new
+  end
+
+  def inspect
+    "<#{self.class}: #{src} -> #{dst}>"
+  end
+
+  # The src node calls this to send a message. The message is scheduled for
+  # arrival at the destination.
+  def send_message msg
+    timeline << Spinoza::Event[actor: dst, time: time_now + latency,
+      action: :recv, msg: msg]
+  end
+end
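For orientation, a minimal usage sketch of the new Link class, based only on the API visible in this diff; the Spinoza::Timeline constructor and the assumption that a Node can be created with no tables are guesses, not part of this diff:

    require 'spinoza/system/timeline'
    require 'spinoza/system/node'
    require 'spinoza/system/link'

    timeline = Spinoza::Timeline.new        # assumed constructor
    src = Spinoza::Node[timeline: timeline] # assumes a node with no tables is valid
    dst = Spinoza::Node[timeline: timeline]

    link = Spinoza::Link[timeline: timeline, src: src, dst: dst, latency: 0.050]
    link.send_message "hello"
    # schedules Event[actor: dst, action: :recv, msg: "hello",
    #                 time: timeline.now + 0.050] on the shared timeline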
data/lib/spinoza/system/lock-manager.rb
CHANGED
@@ -3,6 +3,13 @@ require 'spinoza/common'
 # Manages concurrency in the spinoza system model, which explicitly schedules
 # all database reads and writes. So all this does is check for concurrency
 # violations; nothing actually blocks.
+#
+# The +txn+ references in this class care only about identity, so they could
+# all be ids or they could all be transaction objects. Similarly the resource
+# being locked by a ReadLock or WriteLock can be anything whose identity is
+# defined by hash equality, i.e. #eql?. Typically, we use `[table, key]` pairs,
+# where `key` is a primary key reference like `{id: ...}`.
+#
 class Spinoza::LockManager
   class ConcurrencyError < StandardError; end
 
@@ -74,8 +81,8 @@ class Spinoza::LockManager
     end
   end
 
-
-
+  # { resource => WriteLock | ReadLock | nil, ... }
+  # typically, resource == [table, key]
   attr_reader :locks
 
   def initialize
@@ -86,10 +93,10 @@ class Spinoza::LockManager
     case lock = locks[resource]
     when nil
       locks[resource] = ReadLock.new(txn)
-    when ReadLock
+    when ReadLock, WriteLock
       lock.add txn
-
-
+      # in WriteLock case, add the reader as a writer
+      # (fails if not locked by txn)
     else raise
     end
   end
@@ -109,11 +116,9 @@ class Spinoza::LockManager
   def unlock_read resource, txn
     lock = locks[resource]
     case lock
-    when WriteLock
-      raise ConcurrencyError, "#{resource} is write locked: #{lock}"
     when nil
       raise ConcurrencyError, "#{resource} is not locked"
-    when ReadLock
+    when ReadLock, WriteLock
       begin
         lock.remove txn
         locks.delete resource if lock.unlocked?
@@ -142,6 +147,15 @@ class Spinoza::LockManager
     end
   end
 
+  def unlock_all txn
+    locks.delete_if do |resource, lock|
+      if lock and lock.includes? txn
+        lock.remove txn
+        lock.unlocked?
+      end
+    end
+  end
+
   def has_read_lock? resource, txn
     lock = locks[resource]
     lock.kind_of?(ReadLock) && lock.includes?(txn)
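A rough sketch of the lock lifecycle these changes enable: a transaction that already holds a WriteLock can now also register as a reader, and unlock_all releases everything it holds. The lock_read/lock_write method names below are assumptions; only unlock_read, unlock_all, and has_read_lock? appear in this diff:

    require 'spinoza/system/lock-manager'

    lm  = Spinoza::LockManager.new
    txn = Object.new                 # only identity matters
    res = [:users, {id: 1}]          # resource == [table, key]

    lm.lock_write res, txn           # assumed acquire method name
    lm.lock_read  res, txn           # now ok: reader added to txn's own WriteLock
    lm.unlock_all txn                # new in 0.2: drop every lock txn holds
    lm.has_read_lock? res, txn       # => false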
data/lib/spinoza/system/log.rb
CHANGED
@@ -0,0 +1,95 @@
+require 'spinoza/common'
+
+# Model of asynchronously replicated global log, such as Cassandra. We assume
+# that each node in our system has a replica providing this service.
+class Spinoza::Log
+  # Delay for a write to become durable on "enough" replicas, from the point of
+  # view of the writing node. Adjust this quantity for your definition of
+  # durable and your network performance.
+  attr_reader :dt_durable
+
+  # Delay for a write to become "completely" replicated: readable at all nodes.
+  # Adjust this quantity for your network performance.
+  attr_reader :dt_replicated
+
+  # We do not allow the same key to be written twice, since a key uniquely
+  # designates the logged transaction request. This error is raised if a
+  # key is overwritten.
+  class KeyConflictError < StandardError; end
+
+  class Entry
+    # Node which wrote the entry.
+    attr_reader :node
+
+    # Data payload.
+    attr_reader :value
+
+    # When, in the global timeline, this entry is durable enough and the
+    # writing node has been notified that this is the case.
+    attr_reader :time_durable
+
+    # When, in the global timeline, this entry is completely replicated.
+    attr_reader :time_replicated
+
+    def initialize node: raise, value: raise,
+          time_durable: raise, time_replicated: raise
+      @node, @value, @time_durable, @time_replicated =
+        node, value, time_durable, time_replicated
+    end
+
+    # Returns true if the writing node believes the data to be durable.
+    def durable?
+      @node.time_now >= @time_durable
+    end
+
+    # Returns true if +other_node+ can read the entry (i.e. it has been
+    # replicated to the nodes).
+    def readable_at? other_node
+      other_node == @node or other_node.time_now >= @time_replicated
+    end
+  end
+
+  def initialize dt_durable: 0.300, dt_replicated: 0.500
+    @dt_durable = dt_durable
+    @dt_replicated = dt_replicated
+    @store = {}
+  end
+
+  # Returns true if the writing node believes the data at +key+ is durable.
+  def durable? key
+    entry = @store[key]
+    entry && entry.durable?
+  end
+
+  def time_durable key
+    @store[key].time_durable
+  end
+
+  def when_durable key, **event_opts
+    entry = @store[key]
+    entry.node.timeline.schedule Spinoza::Event[
+      time: entry.time_durable,
+      **event_opts
+    ]
+  end
+
+  def time_replicated key
+    @store[key].time_replicated
+  end
+
+  # Returns the entry.
+  def write key, value, node: raise
+    raise KeyConflictError if @store[key] or not value
+    @store[key] =
+      Entry.new node: node, value: value,
+        time_durable: node.time_now + dt_durable,
+        time_replicated: node.time_now + dt_replicated
+  end
+
+  # Returns the value if the data has been propagated to +node+, otherwise,
+  # returns nil.
+  def read key, node: raise
+    entry = @store[key]
+    entry && entry.readable_at?(node) ? entry.value : nil
+  end
+end
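Roughly how the Log model is exercised, using only methods shown above; the timeline constructor and table-less nodes are assumptions:

    require 'spinoza/system/timeline'
    require 'spinoza/system/node'
    require 'spinoza/system/log'

    timeline = Spinoza::Timeline.new            # assumed constructor
    writer = Spinoza::Node[timeline: timeline]
    reader = Spinoza::Node[timeline: timeline]

    log = Spinoza::Log.new dt_durable: 0.300, dt_replicated: 0.500
    log.write "txn-1", {op: :insert}, node: writer

    log.read "txn-1", node: writer   # => {op: :insert}; own writes are visible
    log.read "txn-1", node: reader   # => nil until reader.time_now reaches
                                     #    log.time_replicated("txn-1")
    log.durable? "txn-1"             # => false until writer.time_now reaches
                                     #    log.time_durable("txn-1")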
data/lib/spinoza/system/meta-log.rb
CHANGED
@@ -0,0 +1,103 @@
+require 'spinoza/system/timeline'
+
+# Model of synchronously replicated, linearizable global log, such as Zookeeper.
+class Spinoza::MetaLog
+  # Time to replicate a write to a quorum of MetaLog nodes, and for a unique
+  # sequence number to be assigned, and for that to be communicated to the
+  # writer node.
+  attr_reader :dt_quorum
+
+  # Delay for a write to become "completely" replicated: readable at all nodes.
+  # Adjust this quantity for your network performance.
+  attr_reader :dt_replicated
+
+  class Entry
+    # Node which wrote the entry.
+    attr_reader :node
+
+    # Data payload.
+    attr_reader :value
+
+    # When, in the global timeline, this entry has reached a quorum and the
+    # writing node has been notified that this is the case.
+    attr_reader :time_quorum
+
+    # When, in the global timeline, this entry is completely replicated.
+    attr_reader :time_replicated
+
+    def initialize node: raise, value: raise,
+          time_quorum: raise, time_replicated: raise
+      @node, @value, @time_quorum, @time_replicated =
+        node, value, time_quorum, time_replicated
+    end
+
+    # Returns true if the writing node knows the data is at a quorum.
+    def quorum?
+      @node.time_now >= @time_quorum
+    end
+
+    # Returns true if +other_node+ can read the entry (i.e. it has been
+    # replicated to the nodes).
+    def readable_at? other_node
+      other_node == @node or other_node.time_now >= @time_replicated
+    end
+  end
+
+  def initialize dt_quorum: 0.300, dt_replicated: 0.500
+    @dt_quorum = dt_quorum
+    @dt_replicated = dt_replicated
+    @store = []
+    @replication_listeners = []
+  end
+
+  # Returns true if the writing node knows that the data at +id+ has been
+  # replicated to a quorum of nodes.
+  def quorum? id
+    entry = @store[id]
+    entry && entry.quorum?
+  end
+
+  def time_quorum id
+    @store[id].time_quorum
+  end
+
+  def time_replicated id
+    @store[id].time_replicated
+  end
+
+  # Request that, whenever a new entry is created, an event be added to the
+  # schedule that will fire at entry.time_replicated. The event will send
+  # the method named `action` to `actor`, with id, node, and value arguments.
+  # Note that events fire in id order (because of the strong consistency
+  # guarantees that the meta-log's underlying store is assumed to have).
+  def on_entry_available actor, action
+    @replication_listeners << [actor, action]
+  end
+
+  # Append value to the MetaLog, assigning it a unique monotonically increasing
+  # ID. In our use case, the value will be a key (or batch of keys) of the Log.
+  # Returns an id, which can be used to retrieve the entry in the order it was
+  # appended. The returned id should only be used to observe the model, and not
+  # used within the model itself, since the id won't be available to the
+  # requesting process until `time_quorum(id)`.
+  def append value, node: raise
+    entry = Entry.new(node: node, value: value,
+      time_quorum: node.time_now + dt_quorum,
+      time_replicated: node.time_now + dt_replicated)
+    @store << entry
+    id = @store.size - 1
+    @replication_listeners.each do |actor, action|
+      node.timeline << Spinoza::Event[
+        time: entry.time_replicated, actor: actor, action: action,
+        id: id, node: node, value: value]
+    end
+    id
+  end
+
+  # Returns the value if the data has been propagated to +node+, otherwise,
+  # returns nil.
+  def get id, node: raise
+    entry = @store[id]
+    entry && entry.readable_at?(node) ? entry.value : nil
+  end
+end
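A sketch of how MetaLog appends propagate; the listener below is a toy stand-in (the real consumer would be the Calvin scheduler/sequencer added elsewhere in this release), and the timeline constructor is again assumed:

    require 'spinoza/system/timeline'
    require 'spinoza/system/node'
    require 'spinoza/system/meta-log'

    timeline = Spinoza::Timeline.new            # assumed constructor
    node = Spinoza::Node[timeline: timeline]
    meta_log = Spinoza::MetaLog.new dt_quorum: 0.300, dt_replicated: 0.500

    class Listener                              # illustrative only
      def entry_available id: nil, node: nil, value: nil
        puts "entry #{id} from node #{node}: #{value}"
      end
    end

    meta_log.on_entry_available Listener.new, :entry_available
    id = meta_log.append "log-key-1", node: node
    # an Event firing Listener#entry_available is now scheduled for
    # meta_log.time_replicated(id)
    meta_log.get id, node: node                 # => "log-key-1"; own write is visible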
data/lib/spinoza/system/model.rb
CHANGED
@@ -0,0 +1,14 @@
+require 'spinoza/common'
+
+# Base class for all model classes that know about the passage of time.
+class Spinoza::Model
+  attr_reader :timeline
+
+  def initialize timeline: nil
+    @timeline = timeline
+  end
+
+  def time_now
+    timeline.now
+  end
+end
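For reference, the pattern subclasses follow (as Link and Node do elsewhere in this diff): pass timeline: up through super and read the simulated clock via time_now. The Pinger class here is purely illustrative:

    require 'spinoza/system/model'
    require 'spinoza/system/timeline'

    class Pinger < Spinoza::Model               # hypothetical subclass
      def initialize interval: 1.0, **rest
        super **rest                            # hands timeline: to Spinoza::Model
        @interval = interval
      end

      def ping                                  # schedules the next :ping event
        timeline << Spinoza::Event[actor: self, action: :ping,
          time: time_now + @interval]
      end
    end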
data/lib/spinoza/system/node.rb
CHANGED
@@ -1,17 +1,66 @@
+require 'spinoza/system/model'
 require 'spinoza/system/store'
-require 'spinoza/system/table
+require 'spinoza/system/table'
 require 'spinoza/system/lock-manager'
 
-# A top level entity in the system model, representing
-# the
-
+# A top level entity in the system model, representing one node of
+# the distributed system, typically one per host. Nodes are connected by Links.
+# Node is stateful.
+class Spinoza::Node < Spinoza::Model
+  attr_reader :name
   attr_reader :store
   attr_reader :lock_manager
 
+  # Outgoing links to peer nodes, as a map `{node => link, ...}`.
+  # Use `links[node].send_message(msg)` to send a message to a peer.
+  # Use Node#recv to handle received messages.
+  attr_reader :links
+
+  @next_name = 0
+  def self.new_name
+    @next_name.tap {@next_name += 1}.to_s
+  end
+
+  def new_name
+    Spinoza::Node.new_name
+  end
+
   # Create a node whose store contains the specified tables and which has
   # its own lock manager.
-  def initialize *
-
-    @
+  def initialize *tables, name: new_name, **rest
+    super **rest
+    @store = Spinoza::Store.new *tables
+    @name = name
+    @lock_manager = Spinoza::LockManager.new
+    @links = {}
+  end
+
+  def inspect
+    "<Node #{name}>"
+  end
+
+  def to_s
+    name.to_s
+  end
+
+  def link dst, **opts
+    require 'spinoza/system/link'
+
+    if links[dst]
+      raise "Link from #{self} to #{dst} already exists."
+    end
+    links[dst] = Spinoza::Link[timeline: timeline, src: self, dst: dst, **opts]
+  end
+
+  def tables
+    store.tables
+  end
+
+  class << self
+    alias [] new
+  end
+
+  def recv msg: raise
+    # Defined in subclasses.
   end
 end
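A sketch of wiring nodes together with the new Node API; Table construction is omitted because table.rb is not shown here, and whether Store accepts zero tables, like the Timeline constructor, is an assumption:

    require 'spinoza/system/timeline'
    require 'spinoza/system/node'

    timeline = Spinoza::Timeline.new                  # assumed constructor
    a = Spinoza::Node[name: "a", timeline: timeline]  # no tables passed
    b = Spinoza::Node[name: "b", timeline: timeline]

    a.link b, latency: 0.010
    b.link a, latency: 0.010

    a.links[b].send_message "hello"   # b.recv(msg: "hello") fires 10 ms later
    a.inspect                         # => "<Node a>"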
data/lib/spinoza/system/operation.rb
CHANGED
@@ -23,7 +23,7 @@ module Spinoza
 
   class InsertOperation < Operation
     attr_reader :row
-    def initialize txn = nil, table:
+    def initialize txn = nil, table: nil, row: nil
       @txn = txn
       @table, @row = table, row
     end
@@ -35,11 +35,15 @@ module Spinoza
     def check lm
       true
     end
+
+    def inspect
+      "<insert #{table}: #{row}>"
+    end
   end
 
   class UpdateOperation < Operation
     attr_reader :key, :row
-    def initialize txn = nil, table:
+    def initialize txn = nil, table: nil, row: nil, key: nil
       @txn = txn
       @table, @key, @row = table, key, row
     end
@@ -51,11 +55,15 @@ module Spinoza
     def check lm
       lm.has_write_lock? table, key, txn
     end
+
+    def inspect
+      "<update #{table} #{key}: #{row}>"
+    end
   end
 
   class DeleteOperation < Operation
     attr_reader :key
-    def initialize txn = nil, table:
+    def initialize txn = nil, table: nil, key: nil
       @txn = txn
       @table, @key = table, key
     end
@@ -63,17 +71,25 @@ module Spinoza
     def execute ds
       ds.where(key).delete
     end
+
+    def inspect
+      "<delete #{table} #{key}>"
+    end
   end
 
   class ReadOperation < Operation
     attr_reader :key
-    def initialize txn = nil, table:
+    def initialize txn = nil, table: nil, key: nil
      @txn = txn
       @table, @key = table, key
     end
 
     def execute ds
-      ReadResult.new(op: self, val: ds.where(key).
+      ReadResult.new(op: self, val: ds.where(key).first)
+    end
+
+    def inspect
+      "<read #{table} #{key}>"
    end
   end
 
@@ -81,7 +97,7 @@ module Spinoza
   # particular time.
   class ReadResult
     attr_reader :op, :val
-    def initialize op:
+    def initialize op: nil, val: nil
      @op, @val = op, val
     end
   end
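The keyword defaults added above make the operation classes constructible piecemeal, e.g. in tests. A sketch, using a bare symbol where the package presumably passes a Table object:

    require 'spinoza/system/operation'

    read = Spinoza::ReadOperation.new(nil, table: :users, key: {id: 1})
    ins  = Spinoza::InsertOperation.new(nil, table: :users, row: {id: 1, name: "ada"})

    read.inspect    # e.g. "<read users {:id=>1}>" (hash format varies by Ruby version)
    ins.inspect     # e.g. "<insert users: {:id=>1, :name=>\"ada\"}>"

    # ReadOperation#execute takes a Sequel-style dataset and now returns the
    # first matching row wrapped in a ReadResult:
    #   result = read.execute(dataset)
    #   result.val   # => the row hash, or nil if no match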