crdt 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: dd48ff44957feb80db35dde367d7738a00017ba4
4
+ data.tar.gz: 14f4d3d491163faaad568b5dd827e058671a3640
5
+ SHA512:
6
+ metadata.gz: f93a162bb0765597bb6519a41691bcca73bcd5c7ee6f6d67b91f02434a514d5088b11980716fa0a9297eabadb71c9a5d7be370d06255f1a4195562699715d5b0
7
+ data.tar.gz: 512bd46616582d6d910302677d3f9956e261965db82391f5cccaf1413aae37aa813545b2daf081137519c11b7909038b4984beb8c68fb4e9d6c73d98997a0f67
data/.gitignore ADDED
File without changes
data/Gemfile ADDED
@@ -0,0 +1,4 @@
1
# Gems are fetched from the public RubyGems index.
source 'https://rubygems.org'

# The dependency list itself lives in crdt.gemspec; pull it in from there.
gemspec
data/LICENSE.txt ADDED
@@ -0,0 +1,22 @@
1
+ Copyright (c) 2015 Steven Karas
2
+
3
+ MIT License
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining
6
+ a copy of this software and associated documentation files (the
7
+ "Software"), to deal in the Software without restriction, including
8
+ without limitation the rights to use, copy, modify, merge, publish,
9
+ distribute, sublicense, and/or sell copies of the Software, and to
10
+ permit persons to whom the Software is furnished to do so, subject to
11
+ the following conditions:
12
+
13
+ The above copyright notice and this permission notice shall be
14
+ included in all copies or substantial portions of the Software.
15
+
16
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
19
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
20
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
21
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
22
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/README.md ADDED
@@ -0,0 +1,59 @@
1
+ # CRDTs for Ruby
2
+
3
+ This gem provides CRDTs for use in other projects. I've favored clarity of code and intent over optimizations, so if you really need the extra performance, you can use these as a guide to understand the underlying concept, and then implement a more performant version.
4
+
5
+ That means no fancy class hierarchy, no performance oriented code, no complex loading path and class space munging.
6
+
7
+ ## What are CRDTs
8
+
9
+ CRDTs are distributed data types that exhibit something called Strong Eventual Consistency. Basically, they're the building blocks that let you build distributed systems.
10
+
11
+ ## How can I learn more
12
+
13
+ Marc Shapiro has cowritten a bunch of papers that cover both the basics of CRDTs and also a useful survey of simple CRDTs. There are video lectures where he explains most of them visually as well.
14
+
15
+ In fact, the names of the data types in this project I've taken from his survey paper.
16
+
17
+ ## Installation
18
+
19
+ Add this line to your application's Gemfile:
20
+
21
+ ```ruby
22
+ gem 'crdt'
23
+ ```
24
+
25
+ And then execute:
26
+
27
+ $ bundle
28
+
29
+ Or install it yourself as:
30
+
31
+ $ gem install crdt
32
+
33
+ ## Usage
34
+
35
+ You can require all the CRDTs, or individual ones:
36
+
37
+ ```ruby
38
+ require 'crdt'
39
+ ```
40
+
41
+ Or
42
+
43
+ ```ruby
44
+ require 'crdt/or_set'
45
+ ```
46
+
47
+ ## Contributing
48
+
49
+ 1. Fork it ( https://github.com/stevenkaras/crdt/fork )
50
+ 2. Create your feature branch (`git checkout -b my-new-feature`)
51
+ 3. Commit your changes (`git commit -am 'Add some feature'`)
52
+ 4. Push to the branch (`git push origin my-new-feature`)
53
+ 5. Create a new Pull Request
54
+
55
+ ## Acknowledgements
56
+
57
+ Based on research by Marc Shapiro, et al.
58
+
59
+ Inspired by [aphyr/meangirls](https://github.com/aphyr/meangirls), but not based on it (he does some funky class inheritance/loading tricks I don't like).
data/Rakefile ADDED
@@ -0,0 +1,2 @@
1
+ require "bundler/gem_tasks"
2
+
data/crdt.gemspec ADDED
@@ -0,0 +1,24 @@
1
# coding: utf-8
# Gem specification for crdt.
#
# The lib directory is put on the load path first so that the version constant
# can be required without the gem being installed.
lib_dir = File.expand_path('../lib', __FILE__)
$LOAD_PATH.unshift(lib_dir) unless $LOAD_PATH.include?(lib_dir)
require 'crdt/version'

Gem::Specification.new do |spec|
  # Identity
  spec.name    = 'crdt'
  spec.version = CRDT::VERSION

  # Description
  spec.licenses    = ['MIT']
  spec.summary     = "Convergent/Commutative Replicated Data Types"
  spec.description = "This library provides naive implementations of common CRDTs"

  # Ownership
  spec.authors  = ["Steven Karas"]
  spec.email    = 'steven.karas@gmail.com'
  spec.homepage = 'https://rubygems.org/gems/crdt'

  # Package every file tracked by git; executables and test files are the
  # subsets of that list living under bin/ and test|spec|features/.
  spec.files       = `git ls-files -z`.split("\x0")
  spec.executables = spec.files.grep(%r{^bin/}) { |path| File.basename(path) }
  spec.test_files  = spec.files.grep(%r{^(test|spec|features)/})

  # Development-only tooling
  spec.add_development_dependency "bundler", "~> 1.6"
  spec.add_development_dependency "rake"
end
data/lib/crdt.rb ADDED
@@ -0,0 +1,15 @@
1
# Convergent/Commutative Replicated Data Types
#
# Requiring this file defines the CRDT namespace and then loads every data
# type listed below from the sibling crdt/ directory.
#
# TODO: document library inclusion
# TODO: document usage example
module CRDT
end

# Ruby has no `__DIR__` keyword (only `__FILE__` and the `__dir__` method), so
# the files are loaded relative to this one with require_relative.
%w[
  pn_counter
  vector_clock
  or_set
  lww_register
].each do |lib|
  require_relative "crdt/#{lib}"
end
@@ -0,0 +1,66 @@
1
module CRDT
  # Last Write Wins Register
  #
  # This is a LWWRegister, useful for storing arbitrary data. However, it assumes that your nodes' clocks are synchronized.
  #
  # In practice, this is problematic if you expect changes to take place more often than the clock drift.
  # In my personal experience, clock drift is usually only a few seconds between servers, but can be upwards of several minutes between personal devices such as mobile phones/tablets (especially those on different cellular networks)
  #
  # Writes are ordered by the triple (seconds, nanoseconds, tiebreaker), compared
  # lexicographically.
  class LWWRegister
    # Create a new, unset register
    #
    # @param tiebreaker value stamped on every local write and used to order writes whose timestamps are identical. Defaults to the current Thread's object id
    def initialize(tiebreaker = Thread.current.object_id.to_i)
      @tiebreaker = tiebreaker
      @value = nil
      @timestamp = nil
      @timestamp_nsec = nil
      @timestamp_tiebreaker = nil
    end

    attr_accessor :value, :timestamp, :timestamp_nsec, :timestamp_tiebreaker

    # Set the value of this register, throwing out any previous value
    #
    # The write is stamped with the current wall clock time and this register's tiebreaker.
    def set(value)
      @value = value
      time = Time.now
      @timestamp = time.to_i
      @timestamp_nsec = time.nsec
      @timestamp_tiebreaker = @tiebreaker
    end

    # Get the value in this register
    def get
      @value
    end

    # Perform a one way merge, potentially bringing in the value from another register
    #
    # The other register's write is adopted when this register has never been
    # written, or when the other write's stamp is greater than or equal to the
    # local one. An unset other register is ignored.
    #
    # @param other [LWWRegister] the register to merge from
    # @return [LWWRegister] self
    # @raise [ArgumentError] if the two write stamps cannot be compared (e.g. tiebreakers of different types)
    def merge(other)
      return self unless other.timestamp

      if @timestamp
        order = write_stamp(other) <=> write_stamp(self)
        raise ArgumentError, "write stamps of the two registers are not comparable" if order.nil?
        return self if order < 0
      end

      @value = other.value
      @timestamp = other.timestamp
      @timestamp_nsec = other.timestamp_nsec
      @timestamp_tiebreaker = other.timestamp_tiebreaker
      self
    end

    # Get a hash representation of this register, suitable for serialization to JSON
    def to_h
      {
        value: @value,
        timestamp: @timestamp,
        timestamp_nsec: @timestamp_nsec,
        timestamp_tiebreaker: @timestamp_tiebreaker,
        tiebreaker: @tiebreaker,
      }
    end

    # Build a new register from the given hash
    #
    # Keys may be strings (as produced by parsing JSON) or symbols (as produced by #to_h).
    def self.from_h(hash)
      read = ->(key) { hash.key?(key) ? hash[key] : hash[key.to_sym] }

      register = LWWRegister.new(read.call("tiebreaker"))
      register.value = read.call("value")
      register.timestamp = read.call("timestamp")
      register.timestamp_nsec = read.call("timestamp_nsec")
      register.timestamp_tiebreaker = read.call("timestamp_tiebreaker")
      register
    end

    private

    # Ordering key for the last write held by the given register.
    # A missing nanosecond component is treated as zero.
    def write_stamp(register)
      [register.timestamp, register.timestamp_nsec.to_i, register.timestamp_tiebreaker]
    end
  end
end
@@ -0,0 +1,106 @@
1
module CRDT
  # Observed-Removed Set
  #
  # This CRDT allows items to be added, and removed. The idea being that when an item is added, it is added along with a token. When removing an element, all tokens for that item are marked as removed.
  # This implementation of an ORSet keeps a unified record for each item, where removed tokens are moved from an "observed" set to a "removed" set.
  #
  # Tokens are strings of the form "<node identity>:<counter>".
  #
  # Efficiency:
  #   Number of items: n, Number of nodes: m, Number of operations: k
  #   Space efficiency: O(k)
  #   Space efficiency with garbage collection: O(n)
  #   Adding an item: O(1)
  #   Removing an item: O(k) in the degenerate case, typically closer to O(1)
  #   Testing if an item is in the set: O(1)
  class ORSet
    # Create a new, empty set
    #
    # @param node_identity identifier of this node, embedded in every token it creates. Defaults to the current Thread's object id
    # @param token_counter counter value to use for the next token created by this node
    def initialize(node_identity = Thread.current.object_id, token_counter = 0)
      @node_identity = node_identity
      @token_counter = token_counter
      @items = {}
    end

    attr_accessor :items, :token_counter

    # Check if this item is in the set
    #
    # An item is present while it has at least one observed (not removed) token.
    def has?(item)
      record = @items[item]
      return false unless record
      !record[:observed].empty?
    end

    # Add an item to this set
    def add(item)
      # the token in this implementation is "better", since it's easier for us to parse/garbage collect
      token = "#{@node_identity}:#{@token_counter}"
      @token_counter += 1

      @items[item] ||= { observed: [], removed: [] }
      @items[item][:observed] << token
    end

    # Mark an item as removed from the set
    #
    # Every token currently observed for the item is moved to its removed list.
    # Removing an item this set has no record of does nothing.
    def remove(item)
      record = @items[item]
      return unless record

      record[:removed] |= record[:observed]
      record[:observed] = []
    end

    # Get a hash representation of this set, suitable for serialization to JSON
    def to_h
      {
        node_identity: @node_identity,
        token_counter: @token_counter,
        items: @items,
      }
    end

    # Create a ORSet from a hash, such as that deserialized from JSON
    #
    # Keys, both at the top level and inside each item record, may be strings
    # (as produced by parsing JSON) or symbols (as produced by #to_h).
    def self.from_h(hash)
      read = ->(source, key) { source.key?(key) ? source[key] : source[key.to_sym] }

      set = ORSet.new(read.call(hash, "node_identity"), read.call(hash, "token_counter") || 0)

      (read.call(hash, "items") || {}).each do |item, record|
        set.items[item] = {
          observed: Array(read.call(record, "observed")).dup,
          removed: Array(read.call(record, "removed")).dup,
        }
      end

      set
    end

    # Perform a one-way merge, bringing changes from the other ORSet provided
    #
    # Token lists are combined as sets, so merging the same state repeatedly
    # does not accumulate duplicate tokens.
    #
    # @param other (ORSet)
    # @return [ORSet] self
    def merge(other)
      other.items.each do |item, record|
        mine = (@items[item] ||= { observed: [], removed: [] })
        mine[:removed] |= record[:removed]
        # a token that has been removed anywhere must not stay observed here
        mine[:observed] = (mine[:observed] | record[:observed]) - mine[:removed]
      end
      self
    end

    # garbage collect all tokens originating from the given node whose counter is at or below the given counter
    #
    # This should be called only when partial consensus can be ascertained for the system
    #
    # @param node_to_collect identity of the node whose tokens are collected
    # @param until_counter [Integer] highest token counter to collect
    # @return [ORSet] self
    def gc(node_to_collect, until_counter)
      node_key = node_to_collect.to_s
      # split on the last ":" so node identities containing ":" still parse
      counter_of = ->(token) { token.rpartition(":").last.to_i }
      collectable = lambda do |token|
        token.rpartition(":").first == node_key && counter_of.call(token) <= until_counter
      end

      @items.each_value do |record|
        # remove any removal records, since the system has reached consensus up to this node's counter
        record[:removed].reject!(&collectable)

        # squash all the matching observed tokens into the most recent one
        # This is potentially unnecessary so long as at most one active observed token is recorded per node
        matching = record[:observed].select(&collectable)
        next if matching.size < 2
        survivor = matching.max_by(&counter_of)
        record[:observed] -= (matching - [survivor])
      end

      # records left with no tokens at all carry no information
      @items.delete_if { |_item, record| record[:observed].empty? && record[:removed].empty? }
      self
    end
  end
end
@@ -0,0 +1,143 @@
1
module CRDT
  # A positive negative counter
  #
  # This counter can be incremented up or down. Each node should only adjust its own up and down counters.
  # The current value of the counter is calculated by taking the sum of all the positive counters and subtracting the sum of all the negative counters
  #
  # # Efficiency:
  #   value in counter: n, number of nodes: m, number of changes: k
  #   Local changes (+/-) are O(1)
  #   Merging changes are O(m)
  #   The space cost is O(m)
  #   The space cost of synchronization is O(m)
  #
  # # Implementation notes:
  #   This implementation is a CvRDT. That means replicas exchange their whole state, which is combined with #merge.
  #   This implementation doesn't support garbage collection, although you could add it by removing a node's records, and folding it into a base value.
  class PNCounter
    # @param hash [Hash] a serialized PNCounter, conforming to the format here
    #
    # Expects a Hash in the following format (symbol keys, as produced by #to_h, are accepted too):
    #   {
    #     "positive" => {
    #       "1" => 15,
    #       "3" => 4
    #     },
    #     "negative" => {
    #     }
    #   }
    def self.from_h(hash)
      counter = PNCounter.new

      (hash["positive"] || hash[:positive] || {}).each do |source, amount|
        counter.increase(amount, source)
      end
      (hash["negative"] || hash[:negative] || {}).each do |source, amount|
        counter.decrease(amount, source)
      end

      counter
    end

    # Get a hash representation of this object, which is suitable for serialization to JSON
    def to_h
      {
        cached_value: @cached_value,
        positive: @positive_counters,
        negative: @negative_counters,
      }
    end

    # Create a new counter
    #
    # @param this_source Identifier for this node, used for tracking changes to the counter. Defaults to the current Thread's object ID
    def initialize(this_source = Thread.current.object_id)
      @cached_value = 0
      @positive_counters = {}
      @negative_counters = {}
      @this_source = this_source
    end

    attr_accessor :positive_counters, :negative_counters

    # Increase this counter by the given amount
    #
    # @param amount [Number] a non-negative amount to increase this counter by
    # @param source the node to record the change for. Defaults to this node
    # @return [PNCounter] self
    def increase(amount, source = nil)
      source ||= @this_source
      positive_counters[source] ||= 0
      positive_counters[source] += amount
      @cached_value += amount

      self
    end

    # Decrease this counter by the given amount
    #
    # @param amount [Number] a non-negative amount to decrease this counter by
    # @param source the node to record the change for. Defaults to this node
    # @return [PNCounter] self
    def decrease(amount, source = nil)
      source ||= @this_source
      negative_counters[source] ||= 0
      negative_counters[source] += amount
      @cached_value -= amount

      self
    end

    # Add something to this counter
    #
    # Ruby does not allow `+=` itself to be defined; `counter += n` is evaluated
    # as `counter = counter + n`. This method therefore updates the counter in
    # place and returns it, so the abbreviated assignment keeps the same object.
    #
    # @param other [Number] the amount to add to this counter
    # @return [PNCounter] self
    def +(other)
      if other > 0
        increase(other)
      else
        decrease(- other)
      end
    end

    # Subtract something from this counter
    #
    # Like #+, this updates the counter in place and returns it so that
    # `counter -= n` works.
    #
    # @param other [Number] the amount to subtract from this counter
    # @return [PNCounter] self
    def -(other)
      if other > 0
        decrease(other)
      else
        increase(- other)
      end
    end

    # The current value of the counter
    def value
      @cached_value
    end

    # The current value of the counter, converted with to_i
    def to_i
      @cached_value.to_i
    end

    # Merge the counters from the other PNCounter into this one
    #
    # For every source the larger of the two recorded amounts is kept, and the
    # cached value is recomputed from the merged counters.
    #
    # @param other [PNCounter] the counter to merge from
    # @return [PNCounter] self
    def merge(other)
      keep_larger = proc { |_source, ours, theirs| ours < theirs ? theirs : ours }
      @positive_counters.merge!(other.positive_counters, &keep_larger)
      @negative_counters.merge!(other.negative_counters, &keep_larger)

      # the cache only tracks local increase/decrease calls, so rebuild it here
      @cached_value = @positive_counters.values.inject(0, :+) - @negative_counters.values.inject(0, :+)

      self
    end
  end
end
@@ -0,0 +1,74 @@
1
module CRDT
  # Vector clocks are a loose synchronization primitive
  #
  # Vector clocks can be used as a building block to create other replicated data types, and tracking operations
  #
  # Formally, a vector clock is equivalent to a GCounter that is only incremented by 1, and the aggregate value is ignored
  class VectorClock
    # Create a new vector clock
    #
    # @param default_node Identity of the current node. Defaults to the current Thread object id
    def initialize(default_node = Thread.current.object_id)
      @default_node = default_node
      @clocks = {}
    end

    attr_accessor :clocks

    # Increment the clock for the given node by 1
    #
    # @param node The node to update the clock for. Defaults to the default node
    # @return the new clock value for that node
    def increment_clock(node = nil)
      node ||= @default_node
      @clocks[node] ||= 0
      @clocks[node] += 1
    end

    # Get the current clock value for the given node
    #
    # @param node the node to check for. Defaults to the default node
    # @return the recorded value, or nil when nothing has been recorded for the node
    def value(node = nil)
      node ||= @default_node
      @clocks[node]
    end

    # Create a new VectorClock from the provided hash. The hash should follow this syntax
    # (a symbol :clocks key, as produced by #to_h, is accepted too):
    #
    #   {
    #     "clocks" => {
    #       "1" => 3,
    #       "3" => 2
    #     }
    #   }
    def self.from_h(hash)
      clock = VectorClock.new

      (hash["clocks"] || hash[:clocks] || {}).each do |node, value|
        clock.clocks[node] = value
      end

      clock
    end

    # Get a hash representation of this vector clock, suitable for serialization to JSON
    def to_h
      {
        clocks: @clocks,
      }
    end

    # Perform a one-way merge, bringing in clock values from the other clock
    #
    # For every node the larger of the two recorded values is kept.
    #
    # @param other [VectorClock] the clock to merge from
    # @return [VectorClock] self
    def merge(other)
      @clocks.merge!(other.clocks) { |_node, ours, theirs| ours < theirs ? theirs : ours }
      self
    end
  end
end
@@ -0,0 +1,3 @@
1
module CRDT
  # Version of the crdt gem. Frozen so the constant cannot be mutated in place.
  VERSION = "0.1.0".freeze
end
metadata ADDED
@@ -0,0 +1,83 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: crdt
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.1.0
5
+ platform: ruby
6
+ authors:
7
+ - Steven Karas
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2015-01-24 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: bundler
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - "~>"
18
+ - !ruby/object:Gem::Version
19
+ version: '1.6'
20
+ type: :development
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - "~>"
25
+ - !ruby/object:Gem::Version
26
+ version: '1.6'
27
+ - !ruby/object:Gem::Dependency
28
+ name: rake
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - ">="
32
+ - !ruby/object:Gem::Version
33
+ version: '0'
34
+ type: :development
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - ">="
39
+ - !ruby/object:Gem::Version
40
+ version: '0'
41
+ description: This library provides naive implementations of common CRDTs
42
+ email: steven.karas@gmail.com
43
+ executables: []
44
+ extensions: []
45
+ extra_rdoc_files: []
46
+ files:
47
+ - ".gitignore"
48
+ - Gemfile
49
+ - LICENSE.txt
50
+ - README.md
51
+ - Rakefile
52
+ - crdt.gemspec
53
+ - lib/crdt.rb
54
+ - lib/crdt/lww_register.rb
55
+ - lib/crdt/or_set.rb
56
+ - lib/crdt/pn_counter.rb
57
+ - lib/crdt/vector_clock.rb
58
+ - lib/crdt/version.rb
59
+ homepage: https://rubygems.org/gems/crdt
60
+ licenses:
61
+ - MIT
62
+ metadata: {}
63
+ post_install_message:
64
+ rdoc_options: []
65
+ require_paths:
66
+ - lib
67
+ required_ruby_version: !ruby/object:Gem::Requirement
68
+ requirements:
69
+ - - ">="
70
+ - !ruby/object:Gem::Version
71
+ version: '0'
72
+ required_rubygems_version: !ruby/object:Gem::Requirement
73
+ requirements:
74
+ - - ">="
75
+ - !ruby/object:Gem::Version
76
+ version: '0'
77
+ requirements: []
78
+ rubyforge_project:
79
+ rubygems_version: 2.2.2
80
+ signing_key:
81
+ specification_version: 4
82
+ summary: Convergent/Commutative Replicated Data Types
83
+ test_files: []