haplocheirus-client 0.6.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/LICENSE ADDED
@@ -0,0 +1,13 @@
1
+ Haplocheirus Client is Copyright (C) 2010 Twitter, Inc.
2
+
3
+ Licensed under the Apache License, Version 2.0 (the "License"); you may not
4
+ use this work except in compliance with the License. You may obtain a copy of
5
+ the License at
6
+
7
+ http://www.apache.org/licenses/LICENSE-2.0
8
+
9
+ Unless required by applicable law or agreed to in writing, software
10
+ distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
11
+ WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
12
+ License for the specific language governing permissions and limitations under
13
+ the License.
@@ -0,0 +1,70 @@
1
+ Haplocheirus Client
2
+ -------------------
3
+
4
+ Haplocheirus is a highly available, partitioned storage service for
5
+ vectors of heterogeneous blobs. This is a Ruby client library for that service.
6
+
7
+
8
+ INSTALLATION
9
+ ------------
10
+
11
+ gem install haplocheirus-client
12
+
13
+
14
+ USAGE
15
+ -----
16
+
17
+ Attach a new client to a Haplo service:
18
+
19
+ >> client = Haplocheirus.new # Defaults to localhost:7666
20
+
21
+ Store a vector under the id of '0':
22
+
23
+ >> client.store '0', ['foo', 'bar']
24
+
25
+ Find the first 2 entries, starting at index 0:
26
+
27
+ >> client.get '0', 0, 2
28
+ ['bar', 'foo'] # note the reverse order
29
+
30
+ Append an entry:
31
+
32
+ >> client.append 'baz', '', '0'
33
+ >> client.get '0', 0, 3
34
+ ['baz', 'bar', 'foo']
35
+
36
+ Merge that vector with another:
37
+
38
+ >> client.merge '0', ['bat', 'quux']
39
+ >> client.get '0', 0, 5
40
+ ['quux', 'baz', 'bat', 'bar', 'foo']
41
+
42
+
43
+ Remove an entry:
44
+
45
+ >> client.remove 'bat', '', ['0']
46
+ >> client.get '0', 0, 4
47
+ ['quux', 'baz', 'bar', 'foo']
48
+
49
+ Remove a set of entries:
50
+
51
+ >> client.unmerge '0', ['foo', 'bar']
52
+ >> client.get '0', 0, 2
53
+ ['quux', 'baz']
54
+
55
+ Delete the vector:
56
+ >> client.delete '0'
57
+
58
+
59
+ CONTRIBUTORS
60
+ ------------
61
+
62
+ Brandon Mitchell
63
+
64
+
65
+ LICENSE
66
+ -------
67
+
68
+ Copyright (C) 2010 Twitter, Inc.
69
+
70
+ This work is licensed under the Apache License, Version 2.0. See LICENSE for details.
@@ -0,0 +1,40 @@
1
+ require 'thrift'
2
+ require 'thrift_client'
3
+
4
+ require 'haplocheirus/thrift/timeline_store'
5
+ require 'haplocheirus/service'
6
+ require 'haplocheirus/client'
7
+
8
module Haplocheirus
  autoload :MockService, 'haplocheirus/mock_service'

  # Builds a Client around a freshly constructed Service — shorthand for:
  #
  #   s = Haplocheirus::Service.new(*args)
  #   Haplocheirus::Client.new(s)
  #
  # Returns nil when the service raises ServiceDisabled during construction.
  def self.new(*args)
    Haplocheirus::Client.new(Haplocheirus::Service.new(*args))
  rescue Haplocheirus::Service::ServiceDisabled
    nil
  end

  # Nice-to-haves: predicate sugar over the thrift-generated segment state.
  class TimelineSegment #:nodoc:
    { :hit? => :HIT, :miss? => :MISS, :timeout? => :TIMEOUT }.each do |predicate, state_name|
      define_method(predicate) do
        state == TimelineSegmentState.const_get(state_name)
      end
    end
  end
end
@@ -0,0 +1,177 @@
1
class Haplocheirus::Client

  attr_accessor :service

  # ==== Parameters
  # service<ThriftClient>:: transport handle all calls are delegated to
  #
  def initialize(service)
    @service = service
  end

  # Appends an entry to every timeline named by prefix + id. The backend
  # ignores appends to timelines that were never created via #store.
  #
  # ==== Parameters
  # entry
  # prefix<String>:: Prefix to prepend to each id
  # timeline_ids<Array[Integer], Integer>
  #
  def append(entry, prefix, *timeline_ids)
    service.append(entry, prefix, timeline_ids.flatten)
  end

  # Removes an entry from every timeline named by prefix + id.
  #
  # ==== Parameters
  # entry
  # prefix<String>:: Prefix to prepend to each id
  # timeline_ids<Array[Integer]>
  #
  def remove(entry, prefix, timeline_ids)
    service.remove(entry, prefix, timeline_ids)
  end

  # Reads entries from the timeline given by timeline_id, beginning at
  # offset and limited by length. Timelines are stored in recency order:
  # offset 0 is the most recent entry. Returns nil when the timeline
  # does not exist.
  #
  # ==== Parameters
  # timeline_id<String>
  # offset<Integer>
  # length<Integer>
  # dedupe<Boolean>:: Optional. Defaults to false.
  #
  # ==== Returns
  # TimelineSegment
  #
  # NOTE: The #size of the returned segment is computed *before* dupes
  # are removed.
  #
  def get(timeline_id, offset, length, dedupe = false)
    service.get(timeline_id, offset, length, dedupe)
  rescue Haplocheirus::TimelineStoreException
    nil
  end

  # Runs several timeline queries in a single call.
  #
  # ==== Parameters
  # timeline_queries<Array[TimelineQuery]>:: list of query structs
  #
  # ==== Returns
  # Array[TimelineSegment]
  #
  # NOTE: Returned segments carry no identifying information; results
  # are strictly positional with respect to the query list.
  #
  def get_multi(timeline_queries)
    service.get_multi(timeline_queries)
  end

  # Reads a range of entries since from_id (exclusive), possibly
  # including entries inserted out of order. from_id and to_id are
  # treated as 8-byte prefixes; a to_id <= 0 leaves the result unbounded
  # above. Returns nil when the timeline does not exist.
  #
  # ==== Parameters
  # timeline_id<String>
  # from_id<Integer>
  # to_id<Integer>:: Optional. Defaults to 0.
  # dedupe<Boolean>:: Optional. Defaults to false.
  #
  # ==== Returns
  # TimelineSegment
  #
  # NOTE: The #size of the returned segment is computed *before* dupes
  # are removed.
  #
  def range(timeline_id, from_id, to_id = 0, dedupe = false)
    service.get_range(timeline_id, from_id, to_id, dedupe)
  rescue Haplocheirus::TimelineStoreException
    nil
  end

  # Atomically writes a set of entries to timeline_id, preserving the
  # given order.
  #
  # ==== Parameters
  # timeline_id<String>
  # entries<Array>
  #
  def store(timeline_id, entries)
    service.store(timeline_id, entries)
  end

  # Intersects entries with the current contents of timeline_id.
  # Returns [] when the timeline does not exist.
  #
  # ==== Parameters
  # timeline_id<String>
  # entries<Array>
  #
  def filter(timeline_id, *entries)
    # FIXME: Expose max search depth
    service.filter(timeline_id, entries.flatten, -1)
  rescue Haplocheirus::TimelineStoreException
    []
  end

  # Merges entries into timeline_id. A no-op unless the timeline was
  # created via #store. Entries should be byte arrays of at least 8B
  # per entry.
  #
  # ==== Parameters
  # timeline_id<String>
  # entries<Array>
  #
  def merge(timeline_id, entries)
    service.merge(timeline_id, entries)
  end

  # Merges the timeline at source_id into the one at dest_id. A no-op
  # if source_id does not exist.
  #
  # ==== Parameters
  # dest_id<String>
  # source_id<String>
  #
  def merge_indirect(dest_id, source_id)
    service.merge_indirect(dest_id, source_id)
  end

  # Removes a list of entries from timeline_id. A no-op unless the
  # timeline was created via #store. Entries should be byte arrays of
  # at least 8B per entry.
  #
  # ==== Parameters
  # timeline_id<String>
  # entries<Array>
  #
  def unmerge(timeline_id, entries)
    service.unmerge(timeline_id, entries)
  end

  # Removes the contents of the timeline at source_id from the one at
  # dest_id. A no-op if source_id does not exist.
  #
  # ==== Parameters
  # dest_id<String>
  # source_id<String>
  #
  def unmerge_indirect(dest_id, source_id)
    service.unmerge_indirect(dest_id, source_id)
  end

  # Removes the timeline from the backend store.
  #
  # ==== Parameters
  # timeline_id<String>
  #
  def delete(timeline_id)
    service.delete_timeline(timeline_id)
  end
end
@@ -0,0 +1,166 @@
1
+ require 'set'
2
+
3
class Haplocheirus::MockService #:nodoc:

  # In-memory model of a timeline entry. Wire entries are packed strings:
  # an unsigned 64-bit status id, an unsigned 64-bit secondary id, and an
  # unsigned 32-bit bitfield ("QQI").
  class MockNode < Struct.new(:status_id, :secondary_id, :bitfield)
    include Comparable

    # Bit position in +bitfield+ marking an entry as a share/retweet.
    RETWEET_BIT = 31

    # Builds a node from a packed entry string.
    def self.unpack(string)
      new(*string.unpack("QQI"))
    end

    def initialize(*args)
      super
      self.bitfield ||= 0 # entries packed without a bitfield carry no flags
    end

    # Nodes order by primary status id. Because Comparable is included on
    # this subclass, its #== (via <=>) takes precedence over Struct's
    # member-wise equality.
    def <=>(other)
      status_id <=> other.status_id
    end

    # True when the share/retweet bit is set.
    def is_share?
      bitfield[RETWEET_BIT] == 1
    end
  end

  def initialize
    @timelines = {}
  end

  # Prepends entry +e+ to each timeline keyed by "#{p}#{i}". Timelines
  # never created via #store are skipped, as are duplicate entries.
  def append(e, p, is)
    is.each do |i|
      key = p + i.to_s
      next unless @timelines.key?(key)
      # NOTE: This check occurs on read, server-side
      @timelines[key].unshift(e) unless @timelines[key].include?(e)
    end
  end

  # Removes entry +e+ from each timeline keyed by "#{p}#{i}". A timeline
  # left empty is deleted outright.
  def remove(e, p, is)
    is.each do |i|
      key = p + i.to_s
      next unless @timelines.key?(key)
      # Block param renamed from the original's 'i', which shadowed the id above.
      @timelines[key].reject! { |entry| entry == e }
      @timelines.delete(key) if @timelines[key].empty?
    end
  end

  # Returns up to +l+ entries starting at offset +o+, sorted by their
  # 8-byte id prefix and returned most recent first; optionally deduped.
  # Raises TimelineStoreException for unknown ids.
  #
  # NOTE(review): the client docs say :size is computed before dedupe,
  # but here it is computed after — confirm against the real server.
  def get(i, o, l, d = false)
    raise Haplocheirus::TimelineStoreException unless @timelines.key?(i)
    # [o, l] yields exactly l entries; the original [o..(o+l)] returned
    # l + 1. The || [] guards a nil slice when o is past the end.
    t = @timelines[i].to_a[o, l] || []
    t.sort! { |a, b| a[0,8].unpack("Q") <=> b[0,8].unpack("Q") }
    t = dedupe(t) if d
    Haplocheirus::TimelineSegment.new(:entries => t.reverse.map { |tt| tt.dup },
                                      :size => t.length,
                                      :state => Haplocheirus::TimelineSegmentState::HIT)
  end

  # Runs a batch of TimelineQuery structs through #get, mapping misses to
  # empty MISS segments instead of raising.
  def get_multi(qs)
    qs.map do |q|
      begin
        get q.timeline_id, q.offset, q.length, q.dedupe
      rescue Haplocheirus::TimelineStoreException
        Haplocheirus::TimelineSegment.new(:entries => [],
                                          :size => 0,
                                          :state => Haplocheirus::TimelineSegmentState::MISS)
      end
    end
  end

  # Returns entries between the positions of from-id +f+ and to-id +t+
  # (both matched by their packed 8-byte prefix), sorted and returned
  # most recent first. :size reports the full timeline length, not the
  # slice length. Raises TimelineStoreException for unknown ids.
  def get_range(i, f, t = 0, d = false)
    raise Haplocheirus::TimelineStoreException unless @timelines.key?(i)
    min = @timelines[i].index([f].pack("Q"))
    max = t > 0 ? @timelines[i].index([t].pack("Q")) : 0
    # max...min is equivalent to the original max..min-1. Dup the whole
    # timeline in the fallback so sort! below cannot reorder the backing
    # store (the original aliased and mutated it on read).
    entries = min ? @timelines[i][max...min] : @timelines[i].dup
    entries.sort! { |a, b| a[0,8].unpack("Q") <=> b[0,8].unpack("Q") }
    entries = dedupe(entries) if d
    Haplocheirus::TimelineSegment.new(:entries => entries.reverse,
                                      :size => @timelines[i].length,
                                      :state => Haplocheirus::TimelineSegmentState::HIT)
  end

  # Creates (or replaces) timeline +i+ with entries +e+, preserving the
  # given recency order (first element ends up newest).
  def store(i, e)
    @timelines[i] = []
    e.reverse.each { |n| append n, '', [i] }
  end

  # Returns the members of +e+ whose status id appears in timeline +i+
  # (shares are indexed by their secondary id). +depth+ is accepted for
  # interface parity and ignored. Raises for unknown timeline ids.
  def filter(i, e, depth = -1)
    raise Haplocheirus::TimelineStoreException unless @timelines.key?(i)

    haystack = @timelines[i].map do |ea|
      node = MockNode.unpack(ea)
      node.is_share? ? node.secondary_id : node.status_id
    end.uniq

    # FIXME: Only send the first 8 bytes for the needles
    e.select do |packed|
      node = MockNode.unpack(packed)
      haystack.include?(node.status_id)
    end
  end

  # Inserts each entry of +e+ into timeline +i+ ahead of the first stored
  # entry that compares greater. A no-op if the timeline does not exist.
  def merge(i, e)
    return unless @timelines.key?(i)

    e.each do |el|
      o = 0
      # Walk past entries that sort at or below el. The original compared
      # @timelines[i][0] on every pass — a cursor-independent condition
      # that either never advanced or spun forever — and inserted at
      # o + 1, which can pad the array with nils when past the end.
      o += 1 while @timelines[i][o] && @timelines[i][o] <= el
      @timelines[i].insert(o, el)
    end
  end

  # Merges the contents of timeline +s+ into timeline +d+, if +s+ exists.
  def merge_indirect(d, s)
    merge(d, @timelines[s]) if @timelines.key?(s)
  end

  # Removes from timeline +i+ every entry matching a member of +e+.
  # Equality goes through MockNode (Comparable on status id), so only the
  # first 8 bytes decide a match.
  def unmerge(i, e)
    return unless @timelines.key?(i)
    @timelines[i].reject! { |o| e.find { |el| MockNode.unpack(el) == MockNode.unpack(o) } }
  end

  # Removes the contents of timeline +s+ from timeline +d+, if +s+ exists.
  def unmerge_indirect(d, s)
    unmerge(d, @timelines[s]) if @timelines.key?(s)
  end

  # Drops timeline +i+ entirely.
  def delete_timeline(i)
    @timelines.delete(i)
  end

  # This is not part of Haplo, but is useful for test harnesses
  def reset!
    @timelines = {}
  end

  private

  # Collapses duplicates: keeps the first entry per status id and, for
  # shares, also suppresses later entries carrying the same secondary id.
  # Returns the surviving packed entries ordered by ascending status id.
  def dedupe(t)
    # I can't wait until Array#uniq takes a block...
    seen = { }
    nodes = []

    t.each do |packed|
      node = MockNode.unpack(packed)
      next if seen.key?(node.status_id)

      if node.is_share?
        next if seen.key?(node.secondary_id)

        seen[node.status_id] = packed
        seen[node.secondary_id] = true
        nodes << node.status_id
      else
        seen[node.status_id] = packed
        nodes << node.status_id
      end
    end

    seen.values_at(*nodes.sort!)
  end

end