haplocheirus-client 0.6.5

data/LICENSE ADDED
@@ -0,0 +1,13 @@
+ Haplocheirus Client is Copyright (C) 2010 Twitter, Inc.
+
+ Licensed under the Apache License, Version 2.0 (the "License"); you may not
+ use this work except in compliance with the License. You may obtain a copy of
+ the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ License for the specific language governing permissions and limitations under
+ the License.
@@ -0,0 +1,70 @@
+ Haplocheirus Client
+ -------------------
+
+ Haplocheirus is a highly available, partitioned storage service for
+ vectors of heterogeneous blobs. This is a Ruby client library for that service.
+
+
+ INSTALLATION
+ ------------
+
+ gem install haplocheirus-client
+
+
+ USAGE
+ -----
+
+ Attach a new client to a Haplo service:
+
+ >> client = Haplocheirus.new # Defaults to localhost:7666
+
+ Store a vector under the id of '0':
+
+ >> client.store '0', ['foo', 'bar']
+
+ Find the first 2 entries, starting at index 0:
+
+ >> client.get '0', 0, 2
+ ['bar', 'foo'] # note the reverse order
+
+ Append an entry:
+
+ >> client.append 'baz', '', '0'
+ >> client.get '0', 0, 3
+ ['baz', 'bar', 'foo']
+
+ Merge that vector with another:
+
+ >> client.merge '0', ['bat', 'quux']
+ >> client.get '0', 0, 5
+ ['quux', 'baz', 'bat', 'bar', 'foo']
+
+
+ Remove an entry:
+
+ >> client.remove 'bat', '', ['0']
+ >> client.get '0', 0, 4
+ ['quux', 'baz', 'bar', 'foo']
+
+ Remove a set of entries:
+
+ >> client.unmerge '0', ['foo', 'bar']
+ >> client.get '0', 0, 2
+ ['quux', 'baz']
+
+ Delete the vector:
+ >> client.delete '0'
+
+
+ CONTRIBUTORS
+ ------------
+
+ Brandon Mitchell
+
+
+ LICENSE
+ -------
+
+ Copyright (C) 2010 Twitter, Inc.
+
+ This work is licensed under the Apache License, Version 2.0. See LICENSE for details.
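Note that Client#get (shown later in this diff) returns a TimelineSegment struct rather than a bare array, so the arrays above stand in for the segment's entries; in practice entries are binary-packed ids of at least 8 bytes rather than short strings. A rough sketch of the same flow in plain Ruby, with a made-up timeline id and status ids:

    require 'haplocheirus'

    client  = Haplocheirus.new                    # localhost:7666; nil if the service is disabled
    entries = [1001, 1002].map { |id| [id].pack("Q") }

    client.store 'timeline:1', entries
    segment = client.get 'timeline:1', 0, 20      # recency order: offset 0 is the latest entry
    segment.entries.map { |e| e.unpack("Q").first } if segment   # newest first => [1002, 1001]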
@@ -0,0 +1,40 @@
+ require 'thrift'
+ require 'thrift_client'
+
+ require 'haplocheirus/thrift/timeline_store'
+ require 'haplocheirus/service'
+ require 'haplocheirus/client'
+
+ module Haplocheirus
+   autoload :MockService, 'haplocheirus/mock_service'
+
+   # Convenience method for:
+   #
+   #   s = Haplocheirus::Service.new(*args)
+   #   Haplocheirus::Client.new(s)
+   #
+   def self.new(*args)
+     service = Haplocheirus::Service.new(*args)
+     Haplocheirus::Client.new(service)
+   rescue Haplocheirus::Service::ServiceDisabled
+     nil
+   end
+
+   # Nice-to-haves...
+   class TimelineSegment #:nodoc:
+
+     def hit?
+       state == TimelineSegmentState::HIT
+     end
+
+     def miss?
+       state == TimelineSegmentState::MISS
+     end
+
+     def timeout?
+       state == TimelineSegmentState::TIMEOUT
+     end
+
+   end
+
+ end
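Because MockService is autoloaded above, a test can stand in for a running Haplo server by handing the mock straight to the client wrapper. A minimal sketch, using packed 8-byte entries as the rest of the library expects (the timeline id is arbitrary):

    require 'haplocheirus'

    mock   = Haplocheirus::MockService.new
    client = Haplocheirus::Client.new(mock)

    client.store 'timeline:1', [[1].pack("Q"), [2].pack("Q")]
    client.get('timeline:1', 0, 10).size    # => 2

    mock.reset!                             # wipe all mock state between tests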
@@ -0,0 +1,177 @@
+ class Haplocheirus::Client
+
+   attr_accessor :service
+
+   # ==== Parameters
+   # service<ThriftClient>
+   #
+   def initialize(service)
+     @service = service
+   end
+
+   # Appends an entry to a set of timelines given by
+   # timeline_ids. Appends will do nothing if the timeline has not been
+   # created using #store.
+   #
+   # ==== Parameters
+   # entry
+   # prefix<String>:: Prefix to prepend to each id
+   # timeline_ids<Array[Integer], Integer>
+   #
+   def append(entry, prefix, *timeline_ids)
+     @service.append entry, prefix, timeline_ids.flatten
+   end
+
+   # Removes an entry from a set of timelines given by timeline_ids
+   #
+   # ==== Parameters
+   # entry
+   # prefix<String>:: Prefix to prepend to each id
+   # timeline_ids<Array[Integer]>
+   #
+   def remove(entry, prefix, timeline_ids)
+     @service.remove entry, prefix, timeline_ids
+   end
+
+   # Gets entries on the timeline given by timeline_id, optionally
+   # beginning at offset and limited by length. Timelines are stored in
+   # recency order - an offset of 0 is the latest entry. Returns nil if
+   # the timeline_id does not exist.
+   #
+   # ==== Parameters
+   # timeline_id<String>
+   # offset<Integer>
+   # length<Integer>
+   # dedupe<Boolean>:: Optional. Defaults to false.
+   #
+   # ==== Returns
+   # TimelineSegment
+   #
+   # NOTE: The #size of the returned segment is computed *before* dupes
+   # are removed.
+   #
+   def get(timeline_id, offset, length, dedupe = false)
+     @service.get timeline_id, offset, length, dedupe
+   rescue Haplocheirus::TimelineStoreException
+     nil
+   end
+
+   # ==== Parameters
+   # timeline_queries<Array[TimelineQuery]>:: list of query structs
+   #
+   # ==== Returns
+   # Array[TimelineSegment]
+   #
+   # NOTE: Because there is no identifying information in the returned
+   # TimelineSegments, there is a strict ordering relationship between
+   # the query and returned segment lists.
+   #
+   def get_multi(timeline_queries)
+     @service.get_multi timeline_queries
+   end
+
+   # Gets a range of entries from the timeline given by timeline_id
+   # since from_id (exclusive). This may include entries that were inserted out
+   # of order. from_id and to_id are treated as 8-byte prefixes. If
+   # to_id is <= 0, results are not bounded by a maximum value. Returns
+   # nil if the timeline_id does not exist.
+   #
+   # ==== Parameters
+   # timeline_id<String>
+   # from_id<Integer>
+   # to_id<Integer>:: Optional. Defaults to 0.
+   # dedupe<Boolean>:: Optional. Defaults to false.
+   #
+   # ==== Returns
+   # TimelineSegment
+   #
+   # NOTE: The #size of the returned segment is computed *before* dupes
+   # are removed.
+   #
+   def range(timeline_id, from_id, to_id = 0, dedupe = false)
+     @service.get_range timeline_id, from_id, to_id, dedupe
+   rescue Haplocheirus::TimelineStoreException
+     nil
+   end
+
+   # Atomically stores a set of entries into a timeline given by
+   # timeline_id. The entries are stored in the order provided.
+   #
+   # ==== Parameters
+   # timeline_id<String>
+   # entries<Array>
+   #
+   def store(timeline_id, entries)
+     @service.store timeline_id, entries
+   end
+
+   # Returns the intersection of entries with the current contents of
+   # the timeline given by timeline_id. Returns an empty array if the
+   # timeline_id does not exist.
+   #
+   # ==== Parameters
+   # timeline_id<String>
+   # entries<Array>
+   #
+   def filter(timeline_id, *entries)
+     # FIXME: Expose max search depth
+     @service.filter timeline_id, entries.flatten, -1
+   rescue Haplocheirus::TimelineStoreException
+     []
+   end
+
+   # Merges the entries into the timeline given by timeline_id. Merges
+   # will do nothing if the timeline hasn't been created using
+   # #store. Entries should be byte arrays of at least 8B per entry.
+   #
+   # ==== Parameters
+   # timeline_id<String>
+   # entries<Array>
+   #
+   def merge(timeline_id, entries)
+     @service.merge timeline_id, entries
+   end
+
+   # Merges entries in the timeline given by source_id into the
+   # timeline given by dest_id. Does nothing if source_id does not exist.
+   #
+   # ==== Parameters
+   # dest_id<String>
+   # source_id<String>
+   #
+   def merge_indirect(dest_id, source_id)
+     @service.merge_indirect dest_id, source_id
+   end
+
+   # Removes a list of entries from a timeline. Unmerges will do nothing
+   # if the timeline hasn't been created using #store. Entries should
+   # be byte arrays of at least 8B per entry.
+   #
+   # ==== Parameters
+   # timeline_id<String>
+   # entries<Array>
+   #
+   def unmerge(timeline_id, entries)
+     @service.unmerge timeline_id, entries
+   end
+
+   # Removes entries in the timeline given by source_id from the
+   # timeline given by dest_id. Does nothing if source_id does not exist.
+   #
+   # ==== Parameters
+   # dest_id<String>
+   # source_id<String>
+   #
+   def unmerge_indirect(dest_id, source_id)
+     @service.unmerge_indirect dest_id, source_id
+   end
+
+   # Removes the timeline from the backend store.
+   #
+   # ==== Parameters
+   # timeline_id<String>
+   #
+   def delete(timeline_id)
+     @service.delete_timeline timeline_id
+   end
+ end
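Of the calls above, get_multi is the only one that takes structs rather than scalars. A hedged sketch of a batch read, assuming the thrift-generated Haplocheirus::TimelineQuery struct exposes the timeline_id, offset, length, and dedupe fields that the mock service below reads:

    require 'haplocheirus'

    client  = Haplocheirus.new
    queries = ['timeline:1', 'timeline:2'].map do |id|
      Haplocheirus::TimelineQuery.new(:timeline_id => id,
                                      :offset      => 0,
                                      :length      => 20,
                                      :dedupe      => false)
    end

    segments = client.get_multi(queries)

    # Results come back in query order, so pair them up positionally.
    queries.zip(segments).each do |query, segment|
      puts "#{query.timeline_id}: #{segment.hit? ? segment.size : 'miss'}"
    end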
@@ -0,0 +1,166 @@
+ require 'set'
+
+ class Haplocheirus::MockService #:nodoc:
+
+   class MockNode < Struct.new(:status_id, :secondary_id, :bitfield)
+     include Comparable
+
+     RETWEET_BIT = 31
+
+     def self.unpack(string)
+       new(*string.unpack("QQI"))
+     end
+
+     def initialize(*args)
+       super
+       self.bitfield ||= 0
+     end
+
+     def <=>(other)
+       status_id <=> other.status_id
+     end
+
+     def is_share?
+       bitfield[RETWEET_BIT] == 1
+     end
+   end
+
+   def initialize
+     @timelines = {}
+   end
+
+   def append(e, p, is)
+     is.each do |i|
+       key = p + i.to_s
+       next unless @timelines.key?(key)
+       # NOTE: This check occurs on read, server-side
+       @timelines[key].unshift(e) unless @timelines[key].include?(e)
+     end
+   end
+
+   def remove(e, p, is)
+     is.each do |i|
+       key = p + i.to_s
+       next unless @timelines.key?(key)
+       @timelines[key].reject! { |el| el == e }
+       @timelines.delete(key) if @timelines[key].empty?
+     end
+   end
+
+   def get(i, o, l, d = false)
+     raise Haplocheirus::TimelineStoreException unless @timelines.key?(i)
+     t = @timelines[i].to_a[o, l]
+     t.sort! { |a, b| a[0,8].unpack("Q") <=> b[0,8].unpack("Q") }
+     t = dedupe(t) if d
+     Haplocheirus::TimelineSegment.new(:entries => t.reverse.map{ |tt| tt.dup },
+                                       :size => t.length,
+                                       :state => Haplocheirus::TimelineSegmentState::HIT)
+   end
+
+   def get_multi(qs)
+     qs.map do |q|
+       begin
+         get q.timeline_id, q.offset, q.length, q.dedupe
+       rescue Haplocheirus::TimelineStoreException
+         Haplocheirus::TimelineSegment.new(:entries => [],
+                                           :size => 0,
+                                           :state => Haplocheirus::TimelineSegmentState::MISS)
+       end
+     end
+   end
+
+   def get_range(i, f, t = 0, d = false)
+     raise Haplocheirus::TimelineStoreException unless @timelines.key?(i)
+     min = @timelines[i].index([f].pack("Q"))
+     max = t > 0 ? @timelines[i].index([t].pack("Q")) : 0
+     t = min ? @timelines[i][max..min-1] : @timelines[i]
+     t.sort! { |a, b| a[0,8].unpack("Q") <=> b[0,8].unpack("Q") }
+     t = dedupe(t) if d
+     Haplocheirus::TimelineSegment.new(:entries => t.reverse,
+                                       :size => @timelines[i].length,
+                                       :state => Haplocheirus::TimelineSegmentState::HIT)
+   end
+
+   def store(i, e)
+     @timelines[i] = []
+     e.reverse.each { |n| append n, '', [i] }
+   end
+
+   def filter(i, e, depth = -1)
+     raise Haplocheirus::TimelineStoreException unless @timelines.key?(i)
+
+     haystack = @timelines[i].map do |ea|
+       node = MockNode.unpack(ea)
+       if node.is_share?
+         node.secondary_id
+       else
+         node.status_id
+       end
+     end.uniq
+
+     # FIXME: Only send the first 8 bytes for the needles
+     e.select do |packed|
+       node = MockNode.unpack(packed)
+       haystack.include?(node.status_id)
+     end
+   end
+
+   def merge(i, e)
+     return unless @timelines.key?(i)
+
+     e.each do |el|
+       o = 0
+       o += 1 while @timelines[i][o] && @timelines[i][o] > el # keep newest-first order
+       @timelines[i].insert(o, el)
+     end
+   end
+
+   def merge_indirect(d, s)
+     merge(d, @timelines[s]) if @timelines.key?(s)
+   end
+
+   def unmerge(i, e)
+     return unless @timelines.key?(i)
+     @timelines[i].reject! { |o| e.find { |el| MockNode.unpack(el) == MockNode.unpack(o) } }
+   end
+
+   def unmerge_indirect(d, s)
+     unmerge(d, @timelines[s]) if @timelines.key?(s)
+   end
+
+   def delete_timeline(i)
+     @timelines.delete(i)
+   end
+
+   # This is not part of Haplo, but is useful for test harnesses
+   def reset!
+     @timelines = {}
+   end
+
+   private
+
+   def dedupe(t)
+     # I can't wait until Array#uniq takes a block...
+     seen = { }
+     nodes = []
+
+     t.each do |i|
+       node = MockNode.unpack(i)
+       next if seen.key?(node.status_id)
+
+       if node.is_share?
+         next if seen.key?(node.secondary_id)
+
+         seen[node.status_id] = i
+         seen[node.secondary_id] = true
+         nodes << node.status_id
+       else
+         seen[node.status_id] = i
+         nodes << node.status_id
+       end
+     end
+
+     seen.values_at(*nodes.sort!)
+   end
+
+ end
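MockNode.unpack reads each entry as two 64-bit ids plus a 32-bit bitfield, with bit 31 marking a share/retweet. The packing side is not shown in this file, so here is a hedged helper for building test entries that exercise the dedupe path (the helper name and ids are illustrative):

    require 'haplocheirus'

    # Pack an entry the way MockNode.unpack expects: status id, secondary
    # (original) id, and a bitfield with bit 31 set for shares.
    def make_entry(status_id, secondary_id = 0, share = false)
      bits = share ? (1 << Haplocheirus::MockService::MockNode::RETWEET_BIT) : 0
      [status_id, secondary_id, bits].pack("QQI")
    end

    client = Haplocheirus::Client.new(Haplocheirus::MockService.new)

    original = make_entry(100)
    share    = make_entry(101, 100, true)   # a share of status 100

    client.store 'timeline:1', [original, share]

    client.get('timeline:1', 0, 10).entries.size          # => 2, dupes kept
    client.get('timeline:1', 0, 10, true).entries.size    # => 1, the share collapses onto its original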