haplocheirus-client 0.6.5
Sign up to get free protection for your applications and to get access to all the features.
- data/LICENSE +13 -0
- data/README.md +70 -0
- data/lib/haplocheirus.rb +40 -0
- data/lib/haplocheirus/client.rb +177 -0
- data/lib/haplocheirus/mock_service.rb +166 -0
- data/lib/haplocheirus/service.rb +24 -0
- data/lib/haplocheirus/thrift/timeline_store.rb +886 -0
- data/lib/haplocheirus/thrift/timeline_store_types.rb +107 -0
- data/lib/haplocheirus/version.rb +3 -0
- metadata +123 -0
data/LICENSE
ADDED
@@ -0,0 +1,13 @@
|
|
1
|
+
Haplocheirus Client is Copyright (C) 2010 Twitter, Inc.
|
2
|
+
|
3
|
+
Licensed under the Apache License, Version 2.0 (the "License"); you may not
|
4
|
+
use this work except in compliance with the License. You may obtain a copy of
|
5
|
+
the License at
|
6
|
+
|
7
|
+
http://www.apache.org/licenses/LICENSE-2.0
|
8
|
+
|
9
|
+
Unless required by applicable law or agreed to in writing, software
|
10
|
+
distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
|
11
|
+
WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
|
12
|
+
License for the specific language governing permissions and limitations under
|
13
|
+
the License.
|
data/README.md
ADDED
@@ -0,0 +1,70 @@
|
|
1
|
+
Haplocheirus Client
|
2
|
+
-------------------
|
3
|
+
|
4
|
+
Haplocheirus is a highly available, partitioned storage service for
|
5
|
+
vectors of heterogeneous blobs. This is a Ruby client library for that service.
|
6
|
+
|
7
|
+
|
8
|
+
INSTALLATION
|
9
|
+
------------
|
10
|
+
|
11
|
+
gem install haplocheirus-client
|
12
|
+
|
13
|
+
|
14
|
+
USAGE
|
15
|
+
-----
|
16
|
+
|
17
|
+
Attach a new client to a Haplo service:
|
18
|
+
|
19
|
+
>> client = Haplocheirus.new # Defaults to localhost:7666
|
20
|
+
|
21
|
+
Store a vector under the id of '0':
|
22
|
+
|
23
|
+
>> client.store '0', ['foo', 'bar']
|
24
|
+
|
25
|
+
Find the first 2 entries, starting at index 0:
|
26
|
+
|
27
|
+
>> client.get '0', 0, 2
|
28
|
+
['bar', 'foo'] # note the reverse order
|
29
|
+
|
30
|
+
Append an entry:
|
31
|
+
|
32
|
+
>> client.append '0', 'baz'
|
33
|
+
>> client.get '0', 0, 3
|
34
|
+
['baz', 'bar', 'foo']
|
35
|
+
|
36
|
+
Merge that vector with another:
|
37
|
+
|
38
|
+
>> client.merge '0', ['bat', 'quux']
|
39
|
+
>> client.get '0', 0, 5
|
40
|
+
['quux', 'baz', 'bat', 'bar', 'foo']
|
41
|
+
|
42
|
+
|
43
|
+
Remove an entry:
|
44
|
+
|
45
|
+
>> client.remove 'bat', '0'
|
46
|
+
>> client.get '0', 0, 4
|
47
|
+
['quux', 'baz', 'bar', 'foo']
|
48
|
+
|
49
|
+
Remove a set of entries:
|
50
|
+
|
51
|
+
>> client.unmerge '0', ['foo', 'bar']
|
52
|
+
>> client.get '0', 0, 2
|
53
|
+
['quux', 'baz']
|
54
|
+
|
55
|
+
Delete the vector:
|
56
|
+
>> client.delete '0'
|
57
|
+
|
58
|
+
|
59
|
+
CONTRIBUTORS
|
60
|
+
------------
|
61
|
+
|
62
|
+
Brandon Mitchell
|
63
|
+
|
64
|
+
|
65
|
+
LICENSE
|
66
|
+
-------
|
67
|
+
|
68
|
+
Copyright (C) 2010 Twitter, Inc.
|
69
|
+
|
70
|
+
This work is licensed under the Apache License, Version 2.0. See LICENSE for details.
|
data/lib/haplocheirus.rb
ADDED
@@ -0,0 +1,40 @@
|
|
1
|
+
require 'thrift'
|
2
|
+
require 'thrift_client'
|
3
|
+
|
4
|
+
require 'haplocheirus/thrift/timeline_store'
|
5
|
+
require 'haplocheirus/service'
|
6
|
+
require 'haplocheirus/client'
|
7
|
+
|
8
|
+
module Haplocheirus
  autoload :MockService, 'haplocheirus/mock_service'

  # Convenience method for:
  #
  #   s = Haplocheirus::Service.new(*args)
  #   Haplocheirus::Client.new(s)
  #
  # Returns nil when the service is disabled.
  def self.new(*args)
    Haplocheirus::Client.new(Haplocheirus::Service.new(*args))
  rescue Haplocheirus::Service::ServiceDisabled
    nil
  end

  # Reopens the generated Thrift struct to add convenience predicates
  # over the segment's +state+ field.
  class TimelineSegment #:nodoc:

    # True when the segment was served from the store.
    def hit?
      TimelineSegmentState::HIT == state
    end

    # True when the timeline was not found.
    def miss?
      TimelineSegmentState::MISS == state
    end

    # True when the lookup timed out server-side.
    def timeout?
      TimelineSegmentState::TIMEOUT == state
    end

  end
end
|
@@ -0,0 +1,177 @@
|
|
1
|
+
class Haplocheirus::Client

  # The underlying service object (a ThriftClient-backed
  # Haplocheirus::Service, or a Haplocheirus::MockService in tests).
  attr_accessor :service

  # ==== Parameters
  # service<ThriftClient>
  #
  def initialize(service)
    @service = service
  end

  # Appends an entry to a set of timelines given by
  # timeline_ids. Appends will do nothing if the timeline has not been
  # created using #store.
  #
  # ==== Parameters
  # entry
  # prefix<String>:: Prefix to prepend to each id
  # timeline_ids<Array[Integer], Integer>
  #
  def append(entry, prefix, *timeline_ids)
    @service.append entry, prefix, timeline_ids.flatten
  end

  # Removes an entry from a set of timelines given by timeline_ids
  #
  # ==== Parameters
  # entry
  # prefix<String>:: Prefix to prepend to each id
  # timeline_ids<Array[Integer], Integer>
  #
  def remove(entry, prefix, *timeline_ids)
    # Accepts either an array or a splatted list of ids, mirroring #append.
    @service.remove entry, prefix, timeline_ids.flatten
  end

  # Gets entries on the timeline given by timeline_id, optionally
  # beginning at offset and limited by length. Timelines are stored in
  # recency order - an offset of 0 is the latest entry. Returns nil if
  # the timeline_id does not exist.
  #
  # ==== Parameters
  # timeline_id<String>
  # offset<Integer>
  # length<Integer>
  # dedupe<Boolean>:: Optional. Defaults to false.
  #
  # ==== Returns
  # TimelineSegment
  #
  # NOTE: The #size of the returned segment is computed *before* dupes
  # are removed.
  #
  def get(timeline_id, offset, length, dedupe = false)
    @service.get timeline_id, offset, length, dedupe
  rescue Haplocheirus::TimelineStoreException
    nil
  end

  # ==== Parameters
  # timeline_queries<Array[TimelineQuery]>:: list of query structs
  #
  # ==== Returns
  # Array[TimelineSegment]
  #
  # NOTE: Because there is no identifying information in the returned
  # TimelineSegments, there is a strict ordering relationship between
  # the query and returned segment lists.
  #
  def get_multi(timeline_queries)
    @service.get_multi timeline_queries
  end

  # Gets a range of entries from the timeline given by timeline_id
  # since from_id (exclusive). This may include entries that were inserted out
  # of order. from_id and to_id are treated as 8 byte prefixes. If
  # to_id is <= 0, results are not bounded by a maximum value. Returns
  # nil if the timeline_id does not exist.
  #
  # ==== Parameters
  # timeline_id<String>
  # from_id<Integer>
  # to_id<Integer>:: Optional. Defaults to 0.
  # dedupe<Boolean>:: Optional. Defaults to false.
  #
  # ==== Returns
  # TimelineSegment
  #
  # NOTE: The #size of the returned segment is computed *before* dupes
  # are removed.
  #
  def range(timeline_id, from_id, to_id = 0, dedupe = false)
    @service.get_range timeline_id, from_id, to_id, dedupe
  rescue Haplocheirus::TimelineStoreException
    nil
  end

  # Atomically stores a set of entries into a timeline given by
  # timeline_id. The entries are stored in the order provided.
  #
  # ==== Parameters
  # timeline_id<String>
  # entries<Array>
  #
  def store(timeline_id, entries)
    @service.store timeline_id, entries
  end

  # Returns the intersection of entries with the current contents of
  # the timeline given by timeline_id. Returns an empty array if the
  # timeline_id does not exist.
  #
  # ==== Parameters
  # timeline_id<String>
  # entries<Array>
  #
  def filter(timeline_id, *entries)
    # FIXME: Expose max search depth (-1 means unbounded, server-side)
    @service.filter timeline_id, entries.flatten, -1
  rescue Haplocheirus::TimelineStoreException
    []
  end

  # Merges the entries into the timeline given by timeline_id. Merges
  # will do nothing if the timeline hasn't been created using
  # #store. Entries should be byte arrays of at least 8B per entry.
  #
  # ==== Parameters
  # timeline_id<String>
  # entries<Array>
  #
  def merge(timeline_id, entries)
    @service.merge timeline_id, entries
  end

  # Merges entries in the timeline given by source_id into the
  # timeline given by dest_id. Does nothing if source_id does not exist.
  #
  # ==== Parameters
  # dest_id<String>
  # source_id<String>
  #
  def merge_indirect(dest_id, source_id)
    @service.merge_indirect dest_id, source_id
  end

  # Removes a list of entries from a timeline. Unmerges will do nothing
  # if the timeline hasn't been created using #store. Entries should
  # be byte arrays of at least 8B per entry.
  #
  # ==== Parameters
  # timeline_id<String>
  # entries<Array>
  #
  def unmerge(timeline_id, entries)
    @service.unmerge timeline_id, entries
  end

  # Removes entries in the timeline given by source_id from the
  # timeline given by dest_id. Does nothing if source_id does not exist.
  #
  # ==== Parameters
  # dest_id<String>
  # source_id<String>
  #
  def unmerge_indirect(dest_id, source_id)
    @service.unmerge_indirect dest_id, source_id
  end

  # Removes the timeline from the backend store
  #
  # ==== Parameters
  # timeline_id<String>
  #
  def delete(timeline_id)
    @service.delete_timeline timeline_id
  end
end
|
@@ -0,0 +1,166 @@
|
|
1
|
+
require 'set'
|
2
|
+
|
3
|
+
class Haplocheirus::MockService #:nodoc:

  # In-memory stand-in for a packed timeline entry. Entries are binary
  # strings: the first 8 bytes are the status id, the next 8 the
  # secondary id, followed by a 32-bit bitfield ("QQI").
  class MockNode < Struct.new(:status_id, :secondary_id, :bitfield)
    include Comparable

    # Bit index in +bitfield+ marking a retweet/share.
    RETWEET_BIT = 31

    # Builds a node from a packed binary string.
    def self.unpack(string)
      new(*string.unpack("QQI"))
    end

    def initialize(*args)
      super
      self.bitfield ||= 0
    end

    # Nodes order (and compare equal) by status_id alone.
    def <=>(other)
      status_id <=> other.status_id
    end

    def is_share?
      bitfield[RETWEET_BIT] == 1
    end
  end

  def initialize
    @timelines = {}
  end

  # Prepends entry +e+ to each existing timeline "#{p}#{i}"; silently
  # skips timelines that were never #store'd, and drops duplicates.
  def append(e, p, is)
    is.each do |i|
      key = p + i.to_s
      next unless @timelines.key?(key)
      # NOTE: This check occurs on read, server-side
      @timelines[key].unshift(e) unless @timelines[key].include?(e)
    end
  end

  # Removes entry +e+ from each timeline "#{p}#{i}", deleting
  # timelines that become empty.
  def remove(e, p, is)
    is.each do |i|
      key = p + i.to_s
      next unless @timelines.key?(key)
      # Block param renamed: the original shadowed the loop variable `i`.
      @timelines[key].reject! { |entry| entry == e }
      @timelines.delete(key) if @timelines[key].empty?
    end
  end

  # Returns a TimelineSegment of up to +l+ entries starting at offset
  # +o+, newest-first. Raises if the timeline does not exist.
  def get(i, o, l, d = false)
    raise Haplocheirus::TimelineStoreException unless @timelines.key?(i)
    # Array#[](start, length) yields exactly `l` entries; the previous
    # range slice (o..(o+l)) returned one extra element. `|| []` guards
    # an offset past the end of the timeline.
    t = @timelines[i].to_a[o, l] || []
    t.sort! { |a, b| a[0, 8].unpack("Q") <=> b[0, 8].unpack("Q") }
    t = dedupe(t) if d
    Haplocheirus::TimelineSegment.new(:entries => t.reverse.map { |tt| tt.dup },
                                      :size => t.length,
                                      :state => Haplocheirus::TimelineSegmentState::HIT)
  end

  # Runs each TimelineQuery through #get; missing timelines yield an
  # empty MISS segment. Results are ordered like the queries.
  def get_multi(qs)
    qs.map do |q|
      begin
        get q.timeline_id, q.offset, q.length, q.dedupe
      rescue Haplocheirus::TimelineStoreException
        Haplocheirus::TimelineSegment.new(:entries => [],
                                          :size => 0,
                                          :state => Haplocheirus::TimelineSegmentState::MISS)
      end
    end
  end

  # Returns entries between the packed ids +f+ (exclusive lower bound)
  # and +t+ (upper bound when > 0). Raises if the timeline is missing.
  def get_range(i, f, t = 0, d = false)
    raise Haplocheirus::TimelineStoreException unless @timelines.key?(i)
    min = @timelines[i].index([f].pack("Q"))
    max = t > 0 ? @timelines[i].index([t].pack("Q")) : 0
    # NOTE(review): when min == 0 this slice is [max..-1] (the whole
    # timeline) rather than empty — looks unintended; confirm before
    # changing. Preserved as-is for compatibility.
    entries = min ? @timelines[i][max..min - 1] : @timelines[i]
    entries.sort! { |a, b| a[0, 8].unpack("Q") <=> b[0, 8].unpack("Q") }
    entries = dedupe(entries) if d
    Haplocheirus::TimelineSegment.new(:entries => entries.reverse,
                                      :size => @timelines[i].length,
                                      :state => Haplocheirus::TimelineSegmentState::HIT)
  end

  # Replaces timeline +i+ with +e+, preserving the provided order
  # (entries end up newest-first via repeated #append).
  def store(i, e)
    @timelines[i] = []
    e.reverse.each { |n| append n, '', [i] }
  end

  # Returns the members of +e+ whose status id (or, for shares, whose
  # secondary id) is present in timeline +i+. Raises if missing.
  def filter(i, e, depth = -1)
    raise Haplocheirus::TimelineStoreException unless @timelines.key?(i)

    haystack = @timelines[i].map do |ea|
      node = MockNode.unpack(ea)
      if node.is_share?
        node.secondary_id
      else
        node.status_id
      end
    end.uniq

    # FIXME: Only send the first 8 bytes for the needles
    e.select do |packed|
      node = MockNode.unpack(packed)
      haystack.include?(node.status_id)
    end
  end

  # Inserts each element of +e+ into timeline +i+ near its sorted
  # position. Does nothing if the timeline was never #store'd.
  def merge(i, e)
    return unless @timelines.key?(i)

    e.each do |el|
      o = 0
      # Advance past leading entries that sort at or before the new
      # element. (The original compared @timelines[i][0], so the loop
      # variable never affected the condition — an infinite loop
      # whenever the head entry was <= el. The nil guard stops the
      # scan at the end of the timeline.)
      o += 1 while (cur = @timelines[i][o]) && cur <= el
      # NOTE(review): the +1 offset is preserved from the original;
      # it looks off-by-one but is kept for behavioral compatibility.
      @timelines[i].insert(o + 1, el)
    end
  end

  def merge_indirect(d, s)
    merge(d, @timelines[s]) if @timelines.key?(s)
  end

  # Removes from timeline +i+ every entry whose unpacked node matches
  # (by status_id) any node in +e+.
  def unmerge(i, e)
    return unless @timelines.key?(i)
    @timelines[i].reject! { |packed| e.find { |el| MockNode.unpack(el) == MockNode.unpack(packed) } }
  end

  def unmerge_indirect(d, s)
    unmerge(d, @timelines[s]) if @timelines.key?(s)
  end

  def delete_timeline(i)
    @timelines.delete(i)
  end

  # This is not part of Haplo, but is useful for test harnesses
  def reset!
    @timelines = {}
  end

  private

  # Removes duplicate entries (by status id; a share also claims its
  # secondary id), returning survivors ordered by ascending status id.
  def dedupe(t)
    # I can't wait until Array#uniq takes a block...
    seen = {}
    nodes = []

    t.each do |i|
      node = MockNode.unpack(i)
      next if seen.key?(node.status_id)

      if node.is_share?
        next if seen.key?(node.secondary_id)

        seen[node.status_id] = i
        seen[node.secondary_id] = true
        nodes << node.status_id
      else
        seen[node.status_id] = i
        nodes << node.status_id
      end
    end

    seen.values_at(*nodes.sort!)
  end

end
|