haplocheirus-client 0.6.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/LICENSE +13 -0
- data/README.md +70 -0
- data/lib/haplocheirus.rb +40 -0
- data/lib/haplocheirus/client.rb +177 -0
- data/lib/haplocheirus/mock_service.rb +166 -0
- data/lib/haplocheirus/service.rb +24 -0
- data/lib/haplocheirus/thrift/timeline_store.rb +886 -0
- data/lib/haplocheirus/thrift/timeline_store_types.rb +107 -0
- data/lib/haplocheirus/version.rb +3 -0
- metadata +123 -0
data/LICENSE
ADDED
@@ -0,0 +1,13 @@
Haplocheirus Client is Copyright (C) 2010 Twitter, Inc.

Licensed under the Apache License, Version 2.0 (the "License"); you may not
use this work except in compliance with the License. You may obtain a copy of
the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
License for the specific language governing permissions and limitations under
the License.
data/README.md
ADDED
@@ -0,0 +1,70 @@
Haplocheirus Client
-------------------

Haplocheirus is a highly available, partitioned storage service for
vectors of heterogeneous blobs. This is a Ruby client library for that service.


INSTALLATION
------------

    gem install haplocheirus-client


USAGE
-----

Attach a new client to a Haplo service:

    >> client = Haplocheirus.new # Defaults to localhost:7666

Store a vector under the id of '0':

    >> client.store '0', ['foo', 'bar']

Find the first 2 entries, starting at index 0:

    >> client.get '0', 0, 2
    ['bar', 'foo'] # note the reverse order

Append an entry:

    >> client.append '0', 'baz'
    >> client.get '0', 0, 3
    ['baz', 'bar', 'foo']

Merge that vector with another:

    >> client.merge '0', ['bat', 'quux']
    >> client.get '0', 0, 5
    ['quux', 'baz', 'bat', 'bar', 'foo']


Remove an entry:

    >> client.remove 'bat', '0'
    >> client.get '0', 0, 4
    ['quux', 'baz', 'bar', 'foo']

Remove a set of entries:

    >> client.unmerge '0', ['foo', 'bar']
    >> client.get '0', 0, 2
    ['quux', 'baz']

Delete the vector:

    >> client.delete '0'


CONTRIBUTORS
------------

Brandon Mitchell


LICENSE
-------

Copyright (C) 2010 Twitter, Inc.

This work is licensed under the Apache License, Version 2.0. See LICENSE for details.
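
The gem also bundles an in-memory Haplocheirus::MockService (see
data/lib/haplocheirus/mock_service.rb below). A minimal sketch, not part of the
README above, of wiring it into the client for a test suite; the 'home:1' id and
the packed 8-byte entries are illustrative, following the "at least 8B per entry"
convention documented in the client:

    require 'haplocheirus'

    # The client only needs an object that answers the service calls,
    # so the mock can stand in for a live Thrift connection in tests.
    client = Haplocheirus::Client.new(Haplocheirus::MockService.new)

    # Illustrative id and entries: each entry starts with an 8-byte packed id,
    # which is what the mock sorts and dedupes on.
    client.store 'home:1', [[1].pack("Q"), [2].pack("Q")]
    segment = client.get 'home:1', 0, 2
    segment.entries.length #=> 2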
data/lib/haplocheirus.rb
ADDED
@@ -0,0 +1,40 @@
require 'thrift'
require 'thrift_client'

require 'haplocheirus/thrift/timeline_store'
require 'haplocheirus/service'
require 'haplocheirus/client'

module Haplocheirus
  autoload :MockService, 'haplocheirus/mock_service'

  # Convenience method for:
  #
  #   s = Haplocheirus::Service.new(*args)
  #   Haplocheirus::Client.new(s)
  #
  def self.new(*args)
    service = Haplocheirus::Service.new(*args)
    Haplocheirus::Client.new(service)
  rescue Haplocheirus::Service::ServiceDisabled
    nil
  end

  # Nice-to-haves...
  class TimelineSegment #:nodoc:

    def hit?
      state == TimelineSegmentState::HIT
    end

    def miss?
      state == TimelineSegmentState::MISS
    end

    def timeout?
      state == TimelineSegmentState::TIMEOUT
    end

  end

end
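
The hit?, miss?, and timeout? predicates above wrap the TimelineSegmentState
value carried on each returned segment. A short sketch of branching on them;
the segment comes from Client#get (defined in client.rb below), the timeline id
is illustrative, and printing the leading 8-byte id assumes the packed-entry
convention documented in the client:

    segment = client.get 'home:1', 0, 20   # nil if the timeline was never #store'd
    if segment && segment.hit?
      # Illustrative: print the 8-byte id prefix of each entry
      segment.entries.each { |e| puts e[0, 8].unpack("Q").first }
    end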
data/lib/haplocheirus/client.rb
ADDED
@@ -0,0 +1,177 @@
class Haplocheirus::Client

  attr_accessor :service

  # ==== Parameters
  # service<ThriftClient>
  #
  def initialize(service)
    @service = service
  end

  # Appends an entry to a set of timelines given by
  # timeline_ids. Appends will do nothing if the timeline has not been
  # created using #store.
  #
  # ==== Parameters
  # entry
  # prefix<String>:: Prefix to prepend to each id
  # timeline_ids<Array[Integer], Integer>
  #
  def append(entry, prefix, *timeline_ids)
    @service.append entry, prefix, timeline_ids.flatten
  end

  # Removes an entry from a set of timelines given by timeline_ids
  #
  # ==== Parameters
  # entry
  # prefix<String>:: Prefix to prepend to each id
  # timeline_ids<Array[Integer]>
  #
  def remove(entry, prefix, timeline_ids)
    @service.remove entry, prefix, timeline_ids
  end

  # Gets entries on the timeline given by timeline_id, optionally
  # beginning at offset and limited by length. Timelines are stored in
  # recency order - an offset of 0 is the latest entry. Returns nil if
  # the timeline_id does not exist.
  #
  # ==== Parameters
  # timeline_id<String>
  # offset<Integer>
  # length<Integer>
  # dedupe<Boolean>:: Optional. Defaults to false.
  #
  # ==== Returns
  # TimelineSegment
  #
  # NOTE: The #size of the returned segment is computed *before* dupes
  # are removed.
  #
  def get(timeline_id, offset, length, dedupe = false)
    @service.get timeline_id, offset, length, dedupe
  rescue Haplocheirus::TimelineStoreException
    nil
  end

  # ==== Parameters
  # timeline_queries<Array[TimelineQuery]>:: list of query structs
  #
  # ==== Returns
  # Array[TimelineSegment]
  #
  # NOTE: Because there is no identifying information in the returned
  # TimelineSegments, there is a strict ordering relationship between
  # the query and returned segment lists.
  #
  def get_multi(timeline_queries)
    @service.get_multi timeline_queries
  end

  # Gets a range of entries from the timeline given by timeline_id
  # since from_id (exclusive). This may include entries that were inserted out
  # of order. from_id and to_id are treated as 8-byte prefixes. If
  # to_id is <= 0, results are not bounded by a maximum value. Returns
  # nil if the timeline_id does not exist.
  #
  # ==== Parameters
  # timeline_id<String>
  # from_id<Integer>
  # to_id<Integer>:: Optional. Defaults to 0.
  # dedupe<Boolean>:: Optional. Defaults to false.
  #
  # ==== Returns
  # TimelineSegment
  #
  # NOTE: The #size of the returned segment is computed *before* dupes
  # are removed.
  #
  def range(timeline_id, from_id, to_id = 0, dedupe = false)
    @service.get_range timeline_id, from_id, to_id, dedupe
  rescue Haplocheirus::TimelineStoreException
    nil
  end

  # Atomically stores a set of entries into a timeline given by
  # timeline_id. The entries are stored in the order provided.
  #
  # ==== Parameters
  # timeline_id<String>
  # entries<Array>
  #
  def store(timeline_id, entries)
    @service.store timeline_id, entries
  end

  # Returns the intersection of entries with the current contents of
  # the timeline given by timeline_id. Returns an empty Array if the
  # timeline_id does not exist.
  #
  # ==== Parameters
  # timeline_id<String>
  # entries<Array>
  #
  def filter(timeline_id, *entries)
    # FIXME: Expose max search depth
    @service.filter timeline_id, entries.flatten, -1
  rescue Haplocheirus::TimelineStoreException
    []
  end

  # Merges the entries into the timeline given by timeline_id. Merges
  # will do nothing if the timeline hasn't been created using
  # #store. Entries should be byte arrays of at least 8B per entry.
  #
  # ==== Parameters
  # timeline_id<String>
  # entries<Array>
  #
  def merge(timeline_id, entries)
    @service.merge timeline_id, entries
  end

  # Merges entries in the timeline given by source_id into the
  # timeline given by dest_id. Does nothing if source_id does not exist.
  #
  # ==== Parameters
  # dest_id<String>
  # source_id<String>
  #
  def merge_indirect(dest_id, source_id)
    @service.merge_indirect dest_id, source_id
  end

  # Removes a list of entries from a timeline. Unmerges will do nothing
  # if the timeline hasn't been created using #store. Entries should
  # be byte arrays of at least 8B per entry.
  #
  # ==== Parameters
  # timeline_id<String>
  # entries<Array>
  #
  def unmerge(timeline_id, entries)
    @service.unmerge timeline_id, entries
  end

  # Removes entries in the timeline given by source_id from the
  # timeline given by dest_id. Does nothing if source_id does not exist.
  #
  # ==== Parameters
  # dest_id<String>
  # source_id<String>
  #
  def unmerge_indirect(dest_id, source_id)
    @service.unmerge_indirect dest_id, source_id
  end

  # Removes the timeline from the backend store
  #
  # ==== Parameters
  # timeline_id<String>
  #
  def delete(timeline_id)
    @service.delete_timeline timeline_id
  end
end
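
Client#get_multi takes a list of query structs and, as noted above, returns
segments in strict query order. A sketch under the assumption that the
TimelineQuery struct from the bundled Thrift bindings accepts its fields as a
hash (the field names mirror how MockService#get_multi reads them, and the ids
are illustrative):

    # Assumed: thrift-generated structs take a field hash in #new.
    queries = ['home:1', 'home:2'].map do |id|
      Haplocheirus::TimelineQuery.new(:timeline_id => id,
                                      :offset      => 0,
                                      :length      => 20,
                                      :dedupe      => false)
    end

    segments = client.get_multi(queries)
    # Results come back in query order, so index back into the query list.
    segments.each_with_index do |segment, n|
      puts "#{queries[n].timeline_id}: #{segment.hit? ? segment.size : 'miss'}"
    end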
data/lib/haplocheirus/mock_service.rb
ADDED
@@ -0,0 +1,166 @@
require 'set'

class Haplocheirus::MockService #:nodoc:

  class MockNode < Struct.new(:status_id, :secondary_id, :bitfield)
    include Comparable

    RETWEET_BIT = 31

    def self.unpack(string)
      new *string.unpack("QQI")
    end

    def initialize(*args)
      super
      self.bitfield ||= 0
    end

    def <=>(other)
      status_id <=> other.status_id
    end

    def is_share?
      bitfield[RETWEET_BIT] == 1
    end
  end

  def initialize
    @timelines = {}
  end

  def append(e, p, is)
    is.each do |i|
      key = p + i.to_s
      next unless @timelines.key?(key)
      # NOTE: This check occurs on read, server-side
      @timelines[key].unshift(e) unless @timelines[key].include?(e)
    end
  end

  def remove(e, p, is)
    is.each do |i|
      key = p + i.to_s
      next unless @timelines.key?(key)
      @timelines[key].reject! { |i| i == e }
      @timelines.delete(key) if @timelines[key].empty?
    end
  end

  def get(i, o, l, d = false)
    raise Haplocheirus::TimelineStoreException unless @timelines.key?(i)
    t = @timelines[i].to_a[o..(o+l)]
    t.sort! { |a, b| a[0,8].unpack("Q") <=> b[0,8].unpack("Q") }
    t = dedupe(t) if d
    Haplocheirus::TimelineSegment.new(:entries => t.reverse.map{ |tt| tt.dup },
                                      :size => t.length,
                                      :state => Haplocheirus::TimelineSegmentState::HIT)
  end

  def get_multi(qs)
    qs.map do |q|
      begin
        get q.timeline_id, q.offset, q.length, q.dedupe
      rescue Haplocheirus::TimelineStoreException
        Haplocheirus::TimelineSegment.new(:entries => [],
                                          :size => 0,
                                          :state => Haplocheirus::TimelineSegmentState::MISS)
      end
    end
  end

  def get_range(i, f, t = 0, d = false)
    raise Haplocheirus::TimelineStoreException unless @timelines.key?(i)
    min = @timelines[i].index([f].pack("Q"))
    max = t > 0 ? @timelines[i].index([t].pack("Q")) : 0
    t = min ? @timelines[i][max..min-1] : @timelines[i]
    t.sort! { |a, b| a[0,8].unpack("Q") <=> b[0,8].unpack("Q") }
    t = dedupe(t) if d
    Haplocheirus::TimelineSegment.new(:entries => t.reverse,
                                      :size => @timelines[i].length,
                                      :state => Haplocheirus::TimelineSegmentState::HIT)
  end

  def store(i, e)
    @timelines[i] = []
    e.reverse.each { |n| append n, '', [i] }
  end

  def filter(i, e, depth = -1)
    raise Haplocheirus::TimelineStoreException unless @timelines.key?(i)

    haystack = @timelines[i].map do |ea|
      node = MockNode.unpack(ea)
      if node.is_share?
        node.secondary_id
      else
        node.status_id
      end
    end.uniq

    # FIXME: Only send the first 8 bytes for the needles
    e.select do |packed|
      node = MockNode.unpack(packed)
      haystack.include?(node.status_id)
    end
  end

  def merge(i, e)
    return unless @timelines.key?(i)

    e.each do |el|
      o = 0
      o += 1 while @timelines[i][0] <= el
      @timelines[i].insert(o + 1, el)
    end
  end

  def merge_indirect(d, s)
    merge(d, @timelines[s]) if @timelines.key?(s)
  end

  def unmerge(i, e)
    return unless @timelines.key?(i)
    @timelines[i].reject! { |o| e.find { |el| MockNode.unpack(el) == MockNode.unpack(o) } }
  end

  def unmerge_indirect(d, s)
    unmerge(d, @timelines[s]) if @timelines.key?(s)
  end

  def delete_timeline(i)
    @timelines.delete(i)
  end

  # This is not part of Haplo, but is useful for test harnesses
  def reset!
    @timelines = {}
  end

  private

  def dedupe(t)
    # I can't wait until Array#uniq takes a block...
    seen = { }
    nodes = []

    t.each do |i|
      node = MockNode.unpack(i)
      next if seen.key?(node.status_id)

      if node.is_share?
        next if seen.key?(node.secondary_id)

        seen[node.status_id] = i
        seen[node.secondary_id] = true
        nodes << node.status_id
      else
        seen[node.status_id] = i
        nodes << node.status_id
      end
    end

    seen.values_at(*nodes.sort!)
  end

end
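
MockNode above decodes each entry with unpack("QQI"): two 64-bit ids followed by
a 32-bit bitfield whose bit 31 flags a share/retweet. A small sketch of building
an entry in that layout; the id values are illustrative:

    # Illustrative ids; the layout (8 + 8 + 4 bytes) matches MockNode#unpack.
    status_id    = 12345
    secondary_id = 67890
    bitfield     = 1 << 31    # set the share/retweet bit
    entry = [status_id, secondary_id, bitfield].pack("QQI")

    node = Haplocheirus::MockService::MockNode.unpack(entry)
    node.is_share?   #=> true
    node.status_id   #=> 12345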