blodsband 0.0.2

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,233 @@
1
+
2
module Blodsband

  class Riak

    #
    # Client for the Solr-style HTTP interface (<code>/solr/select</code> and <code>/solr/update</code>)
    # of a Riak node with Riak Search enabled.
    #
    class Search

      # Boolean operators that must be passed through untouched when a query is rewritten
      # for multiple fields (see #multifieldify). "NOT" is included so that it is not
      # mistaken for a search term and given field prefixes.
      BOOLS = Set.new(["AND", "OR", "NOT"]).freeze

      #
      # Create a search instance.
      #
      # @param [URI] url a {::URI} to the HTTP port of a Riak node with Riak Search enabled.
      #
      def initialize(url)
        @url = url
      end

      # {include:Search#search}
      #
      # @param (see #search)
      #
      # @return [Blodsband::Future<Hash<Symbol, Object>>] an eventual {Hash} <code>:total => number_of_results, :results => [doc1, doc2, ...]</code>.
      #   The returned {Hash} also responds to <code>max_score</code>, which returns the <code>maxScore</code> of the response.
      #
      def asearch(query, options = {})
        # Work on a copy so that the caller's options are never modified.
        options = options.clone
        fields = options.delete(:fields)
        # With :fields given, rewrite the query once and search again without :fields.
        return asearch(multi_field_query(query, fields), options) if fields

        mult = Multi.new
        request = client(URI.join(@url.to_s, "/solr/select"))
        mult.add(:resp, request.aget(:query => query_with_options(query, options)))
        Future.new do
          mult.really_perform
          response = mult.responses[:callback][:resp].response['response']
          rval = {
            :total => response['numFound'],
            :results => response['docs'].map { |doc| doc['fields'] }
          }
          # Give this one Hash a max_score reader without touching Hash itself.
          class << rval
            attr_reader :max_score
          end
          rval.instance_variable_set(:@max_score, response['maxScore'])
          rval
        end
      end

      #
      # Search for documents in Riak Search.
      #
      # @param [String] query the query to use when searching. See {http://wiki.basho.com/Riak-Search---Querying.html} for more information.
      # @param [Hash<Symbol, Object>] options
      #   :default_field:: [String] the default field to apply to all terms in the query that lack their own field specifications.
      #   :per_page:: [Integer] the maximum number of hits to return.
      #   :page:: [Integer] the page number (starting at 1) that you want returned.
      #   :index:: [String] sent as the <code>index</code> request parameter.
      #   :sort:: [String] sent as the <code>sort</code> request parameter.
      #   :fields:: [Array<String>] if you want to search for terms without field specification among multiple fields, provide <code>:fields</code> instead of <code>:default_field</code>. Each entry looks like <code>fieldname</code> or <code>fieldname^boost</code>. This will emulate (slightly) the dismax functionality of Solr by rewriting the query and adding each field you define to each unfielded term.
      #
      # @return [Hash<Symbol, Object>] a {Hash} <code>:total => number_of_results, :results => [doc1, doc2, ...]</code>
      #
      def search(query, options = {})
        asearch(query, options).get
      end

      #
      # {include:Search#index!}
      #
      # @param (see #index!)
      #
      # @return [Blodsband::Riak::Future<Object>] a future that returns when the indexing is finished.
      #
      def aindex!(document)
        mult = Multi.new
        request = client(URI.join(@url.to_s, "/solr/update"))
        mult.add(:resp,
                 request.apost(:head => { 'Content-Type' => 'text/xml' },
                               :body => add_document_xml(document).to_s))
        Future.new do
          mult.really_perform
          mult.responses[:callback][:resp].response
        end
      end

      #
      # Index a document in Riak Search.
      #
      # @param [Hash] document the object to index. All keys and values will be used to create the indexed document.
      #
      def index!(document)
        aindex!(document).get
      end

      private

      #
      # Split a string into an Array tree by parentheses, eg:
      #
      #   "a (b c) (d (e f))" => ["a ", ["b c"], " ", ["d ", ["e f"]]]
      #
      # Text between two groups is kept as its own element, even when it is only whitespace.
      # Anything following an unmatched ")" at the top level is ignored.
      #
      def parenthesize(string)
        rval = []
        add_parenthese_parts(rval, string)
        rval
      end

      #
      # Helper for #parenthesize, pushing next bit of result onto parent.
      #
      # Consumes characters until the end of the string or the ")" that closes the
      # current group.
      #
      # @return [String] the part of the string that has not been consumed yet.
      #
      def add_parenthese_parts(parent, string)
        buffer = ""
        while string.size > 0
          char = string[0]
          string = string[1..-1]
          if char == "("
            # Flush the text seen so far, then let a recursive call fill the sub group.
            parent << buffer if buffer.size > 0
            sub = []
            string = add_parenthese_parts(sub, string)
            parent << sub if sub.size > 0
            buffer = ""
          elsif char == ")"
            # End of the current group: hand the rest back to the caller.
            parent << buffer if buffer.size > 0
            return string
          else
            buffer << char
          end
        end
        parent << buffer if buffer.size > 0
        string
      end

      #
      # Use the result from a parenthesized query to add fields to all terms without fields without breaking
      # the original logical structure of the query. Hopefully. This should not be necessary, but Riak Search has no
      # dismax or similar query parser :/
      #
      # @param [Array, String] query_part a tree from #parenthesize, or one of its String leaves.
      # @param [Array<String>] fields field declarations looking like 'fieldname' or 'fieldname^boost'.
      #
      # @return [String] the rewritten query.
      #
      # @raise [RuntimeError] if a field declaration contains more than one "^", or if query_part is neither Array nor String.
      #
      def multifieldify(query_part, fields)
        case query_part
        when Array
          query_part.collect do |sub_part|
            if sub_part.is_a?(Array)
              "(#{multifieldify(sub_part, fields)})"
            elsif sub_part.is_a?(String)
              multifieldify(sub_part, fields)
            end
          end.join(" ")
        when String
          terms = []
          # Captures: 1 = optional "field:" prefix, 2 = the term (quoted phrase or run of
          # non whitespace), 5 = the rest of the string. The m flag lets the rest span
          # line breaks, so terms after a newline are not lost.
          while match = query_part.match(/\A\s*(\w+:)?(("[^"]*")|(\S+))\s*(.*)/m)
            if match[1]
              # Already has a field: keep as is.
              terms << "#{match[1]}#{match[2]}"
            elsif BOOLS.include?(match[2])
              terms << match[2]
            else
              # Unfielded term: replace it with a group holding one copy per field.
              terms << "("
              fields.each do |field|
                parts = field.split("^")
                if parts.size == 1
                  terms << "#{field}:#{match[2]}"
                elsif parts.size == 2
                  terms << "#{parts[0]}:#{match[2]}^#{parts[1]}"
                else
                  raise("Illegal field declaration for #multi_field_query. " +
                        "Fields must look like 'fieldname' or 'fieldname^boost'")
                end
              end
              terms << ")"
            end
            query_part = match[5]
          end
          terms.join(" ")
        else
          raise "Illegal query part type: #{query_part.inspect}. Only Array or String allowed."
        end
      end

      #
      # Make sure that all terms in query that have no fields defined get the provided fields.
      #
      def multi_field_query(query, fields)
        multifieldify(parenthesize(query), fields)
      end

      #
      # Build the request parameters for a search.
      #
      # @param [String] q the query string.
      # @param [Hash<Symbol, Object>] options the options given to #search, without :fields.
      #
      # @return [Hash<Symbol, Object>] the parameters, always including :q and <code>:wt => 'json'</code>.
      #
      def query_with_options(q, options = {})
        query = {
          :q => q,
          :wt => 'json'
        }
        query[:index] = options[:index] if options[:index]
        # Pages are 1-based; a page size of 10 is assumed when :per_page is not given.
        query[:start] = (options[:page] - 1) * (options[:per_page] || 10) if options[:page]
        query[:sort] = options[:sort] if options[:sort]
        query[:rows] = options[:per_page] if options[:per_page]
        query[:df] = options[:default_field] if options[:default_field]
        query
      end

      #
      # @param [URI] url the url to send requests to.
      #
      # @return [EventMachine::HttpRequest] a request object using the JSONResponse middleware.
      #
      def client(url)
        http = EventMachine::HttpRequest.new(url)
        http.use EventMachine::Middleware::JSONResponse
        http
      end

      #
      # Build the XML to post when indexing a document: an <code>add</code> element holding
      # one <code>doc</code> with one <code>field</code> element per key/value pair.
      #
      # @param [Hash] document the document to index.
      #
      # @return [REXML::Document] the XML document.
      #
      def add_document_xml(document)
        xml = REXML::Document.new('<add></add>')
        doc_el = xml.root.add_element('doc')
        document.each do |field, value|
          field_el = doc_el.add_element('field', { 'name' => field })
          field_el.text = value
        end
        xml
      end

    end

  end

end
@@ -0,0 +1,104 @@
1
+
2
module Blodsband

  class Riak

    #
    # A concurrent set. Named Sset to avoid colliding with ruby standard Set.
    #
    # Membership is tracked through pointer entries in the bucket: for each value,
    # the key produced by #key_for maps to the key of the element holding it.
    #
    class Sset < List

      #
      # @param [Object] v a value to check if this set includes.
      #
      # @return [true,false] whether this set includes v.
      #
      def include?(v)
        pointer = bucket.get(key_for(v))
        !pointer.nil?
      end

      #
      # @param [Array<Object>] values some values to check if they are included in this set.
      #
      # @return [Set<Object>] the values that are included in this set.
      #
      def retain(values)
        # Pointer key => value, so that the keys found in the bucket can be turned back into values.
        by_key = values.each_with_object({}) do |value, lookup|
          lookup[key_for(value)] = value
        end
        included = bucket.has_many?(by_key.keys)
        kept = by_key.each_with_object([]) do |(pointer_key, value), found|
          found << value if included.include?(pointer_key)
        end
        Set.new(kept)
      end

      #
      # @return [Set<Object>] this {Blodsband::Riak::Sset} as a ruby {::Set}.
      #
      def to_set
        Set.new(to_a)
      end

      #
      # @param [Blodsband::Riak::Sset] sset a {Blodsband::Riak::Sset} to intersect with this.
      #
      # @return [Set<Object>] a ruby {::Set} containing the intersection of this {Blodsband::Riak::Sset} and the argument.
      #
      def intersect(sset)
        # List out the smaller of the two sets and look its values up in the larger one.
        return retain(sset.to_a) if size > sset.size

        sset.retain(to_a)
      end

      #
      # @param [Object] v a value to remove from this set.
      #
      # @return [Object, nil] the value of the removed element, or nil if no pointer entry was found for v.
      #
      def delete(v)
        element_key = bucket.get(key_for(v))
        return nil if element_key.nil?

        element = Element.find(self, element_key)
        element.delete
        element.value
      end

      protected

      # Presumably hooks called by List while adding and removing elements - confirm against List.

      # An element may only be added while there is no pointer entry for its value.
      def ok_to_add?(new_element)
        pointer = bucket.get(key_for(new_element.value))
        pointer.nil?
      end

      # Put a :delete_element_pointer operation for the element's value on the backlog.
      def backlog_delete(new_element)
        pointer_key = key_for(new_element.value)
        backlog << [:delete_element_pointer, pointer_key]
      end

      # Put an :add_element_pointer operation, pointing the element's value at its key, on the backlog.
      def backlog_add(new_element)
        pointer_key = key_for(new_element.value)
        backlog << [:add_element_pointer, new_element.key, pointer_key]
      end

      # Remove the pointer entry stored under key.
      def delete_element_pointer(key)
        bucket.delete(key)
      end

      # Store element_key under key, with the w parameter set to :all.
      def add_element_pointer(element_key, key)
        bucket.put(key, element_key, :riak_params => {:w => :all})
      end

      private

      #
      # @param [Object] value a value that is, or could be, a member of this set.
      #
      # @return [String] the key of this set, an underscore and the SHA1 digest of the JSON encoded value in base 36.
      #
      def key_for(value)
        encoded = Yajl::Encoder.encode(value)
        digest = Digest::SHA1.hexdigest(encoded).to_i(16).to_s(36)
        "#{key}_#{digest}"
      end

    end

  end

end
metadata ADDED
@@ -0,0 +1,107 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: blodsband
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.2
5
+ prerelease:
6
+ platform: ruby
7
+ authors:
8
+ - Martin Bruse
9
+ autorequire:
10
+ bindir: bin
11
+ cert_chain: []
12
+ date: 2012-03-14 00:00:00.000000000 Z
13
+ dependencies:
14
+ - !ruby/object:Gem::Dependency
15
+ name: em-http-request
16
+ requirement: &70126645594260 !ruby/object:Gem::Requirement
17
+ none: false
18
+ requirements:
19
+ - - ! '>='
20
+ - !ruby/object:Gem::Version
21
+ version: '0'
22
+ type: :runtime
23
+ prerelease: false
24
+ version_requirements: *70126645594260
25
+ - !ruby/object:Gem::Dependency
26
+ name: em-synchrony
27
+ requirement: &70126645593760 !ruby/object:Gem::Requirement
28
+ none: false
29
+ requirements:
30
+ - - ! '>='
31
+ - !ruby/object:Gem::Version
32
+ version: '0'
33
+ type: :runtime
34
+ prerelease: false
35
+ version_requirements: *70126645593760
36
+ - !ruby/object:Gem::Dependency
37
+ name: yajl-ruby
38
+ requirement: &70126645593300 !ruby/object:Gem::Requirement
39
+ none: false
40
+ requirements:
41
+ - - ! '>='
42
+ - !ruby/object:Gem::Version
43
+ version: '0'
44
+ type: :runtime
45
+ prerelease: false
46
+ version_requirements: *70126645593300
47
+ - !ruby/object:Gem::Dependency
48
+ name: mail
49
+ requirement: &70126645592800 !ruby/object:Gem::Requirement
50
+ none: false
51
+ requirements:
52
+ - - ! '>='
53
+ - !ruby/object:Gem::Version
54
+ version: '0'
55
+ type: :runtime
56
+ prerelease: false
57
+ version_requirements: *70126645592800
58
+ description: ! 'An event-machine based riak client with some extra relationship support.
59
+
60
+ '
61
+ email: zondolfin at gmail dot com
62
+ executables: []
63
+ extensions: []
64
+ extra_rdoc_files: []
65
+ files:
66
+ - lib/blodsband/error.rb
67
+ - lib/blodsband/future.rb
68
+ - lib/blodsband/multi.rb
69
+ - lib/blodsband/riak/bucket.rb
70
+ - lib/blodsband/riak/counter.rb
71
+ - lib/blodsband/riak/list.rb
72
+ - lib/blodsband/riak/lock.rb
73
+ - lib/blodsband/riak/map.rb
74
+ - lib/blodsband/riak/mr.rb
75
+ - lib/blodsband/riak/response.rb
76
+ - lib/blodsband/riak/search.rb
77
+ - lib/blodsband/riak/sset.rb
78
+ - lib/blodsband/riak.rb
79
+ - lib/blodsband.rb
80
+ homepage: http://github.com/ProjectDaisy/blodsband
81
+ licenses: []
82
+ post_install_message:
83
+ rdoc_options:
84
+ - --line-numbers
85
+ - --inline-source
86
+ require_paths:
87
+ - lib
88
+ required_ruby_version: !ruby/object:Gem::Requirement
89
+ none: false
90
+ requirements:
91
+ - - ! '>='
92
+ - !ruby/object:Gem::Version
93
+ version: '0'
94
+ required_rubygems_version: !ruby/object:Gem::Requirement
95
+ none: false
96
+ requirements:
97
+ - - ! '>='
98
+ - !ruby/object:Gem::Version
99
+ version: '0'
100
+ requirements: []
101
+ rubyforge_project:
102
+ rubygems_version: 1.8.15
103
+ signing_key:
104
+ specification_version: 3
105
+ summary: An event-machine based riak client with some extra relationship support.
106
+ test_files: []
107
+ has_rdoc: true