cloudsearchable 0.0.1
Sign up to get free protection for your applications and to get access to all the features.
- data/.rspec +2 -0
- data/Gemfile +4 -0
- data/LICENSE.txt +22 -0
- data/README.md +119 -0
- data/Rakefile +11 -0
- data/cloudsearchable.gemspec +50 -0
- data/lib/cloudsearchable.rb +206 -0
- data/lib/cloudsearchable/cloud_search.rb +41 -0
- data/lib/cloudsearchable/domain.rb +159 -0
- data/lib/cloudsearchable/field.rb +56 -0
- data/lib/cloudsearchable/query_chain.rb +218 -0
- data/lib/cloudsearchable/version.rb +3 -0
- data/spec/cloudsearchable/cloud_search_spec.rb +45 -0
- data/spec/cloudsearchable/cloudsearchable_spec.rb +71 -0
- data/spec/cloudsearchable/domain_spec.rb +158 -0
- data/spec/cloudsearchable/field_spec.rb +30 -0
- data/spec/cloudsearchable/query_chain_spec.rb +305 -0
- data/spec/spec_helper.rb +10 -0
- data/spec/test_classes/cloud_searchable_test_class.rb +42 -0
- metadata +153 -0
@@ -0,0 +1,159 @@
|
|
1
|
+
#
|
2
|
+
# Class that represents the schema of a domain in CloudSearch
|
3
|
+
#
|
4
|
+
# In general, it will be instantiated by a helper class or module, like Cloudsearch_enabled.
|
5
|
+
#
|
6
|
+
module Cloudsearchable
  #
  # Holds the index-field schema of one CloudSearch domain and wraps the
  # calls needed to create the domain, reindex it, push document
  # additions/deletions to it and run search queries against it.
  #
  class Domain
    # Raised when describe_domains returns no status entry for this domain.
    class DomainNotFound < StandardError; end

    attr_reader :name, :fields

    # @param name base name of the domain; the configured
    #   Cloudsearchable::Config.domain_prefix is prepended to it
    def initialize(name)
      @name = "#{Cloudsearchable::Config.domain_prefix}#{name}"
      @fields = {}
    end

    # Defines an index field on this domain. All arguments are handed to
    # Field.new unchanged.
    # @param name field name
    # @param type field type - one of :literal, :uint, or :text
    # @option options [Boolean] :search_enabled
    # @option options [Boolean] :result_enabled
    # @option options [Symbol or Proc] :source The name of a method to call on a record to fetch
    #   the value of the field, or else a Proc to be evaluated in the context of the record.
    #   Defaults to a method with the same name as the field.
    # @return the newly registered Field
    # @raise [RuntimeError] if a field with the same name is already defined
    def add_field(name, type, options = {})
      field = Field.new(name, type, options)
      raise "Field #{name} already exists on index #{self.name}" if @fields.has_key?(field.name)
      @fields[field.name] = field
    end

    # Creates the domain and defines its index fields in Cloudsearch
    # Will blindly recreate index fields, no-op if the index already exists
    def create
      Cloudsearchable.logger.info "Creating domain #{name}"
      CloudSearch.client.create_domain(:domain_name => name)

      # Create the fields for the index
      fields.each_value do |field|
        # Log the *field* name; a bare `name` here is the domain name.
        Cloudsearchable.logger.info "  ...creating #{field.type} field #{field.name}"
        field.define_in_domain name
      end
      Cloudsearchable.logger.info "  ...done!"
    end

    # Asks CloudSearch to (re)index the documents of this domain.
    def reindex
      CloudSearch.client.index_documents(:domain_name => name)
    end

    #
    # This queries the status of the domain from Cloudsearch and determines if
    # the domain needs to be reindexed. If so, it will initiate the reindex and
    # wait timeout seconds for it to complete. Default is 0. Reindexings tend
    # to take 15-30 minutes.
    #
    # @param timeout maximum number of seconds to wait for processing to finish
    # @return true if the changes are applied, false if the domain is still reindexing
    # @raise [DomainNotFound] if the domain is not reported by describe_domains
    #
    def apply_changes(timeout = 0)
      d = cloudsearch_domain(true)[:domain_status_list][0]
      reindex if d[:requires_index_documents]

      # We'll potentially sleep until the reindex has completed
      deadline = Time.now + timeout
      sleep_time = 1
      loop do
        d = cloudsearch_domain(true)[:domain_status_list][0]
        remaining = deadline - Time.now
        break unless d[:processing] && remaining > 0

        # Never sleep past the deadline, even on the very first wait.
        sleep([sleep_time, remaining].min)
        sleep_time *= 2 # exponential backoff
      end

      !d[:processing] # processing is true as long as it is reindexing
    end

    # Add or replace the CloudSearch document for a particular version of a record
    def post_record(record, record_id, version)
      CloudSearch.post_sdf doc_endpoint, addition_sdf(record, record_id, version)
    end

    # Delete the CloudSearch document for a particular record (version must be greater than the last version pushed)
    def delete_record(record_id, version)
      CloudSearch.post_sdf doc_endpoint, deletion_sdf(record_id, version)
    end

    # Runs a search request against the domain's search endpoint.
    # NOTE: the HTTP status is not inspected; the body is handed to JSON.parse
    # as-is, so a non-JSON body surfaces as a JSON parse error.
    # @param params [Hash] query-string parameters (see QueryChain#to_q)
    # @return the parsed JSON response
    def execute_query(params)
      uri = URI("http://#{search_endpoint}/#{CloudSearch::API_VERSION}/search")
      uri.query = URI.encode_www_form(params)
      Cloudsearchable.logger.info "CloudSearch execute: #{uri.to_s}"
      res = Net::HTTP.get_response(uri).body
      JSON.parse(res)
    end

    # Builds the SDF hash that deletes the document of the given record id.
    def deletion_sdf(record_id, version)
      {
        :type    => "delete",
        :id      => document_id(record_id),
        :version => version
      }
    end

    # Builds the SDF hash that adds (or replaces) the document of a record,
    # with one entry per defined field that yields a truthy value.
    def addition_sdf(record, record_id, version)
      {
        :type    => "add",
        :id      => document_id(record_id),
        :version => version,
        :lang    => "en", # FIXME - key off of marketplace
        :fields  => sdf_fields(record)
      }
    end

    # Generate a documentID that follows the CS restrictions
    # (the MD5 hex digest of the stringified record id).
    def document_id(record_id)
      Digest::MD5.hexdigest record_id.to_s
    end

    protected

    #
    # AWS Cloudsearchable Domain
    #
    # Returns the describe_domains response for this domain, fetching it on
    # first use and caching it afterwards.
    #
    # @param force_reload force a re-fetch from the domain
    # @raise [DomainNotFound] if the response carries no status for the domain
    #
    def cloudsearch_domain(force_reload = false)
      if force_reload || !@domain
        @domain = CloudSearch.client.describe_domains(:domain_names => [name])
      end

      status = @domain[:domain_status_list]
      if status.nil? || status.empty?
        raise(DomainNotFound, "Cloudsearchable could not find the domain '#{name}' in AWS. Check the name and the availability region.")
      end

      @domain
    end

    # Maps every defined field to its value for the record, skipping fields
    # whose value is nil or false.
    def sdf_fields(record)
      fields.values.each_with_object({}) do |field, sdf|
        value = field.value_for(record)
        sdf[field.name] = value if value
      end
    end

    # AWS CloudSearch Domain API to get search endpoint
    def search_endpoint
      @search_endpoint ||= cloudsearch_domain[:domain_status_list].first[:search_service][:endpoint]
    end

    # AWS CloudSearch Domain API to get doc endpoint
    def doc_endpoint
      @doc_endpoint ||= cloudsearch_domain[:domain_status_list].first[:doc_service][:endpoint]
    end

  end
end
|
@@ -0,0 +1,56 @@
|
|
1
|
+
require 'active_support/core_ext/hash'
|
2
|
+
|
3
|
+
#
|
4
|
+
# Class that represents the schema of a domain in CloudSearch
|
5
|
+
#
|
6
|
+
# In general, it will be instantiated by a helper class or module, like Cloudsearch_enabled.
|
7
|
+
#
|
8
|
+
module Cloudsearchable
  # Represents a single field in a CloudSearch index: its name, its type,
  # where its value comes from on a record, and the type-specific options
  # sent to CloudSearch when the field is defined.
  #
  class Field
    FieldTypes = [:literal, :uint, :text].freeze
    # Maps the type of field to the name of the options hash when defining the field
    FieldTypeOptionsNames = {:literal => :literal_options, :uint => :u_int_options, :text => :text_options}.freeze
    # Maps from field type to the allowed set of options for the field
    FieldTypeOptionsKeys = {
      literal: [:default_value, :facet_enabled, :search_enabled, :result_enabled].freeze,
      uint:    [:default_value].freeze,
      text:    [:default_value, :facet_enabled, :result_enabled].freeze
    }.freeze
    attr_reader :name, :type, :source, :options

    # @param name field name (String or Symbol; stored as a Symbol)
    # @param type one of FieldTypes, given as a Symbol or a String
    # @param options [Hash] :source (method name or callable, defaults to the
    #   field name) plus any of the keys FieldTypeOptionsKeys allows for the
    #   type; every other key is dropped
    # @raise [ArgumentError] if the type is not one of FieldTypes
    def initialize(name, type, options = {})
      # Normalise before validating so that "text" is accepted like :text.
      normalized_type = type.respond_to?(:to_sym) ? type.to_sym : type
      raise ArgumentError, "Invalid field type '#{type}'" unless FieldTypes.include?(normalized_type)
      @name = name.to_sym
      @type = normalized_type
      @source = options[:source] || @name
      @options = options.slice(*FieldTypeOptionsKeys[@type])
    end

    # Fetches this field's value from a record: a callable source is evaluated
    # in the context of the record, anything else is sent to it as a method name.
    def value_for(record)
      if @source.respond_to?(:call)
        record.instance_exec(&@source)
      else
        record.send @source
      end
    end

    # Defines this field as an index field of the named CloudSearch domain.
    def define_in_domain(domain_name)
      CloudSearch.client.define_index_field(
        :domain_name => domain_name,
        :index_field => definition
      )
    end

    # Index field description in the shape expected by define_index_field.
    def definition
      # http://docs.amazonwebservices.com/cloudsearch/latest/developerguide/API_IndexField.html
      {
        :index_field_name => name.to_s,
        :index_field_type => type.to_s,
        FieldTypeOptionsNames[type] => options
      }
    end
    protected :definition
  end
end
|
@@ -0,0 +1,218 @@
|
|
1
|
+
module Cloudsearchable
  # Raised by QueryChain#to_q when neither a where clause nor a text query was given.
  class NoClausesError < StandardError; end
  # Raised by QueryChain#materialize! on a warning when fatal_warnings is enabled.
  class WarningInQueryResult < StandardError; end

  #
  # An object that represents a query to cloud search. Builder methods
  # return self so calls can be chained; the query is executed lazily the
  # first time results are needed and cannot be modified afterwards.
  #
  class QueryChain
    include Enumerable

    attr_reader :domain, :fields

    # options:
    # - fatal_warnings: if true, raises a WarningInQueryResult exception on warning. Defaults to false
    def initialize(domain, options = {})
      @fatal_warnings = options.fetch(:fatal_warnings, false)
      @domain         = domain
      @q              = nil
      @clauses        = []
      @rank           = nil
      @limit          = 100000 # 10 is the CloudSearch default, 2kb limit will probably hit before this will
      @offset         = nil
      @fields         = Set.new
      @results        = nil
    end

    #
    # This method can be called in several different forms.
    #
    # To do an equality search on several fields, you can pass a single hash, e.g.:
    #
    #   Collection.search.where(customer_id: "12345", another_field: "Some value")
    #
    # To do a search on a single field, you can pass three parameters in the
    # form: where(field, op, value)
    #
    #   Collection.search.where(:customer_id, :==, 12345)
    #
    # To search for any of several possible values for a field, use the :any operator:
    #
    #   Collection.search.where(:product_group, :any, %w{gl_kitchen gl_grocery})
    #
    # Supported operators:
    #   :== / :eq      equality on any scalar (compared as a quoted string)
    #   :!=            negated equality
    #   :any           value is a collection; matches any of its elements
    #   :within_range  value is inserted verbatim after "field:"
    #   :<, :<=, :>, :>=   only for Integer values
    #
    # Single quotes and backslashes inside quoted values are backslash-escaped.
    #
    # @raise [RuntimeError] if the query is already materialized, the operator
    #   is unrecognized, or the first argument is neither a Hash nor a Symbol
    #
    def where(field_or_hash, op = nil, value = nil)
      ensure_not_materialized!

      case field_or_hash
      when Hash
        field_or_hash.each_pair { |k, v| where(k, :==, v) }
      when Symbol
        @clauses << clause_for(field_or_hash, op, value)
      else
        raise "field_or_hash must be a Hash or Symbol, not a #{field_or_hash.class}"
      end

      self
    end

    #
    # Allows searching by text, overwriting any existing text search.
    #
    #   Collection.search.text('mens shoes')
    #
    # For more examples see http://docs.aws.amazon.com/cloudsearch/latest/developerguide/searching.text.html
    #
    def text(text)
      ensure_not_materialized!
      @q = text
      self
    end

    #
    # Set a rank expression on the query, overwriting any existing expression.
    # When never called, the rank parameter is left nil.
    #
    #   Collection.search.order('created_at')  # order by the created_at field ascending
    #   Collection.search.order('-created_at') # descending order
    #
    # For more examples see http://docs.amazonwebservices.com/cloudsearch/latest/developerguide/tuneranking.html
    #
    def order(rank_expression)
      ensure_not_materialized!
      raise "order clause must be a string, not a #{rank_expression.class}" unless rank_expression.is_a? String
      @rank = rank_expression.to_s
      self
    end

    #
    # Limit the number of results returned from query to the given count.
    #
    #   Collection.search.limit(25)
    #
    def limit(count)
      ensure_not_materialized!
      raise "limit value must respond to to_i, #{count.class} does not" unless count.respond_to? :to_i
      @limit = count.to_i
      self
    end

    #
    # Offset the results returned by the query by the given count.
    #
    #   Collection.search.offset(250)
    #
    def offset(count)
      ensure_not_materialized!
      raise "offset value must respond to to_i, #{count.class} does not" unless count.respond_to? :to_i
      @offset = count.to_i
      self
    end

    #
    # Adds a one or more fields to the returned result set, e.g.:
    #
    #   my_query.returning(:collection_id)
    #   my_query.returning(:collection_id, :created_at)
    #
    #   x = [:collection_id, :created_at]
    #   my_query.returning(x)
    #
    def returning(*fields)
      ensure_not_materialized!

      fields.flatten.each { |f| @fields << f }
      self
    end

    #
    # True if the query has been materialized (e.g. the search has been
    # executed).
    #
    def materialized?
      !@results.nil?
    end

    #
    # Executes the query, getting a result set, returns true if work was done,
    # false if the query was already materialized.
    # Every warning message in the response is logged; the first one raises
    # WarningInQueryResult when the chain was built with fatal_warnings: true.
    #
    def materialize!
      return false if materialized?

      @results = domain.execute_query(to_q)

      messages = @results && @results["info"] && @results["info"]["messages"]
      (messages || []).each do |message|
        next unless message["severity"] == "warning"

        Cloudsearchable.logger.warn "Cloud Search Warning: #{message["code"]}: #{message["message"]}"
        raise(WarningInQueryResult, "#{message["code"]}: #{message["message"]}") if @fatal_warnings
      end

      true
    end

    # Total number of documents CloudSearch reports as matching the query
    # (executes the query if it has not run yet).
    def found_count
      hits['found']
    end

    # Yields every hit of the result set (executes the query if it has not
    # run yet).
    def each(&block)
      hits['hit'].each(&block)
    end

    #
    # Turns this Query object into a query string hash that goes on the CloudSearch URL
    #
    # @raise [NoClausesError] if there is neither a where clause nor a text query
    #
    def to_q
      raise NoClausesError, "no search terms were specified" if (@clauses.nil? || @clauses.empty?) && (@q.nil? || @q.empty?)

      bq = @clauses.empty? ? nil : "(and #{@clauses.join(' ')})"
      {
        q: @q,
        bq: bq,
        rank: @rank,
        size: @limit,
        start: @offset,
        # Comma-separated so that several returned fields stay distinguishable.
        :'return-fields' => @fields.map(&:to_s).join(',')
      }
    end

    private

    # Builder methods must not change a query whose results were already fetched.
    def ensure_not_materialized!
      raise "query has already been materialized and can no longer be modified" if materialized?
    end

    # Translates one (field, operator, value) triple into a boolean-query clause.
    def clause_for(field, op, value)
      case op
      when :within_range
        "#{field}:#{value.to_s}"
      when :==, :eq
        "#{field}:#{quoted(value)}"
      when :any
        '(or ' + value.map { |v| "#{field}:#{quoted(v)}" }.join(' ') + ')'
      when :!=
        "(not #{field}:#{quoted(value)})"
      when :>, :<, :>=, :<=
        # Range comparisons are only defined for integers.
        raise "op #{op} is unrecognized" unless value.is_a?(Integer)
        case op
        when :>  then "#{field}:#{value + 1}.."
        when :<  then "#{field}:..#{value - 1}"
        when :>= then "#{field}:#{value}.."
        else          "#{field}:..#{value}"
        end
      else
        raise "op #{op} is unrecognized"
      end
    end

    # Wraps a value in single quotes, backslash-escaping embedded single
    # quotes and backslashes so the value cannot terminate the literal early.
    # The block form of gsub is used because backslashes in a replacement
    # *string* have special meaning.
    def quoted(value)
      "'" + value.to_s.gsub(/[\\']/) { |ch| "\\#{ch}" } + "'"
    end

    # The "hits" section of the (lazily fetched) response.
    def hits
      materialize!
      @results['hits'] or
        raise "improperly formed response. hits parameter not available. messages: #{@results["messages"]}"
    end
  end
end
|
@@ -0,0 +1,45 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
require 'test_classes/cloud_searchable_test_class'
|
3
|
+
|
4
|
+
describe CloudSearch do

  # Successful HTTP response double whose body is whatever the example supplies.
  class MockHTTPOK < Net::HTTPOK
    attr_reader :body

    def initialize(body)
      @body = body
    end
  end

  # Non-success HTTP response double; built without any constructor arguments.
  class MockHTTPBadRequest < Net::HTTPBadRequest
    def initialize; end
  end

  let(:endpoint) { 'https://fake_end_point.amazon.com' }

  # A populated instance of the sample searchable class.
  let(:item) do
    record = CloudSearchableSampleClassFactory.call.new
    record.destroyed    = false
    record.lock_version = 1
    record.id           = 1
    record.customer     = '1234'
    record
  end

  # The "add" SDF document the test index generates for the item.
  let(:sdf_document) do
    index = item.class.cloudsearch_index(:test_index)
    index.send(:addition_sdf, item, item.id, item.lock_version)
  end

  let(:success_response) do
    payload = { "status" => "success", "adds" => 1, "deletes" => 0 }
    MockHTTPOK.new(payload.to_json)
  end

  it 'json parses the response' do
    Net::HTTP.any_instance.stub(:start).and_return { success_response }

    parsed = described_class.post_sdf(endpoint, sdf_document)
    parsed.should eq(JSON.parse(success_response.body))
  end

  it 'triggers error! on response its no not a Net::HTTPSuccess' do
    bad_response = MockHTTPBadRequest.new
    Net::HTTP.any_instance.stub(:start).and_return { bad_response }

    bad_response.should_receive(:error!)
    described_class.post_sdf(endpoint, sdf_document)
  end

end
|