cloudsearchable 0.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.rspec +2 -0
- data/Gemfile +4 -0
- data/LICENSE.txt +22 -0
- data/README.md +119 -0
- data/Rakefile +11 -0
- data/cloudsearchable.gemspec +50 -0
- data/lib/cloudsearchable.rb +206 -0
- data/lib/cloudsearchable/cloud_search.rb +41 -0
- data/lib/cloudsearchable/domain.rb +159 -0
- data/lib/cloudsearchable/field.rb +56 -0
- data/lib/cloudsearchable/query_chain.rb +218 -0
- data/lib/cloudsearchable/version.rb +3 -0
- data/spec/cloudsearchable/cloud_search_spec.rb +45 -0
- data/spec/cloudsearchable/cloudsearchable_spec.rb +71 -0
- data/spec/cloudsearchable/domain_spec.rb +158 -0
- data/spec/cloudsearchable/field_spec.rb +30 -0
- data/spec/cloudsearchable/query_chain_spec.rb +305 -0
- data/spec/spec_helper.rb +10 -0
- data/spec/test_classes/cloud_searchable_test_class.rb +42 -0
- metadata +153 -0
@@ -0,0 +1,159 @@
|
|
1
|
+
#
|
2
|
+
# Class that represents the schema of a domain in CloudSearch
|
3
|
+
#
|
4
|
+
# In general, it will be instantiated by a helper class or module, like Cloudsearch_enabled.
|
5
|
+
#
|
6
|
+
module Cloudsearchable
|
7
|
+
# Represents one CloudSearch domain: its (prefixed) name, the index fields
# defined on it, and the operations used to create the domain, push documents
# to it and run queries against it.
class Domain
  # Raised by #cloudsearch_domain when AWS returns no status for this domain.
  class DomainNotFound < StandardError; end

  attr_reader :name, :fields

  # @param name [#to_s] unprefixed domain name; the value of
  #   Cloudsearchable::Config.domain_prefix is prepended to it
  def initialize(name)
    @name = "#{Cloudsearchable::Config.domain_prefix}#{name}"
    @fields = {}
  end

  # Defines an index field on this domain. Nothing is sent to CloudSearch
  # until #create is called.
  #
  # @param name field name
  # @param type field type - one of :literal, :uint, or :text
  # @param options [Hash] handed to Field.new unchanged. Field keeps only the
  #   keys listed for the type in Field::FieldTypeOptionsKeys (for example
  #   :search_enabled and :result_enabled for literal fields) plus :source.
  # @option options [Symbol or Proc] :source The name of a method to call on a record to fetch
  #   the value of the field, or else a Proc to be evaluated in the context of the record.
  #   Defaults to a method with the same name as the field.
  # @raise [RuntimeError] if a field with the same name was already added
  # @return [Field] the field that was added
  def add_field(name, type, options = {})
    field = Field.new(name, type, options)
    raise "Field #{name} already exists on index #{self.name}" if @fields.key?(field.name)

    @fields[field.name] = field
  end

  # Creates the domain in CloudSearch and then defines every field added via
  # #add_field. Fields are (re)defined unconditionally on every call.
  # NOTE(review): the original comment states that creating an existing
  # domain is a no-op on the AWS side - confirm against the client in use.
  def create
    Cloudsearchable.logger.info "Creating domain #{name}"
    CloudSearch.client.create_domain(:domain_name => name)

    # Create the fields for the index
    fields.each_value do |field|
      # Log the *field* name here; `name` alone is the domain name.
      Cloudsearchable.logger.info "  ...creating #{field.type} field #{field.name}"
      field.define_in_domain name
    end
    Cloudsearchable.logger.info "  ...done!"
  end

  # Asks CloudSearch to (re)index the documents of this domain.
  def reindex
    CloudSearch.client.index_documents(:domain_name => name)
  end

  #
  # This queries the status of the domain from Cloudsearch and determines if
  # the domain needs to be reindexed. If so, it will initiate the reindex and
  # wait up to timeout seconds for it to complete. Default is 0. Reindexings
  # tend to take 15-30 minutes.
  #
  # The status is polled with exponential backoff (1s, 2s, 4s, ...); each
  # sleep is capped by the time left so the call never overshoots timeout.
  #
  # @param timeout [Numeric] maximum number of seconds to wait
  # @return true if the changes are applied, false if the domain is still reindexing
  #
  def apply_changes(timeout = 0)
    reindex if current_status[:requires_index_documents]

    # We'll potentially sleep until the reindex has completed
    deadline = Time.now + timeout
    sleep_time = 1
    status = nil
    loop do
      status = current_status
      remaining = deadline - Time.now
      break unless status[:processing] && remaining > 0

      sleep([sleep_time, remaining].min)
      sleep_time *= 2 # exponential backoff
    end

    !status[:processing] # processing is true as long as it is reindexing
  end

  # Add or replace the CloudSearch document for a particular version of a record
  def post_record(record, record_id, version)
    CloudSearch.post_sdf doc_endpoint, addition_sdf(record, record_id, version)
  end

  # Delete the CloudSearch document for a particular record (version must be greater than the last version pushed)
  def delete_record(record_id, version)
    CloudSearch.post_sdf doc_endpoint, deletion_sdf(record_id, version)
  end

  # Runs a search against this domain's search endpoint.
  #
  # @param params [Hash] query-string parameters, form-encoded onto the URL
  # @return the JSON-decoded response body
  def execute_query(params)
    uri = URI("http://#{search_endpoint}/#{CloudSearch::API_VERSION}/search")
    uri.query = URI.encode_www_form(params)
    Cloudsearchable.logger.info "CloudSearch execute: #{uri.to_s}"
    JSON.parse(Net::HTTP.get_response(uri).body)
  end

  # Builds the SDF hash that deletes the document belonging to record_id.
  def deletion_sdf(record_id, version)
    {
      :type    => "delete",
      :id      => document_id(record_id),
      :version => version
    }
  end

  # Builds the SDF hash that adds/replaces the document for record, using the
  # values produced by each defined field.
  def addition_sdf(record, record_id, version)
    {
      :type    => "add",
      :id      => document_id(record_id),
      :version => version,
      :lang    => "en", # FIXME - key off of marketplace
      :fields  => sdf_fields(record)
    }
  end

  # Generate a documentID that follows the CS restrictions
  # (the MD5 hex digest of the record id's string form).
  def document_id(record_id)
    Digest::MD5.hexdigest record_id.to_s
  end

  protected

  #
  # AWS Cloudsearchable Domain
  #
  # Returns the describe_domains response for this domain, cached in @domain.
  #
  # @param force_reload force a re-fetch from the domain
  # @raise [DomainNotFound] if the response carries no domain status entry
  #
  def cloudsearch_domain(force_reload = false)
    if force_reload || !@domain
      @domain = CloudSearch.client.describe_domains(:domain_names => [name])
    end

    status = @domain[:domain_status_list]
    if status.nil? || status.empty?
      raise(DomainNotFound, "Cloudsearchable could not find the domain '#{name}' in AWS. Check the name and the availability region.")
    end

    @domain
  end

  # Freshly fetched status entry for this domain.
  def current_status
    cloudsearch_domain(true)[:domain_status_list][0]
  end

  # Maps field name => value for record, leaving out fields whose value is
  # nil or false.
  def sdf_fields(record)
    fields.each_value.with_object({}) do |field, sdf|
      value = field.value_for(record)
      sdf[field.name] = value if value
    end
  end

  # AWS CloudSearch Domain API to get search endpoint
  def search_endpoint
    @search_endpoint ||= cloudsearch_domain[:domain_status_list].first[:search_service][:endpoint]
  end

  # AWS CloudSearch Domain API to get doc endpoint
  def doc_endpoint
    @doc_endpoint ||= cloudsearch_domain[:domain_status_list].first[:doc_service][:endpoint]
  end

end
|
159
|
+
end
|
@@ -0,0 +1,56 @@
|
|
1
|
+
require 'active_support/core_ext/hash'
|
2
|
+
|
3
|
+
#
|
4
|
+
# Class that represents the schema of a domain in CloudSearch
|
5
|
+
#
|
6
|
+
# In general, it will be instantiated by a helper class or module, like Cloudsearch_enabled.
|
7
|
+
#
|
8
|
+
module Cloudsearchable
|
9
|
+
# Represents a single field in a CloudSearch index: its name, its type, the
# type-specific options sent to CloudSearch, and how to read its value from a
# record.
#
class Field
  FieldTypes = [:literal, :uint, :text].freeze
  # Maps the type of field to the name of the options hash when defining the field
  FieldTypeOptionsNames = {:literal => :literal_options, :uint => :u_int_options, :text => :text_options}.freeze
  # Maps from field type to the allowed set of options for the field
  FieldTypeOptionsKeys = {
    literal: [:default_value, :facet_enabled, :search_enabled, :result_enabled].freeze,
    uint:    [:default_value].freeze,
    text:    [:default_value, :facet_enabled, :result_enabled].freeze
  }.freeze
  attr_reader :name, :type, :source, :options

  # @param name [#to_sym] field name
  # @param type [Symbol, String] one of FieldTypes (a String is symbolized)
  # @param options [Hash] :source plus any keys allowed for the type in
  #   FieldTypeOptionsKeys; every other key is silently dropped
  # @option options [Symbol, #call] :source method name to send to a record,
  #   or a callable evaluated in the record's context. Defaults to name.
  # @raise [ArgumentError] if type is not one of FieldTypes
  def initialize(name, type, options = {})
    # Symbolize before validating so 'literal' and :literal are treated alike;
    # previously the String form was rejected despite the to_sym that followed.
    type = type.to_sym if type.respond_to?(:to_sym)
    raise ArgumentError, "Invalid field type '#{type}'" unless FieldTypes.include?(type)

    @name = name.to_sym
    @type = type
    @source = options[:source] || @name
    @options = options.slice(*FieldTypeOptionsKeys[@type])
  end

  # Reads this field's value from record.
  #
  # @return whatever the source callable returns when evaluated in the
  #   record's context, or the result of sending the source method name
  def value_for(record)
    if @source.respond_to?(:call)
      record.instance_exec(&@source)
    else
      record.send(@source)
    end
  end

  # Defines this field on the CloudSearch domain called domain_name.
  def define_in_domain(domain_name)
    CloudSearch.client.define_index_field(
      :domain_name => domain_name,
      :index_field => definition
    )
  end

  # Hash describing this field in the shape passed as :index_field to
  # define_index_field.
  def definition
    # http://docs.amazonwebservices.com/cloudsearch/latest/developerguide/API_IndexField.html
    {
      :index_field_name => name.to_s,
      :index_field_type => type.to_s,
      FieldTypeOptionsNames[type] => options
    }
  end
  protected :definition
end
|
56
|
+
end
|
@@ -0,0 +1,218 @@
|
|
1
|
+
module Cloudsearchable
|
2
|
+
# Raised by QueryChain#to_q when neither a boolean clause nor a text search
# has been specified.
class NoClausesError < StandardError
end
|
3
|
+
# Raised by QueryChain#materialize! when the response contains a warning and
# the chain was built with fatal_warnings enabled.
class WarningInQueryResult < StandardError
end
|
4
|
+
|
5
|
+
#
# An object that represents a query to cloud search.
#
# The chain is built up with #where, #text, #order, #limit, #offset and
# #returning (each returns self), and is executed lazily the first time
# results are needed (#each, #found_count or #materialize!). Once executed it
# can no longer be modified.
#
class QueryChain
  include Enumerable

  attr_reader :domain, :fields

  # @param domain object responding to execute_query(params_hash)
  # options:
  # - fatal_warnings: if true, raises a WarningInQueryResult exception on warning. Defaults to false
  def initialize(domain, options = {})
    @fatal_warnings = options.fetch(:fatal_warnings, false)
    @domain         = domain
    @q              = nil
    @clauses        = []
    @rank           = nil
    @limit          = 100000 # 10 is the CloudSearch default, 2kb limit will probably hit before this will
    @offset         = nil
    @fields         = Set.new
    @results        = nil
  end

  #
  # This method can be called in several different forms.
  #
  # To do an equality search on several fields, you can pass a single hash, e.g.:
  #
  #   Collection.search.where(customer_id: "12345", another_field: "Some value")
  #
  # To do a search on a single field, you can pass three parameters in the
  # form: where(field, op, value)
  #
  #   Collection.search.where(:customer_id, :==, 12345)
  #
  # To search for any of several possible values for a field, use the :any operator:
  #
  #   Collection.search.where(:product_group, :any, %w{gl_kitchen gl_grocery})
  #
  # Supported operators:
  #   :==, :eq, :!=     any value (compared by its to_s form)
  #   :any              an array of values
  #   :<, :<=, :>, :>=  Integer values only
  #   :within_range     value is inserted verbatim as a range expression
  # An :all operator is not implemented.
  #
  # Single quotes and backslashes inside quoted values are backslash-escaped
  # so they cannot terminate the string literal.
  #
  # @raise [RuntimeError] if the chain is already materialized, the operator
  #   is unrecognized, or field_or_hash is neither a Hash nor a Symbol
  #
  def where(field_or_hash, op = nil, value = nil)
    ensure_not_materialized!

    case field_or_hash
    when Hash
      field_or_hash.each_pair { |field, expected| where(field, :==, expected) }
    when Symbol
      @clauses << clause_for(field_or_hash, op, value)
    else
      raise "field_or_hash must be a Hash or Symbol, not a #{field_or_hash.class}"
    end

    self
  end

  #
  # Allows searching by text, overwriting any existing text search.
  #
  #   Collection.search.text('mens shoes')
  #
  # For more examples see http://docs.aws.amazon.com/cloudsearch/latest/developerguide/searching.text.html
  #
  def text(text)
    ensure_not_materialized!
    @q = text
    self
  end

  #
  # Set a rank expression on the query, overwriting any existing expression.
  # When never called, no rank expression is set (rank is sent as nil).
  #
  #   Collection.search.order('created_at')  # order by the created_at field ascending
  #   Collection.search.order('-created_at') # descending order
  #
  # For more examples see http://docs.amazonwebservices.com/cloudsearch/latest/developerguide/tuneranking.html
  #
  def order(rank_expression)
    ensure_not_materialized!
    raise "order clause must be a string, not a #{rank_expression.class}" unless rank_expression.is_a? String
    @rank = rank_expression
    self
  end

  #
  # Limit the number of results returned from query to the given count.
  #
  #   Collection.search.limit(25)
  #
  def limit(count)
    ensure_not_materialized!
    raise "limit value must respond to to_i, #{count.class} does not" unless count.respond_to? :to_i
    @limit = count.to_i
    self
  end

  #
  # Offset the results returned by the query by the given count.
  #
  #   Collection.search.offset(250)
  #
  def offset(count)
    ensure_not_materialized!
    raise "offset value must respond to to_i, #{count.class} does not" unless count.respond_to? :to_i
    @offset = count.to_i
    self
  end

  #
  # Adds one or more fields to the returned result set, e.g.:
  #
  #   my_query.returning(:collection_id)
  #   my_query.returning(:collection_id, :created_at)
  #
  #   x = [:collection_id, :created_at]
  #   my_query.returning(x)
  #
  def returning(*fields)
    ensure_not_materialized!
    @fields.merge(fields.flatten)
    self
  end

  #
  # True if the query has been materialized (e.g. the search has been
  # executed).
  #
  def materialized?
    !@results.nil?
  end

  #
  # Executes the query, getting a result set, returns true if work was done,
  # false if the query was already materialized.
  # Every warning in the response is logged; if the chain was created with
  # fatal_warnings, the first warning also raises WarningInQueryResult (the
  # chain still counts as materialized afterwards).
  #
  def materialize!
    return false if materialized?

    @results = domain.execute_query(to_q)

    info     = @results && @results["info"]
    messages = (info && info["messages"]) || []
    messages.each do |message|
      next unless message["severity"] == "warning"

      Cloudsearchable.logger.warn "Cloud Search Warning: #{message["code"]}: #{message["message"]}"
      raise(WarningInQueryResult, "#{message["code"]}: #{message["message"]}") if @fatal_warnings
    end

    true
  end

  # Total number of matches reported by CloudSearch (executes the query if
  # needed).
  #
  # @raise [RuntimeError] if the response has no "hits" entry
  def found_count
    hits['found']
  end

  # Yields each hit of the response (executes the query if needed).
  #
  # @raise [RuntimeError] if the response has no "hits" entry
  def each(&block)
    hits['hit'].each(&block)
  end

  #
  # Turns this Query object into a query string hash that goes on the CloudSearch URL
  #
  # @raise [NoClausesError] if there are neither clauses nor a text search
  #
  def to_q
    raise NoClausesError, "no search terms were specified" if (@clauses.nil? || @clauses.empty?) && (@q.nil? || @q.empty?)

    # Clauses are always wrapped in (and ...), even when there is only one.
    bq = @clauses.empty? ? nil : "(and #{@clauses.join(' ')})"
    {
      q:     @q,
      bq:    bq,
      rank:  @rank,
      size:  @limit,
      start: @offset,
      # return-fields is a comma-separated list of field names
      :'return-fields' => @fields.to_a.join(',')
    }
  end

  private

  # Guards every mutator: a chain that has been executed is read-only.
  def ensure_not_materialized!
    raise "query has already been materialized and can no longer be modified" if materialized?
  end

  # The "hits" part of the response, executing the query first if necessary.
  def hits
    materialize!
    @results['hits'] ||
      raise("improperly formed response. hits parameter not available. messages: #{@results["messages"]}")
  end

  # Builds the boolean-query clause for a single field/operator/value triple.
  def clause_for(field, op, value)
    integer = value.is_a?(Integer)

    if op == :within_range
      "#{field}:#{value.to_s}"
    elsif op == :== || op == :eq
      "#{field}:#{quote(value)}"
    elsif op == :any
      "(or #{value.map { |v| "#{field}:#{quote(v)}" }.join(' ')})"
    elsif op == :!=
      "(not #{field}:#{quote(value)})"
    elsif op == :> && integer
      "#{field}:#{value + 1}.."
    elsif op == :< && integer
      "#{field}:..#{value - 1}"
    elsif op == :>= && integer
      "#{field}:#{value}.."
    elsif op == :<= && integer
      "#{field}:..#{value}"
    else
      raise "op #{op} is unrecognized"
    end
  end

  # Wraps value in single quotes, backslash-escaping embedded single quotes
  # and backslashes.
  def quote(value)
    escaped = value.to_s.gsub(/[\\']/) { |char| "\\#{char}" }
    "'#{escaped}'"
  end
end
|
218
|
+
end
|
@@ -0,0 +1,45 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
require 'test_classes/cloud_searchable_test_class'
|
3
|
+
|
4
|
+
# Specs for CloudSearch.post_sdf: the HTTP layer is stubbed so that no request
# leaves the process, and only the handling of the response is checked.
describe CloudSearch do

  # A sample record with just enough state set to build an SDF document.
  let(:item) do
    CloudSearchableSampleClassFactory.call.new.tap do |instance|
      instance.destroyed = false
      instance.lock_version = 1
      instance.id = 1
      instance.customer = '1234'
    end
  end
  # The "add" SDF document for item, built by the :test_index domain.
  let(:sdf_document){item.class.cloudsearch_index(:test_index).send :addition_sdf, item, item.id, item.lock_version}
  let(:endpoint){'https://fake_end_point.amazon.com'}

  # Net::HTTPOK stand-in whose body is supplied directly; the overridden
  # initializer skips the real Net::HTTPResponse constructor.
  class MockHTTPOK < Net::HTTPOK
    attr :body
    def initialize body
      @body = body
    end
  end

  # Net::HTTPBadRequest stand-in that can be built without arguments.
  class MockHTTPBadRequest < Net::HTTPBadRequest
    def initialize; end
  end

  let(:success_response){ MockHTTPOK.new( {"status" => "success", "adds" => 1, "deletes" => 0}.to_json ) }

  it 'json parses the response' do
    # Every Net::HTTP#start call returns the canned success response.
    Net::HTTP.any_instance.stub(:start).and_return{success_response}

    response = described_class.post_sdf endpoint, sdf_document
    response.should eq JSON.parse success_response.body
  end

  it 'triggers error! on response its no not a Net::HTTPSuccess' do
    response = MockHTTPBadRequest.new
    Net::HTTP.any_instance.stub(:start).and_return{response}

    # post_sdf is expected to call error! on a non-success response.
    response.should_receive(:error!)
    described_class.post_sdf endpoint, sdf_document
  end

end
|