cloudsearchable 0.0.1

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,159 @@
1
+ #
2
+ # Class that represents the schema of a domain in CloudSearch
3
+ #
4
+ # In general, it will be instantiated by a helper class or module, like Cloudsearch_enabled.
5
+ #
6
module Cloudsearchable
  #
  # Represents one CloudSearch domain: its (prefixed) name and the index
  # fields defined on it. Also wraps the calls made in this file to create
  # the domain, trigger re-indexing, post/delete documents and run searches.
  #
  class Domain
    # Raised when describe_domains returns no status entry for this domain.
    class DomainNotFound < StandardError; end

    attr_reader :name, :fields

    # @param name the unprefixed domain name; Cloudsearchable::Config.domain_prefix
    #   is prepended to build the name used for every API call
    def initialize(name)
      @name = "#{Cloudsearchable::Config.domain_prefix}#{name}"
      @fields = {}
    end

    # Defines an index field on this domain (in memory only; see #create).
    #
    # @param name field name
    # @param type field type - one of :literal, :uint, or :text
    # @option options [Boolean] :search_enabled
    # @option options [Boolean] :result_enabled
    # @option options [Symbol or Proc] :source The name of a method to call on a record to fetch
    #   the value of the field, or else a Proc to be evaluated in the context of the record.
    #   Defaults to a method with the same name as the field.
    # @return [Field] the newly registered field
    # @raise [RuntimeError] if a field with the same name is already defined
    def add_field(name, type, options = {})
      field = Field.new(name, type, options)
      raise "Field #{name} already exists on index #{self.name}" if @fields.key?(field.name)

      @fields[field.name] = field
    end

    # Creates the domain and defines its index fields in Cloudsearch
    # Will blindly recreate index fields, no-op if the index already exists
    def create
      Cloudsearchable.logger.info "Creating domain #{name}"
      CloudSearch.client.create_domain(:domain_name => name)

      # Create the fields for the index
      fields.each_value do |field|
        # Log the field's own name (not the domain name) so each line identifies the field.
        Cloudsearchable.logger.info " ...creating #{field.type} field #{field.name}"
        field.define_in_domain name
      end
      Cloudsearchable.logger.info " ...done!"
    end

    # Asks CloudSearch to (re)index the documents of this domain.
    def reindex
      CloudSearch.client.index_documents(:domain_name => name)
    end

    #
    # This queries the status of the domain from Cloudsearch and determines if
    # the domain needs to be reindexed. If so, it will initiate the reindex and
    # wait timeout seconds for it to complete. Default is 0. Reindexings tend
    # to take 15-30 minutes.
    #
    # @param timeout [Numeric] maximum number of seconds to wait
    # @return true if the changes are applied, false if the domain is still reindexing
    # @raise [DomainNotFound] if the domain cannot be found
    #
    def apply_changes(timeout = 0)
      status = cloudsearch_domain(true)[:domain_status_list].first
      reindex if status[:requires_index_documents]

      # We'll potentially sleep until the reindex has completed
      end_time = Time.now + timeout
      sleep_time = 1
      loop do
        status = cloudsearch_domain(true)[:domain_status_list].first
        remaining = end_time - Time.now
        break unless status[:processing] && remaining > 0

        # Exponential backoff, never sleeping past the requested deadline.
        sleep([sleep_time, remaining].min)
        sleep_time *= 2
      end

      !status[:processing] # processing is true as long as it is reindexing
    end

    # Add or replace the CloudSearch document for a particular version of a record
    def post_record(record, record_id, version)
      CloudSearch.post_sdf doc_endpoint, addition_sdf(record, record_id, version)
    end

    # Delete the CloudSearch document for a particular record (version must be greater than the last version pushed)
    def delete_record(record_id, version)
      CloudSearch.post_sdf doc_endpoint, deletion_sdf(record_id, version)
    end

    # Runs a search against the domain's search endpoint.
    #
    # @param params [Hash] query-string parameters, form-encoded onto the URL
    # @return the parsed JSON body of the HTTP response
    def execute_query(params)
      uri = URI("http://#{search_endpoint}/#{CloudSearch::API_VERSION}/search")
      uri.query = URI.encode_www_form(params)
      Cloudsearchable.logger.info "CloudSearch execute: #{uri.to_s}"
      res = Net::HTTP.get_response(uri).body
      JSON.parse(res)
    end

    # Builds the SDF hash that deletes the document of +record_id+.
    def deletion_sdf(record_id, version)
      {
        :type    => "delete",
        :id      => document_id(record_id),
        :version => version
      }
    end

    # Builds the SDF hash that adds (or replaces) the document of +record+.
    def addition_sdf(record, record_id, version)
      {
        :type    => "add",
        :id      => document_id(record_id),
        :version => version,
        :lang    => "en", # FIXME - key off of marketplace
        :fields  => sdf_fields(record)
      }
    end

    # Generate a documentID that follows the CS restrictions
    # (the MD5 hex digest of the record id's string form)
    def document_id(record_id)
      Digest::MD5.hexdigest record_id.to_s
    end

    protected

    #
    # AWS Cloudsearchable Domain
    #
    # Returns the describe_domains response for this domain, memoized in
    # @domain between calls.
    #
    # @param force_reload force a re-fetch from the domain
    # @raise [DomainNotFound] if the response has no (or an empty) status list
    #
    def cloudsearch_domain(force_reload = false)
      if force_reload || !@domain
        @domain = CloudSearch.client.describe_domains(:domain_names => [name])
      end

      status = @domain[:domain_status_list]
      if status.nil? || status.empty?
        raise(DomainNotFound, "Cloudsearchable could not find the domain '#{name}' in AWS. Check the name and the availability region.")
      end

      @domain
    end

    # Collects the value of every defined field for +record+.
    # Fields whose value is nil or false are left out of the result.
    def sdf_fields(record)
      fields.values.each_with_object({}) do |field, sdf|
        value = field.value_for(record)
        sdf[field.name] = value if value
      end
    end

    # AWS CloudSearch Domain API to get search endpoint
    def search_endpoint
      @search_endpoint ||= cloudsearch_domain[:domain_status_list].first[:search_service][:endpoint]
    end

    # AWS CloudSearch Domain API to get doc endpoint
    def doc_endpoint
      @doc_endpoint ||= cloudsearch_domain[:domain_status_list].first[:doc_service][:endpoint]
    end
  end
end
@@ -0,0 +1,56 @@
1
+ require 'active_support/core_ext/hash'
2
+
3
+ #
4
+ # Class that represents a single index field in the schema of a CloudSearch domain
5
+ #
6
+ # In general, it will be instantiated by a helper class or module, like Cloudsearch_enabled.
7
+ #
8
module Cloudsearchable
  # Represents a single field in a CloudSearch index.
  #
  # A field has a name, a type (one of FieldTypes), a source used to read its
  # value from a record, and the type-specific options sent to CloudSearch
  # when the field is defined.
  #
  class Field
    FieldTypes = [:literal, :uint, :text].freeze
    # Maps the type of field to the name of the options hash when defining the field
    FieldTypeOptionsNames = {:literal => :literal_options, :uint => :u_int_options, :text => :text_options}.freeze
    # Maps from field type to the allowed set of options for the field
    FieldTypeOptionsKeys = {
      literal: [:default_value, :facet_enabled, :search_enabled, :result_enabled].freeze,
      uint:    [:default_value].freeze,
      text:    [:default_value, :facet_enabled, :result_enabled].freeze
    }.freeze

    attr_reader :name, :type, :source, :options

    # @param name [#to_sym] field name
    # @param type [Symbol, String] one of FieldTypes (a String is converted to a Symbol)
    # @param options [Hash] :source plus any keys listed in FieldTypeOptionsKeys
    #   for the type; every other key is dropped
    # @raise [ArgumentError] if the type is not one of FieldTypes
    def initialize(name, type, options = {})
      # Normalise before validating so that "text" is accepted just like :text.
      normalized_type = type.respond_to?(:to_sym) ? type.to_sym : type
      raise ArgumentError, "Invalid field type '#{type}'" unless FieldTypes.include?(normalized_type)

      @name = name.to_sym
      @type = normalized_type
      @source = options[:source] || @name
      @options = options.slice(*FieldTypeOptionsKeys[@type])
    end

    # Reads this field's value from +record+.
    #
    # A callable source is evaluated in the context of the record; any other
    # source is sent to the record as a method name.
    def value_for(record)
      if @source.respond_to?(:call)
        record.instance_exec(&@source)
      else
        record.send(@source)
      end
    end

    # Defines this field on the CloudSearch domain called +domain_name+.
    def define_in_domain(domain_name)
      CloudSearch.client.define_index_field(
        :domain_name => domain_name,
        :index_field => definition
      )
    end

    # Builds the index-field description passed to define_index_field.
    def definition
      # http://docs.amazonwebservices.com/cloudsearch/latest/developerguide/API_IndexField.html
      {
        :index_field_name => name.to_s,
        :index_field_type => type.to_s,
        FieldTypeOptionsNames[type] => options
      }
    end
    protected :definition
  end
end
@@ -0,0 +1,218 @@
1
module Cloudsearchable
  class NoClausesError < StandardError; end
  class WarningInQueryResult < StandardError; end

  #
  # An object that represents a query to cloud search
  #
  # The query is built by chaining where/text/order/limit/offset/returning
  # and is executed lazily the first time results are needed (found_count,
  # each, or any Enumerable method). Once executed it can no longer be
  # modified.
  #
  class QueryChain
    include Enumerable

    attr_reader :domain, :fields

    # @param domain the object the query is sent to; it must respond to
    #   execute_query(params)
    # options:
    # - fatal_warnings: if true, raises a WarningInQueryResult exception on warning. Defaults to false
    def initialize(domain, options = {})
      @fatal_warnings = options.fetch(:fatal_warnings, false)
      @domain         = domain
      @q              = nil
      @clauses        = []
      @rank           = nil
      @limit          = 100000 # 10 is the CloudSearch default, 2kb limit will probably hit before this will
      @offset         = nil
      @fields         = Set.new
      @results        = nil
    end

    #
    # This method can be called in several different forms.
    #
    # To do an equality search on several fields, you can pass a single hash, e.g.:
    #
    #   Collection.search.where(customer_id: "12345", another_field: "Some value")
    #
    # To do a search on a single field, you can pass three parameters in the
    # form: where(field, op, value)
    #
    #   Collection.search.where(:customer_id, :==, 12345)
    #
    # To search for any of several possible values for a field, use the :any operator:
    #
    #   Collection.search.where(:product_group, :any, %w{gl_kitchen gl_grocery})
    #
    # Equality operators (:==, :eq, :!=) are supported on all scalars, and
    # inequality operators (:<, :<=, :>, :>=) are supported on integers.
    # :within_range inserts the given value verbatim as the field's range expression.
    #
    # Single quotes and backslashes inside quoted values are backslash-escaped.
    #
    # @raise [RuntimeError] if the query was already executed, the operator is
    #   not recognized, or the first argument is neither a Hash nor a Symbol
    #
    def where(field_or_hash, op = nil, value = nil)
      ensure_not_materialized!

      case field_or_hash
      when Hash
        field_or_hash.each_pair { |k, v| where(k, :==, v) }
      when Symbol
        # Build the clause first so that nothing is appended when the op is invalid.
        @clauses << clause_for(field_or_hash, op, value)
      else
        raise "field_or_hash must be a Hash or Symbol, not a #{field_or_hash.class}"
      end

      self
    end

    #
    # Allows searching by text, overwriting any existing text search.
    #
    #   Collection.search.text('mens shoes')
    #
    # For more examples see http://docs.aws.amazon.com/cloudsearch/latest/developerguide/searching.text.html
    #
    def text(text)
      ensure_not_materialized!
      @q = text
      self
    end

    #
    # Set a rank expression on the query, overwriting any existing expression. Defaults to "-text_relevance"
    #
    #   Collection.search.order('created_at')  # order by the created_at field ascending
    #   Collection.search.order('-created_at') # descending order
    #
    # For more examples see http://docs.amazonwebservices.com/cloudsearch/latest/developerguide/tuneranking.html
    #
    def order(rank_expression)
      ensure_not_materialized!
      raise "order clause must be a string, not a #{rank_expression.class}" unless rank_expression.is_a? String
      @rank = rank_expression.to_s
      self
    end

    #
    # Limit the number of results returned from query to the given count.
    #
    #   Collection.search.limit(25)
    #
    def limit(count)
      ensure_not_materialized!
      raise "limit value must be must respond to to_i, #{count.class} does not" unless count.respond_to? :to_i
      @limit = count.to_i
      self
    end

    #
    # Offset the results returned by the query by the given count.
    #
    #   Collection.search.offset(250)
    #
    def offset(count)
      ensure_not_materialized!
      raise "offset value must respond to to_i, #{count.class} does not" unless count.respond_to? :to_i
      @offset = count.to_i
      self
    end

    #
    # Adds a one or more fields to the returned result set, e.g.:
    #
    #   my_query.returning(:collection_id)
    #   my_query.returning(:collection_id, :created_at)
    #
    #   x = [:collection_id, :created_at]
    #   my_query.returning(x)
    #
    def returning(*fields)
      ensure_not_materialized!

      fields.flatten.each { |f| @fields << f }
      self
    end

    #
    # True if the query has been materialized (e.g. the search has been
    # executed).
    #
    def materialized?
      !@results.nil?
    end

    #
    # Executes the query, getting a result set, returns true if work was done,
    # false if the query was already materialized.
    # Every warning message in the response is logged; the first one raises
    # WarningInQueryResult when the chain was built with fatal_warnings.
    #
    def materialize!
      return false if materialized?

      @results = domain.execute_query(to_q)

      messages = @results && @results["info"] && @results["info"]["messages"]
      (messages || []).each do |message|
        next unless message["severity"] == "warning"

        Cloudsearchable.logger.warn "Cloud Search Warning: #{message["code"]}: #{message["message"]}"
        raise(WarningInQueryResult, "#{message["code"]}: #{message["message"]}") if @fatal_warnings
      end

      true
    end

    # Number of documents CloudSearch reports as matching the query
    # (executes the query if needed).
    def found_count
      hits['found']
    end

    # Yields every hit of the result set (executes the query if needed).
    def each(&block)
      hits['hit'].each(&block)
    end

    #
    # Turns this Query object into a query string hash that goes on the CloudSearch URL
    #
    # @raise [NoClausesError] if neither a where clause nor a text search was given
    #
    def to_q
      raise NoClausesError, "no search terms were specified" if (@clauses.nil? || @clauses.empty?) && (@q.nil? || @q.empty?)

      bq = @clauses.empty? ? nil : "(and #{@clauses.join(' ')})"
      {
        q:     @q,
        bq:    bq,
        rank:  @rank,
        size:  @limit,
        start: @offset,
        # Field names are comma-separated; an empty set yields "".
        :'return-fields' => @fields.map(&:to_s).join(',')
      }
    end

    private

    # Refuses to modify a query whose results were already fetched.
    def ensure_not_materialized!
      raise "the query has already been materialized and can no longer be modified" if materialized?
    end

    # Returns the "hits" section of the response, executing the query if needed.
    def hits
      materialize!
      @results['hits'] ||
        raise("improperly formed response. hits parameter not available. messages: #{@results["messages"]}")
    end

    # Renders value as a single-quoted literal, escaping backslashes and single quotes.
    def quoted(value)
      "'#{value.to_s.gsub(/[\\']/) { |char| "\\#{char}" }}'"
    end

    # Builds the boolean-query clause for a single field/operator/value triple.
    def clause_for(field, op, value)
      case op
      when :within_range then "#{field}:#{value}"
      when :==, :eq      then "#{field}:#{quoted(value)}"
      when :any          then "(or #{value.map { |v| "#{field}:#{quoted(v)}" }.join(' ')})"
      when :!=           then "(not #{field}:#{quoted(value)})"
      when :>, :<, :>=, :<=
        # Range operators are only defined for integer values.
        raise "op #{op} is unrecognized" unless value.is_a?(Integer)

        case op
        when :>  then "#{field}:#{value + 1}.."
        when :<  then "#{field}:..#{value - 1}"
        when :>= then "#{field}:#{value}.."
        else          "#{field}:..#{value}"
        end
      else
        raise "op #{op} is unrecognized"
      end
    end
  end
end
@@ -0,0 +1,3 @@
1
module Cloudsearchable
  # Version of the cloudsearchable gem; frozen so the constant cannot be mutated in place.
  VERSION = "0.0.1".freeze
end
@@ -0,0 +1,45 @@
1
+ require 'spec_helper'
2
+ require 'test_classes/cloud_searchable_test_class'
3
+
4
# Specs for CloudSearch.post_sdf: the JSON body of a successful response is
# parsed and returned, and error! is invoked on an unsuccessful response.
describe CloudSearch do

  # Minimal stand-in for a successful HTTP response that only carries a body.
  class MockHTTPOK < Net::HTTPOK
    attr_reader :body

    def initialize(body)
      @body = body
    end
  end

  # Minimal stand-in for a 400 response; skips the real constructor.
  class MockHTTPBadRequest < Net::HTTPBadRequest
    def initialize; end
  end

  let(:endpoint) { 'https://fake_end_point.amazon.com' }

  # A sample searchable record with just enough state to build an SDF document.
  let(:item) {
    record = CloudSearchableSampleClassFactory.call.new
    record.destroyed = false
    record.lock_version = 1
    record.id = 1
    record.customer = '1234'
    record
  }

  let(:sdf_document) {
    index = item.class.cloudsearch_index(:test_index)
    index.send(:addition_sdf, item, item.id, item.lock_version)
  }

  let(:success_response) {
    MockHTTPOK.new({"status" => "success", "adds" => 1, "deletes" => 0}.to_json)
  }

  it 'json parses the response' do
    Net::HTTP.any_instance.stub(:start).and_return { success_response }

    parsed = described_class.post_sdf(endpoint, sdf_document)
    parsed.should eq(JSON.parse(success_response.body))
  end

  it 'triggers error! on response its no not a Net::HTTPSuccess' do
    bad_response = MockHTTPBadRequest.new
    Net::HTTP.any_instance.stub(:start).and_return { bad_response }

    bad_response.should_receive(:error!)
    described_class.post_sdf(endpoint, sdf_document)
  end

end