cloudsearchable 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,159 @@
1
+ #
2
+ # Class that represents the schema of a domain in CloudSearch
3
+ #
4
+ # In general, it will be instantiated by a helper class or module, like Cloudsearch_enabled.
5
+ #
6
module Cloudsearchable
  #
  # Represents the schema of a domain in CloudSearch: a prefixed domain name
  # plus the index fields registered through #add_field.
  #
  # In general, it will be instantiated by a helper class or module, like
  # Cloudsearch_enabled.
  #
  class Domain
    # Raised when describe_domains returns no status entry for this domain.
    class DomainNotFound < StandardError; end

    attr_reader :name, :fields

    # @param name the unprefixed domain name; Cloudsearchable::Config.domain_prefix
    #   is prepended to build the name used against CloudSearch
    def initialize(name)
      @name = "#{Cloudsearchable::Config.domain_prefix}#{name}"
      @fields = {}
    end

    # Defines an index field on this domain.
    #
    # @param name field name
    # @param type field type - one of :literal, :uint, or :text
    # @param options passed through to Field.new. Field keeps only the option
    #   keys it whitelists per type (e.g. :default_value, :facet_enabled,
    #   :search_enabled, :result_enabled) plus :source.
    # @option options [Symbol or Proc] :source The name of a method to call on a record to fetch
    #   the value of the field, or else a Proc to be evaluated in the context of the record.
    #   Defaults to a method with the same name as the field.
    # @raise [RuntimeError] if a field with the same name was already added
    # @return the newly registered Field
    def add_field(name, type, options = {})
      field = Field.new(name, type, options)
      raise "Field #{name} already exists on index #{self.name}" if @fields.has_key?(field.name)
      @fields[field.name] = field
    end

    # Creates the domain and defines its index fields in Cloudsearch.
    # Index fields are (re)defined unconditionally for every registered field.
    def create
      Cloudsearchable.logger.info "Creating domain #{name}"
      CloudSearch.client.create_domain(:domain_name => name)

      # Create the fields for the index
      fields.values.each do |field|
        # Log the *field* name; the bare `name` here would be the domain name.
        Cloudsearchable.logger.info " ...creating #{field.type} field #{field.name}"
        field.define_in_domain name
      end
      Cloudsearchable.logger.info " ...done!"
    end

    # Asks CloudSearch to index the documents of this domain.
    def reindex
      CloudSearch.client.index_documents(:domain_name => name)
    end

    #
    # This queries the status of the domain from Cloudsearch and determines if
    # the domain needs to be reindexed. If so, it will initiate the reindex and
    # wait up to timeout seconds for it to complete. Default is 0. Reindexings
    # tend to take 15-30 minutes.
    #
    # The status is polled with exponential backoff (1s, 2s, 4s, ...); each
    # sleep is capped at the time remaining so the call never sleeps past the
    # deadline.
    #
    # @param timeout [Numeric] maximum number of seconds to wait
    # @return true if the changes are applied, false if the domain is still reindexing
    #
    def apply_changes(timeout = 0)
      status = domain_status
      reindex if status[:requires_index_documents]

      # We'll potentially sleep until the reindex has completed
      end_time = Time.now + timeout
      sleep_time = 1
      loop do
        status = domain_status
        remaining = end_time - Time.now
        break unless status[:processing] && remaining > 0

        sleep([sleep_time, remaining].min)
        sleep_time *= 2 # exponential backoff
      end

      !status[:processing] # processing is true as long as it is reindexing
    end

    # Add or replace the CloudSearch document for a particular version of a record
    def post_record(record, record_id, version)
      CloudSearch.post_sdf doc_endpoint, addition_sdf(record, record_id, version)
    end

    # Delete the CloudSearch document for a particular record (version must be greater than the last version pushed)
    def delete_record(record_id, version)
      CloudSearch.post_sdf doc_endpoint, deletion_sdf(record_id, version)
    end

    # Runs a search against this domain's search endpoint.
    #
    # @param params [Hash] query-string parameters, form-encoded onto the URL
    # @return the parsed JSON body of the HTTP response
    def execute_query(params)
      uri = URI("http://#{search_endpoint}/#{CloudSearch::API_VERSION}/search")
      uri.query = URI.encode_www_form(params)
      Cloudsearchable.logger.info "CloudSearch execute: #{uri}"
      JSON.parse(Net::HTTP.get_response(uri).body)
    end

    # Builds the SDF hash describing the deletion of a record.
    def deletion_sdf(record_id, version)
      {
        :type    => "delete",
        :id      => document_id(record_id),
        :version => version
      }
    end

    # Builds the SDF hash describing the addition (or replacement) of a record.
    def addition_sdf(record, record_id, version)
      {
        :type    => "add",
        :id      => document_id(record_id),
        :version => version,
        :lang    => "en", # FIXME - key off of marketplace
        :fields  => sdf_fields(record)
      }
    end

    # Generate a documentID that follows the CS restrictions
    # (the MD5 hex digest of the record id's string form).
    def document_id(record_id)
      Digest::MD5.hexdigest record_id.to_s
    end

    protected

    #
    # AWS Cloudsearchable Domain, as returned by describe_domains.
    #
    # @param force_reload force a re-fetch from the domain
    # @raise [DomainNotFound] if the response carries no domain status
    #
    def cloudsearch_domain(force_reload = false)
      if force_reload || !@domain
        @domain = CloudSearch.client.describe_domains(:domain_names => [name])
      end

      status = @domain[:domain_status_list]
      if status.nil? || status.empty?
        # Do not keep a "not found" answer cached: the next call should ask
        # AWS again instead of failing forever on stale data.
        @domain = nil
        raise(DomainNotFound, "Cloudsearchable could not find the domain '#{name}' in AWS. Check the name and the availability region.")
      end

      @domain
    end

    # Freshly fetched status entry for this domain.
    def domain_status
      cloudsearch_domain(true)[:domain_status_list][0]
    end

    # Collects field name => value for every registered field whose value for
    # the record is truthy (nil and false values are left out of the document).
    def sdf_fields(record)
      fields.values.each_with_object({}) do |field, sdf|
        value = field.value_for(record)
        sdf[field.name] = value if value
      end
    end

    # AWS CloudSearch Domain API to get search endpoint
    def search_endpoint
      @search_endpoint ||= cloudsearch_domain[:domain_status_list].first[:search_service][:endpoint]
    end

    # AWS CloudSearch Domain API to get doc endpoint
    def doc_endpoint
      @doc_endpoint ||= cloudsearch_domain[:domain_status_list].first[:doc_service][:endpoint]
    end
  end
end
@@ -0,0 +1,56 @@
1
+ require 'active_support/core_ext/hash'
2
+
3
+ #
4
+ # Class that represents the schema of a domain in CloudSearch
5
+ #
6
+ # In general, it will be instantiated by a helper class or module, like Cloudsearch_enabled.
7
+ #
8
module Cloudsearchable
  # Represents a single field in a CloudSearch index.
  #
  class Field
    FieldTypes = [:literal, :uint, :text].freeze
    # Maps the type of field to the name of the options hash when defining the field
    FieldTypeOptionsNames = {:literal => :literal_options, :uint => :u_int_options, :text => :text_options}.freeze
    # Maps from field type to the allowed set of options for the field
    FieldTypeOptionsKeys = {
      literal: [:default_value, :facet_enabled, :search_enabled, :result_enabled].freeze,
      uint:    [:default_value].freeze,
      text:    [:default_value, :facet_enabled, :result_enabled].freeze
    }.freeze

    attr_reader :name, :type, :source, :options

    # @param name field name (anything responding to to_sym)
    # @param type one of FieldTypes, given as a Symbol or a String
    # @param options [Hash] :source (Symbol or callable, defaults to the field
    #   name) plus any option allowed for the type in FieldTypeOptionsKeys;
    #   other keys are ignored
    # @raise [ArgumentError] if type is not one of FieldTypes
    def initialize(name, type, options = {})
      # Normalise before validating so that "text" is accepted just like :text.
      normalized_type = type.respond_to?(:to_sym) ? type.to_sym : type
      raise ArgumentError, "Invalid field type '#{type}'" unless FieldTypes.include?(normalized_type)

      @name = name.to_sym
      @type = normalized_type
      @source = options[:source] || @name
      @options = options.slice(*FieldTypeOptionsKeys[@type])
    end

    # Fetches this field's value from a record: a callable source is evaluated
    # in the context of the record, anything else is sent to it as a method name.
    def value_for(record)
      if @source.respond_to?(:call)
        record.instance_exec(&@source)
      else
        record.send(@source)
      end
    end

    # Defines this index field on the named domain through the CloudSearch client.
    def define_in_domain(domain_name)
      CloudSearch.client.define_index_field(
        :domain_name => domain_name,
        :index_field => definition
      )
    end

    # Index field description in the shape passed to define_index_field.
    def definition
      # http://docs.amazonwebservices.com/cloudsearch/latest/developerguide/API_IndexField.html
      {
        :index_field_name => name.to_s,
        :index_field_type => type.to_s,
        FieldTypeOptionsNames[type] => options
      }
    end
    protected :definition
  end
end
@@ -0,0 +1,218 @@
1
module Cloudsearchable
  # Raised by QueryChain#to_q when neither a clause nor a text query was given.
  class NoClausesError < StandardError; end
  # Raised when a query result carries a warning and fatal_warnings is enabled.
  class WarningInQueryResult < StandardError; end

  #
  # An object that represents a query to cloud search.
  #
  # The builder methods (where, text, order, limit, offset, returning) return
  # self so they can be chained. The query is executed lazily the first time
  # results are needed (found_count, each and the Enumerable methods); once
  # executed, the chain can no longer be modified.
  #
  class QueryChain
    include Enumerable

    attr_reader :domain, :fields

    # options:
    # - fatal_warnings: if true, raises a WarningInQueryResult exception on warning. Defaults to false
    def initialize(domain, options = {})
      @fatal_warnings = options.fetch(:fatal_warnings, false)
      @domain         = domain
      @q              = nil
      @clauses        = []
      @rank           = nil
      @limit          = 100000 # 10 is the CloudSearch default, 2kb limit will probably hit before this will
      @offset         = nil
      @fields         = Set.new
      @results        = nil
    end

    #
    # This method can be called in several different forms.
    #
    # To do an equality search on several fields, you can pass a single hash, e.g.:
    #
    #   Collection.search.where(customer_id: "12345", another_field: "Some value")
    #
    # To do a search on a single field, you can pass three parameters in the
    # form: where(field, op, value)
    #
    #   Collection.search.where(:customer_id, :==, 12345)
    #
    # To search for any of several possible values for a field, use the :any operator:
    #
    #   Collection.search.where(:product_group, :any, %w{gl_kitchen gl_grocery})
    #
    # Supported operators:
    # - :== / :eq and :!= on any scalar (the value is emitted as a quoted string)
    # - :any with an enumerable of scalars
    # - :<, :<=, :>, :>= on Integers only
    # - :within_range, whose value is emitted verbatim (unquoted)
    #
    # Raises RuntimeError for an unknown operator, for a range operator with a
    # non-Integer value, or when the first argument is neither Hash nor Symbol.
    #
    def where(field_or_hash, op = nil, value = nil)
      assert_not_materialized

      case field_or_hash
      when Hash
        field_or_hash.each_pair { |field, expected| where(field, :==, expected) }
      when Symbol
        @clauses << clause_for(field_or_hash, op, value)
      else
        raise "field_or_hash must be a Hash or Symbol, not a #{field_or_hash.class}"
      end

      self
    end

    #
    # Allows searching by text, overwriting any existing text search.
    #
    #   Collection.search.text('mens shoes')
    #
    # For more examples see http://docs.aws.amazon.com/cloudsearch/latest/developerguide/searching.text.html
    #
    def text(text)
      assert_not_materialized
      @q = text
      self
    end

    #
    # Set a rank expression on the query, overwriting any existing expression. Defaults to "-text_relevance"
    #
    #   Collection.search.order('created_at')  # order by the created_at field ascending
    #   Collection.search.order('-created_at') # descending order
    #
    # For more examples see http://docs.amazonwebservices.com/cloudsearch/latest/developerguide/tuneranking.html
    #
    def order(rank_expression)
      assert_not_materialized
      raise "order clause must be a string, not a #{rank_expression.class}" unless rank_expression.is_a? String
      @rank = rank_expression.to_s
      self
    end

    #
    # Limit the number of results returned from query to the given count.
    #
    #   Collection.search.limit(25)
    #
    def limit(count)
      assert_not_materialized
      raise "limit value must respond to to_i, #{count.class} does not" unless count.respond_to? :to_i
      @limit = count.to_i
      self
    end

    #
    # Offset the results returned by the query by the given count.
    #
    #   Collection.search.offset(250)
    #
    def offset(count)
      assert_not_materialized
      raise "offset value must respond to to_i, #{count.class} does not" unless count.respond_to? :to_i
      @offset = count.to_i
      self
    end

    #
    # Adds one or more fields to the returned result set, e.g.:
    #
    #   my_query.returning(:collection_id)
    #   my_query.returning(:collection_id, :created_at)
    #
    #   x = [:collection_id, :created_at]
    #   my_query.returning(x)
    #
    def returning(*fields)
      assert_not_materialized
      fields.flatten.each { |field| @fields << field }
      self
    end

    #
    # True if the query has been materialized (e.g. the search has been
    # executed).
    #
    def materialized?
      !@results.nil?
    end

    #
    # Executes the query, getting a result set, returns true if work was done,
    # false if the query was already materialized.
    # Every warning message in the result is logged; the first one raises
    # WarningInQueryResult when the chain was built with fatal_warnings: true.
    #
    def materialize!
      return false if materialized?

      @results = domain.execute_query(to_q)

      info = @results && @results["info"]
      messages = (info && info["messages"]) || []
      messages.each do |message|
        next unless message["severity"] == "warning"

        Cloudsearchable.logger.warn "Cloud Search Warning: #{message["code"]}: #{message["message"]}"
        raise(WarningInQueryResult, "#{message["code"]}: #{message["message"]}") if @fatal_warnings
      end

      true
    end

    # Number of matches reported by CloudSearch (executes the query if needed).
    # Raises RuntimeError if the response has no "hits" section.
    def found_count
      hits['found']
    end

    # Yields each hit of the result set (executes the query if needed).
    # Raises RuntimeError if the response has no "hits" section.
    def each(&block)
      hits['hit'].each(&block)
    end

    #
    # Turns this Query object into a query string hash that goes on the CloudSearch URL.
    # Raises NoClausesError when neither a where clause nor a text query was set.
    #
    def to_q
      raise NoClausesError, "no search terms were specified" if @clauses.empty? && (@q.nil? || @q.empty?)

      {
        q:     @q,
        bq:    (@clauses.empty? ? nil : "(and #{@clauses.join(' ')})"),
        rank:  @rank,
        size:  @limit,
        start: @offset,
        # return-fields is a comma separated list of field names
        :'return-fields' => @fields.map(&:to_s).join(',')
      }
    end

    private

    # Guards the builder methods: a chain that has already run cannot change.
    def assert_not_materialized
      raise "query has already been materialized and can no longer be modified" if materialized?
    end

    # Builds the boolean-query clause for a single (field, op, value) triple.
    def clause_for(field, op, value)
      case op
      when :within_range
        "#{field}:#{value}"
      when :==, :eq
        "#{field}:#{quote(value)}"
      when :any
        "(or #{value.map { |v| "#{field}:#{quote(v)}" }.join(' ')})"
      when :!=
        "(not #{field}:#{quote(value)})"
      when :>, :<, :>=, :<=
        raise "op #{op} requires an Integer value, not a #{value.class}" unless value.is_a?(Integer)
        range_clause(field, op, value)
      else
        raise "op #{op} is unrecognized"
      end
    end

    # Integer range clause; strict comparisons are expressed by shifting the
    # bound by one because the emitted ranges include their bounds.
    def range_clause(field, op, value)
      case op
      when :>  then "#{field}:#{value + 1}.."
      when :<  then "#{field}:..#{value - 1}"
      when :>= then "#{field}:#{value}.."
      else          "#{field}:..#{value}"
      end
    end

    # Wraps a value in single quotes, backslash-escaping embedded single
    # quotes and backslashes so the value cannot terminate the literal early.
    def quote(value)
      "'#{value.to_s.gsub(/['\\]/) { |char| "\\#{char}" }}'"
    end

    # The "hits" section of the (materialized) response.
    def hits
      materialize!
      response = @results.is_a?(Hash) ? @results : {}
      response['hits'] || raise("improperly formed response. hits parameter not available. messages: #{response["messages"]}")
    end
  end
end
@@ -0,0 +1,3 @@
1
module Cloudsearchable
  # Version string of the Cloudsearchable library.
  VERSION = '0.0.1'
end
@@ -0,0 +1,45 @@
1
+ require 'spec_helper'
2
+ require 'test_classes/cloud_searchable_test_class'
3
+
4
describe CloudSearch do
  # A sample record populated with the attributes the examples rely on.
  let(:item) do
    record = CloudSearchableSampleClassFactory.call.new
    record.destroyed = false
    record.lock_version = 1
    record.id = 1
    record.customer = '1234'
    record
  end

  # The "add" SDF document built by the record's :test_index for that record.
  let(:sdf_document) do
    index = item.class.cloudsearch_index(:test_index)
    index.send(:addition_sdf, item, item.id, item.lock_version)
  end

  let(:endpoint) { 'https://fake_end_point.amazon.com' }

  # Successful HTTP response whose body is supplied by the example.
  class MockHTTPOK < Net::HTTPOK
    attr_reader :body

    def initialize(body)
      @body = body
    end
  end

  # Failed HTTP response; the constructor is overridden to take no arguments.
  class MockHTTPBadRequest < Net::HTTPBadRequest
    def initialize; end
  end

  let(:success_response) do
    MockHTTPOK.new({"status" => "success", "adds" => 1, "deletes" => 0}.to_json)
  end

  it 'json parses the response' do
    Net::HTTP.any_instance.stub(:start).and_return { success_response }

    parsed = described_class.post_sdf(endpoint, sdf_document)
    parsed.should eq(JSON.parse(success_response.body))
  end

  it 'triggers error! on response its no not a Net::HTTPSuccess' do
    bad_response = MockHTTPBadRequest.new
    Net::HTTP.any_instance.stub(:start).and_return { bad_response }

    bad_response.should_receive(:error!)
    described_class.post_sdf(endpoint, sdf_document)
  end
end