aws_cloud_search 0.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.gitignore +17 -0
- data/.rspec +1 -0
- data/Gemfile +4 -0
- data/LICENSE +23 -0
- data/README.md +45 -0
- data/Rakefile +5 -0
- data/aws_cloud_search.gemspec +21 -0
- data/lib/aws_cloud_search.rb +79 -0
- data/lib/aws_cloud_search/cloud_search.rb +71 -0
- data/lib/aws_cloud_search/cloud_search_config.rb +5 -0
- data/lib/aws_cloud_search/document.rb +81 -0
- data/lib/aws_cloud_search/document_batch.rb +81 -0
- data/lib/aws_cloud_search/document_batcher.rb +35 -0
- data/lib/aws_cloud_search/exceptions.rb +17 -0
- data/lib/aws_cloud_search/search_request.rb +19 -0
- data/lib/aws_cloud_search/search_response.rb +37 -0
- data/lib/aws_cloud_search/version.rb +3 -0
- data/spec/aws_cloud_search/cloud_search_spec.rb +97 -0
- data/spec/aws_cloud_search/document_batch_spec.rb +114 -0
- data/spec/aws_cloud_search/document_spec.rb +38 -0
- data/spec/aws_cloud_search/search_response_spec.rb +31 -0
- data/spec/spec_helper.rb +7 -0
- metadata +96 -0
data/.gitignore
ADDED
data/.rspec
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
--drb
|
data/Gemfile
ADDED
data/LICENSE
ADDED
@@ -0,0 +1,23 @@
|
|
1
|
+
LICENSE
|
2
|
+
|
3
|
+
The MIT License
|
4
|
+
|
5
|
+
Copyright (c) 2012 Spoke Software, Inc.
|
6
|
+
|
7
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
8
|
+
of this software and associated documentation files (the "Software"), to deal
|
9
|
+
in the Software without restriction, including without limitation the rights
|
10
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
11
|
+
copies of the Software, and to permit persons to whom the Software is
|
12
|
+
furnished to do so, subject to the following conditions:
|
13
|
+
|
14
|
+
The above copyright notice and this permission notice shall be included in
|
15
|
+
all copies or substantial portions of the Software.
|
16
|
+
|
17
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
18
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
19
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
20
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
21
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
22
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
23
|
+
THE SOFTWARE.
|
data/README.md
ADDED
@@ -0,0 +1,45 @@
|
|
1
|
+
# AWSCloudSearch
|
2
|
+
|
3
|
+
## Description
|
4
|
+
|
5
|
+
This gem is an implementation of the Amazon Web Service CloudSearch API (http://aws.amazon.com/cloudsearch/).
|
6
|
+
|
7
|
+
The AWS CloudSearch service is comprised of three API end points: search, document batching, and configuration. This gem
|
8
|
+
currently supports only the search and document batching APIs.
|
9
|
+
|
10
|
+
## Roadmap
|
11
|
+
|
12
|
+
Spoke developed this library in a short period of time in order to migrate from IndexTank to AWS CloudSearch.
|
13
|
+
As such, there are a few features that are missing that we would like to build over time.
|
14
|
+
|
15
|
+
+ Implementation of the configuration API
|
16
|
+
+ Query builder
|
17
|
+
+ Faceting helpers
|
18
|
+
+ Spec tests that stub the AWS CloudSearch service
|
19
|
+
+ Sample usage in this README
|
20
|
+
|
21
|
+
## Installation
|
22
|
+
|
23
|
+
Add this line to your application's Gemfile:
|
24
|
+
|
25
|
+
gem 'aws_cloud_search'
|
26
|
+
|
27
|
+
And then execute:
|
28
|
+
|
29
|
+
$ bundle
|
30
|
+
|
31
|
+
Or install it yourself as:
|
32
|
+
|
33
|
+
$ gem install aws_cloud_search
|
34
|
+
|
35
|
+
## Usage
|
36
|
+
|
37
|
+
TODO: Write usage instructions here
|
38
|
+
|
39
|
+
## Contributing
|
40
|
+
|
41
|
+
1. Fork it
|
42
|
+
2. Create your feature branch (`git checkout -b my-new-feature`)
|
43
|
+
3. Commit your changes (`git commit -am 'Added some feature'`)
|
44
|
+
4. Push to the branch (`git push origin my-new-feature`)
|
45
|
+
5. Create new Pull Request
|
data/Rakefile
ADDED
@@ -0,0 +1,21 @@
|
|
1
|
+
# -*- encoding: utf-8 -*-
|
2
|
+
require File.expand_path('../lib/aws_cloud_search/version', __FILE__)
|
3
|
+
|
4
|
+
# Gem specification for aws_cloud_search.
# The packaged file list is taken from git, so the gem must be built from a
# git checkout with the `git` executable on the PATH.
Gem::Specification.new do |gem|
  gem.authors       = ["David Jensen", "Mike Javorski"]
  gem.email         = ["david.jensen@spoke.com", "mike.javorski@spoke.com"]
  gem.description   = %q{AWSCloudSearch Search gem}
  gem.summary       = %q{Implementation of the AWS CloudSearch API}
  gem.homepage      = ""

  # `git ls-files` prints one path per line, so split on the input record
  # separator ($/, "\n" by default). The output record separator ($\) is nil
  # by default, which makes String#split fall back to splitting on any
  # whitespace and would break paths that contain spaces.
  gem.files         = `git ls-files`.split($/)
  gem.executables   = gem.files.grep(%r{^bin/}).map{ |f| File.basename(f) }
  gem.test_files    = gem.files.grep(%r{^(test|spec|features)/})
  gem.name          = "aws_cloud_search"
  gem.require_paths = ["lib"]
  gem.version       = AWSCloudSearch::VERSION

  gem.add_dependency 'faraday_middleware', '>= 0.8.0'

  gem.add_development_dependency 'rspec', '>= 2.6.0'
end
|
@@ -0,0 +1,79 @@
|
|
1
|
+
require "aws_cloud_search/cloud_search"
|
2
|
+
require "aws_cloud_search/cloud_search_config"
|
3
|
+
require "aws_cloud_search/document"
|
4
|
+
require "aws_cloud_search/document_batch"
|
5
|
+
require "aws_cloud_search/document_batcher"
|
6
|
+
require "aws_cloud_search/exceptions"
|
7
|
+
require "aws_cloud_search/search_response"
|
8
|
+
require "aws_cloud_search/search_request"
|
9
|
+
require "aws_cloud_search/version"
|
10
|
+
|
11
|
+
require "faraday_middleware"
|
12
|
+
|
13
|
+
module AWSCloudSearch
  # CloudSearch API version string.
  API_VERSION = "2011-02-01"

  # AWS CloudSearch only allows XML 1.0 valid characters; this pattern matches
  # any character outside the accepted ranges.
  INVALID_CHAR_XML10 = /[^\u0009\u000a\u000d\u0020-\uD7FF\uE000-\uFFFD]/m
  # for future reference in case AWS-CS updates to XML 1.1 char compliance
  #INVALID_CHAR_XML11 = /[^\u0001-\uD7FF\uE000-\uFFFD]/m

  class << self
    # Builds the base URL of the search endpoint.
    # @param [String] domain CloudSearch domain name
    # @param [String] region AWS region, defaults to us-east-1
    # @return [String]
    def search_url(domain, region="us-east-1")
      "http://search-#{domain}.#{region}.cloudsearch.amazonaws.com"
    end

    # Builds the base URL of the document endpoint.
    # @param [String] domain CloudSearch domain name
    # @param [String] region AWS region, defaults to us-east-1
    # @return [String]
    def document_url(domain, region="us-east-1")
      "http://doc-#{domain}.#{region}.cloudsearch.amazonaws.com"
    end

    # @return [String] The URL of the configuration endpoint
    def configuration_url
      "https://cloudsearch.us-east-1.amazonaws.com"
    end

    # Creates a Faraday connection to the given endpoint with the status-code
    # middleware and the JSON encode/parse middlewares installed.
    # @param [String] url
    # @param [String] aws_access_key_id Accepted but not used by this method
    # @param [String] aws_secret_access_key Accepted but not used by this method
    # @return The configured Faraday connection
    def create_connection(url, aws_access_key_id=nil, aws_secret_access_key=nil)
      Faraday.new(url) { |stack|
        stack.use AWSCloudSearch::HttpCodeResponseMiddleware
        stack.use FaradayMiddleware::EncodeJson
        stack.use FaradayMiddleware::ParseJson
        stack.adapter Faraday.default_adapter

        # for future reference
        #conn.request :json, :content_type => /\bjson$/
        #conn.response :json, :content_type => /\bjson$/
        #conn.adapter Faraday.default_adapter
      }.tap do |conn|
        conn.headers['User-Agent'] = "AWSCloudSearch-Ruby-Client/#{VERSION}"
      end
    end
  end

  # Response middleware that turns non-2xx HTTP statuses into exceptions,
  # using the response body as the exception message.
  class HttpCodeResponseMiddleware < Faraday::Response::Middleware
    def initialize(app)
      super
      @parser = nil
    end

    # Raises the exception class that corresponds to the response status.
    # The specific codes (408, 509) are listed before their enclosing ranges
    # so that they take precedence.
    def on_complete(env)
      failure =
        case env[:status]
        when 200..299 then nil
        when 408      then RequestTimeout
        when 400..499 then HttpClientError
        when 509      then BandwidthLimitExceeded
        when 500..599 then HttpServerError
        else UnexpectedHTTPException
        end

      raise failure, env[:body] if failure
    end
  end

end
|
@@ -0,0 +1,71 @@
|
|
1
|
+
require "json"
|
2
|
+
require "aws_cloud_search"
|
3
|
+
|
4
|
+
module AWSCloudSearch
  # Client for a single CloudSearch domain. Holds one connection to the
  # document endpoint and one to the search endpoint.
  class CloudSearch

    # @param [String] domain CloudSearch domain name
    # @param [String] region AWS region, defaults to us-east-1
    def initialize(domain, region="us-east-1")
      @doc_conn = AWSCloudSearch.create_connection(AWSCloudSearch.document_url(domain, region))
      @search_conn = AWSCloudSearch.create_connection(AWSCloudSearch.search_url(domain, region))
    end

    # Sends a batch of document updates and deletes by invoking the CloudSearch documents/batch API
    # @param [DocumentBatch] doc_batch The batch of document adds and deletes to send
    # @return The parsed response body
    # @raise [ArgumentError] If doc_batch is not a DocumentBatch
    # @raise [AwsCloudSearchError] If the response body reports status 'error'
    def documents_batch(doc_batch)
      raise ArgumentError, "Invalid argument. Expected DocumentBatch, got #{doc_batch.class}." unless doc_batch.is_a? DocumentBatch

      resp = @doc_conn.post do |req|
        req.url "/#{AWSCloudSearch::API_VERSION}/documents/batch"
        req.headers['Content-Type'] = 'application/json'
        req.body = doc_batch.to_json
      end

      body = resp.body
      if body.is_a?(Hash)
        # A JSON-parsed body has String keys; Symbol keys are still honoured
        # in case the body was symbolized upstream.
        status = body['status'] || body[:status]
        if status == 'error'
          errors = body['errors'] || body[:errors] || []
          details = errors.map { |err| err.is_a?(Hash) ? (err['message'] || err[:message] || err) : err }
          raise AwsCloudSearchError, "AwsCloudSearchCloud::DocumentService batch returned #{errors.size} errors: #{details.join(';')}"
        end
      end

      body
    end

    # Performs a search
    # @param [SearchRequest] search_req
    # @return [SearchResponse]
    # @raise [ArgumentError] If search_req is not a SearchRequest
    # @raise [AwsCloudSearchError] (or a subclass) If the response carries an error
    def search(search_req)
      raise ArgumentError, "Invalid Type: search_request must be of type SearchRequest" unless search_req.is_a? SearchRequest

      resp = @search_conn.get do |req|
        req.url "/#{AWSCloudSearch::API_VERSION}/search", search_req.to_hash
      end

      search_response = SearchResponse.new(resp.body)
      raise_search_error(search_response) if search_response.error

      search_response
    end

    # Build a DocumentBatcher linked to this CloudSearch domain
    # @return [DocumentBatcher]
    def new_batcher
      DocumentBatcher.new(self)
    end

    private

    # Raises the exception matching the first message of an error response.
    # The messages are read from the parsed SearchResponse, not from the raw
    # HTTP response object.
    def raise_search_error(search_response)
      messages = search_response.messages
      raise AwsCloudSearchError, "Unknown error" if messages.nil? || messages.empty?

      code = messages.first['code']
      message = messages.first['message']
      msg = "#{code}: #{message}"

      case code
      when /WildcardTermLimit/
        raise WildcardTermLimit, msg
      when /InvalidFieldOrRankAliasInRankParameter/
        raise InvalidFieldOrRankAliasInRankParameter, msg
      when /UnknownFieldInMatchExpression/
        raise UnknownFieldInMatchExpression, msg
      when /IncorrectFieldTypeInMatchExpression/
        raise IncorrectFieldTypeInMatchExpression, msg
      when /InvalidMatchExpression/
        raise InvalidMatchExpression, msg
      when /UndefinedField/
        raise UndefinedField, msg
      else
        raise AwsCloudSearchError, "Unknown error. #{msg}"
      end
    end

  end
end
|
@@ -0,0 +1,81 @@
|
|
1
|
+
require "json"
|
2
|
+
|
3
|
+
module AWSCloudSearch
  # A single document operation (add or delete) destined for a DocumentBatch.
  class Document

    # A typed attribute accessor helper. When the value is set, if it does not match
    # the pre-defined type, an exception is thrown. nil is always accepted.
    # @param [String] name Name of the attribute
    # @param [Class] type The class type of the attribute
    def self.type_attr_accessor(name, type)
      ivar = "@#{name}"

      define_method(name) { instance_variable_get(ivar) }

      define_method("#{name}=") do |value|
        raise ArgumentError, "Invalid Type" unless value.nil? || value.is_a?(type)
        instance_variable_set(ivar, value)
      end
    end

    type_attr_accessor :version, Integer
    type_attr_accessor :lang, String
    attr_accessor :type
    attr_reader :fields, :id

    # Initializes the object
    # @param [boolean] auto_version Set to true to automatically set the version, default is false
    def initialize(auto_version=false)
      @fields = {}
      new_version if auto_version
    end

    # Adds a new field to the document
    # @param [String] name Name of the document field
    # @param [String, Integer] value Value of the document field
    # @raise [ArgumentError] If a String value contains a character matched by INVALID_CHAR_XML10
    def add_field(name, value)
      raise ArgumentError, "Found invalid XML 1.0 unicode character(s)" if value.is_a?(String) && value =~ INVALID_CHAR_XML10
      @fields[name] = value
    end

    # The id field must conform to a special format: a lowercase letter or
    # digit followed by lowercase letters, digits or underscores.
    # @param [#to_s] id
    # @raise [ArgumentError] If id is nil or its string form does not match the format
    def id=(id)
      raise ArgumentError, "Invalid ID: Document id must be a String or respond to #to_s" if id.nil?

      candidate = id.to_s
      # \A and \z anchor the whole string; ^ and $ only anchor a line, which
      # would accept a multi-line value whose first line happens to be valid.
      raise ArgumentError, "Invalid ID: Document id must match the regex [a-z0-9][a-z0-9_]*$" unless candidate =~ /\A[a-z0-9][a-z0-9_]*\z/

      # Assign only after validation so a rejected id never replaces a good one.
      @id = candidate
    end

    # Resets the fields.
    def clear_fields
      @fields = {}
    end

    # Set a new version automatically (current Unix time in seconds)
    def new_version
      @version = Time.now.to_i
    end

    # Return this object as a hash. Fields whose value is nil are removed
    # from the document as a side effect. The :lang key is omitted for
    # documents of type 'delete'.
    # @return [Hash]
    def to_hash
      @fields.delete_if { |_key, val| val.nil? }
      h = {
        :type => @type,
        :id => @id,
        :version => @version,
        :fields => @fields
      }
      h[:lang] = @lang unless @type == 'delete'

      h
    end

    # Return this object as json. Accepts the optional generator arguments so
    # the document can also be serialized when nested inside other structures
    # (e.g. JSON.generate([doc])).
    # @return [String]
    def to_json(*args)
      to_hash.to_json(*args)
    end

  end
end
|
@@ -0,0 +1,81 @@
|
|
1
|
+
require "json"
|
2
|
+
require "aws_cloud_search"
|
3
|
+
|
4
|
+
module AWSCloudSearch
  # An ordered collection of document add and delete operations, tracked
  # against a preferred and a maximum size in bytes.
  class DocumentBatch

    # @return [Integer] Sum of the JSON byte sizes of the documents in the batch
    attr_reader :bytesize

    # Constructor
    # @param [Integer] pref_bytesize The preferred size of the batch in bytes. May be exceeded, if so batch is considered full.
    # @param [Integer] max_bytesize The batch size in bytes must not exceed this number. Must be greater than pref_bytesize.
    # @raise [ArgumentError] If pref_bytesize is not less than max_bytesize
    def initialize(pref_bytesize=1048576, max_bytesize= 5242880)
      raise ArgumentError, "pref_bytesize must be less than max_bytesize" if pref_bytesize >= max_bytesize

      @pref_bytesize = pref_bytesize
      @max_bytesize = max_bytesize
      @batch_add = []
      @batch_delete = []
      @bytesize = 0
    end

    # Adds a document with the add operation to the batch. Sets the
    # document's type to 'add'.
    # @param [Document] doc
    # @raise [ArgumentError] If parameter is not an AWSCloudSearch::Document,
    #   or its JSON contains a character matched by INVALID_CHAR_XML10
    # @raise [AwsCloudSearchError] If adding the document would exceed max_bytesize
    def add_document(doc)
      raise ArgumentError, "Invalid Type" unless doc.is_a? Document

      doc.type = 'add'
      json = doc.to_json
      doc_bytesize = json.bytesize

      # A StandardError subclass is raised (rather than bare Exception) so a
      # plain `rescue` in calling code can handle the overflow.
      raise AwsCloudSearchError, "Max batch size exceeded, document add was not added to batch." if (doc_bytesize + @bytesize) > @max_bytesize
      raise ArgumentError, "Found invalid XML 1.0 unicode characters." if json =~ INVALID_CHAR_XML10

      @bytesize += doc_bytesize
      @batch_add << doc
    end

    # Adds a delete document operation to the batch. Removes lang and fields from the object as they are not
    # required for delete operations. Note that this modifies the document passed in.
    # @param [Document] doc The document to delete
    # @raise [ArgumentError] If parameter is not an AWSCloudSearch::Document
    # @raise [AwsCloudSearchError] If adding the document would exceed max_bytesize
    # TODO: refactor to only use the required fields, hide the document construction from the user
    def delete_document(doc)
      raise ArgumentError, "Invalid Type" unless doc.is_a? Document

      doc.type = 'delete'
      doc.lang = nil
      doc.clear_fields
      doc_bytesize = doc.to_json.bytesize

      raise AwsCloudSearchError, "Max batch size exceeded, document delete was not added to batch." if (doc_bytesize + @bytesize) > @max_bytesize

      @bytesize += doc_bytesize
      @batch_delete << doc
    end

    # @return [Integer] Number of items in the batch
    def size
      @batch_add.size + @batch_delete.size
    end

    # @return [Boolean] True if the bytesize of the batch has reached or exceeded the preferred bytesize
    def full?
      @bytesize >= @pref_bytesize
    end

    # Adds are serialized before deletes. Accepts the optional generator
    # arguments so a batch nested inside another structure can be serialized.
    # @return [String] The JSON string representation of the DocumentBatch
    def to_json(*args)
      (@batch_add + @batch_delete).map { |item| item.to_hash }.to_json(*args)
    end

    # Empties the batch and resets the byte counter.
    def clear
      @batch_add.clear
      @batch_delete.clear
      @bytesize = 0
    end

  end
end
|
@@ -0,0 +1,35 @@
|
|
1
|
+
require "json"
|
2
|
+
require "aws_cloud_search"
|
3
|
+
|
4
|
+
module AWSCloudSearch
  # Convenience class that allows continuous batch additions: documents are
  # accumulated in a DocumentBatch, and once that batch reports itself full
  # it is sent before the next document is accepted.
  class DocumentBatcher

    # @param [CloudSearch] cs The client used to send the batches
    def initialize(cs)
      @cs = cs
      @batch = DocumentBatch.new
    end

    # Queues a document add, sending the pending batch first if it is full.
    # @param [Document] doc
    def add_document(doc)
      flush if @batch.full?

      @batch.add_document doc
    end

    # Queues a document delete, sending the pending batch first if it is full.
    # @param [Document] doc
    def delete_document(doc)
      flush if @batch.full?

      @batch.delete_document doc
    end

    # Sends the batch of adds and deletes to CloudSearch Search and then clears the current batch.
    # Does nothing when the batch is empty, so that an unconditional final
    # flush never posts an empty batch.
    # TODO: (dj) implement connection retry logic
    def flush
      return if @batch.size.zero?

      @cs.documents_batch @batch
      @batch.clear
    end

  end

end
|
@@ -0,0 +1,17 @@
|
|
1
|
+
module AWSCloudSearch
  # CloudSearch API errors: a common base class plus one subclass per error
  # code.
  AwsCloudSearchError = Class.new(StandardError)

  %w[
    WildcardTermLimit
    InvalidFieldOrRankAliasInRankParameter
    UnknownFieldInMatchExpression
    IncorrectFieldTypeInMatchExpression
    InvalidMatchExpression
    UndefinedField
  ].each { |error_name| const_set(error_name, Class.new(AwsCloudSearchError)) }

  # HTTP errors. These derive from StandardError directly, not from
  # AwsCloudSearchError.
  UnexpectedHTTPException = Class.new(StandardError)
  HttpClientError = Class.new(StandardError)
  HttpServerError = Class.new(StandardError)
  RequestTimeout = Class.new(HttpClientError)
  BandwidthLimitExceeded = Class.new(HttpServerError)
end
|
@@ -0,0 +1,19 @@
|
|
1
|
+
module AWSCloudSearch
  # Plain holder for the parameters of a search request.
  class SearchRequest

    attr_accessor :q, :bq, :rank, :results_type, :return_fields, :size, :start

    # Builds the query-parameter hash for the request. Parameters that are
    # nil are left out. return_fields may be an Array of field names or a
    # single String; an Array is joined with commas.
    # @return [Hash] Parameter name => value
    def to_hash
      params = {
        'q' => @q,
        'bq' => @bq,
        'rank' => @rank,
        'size' => @size,
        'start' => @start,
        'results-type' => @results_type
      }
      # Array() lets a plain String pass through instead of failing on #join.
      params['return-fields'] = Array(@return_fields).join(',') unless @return_fields.nil?

      params.reject { |_name, value| value.nil? }
    end

  end
end
|
@@ -0,0 +1,37 @@
|
|
1
|
+
module AWSCloudSearch
  # Wraps the parsed JSON body of a search response and exposes its
  # well-known fields as attributes.
  class SearchResponse
    attr_reader :response
    attr_reader :hits

    alias :results :hits

    # error is an undocumented field that occurs when an error is returned
    FIELDS = [ :match_expr, :rank, :cpu_time_ms, :time_ms, :rid, :found, :start, :error, :messages ].freeze
    FIELDS.each { |f| attr_accessor f }

    # Takes in the hash, representing the json object returned from a search request.
    # Each field is looked up under 'info', then under 'hits', then at the
    # top level, using the dashed form of its name (e.g. 'cpu-time-ms').
    # A nil or non-Hash response, or a nil 'info'/'hits' section, is treated
    # as empty rather than raising.
    # @param [Hash, nil] response
    def initialize(response)
      @response = response

      body = response.is_a?(Hash) ? response : {}
      info = body['info'].is_a?(Hash) ? body['info'] : {}
      hits = body['hits'].is_a?(Hash) ? body['hits'] : {}

      FIELDS.each do |f|
        key = f.to_s.gsub('_', '-')
        val = info[key] || hits[key] || body[key]
        instance_variable_set("@#{f}", val) unless val.nil?
      end

      @hits = hits['hit'] if body.key?('hits')
    end

    # @return [Integer] Number of hits carried by this response, 0 when there are none
    def result_size
      @hits ? @hits.size : 0
    end

  end
end
|
@@ -0,0 +1,97 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
|
3
|
+
# These tests require that your domain index contains the following fields:
|
4
|
+
# - name: text
|
5
|
+
# - type: text
|
6
|
+
# - summary: text
|
7
|
+
# - num_links: uint
|
8
|
+
describe AWSCloudSearch::CloudSearch do
|
9
|
+
|
10
|
+
let(:ds) { AWSCloudSearch::CloudSearch.new(ENV['CLOUDSEARCH_DOMAIN']) }
|
11
|
+
|
12
|
+
it "should send document batch" do
|
13
|
+
batch = AWSCloudSearch::DocumentBatch.new
|
14
|
+
|
15
|
+
doc1 = AWSCloudSearch::Document.new(true)
|
16
|
+
doc1.id = Array.new( 8 ) { rand(256) }.pack('C*').unpack('H*').first
|
17
|
+
doc1.lang = 'en'
|
18
|
+
doc1.add_field('name', 'Jane Williams')
|
19
|
+
doc1.add_field('type', 'person')
|
20
|
+
|
21
|
+
doc2 = AWSCloudSearch::Document.new(true)
|
22
|
+
doc2.id = Array.new( 8 ) { rand(256) }.pack('C*').unpack('H*').first
|
23
|
+
doc2.lang = 'en'
|
24
|
+
doc2.add_field :name, 'Bob Dobalina'
|
25
|
+
doc2.add_field :type, 'person'
|
26
|
+
|
27
|
+
batch.add_document doc1
|
28
|
+
batch.add_document doc2
|
29
|
+
ds.documents_batch(batch)
|
30
|
+
end
|
31
|
+
|
32
|
+
it "should delete a document" do
|
33
|
+
id = 'joeblotzdelete_test'
|
34
|
+
batch1 = AWSCloudSearch::DocumentBatch.new
|
35
|
+
doc1 = AWSCloudSearch::Document.new(true)
|
36
|
+
doc1.id = id
|
37
|
+
doc1.lang = 'en'
|
38
|
+
doc1.add_field('name', 'Joe Blotz Delete Test')
|
39
|
+
doc1.add_field('type', 'person')
|
40
|
+
batch1.add_document doc1
|
41
|
+
ds.documents_batch(batch1)
|
42
|
+
|
43
|
+
batch2 = AWSCloudSearch::DocumentBatch.new
|
44
|
+
doc2 = AWSCloudSearch::Document.new(true)
|
45
|
+
doc2.id = id
|
46
|
+
batch2.delete_document doc2
|
47
|
+
ds.documents_batch(batch2)
|
48
|
+
end
|
49
|
+
|
50
|
+
it "should raise ArgumentError for invalid XML 1.0 chars" do
|
51
|
+
batch = AWSCloudSearch::DocumentBatch.new
|
52
|
+
|
53
|
+
doc1 = AWSCloudSearch::Document.new(true)
|
54
|
+
id = Time.now.to_i.to_s
|
55
|
+
doc1.id = id
|
56
|
+
doc1.lang = 'en'
|
57
|
+
doc1.add_field('name', "Jane Williams")
|
58
|
+
doc1.add_field('type', 'person')
|
59
|
+
|
60
|
+
# \\uD800 is not a valid UTF-8 and it this line of code may cause your debugger to break
|
61
|
+
expect {doc1.add_field("summary", "This is a REALLY bad char, not even UTF-8 acceptable: \uD800")}.to raise_error(ArgumentError)
|
62
|
+
|
63
|
+
#expect { batch.add_document doc1 }.to raise_error(ArgumentError)
|
64
|
+
|
65
|
+
doc2 = AWSCloudSearch::Document.new(true)
|
66
|
+
id = Time.now.to_i.to_s
|
67
|
+
doc2.id = id
|
68
|
+
doc2.lang = 'en'
|
69
|
+
doc2.add_field('name', "Brian Williams")
|
70
|
+
doc2.add_field('type', 'person')
|
71
|
+
expect {doc2.add_field("summary", "This is a bad char for XML 1.0: \v")}.to raise_error(ArgumentError)
|
72
|
+
|
73
|
+
doc2.instance_variable_get("@fields")['how_did_i_get_here'] = "This is a bad char for XML 1.0: \ufffe"
|
74
|
+
expect { batch.add_document doc2 }.to raise_error(ArgumentError)
|
75
|
+
|
76
|
+
end
|
77
|
+
|
78
|
+
|
79
|
+
|
80
|
+
it "should return a DocumentBatcher instance for new_batcher" do
|
81
|
+
ds.new_batcher.should be_an(AWSCloudSearch::DocumentBatcher)
|
82
|
+
end
|
83
|
+
|
84
|
+
it "should search" do
|
85
|
+
sr = AWSCloudSearch::SearchRequest.new
|
86
|
+
sr.bq = "(and name:'Jane')"
|
87
|
+
sr.return_fields = %w(logo_url name type)
|
88
|
+
sr.size = 10
|
89
|
+
sr.start = 0
|
90
|
+
sr.results_type = 'json'
|
91
|
+
|
92
|
+
res = ds.search(sr)
|
93
|
+
|
94
|
+
res.should be_an(AWSCloudSearch::SearchResponse)
|
95
|
+
end
|
96
|
+
|
97
|
+
end
|
@@ -0,0 +1,114 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
|
3
|
+
describe AWSCloudSearch::DocumentBatch do

  let(:batch) { AWSCloudSearch::DocumentBatch.new }

  # A small, valid document suitable for add operations.
  let(:sample_add_doc) do
    AWSCloudSearch::Document.new(true).tap do |d|
      d.id = '73e'
      d.lang = 'en'
      d.add_field('name', 'Jane Williams')
      d.add_field('type', 'person')
    end
  end

  # A document already shaped like a delete operation.
  let(:sample_delete_doc) do
    AWSCloudSearch::Document.new(true).tap do |d|
      d.type = 'delete' # we have to set this here so that delete doc bytesize calculations are correct
      d.id = '47p'
      d.lang = nil
    end
  end

  it "should not instantiate" do
    # pref_bytesize must be strictly less than max_bytesize
    expect { AWSCloudSearch::DocumentBatch.new(100, 100) }.to raise_error(ArgumentError)
    expect { AWSCloudSearch::DocumentBatch.new(101, 100) }.to raise_error(ArgumentError)
  end

  it "should instantiate" do
    expect { AWSCloudSearch::DocumentBatch.new }.to_not raise_error
    expect { AWSCloudSearch::DocumentBatch.new(100, 101) }.to_not raise_error
  end

  it "should raise error when passed an invalid object type" do
    expect { batch.add_document("Hello") }.to raise_error(ArgumentError)
    expect { batch.delete_document("Hello") }.to raise_error(ArgumentError)
  end

  it "should not raise error when passed a Document" do
    expect { batch.add_document(sample_add_doc) }.to_not raise_error(ArgumentError)
    expect { batch.delete_document(sample_delete_doc) }.to_not raise_error(ArgumentError)
  end

  it "should return the correct size" do
    batch.add_document sample_add_doc
    batch.size.should eq(1)

    batch.delete_document sample_delete_doc
    batch.size.should eq(2)
  end

  it "should raise error when the max batch size is exceeded" do
    small_batch = AWSCloudSearch::DocumentBatch.new(1, 10)
    expect { small_batch.add_document(sample_add_doc) }.to raise_error
    expect { small_batch.delete_document(sample_delete_doc) }.to raise_error
  end

  it "should be full" do
    bytesize = sample_add_doc.to_json.bytesize

    b1 = AWSCloudSearch::DocumentBatch.new(bytesize)
    b1.add_document sample_add_doc
    b1.full?.should be_true

    b2 = AWSCloudSearch::DocumentBatch.new(bytesize-1)
    b2.add_document sample_add_doc
    b2.full?.should be_true

    bytesize = sample_delete_doc.to_json.bytesize

    b3 = AWSCloudSearch::DocumentBatch.new(bytesize)
    b3.delete_document sample_delete_doc
    b3.full?.should be_true

    b4 = AWSCloudSearch::DocumentBatch.new(bytesize-1)
    b4.delete_document sample_delete_doc
    b4.full?.should be_true
  end

  it "should return the total bytesize of all docs" do
    batch.bytesize.should eq(0)

    batch.add_document sample_add_doc
    bytesize = sample_add_doc.to_json.bytesize
    batch.bytesize.should eq(bytesize)

    batch.delete_document sample_delete_doc
    bytesize += sample_delete_doc.to_json.bytesize
    batch.bytesize.should eq(bytesize)
  end

  it "should not be full" do
    batch.add_document sample_add_doc
    batch.full?.should_not be_true

    # Use the delete fixture here: passing the add fixture would mutate the
    # very document that was just added to the batch.
    batch.delete_document sample_delete_doc
    batch.full?.should_not be_true
  end

  it "should clear" do
    clear_batch = AWSCloudSearch::DocumentBatch.new
    clear_batch.add_document sample_add_doc
    clear_batch.delete_document sample_delete_doc

    clear_batch.bytesize.should be > 0
    clear_batch.size.should be > 0

    clear_batch.clear

    clear_batch.bytesize.should eq(0)
    clear_batch.size.should eq(0)
  end

end
|
@@ -0,0 +1,38 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
|
3
|
+
describe AWSCloudSearch::Document do
  let(:doc) { AWSCloudSearch::Document.new }

  context "#id=" do
    it "should accept a String-able value (Integer)" do
      expect { doc.id = 123456789 }.to_not raise_error
    end

    it "should accept a compliant String" do
      expect { doc.id = "abcdef" }.to_not raise_error
    end

    it "should not accept a non-compliant String" do
      expect { doc.id = 'AZ12' }.to raise_error(ArgumentError)
      expect { doc.id = '!@#$%^&*()AZ' }.to raise_error(ArgumentError)
      expect { doc.id = '_abc123' }.to raise_error(ArgumentError)
    end

    it "should not accept nil" do
      expect { doc.id = nil }.to raise_error(ArgumentError)
    end
  end

  context "#type_attr_accessor attributes" do
    it "should accept values of proper type" do
      expect { doc.lang = 'abcd' }.to_not raise_error
      expect { doc.version = 1234 }.to_not raise_error
    end

    it "should not accept values of incorrect type" do
      expect { doc.lang = 1234 }.to raise_error(ArgumentError)
      # Must be an assignment: `doc.version "abcd"` would call the reader
      # with an argument and raise ArgumentError for the wrong reason,
      # never reaching the typed setter.
      expect { doc.version = "abcd" }.to raise_error(ArgumentError)
    end
  end

end
|
@@ -0,0 +1,31 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
|
3
|
+
describe AWSCloudSearch::SearchResponse do
  # Request id carried in the 'info' section of the sample response.
  let(:request_id) { '6ddcaa561c05c4cc221cb551e21a9631b979b9aa5297fab17731a8b9f863b20423151ddcd9b246caee73334112c96801' }

  # Parsed JSON body of a successful search with a single hit.
  let(:raw_response) do
    {
      'hits' => { 'found' => 1, 'start' => 0, 'hit' => [ { 'id' => '2e'} ] },
      'info' => { 'cpu-time-ms' => 0, 'time-ms' => 2, 'rid' => request_id }
    }
  end

  it "should initialize from hash" do
    parsed = AWSCloudSearch::SearchResponse.new(raw_response)

    parsed.found.should eq(1)
    parsed.start.should eq(0)
    parsed.cpu_time_ms.should eq(0)
    parsed.time_ms.should eq(2)
    parsed.rid.should eq(request_id)
  end
end
|
data/spec/spec_helper.rb
ADDED
metadata
ADDED
@@ -0,0 +1,96 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: aws_cloud_search
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.0.1
|
5
|
+
prerelease:
|
6
|
+
platform: ruby
|
7
|
+
authors:
|
8
|
+
- David Jensen
|
9
|
+
- Mike Javorski
|
10
|
+
autorequire:
|
11
|
+
bindir: bin
|
12
|
+
cert_chain: []
|
13
|
+
date: 2012-04-18 00:00:00.000000000Z
|
14
|
+
dependencies:
|
15
|
+
- !ruby/object:Gem::Dependency
|
16
|
+
name: faraday_middleware
|
17
|
+
requirement: &9414220 !ruby/object:Gem::Requirement
|
18
|
+
none: false
|
19
|
+
requirements:
|
20
|
+
- - ! '>='
|
21
|
+
- !ruby/object:Gem::Version
|
22
|
+
version: 0.8.0
|
23
|
+
type: :runtime
|
24
|
+
prerelease: false
|
25
|
+
version_requirements: *9414220
|
26
|
+
- !ruby/object:Gem::Dependency
|
27
|
+
name: rspec
|
28
|
+
requirement: &9413280 !ruby/object:Gem::Requirement
|
29
|
+
none: false
|
30
|
+
requirements:
|
31
|
+
- - ! '>='
|
32
|
+
- !ruby/object:Gem::Version
|
33
|
+
version: 2.6.0
|
34
|
+
type: :development
|
35
|
+
prerelease: false
|
36
|
+
version_requirements: *9413280
|
37
|
+
description: AWSCloudSearch Search gem
|
38
|
+
email:
|
39
|
+
- david.jensen@spoke.com
|
40
|
+
- mike.javorski@spoke.com
|
41
|
+
executables: []
|
42
|
+
extensions: []
|
43
|
+
extra_rdoc_files: []
|
44
|
+
files:
|
45
|
+
- .gitignore
|
46
|
+
- .rspec
|
47
|
+
- Gemfile
|
48
|
+
- LICENSE
|
49
|
+
- README.md
|
50
|
+
- Rakefile
|
51
|
+
- aws_cloud_search.gemspec
|
52
|
+
- lib/aws_cloud_search.rb
|
53
|
+
- lib/aws_cloud_search/cloud_search.rb
|
54
|
+
- lib/aws_cloud_search/cloud_search_config.rb
|
55
|
+
- lib/aws_cloud_search/document.rb
|
56
|
+
- lib/aws_cloud_search/document_batch.rb
|
57
|
+
- lib/aws_cloud_search/document_batcher.rb
|
58
|
+
- lib/aws_cloud_search/exceptions.rb
|
59
|
+
- lib/aws_cloud_search/search_request.rb
|
60
|
+
- lib/aws_cloud_search/search_response.rb
|
61
|
+
- lib/aws_cloud_search/version.rb
|
62
|
+
- spec/aws_cloud_search/cloud_search_spec.rb
|
63
|
+
- spec/aws_cloud_search/document_batch_spec.rb
|
64
|
+
- spec/aws_cloud_search/document_spec.rb
|
65
|
+
- spec/aws_cloud_search/search_response_spec.rb
|
66
|
+
- spec/spec_helper.rb
|
67
|
+
homepage: ''
|
68
|
+
licenses: []
|
69
|
+
post_install_message:
|
70
|
+
rdoc_options: []
|
71
|
+
require_paths:
|
72
|
+
- lib
|
73
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
74
|
+
none: false
|
75
|
+
requirements:
|
76
|
+
- - ! '>='
|
77
|
+
- !ruby/object:Gem::Version
|
78
|
+
version: '0'
|
79
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
80
|
+
none: false
|
81
|
+
requirements:
|
82
|
+
- - ! '>='
|
83
|
+
- !ruby/object:Gem::Version
|
84
|
+
version: '0'
|
85
|
+
requirements: []
|
86
|
+
rubyforge_project:
|
87
|
+
rubygems_version: 1.8.17
|
88
|
+
signing_key:
|
89
|
+
specification_version: 3
|
90
|
+
summary: Implementation of the AWS CloudSearch API
|
91
|
+
test_files:
|
92
|
+
- spec/aws_cloud_search/cloud_search_spec.rb
|
93
|
+
- spec/aws_cloud_search/document_batch_spec.rb
|
94
|
+
- spec/aws_cloud_search/document_spec.rb
|
95
|
+
- spec/aws_cloud_search/search_response_spec.rb
|
96
|
+
- spec/spec_helper.rb
|