aws_cloud_search 0.0.1
Sign up to get free protection for your applications and to get access to all the features.
- data/.gitignore +17 -0
- data/.rspec +1 -0
- data/Gemfile +4 -0
- data/LICENSE +23 -0
- data/README.md +45 -0
- data/Rakefile +5 -0
- data/aws_cloud_search.gemspec +21 -0
- data/lib/aws_cloud_search.rb +79 -0
- data/lib/aws_cloud_search/cloud_search.rb +71 -0
- data/lib/aws_cloud_search/cloud_search_config.rb +5 -0
- data/lib/aws_cloud_search/document.rb +81 -0
- data/lib/aws_cloud_search/document_batch.rb +81 -0
- data/lib/aws_cloud_search/document_batcher.rb +35 -0
- data/lib/aws_cloud_search/exceptions.rb +17 -0
- data/lib/aws_cloud_search/search_request.rb +19 -0
- data/lib/aws_cloud_search/search_response.rb +37 -0
- data/lib/aws_cloud_search/version.rb +3 -0
- data/spec/aws_cloud_search/cloud_search_spec.rb +97 -0
- data/spec/aws_cloud_search/document_batch_spec.rb +114 -0
- data/spec/aws_cloud_search/document_spec.rb +38 -0
- data/spec/aws_cloud_search/search_response_spec.rb +31 -0
- data/spec/spec_helper.rb +7 -0
- metadata +96 -0
data/.gitignore
ADDED
data/.rspec
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
--drb
|
data/Gemfile
ADDED
data/LICENSE
ADDED
@@ -0,0 +1,23 @@
|
|
1
|
+
LICENSE
|
2
|
+
|
3
|
+
The MIT License
|
4
|
+
|
5
|
+
Copyright (c) 2012 Spoke Software, Inc.
|
6
|
+
|
7
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
8
|
+
of this software and associated documentation files (the "Software"), to deal
|
9
|
+
in the Software without restriction, including without limitation the rights
|
10
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
11
|
+
copies of the Software, and to permit persons to whom the Software is
|
12
|
+
furnished to do so, subject to the following conditions:
|
13
|
+
|
14
|
+
The above copyright notice and this permission notice shall be included in
|
15
|
+
all copies or substantial portions of the Software.
|
16
|
+
|
17
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
18
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
19
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
20
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
21
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
22
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
23
|
+
THE SOFTWARE.
|
data/README.md
ADDED
@@ -0,0 +1,45 @@
|
|
1
|
+
# AWSCloudSearch
|
2
|
+
|
3
|
+
## Description
|
4
|
+
|
5
|
+
This gem is an implementation of the Amazon Web Service CloudSearch API (http://aws.amazon.com/cloudsearch/).
|
6
|
+
|
7
|
+
The AWS CloudSearch service is comprised of three API end points: search, document batching, and configuration. This gem
|
8
|
+
currently supports only the search and document batching APIs.
|
9
|
+
|
10
|
+
## Roadmap
|
11
|
+
|
12
|
+
Spoke developed this library in a short period of time in order to migrate from IndexTank to AWS CloudSearch.
|
13
|
+
As such, there are a few features that are missing that we would like to build over time.
|
14
|
+
|
15
|
+
+ Implementation of the configuration API
|
16
|
+
+ Query builder
|
17
|
+
+ Faceting helpers
|
18
|
+
+ Spec tests that stub the AWS CloudSearch service
|
19
|
+
+ Sample usage in this README
|
20
|
+
|
21
|
+
## Installation
|
22
|
+
|
23
|
+
Add this line to your application's Gemfile:
|
24
|
+
|
25
|
+
gem 'aws_cloud_search'
|
26
|
+
|
27
|
+
And then execute:
|
28
|
+
|
29
|
+
$ bundle
|
30
|
+
|
31
|
+
Or install it yourself as:
|
32
|
+
|
33
|
+
$ gem install aws_cloud_search
|
34
|
+
|
35
|
+
## Usage
|
36
|
+
|
37
|
+
TODO: Write usage instructions here
|
38
|
+
|
39
|
+
## Contributing
|
40
|
+
|
41
|
+
1. Fork it
|
42
|
+
2. Create your feature branch (`git checkout -b my-new-feature`)
|
43
|
+
3. Commit your changes (`git commit -am 'Added some feature'`)
|
44
|
+
4. Push to the branch (`git push origin my-new-feature`)
|
45
|
+
5. Create new Pull Request
|
data/Rakefile
ADDED
@@ -0,0 +1,21 @@
|
|
1
|
+
# -*- encoding: utf-8 -*-
|
2
|
+
require File.expand_path('../lib/aws_cloud_search/version', __FILE__)
|
3
|
+
|
4
|
+
# Gem specification for aws_cloud_search.
Gem::Specification.new do |gem|
  gem.name          = "aws_cloud_search"
  gem.version       = AWSCloudSearch::VERSION
  gem.authors       = ["David Jensen", "Mike Javorski"]
  gem.email         = ["david.jensen@spoke.com", "mike.javorski@spoke.com"]
  gem.description   = %q{AWSCloudSearch Search gem}
  gem.summary       = %q{Implementation of the AWS CloudSearch API}
  gem.homepage      = ""
  # The bundled LICENSE file is the MIT License.
  gem.licenses      = ["MIT"]

  # `git ls-files` prints one path per line. Split on newlines explicitly:
  # the previous `split($\)` used the *output* record separator, which is nil
  # by default and therefore split on any whitespace (breaking paths that
  # contain spaces).
  gem.files         = `git ls-files`.split("\n")
  gem.executables   = gem.files.grep(%r{^bin/}).map { |f| File.basename(f) }
  gem.test_files    = gem.files.grep(%r{^(test|spec|features)/})
  gem.require_paths = ["lib"]

  gem.add_dependency 'faraday_middleware', '>= 0.8.0'

  gem.add_development_dependency 'rspec', '>= 2.6.0'
end
|
@@ -0,0 +1,79 @@
|
|
1
|
+
require "aws_cloud_search/cloud_search"
|
2
|
+
require "aws_cloud_search/cloud_search_config"
|
3
|
+
require "aws_cloud_search/document"
|
4
|
+
require "aws_cloud_search/document_batch"
|
5
|
+
require "aws_cloud_search/document_batcher"
|
6
|
+
require "aws_cloud_search/exceptions"
|
7
|
+
require "aws_cloud_search/search_response"
|
8
|
+
require "aws_cloud_search/search_request"
|
9
|
+
require "aws_cloud_search/version"
|
10
|
+
|
11
|
+
require "faraday_middleware"
|
12
|
+
|
13
|
+
# Top-level namespace: endpoint URL builders, the Faraday connection factory
# and the middleware that turns HTTP status codes into exceptions.
module AWSCloudSearch
  # API version segment used in request paths.
  API_VERSION = "2011-02-01"

  # AWS CloudSearch only allows XML 1.0 valid characters.
  # Matches any character outside the XML 1.0 `Char` production:
  # tab, LF, CR, U+0020-U+D7FF, U+E000-U+FFFD and U+10000-U+10FFFF.
  # (The supplementary-plane range was previously missing, so valid characters
  # such as emoji were reported as invalid.)
  INVALID_CHAR_XML10 = /[^\u0009\u000a\u000d\u0020-\uD7FF\uE000-\uFFFD\u{10000}-\u{10FFFF}]/m
  # for future reference in case AWS-CS updates to XML 1.1 char compliance
  #INVALID_CHAR_XML11 = /[^\u0001-\uD7FF\uE000-\uFFFD]/m

  # @param [String] domain The search domain name (including its id suffix)
  # @param [String] region AWS region of the domain
  # @return [String] Base URL of the domain's search endpoint
  def self.search_url(domain, region="us-east-1")
    "http://search-#{domain}.#{region}.cloudsearch.amazonaws.com"
  end

  # @param [String] domain The search domain name (including its id suffix)
  # @param [String] region AWS region of the domain
  # @return [String] Base URL of the domain's document endpoint
  def self.document_url(domain, region="us-east-1")
    "http://doc-#{domain}.#{region}.cloudsearch.amazonaws.com"
  end

  # @return [String] Base URL of the configuration endpoint
  def self.configuration_url
    "https://cloudsearch.us-east-1.amazonaws.com"
  end

  # Builds a Faraday connection that encodes/decodes JSON and raises on
  # non-2xx responses.
  # @param [String] url Base URL of the endpoint
  # @param [String] aws_access_key_id Accepted for interface compatibility; not used by this method
  # @param [String] aws_secret_access_key Accepted for interface compatibility; not used by this method
  # @return [Faraday::Connection]
  def self.create_connection(url, aws_access_key_id=nil, aws_secret_access_key=nil)
    connection = Faraday.new url do |builder|
      builder.use AWSCloudSearch::HttpCodeResponseMiddleware
      builder.use FaradayMiddleware::EncodeJson
      builder.use FaradayMiddleware::ParseJson
      builder.adapter Faraday.default_adapter

      # for future reference
      #conn.request :json, :content_type => /\bjson$/
      #conn.response :json, :content_type => /\bjson$/
      #conn.adapter Faraday.default_adapter
    end
    connection.headers['User-Agent'] = "AWSCloudSearch-Ruby-Client/#{VERSION}"
    connection
  end

  # Response middleware that raises an exception for every non-2xx status.
  # The specific statuses (408, 509) are listed before the ranges that contain
  # them so they take precedence.
  class HttpCodeResponseMiddleware < Faraday::Response::Middleware
    # @param [Hash] env Faraday response environment (reads :status and :body)
    # @raise [RequestTimeout] on 408
    # @raise [HttpClientError] on any other 4xx
    # @raise [BandwidthLimitExceeded] on 509
    # @raise [HttpServerError] on any other 5xx
    # @raise [UnexpectedHTTPException] on anything outside 200-599
    def on_complete(env)
      case env[:status]
      when 200..299
        nil
      when 408
        raise RequestTimeout, env[:body]
      when 400..499
        raise HttpClientError, env[:body]
      when 509
        raise BandwidthLimitExceeded, env[:body]
      when 500..599
        raise HttpServerError, env[:body]
      else
        raise UnexpectedHTTPException, env[:body]
      end
    end
  end

end
|
@@ -0,0 +1,71 @@
|
|
1
|
+
require "json"
|
2
|
+
require "aws_cloud_search"
|
3
|
+
|
4
|
+
module AWSCloudSearch
  # Client for the document and search endpoints of a single CloudSearch domain.
  class CloudSearch

    # Names of the search-error exception classes, in the order they are
    # matched (by substring) against the error code returned by the service.
    # Stored as names because the exception classes are resolved when an error
    # is raised, not when this class is loaded.
    SEARCH_ERROR_NAMES = %w(
      WildcardTermLimit
      InvalidFieldOrRankAliasInRankParameter
      UnknownFieldInMatchExpression
      IncorrectFieldTypeInMatchExpression
      InvalidMatchExpression
      UndefinedField
    ).freeze

    # @param [String] domain The search domain name
    # @param [String] region AWS region of the domain
    def initialize(domain, region="us-east-1")
      @doc_conn = AWSCloudSearch::create_connection( AWSCloudSearch::document_url(domain, region) )
      @search_conn = AWSCloudSearch::create_connection( AWSCloudSearch::search_url(domain, region) )
    end

    # Sends a batch of document updates and deletes by invoking the CloudSearch documents/batch API
    # @param [DocumentBatch] doc_batch The batch of document adds and deletes to send
    # @return The parsed response body
    # @raise [ArgumentError] If doc_batch is not a DocumentBatch
    # @raise [AwsCloudSearchError] If the response body reports status 'error'
    def documents_batch(doc_batch)
      raise ArgumentError, "Invalid argument. Expected DocumentBatch, got #{doc_batch.class}." unless doc_batch.is_a? DocumentBatch

      resp = @doc_conn.post do |req|
        req.url "/#{AWSCloudSearch::API_VERSION}/documents/batch"
        req.headers['Content-Type'] = 'application/json'
        req.body = doc_batch.to_json
      end

      body = resp.body
      # The JSON middleware yields string keys; the previous symbol-key lookup
      # never matched, so batch errors were silently ignored. Symbol keys are
      # still accepted in case the parser is configured differently.
      if body.is_a?(Hash) && (body['status'] || body[:status]) == 'error'
        errors = Array(body['errors'] || body[:errors])
        details = errors.map { |e| e.is_a?(Hash) ? (e['message'] || e[:message] || e) : e }.join(';')
        raise AwsCloudSearchError, "AwsCloudSearchCloud::DocumentService batch returned #{errors.size} errors: #{details}"
      end
      body
    end

    # Performs a search
    # @param [SearchRequest] search_req
    # @return [SearchResponse]
    # @raise [ArgumentError] If search_req is not a SearchRequest
    # @raise [AwsCloudSearchError] (or a subclass) If the response carries an error
    def search(search_req)
      raise ArgumentError, "Invalid Type: search_request must be of type SearchRequest" unless search_req.is_a? SearchRequest

      resp = @search_conn.get do |req|
        req.url "/#{AWSCloudSearch::API_VERSION}/search", search_req.to_hash
      end

      search_response = SearchResponse.new(resp.body)
      raise_search_error(search_response) if search_response.error

      search_response
    end

    # Build a DocumentBatcher linked to this CloudSearch domain
    # @return [DocumentBatcher]
    def new_batcher
      DocumentBatcher.new(self)
    end

    private

    # Raises the exception matching the first message of an error response.
    # The messages are read from the parsed SearchResponse; the raw HTTP
    # response object has no `messages` method.
    # @param [SearchResponse] search_response
    def raise_search_error(search_response)
      messages = search_response.messages
      raise AwsCloudSearchError, "Unknown error" if messages.nil? || messages.empty?

      first = messages.first
      code = first['code']
      msg = "#{code}: #{first['message']}"

      name = SEARCH_ERROR_NAMES.find { |n| code.to_s.include?(n) }
      raise AwsCloudSearchError, "Unknown error. #{msg}" if name.nil?
      raise AWSCloudSearch.const_get(name), msg
    end

  end
end
|
@@ -0,0 +1,81 @@
|
|
1
|
+
require "json"
|
2
|
+
|
3
|
+
module AWSCloudSearch
  # A single document operation (add or delete) for the documents/batch API.
  class Document

    # A typed attribute accessor helper. When the value is set, if it does not match
    # the pre-defined type (and is not nil), an ArgumentError is raised.
    # @param [String] name Name of the attribute
    # @param [Class] type The class type of the attribute
    def self.type_attr_accessor(name, type)
      ivar = "@#{name}"

      define_method(name) do
        instance_variable_get(ivar)
      end

      define_method("#{name}=") do |value|
        raise ArgumentError, "Invalid Type" unless value.nil? || value.is_a?(type)
        instance_variable_set(ivar, value)
      end
    end

    type_attr_accessor :version, Integer
    type_attr_accessor :lang, String
    attr_accessor :type
    attr_reader :fields, :id

    # Initializes the object
    # @param [boolean] auto_version Set to true to automatically set the version, default is false
    def initialize(auto_version=false)
      @fields = {}
      new_version if auto_version
    end

    # Adds a new field to the document
    # @param [String] name Name of the document field
    # @param [String, Integer] value Value of the document field
    # @raise [ArgumentError] If a String value contains characters matched by INVALID_CHAR_XML10
    def add_field(name, value)
      raise ArgumentError, "Found invalid XML 1.0 unicode character(s)" if value.is_a?(String) && value =~ INVALID_CHAR_XML10
      @fields[name] = value
    end

    # The id field must conform to a special format
    # @param [#to_s] id
    # @raise [ArgumentError] If id is nil or its string form does not match the id format
    def id=(id)
      raise ArgumentError, "Invalid ID: Document id must be a String or respond to #to_s" if id.nil? || !id.respond_to?(:to_s)

      candidate = id.to_s
      # \A and \z anchor the whole string; ^ and $ only anchor a line, which
      # let multi-line values through as long as one line was well-formed.
      unless candidate =~ /\A[a-z0-9][a-z0-9_]*\z/
        raise ArgumentError, "Invalid ID: Document id must match the regex [a-z0-9][a-z0-9_]*$"
      end

      # Assign only after validation so a rejected id never replaces a valid one.
      @id = candidate
    end

    # Resets the fields.
    def clear_fields
      @fields = {}
    end

    # Set a new version automatically (current time in seconds since the epoch)
    def new_version
      @version = Time.now.to_i
    end

    # Return this object as a hash. Fields with nil values are omitted from the
    # result; the document's own field table is left untouched. `lang` is
    # included unless the type is 'delete'.
    # @return [Hash]
    def to_hash
      h = {
        :type => @type,
        :id => @id,
        :version => @version,
        :fields => @fields.reject { |_key, val| val.nil? }
      }
      h[:lang] = @lang unless @type == 'delete'

      h
    end

    # Return this object as json. Accepts (and forwards) the generator
    # arguments so a Document can be nested inside another structure that is
    # being serialized.
    # @return [String]
    def to_json(*args)
      to_hash.to_json(*args)
    end

  end
end
|
@@ -0,0 +1,81 @@
|
|
1
|
+
require "json"
|
2
|
+
require "aws_cloud_search"
|
3
|
+
|
4
|
+
module AWSCloudSearch
  # An accumulating set of add and delete operations with a running byte count.
  class DocumentBatch

    # Raised when an operation would push the batch past max_bytesize.
    # A StandardError (rather than a bare Exception) so that an ordinary
    # `rescue => e` can handle it.
    class BatchSizeExceeded < StandardError; end

    # Sum of the JSON byte sizes of the documents currently in the batch.
    attr_reader :bytesize

    # Constructor
    # @param [Integer] pref_bytesize The preferred size of the batch in bytes. May be exceeded, if so batch is considered full.
    # @param [Integer] max_bytesize The batch size in bytes must not exceed this number. Must be greater than pref_bytesize.
    # @raise [ArgumentError] If pref_bytesize is not less than max_bytesize
    def initialize(pref_bytesize=1048576, max_bytesize= 5242880)
      raise ArgumentError, "pref_bytesize must be less than max_bytesize" if pref_bytesize >= max_bytesize

      @pref_bytesize = pref_bytesize
      @max_bytesize = max_bytesize
      @batch_add = []
      @batch_delete = []
      @bytesize = 0
    end

    # Adds a document with the add operation to the batch. Sets the
    # document's type to 'add'.
    # @param [Document] doc
    # @raise [ArgumentError] If parameter is not an AWSCloudSearch::Document, or its JSON contains invalid XML 1.0 characters
    # @raise [BatchSizeExceeded] If the document does not fit within max_bytesize
    def add_document(doc)
      raise ArgumentError, "Invalid Type" unless doc.is_a? Document

      doc.type = 'add'
      json = doc.to_json
      doc_bytesize = json.bytesize

      ensure_room_for(doc_bytesize, 'add')
      raise ArgumentError, "Found invalid XML 1.0 unicode characters." if json =~ INVALID_CHAR_XML10

      @bytesize += doc_bytesize
      @batch_add << doc
    end

    # Adds a delete document operation to the batch. Removes lang and fields from the object as they are not
    # required for delete operations.
    # @param [Document] doc The document to delete
    # @raise [ArgumentError] If parameter is not an AWSCloudSearch::Document
    # @raise [BatchSizeExceeded] If the operation does not fit within max_bytesize
    # TODO: refactor to only use the required fields, hide the document construction from the user
    def delete_document(doc)
      raise ArgumentError, "Invalid Type" unless doc.is_a? Document

      doc.type = 'delete'
      doc.lang = nil
      doc.clear_fields
      doc_bytesize = doc.to_json.bytesize

      ensure_room_for(doc_bytesize, 'delete')

      @bytesize += doc_bytesize
      @batch_delete << doc
    end

    # @return [Integer] Number of items in the batch
    def size
      @batch_add.size + @batch_delete.size
    end

    # @return [Boolean] True if the bytesize of the batch has reached the preferred bytesize
    def full?
      @bytesize >= @pref_bytesize
    end

    # Adds are serialized before deletes. Accepts (and forwards) the generator
    # arguments so the batch can be nested inside another structure being serialized.
    # @return [String] The JSON string representation of the DocumentBatch
    def to_json(*args)
      (@batch_add + @batch_delete).map { |item| item.to_hash }.to_json(*args)
    end

    # Empties the batch and resets the byte count.
    def clear
      @batch_add.clear
      @batch_delete.clear
      @bytesize = 0
    end

    private

    # @param [Integer] doc_bytesize Byte size of the operation about to be added
    # @param [String] operation 'add' or 'delete', used in the error message
    # @raise [BatchSizeExceeded] If adding doc_bytesize would exceed max_bytesize
    def ensure_room_for(doc_bytesize, operation)
      return unless (doc_bytesize + @bytesize) > @max_bytesize
      raise BatchSizeExceeded, "Max batch size exceeded, document #{operation} was not added to batch."
    end

  end
end
|
@@ -0,0 +1,35 @@
|
|
1
|
+
require "json"
|
2
|
+
require "aws_cloud_search"
|
3
|
+
|
4
|
+
module AWSCloudSearch
  # Convenience class that allows continuous batch additions, chunks them to
  # the batch's size threshold and sends a request for each chunk.
  class DocumentBatcher

    # @param [CloudSearch] cs Client used to send each batch (must respond to #documents_batch)
    def initialize(cs)
      @cs = cs
      @batch = DocumentBatch.new
    end

    # Queues an add operation, sending the current batch first if it is full.
    # @param [Document] doc
    def add_document(doc)
      flush if @batch.full?

      @batch.add_document doc
    end

    # Queues a delete operation, sending the current batch first if it is full.
    # @param [Document] doc
    def delete_document(doc)
      flush if @batch.full?

      @batch.delete_document doc
    end

    # Sends the batch of adds and deletes to CloudSearch Search and then clears the current batch.
    # Does nothing when the batch is empty, so no request is made for an empty batch.
    # TODO: (dj) implement connection retry logic
    def flush
      return if @batch.size == 0

      @cs.documents_batch @batch
      @batch.clear
    end

  end

end
|
@@ -0,0 +1,17 @@
|
|
1
|
+
module AWSCloudSearch
  # CloudSearch API errors
  # Base class for errors reported by the CloudSearch service itself.
  class AwsCloudSearchError < StandardError; end
  class WildcardTermLimit < AwsCloudSearchError; end
  class InvalidFieldOrRankAliasInRankParameter < AwsCloudSearchError; end
  class UnknownFieldInMatchExpression < AwsCloudSearchError; end
  class IncorrectFieldTypeInMatchExpression < AwsCloudSearchError; end
  class InvalidMatchExpression < AwsCloudSearchError; end
  class UndefinedField < AwsCloudSearchError; end

  # HTTP errors
  # Common base class so callers can rescue every HTTP-level failure with a
  # single clause. Deliberately not an AwsCloudSearchError, so existing
  # `rescue AwsCloudSearchError` clauses keep their current meaning.
  class HttpError < StandardError; end
  class UnexpectedHTTPException < HttpError; end
  class HttpClientError < HttpError; end
  class HttpServerError < HttpError; end
  class RequestTimeout < HttpClientError; end
  class BandwidthLimitExceeded < HttpServerError; end
end
|
@@ -0,0 +1,19 @@
|
|
1
|
+
module AWSCloudSearch
  # Holds the parameters of a search request and converts them to the
  # query-string names used by the search API.
  class SearchRequest

    # return_fields may be an Array of field names or a single String.
    attr_accessor :q, :bq, :rank, :results_type, :return_fields, :size, :start

    # Builds the query parameters. Attributes that are nil are left out.
    # @return [Hash] Parameter name (String) => value
    def to_hash
      hash = {}
      hash['q'] = @q unless @q.nil?
      hash['bq'] = @bq unless @bq.nil?
      hash['rank'] = @rank unless @rank.nil?
      hash['size'] = @size unless @size.nil?
      hash['start'] = @start unless @start.nil?
      hash['results-type'] = @results_type unless @results_type.nil?
      # Array() lets a single String be used as well as an Array of names;
      # calling #join directly failed for a String.
      hash['return-fields'] = Array(@return_fields).join(',') unless @return_fields.nil?
      hash
    end

  end
end
|
@@ -0,0 +1,37 @@
|
|
1
|
+
module AWSCloudSearch
  # Wraps the parsed JSON body of a search response and exposes its
  # well-known fields as attributes.
  class SearchResponse
    # The raw value passed to the constructor.
    attr_reader :response
    # The value under 'hits' => 'hit', or nil when the response has no hits section.
    attr_reader :hits

    alias :results :hits

    # error is an undocumented field that occurs when an error is returned
    FIELDS = [ :match_expr, :rank, :cpu_time_ms, :time_ms, :rid, :found, :start, :error, :messages ].freeze
    FIELDS.each { |f| attr_accessor f }

    # Takes in the hash, representing the json object returned from a search request.
    # Each field is looked up (with underscores replaced by dashes) first under
    # 'info', then under 'hits', then at the top level. A nil or non-Hash
    # response, or non-Hash 'info'/'hits' sections, leave every field unset
    # instead of raising.
    # @param [Hash, nil] response
    def initialize(response)
      @response = response

      data = response.is_a?(Hash) ? response : {}
      info = data['info'].is_a?(Hash) ? data['info'] : {}
      hits = data['hits'].is_a?(Hash) ? data['hits'] : {}

      FIELDS.each do |f|
        fs = f.to_s.gsub('_', '-')
        val = info[fs] || hits[fs] || data[fs]
        instance_variable_set("@#{f}", val) unless val.nil?
      end

      @hits = hits['hit'] if data['hits'].is_a?(Hash)
    end

    # @return [Integer] Number of hits in this response, 0 when there are none
    def result_size
      @hits ? @hits.size : 0
    end

  end
end
|
@@ -0,0 +1,97 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
|
3
|
+
# These tests require that your domain index contains the following fields:
|
4
|
+
# - name: text
|
5
|
+
# - type: text
|
6
|
+
# - summary: text
|
7
|
+
# - num_links: uint
|
8
|
+
describe AWSCloudSearch::CloudSearch do
|
9
|
+
|
10
|
+
let(:ds) { AWSCloudSearch::CloudSearch.new(ENV['CLOUDSEARCH_DOMAIN']) }
|
11
|
+
|
12
|
+
it "should send document batch" do
|
13
|
+
batch = AWSCloudSearch::DocumentBatch.new
|
14
|
+
|
15
|
+
doc1 = AWSCloudSearch::Document.new(true)
|
16
|
+
doc1.id = Array.new( 8 ) { rand(256) }.pack('C*').unpack('H*').first
|
17
|
+
doc1.lang = 'en'
|
18
|
+
doc1.add_field('name', 'Jane Williams')
|
19
|
+
doc1.add_field('type', 'person')
|
20
|
+
|
21
|
+
doc2 = AWSCloudSearch::Document.new(true)
|
22
|
+
doc2.id = Array.new( 8 ) { rand(256) }.pack('C*').unpack('H*').first
|
23
|
+
doc2.lang = 'en'
|
24
|
+
doc2.add_field :name, 'Bob Dobalina'
|
25
|
+
doc2.add_field :type, 'person'
|
26
|
+
|
27
|
+
batch.add_document doc1
|
28
|
+
batch.add_document doc2
|
29
|
+
ds.documents_batch(batch)
|
30
|
+
end
|
31
|
+
|
32
|
+
it "should delete a document" do
|
33
|
+
id = 'joeblotzdelete_test'
|
34
|
+
batch1 = AWSCloudSearch::DocumentBatch.new
|
35
|
+
doc1 = AWSCloudSearch::Document.new(true)
|
36
|
+
doc1.id = id
|
37
|
+
doc1.lang = 'en'
|
38
|
+
doc1.add_field('name', 'Joe Blotz Delete Test')
|
39
|
+
doc1.add_field('type', 'person')
|
40
|
+
batch1.add_document doc1
|
41
|
+
ds.documents_batch(batch1)
|
42
|
+
|
43
|
+
batch2 = AWSCloudSearch::DocumentBatch.new
|
44
|
+
doc2 = AWSCloudSearch::Document.new(true)
|
45
|
+
doc2.id = id
|
46
|
+
batch2.delete_document doc2
|
47
|
+
ds.documents_batch(batch2)
|
48
|
+
end
|
49
|
+
|
50
|
+
it "should raise ArgumentError for invalid XML 1.0 chars" do
|
51
|
+
batch = AWSCloudSearch::DocumentBatch.new
|
52
|
+
|
53
|
+
doc1 = AWSCloudSearch::Document.new(true)
|
54
|
+
id = Time.now.to_i.to_s
|
55
|
+
doc1.id = id
|
56
|
+
doc1.lang = 'en'
|
57
|
+
doc1.add_field('name', "Jane Williams")
|
58
|
+
doc1.add_field('type', 'person')
|
59
|
+
|
60
|
+
# \\uD800 is not a valid UTF-8 and it this line of code may cause your debugger to break
|
61
|
+
expect {doc1.add_field("summary", "This is a REALLY bad char, not even UTF-8 acceptable: \uD800")}.to raise_error(ArgumentError)
|
62
|
+
|
63
|
+
#expect { batch.add_document doc1 }.to raise_error(ArgumentError)
|
64
|
+
|
65
|
+
doc2 = AWSCloudSearch::Document.new(true)
|
66
|
+
id = Time.now.to_i.to_s
|
67
|
+
doc2.id = id
|
68
|
+
doc2.lang = 'en'
|
69
|
+
doc2.add_field('name', "Brian Williams")
|
70
|
+
doc2.add_field('type', 'person')
|
71
|
+
expect {doc2.add_field("summary", "This is a bad char for XML 1.0: \v")}.to raise_error(ArgumentError)
|
72
|
+
|
73
|
+
doc2.instance_variable_get("@fields")['how_did_i_get_here'] = "This is a bad char for XML 1.0: \ufffe"
|
74
|
+
expect { batch.add_document doc2 }.to raise_error(ArgumentError)
|
75
|
+
|
76
|
+
end
|
77
|
+
|
78
|
+
|
79
|
+
|
80
|
+
it "should return a DocumentBatcher instance for new_batcher" do
|
81
|
+
ds.new_batcher.should be_an(AWSCloudSearch::DocumentBatcher)
|
82
|
+
end
|
83
|
+
|
84
|
+
it "should search" do
|
85
|
+
sr = AWSCloudSearch::SearchRequest.new
|
86
|
+
sr.bq = "(and name:'Jane')"
|
87
|
+
sr.return_fields = %w(logo_url name type)
|
88
|
+
sr.size = 10
|
89
|
+
sr.start = 0
|
90
|
+
sr.results_type = 'json'
|
91
|
+
|
92
|
+
res = ds.search(sr)
|
93
|
+
|
94
|
+
res.should be_an(AWSCloudSearch::SearchResponse)
|
95
|
+
end
|
96
|
+
|
97
|
+
end
|
@@ -0,0 +1,114 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
|
3
|
+
# Written to run on both RSpec 2 and RSpec 3 (the gemspec allows >= 2.6.0):
# negative expectations use the bare `raise_error` form and `full?` is checked
# with the `be_full` predicate matcher instead of `be_true`.
describe AWSCloudSearch::DocumentBatch do

  let(:batch) { AWSCloudSearch::DocumentBatch.new }

  let(:sample_add_doc) do
    AWSCloudSearch::Document.new(true).tap do |d|
      d.id = '73e'
      d.lang = 'en'
      d.add_field('name', 'Jane Williams')
      d.add_field('type', 'person')
    end
  end

  let(:sample_delete_doc) do
    AWSCloudSearch::Document.new(true).tap do |d|
      d.type = 'delete' # we have to set this here so that delete doc bytesize calculations are correct
      d.id = '47p'
      d.lang = nil
    end
  end

  it "should not instantiate" do
    expect { AWSCloudSearch::DocumentBatch.new(100, 100) }.to raise_error(ArgumentError)
    expect { AWSCloudSearch::DocumentBatch.new(101, 100) }.to raise_error(ArgumentError)
  end

  it "should instantiate" do
    expect { AWSCloudSearch::DocumentBatch.new }.to_not raise_error
    expect { AWSCloudSearch::DocumentBatch.new(100, 101) }.to_not raise_error
  end

  it "should raise error when passed an invalid object type" do
    expect { batch.add_document("Hello") }.to raise_error(ArgumentError)
    expect { batch.delete_document("Hello") }.to raise_error(ArgumentError)
  end

  it "should not raise error when passed a Document" do
    expect { batch.add_document(sample_add_doc) }.to_not raise_error
    expect { batch.delete_document(sample_delete_doc) }.to_not raise_error
  end

  it "should return the correct size" do
    batch.add_document sample_add_doc
    batch.size.should eq(1)

    batch.delete_document sample_delete_doc
    batch.size.should eq(2)
  end

  it "should raise error when the max batch size is exceeded" do
    small_batch = AWSCloudSearch::DocumentBatch.new(1, 10)
    expect { small_batch.add_document(sample_add_doc) }.to raise_error
    expect { small_batch.delete_document(sample_delete_doc) }.to raise_error
  end

  it "should be full" do
    bytesize = sample_add_doc.to_json.bytesize

    b1 = AWSCloudSearch::DocumentBatch.new(bytesize)
    b1.add_document sample_add_doc
    b1.should be_full

    b2 = AWSCloudSearch::DocumentBatch.new(bytesize-1)
    b2.add_document sample_add_doc
    b2.should be_full

    bytesize = sample_delete_doc.to_json.bytesize

    b3 = AWSCloudSearch::DocumentBatch.new(bytesize)
    b3.delete_document sample_delete_doc
    b3.should be_full

    b4 = AWSCloudSearch::DocumentBatch.new(bytesize-1)
    b4.delete_document sample_delete_doc
    b4.should be_full
  end

  it "should return the total bytesize of all docs" do
    batch.bytesize.should eq(0)

    batch.add_document sample_add_doc
    bytesize = sample_add_doc.to_json.bytesize
    batch.bytesize.should eq(bytesize)

    batch.delete_document sample_delete_doc
    bytesize += sample_delete_doc.to_json.bytesize
    batch.bytesize.should eq(bytesize)
  end

  it "should not be full" do
    batch.add_document sample_add_doc
    batch.should_not be_full

    # Use the delete sample here: deleting sample_add_doc would strip the
    # fields of the document that was just queued as an add.
    batch.delete_document sample_delete_doc
    batch.should_not be_full
  end

  it "should clear" do
    clear_batch = AWSCloudSearch::DocumentBatch.new
    clear_batch.add_document sample_add_doc
    clear_batch.delete_document sample_delete_doc

    clear_batch.bytesize.should be > 0
    clear_batch.size.should be > 0

    clear_batch.clear

    clear_batch.bytesize.should eq(0)
    clear_batch.size.should eq(0)
  end

end
|
@@ -0,0 +1,38 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
|
3
|
+
describe AWSCloudSearch::Document do
  let(:doc) { AWSCloudSearch::Document.new }

  context "#id=" do
    it "should accept a String-able value (Integer)" do
      expect { doc.id = 123456789 }.to_not raise_error
    end

    it "should accept a compliant String" do
      expect { doc.id = "abcdef" }.to_not raise_error
    end

    it "should not accept a non-compliant String" do
      expect { doc.id = 'AZ12' }.to raise_error(ArgumentError)
      expect { doc.id = '!@#$%^&*()AZ' }.to raise_error(ArgumentError)
      expect { doc.id = '_abc123' }.to raise_error(ArgumentError)
    end

    it "should not accept nil" do
      expect { doc.id = nil }.to raise_error(ArgumentError)
    end
  end

  context "#type_attr_accessor attributes" do
    it "should accept values of proper type" do
      expect { doc.lang = 'abcd' }.to_not raise_error
      expect { doc.version = 1234 }.to_not raise_error
    end

    it "should not accept values of incorrect type" do
      expect { doc.lang = 1234 }.to raise_error(ArgumentError)
      # Must be an assignment: `doc.version "abcd"` called the reader with an
      # argument and raised ArgumentError for the wrong reason (arity), so the
      # type check of the writer was never exercised.
      expect { doc.version = "abcd" }.to raise_error(ArgumentError)
    end
  end

end
|
@@ -0,0 +1,31 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
|
3
|
+
describe AWSCloudSearch::SearchResponse do
  # Parsed body of a one-hit search response, rebuilt for every example.
  before(:each) do
    hits_section = {
      'found' => 1,
      'start' => 0,
      'hit' => [
        { 'id' => '2e'}
      ]
    }
    info_section = {
      'cpu-time-ms' => 0,
      'time-ms' => 2,
      'rid' => '6ddcaa561c05c4cc221cb551e21a9631b979b9aa5297fab17731a8b9f863b20423151ddcd9b246caee73334112c96801'
    }
    @res1 = { 'hits' => hits_section, 'info' => info_section }
  end

  # Fields from both the 'hits' and the 'info' sections must be exposed as
  # attributes, with dashes in the key names mapped to underscores.
  it "should initialize from hash" do
    sr = AWSCloudSearch::SearchResponse.new(@res1)

    sr.found.should eq(1)
    sr.start.should eq(0)
    sr.cpu_time_ms.should eq(0)
    sr.time_ms.should eq(2)
    sr.rid.should eq('6ddcaa561c05c4cc221cb551e21a9631b979b9aa5297fab17731a8b9f863b20423151ddcd9b246caee73334112c96801')
  end
end
|
data/spec/spec_helper.rb
ADDED
metadata
ADDED
@@ -0,0 +1,96 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: aws_cloud_search
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.0.1
|
5
|
+
prerelease:
|
6
|
+
platform: ruby
|
7
|
+
authors:
|
8
|
+
- David Jensen
|
9
|
+
- Mike Javorski
|
10
|
+
autorequire:
|
11
|
+
bindir: bin
|
12
|
+
cert_chain: []
|
13
|
+
date: 2012-04-18 00:00:00.000000000Z
|
14
|
+
dependencies:
|
15
|
+
- !ruby/object:Gem::Dependency
|
16
|
+
name: faraday_middleware
|
17
|
+
requirement: &9414220 !ruby/object:Gem::Requirement
|
18
|
+
none: false
|
19
|
+
requirements:
|
20
|
+
- - ! '>='
|
21
|
+
- !ruby/object:Gem::Version
|
22
|
+
version: 0.8.0
|
23
|
+
type: :runtime
|
24
|
+
prerelease: false
|
25
|
+
version_requirements: *9414220
|
26
|
+
- !ruby/object:Gem::Dependency
|
27
|
+
name: rspec
|
28
|
+
requirement: &9413280 !ruby/object:Gem::Requirement
|
29
|
+
none: false
|
30
|
+
requirements:
|
31
|
+
- - ! '>='
|
32
|
+
- !ruby/object:Gem::Version
|
33
|
+
version: 2.6.0
|
34
|
+
type: :development
|
35
|
+
prerelease: false
|
36
|
+
version_requirements: *9413280
|
37
|
+
description: AWSCloudSearch Search gem
|
38
|
+
email:
|
39
|
+
- david.jensen@spoke.com
|
40
|
+
- mike.javorski@spoke.com
|
41
|
+
executables: []
|
42
|
+
extensions: []
|
43
|
+
extra_rdoc_files: []
|
44
|
+
files:
|
45
|
+
- .gitignore
|
46
|
+
- .rspec
|
47
|
+
- Gemfile
|
48
|
+
- LICENSE
|
49
|
+
- README.md
|
50
|
+
- Rakefile
|
51
|
+
- aws_cloud_search.gemspec
|
52
|
+
- lib/aws_cloud_search.rb
|
53
|
+
- lib/aws_cloud_search/cloud_search.rb
|
54
|
+
- lib/aws_cloud_search/cloud_search_config.rb
|
55
|
+
- lib/aws_cloud_search/document.rb
|
56
|
+
- lib/aws_cloud_search/document_batch.rb
|
57
|
+
- lib/aws_cloud_search/document_batcher.rb
|
58
|
+
- lib/aws_cloud_search/exceptions.rb
|
59
|
+
- lib/aws_cloud_search/search_request.rb
|
60
|
+
- lib/aws_cloud_search/search_response.rb
|
61
|
+
- lib/aws_cloud_search/version.rb
|
62
|
+
- spec/aws_cloud_search/cloud_search_spec.rb
|
63
|
+
- spec/aws_cloud_search/document_batch_spec.rb
|
64
|
+
- spec/aws_cloud_search/document_spec.rb
|
65
|
+
- spec/aws_cloud_search/search_response_spec.rb
|
66
|
+
- spec/spec_helper.rb
|
67
|
+
homepage: ''
|
68
|
+
licenses: []
|
69
|
+
post_install_message:
|
70
|
+
rdoc_options: []
|
71
|
+
require_paths:
|
72
|
+
- lib
|
73
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
74
|
+
none: false
|
75
|
+
requirements:
|
76
|
+
- - ! '>='
|
77
|
+
- !ruby/object:Gem::Version
|
78
|
+
version: '0'
|
79
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
80
|
+
none: false
|
81
|
+
requirements:
|
82
|
+
- - ! '>='
|
83
|
+
- !ruby/object:Gem::Version
|
84
|
+
version: '0'
|
85
|
+
requirements: []
|
86
|
+
rubyforge_project:
|
87
|
+
rubygems_version: 1.8.17
|
88
|
+
signing_key:
|
89
|
+
specification_version: 3
|
90
|
+
summary: Implementation of the AWS CloudSearch API
|
91
|
+
test_files:
|
92
|
+
- spec/aws_cloud_search/cloud_search_spec.rb
|
93
|
+
- spec/aws_cloud_search/document_batch_spec.rb
|
94
|
+
- spec/aws_cloud_search/document_spec.rb
|
95
|
+
- spec/aws_cloud_search/search_response_spec.rb
|
96
|
+
- spec/spec_helper.rb
|