eden_cloud_search 0.1.10

Sign up to get free protection for your applications and to get access to all the features.
Files changed (37) hide show
  1. data/.gitignore +19 -0
  2. data/.rspec +1 -0
  3. data/.travis.yml +3 -0
  4. data/Gemfile +13 -0
  5. data/Guardfile +10 -0
  6. data/LICENSE.txt +22 -0
  7. data/README.md +142 -0
  8. data/Rakefile +9 -0
  9. data/eden_cloud_search.gemspec +30 -0
  10. data/lib/eden_cloud_search/config.rb +62 -0
  11. data/lib/eden_cloud_search/document.rb +95 -0
  12. data/lib/eden_cloud_search/exceptions.rb +3 -0
  13. data/lib/eden_cloud_search/indexer.rb +44 -0
  14. data/lib/eden_cloud_search/invalid_document.rb +11 -0
  15. data/lib/eden_cloud_search/search_response.rb +73 -0
  16. data/lib/eden_cloud_search/searcher.rb +118 -0
  17. data/lib/eden_cloud_search/version.rb +3 -0
  18. data/lib/eden_cloud_search.rb +24 -0
  19. data/spec/cloud_search/config_spec.rb +23 -0
  20. data/spec/cloud_search/document_spec.rb +335 -0
  21. data/spec/cloud_search/indexer_spec.rb +146 -0
  22. data/spec/cloud_search/invalid_document_spec.rb +9 -0
  23. data/spec/cloud_search/search_response_spec.rb +220 -0
  24. data/spec/cloud_search/searcher_spec.rb +254 -0
  25. data/spec/fixtures/full.json +1 -0
  26. data/spec/fixtures/vcr_cassettes/index/request/add.yml +38 -0
  27. data/spec/fixtures/vcr_cassettes/index/request/add_in_batch.yml +40 -0
  28. data/spec/fixtures/vcr_cassettes/index/request/delete.yml +37 -0
  29. data/spec/fixtures/vcr_cassettes/search/request/facets.yml +121 -0
  30. data/spec/fixtures/vcr_cassettes/search/request/facets_with_constraints.yml +123 -0
  31. data/spec/fixtures/vcr_cassettes/search/request/full.yml +114 -0
  32. data/spec/fixtures/vcr_cassettes/search/request/paginated.yml +36 -0
  33. data/spec/fixtures/vcr_cassettes/search/request/paginated_first_page.yml +56 -0
  34. data/spec/fixtures/vcr_cassettes/search/request/paginated_second_page.yml +81 -0
  35. data/spec/spec_helper.rb +17 -0
  36. data/spec/support/vcr.rb +5 -0
  37. metadata +212 -0
data/.gitignore ADDED
@@ -0,0 +1,19 @@
1
+ *.gem
2
+ *.sw*
3
+ *.rbc
4
+ .bundle
5
+ .config
6
+ .yardoc
7
+ Gemfile.lock
8
+ InstalledFiles
9
+ _yardoc
10
+ coverage
11
+ doc/
12
+ lib/bundler/man
13
+ pkg
14
+ rdoc
15
+ spec/reports
16
+ test/tmp
17
+ test/version_tmp
18
+ tmp
19
+ bin
data/.rspec ADDED
@@ -0,0 +1 @@
1
+ --color --format documentation
data/.travis.yml ADDED
@@ -0,0 +1,3 @@
1
+ language: ruby
2
+ rvm:
3
+ - 1.9.3
data/Gemfile ADDED
@@ -0,0 +1,13 @@
1
+ source 'https://rubygems.org'
2
+
3
+ # Specify your gem's dependencies in eden_cloud_search.gemspec
4
+ gemspec
5
+
6
+ group :test do
7
+ gem "growl"
8
+ gem "guard"
9
+ gem "guard-rspec"
10
+ gem "rb-fsevent"
11
+ gem "debugger"
12
+ end
13
+
data/Guardfile ADDED
@@ -0,0 +1,10 @@
1
+ # A sample Guardfile
2
+ # More info at https://github.com/guard/guard#readme
3
+
4
+ guard 'rspec', :version => 2 do
5
+ watch(%r{^spec/.+_spec\.rb$})
6
+ watch(%r{^lib/(.+)\.rb$}) { |m| "spec/#{m[1]}_spec.rb" }
7
+ watch('spec/spec_helper.rb') { "spec" }
8
+ watch(%r{^spec/support/(.+)\.rb$}) { "spec" }
9
+ end
10
+
data/LICENSE.txt ADDED
@@ -0,0 +1,22 @@
1
+ Copyright (c) 2012 Willian Fernandes
2
+
3
+ MIT License
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining
6
+ a copy of this software and associated documentation files (the
7
+ "Software"), to deal in the Software without restriction, including
8
+ without limitation the rights to use, copy, modify, merge, publish,
9
+ distribute, sublicense, and/or sell copies of the Software, and to
10
+ permit persons to whom the Software is furnished to do so, subject to
11
+ the following conditions:
12
+
13
+ The above copyright notice and this permission notice shall be
14
+ included in all copies or substantial portions of the Software.
15
+
16
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
19
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
20
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
21
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
22
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/README.md ADDED
@@ -0,0 +1,142 @@
1
+ [![Build Status](https://secure.travis-ci.org/willian/cloud_search.png)](http://travis-ci.org/willian/cloud_search)
2
+
3
+ # CloudSearch
4
+
5
+ This is a simple Ruby wrapper around Amazon's CloudSearch API. It supports searching (with both simple and boolean queries), pagination
6
+ and document indexing.
7
+
8
+ ## Installation
9
+
10
+ Add this line to your application's Gemfile:
11
+
12
+ gem "eden_cloud_search"
13
+
14
+ And then execute:
15
+
16
+ $ bundle
17
+
18
+ Or install it yourself as:
19
+
20
+ $ gem install eden_cloud_search
21
+
22
+ ## Usage
23
+
24
+ The example below uses Amazon's example database called `imdb-movies`:
25
+
26
+ ### Use your AWS CloudSearch configuration
27
+ ``` ruby
28
+ CloudSearch.configure do |config|
29
+ config.domain_id = "pl6u4t3elu7dhsbwaqbsy3y6be"
30
+ config.domain_name = "imdb-movies"
31
+ end
32
+ ```
33
+
34
+ ### Search for 'star wars' on 'imdb-movies'
35
+ ``` ruby
36
+ searcher = CloudSearch::Searcher.new
37
+ resp = searcher.with_fields(:actor, :director, :title, :year, :text_relevance)
38
+ .with_query("star wars")
39
+ .search
40
+ ```
41
+
42
+ ### Or you can search using part of the name
43
+ ``` ruby
44
+ searcher = CloudSearch::Searcher.new
45
+ resp = searcher.with_fields(:actor, :director, :title, :year, :text_relevance)
46
+ .with_query("matri*")
47
+ .search
48
+ ```
49
+
50
+ ### You can also search using boolean queries
51
+ ``` ruby
52
+ searcher = CloudSearch::Searcher.new
53
+ resp = searcher.with_fields(:actor, :director, :title, :year, :text_relevance)
54
+ .as_boolean_query
55
+ .with_query("year:2000")
56
+ .search
57
+ ```
58
+
59
+ ### You can use weighted fields in your search
60
+ ``` ruby
61
+ searcher = CloudSearch::Searcher.new
62
+ resp = searcher.with_fields(:actor, :director, :title, :year, :text_relevance)
63
+ .with_weights(:title => 3, :actor => 2, :default_weight => 1)
64
+ .as_boolean_query
65
+ .with_query("year:2000")
66
+ .search
67
+ ```
68
+
69
+ ### You can sort the result using a rank expression (previously created on your CloudSearch domain)
70
+ ``` ruby
71
+ searcher = CloudSearch::Searcher.new
72
+ resp = searcher.with_fields(:actor, :director, :title, :year, :text_relevance)
73
+ .with_query("matrix")
74
+ .ranked_with("my_rank_expression")
75
+ ```
76
+
77
+ If you want to rank using descending order, just prepend the expression name with a '-' sign:
78
+
79
+ ``` ruby
80
+ resp = searcher.with_fields(:actor, :director, :title, :year, :text_relevance)
81
+ .with_query("matrix")
82
+ .ranked_with("-my_rank_expression")
83
+ ```
84
+
85
+ ## Results
86
+ ``` ruby
87
+ resp.results.each do |result|
88
+ movie = result["data"]
89
+
90
+ # List of actors on the movie
91
+ movie["actor"]
92
+
93
+ # Movie's name
94
+ movie["title"]
95
+
96
+ # A rank number used to sort the results
97
+ # The `text_relevance` key is added by AWS CloudSearch
98
+ movie["text_relevance"]
99
+ end
100
+ ```
101
+
102
+ ## Pagination
103
+
104
+ The results you get back are (currently) API-compatible with will\_paginate:
105
+
106
+ ``` ruby
107
+ searcher = CloudSearch::Searcher.new
108
+ resp = searcher.with_fields(:actor, :director, :title, :year, :text_relevance)
109
+ .with_query("star wars")
110
+ .with_items_per_page(30)
111
+ .at_page(10)
112
+ .search
113
+
114
+ resp.total_entries #=> 5000
115
+ resp.total_pages #=> 167
116
+ resp.current_page #=> 10
117
+ resp.offset #=> 300
118
+ resp.page_size #=> 30
119
+ ```
120
+
121
+ ## Indexing documents
122
+
123
+ ``` ruby
124
+ document = CloudSearch::Document.new :type => "add", # or "delete"
125
+ :version => 123,
126
+ :id => 680,
127
+ :lang => :en,
128
+ :fields => {:title => "Lord of the Rings"}
129
+
130
+ indexer = CloudSearch::Indexer.new
131
+ indexer << document # add as many documents as you want (CloudSearch currently sets a limit of 5MB per document batch)
132
+ indexer.index
133
+ ```
134
+
135
+ ## Contributing
136
+
137
+ 1. Fork it
138
+ 2. Create your feature branch (`git checkout -b my-new-feature`)
139
+ 3. Commit your changes (`git commit -am 'Add some feature'`)
140
+ 4. Push to the branch (`git push origin my-new-feature`)
141
+ 5. Create new Pull Request
142
+
data/Rakefile ADDED
@@ -0,0 +1,9 @@
1
+ require "bundler/gem_tasks"
2
+
3
+ require "rspec/core/rake_task"
4
+
5
+ RSpec::Core::RakeTask.new
6
+
7
+ task :default => :spec
8
+ task :test => :spec
9
+
@@ -0,0 +1,30 @@
1
# -*- encoding: utf-8 -*-
# Gem specification for eden_cloud_search.
# The version is read from lib/eden_cloud_search/version.rb, so lib/ is put on
# the load path before that file is required.
lib = File.expand_path('../lib', __FILE__)
$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
require 'eden_cloud_search/version'

Gem::Specification.new do |gem|
  gem.name          = "eden_cloud_search"
  gem.version       = CloudSearch::VERSION
  gem.platform      = Gem::Platform::RUBY
  gem.authors       = ["Willian Fernandes"]
  gem.email         = ["willian@willianfernandes.com.br"]
  gem.homepage      = "http://rubygems.org/gems/eden_cloud_search"
  gem.summary       = "A wrapper to Amazon CloudSearch's API"
  gem.description   = gem.summary

  # The packaged file list is whatever git tracks; executables and test files
  # are derived from that list by path prefix.
  gem.files         = `git ls-files`.split($/)
  gem.executables   = gem.files.grep(%r{^bin/}).map { |f| File.basename(f) }
  gem.test_files    = gem.files.grep(%r{^(test|spec|features)/})
  gem.require_paths = ["lib"]

  gem.add_development_dependency "pry"
  gem.add_development_dependency "rake"
  gem.add_development_dependency "rspec",     "~> 2.11"
  gem.add_development_dependency "simplecov", "~> 0.6"
  gem.add_development_dependency "vcr",       "~> 2.2"
  gem.add_development_dependency "webmock"

  gem.add_dependency "rest-client", "~> 1.6.7"
end
30
+
@@ -0,0 +1,62 @@
1
require "singleton"

module CloudSearch
  # Raised when a mandatory configuration parameter has not been set.
  class MissingConfigurationError < StandardError
    # @param parameter_name [#to_s] name of the missing parameter, interpolated into the message
    def initialize(parameter_name)
      super "Missing '#{parameter_name}' configuration parameter"
    end
  end

  # Mixin that gives the including class a private guard verifying that the
  # mandatory settings (domain_id and domain_name) are present.
  module ConfigurationChecking
    private

    # @raise [MissingConfigurationError] if domain_id or domain_name is nil
    def check_configuration_parameters
      %w(domain_id domain_name).each do |config|
        raise MissingConfigurationError.new(config) if CloudSearch.config[config].nil?
      end
    end
  end

  # Process-wide (Singleton) holder of the CloudSearch settings.
  #
  # Every URL has a writer so it can be overridden explicitly. When a URL has
  # not been overridden it is derived from the current domain/region/API
  # version on each read, so changing domain_name, domain_id, region or
  # api_version after the first read is reflected in the URLs.
  class Config
    include Singleton

    attr_accessor :domain_id, :domain_name
    attr_writer :api_version, :configuration_url, :document_url, :region, :search_url

    # Hash-style read access to a setting, e.g. config["domain_id"].
    #
    # @param config [String, Symbol] name of a public reader on this object
    # @return [Object] the value returned by that reader
    # @raise [NoMethodError] if no public method with that name exists
    def [](config)
      # public_send (rather than __send__) so private helpers are not reachable.
      public_send(config)
    end

    # @return [String] the API version, "2011-02-01" unless overridden
    def api_version
      @api_version ||= "2011-02-01"
    end

    # @return [String] the explicit override, or the URL built from the current region
    def configuration_url
      @configuration_url || "https://cloudsearch.#{region}.amazonaws.com"
    end

    # @return [String] the explicit override, or "http://doc-" followed by the derived base path
    def document_url
      @document_url || "http://doc-#{base_path}"
    end

    # @return [String] the region, "us-east-1" unless overridden
    def region
      @region ||= "us-east-1"
    end

    # @return [String] the explicit override, or "http://search-" followed by the derived base path
    def search_url
      @search_url || "http://search-#{base_path}"
    end

    private

    # Host and path shared by the document and search URLs.
    def base_path
      "#{domain_name}-#{domain_id}.#{region}.cloudsearch.amazonaws.com/#{api_version}"
    end
  end
end
62
+
@@ -0,0 +1,95 @@
1
module CloudSearch
  # A single entry of a CloudSearch document batch: either an "add" (carrying
  # lang and fields) or a "delete" (id and version only).
  class Document
    # Largest version accepted by the validations below.
    MAX_VERSION = 4294967295

    attr_accessor :type, :lang, :fields
    attr_reader :errors, :id, :version

    # @param attributes [Hash] attribute name => value; each pair is assigned
    #   through the matching writer (type, id, version, lang, fields)
    # @raise [NoMethodError] if a key has no matching writer
    def initialize(attributes = {})
      attributes.each_pair { |key, value| self.__send__("#{key}=", value) }
    end

    # Stores the id as a String so numeric ids can be passed in.
    def id=(_id)
      @id = _id.to_s
    end

    # Stores the version as an Integer when it can be converted; otherwise the
    # raw value is kept so that #valid? can report it.
    def version=(_version)
      # Strings are parsed in base 10 explicitly: plain Integer("08") raises
      # (treated as invalid octal), which used to leave a String behind that
      # later blew up in the MAX_VERSION comparison.
      @version = _version.is_a?(String) ? Integer(_version, 10) : Integer(_version)
    rescue ArgumentError, TypeError
      @version = _version
    end

    # Runs all validations and rebuilds #errors.
    # lang and fields are only validated for "add" documents.
    #
    # @return [Boolean] true when no validation produced a message
    def valid?
      @errors = {}
      run_id_validations
      run_version_validations
      run_type_validations
      if type == "add"
        run_lang_validations
        run_fields_validations
      end
      errors.empty?
    end

    # @return [Hash] type/id/version, plus lang/fields for "add" documents
    def as_json
      {:type => type, :id => id, :version => version}.tap do |hash|
        hash.merge!(:lang => lang, :fields => fields) if type == "add"
      end
    end

    # Accepts (and forwards) the generator state so the document can be
    # embedded in other structures, e.g. JSON.generate([document]).
    #
    # @return [String] JSON rendering of #as_json
    def to_json(*args)
      as_json.to_json(*args)
    end

    private

    def run_id_validations
      validate :id do |messages|
        messages << "can't be blank" if blank?(:id)
        # Lowercase letters, digits and underscores only; must not start with
        # an underscore. A single-character id is acceptable.
        messages << "is invalid" unless blank?(:id) || id =~ /\A[a-z0-9][a-z0-9_]*\z/
      end
    end

    def run_version_validations
      validate :version do |messages|
        messages << "can't be blank" if blank?(:version)
        messages << "is invalid" unless blank?(:version) || version.to_s =~ /\A[0-9]+\z/
        # Only reached for all-digit versions, so to_i is a lossless conversion.
        messages << "must be less than #{MAX_VERSION + 1}" if messages.empty? && version.to_s.to_i > MAX_VERSION
      end
    end

    def run_type_validations
      validate :type do |messages|
        messages << "can't be blank" if blank?(:type)
        messages << "is invalid" if !blank?(:type) && !%w(add delete).include?(type)
      end
    end

    def run_lang_validations
      validate :lang do |messages|
        messages << "can't be blank" if blank?(:lang)
        # to_s so Symbols (:en) and Strings are treated alike.
        messages << "is invalid" unless blank?(:lang) || lang.to_s =~ /\A[a-z]{2}\z/
      end
    end

    def run_fields_validations
      validate :fields do |messages|
        messages << "can't be empty" if fields.nil?
        messages << "must be an instance of Hash" if !fields.nil? && !fields.instance_of?(Hash)
      end
    end

    # True when the attribute's string form is empty or whitespace only.
    def blank?(attr)
      self.__send__(attr).to_s.strip.length.zero?
    end

    # Collects the messages produced by the block and records them under
    # +attr+ in #errors, but only when there is at least one.
    def validate(attr, &block)
      messages = []
      yield messages
      errors[attr] = messages unless messages.empty?
    end
  end
end
95
+
@@ -0,0 +1,3 @@
1
module CloudSearch
  # Error raised by Searcher#params when neither a query nor a boolean query
  # has been supplied.
  class InsufficientParametersException < StandardError
  end
end
@@ -0,0 +1,44 @@
1
module CloudSearch
  # Collects validated documents and posts them as one batch to the
  # configured document endpoint.
  class Indexer
    include ::CloudSearch::ConfigurationChecking

    def initialize
      @documents = []
    end

    # Queues a document for the next #index call.
    #
    # @param document [#valid?, #as_json] the document to queue
    # @raise [InvalidDocument] if the document does not pass its own validation
    def <<(document)
      raise InvalidDocument.new(document) unless document.valid?
      @documents << document
    end

    alias :add :<<

    # @return [Array] a frozen snapshot of the queued documents
    def documents
      # Freeze a copy, not the internal array: freezing @documents itself made
      # every later #<< raise once this reader had been called.
      @documents.dup.freeze
    end

    # Posts the queued documents as a JSON batch.
    #
    # @return [Array] two elements: the parsed JSON response body and a
    #   human-readable summary line (status code, length, URL)
    # @raise [MissingConfigurationError] if domain_id or domain_name is not configured
    def index
      endpoint = url
      cloud_search_response = RestClient.post endpoint, documents_json, headers
      message = "#{cloud_search_response.code} - #{cloud_search_response.length} bytes\n#{endpoint}\n"
      response = JSON.parse cloud_search_response.body

      [response, message]
    end

    private

    def headers
      {"Content-Type" => "application/json", "Accept" => "application/json"}
    end

    def documents_json
      JSON.unparse(@documents.map(&:as_json))
    end

    # Batch endpoint; checks the mandatory configuration first.
    def url
      check_configuration_parameters

      "#{CloudSearch.config.document_url}/documents/batch"
    end
  end
end
@@ -0,0 +1,11 @@
1
module CloudSearch
  # Raised when an invalid document is handed to the indexer. The message
  # lists every attribute with its validation errors, e.g.
  # "id: can't be blank; lang: is invalid".
  class InvalidDocument < StandardError
    # @param document [#valid?, #errors] the rejected document; #valid? is
    #   called first so that #errors is populated
    def initialize(document)
      document.valid?
      # Attributes with an empty error list are dropped before joining so they
      # do not leave stray "; " separators in the message.
      details = document.errors.map do |attribute, messages|
        "#{attribute}: #{messages.join(", ")}" unless messages.empty?
      end
      super details.compact.join("; ")
    end
  end
end
@@ -0,0 +1,73 @@
1
module CloudSearch
  # Wraps the JSON body of a search response and exposes hits, facets and
  # pagination figures. All readers are safe to call before a body has been
  # assigned; they then report an empty result.
  class SearchResponse
    attr_writer :items_per_page
    attr_reader :current_page, :total_pages, :body, :facets
    attr_accessor :http_code

    # Parses the raw JSON body and recomputes pagination and facets.
    #
    # @param body [String, nil] raw JSON; nil is treated as an empty object
    # @raise [JSON::ParserError] if the body is not valid JSON
    def body=(body)
      @body = JSON.parse(body || "{}")
      calculate_pages
      build_facets
      @body
    end

    # @return [Array] the "hit" entries of the response, or [] when absent
    def results
      _hits["hit"] || []
    end

    # @return [Integer] the "found" count of the response, or 0 when absent
    def hits
      _hits["found"] || 0
    end

    def found?
      hits > 0
    end

    # @return [Integer] the configured page size, 10 unless set
    def items_per_page
      @items_per_page || 10
    end

    def has_pagination?
      hits > items_per_page
    end

    # @return [Integer] index of the first item of the current page; 0 when nothing was found
    def offset
      return 0 unless found?
      (@current_page - 1) * items_per_page
    end

    alias :page_size :items_per_page
    alias :limit_value :items_per_page
    alias :total_entries :hits
    alias :any? :found?

    private

    # Derives total_pages (never less than 1) and current_page (clamped to
    # total_pages) from the found count, the start offset and the page size.
    def calculate_pages
      per_page = items_per_page
      @total_pages = (hits + per_page - 1) / per_page # integer ceiling
      @total_pages = 1 if @total_pages == 0

      start = _hits["start"] || 0
      @current_page = [(start / per_page) + 1, @total_pages].min
    end

    # Flattens facets that carry "constraints" into value => count hashes;
    # any other facet result is kept as returned.
    def build_facets
      @facets = {}
      raw_facets = parsed_body['facets']
      return unless raw_facets

      raw_facets.each do |facet, result|
        constraints = result['constraints']
        @facets[facet] = if constraints
          constraints.each_with_object({}) { |item, counts| counts[item['value']] = item['count'] }
        else
          result
        end
      end
    end

    def _hits
      parsed_body["hits"] || {}
    end

    # The parsed body, or an empty hash when no body has been assigned yet.
    def parsed_body
      body || {}
    end
  end
end
@@ -0,0 +1,118 @@
1
require 'uri'
require 'cgi'

module CloudSearch
  # Chainable builder for a search request. Every with_*/ranked_by/at_page
  # method returns self; #search performs the request and returns the
  # SearchResponse.
  class Searcher
    include ::CloudSearch::ConfigurationChecking

    attr_reader :weights

    def initialize
      @response        = SearchResponse.new
      @query           = ''
      @boolean_queries = {}
      @filters         = {}
      @facets          = []
      @fields          = []
    end

    # Performs the request against "<search_url>/search" with #params.
    #
    # @return [SearchResponse] the response object, filled with HTTP code and body
    # @raise [MissingConfigurationError, InsufficientParametersException] see #params
    def search
      cloud_search_response = RestClient.get "#{CloudSearch.config.search_url}/search", :params => params
      @response.http_code = cloud_search_response.code
      @response.body      = cloud_search_response.body

      @response
    end

    # @param query [String, nil] free-text query; nil resets it to ''
    def with_query(query)
      @query = query || ''
      self
    end

    # Adds field => value(s) conditions that are AND-ed together.
    # Scalar values are wrapped in an array; the caller's hash is left untouched.
    #
    # @param queries [Hash] field name => value or list of values
    def with_boolean_query(queries)
      queries.each do |field, values|
        @boolean_queries[field] = values.respond_to?(:map) ? values : [values]
      end
      self
    end

    # @param filters [Hash] extra request parameters, merged into #params as given
    def with_filters(filters)
      @filters = filters
      self
    end

    def with_facets(*facets)
      @facets += facets
      self
    end

    # @param rank_expression [String] sent as the 'rank' request parameter
    def ranked_by(rank_expression)
      @rank = rank_expression
      self
    end

    # The README refers to this method as ranked_with; keep both names working.
    alias :ranked_with :ranked_by

    def with_fields(*fields)
      @fields += fields
      self
    end

    def with_items_per_page(items_per_page)
      @response.items_per_page = items_per_page
      self
    end

    # @param page [Integer, nil] 1-based page number; values below 1 become 1
    def at_page(page)
      @page_number = (page && page < 1) ? 1 : page
      self
    end

    # Builds the request parameters, dropping nil or empty values.
    #
    # @return [Hash] request parameter name => value
    # @raise [MissingConfigurationError] if domain_id or domain_name is not configured
    # @raise [InsufficientParametersException] if neither a query nor a boolean query was given
    def params
      check_configuration_parameters
      if @query.empty? && @boolean_queries.empty?
        raise InsufficientParametersException.new('At least query or boolean_query must be defined.')
      end

      # NOTE(review): values are escaped here and then handed to RestClient via
      # :params, which may escape them a second time - confirm against the
      # rest-client version in use before changing.
      request = {
        'q'             => query,
        'bq'            => boolean_query,
        'size'          => items_per_page,
        'start'         => start,
        # URI.escape was removed in Ruby 3.0; the default parser's #escape is
        # what URI.escape delegated to.
        'return-fields' => URI::DEFAULT_PARSER.escape(@fields.join(",")),
        'facet'         => @facets.join(','),
        'rank'          => @rank
      }
      request.merge! @filters
      request.delete_if { |_, value| value.nil? || value.to_s.empty? }

      request
    end

    def items_per_page
      @response.items_per_page
    end

    # @return [Integer] the requested page, 1 unless set
    def page_number
      @page_number || 1
    end

    # @return [Integer] index of the first item of the requested page
    def start
      return 0 if page_number <= 1
      items_per_page * (page_number - 1)
    end

    private

    def query
      CGI.escape(@query)
    end

    # Renders the boolean conditions as "(and field:'a|b' other:'c')",
    # or '' when there are none.
    def boolean_query
      return '' if @boolean_queries.empty?

      clauses = @boolean_queries.map do |key, values|
        # to_s so non-String values (e.g. an Integer year) can be escaped.
        "#{key}:'#{values.map { |value| CGI.escape(value.to_s) }.join('|')}'"
      end
      "(and #{clauses.join(' ')})"
    end
  end
end
+
@@ -0,0 +1,3 @@
1
module CloudSearch
  # Gem version; read by the gemspec. Frozen so it cannot be mutated at runtime.
  VERSION = "0.1.10".freeze
end