eden_cloud_search 0.1.10

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (37) hide show
  1. data/.gitignore +19 -0
  2. data/.rspec +1 -0
  3. data/.travis.yml +3 -0
  4. data/Gemfile +13 -0
  5. data/Guardfile +10 -0
  6. data/LICENSE.txt +22 -0
  7. data/README.md +142 -0
  8. data/Rakefile +9 -0
  9. data/eden_cloud_search.gemspec +30 -0
  10. data/lib/eden_cloud_search/config.rb +62 -0
  11. data/lib/eden_cloud_search/document.rb +95 -0
  12. data/lib/eden_cloud_search/exceptions.rb +3 -0
  13. data/lib/eden_cloud_search/indexer.rb +44 -0
  14. data/lib/eden_cloud_search/invalid_document.rb +11 -0
  15. data/lib/eden_cloud_search/search_response.rb +73 -0
  16. data/lib/eden_cloud_search/searcher.rb +118 -0
  17. data/lib/eden_cloud_search/version.rb +3 -0
  18. data/lib/eden_cloud_search.rb +24 -0
  19. data/spec/cloud_search/config_spec.rb +23 -0
  20. data/spec/cloud_search/document_spec.rb +335 -0
  21. data/spec/cloud_search/indexer_spec.rb +146 -0
  22. data/spec/cloud_search/invalid_document_spec.rb +9 -0
  23. data/spec/cloud_search/search_response_spec.rb +220 -0
  24. data/spec/cloud_search/searcher_spec.rb +254 -0
  25. data/spec/fixtures/full.json +1 -0
  26. data/spec/fixtures/vcr_cassettes/index/request/add.yml +38 -0
  27. data/spec/fixtures/vcr_cassettes/index/request/add_in_batch.yml +40 -0
  28. data/spec/fixtures/vcr_cassettes/index/request/delete.yml +37 -0
  29. data/spec/fixtures/vcr_cassettes/search/request/facets.yml +121 -0
  30. data/spec/fixtures/vcr_cassettes/search/request/facets_with_constraints.yml +123 -0
  31. data/spec/fixtures/vcr_cassettes/search/request/full.yml +114 -0
  32. data/spec/fixtures/vcr_cassettes/search/request/paginated.yml +36 -0
  33. data/spec/fixtures/vcr_cassettes/search/request/paginated_first_page.yml +56 -0
  34. data/spec/fixtures/vcr_cassettes/search/request/paginated_second_page.yml +81 -0
  35. data/spec/spec_helper.rb +17 -0
  36. data/spec/support/vcr.rb +5 -0
  37. metadata +212 -0
data/.gitignore ADDED
@@ -0,0 +1,19 @@
1
+ *.gem
2
+ *.sw*
3
+ *.rbc
4
+ .bundle
5
+ .config
6
+ .yardoc
7
+ Gemfile.lock
8
+ InstalledFiles
9
+ _yardoc
10
+ coverage
11
+ doc/
12
+ lib/bundler/man
13
+ pkg
14
+ rdoc
15
+ spec/reports
16
+ test/tmp
17
+ test/version_tmp
18
+ tmp
19
+ bin
data/.rspec ADDED
@@ -0,0 +1 @@
1
+ --color --format documentation
data/.travis.yml ADDED
@@ -0,0 +1,3 @@
1
+ language: ruby
2
+ rvm:
3
+ - 1.9.3
data/Gemfile ADDED
@@ -0,0 +1,13 @@
1
+ source 'https://rubygems.org'
2
+
3
+ # Specify your gem's dependencies in eden_cloud_search.gemspec
4
+ gemspec
5
+
6
# Tooling that is only needed while running the test suite.
group :test do
  %w[growl guard guard-rspec rb-fsevent debugger].each { |tool| gem tool }
end
13
+
data/Guardfile ADDED
@@ -0,0 +1,10 @@
1
+ # A sample Guardfile
2
+ # More info at https://github.com/guard/guard#readme
3
+
4
# Re-run specs whenever a spec, a lib file, or spec support code changes.
guard('rspec', version: 2) do
  # A changed spec file is run directly.
  watch(%r{^spec/.+_spec\.rb$})

  # A changed lib file maps to the spec with the same relative path.
  watch(%r{^lib/(.+)\.rb$}) do |match|
    "spec/#{match[1]}_spec.rb"
  end

  # Helper or support changes trigger the whole spec directory.
  watch('spec/spec_helper.rb') do
    "spec"
  end
  watch(%r{^spec/support/(.+)\.rb$}) do
    "spec"
  end
end
10
+
data/LICENSE.txt ADDED
@@ -0,0 +1,22 @@
1
+ Copyright (c) 2012 Willian Fernandes
2
+
3
+ MIT License
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining
6
+ a copy of this software and associated documentation files (the
7
+ "Software"), to deal in the Software without restriction, including
8
+ without limitation the rights to use, copy, modify, merge, publish,
9
+ distribute, sublicense, and/or sell copies of the Software, and to
10
+ permit persons to whom the Software is furnished to do so, subject to
11
+ the following conditions:
12
+
13
+ The above copyright notice and this permission notice shall be
14
+ included in all copies or substantial portions of the Software.
15
+
16
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
19
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
20
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
21
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
22
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/README.md ADDED
@@ -0,0 +1,142 @@
1
+ [![Build Status](https://secure.travis-ci.org/willian/cloud_search.png)](http://travis-ci.org/willian/cloud_search)
2
+
3
+ # CloudSearch
4
+
5
+ This is a simple Ruby wrapper around Amazon's CloudSearch API. It has support for searching (with both simple and boolean queries), pagination
6
+ and document indexing.
7
+
8
+ ## Installation
9
+
10
+ Add this line to your application's Gemfile:
11
+
12
+ gem "eden_cloud_search"
13
+
14
+ And then execute:
15
+
16
+ $ bundle
17
+
18
+ Or install it yourself as:
19
+
20
+ $ gem install eden_cloud_search
21
+
22
+ ## Usage
23
+
24
+ The example below uses Amazon's example database called `imdb-movies`:
25
+
26
+ ### Use your AWS CloudSearch configuration
27
+ ``` ruby
28
+ CloudSearch.configure do |config|
29
+ config.domain_id = "pl6u4t3elu7dhsbwaqbsy3y6be"
30
+ config.domain_name = "imdb-movies"
31
+ end
32
+ ```
33
+
34
+ ### Search for 'star wars' on 'imdb-movies'
35
+ ``` ruby
36
+ searcher = CloudSearch::Searcher.new
37
+ resp = searcher.with_fields(:actor, :director, :title, :year, :text_relevance)
38
+ .with_query("star wars")
39
+ .search
40
+ ```
41
+
42
+ ### Or you can search using part of the name
43
+ ``` ruby
44
+ searcher = CloudSearch::Searcher.new
45
+ resp = searcher.with_fields(:actor, :director, :title, :year, :text_relevance)
46
+ .with_query("matri*")
47
+ .search
48
+ ```
49
+
50
+ ### You can also search using boolean queries
51
+ ``` ruby
52
+ searcher = CloudSearch::Searcher.new
53
+ resp = searcher.with_fields(:actor, :director, :title, :year, :text_relevance)
54
+ .as_boolean_query
55
+ .with_query("year:2000")
56
+ .search
57
+ ```
58
+
59
+ ### You can use weighted fields in your search
60
+ ``` ruby
61
+ searcher = CloudSearch::Searcher.new
62
+ resp = searcher.with_fields(:actor, :director, :title, :year, :text_relevance)
63
+ .with_weights(:title => 3, :actor => 2, :default_weight => 1)
64
+ .as_boolean_query
65
+ .with_query("year:2000")
66
+ .search
67
+ ```
68
+
69
+ ### You can sort the result using a rank expression (previously created on your CloudSearch domain)
70
+ ``` ruby
71
+ searcher = CloudSearch::Searcher.new
72
+ resp = searcher.with_fields(:actor, :director, :title, :year, :text_relevance)
73
+ .with_query("matrix")
74
+ .ranked_by("my_rank_expression")
75
+ ```
76
+
77
+ If you want to rank using descending order, just prepend the expression name with a '-' sign:
78
+
79
+ ``` ruby
80
+ resp = searcher.with_fields(:actor, :director, :title, :year, :text_relevance)
81
+ .with_query("matrix")
82
+ .ranked_by("-my_rank_expression")
83
+ ```
84
+
85
+ ## Results
86
+ ``` ruby
87
+ resp.results.each do |result|
88
+ movie = result["data"]
89
+
90
+ # List of actors on the movie
91
+ movie["actor"]
92
+
93
+ # Movie's name
94
+ movie["title"]
95
+
96
+ # A rank number used to sort the results
97
+ # The `text_relevance` key is added by AWS CloudSearch
98
+ movie["text_relevance"]
99
+ end
100
+ ```
101
+
102
+ ## Pagination
103
+
104
+ The results you get back are (currently) API-compatible with will\_paginate:
105
+
106
+ ``` ruby
107
+ searcher = CloudSearch::Searcher.new
108
+ resp = searcher.with_fields(:actor, :director, :title, :year, :text_relevance)
109
+ .with_query("star wars")
110
+ .with_items_per_page(30)
111
+ .at_page(10)
112
+ .search
113
+
114
+ resp.total_entries #=> 5000
115
+ resp.total_pages #=> 167
116
+ resp.current_page #=> 10
117
+ resp.offset #=> 270
118
+ resp.page_size #=> 30
119
+ ```
120
+
121
+ ## Indexing documents
122
+
123
+ ``` ruby
124
+ document = CloudSearch::Document.new :type => "add", # or "delete"
125
+ :version => 123,
126
+ :id => 680,
127
+ :lang => :en,
128
+ :fields => {:title => "Lord of the Rings"}
129
+
130
+ indexer = CloudSearch::Indexer.new
131
+ indexer << document # add as many documents as you want (CloudSearch currently sets a limit of 5MB per document batch)
132
+ indexer.index
133
+ ```
134
+
135
+ ## Contributing
136
+
137
+ 1. Fork it
138
+ 2. Create your feature branch (`git checkout -b my-new-feature`)
139
+ 3. Commit your changes (`git commit -am 'Add some feature'`)
140
+ 4. Push to the branch (`git push origin my-new-feature`)
141
+ 5. Create new Pull Request
142
+
data/Rakefile ADDED
@@ -0,0 +1,9 @@
1
+ require "bundler/gem_tasks"
2
+
3
+ require "rspec/core/rake_task"
4
+
5
# Register the RSpec rake task (relied on below under the name :spec).
RSpec::Core::RakeTask.new

# Plain `rake` and `rake test` both run the spec task.
[:default, :test].each { |entry_point| task entry_point => :spec }
9
+
@@ -0,0 +1,30 @@
1
+ # -*- encoding: utf-8 -*-
2
+ lib = File.expand_path('../lib', __FILE__)
3
+ $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
4
+ require 'eden_cloud_search/version'
5
+
6
# Gem specification for eden_cloud_search.
#
# The version comes from CloudSearch::VERSION and the file list from
# `git ls-files`, so the gem must be built from inside a git checkout.
Gem::Specification.new do |gem|
  gem.name          = "eden_cloud_search"
  gem.version       = CloudSearch::VERSION
  gem.platform      = Gem::Platform::RUBY
  gem.authors       = ["Willian Fernandes"]
  gem.email         = ["willian@willianfernandes.com.br"]
  gem.homepage      = "http://rubygems.org/gems/eden_cloud_search"
  # Fixed spelling ("wraper" -> "wrapper"); this text is shown to users on the registry.
  gem.summary       = "A wrapper to Amazon CloudSearch's API"
  gem.description   = gem.summary

  gem.files         = `git ls-files`.split($/)
  gem.executables   = gem.files.grep(%r{^bin/}).map { |f| File.basename(f) }
  gem.test_files    = gem.files.grep(%r{^(test|spec|features)/})
  gem.require_paths = ["lib"]

  gem.add_development_dependency "pry"
  gem.add_development_dependency "rake"
  gem.add_development_dependency "rspec",     "~> 2.11"
  gem.add_development_dependency "simplecov", "~> 0.6"
  gem.add_development_dependency "vcr",       "~> 2.2"
  gem.add_development_dependency "webmock"

  gem.add_dependency "rest-client", "~> 1.6.7"
end
30
+
@@ -0,0 +1,62 @@
1
+ require "singleton"
2
+
3
module CloudSearch
  # Raised when a required configuration parameter has not been set.
  class MissingConfigurationError < StandardError
    # parameter_name - name of the missing setting; interpolated into the message.
    def initialize(parameter_name)
      super "Missing '#{parameter_name}' configuration parameter"
    end
  end

  # Mixin that gives API clients a private guard for mandatory settings.
  module ConfigurationChecking
    private

    # Raises MissingConfigurationError for the first of +domain_id+ /
    # +domain_name+ whose configured value is nil.
    def check_configuration_parameters
      %w(domain_id domain_name).each do |config|
        raise MissingConfigurationError.new(config) if CloudSearch.config[config].nil?
      end
    end
  end

  # Process-wide settings (Singleton).
  #
  # +domain_id+ and +domain_name+ have no default. Every other setting has a
  # default that can be overridden through its writer. The URL settings are
  # derived from the domain/region/API version on each read unless explicitly
  # overridden, so changing +domain_name+, +domain_id+, +region+ or
  # +api_version+ later is reflected immediately instead of returning a URL
  # cached from the first access.
  class Config
    include Singleton

    attr_accessor :domain_id, :domain_name

    # Readers for these are defined below (they supply defaults), so only the
    # writers are generated here; this avoids "method redefined" warnings.
    attr_writer :api_version, :configuration_url, :document_url, :region, :search_url

    # Hash-style read access: config["domain_id"] returns the same value as
    # config.domain_id.
    def [](config)
      __send__(config)
    end

    # API version used in the request path. Defaults to "2011-02-01".
    def api_version
      @api_version ||= "2011-02-01"
    end

    # Explicit override if one was assigned, otherwise built from the region.
    def configuration_url
      @configuration_url || "https://cloudsearch.#{region}.amazonaws.com"
    end

    # Explicit override if one was assigned, otherwise the "doc-" endpoint of the domain.
    def document_url
      @document_url || "http://doc-#{base_path}"
    end

    # Region used in the endpoint host names. Defaults to "us-east-1".
    def region
      @region ||= "us-east-1"
    end

    # Explicit override if one was assigned, otherwise the "search-" endpoint of the domain.
    def search_url
      @search_url || "http://search-#{base_path}"
    end

    private

    # Host and path shared by the document and search endpoints.
    def base_path
      "#{domain_name}-#{domain_id}.#{region}.cloudsearch.amazonaws.com/#{api_version}"
    end
  end
end
62
+
@@ -0,0 +1,95 @@
1
module CloudSearch
  # One entry of a document batch: an "add" or a "delete" operation.
  #
  # Attributes are assigned through the setters, so +id+ is always stored as a
  # String and +version+ as an Integer whenever it can be converted. Call
  # #valid? to (re)build #errors; #errors is nil until #valid? has run.
  class Document
    # Largest version number accepted by #valid?.
    MAX_VERSION = 4294967295

    # Lowercase letters, digits and underscores; the first character must not
    # be an underscore. A single character is a valid id.
    ID_FORMAT = /\A[a-z0-9][a-z0-9_]*\z/
    VERSION_FORMAT = /\A[0-9]+\z/
    LANG_FORMAT = /\A[a-z]{2}\z/
    TYPES = %w(add delete).freeze

    attr_accessor :type, :lang, :fields
    attr_reader :errors, :id, :version

    # attributes - Hash of attribute name => value; each pair is assigned via
    #              the matching "name=" writer.
    def initialize(attributes = {})
      attributes.each_pair { |key, value| __send__("#{key}=", value) }
    end

    def id=(_id)
      @id = _id.to_s
    end

    # Stores the version as an Integer when possible; otherwise keeps the raw
    # value so that #valid? can report it.
    #
    # Strings are parsed as base 10: "09" becomes 9 instead of failing as a
    # malformed octal literal (which previously made #valid? crash when it
    # compared the leftover String with MAX_VERSION), and "012" is 12, not 10.
    def version=(_version)
      @version = _version.is_a?(String) ? Integer(_version, 10) : Integer(_version)
    rescue ArgumentError, TypeError
      @version = _version
    end

    # Runs all validations and returns true when no error was recorded.
    # +lang+ and +fields+ are only validated for "add" documents.
    def valid?
      @errors = {}
      run_id_validations
      run_version_validations
      run_type_validations
      if type == "add"
        run_lang_validations
        run_fields_validations
      end
      errors.empty?
    end

    # Hash representation; +lang+ and +fields+ are included for "add"
    # documents only.
    def as_json
      {:type => type, :id => id, :version => version}.tap do |hash|
        hash.merge!(:lang => lang, :fields => fields) if type == "add"
      end
    end

    # JSON string for this document. Accepts (and forwards) the generator
    # arguments the JSON library passes when the document is nested inside an
    # Array or Hash being serialized.
    def to_json(*args)
      as_json.to_json(*args)
    end

    private

    def run_id_validations
      validate :id do |messages|
        if blank?(:id)
          messages << "can't be blank"
        elsif id !~ ID_FORMAT
          messages << "is invalid"
        end
      end
    end

    def run_version_validations
      validate :version do |messages|
        if blank?(:version)
          messages << "can't be blank"
        elsif version.to_s !~ VERSION_FORMAT
          messages << "is invalid"
        end
        # Only compare once the value is known to be a plain non-negative number.
        messages << "must be less than #{MAX_VERSION + 1}" if messages.empty? && version > MAX_VERSION
      end
    end

    def run_type_validations
      validate :type do |messages|
        if blank?(:type)
          messages << "can't be blank"
        elsif !TYPES.include?(type)
          messages << "is invalid"
        end
      end
    end

    def run_lang_validations
      validate :lang do |messages|
        if blank?(:lang)
          messages << "can't be blank"
        elsif lang.to_s !~ LANG_FORMAT # to_s: lang may be given as a Symbol
          messages << "is invalid"
        end
      end
    end

    def run_fields_validations
      validate :fields do |messages|
        if fields.nil?
          messages << "can't be empty"
        elsif !fields.instance_of?(Hash)
          messages << "must be an instance of Hash"
        end
      end
    end

    # True when the attribute's string form is empty or whitespace only.
    def blank?(attr)
      __send__(attr).to_s.strip.empty?
    end

    # Yields an empty message list and records it under +attr+ only if the
    # block added something.
    def validate(attr)
      messages = []
      yield messages
      errors[attr] = messages unless messages.empty?
    end
  end
end
95
+
@@ -0,0 +1,3 @@
1
module CloudSearch
  # Error signalling that a request was built without the parameters it needs.
  InsufficientParametersException = Class.new(StandardError)
end
@@ -0,0 +1,44 @@
1
module CloudSearch
  # Collects validated documents and posts them as one batch to the
  # configured document endpoint.
  class Indexer
    include ::CloudSearch::ConfigurationChecking

    def initialize
      @documents = []
    end

    # Queues a document for the next #index call.
    #
    # Raises InvalidDocument when document.valid? is false.
    # Returns the internal document list (unchanged from the original
    # behaviour), so a chained `indexer << a << b` appends b directly to that
    # list without validating it; call << on the indexer for each document.
    def <<(document)
      raise InvalidDocument.new(document) unless document.valid?
      @documents << document
    end

    alias :add :<<

    # Read-only snapshot of the queued documents.
    #
    # A frozen copy is returned; freezing the internal array itself would
    # make every later << raise because the queue could no longer be modified.
    def documents
      @documents.dup.freeze
    end

    # Posts the queued documents as JSON to the batch endpoint.
    #
    # Returns a two-element Array: the parsed JSON response body and a summary
    # string of the form "<code> - <length> bytes\n<url>\n".
    # Raises MissingConfigurationError when domain_id or domain_name is unset.
    def index
      endpoint = url
      cloud_search_response = RestClient.post endpoint, documents_json, headers
      message = "#{cloud_search_response.code} - #{cloud_search_response.length} bytes\n#{endpoint}\n"
      response = JSON.parse cloud_search_response.body

      [response, message]
    end

    private

    def headers
      {"Content-Type" => "application/json", "Accept" => "application/json"}
    end

    # JSON array built from each queued document's #as_json.
    def documents_json
      JSON.generate(@documents.map(&:as_json))
    end

    # Batch endpoint URL; checks the mandatory configuration first.
    def url
      check_configuration_parameters

      "#{CloudSearch.config.document_url}/documents/batch"
    end
  end
end
@@ -0,0 +1,11 @@
1
module CloudSearch
  # Raised when a document that fails validation is handed to the indexer.
  #
  # The message lists every attribute with at least one error as
  # "attribute: message, message", with attributes separated by "; ".
  class InvalidDocument < StandardError
    # document - object responding to #valid? and #errors (attribute => list
    #            of messages). #valid? is called first so #errors is populated.
    def initialize(document)
      document.valid?
      # Skip attributes with an empty message list up front; mapping them to
      # nil and joining afterwards would leave stray "; " separators.
      failing = document.errors.reject { |_attribute, messages| messages.empty? }
      super failing.map { |attribute, messages| "#{attribute}: #{messages.join(", ")}" }.join("; ")
    end
  end
end
@@ -0,0 +1,73 @@
1
module CloudSearch
  # Parsed search response plus the pagination figures derived from it.
  #
  # Assigning #body= parses the JSON payload and computes #total_pages,
  # #current_page and #facets using the #items_per_page in effect at that
  # moment. Before a body is assigned the response behaves as an empty
  # result: #hits is 0, #results is [] and #offset is 0.
  class SearchResponse
    attr_writer :items_per_page
    attr_reader :current_page, :total_pages, :body, :facets
    attr_accessor :http_code

    # body - JSON String, or nil (treated as an empty JSON object).
    def body=(body)
      @body = JSON.parse(body || "{}")
      calculate_pages
      build_facets
      @body
    end

    # Array of hit entries, empty when the payload has none.
    def results
      _hits["hit"] || []
    end

    # Total number of matches reported by the payload (0 when absent).
    def hits
      _hits["found"] || 0
    end

    def found?
      hits > 0
    end

    # Page size; defaults to 10 when none was assigned.
    def items_per_page
      @items_per_page || 10
    end

    def has_pagination?
      hits > items_per_page
    end

    # Index of the first result of the current page; 0 when nothing was found.
    def offset
      return 0 unless found?
      (@current_page - 1) * items_per_page
    end

    alias :page_size :items_per_page
    alias :limit_value :items_per_page
    alias :total_entries :hits
    alias :any? :found?

    private

    # Derives the page count (at least 1) and the current page from the
    # payload's "found" and "start" values; the current page is capped at
    # the page count.
    def calculate_pages
      per_page = items_per_page
      @total_pages = [(hits + per_page - 1) / per_page, 1].max

      first_hit = _hits["start"] || 0
      @current_page = [(first_hit / per_page) + 1, @total_pages].min
    end

    # Facets that carry "constraints" are flattened to a value => count
    # Hash; any other facet result is stored untouched.
    def build_facets
      @facets = {}
      return unless body['facets']

      body['facets'].each do |facet, result|
        constraints = result['constraints']
        @facets[facet] =
          if constraints
            constraints.each_with_object({}) { |item, counts| counts[item['value']] = item['count'] }
          else
            result
          end
      end
    end

    # The payload's "hits" section, or {} when it is missing or no body has
    # been assigned yet (body is nil until #body= runs).
    def _hits
      (body || {})["hits"] || {}
    end
  end
end
@@ -0,0 +1,118 @@
1
+ require 'uri'
2
+
3
module CloudSearch
  # Chainable builder for search requests.
  #
  # Every with_*/ranked_by/at_page method returns self, so calls can be
  # chained; #search performs the request and returns the SearchResponse.
  class Searcher
    include ::CloudSearch::ConfigurationChecking

    attr_reader :weights

    def initialize
      @response = SearchResponse.new
      @query = ''
      @boolean_queries = {}
      @filters = {}
      @facets = []
      @fields = []
    end

    # Sends a GET request to "<search_url>/search" with #params and stores
    # the HTTP code and body on the response object, which is returned.
    def search
      cloud_search_response = RestClient.get "#{CloudSearch.config.search_url}/search", :params => params
      @response.http_code = cloud_search_response.code
      @response.body = cloud_search_response.body

      @response
    end

    # query - the text query; nil is treated as an empty query.
    def with_query(query)
      @query = query || ''
      self
    end

    # queries - Hash of field => value or field => list of values. Single
    #           values are wrapped in an Array.
    #
    # The normalized entries are written into the searcher's own hash; the
    # Hash passed in by the caller is left untouched.
    def with_boolean_query(queries)
      queries.each do |field, values|
        @boolean_queries[field] = values.respond_to?(:map) ? values : [values]
      end
      self
    end

    # filters - Hash of extra request parameters merged into #params; it
    #           replaces any filters set earlier.
    def with_filters(filters)
      @filters = filters
      self
    end

    def with_facets(*facets)
      @facets += facets
      self
    end

    # rank_expression - value sent as the 'rank' request parameter.
    def ranked_by(rank_expression)
      @rank = rank_expression
      self
    end

    def with_fields(*fields)
      @fields += fields
      self
    end

    def with_items_per_page(items_per_page)
      @response.items_per_page = items_per_page
      self
    end

    # page - 1-based page number; values below 1 are stored as 1, nil is kept
    #        (and read back as 1 by #page_number).
    def at_page(page)
      @page_number = (page && page < 1) ? 1 : page
      self
    end

    # Builds the request parameter Hash, dropping nil and empty values.
    #
    # Raises MissingConfigurationError when domain_id or domain_name is unset.
    # Raises InsufficientParametersException when neither a query nor a
    # boolean query has been given.
    def params
      check_configuration_parameters
      raise InsufficientParametersException.new('At least query or boolean_query must be defined.') if @query.empty? && @boolean_queries.empty?

      params = {
        'q'             => query,
        'bq'            => boolean_query,
        'size'          => items_per_page,
        'start'         => start,
        # URI.escape was removed in Ruby 3.0; the default parser's #escape
        # is the same implementation and is available on older Rubies too.
        'return-fields' => URI::DEFAULT_PARSER.escape(@fields.join(",")),
        'facet'         => @facets.join(','),
        'rank'          => @rank
      }
      params.merge! @filters
      params.delete_if { |_, v| v.nil? || v.to_s.empty? }

      params
    end

    def items_per_page
      @response.items_per_page
    end

    # Requested page, 1 when none was set.
    def page_number
      @page_number || 1
    end

    # Zero-based index of the first result of the requested page.
    def start
      return 0 if page_number <= 1
      items_per_page * (page_number - 1)
    end

    private

    def query
      CGI.escape(@query)
    end

    # Renders the boolean queries as "(and field:'a|b' other:'c')", or ''
    # when there are none. Values are converted with to_s before escaping so
    # non-String values (e.g. Integers) do not make CGI.escape raise.
    def boolean_query
      return '' if @boolean_queries.empty?

      bq = @boolean_queries.map do |key, values|
        "#{key}:'#{values.map { |e| CGI.escape(e.to_s) }.join('|')}'"
      end.join(' ')
      "(and #{bq})"
    end
  end
end
118
+
@@ -0,0 +1,3 @@
1
module CloudSearch
  # Gem version. Frozen so the shared constant string cannot be mutated in place.
  VERSION = "0.1.10".freeze
end