elastic_searchable 1.6 → 2.0.0

This diff shows the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
data/.gitignore CHANGED
@@ -18,8 +18,6 @@ pkg
18
18
  # test files
19
19
  test/*.log
20
20
  test/*.sqlite3
21
- spec/**/*.log
22
- spec/*.sqlite3
23
21
 
24
22
  # For vim:
25
23
  *.swp
data/.rvmrc CHANGED
@@ -1 +1 @@
1
- rvm use default@elastic_searchable --create
1
+ rvm use ruby-1.9.3@elastic_searchable --create
data/CONTRIBUTORS.txt CHANGED
@@ -1,5 +1,4 @@
1
1
  Ryan Sonnek - Original Author
2
- Geoff Hichborn - Implemented search result autoscrubbing
3
2
 
4
3
 
5
4
  Complete list of contributors:
data/Rakefile CHANGED
@@ -1,11 +1,13 @@
1
- require "bundler/gem_tasks"
1
+ require 'bundler'
2
+ Bundler::GemHelper.install_tasks
2
3
 
3
4
  require 'rake'
4
5
 
5
- require 'rspec/core/rake_task'
6
-
7
- desc "Run specs"
8
- RSpec::Core::RakeTask.new do |t|
6
+ require 'rake/testtask'
7
+ Rake::TestTask.new(:test) do |test|
8
+ test.libs << 'lib' << 'test'
9
+ test.pattern = 'test/**/test_*.rb'
10
+ test.verbose = true
9
11
  end
10
- task :default => :spec
11
- task :test => :spec
12
+ task :default => :test
13
+
@@ -21,12 +21,11 @@ Gem::Specification.new do |s|
21
21
 
22
22
  s.add_runtime_dependency(%q<activerecord>, [">= 3.0.5"])
23
23
  s.add_runtime_dependency(%q<httparty>, [">= 0.6.0"])
24
- s.add_runtime_dependency(%q<backgrounded>, ["~> 0.7.0"])
24
+ s.add_runtime_dependency(%q<backgrounded>, [">= 0.7.0"])
25
25
  s.add_runtime_dependency(%q<multi_json>, [">= 1.0.0"])
26
- s.add_development_dependency(%q<rake>)
27
- s.add_development_dependency(%q<sqlite3>)
28
- s.add_development_dependency(%q<pry>)
29
- s.add_development_dependency(%q<rspec>)
30
- s.add_development_dependency(%q<byebug>)
31
- s.add_development_dependency(%q<prefactory>)
26
+ s.add_development_dependency(%q<rake>, ["0.9.2.2"])
27
+ s.add_development_dependency(%q<sqlite3>, ["1.3.4"])
28
+ s.add_development_dependency(%q<pry>, ["0.9.6.2"])
29
+ s.add_development_dependency(%q<shoulda>, ["2.11.3"])
30
+ s.add_development_dependency(%q<mocha>, ["0.10.0"])
32
31
  end
@@ -8,9 +8,7 @@ require 'elastic_searchable/paginator'
8
8
  module ElasticSearchable
9
9
  module ActiveRecordExtensions
10
10
  # Valid options:
11
- # :index (optional) configure index to store data in. default to ElasticSearchable.default_index
12
11
  # :type (optional) configure type to store data in. defaults to model table name
13
- # :index_options (optional) configure index properties (ex: tokenizer)
14
12
  # :mapping (optional) configure field properties for this model (ex: skip analyzer for field)
15
13
  # :if (optional) reference symbol/proc condition to only index when condition is true
16
14
  # :unless (optional) reference symbol/proc condition to skip indexing when condition is true
@@ -29,6 +27,13 @@ module ElasticSearchable
29
27
  cattr_accessor :elastic_options
30
28
  self.elastic_options = options.symbolize_keys.merge(:unless => Array.wrap(options[:unless]).push(:elasticsearch_offline?))
31
29
 
30
+ if self.elastic_options[:index_options]
31
+ ActiveSupport::Deprecation.warn ":index_options has been deprecated. Use ElasticSearchable.index_settings instead.", caller
32
+ end
33
+ if self.elastic_options[:index]
34
+ ActiveSupport::Deprecation.warn ":index has been deprecated. Use ElasticSearchable.index_name instead.", caller
35
+ end
36
+
32
37
  extend ElasticSearchable::Indexing::ClassMethods
33
38
  extend ElasticSearchable::Queries
34
39
 
@@ -3,57 +3,28 @@ module ElasticSearchable
3
3
  module ClassMethods
4
4
  # delete all documents of this type in the index
5
5
  # http://www.elasticsearch.com/docs/elasticsearch/rest_api/admin/indices/delete_mapping/
6
- def clean_index
7
- ElasticSearchable.request :delete, index_type_path
6
+ def delete_mapping
7
+ ElasticSearchable.request :delete, index_mapping_path
8
8
  end
9
9
 
10
10
  # configure the index for this type
11
11
  # http://www.elasticsearch.com/docs/elasticsearch/rest_api/admin/indices/put_mapping/
12
- def update_index_mapping
13
- if mapping = self.elastic_options[:mapping]
14
- ElasticSearchable.request :put, index_type_path('_mapping'), :json_body => {index_type => mapping}
15
- end
16
- end
17
-
18
- # create the index
19
- # http://www.elasticsearch.org/guide/reference/api/admin-indices-create-index.html
20
- def create_index
21
- options = {}
22
- options.merge! :settings => self.elastic_options[:index_options] if self.elastic_options[:index_options]
23
- options.merge! :mappings => {index_type => self.elastic_options[:mapping]} if self.elastic_options[:mapping]
24
- ElasticSearchable.request :put, index_path, :json_body => options
25
- end
26
-
27
- # explicitly refresh the index, making all operations performed since the last refresh
28
- # available for search
29
- #
30
- # http://www.elasticsearch.com/docs/elasticsearch/rest_api/admin/indices/refresh/
31
- def refresh_index
32
- ElasticSearchable.request :post, index_path('_refresh')
33
- end
34
-
35
- # deletes the entire index
36
- # http://www.elasticsearch.com/docs/elasticsearch/rest_api/admin/indices/delete_index/
37
- def delete_index
38
- ElasticSearchable.request :delete, index_path
12
+ def create_mapping
13
+ return unless self.elastic_options[:mapping]
14
+ ElasticSearchable.request :put, index_mapping_path('_mapping'), :json_body => {index_type => mapping}
39
15
  end
40
16
 
41
17
  # delete one record from the index
42
18
  # http://www.elasticsearch.com/docs/elasticsearch/rest_api/delete/
43
19
  def delete_id_from_index(id)
44
- ElasticSearchable.request :delete, index_type_path(id)
20
+ ElasticSearchable.request :delete, index_mapping_path(id)
45
21
  rescue ElasticSearchable::ElasticError => e
46
22
  ElasticSearchable.logger.warn e
47
23
  end
48
24
 
49
25
  # helper method to generate elasticsearch url for this object type
50
- def index_type_path(action = nil)
51
- index_path [index_type, action].compact.join('/')
52
- end
53
-
54
- # helper method to generate elasticsearch url for this index
55
- def index_path(action = nil)
56
- ['', index_name, action].compact.join('/')
26
+ def index_mapping_path(action = nil)
27
+ ElasticSearchable.request_path [index_type, action].compact.join('/')
57
28
  end
58
29
 
59
30
  # reindex all records using bulk api
@@ -65,12 +36,12 @@ module ElasticSearchable
65
36
  #
66
37
  # TODO: move this to AREL relation to remove the options scope param
67
38
  def reindex(options = {})
68
- self.update_index_mapping
39
+ self.create_mapping
69
40
  options.reverse_merge! :page => 1, :per_page => 1000
70
41
  scope = options.delete(:scope) || self
71
42
  page = options[:page]
72
43
  per_page = options[:per_page]
73
- records = scope.limit(per_page).offset(per_page * (page -1)).to_a
44
+ records = scope.limit(per_page).offset(per_page * (page -1)).all
74
45
  while records.any? do
75
46
  ElasticSearchable.logger.debug "reindexing batch ##{page}..."
76
47
  actions = []
@@ -78,7 +49,7 @@ module ElasticSearchable
78
49
  next unless record.should_index?
79
50
  begin
80
51
  doc = ElasticSearchable.encode_json(record.as_json_for_index)
81
- actions << ElasticSearchable.encode_json({:index => {'_index' => index_name, '_type' => index_type, '_id' => record.id}})
52
+ actions << ElasticSearchable.encode_json({:index => {'_index' => ElasticSearchable.index_name, '_type' => index_type, '_id' => record.id}})
82
53
  actions << doc
83
54
  rescue => e
84
55
  ElasticSearchable.logger.warn "Unable to bulk index record: #{record.inspect} [#{e.message}]"
@@ -92,14 +63,11 @@ module ElasticSearchable
92
63
  end
93
64
 
94
65
  page += 1
95
- records = scope.limit(per_page).offset(per_page* (page-1)).to_a
66
+ records = scope.limit(per_page).offset(per_page* (page-1)).all
96
67
  end
97
68
  end
98
69
 
99
70
  private
100
- def index_name
101
- self.elastic_options[:index] || ElasticSearchable.default_index
102
- end
103
71
  def index_type
104
72
  self.elastic_options[:type] || self.table_name
105
73
  end
@@ -111,15 +79,15 @@ module ElasticSearchable
111
79
  # see http://www.elasticsearch.org/guide/reference/api/index_.html
112
80
  def reindex(lifecycle = nil)
113
81
  query = {}
114
- response = ElasticSearchable.request :put, self.class.index_type_path(id), :query => query, :json_body => as_json_for_index
82
+ query[:percolate] = "*" if _percolate_callbacks.any?
83
+ response = ElasticSearchable.request :put, self.class.index_mapping_path(self.id), :query => query, :json_body => self.as_json_for_index
115
84
 
116
85
  self.index_lifecycle = lifecycle ? lifecycle.to_sym : nil
117
- run_callbacks :index
86
+ _run_index_callbacks
118
87
 
119
- self.percolate if _percolate_callbacks.any?
120
- run_callbacks :percolate if percolations.any?
88
+ self.percolations = response['matches'] || []
89
+ _run_percolate_callbacks if self.percolations.any?
121
90
  end
122
-
123
91
  # document to index in elasticsearch
124
92
  def as_json_for_index
125
93
  original_include_root_in_json = self.class.include_root_in_json
@@ -128,12 +96,10 @@ module ElasticSearchable
128
96
  ensure
129
97
  self.class.include_root_in_json = original_include_root_in_json
130
98
  end
131
-
132
99
  def should_index?
133
100
  [self.class.elastic_options[:if]].flatten.compact.all? { |m| evaluate_elastic_condition(m) } &&
134
101
  ![self.class.elastic_options[:unless]].flatten.compact.any? { |m| evaluate_elastic_condition(m) }
135
102
  end
136
-
137
103
  # percolate this object to see what registered searches match
138
104
  # can be done on transient/non-persisted objects!
139
105
  # can be done automatically when indexing using :percolate => true config option
@@ -141,8 +107,8 @@ module ElasticSearchable
141
107
  def percolate(percolator_query = nil)
142
108
  body = {:doc => self.as_json_for_index}
143
109
  body[:query] = percolator_query if percolator_query
144
- response = ElasticSearchable.request :get, self.class.index_type_path('_percolate'), :json_body => body
145
- self.percolations = (response['matches'] || []).map { |match| match['_id'] }
110
+ response = ElasticSearchable.request :get, self.class.index_mapping_path('_percolate'), :json_body => body
111
+ self.percolations = response['matches'] || []
146
112
  self.percolations
147
113
  end
148
114
 
@@ -1,7 +1,6 @@
1
1
  module ElasticSearchable
2
2
  module Queries
3
3
  PER_PAGE_DEFAULT = 20
4
- MAX_RETRIES = 5
5
4
 
6
5
  def per_page
7
6
  PER_PAGE_DEFAULT
@@ -15,8 +14,8 @@ module ElasticSearchable
15
14
  # http://www.elasticsearch.com/docs/elasticsearch/rest_api/search/
16
15
  def search(query, options = {})
17
16
  page = (options.delete(:page) || 1).to_i
18
- size = (options[:size] ||= per_page_for_search(options))
19
17
  options[:fields] ||= '_id'
18
+ options[:size] ||= per_page_for_search(options)
20
19
  options[:from] ||= options[:size] * (page - 1)
21
20
  if query.is_a?(Hash)
22
21
  options[:query] = query
@@ -36,51 +35,19 @@ module ElasticSearchable
36
35
  query[:sort] = sort
37
36
  end
38
37
 
39
- ids_to_delete = []
40
- results = []
41
- ids = []
42
- hits_total = nil
43
- retries = MAX_RETRIES
38
+ response = ElasticSearchable.request :get, index_mapping_path('_search'), :query => query, :json_body => options
39
+ hits = response['hits']
40
+ ids = hits['hits'].collect {|h| h['_id'].to_i }
41
+ results = self.find(ids).sort_by {|result| ids.index(result.id) }
44
42
 
45
- loop do
46
- response = ElasticSearchable.request :get, index_type_path('_search'), :query => query, :json_body => options
47
- hits = response['hits']
48
- hits_total ||= hits['total'].to_i
49
- new_ids = collect_hit_ids(hits)
50
- new_results = collect_result_records(new_ids, hits)
51
- ids += new_ids
52
- results += new_results
53
-
54
- break if results.size >= ids.size || retries <= 0
55
-
56
- retries -= 1
57
-
58
- options[:from] = options[:from] + options[:size]
59
- options[:size] = ids.size - results.size
60
-
61
- ids_to_delete += (new_ids - new_results.map(&:id))
62
- ids -= ids_to_delete
63
- end
64
-
65
- ids_to_delete.each do |id|
66
- delete_id_from_index_backgrounded id
43
+ results.each do |result|
44
+ result.instance_variable_set '@hit', hits['hits'][ids.index(result.id)]
67
45
  end
68
46
 
69
- ElasticSearchable::Paginator.handler.new(results, page, size, hits_total - ids_to_delete.size)
47
+ ElasticSearchable::Paginator.handler.new(results, page, options[:size], hits['total'])
70
48
  end
71
49
 
72
50
  private
73
-
74
- def collect_hit_ids(hits)
75
- hits['hits'].collect {|h| h['_id'].to_i }
76
- end
77
-
78
- def collect_result_records(ids, hits)
79
- self.where(:id => ids).to_a.sort_by{ |result| ids.index(result.id) }.each do |result|
80
- result.instance_variable_set '@hit', hits['hits'][ids.index(result.id)]
81
- end
82
- end
83
-
84
51
  # determine the number of search results per page
85
52
  # supports will_paginate configuration by using:
86
53
  # Model.per_page
@@ -1,3 +1,3 @@
1
1
  module ElasticSearchable
2
- VERSION = '1.6'
2
+ VERSION = '2.0.0'
3
3
  end
@@ -4,14 +4,13 @@ require 'logger'
4
4
  require 'elastic_searchable/active_record_extensions'
5
5
 
6
6
  module ElasticSearchable
7
- DEFAULT_INDEX = 'elastic_searchable'
8
7
  include HTTParty
9
8
  format :json
10
9
  base_uri 'localhost:9200'
11
10
 
12
11
  class ElasticError < StandardError; end
13
12
  class << self
14
- attr_accessor :logger, :default_index, :offline
13
+ attr_accessor :logger, :index_name, :index_settings, :offline
15
14
 
16
15
  # execute a block of work without reindexing objects
17
16
  def offline(&block)
@@ -34,7 +33,7 @@ module ElasticSearchable
34
33
  # ElasticSearchable.debug_output outputs all http traffic to console
35
34
  def request(method, url, options = {})
36
35
  options.merge! :headers => {'Content-Type' => 'application/json'}
37
- options.merge! :body => ElasticSearchable.encode_json(options.delete(:json_body)) if options[:json_body]
36
+ options.merge! :body => self.encode_json(options.delete(:json_body)) if options[:json_body]
38
37
 
39
38
  response = self.send(method, url, options)
40
39
  logger.debug "elasticsearch request: #{method} #{url} #{"took #{response['took']}ms" if response['took']}"
@@ -47,6 +46,33 @@ module ElasticSearchable
47
46
  string.to_s.gsub(/([\(\)\[\]\{\}\?\\\"!\^\+\-\*:~])/,'\\\\\1')
48
47
  end
49
48
 
49
+ # create the index
50
+ # http://www.elasticsearch.org/guide/reference/api/admin-indices-create-index.html
51
+ def create_index
52
+ options = {}
53
+ options[:settings] = self.index_settings if self.index_settings
54
+ self.request :put, self.request_path, :json_body => options
55
+ end
56
+
57
+ # explicitly refresh the index, making all operations performed since the last refresh
58
+ # available for search
59
+ #
60
+ # http://www.elasticsearch.com/docs/elasticsearch/rest_api/admin/indices/refresh/
61
+ def refresh_index
62
+ self.request :post, self.request_path('_refresh')
63
+ end
64
+
65
+ # deletes the entire index
66
+ # http://www.elasticsearch.com/docs/elasticsearch/rest_api/admin/indices/delete_index/
67
+ def delete_index
68
+ self.request :delete, self.request_path
69
+ end
70
+
71
+ # helper method to generate elasticsearch url for this index
72
+ def request_path(action = nil)
73
+ ['', index_name, action].compact.join('/')
74
+ end
75
+
50
76
  private
51
77
  # all elasticsearch rest calls return a json response when an error occurs. ex:
52
78
  # {error: 'an error occurred' }
@@ -63,5 +89,5 @@ ElasticSearchable.logger.level = Logger::INFO
63
89
 
64
90
  # configure default index to be elastic_searchable
65
91
  # one index can hold many object 'types'
66
- ElasticSearchable.default_index = ElasticSearchable::DEFAULT_INDEX
92
+ ElasticSearchable.index_name = 'elastic_searchable'
67
93
 
data/test/database.yml ADDED
@@ -0,0 +1,3 @@
1
+ sqlite:
2
+ adapter: sqlite3
3
+ database: test/elastic_searchable.sqlite3
@@ -1,30 +1,24 @@
1
1
  require 'rubygems'
2
2
  require 'bundler'
3
- require 'yaml'
4
- require 'byebug'
5
-
6
3
  begin
7
- Bundler.setup
4
+ Bundler.setup(:default, :development)
8
5
  rescue Bundler::BundlerError => e
9
6
  $stderr.puts e.message
10
7
  $stderr.puts "Run `bundle install` to install missing gems"
11
8
  exit e.status_code
12
9
  end
13
- require 'rspec/matchers'
10
+ require 'test/unit'
11
+ require 'shoulda'
12
+ require 'mocha'
13
+ require 'pry'
14
14
 
15
15
  $LOAD_PATH.unshift(File.join(File.dirname(__FILE__), '..', 'lib'))
16
16
  $LOAD_PATH.unshift(File.dirname(__FILE__))
17
-
18
17
  require 'elastic_searchable'
19
- require 'prefactory'
20
-
21
- SINGLE_NODE_CLUSTER_CONFIG = {
22
- 'number_of_replicas' => 0,
23
- 'number_of_shards' => 1
24
- }
25
-
26
- Dir["#{File.dirname(__FILE__)}/support/**/*.rb"].each { |f| require f }
18
+ require 'setup_database'
27
19
 
28
- RSpec.configure do |config|
29
- config.include Prefactory
20
+ class Test::Unit::TestCase
21
+ def delete_index
22
+ ElasticSearchable.delete_index rescue nil
23
+ end
30
24
  end
File without changes