escargot 0.0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,58 @@
1
+
2
module Escargot

  # Builds a new index version for a model by splitting the work into
  # background jobs that are pushed through Escargot.queue_backend.
  module DistributedIndexing

    # Loads Resque lazily, so it is only required when distributed indexing
    # is actually used.
    def DistributedIndexing.load_dependencies
      require 'resque'
    end

    # Creates a new index version for +model+, enqueues one IndexDocuments job
    # per batch of primary keys and finally enqueues a DeployNewVersion job.
    #
    # model - the model class, or its name as a String (accepted for
    #         consistency with LocalIndexing.create_index_for_model).
    def DistributedIndexing.create_index_for_model(model)
      load_dependencies
      model = model.constantize if model.kind_of?(String)

      index_version = model.create_index_version

      # only the primary key is selected: the jobs reload the full records
      model.find_in_batches(:select => model.primary_key) do |batch|
        Escargot.queue_backend.enqueue(IndexDocuments, model.to_s, batch.map(&:id), index_version)
      end

      Escargot.queue_backend.enqueue(DeployNewVersion, model.index_name, index_version)
    end

    # Job that indexes a batch of records into a specific index version.
    class IndexDocuments
      @queue = :indexing

      # model_name    - name of the model class.
      # ids           - primary key values of the records to index.
      # index_version - name of the index version that receives the documents.
      def self.perform(model_name, ids, index_version)
        model = model_name.constantize
        model.find(:all, :conditions => {model.primary_key => ids}).each do |record|
          record.local_index_in_elastic_search(:index => index_version)
        end
      end
    end

    # Job that re-indexes a batch of records and removes from the index the
    # ids that no longer exist in the database.
    class ReIndexDocuments
      @queue = :nrt

      # model_name - name of the model class.
      # ids        - primary key values of the records to re-index.
      def self.perform(model_name, ids)
        model = model_name.constantize
        ids_found = []
        # look records up by the model's primary key (as IndexDocuments does)
        # so that models with a non-default primary key are supported
        model.find(:all, :conditions => {model.primary_key => ids}).each do |record|
          record.local_index_in_elastic_search
          ids_found << record.id
        end

        # whatever was requested but not found has been deleted from the database
        (ids - ids_found).each do |id|
          model.delete_id_from_index(id)
        end
      end
    end

    # Job that makes +index_version+ the current version of +index+.
    class DeployNewVersion
      @queue = :indexing
      def self.perform(index, index_version)
        $elastic_search_client.deploy_index_version(index, index_version)
      end
    end
  end

end
@@ -0,0 +1,63 @@
1
module Escargot

  # Helpers to manage timestamped "versions" of an index. rails/init.rb mixes
  # this module into ElasticSearch::Client; note that every method talks to
  # the global $elastic_search_client rather than to the receiver.
  module AdminIndexVersions

    # Creates an index to store a new index version. Returns its name, which
    # is the index name followed by "_" and the current time as a float.
    def create_index_version(index, create_options)
      index_with_timestamp = "#{index}_#{Time.now.to_f}"
      $elastic_search_client.create_index(index_with_timestamp, create_options)
      index_with_timestamp
    end

    # Returns the full index name of the current version for this index, or
    # nil when the status lookup fails (best effort: any StandardError is
    # treated as "no current version").
    def current_index_version(index)
      $elastic_search_client.index_status(index)["indices"].keys.first
    rescue StandardError
      nil
    end

    # "Deploys" a new version as the current one: refreshes it and points the
    # +index+ alias at it, removing the alias from the previous version when
    # there is one.
    def deploy_index_version(index, new_version)
      $elastic_search_client.refresh(new_version)
      alias_changes = {:add => {new_version => index}}
      current_version = current_index_version(index)
      alias_changes[:remove] = {current_version => index} if current_version
      $elastic_search_client.alias_index(alias_changes)
    end

    # Deletes all index versions older than the current one. Does nothing
    # when there is no current version.
    def prune_index_versions(index)
      current_version = current_index_version(index)
      return unless current_version
      current_timestamp = version_timestamp(current_version)
      index_versions(index).each do |version|
        $elastic_search_client.delete_index(version) if version_timestamp(version) < current_timestamp
      end
    end

    # Lists all current, old, and in-progress versions for this index.
    #
    # Only names of the exact form "<index>_<timestamp>" are returned: the
    # index name is escaped and the pattern is anchored, so versions of other
    # indices sharing the prefix (e.g. "user_profile_..." when asking for
    # "user") are never reported, and therefore never pruned.
    def index_versions(index)
      version_pattern = /\A#{Regexp.escape(index.to_s)}_\d+(?:\.\d+)?\z/
      $elastic_search_client.index_status()["indices"].keys.grep(version_pattern)
    end

    private
      # Extracts the numeric timestamp that follows the last underscore.
      def version_timestamp(version)
        version.sub(/\A.*_/, "").to_f
      end

  end

  # Extension for search hits (rails/init.rb includes it in
  # ElasticSearch::Api::Hit).
  module HitExtensions
    # Loads the record this hit refers to: the model class is derived from
    # the hit's _type ("-" stands for a namespace separator). Returns nil when
    # the record no longer exists in the database.
    def to_activerecord
      model_class = _type.gsub(/-/,'/').classify.constantize
      begin
        model_class.find(id)
      rescue ActiveRecord::RecordNotFound
        nil
      end
    end
  end

end
@@ -0,0 +1,19 @@
1
module Escargot

  # Builds a new index version for a model in the calling process.
  module LocalIndexing
    # Creates a new index version for +model+ (a class, or its name as a
    # String), indexes every record into it batch by batch and then deploys
    # it through the global client.
    def LocalIndexing.create_index_for_model(model)
      model = model.constantize if model.kind_of?(String)
      new_version = model.create_index_version

      model.find_in_batches do |records|
        records.each { |row| row.local_index_in_elastic_search(:index => new_version) }
      end

      $elastic_search_client.deploy_index_version(model.index_name, new_version)
    end
  end

end
@@ -0,0 +1,7 @@
1
module Escargot
  module QueueBackend
    # A generic queue: empty parent class for the concrete queue backends.
    class Base; end
  end
end
@@ -0,0 +1,9 @@
1
module Escargot
  module QueueBackend
    # Queue backend that hands jobs over to Resque.
    class Rescue < Base
      # Enqueues +classname+ with the given arguments by delegating to
      # Resque.enqueue.
      def enqueue(classname, *arguments)
        job = [classname] + arguments
        Resque.enqueue(*job)
      end
    end
  end
end
@@ -0,0 +1,3 @@
1
module Escargot
  # Version of this release of the gem.
  VERSION = "0.0.2"
end
data/lib/escargot.rb ADDED
@@ -0,0 +1,92 @@
1
+ # Escargot
2
+ require 'elasticsearch'
3
+ require 'escargot/activerecord_ex'
4
+ require 'escargot/elasticsearch_ex'
5
+ require 'escargot/local_indexing'
6
+ require 'escargot/distributed_indexing'
7
+ require 'escargot/queue_backend/base'
8
+ require 'escargot/queue_backend/resque'
9
+
10
+
11
module Escargot
  # Adds +model+ to the list of indexed models, moving it to the end of the
  # list if it was already registered. Models whose table does not exist are
  # ignored.
  def self.register_model(model)
    return unless model.table_exists?
    @indexed_models ||= []
    @indexed_models.delete(model)
    @indexed_models << model
  end

  # Returns the registered models (an empty array when none is registered).
  def self.indexed_models
    @indexed_models || []
  end

  # Returns the memoized queue backend used to enqueue background jobs.
  def self.queue_backend
    @queue ||= Escargot::QueueBackend::Rescue.new
  end

  # Forgets every registered model.
  def self.flush_all_indexed_models
    @indexed_models = []
  end

  # search_hits returns a raw ElasticSearch::Api::Hits object for the search results
  # see #search for the valid options
  #
  # Unless call_by_instance_method is true, the :index option is computed
  # from options[:classes] or, when absent, from all the registered models.
  # A Hash query is wrapped as {:query => query}.
  def self.search_hits(query, options = {}, call_by_instance_method = false)
    options = with_index_option(options) unless call_by_instance_method
    query = {:query => query} if query.kind_of?(Hash)
    $elastic_search_client.search(query, options)
  end

  # search returns a will_paginate collection of ActiveRecord objects for the search results
  #
  # see ElasticSearch::Api::Index#search for the full list of valid options
  #
  # note that the collection may include nils if ElasticSearch returns a result hit for a
  # record that has been deleted on the database
  def self.search(query, options = {}, call_by_instance_method = false)
    hits = Escargot.search_hits(query, options, call_by_instance_method)
    hits_ar = hits.map{|hit| hit.to_activerecord}
    results = WillPaginate::Collection.new(hits.current_page, hits.per_page, hits.total_entries)
    results.replace(hits_ar)
    results
  end

  # counts the number of results for this query.
  # The :index option is resolved exactly as in #search_hits.
  def self.search_count(query = "*", options = {}, call_by_instance_method = false)
    options = with_index_option(options) unless call_by_instance_method
    $elastic_search_client.count(query, options)
  end

  # Loads every model found under RAILS_ROOT/app/models so that models get a
  # chance to register themselves.
  #
  # This method was listed under a bare `private`, which does not affect
  # `def self.` methods; it therefore was, and remains, publicly callable.
  def self.register_all_models
    loaded = []
    # normalise Windows separators, as the original glob did
    models_root = File.join("#{RAILS_ROOT}/app/models".split(/\\/))
    Dir[File.join(models_root, "**", "*.rb")].each do |file|
      # the root is escaped: paths may contain regexp metacharacters
      relative_name = file.sub(/\A#{Regexp.escape(models_root)}\//, '').sub(/\.rb\z/, '')
      model = relative_name.classify.constantize
      require file unless loaded.include?(model)
      loaded << model
    end
  end

  # Returns a copy of +options+ with :index set to the comma separated index
  # names of options[:classes] or, when absent, of every registered model.
  # Goes through #indexed_models so that having no registered model yields
  # an empty list instead of failing on nil.
  def self.with_index_option(options)
    if options[:classes]
      models = Array(options[:classes])
    else
      register_all_models
      models = indexed_models
    end
    options.merge(:index => models.map(&:index_name).join(','))
  end
  private_class_method :with_index_option

end
@@ -0,0 +1,50 @@
1
+ # desc "Explaining what the task does"
2
+ # task :elastic_rails do
3
+ # # Task goes here
4
+ # end
5
+
6
namespace :escargot do
  # Every task takes an optional, comma separated list of model names
  # (the :models argument); without it all registered models are used.

  desc "indexes the models"
  task :index, :models, :needs => [:environment, :load_all_models] do |t, args|
    each_indexed_model(args) do |model|
      puts "Indexing #{model}"
      Escargot::LocalIndexing.create_index_for_model(model)
    end
  end

  desc "indexes the models"
  task :distributed_index, :models, :needs => [:environment, :load_all_models] do |t, args|
    each_indexed_model(args) do |model|
      puts "Indexing #{model}"
      Escargot::DistributedIndexing.create_index_for_model(model)
    end
  end

  desc "prunes old index versions for this models"
  task :prune_versions, :models, :needs => [:environment, :load_all_models] do |t, args|
    each_indexed_model(args) do |model|
      $elastic_search_client.prune_index_versions(model.index_name)
    end
  end

  # Requires every file in app/models and its direct subdirectories.
  #
  # Each file is required unconditionally (require is idempotent). The
  # previous bookkeeping compared class-name Strings against classes and
  # skipped any later file with an already seen basename, so that e.g.
  # app/models/admin/user.rb was not loaded after app/models/user.rb.
  task :load_all_models do
    Dir["#{Rails.root}/app/models/*.rb", "#{Rails.root}/app/models/*/*.rb"].each do |file|
      require file
    end
  end

  private
    # Yields each model class named in args[:models] (comma separated,
    # surrounding whitespace ignored) or, when the argument is missing,
    # each model registered with Escargot.
    def each_indexed_model(args)
      if args[:models]
        models = args[:models].split(",").map{|m| m.strip.classify.constantize}
      else
        models = Escargot.indexed_models
      end
      models.each{|m| yield m}
    end
end
data/rails/init.rb ADDED
@@ -0,0 +1,21 @@
1
+ require 'escargot'
2
+
3
# Mix the plugin's extensions into ActiveRecord and the ElasticSearch client.
ActiveRecord::Base.class_eval do
  include Escargot::ActiveRecordExtensions
end

ElasticSearch::Api::Hit.class_eval do
  include Escargot::HitExtensions
end

ElasticSearch::Client.class_eval do
  include Escargot::AdminIndexVersions
end

# Set up the global client from config/elasticsearch.yml ("host" and "port"
# keys), falling back to localhost:9200 when the file is missing.
#
# The path is built with File.join on the stringified root: when Rails.root
# is a Pathname, `Rails.root + "/config/..."` discards the root because the
# right hand side is absolute, so the file would never be found.
elasticsearch_config_file = File.join(Rails.root.to_s, "config", "elasticsearch.yml")

if File.exist?(elasticsearch_config_file)
  config = YAML.load_file(elasticsearch_config_file)
  $elastic_search_client = ElasticSearch.new(config["host"] + ":" + config["port"].to_s, :timeout => 20)
else
  Rails.logger.warn "No config/elasticsearch.yml file found, connecting to localhost:9200"
  $elastic_search_client = ElasticSearch.new("localhost:9200")
end
@@ -0,0 +1,26 @@
1
+ require 'test_helper'
2
+
3
# Checks that pruning leaves a single version of an index.
class AdminIndexTest < Test::Unit::TestCase
  load_schema

  class User < ActiveRecord::Base
    elastic_index
  end

  # Creates three versions of the index, then prunes and expects one left.
  def test_prune_index
    index = User.index_name
    3.times { Escargot::LocalIndexing.create_index_for_model(User) }
    User.refresh_index
    sleep(1)

    versions_before = $elastic_search_client.index_versions(index)
    assert versions_before.size > 1
    assert $elastic_search_client.index_versions(index).include?($elastic_search_client.current_index_version(index))

    $elastic_search_client.prune_index_versions(index)
    versions_after = $elastic_search_client.index_versions(index)
    assert versions_after.size == 1
  end

  def teardown
    User.delete_index
  end
end
data/test/database.yml ADDED
@@ -0,0 +1,21 @@
1
+ sqlite:
2
+ :adapter: sqlite
3
+ :database: vendor/plugins/escargot/test/escargot_plugins.sqlite.db
4
+
5
+ sqlite3:
6
+ :adapter: sqlite3
7
+ :database: vendor/plugins/escargot/test/escargot_plugins.sqlite3.db
8
+
9
+ postgresql:
10
+ :adapter: postgresql
11
+ :username: postgres
12
+ :password: postgres
13
+ :database: escargot
14
+ :min_messages: ERROR
15
+
16
+ mysql:
17
+ :adapter: mysql
18
+ :host: localhost
19
+ :username: root
20
+ :password: password
21
+ :database: escargot
@@ -0,0 +1,21 @@
1
+ require 'test_helper'
2
+
3
# Checks what the elastic_index declaration adds to a model class.
class ElasticIndexTest < Test::Unit::TestCase

  class User < ActiveRecord::Base
    elastic_index
  end

  def test_index_name
    assert_equal 'elastic_index_test-user', User.index_name
  end

  def test_search_method_present
    assert User.respond_to?(:search)
  end

  # the result of include? used to be discarded, so this test could never fail
  def test_registered_as_indexed_model
    assert Escargot.indexed_models.include?(User)
  end

end
@@ -0,0 +1,97 @@
1
+ # coding: utf-8
2
+ require 'test_helper'
3
+
4
+ # tests the behaviour of the index creation tasks that run in the distributed mode
5
+ # without real time support
6
+
7
# Exercises index creation through Escargot::DistributedIndexing, running
# the enqueued jobs with Resque.run!.
# All assert_equal calls pass the expected value first, so that failure
# messages report expected and actual correctly.
class DistributedIndexCreation < Test::Unit::TestCase

  load_schema
  resque_available
  class User < ActiveRecord::Base
    elastic_index :updates => false
  end

  class LegacyUser < ActiveRecord::Base
    set_primary_key :legacy_id
    elastic_index :updates => false
  end

  def test_distributed_indexing
    User.delete_all
    User.delete_index

    User.new(:name => 'John the Long').save!
    User.new(:name => 'Peter the Young').save!
    User.new(:name => 'Peter the Old').save!
    User.new(:name => 'Bob the Skinny').save!
    User.new(:name => 'Jamie the Flying Machine').save!

    Escargot::DistributedIndexing.create_index_for_model(User)
    Resque.run!
    User.refresh_index

    results = User.search("peter")

    assert_equal 2, results.total_entries
    assert_equal ['Peter the Old', 'Peter the Young'], [results.first.name, results.second.name].sort

    results = User.search("LONG or SKINNY")
    assert_equal 2, results.total_entries

    results = User.search("*")
    assert_equal 5, results.total_entries
  end

  # minimal test to ensure that models with a non-default primary key work
  def test_legacy_model
    LegacyUser.delete_all
    LegacyUser.delete_index

    LegacyUser.new(:name => 'John the Long').save!
    LegacyUser.new(:name => 'Peter the Young').save!
    LegacyUser.new(:name => 'Peter the Old').save!
    LegacyUser.new(:name => 'Bob the Skinny').save!
    LegacyUser.new(:name => 'Jamie the Flying Machine').save!

    Escargot::DistributedIndexing.create_index_for_model(LegacyUser)
    Resque.run!
    LegacyUser.refresh_index

    results = LegacyUser.search("peter")
    assert_equal 2, results.total_entries
  end

  def test_index_rotation
    # create a first version of the index
    User.delete_all
    User.delete_index

    User.create(:name => 'John the Long')
    User.create(:name => 'Peter the Fat')
    User.create(:name => 'Bob the Skinny')
    User.create(:name => 'Jamie the Flying Machine')

    Escargot::DistributedIndexing.create_index_for_model(User)
    Resque.run!

    # create a second version of the index

    User.find(:first).destroy
    User.find(:first).destroy

    Escargot::DistributedIndexing.create_index_for_model(User)
    Resque.run!
    User.refresh_index

    # check that there are no traces of the older index
    assert_equal 2, User.search_count
  end

  def teardown
    User.delete_all
    User.delete_index
    LegacyUser.delete_all
    LegacyUser.delete_index
  end
end
@@ -0,0 +1,34 @@
1
+ # coding: utf-8
2
+ require 'test_helper'
3
+
4
# Checks that :index_options given to elastic_index are applied to the
# index: with the asciifolding filter, "pedrin" must match "Pedrín".
class CustomIndexOptions < Test::Unit::TestCase
  load_schema

  class User < ActiveRecord::Base
    elastic_index(
      :updates => false,
      :index_options => {
        "analysis.analyzer.default.tokenizer" => 'standard',
        "analysis.analyzer.default.filter" => ["standard", "lowercase", "stop", "asciifolding"]
      }
    )
  end

  def test_asciifolding_option
    User.delete_all
    User.delete_index

    User.create(:name => "Pedrín el Joven")
    User.create(:name => 'Pedro el Viejo')
    User.create(:name => 'Roberto el Delgado')
    User.create(:name => 'Jamie la Máquina Voladora')

    Escargot::LocalIndexing.create_index_for_model(User)

    results = User.search("pedrin")
    assert_equal 1, results.total_entries
  end

  # cleanup lives in teardown so that it also runs when the assertion fails
  def teardown
    User.delete_all
    User.delete_index
  end
end
@@ -0,0 +1,39 @@
1
+ # tests the Near Real Time support
2
+
3
+ require 'test_helper'
4
+
5
+ # exercises a model that overrides indexed_json_document and searches on the extra field it emits
6
+
7
# Uses a model that overrides indexed_json_document to emit the name and the
# result of #foo, then searches on both fields.
class IndexedContentTest < Test::Unit::TestCase
  load_schema

  class User < ActiveRecord::Base
    elastic_index

    def indexed_json_document
      to_json(:only => :name, :methods => :foo)
    end

    def foo
      "FOO!"
    end
  end

  def test_indexed_content
    User.delete_index
    User.delete_all

    User.create(:name => 'Tim the Wise')
    User.create(:name => 'Peter the Young')
    User.create(:name => 'Peter the Old')
    User.refresh_index

    # expected value first, so that a failure reports the values correctly
    assert_equal 2, User.search_count("Peter AND foo:FOO")
  end

  def teardown
    User.delete_all
    User.delete_index
  end

end
@@ -0,0 +1,51 @@
1
+ require 'test_helper'
2
+
3
+ # tests the behaviour of the index creation tasks that run locally (in the "simple" mode)
4
+ # without real time support
5
+
6
# Exercises index creation through Escargot::LocalIndexing for a model
# declared with :updates => false.
class LocalIndexCreation < Test::Unit::TestCase
  load_schema

  class User < ActiveRecord::Base
    elastic_index :updates => false
  end

  # smoke test: creating many index versions in a row must not raise
  def test_fast_index_creation
    User.delete_all
    User.delete_index

    10.times do
      Escargot::LocalIndexing.create_index_for_model(User)
    end
    sleep(1)
  end

  def test_indexing_rotation
    # create a first version of the index
    User.delete_all
    User.create(:name => 'John the Long')
    User.create(:name => 'Peter the Fat')
    User.create(:name => 'Bob the Skinny')
    User.create(:name => 'Jamie the Flying Machine')

    Escargot::LocalIndexing.create_index_for_model(User)

    # create a second version of the index

    User.find(:first).destroy
    User.find(:first).destroy

    Escargot::LocalIndexing.create_index_for_model(User)

    # check that there is no trace of the older index
    # (expected value first, so that a failure reports the values correctly)

    results = User.search("*")
    assert_equal 2, results.total_entries
  end

  def teardown
    User.delete_all
    User.delete_index
  end

end
@@ -0,0 +1,38 @@
1
+ # coding: utf-8
2
+ require 'test_helper'
3
+
4
# Checks that a :mapping given to elastic_index is applied: with the name
# declared as "not_analyzed", only the exact full value matches.
class Mappings < Test::Unit::TestCase
  load_schema

  class User < ActiveRecord::Base
    elastic_index(
      :mapping => {
        :properties => {
          :name => {:type => "string", :index => "not_analyzed"}
        }
      }
    )
  end

  def test_not_analyzed_property

    User.delete_all
    User.delete_index
    Escargot::LocalIndexing.create_index_for_model(User)

    User.create(:name => 'Pedrín el Joven')
    User.create(:name => 'Pedro el Viejo')
    User.create(:name => 'Roberto el Delgado')
    User.create(:name => 'Jamie la Máquina Voladora')

    User.refresh_index

    # expected value first, so that a failure reports the values correctly
    assert_equal 0, User.search_count('name=pedro')
    assert_equal 1, User.search_count('name="Pedro el Viejo"')
  end

  def teardown
    User.delete_all
    User.delete_index
  end
end