rmla 1.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,44 @@
1
# encoding: utf-8

# Rakefile for the mebla gem: wires up Bundler, Jeweler (packaging/release),
# RSpec (specs and rcov coverage) and YARD (documentation) tasks.

require 'rubygems'
require 'bundler'

# Fail fast with an actionable message when the bundle is incomplete.
begin
  Bundler.setup(:default, :development)
rescue Bundler::BundlerError => e
  $stderr.puts e.message
  $stderr.puts "Run `bundle install` to install missing gems"
  exit e.status_code
end
require 'rake'

# Gem specification and release tasks
require 'jeweler'
Jeweler::Tasks.new do |gem|
  # gem is a Gem::Specification... see http://docs.rubygems.org/read/chapter/20 for more options
  gem.name = "mebla"
  gem.homepage = "http://github.com/cousine/mebla"
  gem.license = "MIT"
  gem.summary = %Q{An elasticsearch wrapper for mongoid odm based on slingshot.}
  gem.description = %Q{
    An elasticsearch wrapper for mongoid odm based on slingshot. Makes integration between ElasticSearch full-text
    search engine and Mongoid documents seamless and simple.
  }
  gem.email = "omar.mekky@mashsolvents.com"
  gem.authors = ["Omar Mekky"]
end
Jeweler::RubygemsDotOrgTasks.new

# Spec tasks: `rake spec` runs the suite, `rake rcov` runs it with coverage enabled
require 'rspec/core'
require 'rspec/core/rake_task'
RSpec::Core::RakeTask.new(:spec) do |spec|
  spec.pattern = FileList['spec/**/*_spec.rb']
end

RSpec::Core::RakeTask.new(:rcov) do |spec|
  spec.pattern = 'spec/**/*_spec.rb'
  spec.rcov = true
end

# Running plain `rake` runs the specs
task :default => :spec

# Documentation task
require 'yard'
YARD::Rake::YardocTask.new
data/TODO.md ADDED
@@ -0,0 +1,23 @@
1
+ TODO for version 1.1.0
2
+ ==============
3
+
4
+ * <strike>add ability to index embedded documents fields (as part of the parent document)</strike>
5
+ * <strike>add instructions for indexing methods to README.md</strike>
6
+ * <strike>add ability to index methods results</strike>
7
+
8
+ TODO for version 1.0.1
9
+ ==============
10
+
11
+ * <strike>properly handle sub classes</strike>
12
+
13
+ TODO for version 1.0.0
14
+ ==============
15
+
16
+ * <strike>add documentation for mebla in README.md</strike>
17
+ * <strike>add logging capabilities</strike>
18
+
19
+ Future plan
20
+ =======
21
+
22
+ * optimize : refactor result_set
23
+ * <strike>optimize : should find a solution for not refreshing the index while indexing embedded documents in lib/mebla/context</strike> not necessary since indexing/reindexing
data/VERSION ADDED
@@ -0,0 +1 @@
1
+ 1.1.14
@@ -0,0 +1,7 @@
1
+ Description:
2
+ Generates mebla's configuration file.
3
+
4
+ Example:
5
+ rails generate mebla:install
6
+
7
+ This will generate mebla's configuration file in config folder
@@ -0,0 +1,35 @@
1
# A wrapper for slingshot elastic-search adapter for Mongoid
module Mebla
  # Rails generator that writes Mebla's configuration file into the host application
  class InstallGenerator < Rails::Generators::Base
    source_root File.expand_path('../templates', __FILE__)

    # Renders the mebla.yml template to config/mebla.yml
    def generate_configuration
      template("mebla.yml", "config/mebla.yml")
    end

    private

    # Returns the application name made available to the template: the underscored
    # application constant when one is defined, otherwise the destination directory name
    # @return [String]
    def app_name
      @app_name ||= if defined_app_const_base?
        defined_app_name
      else
        File.basename(destination_root)
      end
    end

    # @private
    # Returns the rails application name underscored
    # @return [String]
    def defined_app_name
      defined_app_const_base.underscore
    end

    # @private
    # Returns the application constant name with the trailing "::Application" removed,
    # or a falsy value when no Rails application is available
    def defined_app_const_base
      return false unless Rails.respond_to?(:application)
      return nil unless defined?(Rails::Application)
      return false unless Rails.application.is_a?(Rails::Application)

      Rails.application.class.name.sub(/::Application$/, "")
    end

    # Predicate form of #defined_app_const_base (same method, relied on for its truthiness)
    alias_method :defined_app_const_base?, :defined_app_const_base
  end
end
@@ -0,0 +1,15 @@
1
+ defaults: &defaults
2
+ host: localhost
3
+ port: 9200
4
+
5
+ development:
6
+ <<: *defaults
7
+ index: <%= app_name %>_development
8
+
9
+ test:
10
+ <<: *defaults
11
+ index: <%= app_name %>_test
12
+
13
+ production:
14
+ <<: *defaults
15
+ index: <%= app_name %>_production
@@ -0,0 +1,117 @@
1
+ require 'active_support'
2
+ require 'mebla/railtie' if defined?(Rails)
3
+
4
+ # A wrapper for slingshot elastic-search adapter for Mongoid
5
+ module Mebla
6
+ extend ActiveSupport::Autoload
7
+
8
+ # Dependencies
9
+ autoload :Mongoid, 'mongoid'
10
+ autoload :Slingshot, 'slingshot'
11
+ # Main modules
12
+ autoload :Configuration
13
+ autoload :Context
14
+ autoload :LogSubscriber
15
+ autoload :ResultSet
16
+ autoload :Search
17
+ # Errors
18
+ autoload :Errors
19
+ # Mongoid extensions
20
+ autoload :Mebla, 'mebla/mongoid/mebla'
21
+
22
+ # Register the logger
23
+ Mebla::LogSubscriber.attach_to :mebla
24
+
25
+ @@mebla_mutex = Mutex.new
26
+ @@context = nil
27
+
28
# Returns Mebla's context for manipulating the index, creating it on first use
# @return [Mebla::Context] the memoized context
def self.context
  return @@context unless @@context.nil?

  # Check again under the mutex so only one caller builds the context
  @@mebla_mutex.synchronize do
    @@context = Mebla::Context.new if @@context.nil?
  end

  @@context
end
41
+
42
# Discards the memoized context so the next call to Mebla.context builds a new one
# @return [nil]
def self.reset_context!
  @@mebla_mutex.synchronize { @@context = nil }
end
49
+
50
# Tells whether the Mongoid constant is defined
# @return [Boolean]
def self.mongoid?
  defined?(Mongoid) ? true : false
end
55
+
56
# Tells whether the Slingshot constant is defined
# @return [Boolean]
def self.slingshot?
  defined?(Slingshot) ? true : false
end
61
+
62
# Check if elasticsearch is running by requesting the _status endpoint of the configured url
#
# Besides HTTP-level failures (RestClient::Exception), connection-level failures such as
# Errno::ECONNREFUSED are SystemCallError subclasses and may surface here when the server
# is down; both are reported as "not running" instead of raising.
# @return [Boolean] false when the response mentions an error or the request fails, true otherwise
def self.elasticsearch?
  result = Slingshot::Configuration.client.get "#{Slingshot::Configuration.url}/_status"
  (result =~ /error/) ? false : true
rescue RestClient::Exception, SystemCallError
  false
end
70
+
71
# Configure Mebla
#
# Yields the Mebla::Configuration singleton so its attributes can be assigned.
#
# Example::
#
#   Mebla.configure do |config|
#     config.index = "mebla_index"
#     config.host = "localhost"
#     config.port = 9200
#   end
#
# @yield [Mebla::Configuration] the configuration singleton
# @return the block's result when a block is given, otherwise the configuration singleton
def self.configure(&block)
  config = Mebla::Configuration.instance
  # Without a block there is nothing to yield to; hand back the configuration instead of raising
  return config unless block

  yield config
end
83
+
84
+
85
# Publishes a log message as an ActiveSupport notification named after the level
# @note Any level other than :debug, :warn or :info is published on the "mebla_unknown.mebla" hook
# @param [String] message
# @param [Symbol] level can be :debug, :warn or :info
# @return the result of ActiveSupport::Notifications.instrument
def self.log(message, level = :none)
  suffix = [:debug, :warn, :info].include?(level) ? level : :unknown

  ::ActiveSupport::Notifications.instrument("mebla_#{suffix}.mebla", :message => message)
end
105
+
106
+ # Search the index by building a Mebla::Search from the given query and type names
107
+ # @param [String] query a string representing the search query
108
+ # @param [String, Symbol, Array] type_names a string, symbol or array representing the models to be searched
109
+ # @return [Mebla::Search]
110
+ #
111
+ # Search for all documents with a field 'title' with a value 'Testing Search'::
112
+ #
113
+ # Mebla.search "title: Testing Search"
114
+ def self.search(query = "", type_names = nil)
115
+ Mebla::Search.new(query, type_names)
116
+ end
117
+ end
@@ -0,0 +1,71 @@
1
+ require 'erb'
2
+ require 'singleton'
3
+
4
# A wrapper for slingshot elastic-search adapter for Mongoid
module Mebla
  # Parses the configuration file and holds important configuration attributes
  #
  # Settings are read from config/mebla.yml (section named after the Rails environment)
  # when Rails is loaded and the file exists; missing values fall back to defaults.
  class Configuration
    include Singleton

    attr_reader :log_dir
    attr_accessor :index, :host, :port, :logger

    # @private
    # Initializes a new configuration object: loads the settings, applies defaults,
    # opens the log file and hands the resulting url to Slingshot
    def initialize
      @log_dir = "#{Dir.pwd}/tmp/log"
      parse_config

      # Setup defaults
      @index ||= "mebla"
      @host ||= "localhost"
      @port ||= 9200

      make_tmp_dir
      @logger = ActiveSupport::BufferedLogger.new(
        File.open("#{@log_dir}/mebla.log", "a")
      )
      @logger.level = ActiveSupport::BufferedLogger::Severity::DEBUG

      setup_logger

      # Setup slingshot
      Slingshot::Configuration.url(self.url)
    end

    # Sets up the default settings of the logger
    # @return [nil]
    def setup_logger
      @logger.auto_flushing = true
    end

    # Returns the proper url for elasticsearch
    # @return [String] url representation of the configuration options host and port
    def url
      "http://#{@host}:#{@port}"
    end

    private

    # Creates the log directory if it doesn't exist and removes every entry already in it
    # @return [nil]
    def make_tmp_dir
      FileUtils.mkdir_p @log_dir
      Dir["#{@log_dir}/*"].each do |file|
        FileUtils.rm_rf file
      end
    end

    # Loads the configuration file and assigns every setting that has a public writer
    #
    # Does nothing when Rails is not loaded, when the file is missing, or when the file
    # has no hash of settings for the current environment.
    # @return [nil]
    def parse_config
      # The gem can be loaded without Rails; there is no config path to look at then
      return unless defined?(Rails)

      path = "#{Rails.root}/config/mebla.yml"
      return unless File.exist?(path)

      settings = load_yaml(ERB.new(File.read(path)).result)
      env_settings = settings.is_a?(Hash) ? settings[Rails.env] : nil
      return unless env_settings.is_a?(Hash)

      env_settings.each do |key, value|
        setter = "#{key}="
        send(setter, value) if respond_to?(setter)
      end

      nil
    end

    # Parses the YAML source of the configuration file
    #
    # The generated template relies on YAML aliases (<<: *defaults), which YAML.load rejects
    # on Psych versions where it is a safe load; unsafe_load is used when available.
    # The file is the application's own configuration, not external input.
    # @param [String] source the YAML text, after ERB evaluation
    # @return the parsed document
    def load_yaml(source)
      YAML.respond_to?(:unsafe_load) ? YAML.unsafe_load(source) : YAML.load(source)
    end
  end
end
@@ -0,0 +1,298 @@
1
+ # A wrapper for slingshot elastic-search adapter for Mongoid
2
+ module Mebla
3
+ # Handles indexing and reindexing
4
+ class Context
5
+ attr_reader :indexed_models, :slingshot_index, :slingshot_index_name
6
+ attr_reader :mappings
7
+
8
# @private
# Creates a new context object bound to the index named in Mebla's configuration,
# starting with no indexed models and no mappings
def initialize
  index_name = Mebla::Configuration.instance.index

  @indexed_models = []
  @mappings = {}
  @slingshot_index = Slingshot::Index.new(index_name)
  @slingshot_index_name = index_name
end
16
+
17
# @private
# Registers a model (class or name) for indexing and merges its mappings into the context.
# The list of indexed models is kept unique and sorted.
# @return [Hash] the merged mappings
def add_indexed_model(model, mappings = {})
  model_name = model.is_a?(Class) ? model.name : model

  @indexed_models.push(model_name)
  @indexed_models.uniq!
  @indexed_models.sort!

  @mappings.update(mappings)
end
28
+
29
# Drops the existing index and builds it again
# @note Doesn't index the data, use Mebla::Context#reindex_data to rebuild the index and index the data
# @raise [Mebla::Errors::MeblaIndexException] if the index does not exist
# @return the result of building the index, or nil when the index could not be dropped
def rebuild_index
  unless index_exists?
    raise Mebla::Errors::MeblaIndexException.new("#{@slingshot_index_name} does not exist !! use #create_index to create the index first.")
  end

  Mebla.log("Rebuilding index")

  # Only build again once the old index is confirmed gone
  build_index if drop_index
end
44
+
45
# Builds the index, refusing to do so when it already exists
# @note Doesn't index the data, use Mebla::Context#index_data to create the index and index the data
# @raise [Mebla::Errors::MeblaIndexException] if the index already exists
# @return [Boolean] true if operation is successful
def create_index
  if index_exists?
    raise Mebla::Errors::MeblaIndexException.new("#{@slingshot_index_name} already exists !! use #rebuild_index to rebuild the index.")
  end

  Mebla.log("Creating index")

  build_index
end
57
+
58
# Deletes the index when it exists
# @return [Boolean] true if the index is absent afterwards (or was never there), false otherwise
def drop_index
  if index_exists?
    Mebla.log("Dropping index: #{slingshot_index_name}", :debug)

    outcome = @slingshot_index.delete

    Mebla.log("Dropped #{slingshot_index_name}: #{outcome}", :debug)

    # Success means the index can no longer be found
    !index_exists?
  else
    # Nothing to drop
    true
  end
end
74
+
75
# Asks elasticsearch for the status of the index
# @return [Boolean] false when the status response mentions an error or the resource is not found, true otherwise
def index_exists?
  status_url = "#{Mebla::Configuration.instance.url}/#{@slingshot_index_name}/_status"
  response = Slingshot::Configuration.client.get(status_url)

  !(response =~ /error/)
rescue RestClient::ResourceNotFound
  false
end
85
+
86
# Creates the index and indexes the data for all models or a list of models given
#
# Every document is turned into one entry of a single bulk request; the index is
# refreshed once the request succeeds.
# @param *models a list of symbols (or one array of them) each representing a model name to be
#   indexed; when empty, every model registered on the context is indexed
# @raise [Mebla::Errors::MeblaIndexException] if the index cannot be created, the bulk response
#   mentions an error, or the HTTP client raises a RestClient::Exception
# @return the result of #refresh_index
def index_data(*models)
  # Accept both index_data(:post, :comment) and index_data([:post, :comment])
  requested = models.flatten
  only_index = requested.empty? ? @indexed_models : requested.collect { |m| m.to_s }

  Mebla.log("Indexing #{only_index.join(", ")}", :debug)

  # Create the index
  unless create_index
    raise Mebla::Errors::MeblaIndexException.new("Could not create #{@slingshot_index_name}!!!")
  end

  # Build up a bulk query to save processing and time
  bulk_query = ""
  # Keep track of indexed documents
  indexed_count = {}

  only_index.each do |model|
    Mebla.log("Indexing: #{model}")
    to_index = model.camelize.constantize

    entries = entries_to_index(to_index)
    indexed_count[model] = entries.count

    entries.each do |document|
      attrs = searchable_attributes(document)
      parent_id = nil

      # If embedded get the parent id
      if document.embedded?
        foreign_key = document.class.embedded_parent_foreign_key
        parent_id = document.send(foreign_key.to_sym).id.to_s
        attrs[(foreign_key + "_id").to_sym] = parent_id
        attrs[:_parent] = parent_id
      end

      bulk_query << build_bulk_query(@slingshot_index_name, to_index.slingshot_type_name, document.id.to_s, attrs, parent_id)
    end
  end

  Mebla.log("Bulk indexing:\n#{bulk_query}", :debug)

  # Send the query
  response = Slingshot::Configuration.client.post "#{Mebla::Configuration.instance.url}/_bulk", bulk_query

  if response =~ /error/
    raise Mebla::Errors::MeblaIndexException.new("Indexing #{only_index.join(", ")} failed with the following response:\n #{response}")
  end

  # Log results
  Mebla.log("Indexed #{only_index.count} model(s) to #{@slingshot_index_name}: #{response}")
  Mebla.log("Indexing Report:")
  indexed_count.each do |model_name, count|
    Mebla.log("Indexed #{model_name}: #{count} document(s)")
  end

  # Only reached when no error occurred
  refresh_index
rescue RestClient::Exception => error
  raise Mebla::Errors::MeblaIndexException.new("Indexing #{only_index.join(", ")} failed with the following error: #{error.message}")
end

# Collects the records of a model: queried directly, or through every parent record when embedded.
# Sub classes are restricted to their own _type, other models also accept a nil _type.
def entries_to_index(to_index)
  types = to_index.sub_class? ? [to_index.name] : [nil, to_index.name]
  return to_index.any_in(:_type => types) unless to_index.embedded?

  parent = to_index.embedded_parent
  access_method = to_index.embedded_as

  entries = []
  parent.all.each do |parent_record|
    entries += parent_record.send(access_method.to_sym).any_in(:_type => types)
  end
  entries
end

# Builds the attributes hash to index for a document: its id, search fields and search relations.
def searchable_attributes(document)
  # the id is already added in the meta data of the action part of the query
  attrs = { :id => document.attributes["_id"] }

  # only index search fields and methods
  document.class.search_fields.each do |field|
    attrs[field] = indexed_value(document, field)
  end

  # index relational fields
  document.class.search_relations.each do |relation, fields|
    items = document.send(relation.to_sym)
    next if items.nil?

    if items.is_a?(Array) || items.is_a?(Mongoid::Relations::Targets::Enumerable)
      # N relation side: one hash of values per related record
      next if items.empty?
      attrs[relation] = []
      items.each { |item| attrs[relation] << relation_values(item, fields) }
    else
      # 1 relation side: a single hash of values
      attrs[relation] = relation_values(items, fields)
    end
  end

  attrs
end

# Maps each requested field (one field or an array of fields) of a related record to its value.
def relation_values(record, fields)
  field_list = fields.is_a?(Array) ? fields : [fields]
  field_list.inject({}) do |values, field|
    values[field] = indexed_value(record, field)
    values
  end
end

# Reads a field from the record's stored attributes, falling back to calling the method of that name.
def indexed_value(record, field)
  key = field.to_s
  record.attributes.key?(key) ? record.attributes[key] : record.send(field)
end

private :entries_to_index, :searchable_attributes, :relation_values, :indexed_value
241
+
242
# Rebuilds the index and indexes the data for all models or a list of models given
# @param *models a list of symbols each representing a model name to rebuild its index;
#   when empty, #index_data falls back to every indexed model
# @raise [Mebla::Errors::MeblaIndexException] if the index could not be dropped
# @return the result of #index_data
def reindex_data(*models)
  Mebla.log("Rendexing: #{@slingshot_index_name}")

  unless drop_index
    raise Mebla::Errors::MeblaIndexException.new("Could not drop #{@slingshot_index_name}!!!")
  end

  # Create the index and index the data. The splat matters: passing the array itself
  # would hand #index_data a single argument instead of one argument per model.
  index_data(*models)
end
259
+
260
# Asks the slingshot index to refresh and logs the outcome
# @return the result of the final Mebla.log call
def refresh_index
  index_name = slingshot_index_name
  Mebla.log("Refreshing: #{index_name}", :debug)

  outcome = @slingshot_index.refresh

  Mebla.log("Refreshed #{index_name}: #{outcome}")
end
269
+
270
+ private
271
# Creates the index with the mappings collected on the context
# @return [Boolean] true if the index was created successfully, false otherwise
def build_index
  index_name = slingshot_index_name
  Mebla.log("Building #{index_name}", :debug)

  creation = @slingshot_index.create(:mappings => @mappings)

  Mebla.log("Created #{index_name}: #{creation}")

  # Success is judged by whether the index can now be found
  index_exists?
end
283
+
284
+ # --
285
+ # OPTIMIZE: should find a solution for not refreshing the index while indexing embedded documents
286
+ # ++
287
+
288
+ # Builds one bulk index entry: an "index" action line (index, type, id, optional _parent, refresh) followed by the JSON-encoded attributes with newlines replaced by spaces
289
+ # @return [String] the action line and the attributes line, ready to be appended to a bulk request body
290
+ def build_bulk_query(index_name, type, id, attributes, parent = nil)
291
+ attrs_to_json = ActiveSupport::JSON.encode(attributes).gsub(/\n/, " ")
292
+ <<-eos
293
+ { "index" : { "_index" : "#{index_name}", "_type" : "#{type}", "_id" : "#{id}"#{", \"_parent\" : \"#{parent}\"" if parent}, "refresh" : "true"} }
294
+ #{attrs_to_json}
295
+ eos
296
+ end
297
+ end
298
+ end