rmla 1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,44 @@
1
# encoding: utf-8

require 'rubygems'
require 'bundler'

# Activate the gems of the default and development groups before any task is
# defined; when one is missing, print Bundler's message and exit with its status.
begin
  Bundler.setup(:default, :development)
rescue Bundler::BundlerError => bundler_error
  $stderr.puts bundler_error.message
  $stderr.puts "Run `bundle install` to install missing gems"
  exit bundler_error.status_code
end
require 'rake'

# Gem packaging tasks
require 'jeweler'
Jeweler::Tasks.new do |gemspec|
  # gemspec is a Gem::Specification... see http://docs.rubygems.org/read/chapter/20 for more options
  gemspec.name = "mebla"
  gemspec.homepage = "http://github.com/cousine/mebla"
  gemspec.license = "MIT"
  gemspec.summary = %Q{An elasticsearch wrapper for mongoid odm based on slingshot.}
  gemspec.description = %Q{
    An elasticsearch wrapper for mongoid odm based on slingshot. Makes integration between ElasticSearch full-text
    search engine and Mongoid documents seemless and simple.
  }
  gemspec.email = "omar.mekky@mashsolvents.com"
  gemspec.authors = ["Omar Mekky"]
end
Jeweler::RubygemsDotOrgTasks.new

# Spec tasks: a plain run (:spec) and a run with rcov enabled (:rcov)
require 'rspec/core'
require 'rspec/core/rake_task'
RSpec::Core::RakeTask.new(:spec) { |t| t.pattern = FileList['spec/**/*_spec.rb'] }

RSpec::Core::RakeTask.new(:rcov) do |t|
  t.pattern = 'spec/**/*_spec.rb'
  t.rcov = true
end

# Running `rake` without arguments runs the specs
task :default => :spec

# API documentation task
require 'yard'
YARD::Rake::YardocTask.new
data/TODO.md ADDED
@@ -0,0 +1,23 @@
1
+ TODO for version 1.1.0
2
+ ==============
3
+
4
+ * <strike>add ability to index embedded documents fields (as part of the parent document)</strike>
5
+ * <strike>add instructions for indexing methods to README.md</strike>
6
+ * <strike>add ability to index methods results</strike>
7
+
8
+ TODO for version 1.0.1
9
+ ==============
10
+
11
+ * <strike>properly handle sub classes</strike>
12
+
13
+ TODO for version 1.0.0
14
+ ==============
15
+
16
+ * <strike>add documentation for mebla in README.md</strike>
17
+ * <strike>add logging capabilities</strike>
18
+
19
+ Future plan
20
+ =======
21
+
22
+ * optimize : refactor result_set
23
+ * <strike>optimize : should find a solution for not refreshing the index while indexing embedded documents in lib/mebla/context</strike> not necessary since indexing/reindexing
data/VERSION ADDED
@@ -0,0 +1 @@
1
+ 1.1.14
@@ -0,0 +1,7 @@
1
+ Description:
2
+ Generates mebla's configuration file.
3
+
4
+ Example:
5
+ rails generate mebla:install
6
+
7
+ This will generate mebla's configuration file in the config folder
@@ -0,0 +1,35 @@
1
# A wrapper for slingshot elastic-search adapter for Mongoid
module Mebla
  # Rails generator that writes Mebla's configuration file into the application
  class InstallGenerator < Rails::Generators::Base
    source_root File.expand_path('../templates', __FILE__)

    # Renders the "mebla.yml" template to "config/mebla.yml"
    def generate_configuration
      template "mebla.yml", "config/mebla.yml"
    end

    private

    # Returns the rails application name, falling back to the name of the
    # destination directory when no application constant is available
    # @return [String]
    def app_name
      @app_name ||= if defined_app_const_base?
                      defined_app_name
                    else
                      File.basename(destination_root)
                    end
    end

    # @private
    # Returns the rails application name underscored
    # @return [String]
    def defined_app_name
      defined_app_const_base.underscore
    end

    # @private
    # Returns the application CONSTANT name (the application class name without
    # its trailing "::Application"), or a falsy value when there is none
    def defined_app_const_base
      return false unless Rails.respond_to?(:application)
      return nil unless defined?(Rails::Application)

      application = Rails.application
      application.is_a?(Rails::Application) && application.class.name.sub(/::Application$/, "")
    end

    # Predicate spelling of the lookup above; it returns the same value
    alias_method :defined_app_const_base?, :defined_app_const_base
  end
end
@@ -0,0 +1,15 @@
1
+ defaults: &defaults
2
+ host: localhost
3
+ port: 9200
4
+
5
+ development:
6
+ <<: *defaults
7
+ index: <%= app_name %>_development
8
+
9
+ test:
10
+ <<: *defaults
11
+ index: <%= app_name %>_test
12
+
13
+ production:
14
+ <<: *defaults
15
+ index: <%= app_name %>_production
@@ -0,0 +1,117 @@
1
+ require 'active_support'
2
+ require 'mebla/railtie' if defined?(Rails)
3
+
4
+ # A wrapper for slingshot elastic-search adapter for Mongoid
5
+ module Mebla
6
+ extend ActiveSupport::Autoload
7
+
8
+ # Dependencies
9
+ autoload :Mongoid, 'mongoid'
10
+ autoload :Slingshot, 'slingshot'
11
+ # Main modules
12
+ autoload :Configuration
13
+ autoload :Context
14
+ autoload :LogSubscriber
15
+ autoload :ResultSet
16
+ autoload :Search
17
+ # Errors
18
+ autoload :Errors
19
+ # Mongoid extensions
20
+ autoload :Mebla, 'mebla/mongoid/mebla'
21
+
22
+ # Register the logger
23
+ Mebla::LogSubscriber.attach_to :mebla
24
+
25
+ @@mebla_mutex = Mutex.new
26
+ @@context = nil
27
+
28
# Returns Mebla's context for manipulating the index, creating it on first use
# @return [Mebla::Context]
def self.context
  return @@context unless @@context.nil?

  @@mebla_mutex.synchronize do
    # Checked again inside the lock: another thread may have created it meanwhile
    @@context = Mebla::Context.new if @@context.nil?
  end

  @@context
end
41
+
42
# Resets the context (reloads Mebla); the next call to Mebla.context builds a new one
# @return [nil]
def self.reset_context!
  @@mebla_mutex.synchronize { @@context = nil }
end
49
+
50
# Check if mongoid is loaded
# @return [Boolean] true when the Mongoid constant is defined
def self.mongoid?
  defined?(Mongoid) ? true : false
end
55
+
56
# Check if slingshot is loaded
# @return [Boolean] true when the Slingshot constant is defined
def self.slingshot?
  defined?(Slingshot) ? true : false
end
61
+
62
# Check if elasticsearch is running
#
# Requests the "_status" resource of the configured Slingshot url. The server
# counts as down when the response mentions an error, when the HTTP client
# raises, or when the connection itself cannot be established (connection
# refused, unreachable host, failed name lookup).
# @return [Boolean]
def self.elasticsearch?
  result = Slingshot::Configuration.client.get "#{Slingshot::Configuration.url}/_status"
  result =~ /error/ ? false : true
rescue RestClient::Exception, SystemCallError, SocketError
  # SystemCallError covers the Errno::* family raised by the socket layer,
  # which is not wrapped in RestClient::Exception
  false
end
70
+
71
# Configure Mebla
#
# Yields the Mebla::Configuration singleton so its attributes can be set.
#
# Example::
#
#   Mebla.configure do |config|
#     config.index = "mebla_index"
#     config.host = "localhost"
#     config.port = 9200
#   end
#
# @yieldparam config [Mebla::Configuration] the configuration singleton
# @return the value returned by the given block
def self.configure(&block)
  yield Mebla::Configuration.instance
end
83
+
84
+
85
# Publishes a log message as an ActiveSupport notification whose name depends
# on the level given
# @note Any level other than :debug, :warn or :info (including the default) is published as "mebla_unknown.mebla"
# @param [String] message
# @param [Symbol] level can be :debug, :warn or :info
# @return the value returned by ActiveSupport::Notifications.instrument
def self.log(message, level = :none)
  hooks = {
    :debug => "mebla_debug.mebla",
    :warn  => "mebla_warn.mebla",
    :info  => "mebla_info.mebla"
  }
  hook = hooks.fetch(level, "mebla_unknown.mebla")

  ::ActiveSupport::Notifications.instrument(hook, :message => message)
end
105
+
106
# Search the index
# @param [String] query a string representing the search query
# @param [String, Symbol, Array] type_names a string, symbol or array representing the models to be searched
# @return [Mebla::Search]
#
# Search for all documents with a field 'title' with a value 'Testing Search'::
#
#   Mebla.search "title: Testing Search"
def self.search(query = "", type_names = nil)
  search_arguments = [query, type_names]
  Mebla::Search.new(*search_arguments)
end
117
+ end
@@ -0,0 +1,71 @@
1
+ require 'erb'
2
+ require 'singleton'
3
+
4
+ # A wrapper for slingshot elastic-search adapter for Mongoid
5
+ module Mebla
6
+ # Parses the configuration file and holds important configuration attributes
7
+ class Configuration
8
+ include Singleton
9
+
10
+ attr_reader :log_dir
11
+ attr_accessor :index, :host, :port, :logger
12
+
13
# @private
# Initializes a new configuration object: reads the configuration file, fills
# in defaults, prepares the log file and points Slingshot at elasticsearch
def initialize
  @log_dir = "#{Dir.pwd}/tmp/log"
  parse_config

  # Whatever the configuration file did not provide falls back to a default
  @index ||= "mebla"
  @host ||= "localhost"
  @port ||= 9200

  make_tmp_dir
  log_file = File.open("#{@log_dir}/mebla.log", "a")
  @logger = ActiveSupport::BufferedLogger.new(log_file)
  @logger.level = ActiveSupport::BufferedLogger::Severity::DEBUG

  setup_logger

  # Setup slingshot
  Slingshot::Configuration.url(self.url)
end
35
+
36
# Applies the default logger settings: enables auto flushing on the logger
# @return [Boolean] the value assigned (true)
def setup_logger
  auto_flush = true
  @logger.auto_flushing = auto_flush
end
41
+
42
# Builds the elasticsearch url from the configured host and port
# @return [String] url representation of the configuration options host and port
def url
  ["http://", @host, ":", @port].join
end
47
+
48
+ private
49
# Creates the log directory if it doesn't exist and removes everything
# already inside it
# @return [Array<String>] the paths that were removed
def make_tmp_dir
  FileUtils.mkdir_p(@log_dir)
  Dir.glob("#{@log_dir}/*").each { |entry| FileUtils.rm_rf(entry) }
end
57
+
58
# Loads the configuration file
#
# Reads "config/mebla.yml" under the Rails root, evaluates it through ERB and
# assigns every key of the current Rails environment section for which this
# object has a public writer. Does nothing when Rails is not loaded, when the
# file does not exist or when it has no section for the environment.
# @return [Hash, nil] the settings applied, nil when nothing was loaded
def parse_config
  # Mebla can be loaded outside of Rails, where there is no root to look under
  return unless defined?(Rails)

  path = "#{Rails.root}/config/mebla.yml"
  return unless File.exist?(path)

  source = ERB.new(IO.read(path)).result
  # The generated template relies on YAML aliases ("<<: *defaults"), which the
  # safe loader behind YAML.load rejects on recent Psych versions. The file is
  # the application's own configuration, so the unrestricted loader is used
  # where it exists.
  settings = YAML.respond_to?(:unsafe_load) ? YAML.unsafe_load(source) : YAML.load(source)
  # An empty file does not parse to a Hash
  return unless settings.is_a?(Hash)

  conf = settings[Rails.env]
  return if conf.nil?

  conf.each do |key, value|
    writer = "#{key}="
    self.send(writer, value) if self.respond_to?(writer)
  end
end
70
+ end
71
+ end
@@ -0,0 +1,298 @@
1
+ # A wrapper for slingshot elastic-search adapter for Mongoid
2
+ module Mebla
3
+ # Handles indexing and reindexing
4
+ class Context
5
+ attr_reader :indexed_models, :slingshot_index, :slingshot_index_name
6
+ attr_reader :mappings
7
+
8
# @private
# Creates a new context object bound to the index named in Mebla's
# configuration, with no indexed models and no mappings registered yet
def initialize
  index_name = Mebla::Configuration.instance.index

  @indexed_models = []
  @mappings = {}
  @slingshot_index = Slingshot::Index.new(index_name)
  @slingshot_index_name = index_name
end
16
+
17
# @private
# Adds a model to the list of indexed models (kept unique and sorted) and
# merges the given mappings into the mappings already registered
def add_indexed_model(model, mappings = {})
  model_name = model.is_a?(Class) ? model.name : model

  @indexed_models.push(model_name)
  @indexed_models.uniq!
  @indexed_models.sort!

  @mappings.merge!(mappings)
end
28
+
29
# Deletes and rebuilds the index
# @note Doesn't index the data, use Mebla::Context#reindex_data to rebuild the index and index the data
# @return [Boolean, nil] the result of building the index, nil when the index could not be dropped
# @raise [Mebla::Errors::MeblaIndexException] when the index does not exist
def rebuild_index
  # Only rebuild if the index exists
  unless index_exists?
    raise Mebla::Errors::MeblaIndexException.new("#{@slingshot_index_name} does not exist !! use #create_index to create the index first.")
  end

  Mebla.log("Rebuilding index")

  # Delete the index, then create it again
  drop_index ? build_index : nil
end
44
+
45
# Creates the index
# @note Doesn't index the data, use Mebla::Context#index_data to create the index and index the data
# @return [Boolean] true if operation is successful
# @raise [Mebla::Errors::MeblaIndexException] when the index already exists
def create_index
  # Only create the index if it doesn't exist
  if index_exists?
    raise Mebla::Errors::MeblaIndexException.new("#{@slingshot_index_name} already exists !! use #rebuild_index to rebuild the index.")
  end

  Mebla.log("Creating index")

  build_index
end
57
+
58
# Deletes the index
# @return [Boolean] true if the index does not exist once the operation is done
def drop_index
  # Nothing to drop when the index is not there
  return true unless index_exists?

  index_name = self.slingshot_index_name
  Mebla.log("Dropping index: #{index_name}", :debug)

  result = @slingshot_index.delete

  Mebla.log("Dropped #{index_name}: #{result.to_s}", :debug)

  # Successful only when the index is really gone
  !index_exists?
end
74
+
75
# Checks if the index exists and is available by requesting its "_status"
# @return [Boolean] true if the index exists and is available, false otherwise
def index_exists?
  status_url = "#{Mebla::Configuration.instance.url}/#{@slingshot_index_name}/_status"
  status = Slingshot::Configuration.client.get status_url
  status =~ /error/ ? false : true
rescue RestClient::ResourceNotFound
  false
end
85
+
86
# Creates the index and indexes the data for all models or a list of models given
# @param *models a list of symbols (or strings) each representing a model name to be indexed;
#   a single array of names is accepted as well. When no name is given all the registered models are indexed
# @return the result of refreshing the index
# @raise [Mebla::Errors::MeblaIndexException] when the index can not be created, when the bulk
#   response reports an error or when the HTTP client fails
def index_data(*models)
  # Flattened so that a caller holding an array may pass it without splatting
  requested = models.flatten
  only_index = requested.empty? ? @indexed_models : requested.collect { |m| m.to_s }

  Mebla.log("Indexing #{only_index.join(", ")}", :debug)

  # Build up a bulk query to save processing and time
  bulk_query = ""
  # Keep track of indexed documents
  indexed_count = {}

  # Create the index
  unless create_index
    raise Mebla::Errors::MeblaIndexException.new("Could not create #{@slingshot_index_name}!!!")
  end

  # Start collecting documents
  only_index.each do |model|
    Mebla.log("Indexing: #{model}")
    # Get the class
    to_index = model.camelize.constantize

    # Get the records
    entries = entries_to_index(to_index)

    # Save the number of entries to be indexed
    indexed_count[model] = entries.count

    # Build the queries for this model
    entries.each do |document|
      bulk_query << bulk_entry_for(to_index, document)
    end
  end

  Mebla.log("Bulk indexing:\n#{bulk_query}", :debug)

  # Send the query
  response = Slingshot::Configuration.client.post "#{Mebla::Configuration.instance.url}/_bulk", bulk_query

  if response =~ /error/
    raise Mebla::Errors::MeblaIndexException.new("Indexing #{only_index.join(", ")} failed with the following response:\n #{response}")
  end

  # Log results
  Mebla.log("Indexed #{only_index.count} model(s) to #{self.slingshot_index_name}: #{response}")
  Mebla.log("Indexing Report:")
  indexed_count.each do |model_name, count|
    Mebla.log("Indexed #{model_name}: #{count} document(s)")
  end

  # Only refresh the index once everything was indexed without error
  refresh_index
rescue RestClient::Exception => error
  raise Mebla::Errors::MeblaIndexException.new("Indexing #{only_index.join(", ")} failed with the following error: #{error.message}")
end

# Collects the records of a model that are to be indexed; embedded models are
# gathered by walking every record of their parent model
def entries_to_index(to_index)
  # Sub classes only match their own _type, other classes also match documents without one
  types = to_index.sub_class? ? [to_index.name] : [nil, to_index.name]

  return to_index.any_in(:_type => types) unless to_index.embedded?

  access_method = to_index.embedded_as.to_sym
  entries = []
  to_index.embedded_parent.all.each do |parent_record|
    entries += parent_record.send(access_method).any_in(:_type => types)
  end
  entries
end

# Builds the bulk query entry of a single document: its id, its search fields,
# its indexed relations and, for embedded documents, its parent id
def bulk_entry_for(to_index, document)
  attrs = {}
  attrs[:id] = document.attributes["_id"] # the id is already added in the meta data of the action part of the query

  # only index search fields and methods
  document.class.search_fields.each do |field|
    attrs[field] = indexed_value(document, field)
  end

  # index relational fields
  document.class.search_relations.each do |relation, fields|
    items = document.send(relation.to_sym) # get the relation document

    next if items.nil?

    if items.is_a?(Array) || items.is_a?(Mongoid::Relations::Targets::Enumerable)
      # N relation side: one hash of values per related document
      next if items.empty?
      attrs[relation] = []
      items.each { |item| attrs[relation] << relation_values(item, fields) }
    else
      # 1 relation side: a single hash of values
      attrs[relation] = relation_values(items, fields)
    end
  end

  # If embedded get the parent id
  parent_id = nil
  if document.embedded?
    foreign_key = document.class.embedded_parent_foreign_key
    parent_id = document.send(foreign_key.to_sym).id.to_s
    attrs[(foreign_key + "_id").to_sym] = parent_id
    attrs[:_parent] = parent_id
  end

  build_bulk_query(@slingshot_index_name, to_index.slingshot_type_name, document.id.to_s, attrs, parent_id)
end

# Maps each requested field (one name or an array of names) of a related
# record to its value
def relation_values(record, fields)
  names = fields.is_a?(Array) ? fields : [fields]
  values = {}
  names.each { |field| values[field] = indexed_value(record, field) }
  values
end

# Reads a field from the record's stored attributes, or calls the method of
# that name when there is no such attribute
def indexed_value(record, field)
  attribute_name = field.to_s
  if record.attributes.key?(attribute_name)
    record.attributes[attribute_name] # attribute
  else
    record.send(field) # method
  end
end

private :entries_to_index, :bulk_entry_for, :relation_values, :indexed_value
241
+
242
# Rebuilds the index and indexes the data for all models or a list of models given
# @param *models a list of symbols each representing a model name to rebuild its index;
#   when no name is given all the registered models are indexed
# @return the result of Mebla::Context#index_data
# @raise [Mebla::Errors::MeblaIndexException] when the index can not be dropped
def reindex_data(*models)
  Mebla.log("Reindexing: #{self.slingshot_index_name}")

  unless drop_index
    raise Mebla::Errors::MeblaIndexException.new("Could not drop #{@slingshot_index_name}!!!")
  end

  # Create the index and index the data. The names are splatted so they reach
  # index_data as separate arguments, not as one nested array; an empty list
  # indexes every registered model.
  index_data(*models)
end
259
+
260
# Refreshes the index and logs the outcome
# @return the value returned by the final Mebla.log call
def refresh_index
  index_name = self.slingshot_index_name
  Mebla.log("Refreshing: #{index_name}", :debug)

  outcome = @slingshot_index.refresh

  Mebla.log("Refreshed #{index_name}: #{outcome}")
end
269
+
270
+ private
271
# Builds the index according to the mappings set
# @return [Boolean] true if the index was created successfully, false otherwise
def build_index
  index_name = self.slingshot_index_name
  Mebla.log("Building #{index_name}", :debug)

  # Create the index with the registered mappings
  creation = @slingshot_index.create :mappings => @mappings

  Mebla.log("Created #{index_name}: #{creation.to_s}")

  # The index only counts as built when it can actually be found
  index_exists?
end
283
+
284
+ # --
285
+ # OPTIMIZE: should find a solution for not refreshing the index while indexing embedded documents
286
+ # ++
287
+
288
# Builds a bulk index query for one document: an "index" action line followed
# by the document source line, each terminated by a newline
# @param [String] index_name name of the index the document goes to
# @param [String] type elasticsearch type of the document
# @param [String] id id of the document
# @param [Hash] attributes the document source
# @param [String, nil] parent id of the parent document, left out of the action when nil
# @return [String]
def build_bulk_query(index_name, type, id, attributes, parent = nil)
  # The action is serialized by the JSON encoder so that quotes or backslashes
  # in any of the values are escaped; every value is sent as a string
  action = { "_index" => index_name.to_s, "_type" => type.to_s, "_id" => id.to_s }
  action["_parent"] = parent.to_s if parent
  action["refresh"] = "true"

  action_line = ActiveSupport::JSON.encode("index" => action).gsub(/\n/, " ")
  # A bulk entry must fit on one line, so new lines in the source are blanked out
  source_line = ActiveSupport::JSON.encode(attributes).gsub(/\n/, " ")

  "#{action_line}\n#{source_line}\n"
end
297
+ end
298
+ end