active_blur 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,25 @@
1
+ # encoding: utf-8
2
+ $:.push File.expand_path("../lib", __FILE__)
3
+ require 'active_blur/version'
4
+
5
# Gem specification for active_blur.
#
# The file, test-file and executable lists are produced by shelling out to
# `git ls-files`, so the gem has to be built from inside a git checkout.
Gem::Specification.new do |s|
  s.name        = 'active_blur'
  s.version     = ActiveBlur::VERSION
  s.platform    = Gem::Platform::RUBY
  s.authors     = ["Andrew Avenoso"]
  s.homepage    = "https://github.com/nearinfinity/active_blur"
  s.summary     = "Search ActiveRecord models using Blur"
  s.description = "Active Blur is an Active Record extension library to enable creating indexes, updating indexes and searching indexes in Blur. The Active Record model objects that are configured to use this gem will automatically create tables in Blur, re-index upon updates in Blur, and allow searching (fielded and full text) in Blur."
  s.license     = "Apache-2.0"

  # Keep repository housekeeping files out of the packaged gem. The dot in
  # ".gitignore" is escaped so it only matches a literal dot, and \A anchors
  # the match at the start of the path. This is still a prefix match, so
  # e.g. "Gemfile.lock" is excluded as well.
  s.files         = `git ls-files`.split("\n").reject { |path| path =~ /\A(Gemfile|\.gitignore|Rakefile)/ }
  s.test_files    = `git ls-files -- {test,spec,features}/*`.split("\n")
  s.executables   = `git ls-files -- bin/*`.split("\n").map { |f| File.basename(f) }
  s.require_paths = ["lib"]

  s.add_dependency('rails', '~> 3.0.0')
  s.add_dependency('thrift_client', '~> 0.7.0')

  s.add_development_dependency('rspec', '2.7.0')
  s.add_development_dependency('rdoc', '3.12')
end
@@ -0,0 +1,2 @@
1
+ require 'active_blur/active_blur'
2
+ require 'active_blur/railtie' if defined? Rails
@@ -0,0 +1,256 @@
1
+ require 'thrift_client'
2
+ require 'active_blur/thrift/blur'
3
+ require 'active_blur/search_results'
4
+
5
# Indexes and searches ActiveRecord models in Blur through a Thrift client.
#
# State is kept in module-level instance variables: the Thrift client, the
# table list fetched when the connection was set up, the options merged into
# every class setup, and the per-class options registered via setup_class.
module ActiveBlur
  # Lucene analyzer class names that can be selected by symbol.
  ANALYZER_CLASS_NAMES = {
    :standard   => "org.apache.lucene.analysis.standard.StandardAnalyzer",
    :keyword    => "org.apache.lucene.analysis.KeywordAnalyzer",
    :simple     => "org.apache.lucene.analysis.SimpleAnalyzer",
    :stop       => "org.apache.lucene.analysis.StopAnalyzer",
    :whitespace => "org.apache.lucene.analysis.WhitespaceAnalyzer"
  }.freeze

  # [search option key, SimpleQuery attribute] pairs copied when the option is non-nil.
  SIMPLE_QUERY_OPTIONS = [
    [:post_super_filter, :postSuperFilter],
    [:pre_super_filter,  :preSuperFilter]
  ].freeze

  # [search option key, BlurQuery attribute] pairs copied when the option is non-nil.
  BLUR_QUERY_OPTIONS = [
    [:allow_stale_data,          :allowStaleData],
    [:use_cache_if_present,      :useCacheIfPresent],
    [:start,                     :start],
    [:fetch,                     :fetch],
    [:minimum_number_of_results, :minimumNumberOfResults],
    [:max_query_time,            :maxQueryTime],
    [:uuid,                      :uuid],
    [:user_context,              :userContext],
    [:cache_result,              :cacheResult],
    [:modify_file_caches,        :modifyFileCaches]
  ].freeze

  @searchable_classes = {}
  @blur_client = nil
  @tables = []
  @create_mode = false
  @global_options = {
    :full_text_index    => true,
    :default_analyzer   => :standard,
    :full_text_analyzer => :standard
  }

  class << self
    # When true, setup_class skips the table-exists / enabled / schema checks.
    attr_accessor :create_mode

    # Creates the Thrift client and caches the list of tables Blur reports.
    #
    # connection_str - a comma-separated String of server addresses, or any
    #                  value Array() can coerce into a list of them.
    # options        - merged into the global options applied to every class.
    # thrift_options - merged over the default {:retries=>5}. When
    #                  :server_retry_period is not given it is set to -1 for
    #                  fewer than two servers, otherwise to the server count.
    #
    # Prints progress to stdout.
    def setup_connection(connection_str, options={}, thrift_options={})
      @global_options = @global_options.merge(options)
      thrift_options = {:retries => 5}.merge(thrift_options)
      urls = connection_str.is_a?(String) ? connection_str.split(/,/) : Array(connection_str)
      unless thrift_options[:server_retry_period]
        thrift_options[:server_retry_period] = urls.size < 2 ? -1 : urls.size
      end
      @blur_client = ThriftClient.new Blur::Blur::Client, urls, thrift_options
      puts "ActiveBlur connection setup"
      @tables = client.tableList
      puts "Found tables #{@tables}"
    end

    # Registers clazz as searchable with the given options.
    #
    # Options are layered as: per-class defaults (table name, singularized
    # table name as column family, 3 shards) < global options < options.
    # :exclude and :ignore are normalized to arrays of strings. Unless
    # create_mode is set, the Blur table must exist, be enabled and match
    # the expected schema.
    #
    # Raises RuntimeError when :cluster is missing or a table check fails.
    def setup_class(clazz, options={})
      defaults = {
        :table_name    => clazz.table_name,
        :column_family => clazz.table_name.singularize,
        :shard_count   => 3
      }
      options = defaults.merge(@global_options).merge(options)
      [:exclude, :ignore].each do |key|
        options[key] = Array(options[key]).map(&:to_s) if options[key]
      end
      raise "options[:cluster] is required" unless options[:cluster]
      puts "setup options for #{clazz} are #{options.inspect}"
      unless create_mode
        ensure_table_exists! options[:table_name]
        ensure_table_enabled! options[:table_name]
        ensure_schemas_match! clazz, options
      end
      @searchable_classes[clazz] = options
    end

    # Returns the classes registered through setup_class.
    def searchable_classes
      @searchable_classes.keys
    end

    # Creates the Blur table for a registered class, with one column
    # definition per indexed column.
    #
    # Raises RuntimeError when the class is not registered or the table is
    # already in the cached table list.
    def create_table(clazz)
      options = options_for!(clazz)
      raise "table #{options[:table_name]} already exists" if @tables.include? options[:table_name]
      db_columns = columns_for_class(clazz, options)

      table_descriptor = Blur::TableDescriptor.new
      table_descriptor.shardCount = options[:shard_count]
      table_descriptor.tableUri = calculate_table_uri(options)
      table_descriptor.cluster = options[:cluster]
      table_descriptor.name = options[:table_name]

      analyzer_definition = Blur::AnalyzerDefinition.new
      analyzer_definition.defaultDefinition = new_column_definition(options[:default_analyzer], options[:full_text_index])
      analyzer_definition.fullTextAnalyzerClassName = analyzer(options[:full_text_analyzer])

      col_fam_definition = Blur::ColumnFamilyDefinition.new
      col_fam_definition.defaultDefinition = new_column_definition(options[:default_analyzer], options[:full_text_index])
      col_fam_definition.columnDefinitions = {}
      db_columns.each do |col|
        col_fam_definition.columnDefinitions[col] = new_column_definition(options[:default_analyzer], options[:full_text_index])
      end
      analyzer_definition.columnFamilyDefinitions = {options[:column_family] => col_fam_definition}
      table_descriptor.analyzerDefinition = analyzer_definition

      puts "about to create table #{table_descriptor.inspect}"
      client.createTable(table_descriptor)
    end

    # Runs query against the Blur table of a registered class and wraps the
    # response in a SearchResults.
    #
    # options - optional query tuning; :score_type (:best, :super, :aggregate
    #           or :constant) plus the keys listed in SIMPLE_QUERY_OPTIONS and
    #           BLUR_QUERY_OPTIONS. Only non-nil values are applied.
    #
    # Raises RuntimeError when the class is not registered or :score_type is
    # not recognized.
    def search(clazz, query, options={})
      model_options = options_for!(clazz)

      simple_query = Blur::SimpleQuery.new
      simple_query.queryStr = query
      simple_query.superQueryOn = false
      simple_query.type = score_type(options[:score_type]) unless options[:score_type].nil?
      copy_options(options, simple_query, SIMPLE_QUERY_OPTIONS)

      blur_query = Blur::BlurQuery.new
      blur_query.simpleQuery = simple_query
      copy_options(options, blur_query, BLUR_QUERY_OPTIONS)
      # NOTE(review): 'none_col_fam' looks like a deliberately non-matching
      # family so that no column data is fetched - confirm against Blur.
      blur_query.selector = Blur::Selector.new(:recordOnly=>true, :columnFamiliesToFetch=>['none_col_fam'])

      results = client.query(model_options[:table_name], blur_query)
      SearchResults.new(results, clazz)
    end

    # Indexes every record returned by clazz.all, one mutation per record.
    def reindex(clazz)
      clazz.all.each { |obj| index(obj) }
    end

    # Replaces the Blur row for object with a single record holding its
    # indexed columns. The row id is "<class name>-<id>" and the record id is
    # the object's id as a string.
    #
    # Raises RuntimeError when the object's class is not registered.
    def index(object)
      puts "indexing #{object.class} with id #{object.id}"
      options = options_for!(object.class)

      record = Blur::Record.new
      record.recordId = object.id.to_s
      record.family = options[:column_family]
      record.columns = []
      columns_for_class(object.class, options).each do |col|
        val = object.send(col)
        # Only nil means "no value"; false is a real value and is indexed.
        next if val.nil?
        column = Blur::Column.new
        column.name = col
        column.value = val.to_s
        record.columns << column
      end

      record_mutation = Blur::RecordMutation.new
      record_mutation.recordMutationType = Blur::RecordMutationType::REPLACE_ENTIRE_RECORD
      record_mutation.record = record

      mutation = Blur::RowMutation.new
      mutation.table = options[:table_name]
      mutation.rowId = "#{object.class.name}-#{object.id}"
      mutation.rowMutationType = Blur::RowMutationType::REPLACE_ROW
      mutation.recordMutations = [record_mutation]
      client.mutate(mutation)
    end

    # Deletes the Blur row belonging to object.
    def remove(object)
      remove_by_id(object.class, object.id)
    end

    # Deletes the Blur row "<class name>-<id>" from the class's table.
    #
    # Raises RuntimeError when the class is not registered.
    def remove_by_id(clazz, id)
      puts "removing #{clazz} with id #{id}"
      options = options_for!(clazz)
      mutation = Blur::RowMutation.new
      mutation.table = options[:table_name]
      mutation.rowId = "#{clazz.name}-#{id}"
      mutation.rowMutationType = Blur::RowMutationType::DELETE_ROW
      client.mutate(mutation)
    end

    private

    # Builds a column definition for the given analyzer (symbol or class name).
    def new_column_definition(analyzer_name, full_text)
      Blur::ColumnDefinition.new(:analyzerClassName=>analyzer(analyzer_name), :fullTextIndex=>full_text)
    end

    # Resolves an analyzer symbol to its class name; a String is passed
    # through unchanged. Raises RuntimeError for anything else.
    def analyzer(sym_or_class)
      case sym_or_class
      when Symbol
        ANALYZER_CLASS_NAMES.fetch(sym_or_class) { raise "invalid analyzer #{sym_or_class}" }
      when String
        sym_or_class
      else
        raise "invalid analyzer #{sym_or_class}"
      end
    end

    # Maps a score type symbol to the Blur::ScoreType constant.
    def score_type(sym)
      case sym
      when :best      then Blur::ScoreType::BEST
      when :super     then Blur::ScoreType::SUPER
      when :aggregate then Blur::ScoreType::AGGREGATE
      when :constant  then Blur::ScoreType::CONSTANT
      else raise "invalid score type #{sym}"
      end
    end

    # Assigns target.<attribute> = options[key] for every pair whose option
    # value is not nil (false is a valid value and is copied).
    def copy_options(options, target, mapping)
      mapping.each do |key, attribute|
        target.send("#{attribute}=", options[key]) unless options[key].nil?
      end
    end

    # Returns the Thrift client; raises when setup_connection has not run.
    def client
      raise "Active blur not configured" unless @blur_client
      @blur_client
    end

    # Returns the registered options for clazz, or nil.
    def options_for(clazz)
      @searchable_classes[clazz]
    end

    # Like options_for, but raises instead of returning nil so callers fail
    # with a clear message rather than a NoMethodError on nil.
    def options_for!(clazz)
      options = options_for(clazz)
      raise "#{clazz.name} has not been configured to be searchable" unless options
      options
    end

    def ensure_table_enabled!(table_name)
      raise "#{table_name} is not enabled" unless client.describe(table_name).isEnabled
    end

    def ensure_table_exists!(table_name)
      raise "table #{table_name} doesn't exist, run rake blur:create_table" unless @tables.include? table_name
    end

    # Compares the table's analyzer definition in Blur with what the options
    # and the model's columns call for; raises RuntimeError on any mismatch.
    def ensure_schemas_match!(clazz, options)
      definition = client.describe(options[:table_name]).analyzerDefinition
      default_definition = definition.defaultDefinition
      if default_definition.nil? ||
         default_definition.analyzerClassName != analyzer(options[:default_analyzer]) ||
         default_definition.fullTextIndex != options[:full_text_index]
        raise "default analyzer does not match, run rake blur:create_table"
      end
      if definition.fullTextAnalyzerClassName != analyzer(options[:full_text_analyzer])
        raise "full text analyzer does not match, run rake blur:create_table"
      end

      # Look the family up first, then test the result for nil.
      families = definition.columnFamilyDefinitions
      family = families && families[options[:column_family]]
      if family.nil?
        raise "#{options[:column_family]} column family not found in #{options[:table_name]} table"
      end

      blur_columns = family.columnDefinitions || {}
      db_columns = columns_for_class(clazz, options)
      if blur_columns.size != db_columns.size
        raise "blur columns do not match, expected #{db_columns}, but was #{blur_columns}, run rake blur:create_table"
      end
      db_columns.each do |col|
        if blur_columns[col].nil?
          raise "blur columns do not match, #{col} was not found in blur, run rake blur:create_table"
        end
      end
    end

    # Column names to index: everything except 'id' and any name listed under
    # :exclude or :ignore (symbols or strings).
    def columns_for_class(clazz, options)
      skipped = (Array(options[:exclude]) + Array(options[:ignore])).map(&:to_s)
      clazz.column_names.reject { |col| col == 'id' || skipped.include?(col) }
    end

    # Returns :table_uri when given, otherwise "<table_uri_base>/<table_name>/".
    # Raises RuntimeError when neither option is set.
    def calculate_table_uri(options)
      return options[:table_uri] if options[:table_uri]
      base = options[:table_uri_base]
      raise ":table_uri or :table_uri_base must be set" unless base
      base = "#{base}/" unless base.end_with?('/')
      "#{base}#{options[:table_name]}/"
    end
  end
end
@@ -0,0 +1,25 @@
1
module ActiveBlur
  # Settings holder for the Blur connection: the connection string plus the
  # two option hashes, both of which start out empty.
  class Config
    attr_accessor :connection_str, :thrift_options, :options

    def initialize
      @thrift_options = {}
      @options = {}
    end

    # True once a connection string has been assigned.
    def valid?
      !connection_str.nil?
    end

    # Inverse of valid?.
    def invalid?
      !valid?
    end

    # Returns nil for a valid config, otherwise the problems found joined
    # with newlines.
    def errors
      problems = []
      problems << "connection_str cannot be nil" if connection_str.nil?
      problems.empty? ? nil : problems.join("\n")
    end
  end
end
@@ -0,0 +1,25 @@
1
+ require 'active_blur/searchable'
2
+ require 'active_blur/config'
3
module ActiveBlur
  # Rails integration: exposes config.active_blur, opens the Blur connection
  # during initialization, mixes Searchable into ActiveRecord once it loads,
  # and loads the gem's rake task files.
  class Railtie < Rails::Railtie

    config.active_blur = ActiveBlur::Config.new

    # Fail at boot with the config's own error text when it is incomplete.
    initializer "active_blur.initialize" do |app|
      blur_config = app.config.active_blur
      raise ArgumentError, blur_config.errors if blur_config.invalid?
      ActiveBlur.setup_connection(
        blur_config.connection_str,
        blur_config.options,
        blur_config.thrift_options
      )
    end

    initializer "active_blur.initialize_active_record" do
      ActiveSupport.on_load(:active_record) { include ActiveBlur::Searchable }
    end

    rake_tasks do
      ["tasks/create_table.rake", "tasks/reindex.rake"].each { |task_file| load task_file }
    end
  end
end
@@ -0,0 +1,53 @@
1
module ActiveBlur
  # A score paired with a result value (a record id in SearchResults#ids).
  class ScoredResult
    include ActiveModel::Serialization
    attr_accessor :score, :result

    def initialize(score, result)
      @score, @result = score, result
    end
  end

  # Wraps a Blur query response: the total hit count, the scored record ids,
  # and (fetched on first use) the matching model instances.
  class SearchResults
    include ActiveModel::Serialization
    attr_accessor :total_results, :ids, :clazz
    attr_writer :records

    # blur_results         - the response object; totalResults and results are read.
    # clazz                - model class used to load records by id.
    # should_fetch_records - when true, records are loaded immediately.
    def initialize(blur_results, clazz, should_fetch_records=false)
      @total_results = blur_results.totalResults
      @clazz = clazz
      @ids = []
      blur_results.results.each do |hit|
        hit_score = hit.score
        fetched = hit.fetchResult
        # Keep only hits whose fetch result exists, is not deleted and
        # actually carries a record.
        next unless fetched && fetched.exists && !fetched.deleted
        next unless fetched.recordResult && fetched.recordResult.record
        @ids << ScoredResult.new(hit_score, fetched.recordResult.record.recordId)
      end
      fetch_records if should_fetch_records
    end

    # Model instances for the hits, loaded the first time they are asked for.
    def records
      fetch_records unless @records
      @records
    end

    private

    # Loads each hit through clazz.find and stores its score on the instance
    # as @blur_score. Ids that raise RecordNotFound are skipped. Returns self.
    def fetch_records
      @records = []
      ids.each do |scored|
        begin
          found = clazz.find(scored.result)
          found.instance_variable_set(:@blur_score, scored.score)
          @records << found
        rescue ActiveRecord::RecordNotFound
          # Row is indexed in Blur but no longer in the database; skip it.
        end
      end
      self
    end

  end
end
@@ -0,0 +1,119 @@
1
module ActiveBlur
  # Mixin that gives a class the private +searchable+ macro. Including it
  # extends the class with ActsAsMethods; calling +searchable+ then adds the
  # search/index API and the save/destroy callbacks.
  module Searchable
    class << self
      def included(base) #:nodoc:
        base.extend(ActsAsMethods)
      end
    end

    module ActsAsMethods
      #
      # This method is defined on every class that includes Searchable. It
      # is false for classes on which #searchable has not been called, and
      # true for classes on which #searchable has been called.
      #
      # ==== Returns
      #
      # +false+
      #
      def searchable?
        false
      end

      private

      #
      # Registers the class with ActiveBlur and, on the first call, adds the
      # class and instance methods plus the indexing callbacks.
      #
      # ==== Options
      #
      # :auto_index:: pass +false+ to skip the before_save/after_save hooks
      # :auto_remove:: pass +false+ to skip the after_destroy hook
      #
      # All options are also forwarded to ActiveBlur.setup_class. When the
      # class is already searchable, only the :include lists are combined.
      #
      def searchable(options = {})
        ActiveBlur.setup_class(self, options)
        if searchable?
          # Build a new hash instead of mutating the stored one: the stored
          # :include may be nil or a single value, and the hash may be shared
          # with other classes through the class attribute.
          combined = Array(active_blur_options[:include]) + Array(options[:include])
          self.active_blur_options = active_blur_options.merge(:include => combined)
        else
          extend ClassMethods
          include InstanceMethods

          class_attribute :active_blur_options

          unless options[:auto_index] == false
            before_save :mark_for_auto_indexing
            after_save :index_if_needed
          end

          unless options[:auto_remove] == false
            after_destroy do |searchable|
              searchable.blur_remove_from_index
            end
          end

          self.active_blur_options = options
        end
      end
    end

    module ClassMethods
      # Adds the short names +search+ and +reindex+ unless the class already
      # defines methods with those names.
      def self.extended(base) #:nodoc:
        class << base
          alias_method :search, :blur_search unless method_defined? :search
          alias_method :reindex, :blur_reindex unless method_defined? :reindex
        end
      end

      # Searches this class through ActiveBlur.search.
      def blur_search(query, options = {})
        ActiveBlur.search(self, query, options)
      end

      # Re-indexes this class through ActiveBlur.reindex.
      def blur_reindex
        ActiveBlur.reindex(self)
      end

      #
      # Classes that have been defined as searchable return +true+ for this
      # method.
      #
      # ==== Returns
      #
      # +true+
      #
      def searchable?
        true
      end
    end

    module InstanceMethods
      # Adds the short names +index+, +remove_from_index+ and +score+ unless
      # the class already defines methods with those names.
      def self.included(base) #:nodoc:
        base.module_eval do
          alias_method :index, :blur_index unless method_defined? :index
          alias_method :remove_from_index, :blur_remove_from_index unless method_defined? :remove_from_index
          alias_method :score, :blur_score unless method_defined? :score
        end
      end

      # Writes this object to the index.
      def blur_index
        ActiveBlur.index(self)
      end

      # Removes this object from the index.
      def blur_remove_from_index
        ActiveBlur.remove(self)
      end

      # Value of @blur_score, or nil when it has not been set.
      def blur_score
        @blur_score
      end

      private

      # before_save hook: remembers whether the upcoming save creates or
      # changes the record. Always returns true so it never halts the save.
      def mark_for_auto_indexing
        @marked_for_auto_indexing = new_record? || changed?
        true
      end

      # after_save hook: indexes the record when it was marked, then clears
      # the marker.
      def index_if_needed
        if @marked_for_auto_indexing
          blur_index
          remove_instance_variable(:@marked_for_auto_indexing)
        end
      end
    end
  end
end