bigindex 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/MIT-LICENSE +20 -0
- data/README.rdoc +58 -0
- data/Rakefile +14 -0
- data/VERSION +1 -0
- data/examples/bigindex.yml +9 -0
- data/generators/bigindex/bigindex_generator.rb +17 -0
- data/generators/bigindex/templates/bigindex.rake +3 -0
- data/init.rb +27 -0
- data/install.rb +15 -0
- data/lib/big_index/adapters/abstract_adapter.rb +70 -0
- data/lib/big_index/adapters/solr_adapter.rb +180 -0
- data/lib/big_index/adapters.rb +11 -0
- data/lib/big_index/index_field.rb +41 -0
- data/lib/big_index/repository.rb +77 -0
- data/lib/big_index/resource.rb +462 -0
- data/lib/big_index/support/assertions.rb +8 -0
- data/lib/big_index/support.rb +3 -0
- data/lib/big_index.rb +108 -0
- data/lib/bigindex.rb +1 -0
- data/rails/init.rb +27 -0
- data/spec/connections/activerecord/activerecord.yml +7 -0
- data/spec/connections/activerecord/connection.rb +19 -0
- data/spec/connections/bigindex.yml +7 -0
- data/spec/connections/bigrecord/bigrecord.yml +13 -0
- data/spec/connections/bigrecord/connection.rb +29 -0
- data/spec/connections/bigrecord/migrations/20090706182535_add_animals_table.rb +13 -0
- data/spec/connections/bigrecord/migrations/20090706190623_add_books_table.rb +15 -0
- data/spec/connections/bigrecord/migrations/20090706193019_add_companies_table.rb +14 -0
- data/spec/connections/bigrecord/migrations/20090706194512_add_employees_table.rb +13 -0
- data/spec/connections/bigrecord/migrations/20090706195741_add_zoos_table.rb +13 -0
- data/spec/lib/activerecord/animal.rb +14 -0
- data/spec/lib/activerecord/book.rb +26 -0
- data/spec/lib/activerecord/novel.rb +10 -0
- data/spec/lib/bigrecord/animal.rb +11 -0
- data/spec/lib/bigrecord/book.rb +27 -0
- data/spec/lib/bigrecord/novel.rb +7 -0
- data/spec/spec.opts +4 -0
- data/spec/spec_helper.rb +28 -0
- data/spec/unit/adapters/abstract_adapter_spec.rb +48 -0
- data/spec/unit/adapters/adapter_shared_spec.rb +10 -0
- data/spec/unit/adapters/solr_adapter_spec.rb +16 -0
- data/spec/unit/bigindex_setup_spec.rb +70 -0
- data/spec/unit/index_shared_spec.rb +59 -0
- data/spec/unit/index_spec.rb +225 -0
- data/spec/unit/inherited_class_spec.rb +42 -0
- data/tasks/gem.rb +20 -0
- data/tasks/rdoc.rb +8 -0
- data/tasks/spec.rb +38 -0
- data/vendor/solr/adapter_methods/search_results.rb +53 -0
- data/vendor/solr/adapter_methods/solr_result.rb +137 -0
- data/vendor/solr/adapter_methods.rb +360 -0
- data/vendor/solr/base.rb +159 -0
- data/vendor/solr.rb +20 -0
- metadata +147 -0
|
@@ -0,0 +1,42 @@
|
|
|
1
|
+
require File.expand_path(File.join(File.dirname(__FILE__), '..', 'spec_helper'))
|
|
2
|
+
require File.expand_path(File.join(File.dirname(__FILE__), "index_shared_spec"))
|
|
3
|
+
|
|
4
|
+
describe BigIndex::Resource, "inheritance on" do

  # Index field names expected on Book itself.
  book_fields = [:title, :title_partial_match, :author, :author_partial_match, :description, :current_time, :skipped_field]

  # Novel is expected to expose everything Book does plus its own :publisher field.
  novel_fields = book_fields + [:publisher]

  describe "base class" do
    before(:each) do
      @model_class = Book
      Book.delete_all
      Book.drop_index
    end

    it_should_behave_like "a model with BigIndex::Resource"

    it "should contain its own index fields" do
      indexed = Book.index_configuration[:fields]
      indexed.size.should == 7

      names = indexed.map(&:field_name)
      book_fields.each { |expected| names.should include(expected) }
    end
  end

  describe "child class" do
    before(:each) do
      @model_class = Novel
      # The cleanup goes through Book here as well, exactly as in the base class group.
      Book.delete_all
      Book.drop_index
    end

    it_should_behave_like "a model with BigIndex::Resource"

    it "should contain its own index fields and the ones from its superclass" do
      indexed = Novel.index_configuration[:fields]
      indexed.size.should == 8

      names = indexed.map(&:field_name)
      novel_fields.each { |expected| names.should include(expected) }
    end
  end

end
|
data/tasks/gem.rb
ADDED
|
@@ -0,0 +1,20 @@
|
|
|
1
|
+
# Gem packaging tasks. Everything is wrapped in begin/rescue so that a missing
# 'jeweler' library only prints a hint instead of aborting the load of this file.
begin
  require 'jeweler'

  # Gem specification handed to Jeweler.
  Jeweler::Tasks.new do |gemspec|
    gemspec.name = "bigindex"
    gemspec.authors = ["openplaces.org"]
    gemspec.email = "bigrecord@openplaces.org"
    gemspec.homepage = "http://www.bigrecord.org"
    gemspec.summary = "A Rails plugin that drops into models and provides indexing functionality. Uses an adapter/repository pattern inspired by Datamapper to abstract the actual indexer used in the background, and exposes the model to a simple indexing API."
    gemspec.description = "A Rails plugin that drops into models and provides indexing functionality."
    # Packaged files: the listed directories (recursively) plus the top-level scripts.
    gemspec.files = FileList["{examples,generators,lib,rails,spec,tasks,vendor}/**/*","init.rb","install.rb","Rakefile","VERSION"].to_a
    gemspec.extra_rdoc_files = FileList["MIT-LICENSE","README.rdoc"].to_a

    # rspec is only needed for development; solr-ruby is a runtime dependency.
    gemspec.add_development_dependency "rspec"
    gemspec.add_dependency "solr-ruby", ">= 0.0.7"
  end
  Jeweler::GemcutterTasks.new
rescue LoadError
  # Reached when require 'jeweler' (or anything it loads) cannot be found.
  puts "Jeweler not available. Install it with: sudo gem install technicalpickles-jeweler -s http://gems.github.com"
end
|
data/tasks/rdoc.rb
ADDED
|
@@ -0,0 +1,8 @@
|
|
|
1
|
+
# Defines the :rdoc task, which writes the generated documentation to ./rdoc.
desc 'Generate documentation for Bigindex.'
Rake::RDocTask.new(:rdoc) do |rdoc|
  rdoc.rdoc_dir = 'rdoc'
  rdoc.title    = 'Bigindex'
  rdoc.options << '--line-numbers' << '--inline-source'
  # Paths are relative to the project root, like 'lib/**/*.rb' below and the
  # README.rdoc entry in the gemspec. The previous '../README.rdoc' pointed one
  # directory above the project, so the README was never picked up.
  rdoc.rdoc_files.include('README.rdoc')
  rdoc.rdoc_files.include('lib/**/*.rb')
end
|
data/tasks/spec.rb
ADDED
|
@@ -0,0 +1,38 @@
|
|
|
1
|
+
# Top-level "spec" task. It has no action of its own: it only depends on one
# "spec:<store>" task per entry of DATA_STORES (a constant defined outside this file).
desc "Run #{DATA_STORES.join(" and ")} specs"
task :spec => DATA_STORES.map{|store| "spec:#{store}" }
|
|
3
|
+
|
|
4
|
+
namespace :spec do
|
|
5
|
+
# Expands a glob pattern located under ROOT into an array of path strings.
expand_specs = lambda { |pattern| Pathname.glob((ROOT + pattern).to_s).map { |path| path.to_s } }

unit_specs        = expand_specs.call('spec/unit/**/*_spec.rb')
integration_specs = expand_specs.call('spec/integration/**/*_spec.rb')
all_specs         = expand_specs.call('spec/**/*_spec.rb')
|
|
8
|
+
|
|
9
|
+
# Defines a Spec::Rake::SpecTask called +name+ and returns it.
#
# name    - task name (e.g. 'all', 'unit', 'integration')
# adapter - data store name; "spec/connections/<adapter>" is added to the load path
# files   - either an array of spec file paths, or a String glob pattern that
#           is expanded into such an array first
# rcov    - accepted for interface compatibility; currently not used
def run_spec(name, adapter, files, rcov)
  # A String is treated as a glob pattern: expand it and start over with the list.
  if files.is_a?(String)
    return run_spec(name, adapter, Pathname.glob(files.to_s).map { |f| f.to_s }, rcov)
  end

  Spec::Rake::SpecTask.new(name) do |t|
    # File.readlines closes the file itself; File.open(...).readlines left the
    # handle open until garbage collection.
    t.spec_opts << File.readlines("spec/spec.opts").map { |line| line.chomp }
    t.spec_files = files
    t.libs << "spec" << "spec/connections/#{adapter}"
  end
end
|
|
21
|
+
|
|
22
|
+
# For every data store: a shortcut task named after the store (depending on the
# store's "all" suite) and, inside a namespace of the same name, one spec task
# per suite.
DATA_STORES.each do |adapter|
  task adapter.to_sym => "spec:#{adapter}:all"

  namespace adapter.to_sym do
    suites = [
      ['all',         all_specs,         "Run all specifications"],
      ['unit',        unit_specs,        "Run unit specifications"],
      ['integration', integration_specs, "Run integration specifications"]
    ]

    suites.each do |suite_name, suite_files, description|
      desc description
      run_spec(suite_name, adapter, suite_files, false)
    end
  end
end
|
|
38
|
+
end
|
|
@@ -0,0 +1,53 @@
|
|
|
1
|
+
module Solr

  module AdapterMethods

    # Read-only view over the hash of parsed Solr response data. Every reader
    # below just looks up one key of the hash given to the constructor.
    class SearchResults

      def initialize(solr_data={})
        @solr_data = solr_data
      end

      # reader method => key looked up in the wrapped hash
      [
        [:results,     :docs],        # array with the instances
        [:total,       :total],       # total records found
        [:facets,      :facets],      # facets when doing a faceted search
        [:max_score,   :max_score],   # highest score found
        [:debug,       :debug],       # debugging information, notably the score 'explain'
        # FIXME: this is used only by find_articles so it shouldn't be declared here
        [:exact_match, :exact_match]
      ].each do |reader, key|
        define_method(reader) { @solr_data[key] }
      end

      # Alternative names for the readers above.
      alias_method :docs,          :results
      alias_method :records,       :results
      alias_method :num_found,     :total
      alias_method :total_hits,    :total
      alias_method :highest_score, :max_score
    end

  end # module AdapterMethods

end # module Solr
|
|
@@ -0,0 +1,137 @@
|
|
|
1
|
+
module Solr

  module AdapterMethods

    # Lightweight object built from a single Solr hit. It exposes the hit's
    # fields as attributes (with the index type suffix stripped from their
    # names) without loading the real record; #real performs that lookup.
    class SolrResult
      #Dependencies.mark_for_unload self

      attr_accessor :total_hits
      attr_accessor :type
      attr_accessor :score
      attr_accessor :explain
      attr_accessor :index_id
      attr_accessor :solr_types
      attr_accessor :blurb
      attr_accessor :properties_blurb
      attr_accessor :web_documents

      # h           - hash of one Solr hit (index field name => value)
      # primary_key - accepted for interface compatibility; not used here, the
      #               primary key is read from the hard-coded "pk_s" key
      # total_hits  - stored as-is
      # explain     - stored as-is
      def initialize(h, primary_key, total_hits, explain)
        @attributes = {}
        h.each do |k, v|
          case k
          when "score" then @score = v
          when "type_s_mv" then @solr_types = v
          when "pk_s" then @attributes["id"] ||= v
          when "id"
            # The index id is split on the first ":" into the type (left part)
            # and the record id (right part).
            @index_id = v
            index_id_split = @index_id.split(":", 2)
            @attributes["id"] ||= index_id_split[1]
            @type = index_id_split[0]
          else
            # It's a normal case. Remove the suffix to make the result cleaner.
            if k.size >= 3 && k[-3..-1] == "_mv"
              k =~ /(.*)_.*_mv$/
            elsif k.size >= 3 && k[-3..-1] == "_ni"
              k =~ /(.*)_.*_ni$/
            else
              k =~ /(.*)_.*$/
            end
            # $1 is nil when the pattern did not match; the key is then kept unchanged.
            @attributes[$1 || k] = v
          end
        end
        @total_hits = total_hits
        @explain = explain

        if @solr_types
          @solr_types.each do |t|
            # NOTE(review): the type names come from the index and are passed to
            # eval; any failure is swallowed by the rescue modifier. Left as-is,
            # but worth replacing with a constant lookup.

            # add the shared behavior of the associated model class
            self.extend(eval("#{t}::SharedMethods")) rescue nil

            # add the shared behavior on Solr object
            self.extend(eval("#{t}::IndexMethods")) rescue nil
          end
        end
      end

      # Returns a shallow copy of the attribute hash.
      def attributes
        @attributes.dup
      end

      # Returns the stored value of the attribute identified by <tt>attr_name</tt>
      # (symbol or string). No typecasting is done here.
      def [](attr_name)
        @attributes[attr_name.to_s]
      end

      # Updates the attribute identified by <tt>attr_name</tt> with the specified +value+.
      def []=(attr_name, value)
        @attributes[attr_name.to_s] = value
      end

      def id
        self['id']
      end

      # Parsed "updated_at" attribute, or nil when the attribute is absent.
      def updated_at
        Time.parse(self["updated_at"]) if self["updated_at"]
      end

      # Parsed "created_at" attribute, or nil when the attribute is absent.
      # (This used to parse "updated_at", so it returned the modification time.)
      def created_at
        Time.parse(self["created_at"]) if self["created_at"]
      end

      # Loads +yaml_string+ and stores it in properties_blurb as an array of
      # [first_element, remaining_elements] pairs, or nil when the YAML is empty.
      # NOTE(review): YAML::load is applied to data coming from the index.
      def properties_blurb_from_yaml(yaml_string)
        yaml_loaded = YAML::load(yaml_string)
        @properties_blurb = (yaml_loaded.nil? || yaml_loaded.empty? ? nil : yaml_loaded.collect{|b|[b.shift, b]})
      end

      # Any unknown method called without arguments is treated as an attribute
      # read. Calls with arguments fall through and return nil.
      def method_missing(method_id, *arguments)
        self[method_id.to_s] if arguments.empty?
      end

      # convert the lightweight solr result into a real object
      # (memoized: +options+ only matter on the first call)
      def real(options={})
        @real ||= self.type.constantize.find(self.id, options)
      end

      # Deletes this document from the index and commits.
      def delete_from_index
        self.type.constantize.solr_delete(self.index_id)
        self.type.constantize.solr_commit
      end

      # Two results are equal when their ids are equal. Objects that do not
      # respond to +id+ (including nil) are simply not equal, instead of
      # raising NoMethodError.
      def ==(comparison_object)
        comparison_object.respond_to?(:id) && self.id == comparison_object.id
      end

      # Delegates to ==
      def eql?(comparison_object)
        self == (comparison_object)
      end

      # Delegates to id in order to allow two records of the same type and id to work with something like:
      #   [ Person.find(1), Person.find(2), Person.find(3) ] & [ Person.find(1), Person.find(4) ] # => [ Person.find(1) ]
      def hash
        id.hash
      end

      def to_s
        self.id.to_s
      end

      # Logger of the class named by +type+, or nil when it cannot be obtained.
      def logger
        begin
          self.type.constantize.logger
        rescue
          nil
        end
      end

    end

  end # module AdapterMethods

end # module Solr
|
|
@@ -0,0 +1,360 @@
|
|
|
1
|
+
require File.dirname(__FILE__) + "/adapter_methods/solr_result"
|
|
2
|
+
require File.dirname(__FILE__) + "/adapter_methods/search_results"
|
|
3
|
+
|
|
4
|
+
module Solr
|
|
5
|
+
|
|
6
|
+
module AdapterMethods
|
|
7
|
+
|
|
8
|
+
public
|
|
9
|
+
|
|
10
|
+
# Wraps +add_xml+ in an add-document request and executes it on the connection.
def solr_add(add_xml)
  request = Solr::Request::AddDocument.new(add_xml)
  @connection.solr_execute(request)
end
|
|
13
|
+
|
|
14
|
+
# Executes a delete request for +solr_ids+ (passed as the request's :id option).
def solr_delete(solr_ids)
  request = Solr::Request::Delete.new(:id => solr_ids)
  @connection.solr_execute(request)
end
|
|
17
|
+
|
|
18
|
+
# Executes a commit request on the connection.
def solr_commit
  request = Solr::Request::Commit.new
  @connection.solr_execute(request)
end
|
|
21
|
+
|
|
22
|
+
# Optimizes the Solr index. Solr says:
#
# Optimizations can take nearly ten minutes to run.
# We are presuming optimizations should be run once following large
# batch-like updates to the collection and/or once a day.
#
# One of the solutions for this would be to create a cron job that
# runs every day at midnight and optimizes the index:
#   0 0 * * * /your_rails_dir/script/runner -e production "BigIndex::Repository.adapters[:default].solr_optimize"
#
def solr_optimize
  request = Solr::Request::Optimize.new
  @connection.solr_execute(request)
end
|
|
35
|
+
|
|
36
|
+
# Returns the class of +model+ followed by each of its superclasses up to and
# including the class reported by +base_class+, most specific class first.
def all_classes_for_solr(model)
  root = model.class.base_class
  lineage = []
  klass = model.class

  until klass == root
    lineage.push(klass)
    klass = klass.superclass
  end

  lineage.push(root)
  lineage
end
|
|
47
|
+
|
|
48
|
+
# TODO: This is a big ugly method that needs to be refactored
#
# Builds and returns the Solr::Document for +model+:
#   1. the document boost (only when the configuration has a :boost entry),
#   2. the id / type / primary key fields,
#   3. one Solr::Field per value of every configured index field,
#   4. whatever model.dynamic_fields adds (when the model responds to it),
#   5. the :include associations (see add_includes).
def to_solr_doc(model)
  configuration = model.index_configuration
  # NOTE(review): this local is assigned but never read in this method.
  logger = model.logger || nil

  doc = Solr::Document.new
  doc.boost = validate_boost(configuration[:boost], model) if configuration[:boost]

  doc << {:id => model.index_id,
  configuration[:type_field] => all_classes_for_solr(model),
  configuration[:primary_key_field] => model.record_id.to_s}

  # iterate through the fields and add them to the document,
  configuration[:fields].each do |field|
    # A field can carry a :skip callable; when it returns true for this model
    # the field is left out of the document.
    next if (field[:skip] && field[:skip].call(model))

    field_name = field.field_name
    field_type = get_field_type(field.field_type) if field.field_type
    field_boost= field[:boost] if field[:boost]

    # Fallbacks when the field declares no type / boost. The expression parses
    # as (facets && facets.include?(field)) ? :facet : :text.
    field_type ||= configuration[:facets] && configuration[:facets].include?(field) ? :facet : :text
    field_boost ||= configuration[:default_boost]

    # add the field to the document, but only if it's not the id field
    # or the type field (from single table inheritance), since these
    # fields have already been added above.
    if field_name.to_s != model.class.primary_key and field_name.to_s != "type"
      # NOTE(review): field_type may already be the result of get_field_type
      # (see above), so it can go through get_field_type twice — confirm that
      # get_field_type tolerates that.
      suffix = get_field_type(field_type)
      value = model.send("#{field_name}_for_index")
      # A Hash return value is read as {value => boost}; only its first pair is used.
      if value.is_a?(Hash)
        boost = value.values.first
        value = value.keys.first
      end

      value = set_value_if_nil(field_type) if value.to_s == ""

      # This next line ensures that e.g. nil dates are excluded from the
      # document, since they choke Solr. Also ignores e.g. empty strings,
      # but these can't be searched for anyway:
      # http://www.mail-archive.com/solr-dev@lucene.apache.org/msg05423.html
      next if value.nil? || value.to_s.strip.empty?

      # Scalars and (nested) arrays are handled alike: one Solr field per element.
      [value].flatten.each do |v|
        # NOTE(review): this condition tests +value+ (not +v+) and can never be
        # true here, because blank values were skipped by the `next` above.
        v = set_value_if_nil(suffix) if value.to_s == ""
        # NOTE(review): this reuses the name of the outer block variable +field+.
        field = Solr::Field.new("#{field_name}_#{suffix}" => ERB::Util.html_escape(v.to_s))
        field.boost = validate_boost((boost || field_boost), model)
        doc << field
      end
    end
  end

  model.dynamic_fields(doc) if model.respond_to?(:dynamic_fields)

  add_includes(doc, model) if configuration[:include]
  return doc
end
|
|
104
|
+
|
|
105
|
+
# Adds the attributes of the associations listed in the model's :include
# configuration to +doc+. Nothing happens unless :include is an Array.
#
# Each association is stored under "<singular association name>_t" as
# space-separated "attribute=value" pairs. For collection associations
# (has_many / has_and_belongs_to_many) the pairs of all records are joined with
# a space as well. Previously the records were appended back to back, so the
# last value of one record ran into the first attribute name of the next.
def add_includes(doc, model)
  configuration = model.index_configuration
  return unless configuration[:include].is_a?(Array)

  # Renders one record as "name=value name=value ...".
  describe_record = lambda do |record|
    record.attributes.map { |name, value| "#{name}=#{value}" }.join(" ")
  end

  configuration[:include].each do |association|
    field_name = "#{association.to_s.singularize}_t"

    case model.class.reflect_on_association(association).macro
    when :has_many, :has_and_belongs_to_many
      records = model.send(association).to_a
      unless records.empty?
        doc[field_name] = records.map { |record| describe_record.call(record) }.join(" ")
      end
    when :has_one, :belongs_to
      record = model.send(association)
      doc[field_name] = describe_record.call(record) unless record.nil?
    end
  end
end
|
|
129
|
+
|
|
130
|
+
# Returns the boost to apply for +model+.
#
# * When the configuration has a :boost entry and evaluating it against the
#   model gives a value greater than zero, that value wins and the +boost+
#   argument is ignored.
# * Otherwise +boost+ is returned if it is a Float that is not negative.
# * Anything else (nil, Integer, negative Float, ...) is replaced by the
#   configured :default_boost, with a warning when the model has a logger.
def validate_boost(boost, model)
  configuration = model.index_configuration
  logger = model.logger

  configured = evaluate_condition(configuration[:boost], model) if configuration[:boost]
  return configured if configured && configured > 0

  unless boost.is_a?(Float) && boost >= 0
    logger.warn "The boost value has to be a float and positive, but got #{boost}. Using default boost value." if logger
    return configuration[:default_boost]
  end

  boost
end
|
|
142
|
+
|
|
143
|
+
# True when +condition+ responds to "call" and reports an arity of 1 or -1.
def condition_block?(condition)
  return false unless condition.respond_to?("call")

  [1, -1].include?(condition.arity)
end
|
|
146
|
+
|
|
147
|
+
# Evaluates +condition+ against +field+ and returns the result:
#   Symbol - sent to +field+ as a method name
#   String - eval'ed in this method's binding (so it can refer to +field+)
#   other  - called with +field+ when condition_block? accepts it
# Raises ArgumentError for anything else.
def evaluate_condition(condition, field)
  return field.send(condition) if condition.is_a?(Symbol)

  # NOTE(review): eval of a configuration string; make sure it never carries
  # untrusted input.
  return eval(condition, binding) if condition.is_a?(String)

  unless condition_block?(condition)
    raise(
      ArgumentError,
      "The :if option has to be either a symbol, string (to be eval'ed), proc/method, or " +
      "class implementing a static validation method"
    )
  end

  condition.call(field)
end
|
|
163
|
+
|
|
164
|
+
# Returns the default value used in place of a blank value of the given
# +field_type+ (either a type symbol or an index suffix string):
# "false" for booleans, "" for text-like and date types, 0.0 for floats,
# 0 for integers, [] for multi-valued and geo types, nil for anything else.
def set_value_if_nil(field_type)
  if ["b", :boolean].include?(field_type)
    "false"
  elsif ["s", "t", "t_ns", "t_ni", "d", "ngrams", "auto", "lc", "em",
         :date, :string, :text, :text_not_stored, :text_not_indexed,
         :ngrams, :autocomplete, :lowercase, :exact_match].include?(field_type)
    ""
  elsif ["f", "rf", :float, :range_float].include?(field_type)
    0.00
  elsif ["i", "ri", :integer, :range_integer].include?(field_type)
    0
  elsif ["f_mv", "i_mv", "b_mv", "s_mv", "t_mv", "t_mv_ns", "d_mv", "rf_mv", "ri_mv",
         "ngrams_mv", "auto_mv", "lc_mv", "em_mv", "geo"].include?(field_type)
    []
  elsif [:float_array, :integer_array, :boolean_array, :string_array, :date_array,
         :range_float_array, :range_integer_array, :ngrams_array, :text_array,
         :text_array_not_stored, :autocomplete_array, :lowercase_array,
         :exact_match_array, :geo].include?(field_type)
    []
  end
end
|
|
177
|
+
|
|
178
|
+
public # Making these methods public for anyone who wants to query the indexer directly
|
|
179
|
+
|
|
180
|
+
# Method used by mostly all the ClassMethods when doing a search.
#
# Builds a Solr::Request::Standard from +query+ and +options+ and executes it on
# the connection, returning whatever the connection returns. Returns nil when
# +query+ is nil.
#
# model   - object providing index_configuration and index_type
# query   - query string; unless options[:no_parsing] is set, "field:" prefixes
#           are rewritten to the typed index field names
# options - see valid_options below for the recognised keys
# models  - optional pre-built type restriction; when nil, one is built from
#           the model's index type
#
# Any error raised while building or executing the request is re-raised as a
# RuntimeError whose message starts with "There was a problem executing your search".
def parse_query(model, query=nil, options={}, models=nil)
  configuration = model.index_configuration

  # Only referenced by the disabled check below.
  valid_options = [:fields, :offset, :limit, :facets, :models, :results_format,
                   :order, :scores, :operator, :debug, :query_function, :include_deleted,
                   :view, :no_parsing, :force_reload, :timestamp]
  query_options = {}
  return if query.nil?

  # TODO: This should provide a warning instead of raising an error. Use log? or something else...
  # raise "Invalid parameters: #{(options.keys - valid_options).join(',')}" unless (options.keys - valid_options).empty?

  begin
    query_options[:start] = options[:offset]
    query_options[:rows] = options[:limit] || 100
    query_options[:debug_query] = options[:debug]

    # first steps on the facet parameter processing
    facets = options[:facets]
    if facets
      query_options[:facets] = {}
      query_options[:facets][:limit] = -1 # TODO: make this configurable
      query_options[:facets][:sort] = :count if facets[:sort]
      query_options[:facets][:mincount] = 0
      query_options[:facets][:mincount] = 1 if facets[:zeros] == false
      query_options[:facets][:fields] = facets[:fields].collect { |k| "#{k}_facet" } if facets[:fields]

      # replace_types takes the model as its first argument; these two calls
      # used to omit it, so any :browse / :query facet search failed. The
      # non-destructive sub also leaves the caller's strings untouched and
      # keeps entries that contain no ":" instead of turning them into "".
      if facets[:browse]
        browse = facets[:browse].collect { |k| k.to_s.sub(/ *: */, "_facet:") }
        query_options[:filter_queries] = replace_types(model, browse)
      end
      if facets[:query]
        facet_queries = facets[:query].collect { |k| k.to_s.sub(/ *: */, "_t:") }
        query_options[:facets][:queries] = replace_types(model, facet_queries)
      end
    end

    if models.nil?
      # TODO: use a filter query for type, allowing Solr to cache it individually
      models = "#{configuration[:type_field]}:\"#{model.index_type}\"^0.01"
      field_list = [configuration[:primary_key_field], configuration[:type_field]]
      if options[:fields]
        if options[:no_parsing]
          field_list += options[:fields]
        else
          field_list += replace_types(model, options[:fields].collect { |f| "#{f}_t" }, false)
        end
      end
    else
      field_list = ["id"]
    end

    query_options[:field_list] = field_list + ['id']

    if query.empty?
      query = "#{models}"
    elsif !options[:no_parsing]
      query = "(#{query.gsub(/ *: */, "_t:")}) AND #{models}"
    end

    order = options[:order]
    order = order.split(/\s*,\s*/).collect { |e| e.gsub(/\s+/, '_t ') }.join(',') if order && !options[:no_parsing]

    final_query = options[:no_parsing] ? query : replace_types(model, [query])[0]
    if options[:order]
      # TODO: set the sort parameter instead of the old ;order. style.
      order_clause = options[:no_parsing] ? order : replace_types(model, [order], false)[0]
      # Build a new string: appending with << modified the caller's query
      # string when :no_parsing was set.
      final_query = "#{final_query};#{order_clause}"
    end
    query_options[:query] = final_query

    @connection.solr_execute(Solr::Request::Standard.new(query_options))
  rescue
    raise "There was a problem executing your search: #{$!}"
  end
end
|
|
246
|
+
|
|
247
|
+
# Parses the data returned from Solr into a SearchResults object.
#
# model     - object providing index_configuration and find
# solr_data - Solr response; total_hits, hits, data and max_score are read from it
# options   - search options. :format selects what ends up in the docs:
#               :objects (default) - records loaded through model.find, kept in
#                                    the order Solr returned them
#               :ids               - the primary key values only
#               anything else      - SolrResult objects built from the hits
#             :facets makes the facet counts part of the result.
#
# Neither +options+ nor the model's index configuration is modified.
def parse_results(model, solr_data, options = {})
  # Work on a copy: the search options are merged into it below and must not
  # end up in the configuration that is used for indexing.
  configuration = model.index_configuration.dup

  results = {
    :docs => [],
    :total => 0
  }
  configuration[:format] = options[:format]
  configuration[:format] ||= :objects

  results.update(:facets => {'facet_fields' => []}) if options[:facets]

  return SearchResults.new(results) if solr_data.total_hits == 0

  configuration.update(options) if options.is_a?(Hash)

  ids = solr_data.hits.collect { |doc| doc["#{configuration[:primary_key_field]}"] }.flatten
  #conditions = [ "#{self.table_name}.#{primary_key} in (?)", ids ]

  explain_data = (solr_data.data['debug'] && solr_data.data['debug']['explain']) || {}

  if ids.size > 0
    case configuration[:format]
    when :objects
      # Only these options are forwarded to find. They are collected in a new
      # hash; filtering +options+ in place used to drop :facets before the
      # facet counts were read further down.
      # NOTE(review): the original also called options.merge(:bypass_index => true)
      # and discarded the result, so :bypass_index was never passed to find.
      # That behavior is kept.
      finder_keys = [:view, :force_reload, :include_deleted, :timestamp]
      find_options = options.reject { |key, _value| !finder_keys.include?(key) }
      result = begin
        reorder(model.find(ids, find_options), ids)
      rescue => e
        # Best effort, as before: a failing lookup yields no records. The
        # failure is at least logged when the model offers a logger.
        if model.respond_to?(:logger) && model.logger
          model.logger.warn("Could not load the records for the search results: #{e}")
        end
        []
      end
    when :ids
      result = ids
    else
      result = solr_data.hits.collect do |d|
        r = SolrResult.new(d, configuration[:primary_key_field], solr_data.total_hits, explain_data[d["id"]])
        r.properties_blurb_from_yaml(solr_data.data['properties_blurb'][r.id]) if (solr_data.data['properties_blurb'] && solr_data.data['properties_blurb'][r.id])
        r.blurb=(solr_data.data['blurbs'][r.id]) if solr_data.data['blurbs']
        r
      end
    end
  else
    result = []
  end

  results.update(:facets => solr_data.data['facet_counts']) if options[:facets]
  results.update(:debug => solr_data.data['debug'])

  results.update({:docs => result, :total => solr_data.total_hits, :max_score => solr_data.max_score})
  SearchResults.new(results)
end
|
|
302
|
+
|
|
303
|
+
# Returns the members of +things+ arranged in the order of +ids+ (the order
# Solr returned). Ids are matched against record_id as strings; ids without a
# matching thing are left out.
def reorder(things, ids)
  matches = ids.map do |id|
    wanted = id.to_s
    things.find { |thing| thing.record_id.to_s == wanted }
  end

  matches.compact
end
|
|
312
|
+
|
|
313
|
+
# Replaces the generic "_t" suffix of field names found in +strings+ with the
# suffix of the type declared for that field in the model's index
# configuration, e.g. "name_t:" becomes "name_s:" for a string field.
#
# model         - object providing index_configuration
# strings       - array of query fragments; it is updated in place and returned
# include_colon - true when the field names in +strings+ are followed by ":"
#                 (queries), false when they are bare (field lists, sort orders)
def replace_types(model, strings, include_colon=true)
  configuration = model.index_configuration

  suffix = include_colon ? ":" : ""

  # Any character that cannot be part of a field name. "-" is deliberately a
  # boundary, so a prohibited clause such as "-name_t:foo" is still rewritten.
  # (The previous class, [^a-z|A-Z|_|-|0-9], also counted "|" as a field-name
  # character, so a field right after "||" was not rewritten.)
  boundary = "[^a-zA-Z_0-9]"

  if configuration[:fields] && configuration[:fields].is_a?(Array)
    configuration[:fields].each do |index_field|

      field_type = get_field_type(index_field.field_type)
      field = "#{index_field.field_name.to_s}_#{field_type}#{suffix}"
      generic_name = Regexp.escape(index_field.field_name.to_s)

      # Replace the type suffix only when the characters before and after the
      # field name cannot belong to a field name. That way only whole field
      # names are replaced and never the tail of a longer one: with name_t and
      # ancestor_name_t, rewriting name_t to name_s must leave ancestor_name_t
      # alone.
      if suffix.empty?
        pattern = /(^|#{boundary})#{generic_name}_t(#{boundary}|$)/
        strings.map! { |s| s.gsub(pattern) { "#{$1}#{field}#{$2}" } }
      else
        pattern = /(^|#{boundary})#{generic_name}_t#{suffix}/
        strings.map! { |s| s.gsub(pattern) { "#{$1}#{field}" } }
      end

    end
  end

  # fix the primary key type as well
  strings.map! { |s| s.gsub(/pk_t#{suffix}/, "#{configuration[:primary_key_field]}#{suffix}") }

  # fix the general blob
  strings.map! { |s| s.gsub(/blob_t#{suffix}/, "blob_t_mv#{suffix}") }

  # fix *
  strings.map! { |s| s.gsub(/\*_t#{suffix}/, "*#{suffix}") }

  strings
end
|
|
352
|
+
|
|
353
|
+
# Returns the distinct "prefix:" tokens of +query+ (each a run of non-space
# characters ending in a colon) without the trailing colon, in order of first
# appearance. Returns nil when +query+ is nil or false.
def get_types(query)
  return nil unless query

  prefixes = query.scan(/[^ ]+[:]/).uniq
  prefixes.collect { |token| token.chomp(':') }
end
|
|
356
|
+
|
|
357
|
+
|
|
358
|
+
end # module AdapterMethods
|
|
359
|
+
|
|
360
|
+
end # module Solr
|