acts_as_solr 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.gitignore +8 -0
- data/CHANGE_LOG +233 -0
- data/FORKED_CHANGES +3 -0
- data/LICENSE +19 -0
- data/README.markdown +94 -0
- data/README.rdoc +84 -0
- data/Rakefile +57 -0
- data/TESTING_THE_PLUGIN +25 -0
- data/VERSION +1 -0
- data/acts_as_solr.gemspec +237 -0
- data/config/solr.yml +15 -0
- data/config/solr_environment.rb +22 -0
- data/init.rb +21 -0
- data/install.rb +11 -0
- data/lib/acts_as_solr.rb +61 -0
- data/lib/acts_methods.rb +284 -0
- data/lib/class_methods.rb +239 -0
- data/lib/common_methods.rb +89 -0
- data/lib/deprecation.rb +61 -0
- data/lib/instance_methods.rb +181 -0
- data/lib/lazy_document.rb +18 -0
- data/lib/parser_methods.rb +230 -0
- data/lib/search_results.rb +69 -0
- data/lib/solr/connection.rb +191 -0
- data/lib/solr/document.rb +78 -0
- data/lib/solr/exception.rb +13 -0
- data/lib/solr/field.rb +39 -0
- data/lib/solr/importer/array_mapper.rb +26 -0
- data/lib/solr/importer/delimited_file_source.rb +38 -0
- data/lib/solr/importer/hpricot_mapper.rb +27 -0
- data/lib/solr/importer/mapper.rb +51 -0
- data/lib/solr/importer/solr_source.rb +43 -0
- data/lib/solr/importer/xpath_mapper.rb +35 -0
- data/lib/solr/importer.rb +19 -0
- data/lib/solr/indexer.rb +52 -0
- data/lib/solr/request/add_document.rb +63 -0
- data/lib/solr/request/base.rb +36 -0
- data/lib/solr/request/commit.rb +31 -0
- data/lib/solr/request/delete.rb +50 -0
- data/lib/solr/request/dismax.rb +46 -0
- data/lib/solr/request/index_info.rb +22 -0
- data/lib/solr/request/modify_document.rb +51 -0
- data/lib/solr/request/optimize.rb +21 -0
- data/lib/solr/request/ping.rb +36 -0
- data/lib/solr/request/select.rb +56 -0
- data/lib/solr/request/spellcheck.rb +30 -0
- data/lib/solr/request/standard.rb +402 -0
- data/lib/solr/request/update.rb +23 -0
- data/lib/solr/request.rb +26 -0
- data/lib/solr/response/add_document.rb +17 -0
- data/lib/solr/response/base.rb +42 -0
- data/lib/solr/response/commit.rb +17 -0
- data/lib/solr/response/delete.rb +13 -0
- data/lib/solr/response/dismax.rb +8 -0
- data/lib/solr/response/index_info.rb +26 -0
- data/lib/solr/response/modify_document.rb +17 -0
- data/lib/solr/response/optimize.rb +14 -0
- data/lib/solr/response/ping.rb +28 -0
- data/lib/solr/response/ruby.rb +42 -0
- data/lib/solr/response/select.rb +17 -0
- data/lib/solr/response/spellcheck.rb +20 -0
- data/lib/solr/response/standard.rb +64 -0
- data/lib/solr/response/xml.rb +42 -0
- data/lib/solr/response.rb +27 -0
- data/lib/solr/solrtasks.rb +27 -0
- data/lib/solr/util.rb +32 -0
- data/lib/solr/xml.rb +44 -0
- data/lib/solr.rb +21 -0
- data/lib/solr_fixtures.rb +13 -0
- data/lib/tasks/database.rake +18 -0
- data/lib/tasks/solr.rake +137 -0
- data/lib/tasks/test.rake +7 -0
- data/lib/will_paginate_support.rb +12 -0
- data/solr/CHANGES.txt +1207 -0
- data/solr/LICENSE.txt +712 -0
- data/solr/NOTICE.txt +90 -0
- data/solr/etc/jetty.xml +205 -0
- data/solr/etc/webdefault.xml +379 -0
- data/solr/lib/easymock.jar +0 -0
- data/solr/lib/jetty-6.1.3.jar +0 -0
- data/solr/lib/jetty-util-6.1.3.jar +0 -0
- data/solr/lib/jsp-2.1/ant-1.6.5.jar +0 -0
- data/solr/lib/jsp-2.1/core-3.1.1.jar +0 -0
- data/solr/lib/jsp-2.1/jsp-2.1.jar +0 -0
- data/solr/lib/jsp-2.1/jsp-api-2.1.jar +0 -0
- data/solr/lib/servlet-api-2.4.jar +0 -0
- data/solr/lib/servlet-api-2.5-6.1.3.jar +0 -0
- data/solr/lib/xpp3-1.1.3.4.O.jar +0 -0
- data/solr/logs/.empty-dir-for-git +0 -0
- data/solr/solr/README.txt +52 -0
- data/solr/solr/bin/abc +176 -0
- data/solr/solr/bin/abo +176 -0
- data/solr/solr/bin/backup +108 -0
- data/solr/solr/bin/backupcleaner +142 -0
- data/solr/solr/bin/commit +128 -0
- data/solr/solr/bin/optimize +129 -0
- data/solr/solr/bin/readercycle +129 -0
- data/solr/solr/bin/rsyncd-disable +77 -0
- data/solr/solr/bin/rsyncd-enable +76 -0
- data/solr/solr/bin/rsyncd-start +145 -0
- data/solr/solr/bin/rsyncd-stop +105 -0
- data/solr/solr/bin/scripts-util +83 -0
- data/solr/solr/bin/snapcleaner +148 -0
- data/solr/solr/bin/snapinstaller +168 -0
- data/solr/solr/bin/snappuller +248 -0
- data/solr/solr/bin/snappuller-disable +77 -0
- data/solr/solr/bin/snappuller-enable +77 -0
- data/solr/solr/bin/snapshooter +109 -0
- data/solr/solr/conf/admin-extra.html +31 -0
- data/solr/solr/conf/protwords.txt +21 -0
- data/solr/solr/conf/schema.xml +126 -0
- data/solr/solr/conf/scripts.conf +24 -0
- data/solr/solr/conf/solrconfig.xml +458 -0
- data/solr/solr/conf/stopwords.txt +57 -0
- data/solr/solr/conf/synonyms.txt +31 -0
- data/solr/solr/conf/xslt/example.xsl +132 -0
- data/solr/solr/conf/xslt/example_atom.xsl +63 -0
- data/solr/solr/conf/xslt/example_rss.xsl +62 -0
- data/solr/start.jar +0 -0
- data/solr/tmp/.empty-dir-for-git +0 -0
- data/solr/webapps/solr.war +0 -0
- data/test/config/solr.yml +2 -0
- data/test/db/connections/mysql/connection.rb +10 -0
- data/test/db/connections/sqlite/connection.rb +8 -0
- data/test/db/migrate/001_create_books.rb +15 -0
- data/test/db/migrate/002_create_movies.rb +12 -0
- data/test/db/migrate/003_create_categories.rb +11 -0
- data/test/db/migrate/004_create_electronics.rb +16 -0
- data/test/db/migrate/005_create_authors.rb +12 -0
- data/test/db/migrate/006_create_postings.rb +9 -0
- data/test/db/migrate/007_create_posts.rb +13 -0
- data/test/db/migrate/008_create_gadgets.rb +11 -0
- data/test/fixtures/authors.yml +9 -0
- data/test/fixtures/books.yml +13 -0
- data/test/fixtures/categories.yml +7 -0
- data/test/fixtures/db_definitions/mysql.sql +41 -0
- data/test/fixtures/electronics.yml +49 -0
- data/test/fixtures/movies.yml +9 -0
- data/test/fixtures/postings.yml +10 -0
- data/test/functional/acts_as_solr_test.rb +413 -0
- data/test/functional/association_indexing_test.rb +37 -0
- data/test/functional/faceted_search_test.rb +163 -0
- data/test/functional/multi_solr_search_test.rb +51 -0
- data/test/models/author.rb +10 -0
- data/test/models/book.rb +10 -0
- data/test/models/category.rb +8 -0
- data/test/models/electronic.rb +21 -0
- data/test/models/gadget.rb +9 -0
- data/test/models/movie.rb +17 -0
- data/test/models/novel.rb +2 -0
- data/test/models/post.rb +3 -0
- data/test/models/posting.rb +11 -0
- data/test/test_helper.rb +51 -0
- data/test/unit/acts_methods_shoulda.rb +70 -0
- data/test/unit/class_methods_shoulda.rb +90 -0
- data/test/unit/common_methods_shoulda.rb +112 -0
- data/test/unit/instance_methods_shoulda.rb +326 -0
- data/test/unit/lazy_document_shoulda.rb +35 -0
- data/test/unit/parser_instance.rb +19 -0
- data/test/unit/parser_methods_shoulda.rb +279 -0
- data/test/unit/solr_instance.rb +46 -0
- data/test/unit/test_helper.rb +26 -0
- metadata +259 -0
|
@@ -0,0 +1,230 @@
|
|
|
1
|
+
module ActsAsSolr #:nodoc:
|
|
2
|
+
module ParserMethods
|
|
3
|
+
protected
|
|
4
|
+
|
|
5
|
+
# Method used by mostly all the ClassMethods when doing a search
|
|
6
|
+
# Builds the Solr request options for +query+ and executes the search.
#
# query   - query string; nil or blank returns nil without searching.
# options - Hash limited to the keys in +valid_options+; any other key
#           raises "Invalid parameters: ...".
# models  - optional type restriction appended to the query. When nil the
#           restriction comes from solr_type_condition and the configured
#           primary key field is requested instead of "id".
#
# Returns the value of ActsAsSolr::Post.execute for the built request.
# Any StandardError raised while building or executing the request is
# re-raised as a RuntimeError prefixed with
# "There was a problem executing your search".
#
# Option values are only read here: facet strings are rewritten with the
# non-destructive String#sub (sub! returned nil for entries without a colon
# and changed the caller's strings), and :highlight => true is expanded in a
# local variable instead of being written back into +options+.
def parse_query(query=nil, options={}, models=nil)
  valid_options = [:offset, :limit, :facets, :models, :results_format, :order, :scores, :operator, :include, :lazy, :highlight]
  query_options = {}

  return nil if (query.nil? || query.strip == '')

  invalid_keys = options.keys - valid_options
  raise "Invalid parameters: #{invalid_keys.join(',')}" unless invalid_keys.empty?
  begin
    Deprecation.validate_query(options)
    query_options[:start] = options[:offset]
    query_options[:rows] = options[:limit]
    query_options[:operator] = options[:operator]

    # :highlight => true is shorthand for highlighting every field
    highlight = options[:highlight]
    highlight = {:fields => "*"} if highlight == true

    if highlight
      highlighting = {:field_list => []}
      # [*x] accepts a single field name as well as a list of names
      highlighting[:field_list] << [*highlight[:fields]].collect {|k| "#{k}_t"} if highlight[:fields]
      [:require_field_match, :max_snippets, :prefix, :suffix].each do |key|
        highlighting[key] = highlight[key] if highlight[key]
      end
      query_options[:highlighting] = highlighting
    end

    # first steps on the facet parameter processing
    if options[:facets]
      facets = options[:facets]
      query_options[:facets] = {}
      query_options[:facets][:limit] = -1  # TODO: make this configurable
      query_options[:facets][:sort] = :count if facets[:sort]
      query_options[:facets][:mincount] = 0
      query_options[:facets][:mincount] = 1 if facets[:zeros] == false
      # override the :zeros (it's deprecated anyway) if :mincount exists
      query_options[:facets][:mincount] = facets[:mincount] if facets[:mincount]
      query_options[:facets][:fields] = facets[:fields].collect{|k| "#{k}_facet"} if facets[:fields]
      query_options[:filter_queries] = replace_types([*facets[:browse]].collect{|k| k.to_s.sub(/ *: */,"_facet:")}) if facets[:browse]
      query_options[:facets][:queries] = replace_types([*facets[:query]].collect{|k| k.to_s.sub(/ *: */,"_t:")}) if facets[:query]

      if facets[:dates]
        dates = facets[:dates]
        query_options[:date_facets] = {}
        # if dates[:fields] exists then :start, :end, and :gap must be there
        if dates[:fields]
          [:start, :end, :gap].each { |k| raise "#{k} must be present in faceted date query" unless dates.include?(k) }
          query_options[:date_facets][:fields] = []
          dates[:fields].each { |f|
            if f.kind_of? Hash
              key = f.keys[0]
              query_options[:date_facets][:fields] << {"#{key}_d" => f[key]}
              validate_date_facet_other_options(f[key][:other]) if f[key][:other]
            else
              query_options[:date_facets][:fields] << "#{f}_d"
            end
          }
        end

        [:start, :end, :gap, :hardend].each do |key|
          query_options[:date_facets][key] = dates[key] if dates[key]
        end
        # (?!\d) keeps the colons inside time values such as 10:30:00 intact
        query_options[:date_facets][:filter] = replace_types([*dates[:filter]].collect{|k| k.to_s.sub(/ *:(?!\d) */,"_d:")}) if dates[:filter]

        if dates[:other]
          validate_date_facet_other_options(dates[:other])
          query_options[:date_facets][:other] = dates[:other]
        end
      end
    end

    if models.nil?
      # TODO: use a filter query for type, allowing Solr to cache it individually
      models = "AND #{solr_type_condition}"
      field_list = solr_configuration[:primary_key_field]
    else
      field_list = "id"
    end

    query_options[:field_list] = [field_list, 'score']
    query = "(#{query.gsub(/ *: */,"_t:")}) #{models}"
    order = options[:order].split(/\s*,\s*/).collect{|e| e.gsub(/\s+/,'_t ').gsub(/\bscore_t\b/, 'score') }.join(',') if options[:order]
    query_options[:query] = replace_types([query])[0] # TODO adjust replace_types to work with String or Array

    if options[:order]
      # TODO: set the sort parameter instead of the old ;order. style.
      query_options[:query] << ';' << replace_types([order], false)[0]
    end

    ActsAsSolr::Post.execute(Solr::Request::Standard.new(query_options))
  rescue
    raise "There was a problem executing your search: #{$!} in #{$!.backtrace.first}"
  end
end
|
|
100
|
+
|
|
101
|
+
# Returns the parenthesised Solr clause that restricts results to this
# class and its subclasses, e.g. "(type_field:Book OR type_field:Novel)".
# The field name is read from solr_configuration[:type_field].
def solr_type_condition
  type_field = solr_configuration[:type_field]
  class_names = [self.name] + subclasses.collect { |subclass| subclass.name }
  "(" + class_names.collect { |class_name| "#{type_field}:#{class_name}" }.join(" OR ") + ")"
end
|
|
106
|
+
|
|
107
|
+
# Parses the data returned from Solr
|
|
108
|
+
# Converts the raw Solr response into a SearchResults object.
#
# solr_data - response object answering hits, total_hits, max_score, data
#             and highlighting; nil or zero hits yields an empty result.
# options   - Hash merged over the default {:format => :objects}; :facets
#             and :scores are consulted here, the rest is passed on to
#             find_objects.
#
# The result hash carries :docs, :total, :max_score, :query_time,
# :highlights and, for faceted searches, :facets.
#
# :highlights maps the numeric id taken from the Solr document key to a hash
# of field name => snippets. Only the trailing type suffix is removed from
# the field name, so "first_name_t" becomes "first_name".
def parse_results(solr_data, options = {})
  results = {
    :docs => [],
    :total => 0
  }
  configuration = {
    :format => :objects
  }
  results.update(:facets => {'facet_fields' => []}) if options[:facets]
  return SearchResults.new(results) if (solr_data.nil? || solr_data.total_hits == 0)

  configuration.update(options) if options.is_a?(Hash)

  pk_field = "#{solr_configuration[:primary_key_field]}"
  ids = solr_data.hits.collect {|doc| doc[pk_field]}.flatten

  result = find_objects(ids, options, configuration)

  add_scores(result, solr_data) if configuration[:format] == :objects && options[:scores]

  highlighted = {}
  (solr_data.highlighting || {}).each do |doc_key, fields|
    snippets = {}
    # strip only the last "_suffix" so multi-word field names stay intact
    (fields || {}).each { |field, text| snippets[field.sub(/_[^_]*\z/, "")] = text }
    # drop everything up to the last colon in the key, keep the numeric id
    highlighted[doc_key.gsub(/[^:]*:/, "").to_i] = snippets
  end

  results.update(:facets => solr_data.data['facet_counts']) if options[:facets]
  results.update({:docs => result, :total => solr_data.total_hits, :max_score => solr_data.max_score, :query_time => solr_data.data['responseHeader']['QTime']})
  results.update({:highlights=>highlighted})

  sr = SearchResults.new(results)

  # give records that support it a chance to pick up the search metadata
  sr.records.each do |model|
    model.init_solr(results) if model.respond_to?(:init_solr)
  end if sr.records

  sr
end
|
|
148
|
+
|
|
149
|
+
|
|
150
|
+
# Turns the ids returned by Solr into the requested result representation.
#
# ids           - primary key values in Solr result order.
# options       - search options; only :include is read here.
# configuration - Hash with :format (:objects or :ids) and optional :lazy.
#
# Returns LazyDocument wrappers when :lazy is set and the format is not
# :ids, records loaded through +find+ (re-sorted by +reorder+) when the
# format is :objects, and the bare ids otherwise.
def find_objects(ids, options, configuration)
  format = configuration[:format]

  if configuration[:lazy] && format != :ids
    return ids.collect { |id| ActsAsSolr::LazyDocument.new(id, self) }
  end
  return ids unless format == :objects

  find_options = {:conditions => ["#{self.table_name}.#{primary_key} in (?)", ids]}
  find_options[:include] = options[:include] if options[:include]
  reorder(self.find(:all, find_options), ids)
end
|
|
164
|
+
|
|
165
|
+
# Reorders the instances keeping the order returned from Solr
|
|
166
|
+
# Sorts +things+ into the order given by +ids+ (the order Solr returned).
#
# things - records answering +id+.
# ids    - ids in the desired order.
#
# Returns a new array with each record placed at the index of its id.
# Raises a RuntimeError ("Out of sync! ...") when the two collections differ
# in size.
def reorder(things, ids)
  unless things.size == ids.size
    raise "Out of sync! Found #{ids.size} items in index, but only #{things.size} were found in database!"
  end

  sorted = Array.new(things.size)
  things.each { |record| sorted[ids.index(record.id)] = record }
  sorted
end
|
|
175
|
+
|
|
176
|
+
# Replaces the field types based on the types (if any) specified
|
|
177
|
+
# on the acts_as_solr call
|
|
178
|
+
# Rewrites the default "<field>_t" suffix inside each string to the suffix
# of the type configured for that field or association.
#
# strings       - Array of query/sort strings; it is updated in place and
#                 also returned.
# include_colon - true when the field names are followed by ":" (queries),
#                 false for bare names (sort clauses).
#
# Field names are matched as whole names: the pattern is anchored on a word
# boundary and the name is regexp-escaped, so a field called "name" no
# longer rewrites the tail of "first_name_t:". Without a colon the match
# must also end on a word boundary.
def replace_types(strings, include_colon=true)
  suffix = include_colon ? ":" : ""
  terminator = include_colon ? ":" : "\\b"

  # swaps "<solr_name>_t" for "<solr_name>_<configured type>" in every string
  retype = lambda do |solr_name, type|
    typed_field = "#{solr_name}_#{get_solr_field_type(type)}#{suffix}"
    pattern = /\b#{Regexp.escape(solr_name.to_s)}_t#{terminator}/
    # block form keeps backslashes in the replacement literal
    strings.each_with_index {|s,i| strings[i] = s.gsub(pattern) { typed_field } }
  end

  if configuration[:solr_fields]
    configuration[:solr_fields].each do |name, options|
      retype.call(options[:as] || name.to_s, options[:type])
    end
  end
  if configuration[:solr_includes]
    configuration[:solr_includes].each do |association, options|
      retype.call(options[:as] || association.to_s.singularize, options[:type])
    end
  end
  strings
end
|
|
198
|
+
|
|
199
|
+
# Adds the score to each one of the instances found
|
|
200
|
+
# Attaches the Solr relevance score to each matching record.
#
# results   - records loaded for the search.
# solr_data - response object whose +hits+ are documents with a "score".
#
# Every record matched by scorable_record? gains a +solr_score+ accessor on
# its singleton class holding that document's score. Documents without a
# matching record are skipped; previously the nil match was handed to
# "class << object", which targets NilClass.
def add_scores(results, solr_data)
  scored = solr_data.hits.collect do |doc|
    [doc["score"], results.find {|record| scorable_record?(record, doc) }]
  end
  scored.each do |score, object|
    next if object.nil?
    class << object; attr_accessor :solr_score; end
    object.solr_score = score
  end
end
|
|
211
|
+
|
|
212
|
+
# Tells whether +record+ is the object described by the Solr document +doc+.
#
# When the document carries the configured primary key field, the record id
# is compared with that value. Otherwise the document's "id" is compared
# with "<ClassName>:<record id>".
#
# Solr may hand either value back as a one-element array; the first element
# is used, so the comparison does not depend on Array#to_s (which renders
# "[1]" rather than "1" on Ruby 1.9+). A missing "id" simply does not match.
def scorable_record?(record, doc)
  doc_id = doc["#{solr_configuration[:primary_key_field]}"]
  if doc_id.nil?
    "#{record.class.name}:#{record_id(record)}" == Array(doc["id"]).first.to_s
  else
    record_id(record).to_s == Array(doc_id).first.to_s
  end
end
|
|
221
|
+
|
|
222
|
+
# Checks the :other setting of a date facet.
#
# options - a single value or an Array of values; each is converted with
#           to_sym and must be one of :after, :all, :before, :between, :none.
#
# Returns nil when everything is valid; raises a RuntimeError naming the
# offending values otherwise.
def validate_date_facet_other_options(options)
  allowed = [:after, :all, :before, :between, :none]
  candidates = options.kind_of?(Array) ? options : [options]
  rejected = candidates.collect { |candidate| candidate.to_sym } - allowed
  return if rejected.empty?

  plural = rejected.size > 1 ? 's' : nil
  raise "Invalid option#{plural} for faceted date's other param: #{rejected.join(', ')}. May only be one of :after, :all, :before, :between, :none"
end
|
|
228
|
+
|
|
229
|
+
end
|
|
230
|
+
end
|
|
@@ -0,0 +1,69 @@
|
|
|
1
|
+
module ActsAsSolr #:nodoc:
|
|
2
|
+
|
|
3
|
+
# TODO: Possibly looking into hooking it up with Solr::Response::Standard
|
|
4
|
+
#
|
|
5
|
+
# Class that returns the search results with four methods.
|
|
6
|
+
#
|
|
7
|
+
# books = Book.find_by_solr 'ruby'
|
|
8
|
+
#
|
|
9
|
+
# the above will return a SearchResults class with 4 methods:
|
|
10
|
+
#
|
|
11
|
+
# docs|results|records: will return an array of records found
|
|
12
|
+
#
|
|
13
|
+
# books.records.empty?
|
|
14
|
+
# => false
|
|
15
|
+
#
|
|
16
|
+
# total|num_found|total_hits: will return the total number of records found
|
|
17
|
+
#
|
|
18
|
+
# books.total
|
|
19
|
+
# => 2
|
|
20
|
+
#
|
|
21
|
+
# facets: will return the facets when doing a faceted search
|
|
22
|
+
#
|
|
23
|
+
# max_score|highest_score: returns the highest score found
|
|
24
|
+
#
|
|
25
|
+
# books.max_score
|
|
26
|
+
# => 1.3213213
|
|
27
|
+
#
|
|
28
|
+
#
|
|
29
|
+
# Read-only wrapper around the hash of search data (:docs, :total, :facets,
# :max_score, :query_time, :highlights). Every reader returns nil when its
# key is absent from the hash.
class SearchResults
  # solr_data - Hash holding the search data; defaults to an empty hash.
  def initialize(solr_data={})
    @solr_data = solr_data
  end

  # Returns the array stored under :docs. Aliased as docs and records.
  def results
    @solr_data[:docs]
  end

  # Returns the value stored under :total. Aliased as num_found and
  # total_hits.
  def total
    @solr_data[:total]
  end

  # Returns the value stored under :facets.
  def facets
    @solr_data[:facets]
  end

  # Returns the value stored under :max_score. Aliased as highest_score.
  def max_score
    @solr_data[:max_score]
  end

  # Returns the value stored under :query_time.
  def query_time
    @solr_data[:query_time]
  end

  # Returns the value stored under :highlights.
  def highlights
    @solr_data[:highlights]
  end

  alias docs results
  alias records results
  alias num_found total
  alias total_hits total
  alias highest_score max_score
end
|
|
68
|
+
|
|
69
|
+
end
|
|
@@ -0,0 +1,191 @@
|
|
|
1
|
+
# The ASF licenses this file to You under the Apache License, Version 2.0
|
|
2
|
+
# (the "License"); you may not use this file except in compliance with
|
|
3
|
+
# the License. You may obtain a copy of the License at
|
|
4
|
+
#
|
|
5
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
6
|
+
#
|
|
7
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
8
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
9
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
10
|
+
# See the License for the specific language governing permissions and
|
|
11
|
+
# limitations under the License.
|
|
12
|
+
|
|
13
|
+
require 'net/http'
|
|
14
|
+
|
|
15
|
+
# TODO: add a convenience method to POST a Solr .xml file, like Solr's example post.sh
|
|
16
|
+
|
|
17
|
+
# HTTP client for one Solr application context.
#
#   conn = Solr::Connection.new("http://example.com:8080/solr")
#
# Pass :autocommit => :on to have #add, #update, #delete and
# #delete_by_query send a commit after each call:
#
#   conn = Solr::Connection.new('http://example.com:8080/solr',
#                               :autocommit => :on)
class Solr::Connection
  attr_reader :url, :autocommit, :connection

  # Control characters removed from every request body in #post
  # (0x00-0x1F except tab, line feed and carriage return).
  ILLEGAL_XML_CHARS = /[\x00-\x08\x0B\x0C\x0E-\x1F]/

  # url  - base URL of the Solr application; must parse as an HTTP URI,
  #        otherwise a RuntimeError ("invalid http url: ...") is raised.
  # opts - :autocommit => :on enables autocommit,
  #        :timeout sets the read timeout of the HTTP connection.
  def initialize(url="http://localhost:8983/solr", opts={})
    @url = URI.parse(url)
    unless @url.kind_of? URI::HTTP
      raise "invalid http url: #{url}"
    end

    # TODO: Autocommit seems nice at one level, but it currently is confusing because
    # only calls to Connection#add/#update/#delete, though a Connection#send(AddDocument.new(...))
    # does not autocommit. Maybe #send should check for the request types that require a commit and
    # commit in #send instead of the individual methods?
    @autocommit = opts[:autocommit] == :on

    # Not actually opening the connection yet, just setting up the persistent connection.
    @connection = Net::HTTP.new(@url.host, @url.port)

    @connection.read_timeout = opts[:timeout] if opts[:timeout]
  end

  # Adds a document to the index. Accepts either a hash
  #
  #   conn.add(:id => 123, :title => 'Tlon, Uqbar, Orbis Tertius')
  #
  # or a Solr::Document
  #
  #   conn.add(Solr::Document.new(:id => 123, :title => 'On Writing'))
  #
  # Returns the response's ok? value.
  def add(doc)
    response = send(Solr::Request::AddDocument.new(doc))
    commit if @autocommit
    response.ok?
  end

  # Updates a document in the index; simply delegates to #add.
  def update(doc)
    add(doc)
  end

  # Performs a standard query and returns a Solr::Response::Standard
  #
  #   response = conn.query('borges')
  #
  # alternatively pass a block to iterate over the hits
  #
  #   conn.query('borges') do |hit|
  #     puts hit
  #   end
  #
  # options include:
  #
  #   :sort, :default_field, :rows, :filter_queries, :debug_query,
  #   :explain_other, :facets, :highlighting, :mlt,
  #   :operator => :or / :and
  #   :start => defaults to 0
  #   :field_list => array, defaults to ["*", "score"]
  #
  # The caller's options hash is copied, not modified.
  def query(query, options={}, &action)
    # TODO: Shouldn't this return an exception if the Solr status is not ok?  (rather than true/false).
    create_and_send_query(Solr::Request::Standard, options.merge(:query => query), &action)
  end

  # Performs a dismax search and returns a Solr::Response::Standard
  #
  #   response = conn.search('borges')
  #
  # options are the same as for #query, plus:
  #
  #   :tie_breaker, :query_fields, :minimum_match, :phrase_fields,
  #   :phrase_slop, :boost_query, :boost_functions
  #
  # The caller's options hash is copied, not modified.
  def search(query, options={}, &action)
    create_and_send_query(Solr::Request::Dismax, options.merge(:query => query), &action)
  end

  # Sends a commit message to the server; returns the response's ok? value.
  def commit(options={})
    send(Solr::Request::Commit.new(options)).ok?
  end

  # Sends an optimize message to the server; returns the response's ok? value.
  def optimize
    send(Solr::Request::Optimize.new).ok?
  end

  # Pings the server. Returns the response's ok? value, or false when the
  # request raises a StandardError.
  def ping
    send(Solr::Request::Ping.new).ok?
  rescue
    false
  end

  # Deletes a document from the index by its document id.
  def delete(document_id)
    response = send(Solr::Request::Delete.new(:id => document_id))
    commit if @autocommit
    response.ok?
  end

  # Deletes every document matching +query+.
  def delete_by_query(query)
    response = send(Solr::Request::Delete.new(:query => query))
    commit if @autocommit
    response.ok?
  end

  # Sends an index info request and returns its response.
  def info
    send(Solr::Request::IndexInfo.new)
  end

  # Posts the given Solr::Request and wraps the returned body with
  # Solr::Response::Base.make_response.
  # NOTE: this intentionally replaces Object#send on connection instances;
  # use __send__ for dynamic dispatch on a connection.
  def send(request)
    data = post(request)
    Solr::Response::Base.make_response(request, data)
  end

  # Sends the HTTP POST for +request+ to "<url path>/<request handler>" and
  # returns the response body. Non-success responses are raised through
  # Net::HTTPResponse#error!. With ENV["DEBUG"] set, the path and request
  # data are printed first.
  def post(request)
    path = @url.path + "/" + request.handler

    if ENV["DEBUG"]
      puts "POST #{path}"
      puts "-- DATA -------------------"
      puts request.to_s
      puts "-- END DATA ---------------"
    end

    response = @connection.post(path,
                                request.to_s.gsub(ILLEGAL_XML_CHARS, ''),
                                { "Content-Type" => request.content_type })

    case response
    when Net::HTTPSuccess then response.body
    else
      response.error!
    end
  end

  private

  # Builds a request of class +klass+ from +options+ and sends it. Returns
  # the response, or iterates its hits through +action+ when a block is given.
  def create_and_send_query(klass, options = {}, &action)
    response = send(klass.new(options))
    return response unless action
    response.each {|hit| action.call(hit)}
  end

end
|
|
189
|
+
|
|
190
|
+
|
|
191
|
+
|
|
@@ -0,0 +1,78 @@
|
|
|
1
|
+
# The ASF licenses this file to You under the Apache License, Version 2.0
|
|
2
|
+
# (the "License"); you may not use this file except in compliance with
|
|
3
|
+
# the License. You may obtain a copy of the License at
|
|
4
|
+
#
|
|
5
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
6
|
+
#
|
|
7
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
8
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
9
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
10
|
+
# See the License for the specific language governing permissions and
|
|
11
|
+
# limitations under the License.
|
|
12
|
+
|
|
13
|
+
require 'solr/xml'
|
|
14
|
+
require 'solr/field'
|
|
15
|
+
|
|
16
|
+
# An ordered list of Solr::Field objects with an optional document boost.
class Solr::Document
  include Enumerable
  attr_accessor :boost
  attr_reader :fields

  # Creates a document, optionally seeded from a hash of field name => value.
  #
  #   doc = Solr::Document.new(:creator => 'Jorge Luis Borges')
  def initialize(hash={})
    @fields = []
    self << hash
  end

  # Appends a Solr::Field
  #
  #   doc << Solr::Field.new(:creator => 'Jorge Luis Borges')
  #
  # or every pair of a hash
  #
  #   doc << {:creator => 'Jorge Luis Borges'}
  #
  # A non-String hash value that responds to +each+ adds one field per
  # element. Anything other than a Hash or Solr::Field raises a RuntimeError.
  def <<(fields)
    if Hash === fields
      fields.each_pair do |name, value|
        multi_valued = value.respond_to?(:each) && !value.is_a?(String)
        entries = multi_valued ? value : [value]
        entries.each { |entry| @fields << Solr::Field.new(name => entry) }
      end
    elsif Solr::Field === fields
      @fields << fields
    else
      raise "must pass in Solr::Field or Hash"
    end
  end

  # Hash-style lookup: the value of the first field called +name+, or nil.
  #
  #   doc['name']
  def [](name)
    wanted = name.to_s
    match = @fields.find { |field| field.name == wanted }
    match && match.value
  end

  # Hash-style assignment; appends a new field rather than replacing one.
  def []=(name,value)
    @fields << Solr::Field.new(name => value)
  end

  # Builds a 'doc' Solr::XML::Element containing every field's element,
  # with a boost attribute when a boost is set.
  def to_xml
    doc_element = Solr::XML::Element.new 'doc'
    doc_element.attributes['boost'] = @boost.to_s if @boost
    @fields.each { |field| doc_element.add_element(field.to_xml) }
    doc_element
  end

  # Yields each field; this is what Enumerable builds on.
  def each(*args, &blk)
    fields.each(&blk)
  end
end
|
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
# The ASF licenses this file to You under the Apache License, Version 2.0
|
|
2
|
+
# (the "License"); you may not use this file except in compliance with
|
|
3
|
+
# the License. You may obtain a copy of the License at
|
|
4
|
+
#
|
|
5
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
6
|
+
#
|
|
7
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
8
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
9
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
10
|
+
# See the License for the specific language governing permissions and
|
|
11
|
+
# limitations under the License.
|
|
12
|
+
|
|
13
|
+
# Error class of the Solr namespace.
# NOTE: it derives from Exception, not StandardError, so a bare `rescue`
# does not catch it; rescue Solr::Exception explicitly.
class Solr::Exception < Exception
end
|
data/lib/solr/field.rb
ADDED
|
@@ -0,0 +1,39 @@
|
|
|
1
|
+
# The ASF licenses this file to You under the Apache License, Version 2.0
|
|
2
|
+
# (the "License"); you may not use this file except in compliance with
|
|
3
|
+
# the License. You may obtain a copy of the License at
|
|
4
|
+
#
|
|
5
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
6
|
+
#
|
|
7
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
8
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
9
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
10
|
+
# See the License for the specific language governing permissions and
|
|
11
|
+
# limitations under the License.
|
|
12
|
+
|
|
13
|
+
require 'solr/xml'
|
|
14
|
+
require 'time'
|
|
15
|
+
|
|
16
|
+
# A single name/value pair of a Solr document, with an optional boost.
class Solr::Field
  VALID_PARAMS = [:boost]
  attr_accessor :name
  attr_accessor :value
  attr_accessor :boost

  # params - Hash with exactly one field entry (name => value) and an
  #          optional <tt>:boost</tt> used to boost the relevance of the field.
  #
  # The name is stored as a String. Values responding to +utc+ (times) are
  # stored in UTC XML-schema format; everything else is stored via to_s.
  def initialize(params)
    @boost = params[:boost]
    name_key = (params.keys - VALID_PARAMS).first
    @name = name_key.to_s
    raw_value = params[name_key]

    @value = if raw_value.respond_to?(:utc)
      # Time#utc converts the receiver in place; work on a UTC copy so the
      # caller's time object keeps its zone.
      utc_copy = raw_value.respond_to?(:getutc) ? raw_value.getutc : raw_value.dup.utc
      utc_copy.xmlschema
    else
      raw_value.to_s
    end
  end

  # Builds a 'field' Solr::XML::Element carrying the name, the optional
  # boost attribute and the value as text.
  def to_xml
    e = Solr::XML::Element.new 'field'
    e.attributes['name'] = @name
    e.attributes['boost'] = @boost.to_s if @boost
    e.text = @value
    e
  end

end
|
|
@@ -0,0 +1,26 @@
|
|
|
1
|
+
# The ASF licenses this file to You under the Apache License, Version 2.0
|
|
2
|
+
# (the "License"); you may not use this file except in compliance with
|
|
3
|
+
# the License. You may obtain a copy of the License at
|
|
4
|
+
#
|
|
5
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
6
|
+
#
|
|
7
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
8
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
9
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
10
|
+
# See the License for the specific language governing permissions and
|
|
11
|
+
# limitations under the License.
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
# Mapper that applies the i-th entry of @mapping to the i-th element of the
# incoming array and merges the results.
class Solr::Importer::ArrayMapper < Solr::Importer::Mapper
  # TODO document that initializer takes an array of Mappers [mapper1, mapper2, ... mapperN]

  # TODO: make merge conflict handling configurable.  as is, the last map fields win.

  # Returns one hash combining the output of every positional mapper; on
  # duplicate keys the later mapper's value replaces the earlier one.
  def map(orig_data_array)
    combined = {}
    orig_data_array.each_with_index { |element, position| combined.update(@mapping[position].map(element)) }
    combined
  end
end
|
|
@@ -0,0 +1,38 @@
|
|
|
1
|
+
# The ASF licenses this file to You under the Apache License, Version 2.0
|
|
2
|
+
# (the "License"); you may not use this file except in compliance with
|
|
3
|
+
# the License. You may obtain a copy of the License at
|
|
4
|
+
#
|
|
5
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
6
|
+
#
|
|
7
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
8
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
9
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
10
|
+
# See the License for the specific language governing permissions and
|
|
11
|
+
# limitations under the License.
|
|
12
|
+
|
|
13
|
+
# For files with the first line containing field names
|
|
14
|
+
# Currently not designed for enormous files, as all lines are
|
|
15
|
+
# read into an array
|
|
16
|
+
# Enumerable source for delimited text files whose first line holds the
# field names. Not designed for enormous files: all lines are read into an
# array.
class Solr::Importer::DelimitedFileSource
  include Enumerable

  # filename - path of the file to read.
  # splitter - column separator passed to String#split; defaults to a tab.
  def initialize(filename, splitter=/\t/)
    @filename = filename
    @splitter = splitter
  end

  # Yields one row per data line. A row is an array of [header, value]
  # pairs whose [] is redefined to look a value up by header name (String
  # or Symbol); an unknown name returns nil. An empty file yields nothing.
  def each
    # File.readlines, unlike IO.readlines, never treats a leading "|" in
    # the path as a command to run.
    lines = File.readlines(@filename)
    return if lines.empty?

    headers = lines.first.split(@splitter).collect { |h| h.chomp }

    lines[1..-1].each do |line|
      data = headers.zip(line.split(@splitter).collect { |s| s.chomp })
      def data.[](key)
        pair = assoc(key.to_s)
        pair && pair[1]
      end

      yield(data)
    end
  end

end
|