jbasdf-muck-solr 0.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/CHANGE_LOG +239 -0
- data/LICENSE +19 -0
- data/README.markdown +118 -0
- data/README.rdoc +107 -0
- data/Rakefile +99 -0
- data/TESTING_THE_PLUGIN +25 -0
- data/VERSION.yml +4 -0
- data/config/solr.yml +15 -0
- data/config/solr_environment.rb +32 -0
- data/lib/acts_as_solr/acts_methods.rb +352 -0
- data/lib/acts_as_solr/class_methods.rb +236 -0
- data/lib/acts_as_solr/common_methods.rb +89 -0
- data/lib/acts_as_solr/deprecation.rb +61 -0
- data/lib/acts_as_solr/instance_methods.rb +165 -0
- data/lib/acts_as_solr/lazy_document.rb +18 -0
- data/lib/acts_as_solr/parser_methods.rb +203 -0
- data/lib/acts_as_solr/search_results.rb +68 -0
- data/lib/acts_as_solr/solr_fixtures.rb +13 -0
- data/lib/acts_as_solr/tasks/database.rake +16 -0
- data/lib/acts_as_solr/tasks/solr.rake +135 -0
- data/lib/acts_as_solr/tasks/test.rake +5 -0
- data/lib/acts_as_solr/tasks.rb +10 -0
- data/lib/acts_as_solr.rb +65 -0
- data/lib/solr/connection.rb +177 -0
- data/lib/solr/document.rb +75 -0
- data/lib/solr/exception.rb +13 -0
- data/lib/solr/field.rb +36 -0
- data/lib/solr/importer/array_mapper.rb +26 -0
- data/lib/solr/importer/delimited_file_source.rb +38 -0
- data/lib/solr/importer/hpricot_mapper.rb +27 -0
- data/lib/solr/importer/mapper.rb +51 -0
- data/lib/solr/importer/solr_source.rb +41 -0
- data/lib/solr/importer/xpath_mapper.rb +35 -0
- data/lib/solr/importer.rb +19 -0
- data/lib/solr/indexer.rb +52 -0
- data/lib/solr/request/add_document.rb +58 -0
- data/lib/solr/request/base.rb +36 -0
- data/lib/solr/request/commit.rb +29 -0
- data/lib/solr/request/delete.rb +48 -0
- data/lib/solr/request/dismax.rb +46 -0
- data/lib/solr/request/index_info.rb +22 -0
- data/lib/solr/request/modify_document.rb +46 -0
- data/lib/solr/request/optimize.rb +19 -0
- data/lib/solr/request/ping.rb +36 -0
- data/lib/solr/request/select.rb +54 -0
- data/lib/solr/request/spellcheck.rb +30 -0
- data/lib/solr/request/standard.rb +402 -0
- data/lib/solr/request/update.rb +23 -0
- data/lib/solr/request.rb +26 -0
- data/lib/solr/response/add_document.rb +17 -0
- data/lib/solr/response/base.rb +42 -0
- data/lib/solr/response/commit.rb +15 -0
- data/lib/solr/response/delete.rb +13 -0
- data/lib/solr/response/dismax.rb +8 -0
- data/lib/solr/response/index_info.rb +26 -0
- data/lib/solr/response/modify_document.rb +17 -0
- data/lib/solr/response/optimize.rb +14 -0
- data/lib/solr/response/ping.rb +26 -0
- data/lib/solr/response/ruby.rb +42 -0
- data/lib/solr/response/select.rb +17 -0
- data/lib/solr/response/spellcheck.rb +20 -0
- data/lib/solr/response/standard.rb +60 -0
- data/lib/solr/response/xml.rb +39 -0
- data/lib/solr/response.rb +27 -0
- data/lib/solr/solrtasks.rb +27 -0
- data/lib/solr/util.rb +32 -0
- data/lib/solr/xml.rb +44 -0
- data/lib/solr.rb +26 -0
- data/solr/CHANGES.txt +1207 -0
- data/solr/LICENSE.txt +712 -0
- data/solr/NOTICE.txt +90 -0
- data/solr/etc/jetty.xml +205 -0
- data/solr/etc/webdefault.xml +379 -0
- data/solr/lib/easymock.jar +0 -0
- data/solr/lib/jetty-6.1.3.jar +0 -0
- data/solr/lib/jetty-util-6.1.3.jar +0 -0
- data/solr/lib/jsp-2.1/ant-1.6.5.jar +0 -0
- data/solr/lib/jsp-2.1/core-3.1.1.jar +0 -0
- data/solr/lib/jsp-2.1/jsp-2.1.jar +0 -0
- data/solr/lib/jsp-2.1/jsp-api-2.1.jar +0 -0
- data/solr/lib/servlet-api-2.4.jar +0 -0
- data/solr/lib/servlet-api-2.5-6.1.3.jar +0 -0
- data/solr/lib/xpp3-1.1.3.4.O.jar +0 -0
- data/solr/solr/README.txt +52 -0
- data/solr/solr/bin/abc +176 -0
- data/solr/solr/bin/abo +176 -0
- data/solr/solr/bin/backup +108 -0
- data/solr/solr/bin/backupcleaner +142 -0
- data/solr/solr/bin/commit +128 -0
- data/solr/solr/bin/optimize +129 -0
- data/solr/solr/bin/readercycle +129 -0
- data/solr/solr/bin/rsyncd-disable +77 -0
- data/solr/solr/bin/rsyncd-enable +76 -0
- data/solr/solr/bin/rsyncd-start +145 -0
- data/solr/solr/bin/rsyncd-stop +105 -0
- data/solr/solr/bin/scripts-util +83 -0
- data/solr/solr/bin/snapcleaner +148 -0
- data/solr/solr/bin/snapinstaller +168 -0
- data/solr/solr/bin/snappuller +248 -0
- data/solr/solr/bin/snappuller-disable +77 -0
- data/solr/solr/bin/snappuller-enable +77 -0
- data/solr/solr/bin/snapshooter +109 -0
- data/solr/solr/conf/admin-extra.html +31 -0
- data/solr/solr/conf/protwords.txt +21 -0
- data/solr/solr/conf/schema.xml +126 -0
- data/solr/solr/conf/scripts.conf +24 -0
- data/solr/solr/conf/solrconfig.xml +458 -0
- data/solr/solr/conf/stopwords.txt +57 -0
- data/solr/solr/conf/synonyms.txt +31 -0
- data/solr/solr/conf/xslt/example.xsl +132 -0
- data/solr/solr/conf/xslt/example_atom.xsl +63 -0
- data/solr/solr/conf/xslt/example_rss.xsl +62 -0
- data/solr/start.jar +0 -0
- data/solr/webapps/solr.war +0 -0
- data/test/config/solr.yml +2 -0
- data/test/db/connections/mysql/connection.rb +10 -0
- data/test/db/connections/sqlite/connection.rb +8 -0
- data/test/db/migrate/001_create_books.rb +15 -0
- data/test/db/migrate/002_create_movies.rb +12 -0
- data/test/db/migrate/003_create_categories.rb +11 -0
- data/test/db/migrate/004_create_electronics.rb +16 -0
- data/test/db/migrate/005_create_authors.rb +12 -0
- data/test/db/migrate/006_create_postings.rb +9 -0
- data/test/db/migrate/007_create_posts.rb +13 -0
- data/test/db/migrate/008_create_gadgets.rb +11 -0
- data/test/fixtures/authors.yml +9 -0
- data/test/fixtures/books.yml +13 -0
- data/test/fixtures/categories.yml +7 -0
- data/test/fixtures/db_definitions/mysql.sql +41 -0
- data/test/fixtures/electronics.yml +49 -0
- data/test/fixtures/movies.yml +9 -0
- data/test/fixtures/postings.yml +10 -0
- data/test/functional/acts_as_solr_test.rb +413 -0
- data/test/functional/association_indexing_test.rb +37 -0
- data/test/functional/faceted_search_test.rb +163 -0
- data/test/functional/multi_solr_search_test.rb +57 -0
- data/test/models/author.rb +10 -0
- data/test/models/book.rb +10 -0
- data/test/models/category.rb +8 -0
- data/test/models/electronic.rb +25 -0
- data/test/models/gadget.rb +9 -0
- data/test/models/movie.rb +17 -0
- data/test/models/novel.rb +2 -0
- data/test/models/post.rb +3 -0
- data/test/models/posting.rb +11 -0
- data/test/test_helper.rb +54 -0
- data/test/unit/acts_methods_shoulda.rb +68 -0
- data/test/unit/class_methods_shoulda.rb +85 -0
- data/test/unit/common_methods_shoulda.rb +111 -0
- data/test/unit/instance_methods_shoulda.rb +318 -0
- data/test/unit/lazy_document_shoulda.rb +34 -0
- data/test/unit/parser_instance.rb +19 -0
- data/test/unit/parser_methods_shoulda.rb +268 -0
- data/test/unit/solr_instance.rb +49 -0
- data/test/unit/test_helper.rb +24 -0
- metadata +241 -0
|
@@ -0,0 +1,10 @@
|
|
|
1
|
+
dir = File.dirname(__FILE__)
|
|
2
|
+
require 'rubygems'
|
|
3
|
+
require 'rake'
|
|
4
|
+
require 'net/http'
|
|
5
|
+
require 'active_record'
|
|
6
|
+
require File.expand_path("#{dir}/solr_fixtures")
|
|
7
|
+
|
|
8
|
+
load File.expand_path("#{dir}/tasks/database.rake")
|
|
9
|
+
load File.expand_path("#{dir}/tasks/solr.rake")
|
|
10
|
+
load File.expand_path("#{dir}/tasks/test.rake")
|
data/lib/acts_as_solr.rb
ADDED
|
@@ -0,0 +1,65 @@
|
|
|
1
|
+
# Copyright (c) 2006 Erik Hatcher, Thiago Jackiw
|
|
2
|
+
#
|
|
3
|
+
# Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
4
|
+
# of this software and associated documentation files (the "Software"), to deal
|
|
5
|
+
# in the Software without restriction, including without limitation the rights
|
|
6
|
+
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
7
|
+
# copies of the Software, and to permit persons to whom the Software is
|
|
8
|
+
# furnished to do so, subject to the following conditions:
|
|
9
|
+
#
|
|
10
|
+
# The above copyright notice and this permission notice shall be included in all
|
|
11
|
+
# copies or substantial portions of the Software.
|
|
12
|
+
#
|
|
13
|
+
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
14
|
+
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
15
|
+
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
16
|
+
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
17
|
+
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
18
|
+
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
19
|
+
# SOFTWARE.
|
|
20
|
+
|
|
21
|
+
require 'active_record'
|
|
22
|
+
require 'rexml/document'
|
|
23
|
+
require 'net/http'
|
|
24
|
+
require 'yaml'
|
|
25
|
+
require 'time'
|
|
26
|
+
require 'erb'
|
|
27
|
+
require 'rexml/xpath'
|
|
28
|
+
|
|
29
|
+
require File.dirname(__FILE__) + '/solr'
|
|
30
|
+
require File.dirname(__FILE__) + '/acts_as_solr/acts_methods'
|
|
31
|
+
require File.dirname(__FILE__) + '/acts_as_solr/common_methods'
|
|
32
|
+
require File.dirname(__FILE__) + '/acts_as_solr/parser_methods'
|
|
33
|
+
require File.dirname(__FILE__) + '/acts_as_solr/class_methods'
|
|
34
|
+
require File.dirname(__FILE__) + '/acts_as_solr/instance_methods'
|
|
35
|
+
require File.dirname(__FILE__) + '/acts_as_solr/common_methods'
|
|
36
|
+
require File.dirname(__FILE__) + '/acts_as_solr/deprecation'
|
|
37
|
+
require File.dirname(__FILE__) + '/acts_as_solr/search_results'
|
|
38
|
+
require File.dirname(__FILE__) + '/acts_as_solr/lazy_document'
|
|
39
|
+
module ActsAsSolr

  # Sends Solr requests to the server configured for the current Rails
  # environment.
  class Post
    # Builds a Solr::Connection and sends +request+ through it.
    #
    # The server URL is read from RAILS_ROOT/config/solr.yml, using the section
    # named by ENV['RAILS_ENV']. The 'url' key is preferred; when it is absent
    # the URL is assembled from the legacy 'host', 'port' and 'servlet_path'
    # keys. Without a config file, http://localhost:8982/solr is used.
    #
    # request:: the request object handed to Solr::Connection#send
    # core::    optional core name, appended to the URL as an extra path segment
    #
    # Returns whatever Solr::Connection#send returns.
    # Raises a RuntimeError wrapping any StandardError raised while reading the
    # configuration or talking to the server.
    def self.execute(request, core = nil)
      url = nil
      begin
        config_file = RAILS_ROOT + '/config/solr.yml'
        # File.exist? is used because File.exists? no longer exists in current Rubies.
        if File.exist?(config_file)
          settings = YAML::load_file(config_file)[ENV['RAILS_ENV']]
          url = settings['url']
          # for backwards compatibility
          url ||= "http://#{settings['host']}:#{settings['port']}/#{settings['servlet_path']}"
        else
          url = 'http://localhost:8982/solr'
        end
        url += "/" + core unless core.nil?
        Solr::Connection.new(url).send(request)
      rescue
        # url may still be nil here if the failure happened while loading the config.
        raise "Couldn't connect to the Solr server at #{url}. #{$!}"
      end
    end
  end

end
|
|
63
|
+
|
|
64
|
+
# extend ActiveRecord::Base with ActsAsSolr::ActsMethods (provides the acts_as_solr method)
|
|
65
|
+
ActiveRecord::Base.extend ActsAsSolr::ActsMethods
|
|
@@ -0,0 +1,177 @@
|
|
|
1
|
+
# The ASF licenses this file to You under the Apache License, Version 2.0
|
|
2
|
+
# (the "License"); you may not use this file except in compliance with
|
|
3
|
+
# the License. You may obtain a copy of the License at
|
|
4
|
+
#
|
|
5
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
6
|
+
#
|
|
7
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
8
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
9
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
10
|
+
# See the License for the specific language governing permissions and
|
|
11
|
+
# limitations under the License.
|
|
12
|
+
|
|
13
|
+
# TODO: add a convenience method to POST a Solr .xml file, like Solr's example post.sh
|
|
14
|
+
|
|
15
|
+
# Thin HTTP client for a Solr instance. Wraps a Net::HTTP object and offers
# shortcuts (add, query, search, commit, delete, ...) on top of the generic
# #send / #post pair.
class Solr::Connection
  attr_reader :url, :autocommit, :connection

  # create a connection to a solr instance using the url for the solr
  # application context:
  #
  #   conn = Solr::Connection.new("http://example.com:8080/solr")
  #
  # if you would prefer to have all adds/updates autocommitted,
  # use :autocommit => :on
  #
  #   conn = Solr::Connection.new('http://example.com:8080/solr',
  #     :autocommit => :on)
  #
  # opts[:timeout], when given, is used as the read timeout of the
  # underlying Net::HTTP object.
  #
  # Raises a RuntimeError when the url does not parse to an HTTP URI.
  def initialize(url="http://localhost:8983/solr", opts={})
    @url = URI.parse(url)
    unless @url.kind_of? URI::HTTP
      raise "invalid http url: #{url}"
    end

    # TODO: Autocommit is confusing as it stands: only Connection#add, #update,
    # #delete and #delete_by_query honour it, while Connection#send(AddDocument.new(...))
    # does not autocommit. Maybe #send should check for the request types that
    # require a commit and commit there instead of in the individual methods?
    @autocommit = opts[:autocommit] == :on

    # Not actually opening the connection yet, just setting up the persistent connection.
    @connection = Net::HTTP.new(@url.host, @url.port)

    @connection.read_timeout = opts[:timeout] if opts[:timeout]
  end

  # add a document to the index. you can pass in either a hash
  #
  #   conn.add(:id => 123, :title => 'Tlon, Uqbar, Orbis Tertius')
  #
  # or a Solr::Document
  #
  #   conn.add(Solr::Document.new(:id => 123, :title => 'On Writing'))
  #
  # Commits afterwards when autocommit is on. Returns the response's ok?
  # value to designate success/failure.
  def add(doc)
    request = Solr::Request::AddDocument.new(doc)
    response = send(request)
    commit if @autocommit
    return response.ok?
  end

  # update a document in the index (really just an alias to add)
  def update(doc)
    return add(doc)
  end

  # performs a standard query and returns a Solr::Response::Standard
  #
  #   response = conn.query('borges')
  #
  # alternatively you can pass in a block and iterate over hits
  #
  #   conn.query('borges') do |hit|
  #     puts hit
  #   end
  #
  # options include:
  #
  #   :sort, :default_field, :rows, :filter_queries, :debug_query,
  #   :explain_other, :facets, :highlighting, :mlt,
  #   :operator => :or / :and
  #   :start => defaults to 0
  #   :field_list => array, defaults to ["*", "score"]
  #
  # The options hash passed in is not modified.
  def query(query, options={}, &action)
    # TODO: Shouldn't this raise an exception if the Solr status is not ok? (rather than true/false).
    # merge (not update) so the caller's hash does not gain a :query key.
    create_and_send_query(Solr::Request::Standard, options.merge(:query => query), &action)
  end

  # performs a dismax search and returns a Solr::Response::Standard
  #
  #   response = conn.search('borges')
  #
  # options are same as query, but also include:
  #
  #   :tie_breaker, :query_fields, :minimum_match, :phrase_fields,
  #   :phrase_slop, :boost_query, :boost_functions
  #
  # The options hash passed in is not modified.
  def search(query, options={}, &action)
    # merge (not update) so the caller's hash does not gain a :query key.
    create_and_send_query(Solr::Request::Dismax, options.merge(:query => query), &action)
  end

  # sends a commit message to the server; returns the response's ok? value
  def commit(options={})
    response = send(Solr::Request::Commit.new(options))
    return response.ok?
  end

  # sends an optimize message to the server; returns the response's ok? value
  def optimize
    response = send(Solr::Request::Optimize.new)
    return response.ok?
  end

  # pings the connection and returns true/false if it is alive or not.
  # Any StandardError raised while pinging is reported as false.
  def ping
    begin
      response = send(Solr::Request::Ping.new)
      return response.ok?
    rescue
      return false
    end
  end

  # delete a document from the index using the document id.
  # Commits afterwards when autocommit is on.
  def delete(document_id)
    response = send(Solr::Request::Delete.new(:id => document_id))
    commit if @autocommit
    response.ok?
  end

  # delete using a query. Commits afterwards when autocommit is on.
  def delete_by_query(query)
    response = send(Solr::Request::Delete.new(:query => query))
    commit if @autocommit
    response.ok?
  end

  # sends an IndexInfo request and returns its response object
  def info
    send(Solr::Request::IndexInfo.new)
  end

  # send a given Solr::Request and return the response object built by
  # Solr::Response::Base.make_response for that request.
  #
  # NOTE: this method shadows Object#send on Connection instances; use
  # __send__ for dynamic dispatch on a connection.
  def send(request)
    data = post(request)
    Solr::Response::Base.make_response(request, data)
  end

  # send the http post request to solr; for convenience there are shortcuts
  # to some requests: add(), query(), commit(), delete() or send()
  #
  # The request is posted to "<url path>/<request.handler>" with the request's
  # content type. Returns the response body on HTTP success; otherwise
  # Net::HTTPResponse#error! raises the matching HTTP exception.
  def post(request)
    response = @connection.post(@url.path + "/" + request.handler,
                                request.to_s,
                                { "Content-Type" => request.content_type })

    case response
    when Net::HTTPSuccess then response.body
    else
      response.error!
    end
  end

  private

  # Instantiates klass with options and sends it. Without a block the
  # response is returned; with a block each hit of the response is passed
  # to the block.
  def create_and_send_query(klass, options = {}, &action)
    request = klass.new(options)
    response = send(request)
    return response unless action
    response.each {|hit| action.call(hit)}
  end

end
|
|
@@ -0,0 +1,75 @@
|
|
|
1
|
+
# The ASF licenses this file to You under the Apache License, Version 2.0
|
|
2
|
+
# (the "License"); you may not use this file except in compliance with
|
|
3
|
+
# the License. You may obtain a copy of the License at
|
|
4
|
+
#
|
|
5
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
6
|
+
#
|
|
7
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
8
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
9
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
10
|
+
# See the License for the specific language governing permissions and
|
|
11
|
+
# limitations under the License.
|
|
12
|
+
|
|
13
|
+
# An ordered collection of Solr::Field objects with an optional boost.
# Enumerable iteration walks the fields.
class Solr::Document
  include Enumerable
  attr_accessor :boost
  attr_reader :fields

  # Build a document, optionally seeding it from a hash of
  # field name => value pairs:
  #
  #   doc = Solr::Document.new(:creator => 'Jorge Luis Borges')
  def initialize(hash={})
    @fields = []
    self << hash
  end

  # Append either a single Solr::Field
  #
  #   doc << Solr::Field.new(:creator => 'Jorge Luis Borges')
  #
  # or every pair of a hash
  #
  #   doc << {:creator => 'Jorge Luis Borges'}
  #
  # A non-String hash value that responds to each contributes one field
  # per element. Anything else raises a RuntimeError.
  def <<(fields)
    if Hash === fields
      fields.each_pair do |name, value|
        multi_valued = value.respond_to?(:each) && !value.is_a?(String)
        if multi_valued
          value.each { |element| @fields << Solr::Field.new(name => element) }
        else
          @fields << Solr::Field.new(name => value)
        end
      end
    elsif Solr::Field === fields
      @fields << fields
    else
      raise "must pass in Solr::Field or Hash"
    end
  end

  # Hash-style read: value of the first field whose name equals name.to_s,
  # or nil when there is none.
  #   doc['name']
  def [](name)
    wanted = name.to_s
    match = @fields.find { |candidate| candidate.name == wanted }
    match ? match.value : nil
  end

  # Hash-style write: appends a new field (existing fields with the same
  # name are kept).
  def []=(name,value)
    @fields << Solr::Field.new(name => value)
  end

  # Render the document as a Solr::XML::Element named 'doc', carrying the
  # boost attribute (when set) and one child element per field.
  def to_xml
    doc_element = Solr::XML::Element.new 'doc'
    doc_element.attributes['boost'] = @boost.to_s if @boost
    @fields.each do |field|
      doc_element.add_element(field.to_xml)
    end
    doc_element
  end

  # Yields each field to the block; extra arguments are ignored.
  def each(*args, &block)
    fields.each(&block)
  end
end
|
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
# The ASF licenses this file to You under the Apache License, Version 2.0
|
|
2
|
+
# (the "License"); you may not use this file except in compliance with
|
|
3
|
+
# the License. You may obtain a copy of the License at
|
|
4
|
+
#
|
|
5
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
6
|
+
#
|
|
7
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
8
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
9
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
10
|
+
# See the License for the specific language governing permissions and
|
|
11
|
+
# limitations under the License.
|
|
12
|
+
|
|
13
|
+
# Error class of the Solr library.
# NOTE: it derives from Exception rather than StandardError, so a bare
# `rescue` does not catch it; rescue Solr::Exception explicitly.
class Solr::Exception < Exception
end
|
data/lib/solr/field.rb
ADDED
|
@@ -0,0 +1,36 @@
|
|
|
1
|
+
# The ASF licenses this file to You under the Apache License, Version 2.0
|
|
2
|
+
# (the "License"); you may not use this file except in compliance with
|
|
3
|
+
# the License. You may obtain a copy of the License at
|
|
4
|
+
#
|
|
5
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
6
|
+
#
|
|
7
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
8
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
9
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
10
|
+
# See the License for the specific language governing permissions and
|
|
11
|
+
# limitations under the License.
|
|
12
|
+
|
|
13
|
+
# A single name/value pair of a Solr document, with an optional boost.
class Solr::Field
  VALID_PARAMS = [:boost]
  attr_accessor :name
  attr_accessor :value
  attr_accessor :boost

  # params is a hash holding one name => value pair plus, optionally,
  # <tt>:boost</tt>, used to boost the relevance of this particular field.
  #
  # The name is stored as a String. The value is stored as a String too:
  # values responding to +utc+ (e.g. Time) are rendered in UTC/XML schema
  # format (which Solr requires); everything else goes through +to_s+.
  # The object passed in as value is left untouched.
  def initialize(params)
    @boost = params[:boost]
    # The first key that is not a known option is taken as the field name.
    name_key = (params.keys - VALID_PARAMS).first
    raw_value = params[name_key]
    @name = name_key.to_s
    @value = raw_value.respond_to?(:utc) ? utc_copy(raw_value).xmlschema : raw_value.to_s
  end

  # Render the field as a Solr::XML::Element named 'field' with the name
  # (and boost, when set) as attributes and the value as text.
  def to_xml
    e = Solr::XML::Element.new 'field'
    e.attributes['name'] = @name
    e.attributes['boost'] = @boost.to_s if @boost
    e.text = @value
    return e
  end

  private

  # Returns a UTC version of time. Time#utc converts its receiver in place,
  # which would silently change the caller's object, so the non-mutating
  # getutc is preferred whenever the value offers it.
  def utc_copy(time)
    time.respond_to?(:getutc) ? time.getutc : time.utc
  end

end
|
|
@@ -0,0 +1,26 @@
|
|
|
1
|
+
# The ASF licenses this file to You under the Apache License, Version 2.0
|
|
2
|
+
# (the "License"); you may not use this file except in compliance with
|
|
3
|
+
# the License. You may obtain a copy of the License at
|
|
4
|
+
#
|
|
5
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
6
|
+
#
|
|
7
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
8
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
9
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
10
|
+
# See the License for the specific language governing permissions and
|
|
11
|
+
# limitations under the License.
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
# Mapper for records that arrive as an array of data parts: the mapping
# given to the initializer is indexed positionally, so @mapping[i] maps
# orig_data_array[i].
class Solr::Importer::ArrayMapper < Solr::Importer::Mapper
  # TODO document that initializer takes an array of Mappers [mapper1, mapper2, ... mapperN]

  # TODO: make merge conflict handling configurable. as is, the last map fields win.

  # Maps every part with the mapper at the same index and merges the
  # resulting hashes into one, later parts overwriting earlier keys.
  def map(orig_data_array)
    combined = {}
    orig_data_array.each_with_index do |part, position|
      part_mapper = @mapping[position]
      combined.update(part_mapper.map(part))
    end
    combined
  end
end
|
|
@@ -0,0 +1,38 @@
|
|
|
1
|
+
# The ASF licenses this file to You under the Apache License, Version 2.0
|
|
2
|
+
# (the "License"); you may not use this file except in compliance with
|
|
3
|
+
# the License. You may obtain a copy of the License at
|
|
4
|
+
#
|
|
5
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
6
|
+
#
|
|
7
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
8
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
9
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
10
|
+
# See the License for the specific language governing permissions and
|
|
11
|
+
# limitations under the License.
|
|
12
|
+
|
|
13
|
+
# For files with the first line containing field names
|
|
14
|
+
# Currently not designed for enormous files, as all lines are
|
|
15
|
+
# read into an array
|
|
16
|
+
# Data source for delimited text files whose first line holds the field
# names. All lines are read into memory, so it is not meant for enormous
# files.
class Solr::Importer::DelimitedFileSource
  include Enumerable

  # filename:: path of the file to read
  # splitter:: String or Regexp handed to String#split for each line
  #            (defaults to a tab)
  def initialize(filename, splitter=/\t/)
    @filename = filename
    @splitter = splitter
  end

  # Yields one record per line after the header line. A record is an array
  # of [header, value] pairs whose [] is redefined to look a value up by
  # header name (the key is converted with to_s); unknown headers give nil.
  # An empty file yields nothing.
  def each
    lines = IO.readlines(@filename)
    # No header line means no records; avoids calling split on nil.
    return if lines.empty?

    headers = split_fields(lines.first)

    lines[1..-1].each do |line|
      data = headers.zip(split_fields(line))
      # Per-record lookup by header name instead of by integer index.
      def data.[](key)
        pair = assoc(key.to_s)
        pair && pair[1]
      end

      yield(data)
    end
  end

  private

  # Splits a line with the configured splitter and strips the trailing
  # line terminator from each piece.
  def split_fields(line)
    line.split(@splitter).collect { |piece| piece.chomp }
  end

end
|
|
@@ -0,0 +1,27 @@
|
|
|
1
|
+
# The ASF licenses this file to You under the Apache License, Version 2.0
|
|
2
|
+
# (the "License"); you may not use this file except in compliance with
|
|
3
|
+
# the License. You may obtain a copy of the License at
|
|
4
|
+
#
|
|
5
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
6
|
+
#
|
|
7
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
8
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
9
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
10
|
+
# See the License for the specific language governing permissions and
|
|
11
|
+
# limitations under the License.
|
|
12
|
+
|
|
13
|
+
begin
  require 'hpricot'

  # Mapper whose field lookups are Hpricot searches on the source document.
  class Solr::Importer::HpricotMapper < Solr::Importer::Mapper
    # Returns the inner HTML of every element that doc.search finds for
    # path (converted with to_s).
    def field_data(doc, path)
      selector = path.to_s
      doc.search(selector).collect do |element|
        element.inner_html
      end
    end
  end
rescue LoadError # If we can't load hpricot
  # Stand-in that refuses to be instantiated when hpricot is unavailable.
  class Solr::Importer::HpricotMapper
    def initialize(mapping, options={})
      raise "Hpricot not installed."
    end
  end
end
|
|
@@ -0,0 +1,51 @@
|
|
|
1
|
+
# The ASF licenses this file to You under the Apache License, Version 2.0
|
|
2
|
+
# (the "License"); you may not use this file except in compliance with
|
|
3
|
+
# the License. You may obtain a copy of the License at
|
|
4
|
+
#
|
|
5
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
6
|
+
#
|
|
7
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
8
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
9
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
10
|
+
# See the License for the specific language governing permissions and
|
|
11
|
+
# limitations under the License.
|
|
12
|
+
|
|
13
|
+
# Translates a source record into a hash of Solr field name => value,
# driven by a mapping of Solr field name => field mapping.
class Solr::Importer::Mapper
  # mapping:: Solr field name => String, Proc, Symbol or Enumerable of those
  # options:: :stringify_symbols converts Symbol mappings with to_s before
  #           the lookup in the source record
  def initialize(mapping, options={})
    @mapping = mapping
    @options = options
  end

  # Reads one field from the source record; subclasses override this to
  # change how a record is queried.
  def field_data(orig_data, field_name)
    orig_data[field_name]
  end

  # Resolves a single field mapping against the source record:
  # * String     - used as a literal value
  # * Proc       - called with the source record
  # * Symbol     - looked up through field_data
  # * Enumerable - each element resolved recursively, results flattened
  # Anything else raises a RuntimeError.
  def mapped_field_value(orig_data, field_mapping)
    return field_mapping if String === field_mapping
    # TODO pass in more context, like self or a function for field_data, etc
    return field_mapping.call(orig_data) if Proc === field_mapping

    if Symbol === field_mapping
      lookup_key = @options[:stringify_symbols] ? field_mapping.to_s : field_mapping
      return field_data(orig_data, lookup_key)
    end

    unless Enumerable === field_mapping
      raise "Unknown mapping for #{field_mapping}"
    end

    nested = field_mapping.collect do |orig_field_name|
      mapped_field_value(orig_data, orig_field_name)
    end
    nested.flatten
  end

  # Builds the Solr document hash for one source record, leaving out
  # fields whose resolved value is nil or false.
  def map(orig_data)
    @mapping.inject({}) do |document, (solr_name, field_mapping)|
      resolved = mapped_field_value(orig_data, field_mapping)
      document[solr_name] = resolved if resolved
      document
    end
  end

end
|
|
@@ -0,0 +1,41 @@
|
|
|
1
|
+
# The ASF licenses this file to You under the Apache License, Version 2.0
|
|
2
|
+
# (the "License"); you may not use this file except in compliance with
|
|
3
|
+
# the License. You may obtain a copy of the License at
|
|
4
|
+
#
|
|
5
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
6
|
+
#
|
|
7
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
8
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
9
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
10
|
+
# See the License for the specific language governing permissions and
|
|
11
|
+
# limitations under the License.
|
|
12
|
+
|
|
13
|
+
# Data source that pages through the results of a query against another
# Solr instance.
class Solr::Importer::SolrSource
  # solr_url::       url handed to Solr::Connection.new
  # query::          query sent with every page request
  # filter_queries:: passed through as :filter_queries (may be nil)
  # options::        :page_size (default 1000) and :field_list (default ["*"])
  def initialize(solr_url, query, filter_queries=nil, options={})
    @connection = Solr::Connection.new(solr_url)
    @query = query
    @filter_queries = filter_queries

    @page_size = options[:page_size] || 1000
    @field_list = options[:field_list] || ["*"]
  end

  # Yields every document of the result set, requesting @page_size rows at a
  # time until the next offset reaches the reported total_hits. At least
  # one request is always made.
  def each
    offset = 0
    while true
      # request N documents from a starting point
      page_request = Solr::Request::Standard.new(:query => @query,
                                                 :rows => @page_size,
                                                 :start => offset,
                                                 :field_list => @field_list,
                                                 :filter_queries => @filter_queries)
      page = @connection.send(page_request)
      # TODO: perhaps convert to HashWithIndifferentAccess.new(doc), so stringify_keys isn't necessary
      page.each { |doc| yield doc }

      offset += @page_size
      break if offset >= page.total_hits
    end
  end
end
|
|
@@ -0,0 +1,35 @@
|
|
|
1
|
+
# The ASF licenses this file to You under the Apache License, Version 2.0
|
|
2
|
+
# (the "License"); you may not use this file except in compliance with
|
|
3
|
+
# the License. You may obtain a copy of the License at
|
|
4
|
+
#
|
|
5
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
6
|
+
#
|
|
7
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
8
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
9
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
10
|
+
# See the License for the specific language governing permissions and
|
|
11
|
+
# limitations under the License.
|
|
12
|
+
|
|
13
|
+
begin
  require 'xml/libxml'

  # Mapper whose field lookups are XPath queries on a libxml document.
  class Solr::Importer::XPathMapper < Solr::Importer::Mapper
    # Runs doc.find with xpath (converted with to_s) and returns, per
    # matched node, the value of an XML::Attr or the content of an
    # XML::Node; any other kind of match contributes nil.
    def field_data(doc, xpath)
      expression = xpath.to_s
      doc.find(expression).collect do |match|
        if XML::Attr === match
          match.value
        elsif XML::Node === match
          match.content
        end
      end
    end
  end
rescue LoadError # If we can't load libxml
  # Stand-in that refuses to be instantiated when libxml is unavailable.
  class Solr::Importer::XPathMapper
    def initialize(mapping, options={})
      raise "libxml not installed"
    end
  end
end
|
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
# The ASF licenses this file to You under the Apache License, Version 2.0
|
|
2
|
+
# (the "License"); you may not use this file except in compliance with
|
|
3
|
+
# the License. You may obtain a copy of the License at
|
|
4
|
+
#
|
|
5
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
6
|
+
#
|
|
7
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
8
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
9
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
10
|
+
# See the License for the specific language governing permissions and
|
|
11
|
+
# limitations under the License.
|
|
12
|
+
|
|
13
|
+
module Solr; module Importer; end; end
|
|
14
|
+
require File.expand_path("#{File.dirname(__FILE__)}/importer/mapper")
|
|
15
|
+
require File.expand_path("#{File.dirname(__FILE__)}/importer/array_mapper")
|
|
16
|
+
require File.expand_path("#{File.dirname(__FILE__)}/importer/delimited_file_source")
|
|
17
|
+
require File.expand_path("#{File.dirname(__FILE__)}/importer/hpricot_mapper")
|
|
18
|
+
require File.expand_path("#{File.dirname(__FILE__)}/importer/xpath_mapper")
|
|
19
|
+
require File.expand_path("#{File.dirname(__FILE__)}/importer/solr_source")
|
data/lib/solr/indexer.rb
ADDED
|
@@ -0,0 +1,52 @@
|
|
|
1
|
+
# The ASF licenses this file to You under the Apache License, Version 2.0
|
|
2
|
+
# (the "License"); you may not use this file except in compliance with
|
|
3
|
+
# the License. You may obtain a copy of the License at
|
|
4
|
+
#
|
|
5
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
6
|
+
#
|
|
7
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
8
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
9
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
10
|
+
# See the License for the specific language governing permissions and
|
|
11
|
+
# limitations under the License.
|
|
12
|
+
|
|
13
|
+
# Pulls records from a data source, maps each to a Solr document and adds
# the documents to Solr, optionally in batches.
class Solr::Indexer
  attr_reader :solr

  # data_source::       object whose each yields the source records
  # mapper_or_mapping:: a mapper object, or a Hash that is wrapped in a
  #                     Solr::Importer::Mapper
  # options::           :solr_url    (falls back to ENV["SOLR_URL"], then
  #                                  http://localhost:8983/solr)
  #                     :buffer_docs number of documents sent per add
  #                     :debug       print documents instead of sending them
  # TODO: document options!
  def initialize(data_source, mapper_or_mapping, options={})
    solr_url = options[:solr_url] || ENV["SOLR_URL"] || "http://localhost:8983/solr"
    #TODO - these options contain the solr_url and debug keys also, so tidy up what gets passed
    @solr = Solr::Connection.new(solr_url, options)

    @data_source = data_source
    if mapper_or_mapping.is_a?(Hash)
      @mapper = Solr::Importer::Mapper.new(mapper_or_mapping)
    else
      @mapper = mapper_or_mapping
    end

    @buffer_docs = options[:buffer_docs]
    @debug = options[:debug]
  end

  # Maps and indexes every record of the data source. When a block is
  # given it receives (record, document) before the document is queued.
  # Without :buffer_docs each document is sent on its own; otherwise a
  # batch is sent whenever the queue reaches that size, and the remainder
  # at the end. Finishes with a commit unless in debug mode.
  def index
    pending = []
    @data_source.each do |record|
      document = @mapper.map(record)

      # TODO: check arity of block, if 3, pass counter as 3rd argument
      # TODO check return of block, if not true then don't index, or perhaps if document.empty?
      yield(record, document) if block_given?

      pending << document

      # Keep collecting while a batch size is set and not yet reached.
      next if @buffer_docs && pending.size != @buffer_docs
      add_docs(pending)
      pending.clear
    end
    add_docs(pending) unless pending.empty?

    @solr.commit unless @debug
  end

  # Sends documents to Solr, or prints their inspect output in debug
  # mode. Always returns nil.
  def add_docs(documents)
    if @debug
      puts documents.inspect
    else
      @solr.add(documents)
      nil
    end
  end
end
|