mwmitchell-solr 0.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/LICENSE +201 -0
- data/README.rdoc +91 -0
- data/Rakefile +72 -0
- data/examples/direct.rb +14 -0
- data/examples/http.rb +7 -0
- data/lib/core_ext.rb +8 -0
- data/lib/solr.rb +36 -0
- data/lib/solr/adapter.rb +7 -0
- data/lib/solr/adapter/common_methods.rb +89 -0
- data/lib/solr/adapter/direct.rb +65 -0
- data/lib/solr/adapter/http.rb +55 -0
- data/lib/solr/connection.rb +7 -0
- data/lib/solr/connection/base.rb +122 -0
- data/lib/solr/connection/search_ext.rb +110 -0
- data/lib/solr/indexer.rb +23 -0
- data/lib/solr/mapper.rb +56 -0
- data/lib/solr/mapper/rss.rb +27 -0
- data/lib/solr/message.rb +69 -0
- data/lib/solr/response.rb +143 -0
- data/test/adapter_common_methods_test.rb +49 -0
- data/test/connection_test_methods.rb +82 -0
- data/test/direct_test.rb +20 -0
- data/test/ext_pagination_test.rb +58 -0
- data/test/ext_search_test.rb +9 -0
- data/test/http_test.rb +13 -0
- data/test/indexer_test.rb +14 -0
- data/test/mapper_test.rb +105 -0
- data/test/message_test.rb +70 -0
- data/test/ruby-lang.org.rss.xml +391 -0
- data/test/test_helpers.rb +31 -0
- metadata +91 -0
@@ -0,0 +1,89 @@
|
|
1
|
+
# This module is for HTTP + DirectSolrConnection (jruby) connections
|
2
|
+
# It provides common methods.
|
3
|
+
# The main query, update and index_info methods are here
|
4
|
+
# The classes that include this module only need to provide a request method like:
|
5
|
+
# send_request(request_path, params, data)
|
6
|
+
# where:
|
7
|
+
# request_path is a string to a handler (/select)
|
8
|
+
# params is a hash for query string params
|
9
|
+
# data is optional string of xml
|
10
|
+
#
|
11
|
+
#
|
12
|
+
module Solr::Adapter::CommonMethods
|
13
|
+
|
14
|
+
# send a request to the "select" handler
|
15
|
+
# Sends a request to the handler configured under @opts[:select_path].
#
# params - Hash of query parameters, passed through to send_request.
#
# Returns whatever send_request returns.
def query(params)
  select_handler = @opts[:select_path]
  send_request(select_handler, params)
end
|
18
|
+
|
19
|
+
# sends data to the update handler
|
20
|
+
# data can be:
|
21
|
+
# string (valid solr update xml)
|
22
|
+
# object with respond_to?(:to_xml)
|
23
|
+
# params is a hash with valid solr update params
|
24
|
+
# Sends data to the handler configured under @opts[:update_path].
#
# data   - payload handed to send_request unchanged.
# params - Hash of update parameters (default: {}).
#
# Returns whatever send_request returns.
def update(data, params={})
  update_handler = @opts[:update_path]
  send_request(update_handler, params, data)
end
|
27
|
+
|
28
|
+
# sends a request to the admin luke handler to get info on the index
|
29
|
+
# Sends a request to the handler configured under @opts[:luke_path].
#
# params - Hash of request parameters (default: {}). :numTerms falls back to
#          0 when it is missing, nil or false.
#
# The defaulting is done on a copy, so the hash passed in by the caller is
# never modified.
#
# Returns whatever send_request returns.
def index_info(params={})
  luke_params = params.dup
  luke_params[:numTerms] ||= 0
  send_request @opts[:luke_path], luke_params
end
|
33
|
+
|
34
|
+
# Default handler paths, used when the caller's options do not override them.
#
# Returns a new Hash with :select_path, :update_path and :luke_path.
def default_options
  handler_paths = {}
  handler_paths[:select_path] = '/select'
  handler_paths[:update_path] = '/update'
  handler_paths[:luke_path] = '/admin/luke'
  handler_paths
end
|
41
|
+
|
42
|
+
# send a request to the adapter (allows requests like /admin/luke etc.)
|
43
|
+
# Maps the params through map_params and forwards the request to @adapter.
#
# handler_path - String path of the handler (e.g. '/admin/luke').
# params       - Hash of request parameters (default: {}).
# data         - optional payload (default: nil).
#
# NOTE(review): map_params and @adapter are not defined by this module, and
# the adapter classes that include it (Direct, HTTP) define their own
# send_request, which takes precedence over this one.
def send_request(handler_path, params={}, data=nil)
  mapped_params = map_params(params)
  @adapter.send_request(handler_path, mapped_params, data)
end
|
47
|
+
|
48
|
+
# escapes a query key/value for http
|
49
|
+
# Percent-encodes a query key or value for use in a URL query string.
#
# s - any object; it is converted with to_s first (nil becomes "").
#
# Letters, digits, "_", "." and "-" are kept, spaces become "+", and every
# other byte is written as %XX (uppercase hex).
#
# The hex template is sized by the BYTE length of each unsafe run. Sizing it
# by character count only encodes the first byte of a multibyte character.
#
# Returns the escaped String.
def escape(s)
  s.to_s.gsub(/[^ a-zA-Z0-9_.-]+/) { |unsafe|
    '%' + unsafe.unpack('H2' * unsafe.bytesize).join('%').upcase
  }.tr(' ', '+')
end
|
54
|
+
|
55
|
+
# Builds one "key=value" pair for a query string.
#
# k - parameter name.
# v - parameter value.
#
# Both sides are passed through escape before being joined with "=".
def build_param(k,v)
  [escape(k), escape(v)].join('=')
end
|
58
|
+
|
59
|
+
# takes a path and a hash of query params, returns an escaped url with query params
|
60
|
+
# Appends a query string built from params_hash to path.
#
# path        - String path of the handler.
# params_hash - Hash of query parameters, or nil (default: nil).
#
# hash_to_params returns nil for a non-Hash and "" when nothing survives its
# empty-value filtering. An empty String is truthy in Ruby, so both cases are
# checked explicitly; otherwise the result would end in a dangling "?".
#
# Returns path alone when there is no query, else "path?query".
def build_url(path, params_hash=nil)
  query = hash_to_params(params_hash)
  return path if query.nil? || query.empty?
  path + '?' + query
end
|
64
|
+
|
65
|
+
#
|
66
|
+
# converts hash into URL query string, keys get an alpha sort
|
67
|
+
# if a value is an array, the array values get mapped to the same key:
|
68
|
+
# hash_to_params(:q=>'blah', 'facet.field'=>['location_facet', 'format_facet'])
|
69
|
+
# returns:
|
70
|
+
# facet.field=location_facet&facet.field=format_facet&q=blah
|
71
|
+
#
|
72
|
+
# if a value is empty/nil etc., the key is not added
|
73
|
+
# Converts a Hash into a URL query string (without a leading "?").
#
# params - Hash of parameters; anything that is not a Hash yields nil.
#
# Keys are converted to strings on a copy and emitted in sorted order. An
# Array value produces one key=value pair per element. Values or elements
# whose to_s is empty (nil, "") are skipped.
#
# Pairs are appended to a flat list one at a time. Pushing a whole sub-array
# made join emit a stray "&" whenever an Array value ended up empty.
#
# Returns the query String ("" when nothing remains), or nil for a non-Hash.
def hash_to_params(params)
  return unless params.is_a?(Hash)
  # copy params and convert keys to strings
  string_params = params.inject({}) { |acc, (k, v)| acc[k.to_s] = v; acc }
  pairs = string_params.keys.sort.inject([]) do |acc, k|
    v = string_params[k]
    values = v.is_a?(Array) ? v : [v]
    values.each { |vv| acc << build_param(k, vv) unless vv.to_s.empty? }
    acc
  end
  pairs.join('&')
end
|
88
|
+
|
89
|
+
end
|
@@ -0,0 +1,65 @@
|
|
1
|
+
raise "JRuby Required" unless defined?(JRUBY_VERSION)
|
2
|
+
|
3
|
+
require 'java'
|
4
|
+
|
5
|
+
#
|
6
|
+
# Connection for JRuby + DirectSolrConnection
|
7
|
+
#
|
8
|
+
class Solr::Adapter::Direct
|
9
|
+
|
10
|
+
include Solr::Adapter::CommonMethods
|
11
|
+
|
12
|
+
attr_accessor :opts, :connection, :home_dir
|
13
|
+
|
14
|
+
# required: opts[:home_dir] is absolute path to solr home (the directory with "data", "config" etc.)
|
15
|
+
# opts must also contain either
|
16
|
+
# :dist_dir => 'absolute path to solr distribution root'
|
17
|
+
# or
|
18
|
+
# :jar_paths => ['array of directories containing the solr lib/jars']
|
19
|
+
# OTHER OPTS:
|
20
|
+
# :select_path => 'the/select/handler'
|
21
|
+
# :update_path => 'the/update/handler'
|
22
|
+
# If a block is given, the @connection instance (DirectSolrConnection) is yielded
|
23
|
+
# Builds the DirectSolrConnection.
#
# opts  - Hash of options:
#         :home_dir  - solr home; also the base of the default :data_dir
#         :data_dir  - defaults to "<home_dir>/data"
#         :dist_dir  - when set, :jar_paths becomes its "lib" and "dist"
#                      sub-directories (replacing any :jar_paths given)
#         :jar_paths - directory or Array of directories searched for jars
#         plus the handler paths from default_options.
# block - optional; receives the new @connection.
#
# The options are copied first, so the defaults filled in here do not leak
# back into the hash the caller passed.
def initialize(opts, &block)
  opts = opts.dup
  @home_dir = opts[:home_dir]
  opts[:data_dir] ||= File.join(@home_dir, 'data')
  if opts[:dist_dir]
    dist_dir = opts[:dist_dir]
    opts[:jar_paths] = [File.join(dist_dir, 'lib'), File.join(dist_dir, 'dist')]
  end
  @opts = default_options.merge(opts)
  require_jars(@opts[:jar_paths])
  import_dependencies
  @connection = DirectSolrConnection.new(@home_dir, @opts[:data_dir], nil)
  yield @connection if block_given?
end
|
36
|
+
|
37
|
+
# send a request to the connection
|
38
|
+
# send_request '/update', {:wt=>:xml}, '<commit/>'
|
39
|
+
# Sends a request through the direct connection.
#
# request_url_path - String handler path.
# params           - Hash of query parameters (default: {}).
# data             - optional payload; converted with to_xml when it
#                    responds to it (default: nil).
#
# Returns the result of @connection.request.
# Raises Solr::RequestError carrying the message of any StandardError raised
# by the connection call.
def send_request(request_url_path, params={}, data=nil)
  payload = data.respond_to?(:to_xml) ? data.to_xml : data
  target = build_url(request_url_path, params)
  begin
    @connection.request(target, payload)
  rescue => error
    raise Solr::RequestError.new(error.message)
  end
end
|
48
|
+
|
49
|
+
protected
|
50
|
+
|
51
|
+
# do the java import thingy
|
52
|
+
# Imports the DirectSolrConnection Java class so that initialize can
# reference it by its short name.
def import_dependencies
  import(org.apache.solr.servlet.DirectSolrConnection)
end
|
55
|
+
|
56
|
+
# require the jar files
|
57
|
+
# Requires every *.jar found (recursively) under the given directories.
#
# paths - a directory String, an Array of directory Strings, or nil.
#
# Array() normalises the argument, so nil means "no directories" instead of
# failing inside File.join. Jars are required in sorted order so the load
# order does not depend on the filesystem.
#
# Returns the Array of directories that were scanned.
def require_jars(paths)
  Array(paths).each do |path|
    jar_pattern = File.join(path, "**", "*.jar")
    Dir[jar_pattern].sort.each { |jar_file| require jar_file }
  end
end
|
64
|
+
|
65
|
+
end
|
@@ -0,0 +1,55 @@
|
|
1
|
+
require 'net/http'
|
2
|
+
|
3
|
+
#
|
4
|
+
# Connection for standard HTTP Solr server
|
5
|
+
#
|
6
|
+
class Solr::Adapter::HTTP
|
7
|
+
|
8
|
+
include Solr::Adapter::CommonMethods
|
9
|
+
|
10
|
+
attr_accessor :opts, :connection, :url
|
11
|
+
|
12
|
+
# opts can have:
|
13
|
+
# :url => 'http://localhost:8080/solr'
|
14
|
+
# :select_path => '/the/url/path/to/the/select/handler'
|
15
|
+
# :update_path => '/the/url/path/to/the/update/handler'
|
16
|
+
# :luke_path => '/admin/luke'
|
17
|
+
#
|
18
|
+
# If a block is given, the @connection (Net::HTTP) instance is yielded
|
19
|
+
# Builds the Net::HTTP connection for a Solr server.
#
# opts  - Hash of options (default: {}):
#         :url - base URL of the server; defaults to
#                'http://127.0.0.1:8983/solr'
#         plus the handler paths from default_options.
# block - optional; receives the new @connection before @opts is stored.
#
# The options are copied first, so the default :url does not leak back into
# the hash the caller passed.
def initialize(opts={}, &block)
  opts = opts.dup
  opts[:url] ||= 'http://127.0.0.1:8983/solr'
  @url = URI.parse(opts[:url])
  @connection = Net::HTTP.new(@url.host, @url.port)
  yield @connection if block_given?
  @opts = default_options.merge(opts)
end
|
26
|
+
|
27
|
+
# send a request to the connection
|
28
|
+
# send_request '/update', {:wt=>:xml}, '<commit/>'
|
29
|
+
# Sends a request over HTTP: POST when there is a payload, GET otherwise.
#
# request_url_path - String handler path, appended to the path of @url.
# params           - Hash of query parameters (default: {}).
# data             - optional payload; converted with to_xml when it
#                    responds to it (default: nil).
#
# Returns the response body String when the status code is '200'.
# Raises Solr::RequestError for any other status code.
def send_request(request_url_path, params={}, data=nil)
  payload = data.respond_to?(:to_xml) ? data.to_xml : data
  full_path = build_url(@url.path + request_url_path, params)
  response =
    if payload
      @connection.post(full_path, payload, post_headers)
    else
      @connection.get(full_path)
    end
  return response.body if response.code == '200'
  raise Solr::RequestError.new(parse_solr_html_error(response.body))
end
|
42
|
+
|
43
|
+
protected
|
44
|
+
|
45
|
+
# The standard post headers
|
46
|
+
# Headers sent with every POST.
#
# The charset is a parameter of the Content-Type header. Sending it as a
# separate "charset" header has no meaning in HTTP and leaves the body's
# encoding undeclared.
#
# Returns a Hash of header name => value.
def post_headers
  {'Content-Type' => 'text/xml; charset=utf-8'}
end
|
49
|
+
|
50
|
+
# extracts the message from the solr error response
|
51
|
+
# Extracts the message from an HTML error page.
#
# html - response body; expected to hold the message inside <pre>...</pre>.
#
# The text between the first <pre> and the last </pre> is returned with the
# HTML entities &lt;, &gt;, &quot; and &amp; decoded. &amp; is decoded last
# so an escaped entity such as "&amp;lt;" is not decoded twice.
#
# Returns the decoded message, or html unchanged when no <pre> block is
# found (including when html is nil).
def parse_solr_html_error(html)
  match = html.to_s.match(/<pre>(.*)<\/pre>/mi)
  return html unless match
  match[1].gsub('&lt;', '<').gsub('&gt;', '>').gsub('&quot;', '"').gsub('&amp;', '&')
end
|
54
|
+
|
55
|
+
end
|
@@ -0,0 +1,122 @@
|
|
1
|
+
#
|
2
|
+
# Connection adapter decorator
|
3
|
+
#
|
4
|
+
class Solr::Connection::Base
|
5
|
+
|
6
|
+
attr_reader :adapter, :opts
|
7
|
+
|
8
|
+
include Solr::Connection::SearchExt
|
9
|
+
|
10
|
+
# adapter is an instance of:
|
11
|
+
# Solr::Adapter::HTTP
|
12
|
+
# Solr::Adapter::Direct (jRuby only)
|
13
|
+
# Wraps an adapter.
#
# adapter - object that performs the requests (query, update, index_info).
# opts    - Hash of options (default: {}):
#           :auto_commit   - commit after updates; defaults to false
#           :global_params - Hash merged over the defaults below and sent
#                            with every request
#
# The options are copied before the defaults are filled in, so the caller's
# hash is neither modified nor shared with @opts.
def initialize(adapter, opts={})
  @adapter = adapter
  opts = opts.dup
  opts[:auto_commit] ||= false
  default_global_params = {
    :wt => :ruby,
    :echoParams => 'EXPLICIT',
    :debugQuery => true
  }
  opts[:global_params] = default_global_params.merge(opts[:global_params] || {})
  @opts = opts
end
|
25
|
+
|
26
|
+
# sets default params etc.. - could be used as a mapping hook
|
27
|
+
# type of request should be passed in here? -> map_params(:query, {})
|
28
|
+
# Layers the request params over opts[:global_params].
#
# params - Hash of request parameters; its values win over the globals.
#
# Returns a new Hash; neither input is modified.
def map_params(params)
  global_params = opts[:global_params]
  global_params.merge(params)
end
|
31
|
+
|
32
|
+
# send request to the select handler
|
33
|
+
# params is hash with valid solr request params (:q, :fl, :qf etc..)
|
34
|
+
# if params[:wt] is not set, the default is :ruby (see opts[:global_params])
|
35
|
+
# if :wt is something other than :ruby, the raw response body is returned
|
36
|
+
# otherwise, an instance of Solr::Response::Query is returned
|
37
|
+
# NOTE: to get raw ruby, use :wt=>'ruby'
|
38
|
+
# Sends a query through the adapter.
#
# params - Hash of request parameters. :page / :per_page are translated by
#          modify_params_for_pagination, then the result is merged over the
#          global params.
#
# Returns a Solr::Response::Query when the effective :wt is :ruby, otherwise
# the adapter's raw response.
def query(params)
  params = map_params(modify_params_for_pagination(params))
  raw = @adapter.query(params)
  return raw unless params[:wt] == :ruby
  Solr::Response::Query.new(raw)
end
|
43
|
+
|
44
|
+
# Finds a document by its id
|
45
|
+
# Queries for the document whose id field equals the given id.
#
# id     - the id; converted with to_s.
# params - Hash of extra request parameters (default: {}); :q is overwritten.
#
# The query is built with an interpolating string. The single-quoted literal
# used before sent the text "#{id}" verbatim instead of the id. Backslashes
# and double quotes in the id are backslash-escaped so the value cannot
# terminate the quoted phrase early.
#
# Returns whatever query returns.
def find_by_id(id, params={})
  params = map_params(params)
  escaped_id = id.to_s.gsub(/["\\]/) { |char| "\\#{char}" }
  params[:q] = %(id:"#{escaped_id}")
  query params
end
|
50
|
+
|
51
|
+
# Asks the adapter for index information.
#
# params - Hash of request parameters (default: {}), merged over the global
#          params.
#
# Returns a Solr::Response::IndexInfo when the effective :wt is :ruby,
# otherwise the adapter's raw response.
def index_info(params={})
  params = map_params(params)
  raw = @adapter.index_info(params)
  return raw unless params[:wt] == :ruby
  Solr::Response::IndexInfo.new(raw)
end
|
56
|
+
|
57
|
+
# if :ruby is the :wt, then Solr::Response::Base is returned
|
58
|
+
# -- there's not really a way to figure out what kind of handler request this is.
|
59
|
+
|
60
|
+
# Sends data to the adapter's update method and optionally commits.
#
# data        - payload handed to the adapter.
# params      - Hash of request parameters (default: {}), merged over the
#               global params.
# auto_commit - true/false to force the decision; nil (default) defers to
#               @opts[:auto_commit], which must be exactly true to commit.
#
# Returns a Solr::Response::Update when the effective :wt is :ruby, otherwise
# the adapter's raw response.
def update(data, params={}, auto_commit=nil)
  params = map_params(params)
  raw = @adapter.update(data, params)
  should_commit = auto_commit.nil? ? (@opts[:auto_commit] == true) : auto_commit
  commit if should_commit
  return raw unless params[:wt] == :ruby
  Solr::Response::Update.new(raw)
end
|
66
|
+
|
67
|
+
# Builds an add message via message.add and sends it with update.
#
# hash_or_array - document data handed to message.add.
# opts          - options handed to message.add (default: {}).
# block         - optional block forwarded to message.add.
def add(hash_or_array, opts={}, &block)
  add_message = message.add(hash_or_array, opts, &block)
  update(add_message)
end
|
70
|
+
|
71
|
+
# send <commit/>
|
72
|
+
# Sends the commit message. auto_commit is passed as false so that update
# does not call commit again.
#
# opts - Hash of request parameters for update (default: {}).
def commit(opts={})
  update(message.commit, opts, false)
end
|
75
|
+
|
76
|
+
# send <optimize/>
|
77
|
+
# Sends the optimize message through update.
#
# opts - Hash of request parameters for update (default: {}).
def optimize(opts={})
  update(message.optimize, opts)
end
|
80
|
+
|
81
|
+
# send <rollback/>
|
82
|
+
# NOTE: solr 1.4 only
|
83
|
+
# Sends the rollback message through update.
#
# opts - Hash of request parameters for update (default: {}).
def rollback(opts={})
  update(message.rollback, opts)
end
|
86
|
+
|
87
|
+
# Delete one or many documents by id
|
88
|
+
# solr.delete_by_id 10
|
89
|
+
# solr.delete_by_id([12, 41, 199])
|
90
|
+
# Builds a delete-by-id message and sends it through update.
#
# ids  - a single id or an Array of ids, handed to message.delete_by_id.
# opts - Hash of request parameters for update (default: {}).
def delete_by_id(ids, opts={})
  update(message.delete_by_id(ids), opts)
end
|
93
|
+
|
94
|
+
# delete one or many documents by query
|
95
|
+
# solr.delete_by_query 'available:0'
|
96
|
+
# solr.delete_by_query ['quantity:0', 'manu:"FQ"']
|
97
|
+
# Builds a delete-by-query message and sends it through update.
#
# queries - a single query or an Array of queries, handed to
#           message.delete_by_query.
# opts    - Hash of request parameters for update (default: {}).
def delete_by_query(queries, opts={})
  update(message.delete_by_query(queries), opts)
end
|
100
|
+
|
101
|
+
protected
|
102
|
+
|
103
|
+
# shortcut to solr::message
|
104
|
+
# Returns the Solr::Message constant. The update helpers in this class (add,
# commit, optimize, rollback, delete_by_id, delete_by_query) call their
# message-building methods on it.
def message
|
105
|
+
Solr::Message
|
106
|
+
end
|
107
|
+
|
108
|
+
# Translates :page / :per_page into :rows / :start.
#
# params - Hash of request parameters.
#
# Without a truthy :page the params are returned as they are (same object).
# Otherwise a copy is made, :per_page (default 10) becomes :rows, and :page
# becomes :start via calculate_start. Both values are converted with to_i,
# and :page and :per_page are removed from the copy.
#
# Returns the params Hash to send.
def modify_params_for_pagination(params)
  return params unless params[:page]
  paged = params.dup # leave the caller's hash alone
  page_size = (paged.delete(:per_page) || 10).to_i
  paged[:rows] = page_size
  paged[:start] = calculate_start(paged.delete(:page).to_i, page_size)
  paged
end
|
116
|
+
|
117
|
+
# Computes the offset of the first row of a page.
#
# current_page - 1-based page number; anything not greater than 0 is
#                treated as page 1.
# per_page     - number of rows per page.
#
# Returns (page - 1) * per_page.
def calculate_start(current_page, per_page)
  pages_to_skip = current_page > 0 ? current_page - 1 : 0
  pages_to_skip * per_page
end
|
121
|
+
|
122
|
+
end
|
@@ -0,0 +1,110 @@
|
|
1
|
+
module Solr::Connection::SearchExt
|
2
|
+
|
3
|
+
# Convenience wrapper around query that accepts "ruby-ish" options.
#
# query  - String, Array or Hash handed to build_query; a non-empty result
#          is sent as :q.
# params - Hash of request parameters (default: {}). Besides plain Solr
#          parameters it understands:
#          :fields  - String, or Array joined with spaces; sent as :fl
#          :filters - anything build_filters accepts; joined with spaces and
#                     combined with an existing :fq using " AND "
#          :facets  - Hash (one facet), Array of Hashes, or a String
#          :qt defaults to :dismax.
#
# Changes from the previous implementation:
# * the query argument is actually used (it was ignored);
# * facet parameters are merged into the params Hash (the old code called
#   << and + on a Hash, which raise NoMethodError); a key produced more than
#   once, such as "facet.field", collects its values in an Array;
# * an existing :fl or :fq is no longer overwritten with nil when :fields
#   or :filters is absent;
# * the caller's params hash is copied, not modified.
#
# NOTE(review): a String :facets value is treated as the name of a single
# field to facet on; the previous String branch could not run, so the
# intended meaning should be confirmed.
#
# Returns whatever query returns.
# Raises RuntimeError when :facets is not a Hash, Array or String.
def search(query, params={})
  params = params.dup

  q = build_query(query)
  params[:q] = q unless q.to_s.empty?

  fields = params.delete(:fields)
  unless fields.nil?
    params[:fl] = fields.is_a?(Array) ? fields.join(' ') : fields
  end

  filters = params.delete(:filters)
  if filters
    fq = build_filters(filters).join(' ')
    unless fq.empty?
      params[:fq] = params[:fq] ? "#{params[:fq]} AND #{fq}" : fq
    end
  end

  facets = params.delete(:facets)
  if facets
    if facets.is_a?(Array)
      facet_params = build_facets(facets)
    elsif facets.is_a?(Hash)
      facet_params = build_facet(facets)
    elsif facets.is_a?(String)
      facet_params = [{'facet.field' => facets}]
    else
      raise 'facets must be a Hash, an Array or a String'
    end
    params[:facet] = true
    # the builders return (possibly nested) Arrays of single-pair Hashes
    facet_params.flatten.each do |facet_param|
      facet_param.each do |key, value|
        params[key] = params.key?(key) ? [params[key], value].flatten : value
      end
    end
  end

  params[:qt] ||= :dismax
  self.query(params)
end
|
32
|
+
|
33
|
+
protected
|
34
|
+
|
35
|
+
# returns the query param
|
36
|
+
# Turns a "ruby-ish" query description into a query string.
#
# queries - String: returned as is.
#           Array:  elements joined with spaces.
#           Hash:   each key/value becomes "key:value"; an Array value gives
#                   one clause per element and a Range value gives
#                   "key:[min TO max]"; clauses are joined with spaces.
#           Anything else yields "".
#
# Returns the query String.
def build_query(queries)
  case queries
  when String
    queries
  when Array
    queries.join(' ')
  when Hash
    clauses = queries.collect do |field, value|
      if value.is_a?(Array)
        value.collect { |item| "#{field}:#{item}" }
      elsif value.is_a?(Range)
        "#{field}:[#{value.min} TO #{value.max}]"
      else
        "#{field}:#{value}"
      end
    end
    clauses.flatten.join(' ')
  else
    ''
  end
end
|
60
|
+
|
61
|
+
# Turns a "ruby-ish" filter description into a list of filter clauses.
#
# filters - String: a one-element list.
#           Array:  a copy of the list.
#           Hash:   each key/value becomes "key:value"; an Array value gives
#                   one clause per element and a Range value gives
#                   "key:[min TO max]".
#           Anything else yields an empty list.
#
# Returns a new Array of clauses.
def build_filters(filters)
  case filters
  when String
    [filters]
  when Array
    [].concat(filters)
  when Hash
    clauses = filters.collect do |field, value|
      if value.is_a?(Array)
        value.collect { |item| "#{field}:#{item}" }
      elsif value.is_a?(Range)
        "#{field}:[#{value.min} TO #{value.max}]"
      else
        "#{field}:#{value}"
      end
    end
    clauses.flatten
  else
    []
  end
end
|
84
|
+
|
85
|
+
# Builds the parameters for several facets.
#
# facet_array - Array of facet Hashes, each handed to build_facet.
#
# Returns an Array holding one build_facet result per facet, in order.
def build_facets(facet_array)
  facet_array.collect { |facet_hash| build_facet(facet_hash) }
end
|
90
|
+
|
91
|
+
# Builds the parameters for one facet.
#
# facet_hash - Hash describing the facet; keys may be Strings or Symbols:
#              field - becomes "facet.field"
#              query - run through build_query, becomes "facet.query"
#              name  - ignored here
#              other - becomes the per-field option "f.<field>.facet.<key>"
#
# Changes from the previous implementation:
# * build_query takes one argument; it was called with two, which raised
#   ArgumentError for every facet that had a query;
# * the field name for per-field options is read from either :field or
#   'field' (only the Symbol key was consulted before);
# * the unused facet_name local is gone.
#
# Returns an Array of single-pair Hashes, one per generated parameter.
def build_facet(facet_hash)
  field = facet_hash[:field] || facet_hash['field']
  facet_hash.inject([]) do |params, (k, v)|
    case k.to_s
    when 'field'
      params << {"facet.field" => v}
    when 'query'
      params << {"facet.query" => build_query(v)}
    when 'name'
      params # a label only; produces no request parameter
    else
      params << {"f.#{field}.facet.#{k}" => v}
    end
  end
end
|
109
|
+
|
110
|
+
end
|