mwmitchell-rsolr 0.5.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/CHANGES.txt +41 -0
- data/LICENSE +201 -0
- data/README.rdoc +191 -0
- data/Rakefile +40 -0
- data/examples/direct.rb +20 -0
- data/examples/http.rb +16 -0
- data/lib/core_ext.rb +8 -0
- data/lib/rsolr.rb +34 -0
- data/lib/rsolr/connection.rb +7 -0
- data/lib/rsolr/connection/adapter.rb +7 -0
- data/lib/rsolr/connection/adapter/common_methods.rb +46 -0
- data/lib/rsolr/connection/adapter/direct.rb +80 -0
- data/lib/rsolr/connection/adapter/http.rb +51 -0
- data/lib/rsolr/connection/base.rb +121 -0
- data/lib/rsolr/connection/search_ext.rb +126 -0
- data/lib/rsolr/http_client.rb +115 -0
- data/lib/rsolr/http_client/adapter.rb +6 -0
- data/lib/rsolr/http_client/adapter/curb.rb +51 -0
- data/lib/rsolr/http_client/adapter/net_http.rb +48 -0
- data/lib/rsolr/indexer.rb +23 -0
- data/lib/rsolr/mapper.rb +62 -0
- data/lib/rsolr/mapper/rss.rb +29 -0
- data/lib/rsolr/message.rb +73 -0
- data/lib/rsolr/response.rb +8 -0
- data/lib/rsolr/response/base.rb +33 -0
- data/lib/rsolr/response/index_info.rb +22 -0
- data/lib/rsolr/response/query.rb +170 -0
- data/lib/rsolr/response/update.rb +4 -0
- data/test/connection/direct_test.rb +22 -0
- data/test/connection/http_test.rb +19 -0
- data/test/connection/search_ext_test_methods.rb +17 -0
- data/test/connection/test_methods.rb +122 -0
- data/test/http_client/curb_test.rb +19 -0
- data/test/http_client/net_http_test.rb +13 -0
- data/test/http_client/test_methods.rb +40 -0
- data/test/http_client/util_test.rb +40 -0
- data/test/mapper_test.rb +123 -0
- data/test/message_test.rb +87 -0
- data/test/pagination_test.rb +58 -0
- data/test/ruby-lang.org.rss.xml +391 -0
- data/test/test_helpers.rb +39 -0
- metadata +107 -0
@@ -0,0 +1,115 @@
|
|
1
|
+
#require 'uri'
|
2
|
+
|
3
|
+
# A simple wrapper for different http client implementations.
|
4
|
+
# Supports #get and #post
|
5
|
+
# This was motivated by: http://apocryph.org/2008/11/09/more_indepth_analysis_ruby_http_client_performance/
|
6
|
+
# Net::HTTP is the default adapter
|
7
|
+
|
8
|
+
# Each adapter response should be a hash with the following keys:
|
9
|
+
# :status_code
|
10
|
+
# :url
|
11
|
+
# :body
|
12
|
+
# :path
|
13
|
+
# :params
|
14
|
+
# :data
|
15
|
+
# :headers
|
16
|
+
|
17
|
+
# Example:
|
18
|
+
# hclient = RSolr::HTTPClient.connect('http://www.google.com', :net_http)
|
19
|
+
# response = hclient.get('/search', :hl=>:en, :q=>:ruby, :btnG=>:Search)
|
20
|
+
# puts response[:status_code]
|
21
|
+
# puts response[:body]
|
22
|
+
|
23
|
+
module RSolr::HTTPClient
|
24
|
+
|
25
|
+
autoload :Adapter, 'rsolr/http_client/adapter'
|
26
|
+
|
27
|
+
# Raised by .connect when the requested adapter name is not recognised.
class UnkownAdapterError < RuntimeError
end

# Builds a Base client that wraps the adapter selected by adapter_name.
#
# url          - passed unchanged to the adapter's constructor
# adapter_name - :net_http (the default) or :curb
#
# Raises UnkownAdapterError for any other adapter name.
def self.connect(url, adapter_name=:net_http)
  adapter_class_name =
    if :curb == adapter_name
      'Curb'
    elsif :net_http == adapter_name
      'NetHTTP'
    end
  raise UnkownAdapterError.new("Name: #{adapter_name}") if adapter_class_name.nil?
  adapter = RSolr::HTTPClient::Adapter.const_get(adapter_class_name).new(url)
  Base.new(adapter)
end
|
40
|
+
|
41
|
+
# Wraps an adapter and forwards #get / #post to it.
# Any StandardError raised by the adapter is re-raised as RSolr::RequestError.
class Base

  attr_reader :adapter

  # adapter - object that receives the forwarded #get and #post calls
  def initialize(adapter)
    @adapter = adapter
  end

  # Forwards to the adapter's #get and returns whatever it returns.
  def get(path, params={})
    @adapter.get(path, params)
  rescue => error
    raise RSolr::RequestError.new(error)
  end

  # Forwards to the adapter's #post and returns whatever it returns.
  def post(path, data, params={}, headers={})
    @adapter.post(path, data, params, headers)
  rescue => error
    raise RSolr::RequestError.new(error)
  end

end
|
68
|
+
|
69
|
+
# URL helper methods mixed into the HTTP adapters.
module Util

  # Escapes a query key/value for http.
  # Every run of characters outside [ a-zA-Z0-9_.-] is percent-encoded
  # byte by byte; spaces become '+'.
  def escape(s)
    s.to_s.gsub(/([^ a-zA-Z0-9_.-]+)/) {
      # Work on bytes, not characters, so multi-byte characters are
      # encoded completely on Ruby versions where String#size counts chars.
      $1.unpack('C*').map { |byte| '%%%02X' % byte }.join
    }.tr(' ', '+')
  end

  # Appends a query string to url.
  # string_query is an already-encoded query string (may be nil/empty);
  # params is a hash encoded with #hash_to_params.
  # Returns url unchanged when there is nothing to append.
  def build_url(url='', params={}, string_query='')
    queries = [string_query, hash_to_params(params)]
    queries.delete_if{|i| i.to_s.empty?}
    url += "?#{queries.join('&')}" unless queries.empty?
    url
  end

  # Returns a single escaped "key=value" pair.
  def build_param(k,v)
    "#{escape(k)}=#{escape(v)}"
  end

  #
  # converts hash into URL query string, keys get an alpha sort
  # if a value is an array, the array values get mapped to the same key:
  #   hash_to_params(:q=>'blah', 'facet.field'=>['location_facet', 'format_facet'])
  # returns:
  #   facet.field=location_facet&facet.field=format_facet&q=blah
  #
  # if a value is empty/nil etc., the key is not added
  # returns nil when params is not a Hash
  def hash_to_params(params)
    return unless params.is_a?(Hash)
    # copy params and convert keys to strings
    stringified = params.inject({}) { |acc, (k, v)| acc[k.to_s] = v; acc }
    # get sorted keys
    stringified.keys.sort.inject([]) do |acc, k|
      v = stringified[k]
      if v.is_a?(Array)
        # concat (not <<) so an empty or all-blank array adds nothing,
        # instead of leaving a stray '&' in the joined result
        acc.concat(v.reject { |i| i.to_s.empty? }.collect { |vv| build_param(k, vv) })
      elsif ! v.to_s.empty?
        acc.push(build_param(k, v))
      end
      acc
    end.join('&')
  end

end
|
114
|
+
|
115
|
+
end
|
@@ -0,0 +1,51 @@
|
|
1
|
+
require 'rubygems'
|
2
|
+
require 'curb'
|
3
|
+
|
4
|
+
# HTTP adapter that performs requests through a single Curl::Easy handle.
class RSolr::HTTPClient::Adapter::Curb

  include RSolr::HTTPClient::Util

  # uri - the parsed base URL; c - the Curl::Easy handle reused for every request
  attr_reader :uri, :c

  # url - base URL; request paths are appended to its path
  def initialize(url)
    @uri = URI.parse(url)
    @c = ::Curl::Easy.new
  end

  # Performs a GET and returns the response context hash.
  def get(path, params={})
    @c.url = _build_url(path, params)
    @c.multipart_form_post = false
    @c.perform
    create_http_context(path, params)
  end

  # Performs a POST of data and returns the response context hash.
  def post(path, data, params={}, headers={})
    @c.url = _build_url(path, params)
    @c.headers = headers
    @c.http_post(data)
    create_http_context(path, params, data, headers)
  end

  protected

  # Collects the state of the curl handle and the request arguments
  # into the hash returned to callers.
  def create_http_context(path, params, data=nil, headers={})
    context = {}
    context[:status_code] = @c.response_code.to_i
    context[:url] = @c.url
    context[:body] = @c.body_str
    context[:path] = path
    context[:params] = params
    context[:data] = data
    context[:headers] = headers
    context
  end

  # Builds the absolute request URL: scheme, host, optional port,
  # base path + path, then the base query string and encoded params.
  def _build_url(path, params={})
    base = @uri.scheme + '://' + @uri.host
    base = base + ':' + @uri.port.to_s if @uri.port
    build_url(base + @uri.path + path, params, @uri.query)
  end

end
|
@@ -0,0 +1,48 @@
|
|
1
|
+
require 'net/http'
|
2
|
+
|
3
|
+
# HTTP adapter built on Net::HTTP.
class RSolr::HTTPClient::Adapter::NetHTTP

  include RSolr::HTTPClient::Util

  # uri - the parsed base URL; c - the Net::HTTP object created for its host and port
  attr_reader :uri, :c

  # url - base URL; request paths are appended to its path
  def initialize(url)
    @uri = URI.parse(url)
    @c = Net::HTTP.new(@uri.host, @uri.port)
  end

  # Performs a GET and returns the response context hash.
  def get(path, params={})
    request_path = _build_url(path, params)
    reply = @c.get(request_path)
    create_http_context(reply, request_path, path, params)
  end

  # Performs a POST of data and returns the response context hash.
  def post(path, data, params={}, headers={})
    request_path = _build_url(path, params)
    reply = @c.post(request_path, data, headers)
    create_http_context(reply, request_path, path, params, data, headers)
  end

  protected

  # Combines the Net::HTTP response and the request arguments into the
  # hash returned to callers. url is the path + query that was requested;
  # it is prefixed with scheme, host and port to form :url.
  def create_http_context(net_http_response, url, path, params, data=nil, headers={})
    port_suffix = @uri.port ? ":#{@uri.port}" : ''
    full_url = "#{@uri.scheme}://#{@uri.host}#{port_suffix}" + url
    context = {}
    context[:status_code] = net_http_response.code.to_i
    context[:body] = net_http_response.body
    context[:url] = full_url
    context[:path] = path
    context[:params] = params
    context[:data] = data
    context[:headers] = headers
    context
  end

  # Builds the request path: base path + path, then the base query
  # string and encoded params.
  def _build_url(path, params={})
    request_path = @uri.path + path
    build_url(request_path, params, @uri.query)
  end

end
|
@@ -0,0 +1,23 @@
|
|
1
|
+
# Runs raw data through a mapper and sends the result to solr.add.
class RSolr::Indexer

  attr_reader :solr, :mapper, :opts

  # solr              - object whose #add receives the mapped documents
  # mapping_or_mapper - a Hash (wrapped in RSolr::Mapper::Base) or a ready mapper
  # opts              - stored as-is; not read by this class
  def initialize(solr, mapping_or_mapper, opts={})
    @solr = solr
    @mapper =
      if mapping_or_mapper.is_a?(Hash)
        RSolr::Mapper::Base.new(mapping_or_mapper)
      else
        mapping_or_mapper
      end
    @opts = opts
  end

  # data - collection of raw items; each one is passed to the mapper's #map
  # params - passed through as the second argument of solr.add
  # block - optional; receives the three values solr.add yields
  #         (named add, doc and field here)
  def index(data, params={}, &block)
    mapped_docs = data.collect { |raw| @mapper.map(raw) }
    @solr.add(mapped_docs, params) do |add, doc, field|
      # check opts for :debug etc.?
      next unless block_given?
      yield add, doc, field
    end
  end

end
|
data/lib/rsolr/mapper.rb
ADDED
@@ -0,0 +1,62 @@
|
|
1
|
+
module RSolr::Mapper
|
2
|
+
|
3
|
+
autoload :RSS, 'rsolr/mapper/rss'
|
4
|
+
|
5
|
+
# Raised when a mapping value cannot be converted to a string.
class UnkownMappingValue < RuntimeError
end

# Turns source records into hashes according to a mapping of
# target field => rule.
class Base

  attr_reader :mapping, :opts

  # mapping - hash of target field => rule (see #mapped_field_value)
  # opts    - stored as-is
  # If a block is given it is yielded the mapping so it can be filled in.
  def initialize(mapping={}, opts={}, &block)
    @mapping = mapping
    @opts = opts
    yield @mapping if block_given?
  end

  # source - a hash or a collection of source records
  # override_mapping - mapping to use instead of the one given at construction
  # Yields each mapped hash when a block is given.
  # Returns an array with one mapped hash per source record; fields whose
  # value has an empty string form are left out.
  def map(source, override_mapping=nil, &block)
    records = source.is_a?(Hash) ? [source] : source
    active_mapping = override_mapping || @mapping
    results = []
    records.each_with_index do |record, position|
      mapped = {}
      active_mapping.each do |target_field, rule|
        value = mapped_field_value(record, rule, position)
        mapped[target_field] = value unless value.to_s.empty?
      end
      yield mapped if block_given?
      results << mapped
    end
    results
  end

  protected

  # Hook for subclasses: reads one field from a source record.
  def source_field_value(source, field_name, index)
    source[field_name]
  end

  # Resolves one mapping rule against a source record:
  #   String     - used literally
  #   Symbol     - looked up via #source_field_value
  #   Proc       - called with (source, index)
  #   Enumerable - each element looked up via #source_field_value, flattened
  #   otherwise  - converted with #to_s
  def mapped_field_value(source, mapped_value, index)
    if String === mapped_value
      mapped_value
    elsif Symbol === mapped_value
      source_field_value(source, mapped_value, index)
    elsif Proc === mapped_value
      mapped_value.call(source, index)
    elsif Enumerable === mapped_value
      looked_up = mapped_value.map { |key| source_field_value(source, key, index) }
      looked_up.flatten
    else
      # try to turn it into a string, else raise UnkownMappingValue
      raise(UnkownMappingValue.new(mapped_value)) unless mapped_value.respond_to?(:to_s)
      mapped_value.to_s
    end
  end

end
|
61
|
+
|
62
|
+
end
|
@@ -0,0 +1,29 @@
|
|
1
|
+
#TODO - this could use the http wrapper stuff instead of open-uri/net::http
|
2
|
+
|
3
|
+
require 'rss'
|
4
|
+
require 'open-uri'
|
5
|
+
|
6
|
+
# Mapper that reads its source records from an RSS feed.
class RSolr::Mapper::RSS < RSolr::Mapper::Base

  # the feed parsed by the most recent call to #map
  attr_reader :rss

  # rss_file_or_url is file path or url (see open-uri)
  # override_mapping is an alternate mapping (see RSolr::Mapper::Base)
  # returns array of mapped hashes
  def map(rss_file_or_url, override_mapping=nil)
    # Kernel#open stopped opening URLs in Ruby 3.0 even with open-uri
    # loaded. URI.open handles URLs and falls back to Kernel#open for
    # plain file paths; older rubies without URI.open keep using Kernel.open.
    opener = URI.respond_to?(:open) ? URI : Kernel
    opener.open(rss_file_or_url) do |feed|
      @rss = RSS::Parser.parse(feed.read, false)
      super(@rss.items, override_mapping)
    end
  end

  # sends methods chain down into the @rss object
  # example: :'channel.title' == @rss.channel.title
  # if a method in the chain doesn't exist, the super #source_field_value
  # method is called and its result is used for that step
  def source_field_value(source, method_path, index)
    method_path.to_s.split('.').inject(@rss) do |rss, m|
      rss.respond_to?(m) ? rss.send(m.to_sym) : super(source, method_path, index)
    end
  end

end
|
@@ -0,0 +1,73 @@
|
|
1
|
+
|
2
|
+
# http://builder.rubyforge.org/
|
3
|
+
require 'rubygems'
|
4
|
+
require 'builder'
|
5
|
+
|
6
|
+
# The RSolr::Message class is the XML generation module for sending updates to Solr.
|
7
|
+
|
8
|
+
# Class-level helpers that build the XML update messages.
class RSolr::Message

  class << self

    # Returns a fresh Builder::XmlMarkup for each message.
    def xml
      Builder::XmlMarkup.new
    end

    # Builds an <add> message.
    #   add({})
    #   add([{}, {}])
    # opts become the attributes of the <add> element.
    # When a block is given it is yielded (document, field_attrs) once per
    # field name, before that field's elements are written, so the
    # attributes hash can be modified.
    def add(data, opts={}, &block)
      # a single hash-like document is wrapped in an array
      docs = data.respond_to?(:each_pair) ? [data] : data
      xml.add(opts) do |add_node|
        docs.each do |doc|
          add_node.doc do |doc_node|
            # string keys, written in alpha order (easier testing between ruby and jruby)
            fields = {}
            doc.each { |name, value| fields[name.to_s] = value }
            fields.keys.sort.each do |name|
              field_attrs = {:name=>name}
              yield doc, field_attrs if block_given?
              # multiValued attributes: one <field> per value
              [fields[name]].flatten.each do |value|
                doc_node.field(value, field_attrs)
              end
            end
          end
        end
      end
    end

    # Builds a <commit> element with opts as its attributes.
    def commit(opts={})
      xml.commit(opts)
    end

    # Builds an <optimize> element with opts as its attributes.
    def optimize(opts={})
      xml.optimize(opts)
    end

    # Builds a <rollback> element.
    def rollback
      xml.rollback
    end

    # Builds a <delete> message with one <id> per given id.
    # ids may be a single id or an array of ids.
    def delete_by_id(ids)
      id_list = ids.is_a?(Array) ? ids : [ids]
      xml.delete do |delete_node|
        id_list.each { |id| delete_node.id(id) }
      end
    end

    # Builds a <delete> message with one <query> per given query.
    # queries may be a single query or an array of queries.
    def delete_by_query(queries)
      query_list = queries.is_a?(Array) ? queries : [queries]
      xml.delete do |delete_node|
        query_list.each { |query| delete_node.query(query) }
      end
    end

  end

end
|
@@ -0,0 +1,33 @@
|
|
1
|
+
# default/base response object
|
2
|
+
# This is where the ruby "eval" happens
|
3
|
+
# So far, all response classes extend this
|
4
|
+
# Base response object: turns a raw response into a hash and exposes
# the values found under 'responseHeader'.
class RSolr::Response::Base

  # the http context hash the response was built from (only set when
  # the input was a hash with a :body key)
  attr_reader :source

  attr_reader :raw_response, :data, :header, :params, :status, :query_time

  # data may be one of:
  #   * a Hash with a :body key - :body is evaluated as ruby code
  #   * a String                - evaluated as ruby code
  #   * any other Hash          - used directly as the response data
  #
  # Raises ArgumentError for any other input, instead of failing later
  # with a NoMethodError on nil.
  #
  # SECURITY: the response text is passed to Kernel.eval, so it must only
  # ever come from a trusted server. Evaluating a response from an
  # untrusted source executes arbitrary ruby code.
  def initialize(data)
    if data.is_a?(Hash) && data.has_key?(:body)
      @raw_response = data[:body]
      @data = Kernel.eval(@raw_response)
      @source = data
    elsif data.is_a?(String)
      @raw_response = data
      @data = Kernel.eval(@raw_response)
    elsif data.is_a?(Hash)
      @data = data
    else
      raise ArgumentError, "expected a Hash or String, got #{data.class}"
    end
    @header = @data['responseHeader']
    @params = @header['params']
    @status = @header['status']
    @query_time = @header['QTime']
  end

  # true when the response header's status is 0
  def ok?
    self.status==0
  end

end
|