rsolr 0.9.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,29 @@
1
# Example: index the Solr sample documents through a direct (embedded)
# connection, run a filtered query, print each hit's timestamp, then
# remove everything again.
# Must be executed using jruby
require File.join(File.dirname(__FILE__), '..', 'lib', 'rsolr')

base = File.expand_path(File.dirname(__FILE__))
dist = File.join(base, '..', 'apache-solr')
home = File.join(dist, 'example', 'solr')

solr = RSolr.connect(:direct, :home_dir=>home, :dist_dir=>dist)

# Resolve the sample documents through "dist" (which is anchored to this
# script's location) instead of the current working directory, so the
# example behaves the same no matter where it is launched from.
Dir[File.join(dist, 'example', 'exampledocs', '*.xml')].each do |xml_file|
  puts "Updating with #{xml_file}"
  solr.update File.read(xml_file)
end

solr.commit

puts

response = solr.select :q=>'ipod', :fq=>'price:[0 TO 50]', :rows=>2, :start=>0

docs = response['response']['docs']

docs.each do |doc|
  puts doc['timestamp']
end

# clean up: only commit when the delete request returned a truthy response
solr.delete_by_query('*:*') and solr.commit

solr.adapter.close
data/examples/http.rb ADDED
@@ -0,0 +1,25 @@
1
# Example: index the Solr sample documents over HTTP, run a filtered query,
# print the request url/status and each hit's timestamp, then remove
# everything again.
require File.join(File.dirname(__FILE__), '..', 'lib', 'rsolr')

# switch out the http adapter from net_http to curb
solr = RSolr.connect :adapter=>:curb

# Locate the sample documents relative to this script rather than the
# current working directory, so the example can be run from anywhere.
base = File.expand_path(File.dirname(__FILE__))
exampledocs = File.join(base, '..', 'apache-solr', 'example', 'exampledocs', '*.xml')

Dir[exampledocs].each do |xml_file|
  puts "Updating with #{xml_file}"
  solr.update File.read(xml_file)
end

solr.commit

puts

response = solr.select(:q=>'ipod', :fq=>['price:[0 TO 50]'], :rows=>2, :start=>0)

puts "URL : #{response.adapter_response[:url]} -> #{response.adapter_response[:status_code]}"

puts

response['response']['docs'].each do |doc|
  puts doc['timestamp']
end

# clean up: only commit when the delete request returned a truthy response
solr.delete_by_query('*:*') and solr.commit
data/lib/rsolr.rb ADDED
@@ -0,0 +1,66 @@
1
+ # add this directory to the load path if it hasn't already been added
2
+
3
+ require 'rubygems'
4
+
5
+ $: << File.dirname(__FILE__) unless $:.include?(File.dirname(__FILE__))
6
+
7
+ module RSolr
8
+
9
# Library version string; matches the released package version (0.9.6).
VERSION = '0.9.6'
10
+
11
+ autoload :Message, 'rsolr/message'
12
+ autoload :Connection, 'rsolr/connection'
13
+ autoload :HTTPClient, 'rsolr/http_client'
14
+
15
# Factory for creating connections.
# 2 modes of argument operations:
#   1. first argument is solr-adapter type, second arg is options hash for solr-adapter instance.
#   2. options hash for solr-adapter only (no adapter type as first arg)
# A leading nil is accepted and means the same as leaving the type out.
#
# Examples:
#   # default http connection
#   RSolr.connect
#   # http connection with custom url
#   RSolr.connect :url=>'http://solr.web100.org'
#   # direct connection
#   RSolr.connect :direct, :home_dir=>'solr', :dist_dir=>'solr-nightly'
#
# Returns a RSolr::Connection::Base wrapping the newly built adapter.
# Raises a RuntimeError when the type is neither :http nor :direct.
def self.connect(*args)
  # Only a leading Symbol (or an explicit nil) names the adapter type;
  # whatever is left over is handed to the adapter's constructor untouched.
  type_given = args.first.is_a?(Symbol) || (!args.empty? && args.first.nil?)
  type = type_given ? args.shift : :http
  type_class = case type
  when :http, nil
    'HTTP'
  when :direct
    'Direct'
  else
    raise "Invalid connection type: #{type} - use :http, :direct or leave nil for :http/default"
  end
  adapter_class = RSolr::Connection::Adapter.const_get(type_class)
  adapter = adapter_class.new(*args)
  RSolr::Connection::Base.new(adapter)
end
42
+
43
# A module that contains string related methods
module Char

  # escape - from the solr-ruby library
  #   RSolr.escape('asdf')
  # Puts a backslash in front of every character that isn't a word character.
  # "value" is converted with #to_s first, so non-String input
  # (for example an Integer id) is accepted as well.
  # Returns a new String.
  def escape(value)
    value.to_s.gsub(/\W/) { |char| "\\#{char}" }
  end

end
54
+
55
+ # send the escape method into the Connection class ->
56
+ # solr = RSolr.connect
57
+ # solr.escape('asdf')
58
+ RSolr::Connection::Base.send(:include, Char)
59
+
60
+ # bring escape into this module (RSolr) -> RSolr.escape('asdf')
61
+ extend Char
62
+
63
+ # RequestError is a common/generic exception class used by the adapters
64
+ class RequestError < RuntimeError; end
65
+
66
+ end
@@ -0,0 +1,124 @@
1
+ module RSolr::Connection
2
+
3
+ module Adapter
4
+ autoload :Direct, 'rsolr/connection/adapter/direct'
5
+ autoload :HTTP, 'rsolr/connection/adapter/http'
6
+ end
7
+
8
# Adapter-agnostic connection object: wraps an adapter and offers helpers
# for sending select/update style requests through it.
class Base

  attr_reader :adapter

  # "adapter" is instance of:
  #   RSolr::Connection::Adapter::HTTP
  #   RSolr::Connection::Adapter::Direct (jRuby only)
  #   or any other class that uses the connection "interface"
  def initialize(adapter)
    @adapter = adapter
  end

  # Send a request to a request handler using the method name.
  # Every method that is not defined on this object lands here, e.g.
  #   solr.select(:q=>'ipod')
  # is sent to the "/select" path.
  def method_missing(method_name, *args, &blk)
    request("/#{method_name}", *args, &blk)
  end

  # sends data to the update handler
  # data can be a string of xml, or an object that returns xml from its #to_xml method
  def update(data, params={})
    request '/update', params, data
  end

  # send request solr
  # params is hash with valid solr request params (:q, :fl, :qf etc..)
  #   if params[:wt] is not set, the default is :ruby
  #   if :wt is something other than :ruby, the raw response body is used
  #   otherwise, a simple Hash is returned
  #   NOTE: to get raw ruby, use :wt=>'ruby' <- a string, not a symbol like :ruby
  # Anything in "extra" is passed through to the adapter's #request unchanged.
  def request(path, params={}, *extra)
    response = @adapter.request(path, map_params(params), *extra)
    adapt_response(response)
  end

  #
  # single record:
  #   solr.add(:id=>1, :name=>'one')
  #
  # add using an array
  #   solr.add([{:id=>1, :name=>'one'}, {:id=>2, :name=>'two'}])
  #
  # The doc(s) and the optional block are handed to the message builder's #add.
  def add(doc, &block)
    update message.add(doc, &block)
  end

  # send <commit/>
  def commit
    update message.commit
  end

  # send <optimize/>
  def optimize
    update message.optimize
  end

  # send <rollback/>
  # NOTE: solr 1.4 only
  def rollback
    update message.rollback
  end

  # Delete one or many documents by id
  #   solr.delete_by_id 10
  #   solr.delete_by_id([12, 41, 199])
  def delete_by_id(id)
    update message.delete_by_id(id)
  end

  # delete one or many documents by query
  #   solr.delete_by_query 'available:0'
  #   solr.delete_by_query ['quantity:0', 'manu:"FQ"']
  def delete_by_query(query)
    update message.delete_by_query(query)
  end

  # shortcut to RSolr::Message::Builder (one instance, created on first use)
  def message
    @message ||= RSolr::Message::Builder.new
  end

  protected

  # sets default params etc.. - could be used as a mapping hook
  # type of request should be passed in here? -> map_params(:query, {})
  # Returns a new hash with :wt=>:ruby unless the caller supplied its own :wt.
  def map_params(params)
    params ||= {}
    {:wt=>:ruby}.merge(params)
  end

  # "adapter_response" must be a hash with the following keys:
  #   :params - a sub hash of standard solr params
  #   :body - the raw response body from the solr server
  # This method will evaluate the :body value if the params[:wt] == :ruby
  # otherwise, the body is returned
  # The return object has a special method attached called #adapter_response
  # This method gives you access to the original response from the adapter,
  # so you can access things like the actual :url sent to solr,
  # the raw :body, original :params and original :data
  def adapt_response(adapter_response)
    data = adapter_response[:body]
    # if the wt is :ruby, evaluate the ruby string response
    # SECURITY NOTE(review): this evals whatever the server sent back, so it
    # must only ever be pointed at a trusted Solr instance.
    if adapter_response[:params][:wt] == :ruby
      data = Kernel.eval(data)
    end
    # attach a method called #adapter_response that returns the original adapter response value
    def data.adapter_response
      @adapter_response
    end
    data.instance_variable_set('@adapter_response', adapter_response)
    data
  end

end
123
+
124
+ end
@@ -0,0 +1,88 @@
1
+ raise "JRuby Required" unless defined?(JRUBY_VERSION)
2
+
3
+ require 'java'
4
+
5
#
# Connection for JRuby + DirectSolrConnection
#
class RSolr::Connection::Adapter::Direct

  include RSolr::HTTPClient::Util

  attr_accessor :opts

  # opts can be an instance of org.apache.solr.core.SolrCore (it gets wrapped
  # in a new DirectSolrConnection) or an instance of
  # org.apache.solr.servlet.DirectSolrConnection (used as-is).
  # if opts is neither of those, then...
  # required: opts[:home_dir] is absolute path to solr home (the directory with "data", "config" etc.)
  # opts must also contain either
  #   :dist_dir => 'absolute path to solr distribution root'
  # or
  #   :jar_paths => ['array of directories containing the solr lib/jars']
  # :data_dir defaults to "<home_dir>/data".
  # OTHER OPTS (listed for reference; not read anywhere in this class):
  #   :select_path => 'the/select/handler'
  #   :update_path => 'the/update/handler'
  def initialize(opts, &block)
    if defined?(Java::OrgApacheSolrCore::SolrCore) and opts.is_a?(Java::OrgApacheSolrCore::SolrCore)
      @connection = org.apache.solr.servlet.DirectSolrConnection.new(opts)
    elsif defined?(Java::OrgApacheSolrServlet::DirectSolrConnection) and opts.is_a?(Java::OrgApacheSolrServlet::DirectSolrConnection)
      @connection = opts
    else
      # work on a copy so the caller's hash is left untouched
      opts = opts.dup
      opts[:data_dir] ||= File.join(opts[:home_dir].to_s, 'data')
      if opts[:dist_dir] && !opts[:jar_paths]
        # add the standard lib and dist directories to the :jar_paths
        opts[:jar_paths] = [File.join(opts[:dist_dir], 'lib'), File.join(opts[:dist_dir], 'dist')]
      end
      @opts = opts
    end
  end

  # loads/imports the java dependencies
  # sets the @connection instance variable if it has not yet been set
  def connection
    @connection ||= begin
      require_jars(@opts[:jar_paths]) if @opts[:jar_paths]
      org.apache.solr.servlet.DirectSolrConnection.new(@opts[:home_dir], @opts[:data_dir], nil)
    end
  end

  # closes the underlying connection (if there is one) and forgets it
  def close
    if @connection
      @connection.close
      @connection = nil
    end
  end

  # send a request to the connection
  #   request '/select', :q=>'something'
  #   request '/update', {:wt=>:xml}, '</commit>'
  # "data" is converted with #to_xml when it responds to it.
  # "opts" is accepted but not used by this adapter.
  # Returns a hash with :body, :url, :path, :params and :data.
  # Any error raised by the connection is re-raised as RSolr::RequestError.
  def request(path, params={}, data=nil, opts={})
    data = data.to_xml if data.respond_to?(:to_xml)
    url = build_url(path, params)
    begin
      body = connection.request(url, data)
    rescue => e
      raise RSolr::RequestError.new(e.message)
    end
    {
      :body=>body,
      :url=>url,
      :path=>path,
      :params=>params,
      :data=>data,
    }
  end

  protected

  # require the jar files
  # "paths" is a directory or an array of directories; every *.jar found
  # below each directory (recursively) is required.
  # Raises a RuntimeError when a directory does not exist.
  def require_jars(paths)
    paths = [paths] unless paths.is_a?(Array)
    paths.each do |path|
      # File.exist? instead of File.exists?, which newer rubies no longer provide
      raise "Invalid jar path: #{path}" unless File.exist?(path)
      jar_pattern = File.join(path, "**", "*.jar")
      Dir[jar_pattern].each { |jar_file| require jar_file }
    end
  end

end
@@ -0,0 +1,42 @@
1
#
# Connection for standard HTTP Solr server
#
class RSolr::Connection::Adapter::HTTP

  include RSolr::HTTPClient::Util

  attr_reader :opts

  # opts can have:
  #   :url => 'http://localhost:8080/solr'   (default: 'http://127.0.0.1:8983/solr')
  #   :adapter => :net_http or :curb, the http client implementation to use
  def initialize(opts={}, &block)
    # work on a copy so the caller's hash is left untouched
    opts = opts.dup
    opts[:url] ||= 'http://127.0.0.1:8983/solr'
    @opts = opts
  end

  # Builds the http client on first use and keeps it.
  # RSolr::HTTPClient.connect only picks an implementation from a leading
  # Symbol argument, so a Symbol given as :adapter is passed along that way;
  # without it the client's default is used.
  def connection
    @connection ||= begin
      adapter = @opts[:adapter]
      if adapter.is_a?(Symbol)
        RSolr::HTTPClient.connect(adapter, @opts)
      else
        RSolr::HTTPClient.connect(@opts)
      end
    end
  end

  # send a request to the connection
  #   request '/update', {:wt=>:xml}, '</commit>'
  # A trailing Hash in "extra" is taken as request options (:method=>:post
  # forces a POST); the first remaining "extra" value is the request data.
  # Raises RSolr::RequestError (carrying the response body) unless the
  # status code is 200. Returns the response hash from the http client.
  def request(path, params={}, *extra)
    opts = extra[-1].kind_of?(Hash) ? extra.pop : {}
    data = extra[0]
    # same conversion the Direct adapter performs, so objects that build
    # their xml via #to_xml work with both adapters
    data = data.to_xml if data.respond_to?(:to_xml)
    if opts[:method] == :post && data.to_s.empty?
      # force a POST, use the query string as the POST body
      http_context = connection.post(path, hash_to_query(params), {}, {'Content-Type' => 'application/x-www-form-urlencoded'})
    elsif data
      # standard POST, using "data" as the POST body
      http_context = connection.post(path, data, params, {"Content-Type" => 'text/xml; charset=utf-8'})
    else
      # standard GET
      http_context = connection.get(path, params)
    end
    raise RSolr::RequestError.new(http_context[:body]) unless http_context[:status_code] == 200
    http_context
  end

end
@@ -0,0 +1,149 @@
1
+ # A simple wrapper for different http client implementations.
2
+ # Supports #get and #post
3
+ # This was motivated by: http://apocryph.org/2008/11/09/more_indepth_analysis_ruby_http_client_performance/
4
+
5
+ # Each adapters' response should be a hash with the following keys:
6
+ # :status_code
7
+ # :url
8
+ # :body
9
+ # :path
10
+ # :params
11
+ # :data
12
+ # :headers
13
+
14
+ # Example:
15
+ # hclient = RSolr::HTTPClient.connect('http://www.google.com')
16
+ # # SAME AS
17
+ # hclient = RSolr::HTTPClient.connect(:net_http, 'http://www.google.com')
18
+ # hclient = RSolr::HTTPClient.connect(:curb, 'http://www.google.com')
19
+ # response = hclient.get('/search', :hl=>:en, :q=>:ruby, :btnG=>:Search)
20
+ # puts response[:status_code]
21
+ # puts response[:body]
22
+
23
+ require 'uri'
24
+
25
+ module RSolr::HTTPClient
26
+
27
+ module Adapter
28
+ autoload :Curb, 'rsolr/http_client/adapter/curb'
29
+ autoload :NetHTTP, 'rsolr/http_client/adapter/net_http'
30
+ end
31
+
32
+ class UnkownAdapterError < RuntimeError
33
+ end
34
+
35
# Thin wrapper around an http adapter; every adapter failure is re-raised
# as RSolr::RequestError.
class Base

  attr_reader :adapter

  # requires an instance of RSolr::HTTPClient::Adapter::*
  def initialize(adapter)
    @adapter = adapter
  end

  # sends a GET request to the "path" variable
  # an optional hash of "params" can be used,
  # which is later transformed into a GET query string
  # Returns whatever the adapter's #get returns.
  def get(path, params={})
    @adapter.get(path, params)
  rescue => e
    raise RSolr::RequestError.new(e)
  end

  # sends a POST request to the "path" variable
  # "data" is required, and must be a string
  # "params" is an optional hash for query string params...
  # "headers" is a hash for setting request header values.
  # Returns whatever the adapter's #post returns.
  def post(path, data, params={}, headers={})
    @adapter.post(path, data, params, headers)
  rescue => e
    raise RSolr::RequestError.new(e)
  end

end
70
+
71
# Factory for creating connections.
# Can specify the connection type by
# using :net_http or :curb for the first argument.
# The ending arguments are always used for the connection adapter instance.
#
# Examples:
#   # default net_http connection
#   RSolr::HTTPClient.connect :url=>''
#   # SAME AS
#   RSolr::HTTPClient.connect :net_http, :url=>''
#   # curb connection
#   RSolr::HTTPClient.connect :curb, :url=>''
#
# Returns a Base instance wrapping the new adapter.
# Raises UnkownAdapterError for any other adapter type.
def self.connect(*args)
  # a leading Symbol names the adapter; without one :net_http is used
  type = args.first.is_a?(Symbol) ? args.shift : :net_http
  klass = case type
  when :net_http
    'NetHTTP'
  when :curb
    'Curb'
  else
    raise UnkownAdapterError.new("Invalid adapter type: #{type} - use :curb or :net_http or blank for :net_http/default")
  end
  Base.new Adapter.const_get(klass).new(*args)
end
98
+
99
# URL helper methods shared by the adapters.
module Util

  # Performs URI escaping so that you can construct proper
  # query strings faster. Use this rather than the cgi.rb
  # version since it's faster. (Stolen from Rack).
  # "s" is converted with #to_s. Spaces become "+", and every byte of each
  # character outside [a-zA-Z0-9_.-] becomes "%XX", so multibyte characters
  # are encoded in full.
  def escape(s)
    s.to_s.gsub(/[^ a-zA-Z0-9_.-]+/) { |run|
      # unpack by byte (not by character) so nothing is dropped
      run.unpack('C*').map { |byte| '%%%02X' % byte }.join
    }.tr(' ', '+')
  end

  # creates and returns a url as a string
  # "url" is the base url
  # "params" is an optional hash of GET style query params
  # "string_query" is an extra query string that will be appended to the
  # result of "url" and "params".
  def build_url(url='', params={}, string_query='')
    queries = [string_query, hash_to_query(params)]
    queries.delete_if { |i| i.to_s.empty? }
    url += "?#{queries.join('&')}" unless queries.empty?
    url
  end

  # converts a key value pair to an escaped string:
  # Example:
  #   build_param(:id, 1) == "id=1"
  def build_param(k, v)
    "#{escape(k)}=#{escape(v)}"
  end

  #
  # converts hash into URL query string (pairs keep the order in which
  # "params" yields them)
  # if a value is an array, the array values get mapped to the same key:
  #   hash_to_query(:q=>'blah', :fq=>['blah', 'blah'])
  # returns:
  #   q=blah&fq=blah&fq=blah
  #
  # if a value is empty/nil etc., the key is not added
  def hash_to_query(params)
    pairs = params.map do |key, value|
      if value.is_a?(Array)
        hash_to_query(value.map { |item| [key, item] })
      elsif !value.to_s.empty?
        build_param(key, value)
      end
    end
    # drop skipped values and empty arrays so no stray "&" is produced
    pairs.reject { |pair| pair.nil? || pair.empty? }.join('&')
  end

end
148
+
149
+ end