rsolr 0.12.0 → 1.0.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,9 +1,78 @@
1
- require 'uri'
2
-
3
- module RSolr::Connection
4
-
5
- autoload :NetHttp, 'rsolr/connection/net_http'
6
- autoload :Utils, 'rsolr/connection/utils'
7
- autoload :Requestable, 'rsolr/connection/requestable'
8
-
9
- end
1
+ require 'net/http'
2
+ require 'net/https'
3
+
4
# The default Net::HTTP adapter for RSolr.
#
# An instance builds one Net::HTTP object on first use (optionally routed
# through a proxy) and reuses it for every later request.
class RSolr::Connection

  # Sends the request described by +request_context+ and returns the
  # standard rsolr response hash.
  #
  # client::          the calling client; not used by this adapter.
  # request_context:: Hash read for :uri, :proxy, :method, :data and :headers.
  #
  # Returns {:status => Integer, :headers => Hash, :body => String or nil}.
  # Raises Errno::ECONNREFUSED in place of the "undefined method closed? for
  # nil" NoMethodError; any other NoMethodError is re-raised untouched.
  def execute client, request_context
    h = http request_context[:uri], request_context[:proxy]
    request = setup_raw_request request_context
    request.body = request_context[:data] if request_context[:method] == :post && request_context[:data]
    begin
      response = h.request request
      charset = response.type_params["charset"]
      {:status => response.code.to_i, :headers => response.to_hash, :body => force_charset(response.body, charset)}
    rescue NoMethodError => e
      # catch the undefined closed? exception -- this is a confirmed ruby bug
      raise unless closed_socket_error?(e)
      raise Errno::ECONNREFUSED
    end
  end

  protected

  # Returns the memoized Net::HTTP (or Net::HTTP.Proxy) object.
  #
  # The object is created from the +uri+ and +proxy+ of the first call only;
  # arguments passed on later calls are ignored because of the memoization.
  # SSL is enabled when the port is 443 or the uri is a URI::HTTPS.
  def http uri, proxy = nil
    @http ||= (
      http = if proxy
        # limit the split so a password containing ':' is kept whole
        proxy_user, proxy_pass = proxy.userinfo.split(':', 2) if proxy.userinfo
        Net::HTTP.Proxy(proxy.host, proxy.port, proxy_user, proxy_pass).new uri.host, uri.port
      else
        Net::HTTP.new uri.host, uri.port
      end
      http.use_ssl = uri.port == 443 || uri.instance_of?(URI::HTTPS)
      http
    )
  end

  # Builds the Net::HTTP request object for request_context[:method]
  # (:get, :post or :head), targeting request_context[:uri].request_uri and
  # carrying request_context[:headers] (defaults to no headers).
  #
  # Raises a RuntimeError for any other method. Multipart posts are not
  # implemented; every :post is sent as a plain Net::HTTP::Post.
  def setup_raw_request request_context
    http_method = case request_context[:method]
    when :get
      Net::HTTP::Get
    when :post
      Net::HTTP::Post
    when :head
      Net::HTTP::Head
    else
      raise "Only :get, :post and :head http method types are allowed."
    end
    headers = request_context[:headers] || {}
    raw_request = http_method.new request_context[:uri].request_uri
    raw_request.initialize_http_header headers
    raw_request
  end

  private

  # Tags +body+ with the encoding named by +charset+ when both are present
  # and the body supports force_encoding. The body is returned unchanged when
  # the charset name is not a known encoding (force_encoding raises
  # ArgumentError in that case), so a bad header cannot fail the request.
  def force_charset body, charset
    return body unless charset && body.respond_to?(:force_encoding)
    begin
      body.force_encoding(charset)
    rescue ArgumentError
      body
    end
  end

  # True when +error+ is the "undefined method closed? for nil" failure.
  # The wording of that message differs between Ruby versions (quote style,
  # ":NilClass" suffix), so it is matched loosely instead of by equality.
  def closed_socket_error? error
    !!(error.message =~ /undefined method [`']closed\?' for nil/)
  end

end
@@ -0,0 +1,117 @@
1
+ module RSolr::Error
2
+
3
# Mixin for exceptions that carry the Solr request and response hashes and
# render them in #to_s.
module SolrContext

  attr_accessor :request, :response

  # Builds the error text: the superclass message, then (when a response is
  # set) the status code with its standard text and the parsed error details,
  # then (when a request is set) the URI, headers and data, and finally up to
  # 11 backtrace lines.
  def to_s
    m = "#{super.to_s}"
    if response
      m << " - #{response[:status]} #{Http::STATUS_CODES[response[:status].to_i]}"
      details = parse_solr_error_response response[:body]
      m << "\nError: #{details}\n" if details
    end
    if request
      # append each part; assigning instead of appending would keep only the last one
      m << "\nURI: #{request[:uri].to_s}"
      m << "\nRequest Headers: #{request[:headers].inspect}" if request[:headers]
      m << "\nRequest Data: #{request[:data].inspect}" if request[:data]
    end
    m << "\n"
    # backtrace is nil until the exception has actually been raised
    m << "\nBacktrace: " + (backtrace || [])[0..10].join("\n")
    m
  end

  protected

  # Extracts a short, readable message from a Solr error body.
  #
  # Uses the text between the first <pre> and the last </pre> when present,
  # otherwise the whole body; keeps the first 11 lines and unescapes
  # &gt; / &lt;. Returns nil when the body cannot be processed (e.g. nil).
  def parse_solr_error_response body
    begin
      info = body.scan(/<pre>(.*)<\/pre>/mi)[0]
      info = info.join if info.respond_to? :join

      info ||= body # body may not contain <pre> elements

      partial = info.to_s.split("\n")[0..10]
      partial.join("\n").gsub("&gt;", ">").gsub("&lt;", "<")
    rescue
      nil
    end
  end

end
40
+
41
# Error carrying the request and response hashes of a failed Solr HTTP call.
# Message rendering comes from SolrContext#to_s.
class Http < RuntimeError

  include SolrContext

  # Copied from ActionPack.
  # Defines the standard HTTP status codes, by integer, with their
  # corresponding default message texts.
  # Source: http://www.iana.org/assignments/http-status-codes
  # Frozen so the shared lookup table cannot be modified at runtime.
  STATUS_CODES = {
    100 => "Continue",
    101 => "Switching Protocols",
    102 => "Processing",

    200 => "OK",
    201 => "Created",
    202 => "Accepted",
    203 => "Non-Authoritative Information",
    204 => "No Content",
    205 => "Reset Content",
    206 => "Partial Content",
    207 => "Multi-Status",
    226 => "IM Used",

    300 => "Multiple Choices",
    301 => "Moved Permanently",
    302 => "Found",
    303 => "See Other",
    304 => "Not Modified",
    305 => "Use Proxy",
    307 => "Temporary Redirect",

    400 => "Bad Request",
    401 => "Unauthorized",
    402 => "Payment Required",
    403 => "Forbidden",
    404 => "Not Found",
    405 => "Method Not Allowed",
    406 => "Not Acceptable",
    407 => "Proxy Authentication Required",
    408 => "Request Timeout",
    409 => "Conflict",
    410 => "Gone",
    411 => "Length Required",
    412 => "Precondition Failed",
    413 => "Request Entity Too Large",
    414 => "Request-URI Too Long",
    415 => "Unsupported Media Type",
    416 => "Requested Range Not Satisfiable",
    417 => "Expectation Failed",
    422 => "Unprocessable Entity",
    423 => "Locked",
    424 => "Failed Dependency",
    426 => "Upgrade Required",

    500 => "Internal Server Error",
    501 => "Not Implemented",
    502 => "Bad Gateway",
    503 => "Service Unavailable",
    504 => "Gateway Timeout",
    505 => "HTTP Version Not Supported",
    507 => "Insufficient Storage",
    510 => "Not Extended"
  }.freeze

  # request::  the request hash (read by #to_s for :uri, :headers, :data)
  # response:: the response hash (read by #to_s for :status, :body)
  def initialize request, response
    @request, @response = request, response
  end

end

# Thrown if the :wt is :ruby
# but the body wasn't successfully parsed/evaluated
class InvalidRubyResponse < Http

end
116
+
117
+ end
@@ -0,0 +1,51 @@
1
+ module RSolr::Response
2
+
3
# Hook run when this module is mixed into a response object with #extend.
#
# When base["response"]["docs"] exists, that docs object is extended with
# PaginatedDocSet and given:
#   per_page - the "rows" request param
#   start    - the "start" request param
#   total    - base["response"]["numFound"] converted to an Integer
#
# A response without a #request method, or whose request has no :params,
# leaves per_page and start as nil instead of raising.
# Returns the docs object, or nil when there are no docs.
def self.extended base
  docs = base["response"] && base["response"]["docs"]
  return nil unless docs
  request = base.respond_to?(:request) ? base.request : nil
  params = (request && request[:params]) || {}
  docs.extend PaginatedDocSet
  docs.per_page = params["rows"]
  docs.start = params["start"]
  docs.total = base["response"]["numFound"].to_s.to_i
  docs
end
13
+
14
# A response module which gets mixed into the solr ["response"]["docs"] array.
#
# start, per_page and total are plain accessors. The page calculations
# convert them with to_s.to_i first, so numeric strings (as request params
# often are) and nil are handled, nil counting as 0. Results are computed on
# every call so they follow later changes to the accessors.
module PaginatedDocSet

  attr_accessor :start, :per_page, :total

  # Returns the current page calculated from 'rows' and 'start'.
  # A start below 1 is page 1; a per_page below 1 is treated as 1.
  def current_page
    offset = paginated_int(start)
    return 1 if offset < 1
    page_size = paginated_int(per_page)
    page_size = 1 if page_size < 1
    (offset / page_size) + 1
  end

  # Calculates the total pages from 'numFound' and 'rows'.
  # Returns 1 when per_page is not positive.
  def total_pages
    page_size = paginated_int(per_page)
    page_size > 0 ? (paginated_int(total) / page_size.to_f).ceil : 1
  end

  # returns the previous page number or 1
  def previous_page
    current_page > 1 ? current_page - 1 : 1
  end

  # returns the next page number or the last
  def next_page
    current_page == total_pages ? total_pages : current_page + 1
  end

  # true when the current page is before the last page
  def has_next?
    current_page < total_pages
  end

  # true when the current page is after the first page
  def has_previous?
    current_page > 1
  end

  private

  # Integer form of an accessor value; nil and non-numeric text become 0.
  def paginated_int(value)
    value.to_s.to_i
  end

end
50
+
51
+ end
data/lib/rsolr/uri.rb ADDED
@@ -0,0 +1,58 @@
1
+ require 'uri'
2
+
3
# Helpers for building Solr URLs and query strings.
# The module extends itself, so every method can be called as RSolr::Uri.name.
module RSolr::Uri

  # Parses +url+ into a URI object, appending a trailing "/" when missing.
  def create url
    ::URI.parse(url[-1, 1] == "/" ? url : "#{url}/")
  end

  # Returns a query string param pair as a string.
  # Both key and value are escaped unless +escape+ is false.
  def build_param(k,v, escape = true)
    escape ?
      "#{escape_query_value(k)}=#{escape_query_value(v)}" :
      "#{k}=#{v}"
  end

  # Return the bytesize of String; uses String#size under Ruby 1.8 and
  # String#bytesize under 1.9.
  if ''.respond_to?(:bytesize)
    def bytesize(string)
      string.bytesize
    end
  else
    def bytesize(string)
      string.size
    end
  end

  # Creates a Solr based query string.
  # Keys that have arrays values are set multiple times:
  #   params_to_solr(:q => 'query', :fq => ['a', 'b'])
  # is converted to:
  #   ?q=query&fq=a&fq=b
  #
  # Values whose string form is empty (nil, '') are skipped, as are arrays
  # that contribute no pairs, so the result never contains empty "&&"
  # segments.
  def params_to_solr(params, escape = true)
    mapped = params.map do |k, v|
      if v.is_a?(Array)
        nested = params_to_solr(v.map { |x| [k, x] }, escape)
        # an empty or all-blank array must not leave an empty segment behind
        nested.empty? ? nil : nested
      elsif !v.to_s.empty?
        build_param k, v, escape
      end
    end
    mapped.compact.join("&")
  end

  # Performs URI escaping so that you can construct proper
  # query strings faster. Use this rather than the cgi.rb
  # version since it's faster.
  # (Taken from Rack).
  #
  # Every run of characters other than space, letters, digits, "_", "." and
  # "-" is replaced by upper-case %XX byte escapes; spaces become "+".
  def escape_query_value(s)
    s.to_s.gsub(/([^ a-zA-Z0-9_.-]+)/u) {
      '%'+$1.unpack('H2'*bytesize($1)).join('%').upcase
    }.tr(' ', '+')
  end

  extend self

end
data/lib/rsolr/xml.rb ADDED
@@ -0,0 +1,165 @@
1
+ require 'builder'
2
+
3
+ module RSolr::Xml
4
+
5
# A single Solr document: a hash of "doc" element attributes plus a list of
# Field objects.
class Document

  # "attrs" is a hash for setting the "doc" xml attributes
  # "fields" is an array of Field objects
  attr_accessor :attrs, :fields

  # "doc_hash" must be a Hash/Mash object (nil is treated as empty).
  # If a value in the "doc_hash" is an array,
  # a field object is created for each value...
  # Values whose string form is empty are skipped; every stored value is
  # converted with to_s.
  def initialize(doc_hash = {})
    @fields = []
    (doc_hash || {}).each_pair do |field,values|
      # create a new field for each value (multi-valued)
      # put non-array values into an array
      values = [values] unless values.is_a?(Array)
      values.each do |v|
        next if v.to_s.empty?
        @fields << RSolr::Xml::Field.new({:name=>field}, v.to_s)
      end
    end
    @attrs={}
  end

  # returns an array of fields that match the "name" arg
  # (names are compared by their string form, so :title matches 'title')
  def fields_by_name(name)
    wanted = name.to_s
    @fields.select{|f| f.name.to_s == wanted}
  end

  # returns the *first* field that matches the "name" arg
  # (names are compared by their string form, so :title matches 'title')
  def field_by_name(name)
    wanted = name.to_s
    @fields.detect{|f| f.name.to_s == wanted}
  end

  #
  # Add a field value to the document. Options map directly to
  # XML attributes in the Solr <field> node.
  # See http://wiki.apache.org/solr/UpdateXmlMessages#head-8315b8028923d028950ff750a57ee22cbf7977c6
  #
  # === Example:
  #
  #   document.add_field('title', 'A Title', :boost => 2.0)
  #
  # Returns the fields array.
  def add_field(name, value, options = {})
    @fields << RSolr::Xml::Field.new(options.merge({:name=>name}), value)
  end

end
52
+
53
# One Solr <field> node: its xml attributes and its text value.
class Field

  # attrs - hash of xml attributes for the field node
  # value - the text value for the node
  attr_accessor :attrs, :value

  # attrs must be a hash; value should be something that responds to #to_s
  def initialize(attrs, value)
    @attrs, @value = attrs, value
  end

  # the value of the "name" attribute
  def name
    @attrs[:name]
  end

end
72
+
73
# Builds the XML update messages sent to Solr, using Builder::XmlMarkup.
class Generator

  # Creates an XmlMarkup builder, emits the xml instruction, and then
  # returns the value of the block when one is given (the builder is passed
  # to it) or the builder itself otherwise.
  def build &block
    markup = ::Builder::XmlMarkup.new(:indent => 0, :margin => 0, :encoding => 'UTF-8')
    markup.instruct!
    return markup unless block_given?
    yield markup
  end

  # generates "add" xml for updating solr
  # "data" can be a hash or an array of hashes.
  # - each hash should be a simple key=>value pair representing a solr doc.
  # If a value is an array, multiple fields will be created.
  #
  # "add_attrs" can be a hash for setting the add xml element attributes.
  #
  # This method can also accept a block.
  # The value yielded to the block is an RSolr::Xml::Document; one for each
  # solr doc in "data". You can set xml element attributes for each "doc"
  # element or individual "field" elements.
  #
  # For example:
  #
  # solr.add({:id=>1, :nickname=>'Tim'}, {:boost=>5.0, :commitWithin=>1.0}) do |doc_msg|
  #   doc_msg.attrs[:boost] = 10.00 # boost the document
  #   nickname = doc_msg.field_by_name(:nickname)
  #   nickname.attrs[:boost] = 20 if nickname.value=='Tim' # boost a field
  # end
  #
  # Entries that do not respond to each_pair are used as given and must
  # provide #attrs and #fields themselves.
  def add data, add_attrs = nil, &block
    element_attrs = add_attrs || {}
    entries = data.is_a?(Array) ? data : [data]
    build do |xml|
      xml.add(element_attrs) do |add_node|
        entries.each do |entry|
          message = entry.respond_to?(:each_pair) ? RSolr::Xml::Document.new(entry) : entry
          yield message if block_given?
          add_node.doc(message.attrs) do |doc_node|
            message.fields.each { |field| doc_node.field field.value, field.attrs }
          end
        end
      end
    end
  end

  # generates a <commit/> message; "opts" become its attributes
  def commit opts = nil
    build { |xml| xml.commit(opts || {}) }
  end

  # generates a <optimize/> message; "opts" become its attributes
  def optimize opts = nil
    build { |xml| xml.optimize(opts || {}) }
  end

  # generates a <rollback/> message
  def rollback
    build { |xml| xml.rollback({}) }
  end

  # generates a <delete><id>ID</id></delete> message
  # "ids" can be a single value or array of values
  def delete_by_id ids
    id_list = ids.is_a?(Array) ? ids : [ids]
    build do |xml|
      xml.delete do |delete_node|
        id_list.each { |id| delete_node.id(id) }
      end
    end
  end

  # generates a <delete><query>QUERY</query></delete> message
  # "queries" can be a single value or an array of values
  def delete_by_query(queries)
    query_list = queries.is_a?(Array) ? queries : [queries]
    build do |xml|
      xml.delete do |delete_node|
        query_list.each { |query| delete_node.query(query) }
      end
    end
  end

end
164
+
165
+ end
@@ -0,0 +1,111 @@
1
+ require 'java'
2
+ require 'rubygems'
3
+ require 'rsolr'
4
+
5
+ #
6
+ # Connection for JRuby + DirectSolrConnection
7
+ #
8
+ module RSolr::Direct
9
+
10
# Requires every *.jar found in the "lib" and "dist" sub-directories of
# apache_solr_dir.
#
# The work is done once: after the first call the memoized result (true) is
# returned immediately, whatever directory is passed.
def self.load_java_libs(apache_solr_dir)
  return @java_libs_loaded if @java_libs_loaded
  root = File.expand_path(apache_solr_dir)
  %w(lib dist).each do |sub_dir|
    Dir[File.join(root, sub_dir, '*.jar')].each { |jar| require jar }
  end
  @java_libs_loaded = true
end
24
+
25
RSolr.class_eval do
  # Builds an RSolr::Client backed by an RSolr::Direct::Connection.
  #
  #   RSolr.direct_connect :solr_home => 'apache-solr/example/solr'
  #   RSolr.direct_connect java_solr_core
  #   RSolr.direct_connect java_direct_solr_connection
  #
  # Without a block the client is returned. With a block the client is
  # yielded, the connection is closed afterwards (also when the block
  # raises), and nil is returned.
  def self.direct_connect *args, &blk
    client = RSolr::Client.new RSolr::Direct::Connection.new(*args), {:url => false}
    return client unless block_given?
    begin
      yield client
    ensure
      # always release the connection, even if the block failed
      client.connection.close
    end
    nil
  end
end
40
+
41
# Connection that sends requests through a Java
# org.apache.solr.servlet.DirectSolrConnection (JRuby only).
class Connection

  attr_accessor :opts

  # Raised when the DirectSolrConnection Java class cannot be resolved.
  class MissingRequiredJavaLibs < RuntimeError
  end

  # Raised when opts[:solr_home] is missing or does not exist on disk.
  class InvalidSolrHome < RuntimeError
  end

  # opts can be an instance of org.apache.solr.servlet.DirectSolrConnection
  # or of org.apache.solr.core.SolrCore; it is then used (or wrapped) as-is.
  # Otherwise opts is a Hash:
  #   required: opts[:solr_home] is absolute path to solr home (the directory with "data", "config" etc.)
  #   optional: opts[:data_dir] (defaults to <solr_home>/data)
  #   optional: opts[:auto_connect] (defaults to true) opens the connection immediately
  #
  # The Hash is modified in place (:data_dir and :auto_connect are filled in).
  def initialize opts
    begin
      org.apache.solr.servlet.DirectSolrConnection
    rescue NameError
      raise MissingRequiredJavaLibs
    end
    if opts.is_a?(Hash)
      if opts[:solr_home]
        raise InvalidSolrHome unless File.exist?(opts[:solr_home])
        opts[:data_dir] ||= File.join(opts[:solr_home], 'data')
        @opts = opts
      end
      # auto_connect only applies to Hash options; Java objects have no key?
      opts[:auto_connect] = true unless opts.key?(:auto_connect)
      self.direct if opts[:auto_connect]
    elsif opts.class.to_s == "Java::OrgApacheSolrCore::SolrCore"
      @direct = org.apache.solr.servlet.DirectSolrConnection.new(opts)
    elsif opts.class.to_s == "Java::OrgApacheSolrServlet::DirectSolrConnection"
      @direct = opts
    end
  end

  # sets the @direct instance variable if it has not yet been set
  # Raises InvalidSolrHome when no :solr_home option is available to build it.
  def direct
    @direct ||= begin
      raise InvalidSolrHome unless @opts && @opts[:solr_home]
      org.apache.solr.servlet.DirectSolrConnection.new(@opts[:solr_home], @opts[:data_dir], nil)
    end
  end

  # rsolr.connection.open
  alias_method :open, :direct

  # Closes and forgets the underlying connection, if one is open.
  def close
    if @direct
      @direct.close
      @direct = nil
    end
  end

  # send a request to the connection
  #
  # client::          the calling client; not used here.
  # request_context:: Hash read for :path, :query and :data.
  #
  # Returns {:status => 200, :body => <result of the direct request>, :headers => {}}.
  # Any error raised by the request is extended with
  # RSolr::Error::SolrContext, given the request context, and re-raised.
  def execute client, request_context
    url = [request_context[:path], request_context[:query]].join("?")
    url = "/" + url unless url[0, 1] == "/"
    begin
      body = direct.request(url, request_context[:data])
    rescue
      $!.extend RSolr::Error::SolrContext
      $!.request = request_context
      raise $!
    end
    {
      :status => 200,
      :body => body,
      :headers => {}
    }
  end

end
110
+
111
+ end
data/lib/rsolr.rb CHANGED
@@ -1,40 +1,24 @@
1
+ $: << "#{File.dirname(__FILE__)}" unless $:.include? File.dirname(__FILE__)
1
2
 
2
3
  require 'rubygems'
3
- $:.unshift File.dirname(__FILE__) unless $:.include?(File.dirname(__FILE__))
4
4
 
5
5
  module RSolr
6
6
 
7
- VERSION = '0.12.0'
7
+ %W(Response Char Client Error Connection Uri Xml).each{|n|autoload n.to_sym, "rsolr/#{n.downcase}"}
8
8
 
9
- autoload :Message, 'rsolr/message'
10
- autoload :Client, 'rsolr/client'
11
- autoload :Connection, 'rsolr/connection'
12
-
13
- def self.connect opts={}
14
- Client.new Connection::NetHttp.new(opts)
9
+ def self.version
10
+ @version ||= File.read(File.join(File.dirname(__FILE__), '..', 'VERSION')).chomp
15
11
  end
16
12
 
17
- # A module that contains string related methods
18
- module Char
19
-
20
- # escape - from the solr-ruby library
21
- # RSolr.escape('asdf')
22
- # backslash everything that isn't a word character
23
- def escape(value)
24
- value.gsub(/(\W)/, '\\\\\1')
25
- end
26
-
27
- end
13
+ VERSION = self.version
28
14
 
29
- # send the escape method into the Connection class ->
30
- # solr = RSolr.connect
31
- # solr.escape('asdf')
32
- RSolr::Client.send(:include, Char)
15
+ def self.connect *args
16
+ driver = Class === args[0] ? args[0] : RSolr::Connection
17
+ opts = Hash === args[-1] ? args[-1] : {}
18
+ Client.new driver.new, opts
19
+ end
33
20
 
34
- # bring escape into this module (RSolr) -> RSolr.escape('asdf')
21
+ # RSolr.escape
35
22
  extend Char
36
23
 
37
- # RequestError is a common/generic exception class used by the adapters
38
- class RequestError < RuntimeError; end
39
-
40
24
  end
@@ -0,0 +1,18 @@
1
+ require 'spec_helper'
2
+ describe "RSolr::Char" do
3
+
4
+ let(:char){Object.new.extend RSolr::Char}
5
+
6
+ it 'should escape everything that is not a word with \\' do
7
+ (0..255).each do |ascii|
8
+ chr = ascii.chr
9
+ esc = char.escape(chr)
10
+ if chr =~ /\W/
11
+ esc.to_s.should == "\\#{chr}"
12
+ else
13
+ esc.to_s.should == chr
14
+ end
15
+ end
16
+ end
17
+
18
+ end