mwmitchell-solr 0.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,89 @@
1
+ # This module is for HTTP + DirectSolrConnection (jruby) connections
2
+ # It provides common methods.
3
+ # The main query, update and index_info methods are here
4
+ # The classes that include this module only need to provide a request method like:
5
+ # send_request(request_path, params, data)
6
+ # where:
7
+ # request_path is a string to a handler (/select)
8
+ # params is a hash for query string params
9
+ # data is optional string of xml
10
+ #
11
+ #
12
+ module Solr::Adapter::CommonMethods
13
+
# Sends a search request to the "select" handler.
# params is a hash of solr query params; the handler path comes from
# @opts[:select_path]. Returns whatever send_request returns.
def query(params)
  select_path = @opts[:select_path]
  send_request(select_path, params)
end
18
+
# Posts data to the update handler (@opts[:update_path]).
# data can be:
#   a string (valid solr update xml)
#   an object that responds to :to_xml
# params is a hash with valid solr update params.
# Returns whatever send_request returns.
def update(data, params={})
  update_path = @opts[:update_path]
  send_request(update_path, params, data)
end
27
+
# Sends a request to the admin luke handler (@opts[:luke_path]) to get
# info on the index.
# params is a hash of request params; :numTerms defaults to 0 when it is
# missing or nil. The caller's hash is copied first, so it is never modified.
def index_info(params={})
  params = params.dup
  params[:numTerms] ||= 0
  send_request @opts[:luke_path], params
end
33
+
# The default handler paths, keyed by :select_path, :update_path and
# :luke_path. A new hash is returned on every call.
def default_options
  paths = {}
  paths[:select_path] = '/select'
  paths[:update_path] = '/update'
  paths[:luke_path] = '/admin/luke'
  paths
end
41
+
# Sends a request to the adapter (allows requests like /admin/luke etc.)
# The params are passed through map_params before being handed to
# @adapter.send_request together with the handler path and optional data.
# NOTE(review): neither map_params nor @adapter is defined in this module;
# the including class has to provide them (or override this method) - confirm.
def send_request(handler_path, params={}, data=nil)
  mapped = map_params(params)
  @adapter.send_request(handler_path, mapped, data)
end
47
+
# Escapes a query key/value for http.
# s is converted with to_s; letters, digits, "_", "." and "-" are kept,
# spaces become "+", and every other character is percent-encoded byte by
# byte (upper-case hex). Encoding per byte keeps multibyte UTF-8 characters
# intact, e.g. an e-acute becomes %C3%A9.
def escape(s)
  s.to_s.gsub(/[^ a-zA-Z0-9_.-]+/) { |unsafe|
    unsafe.unpack('C*').map { |byte| '%%%02X' % byte }.join
  }.tr(' ', '+')
end
54
+
# Builds a single "key=value" pair; both sides are passed through escape.
def build_param(k,v)
  [k, v].map { |part| escape(part) }.join('=')
end
58
+
# Takes a path and a hash of query params, returns the path with the escaped
# query string appended (path?key=value&...).
# When hash_to_params yields nothing (params_hash is not a hash) or an empty
# string (no usable params), the bare path is returned so that no dangling
# "?" is produced.
def build_url(path, params_hash=nil)
  query = hash_to_params(params_hash)
  return path if query.nil? || query.empty?
  "#{path}?#{query}"
end
64
+
#
# Converts a hash into a URL query string (without a leading "?"); keys are
# converted to strings and sorted alphabetically.
# If a value is an array, each array item is mapped to the same key:
#   hash_to_params(:q=>'blah', 'facet.field'=>['location_facet', 'format_facet'])
# returns:
#   facet.field=location_facet&facet.field=format_facet&q=blah
#
# If a value (or an array item) is empty/nil etc., it is not added.
# Returns nil when params is not a Hash.
def hash_to_params(params)
  return unless params.is_a?(Hash)
  # copy params and convert keys to strings so symbols and strings sort together
  stringified = params.inject({}) { |acc, (k, v)| acc.merge(k.to_s => v) }
  stringified.keys.sort.inject([]) do |pairs, key|
    values = stringified[key]
    values = [values] unless values.is_a?(Array)
    # one pair per non-blank value; pairs stays flat so join never sees an empty group
    values.each do |value|
      pairs << build_param(key, value) unless value.to_s.empty?
    end
    pairs
  end.join('&')
end
88
+
89
+ end
@@ -0,0 +1,65 @@
1
+ raise "JRuby Required" unless defined?(JRUBY_VERSION)
2
+
3
+ require 'java'
4
+
5
+ #
6
+ # Connection for JRuby + DirectSolrConnection
7
+ #
8
+ class Solr::Adapter::Direct
9
+
10
+ include Solr::Adapter::CommonMethods
11
+
12
+ attr_accessor :opts, :connection, :home_dir
13
+
# required: opts[:home_dir] is the absolute path to solr home (the directory with "data", "config" etc.)
# opts must also contain either
#   :dist_dir => 'absolute path to solr distribution root'
# or
#   :jar_paths => ['array of directories containing the solr lib/jars']
# When :dist_dir is given, :jar_paths is set to its "lib" and "dist"
# sub-directories (replacing any :jar_paths that was passed in).
# OTHER OPTS:
#   :data_dir => defaults to "data" inside :home_dir
#   :select_path => 'the/select/handler'
#   :update_path => 'the/update/handler'
# The opts hash is copied before defaults are filled in, so the caller's
# hash is not modified.
# If a block is given, the @connection instance (DirectSolrConnection) is yielded
def initialize(opts, &block)
  opts = opts.dup
  @home_dir = opts[:home_dir]
  opts[:data_dir] ||= File.join(@home_dir, 'data')
  if opts[:dist_dir]
    # the standard lib and dist directories of the distribution
    opts[:jar_paths] = %w(lib dist).map { |dir| File.join(opts[:dist_dir], dir) }
  end
  @opts = default_options.merge(opts)
  require_jars(@opts[:jar_paths])
  import_dependencies
  @connection = DirectSolrConnection.new(@home_dir, @opts[:data_dir], nil)
  yield @connection if block_given?
end
36
+
# Sends a request to the direct connection, e.g.
#   send_request '/update', {:wt=>:xml}, '<commit/>'
# data is converted with to_xml when it responds to it. The url is built
# with build_url from the handler path and params.
# Any error raised by the connection is re-raised as Solr::RequestError
# carrying the original message.
def send_request(request_url_path, params={}, data=nil)
  payload = data.respond_to?(:to_xml) ? data.to_xml : data
  target = build_url(request_url_path, params)
  begin
    @connection.request(target, payload)
  rescue => error
    raise Solr::RequestError.new(error.message)
  end
end
48
+
49
+ protected
50
+
# Imports the DirectSolrConnection java class through JRuby's import,
# so that initialize can instantiate it.
def import_dependencies
  import(org.apache.solr.servlet.DirectSolrConnection)
end
55
+
# Requires every jar file found (recursively) below the given directories.
# paths may be a single directory, an array of directories, or nil (in which
# case nothing is loaded instead of failing inside File.join).
# Jars are required in sorted order so the load order does not depend on the
# file system. Returns the array of paths that were scanned.
def require_jars(paths)
  Array(paths).each do |path|
    jar_pattern = File.join(path, '**', '*.jar')
    Dir[jar_pattern].sort.each { |jar_file| require jar_file }
  end
end
64
+
65
+ end
@@ -0,0 +1,55 @@
1
+ require 'net/http'
2
+
3
+ #
4
+ # Connection for standard HTTP Solr server
5
+ #
6
+ class Solr::Adapter::HTTP
7
+
8
+ include Solr::Adapter::CommonMethods
9
+
10
+ attr_accessor :opts, :connection, :url
11
+
# opts can have:
#   :url => 'http://localhost:8080/solr' (defaults to 'http://127.0.0.1:8983/solr')
#   :select_path => '/the/url/path/to/the/select/handler'
#   :update_path => '/the/url/path/to/the/update/handler'
#   :luke_path => '/admin/luke'
#
# The opts hash is copied before the default :url is filled in, so the
# caller's hash is not modified.
# If a block is given, the @connection (Net::HTTP) instance is yielded
# (before @opts has been assigned).
def initialize(opts={}, &block)
  opts = opts.dup
  opts[:url] ||= 'http://127.0.0.1:8983/solr'
  @url = URI.parse(opts[:url])
  @connection = Net::HTTP.new(@url.host, @url.port)
  yield @connection if block_given?
  @opts = default_options.merge(opts)
end
26
+
# Sends a request to the http connection, e.g.
#   send_request '/update', {:wt=>:xml}, '<commit/>'
# data is converted with to_xml when it responds to it. With data the request
# is a POST (using post_headers), without data it is a GET. The request path
# is the path of @url followed by request_url_path, plus the query params.
# Returns the response body; raises Solr::RequestError (with the message
# extracted from the error page) when the response code is not '200'.
def send_request(request_url_path, params={}, data=nil)
  payload = data.respond_to?(:to_xml) ? data.to_xml : data
  target = build_url(@url.path + request_url_path, params)
  response = payload ? @connection.post(target, payload, post_headers) : @connection.get(target)
  return response.body if response.code == '200'
  raise Solr::RequestError.new(parse_solr_html_error(response.body))
end
42
+
43
+ protected
44
+
# The standard post headers.
# The charset is a parameter of the Content-Type header; sending it under a
# separate "charset" key would create a header that servers ignore.
def post_headers
  {'Content-Type' => 'text/xml; charset=utf-8'}
end
49
+
# Extracts the message from the solr error response: the text between the
# first <pre> and the last </pre> (case-insensitive, may span lines), with
# &lt; and &gt; turned back into < and >.
# When html is not a string or contains no <pre> section, html itself is
# returned unchanged.
def parse_solr_html_error(html)
  return html unless html.is_a?(String)
  message = html[/<pre>(.*)<\/pre>/mi, 1]
  message ? message.gsub('&lt;', '<').gsub('&gt;', '>') : html
end
54
+
55
+ end
@@ -0,0 +1,7 @@
# Namespace for the connection classes/extensions; each constant below is
# registered for autoloading from the listed file.
module Solr::Connection

  [
    [:Base, 'solr/connection/base'],
    [:SearchExt, 'solr/connection/search_ext'],
    [:PaginationExt, 'solr/connection/pagination_ext']
  ].each { |const_name, path| autoload const_name, path }

end
@@ -0,0 +1,122 @@
1
+ #
2
+ # Connection adapter decorator
3
+ #
4
+ class Solr::Connection::Base
5
+
6
+ attr_reader :adapter, :opts
7
+
8
+ include Solr::Connection::SearchExt
9
+
# adapter is an instance of:
#   Solr::Adapter::HTTP
#   Solr::Adapter::Direct (jRuby only)
# opts can have:
#   :auto_commit => true/false (default false)
#   :global_params => hash of params merged over the defaults
#     (:wt=>:ruby, :echoParams=>'EXPLICIT', :debugQuery=>true)
# The opts hash is copied, so the caller's hash is neither modified nor
# shared with this instance.
def initialize(adapter, opts={})
  @adapter = adapter
  default_global_params = {
    :wt => :ruby,
    :echoParams => 'EXPLICIT',
    :debugQuery => true
  }
  @opts = opts.dup
  @opts[:auto_commit] ||= false
  @opts[:global_params] = default_global_params.merge(@opts[:global_params] || {})
end
25
+
# Applies the default params: returns a new hash made of
# opts[:global_params] overridden by the given params. Neither input hash is
# modified. Could be used as a mapping hook.
# (type of request should be passed in here? -> map_params(:query, {}))
def map_params(params)
  global_params = opts[:global_params]
  global_params.merge(params)
end
31
+
# Sends a request to the select handler.
# params is a hash with valid solr request params (:q, :fl, :qf etc..);
# :page/:per_page are translated by modify_params_for_pagination and the
# global params are applied by map_params.
# If the resulting :wt is the symbol :ruby (the default, see
# opts[:global_params]) an instance of Solr::Response::Query is returned,
# otherwise the raw response body.
# NOTE: to get raw ruby, use :wt=>'ruby'
def query(params)
  request_params = map_params(modify_params_for_pagination(params))
  raw = @adapter.query(request_params)
  return raw unless request_params[:wt] == :ruby
  Solr::Response::Query.new(raw)
end
43
+
# Finds a document by its id: runs query with :q set to id:"<id>".
# params is a hash of additional request params.
# The id is interpolated into a quoted phrase; backslashes and double quotes
# inside the id are escaped so they cannot terminate the phrase.
def find_by_id(id, params={})
  params = map_params(params)
  escaped_id = id.to_s.gsub(/["\\]/) { |char| "\\#{char}" }
  params[:q] = "id:\"#{escaped_id}\""
  query params
end
50
+
# Asks the adapter for info on the index.
# params is a hash of request params (global params are applied by map_params).
# Returns a Solr::Response::IndexInfo when :wt is the symbol :ruby,
# otherwise the raw response.
def index_info(params={})
  request_params = map_params(params)
  raw = @adapter.index_info(request_params)
  return raw unless request_params[:wt] == :ruby
  Solr::Response::IndexInfo.new(raw)
end
56
+
# Sends data to the update handler through the adapter.
# params is a hash of update params (global params are applied by map_params).
# auto_commit: true/false forces/suppresses a commit after the update; when
# nil, a commit is sent only if @opts[:auto_commit] is true.
# Returns a Solr::Response::Update when :wt is the symbol :ruby, otherwise
# the raw response.
def update(data, params={}, auto_commit=nil)
  request_params = map_params(params)
  raw = @adapter.update(data, request_params)
  should_commit = auto_commit.nil? ? @opts[:auto_commit] == true : auto_commit
  commit if should_commit
  return raw unless request_params[:wt] == :ruby
  Solr::Response::Update.new(raw)
end
66
+
# Builds an add message from hash_or_array (opts and the block are handed to
# message.add) and sends it through update with the default update params.
def add(hash_or_array, opts={}, &block)
  add_message = message.add(hash_or_array, opts, &block)
  update(add_message)
end
70
+
# Sends a <commit/> message.
# opts is passed to update as the request params; auto commit is switched
# off for this call (the third argument to update is false).
def commit(opts={})
  commit_message = message.commit
  update(commit_message, opts, false)
end
75
+
# Sends an <optimize/> message.
# opts is passed to update as the request params.
def optimize(opts={})
  optimize_message = message.optimize
  update(optimize_message, opts)
end
80
+
# Sends a <rollback/> message.
# opts is passed to update as the request params.
# NOTE: solr 1.4 only
def rollback(opts={})
  rollback_message = message.rollback
  update(rollback_message, opts)
end
86
+
# Deletes one or many documents by id:
#   solr.delete_by_id 10
#   solr.delete_by_id([12, 41, 199])
# opts is passed to update as the request params.
def delete_by_id(ids, opts={})
  delete_message = message.delete_by_id(ids)
  update(delete_message, opts)
end
93
+
# Deletes one or many documents by query:
#   solr.delete_by_query 'available:0'
#   solr.delete_by_query ['quantity:0', 'manu:"FQ"']
# opts is passed to update as the request params.
def delete_by_query(queries, opts={})
  delete_message = message.delete_by_query(queries)
  update(delete_message, opts)
end
100
+
101
+ protected
102
+
# Shortcut to Solr::Message, the object the update helpers call to build
# their add/commit/optimize/rollback/delete payloads.
def message
  message_builder = Solr::Message
  message_builder
end
107
+
# Translates :page/:per_page into solr's :start/:rows.
# Without params[:page] the params are returned untouched (same object).
# Otherwise a copy is returned in which :page and :per_page are replaced by
# :rows (per_page as an integer, default 10) and :start (computed by
# calculate_start); the caller's hash is not modified.
def modify_params_for_pagination(params)
  return params unless params[:page]
  paged = params.dup # be nice
  rows = (paged.delete(:per_page) || 10).to_i
  page = paged.delete(:page).to_i
  paged[:rows] = rows
  paged[:start] = calculate_start(page, rows)
  paged
end
116
+
# Returns the offset of the first row of the given page: zero for the first
# page, per_page for the second and so on. Pages that are not greater than
# zero are treated as the first page.
def calculate_start(current_page, per_page)
  pages_to_skip = current_page > 0 ? current_page - 1 : 0
  pages_to_skip * per_page
end
121
+
122
+ end
@@ -0,0 +1,110 @@
1
+ module Solr::Connection::SearchExt
2
+
# Higher level search: translates "ruby-ish" options into solr params and
# runs query.
#   query    - String, Array or Hash, turned into :q by build_query; when it
#              produces an empty string an explicit params[:q] is kept
#   params   - hash of solr params, plus these optional keys:
#     :fields  => Array (joined with spaces) or String, sent as :fl
#     :filters => String, Array or Hash (see build_filters), sent as :fq;
#                 appended with " AND " to an existing params[:fq]
#     :facets  => Hash (see build_facet), Array of such hashes, or String;
#                 turns on :facet and adds the facet params, repeated keys
#                 are collected into arrays
#     :qt      => request handler, defaults to :dismax
# The params hash is copied, so the caller's hash is not modified.
# Raises a RuntimeError when :facets is neither a Hash, an Array nor a String.
def search(query, params={})
  params = params.dup

  q = build_query(query)
  params[:q] = q unless q.to_s.empty?

  if params.key?(:fields)
    fields = params.delete(:fields)
    fields = fields.join(' ') if fields.is_a?(Array)
    params[:fl] = fields if fields
  end

  filters = params.delete(:filters)
  fq = filters ? build_filters(filters).join(' ') : ''
  unless fq.empty?
    params[:fq] = params[:fq].to_s.empty? ? fq : "#{params[:fq]} AND #{fq}"
  end

  facets = params.delete(:facets)
  if facets
    facet_params =
      case facets
      when Array then build_facets(facets)
      when Hash then build_facet(facets)
      # NOTE(review): a plain string is assumed to be a facet field name - confirm
      when String then [{'facet.field' => facets}]
      else raise 'facets must either be a Hash or an Array'
      end
    params[:facet] = true
    # the builders return (nested) arrays of single-pair hashes; merge them in
    [facet_params].flatten.each do |facet_param|
      next unless facet_param.is_a?(Hash)
      facet_param.each do |key, value|
        params[key] = params.key?(key) ? [params[key], value].flatten : value
      end
    end
  end

  params[:qt] ||= :dismax
  self.query(params)
end
32
+
33
+ protected
34
+
# Returns the query param as a string:
#   String - returned as is
#   Array  - items joined with a space
#   Hash   - "key:value" terms joined with a space; an Array value adds one
#            term per item, a Range value becomes "key:[min TO max]"
# Anything else gives an empty string.
def build_query(queries)
  case queries
  when String
    queries
  when Array
    queries.join(' ')
  when Hash
    terms = []
    queries.each do |field, value|
      case value
      when Array
        value.each { |item| terms << "#{field}:#{item}" }
      when Range
        terms << "#{field}:[#{value.min} TO #{value.max}]"
      else
        terms << "#{field}:#{value}"
      end
    end
    terms.join(' ')
  else
    ''
  end
end
60
+
# Turns "ruby-ish" filters into an array of filter strings:
#   String - a one element array
#   Array  - a new array with the same items
#   Hash   - "key:value" strings; an Array value adds one string per item,
#            a Range value becomes "key:[min TO max]"
# Anything else gives an empty array.
def build_filters(filters)
  case filters
  when String
    [filters]
  when Array
    filters.map { |filter| filter }
  when Hash
    filters.inject([]) do |clauses, (field, value)|
      if value.is_a?(Array)
        clauses.concat(value.map { |item| "#{field}:#{item}" })
      elsif value.is_a?(Range)
        clauses << "#{field}:[#{value.min} TO #{value.max}]"
      else
        clauses << "#{field}:#{value}"
      end
    end
  else
    []
  end
end
84
+
# Runs build_facet on every facet hash and returns the results in order,
# one entry per facet hash.
def build_facets(facet_array)
  facet_array.map { |facet_hash| build_facet(facet_hash) }
end
90
+
# Turns one facet hash into an array of single-pair param hashes.
# Keys may be strings or symbols:
#   field - {"facet.field" => value}
#   query - {"facet.query" => build_query(value)}
#   name  - ignored (not sent to solr)
#   any other key - {"f.<field>.facet.<key>" => value}, where <field> is the
#                   value stored under :field or 'field'
def build_facet(facet_hash)
  field = facet_hash[:field] || facet_hash['field']
  facet_hash.inject([]) do |params, (key, value)|
    case key.to_s
    when 'field'
      params << {'facet.field' => value}
    when 'query'
      params << {'facet.query' => build_query(value)}
    when 'name'
      params # do nothing
    else
      params << {"f.#{field}.facet.#{key}" => value}
    end
  end
end
109
+
110
+ end