mwmitchell-rsolr 0.5.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (42)
  1. data/CHANGES.txt +41 -0
  2. data/LICENSE +201 -0
  3. data/README.rdoc +191 -0
  4. data/Rakefile +40 -0
  5. data/examples/direct.rb +20 -0
  6. data/examples/http.rb +16 -0
  7. data/lib/core_ext.rb +8 -0
  8. data/lib/rsolr.rb +34 -0
  9. data/lib/rsolr/connection.rb +7 -0
  10. data/lib/rsolr/connection/adapter.rb +7 -0
  11. data/lib/rsolr/connection/adapter/common_methods.rb +46 -0
  12. data/lib/rsolr/connection/adapter/direct.rb +80 -0
  13. data/lib/rsolr/connection/adapter/http.rb +51 -0
  14. data/lib/rsolr/connection/base.rb +121 -0
  15. data/lib/rsolr/connection/search_ext.rb +126 -0
  16. data/lib/rsolr/http_client.rb +115 -0
  17. data/lib/rsolr/http_client/adapter.rb +6 -0
  18. data/lib/rsolr/http_client/adapter/curb.rb +51 -0
  19. data/lib/rsolr/http_client/adapter/net_http.rb +48 -0
  20. data/lib/rsolr/indexer.rb +23 -0
  21. data/lib/rsolr/mapper.rb +62 -0
  22. data/lib/rsolr/mapper/rss.rb +29 -0
  23. data/lib/rsolr/message.rb +73 -0
  24. data/lib/rsolr/response.rb +8 -0
  25. data/lib/rsolr/response/base.rb +33 -0
  26. data/lib/rsolr/response/index_info.rb +22 -0
  27. data/lib/rsolr/response/query.rb +170 -0
  28. data/lib/rsolr/response/update.rb +4 -0
  29. data/test/connection/direct_test.rb +22 -0
  30. data/test/connection/http_test.rb +19 -0
  31. data/test/connection/search_ext_test_methods.rb +17 -0
  32. data/test/connection/test_methods.rb +122 -0
  33. data/test/http_client/curb_test.rb +19 -0
  34. data/test/http_client/net_http_test.rb +13 -0
  35. data/test/http_client/test_methods.rb +40 -0
  36. data/test/http_client/util_test.rb +40 -0
  37. data/test/mapper_test.rb +123 -0
  38. data/test/message_test.rb +87 -0
  39. data/test/pagination_test.rb +58 -0
  40. data/test/ruby-lang.org.rss.xml +391 -0
  41. data/test/test_helpers.rb +39 -0
  42. metadata +107 -0
@@ -0,0 +1,20 @@
1
# Must be executed using jruby
#
# Example: query Solr through the :direct adapter.
# Assumes a Solr distribution is unpacked at ../apache-solr relative to this file.
require File.join(File.dirname(__FILE__), '..', 'lib', 'rsolr')

base = File.expand_path( File.dirname(__FILE__) )
dist = File.join(base, '..', 'apache-solr')
home = File.join(dist, 'example', 'solr')

solr = RSolr.connect(:direct, :home_dir=>home, :dist_dir=>dist)

# Post the sample documents. The directory is derived from this file's
# location (via dist) so the script works from any working directory.
exampledocs = File.join(dist, 'example', 'exampledocs')
`cd "#{exampledocs}" && ./post.sh ./*.xml`

response = solr.search 'ipod', :filters=>{:price=>(0..50)}, :per_page=>2, :page=>1

solr.delete_by_query('*:*')

# Print the timestamp of every returned document that has one.
response.docs.each do |doc|
  puts doc.timestamp if doc.has?('timestamp')
end
data/examples/http.rb ADDED
@@ -0,0 +1,16 @@
1
# Example: query Solr through the :http adapter (no :url given, so the
# adapter's default URL is used). Unlike the :direct adapter, the :http
# adapter itself does not check for JRuby.
# Assumes a Solr distribution is unpacked at ../apache-solr relative to this file.
require File.join(File.dirname(__FILE__), '..', 'lib', 'rsolr')

solr = RSolr.connect(:http)

# Post the sample documents. The directory is derived from this file's
# location so the script works from any working directory.
base = File.expand_path(File.dirname(__FILE__))
exampledocs = File.join(base, '..', 'apache-solr', 'example', 'exampledocs')
`cd "#{exampledocs}" && ./post.sh ./*.xml`

response = solr.search 'ipod', :filters=>{:price=>(0..50)}, :per_page=>2, :page=>1

solr.delete_by_query('*:*')

# Print the timestamp of every returned document that has one.
response.docs.each do |doc|
  puts doc.timestamp if doc.has?('timestamp')
end
data/lib/core_ext.rb ADDED
@@ -0,0 +1,8 @@
1
class Symbol

  # allow symbol chaining: :one.two.three  # => :"one.two.three"
  #
  # Only a bare call (no arguments, no block) to an otherwise undefined
  # method is treated as chaining. Everything else is handed to the default
  # implementation, which raises NoMethodError.
  #
  # Ruby probes objects for implicit-conversion hooks (to_ary, to_str, ...)
  # and expects either a value of the right type or "method not found".
  # Answering those probes with a chained Symbol breaks core operations such
  # as Array#flatten and Kernel#Array, so they are never chained.
  #
  # respond_to_missing? is deliberately NOT overridden: respond_to? keeps
  # returning false for chained names, so duck-typing checks elsewhere
  # (e.g. respond_to?(:to_xml)) behave exactly as before.
  def method_missing(m, *args, &block)
    return super if !args.empty? || block
    case m
    when :to_ary, :to_a, :to_str, :to_hash, :to_int, :to_io, :to_path, :to_open, :to_regexp
      super
    else
      [self.to_s, m.to_s].join('.').to_sym
    end
  end

end
data/lib/rsolr.rb ADDED
@@ -0,0 +1,34 @@
1
# Put this directory on the load path unless it is already listed (either as
# given or in expanded form), then load the core extensions.
lib_dir = File.dirname(__FILE__)
already_listed = $:.include?(lib_dir) || $:.include?(File.expand_path(lib_dir))
$: << lib_dir unless already_listed
['core_ext'].each { |lib| require lib }
7
+
8
module RSolr

  VERSION = '0.5.7'

  # These constants are loaded on first reference.
  autoload :Message, 'rsolr/message'
  autoload :Response, 'rsolr/response'
  autoload :Connection, 'rsolr/connection'
  autoload :Mapper, 'rsolr/mapper'
  autoload :Indexer, 'rsolr/indexer'
  autoload :HTTPClient, 'rsolr/http_client'

  # factory for creating connections
  # adapter_name is either :http or :direct (the strings 'http' / 'direct'
  # are accepted as well)
  # opts are sent to the adapter instance (:url for http, :dist_dir for :direct etc.)
  # and to the connection instance
  #
  # Returns an RSolr::Connection::Base wrapping the chosen adapter.
  # Raises ArgumentError when adapter_name is not a known adapter.
  def self.connect(adapter_name, opts={})
    types = {
      :http=>'HTTP',
      :direct=>'Direct'
    }
    key = adapter_name.respond_to?(:to_sym) ? adapter_name.to_sym : adapter_name
    class_name = types[key]
    # Fail early with a readable message instead of passing nil to const_get.
    unless class_name
      raise ArgumentError, "Unknown adapter #{adapter_name.inspect} (expected :http or :direct)"
    end
    adapter_class = RSolr::Connection::Adapter.const_get(class_name)
    RSolr::Connection::Base.new(adapter_class.new(opts), opts)
  end

  # Raised by the adapters when a request to Solr fails.
  class RequestError < RuntimeError; end

end
@@ -0,0 +1,7 @@
1
module RSolr::Connection

  # Constant name => file that defines it; each is loaded on first reference.
  {
    :Base      => 'rsolr/connection/base',
    :SearchExt => 'rsolr/connection/search_ext',
    :Adapter   => 'rsolr/connection/adapter'
  }.each { |const_name, path| autoload const_name, path }

end
@@ -0,0 +1,7 @@
1
module RSolr::Connection::Adapter

  # Constant name => file that defines it; each is loaded on first reference.
  {
    :Direct        => 'rsolr/connection/adapter/direct',
    :HTTP          => 'rsolr/connection/adapter/http',
    :CommonMethods => 'rsolr/connection/adapter/common_methods'
  }.each { |const_name, path| autoload const_name, path }

end
@@ -0,0 +1,46 @@
1
# This module is for HTTP + DirectSolrConnection (jruby) connections
# It provides common methods.
# The main query, update and index_info methods are here
# The classes that include this module only need to provide a request method like:
#   send_request(request_path, params, data)
# where:
#   request_path is a string to a handler (/select)
#   params is a hash for query string params
#   data is optional string of xml
# The including class is also expected to set @opts (see default_options for
# the keys read here).
module RSolr::Connection::Adapter::CommonMethods

  # send a request to the "select" handler (@opts[:select_path])
  def query(params)
    send_request @opts[:select_path], params
  end

  # sends data to the update handler (@opts[:update_path])
  # data can be:
  #   string (valid solr update xml)
  #   object with respond_to?(:to_xml)
  # params is a hash with valid solr update params
  def update(data, params={})
    send_request @opts[:update_path], params, data
  end

  # sends a request to the admin luke handler to get info on the index
  # :numTerms defaults to 0 when not supplied.
  def index_info(params={})
    params = params.dup # the default is applied to a copy, not the caller's hash
    params[:numTerms] ||= 0
    send_request @opts[:luke_path], params
  end

  # Handler paths used when the caller does not override them.
  def default_options
    {
      :select_path => '/select',
      :update_path => '/update',
      :luke_path => '/admin/luke'
    }
  end

  # send a request to the adapter (allows requests like /admin/luke etc.)
  # NOTE(review): this version depends on map_params and @adapter, neither of
  # which is defined in this module; a class that defines its own send_request
  # after including the module replaces it.
  def send_request(handler_path, params={}, data=nil)
    params = map_params(params)
    @adapter.send_request(handler_path, params, data)
  end

end
@@ -0,0 +1,80 @@
1
+ raise "JRuby Required" unless defined?(JRUBY_VERSION)
2
+
3
+ require 'java'
4
+
5
+ #
6
+ # Connection for JRuby + DirectSolrConnection
7
+ #
8
class RSolr::Connection::Adapter::Direct

  include RSolr::HTTPClient::Util
  include RSolr::Connection::Adapter::CommonMethods

  attr_accessor :opts, :home_dir

  # required: opts[:home_dir] is absolute path to solr home (the directory with "data", "config" etc.)
  # opts must also contain either
  #   :dist_dir => 'absolute path to solr distribution root'
  # or
  #   :jar_paths => ['array of directories containing the solr lib/jars']
  # When both are given, the dist "lib" and "dist" directories are appended
  # to the supplied :jar_paths.
  # OTHER OPTS:
  #   :data_dir => 'path to the index data' (defaults to <home_dir>/data)
  #   :select_path => 'the/select/handler'
  #   :update_path => 'the/update/handler'
  # NOTE: opts is updated in place with the computed :data_dir and :jar_paths.
  def initialize(opts, &block)
    @home_dir = opts[:home_dir].to_s
    opts[:data_dir] ||= File.join(@home_dir, 'data')
    if opts[:dist_dir]
      # add the standard lib and dist directories to the :jar_paths
      dist_jar_paths = [File.join(opts[:dist_dir], 'lib'), File.join(opts[:dist_dir], 'dist')]
      given = opts[:jar_paths]
      given = given.nil? ? [] : (given.is_a?(Array) ? given : [given])
      # uniq keeps the list stable when the same opts hash is reused
      opts[:jar_paths] = (given + dist_jar_paths).uniq
    end
    @opts = default_options.merge(opts)
  end

  # loads/imports the java dependencies
  # sets the @connection instance variable (created once, then reused)
  def connection
    @connection ||= (
      require_jars(@opts[:jar_paths]) if @opts[:jar_paths]
      import_dependencies
      DirectSolrConnection.new(@home_dir, @opts[:data_dir], nil)
    )
  end

  # send a request to the connection
  # request '/update', :wt=>:xml, '</commit>'
  #
  # data is converted with to_xml when it responds to it.
  # Returns a hash describing the exchange (:body, :url, :path, :params,
  # :data, plus an empty :status_code and empty :headers).
  # Any error raised by the underlying request is re-raised as
  # RSolr::RequestError carrying the original message.
  def send_request(path, params={}, data=nil)
    data = data.to_xml if data.respond_to?(:to_xml)
    # build_url is not defined in this class; presumably it comes from
    # RSolr::HTTPClient::Util - TODO confirm
    url = build_url(path, params)
    begin
      body = connection.request(url, data)
    rescue => e
      raise RSolr::RequestError, e.message
    end
    {
      :status_code=>'',
      :body=>body,
      :url=>url,
      :path=>path,
      :params=>params,
      :data=>data,
      :headers=>{}
    }
  end

  protected

  # do the java import thingy
  def import_dependencies
    import org.apache.solr.servlet.DirectSolrConnection
  end

  # require the jar files
  # paths is a directory or an array of directories; every *.jar found
  # beneath them (recursively) is required.
  def require_jars(paths)
    paths = [paths] unless paths.is_a?(Array)
    paths.each do |path|
      jar_pattern = File.join(path,"**", "*.jar")
      Dir[jar_pattern].each {|jar_file| require jar_file}
    end
  end

end
@@ -0,0 +1,51 @@
1
+ #
2
+ # Connection for standard HTTP Solr server
3
+ #
4
class RSolr::Connection::Adapter::HTTP

  # Class-level setting: which HTTP client adapter new connections use.
  class << self
    attr_accessor :client_adapter
  end

  @client_adapter = :net_http

  include RSolr::Connection::Adapter::CommonMethods

  attr_reader :opts

  # Recognised opts:
  #   :url => 'http://localhost:8080/solr'
  #   :select_path => '/the/url/path/to/the/select/handler'
  #   :update_path => '/the/url/path/to/the/update/handler'
  #   :luke_path => '/admin/luke'
  #
  # A missing :url is filled in (on the given hash) with the local default.
  def initialize(opts={}, &block)
    opts[:url] = 'http://127.0.0.1:8983/solr' unless opts[:url]
    @opts = default_options.merge(opts)
  end

  # The HTTP client for @opts[:url]; built on first use, then reused.
  def connection
    return @connection if @connection
    @connection = RSolr::HTTPClient.connect(@opts[:url], self.class.client_adapter)
  end

  # Send a request to the connection, e.g.
  #   send_request '/update', {:wt=>:xml}, '</commit>'
  # A request with data is POSTed (data is converted with to_xml when it
  # responds to it); a request without data is a GET.
  # Returns the client's response hash, or raises RSolr::RequestError with
  # the response body when the status code is not 200.
  def send_request(path, params={}, data=nil)
    payload = data.respond_to?(:to_xml) ? data.to_xml : data
    response =
      if payload
        connection.post(path, payload, params, post_headers)
      else
        connection.get(path, params)
      end
    return response if response[:status_code] == 200
    raise RSolr::RequestError.new(response[:body])
  end

  protected

  # Headers sent with every POST.
  def post_headers
    {"Content-Type" => 'text/xml; charset=utf-8'}
  end

end
@@ -0,0 +1,121 @@
1
+ #
2
+ # Connection adapter decorator
3
+ #
4
class RSolr::Connection::Base

  attr_reader :adapter, :opts

  include RSolr::Connection::SearchExt

  # "adapter" is instance of:
  #   RSolr::Adapter::HTTP
  #   RSolr::Adapter::Direct (jRuby only)
  # opts[:global_params] (optional) is merged over the defaults below and
  # stored back on opts; the resulting params are sent with every request.
  def initialize(adapter, opts={})
    @adapter=adapter
    opts[:global_params]||={}
    default_global_params = {
      :wt=>:ruby,
      :echoParams=>'EXPLICIT',
      :debugQuery=>true
    }
    opts[:global_params] = default_global_params.merge(opts[:global_params])
    @opts=opts
  end

  # sets default params etc.. - could be used as a mapping hook
  # type of request should be passed in here? -> map_params(:query, {})
  # Returns a new hash: the global params overridden by the given params.
  def map_params(params)
    opts[:global_params].merge(params)
  end

  # send request to the select handler
  # params is hash with valid solr request params (:q, :fl, :qf etc..)
  #   :page / :per_page are translated into :start / :rows
  #   if params[:wt] is not set, the default is :ruby (see opts[:global_params])
  #   if :wt is something other than :ruby, the raw response body is returned
  #   otherwise, an instance of RSolr::Response::Query is returned
  #   NOTE: to get raw ruby, use :wt=>'ruby'
  def query(params)
    params = map_params(modify_params_for_pagination(params))
    response = @adapter.query(params)
    params[:wt]==:ruby ? RSolr::Response::Query::Base.new(response) : response
  end

  # Finds a document by its id
  # Sends the query id:"<id>"; any :q in params is replaced.
  def find_by_id(id, params={})
    params = map_params(params)
    # must be an interpolating literal so the actual id ends up in the query
    params[:q] = %(id:"#{id}")
    query params
  end

  # sends a request to the adapter's index_info
  # returns RSolr::Response::IndexInfo when :wt is :ruby, the raw response otherwise
  def index_info(params={})
    params = map_params(params)
    response = @adapter.index_info(params)
    params[:wt] == :ruby ? RSolr::Response::IndexInfo.new(response) : response
  end

  # if :ruby is the :wt, then RSolr::Response::Update is returned,
  # otherwise the raw response
  # -- there's not really a way to figure out what kind of handler request this is.
  def update(data, params={})
    params = map_params(params)
    response = @adapter.update(data, params)
    params[:wt]==:ruby ? RSolr::Response::Update.new(response) : response
  end

  # builds an add message from hash_or_array (opts and block are passed to
  # RSolr::Message.add) and sends it to the update handler
  def add(hash_or_array, opts={}, &block)
    update message.add(hash_or_array, opts, &block)
  end

  # send </commit>
  def commit(opts={})
    update message.commit, opts
  end

  # send </optimize>
  def optimize(opts={})
    update message.optimize, opts
  end

  # send </rollback>
  # NOTE: solr 1.4 only
  def rollback(opts={})
    update message.rollback, opts
  end

  # Delete one or many documents by id
  #   solr.delete_by_id 10
  #   solr.delete_by_id([12, 41, 199])
  def delete_by_id(ids, opts={})
    update message.delete_by_id(ids), opts
  end

  # delete one or many documents by query
  #   solr.delete_by_query 'available:0'
  #   solr.delete_by_query ['quantity:0', 'manu:"FQ"']
  def delete_by_query(queries, opts={})
    update message.delete_by_query(queries), opts
  end

  protected

  # shortcut to solr::message
  def message
    RSolr::Message
  end

  # Replaces :page / :per_page with the equivalent :rows / :start.
  # Params without either key are returned untouched; otherwise a modified
  # copy is returned (defaults: page 1, 10 per page).
  def modify_params_for_pagination(orig_params)
    return orig_params unless orig_params[:page] || orig_params[:per_page]
    params = orig_params.dup # be nice
    params[:page] ||= 1
    params[:per_page] ||= 10
    params[:rows] = params.delete(:per_page).to_i
    params[:start] = calculate_start(params.delete(:page).to_i, params[:rows])
    params
  end

  # Zero-based offset of the first row on current_page.
  # Page numbers below 1 are treated as page 1.
  def calculate_start(current_page, per_page)
    page = current_page > 0 ? current_page : 1
    (page - 1) * per_page
  end

end
@@ -0,0 +1,126 @@
1
# Convenience search API layered on top of the host object's #query method.
module RSolr::Connection::SearchExt

  # Builds solr params from "ruby-ish" options and passes them to #query.
  #
  # q_param is a String, Array or Hash (see build_query).
  # params may contain, besides regular solr params:
  #   :fields         => String or Array, sent as :fl
  #   :filters        => String, Array or Hash, sent as :fq (see build_filters)
  #   :phrase_filters => Hash of field => value(s); each value is wrapped in
  #                      double quotes and added to :filters (which must then
  #                      be a Hash or absent)
  #   :facets         => Array of facet hashes, or a single facet Hash
  #                      (see build_facet); a String is accepted but ignored
  #
  # The caller's params hash (and its :filters hash) is left unmodified.
  # Raises RuntimeError when :facets is of an unsupported type.
  def search(q_param, params={})
    params = params.dup

    if params[:fields]
      fields = params.delete :fields
      params[:fl] = fields.is_a?(Array) ? fields.join(' ') : fields
    end

    # adds quoted values to the :filters hash
    if params[:phrase_filters]
      phrase_filters = params.delete(:phrase_filters)
      filters = (params[:filters] || {}).dup
      phrase_filters.each do |filter,values|
        quoted = wrap_in_array(values).map { |v| "\"#{v}\"" }
        # build a new list so an existing array/scalar filter is not modified
        filters[filter] = wrap_in_array(filters[filter]) + quoted
      end
      params[:filters] = filters
    end

    params[:fq] = build_filters(params.delete(:filters)) if params[:filters]
    facets = params.delete(:facets) if params[:facets]

    if facets
      case facets
      when Array
        params[:facet] = true
        params.merge! build_facets(facets)
      when Hash
        # a single facet definition is treated as a one-element list
        params[:facet] = true
        params.merge! build_facets([facets])
      when String
        # raw facet strings are accepted but currently not sent to solr
      else
        raise 'facets must either be a Hash or an Array'
      end
    end
    #params[:qt] ||= :dismax
    params[:q] = build_query(q_param)
    self.query params
  end

  protected

  # returns the query param
  #   String => used as is
  #   Array  => items joined with a space
  #   Hash   => "field:value" clauses (see build_field_clauses) joined with a space
  #   anything else => ''
  def build_query(queries)
    case queries
    when String then queries
    when Array then queries.join(' ')
    when Hash then build_field_clauses(queries).join(' ')
    else ''
    end
  end

  # returns an array of filter queries from "ruby-ish" filters
  #   String => one-element array
  #   Array  => a copy of the array
  #   Hash   => "field:value" clauses (see build_field_clauses)
  #   anything else => []
  def build_filters(filters)
    case filters
    when String then [filters]
    when Array then filters.dup
    when Hash then build_field_clauses(filters)
    else []
    end
  end

  # Turns a hash of field => value into an array of "field:value" strings.
  #   Array values produce one clause per element
  #   Range values produce "field:[min TO max]"
  #   other values are interpolated as is
  def build_field_clauses(fields)
    clauses = []
    fields.each do |name, value|
      case value
      when Array
        value.each { |v| clauses << "#{name}:#{v}" }
      when Range
        clauses << "#{name}:[#{value.min} TO #{value.max}]"
      else
        clauses << "#{name}:#{value}"
      end
    end
    clauses
  end

  # Merges the params of every facet hash into a single params hash.
  # "facet.field" and "facet.query" may occur once per facet, so repeated
  # values are collected into an array instead of overwriting each other
  # (array values are already used for :fq). For any other key the last
  # value wins.
  def build_facets(facet_array)
    facet_array.inject({}) do |merged, facet_hash|
      build_facet(facet_hash).each do |param|
        param.each do |key, value|
          repeatable = (key == 'facet.field' || key == 'facet.query')
          if repeatable && merged.has_key?(key)
            merged[key] = wrap_in_array(merged[key]) + wrap_in_array(value)
          else
            merged[key] = value
          end
        end
      end
      merged
    end
  end

  # Converts one facet hash into an array of single-entry param hashes.
  #   'field' / :field => {"facet.field" => value}
  #   'query' / :query => {"facet.query" => build_query(value)}; when the
  #                       facet has a name, the query is remembered under it
  #                       in name_to_facet_query
  #   any other key k  => {"f.<field>.facet.<k>" => value}
  # NOTE(review): the name entry itself also falls into the last case and is
  # sent as "f.<field>.facet.name" - confirm whether that is intended.
  def build_facet(facet_hash)
    params = []
    facet_name = facet_hash['name'] || facet_hash[:name]
    # accept the field under a symbol or a string key, like the name above
    field_name = facet_hash[:field] || facet_hash['field']
    facet_hash.each do |k,v|
      # handle some cases specially
      case k.to_s
      when 'field'
        params << {"facet.field" => v}
      when 'query'
        q = build_query(v)
        params << {"facet.query"=>q}
        # keep track of names => facet_queries
        name_to_facet_query[facet_name] = q if facet_name
      else
        params << {"f.#{field_name}.facet.#{k}" => v}
      end
    end
    params
  end

  # Facet name => facet query string, filled in by build_facet.
  def name_to_facet_query
    @name_to_facet_query ||= {}
  end

  # nil => [], an Array => itself, anything else => one-element array.
  # (Kernel#Array is not used because it would expand Ranges and Hashes.)
  def wrap_in_array(value)
    return [] if value.nil?
    value.is_a?(Array) ? value : [value]
  end

end