mwmitchell-rsolr 0.5.7

Sign up to get free protection for your applications and to get access to all the features.
Files changed (42) hide show
  1. data/CHANGES.txt +41 -0
  2. data/LICENSE +201 -0
  3. data/README.rdoc +191 -0
  4. data/Rakefile +40 -0
  5. data/examples/direct.rb +20 -0
  6. data/examples/http.rb +16 -0
  7. data/lib/core_ext.rb +8 -0
  8. data/lib/rsolr.rb +34 -0
  9. data/lib/rsolr/connection.rb +7 -0
  10. data/lib/rsolr/connection/adapter.rb +7 -0
  11. data/lib/rsolr/connection/adapter/common_methods.rb +46 -0
  12. data/lib/rsolr/connection/adapter/direct.rb +80 -0
  13. data/lib/rsolr/connection/adapter/http.rb +51 -0
  14. data/lib/rsolr/connection/base.rb +121 -0
  15. data/lib/rsolr/connection/search_ext.rb +126 -0
  16. data/lib/rsolr/http_client.rb +115 -0
  17. data/lib/rsolr/http_client/adapter.rb +6 -0
  18. data/lib/rsolr/http_client/adapter/curb.rb +51 -0
  19. data/lib/rsolr/http_client/adapter/net_http.rb +48 -0
  20. data/lib/rsolr/indexer.rb +23 -0
  21. data/lib/rsolr/mapper.rb +62 -0
  22. data/lib/rsolr/mapper/rss.rb +29 -0
  23. data/lib/rsolr/message.rb +73 -0
  24. data/lib/rsolr/response.rb +8 -0
  25. data/lib/rsolr/response/base.rb +33 -0
  26. data/lib/rsolr/response/index_info.rb +22 -0
  27. data/lib/rsolr/response/query.rb +170 -0
  28. data/lib/rsolr/response/update.rb +4 -0
  29. data/test/connection/direct_test.rb +22 -0
  30. data/test/connection/http_test.rb +19 -0
  31. data/test/connection/search_ext_test_methods.rb +17 -0
  32. data/test/connection/test_methods.rb +122 -0
  33. data/test/http_client/curb_test.rb +19 -0
  34. data/test/http_client/net_http_test.rb +13 -0
  35. data/test/http_client/test_methods.rb +40 -0
  36. data/test/http_client/util_test.rb +40 -0
  37. data/test/mapper_test.rb +123 -0
  38. data/test/message_test.rb +87 -0
  39. data/test/pagination_test.rb +58 -0
  40. data/test/ruby-lang.org.rss.xml +391 -0
  41. data/test/test_helpers.rb +39 -0
  42. metadata +107 -0
@@ -0,0 +1,20 @@
1
# Example: post the Solr sample documents, run a paginated search through an
# embedded Solr (DirectSolrConnection) and print the timestamp of each hit.
#
# Must be executed using jruby
require File.join(File.dirname(__FILE__), '..', 'lib', 'rsolr')

# All paths are resolved relative to this file so the example can be
# started from any working directory.
base = File.expand_path( File.dirname(__FILE__) )
dist = File.join(base, '..', 'apache-solr')
home = File.join(dist, 'example', 'solr')

solr = RSolr.connect(:direct, :home_dir=>home, :dist_dir=>dist)

# post.sh is run from inside the exampledocs directory; the block form of
# Dir.chdir restores the previous working directory afterwards.
Dir.chdir(File.join(dist, 'example', 'exampledocs')) do
  `./post.sh ./*.xml`
end

response = solr.search 'ipod', :filters=>{:price=>(0..50)}, :per_page=>2, :page=>1

# clean up the index; the search response above has already been retrieved
solr.delete_by_query('*:*')

response.docs.each do |doc|
  if doc.has?('timestamp')
    puts doc.timestamp
  end
end
data/examples/http.rb ADDED
@@ -0,0 +1,16 @@
1
# Example: post the Solr sample documents, run a paginated search against a
# Solr server over HTTP and print the timestamp of each hit.
#
# RSolr.connect(:http) is called without a :url here, so the adapter's
# default url is used. Unlike the :direct example, the :http adapter does
# not load any Java classes.
require File.join(File.dirname(__FILE__), '..', 'lib', 'rsolr')

solr = RSolr.connect(:http)

# Resolve the sample-document directory relative to this file so the example
# can be started from any working directory; the block form of Dir.chdir
# restores the previous working directory afterwards.
exampledocs = File.join(File.expand_path(File.dirname(__FILE__)), '..', 'apache-solr', 'example', 'exampledocs')
Dir.chdir(exampledocs) do
  `./post.sh ./*.xml`
end

response = solr.search 'ipod', :filters=>{:price=>(0..50)}, :per_page=>2, :page=>1

# clean up the index; the search response above has already been retrieved
solr.delete_by_query('*:*')

response.docs.each do |doc|
  if doc.has?('timestamp')
    puts doc.timestamp
  end
end
data/lib/core_ext.rb ADDED
@@ -0,0 +1,8 @@
1
class Symbol

  # allow symbol chaining: :one.two.three  #=> :"one.two.three"
  #
  # Only plain, argument-less calls are treated as chaining. Anything else
  # is handed to the default implementation so it raises NoMethodError:
  #   * calls with arguments or a block (:one.two(1), :one.two = 1)
  #   * implicit-conversion probes (to_ary, to_str, ...), which must not be
  #     answered with a Symbol or core methods would see a bogus conversion
  #
  # respond_to_missing? is deliberately NOT overridden: doing so would make
  # every Symbol claim to respond to everything and break duck-typing checks.
  def method_missing(m, *args, &block)
    return super if block || !args.empty?
    case m
    when :to_ary, :to_str, :to_hash, :to_int, :to_io, :to_path, :to_regexp
      super
    else
      [self.to_s, m.to_s].join('.').to_sym
    end
  end

end
data/lib/rsolr.rb ADDED
@@ -0,0 +1,34 @@
1
# Put this directory on the load path (unless it, or its expanded form, is
# already listed) and then require the core extensions.
# The work happens inside an immediately-invoked lambda so that no local
# variables are left behind in the file scope.
lambda do |base, files|
  already_listed = $:.include?(base) || $:.include?(File.expand_path(base))
  $: << base unless already_listed
  files.each do |file|
    require file
  end
end.call(File.dirname(__FILE__), ['core_ext'])
7
+
8
# Top-level namespace of the library. Sub-modules are autoloaded on first use.
module RSolr

  VERSION = '0.5.7'

  autoload :Message, 'rsolr/message'
  autoload :Response, 'rsolr/response'
  autoload :Connection, 'rsolr/connection'
  autoload :Mapper, 'rsolr/mapper'
  autoload :Indexer, 'rsolr/indexer'
  autoload :HTTPClient, 'rsolr/http_client'

  # factory for creating connections
  #
  # adapter_name is either :http or :direct (the String forms 'http' and
  # 'direct' are accepted as well)
  # opts are sent to the adapter instance (:url for http, :dist_dir for :direct etc.)
  # and to the connection instance
  #
  # Returns a RSolr::Connection::Base wrapping the chosen adapter.
  # Raises ArgumentError when adapter_name is not a known adapter; the check
  # happens before any adapter code is loaded.
  def self.connect(adapter_name, opts={})
    types = {
      :http=>'HTTP',
      :direct=>'Direct'
    }
    key = adapter_name.respond_to?(:to_sym) ? adapter_name.to_sym : adapter_name
    class_name = types[key]
    unless class_name
      raise ArgumentError, "unknown adapter #{adapter_name.inspect} (expected :http or :direct)"
    end
    adapter_class = RSolr::Connection::Adapter.const_get(class_name)
    RSolr::Connection::Base.new(adapter_class.new(opts), opts)
  end

  # Raised by the adapters when a request to Solr fails.
  class RequestError < RuntimeError; end

end
@@ -0,0 +1,7 @@
1
# Namespace for the connection layer; each constant below is registered for
# autoloading and is only required the first time it is referenced.
module RSolr::Connection

  [
    [:Base,      'rsolr/connection/base'],
    [:SearchExt, 'rsolr/connection/search_ext'],
    [:Adapter,   'rsolr/connection/adapter']
  ].each do |const_name, path|
    autoload const_name, path
  end

end
@@ -0,0 +1,7 @@
1
# Namespace for the connection adapters; each constant below is registered
# for autoloading and is only required the first time it is referenced.
module RSolr::Connection::Adapter

  [
    [:Direct,        'rsolr/connection/adapter/direct'],
    [:HTTP,          'rsolr/connection/adapter/http'],
    [:CommonMethods, 'rsolr/connection/adapter/common_methods']
  ].each do |const_name, path|
    autoload const_name, path
  end

end
@@ -0,0 +1,46 @@
1
# This module is for HTTP + DirectSolrConnection (jruby) connections
# It provides common methods.
# The main query, update and index_info methods are here
# The classes that include this module only need to provide a request method like:
#   send_request(request_path, params, data)
# where:
#   request_path is a string to a handler (/select)
#   params is a hash for query string params
#   data is optional string of xml
# The including class is also expected to set @opts (see default_options
# for the handler paths that are read from it).
module RSolr::Connection::Adapter::CommonMethods

  # send a request to the "select" handler
  def query(params)
    send_request @opts[:select_path], params
  end

  # sends data to the update handler
  # data can be:
  #   string (valid solr update xml)
  #   object with respond_to?(:to_xml)
  # params is a hash with valid solr update params
  def update(data, params={})
    send_request @opts[:update_path], params, data
  end

  # sends a request to the admin luke handler to get info on the index
  # :numTerms defaults to 0 when the caller did not supply a value;
  # the caller's params hash is left untouched.
  def index_info(params={})
    params = params.merge(:numTerms => 0) unless params[:numTerms]
    send_request @opts[:luke_path], params
  end

  # the handler paths used when the caller does not override them
  def default_options
    {
      :select_path => '/select',
      :update_path => '/update',
      :luke_path => '/admin/luke'
    }
  end

  # send a request to the adapter (allows requests like /admin/luke etc.)
  # NOTE(review): a send_request defined directly in the including class
  # takes precedence over this one. This fallback relies on @adapter and
  # map_params being provided by the including class.
  def send_request(handler_path, params={}, data=nil)
    params = map_params(params)
    @adapter.send_request(handler_path, params, data)
  end

end
@@ -0,0 +1,80 @@
1
raise "JRuby Required" unless defined?(JRUBY_VERSION)

require 'java'

#
# Adapter that sends requests to Solr through DirectSolrConnection (JRuby only)
#
class RSolr::Connection::Adapter::Direct

  include RSolr::HTTPClient::Util
  include RSolr::Connection::Adapter::CommonMethods

  attr_accessor :opts, :home_dir

  # opts[:home_dir] (required) is the absolute path to solr home
  # (the directory with "data", "config" etc.)
  # opts must also contain either
  #   :dist_dir => 'absolute path to solr distribution root'
  # or
  #   :jar_paths => ['array of directories containing the solr lib/jars']
  # When :dist_dir is given, :jar_paths is replaced by its lib and dist directories.
  # :data_dir defaults to the "data" directory inside :home_dir.
  # OTHER OPTS:
  #   :select_path => 'the/select/handler'
  #   :update_path => 'the/update/handler'
  # Note: the defaults are written into the opts hash that was passed in.
  def initialize(opts, &block)
    @home_dir = opts[:home_dir].to_s
    opts[:data_dir] = File.join(@home_dir, 'data') unless opts[:data_dir]
    dist_dir = opts[:dist_dir]
    if dist_dir
      # the standard lib and dist directories become the :jar_paths
      opts[:jar_paths] = ['lib', 'dist'].map { |sub_dir| File.join(dist_dir, sub_dir) }
    end
    @opts = default_options.merge(opts)
  end

  # Lazily creates the DirectSolrConnection: on first use the jars are
  # required, the java class is imported and the instance is cached in
  # @connection.
  def connection
    return @connection if @connection
    jar_paths = @opts[:jar_paths]
    require_jars(jar_paths) if jar_paths
    import_dependencies
    @connection = DirectSolrConnection.new(@home_dir, @opts[:data_dir], nil)
  end

  # send a request to the connection
  # request '/update', :wt=>:xml, '</commit>'
  # Returns a hash describing the request and the response body.
  # Any error raised while requesting is re-raised as RSolr::RequestError.
  def send_request(path, params={}, data=nil)
    data = data.to_xml if data.respond_to?(:to_xml)
    url = build_url(path, params)
    body = begin
      connection.request(url, data)
    rescue => error
      raise RSolr::RequestError.new(error.message)
    end
    {
      :status_code=>'',
      :body=>body,
      :url=>url,
      :path=>path,
      :params=>params,
      :data=>data,
      :headers=>{}
    }
  end

  protected

  # import the java class used by #connection
  def import_dependencies
    import org.apache.solr.servlet.DirectSolrConnection
  end

  # require every jar file found (recursively) under the given path(s)
  def require_jars(paths)
    directories = paths.is_a?(Array) ? paths : [paths]
    directories.each do |directory|
      Dir[File.join(directory, "**", "*.jar")].each do |jar_file|
        require jar_file
      end
    end
  end

end
@@ -0,0 +1,51 @@
1
#
# Adapter that talks to a Solr server over HTTP
#
class RSolr::Connection::Adapter::HTTP

  # class-level setting: which RSolr::HTTPClient adapter new connections use
  class << self
    attr_accessor :client_adapter
  end

  self.client_adapter = :net_http

  include RSolr::Connection::Adapter::CommonMethods

  attr_reader :opts

  # opts can have:
  #   :url => 'http://localhost:8080/solr'
  #   :select_path => '/the/url/path/to/the/select/handler'
  #   :update_path => '/the/url/path/to/the/update/handler'
  #   :luke_path => '/admin/luke'
  #
  # :url falls back to 'http://127.0.0.1:8983/solr'; the fallback is written
  # into the opts hash that was passed in.
  def initialize(opts={}, &block)
    opts[:url] = 'http://127.0.0.1:8983/solr' unless opts[:url]
    @opts = default_options.merge(opts)
  end

  # the http client for @opts[:url], created on first use and then cached
  def connection
    return @connection if @connection
    @connection = RSolr::HTTPClient.connect(@opts[:url], self.class.client_adapter)
  end

  # send a request to the connection
  # request '/update', :wt=>:xml, '</commit>'
  # A request with data is sent as a POST, a request without data as a GET.
  # Raises RSolr::RequestError (carrying the response body) unless the
  # status code is 200; otherwise the http context hash is returned.
  def send_request(path, params={}, data=nil)
    data = data.to_xml if data.respond_to?(:to_xml)
    http_context = data ? connection.post(path, data, params, post_headers) : connection.get(path, params)
    return http_context if http_context[:status_code] == 200
    raise RSolr::RequestError.new(http_context[:body])
  end

  protected

  # The standard post headers
  def post_headers
    {"Content-Type" => 'text/xml; charset=utf-8'}
  end

end
@@ -0,0 +1,121 @@
1
#
# Connection adapter decorator
#
# Wraps an adapter, merges the global params into every request and wraps
# the adapter responses in response objects when :wt is :ruby.
#
class RSolr::Connection::Base

  attr_reader :adapter, :opts

  include RSolr::Connection::SearchExt

  # "adapter" is instance of:
  #   RSolr::Adapter::HTTP
  #   RSolr::Adapter::Direct (jRuby only)
  # opts[:global_params] are merged over the defaults below and sent with
  # every request. Note: the merged value is written back into the opts hash
  # that was passed in.
  def initialize(adapter, opts={})
    @adapter=adapter
    opts[:global_params]||={}
    default_global_params = {
      :wt=>:ruby,
      :echoParams=>'EXPLICIT',
      :debugQuery=>true
    }
    opts[:global_params] = default_global_params.merge(opts[:global_params])
    @opts=opts
  end

  # sets default params etc.. - could be used as a mapping hook
  # type of request should be passed in here? -> map_params(:query, {})
  # Returns a new hash: the global params overridden by the given params.
  def map_params(params)
    opts[:global_params].dup.merge(params).dup
  end

  # send request to the select handler
  # params is hash with valid solr request params (:q, :fl, :qf etc..)
  #   :page and :per_page are translated into :start and :rows
  #   if params[:wt] is not set, the default is :ruby (see opts[:global_params])
  #   if :wt is something other than :ruby, the raw response body is returned
  #   otherwise, an instance of RSolr::Response::Query is returned
  #   NOTE: to get raw ruby, use :wt=>'ruby'
  def query(params)
    params = map_params(modify_params_for_pagination(params))
    response = @adapter.query(params)
    params[:wt]==:ruby ? RSolr::Response::Query::Base.new(response) : response
  end

  # Finds a document by its id
  # The id is sent as a quoted phrase (id:"..."); double quotes and
  # backslashes inside the id are backslash-escaped so they cannot end the
  # phrase early.
  def find_by_id(id, params={})
    params = map_params(params)
    escaped_id = id.to_s.gsub(/["\\]/) { |char| "\\#{char}" }
    params[:q] = "id:\"#{escaped_id}\""
    query params
  end

  # sends a request to the adapter's index_info
  # if :ruby is the :wt, RSolr::Response::IndexInfo is returned,
  # otherwise the adapter response is returned as-is
  def index_info(params={})
    params = map_params(params)
    response = @adapter.index_info(params)
    params[:wt] == :ruby ? RSolr::Response::IndexInfo.new(response) : response
  end

  # sends data to the adapter's update
  # if :ruby is the :wt, RSolr::Response::Update is returned,
  # otherwise the adapter response is returned as-is
  # -- there's not really a way to figure out what kind of handler request this is.
  def update(data, params={})
    params = map_params(params)
    response = @adapter.update(data, params)
    params[:wt]==:ruby ? RSolr::Response::Update.new(response) : response
  end

  # builds an add message from hash_or_array (see RSolr::Message.add)
  # and sends it to the update handler
  def add(hash_or_array, opts={}, &block)
    update message.add(hash_or_array, opts, &block)
  end

  # send </commit>
  def commit(opts={})
    update message.commit, opts
  end

  # send </optimize>
  def optimize(opts={})
    update message.optimize, opts
  end

  # send </rollback>
  # NOTE: solr 1.4 only
  def rollback(opts={})
    update message.rollback, opts
  end

  # Delete one or many documents by id
  #   solr.delete_by_id 10
  #   solr.delete_by_id([12, 41, 199])
  def delete_by_id(ids, opts={})
    update message.delete_by_id(ids), opts
  end

  # delete one or many documents by query
  #   solr.delete_by_query 'available:0'
  #   solr.delete_by_query ['quantity:0', 'manu:"FQ"']
  def delete_by_query(queries, opts={})
    update message.delete_by_query(queries), opts
  end

  protected

  # shortcut to solr::message
  def message
    RSolr::Message
  end

  # Translates :page / :per_page into :start / :rows.
  # Returns orig_params untouched when neither key is present; otherwise a
  # modified copy is returned (:page defaults to 1, :per_page to 10).
  def modify_params_for_pagination(orig_params)
    return orig_params unless orig_params[:page] || orig_params[:per_page]
    params = orig_params.dup # be nice
    params[:page] ||= 1
    params[:per_page] ||= 10
    params[:rows] = params.delete(:per_page).to_i
    params[:start] = calculate_start(params.delete(:page).to_i, params[:rows])
    params
  end

  # zero-based offset of the first row on the given page;
  # pages below 1 are treated as page 1
  def calculate_start(current_page, per_page)
    page = current_page > 0 ? current_page : 1
    (page - 1) * per_page
  end

end
@@ -0,0 +1,126 @@
1
# "ruby-ish" search helpers for a connection.
# The including class must provide a query(params) method.
module RSolr::Connection::SearchExt

  # Builds solr params from ruby-ish options and sends them to #query.
  #
  # q_param is a String, an Array (joined with spaces) or a Hash of
  # field=>value(s) (see build_query).
  # Special keys in params:
  #   :fields         => String or Array, sent as :fl
  #   :filters        => String, Array or Hash, sent as :fq (see build_filters)
  #   :phrase_filters => Hash of field=>values; each value is wrapped in
  #                      double quotes and added to :filters
  #   :facets         => Array of facet hashes (see build_facet). A Hash only
  #                      switches faceting on and a String is ignored.
  # All other params are passed through. The caller's params hash (and its
  # :filters) are not modified.
  # Raises a RuntimeError when :facets is not an Array, Hash or String.
  def search(q_param, params={})
    params = params.dup # be nice

    if params[:fields]
      fields = params.delete :fields
      params[:fl] = fields.is_a?(Array) ? fields.join(' ') : fields
    end

    # adds quoted values to the :filters hash
    if params[:phrase_filters]
      phrase_filters = params.delete(:phrase_filters)
      params[:filters] = merge_phrase_filters(params[:filters], phrase_filters)
    end

    params[:fq] = build_filters(params.delete(:filters)) if params[:filters]
    facets = params.delete(:facets) if params[:facets]

    if facets
      case facets
      when Array
        params[:facet] = true
        params.merge! build_facets(facets)
      when Hash
        # only enables faceting; the hash itself is not translated into params
        params[:facet] = true
      when String
        # ignored
      else
        raise 'facets must either be a Hash or an Array'
      end
    end

    params[:q] = build_query(q_param)
    query params
  end

  protected

  # returns the query param
  #   String => returned as-is
  #   Array  => items joined with a space
  #   Hash   => "field:value" clauses joined with a space (see build_field_clauses)
  #   anything else => ''
  def build_query(queries)
    case queries
    when String
      queries
    when Array
      queries.join(' ')
    when Hash
      build_field_clauses(queries).join(' ')
    else
      ''
    end
  end

  # returns an array of filter queries
  #   String => [string]
  #   Array  => a copy of the array
  #   Hash   => one "field:value" clause per value (see build_field_clauses)
  #   anything else => []
  def build_filters(filters)
    case filters
    when String
      [filters]
    when Array
      filters.dup
    when Hash
      build_field_clauses(filters)
    else
      []
    end
  end

  # Turns a hash of field=>value(s) into an array of "field:value" strings.
  # An Array value yields one clause per item; a Range (alone or inside an
  # Array) is rendered as "[min TO max]".
  def build_field_clauses(hash)
    clauses = []
    hash.each do |field, value|
      values = value.is_a?(Array) ? value : [value]
      values.each do |val|
        rendered = val.is_a?(Range) ? "[#{val.min} TO #{val.max}]" : val
        clauses << "#{field}:#{rendered}"
      end
    end
    clauses
  end

  # Returns a copy of filters (a Hash or nil) with every phrase filter value
  # wrapped in double quotes and appended to the values of its field.
  # An existing non-array value for that field is kept as the first item.
  def merge_phrase_filters(filters, phrase_filters)
    merged = filters ? filters.dup : {}
    phrase_filters.each do |field, values|
      existing = merged[field]
      combined =
        if existing.nil?
          []
        elsif existing.is_a?(Array)
          existing.dup
        else
          [existing]
        end
      list = values.respond_to?(:each) ? values : [values]
      list.each { |v| combined << "\"#{v}\"" }
      merged[field] = combined
    end
    merged
  end

  # Builds the facet params for an array of facet hashes.
  # When several facets produce the same param (e.g. "facet.field"), the
  # values are collected into an array instead of overwriting each other.
  def build_facets(facet_array)
    facet_array.inject({}) do |p, facet_hash|
      build_facet(facet_hash).each do |param|
        p.merge!(param) { |_key, old_value, new_value| [old_value, new_value].flatten }
      end
      p
    end
  end

  # Builds an array of single-entry param hashes for one facet hash.
  #   field => {"facet.field" => value}
  #   query => {"facet.query" => built query}; when the facet has a name,
  #            the query is remembered in name_to_facet_query
  #   every other key => {"f.<field>.facet.<key>" => value}
  # Keys may be strings or symbols.
  def build_facet(facet_hash)
    params = []
    facet_name = facet_hash['name'] || facet_hash[:name]
    facet_field = facet_hash['field'] || facet_hash[:field]
    facet_hash.each do |k,v|
      # handle some cases specially
      if 'field' == k.to_s
        params << {"facet.field" => v}
      elsif 'query' == k.to_s
        q = build_query(v)
        params << {"facet.query"=>q}
        if facet_name
          # keep track of names => facet_queries
          name_to_facet_query[facet_name] = q
        end
      else
        params << {"f.#{facet_field}.facet.#{k}" => v}
      end
    end
    params
  end

  # facet name => facet query, filled by build_facet
  def name_to_facet_query
    @name_to_facet_query ||= {}
  end

end